├── .dockerignore ├── .env ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature-or-enhancement-.md │ └── question.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── docker-build.yml ├── .gitignore ├── .husky ├── commit-msg └── pre-commit ├── .npmrc ├── .prettierrc.js ├── .windsurfrules ├── ARCHITECTURE.md ├── Dockerfile ├── LICENSE ├── README.md ├── README.zh-CN.md ├── app ├── api │ ├── check-update │ │ └── route.js │ ├── llm │ │ ├── fetch-models │ │ │ └── route.js │ │ ├── model │ │ │ └── route.js │ │ ├── ollama │ │ │ └── models │ │ │ │ └── route.js │ │ └── providers │ │ │ └── route.js │ ├── projects │ │ ├── [projectId] │ │ │ ├── batch-generateGA │ │ │ │ └── route.js │ │ │ ├── chunks │ │ │ │ ├── [chunkId] │ │ │ │ │ ├── questions │ │ │ │ │ │ └── route.js │ │ │ │ │ └── route.js │ │ │ │ ├── name │ │ │ │ │ └── route.js │ │ │ │ └── route.js │ │ │ ├── config │ │ │ │ └── route.js │ │ │ ├── custom-split │ │ │ │ └── route.js │ │ │ ├── datasets │ │ │ │ ├── [datasetId] │ │ │ │ │ └── route.js │ │ │ │ ├── export │ │ │ │ │ └── route.js │ │ │ │ ├── optimize │ │ │ │ │ └── route.js │ │ │ │ └── route.js │ │ │ ├── distill │ │ │ │ ├── questions │ │ │ │ │ ├── by-tag │ │ │ │ │ │ └── route.js │ │ │ │ │ └── route.js │ │ │ │ └── tags │ │ │ │ │ ├── all │ │ │ │ │ └── route.js │ │ │ │ │ └── route.js │ │ │ ├── files │ │ │ │ ├── [fileId] │ │ │ │ │ └── ga-pairs │ │ │ │ │ │ └── route.js │ │ │ │ └── route.js │ │ │ ├── generate-questions │ │ │ │ └── route.js │ │ │ ├── huggingface │ │ │ │ └── upload │ │ │ │ │ └── route.js │ │ │ ├── llamaFactory │ │ │ │ ├── checkConfig │ │ │ │ │ └── route.js │ │ │ │ └── generate │ │ │ │ │ └── route.js │ │ │ ├── model-config │ │ │ │ ├── [modelConfigId] │ │ │ │ │ └── route.js │ │ │ │ └── route.js │ │ │ ├── models │ │ │ │ ├── [modelId] │ │ │ │ │ └── route.js │ │ │ │ └── route.js │ │ │ ├── pdf │ │ │ │ └── route.js │ │ │ ├── playground │ │ │ │ └── chat │ │ │ │ │ ├── route.js │ │ │ │ │ └── stream │ │ │ │ │ └── route.js │ │ │ ├── preview │ │ │ │ └── [fileId] │ 
│ │ │ │ └── route.js │ │ │ ├── questions │ │ │ │ ├── [questionId] │ │ │ │ │ └── route.js │ │ │ │ ├── batch-delete │ │ │ │ │ └── route.js │ │ │ │ ├── route.js │ │ │ │ └── tree │ │ │ │ │ └── route.js │ │ │ ├── route.js │ │ │ ├── split │ │ │ │ └── route.js │ │ │ ├── tags │ │ │ │ └── route.js │ │ │ ├── tasks │ │ │ │ ├── [taskId] │ │ │ │ │ └── route.js │ │ │ │ ├── list │ │ │ │ │ └── route.js │ │ │ │ └── route.js │ │ │ └── text-split │ │ │ │ └── route.js │ │ ├── delete-directory │ │ │ └── route.js │ │ ├── migrate │ │ │ └── route.js │ │ ├── open-directory │ │ │ └── route.js │ │ ├── route.js │ │ └── unmigrated │ │ │ └── route.js │ └── update │ │ └── route.js ├── dataset-square │ └── page.js ├── globals.css ├── layout.js ├── page.js └── projects │ └── [projectId] │ ├── datasets │ ├── [datasetId] │ │ ├── page.js │ │ └── useDatasetDetails.js │ └── page.js │ ├── distill │ ├── autoDistillService.js │ └── page.js │ ├── layout.js │ ├── page.js │ ├── playground │ └── page.js │ ├── questions │ ├── components │ │ └── QuestionEditDialog.js │ ├── hooks │ │ └── useQuestionEdit.js │ └── page.js │ ├── settings │ ├── components │ │ └── PromptSettings.js │ └── page.js │ ├── tasks │ └── page.js │ └── text-split │ ├── page.js │ ├── useChunks.js │ ├── usePdfProcessing.js │ ├── useQuestionGeneration.js │ └── useTextSplit.js ├── commitlint.config.mjs ├── components ├── ExportDatasetDialog.js ├── I18nProvider.js ├── LanguageSwitcher.js ├── ModelSelect.js ├── Navbar.js ├── TaskIcon.js ├── ThemeRegistry.js ├── UpdateChecker.js ├── common │ └── MessageAlert.js ├── dataset-square │ ├── DatasetSearchBar.js │ ├── DatasetSiteCard.js │ └── DatasetSiteList.js ├── datasets │ ├── DatasetHeader.js │ ├── DatasetMetadata.js │ ├── EditableField.js │ └── OptimizeDialog.js ├── distill │ ├── AutoDistillDialog.js │ ├── AutoDistillProgress.js │ ├── ConfirmDialog.js │ ├── DistillTreeView.js │ ├── QuestionGenerationDialog.js │ ├── QuestionListItem.js │ ├── TagGenerationDialog.js │ ├── TagMenu.js │ ├── TagTreeItem.js 
│ └── utils.js ├── export │ ├── HuggingFaceTab.js │ ├── LlamaFactoryTab.js │ └── LocalExportTab.js ├── home │ ├── CreateProjectDialog.js │ ├── HeroSection.js │ ├── MigrationDialog.js │ ├── ParticleBackground.js │ ├── ProjectCard.js │ ├── ProjectList.js │ └── StatsCard.js ├── mga │ ├── GaPairsIndicator.js │ └── GaPairsManager.js ├── playground │ ├── ChatArea.js │ ├── ChatMessage.js │ ├── MessageInput.js │ ├── ModelSelector.js │ └── PlaygroundHeader.js ├── questions │ ├── QuestionListView.js │ └── QuestionTreeView.js ├── settings │ ├── BasicSettings.js │ ├── ModelSettings.js │ └── TaskSettings.js ├── tasks │ ├── TaskActions.js │ ├── TaskFilters.js │ ├── TaskProgress.js │ ├── TaskStatusChip.js │ └── TasksTable.js └── text-split │ ├── ChunkCard.js │ ├── ChunkDeleteDialog.js │ ├── ChunkList.js │ ├── ChunkListHeader.js │ ├── ChunkViewDialog.js │ ├── DomainAnalysis.js │ ├── FileUploader.js │ ├── LoadingBackdrop.js │ ├── MarkdownViewDialog.js │ ├── PdfSettings.js │ └── components │ ├── DeleteConfirmDialog.js │ ├── DirectoryView.js │ ├── DomainTreeActionDialog.js │ ├── DomainTreeView.js │ ├── FileList.js │ ├── PdfProcessingDialog.js │ ├── TabPanel.js │ └── UploadArea.js ├── constant ├── model.js ├── setting.js └── sites.json ├── docker-compose.yml ├── electron ├── entitlements.mac.plist ├── loading.html ├── main.js ├── modules │ ├── cache.js │ ├── database.js │ ├── db-updater.js │ ├── ipc-handlers.js │ ├── logger.js │ ├── menu.js │ ├── server.js │ ├── updater.js │ └── window-manager.js ├── preload.js └── util.js ├── hooks ├── useDebounce.js ├── useGenerateDataset.js ├── useModelPlayground.js ├── useSnackbar.js └── useTaskSettings.js ├── jsconfig.json ├── lib ├── db │ ├── base.js │ ├── chunks.js │ ├── datasets.js │ ├── fileToDb.js │ ├── files.js │ ├── ga-pairs.js │ ├── index.js │ ├── llm-models.js │ ├── llm-providers.js │ ├── model-config.js │ ├── projects.js │ ├── questions.js │ ├── tags.js │ ├── texts.js │ └── upload-files.js ├── file │ ├── pdf-processing │ │ ├── core │ │ 
│ └── index.js │ │ └── strategy │ │ │ ├── default.js │ │ │ ├── index.js │ │ │ ├── mineru.js │ │ │ └── vision.js │ ├── split-markdown │ │ ├── core │ │ │ ├── parser.js │ │ │ ├── splitter.js │ │ │ ├── summary.js │ │ │ └── toc.js │ │ ├── index.js │ │ ├── output │ │ │ ├── fileWriter.js │ │ │ └── formatter.js │ │ └── utils │ │ │ └── common.js │ └── text-splitter.js ├── i18n.js ├── llm │ ├── common │ │ └── util.js │ ├── core │ │ ├── index.js │ │ └── providers │ │ │ ├── base.js │ │ │ ├── ollama.js │ │ │ ├── openai.js │ │ │ ├── openrouter.js │ │ │ └── zhipu.js │ └── prompts │ │ ├── addLabel.js │ │ ├── addLabelEn.js │ │ ├── answer.js │ │ ├── answerEn.js │ │ ├── distillQuestions.js │ │ ├── distillQuestionsEn.js │ │ ├── distillTags.js │ │ ├── distillTagsEn.js │ │ ├── enhancedAnswer.js │ │ ├── enhancedAnswerEn.js │ │ ├── ga-generation.js │ │ ├── ga-generationEn.js │ │ ├── label.js │ │ ├── labelEn.js │ │ ├── labelRevise.js │ │ ├── labelReviseEn.js │ │ ├── newAnswer.js │ │ ├── newAnswerEn.js │ │ ├── optimalTitle.js │ │ ├── optimalTitleEn.js │ │ ├── optimizeCot.js │ │ ├── optimizeCotEn.js │ │ ├── pdfToMarkdown.js │ │ ├── pdfToMarkdownEn.js │ │ ├── question.js │ │ └── questionEn.js ├── services │ ├── datasets │ │ └── index.js │ ├── ga-generation.js │ ├── ga-pairs.js │ ├── models.js │ ├── questions │ │ └── index.js │ └── tasks │ │ ├── answer-generation.js │ │ ├── index.js │ │ ├── question-generation.js │ │ └── recovery.js ├── store.js └── util │ ├── async.js │ ├── domain-tree.js │ ├── file.js │ ├── logger.js │ └── request.js ├── local-db └── empty.txt ├── locales ├── en │ └── translation.json └── zh-CN │ └── translation.json ├── next.config.js ├── package-lock.json ├── package.json ├── pnpm-lock.yaml ├── prisma ├── generate-template.js ├── schema.prisma └── sql.json ├── public └── imgs │ ├── 1.png │ ├── 10.png │ ├── 2.png │ ├── 3.png │ ├── 4.png │ ├── 5.png │ ├── 6.png │ ├── 7.png │ ├── 8.png │ ├── 9.png │ ├── aw.jpg │ ├── aws.png │ ├── bg.png │ ├── bg2.png │ ├── cn-arc.png │ ├── 
default-dataset.png │ ├── en-arc.png │ ├── garden.jpg │ ├── github.png │ ├── google.png │ ├── huggingface.png │ ├── kaggle.png │ ├── linux.png │ ├── lluga.png │ ├── logo.icns │ ├── logo.ico │ ├── logo.png │ ├── logo.svg │ ├── mac.png │ ├── models │ ├── chatglm.svg │ ├── claude.svg │ ├── deepseek.svg │ ├── default.svg │ ├── doubao.svg │ ├── gemini.svg │ ├── glm.svg │ ├── gpt.svg │ ├── hunyuan.svg │ ├── llama.svg │ ├── qwen.svg │ ├── wenxin.svg │ └── yi.svg │ ├── modelscope.png │ ├── opendatalab.png │ ├── weichat.jpg │ └── windows.png └── styles ├── globals.css ├── home.js └── playground.js /.dockerignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .next 3 | .git 4 | .github 5 | README.md 6 | README.zh-CN.md 7 | .gitignore 8 | .env.local 9 | .env.development.local 10 | .env.test.local 11 | .env.production.local 12 | /test 13 | /local-db 14 | /video -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | # When adding additional environment variables, the schema in "/src/env.js" 2 | # should be updated accordingly. 
3 | 4 | # Prisma 5 | # https://www.prisma.io/docs/reference/database-reference/connection-urls#env 6 | DATABASE_URL="file:./db.sqlite" 7 | 8 | LOCAL_DB_PATH=./local-db -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '[Bug]' 5 | labels: bug 6 | assignees: '' 7 | --- 8 | 9 | **注意:请务必按照此模版填写 ISSUES 信息,否则 ISSUE 将不会得到回复** 10 | 11 | **问题描述** 12 | 清晰、简洁地描述该问题的具体情况。 13 | 14 | **桌面设备(请完善以下信息)** 15 | 16 | - 操作系统:[例如:、Window、MAC] 17 | - 浏览器:[例如:谷歌浏览器(Chrome),苹果浏览器(Safari)] 18 | - Easy Dataset 版本:[例如:1.2.2] 19 | 20 | **使用模型** 21 | 22 | - 模型提供商:例如火山引擎 23 | - 模型名称:例如 DeepSeek R1 24 | 25 | **复现步骤** 26 | 重现该问题的操作步骤: 27 | 28 | 1. 进入“……”页面。 29 | 2. 点击“……”。 30 | 3. 向下滚动到“……”。 31 | 4. 这时会看到错误提示。 32 | 33 | **预期结果** 34 | 清晰、简洁地描述你原本期望出现的情况。 35 | 36 | **截图** 37 | 如果有必要,请附上截图,以便更好地说明你的问题。 38 | 39 | **其他相关信息** 40 | 在此处添加关于该问题的其他任何相关背景信息。 41 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-or-enhancement-.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 'Feature or enhancement ' 3 | about: Suggest an idea for this project 4 | title: '[Feature]' 5 | labels: enhancement 6 | assignees: '' 7 | --- 8 | 9 | **你的功能请求是否与某个问题相关?请描述。** 10 | 清晰、简洁地描述一下存在的问题是什么。例如:当我[具体情况]时,我总是感到很沮丧。 11 | 12 | **描述你期望的解决方案** 13 | 清晰、简洁地描述你希望实现的情况。 14 | 15 | **描述你考虑过的替代方案** 16 | 清晰、简洁地描述你所考虑过的任何其他解决方案或功能。 17 | 18 | **其他相关信息** 19 | 在此处添加与该功能请求相关的其他任何背景信息或截图。 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question 3 | about: Ask questions you want to know 4 | title: '[Question]' 5 | labels: 
question 6 | assignees: '' 7 | --- 8 | 9 | **注意:请务必按照此模版填写 ISSUES 信息,否则 ISSUE 将不会得到回复** 10 | 11 | **问题描述** 12 | 清晰、简洁地描述该问题的具体情况。 13 | 14 | **桌面设备(请完善以下信息)** 15 | 16 | - 操作系统:[例如:、Window、MAC] 17 | - 浏览器:[例如:谷歌浏览器(Chrome),苹果浏览器(Safari)] 18 | - Easy Dataset 版本:[例如:1.2.2] 19 | 20 | **使用模型** 21 | 22 | - 模型提供商:例如火山引擎 23 | - 模型名称:例如 DeepSeek R1 24 | 25 | **复现步骤** 26 | 重现该问题的操作步骤: 27 | 28 | 1. 进入“……”页面。 29 | 2. 点击“……”。 30 | 3. 向下滚动到“……”。 31 | 4. 这时会看到错误提示。 32 | 33 | **预期结果** 34 | 清晰、简洁地描述你原本期望出现的情况。 35 | 36 | **截图** 37 | 如果有必要,请附上截图,以便更好地说明你的问题。 38 | 39 | **其他相关信息** 40 | 在此处添加关于该问题的其他任何相关背景信息。 41 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### 变更类型- [ ] 新功能(feat) 2 | 3 | - [ ] 修复(fix) 4 | - [ ] 文档(docs) 5 | - [ ] 重构(refactor) 6 | 7 | ### 变更描述- 简要说明修改内容(关联Issue:#123) 8 | 9 | ### 文档更新- [ ] README.md 10 | 11 | - [ ] 贡献指南 12 | - [ ] 接口文档(如有) 13 | -------------------------------------------------------------------------------- /.github/workflows/docker-build.yml: -------------------------------------------------------------------------------- 1 | name: Build and Push Docker image on Tag 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*' 7 | 8 | jobs: 9 | docker-image-release: 10 | runs-on: ubuntu-latest 11 | 12 | permissions: 13 | contents: read 14 | packages: write 15 | 16 | steps: 17 | - name: Checkout code 18 | uses: actions/checkout@v4 19 | 20 | - name: Set up Docker Buildx 21 | uses: docker/setup-buildx-action@v3 22 | 23 | - name: Log in to GitHub Container Registry 24 | uses: docker/login-action@v3 25 | with: 26 | registry: ghcr.io 27 | username: ${{ github.actor }} 28 | password: ${{ secrets.GITHUB_TOKEN }} 29 | 30 | - name: Extract metadata for Docker 31 | id: meta 32 | uses: docker/metadata-action@v5 33 | with: 34 | images: ghcr.io/${{ github.repository_owner }}/easy-dataset 35 | tags: | 36 | type=ref,event=tag 37 | 
type=raw,value=latest,enable={{is_default_branch}} 38 | 39 | - name: Build and push Docker image 40 | uses: docker/build-push-action@v5 41 | with: 42 | context: . 43 | push: true 44 | platforms: linux/amd64,linux/arm64 45 | tags: ${{ steps.meta.outputs.tags }} 46 | labels: ${{ steps.meta.outputs.labels }} 47 | cache-from: type=gha 48 | cache-to: type=gha,mode=max 49 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | build 3 | .vscode 4 | website-local.json 5 | ai-local.json 6 | .next 7 | .DS_Store 8 | tsconfig.tsbuildinfo 9 | mock-login-callback.ts 10 | .env.local 11 | /src/test/crawler 12 | /src/test/mock 13 | /test 14 | /dist 15 | /prisma/*.sqlite 16 | .idea 17 | !local-db/empty.txt 18 | /local-db 19 | prisma/local-db/db.sqlite 20 | 21 | -------------------------------------------------------------------------------- /.husky/commit-msg: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | npx commitlint --edit "$1" 4 | -------------------------------------------------------------------------------- /.husky/pre-commit: -------------------------------------------------------------------------------- 1 | npx lint-staged 2 | -------------------------------------------------------------------------------- /.npmrc: -------------------------------------------------------------------------------- 1 | # 国内用户可使用淘宝源加速 (Chinese users can use Taobao registry for faster downloads) 2 | # registry=https://registry.npmmirror.com 3 | registry=https://registry.npmjs.org -------------------------------------------------------------------------------- /.prettierrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | semi: true, 3 | trailingComma: 'none', 4 | singleQuote: true, 5 | tabWidth: 2, 6 | useTabs: false, 7 | 
bracketSpacing: true, 8 | arrowParens: 'avoid', 9 | proseWrap: 'preserve', 10 | jsxBracketSameLine: true, 11 | printWidth: 120, 12 | endOfLine: 'auto' 13 | }; 14 | -------------------------------------------------------------------------------- /.windsurfrules: -------------------------------------------------------------------------------- 1 | # Easy DataSet 项目架构设计 2 | 3 | ## 项目概述 4 | 5 | Easy DataSet 是一个用于创建大模型微调数据集的应用程序。用户可以上传文本文件,系统会自动分割文本并生成问题,最终生成用于微调的数据集。 6 | 7 | ## 技术栈 8 | 9 | - **前端框架**: Next.js 14 (App Router) 10 | - **UI 框架**: Material-UI (MUI) 11 | - **数据存储**: fs 文件系统模拟数据库 12 | - **开发语言**: JavaScript 13 | - **依赖管理**: pnpm 14 | 15 | ## 目录结构 16 | 17 | ``` 18 | easy-dataset/ 19 | ├── app/ # Next.js 应用目录 20 | │ ├── api/ # API 路由 21 | │ │ └── projects/ # 项目相关 API 22 | │ ├── projects/ # 项目相关页面 23 | │ │ ├── [projectId]/ # 项目详情页面 24 | │ └── page.js # 主页 25 | ├── components/ # React 组件 26 | │ ├── home/ # 主页相关组件 27 | │ │ ├── HeroSection.js 28 | │ │ ├── ProjectList.js 29 | │ │ └── StatsCard.js 30 | │ ├── Navbar.js # 导航栏组件 31 | │ └── CreateProjectDialog.js 32 | ├── lib/ # 工具库 33 | │ └── db/ # 数据库模块 34 | │ ├── base.js # 基础工具函数 35 | │ ├── projects.js # 项目管理 36 | │ ├── texts.js # 文本处理 37 | │ ├── datasets.js # 数据集管理 38 | │ └── index.js # 模块导出 39 | ├── styles/ # 样式文件 40 | │ └── home.js # 主页样式 41 | └── local-db/ # 本地数据库目录 42 | ``` 43 | 44 | ## 核心模块设计 45 | 46 | ### 1. 数据库模块 (`lib/db/`) 47 | 48 | #### base.js 49 | - 提供基础的文件操作功能 50 | - 确保数据库目录存在 51 | - 读写 JSON 文件的工具函数 52 | 53 | #### projects.js 54 | - 项目的 CRUD 操作 55 | - 项目配置管理 56 | - 项目目录结构维护 57 | 58 | #### texts.js 59 | - 文献处理功能 60 | - 文本片段存储和检索 61 | - 文件上传处理 62 | 63 | #### datasets.js 64 | - 数据集生成和管理 65 | - 问题列表管理 66 | - 标签树管理 67 | 68 | ### 2. 前端组件 (`components/`) 69 | 70 | #### Navbar.js 71 | - 顶部导航栏 72 | - 项目切换 73 | - 模型选择 74 | - 主题切换 75 | 76 | #### home/ 目录组件 77 | - HeroSection.js: 主页顶部展示区 78 | - ProjectList.js: 项目列表展示 79 | - StatsCard.js: 数据统计展示 80 | - CreateProjectDialog.js: 创建项目的对话框 81 | 82 | ### 3. 
页面路由 (`app/`) 83 | 84 | #### 主页 (`page.js`) 85 | - 项目列表展示 86 | - 创建项目入口 87 | - 数据统计展示 88 | 89 | #### 项目详情页 (`projects/[projectId]/`) 90 | - text-split/: 文献处理页面 91 | - questions/: 问题列表页面 92 | - datasets/: 数据集页面 93 | - settings/: 项目设置页面 94 | 95 | #### API 路由 (`api/`) 96 | - projects/: 项目管理 API 97 | - texts/: 文本处理 API 98 | - questions/: 问题生成 API 99 | - datasets/: 数据集管理 API 100 | 101 | ## 数据流设计 102 | 103 | ### 项目创建流程 104 | 1. 用户通过主页或导航栏创建新项目 105 | 2. 填写项目基本信息(名称、描述) 106 | 3. 系统创建项目目录和初始配置文件 107 | 4. 重定向到项目详情页 108 | 109 | ### 文献处理流程 110 | 1. 用户上传 Markdown 文件 111 | 2. 系统保存原始文件到项目目录 112 | 3. 调用文本分割服务,生成片段和目录结构 113 | 4. 展示分割结果和提取的目录 114 | 115 | ### 问题生成流程 116 | 1. 用户选择需要生成问题的文本片段 117 | 2. 系统调用大模型API生成问题 118 | 3. 保存问题到问题列表和标签树 119 | 120 | ### 数据集生成流程 121 | 1. 用户选择需要生成答案的问题 122 | 2. 系统调用大模型API生成答案 123 | 3. 保存数据集结果 124 | 4. 提供导出功能 125 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # 创建包含pnpm的基础镜像 2 | FROM node:20-alpine AS pnpm-base 3 | RUN npm install -g pnpm@9 4 | 5 | # 构建阶段 6 | FROM pnpm-base AS builder 7 | WORKDIR /app 8 | 9 | # 添加构建参数,用于识别目标平台 10 | ARG TARGETPLATFORM 11 | 12 | # 安装构建依赖 13 | RUN apk add --no-cache --virtual .build-deps \ 14 | python3 \ 15 | make \ 16 | g++ \ 17 | cairo-dev \ 18 | pango-dev \ 19 | jpeg-dev \ 20 | giflib-dev \ 21 | librsvg-dev \ 22 | build-base \ 23 | pixman-dev \ 24 | pkgconfig 25 | 26 | # 复制依赖文件和npm配置并安装(.npmrc中可配置国内源加速) 27 | COPY package.json pnpm-lock.yaml .npmrc ./ 28 | RUN pnpm install 29 | 30 | # 复制源代码 31 | COPY . . 
32 | 33 | # 根据目标平台设置Prisma二进制目标并构建应用 34 | RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ 35 | echo "Configuring for ARM64 platform"; \ 36 | sed -i 's/binaryTargets = \[.*\]/binaryTargets = \["linux-musl-arm64-openssl-3.0.x"\]/' prisma/schema.prisma; \ 37 | PRISMA_CLI_BINARY_TARGETS="linux-musl-arm64-openssl-3.0.x" pnpm build; \ 38 | else \ 39 | echo "Configuring for AMD64 platform (default)"; \ 40 | sed -i 's/binaryTargets = \[.*\]/binaryTargets = \["linux-musl-openssl-3.0.x"\]/' prisma/schema.prisma; \ 41 | PRISMA_CLI_BINARY_TARGETS="linux-musl-openssl-3.0.x" pnpm build; \ 42 | fi 43 | 44 | # 构建完成后移除开发依赖,只保留生产依赖 45 | RUN pnpm prune --prod 46 | 47 | # 运行阶段 48 | FROM pnpm-base AS runner 49 | WORKDIR /app 50 | 51 | # 只安装运行时依赖 52 | RUN apk add --no-cache \ 53 | cairo \ 54 | pango \ 55 | jpeg \ 56 | giflib \ 57 | librsvg \ 58 | pixman 59 | 60 | # 复制package.json和.env文件 61 | COPY package.json .env ./ 62 | 63 | # 从构建阶段复制精简后的node_modules(只包含生产依赖) 64 | COPY --from=builder /app/node_modules ./node_modules 65 | 66 | # 从构建阶段复制构建产物 67 | COPY --from=builder /app/.next ./.next 68 | COPY --from=builder /app/public ./public 69 | COPY --from=builder /app/electron ./electron 70 | COPY --from=builder /app/prisma ./prisma 71 | 72 | # 设置生产环境 73 | ENV NODE_ENV=production 74 | 75 | EXPOSE 1717 76 | CMD ["pnpm", "start"] 77 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU AFFERO GENERAL PUBLIC LICENSE 2 | Version 3, 19 November 2007 3 | 4 | Copyright (C) 2025 Easy Dataset Project 5 | 6 | This program is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU Affero General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, or 9 | (at your option) any later version. 
10 | 11 | This program is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU Affero General Public License for more details. 15 | 16 | You should have received a copy of the GNU Affero General Public License 17 | along with this program. If not, see https://www.gnu.org/licenses/. 18 | 19 | Additional Terms for Easy Dataset: 20 | 21 | 1. Contact Information 22 | If you wish to use Easy Dataset under different terms, please contact the 23 | copyright holders at: 1009903985@qq.com 24 | 25 | 2. Branding Restrictions 26 | You may not use the names "Easy Dataset" or "EasyDataset" to endorse or 27 | promote products derived from this software without prior written permission. 28 | 29 | 3. Disclaimer of Warranty 30 | The software is provided "as is", without warranty of any kind, express or 31 | implied, including but not limited to the warranties of merchantability, 32 | fitness for a particular purpose and noninfringement. In no event shall the 33 | authors or copyright holders be liable for any claim, damages or other 34 | liability, whether in an action of contract, tort or otherwise, arising from, 35 | out of or in connection with the software or the use or other dealings in the 36 | software. 37 | 38 | 4. Compliance with Laws 39 | You are responsible for ensuring your use of the software complies with all 40 | applicable laws, including but not limited to export control regulations. 
// app/api/check-update/route.js
// Route handler that reports whether a newer release of the app exists on GitHub.

/** Version reported when package.json is unreadable or carries no usable version. */
const FALLBACK_VERSION = '1.0.0';

/**
 * Read the running application's version from package.json in the working directory.
 *
 * @returns {string} The `version` field, or '1.0.0' when the file cannot be read/parsed
 *   or the field is not a string.
 */
function getCurrentVersion() {
  try {
    const packageJsonPath = path.join(process.cwd(), 'package.json');
    const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));
    // A missing version used to leak `undefined` into compareVersions and crash the handler.
    return typeof packageJson.version === 'string' ? packageJson.version : FALLBACK_VERSION;
  } catch (error) {
    console.error('读取版本信息失败:', error);
    return FALLBACK_VERSION;
  }
}

/**
 * Fetch the tag of the latest GitHub release of ConardLi/easy-dataset.
 *
 * @returns {Promise<string|null>} The tag name without its leading "v", or null when the
 *   request fails, GitHub answers with a non-2xx status, or the payload has no tag name.
 */
async function getLatestVersion() {
  try {
    const owner = 'ConardLi';
    const repo = 'easy-dataset';
    const response = await fetch(`https://api.github.com/repos/${owner}/${repo}/releases/latest`);

    if (!response.ok) {
      throw new Error(`GitHub API 请求失败: ${response.status}`);
    }

    const data = await response.json();
    // Only strip a *leading* "v"; replace('v', '') would also eat a "v" inside e.g. "1.0.0-dev".
    return data.tag_name.replace(/^v/, '');
  } catch (error) {
    console.error('获取最新版本失败:', error);
    return null;
  }
}

/**
 * GET handler: compare the local version with the latest GitHub release.
 *
 * @returns {Promise<Response>} JSON with `hasUpdate`, `currentVersion`, `latestVersion` and
 *   either `releaseUrl` (on success) or `error` (when the latest version is unavailable).
 *   Unexpected failures produce a 500 JSON response.
 */
export async function GET() {
  try {
    const currentVersion = getCurrentVersion();
    const latestVersion = await getLatestVersion();

    if (!latestVersion) {
      return NextResponse.json({
        hasUpdate: false,
        currentVersion,
        latestVersion: null,
        error: '获取最新版本失败'
      });
    }

    const hasUpdate = compareVersions(latestVersion, currentVersion) > 0;

    return NextResponse.json({
      hasUpdate,
      currentVersion,
      latestVersion,
      releaseUrl: hasUpdate ? `https://github.com/ConardLi/easy-dataset/releases/tag/v${latestVersion}` : null
    });
  } catch (error) {
    console.error('检查更新失败:', error);
    // A route handler must always return a Response; the original fell through with undefined.
    return NextResponse.json({ hasUpdate: false, error: '检查更新失败' }, { status: 500 });
  }
}

/**
 * Split a dotted version string into integer segments.
 * Non-numeric suffixes are ignored ("10-beta" -> 10); unparsable segments count as 0.
 *
 * @param {string} version - e.g. "1.2.3" or "1.2.3-beta".
 * @returns {number[]} One integer per dot-separated segment.
 */
function parseVersionParts(version) {
  return String(version)
    .split('.')
    .map(part => {
      const value = Number.parseInt(part, 10);
      return Number.isNaN(value) ? 0 : value;
    });
}

/**
 * Compare two dotted version strings segment by segment; missing segments count as 0.
 *
 * @param {string} a - First version.
 * @param {string} b - Second version.
 * @returns {number} 1 when a > b, -1 when a < b, 0 when they are equal.
 */
function compareVersions(a, b) {
  const partsA = parseVersionParts(a);
  const partsB = parseVersionParts(b);
  const length = Math.max(partsA.length, partsB.length);

  for (let i = 0; i < length; i++) {
    const numA = i < partsA.length ? partsA[i] : 0;
    const numB = i < partsB.length ? partsB[i] : 0;

    if (numA > numB) return 1;
    if (numA < numB) return -1;
  }

  return 0;
}
// app/api/llm/model/route.js
// Route handlers for the stored model list of an LLM provider.

/**
 * GET handler: list the stored models of one provider.
 *
 * @param {Request} request - Next.js request; `providerId` is read from the query string.
 * @returns {Promise<Response>} The model list, 400 when `providerId` is missing,
 *   404 when the lookup yields nothing, 500 when the lookup throws.
 */
export async function GET(request) {
  try {
    const providerId = request.nextUrl.searchParams.get('providerId');
    if (!providerId) {
      return NextResponse.json({ error: '参数错误' }, { status: 400 });
    }
    const models = await getLlmModelsByProviderId(providerId);
    if (!models) {
      return NextResponse.json({ error: 'LLM provider not found' }, { status: 404 });
    }
    return NextResponse.json(models);
  } catch (error) {
    console.error('Database query error:', error);
    return NextResponse.json({ error: 'Database query failed' }, { status: 500 });
  }
}

/**
 * POST handler: synchronise the latest model list of a provider.
 * Only models whose `modelId` is not stored yet are inserted.
 *
 * @param {Request} request - JSON body `{ newModels: object[], providerId: string }`.
 * @returns {Promise<Response>} The result of `createLlmModels` for the new entries, a
 *   "No new models to insert" message when nothing is new, 400 on a malformed body,
 *   404 when the provider lookup yields nothing, 500 when a database call throws.
 */
export async function POST(request) {
  try {
    const { newModels, providerId } = await request.json();
    // Reject malformed bodies up front instead of surfacing them as a misleading 500.
    if (!providerId || !Array.isArray(newModels)) {
      return NextResponse.json({ error: '参数错误' }, { status: 400 });
    }

    const models = await getLlmModelsByProviderId(providerId);
    if (!models) {
      return NextResponse.json({ error: 'LLM provider not found' }, { status: 404 });
    }

    // Set lookup keeps the diff linear in the number of models.
    const existingModelIds = new Set(models.map(model => model.modelId));
    const diffModels = newModels.filter(item => !existingModelIds.has(item.modelId));

    if (diffModels.length === 0) {
      return NextResponse.json({ message: 'No new models to insert' }, { status: 200 });
    }
    return NextResponse.json(await createLlmModels(diffModels));
  } catch (error) {
    // The original discarded the error entirely, leaving insert failures undiagnosable.
    console.error('Database insert error:', error);
    return NextResponse.json({ error: 'Database insert failed' }, { status: 500 });
  }
}
// app/api/projects/[projectId]/chunks/[chunkId]/questions/route.js
// Route handlers for generating and listing the questions of one text chunk.

/**
 * POST handler: generate questions for the given text chunk.
 *
 * @param {Request} request - JSON body `{ model, language?, number?, enableGaExpansion? }`;
 *   `language` defaults to '中文' and `enableGaExpansion` to false.
 * @param {{ params: { projectId: string, chunkId: string } }} context - Route parameters.
 * @returns {Promise<Response>} JSON `{ chunkId, questions, total, gaExpansionUsed, gaPairsCount,
 *   expectedTotal }`, 400 when an ID or the model is missing, 500 when generation fails.
 */
export async function POST(request, { params }) {
  try {
    const { projectId, chunkId } = params;

    if (!projectId || !chunkId) {
      return NextResponse.json({ error: 'Project ID or text block ID cannot be empty' }, { status: 400 });
    }

    const { model, language = '中文', number, enableGaExpansion = false } = await request.json();

    if (!model) {
      return NextResponse.json({ error: 'Model cannot be empty' }, { status: 400 });
    }

    // Call the method on the service object so any `this` it relies on stays bound.
    const result =
      (await questionService.generateQuestionsForChunkWithGA(projectId, chunkId, {
        model,
        language,
        number,
        enableGaExpansion
      })) || {};

    // Normalise the response shape so it always carries the GA expansion fields.
    const questions = result.questions || result.labelQuestions || [];
    const total = result.total || questions.length;

    return NextResponse.json({
      chunkId,
      questions,
      total,
      gaExpansionUsed: result.gaExpansionUsed || false,
      gaPairsCount: result.gaPairsCount || 0,
      // Fall back to the computed total; the original fell back to result.total, which may be undefined.
      expectedTotal: result.expectedTotal || total
    });
  } catch (error) {
    logger.error('Error generating questions:', error);
    return NextResponse.json({ error: error.message || 'Error generating questions' }, { status: 500 });
  }
}

/**
 * GET handler: list the questions stored for the given text chunk.
 *
 * @param {Request} request - Unused.
 * @param {{ params: { projectId: string, chunkId: string } }} context - Route parameters.
 * @returns {Promise<Response>} JSON `{ chunkId, questions, total }`, 400 when an ID is missing,
 *   500 when the lookup fails.
 */
export async function GET(request, { params }) {
  try {
    const { projectId, chunkId } = params;

    if (!projectId || !chunkId) {
      return NextResponse.json({ error: 'The item ID or text block ID cannot be empty' }, { status: 400 });
    }

    const questions = await getQuestionsForChunk(projectId, chunkId);

    return NextResponse.json({
      chunkId,
      questions,
      total: questions.length
    });
  } catch (error) {
    // Use the same logger as POST so both handlers report through one channel.
    logger.error('Error getting questions:', error);
    return NextResponse.json({ error: error.message || 'Error getting questions' }, { status: 500 });
  }
}
} catch (error) { 20 | console.error('Failed to get text block content:', error); 21 | return NextResponse.json({ error: error.message || 'Failed to get text block content' }, { status: 500 }); 22 | } 23 | } 24 | 25 | // 删除文本块 26 | export async function DELETE(request, { params }) { 27 | try { 28 | const { projectId, chunkId } = params; 29 | // 验证参数 30 | if (!projectId) { 31 | return NextResponse.json({ error: 'Project ID cannot be empty' }, { status: 400 }); 32 | } 33 | if (!chunkId) { 34 | return NextResponse.json({ error: 'Text block ID cannot be empty' }, { status: 400 }); 35 | } 36 | await deleteChunkById(chunkId); 37 | 38 | return NextResponse.json({ message: 'Text block deleted successfully' }); 39 | } catch (error) { 40 | console.error('Failed to delete text block:', error); 41 | return NextResponse.json({ error: error.message || 'Failed to delete text block' }, { status: 500 }); 42 | } 43 | } 44 | 45 | // 编辑文本块内容 46 | export async function PATCH(request, { params }) { 47 | try { 48 | const { projectId, chunkId } = params; 49 | 50 | // 验证参数 51 | if (!projectId) { 52 | return NextResponse.json({ error: '项目ID不能为空' }, { status: 400 }); 53 | } 54 | 55 | if (!chunkId) { 56 | return NextResponse.json({ error: '文本块ID不能为空' }, { status: 400 }); 57 | } 58 | 59 | // 解析请求体获取新内容 60 | const requestData = await request.json(); 61 | const { content } = requestData; 62 | 63 | if (!content) { 64 | return NextResponse.json({ error: '内容不能为空' }, { status: 400 }); 65 | } 66 | 67 | let res = await updateChunkById(chunkId, { content }); 68 | return NextResponse.json(res); 69 | } catch (error) { 70 | console.error('编辑文本块失败:', error); 71 | return NextResponse.json({ error: error.message || '编辑文本块失败' }, { status: 500 }); 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /app/api/projects/[projectId]/chunks/name/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 
'next/server'; 2 | import { getChunkByName } from '@/lib/db/chunks'; 3 | 4 | /** 5 | * 根据文本块名称获取文本块 6 | * @param {Request} request 请求对象 7 | * @param {object} context 上下文,包含路径参数 8 | * @returns {Promise} 响应对象 9 | */ 10 | export async function GET(request, { params }) { 11 | try { 12 | const { projectId } = params; 13 | 14 | // 从查询参数中获取 chunkName 15 | const { searchParams } = new URL(request.url); 16 | const chunkName = searchParams.get('chunkName'); 17 | 18 | if (!chunkName) { 19 | return NextResponse.json({ error: '文本块名称不能为空' }, { status: 400 }); 20 | } 21 | 22 | // 根据名称和项目ID查询文本块 23 | const chunk = await getChunkByName(projectId, chunkName); 24 | 25 | if (!chunk) { 26 | return NextResponse.json({ error: '未找到指定的文本块' }, { status: 404 }); 27 | } 28 | 29 | // 返回文本块信息 30 | return NextResponse.json(chunk); 31 | } catch (error) { 32 | console.error('根据名称获取文本块失败:', error); 33 | return NextResponse.json({ error: '获取文本块失败: ' + error.message }, { status: 500 }); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /app/api/projects/[projectId]/chunks/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import { deleteChunkById, getChunkByFileIds, getChunkById, getChunksByFileIds, updateChunkById } from '@/lib/db/chunks'; 3 | 4 | // 获取文本块内容 5 | export async function POST(request, { params }) { 6 | try { 7 | const { projectId } = params; 8 | // 验证参数 9 | if (!projectId) { 10 | return NextResponse.json({ error: 'Project ID cannot be empty' }, { status: 400 }); 11 | } 12 | const { array } = await request.json(); 13 | // 获取文本块内容 14 | const chunk = await getChunksByFileIds(array); 15 | 16 | return NextResponse.json(chunk); 17 | } catch (error) { 18 | console.error('Failed to get text block content:', error); 19 | return NextResponse.json({ error: error.message || 'Failed to get text block content' }, { status: 500 }); 20 | } 21 | } 22 | 
-------------------------------------------------------------------------------- /app/api/projects/[projectId]/config/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import { getProject, updateProject, getTaskConfig } from '@/lib/db/projects'; 3 | 4 | // 获取项目配置 5 | export async function GET(request, { params }) { 6 | try { 7 | const projectId = params.projectId; 8 | const config = await getProject(projectId); 9 | const taskConfig = await getTaskConfig(projectId); 10 | return NextResponse.json({ ...config, ...taskConfig }); 11 | } catch (error) { 12 | console.error('获取项目配置失败:', error); 13 | return NextResponse.json({ error: error.message }, { status: 500 }); 14 | } 15 | } 16 | 17 | // 更新项目配置 18 | export async function PUT(request, { params }) { 19 | try { 20 | const projectId = params.projectId; 21 | const newConfig = await request.json(); 22 | const currentConfig = await getProject(projectId); 23 | 24 | // 只更新 prompts 部分 25 | const updatedConfig = { 26 | ...currentConfig, 27 | ...newConfig.prompts 28 | }; 29 | 30 | const config = await updateProject(projectId, updatedConfig); 31 | return NextResponse.json(config); 32 | } catch (error) { 33 | console.error('更新项目配置失败:', error); 34 | return NextResponse.json({ error: error.message }, { status: 500 }); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /app/api/projects/[projectId]/datasets/[datasetId]/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import { getDatasetsById, getDatasetsCounts, getNavigationItems } from '@/lib/db/datasets'; 3 | import { getEncoding } from '@langchain/core/utils/tiktoken'; 4 | 5 | /** 6 | * 获取项目的所有数据集 7 | */ 8 | export async function GET(request, { params }) { 9 | try { 10 | const { projectId, datasetId } = params; 11 | // 验证项目ID 12 | if (!projectId) { 
13 | return NextResponse.json({ error: '项目ID不能为空' }, { status: 400 }); 14 | } 15 | if (!datasetId) { 16 | return NextResponse.json({ error: '数据集ID不能为空' }, { status: 400 }); 17 | } 18 | const { searchParams } = new URL(request.url); 19 | const operateType = searchParams.get('operateType'); 20 | if (operateType !== null) { 21 | const data = await getNavigationItems(projectId, datasetId, operateType); 22 | return NextResponse.json(data); 23 | } 24 | const datasets = await getDatasetsById(datasetId); 25 | let counts = await getDatasetsCounts(projectId); 26 | 27 | const tokenCounts = { 28 | answerTokens: 0, 29 | cotTokens: 0 30 | }; 31 | 32 | try { 33 | if (datasets.answer || datasets.cot) { 34 | // 使用 cl100k_base 编码,适用于 gpt-3.5-turbo 和 gpt-4 35 | const encoding = await getEncoding('cl100k_base'); 36 | 37 | if (datasets.answer) { 38 | const tokens = encoding.encode(datasets.answer); 39 | tokenCounts.answerTokens = tokens.length; 40 | } 41 | 42 | if (datasets.cot) { 43 | const tokens = encoding.encode(datasets.cot); 44 | tokenCounts.cotTokens = tokens.length; 45 | } 46 | } 47 | } catch (error) { 48 | console.error('计算Token数量失败:', error); 49 | } 50 | 51 | return NextResponse.json({ datasets, ...counts, ...tokenCounts }); 52 | } catch (error) { 53 | console.error('获取数据集详情失败:', error); 54 | return NextResponse.json( 55 | { 56 | error: error.message || '获取数据集详情失败' 57 | }, 58 | { status: 500 } 59 | ); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /app/api/projects/[projectId]/datasets/export/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import { getDatasets } from '@/lib/db/datasets'; 3 | 4 | /** 5 | * 获取导出数据集 6 | */ 7 | export async function GET(request, { params }) { 8 | try { 9 | const { projectId } = params; 10 | const { searchParams } = new URL(request.url); 11 | // 验证项目ID 12 | if (!projectId) { 13 | return 
NextResponse.json({ error: '项目ID不能为空' }, { status: 400 }); 14 | } 15 | let status = searchParams.get('status'); 16 | let confirmed = undefined; 17 | if (status === 'confirmed') confirmed = true; 18 | if (status === 'unconfirmed') confirmed = false; 19 | // 获取数据集 20 | let datasets = await getDatasets(projectId, confirmed); 21 | return NextResponse.json(datasets); 22 | } catch (error) { 23 | console.error('获取数据集失败:', error); 24 | return NextResponse.json( 25 | { 26 | error: error.message || '获取数据集失败' 27 | }, 28 | { status: 500 } 29 | ); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /app/api/projects/[projectId]/datasets/optimize/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import { getDatasetsById, updateDataset } from '@/lib/db/datasets'; 3 | import LLMClient from '@/lib/llm/core/index'; 4 | import getNewAnswerPrompt from '@/lib/llm/prompts/newAnswer'; 5 | import getNewAnswerEnPrompt from '@/lib/llm/prompts/newAnswerEn'; 6 | import { extractJsonFromLLMOutput } from '@/lib/llm/common/util'; 7 | 8 | // 优化数据集答案 9 | export async function POST(request, { params }) { 10 | try { 11 | const { projectId } = params; 12 | 13 | // 验证项目ID 14 | if (!projectId) { 15 | return NextResponse.json({ error: 'Project ID cannot be empty' }, { status: 400 }); 16 | } 17 | 18 | // 获取请求体 19 | const { datasetId, model, advice, language } = await request.json(); 20 | 21 | if (!datasetId) { 22 | return NextResponse.json({ error: 'Dataset ID cannot be empty' }, { status: 400 }); 23 | } 24 | 25 | if (!model) { 26 | return NextResponse.json({ error: 'Model cannot be empty' }, { status: 400 }); 27 | } 28 | 29 | if (!advice) { 30 | return NextResponse.json({ error: 'Please provide optimization suggestions' }, { status: 400 }); 31 | } 32 | 33 | // 获取数据集内容 34 | const dataset = await getDatasetsById(datasetId); 35 | if (!dataset) { 36 | return 
NextResponse.json({ error: 'Dataset does not exist' }, { status: 404 }); 37 | } 38 | 39 | // 创建LLM客户端 40 | const llmClient = new LLMClient(model); 41 | 42 | // 生成优化后的答案和思维链 43 | const prompt = 44 | language === 'en' 45 | ? getNewAnswerEnPrompt(dataset.question, dataset.answer || '', dataset.cot || '', advice) 46 | : getNewAnswerPrompt(dataset.question, dataset.answer || '', dataset.cot || '', advice); 47 | 48 | const response = await llmClient.getResponse(prompt); 49 | 50 | // 从LLM输出中提取JSON格式的优化结果 51 | const optimizedResult = extractJsonFromLLMOutput(response); 52 | 53 | if (!optimizedResult || !optimizedResult.answer) { 54 | return NextResponse.json({ error: 'Failed to optimize answer, please try again' }, { status: 500 }); 55 | } 56 | 57 | // 更新数据集 58 | const updatedDataset = { 59 | ...dataset, 60 | answer: optimizedResult.answer, 61 | cot: optimizedResult.cot || dataset.cot 62 | }; 63 | 64 | await updateDataset(updatedDataset); 65 | 66 | // 返回优化后的数据集 67 | return NextResponse.json({ 68 | success: true, 69 | dataset: updatedDataset 70 | }); 71 | } catch (error) { 72 | console.error('Failed to optimize answer:', error); 73 | return NextResponse.json({ error: error.message || 'Failed to optimize answer' }, { status: 500 }); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /app/api/projects/[projectId]/distill/questions/by-tag/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import { db } from '@/lib/db'; 3 | 4 | /** 5 | * 根据标签ID获取问题列表 6 | */ 7 | export async function GET(request, { params }) { 8 | try { 9 | const { projectId } = params; 10 | const { searchParams } = new URL(request.url); 11 | const tagId = searchParams.get('tagId'); 12 | 13 | console.log('[distill/questions/by-tag] 请求参数:', { projectId, tagId }); 14 | 15 | // 验证参数 16 | if (!projectId) { 17 | return NextResponse.json({ error: '项目ID不能为空' }, { status: 400 }); 
18 | } 19 | 20 | if (!tagId) { 21 | return NextResponse.json({ error: '标签ID不能为空' }, { status: 400 }); 22 | } 23 | 24 | // 获取标签信息 25 | const tag = await db.tags.findUnique({ 26 | where: { id: tagId } 27 | }); 28 | 29 | if (!tag) { 30 | return NextResponse.json({ error: '标签不存在' }, { status: 404 }); 31 | } 32 | 33 | console.log('[distill/questions/by-tag] 标签信息:', tag); 34 | 35 | // 获取或创建蒸馏文本块 36 | let distillChunk = await db.chunks.findFirst({ 37 | where: { 38 | projectId, 39 | name: 'Distilled Content' 40 | } 41 | }); 42 | 43 | if (!distillChunk) { 44 | // 创建一个特殊的蒸馏文本块 45 | distillChunk = await db.chunks.create({ 46 | data: { 47 | name: 'Distilled Content', 48 | projectId, 49 | fileId: 'distilled', 50 | fileName: 'distilled.md', 51 | content: 52 | 'This text block is used to store questions generated through data distillation and is not related to actual literature.', 53 | summary: 'Questions generated through data distillation', 54 | size: 0 55 | } 56 | }); 57 | } 58 | const questions = await db.questions.findMany({ 59 | where: { 60 | projectId, 61 | label: tag.label, 62 | chunkId: distillChunk.id 63 | } 64 | }); 65 | 66 | return NextResponse.json(questions); 67 | } catch (error) { 68 | console.error('[distill/questions/by-tag] 获取问题失败:', error); 69 | return NextResponse.json({ error: error.message || '获取问题失败' }, { status: 500 }); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /app/api/projects/[projectId]/distill/tags/all/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import { db } from '@/lib/db'; 3 | 4 | /** 5 | * 获取项目的所有蒸馏标签 6 | */ 7 | export async function GET(request, { params }) { 8 | try { 9 | const { projectId } = params; 10 | 11 | // 验证项目ID 12 | if (!projectId) { 13 | return NextResponse.json({ error: '项目ID不能为空' }, { status: 400 }); 14 | } 15 | 16 | // 获取所有标签 17 | const tags = await db.tags.findMany({ 18 | 
where: { 19 | projectId 20 | }, 21 | orderBy: { 22 | label: 'asc' 23 | } 24 | }); 25 | 26 | return NextResponse.json(tags); 27 | } catch (error) { 28 | console.error('获取蒸馏标签失败:', error); 29 | return NextResponse.json({ error: error.message || '获取蒸馏标签失败' }, { status: 500 }); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /app/api/projects/[projectId]/llamaFactory/checkConfig/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import path from 'path'; 3 | import fs from 'fs'; 4 | import { getProjectRoot } from '@/lib/db/base'; 5 | 6 | export async function GET(request, { params }) { 7 | try { 8 | const { projectId } = params; 9 | if (!projectId) { 10 | return NextResponse.json({ error: 'The project ID cannot be empty' }, { status: 400 }); 11 | } 12 | 13 | const projectRoot = await getProjectRoot(); 14 | const projectPath = path.join(projectRoot, projectId); 15 | const configPath = path.join(projectPath, 'dataset_info.json'); 16 | 17 | const exists = fs.existsSync(configPath); 18 | 19 | return NextResponse.json({ 20 | exists, 21 | configPath: exists ? 
configPath : null 22 | }); 23 | } catch (error) { 24 | console.error('Error checking Llama Factory config:', error); 25 | return NextResponse.json({ error: error.message }, { status: 500 }); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /app/api/projects/[projectId]/model-config/[modelConfigId]/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import { deleteModelConfigById } from '@/lib/db/model-config'; 3 | 4 | // 删除模型配置 5 | export async function DELETE(request, { params }) { 6 | try { 7 | const { projectId, modelConfigId } = params; 8 | // 验证项目 ID 9 | if (!projectId) { 10 | return NextResponse.json({ error: 'The project ID cannot be empty' }, { status: 400 }); 11 | } 12 | await deleteModelConfigById(modelConfigId); 13 | return NextResponse.json(true); 14 | } catch (error) { 15 | console.error('Error obtaining model configuration:', error); 16 | return NextResponse.json({ error: 'Failed to obtain model configuration' }, { status: 500 }); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /app/api/projects/[projectId]/model-config/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import { createInitModelConfig, getModelConfigByProjectId, saveModelConfig } from '@/lib/db/model-config'; 3 | import { DEFAULT_MODEL_SETTINGS, MODEL_PROVIDERS } from '@/constant/model'; 4 | import { getProject } from '@/lib/db/projects'; 5 | 6 | // 获取模型配置列表 7 | export async function GET(request, { params }) { 8 | try { 9 | const { projectId } = params; 10 | // 验证项目 ID 11 | if (!projectId) { 12 | return NextResponse.json({ error: 'The project ID cannot be empty' }, { status: 400 }); 13 | } 14 | let modelConfigList = await getModelConfigByProjectId(projectId); 15 | if (!modelConfigList || 
modelConfigList.length === 0) { 16 | let insertModelConfigList = []; 17 | MODEL_PROVIDERS.forEach(item => { 18 | let data = { 19 | projectId: projectId, 20 | providerId: item.id, 21 | providerName: item.name, 22 | endpoint: item.defaultEndpoint, 23 | apiKey: '', 24 | modelId: item.defaultModels.length > 0 ? item.defaultModels[0] : '', 25 | modelName: item.defaultModels.length > 0 ? item.defaultModels[0] : '', 26 | type: 'text', 27 | temperature: DEFAULT_MODEL_SETTINGS.temperature, 28 | maxTokens: DEFAULT_MODEL_SETTINGS.maxTokens, 29 | topK: 0, 30 | topP: 0, 31 | status: 1 32 | }; 33 | insertModelConfigList.push(data); 34 | }); 35 | modelConfigList = await createInitModelConfig(insertModelConfigList); 36 | } 37 | let project = await getProject(projectId); 38 | return NextResponse.json({ data: modelConfigList, defaultModelConfigId: project.defaultModelConfigId }); 39 | } catch (error) { 40 | console.error('Error obtaining model configuration:', error); 41 | return NextResponse.json({ error: 'Failed to obtain model configuration' }, { status: 500 }); 42 | } 43 | } 44 | 45 | // 保存模型配置 46 | export async function POST(request, { params }) { 47 | try { 48 | const { projectId } = params; 49 | 50 | // 验证项目 ID 51 | if (!projectId) { 52 | return NextResponse.json({ error: 'The project ID cannot be empty' }, { status: 400 }); 53 | } 54 | // 获取请求体 55 | const modelConfig = await request.json(); 56 | 57 | // 验证请求体 58 | if (!modelConfig) { 59 | return NextResponse.json({ error: 'The model configuration cannot be empty ' }, { status: 400 }); 60 | } 61 | modelConfig.projectId = projectId; 62 | if (!modelConfig.modelId) { 63 | modelConfig.modelId = modelConfig.modelName; 64 | } 65 | const res = await saveModelConfig(modelConfig); 66 | 67 | return NextResponse.json(res); 68 | } catch (error) { 69 | console.error('Error updating model configuration:', error); 70 | return NextResponse.json({ error: 'Failed to update model configuration' }, { status: 500 }); 71 | } 72 | } 73 | 
-------------------------------------------------------------------------------- /app/api/projects/[projectId]/pdf/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import PdfProcessor from '@/lib/file/pdf-processing/core'; 3 | import { deleteChunkAndFile } from '@/lib/db/chunks'; 4 | import { getProject, updateProject } from '@/lib/db/projects'; 5 | 6 | // Replace the deprecated config export with the new export syntax 7 | export const dynamic = 'force-dynamic'; 8 | // This tells Next.js not to parse the request body automatically 9 | export const bodyParser = false; 10 | 11 | // 处理PDF文件 12 | export async function GET(request, { params }) { 13 | try { 14 | const { projectId } = params; 15 | 16 | const fileName = request.nextUrl.searchParams.get('fileName'); 17 | 18 | let strategy = request.nextUrl.searchParams.get('strategy'); 19 | 20 | const currentLanguage = request.nextUrl.searchParams.get('currentLanguage'); 21 | 22 | const visionModel = request.nextUrl.searchParams.get('modelId'); 23 | 24 | // 验证项目ID 25 | if (!projectId) { 26 | return NextResponse.json({ error: '项目ID不能为空' }, { status: 400 }); 27 | } 28 | if (!fileName) { 29 | return NextResponse.json({ error: '文件名不能为空' }, { status: 400 }); 30 | } 31 | 32 | //如果没有正确获取到strategy字段,则使用默认配置 33 | if (!strategy) { 34 | strategy = 'default'; 35 | } 36 | 37 | // 获取项目信息 38 | const project = await getProject(projectId); 39 | 40 | // 创建处理器 41 | const processor = new PdfProcessor(strategy); 42 | 43 | // 使用当前策略处理 44 | const result = await processor.process(projectId, fileName, { 45 | language: currentLanguage, 46 | visionModelId: visionModel 47 | }); 48 | 49 | //先检查PDF转换是否成功,再将转换后的文件写入配置 50 | if (!result.success) { 51 | throw new Error(result.error); 52 | } 53 | await updateProject(projectId, { 54 | ...project 55 | }); 56 | 57 | return NextResponse.json({ 58 | projectId, 59 | project, 60 | batch_id: result.data 61 | }); 62 | } 
catch (error) { 63 | console.error('PDF处理流程出错:', error); 64 | return NextResponse.json({ error: error.message || 'PDF处理流程' }, { status: 500 }); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /app/api/projects/[projectId]/playground/chat/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import LLMClient from '@/lib/llm/core/index'; 3 | 4 | export async function POST(request, { params }) { 5 | try { 6 | const { projectId } = params; 7 | 8 | // 验证项目ID 9 | if (!projectId) { 10 | return NextResponse.json({ error: 'The project ID cannot be empty' }, { status: 400 }); 11 | } 12 | 13 | // 获取请求体 14 | const { model, messages } = await request.json(); 15 | 16 | // 验证请求参数 17 | if (!model) { 18 | return NextResponse.json({ error: 'The model parameters cannot be empty' }, { status: 400 }); 19 | } 20 | 21 | if (!Array.isArray(messages) || messages.length === 0) { 22 | return NextResponse.json({ error: 'The message list cannot be empty' }, { status: 400 }); 23 | } 24 | 25 | // 使用自定义的LLM客户端 26 | const llmClient = new LLMClient(model); 27 | 28 | // 格式化消息历史 29 | const formattedMessages = messages.map(msg => { 30 | // 处理纯文本消息 31 | if (typeof msg.content === 'string') { 32 | return { 33 | role: msg.role, 34 | content: msg.content 35 | }; 36 | } 37 | // 处理包含图片的复合消息(用于视觉模型) 38 | else if (Array.isArray(msg.content)) { 39 | return { 40 | role: msg.role, 41 | content: msg.content 42 | }; 43 | } 44 | // 默认情况 45 | return { 46 | role: msg.role, 47 | content: msg.content 48 | }; 49 | }); 50 | 51 | // 调用LLM API 52 | let response = ''; 53 | try { 54 | response = await llmClient.getResponse(formattedMessages); 55 | } catch (error) { 56 | console.error('Failed to call LLM API:', error); 57 | return NextResponse.json( 58 | { 59 | error: `Failed to call ${model.provider} model: ${error.message}` 60 | }, 61 | { status: 500 } 62 | ); 63 | } 64 | 65 | return 
NextResponse.json({ response }); 66 | } catch (error) { 67 | console.error('Failed to process chat request:', error); 68 | return NextResponse.json({ error: `Failed to process chat request: ${error.message}` }, { status: 500 }); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /app/api/projects/[projectId]/playground/chat/stream/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import LLMClient from '@/lib/llm/core/index'; 3 | 4 | /** 5 | * 流式输出的聊天接口 6 | */ 7 | export async function POST(request, { params }) { 8 | const { projectId } = params; 9 | 10 | try { 11 | const body = await request.json(); 12 | const { model, messages } = body; 13 | 14 | if (!model || !messages) { 15 | return NextResponse.json({ error: 'Missing necessary parameters' }, { status: 400 }); 16 | } 17 | 18 | // 创建 LLM 客户端 19 | const llmClient = new LLMClient(model); 20 | 21 | // 格式化消息历史 22 | const formattedMessages = messages.map(msg => { 23 | // 处理纯文本消息 24 | if (typeof msg.content === 'string') { 25 | return { 26 | role: msg.role, 27 | content: msg.content 28 | }; 29 | } 30 | // 处理包含图片的复合消息(用于视觉模型) 31 | else if (Array.isArray(msg.content)) { 32 | return { 33 | role: msg.role, 34 | content: msg.content 35 | }; 36 | } 37 | // 默认情况 38 | return { 39 | role: msg.role, 40 | content: msg.content 41 | }; 42 | }); 43 | 44 | try { 45 | // 调用流式 API 46 | const stream = await llmClient.chatStream(formattedMessages); 47 | // 返回流式响应 48 | return stream; 49 | } catch (error) { 50 | console.error('Failed to call LLM API:', error); 51 | return NextResponse.json( 52 | { 53 | error: `Failed to call ${model.provider} model: ${error.message}` 54 | }, 55 | { status: 500 } 56 | ); 57 | } 58 | } catch (error) { 59 | console.error('Failed to process stream chat request:', error); 60 | return NextResponse.json({ error: `Failed to process stream chat request: ${error.message}` }, { 
status: 500 }); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /app/api/projects/[projectId]/preview/[fileId]/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import fs from 'fs'; 3 | import path from 'path'; 4 | import { getProjectRoot } from '@/lib/db/base'; 5 | import { getUploadFileInfoById } from '@/lib/db/upload-files'; 6 | 7 | // 获取文件内容 8 | export async function GET(request, { params }) { 9 | try { 10 | const { projectId, fileId } = params; 11 | 12 | // 验证参数 13 | if (!projectId) { 14 | return NextResponse.json({ error: 'Project ID cannot be empty' }, { status: 400 }); 15 | } 16 | 17 | // 获取项目根目录 18 | let fileInfo = await getUploadFileInfoById(fileId); 19 | if (!fileInfo) { 20 | return NextResponse.json({ error: 'file does not exist' }, { status: 400 }); 21 | } 22 | 23 | // 获取文件路径 24 | let filePath = path.join(fileInfo.path, fileInfo.fileName); 25 | if (fileInfo.fileExt !== '.md') { 26 | filePath = path.join(fileInfo.path, fileInfo.fileName.replace(/\.[^/.]+$/, '.md')); 27 | } 28 | //获取文件 29 | const buffer = fs.readFileSync(filePath); 30 | 31 | const text = buffer.toString('utf-8'); 32 | 33 | return NextResponse.json({ 34 | fileId: fileId, 35 | fileName: fileInfo.fileName, 36 | content: text 37 | }); 38 | } catch (error) { 39 | console.error('Failed to get text block content:', error); 40 | return NextResponse.json({ error: error.message || 'Failed to get text block content' }, { status: 500 }); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /app/api/projects/[projectId]/questions/[questionId]/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import { deleteQuestion } from '@/lib/db/questions'; 3 | 4 | // 删除单个问题 5 | export async function DELETE(request, { params }) { 6 | 
try { 7 | const { projectId, questionId } = params; 8 | 9 | // 验证参数 10 | if (!projectId) { 11 | return NextResponse.json({ error: 'Project ID is required' }, { status: 400 }); 12 | } 13 | 14 | if (!questionId) { 15 | return NextResponse.json({ error: 'Question ID is required' }, { status: 400 }); 16 | } 17 | 18 | // 删除问题 19 | await deleteQuestion(questionId); 20 | 21 | return NextResponse.json({ success: true, message: 'Delete successful' }); 22 | } catch (error) { 23 | console.error('Delete failed:', error); 24 | return NextResponse.json({ error: error.message || 'Delete failed' }, { status: 500 }); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /app/api/projects/[projectId]/questions/batch-delete/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import { batchDeleteQuestions } from '@/lib/db/questions'; 3 | 4 | // 批量删除问题 5 | export async function DELETE(request) { 6 | try { 7 | const body = await request.json(); 8 | const { questionIds } = body; 9 | 10 | // 验证参数 11 | if (questionIds.length === 0) { 12 | return NextResponse.json({ error: 'Question ID is required' }, { status: 400 }); 13 | } 14 | 15 | // 删除问题 16 | await batchDeleteQuestions(questionIds); 17 | 18 | return NextResponse.json({ success: true, message: 'Delete successful' }); 19 | } catch (error) { 20 | console.error('Delete failed:', error); 21 | return NextResponse.json({ error: error.message || 'Delete failed' }, { status: 500 }); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /app/api/projects/[projectId]/questions/tree/route.js: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import { getQuestionsForTree, getQuestionsByTag } from '@/lib/db/questions'; 3 | 4 | /** 5 | * 获取项目的问题树形视图数据 6 | * @param {Request} request - 请求对象 
7 | * @param {Object} params - 路由参数 8 | * @returns {Promise} - 包含问题数据的响应 9 | */ 10 | export async function GET(request, { params }) { 11 | try { 12 | const { projectId } = params; 13 | 14 | // 验证项目ID 15 | if (!projectId) { 16 | return NextResponse.json({ error: '项目ID不能为空' }, { status: 400 }); 17 | } 18 | 19 | const { searchParams } = new URL(request.url); 20 | const tag = searchParams.get('tag'); 21 | const input = searchParams.get('input'); 22 | const tagsOnly = searchParams.get('tagsOnly') === 'true'; 23 | const isDistill = searchParams.get('isDistill') === 'true'; 24 | 25 | if (tag) { 26 | // 获取指定标签的问题数据(包含完整字段) 27 | const questions = await getQuestionsByTag(projectId, tag, input, isDistill); 28 | return NextResponse.json(questions); 29 | } else if (tagsOnly) { 30 | // 只获取标签信息(仅包含 id 和 label 字段) 31 | const treeData = await getQuestionsForTree(projectId, input, isDistill); 32 | return NextResponse.json(treeData); 33 | } else { 34 | // 兼容原有请求,获取树形视图数据(仅包含 id 和 label 字段) 35 | const treeData = await getQuestionsForTree(projectId, null, isDistill); 36 | return NextResponse.json(treeData); 37 | } 38 | } catch (error) { 39 | console.error('获取问题树形数据失败:', error); 40 | return NextResponse.json({ error: error.message || '获取问题树形数据失败' }, { status: 500 }); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /app/api/projects/[projectId]/route.js: -------------------------------------------------------------------------------- 1 | // 获取项目详情 2 | import { deleteProject, getProject, updateProject, getTaskConfig } from '@/lib/db/projects'; 3 | 4 | export async function GET(request, { params }) { 5 | try { 6 | const { projectId } = params; 7 | const project = await getProject(projectId); 8 | const taskConfig = await getTaskConfig(projectId); 9 | if (!project) { 10 | return Response.json({ error: '项目不存在' }, { status: 404 }); 11 | } 12 | return Response.json({ ...project, taskConfig }); 13 | } catch (error) { 14 | 
/**
 * Resolve the directory of a project underneath the projects root.
 *
 * The project ID arrives from the URL, so it is treated as untrusted: any ID
 * that resolves to the root itself or to a location outside of it (for
 * example via ".." segments or an absolute path) is rejected.
 *
 * @param {string} projectRoot - Directory that contains one sub-directory per project.
 * @param {unknown} projectId - Project ID taken from the request.
 * @returns {string|null} Absolute project directory, or null when the ID is not acceptable.
 */
function resolveProjectDir(projectRoot, projectId) {
  if (typeof projectId !== 'string' || projectId.length === 0) {
    return null;
  }

  const rootDir = path.resolve(projectRoot);
  const projectDir = path.resolve(rootDir, projectId);
  const relative = path.relative(rootDir, projectDir);

  // '' means "the root itself"; a leading '..' segment or an absolute result
  // (different drive on Windows) means the target lies outside the root.
  const escapesRoot =
    relative === '' || relative === '..' || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);

  return escapesRoot ? null : projectDir;
}

/**
 * Read the task configuration of a project.
 *
 * @param {Request} request - Incoming request (unused).
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} The task configuration, or a JSON error with
 *   status 400 (missing/invalid ID), 404 (unknown project) or 500.
 */
export async function GET(request, { params }) {
  try {
    const { projectId } = params;

    // Validate the project ID
    if (!projectId) {
      return NextResponse.json({ error: 'Project ID is required' }, { status: 400 });
    }

    // Locate the project directory, refusing IDs that escape the projects root
    const projectPath = resolveProjectDir(await getProjectRoot(), projectId);
    if (!projectPath) {
      return NextResponse.json({ error: 'Invalid project ID' }, { status: 400 });
    }

    // Make sure the project exists
    try {
      await fs.access(projectPath);
    } catch {
      // Do not echo the absolute server path back to the client.
      return NextResponse.json({ error: 'Project does not exist' }, { status: 404 });
    }

    const taskConfig = await getTaskConfig(projectId);
    return NextResponse.json(taskConfig);
  } catch (error) {
    console.error('Failed to obtain task configuration:', error);
    return NextResponse.json({ error: 'Failed to obtain task configuration' }, { status: 500 });
  }
}

/**
 * Replace the task configuration of a project.
 *
 * The request body is written verbatim (pretty-printed JSON) to
 * `task-config.json` inside the project directory.
 *
 * @param {Request} request - Incoming request whose JSON body is the new configuration.
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} A success message, or a JSON error with
 *   status 400 (missing/invalid ID or empty body), 404 (unknown project) or 500.
 */
export async function PUT(request, { params }) {
  try {
    const { projectId } = params;

    // Validate the project ID
    if (!projectId) {
      return NextResponse.json({ error: 'Project ID is required' }, { status: 400 });
    }

    // Read and validate the request body
    const taskConfig = await request.json();
    if (!taskConfig) {
      return NextResponse.json({ error: 'Task configuration cannot be empty' }, { status: 400 });
    }

    // Locate the project directory, refusing IDs that escape the projects root
    const projectPath = resolveProjectDir(await getProjectRoot(), projectId);
    if (!projectPath) {
      return NextResponse.json({ error: 'Invalid project ID' }, { status: 400 });
    }

    // Make sure the project exists
    try {
      await fs.access(projectPath);
    } catch {
      return NextResponse.json({ error: 'Project does not exist' }, { status: 404 });
    }

    // Persist the configuration next to the other project files
    const taskConfigPath = path.join(projectPath, 'task-config.json');
    await fs.writeFile(taskConfigPath, JSON.stringify(taskConfig, null, 2), 'utf-8');

    return NextResponse.json({ message: 'Task configuration updated successfully' });
  } catch (error) {
    console.error('Failed to update task configuration:', error);
    return NextResponse.json({ error: 'Failed to update task configuration' }, { status: 500 });
  }
}
/**
 * Delete a project's directory from disk.
 *
 * Expects a JSON body of the form `{ projectId }`. The ID comes from the
 * client, so it is validated before anything is removed: it must resolve to a
 * location strictly inside the projects root. Without that check an ID such
 * as ".." or "../other" would make the recursive delete below wipe the root
 * itself or an unrelated directory.
 *
 * @param {Request} request - Incoming request with a JSON body containing `projectId`.
 * @returns {Promise<Response>} `{ success, message }` on success, otherwise
 *   `{ success: false, error }` with status 400 (missing/invalid ID),
 *   404 (directory not found) or 500.
 */
export async function POST(request) {
  try {
    const { projectId } = await request.json();

    if (!projectId) {
      return NextResponse.json(
        {
          success: false,
          error: '项目ID不能为空'
        },
        { status: 400 }
      );
    }

    // Resolve the target and make sure it stays strictly below the projects root
    const projectRoot = path.resolve(await getProjectRoot());
    const projectPath = typeof projectId === 'string' ? path.resolve(projectRoot, projectId) : projectRoot;
    const relative = path.relative(projectRoot, projectPath);
    const isInsideRoot =
      relative !== '' && relative !== '..' && !relative.startsWith(`..${path.sep}`) && !path.isAbsolute(relative);

    if (!isInsideRoot) {
      return NextResponse.json(
        {
          success: false,
          error: '项目ID无效'
        },
        { status: 400 }
      );
    }

    // Make sure the directory exists
    if (!fs.existsSync(projectPath)) {
      return NextResponse.json(
        {
          success: false,
          error: '项目目录不存在'
        },
        { status: 404 }
      );
    }

    // Remove the directory and everything below it
    await fs.promises.rm(projectPath, { recursive: true, force: true });

    return NextResponse.json({
      success: true,
      message: '项目目录已删除'
    });
  } catch (error) {
    console.error('删除项目目录出错:', error);
    return NextResponse.json(
      {
        success: false,
        error: error.message
      },
      { status: 500 }
    );
  }
}
/**
 * Open a project's directory in the operating system's file manager.
 *
 * Expects a JSON body of the form `{ projectId }`. Two precautions are taken
 * because the ID is client-supplied:
 *  - it must resolve to a location strictly inside the projects root;
 *  - the opener program is started with an argument array (no shell), so
 *    quotes, `$()`, `;` and similar characters in the ID are never
 *    interpreted as shell syntax.
 *
 * @param {Request} request - Incoming request with a JSON body containing `projectId`.
 * @returns {Promise<Response>} `{ success, message }` on success, otherwise
 *   `{ success: false, error }` with status 400 (missing/invalid ID) or 500.
 */
export async function POST(request) {
  try {
    const { projectId } = await request.json();

    if (!projectId) {
      return NextResponse.json(
        {
          success: false,
          error: '项目ID不能为空'
        },
        { status: 400 }
      );
    }

    // Resolve the target and make sure it stays strictly below the projects root
    const projectRoot = path.resolve(await getProjectRoot());
    const projectPath = typeof projectId === 'string' ? path.resolve(projectRoot, projectId) : projectRoot;
    const relative = path.relative(projectRoot, projectPath);
    const isInsideRoot =
      relative !== '' && relative !== '..' && !relative.startsWith(`..${path.sep}`) && !path.isAbsolute(relative);

    if (!isInsideRoot) {
      return NextResponse.json(
        {
          success: false,
          error: '项目ID无效'
        },
        { status: 400 }
      );
    }

    // Pick the platform's "open this folder" program
    let opener;
    if (process.platform === 'win32') {
      // Windows
      opener = 'explorer';
    } else if (process.platform === 'darwin') {
      // macOS
      opener = 'open';
    } else {
      // Linux and everything else
      opener = 'xdg-open';
    }

    // execFile passes the path as a single argv entry and never spawns a shell.
    // Loaded here because the file only imports `exec` from child_process.
    const { execFile } = await import('child_process');
    await promisify(execFile)(opener, [projectPath]);

    return NextResponse.json({
      success: true,
      message: '已打开项目目录'
    });
  } catch (error) {
    console.error('打开项目目录出错:', error);
    return NextResponse.json(
      {
        success: false,
        error: error.message
      },
      { status: 500 }
    );
  }
}
/**
 * List all projects.
 *
 * @param {Request} request - Incoming request (unused).
 * @returns {Promise<Response>} JSON array produced by `getProjects()`, or
 *   `{ error }` with status 500 when loading or serialising fails.
 */
export async function GET(request) {
  try {
    // Load and serialise in one step; both stay inside the try so any failure maps to a 500
    return Response.json(await getProjects());
  } catch (failure) {
    console.error('获取项目列表出错:', failure);
    const payload = { error: failure.message };
    return Response.json(payload, { status: 500 });
  }
}
/**
 * Run the desktop updater script and report its outcome.
 *
 * The updater only exists in the packaged desktop client
 * (`<cwd>/desktop/scripts/updater.js`); when it is missing the route answers
 * with status 400. Otherwise the script is started with `node` and the
 * response is produced once the process finishes:
 *  - exit code 0 -> `{ success: true }`
 *  - any other exit code -> status 500 with the combined stdout/stderr output
 *  - the process could not be started at all -> status 500 with the error message
 *
 * @returns {Promise<Response>} JSON of the form `{ success, message }`.
 */
export async function POST() {
  try {
    // The updater script is only shipped with the desktop client
    const updaterPath = path.join(process.cwd(), 'desktop', 'scripts', 'updater.js');

    if (!fs.existsSync(updaterPath)) {
      return NextResponse.json(
        {
          success: false,
          message: '更新功能仅在客户端环境中可用'
        },
        { status: 400 }
      );
    }

    // Awaited (not just returned) so that a rejection is handled by the catch below.
    return await new Promise(resolve => {
      const updaterProcess = exec(`node "${updaterPath}"`, { cwd: process.cwd() });

      let output = '';

      updaterProcess.stdout.on('data', data => {
        output += data.toString();
        console.log(`Update output: ${data}`);
      });

      updaterProcess.stderr.on('data', data => {
        output += data.toString();
        console.error(`Update error: ${data}`);
      });

      // A failed spawn emits 'error' and is not guaranteed to be followed by
      // 'close'; without this listener the request could hang forever. If
      // 'close' does arrive afterwards its resolve() call is a no-op.
      updaterProcess.on('error', error => {
        console.error('Failed to execute update:', error);
        resolve(
          NextResponse.json(
            {
              success: false,
              message: `Failed to execute update: ${error.message}`
            },
            { status: 500 }
          )
        );
      });

      updaterProcess.on('close', code => {
        console.log(`Update process exit, exit code: ${code}`);

        if (code === 0) {
          resolve(
            NextResponse.json({
              success: true,
              message: 'Update successful, application will restart'
            })
          );
        } else {
          resolve(
            NextResponse.json(
              {
                success: false,
                message: `Update failed, exit code: ${code}, output: ${output}`
              },
              { status: 500 }
            )
          );
        }
      });
    });
  } catch (error) {
    console.error('Failed to execute update:', error);
    return NextResponse.json(
      {
        success: false,
        message: `Failed to execute update: ${error.message}`
      },
      { status: 500 }
    );
  }
}
[data-theme='dark'] ::-webkit-scrollbar-thumb { 56 | background-color: rgba(255, 255, 255, 0.2); 57 | } 58 | 59 | [data-theme='dark'] ::-webkit-scrollbar-thumb:hover { 60 | background-color: rgba(255, 255, 255, 0.3); 61 | } 62 | 63 | /* 方便的间距类 */ 64 | .mt-1 { 65 | margin-top: 8px; 66 | } 67 | .mt-2 { 68 | margin-top: 16px; 69 | } 70 | .mt-3 { 71 | margin-top: 24px; 72 | } 73 | .mt-4 { 74 | margin-top: 32px; 75 | } 76 | .mb-1 { 77 | margin-bottom: 8px; 78 | } 79 | .mb-2 { 80 | margin-bottom: 16px; 81 | } 82 | .mb-3 { 83 | margin-bottom: 24px; 84 | } 85 | .mb-4 { 86 | margin-bottom: 32px; 87 | } 88 | 89 | /* 响应式样式 */ 90 | @media (max-width: 600px) { 91 | .hide-on-mobile { 92 | display: none !important; 93 | } 94 | } 95 | 96 | /* 输入框和选择框边框简化 */ 97 | .plain-select .MuiOutlinedInput-notchedOutline, 98 | .plain-input .MuiOutlinedInput-notchedOutline { 99 | border-color: transparent !important; 100 | } 101 | 102 | /* 卡片悬停效果 */ 103 | .hover-card { 104 | transition: 105 | transform 0.2s ease, 106 | box-shadow 0.2s ease; 107 | } 108 | 109 | .hover-card:hover { 110 | transform: translateY(-4px); 111 | box-shadow: 0 12px 20px rgba(0, 0, 0, 0.1); 112 | } 113 | 114 | [data-theme='dark'] .hover-card:hover { 115 | box-shadow: 0 12px 20px rgba(0, 0, 0, 0.3); 116 | } 117 | -------------------------------------------------------------------------------- /app/layout.js: -------------------------------------------------------------------------------- 1 | import './globals.css'; 2 | import ThemeRegistry from '@/components/ThemeRegistry'; 3 | import I18nProvider from '@/components/I18nProvider'; 4 | import { Toaster } from 'sonner'; 5 | import { Provider } from 'jotai'; 6 | 7 | export const metadata = { 8 | title: 'Easy Dataset', 9 | description: '一个强大的 LLM 数据集生成工具', 10 | icons: { 11 | icon: '/imgs/logo.ico' // 更新为正确的文件名 12 | } 13 | }; 14 | 15 | export default function RootLayout({ children }) { 16 | return ( 17 | 18 | 19 | 20 | 21 | 22 | {children} 23 | 24 | 25 | 26 | 27 | 28 | 29 | ); 
/**
 * Landing page of a project.
 *
 * Renders nothing itself: it loads the project's model configurations into
 * the shared store and immediately redirects to the text-split page.
 *
 * @param {{ params: { projectId: string } }} props - Route parameters.
 * @returns {null}
 */
export default function ProjectPage({ params }) {
  const router = useRouter();
  const setConfigList = useSetAtom(modelConfigListAtom);
  const setSelectedModelInfo = useSetAtom(selectedModelInfoAtom);
  const { projectId } = params;

  // Redirect to the text-split page by default
  useEffect(() => {
    getModelConfigList(projectId);
    // replace() instead of push(): this page is only a redirect, so it must not
    // stay in the history stack, otherwise "Back" lands here and is bounced
    // forward again.
    router.replace(`/projects/${projectId}/text-split`);
  }, [projectId, router]);

  // Fetch the model configurations and pre-select the project's default one
  const getModelConfigList = projectId => {
    axios
      .get(`/api/projects/${projectId}/model-config`)
      .then(response => {
        const { data: configList, defaultModelConfigId } = response.data;
        setConfigList(configList);
        if (defaultModelConfigId) {
          setSelectedModelInfo(configList.find(item => item.id === defaultModelConfigId));
        } else {
          setSelectedModelInfo('');
        }
      })
      .catch(error => {
        // Keep the cause visible for debugging; the toast alone hides it
        console.error('get model list error:', error);
        toast.error('get model list error');
      });
  };

  return null;
}
| const [editMode, setEditMode] = useState('create'); 11 | const [editingQuestion, setEditingQuestion] = useState(null); 12 | 13 | const handleOpenCreateDialog = () => { 14 | setEditMode('create'); 15 | setEditingQuestion(null); 16 | setEditDialogOpen(true); 17 | }; 18 | 19 | const handleOpenEditDialog = question => { 20 | setEditMode('edit'); 21 | setEditingQuestion(question); 22 | setEditDialogOpen(true); 23 | }; 24 | 25 | const handleCloseDialog = () => { 26 | setEditDialogOpen(false); 27 | setEditingQuestion(null); 28 | }; 29 | 30 | const handleSubmitQuestion = async formData => { 31 | try { 32 | const response = await request(`/api/projects/${projectId}/questions`, { 33 | method: editMode === 'create' ? 'POST' : 'PUT', 34 | headers: { 35 | 'Content-Type': 'application/json' 36 | }, 37 | body: JSON.stringify( 38 | editMode === 'create' 39 | ? { 40 | question: formData.question, 41 | chunkId: formData.chunkId, 42 | label: formData.label 43 | } 44 | : { 45 | id: formData.id, 46 | question: formData.question, 47 | chunkId: formData.chunkId, 48 | label: formData.label 49 | } 50 | ) 51 | }); 52 | 53 | if (!response.ok) { 54 | const errorData = await response.json(); 55 | throw new Error(errorData.error || t('questions.operationFailed')); 56 | } 57 | 58 | // 获取更新后的问题数据 59 | const updatedQuestion = await response.json(); 60 | 61 | // 直接更新问题列表中的数据,而不是重新获取整个列表 62 | if (onSuccess) { 63 | onSuccess(updatedQuestion); 64 | } 65 | handleCloseDialog(); 66 | } catch (error) { 67 | console.error('操作失败:', error); 68 | } 69 | }; 70 | 71 | return { 72 | editDialogOpen, 73 | editMode, 74 | editingQuestion, 75 | handleOpenCreateDialog, 76 | handleOpenEditDialog, 77 | handleCloseDialog, 78 | handleSubmitQuestion 79 | }; 80 | } 81 | -------------------------------------------------------------------------------- /commitlint.config.mjs: -------------------------------------------------------------------------------- 1 | export default { extends: ['@commitlint/config-conventional'] }; 2 
| -------------------------------------------------------------------------------- /components/I18nProvider.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { useEffect } from 'react'; 4 | import i18n from '@/lib/i18n'; 5 | import { I18nextProvider } from 'react-i18next'; 6 | 7 | export default function I18nProvider({ children }) { 8 | useEffect(() => { 9 | // 确保i18n只在客户端初始化 10 | if (typeof window !== 'undefined') { 11 | // 这里可以添加任何客户端特定的i18n初始化逻辑 12 | } 13 | }, []); 14 | 15 | return {children}; 16 | } 17 | -------------------------------------------------------------------------------- /components/LanguageSwitcher.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { useTranslation } from 'react-i18next'; 4 | import { IconButton, Tooltip, useTheme, Typography } from '@mui/material'; 5 | 6 | export default function LanguageSwitcher() { 7 | const { i18n } = useTranslation(); 8 | const theme = useTheme(); 9 | 10 | const toggleLanguage = () => { 11 | const newLang = i18n.language === 'zh-CN' ? 'en' : 'zh-CN'; 12 | i18n.changeLanguage(newLang); 13 | }; 14 | 15 | return ( 16 | 17 | 30 | 31 | {i18n.language === 'zh-CN' ? 'EN' : '中'} 32 | 33 | 34 | 35 | ); 36 | } 37 | -------------------------------------------------------------------------------- /components/common/MessageAlert.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Snackbar, Alert } from '@mui/material'; 4 | 5 | export default function MessageAlert({ message, onClose }) { 6 | if (!message) return null; 7 | 8 | const severity = message.severity || 'error'; 9 | const text = typeof message === 'string' ? 
message : message.message; 10 | 11 | return ( 12 | 18 | 19 | {text} 20 | 21 | 22 | ); 23 | } 24 | -------------------------------------------------------------------------------- /components/datasets/OptimizeDialog.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Dialog, DialogTitle, DialogContent, DialogActions, Button, TextField, CircularProgress } from '@mui/material'; 4 | import { useState } from 'react'; 5 | import { useTranslation } from 'react-i18next'; 6 | 7 | /** 8 | * AI优化对话框组件 9 | */ 10 | export default function OptimizeDialog({ open, onClose, onConfirm, loading }) { 11 | const [advice, setAdvice] = useState(''); 12 | const { t } = useTranslation(); 13 | 14 | const handleConfirm = () => { 15 | onConfirm(advice); 16 | setAdvice(''); 17 | }; 18 | 19 | const handleClose = () => { 20 | if (!loading) { 21 | onClose(); 22 | setAdvice(''); 23 | } 24 | }; 25 | 26 | return ( 27 | 28 | {t('datasets.optimizeTitle')} 29 | 30 | setAdvice(e.target.value)} 40 | disabled={loading} 41 | placeholder={t('datasets.optimizePlaceholder')} 42 | /> 43 | 44 | 45 | 48 | 51 | 52 | 53 | ); 54 | } 55 | -------------------------------------------------------------------------------- /components/distill/ConfirmDialog.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Dialog, DialogActions, DialogTitle, Button } from '@mui/material'; 4 | 5 | /** 6 | * 通用确认对话框组件 7 | * @param {Object} props 8 | * @param {boolean} props.open - 对话框是否打开 9 | * @param {Function} props.onClose - 关闭对话框的回调 10 | * @param {Function} props.onConfirm - 确认操作的回调 11 | * @param {string} props.title - 对话框标题 12 | * @param {string} props.cancelText - 取消按钮文本 13 | * @param {string} props.confirmText - 确认按钮文本 14 | */ 15 | export default function ConfirmDialog({ 16 | open, 17 | onClose, 18 | onConfirm, 19 | title, 20 | cancelText = '取消', 21 | confirmText = '确认', 22 | confirmColor = 'error' 
/**
 * Extract the numeric outline prefix of a label.
 *
 * Recognises prefixes such as "1", "1.1" or "1.1.2" followed by whitespace.
 * Empty segments are ignored, so "1. Intro" yields [1] rather than [1, NaN].
 *
 * @param {string} label - Tag label.
 * @returns {number[]|null} Prefix as a list of numbers, or null when the label has no numeric prefix.
 */
const parseNumberPrefix = label => {
  const match = /^([\d.]+)\s/.exec(label);
  if (!match) {
    return null;
  }
  const parts = match[1]
    .split('.')
    .filter(segment => segment !== '')
    .map(segment => Number.parseInt(segment, 10));
  // A prefix made only of dots (". foo") carries no number at all
  return parts.length > 0 ? parts : null;
};

/**
 * Sort tags by the outline number at the start of their label.
 *
 * Ordering rules:
 *  - numbered labels are compared segment by segment as numbers, so "1.2"
 *    sorts before "1.10";
 *  - when one prefix is the beginning of the other, the shorter one comes
 *    first ("1" / "1." before "1.1");
 *  - numbered labels come before labels without a number;
 *  - labels without a number are ordered with `localeCompare(…, 'zh-CN')`.
 *
 * The input is not modified.
 *
 * @param {Array<{label: string}>} tags - Tags to sort.
 * @returns {Array<{label: string}>} New array containing the same tag objects in sorted order.
 */
export const sortTagsByNumber = tags => {
  // Parse every label once instead of on each comparison
  const keyed = Array.from(tags, tag => {
    const label = String(tag?.label ?? '');
    return { tag, label, parts: parseNumberPrefix(label) };
  });

  keyed.sort((a, b) => {
    // Both numbered: numeric, segment-wise comparison
    if (a.parts && b.parts) {
      const shared = Math.min(a.parts.length, b.parts.length);
      for (let i = 0; i < shared; i++) {
        if (a.parts[i] !== b.parts[i]) {
          return a.parts[i] - b.parts[i];
        }
      }
      // Common segments are equal: the shorter prefix comes first
      return a.parts.length - b.parts.length;
    }
    // Only one numbered: it comes first
    if (a.parts) {
      return -1;
    }
    if (b.parts) {
      return 1;
    }
    // Neither numbered: alphabetical
    return a.label.localeCompare(b.label, 'zh-CN');
  });

  return keyed.map(entry => entry.tag);
};

/**
 * Build the full path of a tag from the root of the tag tree.
 *
 * Walks up the `parentId` chain and joins the labels with " > ". The walk
 * stops when a tag has no parent, when the parent cannot be found in
 * `allTags`, or when a tag is reached a second time (which protects against
 * cyclic parent references).
 *
 * @param {{id: *, label: string, parentId?: *}|null|undefined} tag - Tag to describe.
 * @param {Array<{id: *, label: string, parentId?: *}>} [allTags=[]] - All known tags.
 * @returns {string} Path such as "Tag 1 > Tag 2 > Tag 3", or '' when no tag is given.
 */
export const getTagPath = (tag, allTags = []) => {
  if (!tag) return '';

  const labels = [];
  const visitedIds = new Set();
  let current = tag;

  while (current && !visitedIds.has(current.id)) {
    visitedIds.add(current.id);
    labels.unshift(current.label);

    const { parentId } = current;
    if (!parentId) break;

    current = allTags.find(candidate => candidate.id === parentId);
  }

  return labels.join(' > ');
};
ModelSelector({ models, selectedModels, onChange }) { 34 | // 获取模型名称 35 | const getModelName = modelId => { 36 | const model = models.find(m => m.id === modelId); 37 | return model ? `${model.providerName}: ${model.modelName}` : modelId; 38 | }; 39 | const { t } = useTranslation(); 40 | 41 | return ( 42 | 43 | {t('playground.selectModelMax3')} 44 | 79 | 80 | ); 81 | } 82 | -------------------------------------------------------------------------------- /components/playground/PlaygroundHeader.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React from 'react'; 4 | import { Grid, Button, Divider, FormControl, InputLabel, Select, MenuItem } from '@mui/material'; 5 | import DeleteIcon from '@mui/icons-material/Delete'; 6 | import { useTheme } from '@mui/material/styles'; 7 | import ModelSelector from './ModelSelector'; 8 | import { playgroundStyles } from '@/styles/playground'; 9 | import { useTranslation } from 'react-i18next'; 10 | 11 | const PlaygroundHeader = ({ 12 | availableModels, 13 | selectedModels, 14 | handleModelSelection, 15 | handleClearConversations, 16 | conversations, 17 | outputMode, 18 | handleOutputModeChange 19 | }) => { 20 | const theme = useTheme(); 21 | const styles = playgroundStyles(theme); 22 | const { t } = useTranslation(); 23 | 24 | const isClearDisabled = selectedModels.length === 0 || Object.values(conversations).every(conv => conv.length === 0); 25 | 26 | return ( 27 | <> 28 | 29 | 30 | 31 | 32 | 33 | 34 | {t('playground.outputMode')} 35 | 45 | 46 | 47 | 48 | 58 | 59 | 60 | 61 | 62 | 63 | ); 64 | }; 65 | 66 | export default PlaygroundHeader; 67 | -------------------------------------------------------------------------------- /components/tasks/TaskActions.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React from 'react'; 4 | import { IconButton, Tooltip } from '@mui/material'; 5 | import DeleteIcon 
from '@mui/icons-material/Delete'; 6 | import StopCircleIcon from '@mui/icons-material/StopCircle'; 7 | import { useTranslation } from 'react-i18next'; 8 | 9 | // 任务操作组件 10 | export default function TaskActions({ task, onAbort, onDelete }) { 11 | const { t } = useTranslation(); 12 | 13 | // 处理中的任务显示中断按钮,其他状态显示删除按钮 14 | return task.status === 0 ? ( 15 | 16 | onAbort(task.id)}> 17 | 18 | 19 | 20 | ) : ( 21 | 22 | onDelete(task.id)}> 23 | 24 | 25 | 26 | ); 27 | } 28 | -------------------------------------------------------------------------------- /components/tasks/TaskFilters.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React from 'react'; 4 | import { 5 | Box, 6 | FormControl, 7 | InputLabel, 8 | Select, 9 | MenuItem, 10 | OutlinedInput, 11 | IconButton, 12 | Tooltip, 13 | CircularProgress 14 | } from '@mui/material'; 15 | import RefreshIcon from '@mui/icons-material/Refresh'; 16 | import { useTranslation } from 'react-i18next'; 17 | 18 | // 任务筛选组件 19 | export default function TaskFilters({ statusFilter, setStatusFilter, typeFilter, setTypeFilter, loading, onRefresh }) { 20 | const { t } = useTranslation(); 21 | 22 | return ( 23 | 24 | {/* 状态筛选 */} 25 | 26 | {t('tasks.filters.status')} 27 | 38 | 39 | 40 | {/* 类型筛选 */} 41 | 42 | {t('tasks.filters.type')} 43 | 54 | 55 | 56 | {/* 刷新按钮 */} 57 | 58 | 59 | {loading ? 
: } 60 | 61 | 62 | 63 | ); 64 | } 65 | -------------------------------------------------------------------------------- /components/tasks/TaskProgress.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React from 'react'; 4 | import { Stack, LinearProgress, Typography } from '@mui/material'; 5 | import { useTranslation } from 'react-i18next'; 6 | 7 | // 任务进度组件 8 | export default function TaskProgress({ task }) { 9 | const { t } = useTranslation(); 10 | 11 | // 如果没有总数,则不显示进度条 12 | if (task.totalCount === 0) return '-'; 13 | 14 | // 计算进度百分比 15 | const progress = (task.completedCount / task.totalCount) * 100; 16 | 17 | return ( 18 | 19 | 31 | 32 | {task.completedCount} / {task.totalCount} ({Math.round(progress)}%) 33 | 34 | 35 | ); 36 | } 37 | -------------------------------------------------------------------------------- /components/tasks/TaskStatusChip.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React from 'react'; 4 | import { Chip, CircularProgress, Box } from '@mui/material'; 5 | import { useTranslation } from 'react-i18next'; 6 | 7 | // 任务状态显示组件 8 | export default function TaskStatusChip({ status }) { 9 | const { t } = useTranslation(); 10 | 11 | // 状态映射配置 12 | const STATUS_CONFIG = { 13 | 0: { 14 | label: t('tasks.status.processing'), 15 | color: 'warning', 16 | loading: true 17 | }, 18 | 1: { 19 | label: t('tasks.status.completed'), 20 | color: 'success' 21 | }, 22 | 2: { 23 | label: t('tasks.status.failed'), 24 | color: 'error' 25 | }, 26 | 3: { 27 | label: t('tasks.status.aborted'), 28 | color: 'default' 29 | } 30 | }; 31 | 32 | const statusInfo = STATUS_CONFIG[status] || { 33 | label: t('tasks.status.unknown'), 34 | color: 'default' 35 | }; 36 | 37 | // 处理中状态显示加载动画 38 | if (status === 0) { 39 | return ( 40 | 41 | 42 | 43 | 44 | ); 45 | } 46 | 47 | return ; 48 | } 49 | 
-------------------------------------------------------------------------------- /components/text-split/ChunkDeleteDialog.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Dialog, DialogTitle, DialogContent, DialogContentText, DialogActions, Button } from '@mui/material'; 4 | import { useTranslation } from 'react-i18next'; 5 | 6 | export default function ChunkDeleteDialog({ open, onClose, onConfirm }) { 7 | const { t } = useTranslation(); 8 | return ( 9 | 15 | {t('common.confirmDelete')}? 16 | 17 | {t('common.confirmDelete')}? 18 | 19 | 20 | 21 | 24 | 25 | 26 | ); 27 | } 28 | -------------------------------------------------------------------------------- /components/text-split/ChunkViewDialog.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Box, Button, Dialog, DialogTitle, DialogContent, DialogActions, CircularProgress } from '@mui/material'; 4 | import ReactMarkdown from 'react-markdown'; 5 | import { useTranslation } from 'react-i18next'; 6 | 7 | export default function ChunkViewDialog({ open, chunk, onClose }) { 8 | const { t } = useTranslation(); 9 | return ( 10 | 11 | {t('textSplit.chunkDetails', { chunkId: chunk?.name })} 12 | 13 | {chunk ? 
( 14 | 15 | {chunk.content} 16 | 17 | ) : ( 18 | 19 | 20 | 21 | )} 22 | 23 | 24 | 25 | 26 | 27 | ); 28 | } 29 | -------------------------------------------------------------------------------- /components/text-split/LoadingBackdrop.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Backdrop, Paper, CircularProgress, Typography, Box, LinearProgress } from '@mui/material'; 4 | 5 | export default function LoadingBackdrop({ open, title, description, progress = null }) { 6 | return ( 7 | theme.zIndex.drawer + 1, 11 | position: 'fixed', 12 | backdropFilter: 'blur(3px)' 13 | }} 14 | open={open} 15 | > 16 | 29 | 30 | {title} 31 | 32 | {description} 33 | 34 | 35 | {progress && progress.total > 0 && ( 36 | 37 | 38 | 39 | {progress.completed}/{progress.total} ({progress.percentage}%) 40 | 41 | {progress.questionCount > 0 && ( 42 | 43 | 已生成问题数: {progress.questionCount} 44 | 45 | )} 46 | 47 | 48 | 49 | )} 50 | 51 | 52 | ); 53 | } 54 | -------------------------------------------------------------------------------- /components/text-split/PdfSettings.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Box, Select, MenuItem, Typography, FormControl, InputLabel } from '@mui/material'; 4 | import { useTranslation } from 'react-i18next'; 5 | 6 | export default function PdfSettings({ pdfStrategy, setPdfStrategy, selectedViosnModel, setSelectedViosnModel }) { 7 | const { t } = useTranslation(); 8 | 9 | return ( 10 | 11 | 12 | {t('textSplit.pdfStrategy')} 13 | 23 | 24 | 25 | {pdfStrategy === 'vision' && ( 26 | 27 | {t('textSplit.visionModel')} 28 | 39 | 40 | )} 41 | 42 | ); 43 | } 44 | -------------------------------------------------------------------------------- /components/text-split/components/DeleteConfirmDialog.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { 4 | Dialog, 5 | 
DialogTitle, 6 | DialogContent, 7 | DialogContentText, 8 | DialogActions, 9 | Button, 10 | Typography, 11 | Box, 12 | Alert 13 | } from '@mui/material'; 14 | import { useTranslation } from 'react-i18next'; 15 | 16 | export default function DeleteConfirmDialog({ open, fileName, onClose, onConfirm }) { 17 | const { t } = useTranslation(); 18 | return ( 19 | 27 | 28 | {t('common.confirmDelete')}「{fileName}」? 29 | 30 | 31 | {t('common.confirmDeleteDescription')} 32 | 33 | 34 | 35 | {t('textSplit.deleteFileWarning')} 36 | 37 | 38 | 39 | • {t('textSplit.deleteFileWarningChunks')} 40 | 41 | 42 | • {t('textSplit.deleteFileWarningQuestions')} 43 | 44 | 45 | • {t('textSplit.deleteFileWarningDatasets')} 46 | 47 | 48 | 49 | 50 | 51 | 54 | 57 | 58 | 59 | ); 60 | } 61 | -------------------------------------------------------------------------------- /components/text-split/components/DirectoryView.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Box, List, ListItem, ListItemIcon, ListItemText, Collapse, IconButton } from '@mui/material'; 4 | import FolderIcon from '@mui/icons-material/Folder'; 5 | import ArticleIcon from '@mui/icons-material/Article'; 6 | import ExpandLess from '@mui/icons-material/ExpandLess'; 7 | import ExpandMore from '@mui/icons-material/ExpandMore'; 8 | import { useTheme } from '@mui/material/styles'; 9 | 10 | /** 11 | * 目录结构组件 12 | * @param {Object} props 13 | * @param {Array} props.items - 目录项数组 14 | * @param {Object} props.expandedItems - 展开状态对象 15 | * @param {Function} props.onToggleItem - 展开/折叠回调 16 | * @param {number} props.level - 当前层级 17 | * @param {string} props.parentId - 父级ID 18 | */ 19 | export default function DirectoryView({ items, expandedItems, onToggleItem, level = 0, parentId = '' }) { 20 | const theme = useTheme(); 21 | 22 | if (!items || items.length === 0) return null; 23 | 24 | return ( 25 | 0 ? 
2 : 0 }}> 26 | {items.map((item, index) => { 27 | const itemId = `${parentId}-${index}`; 28 | const hasChildren = item.children && item.children.length > 0; 29 | const isExpanded = expandedItems[itemId] || false; 30 | 31 | return ( 32 | 33 | 0 ? `1px solid ${theme.palette.divider}` : 'none', 37 | ml: level > 0 ? 1 : 0 38 | }} 39 | > 40 | 41 | {hasChildren ? : } 42 | 43 | 50 | {hasChildren && ( 51 | onToggleItem(itemId)}> 52 | {isExpanded ? : } 53 | 54 | )} 55 | 56 | 57 | {hasChildren && ( 58 | 59 | 66 | 67 | )} 68 | 69 | ); 70 | })} 71 | 72 | ); 73 | } 74 | -------------------------------------------------------------------------------- /components/text-split/components/DomainTreeView.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Box } from '@mui/material'; 4 | import { TreeView, TreeItem } from '@mui/lab'; 5 | import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; 6 | import ChevronRightIcon from '@mui/icons-material/ChevronRight'; 7 | 8 | /** 9 | * 领域知识树组件 10 | * @param {Object} props 11 | * @param {Array} props.nodes - 树节点数组 12 | */ 13 | export default function DomainTreeView({ nodes = [] }) { 14 | if (!nodes || nodes.length === 0) return null; 15 | 16 | const renderTreeItems = nodes => { 17 | return nodes.map((node, index) => ( 18 | 19 | {node.children && node.children.length > 0 && renderTreeItems(node.children)} 20 | 21 | )); 22 | }; 23 | 24 | return ( 25 | } 27 | defaultExpandIcon={} 28 | sx={{ flexGrow: 1, overflowY: 'auto' }} 29 | > 30 | {renderTreeItems(nodes)} 31 | 32 | ); 33 | } 34 | -------------------------------------------------------------------------------- /components/text-split/components/TabPanel.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { Box } from '@mui/material'; 4 | 5 | /** 6 | * 标签页面板组件 7 | * @param {Object} props 8 | * @param {number} props.value - 当前激活的标签索引 9 | * @param {number} 
props.index - 当前面板对应的索引 10 | * @param {ReactNode} props.children - 子组件 11 | */ 12 | export default function TabPanel({ value, index, children }) { 13 | return ( 14 | 23 | ); 24 | } 25 | -------------------------------------------------------------------------------- /constant/model.js: -------------------------------------------------------------------------------- 1 | export const MODEL_PROVIDERS = [ 2 | { 3 | id: 'ollama', 4 | name: 'Ollama', 5 | defaultEndpoint: 'http://127.0.0.1:11434/api', 6 | defaultModels: [] 7 | }, 8 | { 9 | id: 'openai', 10 | name: 'OpenAI', 11 | defaultEndpoint: 'https://api.openai.com/v1/', 12 | defaultModels: ['gpt-4o', 'gpt-4o-mini', 'o1-mini'] 13 | }, 14 | { 15 | id: 'siliconcloud', 16 | name: '硅基流动', 17 | defaultEndpoint: 'https://api.siliconflow.cn/v1/', 18 | defaultModels: [ 19 | 'deepseek-ai/DeepSeek-R1', 20 | 'deepseek-ai/DeepSeek-V3', 21 | 'Qwen2.5-7B-Instruct', 22 | 'meta-llama/Llama-3.3-70B-Instruct' 23 | ] 24 | }, 25 | { 26 | id: 'deepseek', 27 | name: 'DeepSeek', 28 | defaultEndpoint: 'https://api.deepseek.com/v1/', 29 | defaultModels: ['deepseek-chat', 'deepseek-reasoner'] 30 | }, 31 | { 32 | id: '302ai', 33 | name: '302.AI', 34 | defaultEndpoint: 'https://api.302.ai/v1/', 35 | defaultModels: ['Doubao-pro-128k', 'deepseek-r1', 'kimi-latest', 'qwen-max'] 36 | }, 37 | { 38 | id: 'zhipu', 39 | name: '智谱AI', 40 | defaultEndpoint: 'https://open.bigmodel.cn/api/paas/v4/', 41 | defaultModels: ['glm-4-flash', 'glm-4-flashx', 'glm-4-plus', 'glm-4-long'] 42 | }, 43 | { 44 | id: 'Doubao', 45 | name: '火山引擎', 46 | defaultEndpoint: 'https://ark.cn-beijing.volces.com/api/v3/', 47 | defaultModels: [] 48 | }, 49 | { 50 | id: 'groq', 51 | name: 'Groq', 52 | defaultEndpoint: 'https://api.groq.com/openai', 53 | defaultModels: ['Gemma 7B', 'LLaMA3 8B', 'LLaMA3 70B'] 54 | }, 55 | { 56 | id: 'grok', 57 | name: 'Grok', 58 | defaultEndpoint: 'https://api.x.ai/v1', 59 | defaultModels: ['Grok Beta'] 60 | }, 61 | { 62 | id: 'OpenRouter', 63 | name: 
'OpenRouter', 64 | defaultEndpoint: 'https://openrouter.ai/api/v1/', 65 | defaultModels: [ 66 | 'google/gemma-2-9b-it:free', 67 | 'meta-llama/llama-3-8b-instruct:free', 68 | 'microsoft/phi-3-mini-128k-instruct:free' 69 | ] 70 | }, 71 | { 72 | id: 'alibailian', 73 | name: '阿里云百炼', 74 | defaultEndpoint: 'https://dashscope.aliyuncs.com/compatible-mode/v1', 75 | defaultModels: ['qwen-max-latest', 'qwen-max-2025-01-25'] 76 | } 77 | ]; 78 | 79 | export const DEFAULT_MODEL_SETTINGS = { 80 | temperature: 0.7, 81 | maxTokens: 8192 82 | }; 83 | -------------------------------------------------------------------------------- /constant/setting.js: -------------------------------------------------------------------------------- 1 | // 默认项目任务配置 2 | export const DEFAULT_SETTINGS = { 3 | textSplitMinLength: 1500, 4 | textSplitMaxLength: 2000, 5 | questionGenerationLength: 240, 6 | questionMaskRemovingProbability: 60, 7 | huggingfaceToken: '', 8 | concurrencyLimit: 5, 9 | visionConcurrencyLimit: 5 10 | }; 11 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | easy-dataset: 3 | image: ghcr.io/conardli/easy-dataset 4 | container_name: easy-dataset 5 | ports: 6 | - '1717:1717' 7 | volumes: 8 | - ${LOCAL_DB_PATH}:/app/local-db 9 | restart: unless-stopped 10 | -------------------------------------------------------------------------------- /electron/entitlements.mac.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | com.apple.security.cs.allow-jit 6 | 7 | com.apple.security.cs.allow-unsigned-executable-memory 8 | 9 | com.apple.security.cs.allow-dyld-environment-variables 10 | 11 | com.apple.security.network.client 12 | 13 | com.apple.security.files.user-selected.read-write 14 | 15 | 16 | -------------------------------------------------------------------------------- 
/electron/main.js: -------------------------------------------------------------------------------- 1 | const { app, dialog, ipcMain } = require('electron'); 2 | const { setupLogging, setupIpcLogging } = require('./modules/logger'); 3 | const { createWindow, loadAppUrl, openDevTools, getMainWindow } = require('./modules/window-manager'); 4 | const { createMenu } = require('./modules/menu'); 5 | const { startNextServer } = require('./modules/server'); 6 | const { setupAutoUpdater } = require('./modules/updater'); 7 | const { initializeDatabase } = require('./modules/database'); 8 | const { clearCache } = require('./modules/cache'); 9 | const { setupIpcHandlers } = require('./modules/ipc-handlers'); 10 | 11 | // 是否是开发环境 12 | const isDev = process.env.NODE_ENV === 'development'; 13 | const port = 1717; 14 | let mainWindow; 15 | 16 | // 当 Electron 完成初始化时创建窗口 17 | app.whenReady().then(async () => { 18 | try { 19 | // 设置日志系统 20 | setupLogging(app); 21 | 22 | // 设置 IPC 处理程序 23 | setupIpcHandlers(app, isDev); 24 | setupIpcLogging(ipcMain, app, isDev); 25 | 26 | // 初始化数据库 27 | await initializeDatabase(app); 28 | 29 | // 创建主窗口 30 | mainWindow = createWindow(isDev, port); 31 | 32 | // 创建菜单 33 | createMenu(mainWindow, () => clearCache(app)); 34 | 35 | // 在开发环境中加载 localhost URL 36 | if (isDev) { 37 | loadAppUrl(`http://localhost:${port}`); 38 | openDevTools(); 39 | } else { 40 | // 在生产环境中启动 Next.js 服务 41 | const appUrl = await startNextServer(port, app); 42 | loadAppUrl(appUrl); 43 | } 44 | 45 | // 设置自动更新 46 | setupAutoUpdater(mainWindow); 47 | 48 | // 应用启动完成后的一段时间后自动检查更新 49 | setTimeout(() => { 50 | if (!isDev) { 51 | const { autoUpdater } = require('electron-updater'); 52 | autoUpdater.checkForUpdates().catch(err => { 53 | console.error('Automatic update check failed:', err); 54 | }); 55 | } 56 | }, 10000); // Check for updates after 10 seconds 57 | } catch (error) { 58 | console.error('An error occurred during application initialization:', error); 59 | dialog.showErrorBox( 
60 | 'Application Initialization Error', 61 | `An error occurred during startup, which may affect application functionality. 62 | Error details: ${error.message}` 63 | ); 64 | } 65 | }); 66 | 67 | // 当所有窗口关闭时退出应用 68 | app.on('window-all-closed', () => { 69 | if (process.platform !== 'darwin') { 70 | app.quit(); 71 | } 72 | }); 73 | 74 | app.on('activate', () => { 75 | if (BrowserWindow.getAllWindows().length === 0) { 76 | mainWindow = createWindow(isDev, port); 77 | } 78 | }); 79 | 80 | // 应用退出前清理 81 | app.on('before-quit', () => { 82 | console.log('应用正在退出...'); 83 | }); 84 | -------------------------------------------------------------------------------- /electron/modules/cache.js: -------------------------------------------------------------------------------- 1 | const { clearLogs } = require('./logger'); 2 | const { clearDatabaseCache } = require('./database'); 3 | 4 | /** 5 | * 清除缓存函数 - 清理logs和local-db目录 6 | * @param {Object} app Electron app 对象 7 | * @returns {Promise} 操作是否成功 8 | */ 9 | async function clearCache(app) { 10 | // 清理日志目录 11 | await clearLogs(app); 12 | 13 | // 清理数据库缓存 14 | await clearDatabaseCache(app); 15 | 16 | return true; 17 | } 18 | 19 | module.exports = { 20 | clearCache 21 | }; 22 | -------------------------------------------------------------------------------- /electron/modules/ipc-handlers.js: -------------------------------------------------------------------------------- 1 | const { ipcMain } = require('electron'); 2 | const { checkUpdate, downloadUpdate, installUpdate } = require('./updater'); 3 | 4 | /** 5 | * 设置 IPC 处理程序 6 | * @param {Object} app Electron app 对象 7 | * @param {boolean} isDev 是否为开发环境 8 | */ 9 | function setupIpcHandlers(app, isDev) { 10 | // 获取用户数据路径 11 | ipcMain.on('get-user-data-path', event => { 12 | event.returnValue = app.getPath('userData'); 13 | }); 14 | 15 | // 检查更新 16 | ipcMain.handle('check-update', async () => { 17 | return await checkUpdate(isDev); 18 | }); 19 | 20 | // 下载更新 21 | 
ipcMain.handle('download-update', async () => { 22 | return await downloadUpdate(); 23 | }); 24 | 25 | // 安装更新 26 | ipcMain.handle('install-update', () => { 27 | return installUpdate(); 28 | }); 29 | } 30 | 31 | module.exports = { 32 | setupIpcHandlers 33 | }; 34 | -------------------------------------------------------------------------------- /electron/modules/logger.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const path = require('path'); 3 | 4 | /** 5 | * 设置应用日志系统 6 | * @param {Object} app Electron app 对象 7 | * @returns {string} 日志文件路径 8 | */ 9 | function setupLogging(app) { 10 | const logDir = path.join(app.getPath('userData'), 'logs'); 11 | if (!fs.existsSync(logDir)) { 12 | fs.mkdirSync(logDir, { recursive: true }); 13 | } 14 | 15 | const logFilePath = path.join(logDir, `app-${new Date().toISOString().slice(0, 10)}.log`); 16 | 17 | // 创建自定义日志函数 18 | global.appLog = (message, level = 'info') => { 19 | const timestamp = new Date().toISOString(); 20 | const logEntry = `[${timestamp}] [${level.toUpperCase()}] ${message}\n`; 21 | 22 | // 同时输出到控制台和日志文件 23 | console.log(message); 24 | fs.appendFileSync(logFilePath, logEntry); 25 | }; 26 | 27 | // 捕获全局未处理异常并记录 28 | process.on('uncaughtException', error => { 29 | global.appLog(`未捕获的异常: ${error.stack || error}`, 'error'); 30 | }); 31 | 32 | return logFilePath; 33 | } 34 | 35 | /** 36 | * 设置 IPC 日志处理程序 37 | * @param {Object} ipcMain IPC 主进程对象 38 | * @param {Object} app Electron app 对象 39 | * @param {boolean} isDev 是否为开发环境 40 | */ 41 | function setupIpcLogging(ipcMain, app, isDev) { 42 | ipcMain.on('log', (event, { level, message }) => { 43 | const timestamp = new Date().toISOString(); 44 | const logEntry = `[${timestamp}] [${level.toUpperCase()}] ${message}\n`; 45 | 46 | // 只在客户端环境下写入文件 47 | if (!isDev || true) { 48 | const logsDir = path.join(app.getPath('userData'), 'logs'); 49 | if (!fs.existsSync(logsDir)) { 50 | fs.mkdirSync(logsDir, { 
recursive: true }); 51 | } 52 | const logFile = path.join(logsDir, `${new Date().toISOString().split('T')[0]}.log`); 53 | fs.appendFileSync(logFile, logEntry); 54 | } 55 | 56 | // 同时输出到控制台 57 | console[level](message); 58 | }); 59 | } 60 | 61 | /** 62 | * 清理日志文件 63 | * @param {Object} app Electron app 对象 64 | * @returns {Promise} 65 | */ 66 | async function clearLogs(app) { 67 | const logsDir = path.join(app.getPath('userData'), 'logs'); 68 | if (fs.existsSync(logsDir)) { 69 | // 读取目录下所有文件 70 | const files = await fs.promises.readdir(logsDir); 71 | // 删除所有文件 72 | for (const file of files) { 73 | const filePath = path.join(logsDir, file); 74 | await fs.promises.unlink(filePath); 75 | global.appLog(`已删除日志文件: ${filePath}`); 76 | } 77 | } 78 | } 79 | 80 | module.exports = { 81 | setupLogging, 82 | setupIpcLogging, 83 | clearLogs 84 | }; 85 | -------------------------------------------------------------------------------- /electron/preload.js: -------------------------------------------------------------------------------- 1 | const { contextBridge, ipcRenderer } = require('electron'); 2 | 3 | // 在渲染进程中暴露安全的 API 4 | contextBridge.exposeInMainWorld('electron', { 5 | // 获取应用版本 6 | getAppVersion: () => ipcRenderer.invoke('get-app-version'), 7 | 8 | // 获取当前语言 9 | getLanguage: () => { 10 | // 尝试从本地存储获取语言设置 11 | const storedLang = localStorage.getItem('i18nextLng'); 12 | // 如果存在则返回,否则返回系统语言或默认为中文 13 | return storedLang || navigator.language.startsWith('zh') ? 
'zh' : 'en'; 14 | }, 15 | 16 | // 获取用户数据目录 17 | getUserDataPath: () => { 18 | try { 19 | return ipcRenderer.sendSync('get-user-data-path'); 20 | } catch (error) { 21 | console.error('获取用户数据目录失败:', error); 22 | return null; 23 | } 24 | }, 25 | 26 | // 更新相关 API 27 | updater: { 28 | // 检查更新 29 | checkForUpdates: () => ipcRenderer.invoke('check-update'), 30 | 31 | // 下载更新 32 | downloadUpdate: () => ipcRenderer.invoke('download-update'), 33 | 34 | // 安装更新 35 | installUpdate: () => ipcRenderer.invoke('install-update'), 36 | 37 | // 监听更新事件 38 | onUpdateAvailable: callback => { 39 | const handler = (_, info) => callback(info); 40 | ipcRenderer.on('update-available', handler); 41 | return () => ipcRenderer.removeListener('update-available', handler); 42 | }, 43 | 44 | onUpdateNotAvailable: callback => { 45 | const handler = () => callback(); 46 | ipcRenderer.on('update-not-available', handler); 47 | return () => ipcRenderer.removeListener('update-not-available', handler); 48 | }, 49 | 50 | onUpdateError: callback => { 51 | const handler = (_, error) => callback(error); 52 | ipcRenderer.on('update-error', handler); 53 | return () => ipcRenderer.removeListener('update-error', handler); 54 | }, 55 | 56 | onDownloadProgress: callback => { 57 | const handler = (_, progress) => callback(progress); 58 | ipcRenderer.on('download-progress', handler); 59 | return () => ipcRenderer.removeListener('download-progress', handler); 60 | }, 61 | 62 | onUpdateDownloaded: callback => { 63 | const handler = (_, info) => callback(info); 64 | ipcRenderer.on('update-downloaded', handler); 65 | return () => ipcRenderer.removeListener('update-downloaded', handler); 66 | } 67 | } 68 | }); 69 | 70 | // 通知渲染进程 preload 脚本已加载完成 71 | window.addEventListener('DOMContentLoaded', () => { 72 | console.log('Electron preload script loaded'); 73 | }); 74 | -------------------------------------------------------------------------------- /electron/util.js: 
-------------------------------------------------------------------------------- 1 | const path = require('path'); 2 | const fs = require('fs'); 3 | 4 | // 获取应用版本 5 | const getAppVersion = () => { 6 | try { 7 | const packageJsonPath = path.join(__dirname, '../package.json'); 8 | if (fs.existsSync(packageJsonPath)) { 9 | const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8')); 10 | return packageJson.version; 11 | } 12 | return '1.0.0'; 13 | } catch (error) { 14 | console.error('读取版本信息失败:', error); 15 | return '1.0.0'; 16 | } 17 | }; 18 | 19 | module.exports = { getAppVersion }; 20 | -------------------------------------------------------------------------------- /hooks/useDebounce.js: -------------------------------------------------------------------------------- 1 | import { useEffect, useState } from 'react'; 2 | 3 | export function useDebounce(value, delay = 500) { 4 | const [debouncedValue, setDebouncedValue] = useState(value); 5 | useEffect(() => { 6 | const timer = setTimeout(() => { 7 | setDebouncedValue(value); 8 | }, delay); 9 | return () => { 10 | clearTimeout(timer); 11 | }; 12 | }, [value, delay]); 13 | return debouncedValue; 14 | } 15 | -------------------------------------------------------------------------------- /hooks/useSnackbar.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { useState, useCallback } from 'react'; 4 | import { Snackbar, Alert } from '@mui/material'; 5 | 6 | export const useSnackbar = () => { 7 | const [open, setOpen] = useState(false); 8 | const [message, setMessage] = useState(''); 9 | const [severity, setSeverity] = useState('info'); 10 | 11 | const showMessage = useCallback((newMessage, newSeverity = 'info') => { 12 | setMessage(newMessage); 13 | setSeverity(newSeverity); 14 | setOpen(true); 15 | }, []); 16 | 17 | const showSuccess = useCallback( 18 | message => { 19 | showMessage(message, 'success'); 20 | }, 21 | [showMessage] 22 | ); 23 
| 24 | const showError = useCallback( 25 | message => { 26 | showMessage(message, 'error'); 27 | }, 28 | [showMessage] 29 | ); 30 | 31 | const showInfo = useCallback( 32 | message => { 33 | showMessage(message, 'info'); 34 | }, 35 | [showMessage] 36 | ); 37 | 38 | const showWarning = useCallback( 39 | message => { 40 | showMessage(message, 'warning'); 41 | }, 42 | [showMessage] 43 | ); 44 | 45 | const handleClose = useCallback(() => { 46 | setOpen(false); 47 | }, []); 48 | 49 | const SnackbarComponent = useCallback( 50 | () => ( 51 | 57 | 58 | {message} 59 | 60 | 61 | ), 62 | [open, message, severity, handleClose] 63 | ); 64 | 65 | return { 66 | showMessage, 67 | showSuccess, 68 | showError, 69 | showInfo, 70 | showWarning, 71 | SnackbarComponent 72 | }; 73 | }; 74 | -------------------------------------------------------------------------------- /hooks/useTaskSettings.js: -------------------------------------------------------------------------------- 1 | import { useState, useEffect } from 'react'; 2 | import { useTranslation } from 'react-i18next'; 3 | import { DEFAULT_SETTINGS } from '@/constant/setting'; 4 | 5 | export default function useTaskSettings(projectId) { 6 | const { t } = useTranslation(); 7 | const [taskSettings, setTaskSettings] = useState({ 8 | ...DEFAULT_SETTINGS 9 | }); 10 | const [loading, setLoading] = useState(true); 11 | const [error, setError] = useState(null); 12 | const [success, setSuccess] = useState(false); 13 | 14 | useEffect(() => { 15 | async function fetchTaskSettings() { 16 | try { 17 | setLoading(true); 18 | const response = await fetch(`/api/projects/${projectId}/tasks`); 19 | if (!response.ok) { 20 | throw new Error(t('settings.fetchTasksFailed')); 21 | } 22 | 23 | const data = await response.json(); 24 | 25 | // 如果没有配置,使用默认值 26 | if (Object.keys(data).length === 0) { 27 | setTaskSettings({ 28 | ...DEFAULT_SETTINGS 29 | }); 30 | } else { 31 | setTaskSettings({ 32 | ...DEFAULT_SETTINGS, 33 | ...data 34 | }); 35 | } 36 | } catch 
(error) { 37 | console.error('获取任务配置出错:', error); 38 | setError(error.message); 39 | } finally { 40 | setLoading(false); 41 | } 42 | } 43 | 44 | fetchTaskSettings(); 45 | }, [projectId, t]); 46 | 47 | return { 48 | taskSettings, 49 | setTaskSettings, 50 | loading, 51 | error, 52 | success, 53 | setSuccess 54 | }; 55 | } 56 | -------------------------------------------------------------------------------- /jsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "baseUrl": ".", 4 | "paths": { 5 | "@/*": ["./*"] 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /lib/db/files.js: -------------------------------------------------------------------------------- 1 | /** 2 | * 文件操作辅助函数 3 | */ 4 | const path = require('path'); 5 | const { promises: fs } = require('fs'); 6 | const { getProjectRoot, readJSON } = require('./base'); 7 | const { getProject } = require('./projects'); 8 | const { getUploadFileInfoById } = require('./upload-files'); 9 | 10 | /** 11 | * 获取项目文件内容 12 | * @param {string} projectId - 项目ID 13 | * @param {string} fileName - 文件名 14 | * @returns {Promise} 文件内容 15 | */ 16 | async function getProjectFileContent(projectId, fileName) { 17 | try { 18 | // 获取项目根目录 19 | const projectRoot = await getProjectRoot(); 20 | const projectPath = path.join(projectRoot, projectId); 21 | const filePath = path.join(projectPath, 'files', fileName); 22 | 23 | // 读取文件内容 24 | const content = await fs.readFile(filePath, 'utf-8'); 25 | return content; 26 | } catch (error) { 27 | console.error('获取项目文件内容失败:', error); 28 | return ''; 29 | } 30 | } 31 | 32 | /** 33 | * 根据文件ID获取项目文件内容 34 | * @param {string} projectId - 项目ID 35 | * @param {string} fileId - 文件ID 36 | * @returns {Promise} 文件内容 37 | */ 38 | async function getProjectFileContentById(projectId, fileId) { 39 | try { 40 | // 获取文件信息 41 | const fileInfo = await getUploadFileInfoById(fileId); 42 | if 
(!fileInfo) { 43 | throw new Error('文件不存在'); 44 | } 45 | 46 | // 获取项目根目录 47 | const projectRoot = await getProjectRoot(); 48 | const projectPath = path.join(projectRoot, projectId); 49 | const filePath = path.join(projectPath, 'files', fileInfo.fileName); 50 | 51 | // 读取文件内容 52 | const content = await fs.readFile(filePath, 'utf-8'); 53 | return content; 54 | } catch (error) { 55 | console.error('根据ID获取项目文件内容失败:', error); 56 | return ''; 57 | } 58 | } 59 | 60 | module.exports = { 61 | getProjectFileContent, 62 | getProjectFileContentById 63 | }; 64 | -------------------------------------------------------------------------------- /lib/db/index.js: -------------------------------------------------------------------------------- 1 | import { PrismaClient } from '@prisma/client'; 2 | 3 | const createPrismaClient = () => 4 | new PrismaClient({ 5 | // log: process.env.NODE_ENV === 'development' ? ['query', 'error', 'warn'] : ['error'] 6 | log: ['error'] 7 | }); 8 | 9 | const globalForPrisma = globalThis; 10 | 11 | export const db = globalForPrisma.prisma || createPrismaClient(); 12 | 13 | if (process.env.NODE_ENV !== 'production') globalForPrisma.prisma = db; 14 | -------------------------------------------------------------------------------- /lib/db/llm-models.js: -------------------------------------------------------------------------------- 1 | 'use server'; 2 | import { db } from '@/lib/db/index'; 3 | 4 | export async function getLlmModelsByProviderId(providerId) { 5 | try { 6 | return await db.llmModels.findMany({ where: { providerId } }); 7 | } catch (error) { 8 | console.error('Failed to get llmModels by providerId in database'); 9 | throw error; 10 | } 11 | } 12 | 13 | export async function createLlmModels(models) { 14 | try { 15 | return await db.llmModels.createMany({ data: models }); 16 | } catch (error) { 17 | console.error('Failed to create llmModels in database'); 18 | throw error; 19 | } 20 | } 21 | 
-------------------------------------------------------------------------------- /lib/db/llm-providers.js: -------------------------------------------------------------------------------- 1 | 'use server'; 2 | import { db } from '@/lib/db/index'; 3 | 4 | export async function getLlmProviders() { 5 | try { 6 | let list = await db.llmProviders.findMany(); 7 | if (list.length !== 0) { 8 | return list; 9 | } 10 | 11 | let data = [ 12 | { 13 | id: 'ollama', 14 | name: 'Ollama', 15 | apiUrl: 'http://127.0.0.1:11434/api' 16 | }, 17 | { 18 | id: 'openai', 19 | name: 'OpenAI', 20 | apiUrl: 'https://api.openai.com/v1/' 21 | }, 22 | { 23 | id: 'siliconcloud', 24 | name: '硅基流动', 25 | apiUrl: 'https://api.siliconflow.cn/v1/' 26 | }, 27 | { 28 | id: 'deepseek', 29 | name: 'DeepSeek', 30 | apiUrl: 'https://api.deepseek.com/v1/' 31 | }, 32 | { 33 | id: '302ai', 34 | name: '302.AI', 35 | apiUrl: 'https://api.302.ai/v1/' 36 | }, 37 | { 38 | id: 'zhipu', 39 | name: '智谱AI', 40 | apiUrl: 'https://open.bigmodel.cn/api/paas/v4/' 41 | }, 42 | { 43 | id: 'Doubao', 44 | name: '火山引擎', 45 | apiUrl: 'https://ark.cn-beijing.volces.com/api/v3/' 46 | }, 47 | { 48 | id: 'groq', 49 | name: 'Groq', 50 | apiUrl: 'https://api.groq.com/openai' 51 | }, 52 | { 53 | id: 'grok', 54 | name: 'Grok', 55 | apiUrl: 'https://api.x.ai' 56 | }, 57 | { 58 | id: 'openRouter', 59 | name: 'OpenRouter', 60 | apiUrl: 'https://openrouter.ai/api/v1/' 61 | }, 62 | { 63 | id: 'alibailian', 64 | name: '阿里云百炼', 65 | apiUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1' 66 | } 67 | ]; 68 | await db.llmProviders.createMany({ data }); 69 | return data; 70 | } catch (error) { 71 | console.error('Failed to get llmProviders in database'); 72 | throw error; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /lib/db/model-config.js: -------------------------------------------------------------------------------- 1 | 'use server'; 2 | import { db } from '@/lib/db/index'; 3 | 
/**
 * Create or update a model configuration (upsert keyed by `id`).
 *
 * When the incoming object has no `id`, a 12-character id is generated with
 * `nanoid`. The generated id is written to a copy, so the caller's object is
 * left untouched; read the id from the returned record instead.
 *
 * @param {object} models - Model configuration fields; `id` is optional.
 * @returns {Promise<object>} Whatever `db.modelConfig.upsert` resolves with.
 * @throws Rethrows any error (including a null/undefined argument) after logging.
 */
export async function saveModelConfig(models) {
  try {
    // Copy instead of assigning `models.id`, so the caller-owned object is never mutated.
    const record = { ...models, id: models.id || nanoid(12) };
    return await db.modelConfig.upsert({ create: record, update: record, where: { id: record.id } });
  } catch (error) {
    console.error('Failed to create modelConfig in database');
    throw error;
  }
}
/**
 * Delete a project's source file together with its TOC file and the text
 * chunks derived from it.
 *
 * Every step is best-effort: a failure is logged and the remaining steps still
 * run, so the function resolves with `success: true` even if nothing could be
 * removed.
 *
 * @param {string} projectId - Project directory name under the project root.
 * @param {string} fileName - File name inside the project's `files` directory.
 * @returns {Promise<{success: boolean, fileName: string}>}
 */
export async function deleteFile(projectId, fileName) {
  const projectRoot = await getProjectRoot();
  const projectPath = path.join(projectRoot, projectId);
  const filesDir = path.join(projectPath, 'files');
  const chunksDir = path.join(projectPath, 'chunks');
  const tocDir = path.join(projectPath, 'toc');

  // Make sure the TOC directory exists before touching it.
  await ensureDir(tocDir);

  // Remove the source file. unlink() itself rejects with ENOENT when the file
  // is missing, so no separate access() probe is needed.
  try {
    await fs.promises.unlink(path.join(filesDir, fileName));
  } catch (error) {
    console.error(`删除文件 ${fileName} 失败:`, error);
    // Keep going: the TOC and chunks may still need cleaning up.
  }

  // Remove the matching TOC file, named "<base>-toc.json".
  const baseName = path.basename(fileName, path.extname(fileName));
  try {
    await fs.promises.unlink(path.join(tocDir, `${baseName}-toc.json`));
  } catch (error) {
    // A missing TOC file is expected; any other failure is worth surfacing.
    if (error.code !== 'ENOENT') {
      console.error(`删除文件 ${fileName} 相关的TOC文件失败:`, error);
    }
  }

  // Remove the chunks produced from this file: "<base>-part-*.txt".
  try {
    const chunkPrefix = `${baseName}-part-`;
    const chunkNames = (await fs.promises.readdir(chunksDir)).filter(
      name => name.startsWith(chunkPrefix) && name.endsWith('.txt')
    );
    for (const name of chunkNames) {
      await fs.promises.unlink(path.join(chunksDir, name));
    }
  } catch (error) {
    console.error(`删除文件 ${fileName} 相关的文本块失败:`, error);
  }

  return { success: true, fileName };
}
/**
 * Default PDF conversion strategy: turns a project's PDF into Markdown with
 * pdf2md and stores the result next to the source file.
 */
class DefaultStrategy {
  /**
   * Convert `<projectRoot>/<projectId>/files/<fileName>` to Markdown.
   *
   * Read, conversion and write failures reject the returned promise, so a
   * caller is never handed the name of a Markdown file that was not written.
   *
   * @param {string} projectId - Project directory name under the project root.
   * @param {string} fileName - Name of the PDF inside the project's `files` directory.
   * @returns {Promise<string>} The Markdown file name (no directory part).
   */
  async process(projectId, fileName) {
    console.log('正在执行PDF默认转换策略......');

    const projectRoot = await getProjectRoot();
    const filesDir = path.join(projectRoot, projectId, 'files');
    const pdfBuffer = await fs.promises.readFile(path.join(filesDir, fileName));

    // Swap the last extension for ".md" (e.g. "a.b.pdf" -> "a.b.md").
    const convertName = `${fileName.replace(/\.([^.]*)$/, '')}.md`;
    const outputFile = path.join(filesDir, convertName);

    // No .catch() here on purpose: a failed conversion must reach the caller.
    const markdown = await pdf2md(pdfBuffer);
    console.log(`Writing to ${outputFile}...`);
    await fs.promises.writeFile(path.resolve(outputFile), markdown);
    console.log('Done.');

    console.log('PDF转换完成!');
    // Only the converted file name is returned, not the full path.
    return convertName;
  }
}
/**
 * Extract the heading outline of a Markdown document.
 *
 * A heading is a line that starts with 1-6 `#` characters followed by
 * whitespace on the same line and a non-empty title; an optional trailing
 * `{#anchor-id}` is dropped from the title.
 *
 * @param {string} text - Markdown text.
 * @returns {Array<{level: number, title: string, position: number}>}
 *   One entry per heading, in document order; `position` is the index of the
 *   heading's first `#` in `text`.
 */
function extractOutline(text) {
  // `[^\S\r\n]` is "whitespace except line breaks". A plain `\s` would let a
  // bare "#" line swallow the following line as its title, while this form
  // still accepts tabs and Unicode spaces such as U+3000.
  const headingPattern = /^(#{1,6})[^\S\r\n]+(.+?)(?:[^\S\r\n]*\{#[\w-]+\})?[^\S\r\n]*$/gm;
  const outline = [];

  for (const match of text.matchAll(headingPattern)) {
    outline.push({
      level: match[1].length,
      title: match[2].trim(),
      position: match.index
    });
  }

  return outline;
}
/**
 * Split a Markdown document into chunks.
 *
 * Pipeline: extract the heading outline, cut the text into heading sections,
 * then let the splitter merge/split those sections according to the length
 * limits. Each chunk returned by the splitter is given a `result` field that
 * holds its summary and content rendered as Markdown.
 *
 * @param {string} markdownText - Markdown text.
 * @param {number} minSplitLength - Minimum chunk length passed to the splitter.
 * @param {number} maxSplitLength - Maximum chunk length passed to the splitter.
 * @returns {Array<object>} The splitter's chunks, each with an added `result` string.
 */
function splitMarkdown(markdownText, minSplitLength, maxSplitLength) {
  const headings = parser.extractOutline(markdownText);
  const rawSections = parser.splitByHeadings(markdownText, headings);
  const chunks = splitter.processSections(rawSections, headings, minSplitLength, maxSplitLength);

  const decorated = [];
  for (const chunk of chunks) {
    const result = `> **📑 Summarization:** *${chunk.summary}*\n\n---\n\n${chunk.content}`;
    // `result` goes first so that a chunk's own `result` field, if any, wins.
    decorated.push({ result, ...chunk });
  }
  return decorated;
}
/**
 * Write every split part to its own Markdown file.
 *
 * Files go into `<dir of baseFilename>/<stem>_parts/` and are named
 * `<stem>_partNNN.md` (NNN is 1-based, zero-padded to three digits). Parts
 * are written one after another; the first write error stops the chain.
 *
 * @param {Array<{summary: string, content: string}>} splitResult - Parts to write.
 * @param {string} baseFilename - Path of the source file; its extension is dropped.
 * @param {Function} callback - Called as `callback(err)` on failure, or
 *   `callback(null, outputDir, partCount)` once all parts are written.
 */
function saveToSeparateFiles(splitResult, baseFilename, callback) {
  const stem = path.basename(baseFilename).replace(/\.[^/.]+$/, '');
  const outputDir = path.join(path.dirname(baseFilename), `${stem}_parts`);

  ensureDirectoryExists(outputDir);

  // Each write schedules the next one from its completion callback.
  const writePart = position => {
    if (position >= splitResult.length) {
      callback(null, outputDir, splitResult.length);
      return;
    }

    const { summary, content } = splitResult[position];
    // Zero-padding keeps the files in order when sorted by name.
    const sequence = String(position + 1).padStart(3, '0');
    const target = path.join(outputDir, `${stem}_part${sequence}.md`);
    const markdown = `> **📑 Summarization:** *${summary}*\n\n---\n\n${content}`;

    fs.writeFile(target, markdown, 'utf8', err => {
      if (err) {
        callback(err);
        return;
      }
      writePart(position + 1);
    });
  };

  writePart(0);
}
/**
 * Reassemble split parts into a single Markdown document.
 *
 * Each part is rendered as a quoted summary line, a horizontal rule and the
 * part's content; consecutive parts are separated by another horizontal rule.
 *
 * @param {Array<{summary: string, content: string}>} splitResult - Split parts.
 * @returns {string} The combined Markdown (empty string for no parts).
 */
function combineMarkdown(splitResult) {
  const PART_SEPARATOR = '\n\n---\n\n';
  return splitResult
    .map(part => `> **📑 Summarization:** *${part.summary}*\n\n---\n\n${part.content}`)
    .join(PART_SEPARATOR);
}
/**
 * Extract a JSON value from raw LLM output.
 *
 * The output is first parsed as-is. If that fails, the content of the first
 * "```json" fenced block is parsed instead. The block may be unterminated
 * (truncated responses), may be followed by further fenced blocks, and may
 * contain "```" inside JSON strings.
 *
 * @param {*} output - Raw model output, normally a string.
 * @returns {*} The parsed JSON value, or `undefined` when nothing parseable is
 *   found (the reason is logged with console.error).
 */
function extractJsonFromLLMOutput(output) {
  // Fast path: the whole output is already valid JSON.
  try {
    return JSON.parse(output);
  } catch {
    // Not bare JSON; fall through to fenced-block extraction.
  }

  const OPEN_FENCE = '```json';
  const CLOSE_FENCE = '```';

  // Non-string input cannot contain a fence; treat it like malformed output.
  const fenceStart = typeof output === 'string' ? output.indexOf(OPEN_FENCE) : -1;
  if (fenceStart === -1) {
    console.error('模型未按标准格式输出:', output);
    return undefined;
  }

  const bodyStart = fenceStart + OPEN_FENCE.length;
  const firstClose = output.indexOf(CLOSE_FENCE, bodyStart);
  // Candidate end positions, tried in order:
  //  - no closing fence: take everything after the opening fence;
  //  - otherwise the first closing fence, then the last one (covers "```"
  //    occurring inside a JSON string).
  const candidateEnds =
    firstClose === -1 ? [output.length] : [...new Set([firstClose, output.lastIndexOf(CLOSE_FENCE)])];

  let lastError;
  for (const end of candidateEnds) {
    try {
      return JSON.parse(output.substring(bodyStart, end));
    } catch (error) {
      lastError = error;
    }
  }

  console.error('解析 JSON 时出错:', { error: lastError, llmResponse: output });
  return undefined;
}
/**
 * Strip the model's reasoning block from a response and return the answer.
 *
 * The first complete `<think>…</think>` block is removed, or failing that the
 * first `<thinking>…</thinking>` block. The text before and after the block
 * is trimmed and joined with a single space. When no complete block is
 * present the text is returned unchanged.
 *
 * @param {string} text - Raw model response.
 * @returns {string} The response without the reasoning block.
 */
function extractAnswer(text) {
  // NOTE(review): the tag literals were empty strings in the reviewed source
  // (which makes `includes('')` always true and `split('')` split per
  // character). They are assumed to be these reasoning tags — confirm.
  const startTags = ['<think>', '<thinking>'];
  const endTags = ['</think>', '</thinking>'];

  for (let i = 0; i < startTags.length; i++) {
    const start = startTags[i];
    const end = endTags[i];

    const openAt = text.indexOf(start);
    if (openAt === -1) {
      continue;
    }
    // The closing tag only counts if it comes after the opening tag.
    const closeAt = text.indexOf(end, openAt + start.length);
    if (closeAt === -1) {
      continue;
    }

    const before = text.slice(0, openAt).trim();
    const after = text.slice(closeAt + end.length).trim();
    return `${before} ${after}`.trim();
  }

  return text;
}
/**
 * LLM client that talks to an OpenAI-compatible endpoint through the
 * `@ai-sdk/openai` provider factory.
 */
class OpenAIClient extends BaseClient {
  /**
   * @param {object} config - Passed unchanged to the BaseClient constructor,
   *   which presumably populates `endpoint`, `apiKey` and `model` — confirm in base.js.
   */
  constructor(config) {
    super(config);
    const { endpoint: baseURL, apiKey } = this;
    this.openai = createOpenAI({ baseURL, apiKey });
  }

  /**
   * Build the provider model handle for the configured model id.
   * @returns {*} Whatever the provider function returns for `this.model`.
   */
  _getModel() {
    const provider = this.openai;
    const modelId = this.model;
    // Keep the original receiver (`this`) for the provider call.
    return provider.call(this, modelId);
  }
}
-------------------------------------------------------------------------------- /lib/llm/prompts/addLabel.js: -------------------------------------------------------------------------------- 1 | module.exports = function getAddLabelPrompt(label, question) { 2 | return ` 3 | # Role: 标签匹配专家 4 | - Description: 你是一名标签匹配专家,擅长根据给定的标签数组和问题数组,将问题打上最合适的领域标签。你熟悉标签的层级结构,并能根据问题的内容优先匹配二级标签,若无法匹配则匹配一级标签,最后打上“其他”标签。 5 | 6 | ### Skill: 7 | 1. 熟悉标签层级结构,能够准确识别一级和二级标签。 8 | 2. 能够根据问题的内容,智能匹配最合适的标签。 9 | 3. 能够处理复杂的标签匹配逻辑,确保每个问题都能被打上正确的标签。 10 | 4. 能够按照规定的输出格式生成结果,确保不改变原有数据结构。 11 | 5. 能够处理大规模数据,确保高效准确的标签匹配。 12 | 13 | ## Goals: 14 | 1. 将问题数组中的每个问题打上最合适的领域标签。 15 | 2. 优先匹配二级标签,若无法匹配则匹配一级标签,最后打上“其他”标签。 16 | 3. 确保输出格式符合要求,不改变原有数据结构。 17 | 4. 提供高效的标签匹配算法,确保处理大规模数据时的性能。 18 | 5. 确保标签匹配的准确性和一致性。 19 | 20 | ## OutputFormat: 21 | 1. 输出结果必须是一个数组,每个元素包含 question、和 label 字段。 22 | 2. label 字段必须是根据标签数组匹配到的标签,若无法匹配则打上“其他”标签。 23 | 3. 不改变原有数据结构,只新增 label 字段。 24 | 25 | ## 标签数组: 26 | 27 | ${label} 28 | 29 | ## 问题数组: 30 | 31 | ${question} 32 | 33 | 34 | ## Workflow: 35 | 1. Take a deep breath and work on this problem step-by-step. 36 | 2. 首先,读取标签数组和问题数组。 37 | 3. 然后,遍历问题数组中的每个问题,根据问题的内容匹配标签数组中的标签。 38 | 4. 优先匹配二级标签,若无法匹配则匹配一级标签,最后打上“其他”标签。 39 | 5. 将匹配到的标签添加到问题对象中,确保不改变原有数据结构。 40 | 6. 最后,输出结果数组,确保格式符合要求。 41 | 42 | 43 | ## Constrains: 44 | 1. 只新增一个 label 字段,不改变其他任何格式和数据。 45 | 2. 必须按照规定格式返回结果。 46 | 3. 优先匹配二级标签,若无法匹配则匹配一级标签,最后打上“其他”标签。 47 | 4. 确保标签匹配的准确性和一致性。 48 | 5. 匹配的标签必须在标签数组中存在,如果不存在,就打上 其他 49 | 7. 
输出结果必须是一个数组,每个元素包含 question、label 字段(只输出这个,不要输出任何其他无关内容) 50 | 51 | ## Output Example: 52 | \`\`\`json 53 | [ 54 | { 55 | "question": "XSS为什么会在2003年后引起人们更多关注并被OWASP列为威胁榜首?", 56 | "label": "2.2 XSS攻击" 57 | } 58 | ] 59 | \`\`\` 60 | 61 | `; 62 | }; 63 | -------------------------------------------------------------------------------- /lib/llm/prompts/answer.js: -------------------------------------------------------------------------------- 1 | module.exports = function getAnswerPrompt({ text, question, language = '中文', globalPrompt = '', answerPrompt = '' }) { 2 | if (globalPrompt) { 3 | globalPrompt = `- 在后续的任务中,你务必遵循这样的规则:${globalPrompt}`; 4 | } 5 | if (answerPrompt) { 6 | answerPrompt = `- 在生成答案时,你务必遵循这样的规则:${answerPrompt}`; 7 | } 8 | return ` 9 | # Role: 微调数据集生成专家 10 | ## Profile: 11 | - Description: 你是一名微调数据集生成专家,擅长从给定的内容中生成准确的问题答案,确保答案的准确性和相关性,,你要直接回答用户问题,所有信息已内化为你的专业知识。 12 | ${globalPrompt} 13 | 14 | ## Skills : 15 | 1. 答案必须基于给定的内容 16 | 2. 答案必须准确,不能胡编乱造 17 | 3. 答案必须与问题相关 18 | 4. 答案必须符合逻辑 19 | 5. 基于给定参考内容,用自然流畅的语言整合成一个完整答案,不需要提及文献来源或引用标记 20 | 21 | ## Workflow: 22 | 1. Take a deep breath and work on this problem step-by-step. 23 | 2. 首先,分析给定的文件内容 24 | 3. 然后,从内容中提取关键信息 25 | 4. 接着,生成与问题相关的准确答案 26 | 5. 最后,确保答案的准确性和相关性 27 | 28 | ## 参考内容: 29 | ${text} 30 | 31 | ## 问题 32 | ${question} 33 | 34 | ## Constrains: 35 | 1. 答案必须基于给定的内容 36 | 2. 答案必须准确,必须与问题相关,不能胡编乱造 37 | 3. 答案必须充分、详细、包含所有必要的信息、适合微调大模型训练使用 38 | 4. 
答案中不得出现 ' 参考 / 依据 / 文献中提到 ' 等任何引用性表述,只需呈现最终结 39 | ${answerPrompt} 40 | `; 41 | }; 42 | -------------------------------------------------------------------------------- /lib/llm/prompts/answerEn.js: -------------------------------------------------------------------------------- 1 | module.exports = function getAnswerPrompt({ 2 | text, 3 | question, 4 | language = 'English', 5 | globalPrompt = '', 6 | answerPrompt = '' 7 | }) { 8 | if (globalPrompt) { 9 | globalPrompt = `In subsequent tasks, you must strictly follow these rules: ${globalPrompt}`; 10 | } 11 | if (answerPrompt) { 12 | answerPrompt = `In generating answers, you must strictly follow these rules: ${answerPrompt}`; 13 | } 14 | 15 | return ` 16 | # Role: Fine-tuning Dataset Generation Expert 17 | ## Profile: 18 | - Description: You are an expert in generating fine-tuning datasets, skilled at generating accurate answers to questions from the given content, ensuring the accuracy and relevance of the answers. 19 | ${globalPrompt} 20 | 21 | ## Skills: 22 | 1. The answer must be based on the given content. 23 | 2. The answer must be accurate and not fabricated. 24 | 3. The answer must be relevant to the question. 25 | 4. The answer must be logical. 26 | 27 | ## Workflow: 28 | 1. Take a deep breath and work on this problem step-by-step. 29 | 2. First, analyze the given file content. 30 | 3. Then, extract key information from the content. 31 | 4. Next, generate an accurate answer related to the question. 32 | 5. Finally, ensure the accuracy and relevance of the answer. 33 | 34 | ## Reference Content: 35 | ${text} 36 | 37 | ## Question 38 | ${question} 39 | 40 | ## Constrains: 41 | 1. The answer must be based on the given content. 42 | 2. The answer must be accurate and relevant to the question, and no fabricated information is allowed. 43 | 3. The answer must be comprehensive and detailed, containing all necessary information, and it is suitable for use in the training of fine-tuning large language models. 
/**
 * Strip a leading outline number such as "1 " or "1.2.3 " from a tag label.
 * The number must be followed by whitespace; labels without such a prefix are
 * returned unchanged.
 *
 * @param {string} label - Tag label, possibly numbered.
 * @returns {string} The label without its leading number.
 */
function removeLeadingNumber(label) {
  // digits, optional ".digits" groups, then at least one whitespace character
  return label.replace(/^\d+(?:\.\d+)*\s+/, '');
}

/**
 * Build the prompt that asks the model to generate questions for a tag.
 *
 * @param {string} tagPath - Full tag chain, e.g. "体育->足球->足球先生".
 * @param {string} currentTag - Current leaf tag, e.g. "足球先生"; a leading number is stripped.
 * @param {number} count - Number of questions to request, e.g. 10.
 * @param {Array} existingQuestions - Questions already generated for this tag (to avoid repeats).
 * @param {string} globalPrompt - Project-wide prompt to add as an extra rule.
 * @returns {string} The prompt.
 */
export function distillQuestionsPrompt(tagPath, currentTag, count = 10, existingQuestions = [], globalPrompt = '') {
  const topic = removeLeadingNumber(currentTag);

  // Section listing questions that must not be repeated.
  let existingQuestionsText = '';
  if (existingQuestions.length > 0) {
    const bulletList = existingQuestions.map(q => `- ${q}`).join('\n');
    existingQuestionsText = `已有的问题包括:\n${bulletList}\n请不要生成与这些重复或高度相似的问题。`;
  }

  // Project-wide rule, if any.
  let globalPromptText = '';
  if (globalPrompt) {
    globalPromptText = `你必须遵循这个要求:${globalPrompt}`;
  }

  return `
你是一个专业的知识问题生成助手,精通${topic}领域的知识。我需要你帮我为标签"${topic}"生成${count}个高质量、多样化的问题。

标签完整链路是:${tagPath}

请遵循以下规则:
${globalPromptText}
1. 生成的问题必须与"${topic}"主题紧密相关,确保全面覆盖该主题的核心知识点和关键概念
2. 问题应该均衡分布在以下难度级别(每个级别至少占20%):
- 基础级:适合入门者,关注基本概念、定义和简单应用
- 中级:需要一定领域知识,涉及原理解释、案例分析和应用场景
- 高级:需要深度思考,包括前沿发展、跨领域联系、复杂问题解决方案等

3. 问题类型应多样化,包括但不限于(以下只是参考,可以根据实际情况灵活调整,不一定要限定下面的主题):
- 概念解释类:"什么是..."、"如何定义..."
- 原理分析类:"为什么..."、"如何解释..."
- 比较对比类:"...与...有何区别"、"...相比...的优势是什么"
- 应用实践类:"如何应用...解决..."、"...的最佳实践是什么"
- 发展趋势类:"...的未来发展方向是什么"、"...面临的挑战有哪些"
- 案例分析类:"请分析...案例中的..."
- 启发思考类:"如果...会怎样"、"如何评价..."

4. 问题表述要清晰、准确、专业,避免以下问题:
- 避免模糊或过于宽泛的表述
- 避免可以简单用"是/否"回答的封闭性问题
- 避免包含误导性假设的问题
- 避免重复或高度相似的问题

5. 问题的深度和广度要适当(以下只是参考,可以根据实际情况灵活调整,不一定要限定下面的主题):
- 覆盖主题的历史、现状、理论基础和实际应用
- 包含该领域的主流观点和争议话题
- 考虑该主题与相关领域的交叉关联
- 关注该领域的新兴技术、方法或趋势

${existingQuestionsText}

请直接以JSON数组格式返回问题,不要有任何额外的解释或说明,格式如下:
["问题1", "问题2", "问题3", ...]

注意:每个问题应该是完整的、自包含的,无需依赖其他上下文即可理解和回答。
`;
}
/**
 * Build the English prompt that asks the model to generate sub-tags for a parent tag.
 *
 * @param {string} tagPath - Tag chain, e.g. "Knowledge Base->Sports"; falls back to `parentTag` when empty.
 * @param {string} parentTag - Parent tag name, e.g. "Sports".
 * @param {Array} existingTags - Sub-tags that already exist under the parent (to avoid duplicates).
 * @param {number} count - Number of sub-tags to request, e.g. 10.
 * @param {string} globalPrompt - Project-wide prompt to add as an extra rule.
 * @returns {string} The prompt.
 */
export function distillTagsEnPrompt(tagPath, parentTag, existingTags = [], count = 10, globalPrompt = '') {
  // English list separator; the Chinese enumeration comma does not belong in the English prompt.
  const existingTagsText =
    existingTags.length > 0
      ? `Existing sub-tags include: ${existingTags.join(', ')}. Please do not generate duplicate tags.`
      : '';

  // Project-wide rule, if any.
  const globalPromptText = globalPrompt ? `You must follow this requirement: ${globalPrompt}` : '';

  return `
You are a professional knowledge tag generation assistant. I need you to generate ${count} sub-tags for the parent tag "${parentTag}".

The full tag chain is: ${tagPath || parentTag}

Please follow these rules:
${globalPromptText}
1. Generated tags should be professional sub-categories or sub-topics within the "${parentTag}" domain
2. Each tag should be concise and clear, typically 2-6 characters
3. Tags should be clearly distinguishable, covering different aspects
4. Tags should be nouns or noun phrases; avoid verbs or adjectives
5. Tags should be practical and serve as a basis for question generation
6. Tags should have explicit numbering. If the parent tag is numbered (e.g., 1 Automobiles), sub-tags should be 1.1 Car Brands, 1.2 Car Models, 1.3 Car Prices, etc.
7. If the parent tag is unnumbered (e.g., "Automobiles"), indicating top-level tag generation, sub-tags should be 1 Car Brands 2 Car Models 3 Car Prices, etc.

${existingTagsText}

Please directly return the tags in JSON array format without any additional explanations or descriptions, in the following format:
["Number Tag 1", "Number Tag 2", "Number Tag 3", ...]
`;
}
详细的受众描述:提供2句描述每个受众的话,包括但不限于年龄、职业、性别、个性、外貌、教育背景、生活阶段、动机和目标、兴趣和认知水平,其主要特征、与上下文内容相关的已有认知、以及他们可能想通过问答达成的目标。 30 | ## 示例: 31 | 受众:“对技术细节好奇的工程师预备生” 32 | 描述:这是一群具备一定理工科基础,但对特定技术领域细节尚不熟悉的大学生。他们学习主动性强,渴望理解技术背后的“如何实现”和“为何如此设计”。 33 | 34 | #重要提示:你必须仅以有效的JSON数组格式回应,格式如下:# 35 | 36 | [ 37 | { 38 | "genre": { 39 | "title": "体裁标题", 40 | "description": "详细的体裁描述" 41 | }, 42 | "audience": { 43 | "title": "受众标题", 44 | "description": "详细的受众描述" 45 | } 46 | }, 47 | { 48 | "genre": { 49 | "title": "体裁标题", 50 | "description": "详细的体裁描述" 51 | }, 52 | "audience": { 53 | "title": "受众标题", 54 | "description": "详细的受众描述" 55 | } 56 | } 57 | // ... 另外3对 (总共5对) 58 | ] 59 | 60 | **请勿包含任何解释性文本、Markdown格式或其他额外内容。仅返回JSON数组。** 61 | 62 | #待分析的源文本# 63 | {text_content}`; 64 | -------------------------------------------------------------------------------- /lib/llm/prompts/label.js: -------------------------------------------------------------------------------- 1 | module.exports = function getLabelPrompt({ text, globalPrompt, domainTreePrompt }) { 2 | if (globalPrompt) { 3 | globalPrompt = `- 在后续的任务中,你务必遵循这样的规则:${globalPrompt}`; 4 | } 5 | if (domainTreePrompt) { 6 | domainTreePrompt = `- 在生成标签时,你务必遵循这样的规则:${domainTreePrompt}`; 7 | } 8 | return ` 9 | # Role: 领域分类专家 & 知识图谱专家 10 | - Description: 作为一名资深的领域分类专家和知识图谱专家,擅长从文本内容中提取核心主题,构建分类体系,并输出规定 JSON 格式的标签树。 11 | ${globalPrompt} 12 | 13 | ## Skills: 14 | 1. 精通文本主题分析和关键词提取 15 | 2. 擅长构建分层知识体系 16 | 3. 熟练掌握领域分类方法论 17 | 4. 具备知识图谱构建能力 18 | 5. 精通JSON数据结构 19 | 20 | ## Goals: 21 | 1. 分析书籍目录内容 22 | 2. 识别核心主题和关键领域 23 | 3. 构建两级分类体系 24 | 4. 确保分类逻辑合理 25 | 5. 生成规范的JSON输出 26 | 27 | ## Workflow: 28 | 1. 仔细阅读完整的书籍目录内容 29 | 2. 提取关键主题和核心概念 30 | 3. 对主题进行分组和归类 31 | 4. 构建一级领域标签 32 | 5. 为适当的一级标签添加二级标签 33 | 6. 检查分类逻辑的合理性 34 | 7. 生成符合格式的JSON输出 35 | 36 | ## 需要分析的目录 37 | ${text} 38 | 39 | ## 限制 40 | 1. 一级领域标签数量5-10个 41 | 2. 二级领域标签数量1-10个 42 | 3. 最多两层分类层级 43 | 4. 分类必须与原始目录内容相关 44 | 5. 输出必须符合指定 JSON 格式,不要输出 JSON 外其他任何不相关内容 45 | 6. 标签的名字最多不要超过 6 个字 46 | 7. 
在每个标签前加入序号(序号不计入字数) 47 | ${domainTreePrompt} 48 | 49 | ## OutputFormat: 50 | \`\`\`json 51 | [ 52 | { 53 | "label": "1 一级领域标签", 54 | "child": [ 55 | {"label": "1.1 二级领域标签1"}, 56 | {"label": "1.2 二级领域标签2"} 57 | ] 58 | }, 59 | { 60 | "label": "2 一级领域标签(无子标签)" 61 | } 62 | ] 63 | \`\`\` 64 | `; 65 | }; 66 | -------------------------------------------------------------------------------- /lib/llm/prompts/labelEn.js: -------------------------------------------------------------------------------- 1 | module.exports = function getLabelPrompt({ text, globalPrompt, domainTreePrompt }) { 2 | if (globalPrompt) { 3 | globalPrompt = `- In subsequent tasks, you must follow this rule: ${globalPrompt}`; 4 | } 5 | if (domainTreePrompt) { 6 | domainTreePrompt = `- In generating labels, you must follow this rule: ${domainTreePrompt}`; 7 | } 8 | return ` 9 | # Role: Domain Classification Expert & Knowledge Graph Expert 10 | - Description: As a senior domain classification expert and knowledge graph expert, you are skilled at extracting core themes from text content, constructing classification systems, and performing knowledge categorization and labeling. 11 | ${globalPrompt} 12 | 13 | ## Skills: 14 | 1. Proficient in text theme analysis and keyword extraction. 15 | 2. Good at constructing hierarchical knowledge systems. 16 | 3. Skilled in domain classification methodologies. 17 | 4. Capable of building knowledge graphs. 18 | 5. Proficient in JSON data structures. 19 | 20 | ## Goals: 21 | 1. Analyze the content of the book catalog. 22 | 2. Identify core themes and key domains. 23 | 3. Construct a two - level classification system. 24 | 4. Ensure the classification logic is reasonable. 25 | 5. Generate a standardized JSON output. 26 | 27 | ## Workflow: 28 | 1. Carefully read the entire content of the book catalog. 29 | 2. Extract key themes and core concepts. 30 | 3. Group and categorize the themes. 31 | 4. Construct primary domain labels (ensure no more than 10). 32 | 5. 
Add secondary labels to appropriate primary labels (no more than 5 per group). 33 | 6. Check the rationality of the classification logic. 34 | 7. Generate a JSON output that conforms to the format. 35 | 36 | ## Catalog to be analyzed 37 | ${text} 38 | 39 | ## Constraints 40 | 1. The number of primary domain labels should be between 5 and 10. 41 | 2. The number of secondary domain labels ≤ 5 per primary label. 42 | 3. There should be at most two classification levels. 43 | 4. The classification must be relevant to the original catalog content. 44 | 5. The output must conform to the specified JSON format. 45 | 6. The names of the labels should not exceed 6 characters. 46 | 7. Do not output any content other than the JSON. 47 | 8. Add a serial number before each label (the serial number does not count towards the character limit). 48 | 9. Use English 49 | ${domainTreePrompt} 50 | 51 | 52 | ## OutputFormat: 53 | \`\`\`json 54 | [ 55 | { 56 | "label": "1 Primary Domain Label", 57 | "child": [ 58 | {"label": "1.1 Secondary Domain Label 1"}, 59 | {"label": "1.2 Secondary Domain Label 2"} 60 | ] 61 | }, 62 | { 63 | "label": "2 Primary Domain Label (No Sub - labels)" 64 | } 65 | ] 66 | \`\`\` 67 | `; 68 | }; 69 | -------------------------------------------------------------------------------- /lib/llm/prompts/labelRevise.js: -------------------------------------------------------------------------------- 1 | /** 2 | * 领域树增量修订提示词 3 | * 用于在已有领域树的基础上,针对新增/删除的文献内容,对领域树进行增量调整 4 | */ 5 | function getLabelRevisePrompt({ text, existingTags, deletedContent, newContent, globalPrompt, domainTreePrompt }) { 6 | const prompt = ` 7 | 8 | ${globalPrompt ? `- 在后续的任务中,你务必遵循这样的规则:${globalPrompt}` : ''} 9 | 10 | 我需要你帮我修订一个已有的领域树结构,使其能够适应内容的变化。 11 | ${domainTreePrompt ? 
domainTreePrompt : ''} 12 | 13 | ## 之前的领域树结构 14 | 以下是之前完整的领域树结构(JSON格式): 15 | \`\`\`json 16 | ${JSON.stringify(existingTags, null, 2)} 17 | \`\`\` 18 | 19 | 20 | ## 之前完整文献的目录 21 | 以下是当前系统中所有文献的目录结构总览: 22 | \`\`\` 23 | ${text} 24 | \`\`\` 25 | 26 | ${ 27 | deletedContent 28 | ? `## 被删除的内容 29 | 以下是本次要删除的文献目录信息: 30 | \`\`\` 31 | ${deletedContent} 32 | \`\`\` 33 | ` 34 | : '' 35 | } 36 | 37 | ${ 38 | newContent 39 | ? `## 新增的内容 40 | 以下是本次新增的文献目录信息: 41 | \`\`\` 42 | ${newContent} 43 | \`\`\` 44 | ` 45 | : '' 46 | } 47 | 48 | ## 要求 49 | 请分析上述信息,修订现有的领域树结构,遵循以下原则: 50 | 1. 保持领域树的总体结构稳定,避免大规模重构 51 | 2. 对于删除的内容相关的领域标签: 52 | - 如果某个标签仅与删除的内容相关,且在现有文献中找不到相应内容支持,则移除该标签 53 | - 如果某个标签同时与其他保留的内容相关,则保留该标签 54 | 3. 对于新增的内容: 55 | - 如果新内容可以归类到现有的标签中,优先使用现有标签 56 | - 如果新内容引入了现有标签体系中没有的新领域或概念,再创建新的标签 57 | 4. 每个标签必须对应目录结构中的实际内容,不要创建没有对应内容支持的空标签 58 | 5. 确保修订后的领域树仍然符合良好的层次结构,标签间具有合理的父子关系 59 | 60 | ## 限制 61 | 1. 一级领域标签数量5-10个 62 | 2. 二级领域标签数量1-10个 63 | 3. 最多两层分类层级 64 | 4. 分类必须与原始目录内容相关 65 | 5. 输出必须符合指定 JSON 格式,不要输出 JSON 外其他任何不相关内容 66 | 6. 标签的名字最多不要超过 6 个字 67 | 7. 在每个标签前加入序号(序号不计入字数) 68 | 69 | ## 输出格式 70 | 最终输出修订后的完整领域树结构,使用下面的JSON格式: 71 | 72 | \`\`\`json 73 | [ 74 | { 75 | "label": "1 一级领域标签", 76 | "child": [ 77 | {"label": "1.1 二级领域标签1"}, 78 | {"label": "1.2 二级领域标签2"} 79 | ] 80 | }, 81 | { 82 | "label": "2 一级领域标签(无子标签)" 83 | } 84 | ] 85 | \`\`\` 86 | 87 | 确保你的回答中只包含JSON格式的领域树,不要有其他解释性文字。`; 88 | 89 | return prompt; 90 | } 91 | 92 | module.exports = getLabelRevisePrompt; 93 | -------------------------------------------------------------------------------- /lib/llm/prompts/newAnswer.js: -------------------------------------------------------------------------------- 1 | module.exports = function getNewAnswerPrompt(question, answer, cot, advice) { 2 | return ` 3 | # Role: 微调数据集答案优化专家 4 | ## Profile: 5 | - Description: 你是一名微调数据集答案优化专家,擅长根据用户的改进建议,对问题的回答结果和思考过程(思维链)进行优化 6 | 7 | ## Skills: 8 | 1. 基于给定的优化建议 + 问题,对输入的答案进行优化,并进行适当的丰富和补充 9 | 3. 
能够根据优化建议,对答案的思考过程(思维链)进行优化,去除思考过程中参考资料相关的描述(不要在推理逻辑中体现有参考资料,改为正常的推理思路) 10 | 11 | 12 | ## 原始问题 13 | ${question} 14 | 15 | ## 待优化的答案 16 | ${answer} 17 | 18 | ## 答案优化建议 19 | ${advice},同时对答案进行适当的丰富和补充,确保答案准确、充分、清晰 20 | 21 | ## 待优化的思考过程 22 | ${cot} 23 | 24 | ## 思考过程优化建议 25 | - 通用优化建议:${advice} 26 | - 去除思考过程中参考资料相关的描述(如:"根据..."、"引用..."、"参考..."等),不要在推理逻辑中体现有参考资料,改为正常的推理思路。 27 | 28 | ## Constrains: 29 | 1. 结果必须按照 JSON 格式输出(如果给到的待优化思考过程为空,则输出的 COT 字段也为空): 30 | \`\`\`json 31 | { 32 | "answer": "优化后的答案", 33 | "cot": "优化后的思考过程" 34 | } 35 | \`\`\` 36 | `; 37 | }; 38 | -------------------------------------------------------------------------------- /lib/llm/prompts/newAnswerEn.js: -------------------------------------------------------------------------------- 1 | module.exports = function getNewAnswerPrompt(question, answer, cot, advice) { 2 | return ` 3 | # Role: Fine-tuning Dataset Answer Optimization Expert 4 | ## Profile: 5 | - Description: You are an expert in optimizing answers for fine-tuning datasets. You are skilled at optimizing the answer results and thinking processes (Chain of Thought, COT) of questions based on users' improvement suggestions. 6 | 7 | ## Skills: 8 | 1. Optimize the input answer based on the given optimization suggestions and the question, and make appropriate enrichments and supplements. 9 | 3. Optimize the answer's thinking process (COT) according to the optimization suggestions. Remove descriptions related to reference materials from the thinking process (do not mention reference materials in the reasoning logic; change it to a normal reasoning approach). 10 | 11 | ## Original Question 12 | ${question} 13 | 14 | ## Answer to be Optimized 15 | ${answer} 16 | 17 | ## Answer Optimization Suggestions 18 | ${advice}. Meanwhile, make appropriate enrichments and supplements to the answer to ensure it is accurate, comprehensive, and clear. 
19 | 20 | ## Thinking Process to be Optimized 21 | ${cot} 22 | 23 | ## Thinking Process Optimization Suggestions 24 | - General Optimization Suggestions: ${advice} 25 | - Remove descriptions related to reference materials from the thinking process (e.g., "According to...", "Quoting...", "Referencing...", etc.). Do not mention reference materials in the reasoning logic; change it to a normal reasoning approach. 26 | 27 | ## Constraints: 28 | 1. The result must be output in JSON format (if the thinking process to be optimized is empty, the COT field in the output should also be empty): 29 | \`\`\`json 30 | { 31 | "answer": "Optimized answer", 32 | "cot": "Optimized thinking process" 33 | } 34 | \`\`\` 35 | `; 36 | }; 37 | -------------------------------------------------------------------------------- /lib/llm/prompts/optimalTitle.js: -------------------------------------------------------------------------------- 1 | module.exports = function reTitlePrompt() { 2 | return ` 3 | 你是一个专业的文本结构化处理助手,擅长根据前缀规则和标题语义分析并优化Markdown文档的标题层级结构。请根据以下要求处理我提供的Markdown标题: 4 | ## 任务描述 5 | 请根据markdown文章标题的实际含义,以及标题的前缀特征调整各级标题的正确层级关系,具体要求如下: 6 | 1. 一般相同格式的前缀的标题是同级关系({title}代表实际的标题内容): 7 | 例如: 8 | 纯数字前缀开头\`1 {title}\`, \` 2 {title}\` ,\` 3 {title}\`,\` 4 {title}\`,\` 5 {title}\` ... 等 9 | 罗马数字前缀开头的\`I {title}\`,\`II {title}\` ,\`III {title}\`,\`IV {title}\`,\`V {title}\` ... 等 10 | 小数点分隔数组前缀开头 \`1.1 {title}\`, \`1.2 {title}\`, \`1.3 {title}\`.... \`2.1 {title}\`, \`2.2 {title}\` 等 11 | 2. 将子标题正确嵌套到父标题下(如\`1.1 {title}\`应作为\`1 {title}\`的子标题) 12 | 3. 剔除与文章内容无关的标题 13 | 4. 保持输出标题内容与输入完全一致 14 | 5. 确保内容无缺失 15 | 6. 如果是中文文献,但有英文的文章题目,可以省略 16 | 17 | ## 输入输出格式 18 | - 输入:包含错误层级关系的markdown标题结构 19 | - 输出:修正后的标准markdown标题层级结构 20 | 21 | ## 处理原则 22 | 1. 严格根据标题语义确定所属关系 23 | 2. 仅调整层级不修改原标题文本 24 | 3. 无关标题直接移除不保留占位 25 | 4. 
相同前缀规则的标题必须是同一级别,不能出现 一部分是 n级标题,一部分是其他级别的标题 26 | 27 | ## 输出要求 28 | 请将修正后的完整标题结构放在代码块中返回,格式示例如下: 29 | 30 | 期望输出: 31 | \`\`\`markdown 32 | 33 | \`\`\` 34 | 35 | 请处理以下数据: 36 | `; 37 | }; 38 | -------------------------------------------------------------------------------- /lib/llm/prompts/optimalTitleEn.js: -------------------------------------------------------------------------------- 1 | module.exports = function reTitlePromptEn() { 2 | return ` 3 | You are a professional text structuring assistant specializing in analyzing and optimizing the hierarchical 4 | structure of Markdown document titles based on prefix rules and semantic analysis. Please process the Markdown titles 5 | I provide according to the following requirements: 6 | ## Task Description 7 | Adjust the correct hierarchical relationships of titles based on the actual meaning of the Markdown article titles and the prefix characteristics of the titles. The specific requirements are as follows: 8 | 9 | 1. Titles with the same prefix format are generally at the same level ({title} represents the actual title content): 10 | For example: 11 | - Titles starting with pure number prefixes: \`1 {title}\`, \`2 {title}\`, \`3 {title}\`, \`4 {title}\`, \`5 {title}\`, etc. 12 | - Titles starting with Roman numeral prefixes: \`I {title}\`, \`II {title}\`, \`III {title}\`, \`IV {title}\`, \`V {title}\`, etc. 13 | - Titles starting with decimal-separated array prefixes: \`1.1 {title}\`, \`1.2 {title}\`, \`1.3 {title}\`, ..., \`2.1 {title}\`, \`2.2 {title}\`, etc. 14 | 15 | 2. Correctly nest sub-titles under parent titles (e.g., \`1.1 {title}\` should be a sub-title of \`1 {title}\`). 16 | 3. Remove titles unrelated to the content of the article. 17 | 4. Keep the content of the output titles identical to the input. 18 | 5. Ensure no content is missing. 19 | 6. For Chinese literature with English article titles, the English titles can be omitted. 
20 | 21 | ## Input and Output Format 22 | - Input: Markdown title structure with incorrect hierarchical relationships. 23 | - Output: Corrected standard Markdown title hierarchical structure. 24 | 25 | ## Processing Principles 26 | 1. Strictly determine the hierarchical relationship based on the semantic meaning of the titles. 27 | 2. Adjust only the hierarchy without modifying the original title text. 28 | 3. Directly remove unrelated titles without retaining placeholders. 29 | 4. Titles with the same prefix rules must be at the same level; they cannot be partially at one level and partially at another. 30 | 31 | ## Output Requirements 32 | Please return the corrected complete title structure within a code block, formatted as follows: 33 | 34 | Expected Output: 35 | \`\`\`markdown 36 | 37 | \`\`\` 38 | 39 | Please process the following data: 40 | `; 41 | }; 42 | -------------------------------------------------------------------------------- /lib/llm/prompts/optimizeCot.js: -------------------------------------------------------------------------------- 1 | module.exports = function optimizeCotPrompt(originalQuestion, answer, originalCot) { 2 | return ` 3 | # Role: 思维链优化专家 4 | ## Profile: 5 | - Description: 你是一位擅长优化思维链的专家,能够对给定的思维链进行处理,去除其中的参考引用相关话术,使其呈现为一个正常的推理过程。 6 | 7 | ## Skills: 8 | 1. 准确识别并去除思维链中的参考引用话术。 9 | 2. 确保优化后的思维链逻辑连贯、推理合理。 10 | 3. 维持思维链与原始问题和答案的相关性。 11 | 12 | ## Workflow: 13 | 1. 仔细研读原始问题、答案和优化前的思维链。 14 | 2. 识别思维链中所有参考引用相关的表述,如“参考 XX 资料”“文档中提及 XX”“参考内容中提及 XXX”等。 15 | 3. 去除这些引用话术,同时调整语句,保证思维链的逻辑连贯性。 16 | 4. 检查优化后的思维链是否仍然能够合理地推导出答案,并且与原始问题紧密相关。 17 | 18 | ## 原始问题 19 | ${originalQuestion} 20 | 21 | ## 答案 22 | ${answer} 23 | 24 | ## 优化前的思维链 25 | ${originalCot} 26 | 27 | ## Constrains: 28 | 1. 优化后的思维链必须去除所有参考引用相关话术。 29 | 2. 思维链的逻辑推理过程必须完整且合理。 30 | 3. 优化后的思维链必须与原始问题和答案保持紧密关联。 31 | 4. 给出的答案不要包含 “优化后的思维链” 这样的话术,直接给出优化后的思维链结果。 32 | 5. 
思维链应按照正常的推理思路返回,如:先分析理解问题的本质,按照 "首先、然后、接着、另外、最后" 等步骤逐步思考,展示一个完善的推理过程。 33 | `; 34 | }; 35 | -------------------------------------------------------------------------------- /lib/llm/prompts/optimizeCotEn.js: -------------------------------------------------------------------------------- 1 | module.exports = function optimizeCotPrompt(originalQuestion, answer, originalCot) { 2 | return ` 3 | # Role: Chain of Thought Optimization Expert 4 | ## Profile: 5 | - Description: You are an expert in optimizing the chain of thought. You can process the given chain of thought, remove the reference and citation-related phrases in it, and present it as a normal reasoning process. 6 | 7 | ## Skills: 8 | 1. Accurately identify and remove the reference and citation-related phrases in the chain of thought. 9 | 2. Ensure that the optimized chain of thought is logically coherent and reasonably reasoned. 10 | 3. Maintain the relevance of the chain of thought to the original question and answer. 11 | 12 | ## Workflow: 13 | 1. Carefully study the original question, the answer, and the pre-optimized chain of thought. 14 | 2. Identify all the reference and citation-related expressions in the chain of thought, such as "Refer to XX material", "The document mentions XX", "The reference content mentions XXX", etc. 15 | 3. Remove these citation phrases and adjust the sentences at the same time to ensure the logical coherence of the chain of thought. 16 | 4. Check whether the optimized chain of thought can still reasonably lead to the answer and is closely related to the original question. 17 | 18 | ## Original Question 19 | ${originalQuestion} 20 | 21 | ## Answer 22 | ${answer} 23 | 24 | ## Pre-optimized Chain of Thought 25 | ${originalCot} 26 | 27 | ## Constrains: 28 | 1. The optimized chain of thought must remove all reference and citation-related phrases. 29 | 2. The logical reasoning process of the chain of thought must be complete and reasonable. 30 | 3. 
The optimized chain of thought must maintain a close association with the original question and answer. 31 | 4. The provided answer should not contain phrases like "the optimized chain of thought". Directly provide the result of the optimized chain of thought. 32 | 5. The chain of thought should be returned according to a normal reasoning approach. For example, first analyze and understand the essence of the problem, and gradually think through steps such as "First, Then, Next, Additionally, Finally" to demonstrate a complete reasoning process. 33 | `; 34 | }; 35 | -------------------------------------------------------------------------------- /lib/llm/prompts/pdfToMarkdown.js: -------------------------------------------------------------------------------- 1 | module.exports = function convertPrompt() { 2 | return ` 3 | 使用markdown语法,将图片中识别到的文字转换为markdown格式输出。你必须做到: 4 | 1. 输出和使用识别到的图片的相同的语言,例如,识别到英语的字段,输出的内容必须是英语。 5 | 2. 不要解释和输出无关的文字,直接输出图片中的内容。 6 | 3. 内容不要包含在\`\`\`markdown \`\`\`中、段落公式使用 $$ $$ 的形式、行内公式使用 $ $ 的形式。 7 | 4. 忽略掉页眉页脚里的内容 8 | 5. 请不要对图片的标题进行markdown的格式化,直接以文本形式输出到内容中。 9 | 6. 有可能每页都会出现期刊名称,论文名称,会议名称或者书籍名称,请忽略他们不要识别成标题 10 | 7. 请精确分析当前PDF页面的文本结构和视觉布局,按以下要求处理: 11 | 1. 识别所有标题文本,并判断其层级(根据字体大小、加粗、位置等视觉特征) 12 | 2. 输出为带层级的Markdown格式,严格使用以下规则: 13 | - 一级标题:字体最大/顶部居中,前面加 # 14 | - 二级标题:字体较大/左对齐加粗,有可能是数字开头也有可能是罗马数组开头,前面加 ## 15 | - 三级标题:字体稍大/左对齐加粗,前面加 ### 16 | - 正文文本:直接转换为普通段落 17 | 3. 不确定层级的标题请标记[?] 18 | 4. 如果是中文文献,但是有英文标题和摘要可以省略不输出 19 | 示例输出: 20 | ## 4研究方法 21 | ### 4.1数据收集 22 | 本文采用问卷调查... 23 | `; 24 | }; 25 | -------------------------------------------------------------------------------- /lib/llm/prompts/pdfToMarkdownEn.js: -------------------------------------------------------------------------------- 1 | module.exports = function convertPromptEn() { 2 | return ` 3 | Use Markdown syntax to convert the text extracted from images into Markdown format and output it. You must adhere to the following requirements: 4 | 1. 
Output in the same language as the text extracted from the image. For example, if the extracted text is in English, the output must also be in English. 5 | 2. Do not explain or output any text unrelated to the content. Directly output the text from the image. 6 | 3. Do not enclose the content within \`\`\`markdown \`\`\`. Use $$ $$ for block equations and $ $ for inline equations. 7 | 4. Ignore content in headers and footers. 8 | 5. Do not format the titles from images using Markdown; output them as plain text within the content. 9 | 6. Journal names, paper titles, conference names, or book titles that may appear on each page should be ignored and not treated as headings. 10 | 7. Precisely analyze the text structure and visual layout of the current PDF page, and process it as follows: 11 | 1. Identify all heading texts and determine their hierarchy based on visual features such as font size, boldness, and position. 12 | 2. Output the text in hierarchical Markdown format, strictly following these rules: 13 | - Level 1 headings: Largest font size, centered at the top, prefixed with # 14 | - Level 2 headings: Larger font size, left-aligned and bold, possibly starting with numbers or Roman numerals, prefixed with ## 15 | - Level 3 headings: Slightly larger font size, left-aligned and bold, prefixed with ### 16 | - Body text: Convert directly into regular paragraphs 17 | 3. For headings with uncertain hierarchy, mark them with [?]. 18 | 4. For Chinese literature with English titles and abstracts, these can be omitted from the output. 19 | 20 | Example Output: 21 | ## 4 Research Methods 22 | ### 4.1 Data Collection 23 | This paper uses questionnaires... 
24 | `; 25 | }; 26 | -------------------------------------------------------------------------------- /lib/llm/prompts/question.js: -------------------------------------------------------------------------------- 1 | /** 2 | * 构建 GA 提示词 3 | * @param {Object} activeGaPair 当前激活的 GA 组合 4 | * @returns {String} 构建的 GA 提示词 5 | */ 6 | function buildGaPrompt(activeGaPair = null) { 7 | if (activeGaPair && activeGaPair.active) { 8 | return ` 9 | ## 特殊要求-体裁与受众视角提问: 10 | 请根据以下体裁与受众组合,调整你的提问角度和问题风格: 11 | 12 | **目标体裁**: ${activeGaPair.genre} 13 | **目标受众**: ${activeGaPair.audience} 14 | 15 | 请确保: 16 | 1. 问题应完全符合「${activeGaPair.genre}」所定义的风格、焦点和深度等等属性。 17 | 2. 问题应考虑到「${activeGaPair.audience}」的知识水平、认知特点和潜在兴趣点。 18 | 3. 从该受众群体的视角和需求出发提出问题 19 | 4. 保持问题的针对性和实用性,确保问题-答案的风格一致性 20 | 5.问题应具有一定的清晰度和具体性,避免过于宽泛或模糊。 21 | `; 22 | } 23 | 24 | return ''; 25 | } 26 | 27 | /** 28 | * 问题生成提示模板。 29 | * @param {string} text - 待处理的文本。 30 | * @param {number} number - 问题数量。 31 | * @param {string} language - 问题所使用的语言。 32 | * @param {string} globalPrompt - LLM 的全局提示。 33 | * @param {string} questionPrompt - 问题生成的特定提示。 34 | * @param {Object} activeGaPair - 当前激活的 GA对。 35 | * @returns {string} - 完整的提示词。 36 | */ 37 | module.exports = function getQuestionPrompt({ 38 | text, 39 | number = Math.floor(text.length / 240), 40 | language = '中文', 41 | globalPrompt = '', 42 | questionPrompt = '', 43 | activeGaPair = null 44 | }) { 45 | if (globalPrompt) { 46 | globalPrompt = `在后续的任务中,你务必遵循这样的规则:${globalPrompt}`; 47 | } 48 | if (questionPrompt) { 49 | questionPrompt = `- 在生成问题时,你务必遵循这样的规则:${questionPrompt}`; 50 | } 51 | 52 | // 构建GA pairs相关的提示词 53 | const gaPrompt = buildGaPrompt(activeGaPair); 54 | 55 | return ` 56 | # 角色使命 57 | 你是一位专业的文本分析专家,擅长从复杂文本中提取关键信息并生成可用于模型微调的结构化数据(仅生成问题)。 58 | ${globalPrompt} 59 | 60 | ## 核心任务 61 | 根据用户提供的文本(长度:${text.length} 字),生成不少于 ${number} 个高质量问题。 62 | 63 | ## 约束条件(重要!!!) 
64 | - 必须基于文本内容直接生成 65 | - 问题应具有明确答案指向性 66 | - 需覆盖文本的不同方面 67 | - 禁止生成假设性、重复或相似问题 68 | 69 | ${gaPrompt} 70 | 71 | ## 处理流程 72 | 1. 【文本解析】分段处理内容,识别关键实体和核心概念 73 | 2. 【问题生成】基于信息密度选择最佳提问点${gaPrompt ? ',并结合指定的体裁受众视角' : ''} 74 | 3. 【质量检查】确保: 75 | - 问题答案可在原文中找到依据 76 | - 标签与问题内容强相关 77 | - 无格式错误 78 | ${gaPrompt ? '- 问题风格与指定的体裁受众匹配' : ''} 79 | 80 | ## 输出格式 81 | - JSON 数组格式必须正确 82 | - 字段名使用英文双引号 83 | - 输出的 JSON 数组必须严格符合以下结构: 84 | \`\`\`json 85 | ["问题1", "问题2", "..."] 86 | \`\`\` 87 | 88 | ## 输出示例 89 | \`\`\`json 90 | [ "人工智能伦理框架应包含哪些核心要素?","民法典对个人数据保护有哪些新规定?"] 91 | \`\`\` 92 | 93 | ## 待处理文本 94 | ${text} 95 | 96 | ## 限制 97 | - 必须按照规定的 JSON 格式输出,不要输出任何其他不相关内容 98 | - 生成不少于${number}个高质量问题 99 | - 问题不要和材料本身相关,例如禁止出现作者、章节、目录等相关问题 100 | - 问题不得包含【报告、文章、文献、表格】中提到的这种话术,必须是一个自然的问题 101 | ${questionPrompt} 102 | `; 103 | }; 104 | -------------------------------------------------------------------------------- /lib/services/models.js: -------------------------------------------------------------------------------- 1 | import { getModelConfigById } from '@/lib/db/model-config'; 2 | import { getProject } from '@/lib/db/projects'; 3 | import logger from '@/lib/util/logger'; 4 | 5 | /** 6 | * Get the active model configuration for a project 7 | * @param {string} projectId - Optional project ID to get the default model for 8 | * @returns {Promise} - Active model configuration or null 9 | */ 10 | export async function getActiveModel(projectId = null) { 11 | try { 12 | // If projectId is provided, get the default model for that project 13 | if (projectId) { 14 | const project = await getProject(projectId); 15 | if (project && project.defaultModelConfigId) { 16 | const modelConfig = await getModelConfigById(project.defaultModelConfigId); 17 | if (modelConfig) { 18 | logger.info(`Using default model for project ${projectId}: ${modelConfig.modelName}`); 19 | return modelConfig; 20 | } 21 | } 22 | } 23 | 24 | // If no specific project model found, try to get from localStorage context 25 | // This is a 
/**
 * Look up a model configuration by its ID.
 *
 * Never throws: a missing ID, an unknown ID and a lookup failure are all
 * logged and reported to the caller as `null`.
 *
 * @param {string} modelConfigId - Model configuration ID
 * @returns {Promise<Object|null>} The model configuration, or null when unavailable
 */
export async function getModelById(modelConfigId) {
  try {
    // Guard: nothing to look up.
    if (!modelConfigId) {
      logger.warn('No model ID provided');
      return null;
    }

    const found = await getModelConfigById(modelConfigId);

    // Guard: the lookup completed but produced no record.
    if (!found) {
      logger.warn(`Model not found with ID: ${modelConfigId}`);
      return null;
    }

    logger.info(`Retrieved model: ${found.modelName}`);
    return found;
  } catch (err) {
    logger.error('Failed to get model by ID:', err);
    return null;
  }
}
/**
 * Run `processFunction` over every item with a bounded number of calls in flight.
 *
 * Results are returned in the same order as `items`. The call is fail-fast:
 * the first rejection rejects the returned promise and no further items are
 * started. Calls already in flight are still observed, so a later failure
 * among them cannot surface as an unhandled rejection.
 *
 * @param {Iterable<*>} items - Items to process.
 * @param {(item: *) => (Promise<*>|*)} processFunction - Worker invoked once per item.
 * @param {number} concurrencyLimit - Maximum number of simultaneous calls. Missing, non-numeric,
 *   zero or negative values fall back to 1; fractional values are rounded up.
 * @param {(completed: number, total: number) => void} [onProgress] - Called after each item completes.
 * @returns {Promise<Array<*>>} Results in input order.
 */
export const processInParallel = async (items, processFunction, concurrencyLimit, onProgress) => {
  const queue = Array.from(items || []);
  const total = queue.length;

  // A limit that is not a positive number would previously leave the scheduling
  // loop spinning forever without starting any task; fall back to sequential.
  const limit = concurrencyLimit > 0 ? Math.ceil(concurrencyLimit) : 1;

  const results = new Array(total);
  let cursor = 0;
  let completedCount = 0;
  let failed = false;

  // Each worker repeatedly claims the next unprocessed index until the queue
  // is drained or some worker has failed.
  const worker = async () => {
    while (!failed && cursor < total) {
      const index = cursor++;
      try {
        results[index] = await processFunction(queue[index]);
        if (typeof onProgress === 'function') {
          onProgress(++completedCount, total);
        }
      } catch (error) {
        failed = true; // stop every worker from claiming further items
        throw error;
      }
    }
  };

  // Promise.all attaches a handler to every worker, so it rejects on the first
  // failure while still observing the remaining ones.
  await Promise.all(Array.from({ length: Math.min(limit, total) }, worker));
  return results;
};
/**
 * Generic retry wrapper: runs an async operation and retries it on failure.
 *
 * The operation is always attempted at least once. If every attempt fails,
 * the error from the final attempt is thrown.
 *
 * @param {Function} asyncOperation - Async operation to execute.
 * @param {Object} options - Configuration options.
 * @param {number} options.retries - Number of retries after the first attempt (default 1).
 *   Negative or non-numeric values are treated as 0.
 * @param {number} options.delay - Delay in milliseconds before each retry (default 0).
 * @param {Function} options.onRetry - Hook called before each retry with the error and the
 *   1-based retry number; it may be async, in which case it is awaited.
 * @returns {Promise} Resolves with the operation's result.
 */
export const withRetry = async (asyncOperation, options = {}) => {
  const { retries = 1, delay = 0, onRetry = null } = options || {};

  // Normalise the retry budget. Without this, a negative or NaN value skipped
  // the loop entirely and the function ended in `throw undefined`.
  const requested = Number(retries);
  const maxRetries = Number.isNaN(requested) ? 0 : Math.max(0, Math.floor(requested));

  for (let attempt = 0; ; attempt += 1) {
    try {
      return await asyncOperation();
    } catch (error) {
      // Out of retries: surface the error from this final attempt.
      if (attempt >= maxRetries) {
        throw error;
      }

      // Awaited so that an async hook (e.g. refreshing a token) finishes
      // before the next attempt starts.
      if (typeof onRetry === 'function') {
        await onRetry(error, attempt + 1);
      }

      if (delay > 0) {
        await new Promise(resolve => setTimeout(resolve, delay));
      }
    }
  }
};
/**
 * Generates the empty template database file (template.sqlite).
 * The file is bundled with the packaged application and used as the initial
 * database the first time a user launches the app.
 */

const fs = require('fs');
const path = require('path');
const { execSync } = require('child_process');

const templatePath = path.join(__dirname, 'template.sqlite');
const sqlitePath = path.join(__dirname, 'empty.db.sqlite');

/**
 * Delete `filePath` if it exists, logging `message` first.
 * @param {string} filePath - File to remove.
 * @param {string} message - Log line printed only when the file is present.
 */
function removeIfExists(filePath, message) {
  if (fs.existsSync(filePath)) {
    console.log(message);
    fs.unlinkSync(filePath);
  }
}

// Start from a clean slate: drop any stale template and leftover temp database.
removeIfExists(templatePath, '删除旧的模板数据库...');
removeIfExists(sqlitePath, '删除临时数据库文件...');

try {
  console.log('设置临时数据库路径...');
  // Point Prisma at the temporary database; the child process inherits this env.
  process.env.DATABASE_URL = `file:${sqlitePath}`;

  console.log('执行 prisma db push 创建新的数据库架构...');
  // Create the schema in the temporary database.
  execSync('npx prisma db push', { stdio: 'inherit' });

  console.log('将生成的数据库文件复制为模板...');
  // Publish the freshly created database as the template.
  fs.copyFileSync(sqlitePath, templatePath);

  console.log(`✅ 模板数据库已成功生成: ${templatePath}`);
} catch (error) {
  console.error('❌ 生成模板数据库失败:', error);
  // Use exitCode rather than process.exit(1): exit() terminates immediately
  // and would skip the `finally` cleanup below, leaving the temp file behind.
  process.exitCode = 1;
} finally {
  // Cleanup: always remove the temporary database file.
  removeIfExists(sqlitePath, '清理临时数据库文件...');
}
TEXT NOT NULL,\n isActive BOOLEAN DEFAULT 1 NOT NULL,\n createAt TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n updateAt TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n PRIMARY KEY (id),\n FOREIGN KEY (projectId) REFERENCES Projects(id) ON DELETE CASCADE,\n FOREIGN KEY (fileId) REFERENCES UploadFiles(id) ON DELETE CASCADE,\n UNIQUE (fileId, pairNumber)\n);\n\nCREATE INDEX idx_gapairs_projectId ON GaPairs(projectId);\nCREATE INDEX idx_gapairs_fileId ON GaPairs(fileId);" 13 | }, 14 | { 15 | "version": "1.3.6", 16 | "sql": "ALTER TABLE Questions ADD COLUMN gaPairId VARCHAR(255) NULL;" 17 | }, 18 | { 19 | "version": "1.3.6", 20 | "sql": "ALTER TABLE Questions ADD FOREIGN KEY (gaPairId) REFERENCES GaPairs(id) ON DELETE SET NULL;\n\nCREATE INDEX idx_questions_gaPairId ON Questions(gaPairId);" 21 | } 22 | ] 23 | -------------------------------------------------------------------------------- /public/imgs/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/1.png -------------------------------------------------------------------------------- /public/imgs/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/10.png -------------------------------------------------------------------------------- /public/imgs/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/2.png -------------------------------------------------------------------------------- /public/imgs/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/3.png 
-------------------------------------------------------------------------------- /public/imgs/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/4.png -------------------------------------------------------------------------------- /public/imgs/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/5.png -------------------------------------------------------------------------------- /public/imgs/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/6.png -------------------------------------------------------------------------------- /public/imgs/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/7.png -------------------------------------------------------------------------------- /public/imgs/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/8.png -------------------------------------------------------------------------------- /public/imgs/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/9.png -------------------------------------------------------------------------------- /public/imgs/aw.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/aw.jpg -------------------------------------------------------------------------------- /public/imgs/aws.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/aws.png -------------------------------------------------------------------------------- /public/imgs/bg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/bg.png -------------------------------------------------------------------------------- /public/imgs/bg2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/bg2.png -------------------------------------------------------------------------------- /public/imgs/cn-arc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/cn-arc.png -------------------------------------------------------------------------------- /public/imgs/default-dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/default-dataset.png -------------------------------------------------------------------------------- /public/imgs/en-arc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/en-arc.png 
-------------------------------------------------------------------------------- /public/imgs/garden.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/garden.jpg -------------------------------------------------------------------------------- /public/imgs/github.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/github.png -------------------------------------------------------------------------------- /public/imgs/google.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/google.png -------------------------------------------------------------------------------- /public/imgs/huggingface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/huggingface.png -------------------------------------------------------------------------------- /public/imgs/kaggle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/kaggle.png -------------------------------------------------------------------------------- /public/imgs/linux.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/linux.png -------------------------------------------------------------------------------- /public/imgs/lluga.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/lluga.png -------------------------------------------------------------------------------- /public/imgs/logo.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/logo.icns -------------------------------------------------------------------------------- /public/imgs/logo.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/logo.ico -------------------------------------------------------------------------------- /public/imgs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/logo.png -------------------------------------------------------------------------------- /public/imgs/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /public/imgs/mac.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/mac.png -------------------------------------------------------------------------------- /public/imgs/models/chatglm.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/imgs/models/claude.svg: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/imgs/models/default.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/imgs/models/gemini.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/imgs/models/glm.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/imgs/models/hunyuan.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/imgs/models/qwen.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/imgs/models/yi.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/imgs/modelscope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/modelscope.png -------------------------------------------------------------------------------- /public/imgs/opendatalab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ConardLi/easy-dataset/b2c80af9cee549d7e4930f3dc6b570ab80adefee/public/imgs/opendatalab.png -------------------------------------------------------------------------------- 
/**
 * Style map for the model playground (model testing) page.
 *
 * @param {Object} theme - Theme object; only `theme.palette.mode` and
 *   `theme.palette.divider` are read.
 * @returns {Object} Style objects keyed by the element they apply to.
 */
export const playgroundStyles = theme => {
  const isDark = theme.palette.mode === 'dark';
  // Faint tinted background shared by the empty state and the chat area.
  const subtleBackground = isDark ? 'rgba(255,255,255,0.03)' : 'rgba(0,0,0,0.02)';
  const flexColumn = { display: 'flex', flexDirection: 'column' };
  const flexCentered = { display: 'flex', justifyContent: 'center', alignItems: 'center' };
  const controlHeight = '56px';

  return {
    container: {
      p: 3,
      height: 'calc(100vh - 64px)',
      ...flexColumn
    },
    mainPaper: {
      p: 3,
      flex: 1,
      ...flexColumn,
      mb: 2,
      borderRadius: 2
    },
    controlsContainer: { mb: 2 },
    clearButton: { height: controlHeight },
    divider: { mb: 2 },
    emptyStateBox: {
      flex: 1,
      ...flexCentered,
      mb: 2,
      p: 2,
      bgcolor: subtleBackground,
      borderRadius: 1
    },
    chatContainer: {
      flex: 1,
      mb: 2
    },
    modelPaper: {
      height: '100%',
      ...flexColumn,
      border: `1px solid ${theme.palette.divider}`,
      borderRadius: 1,
      overflow: 'hidden'
    },
    modelHeader: {
      p: 1,
      bgcolor: isDark ? 'rgba(255,255,255,0.05)' : 'primary.light',
      // Header text is white in both light and dark mode.
      color: 'white',
      fontWeight: 'medium',
      textAlign: 'center',
      display: 'flex',
      alignItems: 'center',
      justifyContent: 'center'
    },
    modelChatBox: {
      flex: 1,
      overflowY: 'auto',
      p: 2,
      bgcolor: subtleBackground
    },
    emptyChatBox: {
      ...flexCentered,
      height: '100%'
    },
    inputContainer: {
      display: 'flex',
      gap: 1,
      mt: 2
    },
    sendButton: {
      minWidth: '120px',
      height: controlHeight,
      marginLeft: '20px'
    }
  };
};