├── 首页白.jpg ├── 首页黑.jpg ├── 提示词设置.jpg ├── ssh_test.txt ├── 新版设置页面.jpg ├── 知识库页面2.jpg ├── 记录管理页面.jpg ├── api设置与并发设置.jpg ├── assets ├── apiset.jpg ├── logo.icns ├── logo.ico ├── logo.jpg ├── logo.png ├── history.jpg └── Proofreading.jpg ├── .husky ├── commit-msg └── pre-commit ├── src ├── renderer │ ├── assets │ │ ├── logo.png │ │ ├── apiset.jpg │ │ ├── history.jpg │ │ ├── Proofreading.jpg │ │ └── css │ │ │ └── common.css │ ├── views │ │ ├── logs.vue │ │ ├── OutputWord.vue │ │ ├── Home.vue │ │ ├── Work.vue │ │ ├── ProofSet.vue │ │ ├── history.vue │ │ ├── About.vue │ │ └── Dictionary.vue │ ├── index.css │ ├── renderer.ts │ ├── stores │ │ ├── embeddingStore.ts │ │ ├── apiStore.ts │ │ └── store.ts │ ├── router │ │ └── index.js │ ├── electron.d.ts │ └── App.vue └── main │ ├── logger.ts │ ├── wordProcess.ts │ ├── main.ts │ ├── preload.ts │ ├── database.ts │ ├── chat.ts │ ├── pdfUtils.ts │ ├── lancedb.ts │ ├── ipcHandlers.ts │ └── proof.ts ├── .vscode └── settings.json ├── .prettierrc ├── index.html ├── tsconfig.json ├── .eslintrc.js ├── vite.renderer.config.ts ├── vite.main.config.ts ├── .eslintrc.json ├── vite.preload.config.ts ├── LICENSE ├── forge.env.d.ts ├── .gitignore ├── .versionrc ├── changelog.config.js ├── forge.config.ts ├── package.json ├── vite.base.config.ts ├── README.md ├── lancedbNativePro.ts └── test.html /首页白.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/首页白.jpg -------------------------------------------------------------------------------- /首页黑.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/首页黑.jpg -------------------------------------------------------------------------------- /提示词设置.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/提示词设置.jpg -------------------------------------------------------------------------------- /ssh_test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/ssh_test.txt -------------------------------------------------------------------------------- /新版设置页面.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/新版设置页面.jpg -------------------------------------------------------------------------------- /知识库页面2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/知识库页面2.jpg -------------------------------------------------------------------------------- /记录管理页面.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/记录管理页面.jpg -------------------------------------------------------------------------------- /api设置与并发设置.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/api设置与并发设置.jpg -------------------------------------------------------------------------------- /assets/apiset.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/assets/apiset.jpg -------------------------------------------------------------------------------- /assets/logo.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/assets/logo.icns -------------------------------------------------------------------------------- /assets/logo.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/assets/logo.ico -------------------------------------------------------------------------------- /assets/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/assets/logo.jpg -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/assets/logo.png -------------------------------------------------------------------------------- /.husky/commit-msg: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | . "$(dirname "$0")/_/husky.sh" 3 | 4 | npx commitlint --edit $1 -------------------------------------------------------------------------------- /assets/history.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/assets/history.jpg -------------------------------------------------------------------------------- /assets/Proofreading.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/assets/Proofreading.jpg -------------------------------------------------------------------------------- /.husky/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | . "$(dirname "$0")/_/husky.sh" 3 | 4 | npx lint-staged --allow-empty $1 5 | -------------------------------------------------------------------------------- /src/renderer/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/src/renderer/assets/logo.png -------------------------------------------------------------------------------- /src/renderer/assets/apiset.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/src/renderer/assets/apiset.jpg -------------------------------------------------------------------------------- /src/renderer/assets/history.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/src/renderer/assets/history.jpg -------------------------------------------------------------------------------- /src/renderer/assets/Proofreading.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/src/renderer/assets/Proofreading.jpg -------------------------------------------------------------------------------- /src/renderer/views/logs.vue: -------------------------------------------------------------------------------- 1 | 4 | 5 | 8 | 9 | -------------------------------------------------------------------------------- /src/renderer/views/OutputWord.vue: -------------------------------------------------------------------------------- 1 | 4 | 5 | 8 | 9 | -------------------------------------------------------------------------------- /src/renderer/views/Home.vue: -------------------------------------------------------------------------------- 1 | 6 | 7 | 10 | 11 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "editor.formatOnSave": true, 3 | "editor.defaultFormatter": "esbenp.prettier-vscode", 4 | "[shellscript]": { 5 | "editor.defaultFormatter": "foxundermoon.shell-format" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "printWidth": 120, 3 | "tabWidth": 2, 4 | "useTabs": false, 5 | "semi": false, 6 | "singleQuote": true, 7 | "arrowParens": "avoid", 8 | "jaxbracketSameLine": false, 9 | "trailingComma": "none" 10 | } 11 | -------------------------------------------------------------------------------- /src/renderer/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif; 3 | margin: 0; 4 | padding: 0; 5 | height: 100vh; 6 | overflow: hidden; 7 | } 8 | 9 | #app { 10 | height: 100%; 11 | } 12 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Font Mini 6 | 7 | 8 |
9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ESNext", 4 | "module": "commonjs", 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "esModuleInterop": true, 8 | "noImplicitAny": true, 9 | "sourceMap": true, 10 | "baseUrl": ".", 11 | "outDir": "dist", 12 | "types": ["node"], 13 | "moduleResolution": "node", 14 | "resolveJsonModule": true 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/renderer/assets/css/common.css: -------------------------------------------------------------------------------- 1 | html, 2 | body, 3 | #app { 4 | margin: 0; 5 | padding: 0; 6 | height: 100%; 7 | } 8 | 9 | /* 深色模式样式 */ 10 | .dark { 11 | background-color: #1d1e1f; 12 | color: #ffffff; 13 | } 14 | 15 | .dark ::-webkit-scrollbar { 16 | width: 6px; 17 | } 18 | 19 | .dark ::-webkit-scrollbar-thumb { 20 | background-color: #555; 21 | border-radius: 3px; 22 | } 23 | 24 | .dark ::-webkit-scrollbar-track { 25 | background-color: #2d2d2d; 26 | } -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | extends: [ 3 | 'eslint:recommended', 4 | 'plugin:vue/vue3-recommended', 5 | 'plugin:@typescript-eslint/recommended', 6 | 'prettier' 7 | ], 8 | rules: { 9 | // 降低缩进错误严重性为警告,并设置缩进为2个空格 10 | 'indent': ['warn', 2], 11 | // 允许在Vue模板中使用任意缩进 12 | 'vue/html-indent': ['warn', 2, { 13 | 'attribute': 1, 14 | 'baseIndent': 1, 15 | 'caseIndent': true, 16 | 'closeBracket': 0, 17 | 'alignAttributesVertical': false, 18 | 'ignores': [] 19 | }] 20 | } 21 | }; -------------------------------------------------------------------------------- /src/renderer/renderer.ts: -------------------------------------------------------------------------------- 1 | import { createApp } from 'vue' 2 | import App from './App.vue' 3 | import router from './router' 4 | // 引入 Element Plus 组件库和样式 5 | import ElementPlus from 'element-plus' 6 | import { createPinia } from 'pinia' 7 | import 'element-plus/dist/index.css' 8 | import piniaPluginPersistedstate from 'pinia-plugin-persistedstate' 9 | // 引入中文语言包 10 | import zhCn from 'element-plus/es/locale/lang/zh-cn' // 根据需要选择语言 11 | const app = createApp(App) 12 | const pinia = createPinia() 13 | pinia.use(piniaPluginPersistedstate) 14 | 15 | app.use(router) 16 | app.use(pinia) 17 | // 使用 Element Plus 18 | app.use(ElementPlus, { 19 | locale: zhCn 20 | }) 21 | 22 | app.mount('#app') 23 | -------------------------------------------------------------------------------- /vite.renderer.config.ts: -------------------------------------------------------------------------------- 1 | import type { ConfigEnv, UserConfig } from 'vite' 2 | import vue from '@vitejs/plugin-vue' 3 | import { defineConfig } from 'vite' 4 | import { pluginExposeRenderer } from './vite.base.config' 5 | 6 | // https://vitejs.dev/config 7 | export default defineConfig(env => { 8 | const forgeEnv = env as ConfigEnv<'renderer'> 9 | const { root, mode, forgeConfigSelf } = forgeEnv 10 | const name = forgeConfigSelf.name ?? '' 11 | 12 | return { 13 | root, 14 | mode, 15 | base: './', 16 | build: { 17 | outDir: `.vite/renderer/${name}` 18 | }, 19 | plugins: [pluginExposeRenderer(name), vue()], 20 | resolve: { 21 | preserveSymlinks: true 22 | }, 23 | clearScreen: false 24 | } as UserConfig 25 | }) 26 | -------------------------------------------------------------------------------- /vite.main.config.ts: -------------------------------------------------------------------------------- 1 | import type { ConfigEnv, UserConfig } from 'vite' 2 | import { defineConfig, mergeConfig } from 'vite' 3 | import { getBuildConfig, getBuildDefine, external, pluginHotRestart } from './vite.base.config' 4 | 5 | // https://vitejs.dev/config 6 | export default defineConfig(env => { 7 | const forgeEnv = env as ConfigEnv<'build'> 8 | const { forgeConfigSelf } = forgeEnv 9 | const define = getBuildDefine(forgeEnv) 10 | 11 | const config: UserConfig = { 12 | build: { 13 | lib: { 14 | entry: forgeConfigSelf.entry, 15 | fileName: () => '[name].js', 16 | formats: ['cjs'] 17 | }, 18 | rollupOptions: { 19 | external 20 | } 21 | }, 22 | plugins: [pluginHotRestart('restart')], 23 | define, 24 | resolve: { 25 | // Load the Node.js entry. 26 | mainFields: ['module', 'jsnext:main', 'jsnext'] 27 | } 28 | } 29 | 30 | return mergeConfig(getBuildConfig(forgeEnv), config) 31 | }) 32 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "root": true, 3 | "env": { 4 | "browser": true, 5 | "es6": true, 6 | "node": true 7 | }, 8 | "extends": [ 9 | "eslint:recommended", 10 | "plugin:@typescript-eslint/eslint-recommended", 11 | "plugin:@typescript-eslint/recommended", 12 | "plugin:import/recommended", 13 | "plugin:import/electron", 14 | "plugin:import/typescript", 15 | "plugin:vue/vue3-recommended" 16 | ], 17 | "parser": "vue-eslint-parser", 18 | "parserOptions": { 19 | "ecmaVersion": 2020, 20 | "parser": "@typescript-eslint/parser", 21 | "sourceType": "module" 22 | }, 23 | "rules": { 24 | "max-len": ["error", { "code": 120 }], 25 | "indent": ["error", 2, { "SwitchCase": 1 }], 26 | "semi": ["error", "never"], 27 | "quotes": ["error", "single"], 28 | "arrow-parens": ["error", "as-needed"], 29 | "brace-style": ["error", "1tbs"], 30 | "comma-dangle": ["error", "never"], 31 | "@typescript-eslint/no-explicit-any": "off" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/renderer/views/Work.vue: -------------------------------------------------------------------------------- 1 | 14 | 15 | 20 | 21 | -------------------------------------------------------------------------------- /vite.preload.config.ts: -------------------------------------------------------------------------------- 1 | import type { ConfigEnv, UserConfig } from 'vite' 2 | import { defineConfig, mergeConfig } from 'vite' 3 | import { getBuildConfig, external, pluginHotRestart } from './vite.base.config' 4 | 5 | // https://vitejs.dev/config 6 | export default defineConfig(env => { 7 | const forgeEnv = env as ConfigEnv<'build'> 8 | const { forgeConfigSelf } = forgeEnv 9 | 10 | const config: UserConfig = { 11 | build: { 12 | rollupOptions: { 13 | external, 14 | // Preload scripts may contain Web assets, so use the `build.rollupOptions.input` instead `build.lib.entry`. 15 | input: forgeConfigSelf.entry, 16 | output: { 17 | format: 'cjs', 18 | // It should not be split chunks. 19 | inlineDynamicImports: true, 20 | entryFileNames: '[name].js', 21 | chunkFileNames: '[name].js', 22 | assetFileNames: '[name].[ext]' 23 | } 24 | } 25 | }, 26 | plugins: [pluginHotRestart('reload')] 27 | } 28 | 29 | return mergeConfig(getBuildConfig(forgeEnv), config) 30 | }) 31 | -------------------------------------------------------------------------------- /src/main/logger.ts: -------------------------------------------------------------------------------- 1 | import fs from 'fs' 2 | import path from 'path' 3 | import { app } from 'electron' 4 | 5 | // 创建日志目录和文件路径 6 | const logDir = path.join(app.getAppPath(), '..', '..', 'logs') 7 | const logFile = path.join(logDir, 'main.log') 8 | 9 | // 确保日志目录存在 10 | function ensureLogDir(): void { 11 | try { 12 | if (!fs.existsSync(logDir)) { 13 | fs.mkdirSync(logDir, { recursive: true }) 14 | } 15 | } catch (error) { 16 | // 如果创建日志目录失败,则只能打印到控制台 17 | console.error('Failed to create log directory:', error) 18 | } 19 | } 20 | 21 | // 写入日志的方法 22 | function writeLog(message: string): void { 23 | try { 24 | const timestamp = new Date().toISOString() 25 | const logMessage = `[${timestamp}] ${message}\n` 26 | 27 | ensureLogDir() 28 | fs.appendFileSync(logFile, logMessage, 'utf8') 29 | } catch (error) { 30 | // 如果写入日志失败,则只能打印到控制台 31 | console.error('Failed to write to log file:', error) 32 | } 33 | } 34 | 35 | // 在开发环境中导出额外的方法用于测试 36 | if (process.env.NODE_ENV === 'development') { 37 | Object.assign(global, { writeLog }) 38 | } 39 | 40 | export { writeLog, logFile } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 CZ600 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /forge.env.d.ts: -------------------------------------------------------------------------------- 1 | export {} // Make this a module 2 | 3 | declare global { 4 | // This allows TypeScript to pick up the magic constants that's auto-generated by Forge's Vite 5 | // plugin that tells the Electron app where to look for the Vite-bundled app code (depending on 6 | // whether you're running in development or production). 7 | const MAIN_WINDOW_VITE_DEV_SERVER_URL: string 8 | const MAIN_WINDOW_VITE_NAME: string 9 | 10 | namespace NodeJS { 11 | interface Process { 12 | // Used for hot reload after preload scripts. 13 | viteDevServers: Record; 14 | } 15 | } 16 | 17 | type VitePluginConfig = ConstructorParameters[0]; 18 | 19 | interface VitePluginRuntimeKeys { 20 | VITE_DEV_SERVER_URL: `${string}_VITE_DEV_SERVER_URL`; 21 | VITE_NAME: `${string}_VITE_NAME`; 22 | } 23 | } 24 | 25 | declare module 'vite' { 26 | interface ConfigEnv { 27 | root: string; 28 | forgeConfig: VitePluginConfig; 29 | forgeConfigSelf: VitePluginConfig[K][number]; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/renderer/views/ProofSet.vue: -------------------------------------------------------------------------------- 1 | 17 | 18 | 53 | 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | 9 | # Diagnostic reports (https://nodejs.org/api/report.html) 10 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 11 | 12 | # Runtime data 13 | pids 14 | *.pid 15 | *.seed 16 | *.pid.lock 17 | .DS_Store 18 | 19 | # Directory for instrumented libs generated by jscoverage/JSCover 20 | lib-cov 21 | 22 | # Coverage directory used by tools like istanbul 23 | coverage 24 | *.lcov 25 | 26 | # nyc test coverage 27 | .nyc_output 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons (https://nodejs.org/api/addons.html) 33 | build/Release 34 | 35 | # Dependency directories 36 | node_modules/ 37 | jspm_packages/ 38 | 39 | # TypeScript v1 declaration files 40 | typings/ 41 | 42 | # TypeScript cache 43 | *.tsbuildinfo 44 | 45 | # Optional npm cache directory 46 | .npm 47 | 48 | # Optional eslint cache 49 | .eslintcache 50 | 51 | # Optional REPL history 52 | .node_repl_history 53 | 54 | # Output of 'npm pack' 55 | *.tgz 56 | 57 | # Yarn Integrity file 58 | .yarn-integrity 59 | 60 | # dotenv environment variables file 61 | .env 62 | .env.test 63 | 64 | # parcel-bundler cache (https://parceljs.org/) 65 | .cache 66 | 67 | # next.js build output 68 | .next 69 | 70 | # nuxt.js build output 71 | .nuxt 72 | 73 | # vuepress build output 74 | .vuepress/dist 75 | 76 | # Serverless directories 77 | .serverless/ 78 | 79 | # FuseBox cache 80 | .fusebox/ 81 | 82 | # DynamoDB Local files 83 | .dynamodb/ 84 | 85 | # Webpack 86 | .webpack/ 87 | 88 | # Vite 89 | .vite/ 90 | 91 | # Electron-Forge 92 | out/ 93 | 94 | dist-electron-builder/ 95 | dist/ 96 | -------------------------------------------------------------------------------- /.versionrc: -------------------------------------------------------------------------------- 1 | { 2 | "header": "# 更新日志 \n\n", 3 | "types": [ 4 | { 5 | "type": "feat", 6 | "section": "✨ Features | 新功能", 7 | "hidden": false 8 | }, 9 | { 10 | "type": "fix", 11 | "section": "🐛 Bug Fixes | Bug 修复", 12 | "hidden": false 13 | }, 14 | { 15 | "type": "init", 16 | "section": "🎉 Init | 初始化", 17 | "hidden": true 18 | }, 19 | { 20 | "type": "docs", 21 | "section": "✏️ Documentation | 文档", 22 | "hidden": false 23 | }, 24 | { 25 | "type": "style", 26 | "section": "💄 Styles | 风格", 27 | "hidden": true 28 | }, 29 | { 30 | "type": "refactor", 31 | "section": "♻️ Code Refactoring | 代码重构", 32 | "hidden": true 33 | }, 34 | { 35 | "type": "perf", 36 | "section": "⚡ Performance Improvements | 性能优化", 37 | "hidden": true 38 | }, 39 | { 40 | "type": "test", 41 | "section": "✅ Tests | 测试", 42 | "hidden": true 43 | }, 44 | { 45 | "type": "revert", 46 | "section": "⏪ Revert | 回退", 47 | "hidden": true 48 | }, 49 | { 50 | "type": "build", 51 | "section": "📦‍ Build System | 打包构建", 52 | "hidden": true 53 | }, 54 | { 55 | "type": "chore", 56 | "section": "🚀 Chore | 构建/工程依赖/工具", 57 | "hidden": true 58 | }, 59 | { 60 | "type": "ci", 61 | "section": "👷 Continuous Integration | CI 配置", 62 | "hidden": true 63 | } 64 | ] 65 | } 66 | -------------------------------------------------------------------------------- /src/renderer/stores/embeddingStore.ts: -------------------------------------------------------------------------------- 1 | // stores/embeddingStore.ts 2 | import { defineStore } from 'pinia' 3 | 4 | export interface EmbeddingAPIConfig { 5 | apiURL: string 6 | apiKey: string 7 | modelName: string 8 | } 9 | 10 | export const useEmbeddingStore = defineStore('embedding', { 11 | state: () => ({ 12 | config: { 13 | apiURL: '', 14 | apiKey: '', 15 | modelName: '' 16 | } as EmbeddingAPIConfig, 17 | activeRepositoryName: '' // 记录正在查看的仓库名称 18 | }), 19 | 20 | getters: { 21 | getAPIConfig: (state) => state.config, 22 | getAPIURL: (state) => state.config.apiURL, 23 | getAPIKey: (state) => state.config.apiKey, 24 | getModelName: (state) => state.config.modelName, 25 | getActiveRepositoryName: (state) => state.activeRepositoryName, 26 | isConfigured: (state) => state.config.apiURL && state.config.apiKey && state.config.modelName 27 | }, 28 | 29 | actions: { 30 | setConfig(config: EmbeddingAPIConfig) { 31 | this.config = { ...config } 32 | }, 33 | 34 | setAPIURL(url: string) { 35 | this.config.apiURL = url 36 | }, 37 | 38 | setAPIKey(key: string) { 39 | this.config.apiKey = key 40 | }, 41 | 42 | setModelName(name: string) { 43 | this.config.modelName = name 44 | }, 45 | 46 | setActiveRepositoryName(name: string) { 47 | this.activeRepositoryName = name 48 | }, 49 | 50 | clearAll() { 51 | this.config.apiURL = '' 52 | this.config.apiKey = '' 53 | this.config.modelName = '' 54 | this.activeRepositoryName = '' 55 | } 56 | }, 57 | 58 | persist: { 59 | key: 'embeddingConfig', 60 | storage: localStorage, 61 | paths: ['config', 'activeRepositoryName'] 62 | } 63 | }) 64 | -------------------------------------------------------------------------------- /src/renderer/router/index.js: -------------------------------------------------------------------------------- 1 | import { createRouter, createWebHistory } from 'vue-router' 2 | import Home from '../views/Home.vue' 3 | import About from '../views/About.vue' 4 | import Work from '../views/Work.vue' 5 | import APISet from '../views/APISet.vue' 6 | import Proof from '../views/Proof.vue' 7 | import ProofSet from '../views/ProofSet.vue' 8 | import History from '../views/history.vue' 9 | import Logs from '../views/logs.vue' 10 | import Dictionary from '../views/Dictionary.vue' 11 | import OutputWord from '../views/OutputWord.vue' 12 | import path from 'path' 13 | const routes = [ 14 | { 15 | path: '/', 16 | name: 'Home', 17 | redirect: '/work/proof' 18 | }, 19 | { 20 | path: '/about', 21 | name: 'About', 22 | component: About 23 | }, 24 | { 25 | path: '/work', 26 | name: 'Work', 27 | component: Work, 28 | redirect: '/work/proof', 29 | children: [ 30 | { 31 | path: 'api', 32 | name: 'APISet', 33 | component: APISet 34 | }, 35 | { 36 | path: 'proof', 37 | name: 'Proof', 38 | component: Proof 39 | }, 40 | { 41 | path: 'set', 42 | name: 'Set', 43 | component: ProofSet 44 | }, 45 | { 46 | path: 'history', 47 | name: 'History', 48 | component: History 49 | }, 50 | { 51 | path: 'logs', 52 | name: 'Logs', 53 | component: Logs 54 | }, 55 | { 56 | path: 'dictionary', 57 | name: 'Dictionary', 58 | component: Dictionary 59 | }, 60 | { 61 | path: 'ouputWord', 62 | name: 'OutputWord', 63 | component: OutputWord 64 | } 65 | ] 66 | } 67 | 68 | // 动态路由示例 69 | ] 70 | 71 | const router = createRouter({ 72 | history: createWebHistory(), 73 | routes 74 | }) 75 | 76 | export default router 77 | -------------------------------------------------------------------------------- /changelog.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | disableEmoji: false, 3 | list: ['test', 'feat', 'fix', 'chore', 'docs', 'refactor', 'style', 'ci', 'perf', 'release', 'revert', 'build'], 4 | maxMessageLength: 64, 5 | minMessageLength: 3, 6 | questions: ['type', 'scope', 'subject', 'body', 'breaking', 'issues', 'lerna'], 7 | scopes: [], 8 | types: { 9 | chore: { 10 | description: 'Chore | 构建/工程依赖/工具', 11 | emoji: '🚀', // 当前类型的commit所显示的表情 12 | value: 'chore' 13 | }, 14 | ci: { 15 | description: 'Continuous Integration | CI 配置', 16 | emoji: '👷', 17 | value: 'ci' 18 | }, 19 | docs: { 20 | description: 'Documentation | 文档', 21 | emoji: '✏️ ', 22 | value: 'docs' 23 | }, 24 | feat: { 25 | description: 'Features | 新功能', 26 | emoji: '✨', 27 | value: 'feat' 28 | }, 29 | fix: { 30 | description: 'Bug Fixes | Bug 修复', 31 | emoji: '🐛', 32 | value: 'fix' 33 | }, 34 | perf: { 35 | description: 'Performance Improvements | 性能优化', 36 | emoji: '⚡', 37 | value: 'perf' 38 | }, 39 | refactor: { 40 | description: 'Code Refactoring | 代码重构', 41 | emoji: '♻️ ', 42 | value: 'refactor' 43 | }, 44 | release: { 45 | description: 'Create a release commit | 发版提交', 46 | emoji: '🏹', 47 | value: 'release' 48 | }, 49 | style: { 50 | description: 'Styles | 风格', 51 | emoji: '💄', 52 | value: 'style' 53 | }, 54 | revert: { 55 | description: 'Revert | 回退', 56 | emoji: '⏪', 57 | value: 'revert' 58 | }, 59 | build: { 60 | description: 'Build System | 打包构建', 61 | emoji: '📦', 62 | value: 'build' 63 | }, 64 | test: { 65 | description: 'Tests | 测试', 66 | emoji: '✅', 67 | value: 'test' 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/renderer/stores/apiStore.ts: -------------------------------------------------------------------------------- 1 | // stores/apiStore.ts 2 | import { defineStore } from 'pinia' 3 | import { reactive } from 'vue' 4 | 5 | interface ApiSettings { 6 | id: number | null 7 | URL: string 8 | key: string 9 | name: string 10 | time: string 11 | parallel: number 12 | TimeLimit: number | null 13 | total_tokens: number 14 | customPrompt: string | null 15 | } 16 | 17 | // 默认值作为常量,便于维护 18 | const defaultApiSettings: ApiSettings = { 19 | id: null, 20 | URL: '', 21 | key: '', 22 | name: '', 23 | time: '', 24 | parallel: 30, 25 | TimeLimit: null, 26 | total_tokens: 0, 27 | customPrompt: null 28 | } 29 | 30 | export const useApiStore = defineStore( 31 | 'apiSettings', 32 | () => { 33 | // 使用默认值初始化 34 | const selectedApi = reactive({ ...defaultApiSettings }) 35 | 36 | function setSelectedApi(api: Partial) { 37 | Object.assign(selectedApi, api) 38 | } 39 | 40 | function clearSelectedApi() { 41 | Object.assign(selectedApi, defaultApiSettings) 42 | } 43 | 44 | function setParallel(parallelSet: number) { 45 | selectedApi.parallel = parallelSet 46 | } 47 | 48 | function setTimeLimit(TimeLimit: number | null) { 49 | selectedApi.TimeLimit = TimeLimit 50 | } 51 | 52 | function setTotalTokens(total_tokens: number) { 53 | selectedApi.total_tokens = total_tokens 54 | } 55 | 56 | function addTotalTokens(new_tokens: number) { 57 | selectedApi.total_tokens += new_tokens 58 | } 59 | 60 | function setCustomPrompt(prompt: string | null) { 61 | selectedApi.customPrompt = prompt 62 | } 63 | 64 | return { 65 | selectedApi, 66 | setSelectedApi, 67 | clearSelectedApi, 68 | setParallel, 69 | setTimeLimit, 70 | setTotalTokens, 71 | addTotalTokens, 72 | setCustomPrompt 73 | } 74 | }, 75 | { 76 | persist: { 77 | key: 'apiSettings', 78 | storage: localStorage, 79 | pick: ['selectedApi'] // 明确指定需要持久化的路径 80 | } 81 | } 82 | ) 83 | -------------------------------------------------------------------------------- /forge.config.ts: -------------------------------------------------------------------------------- 1 | import type { ForgeConfig } from '@electron-forge/shared-types' 2 | import { MakerSquirrel } from '@electron-forge/maker-squirrel' 3 | import { MakerZIP } from '@electron-forge/maker-zip' 4 | import { MakerDeb } from '@electron-forge/maker-deb' 5 | import { MakerRpm } from '@electron-forge/maker-rpm' 6 | import { VitePlugin } from '@electron-forge/plugin-vite' 7 | import { FusesPlugin } from '@electron-forge/plugin-fuses' 8 | import { FuseV1Options, FuseVersion } from '@electron/fuses' 9 | import path from 'path' 10 | import { AutoUnpackNativesPlugin } from '@electron-forge/plugin-auto-unpack-natives' 11 | 12 | const config: ForgeConfig = { 13 | packagerConfig: { 14 | asar: true, 15 | icon: path.resolve(__dirname, 'assets/logo.ico'), 16 | name: 'AutoDocxProofreading', 17 | executableName: 'DocxProofread', 18 | extraResource: ['node_modules/@lancedb/lancedb-win32-x64-msvc'] 19 | }, 20 | rebuildConfig: {}, 21 | makers: [ 22 | new MakerSquirrel({ 23 | name: 'AutoDocxProofreading', 24 | authors: 'CZ600', 25 | iconUrl: 'https://raw.githubusercontent.com/CZ600/AutoDocxProofread/base_url/assets/logo.ico', 26 | setupIcon: 'assets/logo.ico' 27 | }), 28 | new MakerZIP({}, ['darwin']), 29 | new MakerRpm({}), 30 | new MakerDeb({}) 31 | ], 32 | plugins: [ 33 | new VitePlugin({ 34 | // `build` can specify multiple entry builds, which can be Main process, Preload scripts, Worker process, etc. 35 | // If you are familiar with Vite configuration, it will look really familiar. 36 | build: [ 37 | { 38 | // `entry` is just an alias for `build.lib.entry` in the corresponding file of `config`. 39 | entry: 'src/main/main.ts', 40 | config: 'vite.main.config.ts' 41 | }, 42 | { 43 | entry: 'src/main/preload.ts', 44 | config: 'vite.preload.config.ts' 45 | } 46 | ], 47 | renderer: [ 48 | { 49 | name: 'main_window', 50 | config: 'vite.renderer.config.ts' 51 | } 52 | ] 53 | }), 54 | // Fuses are used to enable/disable various Electron functionality 55 | // at package time, before code signing the application 56 | new FusesPlugin({ 57 | version: FuseVersion.V1, 58 | [FuseV1Options.RunAsNode]: false, 59 | [FuseV1Options.EnableCookieEncryption]: true, 60 | [FuseV1Options.EnableNodeOptionsEnvironmentVariable]: false, 61 | [FuseV1Options.EnableNodeCliInspectArguments]: false, 62 | [FuseV1Options.EnableEmbeddedAsarIntegrityValidation]: true, 63 | [FuseV1Options.OnlyLoadAppFromAsar]: true 64 | }) 65 | ] 66 | } 67 | 68 | export default config 69 | -------------------------------------------------------------------------------- /src/renderer/stores/store.ts: -------------------------------------------------------------------------------- 1 | // stores/store.ts 2 | import { defineStore, getActivePinia } from 'pinia' 3 | 4 | export interface CorrectionResult { 5 | applied: boolean 6 | id: string 7 | original: string 8 | reason: string 9 | suggested: string 10 | type: string 11 | } 12 | 13 | export const fileInfoStore = defineStore('fileInfo', { 14 | state: () => ({ 15 | filePath: '', 16 | fileName: '', 17 | proofModel: '', 18 | results: [] as CorrectionResult[] 19 | }), 20 | 21 | getters: { 22 | getFilePath: state => state.filePath, 23 | getFileName: state => state.fileName, 24 | getProofModel: state => state.proofModel, 25 | getResults: state => state.results, 26 | isFilePathEmpty: state => !state.filePath, 27 | isFileNameEmpty: state => !state.fileName, 28 | isProofModelEmpty: state => !state.proofModel, 29 | isResultsEmpty: state => state.results.length === 0 30 | }, 31 | 32 | actions: { 33 | setFilePath(filePath: string) { 34 | this.filePath = filePath 35 | }, 36 | setFileName(fileName: string) { 37 | this.fileName = fileName 38 | }, 39 | setProofModel(proofModel: string) { 40 | this.proofModel = proofModel 41 | }, 42 | setCorrectResult(results: CorrectionResult[]) { 43 | this.results = results 44 | }, 45 | clearAll() { 46 | this.filePath = '' 47 | this.fileName = '' 48 | this.proofModel = '' 49 | this.results = [] 50 | } 51 | }, 52 | 53 | // ✅ 关键:启用持久化,字段名必须和 state 一致 54 | persist: { 55 | key: 'fileInfo', 56 | storage: localStorage, 57 | paths: ['filePath', 'fileName', 'proofModel', 'results'] // ✅ 确保这四个字段都包含 58 | } 59 | }) 60 | 61 | export const embeddingSet = defineStore('embeddingSet', { 62 | state: () => ({ 63 | ActiveRepositoryName: '', // 记录正在查看的仓库名称 64 | apiURL: '', // api设置等 65 | apiKey: '', 66 | modelName: '' 67 | }), 68 | getters: { 69 | getActive: state => state.ActiveRepositoryName, 70 | getAPIURL: state => state.apiURL, 71 | getAPIKey: state => state.apiKey, 72 | getModelName: state => state.modelName 73 | }, 74 | actions: { 75 | setActive(activeName: string) { 76 | this.ActiveRepositoryName = activeName 77 | }, 78 | setURL(URL: string) { 79 | this.apiURL = URL 80 | }, 81 | setKey(Key: string) { 82 | this.apiKey = Key 83 | }, 84 | setModelName(Name: string) { 85 | this.modelName = Name 86 | }, 87 | clearAll() { 88 | this.ActiveRepositoryName = '' 89 | this.apiKey = '' 90 | this.apiURL = '' 91 | this.modelName = '' 92 | } 93 | }, 94 | // ✅ 关键:启用持久化,字段名必须和 state 一致 95 | persist: { 96 | key: 'embeddingSet', 97 | storage: localStorage, 98 | paths: ['ActiveRepositoryName', 'apiURL', 'apiKey', 'modelName'] // ✅ 确保这四个字段都包含 99 | } 100 | }) -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "AutoDocxProofreading", 3 | "productName": "AutoDocxProofreading", 4 | "version": "1.1.3", 5 | "description": "An intelligent long-document proofreading software powered by large language models. 基于大模型的简单的docx格式文档的校对", 6 | "main": ".vite/build/main.js", 7 | "scripts": { 8 | "start": "electron-forge start", 9 | "package": "electron-forge package", 10 | "make": "electron-forge make", 11 | "publish": "electron-forge publish", 12 | "release-major": "standard-version --release-as major", 13 | "release-minor": "standard-version --release-as minor", 14 | "release-patch": "standard-version --release-as patch", 15 | "lint:fix": "eslint --ext .js,.ts,.vue . --fix", 16 | "commit": "git add . && git-cz" 17 | }, 18 | "eslintConfig": { 19 | "extends": "./.eslintrc.js" 20 | }, 21 | "devDependencies": { 22 | "@commitlint/cli": "^19.3.0", 23 | "@commitlint/config-conventional": "^19.2.2", 24 | "@electron-forge/cli": "^7.4.0", 25 | "@electron-forge/maker-deb": "^7.4.0", 26 | "@electron-forge/maker-rpm": "^7.4.0", 27 | "@electron-forge/maker-squirrel": "^7.4.0", 28 | "@electron-forge/maker-wix": "^7.10.2", 29 | "@electron-forge/maker-zip": "^7.4.0", 30 | "@electron-forge/plugin-auto-unpack-natives": "^7.10.2", 31 | "@electron-forge/plugin-fuses": "^7.4.0", 32 | "@electron-forge/plugin-vite": "^7.4.0", 33 | "@electron/fuses": "^1.8.0", 34 | "@typescript-eslint/eslint-plugin": "^5.0.0", 35 | "@typescript-eslint/parser": "^5.0.0", 36 | "@vitejs/plugin-vue": "^5.0.4", 37 | "electron": "30.0.2", 38 | "eslint": "^8.0.1", 39 | "eslint-plugin-import": "^2.25.0", 40 | "eslint-plugin-vue": "^9.25.0", 41 | "git-cz": "^4.9.0", 42 | "husky": "^9.0.11", 43 | "lint-staged": "^15.2.2", 44 | "prettier": "^3.2.5", 45 | "standard-version": "^9.5.0", 46 | "ts-node": "^10.0.0", 47 | "typescript": "~4.5.4", 48 | "vite": "^5.0.12" 49 | }, 50 | "keywords": [], 51 | "author": { 52 | "name": "CZ600", 53 | "url": "https://github.com/CZ600" 54 | }, 55 | "license": "MIT", 56 | "dependencies": { 57 | "@google/genai": "^1.17.0", 58 | "@google/generative-ai": "^0.24.1", 59 | "@lancedb/lancedb": "^0.22.1", 60 | "@types/node": "^24.3.1", 61 | "@vueuse/core": "^14.0.0", 62 | "apache-arrow": "^18.1.0", 63 | "axios": "^1.11.0", 64 | "bottleneck": "^2.19.5", 65 | "cli-progress": "^3.12.0", 66 | "docx": "^9.5.1", 67 | "docx-preview": "^0.3.6", 68 | "docxtemplater": "^3.66.3", 69 | "electron-squirrel-startup": "^1.0.0", 70 | "element-plus": "^2.11.1", 71 | "fast-xml-parser": "^5.2.5", 72 | "file-saver": "^2.0.5", 73 | "jszip": "^3.10.1", 74 | "mammoth": "^1.11.0", 75 | "marked": "^16.3.0", 76 | "openai": "^5.19.1", 77 | "ora": "^8.2.0", 78 | "p-limit": "^7.1.1", 79 | "pdf-parse": "^2.1.7", 80 | "pinia": "^3.0.3", 81 | "pinia-plugin-persistedstate": "^4.5.0", 82 | "pizzip": "^3.2.0", 83 | "sqlite": "^5.1.1", 84 | "sqlite-vec": "^0.1.7-alpha.2", 85 | "sqlite3": "^5.1.7", 86 | "util": "^0.12.5", 87 | "uuid": "^13.0.0", 88 | "vue": "^3.4.27", 89 | "vue-demi": "^0.14.10", 90 | "vue-markdown": "^2.2.4", 91 | "vue-router": "^4.5.1" 92 | }, 93 | "lint-staged": { 94 | "*.{js,ts,vue}": "npm run lint:fix" 95 | }, 96 | "commitlint": { 97 | "extends": [ 98 | "@commitlint/config-conventional" 99 | ] 100 | }, 101 | "config": { 102 | "commitizen": { 103 | "path": "git-cz" 104 | } 105 | } 106 | } -------------------------------------------------------------------------------- /src/main/wordProcess.ts: -------------------------------------------------------------------------------- 1 | import * as fs from 'fs-extra' 2 | import JSZip from 'jszip' 3 | 4 | interface Replacement { 5 | original: string 6 | suggested: string 7 | } 8 | 9 | /** 10 | * 安全替换 Word 文档正文中的文本(仅处理 标签内的内容) 11 | * 1. 解码XML实体为可读文本 12 | * 2. 执行替换操作 13 | * 3. 重新编码为XML安全字符串 14 | * 4. 严格保留原始XML结构 15 | * 16 | * @param inputPath 原始 .docx 文件路径 17 | * @param outputPath 输出 .docx 文件路径 18 | * @param replacements 替换规则数组 19 | */ 20 | export async function replaceTextInDocx( 21 | inputPath: string, 22 | outputPath: string, 23 | replacements: Replacement[] 24 | ): Promise { 25 | // 1. 读取原始文件 26 | console.log('replace items:', replacements) 27 | const content = await fs.readFile(inputPath) 28 | const zip = await JSZip.loadAsync(content) 29 | 30 | // 2. 仅处理正文文件(关键改进:只关注正文) 31 | const filePath = 'word/document.xml' 32 | const file = zip.file(filePath) 33 | if (!file) { 34 | throw new Error(`[ERROR] 正文文件不存在: ${filePath}`) 35 | } 36 | 37 | try { 38 | let xmlStr = await file.async('text') 39 | if (typeof xmlStr !== 'string' || xmlStr.trim() === '') { 40 | throw new Error(`[ERROR] 正文文件为空: ${filePath}`) 41 | } 42 | 43 | // 辅助函数:XML实体解码(& → &) 44 | const decodeXmlEntities = (str: string): string => { 45 | return str 46 | .replace(/&/g, '&') 47 | .replace(//g, '>') 49 | .replace(/"/g, '"') 50 | .replace(/'/g, "'") 51 | } 52 | 53 | // 辅助函数:XML实体编码(& → &) 54 | const encodeXmlEntities = (str: string): string => { 55 | return str 56 | .replace(/&/g, '&') 57 | .replace(//g, '>') 59 | .replace(/"/g, '"') 60 | .replace(/'/g, ''') 61 | } 62 | 63 | // 辅助函数:正则特殊字符转义 64 | const escapeRegExp = (str: string): string => { 65 | return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') 66 | } 67 | 68 | // 核心:匹配所有 标签(关键改进:精准定位文本节点) 69 | const tTagRegex = /(]*>)([^<]*)(<\/w:t>)/g 70 | let hasReplaced = false 71 | 72 | const newXmlStr = xmlStr.replace(tTagRegex, (match, openTag, textContent, closeTag) => { 73 | // 步骤1: XML解码 → 用户可读文本 74 | const plainText = decodeXmlEntities(textContent) 75 | console.log(`[INFO] 正在处理文本: ${plainText}`) 76 | let replacedText = plainText 77 | 78 | // 步骤2: 应用替换规则(关键改进:在解码后的文本上操作) 79 | for (const { original, suggested } of replacements) { 80 | if (!original) continue 81 | 82 | // 严格转义原始字符串(防止正则注入) 83 | const safeOriginal = escapeRegExp(original) 84 | const regex = new RegExp(safeOriginal, 'g') 85 | 86 | const before = replacedText 87 | replacedText = replacedText.replace(regex, suggested) 88 | 89 | if (replacedText !== before) { 90 | hasReplaced = true 91 | } 92 | } 93 | 94 | // 步骤3: 重新编码为XML安全字符串 95 | const safeReplacedText = encodeXmlEntities(replacedText) 96 | 97 | // 步骤4: 重建原始XML结构 98 | return `${openTag}${safeReplacedText}${closeTag}` 99 | }) 100 | 101 | if (hasReplaced) { 102 | zip.file(filePath, newXmlStr) 103 | console.log(`[SUCCESS] 已替换正文文本: ${filePath}`) 104 | } else { 105 | console.log(`[INFO] 未找到匹配的替换内容: ${filePath}`) 106 | } 107 | 108 | // 4. 生成新 DOCX 文件 109 | const buffer = await zip.generateAsync({ 110 | type: 'nodebuffer', 111 | compression: 'DEFLATE', 112 | compressionOptions: { level: 6 } 113 | }) 114 | 115 | // 5. 写入输出文件 116 | await fs.writeFile(outputPath, buffer) 117 | console.log(`[INFO] 文档已保存至: ${outputPath}`) 118 | } catch (err) { 119 | console.error(`[FATAL] 处理正文文件失败: ${filePath}`, err) 120 | throw err 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /vite.base.config.ts: -------------------------------------------------------------------------------- 1 | import { builtinModules } from 'node:module' 2 | import type { AddressInfo } from 'node:net' 3 | import type { ConfigEnv, Plugin, UserConfig } from 'vite' 4 | import pkg from './package.json' 5 | 6 | export const builtins = ['electron', ...builtinModules.map(m => [m, `node:${m}`]).flat()] 7 | 8 | export const external = [ 9 | ...builtins, 10 | ...Object.keys('dependencies' in pkg ? (pkg.dependencies as Record) : {}), 11 | // 添加 LanceDB 相关的原生模块作为外部依赖 12 | '@lancedb/lancedb', 13 | '@lancedb/lancedb-win32-x64-msvc', 14 | '@lancedb/lancedb-darwin-arm64', 15 | '@lancedb/lancedb-darwin-x64', 16 | '@lancedb/lancedb-linux-arm64-gnu', 17 | '@lancedb/lancedb-linux-x64-gnu' 18 | ] 19 | 20 | export function getBuildConfig(env: ConfigEnv<'build'>): UserConfig { 21 | const { root, mode, command } = env 22 | 23 | return { 24 | root, 25 | mode, 26 | build: { 27 | // Prevent multiple builds from interfering with each other. 28 | emptyOutDir: false, 29 | // 🚧 Multiple builds may conflict. 30 | outDir: '.vite/build', 31 | watch: command === 'serve' ? {} : null, 32 | minify: command === 'build' 33 | }, 34 | clearScreen: false 35 | } 36 | } 37 | 38 | export function getDefineKeys(names: string[]) { 39 | const define: { [name: string]: VitePluginRuntimeKeys } = {} 40 | 41 | return names.reduce((acc, name) => { 42 | const NAME = name.toUpperCase() 43 | const keys: VitePluginRuntimeKeys = { 44 | VITE_DEV_SERVER_URL: `${NAME}_VITE_DEV_SERVER_URL`, 45 | VITE_NAME: `${NAME}_VITE_NAME` 46 | } 47 | 48 | return { ...acc, [name]: keys } 49 | }, define) 50 | } 51 | 52 | export function getBuildDefine(env: ConfigEnv<'build'>) { 53 | const { command, forgeConfig } = env 54 | const names = forgeConfig.renderer.filter(({ name }) => name != null).map(({ name }) => name) 55 | const defineKeys = getDefineKeys(names) 56 | const define = Object.entries(defineKeys).reduce( 57 | (acc, [name, keys]) => { 58 | const { VITE_DEV_SERVER_URL, VITE_NAME } = keys 59 | const def = { 60 | [VITE_DEV_SERVER_URL]: command === 'serve' ? JSON.stringify(process.env[VITE_DEV_SERVER_URL]) : undefined, 61 | [VITE_NAME]: JSON.stringify(name) 62 | } 63 | return { ...acc, ...def } 64 | }, 65 | {} as Record 66 | ) 67 | 68 | return define 69 | } 70 | 71 | export function pluginExposeRenderer(name: string): Plugin { 72 | const { VITE_DEV_SERVER_URL } = getDefineKeys([name])[name] 73 | 74 | return { 75 | name: '@electron-forge/plugin-vite:expose-renderer', 76 | configureServer(server) { 77 | process.viteDevServers ??= {} 78 | // Expose server for preload scripts hot reload. 79 | process.viteDevServers[name] = server 80 | 81 | server.httpServer?.once('listening', () => { 82 | const addressInfo = server.httpServer?.address() as AddressInfo 83 | // Expose env constant for main process use. 84 | process.env[VITE_DEV_SERVER_URL] = `http://localhost:${addressInfo?.port}` 85 | }) 86 | } 87 | } 88 | } 89 | 90 | export function pluginHotRestart(command: 'reload' | 'restart'): Plugin { 91 | return { 92 | name: '@electron-forge/plugin-vite:hot-restart', 93 | closeBundle() { 94 | if (command === 'reload') { 95 | for (const server of Object.values(process.viteDevServers)) { 96 | // Preload scripts hot reload. 97 | server.ws.send({ type: 'full-reload' }) 98 | } 99 | } else { 100 | // Main process hot restart. 101 | // https://github.com/electron/forge/blob/v7.2.0/packages/api/core/src/api/start.ts#L216-L223 102 | process.stdin.emit('data', 'rs') 103 | } 104 | } 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/renderer/views/history.vue: -------------------------------------------------------------------------------- 1 | 34 | 35 | 118 | 119 | -------------------------------------------------------------------------------- /src/main/main.ts: -------------------------------------------------------------------------------- 1 | import { app, BrowserWindow, session } from 'electron' // app是必须引入的, 2 | import path from 'path' 3 | 4 | // 将LanceDB原生模块路径添加到PATH环境变量,确保运行时能正确加载 5 | 6 | import { registerIpcHandlers } from './ipcHandlers' 7 | import { initLanceDB } from './lancedb' 8 | // main.js 或主进程中的其他文件 9 | // main.js 或打包入口 10 | 11 | // 为pdf-parse库提供浏览器API的polyfill 12 | // 为了在nodejs环境下正常使用pdf-parse库而添加的 13 | if (typeof (global as any).DOMMatrix === 'undefined') { 14 | ;(global as any).DOMMatrix = class DOMMatrix { 15 | constructor() { 16 | // 空实现 17 | } 18 | } 19 | } 20 | 21 | if (typeof (global as any).ImageData === 'undefined') { 22 | ;(global as any).ImageData = class ImageData { 23 | constructor() { 24 | // 空实现 25 | } 26 | } 27 | } 28 | 29 | if (typeof (global as any).Path2D === 'undefined') { 30 | ;(global as any).Path2D = class Path2D { 31 | constructor() { 32 | // 空实现 33 | } 34 | } 35 | } 36 | 37 | // Handle creating/removing shortcuts on Windows when installing/uninstalling. 38 | if (require('electron-squirrel-startup')) { 39 | app.quit() 40 | } 41 | 42 | const createWindow = () => { 43 | // Create the browser window. 44 | const mainWindow = new BrowserWindow({ 45 | width: 1300, 46 | height: 1200, 47 | title: 'AutoDocxProofreading', 48 | // autoHideMenuBar: true, // 禁用菜单栏 49 | icon: path.join(process.resourcesPath, 'assets', 'logo.ico'), 50 | 51 | ...(process.platform === 'linux' ? { icon: path.join(process.resourcesPath, 'assets', 'logo.ico') } : {}), 52 | 53 | webPreferences: { 54 | preload: path.join(__dirname, 'preload.js'), 55 | nodeIntegration: false, 56 | contextIsolation: true 57 | }, 58 | // 设置窗口样式 59 | // remove the default titlebar 60 | titleBarStyle: 'hidden', 61 | // expose window controls in Windows/Linux 62 | ...(process.platform !== 'darwin' ? { titleBarOverlay: true } : {}), 63 | titleBarOverlay: { 64 | color: 'rgba(255, 255, 255, 0)', 65 | symbolColor: '#807e85ff', 66 | height: 60 67 | } 68 | }) 69 | 70 | // load the index.html of the app. 71 | if (MAIN_WINDOW_VITE_DEV_SERVER_URL) { 72 | mainWindow.loadURL(MAIN_WINDOW_VITE_DEV_SERVER_URL) 73 | // Open the DevTools. 74 | mainWindow.webContents.openDevTools() 75 | } else { 76 | mainWindow.loadFile(path.join(__dirname, `../renderer/${MAIN_WINDOW_VITE_NAME}/index.html`)) 77 | } 78 | } 79 | 80 | app.whenReady().then(async () => { 81 | // 当应用准备好之后,回调函数 82 | console.log('app is ready') 83 | console.log('then will create a window') 84 | 85 | console.log('中文测试') 86 | createWindow() 87 | 88 | // 设置 Content-Security-Policy(CSP),跨站脚本攻击 (XSS) 和其他代码注入攻击 89 | session.defaultSession.webRequest.onHeadersReceived((details, callback) => { 90 | callback({ 91 | responseHeaders: { 92 | ...details.responseHeaders, 93 | 'Content-Security-Policy': ["script-src 'self'"] 94 | } 95 | }) 96 | }) 97 | // 当窗口被激活的时候,要判断是否有窗口打开,如果没有打开,那么就创建一个窗口(也是针对苹果系统作出的优化) 98 | app.on('activate', () => { 99 | // On macOS it's common to re-create a window in the app when the 100 | // dock icon is clicked and there are no other windows open. 101 | if (BrowserWindow.getAllWindows().length === 0) { 102 | createWindow() 103 | } 104 | }) 105 | 106 | // 判断是否为开发环境 107 | const isDev = MAIN_WINDOW_VITE_DEV_SERVER_URL !== undefined 108 | 109 | let nativeModulePath 110 | if (isDev) { 111 | // 开发环境:假设原生模块在项目根目录的 resources/ 下 112 | // const projectRoot = app.getAppPath(); // 项目根目录 113 | // nativeModulePath = path.join(projectRoot, 'resources', 'lancedb-win32-x64-msvc'); 114 | // 开发环境不设置 115 | } else { 116 | // 生产环境:原生模块应位于 resources/ 目录下(且需 unpacked) 117 | const installDir = path.dirname(app.getPath('exe')) 118 | const resourcesPath = path.join(installDir, 'resources') 119 | nativeModulePath = path.join(resourcesPath, 'lancedb-win32-x64-msvc') 120 | } 121 | 122 | process.env.LANCEDB_NATIVE_PATH = nativeModulePath 123 | process.env.PATH = `${nativeModulePath};${process.env.PATH}` 124 | 125 | try { 126 | await initLanceDB() 127 | console.log('LanceDB initialized successfully') 128 | } catch (error) { 129 | console.error('Failed to initialize LanceDB:', error) 130 | } 131 | }) 132 | 133 | // Quit when all windows are closed, except on macOS. There, it's common 134 | // for applications and their menu bar to stay active until the user quits 135 | // explicitly with Cmd + Q. 136 | app.on('window-all-closed', () => { 137 | // 当所有的窗口都关闭的时候并且不是macos的时候,那么关闭软件 138 | if (process.platform !== 'darwin') { 139 | app.quit() 140 | } 141 | }) 142 | 143 | registerIpcHandlers() 144 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AutoDocxProof 智能文档校对应用 2 | 3 |

4 | Logo 5 |

6 | 7 |

8 | 一款基于 Electron、Vue 3 和 TypeScript 构建的智能长文档校对桌面应用程序 9 |

10 | 11 | ## 📝 项目简介 12 | 13 | AutoDocxProofread(智能校对)是一款专为长文档校对而设计的桌面应用程序。它能够帮助用户有效检测 Word 文档中的错别字、标点符号错误、语法问题和文本一致性问题,并提供修改建议。 14 | 15 | 针对大模型在处理长文档时存在的遗忘和幻觉问题,软件设计了专门的架构来增强校对的准确性,并能直接导出校对后的文档。并且软件采用了并行处理架构,显著提升大模型处理长文档的速度。新版本引入了本地知识库功能,支持RAG功能给模型校对参考。 16 | 17 | ### 更新记录 18 | 19 | - v1.1.3 20 | - 新增了请求频率限制,优化对于第三方api转接站的支持 21 | - 优化深色模式显示效果 22 | - 新增token用量统计 23 | - 优化界面效果,改善交互逻辑 24 | - v1.1.2 25 | - 修复全文润色模式中,不使用rag功能时无法正常校对的bug 26 | - 新增昼夜模式切换功能 27 | - 模型并发限制可调,可以适应不同的api供应商对于并发的限制要求 28 | - v1.1.1 29 | - 修复rag功能可用性bug 30 | - v1.1.0 31 | - 重构界面,优化使用逻辑 32 | - 提升软件可用性 33 | 34 | ### 核心功能与软件优势 35 | 36 | - **多种校对模式**: 37 | - 逐句精校:适合需要高精度校对的短文本 38 | - 逐段校正:适合长篇文献的校对 39 | - 全文润色:对整篇文档进行语言润色和优化 40 | 41 | - **智能错误识别**: 42 | - 错别字检测 43 | - 标点符号错误识别 44 | - 语法问题检测 45 | 46 | - **知识库系统**: 47 | - 创建和管理多个本地知识库 48 | - 支持PDF、word和txt文档导入作为参考材料 49 | - 基于向量数据库的RAG检索增强生成算法 50 | 51 | - **更快的处理速度和用户友好的操作体验**: 52 | - 使用并行处理的方式优化处理效率,显著提升对于长文本的校对速度 53 | - 清晰的错误展示和修改建议 54 | - 一键应用修改建议 55 | - 响应式设计,支持窗口缩放 56 | 57 | - **便捷的 API 配置管理**: 58 | - 兼容openai接口,支持多种大语言模型 API 59 | - 灵活的 API 配置管理 60 | - 支持对于并发数量和请求速度的设置 61 | 62 | - **清晰的历史记录管理**: 63 | - 清晰查看历史记录,包括时间、校对模型、校对文件路径和具体的结果 64 | - 支持对结果的批量管理 65 | 66 | ### 使用展示 67 | 68 | 用户需要先在功能设置页面选择一个大模型后再开始校对操作。在文档校对页面,首先选择需要校对的文档后,再选择校对模式,选择使用的知识库(非必选),然后开始校对。软件会将校对的结果显示在右边栏,并在文本中高亮展示,以方便查看。然后可以选择是否接受这些修改,可以导出接受修改后的文档: 69 | 70 | ![文档校对页面](首页白.jpg) 71 | 72 | 本应用可以自行设置api,兼容满足openai规范的api接口,推荐使用非推理模型,并且可以限制并发请求数量: 73 | 74 | ![设置界面](新版设置页面.jpg) 75 | ![提示词设置](提示词设置.jpg) 76 | 77 | 本应用还可以浏览和管理校对记录: 78 | 79 | ![历史记录界面](记录管理页面.jpg) 80 | 81 | 知识库管理界面: 82 | 83 | ![知识库管理](知识库页面2.jpg) 84 | 85 | 昼夜模式切换效果: 86 | 87 | ![深色主题](首页黑.jpg) 88 | 89 | > 注意:校对结果的准确度很大程度上取决于模型能力,软件无法保证校对结果的完全准确,还需要人工再次检验。 90 | 91 | > 提示1:结果导出功能尚不完善,无法精准的将所有的结果应用到文档中,可能存在疏漏。 92 | 93 | > 提示2:全文润色功能适合较短篇幅的文档。逐句校对对token的消耗很大。 94 | 95 | ## 🛠 技术栈 96 | 97 | - **主框架**:[Electron](https://www.electronjs.org/) + [Vue 3](https://vuejs.org/) + [TypeScript](https://www.typescriptlang.org/) 98 | - **UI 组件库**:[Element Plus](https://element-plus.org/) 99 | - **构建工具**:[Vite](https://vitejs.dev/) + [Electron Forge](https://www.electronforge.io/) 100 | - **文档处理**:[Mammoth](https://github.com/mwilliamson/mammoth.js) + [Docxtemplater](https://github.com/open-xml-templating/docxtemplater) 101 | - **向量数据库**:[LanceDB](https://lancedb.com/) 102 | - **代码规范**:[ESLint](https://eslint.org/) + [Prettier](https://prettier.io/) 103 | - **版本管理**:[Standard Version](https://github.com/conventional-changelog/standard-version) 104 | 105 | ## 🚀 快速开始 106 | 107 | ### 环境要求 108 | 109 | - Node.js >= 16.x 110 | - npm 或 yarn 111 | 112 | ### 安装依赖 113 | 114 | ```bash 115 | npm install 116 | ``` 117 | 118 | ### 开发模式运行 119 | 120 | ```bash 121 | npm run start 122 | ``` 123 | 124 | ## 📦 项目结构 125 | 126 | ``` 127 | . 128 | ├── src/ 129 | │ ├── main/ # 主进程代码 130 | │ │ ├── chat.ts # AI 对话相关功能 131 | │ │ ├── database.ts # 数据库操作 132 | │ │ ├── ipcHandlers.ts # IPC 通信处理 133 | │ │ ├── lancedb.ts # 向量数据库操作 134 | │ │ ├── main.ts # 主进程入口 135 | │ │ ├── pdfUtils.ts # PDF文档处理 136 | │ │ ├── preload.ts # 预加载脚本 137 | │ │ ├── proof.ts # 文档校对核心逻辑 138 | │ │ └── wordProcess.ts # Word 文档处理 139 | │ └── renderer/ # 渲染进程代码 140 | │ ├── router/ # 路由配置 141 | │ ├── stores/ # Pinia存储目录 142 | │ ├── views/ # 页面组件 143 | │ ├── App.vue # 根组件 144 | │ └── renderer.ts # 渲染进程入口 145 | ├── assets/ # 静态资源 146 | ├── out/ # 构建输出目录 147 | └── forge.config.ts # Electron Forge 配置 148 | ``` 149 | 150 | ## 🎯 使用指南 151 | 152 | ### 1. 配置 API 153 | 154 | 首次使用需要配置支持的大语言模型 API: 155 | 156 | 1. 点击导航栏中的"工作区" 157 | 2. 选择"API 设置"选项卡 158 | 3. 填写 API 地址、密钥和模型名称 159 | 4. 点击"测试连接"验证配置 160 | 5. 点击"保存配置"保存设置 161 | 162 | ### 2. 创建知识库 163 | 164 | 1. 点击导航栏中的"知识库" 165 | 2. 选择"Embedding模型"(需要选择专门的embedding模型) 166 | 3. 点击"添加知识库"按钮创建新知识库 167 | 4. 选择知识库后可添加PDF文件作为参考材料 168 | 169 | ### 3. 文档校对 170 | 171 | 1. 点击导航栏中的"工作区" 172 | 2. 选择"文档校对"选项卡 173 | 3. 点击"选择 DOCX 文件"按钮选择要校对的 Word 文档 174 | 4. (可选)选择知识库以增强校对准确性 175 | 5. 选择合适的校对模式: 176 | - **逐句精校**:适合需要高精度校对的短文本 177 | - **逐段校正**:适合长篇文献的校对 178 | - **全文润色**:对整篇文档进行语言润色和优化 179 | 6. 点击"开始校正"按钮开始校对过程 180 | 7. 在右侧栏查看校对结果和修改建议 181 | 8. 点击"应用修改"按钮接受建议的修改 182 | 9. 点击"导出结果"按钮保存修改后的文档 183 | 184 | ## 🔧 开发计划 185 | 186 | 1. 大语言模型的格式化输出转word文档 187 | 2. 增强用户界面交互体验 188 | 3. 优化.docx文件的处理算法 189 | 190 | ## 📄 许可证 191 | 192 | 本项目采用 MIT 许可证 - 查看 [LICENSE](LICENSE) 文件了解详情 193 | 194 | ## 致谢 195 | 196 | 部分代码使用了night-peiqi的https://github.com/night-peiqi/electron-vue3-typescript-template 197 | -------------------------------------------------------------------------------- /src/renderer/electron.d.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Electron API 类型支持应与 preload.ts 中的API一致 3 | * 你需要告诉 TypeScript,windows 类型中心增加的属性和接口情况 4 | * 防止运行时错误 5 | */ 6 | import { proofreadLargeDocument, ProofreadingCorrection } from './proof' 7 | import { apiSettings } from './ipcHandlers' 8 | 9 | export interface proofHistory { 10 | id?: number 11 | filePath: string 12 | apiURL: string 13 | modelName: string 14 | created_at?: string 15 | result: string 16 | } 17 | 18 | // LanceDB 相关接口类型定义 19 | export interface LanceDBInsertParams { 20 | repositoryName: string 21 | fileName: string 22 | text: string 23 | metadata: Record 24 | } 25 | 26 | export interface LanceDBQueryParams { 27 | queryText: string 28 | limit: number 29 | filter: string 30 | fileName: string 31 | } 32 | 33 | export interface LanceDBUpdateParams { 34 | repositoryName: string 35 | id: number 36 | text: string 37 | metadata: Record 38 | } 39 | 40 | export interface LanceDBDeleteParams { 41 | repositoryName: string 42 | id: number 43 | } 44 | 45 | export interface ModelConfig { 46 | modelName: string 47 | apiKey: string 48 | apiURL: string 49 | } 50 | 51 | export interface PDFProcessParams { 52 | repositoryName: string 53 | filePath: string 54 | metadata: Record 55 | } 56 | 57 | export interface PDFSelectAndProcessParams { 58 | repositoryName: string 59 | metadata: Record 60 | } 61 | 62 | export interface PDFGetChunksParams { 63 | documentId: string 64 | repositoryName: string 65 | } 66 | 67 | export default interface ElectronApi { 68 | // test 69 | message: (file: string) => void 70 | receiveAndReturn: (characters: string) => string 71 | test: string 72 | // 文档加载 73 | selectDocxFile: () => string 74 | readDocxFile: (filePath: string) => { 75 | path: string 76 | content: string 77 | } 78 | // api设置和管理(接入数据库) 79 | APISettings: (URL: string, Key: string, modelName: string) => Promise 80 | getAPISettings: () => Promise<{ 81 | URL: string 82 | Key: string 83 | modelName: string 84 | parallel?: number 85 | TimeLimit?: number | null 86 | }> 87 | deleteOneAPI: (id: number) => Promise<{ 88 | isSuccess: boolean 89 | }> 90 | getALLAPISettings: () => Promise< 91 | { 92 | id: number 93 | URL: string 94 | Key: string 95 | modelName: string 96 | created_at: string 97 | }[] 98 | > 99 | testAPI: (url: string, key: string, modelName: string) => boolean 100 | selectAPISetting: ( 101 | url: string, 102 | key: string, 103 | modelName: string, 104 | parallel?: number, 105 | TimeLimit?: number | null 106 | ) => boolean 107 | 108 | // 文档处理接口 109 | processDocx: ( 110 | model: string, 111 | filePath: string, 112 | repositoryNameList?: string[], 113 | embeddingConfig?: apiSettings, 114 | setTimeLimit?: number, 115 | parallelSet?: number 116 | ) => Promise<{ 117 | proofResult: ProofreadingCorrection[] 118 | token_usage: number 119 | }> // 进行了更新 120 | exportCorrectedDocx: (config: any) => Promise 121 | 122 | // 提示词处理接口 123 | getDefaultPrompt: () => Promise 124 | setNewPrompt: (newPrompt: string) => Promise 125 | 126 | // 历史记录接口 127 | getAllHistory: () => proofHistory[] 128 | deleteAllHistory: () => Promise 129 | getHistoryById: (id: number) => Promise 130 | deleteHistoryById: (id: number) => Promise 131 | insertOneHistory: (filePath: string, apiURL: string, modelName: string, resultCorrect: string) => Promise 132 | 133 | // LanceDB 相关接口 134 | lancedbInsert: (params: LanceDBInsertParams, modelConfig: ModelConfig) => Promise 135 | lancedbQuery: (params: LanceDBQueryParams, modelConfig: ModelConfig) => Promise 136 | lancedbUpdate: (params: LanceDBUpdateParams, modelConfig: ModelConfig) => Promise 137 | lancedbDelete: (params: LanceDBDeleteParams) => Promise 138 | listRepositories: () => Promise 139 | createRepository: (params: { 140 | repositoryName: string 141 | modelName: string 142 | apiKey: string 143 | apiURL: string 144 | }) => Promise 145 | deleteRepository: (repositoryName: string) => Promise 146 | deleteDocumentByName: (repositoryName: string, filename: string) => Promise 147 | listFilenamesInRepository: (repositoryName: string) => Promise 148 | 149 | // PDF 处理相关接口 150 | processPDF: (params: PDFProcessParams, modelConfig: ModelConfig) => Promise 151 | selectAndProcessPDF: (repositoryName: string, modelConfig: ModelConfig) => Promise // 支持处理pdf、txt、docx文件 152 | getPDFChunks: (params: PDFGetChunksParams) => Promise 153 | // 设置embedding api 154 | getEmbeddingAPI: () => Promise<{ URL: string; Key: string; modelName: string }> 155 | setEmbeddingAPI: (apiKey: string, apiURL: string, modelName: string) => Promise 156 | getEnvPath: () => Promise 157 | } 158 | 159 | declare global { 160 | interface Window { 161 | electronAPI: ElectronApi 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /src/renderer/App.vue: -------------------------------------------------------------------------------- 1 | 76 | 77 | 108 | 109 | -------------------------------------------------------------------------------- /src/main/preload.ts: -------------------------------------------------------------------------------- 1 | import { contextBridge, ipcRenderer } from 'electron' 2 | import { get } from 'http' 3 | import test from 'node:test' 4 | import { setNewPrompt } from './proof' 5 | import { apiSettings } from './database' 6 | 7 | console.log('this message from the preload') 8 | 9 | // contextBridge.exposeInMainWorld 是一个安全机制,它允许你在预加载脚本中定义一些函数或对象,并将它们注入到网页的全局 window 对象中。 10 | // 第一个参数 electronAPI 表示将要挂载到window上的属性名称 11 | // 第二个参数,是一个对象,包含了想要暴露给前端的函数或者值 12 | contextBridge.exposeInMainWorld('electronAPI', { 13 | // 定义了一个名叫message的方法,接受字符串参数message 14 | message: (message: string) => { 15 | // 向主进程发送消息,单向通信方式 16 | // 发送的消息,通道名称是message,数据是传入的message的字符串 17 | ipcRenderer.send('message', message) // 调用主进程的接口,向主进程发送一条 *异步* 消息 18 | }, 19 | // 定义了一个名叫receiveAndReturn的方法,接受字符串参数message,然后返回一个字符串 20 | receiveAndReturn: (message: string) => { 21 | // 使用ipcRenderer.invoke()方法,调用主进程的接口,向主进程发送一条 *双向通讯请求* 的消息 (也是异步) 22 | // 向主进程发送消息,并返回处理结果,双向通信方式 23 | return ipcRenderer.invoke('receiveAndReturn', message) // invoke会等待主进程使用ipcMain。handle 返回一个promise的值 24 | // 由于返回的是一个promise 所以前端需要await它 25 | }, 26 | test: process.version, 27 | 28 | selectDocxFile: () => ipcRenderer.invoke('select-docx-file'), 29 | // 可选:如果需要主进程读取文件内容 30 | readDocxFile: (filePath: string) => ipcRenderer.invoke('read-docx-file', filePath), 31 | APISettings: (url: string, key: string, modelName: string) => 32 | ipcRenderer.invoke('set-api', url, key, modelName), 33 | getALLAPISettings: () => ipcRenderer.invoke('get-all-api-settings', {}), 34 | deleteOneAPI: (id: number) => ipcRenderer.invoke('delete-one-api-setting', id), 35 | testAPI: (url: string, key: string, modelName: string) => ipcRenderer.invoke('test-api', url, key, modelName), 36 | selectAPISetting: (url: string, key: string, modelName: string, parallel?: number, TimeLimit?: number | null) => 37 | ipcRenderer.invoke('selectAPISetting', url, key, modelName, parallel, TimeLimit), 38 | getAPISettings: () => ipcRenderer.invoke('get-api-settings', {}), 39 | // 文档校对处理函数 40 | processDocx: ( 41 | model: string, 42 | filePath: string, 43 | repositoryNameList?: string[], 44 | embeddingConfig?: apiSettings, 45 | setTimeLimit?: number, 46 | parallelSet?: number 47 | ) => { 48 | // 确保传递的参数是可序列化的 49 | const serializableParams = { 50 | model, 51 | filePath, 52 | repositoryNameList: repositoryNameList ? [...repositoryNameList] : undefined, 53 | embeddingConfig: embeddingConfig ? { ...embeddingConfig } : undefined, 54 | setTimeLimit: setTimeLimit || undefined, 55 | parallelSet: parallelSet || 30 56 | } 57 | 58 | return ipcRenderer.invoke( 59 | 'process-docx', 60 | serializableParams.model, 61 | serializableParams.filePath, 62 | serializableParams.repositoryNameList, 63 | serializableParams.embeddingConfig, 64 | serializableParams.setTimeLimit, 65 | serializableParams.parallelSet 66 | ) 67 | }, 68 | 69 | // 导出修正到文件中 70 | exportCorrectedDocx: (config: any) => { 71 | // 确保传递的参数是可序列化的 72 | const serializableConfig = JSON.parse(JSON.stringify(config)) 73 | return ipcRenderer.invoke('exportCorrectedDocx', serializableConfig) 74 | }, 75 | // 获取默认的提示词 76 | getDefaultPrompt: () => ipcRenderer.invoke('getDefaultPrompt'), 77 | // 设置新的提示词 78 | setNewPrompt: (prompt: string) => ipcRenderer.invoke('setPrompt', prompt), 79 | // 获取所有历史记录 80 | deleteAllHistory: () => ipcRenderer.invoke('deleteAllHistory'), 81 | // 获取所有历史记录 82 | getAllHistory: () => ipcRenderer.invoke('getAllHistory'), 83 | // 获取指定id的历史记录 84 | getHistoryById: (id: number) => ipcRenderer.invoke('getHistoryById', id), 85 | // 删除指定id的历史记录, 86 | deleteHistoryById: (id: number) => ipcRenderer.invoke('deleteHistoryById', id), 87 | // 插入一条历史记录 88 | insertOneHistory: (filePath: string, apiURL: string, modelName: string, resultCorrect: string) => 89 | ipcRenderer.invoke('insertOneHistory', filePath, apiURL, modelName, resultCorrect), 90 | 91 | // LanceDB 相关接口 92 | lancedbInsert: (params: any, modelConfig: any) => ipcRenderer.invoke('lancedb:insert', params, modelConfig), 93 | lancedbQuery: (params: any, modelConfig: any) => ipcRenderer.invoke('lancedb:query', params, modelConfig), 94 | lancedbUpdate: (params: any, modelConfig: any) => ipcRenderer.invoke('lancedb:update', params, modelConfig), 95 | lancedbDelete: (params: any) => ipcRenderer.invoke('lancedb:delete', params), 96 | listRepositories: () => ipcRenderer.invoke('listRepositories'), 97 | createRepository: (params: any) => ipcRenderer.invoke('createRepository', params), 98 | deleteRepository: (repositoryName: string) => ipcRenderer.invoke('deleteRepository', repositoryName), 99 | deleteDocumentByName: (repositoryName: string, filename: string) => 100 | ipcRenderer.invoke('deleteDocumentByName', repositoryName, filename), 101 | listFilenamesInRepository: (repositoryName: string) => 102 | ipcRenderer.invoke('listFilenamesInRepository', repositoryName), 103 | // PDF 处理相关接口 104 | processPDF: (params: any, modelConfig: any) => ipcRenderer.invoke('pdf:process', params, modelConfig), 105 | selectAndProcessPDF: (repositoryName: string, modelConfig: any) => 106 | ipcRenderer.invoke('pdf:select-and-process', repositoryName, modelConfig), 107 | getPDFChunks: (params: any) => ipcRenderer.invoke('pdf:get-chunks', params), 108 | setEmbeddingAPI: (apiKey: string, apiURL: string, modelName: string) => 109 | ipcRenderer.invoke('setEmbeddingAPI', apiKey, apiURL, modelName), 110 | getEmbeddingAPI: () => ipcRenderer.invoke('getEmbeddingAPI'), 111 | getEnvPath: () => ipcRenderer.invoke('getEnvPath') // 调试用,检验打包后的 112 | }) 113 | -------------------------------------------------------------------------------- /src/main/database.ts: -------------------------------------------------------------------------------- 1 | import { app } from 'electron' // 新增导入 2 | import { Database, open } from 'sqlite' 3 | import sqlite3 from 'sqlite3' 4 | import path from 'path' // 新增导入 5 | import fs from 'fs' 6 | import { promises } from 'dns' 7 | import { b } from 'vite/dist/node/types.d-aGj9QkWt' 8 | import { getEmbedding } from './chat' 9 | 10 | // 定义数据类型(TypeScript 类型安全) 11 | export interface User { 12 | id?: number 13 | name: string 14 | email: string 15 | created_at?: string 16 | } 17 | 18 | export interface apiSettings { 19 | id?: number 20 | apiURL: string 21 | apiKey: string 22 | modelName: string 23 | created_at?: string 24 | } 25 | 26 | export interface proofHistory { 27 | id?: number 28 | filePath: string 29 | apiURL: string 30 | modelName: string 31 | created_at?: string 32 | result: string 33 | } 34 | 35 | // 该类实现了对api数据和历史记录的数据库的管理操作 36 | export class DB { 37 | private static instance: Database 38 | // 使用系统标准路径 39 | private static get DB_PATH(): string { 40 | // 获取系统标准用户数据目录 41 | const userDataPath = app.getPath('userData') 42 | // 创建 data 子目录(避免污染根目录) 43 | return path.join(userDataPath, 'data', 'app.db') 44 | } 45 | static async getInstance(): Promise { 46 | if (!DB.instance) { 47 | // 确保目录存在(自动创建) 48 | const dir = path.dirname(DB.DB_PATH) 49 | await fs.promises.mkdir(dir, { recursive: true }) 50 | 51 | DB.instance = await open({ 52 | filename: DB.DB_PATH, 53 | driver: sqlite3.Database 54 | }) 55 | 56 | // 初始化表结构 57 | // 创建存储API设置的表 58 | await DB.instance.exec(` 59 | CREATE TABLE IF NOT EXISTS api_settings ( 60 | id INTEGER PRIMARY KEY AUTOINCREMENT, 61 | apiURL TEXT NOT NULL, 62 | apiKey TEXT NOT NULL, 63 | modelName TEXT NOT NULL, 64 | created_at DATETIME DEFAULT CURRENT_TIMESTAMP 65 | ) 66 | `) 67 | // 创建存储校对历史的表 68 | await DB.instance.exec( 69 | ` 70 | CREATE TABLE IF NOT EXISTS proof_history ( 71 | id INTEGER PRIMARY KEY AUTOINCREMENT, 72 | filePath TEXT NOT NULL, 73 | apiURL TEXT NOT NULL, 74 | modelName TEXT NOT NULL, 75 | created_at DATETIME DEFAULT CURRENT_TIMESTAMP, 76 | result TEXT NOT NULL 77 | ) 78 | ` 79 | ) 80 | } 81 | return DB.instance 82 | } 83 | 84 | // 查询记录的条目数量 85 | static async getAPISettingsCount(): Promise { 86 | const db = await DB.getInstance() 87 | const result = await db.get(`SELECT COUNT(*) as count FROM api_settings`) 88 | return result.count 89 | } 90 | 91 | static async getHistoryCount(): Promise { 92 | const db = await DB.getInstance() 93 | const result = await db.get(`SELECT COUNT(*) as count FROM proof_history`) 94 | return result.count 95 | } 96 | /** 97 | * 插入一条 API 设置记录 98 | * @param setting apiSettings 对象(不含 id) 99 | * @returns 新记录的 id 100 | */ 101 | static async insertAPISetting(apiURL: string, apiKey: string, modelName: string): Promise { 102 | const db = await DB.getInstance() 103 | try { 104 | const result = await db.run( 105 | `INSERT INTO api_settings (apiURL, apiKey, modelName) VALUES (?, ?, ?)`, 106 | apiURL, 107 | apiKey, 108 | modelName 109 | ) 110 | return result.lastID 111 | } catch (error) { 112 | console.error('插入 API 设置失败:', error) 113 | throw error // 或根据需求返回 -1 / null 114 | } 115 | } 116 | 117 | static async insertOneHistory(filePath: string, apiURL: string, modelName: string, result: string): Promise { 118 | const db = await DB.getInstance() 119 | if (!filePath || !apiURL || !modelName || !result) { 120 | const errorMsg = '插入历史记录参数不完整: ' + JSON.stringify({ filePath, apiURL, modelName, result: !!result }) 121 | console.error(errorMsg) 122 | throw new Error(errorMsg) 123 | } 124 | 125 | // 验证JSON格式 126 | try { 127 | JSON.parse(result) 128 | } catch (parseError) { 129 | const errorMsg = 'result参数不是有效的JSON: ' + parseError.message 130 | console.error(errorMsg) 131 | throw new Error(errorMsg) 132 | } 133 | 134 | try { 135 | const res = await db.run( 136 | `INSERT INTO proof_history (filePath, apiURL, modelName, result) VALUES (?, ?, ?, ?)`, 137 | filePath, 138 | apiURL, 139 | modelName, 140 | result 141 | ) 142 | return res.lastID 143 | } catch (error) { 144 | console.error('插入校对记录失败:', error) 145 | throw error 146 | } 147 | } 148 | 149 | // 根据id查询api记录 150 | 151 | static async getAPISettingById(id: number): Promise { 152 | const db = await DB.getInstance() 153 | const result = await db.get(`SELECT * FROM api_settings WHERE id = ?`, id) 154 | return result || null 155 | } 156 | 157 | static async getHistoryById(id: number): Promise { 158 | const db = await DB.getInstance() 159 | const result = await db.get(`SELECT * FROM proof_history WHERE id = ?`, id) 160 | return result || null 161 | } 162 | 163 | // 删除指定的数据集 164 | static async deleteAPISettingById(id: number): Promise { 165 | const db = await DB.getInstance() 166 | const result = await db.run(`DELETE FROM api_settings WHERE id = ?`, id) 167 | return result.changes > 0 // 如果有行被删除,返回 true,否则返回 false 168 | } 169 | 170 | static async deleteHistoryById(id: number): Promise { 171 | const db = await DB.getInstance() 172 | const result = await db.run(`DELETE FROM proof_history WHERE id = ?`, id) 173 | return result.changes > 0 // 如果有行被删除,返回 true,否则返回 false 174 | } 175 | 176 | // 删除所有数据集 177 | static async deleteALLSettings(): Promise { 178 | const db = await DB.getInstance() 179 | await db.run(`DELETE FROM api_settings`) 180 | const result = await db.run(`SELECT * FROM api_settings`) 181 | const count = await DB.getAPISettingsCount() 182 | if (result.changes === count) { 183 | return true 184 | } else { 185 | return false 186 | } 187 | } 188 | 189 | static async deleteALLHistory(): Promise { 190 | const db = await DB.getInstance() 191 | await db.run(`DELETE FROM proof_history`) 192 | const count = await DB.getHistoryCount() 193 | if (count === 0) { 194 | return true 195 | } else { 196 | return false 197 | } 198 | } 199 | 200 | /** 201 | * 查询所有 API 设置记录 202 | * @returns apiSettings 数组 203 | */ 204 | static async getAllAPISettings(): Promise { 205 | // 返回apiSettings 数组 206 | const db = await DB.getInstance() 207 | const rows = await db.all( 208 | `SELECT id, apiURL, apiKey, modelName, created_at FROM api_settings ORDER BY created_at DESC` 209 | ) 210 | console.log('the result of the search of all ', rows) 211 | return rows 212 | } 213 | 214 | static async getALLHistory(): Promise { 215 | // 获取所有校对记录 216 | const db = await DB.getInstance() 217 | const rows = await db.all( 218 | `SELECT id, filePath, apiURL, modelName, created_at, result FROM proof_history ORDER BY created_at DESC` 219 | ) 220 | console.log('the result of the search of all ', rows) 221 | return rows 222 | } 223 | } 224 | -------------------------------------------------------------------------------- /src/main/chat.ts: -------------------------------------------------------------------------------- 1 | // 导入自 '@google/generative-ai' 2 | import { 3 | GoogleGenerativeAI, 4 | GenerationConfig, 5 | SafetySetting, 6 | HarmCategory, 7 | HarmBlockThreshold, 8 | Part 9 | } from '@google/generative-ai' 10 | import OpenAI from 'openai' 11 | import { basename } from 'path' 12 | /** 13 | * 调用 Gemini API 进行单次对话。 14 | * 15 | * @param systemPrompt - 给模型的系统指令。这是一个对象,包含role和parts。 16 | * @param userPrompt - 用户的提问。 17 | * @param apiKey - Google AI API 密钥。 18 | * @param modelName - 要使用的模型名称,例如 "gemini-1.5-flash"。 19 | * @returns A Promise that resolves to the model's text response. 20 | */ 21 | 22 | // gemini接口的实现 23 | // 但是实际上没有调用 24 | export async function getGeminiResponse( 25 | systemPrompt: string, 26 | userPrompt: string, 27 | apiKey: string, 28 | modelName: string 29 | ): Promise { 30 | if (!apiKey) { 31 | throw new Error('API key is missing. Please provide a valid API key.') 32 | } 33 | 34 | try { 35 | // 初始化时传入 API Key 36 | const genAI = new GoogleGenerativeAI(apiKey) 37 | 38 | // 获取模型,现在可以直接在 getGenerativeModel 中设置 system instruction 39 | const model = genAI.getGenerativeModel({ 40 | model: modelName, 41 | systemInstruction: { 42 | role: 'system', // 或者 'model',但通常对于指令是 'user' 43 | parts: [{ text: systemPrompt }] 44 | } 45 | }) 46 | 47 | const generationConfig: GenerationConfig = { 48 | temperature: 0.9, 49 | topK: 1, 50 | topP: 1, 51 | maxOutputTokens: 2048 52 | } 53 | 54 | const safetySettings: SafetySetting[] = [ 55 | { 56 | category: HarmCategory.HARM_CATEGORY_HARASSMENT, 57 | threshold: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE 58 | }, 59 | { 60 | category: HarmCategory.HARM_CATEGORY_HATE_SPEECH, 61 | threshold: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE 62 | }, 63 | { 64 | category: HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, 65 | threshold: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE 66 | }, 67 | { 68 | category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, 69 | threshold: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE 70 | } 71 | ] 72 | 73 | // generateContent 现在只需要传入用户的 prompt 即可 74 | const result = await model.generateContent({ 75 | contents: [{ role: 'user', parts: [{ text: userPrompt }] }], 76 | generationConfig, 77 | safetySettings 78 | }) 79 | 80 | const response = result.response 81 | 82 | if (response.promptFeedback?.blockReason) { 83 | throw new Error(`Request was blocked due to: ${response.promptFeedback.blockReason}`) 84 | } 85 | 86 | if (!response.candidates || response.candidates.length === 0) { 87 | throw new Error('No response candidates found.') 88 | } 89 | 90 | // 从 candidates 中获取文本 91 | const text = response.candidates[0].content.parts.map(part => part.text).join('') 92 | return text 93 | } catch (error) { 94 | console.error('An error occurred while calling the Gemini API:', error) 95 | // 抛出更具体的错误信息 96 | if (error instanceof Error) { 97 | throw new Error(`Gemini API call failed: ${error.message}`) 98 | } else { 99 | throw new Error('An unknown error occurred during the Gemini API call.') 100 | } 101 | } 102 | } 103 | // openai的接口 带 null 安全 104 | 105 | export async function OpenaiGen( 106 | systemPrompt: string, 107 | userPrompt: string, 108 | apiKey: string, 109 | modelName: string, 110 | apiURL: string 111 | ): Promise<{ result: string; total_tokens: number }> { 112 | if (!apiKey) { 113 | throw new Error('API key is missing. Please provide a valid API key.') 114 | } 115 | 116 | try { 117 | const openai = new OpenAI({ 118 | apiKey: apiKey, 119 | baseURL: apiURL 120 | }) 121 | 122 | const chatCompletion = await openai.chat.completions.create({ 123 | model: modelName, 124 | messages: [ 125 | { role: 'system', content: systemPrompt }, 126 | { role: 'user', content: userPrompt } 127 | ] 128 | }) 129 | 130 | // 添加健壮性检查 131 | if (!chatCompletion || !chatCompletion.choices || !Array.isArray(chatCompletion.choices) || chatCompletion.choices.length === 0) { 132 | console.error('Invalid API response:', chatCompletion) 133 | throw new Error('API返回了无效的响应格式,choices字段缺失或为空') 134 | } 135 | 136 | const result = chatCompletion.choices[0]?.message?.content ?? '' 137 | const total_tokens = chatCompletion.usage?.total_tokens ?? 0 138 | 139 | return { result, total_tokens } 140 | } catch (error) { 141 | console.error('An error occurred while calling the OpenAI-compatible API:', error) 142 | if (error instanceof Error) { 143 | throw new Error(`OpenAI API call failed: ${error.message}`) 144 | } else { 145 | throw new Error('An unknown error occurred during the OpenAI API call.') 146 | } 147 | } 148 | } 149 | 150 | // 测试api可用性 151 | export async function testAPI(apiURL: string, apiKey: string, modelName: string): Promise { 152 | try { 153 | const openai = new OpenAI({ 154 | apiKey: apiKey, 155 | baseURL: apiURL 156 | }) 157 | 158 | const chatCompletion = await openai.chat.completions.create({ 159 | model: modelName, 160 | messages: [{ role: 'user', content: '你好' }] 161 | }) 162 | 163 | // 检查响应有效性 164 | if (!chatCompletion || !chatCompletion.choices || !Array.isArray(chatCompletion.choices) || chatCompletion.choices.length === 0) { 165 | console.error('API test failed - invalid response:', chatCompletion) 166 | return false 167 | } 168 | 169 | console.log('the result of connet test:', chatCompletion.choices[0].message.content) 170 | 171 | return true 172 | } catch (error) { 173 | console.error('An error occurred while calling the Gemini API:', error) 174 | return false 175 | } 176 | } 177 | 178 | // test modelname:doubao-embedding-text-240715 179 | // test url: https://ark.cn-beijing.volces.com/api/v3/ 180 | export async function getEmbedding(text: string | string[], modelName: string, apiKey_input: string, apiURL: string) { 181 | // 参数有效性检查 182 | if (!text || (Array.isArray(text) && text.length === 0)) { 183 | throw new Error('Text parameter is required and cannot be empty') 184 | } 185 | 186 | if (!modelName) { 187 | throw new Error('Model name is required') 188 | } 189 | 190 | if (!apiKey_input) { 191 | throw new Error('API key is required') 192 | } 193 | 194 | if (!apiURL) { 195 | throw new Error('API URL is required') 196 | } 197 | 198 | // 对于数组类型,检查每个元素是否为字符串 199 | if (Array.isArray(text)) { 200 | for (let i = 0; i < text.length; i++) { 201 | if (typeof text[i] !== 'string') { 202 | throw new Error(`Element at index ${i} is not a string`) 203 | } 204 | if (text[i].trim() === '') { 205 | throw new Error(`Element at index ${i} is an empty string`) 206 | } 207 | } 208 | } else if (typeof text !== 'string') { 209 | throw new Error('Text parameter must be a string or an array of strings') 210 | } else if (text.trim() === '') { 211 | throw new Error('Text parameter cannot be an empty string') 212 | } 213 | 214 | const openai = new OpenAI({ 215 | apiKey: apiKey_input, 216 | baseURL: apiURL 217 | }) 218 | 219 | try { 220 | const response = await openai.embeddings.create({ 221 | model: modelName, 222 | input: text 223 | }) 224 | 225 | // 检查响应有效性 226 | if (!response || !response.data || !Array.isArray(response.data) || response.data.length === 0) { 227 | throw new Error('嵌入API返回了无效的响应格式,data字段缺失或为空') 228 | } 229 | 230 | // 返回embedding结果 231 | if (typeof text === 'string') { 232 | return response.data[0].embedding 233 | } 234 | if (Array.isArray(text)) { 235 | return response.data.map(item => item.embedding) 236 | } 237 | 238 | return response.data[0].embedding 239 | } catch (error: any) { 240 | console.log('error getting embedding:', error) 241 | 242 | // 提供更详细的错误信息 243 | if (error.status === 404) { 244 | throw new Error( 245 | `嵌入API调用失败,状态码404: 请检查API地址(${apiURL})和模型名称(${modelName})是否正确,该模型可能不支持嵌入功能` 246 | ) 247 | } else if (error.status === 401) { 248 | throw new Error(`嵌入API调用失败,认证错误: API密钥无效或权限不足`) 249 | } else if (error.status === 400) { 250 | throw new Error(`嵌入API调用失败,请求错误: ${error.message}`) 251 | } else { 252 | throw new Error(`嵌入API调用失败: ${error.message || '未知错误'}`) 253 | } 254 | } 255 | } 256 | -------------------------------------------------------------------------------- /lancedbNativePro.ts: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | /* tslint:disable */ 3 | /* eslint-disable */ 4 | /* prettier-ignore */ 5 | /* auto-generated by NAPI-RS */ 6 | const { existsSync, readFileSync } = require('fs'); 7 | const { join } = require('path') 8 | const { platform, arch, env } = process 9 | 10 | let nativeBinding = null 11 | let localFileExisted = false 12 | let loadError = null 13 | // LANCEDB_NATIVE_PATH 14 | // 新增:获取自定义绑定路径的函数 15 | function getBindingPath(filename) { 16 | // 检查环境变量,优先使用环境变量指定的路径 17 | const customBindingPath = env.LANCEDB_NATIVE_PATH 18 | if (customBindingPath) { 19 | console.log('env path of msvc:', customBindingPath) 20 | // 如果环境变量指定的是完整路径(包含文件名) 21 | if (customBindingPath.endsWith('.node')) { 22 | return customBindingPath 23 | } 24 | // 如果环境变量指定的是目录,则拼接文件名 25 | return join(customBindingPath, filename) 26 | } 27 | // 默认行为:使用 __dirname 28 | return join(__dirname, filename) 29 | } 30 | 31 | function isMusl() { 32 | // For Node 10 33 | if (!process.report || typeof process.report.getReport !== 'function') { 34 | try { 35 | const lddPath = require('child_process').execSync('which ldd').toString().trim() 36 | return readFileSync(lddPath, 'utf8').includes('musl') 37 | } catch (e) { 38 | return true 39 | } 40 | } else { 41 | const { glibcVersionRuntime } = process.report.getReport().header 42 | return !glibcVersionRuntime 43 | } 44 | } 45 | 46 | switch (platform) { 47 | case 'android': 48 | switch (arch) { 49 | case 'arm64': 50 | // 修改:使用 getBindingPath 替代硬编码的路径 51 | const androidArm64Path = getBindingPath('lancedb.android-arm64.node') 52 | localFileExisted = existsSync(androidArm64Path) 53 | try { 54 | if (localFileExisted) { 55 | nativeBinding = require(androidArm64Path) 56 | } else { 57 | nativeBinding = require('@lancedb/lancedb-android-arm64') 58 | } 59 | } catch (e) { 60 | loadError = e 61 | } 62 | break 63 | case 'arm': 64 | const androidArmPath = getBindingPath('lancedb.android-arm-eabi.node') 65 | localFileExisted = existsSync(androidArmPath) 66 | try { 67 | if (localFileExisted) { 68 | nativeBinding = require(androidArmPath) 69 | } else { 70 | nativeBinding = require('@lancedb/lancedb-android-arm-eabi') 71 | } 72 | } catch (e) { 73 | loadError = e 74 | } 75 | break 76 | default: 77 | throw new Error(`Unsupported architecture on Android ${arch}`) 78 | } 79 | break 80 | case 'win32': 81 | switch (arch) { 82 | case 'x64': 83 | // 修改:Windows x64 使用新的路径获取方式 84 | const winX64Path = getBindingPath('lancedb.win32-x64-msvc.node') 85 | localFileExisted = existsSync(winX64Path) 86 | try { 87 | if (localFileExisted) { 88 | nativeBinding = require(winX64Path) 89 | } else { 90 | nativeBinding = require('@lancedb/lancedb-win32-x64-msvc') 91 | } 92 | } catch (e) { 93 | loadError = e 94 | } 95 | break 96 | case 'ia32': 97 | const winIa32Path = getBindingPath('lancedb.win32-ia32-msvc.node') 98 | localFileExisted = existsSync(winIa32Path) 99 | try { 100 | if (localFileExisted) { 101 | nativeBinding = require(winIa32Path) 102 | } else { 103 | nativeBinding = require('@lancedb/lancedb-win32-ia32-msvc') 104 | } 105 | } catch (e) { 106 | loadError = e 107 | } 108 | break 109 | case 'arm64': 110 | const winArm64Path = getBindingPath('lancedb.win32-arm64-msvc.node') 111 | localFileExisted = existsSync(winArm64Path) 112 | try { 113 | if (localFileExisted) { 114 | nativeBinding = require(winArm64Path) 115 | } else { 116 | nativeBinding = require('@lancedb/lancedb-win32-arm64-msvc') 117 | } 118 | } catch (e) { 119 | loadError = e 120 | } 121 | break 122 | default: 123 | throw new Error(`Unsupported architecture on Windows: ${arch}`) 124 | } 125 | break 126 | case 'darwin': 127 | // 修改:macOS 通用版本 128 | const darwinUniversalPath = getBindingPath('lancedb.darwin-universal.node') 129 | localFileExisted = existsSync(darwinUniversalPath) 130 | try { 131 | if (localFileExisted) { 132 | nativeBinding = require(darwinUniversalPath) 133 | } else { 134 | nativeBinding = require('@lancedb/lancedb-darwin-universal') 135 | } 136 | break 137 | } catch {} 138 | switch (arch) { 139 | case 'x64': 140 | const darwinX64Path = getBindingPath('lancedb.darwin-x64.node') 141 | localFileExisted = existsSync(darwinX64Path) 142 | try { 143 | if (localFileExisted) { 144 | nativeBinding = require(darwinX64Path) 145 | } else { 146 | nativeBinding = require('@lancedb/lancedb-darwin-x64') 147 | } 148 | } catch (e) { 149 | loadError = e 150 | } 151 | break 152 | case 'arm64': 153 | const darwinArm64Path = getBindingPath('lancedb.darwin-arm64.node') 154 | localFileExisted = existsSync(darwinArm64Path) 155 | try { 156 | if (localFileExisted) { 157 | nativeBinding = require(darwinArm64Path) 158 | } else { 159 | nativeBinding = require('@lancedb/lancedb-darwin-arm64') 160 | } 161 | } catch (e) { 162 | loadError = e 163 | } 164 | break 165 | default: 166 | throw new Error(`Unsupported architecture on macOS: ${arch}`) 167 | } 168 | break 169 | case 'freebsd': 170 | if (arch !== 'x64') { 171 | throw new Error(`Unsupported architecture on FreeBSD: ${arch}`) 172 | } 173 | const freebsdPath = getBindingPath('lancedb.freebsd-x64.node') 174 | localFileExisted = existsSync(freebsdPath) 175 | try { 176 | if (localFileExisted) { 177 | nativeBinding = require(freebsdPath) 178 | } else { 179 | nativeBinding = require('@lancedb/lancedb-freebsd-x64') 180 | } 181 | } catch (e) { 182 | loadError = e 183 | } 184 | break 185 | case 'linux': 186 | switch (arch) { 187 | case 'x64': 188 | if (isMusl()) { 189 | const linuxX64MuslPath = getBindingPath('lancedb.linux-x64-musl.node') 190 | localFileExisted = existsSync(linuxX64MuslPath) 191 | try { 192 | if (localFileExisted) { 193 | nativeBinding = require(linuxX64MuslPath) 194 | } else { 195 | nativeBinding = require('@lancedb/lancedb-linux-x64-musl') 196 | } 197 | } catch (e) { 198 | loadError = e 199 | } 200 | } else { 201 | const linuxX64GnuPath = getBindingPath('lancedb.linux-x64-gnu.node') 202 | localFileExisted = existsSync(linuxX64GnuPath) 203 | try { 204 | if (localFileExisted) { 205 | nativeBinding = require(linuxX64GnuPath) 206 | } else { 207 | nativeBinding = require('@lancedb/lancedb-linux-x64-gnu') 208 | } 209 | } catch (e) { 210 | loadError = e 211 | } 212 | } 213 | break 214 | case 'arm64': 215 | if (isMusl()) { 216 | const linuxArm64MuslPath = getBindingPath('lancedb.linux-arm64-musl.node') 217 | localFileExisted = existsSync(linuxArm64MuslPath) 218 | try { 219 | if (localFileExisted) { 220 | nativeBinding = require(linuxArm64MuslPath) 221 | } else { 222 | nativeBinding = require('@lancedb/lancedb-linux-arm64-musl') 223 | } 224 | } catch (e) { 225 | loadError = e 226 | } 227 | } else { 228 | const linuxArm64GnuPath = getBindingPath('lancedb.linux-arm64-gnu.node') 229 | localFileExisted = existsSync(linuxArm64GnuPath) 230 | try { 231 | if (localFileExisted) { 232 | nativeBinding = require(linuxArm64GnuPath) 233 | } else { 234 | nativeBinding = require('@lancedb/lancedb-linux-arm64-gnu') 235 | } 236 | } catch (e) { 237 | loadError = e 238 | } 239 | } 240 | break 241 | // ... 其他 Linux 架构的修改类似,已省略以保持简洁 242 | default: 243 | throw new Error(`Unsupported architecture on Linux: ${arch}`) 244 | } 245 | break 246 | default: 247 | throw new Error(`Unsupported OS: ${platform}, architecture: ${arch}`) 248 | } 249 | 250 | if (!nativeBinding) { 251 | if (loadError) { 252 | throw loadError 253 | } 254 | throw new Error(`Failed to load native binding`) 255 | } 256 | 257 | const { 258 | Connection, 259 | JsHeaderProvider, 260 | Index, 261 | RecordBatchIterator, 262 | NativeMergeInsertBuilder, 263 | Query, 264 | VectorQuery, 265 | TakeQuery, 266 | JsFullTextQuery, 267 | Reranker, 268 | RrfReranker, 269 | Session, 270 | Table, 271 | TagContents, 272 | Tags 273 | } = nativeBinding 274 | module.exports.Connection = Connection 275 | module.exports.JsHeaderProvider = JsHeaderProvider 276 | module.exports.Index = Index 277 | module.exports.RecordBatchIterator = RecordBatchIterator 278 | module.exports.NativeMergeInsertBuilder = NativeMergeInsertBuilder 279 | module.exports.Query = Query 280 | module.exports.VectorQuery = VectorQuery 281 | module.exports.TakeQuery = TakeQuery 282 | module.exports.JsFullTextQuery = JsFullTextQuery 283 | module.exports.Reranker = Reranker 284 | module.exports.RrfReranker = RrfReranker 285 | module.exports.Session = Session 286 | module.exports.Table = Table 287 | module.exports.TagContents = TagContents 288 | module.exports.Tags = Tags 289 | -------------------------------------------------------------------------------- /src/main/pdfUtils.ts: -------------------------------------------------------------------------------- 1 | import fs from 'fs' 2 | import path from 'path' 3 | import { promisify } from 'util' 4 | import * as lancedb from '@lancedb/lancedb' 5 | import { readFile } from 'node:fs/promises' 6 | import { insertDocument, getOrCreateTable, initLanceDB } from './lancedb' 7 | 8 | // 使用动态导入方式导入 uuid 9 | let uuidv4: any 10 | 11 | async function initializeUUID() { 12 | if (!uuidv4) { 13 | const uuidModule = await import('uuid') 14 | uuidv4 = uuidModule.v4 15 | } 16 | return uuidv4 17 | } 18 | 19 | const stat = promisify(fs.stat) 20 | 21 | /** 22 | * 检查文件是否存在 23 | */ 24 | export async function fileExists(filePath: string): Promise { 25 | try { 26 | const stats = await stat(filePath) 27 | return stats.isFile() 28 | } catch (error) { 29 | return false 30 | } 31 | } 32 | 33 | /** 34 | * 读取PDF文件内容 35 | */ 36 | export async function extractTextFromPDF(filePath: string): Promise { 37 | if (!(await fileExists(filePath))) { 38 | throw new Error(`PDF file not found at path: ${filePath}`) 39 | } 40 | 41 | try { 42 | const pdfParse = require('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') 43 | const dataBuffer = await readFile(filePath) 44 | const data = await pdfParse(dataBuffer) 45 | return data.text 46 | } catch (error) { 47 | console.error('Error extracting text from PDF:', error) 48 | throw new Error(`Failed to extract text from PDF: ${error.message}`) 49 | } 50 | } 51 | 52 | /** 53 | * 文本质量检查配置 54 | */ 55 | interface TextQualityConfig { 56 | minChunkLength: number // 最小chunk长度 57 | minWordCount: number // 最小单词数 58 | maxPunctuationRatio: number // 最大标点符号比例 59 | minAlphanumericRatio: number // 最小字母数字比例 60 | } 61 | 62 | const DEFAULT_QUALITY_CONFIG: TextQualityConfig = { 63 | minChunkLength: 20, // 至少20个字符 64 | minWordCount: 3, // 至少3个单词 65 | maxPunctuationRatio: 0.5, // 标点符号不超过50% 66 | minAlphanumericRatio: 0.3 // 字母数字至少30% 67 | } 68 | 69 | /** 70 | * 检查文本片段是否有效 71 | */ 72 | function isValidChunk(text: string, config: TextQualityConfig = DEFAULT_QUALITY_CONFIG): boolean { 73 | if (!text || text.trim().length < config.minChunkLength) { 74 | return false 75 | } 76 | 77 | const trimmed = text.trim() 78 | 79 | // 检查单词数量(支持中英文) 80 | const words = trimmed.split(/\s+/).filter(w => w.length > 0) 81 | const chineseChars = trimmed.match(/[\u4e00-\u9fa5]/g)?.length || 0 82 | const totalWordCount = words.length + Math.floor(chineseChars / 2) // 中文2个字符算1个词 83 | 84 | if (totalWordCount < config.minWordCount) { 85 | return false 86 | } 87 | 88 | // 检查标点符号比例 89 | const punctuationCount = (trimmed.match(/[.,;:!?。,、;:!?…—\-\(\)\[\]\{\}]/g) || []).length 90 | if (punctuationCount / trimmed.length > config.maxPunctuationRatio) { 91 | return false 92 | } 93 | 94 | // 检查字母数字比例 95 | const alphanumericCount = (trimmed.match(/[a-zA-Z0-9\u4e00-\u9fa5]/g) || []).length 96 | if (alphanumericCount / trimmed.length < config.minAlphanumericRatio) { 97 | return false 98 | } 99 | 100 | return true 101 | } 102 | 103 | /** 104 | * 规范化文本:清理多余空白,保留段落结构 105 | */ 106 | function normalizeText(text: string): string { 107 | return ( 108 | text 109 | // 移除零宽字符和特殊空白 110 | .replace(/[\u200B-\u200D\uFEFF]/g, '') 111 | // 统一换行符 112 | .replace(/\r\n/g, '\n') 113 | // 保留双换行(段落分隔),其他换行转为空格 114 | .replace(/\n\n+/g, '\n\n') 115 | .replace(/([^\n])\n([^\n])/g, '$1 $2') 116 | // 规范化空格 117 | .replace(/[ \t]+/g, ' ') 118 | // 清理行首行尾空格 119 | .split('\n') 120 | .map(line => line.trim()) 121 | .join('\n') 122 | .trim() 123 | ) 124 | } 125 | 126 | /** 127 | * 查找最佳分割点 128 | */ 129 | function findBestSplitPoint(text: string, maxPos: number): number { 130 | // 分割优先级:段落 > 句子 > 短语 > 空格 131 | 132 | // 1. 在段落边界(双换行) 133 | const paragraphEnd = text.lastIndexOf('\n\n', maxPos) 134 | if (paragraphEnd > maxPos * 0.5) { 135 | return paragraphEnd + 2 136 | } 137 | 138 | // 2. 在句子边界 139 | const sentenceEnds = [ 140 | text.lastIndexOf('。', maxPos), 141 | text.lastIndexOf('!', maxPos), 142 | text.lastIndexOf('?', maxPos), 143 | text.lastIndexOf('. ', maxPos), 144 | text.lastIndexOf('! ', maxPos), 145 | text.lastIndexOf('? ', maxPos), 146 | text.lastIndexOf('.\n', maxPos), 147 | text.lastIndexOf('!\n', maxPos), 148 | text.lastIndexOf('?\n', maxPos) 149 | ] 150 | const bestSentenceEnd = Math.max(...sentenceEnds) 151 | if (bestSentenceEnd > maxPos * 0.6) { 152 | // 找到标点符号后的第一个非空字符位置 153 | const punctuation = text[bestSentenceEnd] 154 | let endPos = bestSentenceEnd + 1 155 | if (punctuation === '.' || punctuation === '!' || punctuation === '?') { 156 | while (endPos < text.length && /[\s\n]/.test(text[endPos])) { 157 | endPos++ 158 | } 159 | } 160 | return endPos 161 | } 162 | 163 | // 3. 在短语边界(逗号、分号等) 164 | const phraseEnds = [ 165 | text.lastIndexOf(',', maxPos), 166 | text.lastIndexOf('、', maxPos), 167 | text.lastIndexOf(';', maxPos), 168 | text.lastIndexOf(', ', maxPos), 169 | text.lastIndexOf('; ', maxPos), 170 | text.lastIndexOf(',\n', maxPos) 171 | ] 172 | const bestPhraseEnd = Math.max(...phraseEnds) 173 | if (bestPhraseEnd > maxPos * 0.7) { 174 | return bestPhraseEnd + 1 175 | } 176 | 177 | // 4. 在单换行处 178 | const singleLineBreak = text.lastIndexOf('\n', maxPos) 179 | if (singleLineBreak > maxPos * 0.7) { 180 | return singleLineBreak + 1 181 | } 182 | 183 | // 5. 最后在空格处 184 | const spaceEnd = text.lastIndexOf(' ', maxPos) 185 | if (spaceEnd > maxPos * 0.5) { 186 | return spaceEnd + 1 187 | } 188 | 189 | // 6. 实在找不到合适位置,返回maxPos 190 | return maxPos 191 | } 192 | 193 | /** 194 | * 智能分割文本为段落 195 | * @param text 输入文本 196 | * @param maxChunkSize 最大chunk大小 197 | * @param minChunkSize 最小chunk大小(避免过小片段) 198 | * @param overlap 重叠大小 199 | * @param qualityConfig 质量检查配置 200 | */ 201 | export function splitTextIntoChunks( 202 | text: string, 203 | maxChunkSize: number = 1000, 204 | minChunkSize: number = 100, 205 | overlap: number = 100, 206 | qualityConfig: TextQualityConfig = DEFAULT_QUALITY_CONFIG 207 | ): string[] { 208 | // 参数验证 209 | if (maxChunkSize <= 0) { 210 | throw new Error('maxChunkSize must be greater than 0') 211 | } 212 | if (minChunkSize < 0 || minChunkSize > maxChunkSize) { 213 | throw new Error('minChunkSize must be between 0 and maxChunkSize') 214 | } 215 | if (overlap < 0 || overlap >= maxChunkSize) { 216 | throw new Error('overlap must be between 0 and maxChunkSize') 217 | } 218 | 219 | if (!text || text.trim().length === 0) { 220 | return [] 221 | } 222 | 223 | // 规范化文本 224 | const normalizedText = normalizeText(text) 225 | const chunks: string[] = [] 226 | let currentPos = 0 227 | 228 | while (currentPos < normalizedText.length) { 229 | // 确定chunk的结束位置 230 | const targetEndPos = Math.min(currentPos + maxChunkSize, normalizedText.length) 231 | 232 | let endPos: number 233 | if (targetEndPos >= normalizedText.length) { 234 | // 已到文本末尾 235 | endPos = normalizedText.length 236 | } else { 237 | // 查找最佳分割点 238 | endPos = findBestSplitPoint(normalizedText.slice(currentPos), targetEndPos - currentPos) + currentPos 239 | 240 | // 确保不会产生太小的chunk 241 | if (endPos - currentPos < minChunkSize && endPos < normalizedText.length) { 242 | endPos = Math.min(currentPos + maxChunkSize, normalizedText.length) 243 | } 244 | } 245 | 246 | // 提取chunk 247 | const chunk = normalizedText.slice(currentPos, endPos).trim() 248 | 249 | // 验证chunk质量 250 | if (isValidChunk(chunk, qualityConfig)) { 251 | chunks.push(chunk) 252 | } else { 253 | console.warn(`Skipped invalid chunk at position ${currentPos}: too short or low quality`) 254 | // 即使chunk无效,也要前进,避免死循环 255 | if (endPos <= currentPos) { 256 | endPos = currentPos + Math.min(minChunkSize, normalizedText.length - currentPos) 257 | } 258 | } 259 | 260 | // 计算下一个起始位置(考虑重叠) 261 | const nextPos = endPos - overlap 262 | 263 | // 确保有进展,防止死循环 264 | if (nextPos <= currentPos) { 265 | currentPos = endPos 266 | } else { 267 | currentPos = nextPos 268 | } 269 | 270 | // 安全检查:如果没有进展,强制前进 271 | if (currentPos >= normalizedText.length - 1) { 272 | break 273 | } 274 | } 275 | 276 | // 后处理:合并过小的相邻chunks 277 | const mergedChunks = mergeSmallChunks(chunks, minChunkSize, maxChunkSize) 278 | 279 | return mergedChunks 280 | } 281 | 282 | /** 283 | * 合并过小的相邻chunks 284 | */ 285 | function mergeSmallChunks(chunks: string[], minSize: number, maxSize: number): string[] { 286 | if (chunks.length === 0) return [] 287 | 288 | const result: string[] = [] 289 | let currentChunk = chunks[0] 290 | 291 | for (let i = 1; i < chunks.length; i++) { 292 | const nextChunk = chunks[i] 293 | 294 | // 如果当前chunk太小,尝试与下一个合并 295 | if (currentChunk.length < minSize && currentChunk.length + nextChunk.length <= maxSize) { 296 | currentChunk = currentChunk + '\n' + nextChunk 297 | } else { 298 | result.push(currentChunk) 299 | currentChunk = nextChunk 300 | } 301 | } 302 | 303 | // 添加最后一个chunk 304 | if (currentChunk) { 305 | result.push(currentChunk) 306 | } 307 | 308 | return result 309 | } 310 | 311 | /** 312 | * 处理文档并存入向量数据库 313 | */ 314 | export async function processDocument( 315 | repositoryName: string, 316 | filePath: string, 317 | documentId: string = '', 318 | chunkSize: number = 1000, 319 | overlap: number = 100, 320 | modelName: string, 321 | apiKey: string, 322 | apiURL: string, 323 | options?: { 324 | minChunkSize?: number 325 | qualityConfig?: Partial 326 | } 327 | ) { 328 | // 初始化 uuid 329 | const v4 = await initializeUUID() 330 | if (!documentId) { 331 | documentId = v4() 332 | } 333 | 334 | // 1. 提取文本根据文件类型 335 | const ext = path.extname(filePath).toLowerCase() 336 | let text: string 337 | 338 | switch (ext) { 339 | case '.pdf': 340 | text = await extractTextFromPDF(filePath) 341 | break 342 | case '.txt': 343 | text = await extractTextFromTXT(filePath) 344 | break 345 | case '.docx': 346 | text = await extractTextFromDOCX(filePath) 347 | break 348 | default: 349 | throw new Error(`Unsupported file type: ${ext}`) 350 | } 351 | 352 | // 2. 分割文本为段落 353 | const minChunkSize = options?.minChunkSize || Math.floor(chunkSize * 0.3) 354 | const qualityConfig = { 355 | ...DEFAULT_QUALITY_CONFIG, 356 | ...options?.qualityConfig 357 | } 358 | 359 | const chunks = splitTextIntoChunks(text, chunkSize, minChunkSize, overlap, qualityConfig) 360 | 361 | console.log( 362 | `Document split into ${chunks.length} chunks. Average size: ${Math.round(chunks.reduce((sum, c) => sum + c.length, 0) / chunks.length)} chars` 363 | ) 364 | 365 | // 3. 获取文件名作为基础元数据 366 | const fileName = path.basename(filePath) 367 | const baseMetadata = { 368 | source: ext.substring(1), 369 | fileName, 370 | filePath, 371 | documentId, 372 | totalPages: chunks.length, 373 | processedAt: new Date().toISOString() 374 | } 375 | 376 | // 4. 逐段处理并存入数据库 377 | const results = [] 378 | for (let i = 0; i < chunks.length; i++) { 379 | const chunkMetadata = { 380 | ...baseMetadata, 381 | chunkIndex: i, 382 | totalChunks: chunks.length, 383 | chunkId: `${documentId}-${i}`, 384 | chunkLength: chunks[i].length 385 | } 386 | 387 | // 插入到向量数据库 388 | const result = await insertDocument(repositoryName, chunks[i], fileName, chunkMetadata, modelName, apiKey, apiURL) 389 | 390 | results.push(result) 391 | } 392 | 393 | return { 394 | documentId, 395 | fileName, 396 | chunksProcessed: chunks.length, 397 | averageChunkSize: Math.round(chunks.reduce((sum, c) => sum + c.length, 0) / chunks.length), 398 | results 399 | } 400 | } 401 | 402 | /** 403 | * 从数据库中检索特定文档的所有段落 404 | */ 405 | export async function getPDFDocumentChunks(repositoryName: string, documentId: string) { 406 | const tbl = await getOrCreateTable(repositoryName, 'default', 'dummy', 'dummy') 407 | if (!tbl) throw new Error('Documents table does not exist') 408 | 409 | // 查询特定documentId的所有段落 410 | const results = await tbl.search([0]).where(`metadata.documentId = '${documentId}'`).toArray() 411 | 412 | // 按chunkIndex排序 413 | return results 414 | .map((result: any) => ({ 415 | id: result.id, 416 | text: result.text, 417 | score: result._distance, 418 | metadata: result.metadata 419 | })) 420 | .sort((a: any, b: any) => a.metadata.chunkIndex - b.metadata.chunkIndex) 421 | } 422 | 423 | /** 424 | * 读取TXT文件内容 425 | */ 426 | export async function extractTextFromTXT(filePath: string): Promise { 427 | if (!(await fileExists(filePath))) { 428 | throw new Error(`TXT file not found at path: ${filePath}`) 429 | } 430 | const data = await readFile(filePath, 'utf-8') 431 | return data 432 | } 433 | 434 | /** 435 | * 读取DOCX文件内容 436 | */ 437 | let mammoth: any 438 | 439 | async function initializeMammoth() { 440 | if (!mammoth) { 441 | const mammothModule = await import('mammoth') 442 | mammoth = mammothModule 443 | } 444 | return mammoth 445 | } 446 | 447 | export async function extractTextFromDOCX(filePath: string): Promise { 448 | if (!(await fileExists(filePath))) { 449 | throw new Error(`DOCX file not found at path: ${filePath}`) 450 | } 451 | const mammoth = await initializeMammoth() 452 | const buffer = await readFile(filePath) 453 | const result = await mammoth.extractRawText({ buffer }) 454 | return result.value 455 | } 456 | -------------------------------------------------------------------------------- /test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Word文档章节标题提取器 8 | 9 | 207 | 208 | 209 | 210 |
211 |

Word文档章节标题提取器

212 | 213 |
214 |
📄
215 |

点击选择或拖拽Word文档到此处

216 | 217 | 218 |
支持格式:.docx
219 |
220 | 221 |
222 | 226 | 227 |
228 | 提示:此工具会自动提取Word文档中的章节标题(H1-H6级别)。 229 | 处理过程中可能会显示一些警告信息,这些是正常的,不会影响标题提取功能。 230 |
231 | 232 | 236 |
237 | 238 | 417 | 418 | 419 | -------------------------------------------------------------------------------- /src/renderer/views/About.vue: -------------------------------------------------------------------------------- 1 | 203 | 204 | 267 | 268 | 347 | -------------------------------------------------------------------------------- /src/main/lancedb.ts: -------------------------------------------------------------------------------- 1 | import { app } from 'electron' 2 | import path from 'path' 3 | import { getEmbedding } from './chat' 4 | import * as arrow from 'apache-arrow' 5 | 6 | let lancedb: typeof import('@lancedb/lancedb') | null = null 7 | 8 | async function getLanceDB() { 9 | if (!lancedb) { 10 | try { 11 | lancedb = await import('@lancedb/lancedb') 12 | } catch (error) { 13 | console.error('Failed to import LanceDB:', error) 14 | throw error 15 | } 16 | } 17 | return lancedb 18 | } 19 | 20 | const DB_PATH = path.join(app.getPath('userData'), 'vector-db') 21 | let db: lancedb.Connection | null = null 22 | 23 | // ======================== 24 | // 🛠️ 工具函数 25 | // ======================== 26 | 27 | /** 28 | * 安全地将 repositoryName 转为合法表名 29 | */ 30 | function sanitizeTableName(name: string): string { 31 | if (!name || typeof name !== 'string') { 32 | throw new Error('Repository name must be a non-empty string') 33 | } 34 | return name.replace(/[^a-zA-Z0-9_]/g, '_').toLowerCase() 35 | } 36 | 37 | /** 38 | * 生成唯一 ID 39 | */ 40 | function generateId(): number { 41 | return Date.now() * 1000 + Math.floor(Math.random() * 1000) 42 | } 43 | 44 | /** 45 | * 创建表的 schema 46 | */ 47 | function createTableSchema(dimension: number): arrow.Schema { 48 | return new arrow.Schema([ 49 | new arrow.Field('id', new arrow.Int32(), false), 50 | new arrow.Field('text', new arrow.Utf8(), true), 51 | new arrow.Field('filename', new arrow.Utf8(), true), 52 | new arrow.Field('vector', new arrow.FixedSizeList(dimension, new arrow.Field('item', new arrow.Float32())), false), 53 | new arrow.Field('metadata', new arrow.Utf8(), true) // 存储 JSON 字符串 54 | ]) 55 | } 56 | 57 | // ======================== 58 | // 🔌 数据库连接管理 59 | // ======================== 60 | 61 | /** 62 | * 初始化数据库连接(幂等) 63 | */ 64 | export async function initLanceDB(): Promise { 65 | if (!db) { 66 | try { 67 | const ldb = await getLanceDB() 68 | db = await ldb.connect(DB_PATH) 69 | console.log(`✅ Connected to LanceDB at ${DB_PATH}`) 70 | } catch (error) { 71 | console.error('Failed to connect to LanceDB:', error) 72 | throw new Error(`数据库连接失败: ${error.message}`) 73 | } 74 | } 75 | return db 76 | } 77 | 78 | /** 79 | * 关闭数据库连接 80 | */ 81 | export async function closeLanceDB(): Promise { 82 | if (db) { 83 | try { 84 | // LanceDB 可能没有显式的 close 方法,根据实际 API 调整 85 | db = null 86 | console.log('✅ LanceDB connection closed') 87 | } catch (error) { 88 | console.error('Failed to close LanceDB:', error) 89 | } 90 | } 91 | } 92 | 93 | // ======================== 94 | // 🗃️ 数据库级操作(跨表) 95 | // ======================== 96 | 97 | /** 98 | * 获取所有知识库(表)名称列表 99 | */ 100 | export async function listRepositories(): Promise { 101 | try { 102 | await initLanceDB() 103 | const tables = await db!.tableNames() 104 | // 过滤掉 LanceDB 内部表(通常以下划线开头) 105 | return tables.filter((name: string) => !name.startsWith('_')) 106 | } catch (error) { 107 | console.error('Failed to list repositories:', error) 108 | throw new Error(`获取知识库列表失败: ${error.message}`) 109 | } 110 | } 111 | 112 | /** 113 | * 创建一个空的知识库表(不插入数据) 114 | * @param repositoryName 知识库名称 115 | * @param modelName embedding 模型名(用于确定向量维度) 116 | */ 117 | export async function createRepository( 118 | repositoryName: string, 119 | modelName: string, 120 | apiKey: string, 121 | apiURL: string 122 | ): Promise { 123 | try { 124 | await initLanceDB() 125 | const tableName = sanitizeTableName(repositoryName) 126 | console.log('Creating repository:', repositoryName, 'with model:', modelName) 127 | 128 | // 检查是否已存在 129 | const existingTables = await db!.tableNames() 130 | if (existingTables.includes(tableName)) { 131 | throw new Error(`Repository "${repositoryName}" already exists`) 132 | } 133 | 134 | // 获取 embedding 维度 135 | const sampleEmbedding = await getEmbedding('Sample text for schema creation', modelName, apiKey, apiURL) 136 | const dimension = sampleEmbedding.length 137 | 138 | // 创建表 139 | const schema = createTableSchema(dimension) 140 | const ldb = await getLanceDB() 141 | await db!.createTable(tableName, [], { schema }) 142 | 143 | console.log(`✅ Created repository: ${repositoryName} (dim=${dimension})`) 144 | } catch (error) { 145 | console.error('Failed to create repository:', error) 146 | throw new Error(`创建知识库失败: ${error.message}`) 147 | } 148 | } 149 | 150 | /** 151 | * 删除整个知识库(表) 152 | */ 153 | export async function deleteRepository(repositoryName: string): Promise { 154 | try { 155 | await initLanceDB() 156 | const tableName = sanitizeTableName(repositoryName) 157 | 158 | const tables = await db!.tableNames() 159 | if (!tables.includes(tableName)) { 160 | throw new Error(`Repository "${repositoryName}" does not exist`) 161 | } 162 | 163 | await db!.dropTable(tableName) 164 | console.log(`🗑️ Deleted repository: ${repositoryName}`) 165 | } catch (error) { 166 | console.error('Failed to delete repository:', error) 167 | throw new Error(`删除知识库失败: ${error.message}`) 168 | } 169 | } 170 | 171 | // ======================== 172 | // 📄 表内文档操作(单表) 173 | // ======================== 174 | 175 | /** 176 | * 获取或创建指定知识库的表(内部使用) 177 | */ 178 | export async function getOrCreateTable( 179 | repositoryName: string, 180 | modelName: string, 181 | apiKey: string, 182 | apiURL: string 183 | ): Promise { 184 | await initLanceDB() 185 | const tableName = sanitizeTableName(repositoryName) 186 | 187 | // 先尝试打开 188 | try { 189 | return await db!.openTable(tableName) 190 | } catch (openError) { 191 | // 表不存在,尝试创建 192 | try { 193 | const sampleEmbedding = await getEmbedding('Sample text for dimension detection', modelName, apiKey, apiURL) 194 | const dimension = sampleEmbedding.length 195 | const schema = createTableSchema(dimension) 196 | const ldb = await getLanceDB() 197 | return await db!.createTable(tableName, [], { schema }) 198 | } catch (createError: any) { 199 | // 如果是因为表已存在而失败,再次尝试打开 200 | if (createError.message?.includes('already exists') || createError.message?.includes('Table already exists')) { 201 | return await db!.openTable(tableName) 202 | } 203 | console.error('Failed to create table:', createError) 204 | throw new Error(`创建表失败: ${createError.message}`) 205 | } 206 | } 207 | } 208 | 209 | /** 210 | * 插入文档(自动生成 ID) 211 | */ 212 | export async function insertDocument( 213 | repositoryName: string, 214 | text: string, 215 | filename: string, 216 | metadata: Record = {}, 217 | modelName: string, 218 | apiKey: string, 219 | apiURL: string 220 | ): Promise<{ id: number; text: string; filename: string; metadata: Record }> { 221 | if (!filename) { 222 | throw new Error('filename is required') 223 | } 224 | if (!text || text.trim().length === 0) { 225 | throw new Error('text cannot be empty') 226 | } 227 | 228 | try { 229 | const table = await getOrCreateTable(repositoryName, modelName, apiKey, apiURL) 230 | const embedding = await getEmbedding(text, modelName, apiKey, apiURL) 231 | const id = generateId() 232 | 233 | await table.add([ 234 | { 235 | id, 236 | text, 237 | filename, 238 | vector: embedding, 239 | metadata: JSON.stringify(metadata) 240 | } 241 | ]) 242 | 243 | console.log(`📥 Inserted doc into ${repositoryName} (file: ${filename}, id: ${id})`) 244 | return { id, text, filename, metadata } 245 | } catch (error) { 246 | console.error('Failed to insert document:', error) 247 | throw new Error(`插入文档失败: ${error.message}`) 248 | } 249 | } 250 | 251 | /** 252 | * 查询相似文档(支持按 filename 过滤) 253 | * @param filter SQL WHERE 子句条件(不包括 filename),例如: "id > 100" 254 | */ 255 | export async function queryDocuments( 256 | repositoryName: string, 257 | queryText: string, 258 | modelName: string, 259 | apiKey: string, 260 | apiURL: string, 261 | limit: number = 5, 262 | filter: string = '', 263 | filename?: string 264 | ): Promise> { 265 | try { 266 | const table = await getOrCreateTable(repositoryName, modelName, apiKey, apiURL) 267 | const embedding = await getEmbedding(queryText, modelName, apiKey, apiURL) 268 | console.log('qureyText:', queryText) 269 | 270 | let whereClause = filter 271 | if (filename) { 272 | // 转义单引号以防止 SQL 注入 273 | const pureFilename = path.basename(filename) 274 | const escapedFilename = pureFilename.replace(/'/g, "''") 275 | whereClause = whereClause 276 | ? `filename = '${escapedFilename}' AND (${whereClause})` 277 | : `filename = '${escapedFilename}'` 278 | } 279 | 280 | let searchQuery = table.search(embedding).limit(limit) 281 | if (whereClause) { 282 | searchQuery = searchQuery.where(whereClause) 283 | } 284 | 285 | const results = await searchQuery.toArray() 286 | const resultMap = results.map((r: any) => ({ 287 | id: r.id, 288 | text: r.text, 289 | filename: r.filename, 290 | score: r._distance, 291 | meta: r.metadata ? JSON.parse(r.metadata) : {} 292 | })) 293 | resultMap.forEach((element: any) => { 294 | console.log('the query result text:', element.text) 295 | console.log('the query result score:', element.score) 296 | }) 297 | console.log(`🔍 RAG查询详情: 298 | 仓库: ${repositoryName} 299 | 查询文本: ${queryText.substring(0, 50)}... 300 | 实际WHERE条件: ${whereClause} 301 | 返回结果数: ${results.length} 302 | 首条结果分数: ${results[0]?._distance}`) 303 | return resultMap 304 | } catch (error) { 305 | console.error('Failed to query documents:', error) 306 | throw new Error(`查询文档失败: ${error.message}`) 307 | } 308 | } 309 | 310 | /** 311 | * 查询指定文件的所有文档(非向量搜索,全量返回) 312 | */ 313 | export async function getDocumentsByFilename( 314 | repositoryName: string, 315 | filename: string 316 | ): Promise> { 317 | if (!filename) { 318 | throw new Error('filename is required') 319 | } 320 | 321 | try { 322 | await initLanceDB() 323 | const tableName = sanitizeTableName(repositoryName) 324 | const table = await db!.openTable(tableName) 325 | 326 | const escapedFilename = filename.replace(/'/g, "''") 327 | 328 | // 使用 query().where() 替代 filter() 329 | const results = await table.query().where(`filename = '${escapedFilename}'`).toArray() 330 | 331 | return results.map((r: any) => ({ 332 | id: r.id, 333 | text: r.text, 334 | filename: r.filename, 335 | meta: r.metadata ? JSON.parse(r.metadata) : {} 336 | })) 337 | } catch (error) { 338 | console.error('Failed to get documents by filename:', error) 339 | throw new Error(`获取文件文档失败: ${error.message}`) 340 | } 341 | } 342 | 343 | /** 344 | * 删除指定文件的所有文档(弃用) 345 | */ 346 | export async function deleteDocumentsByFilename(repositoryName: string, filename: string): Promise { 347 | if (!filename) { 348 | throw new Error('filename is required') 349 | } 350 | 351 | try { 352 | await initLanceDB() 353 | const tableName = sanitizeTableName(repositoryName) 354 | const table = await db!.openTable(tableName) 355 | 356 | const escapedFilename = filename.replace(/'/g, "''") 357 | await table.delete(`filename = '${escapedFilename}'`) 358 | 359 | console.log(`🗑️ Deleted all docs with filename: ${filename} in ${repositoryName}`) 360 | 361 | // LanceDB 的 delete 方法可能不返回删除数量,这里返回 1 表示操作成功 362 | return 1 363 | } catch (error) { 364 | console.error('Failed to delete documents by filename:', error) 365 | throw new Error(`删除文件文档失败: ${error.message}`) 366 | } 367 | } 368 | 369 | /** 370 | * 更新文档(保留 filename 不变) 371 | */ 372 | export async function updateDocument( 373 | repositoryName: string, 374 | id: number, 375 | newText: string, 376 | newMeta: Record = {}, 377 | modelName: string, 378 | apiKey: string, 379 | apiURL: string 380 | ): Promise<{ id: number; text: string; meta: Record }> { 381 | try { 382 | const table = await getOrCreateTable(repositoryName, modelName, apiKey, apiURL) 383 | const embedding = await getEmbedding(newText, modelName, apiKey, apiURL) 384 | 385 | // 先查询获取原始 filename 386 | const existingDocs = await table.query().where(`id = ${id}`).limit(1).toArray() 387 | 388 | if (existingDocs.length === 0) { 389 | throw new Error(`Document with id ${id} not found`) 390 | } 391 | 392 | const existingDoc = existingDocs[0] 393 | 394 | // 使用 LanceDB 的 update 方法 395 | await table.update({ 396 | where: `id = ${id}`, 397 | values: { 398 | text: newText, 399 | vector: embedding, 400 | metadata: JSON.stringify(newMeta) 401 | } 402 | }) 403 | 404 | console.log(`✏️ Updated doc ${id} in ${repositoryName}`) 405 | return { id, text: newText, meta: newMeta } 406 | } catch (error) { 407 | console.error('Failed to update document:', error) 408 | throw new Error(`更新文档失败: ${error.message}`) 409 | } 410 | } 411 | 412 | /** 413 | * 删除单个文档(按 ID) 414 | */ 415 | export async function deleteDocument(repositoryName: string, id: number): Promise<{ id: number }> { 416 | try { 417 | await initLanceDB() 418 | const tableName = sanitizeTableName(repositoryName) 419 | const table = await db!.openTable(tableName) 420 | 421 | await table.delete(`id = ${id}`) 422 | console.log(`🗑️ Deleted doc ${id} from ${repositoryName}`) 423 | 424 | return { id } 425 | } catch (error) { 426 | console.error('Failed to delete document:', error) 427 | throw new Error(`删除文档失败: ${error.message}`) 428 | } 429 | } 430 | 431 | /** 432 | * 删除指定文件名的所有文档(修复后的版本) 433 | */ 434 | export async function deleteDocumentByName(repositoryName: string, filename: string): Promise<{ filename: string }> { 435 | if (!filename) { 436 | throw new Error('filename is required') 437 | } 438 | 439 | try { 440 | await initLanceDB() 441 | const tableName = sanitizeTableName(repositoryName) 442 | const table = await db!.openTable(tableName) 443 | 444 | const escapedFilename = filename.replace(/'/g, "''") 445 | await table.delete(`filename = '${escapedFilename}'`) 446 | 447 | console.log(`🗑️ Deleted docs with filename: ${filename} from ${repositoryName}`) 448 | 449 | return { filename } 450 | } catch (error) { 451 | console.error('Failed to delete document by name:', error) 452 | throw new Error(`删除文档失败: ${error.message}`) 453 | } 454 | } 455 | 456 | /** 457 | * 获取指定知识库中所有不重复的文件名列表 458 | * @param repositoryName 知识库名称 459 | * @returns 去重后的文件名数组 460 | */ 461 | export async function listFilenamesInRepository(repositoryName: string): Promise { 462 | try { 463 | await initLanceDB() 464 | const tableName = sanitizeTableName(repositoryName) 465 | 466 | // 检查表是否存在 467 | const tables = await db!.tableNames() 468 | if (!tables.includes(tableName)) { 469 | throw new Error(`Repository "${repositoryName}" does not exist`) 470 | } 471 | 472 | const table = await db!.openTable(tableName) 473 | 474 | // 查询所有文档的filename字段 475 | const results = await table.query().select('filename').toArray() 476 | 477 | // 提取并去重文件名 478 | const filenames = [...new Set(results.map((r: any) => r.filename).filter(Boolean))] 479 | 480 | console.log(`📁 Found ${filenames.length} unique filenames in ${repositoryName}`) 481 | return filenames 482 | } catch (error) { 483 | console.error('Failed to list filenames:', error) 484 | throw new Error(`获取文件名列表失败: ${error.message}`) 485 | } 486 | } 487 | 488 | /** 489 | * 查询指定表中的所有对象 490 | * @param repositoryName 知识库名称 491 | * @returns 表中所有文档的数组 492 | */ 493 | export async function getAllDocuments( 494 | repositoryName: string 495 | ): Promise> { 496 | try { 497 | await initLanceDB() 498 | const tableName = sanitizeTableName(repositoryName) 499 | 500 | // 检查表是否存在 501 | const tables = await db!.tableNames() 502 | if (!tables.includes(tableName)) { 503 | throw new Error(`Repository "${repositoryName}" does not exist`) 504 | } 505 | 506 | const table = await db!.openTable(tableName) 507 | 508 | // 查询所有文档 509 | const results = await table.query().toArray() 510 | 511 | return results.map((r: any) => ({ 512 | id: r.id, 513 | text: r.text, 514 | filename: r.filename, 515 | meta: r.metadata ? JSON.parse(r.metadata) : {} 516 | })) 517 | } catch (error) { 518 | console.error('Failed to get all documents:', error) 519 | throw new Error(`获取所有文档失败: ${error.message}`) 520 | } 521 | } 522 | -------------------------------------------------------------------------------- /src/main/ipcHandlers.ts: -------------------------------------------------------------------------------- 1 | import { ipcMain } from 'electron' 2 | import { dialog } from 'electron' 3 | import { DB } from './database' 4 | import { testAPI } from './chat' 5 | import { proofreadDocument, getDefaultPrompt, setNewPrompt } from './proof' 6 | import { deleteDocumentByName, listFilenamesInRepository } from './lancedb' 7 | import { Mode } from '@google/genai' 8 | import * as mammoth from 'mammoth' 9 | import { replaceTextInDocx } from './wordProcess' 10 | import { 11 | deleteRepository, 12 | initLanceDB, 13 | insertDocument, 14 | queryDocuments, 15 | updateDocument, 16 | deleteDocument, 17 | listRepositories, 18 | createRepository 19 | } from './lancedb' 20 | import { processDocument, getPDFDocumentChunks } from './pdfUtils' 21 | import { list } from 'changelog.config' 22 | import { error } from 'console' 23 | import { eventNames, env } from 'process' 24 | // const { platform, arch, env } = process; 25 | export interface apiSettings { 26 | apiURL: string 27 | apiKey: string 28 | modelName: string 29 | parallel?: number 30 | TimeLimit?: number | null 31 | } 32 | 33 | let api_info: apiSettings = { 34 | apiURL: '', 35 | apiKey: '', 36 | modelName: '', 37 | parallel: 30, 38 | TimeLimit: null 39 | } 40 | 41 | // 全局embedding_api变量已移除,由Pinia store管理 42 | export const registerIpcHandlers = () => { 43 | // 单向通信:接收渲染进程的消息 44 | // 监听消息,通道是message 45 | ipcMain.on('message', (event, message: string) => { 46 | console.log('Received message', message) 47 | }) 48 | 49 | // 双向通信:接收渲染进程的消息,并返回结果 50 | ipcMain.handle('receiveAndReturn', (event, message: string) => { 51 | console.log('receiveAndReturn', message) 52 | 53 | // 想返回什么都可以 54 | const ret = { 55 | rawData: message, 56 | newData: `neight-peiqi${message}` 57 | } 58 | return ret 59 | }) 60 | const path = require('path') 61 | const fs = require('fs') 62 | // 处理文件选择请求 63 | ipcMain.handle('select-docx-file', async () => { 64 | try { 65 | const result = await dialog.showOpenDialog({ 66 | title: '选择 DOCX 文件', 67 | filters: [{ name: 'Word 文档', extensions: ['docx'] }], 68 | properties: ['openFile'] 69 | }) 70 | 71 | if (result.canceled || result.filePaths.length === 0) { 72 | return null 73 | } 74 | 75 | // 返回文件路径 76 | return result.filePaths[0] 77 | } catch (error) { 78 | console.error('文件选择错误:', error) 79 | throw error 80 | } 81 | }) 82 | 83 | // 处理文件读取请求(可选,如果需要主进程读取文件内容) 84 | ipcMain.handle('read-docx-file', async (event, filePath) => { 85 | try { 86 | const data = await fs.promises.readFile(filePath) 87 | return { 88 | path: filePath, 89 | content: data.toString('base64') 90 | } 91 | } catch (error) { 92 | console.error('cannot read file:', error) 93 | throw error 94 | } 95 | }) 96 | 97 | ipcMain.handle('set-api', async (event, URL, Key, modelName) => { 98 | try { 99 | console.log('add a new api setting:', URL, Key, modelName) 100 | api_info.apiKey = Key 101 | api_info.apiURL = URL 102 | api_info.modelName = modelName 103 | const result = await DB.insertAPISetting(URL, Key, modelName) 104 | console.log('the result of the new api setting adding:', result) 105 | if (result) { 106 | return 'success' 107 | } else { 108 | return 'error' 109 | } 110 | } catch (error) { 111 | return 'error' 112 | } 113 | }) 114 | // 获取所有api设置 115 | ipcMain.handle('get-all-api-settings', async event => { 116 | return await DB.getAllAPISettings() 117 | }) 118 | 119 | ipcMain.handle('delete-one-api-setting', async (event, id) => { 120 | const result = await DB.deleteAPISettingById(id) 121 | if (result) { 122 | return { 123 | isSuccess: true 124 | } 125 | } else { 126 | return { 127 | isSuccess: false 128 | } 129 | } 130 | }) 131 | 132 | ipcMain.handle('test-api', async (event, URL, Key, modelName) => { 133 | if (!URL || !Key || !modelName) { 134 | console.log('Please input all the parameters!') 135 | return false 136 | } else { 137 | console.log('Testing API:', URL, Key, modelName) 138 | } 139 | const result = await testAPI(URL, Key, modelName) 140 | return result 141 | }) 142 | 143 | ipcMain.handle('selectAPISetting', async (event, URL, Key, modelName, parallel = 30, TimeLimit = null) => { 144 | api_info.apiKey = Key 145 | api_info.apiURL = URL 146 | api_info.modelName = modelName 147 | api_info.parallel = parallel 148 | api_info.TimeLimit = TimeLimit 149 | console.log('Selected API:', URL, Key, modelName, parallel, TimeLimit) 150 | return true 151 | }) 152 | 153 | ipcMain.handle('get-api-settings', async event => { 154 | return { 155 | URL: api_info.apiURL, 156 | Key: api_info.apiKey, 157 | modelName: api_info.modelName, 158 | parallel: api_info.parallel || 30, 159 | TimeLimit: api_info.TimeLimit 160 | } 161 | }) 162 | 163 | // 处理文档校对请求 164 | // 更新了对于rag功能的支持,实现了并行操作,提升性能 165 | ipcMain.handle( 166 | 'process-docx', 167 | async ( 168 | event, 169 | Model, 170 | filePath, 171 | repositoryNameList?: string[], 172 | embeddingConfig?: apiSettings, 173 | setTimeLimit?: number, 174 | parallelSet: number = 30 175 | ) => { 176 | try { 177 | // 三种校对模式:mode: 'section' | 'sentence' | 'full', 178 | console.log( 179 | '-----------------------------------------------processing docx file-------------------------------------------------------' 180 | ) 181 | console.info('Processing settings:', Model, filePath) 182 | console.info('embedding settings:', repositoryNameList, embeddingConfig) 183 | console.info('the parallel set is:', parallelSet) 184 | console.info('the time limit of process is:', setTimeLimit) 185 | 186 | if (!Model || !filePath) { 187 | return { 188 | isSuccess: false, 189 | message: 'Please select a model and a file!' 190 | } 191 | } 192 | 193 | if (!api_info.apiKey || !api_info.apiURL || !api_info.modelName) { 194 | return { 195 | isSuccess: false, 196 | message: 'Please select an API setting!' 197 | } 198 | } 199 | if (Model === 'wordError') { 200 | console.log('will process by the model:', api_info.apiKey, api_info.apiURL, api_info.modelName) 201 | const { proofResult, token_usage } = await proofreadDocument( 202 | filePath, 203 | 'sentence', 204 | api_info.apiKey, 205 | api_info.modelName, 206 | api_info.apiURL, 207 | repositoryNameList, 208 | embeddingConfig, 209 | parallelSet, 210 | setTimeLimit 211 | ) 212 | // 确保返回的数据是可克隆的 213 | try { 214 | const result = { 215 | proofResult: JSON.parse(JSON.stringify(proofResult)), 216 | token_usage: token_usage 217 | } 218 | return result 219 | } catch (error) { 220 | console.error('序列化校对结果时出错:', error) 221 | return { 222 | proofResult: null, 223 | token_usage: token_usage 224 | } 225 | } 226 | } else if (Model === 'ComprehensiveError') { 227 | console.log('will process by the model:', api_info.apiKey, api_info.apiURL, api_info.modelName) 228 | const { proofResult, token_usage } = await proofreadDocument( 229 | filePath, 230 | 'section', 231 | api_info.apiKey, 232 | api_info.modelName, 233 | api_info.apiURL, 234 | repositoryNameList, 235 | embeddingConfig, 236 | parallelSet, 237 | setTimeLimit 238 | ) 239 | // 确保返回的数据是可克隆的 240 | try { 241 | const result = { 242 | proofResult: JSON.parse(JSON.stringify(proofResult)), 243 | token_usage: token_usage 244 | } 245 | return result 246 | } catch (error) { 247 | console.error('序列化校对结果时出错:', error) 248 | return { 249 | proofResult: null, 250 | token_usage: token_usage 251 | } 252 | } 253 | } else if (Model === 'polish') { 254 | console.log('will process by the model:', api_info.apiKey, api_info.apiURL, api_info.modelName) 255 | const { proofResult, token_usage } = await proofreadDocument( 256 | filePath, 257 | 'full', 258 | api_info.apiKey, 259 | api_info.modelName, 260 | api_info.apiURL, 261 | repositoryNameList, 262 | embeddingConfig, 263 | parallelSet, 264 | setTimeLimit 265 | ) 266 | // 确保返回的数据是可克隆的 267 | try { 268 | const result = { 269 | proofResult: JSON.parse(JSON.stringify(proofResult)), 270 | token_usage: token_usage 271 | } 272 | return result 273 | } catch (error) { 274 | console.error('序列化校对结果时出错:', error) 275 | return { 276 | proofResult: null, 277 | token_usage: token_usage 278 | } 279 | } 280 | } 281 | } catch (error) { 282 | console.error('处理文档校对请求时出错:', error) 283 | return { 284 | proofResult: null, 285 | token_usage: 0 286 | } 287 | } 288 | } 289 | ) 290 | 291 | // 新增的返回值形式 292 | interface ResponseData { 293 | success: boolean 294 | message: string 295 | data?: T 296 | } 297 | 298 | interface Correction { 299 | original: string 300 | suggested: string 301 | } 302 | 303 | // 导出修正后的DOCX文件 304 | ipcMain.handle('exportCorrectedDocx', async (event, config) => { 305 | try { 306 | // 确保传递的数据是可克隆的 307 | const serializableConfig = JSON.parse(JSON.stringify(config)) 308 | 309 | const filePath = serializableConfig.originalFilePath 310 | const newPath = filePath.replace(/(\.\w+)$/, '_new$1') // 正则捕获“最后一个点+扩展名” 311 | const correctedText = serializableConfig.appliedCorrections.map((correction: Correction) => ({ 312 | origin: correction.original, 313 | suggested: correction.suggested 314 | })) 315 | 316 | await replaceTextInDocx(filePath, newPath, correctedText) 317 | return true 318 | } catch (error) { 319 | console.error('output error:', error) 320 | throw error 321 | } 322 | }) 323 | // 获取默认提示词 324 | ipcMain.handle('getDefaultPrompt', async event => { 325 | const prompt = await getDefaultPrompt() 326 | return prompt 327 | }) 328 | // 设置提示词(注意,这里设置的提示词没有长期记忆功能,只能暂时设置 329 | ipcMain.handle('setPrompt', async (event, newPrompt) => { 330 | if (newPrompt) { 331 | const result = await setNewPrompt(newPrompt) 332 | if (result) { 333 | return true 334 | } else { 335 | return false 336 | } 337 | } else { 338 | throw new Error('Please input a prompt!') 339 | } 340 | }) 341 | // 历史记录 - 获取全部的历史记录 342 | ipcMain.handle('getAllHistory', async event => { 343 | const result = await DB.getALLHistory() 344 | if (result) { 345 | return result 346 | } else { 347 | throw new Error('No history found!') 348 | } 349 | }) 350 | // 历史记录 - 删除全部的历史记录 351 | ipcMain.handle('deleteAllHistory', async event => { 352 | const result = await DB.deleteALLHistory() 353 | if (result) { 354 | return true 355 | } else { 356 | throw new Error('delete history failed!') 357 | } 358 | }) 359 | // 历史记录 - 根据id查询记录 360 | ipcMain.handle('getHistoryById', async (event, id) => { 361 | if (id) { 362 | const result = await DB.getHistoryById(id) 363 | if (result) { 364 | return result 365 | } else { 366 | throw new Error('No history found by id: ${id}') 367 | } 368 | } 369 | }) 370 | 371 | ipcMain.handle('deleteHistoryById', async (event, id) => { 372 | try { 373 | const result = await DB.deleteHistoryById(id) 374 | return result 375 | } catch (error) { 376 | console.error('删除历史记录失败:', error) 377 | return false 378 | } 379 | }) 380 | // 历史记录- 插入一条数据 381 | ipcMain.handle( 382 | 'insertOneHistory', 383 | async (event, filePath: string, apiURL: string, modelName: string, resultCorrect: string) => { 384 | try { 385 | // 参数验证 386 | if (!filePath || !apiURL || !modelName || !resultCorrect) { 387 | const errorMsg = 388 | '参数不完整: ' + JSON.stringify({ filePath, apiURL, modelName, resultCorrect: !!resultCorrect }) 389 | console.error(errorMsg) 390 | return { success: false, error: errorMsg } 391 | } 392 | 393 | // 尝试解析JSON以验证数据有效性 394 | try { 395 | JSON.parse(resultCorrect) 396 | } catch (parseError) { 397 | const errorMsg = 'resultCorrect不是有效的JSON字符串: ' + parseError.message 398 | console.error(errorMsg) 399 | return { success: false, error: errorMsg } 400 | } 401 | 402 | const result = await DB.insertOneHistory(filePath, apiURL, modelName, resultCorrect) 403 | return { success: true, id: result } 404 | } catch (error) { 405 | console.error('插入历史记录失败:', error) 406 | return { success: false, error: error.message } 407 | } 408 | } 409 | ) 410 | //----------------------------------------The implementation of this RAG---------------------------------------- 411 | // 向量数据库 - 插入文档 412 | ipcMain.handle('lancedb:insert', async (event, { repositoryName, fileName, text, id, metadata }, modelConfig) => { 413 | return insertDocument( 414 | repositoryName, 415 | text, 416 | fileName, 417 | metadata, 418 | modelConfig.modelName, 419 | modelConfig.apiKey, 420 | modelConfig.apiURL 421 | ) 422 | }) 423 | 424 | // 向量数据库 - 查询文档 425 | ipcMain.handle('lancedb:query', async (event, { queryText, limit, filter, fileName }, modelConfig) => { 426 | return queryDocuments( 427 | queryText, 428 | modelConfig.modelName, 429 | modelConfig.apiKey, 430 | modelConfig.apiURL, 431 | limit, 432 | filter, 433 | fileName 434 | ) 435 | }) 436 | 437 | // 向量数据库 - 更新文档 438 | ipcMain.handle('lancedb:update', async (event, { repositoryName, id, text, metadata }, modelConfig) => { 439 | return updateDocument( 440 | repositoryName, 441 | id, 442 | text, 443 | metadata, 444 | modelConfig.modelName, 445 | modelConfig.apiKey, 446 | modelConfig.apiURL 447 | ) 448 | }) 449 | 450 | // 向量数据库 - 删除文档 451 | ipcMain.handle('lancedb:delete', async (event, { repositoryName, id }) => { 452 | return deleteDocument(repositoryName, id) 453 | }) 454 | // get all the tables(lancedb) 455 | // 向量数据库 - 查询所有的表 456 | ipcMain.handle('listRepositories', async event => { 457 | const result = await listRepositories() 458 | return result 459 | }) 460 | // 向量数据库 - 创建一个空的知识表 461 | ipcMain.handle('createRepository', async (event, { repositoryName, modelName, apiKey, apiURL }) => { 462 | try { 463 | await createRepository(repositoryName, modelName, apiKey, apiURL) 464 | return true 465 | } catch (error) { 466 | console.log('error when create a empty repository:', error) 467 | throw error 468 | } 469 | }) 470 | // 向量数据库 - 删除整个表(单个知识库) 471 | ipcMain.handle('deleteRepository', (event, repositoryName: string) => { 472 | try { 473 | deleteRepository(repositoryName) 474 | return true 475 | } catch (error) { 476 | console.log('failed to delete ${repositoryName} because:', error) 477 | throw error 478 | } 479 | }) 480 | // 向量数据库 - 481 | 482 | // IPC处理器 - 处理PDF文件(弃用) 483 | ipcMain.handle('pdf:process', async (event, { repositoryName, filePath }, modelConfig) => { 484 | try { 485 | return await processDocument( 486 | repositoryName, 487 | filePath, 488 | undefined, // 自动生成documentId 489 | 500, // 默认chunk大小 490 | 50, // 默认重叠大小 491 | modelConfig.modelName, 492 | modelConfig.apiKey, 493 | modelConfig.apiURL 494 | ) 495 | } catch (error) { 496 | console.error('Failed to process PDF:', error) 497 | throw error 498 | } 499 | }) 500 | 501 | // IPC处理器 - 选择并处理文档文件 502 | ipcMain.handle('pdf:select-and-process', async (event, repositoryName, modelConfig) => { 503 | const { filePaths } = await dialog.showOpenDialog({ 504 | properties: ['openFile'], 505 | filters: [{ name: 'Document Files', extensions: ['pdf', 'docx', 'txt'] }] 506 | }) 507 | 508 | if (!filePaths || filePaths.length === 0) { 509 | console.log('User selected nothing!') 510 | return false 511 | } 512 | 513 | try { 514 | return await processDocument( 515 | repositoryName, 516 | filePaths[0], 517 | '', // 自动生成documentId 518 | 500, 519 | 50, 520 | modelConfig.modelName, 521 | modelConfig.apiKey, 522 | modelConfig.apiURL 523 | ) 524 | } catch (error) { 525 | console.error('Document processing failed:', error) 526 | throw error 527 | } 528 | }) 529 | 530 | // IPC处理器 - 获取PDF文档的所有段落 531 | ipcMain.handle('pdf:get-chunks', async (event, { documentId, repositoryName }) => { 532 | return getPDFDocumentChunks(repositoryName, documentId) 533 | }) 534 | // 根据指定的文件名称,删除该名称下的所有文档块 535 | ipcMain.handle('deleteDocumentByName', async (event, repositoryName, filename) => { 536 | const deleteFileName = await deleteDocumentByName(repositoryName, filename) 537 | if (deleteFileName) { 538 | return deleteFileName 539 | } else { 540 | throw error('delete the file ${repositoryName} error') 541 | } 542 | }) 543 | // 获取不重复的文件列表 544 | ipcMain.handle('listFilenamesInRepository', async (event, repositoryName) => { 545 | const fileList = await listFilenamesInRepository(repositoryName) 546 | return fileList 547 | }) 548 | // 设置embedding模型 - 通过其他机制由前端Pinia store管理,不再需要此IPC处理 549 | // ipcMain.handle('setEmbeddingAPI', ...) 已移除 550 | 551 | // 获取embedding模型信息 - 通过其他机制由前端Pinia store管理,不再需要此IPC处理 552 | // ipcMain.handle('getEmbeddingAPI', ...) 已移除 553 | // 调试用接口 554 | ipcMain.handle('getEnvPath', async event => { 555 | console.log(' env.LANCEDB_NATIVE_PATH:', env.LANCEDB_NATIVE_PATH) 556 | return env.LANCEDB_NATIVE_PATH 557 | }) 558 | } 559 | -------------------------------------------------------------------------------- /src/renderer/views/Dictionary.vue: -------------------------------------------------------------------------------- 1 | 127 | 128 | 431 | 432 | 646 | -------------------------------------------------------------------------------- /src/main/proof.ts: -------------------------------------------------------------------------------- 1 | import * as fs from 'fs' 2 | import * as mammoth from 'mammoth' 3 | import { OpenaiGen } from './chat' 4 | import path from 'path' 5 | import { queryDocuments, getAllDocuments } from './lancedb' 6 | import { error } from 'console' 7 | 8 | // ====== 类型定义 ====== 9 | interface ProofreadingCorrection { 10 | original: string 11 | suggested: string 12 | reason: string 13 | type: 'Typo' | 'Punctuation' | 'Grammar' | 'Consistency' | string 14 | References?: string[] 15 | } 16 | 17 | interface DocumentSection { 18 | title: string 19 | content: string 20 | level: number 21 | } 22 | 23 | interface DocumentStructure { 24 | title: string 25 | sections: DocumentSection[] 26 | } 27 | 28 | interface RAGQueryResult { 29 | id: number 30 | text: string 31 | filename: string 32 | score: number 33 | meta: any 34 | } 35 | 36 | interface ApiSettings { 37 | apiKey: string 38 | apiURL: string 39 | modelName: string 40 | } 41 | 42 | // ====== 全局 Prompt ====== 43 | let defaultPrompt = ` 44 | 你是一个专业的中文文本校对专家。请仔细检查文本中的错别字、标点错误和语法问题。 45 | 要求: 46 | 1. 只校对错别字、标点错误、语法错误 47 | 2. 保持原文意思不变 48 | 3. 不要进行风格改写或内容扩展 49 | 4. 按照指定的JSON格式返回结果 50 | 请校对用户提供的文本,找出其中的错别字、标点错误和语法问题,并按照以下JSON格式返回: 51 | [ 52 | { 53 | "original": "原文错误内容(只截取原文错误的词组,不要多写,不超过15字!)", 54 | "suggested": "建议修改内容(基于原文的修改后的内容)", 55 | "reason": "错误原因的简短说明", 56 | "type": "错误类型(Typo/Punctuation/Grammar/Consistency)" 57 | } 58 | ] 59 | 如果没有任何错误,请返回空数组[]。只返回JSON数组,不要添加其他说明文字。 60 | ` 61 | 62 | const realDefaultPrompt = `你是一个专业的中文文本校对专家。请仔细检查文本中的错别字、标点错误和语法问题。 63 | 要求: 64 | 1. 只校对错别字、标点错误、语法错误 65 | 2. 保持原文意思不变 66 | 3. 不要进行风格改写或内容扩展 67 | 4. 按照指定的JSON格式返回结果 68 | 请校对用户提供的文本,找出其中的错别字、标点错误和语法问题,并按照以下JSON格式返回: 69 | [ 70 | { 71 | "original": "原文错误内容(只截取原文错误的词组,不要多写,不超过15字!)", 72 | "suggested": "建议修改内容(基于原文的修改后的内容)", 73 | "reason": "错误原因的简短说明", 74 | "type": "错误类型(Typo/Punctuation/Grammar/Consistency)" 75 | } 76 | ] 77 | 如果没有任何错误,请返回空数组[]。只返回JSON数组,不要添加其他说明文字。` 78 | 79 | const ragText = 80 | '以下内容是校对的参考内容,请结合这些文字进行校对工作(如果是双语内容,则以校对内容的语言类型为准),校对规则遵循之前讲述的要求' 81 | 82 | // ====== 并发控制工具函数 ====== 83 | /** 84 | * 一个简单的延时函数 85 | * @param ms 延时的毫秒数 86 | */ 87 | const delay = (ms: number) => new Promise(resolve => setTimeout(resolve, ms)) 88 | 89 | /** 90 | * 带有并发数和速率限制的异步任务控制器 91 | * 92 | * @param items 要处理的元素数组 93 | * @param maxConcurrency 最大并发数 94 | * @param processor 处理单个元素的异步函数 95 | * @param options 可选配置项 96 | * @param options.requestsPerMinute 每分钟最大请求数,用于速率限制 97 | * @returns 返回一个包含所有成功处理结果的 Promise 98 | */ 99 | export async function runWithLimits( 100 | items: T[], 101 | maxConcurrency: number, 102 | processor: (item: T) => Promise, 103 | options?: { 104 | requestsPerMinute?: number 105 | } 106 | ): Promise { 107 | const results: (R | undefined)[] = new Array(items.length) 108 | const executing: Promise[] = [] 109 | 110 | // --- 新增逻辑: 速率限制初始化 --- 111 | const { requestsPerMinute } = options || {} 112 | const hasRateLimit = typeof requestsPerMinute === 'number' && requestsPerMinute > 0 113 | 114 | // 计算两次请求之间的最小时间间隔(毫秒) 115 | const minInterval = hasRateLimit ? (60 * 1000) / requestsPerMinute! : 0 116 | let lastRequestTime = 0 // 记录上一个任务开始的时间戳 117 | // --- 新增逻辑结束 --- 118 | 119 | for (let i = 0; i < items.length; i++) { 120 | // --- 核心逻辑整合 --- 121 | // 1. 首先,等待并发池出现空位(如果已满) 122 | if (executing.length >= maxConcurrency) { 123 | await Promise.race(executing) 124 | } 125 | 126 | // 2. 其次,等待满足速率限制的时间间隔 127 | if (hasRateLimit) { 128 | const now = Date.now() 129 | const elapsedTime = now - lastRequestTime 130 | if (elapsedTime < minInterval) { 131 | const delayTime = minInterval - elapsedTime 132 | await delay(delayTime) 133 | } 134 | // 更新"上一次请求时间"为当前(补足延迟后)的时间 135 | lastRequestTime = Date.now() 136 | } 137 | // --- 整合结束 --- 138 | 139 | const execute = async () => { 140 | try { 141 | results[i] = await processor(items[i]) 142 | } catch (error) { 143 | console.error(`并发任务 ${i} 失败:`, error) 144 | results[i] = undefined 145 | } 146 | } 147 | 148 | const promise = execute().then(() => { 149 | const index = executing.indexOf(promise) 150 | if (index !== -1) executing.splice(index, 1) 151 | }) 152 | executing.push(promise) 153 | } 154 | 155 | await Promise.all(executing) 156 | return results.filter((r): r is R => r !== undefined) 157 | } 158 | 159 | // ====== 导出 Prompt 管理 ====== 160 | export async function getDefaultPrompt(): Promise { 161 | return realDefaultPrompt 162 | } 163 | 164 | export async function setNewPrompt(newPrompt: string): Promise { 165 | defaultPrompt = newPrompt 166 | return true 167 | } 168 | 169 | // ====== 工具函数 ====== 170 | function splitSentences(text: string): string[] { 171 | const sentenceRegex = /[^。!?…!?]+[。!?…!?]+|[^。!?…!?]+$/g 172 | const sentences = text.match(sentenceRegex) || [] 173 | return sentences.map(s => s.trim()).filter(s => s.length > 0) 174 | } 175 | 176 | function isLikelyTitle(line: string): boolean { 177 | const trimmed = line.trim() 178 | return ( 179 | trimmed.length > 0 && 180 | trimmed.length < 100 && 181 | (trimmed.endsWith('章') || 182 | trimmed.endsWith('节') || 183 | trimmed.endsWith('篇') || 184 | /^第[一二三四五六七八九十\d]+[章节篇]/.test(trimmed) || 185 | /^[1-9][.、]\s*\S/.test(trimmed) || 186 | /^[一二三四五六七八九十][.、]\s*\S/.test(trimmed)) 187 | ) 188 | } 189 | 190 | function getHeadingLevel(line: string): number { 191 | const trimmed = line.trim() 192 | if (/^第[一二三四五六七八九十\d]+章/.test(trimmed)) return 1 193 | if (/^第[一二三四五六七八九十\d]+节/.test(trimmed)) return 2 194 | if (/^[1-9]\.\s*\S/.test(trimmed)) return 2 195 | if (/^[1-9][.1-9]*\s*\S/.test(trimmed)) return 3 196 | return 2 197 | } 198 | 199 | // ====== 文档解析 ====== 200 | async function parseWordDocument(documentPath: string): Promise { 201 | try { 202 | const result = await mammoth.extractRawText({ path: documentPath }) 203 | const text = result.value 204 | const lines = text.split('\n').filter(line => line.trim().length > 0) 205 | 206 | const sections: DocumentSection[] = [] 207 | let currentSection: DocumentSection | null = null 208 | let sectionContent: string[] = [] 209 | let documentTitle = '' 210 | 211 | for (const line of lines) { 212 | if (isLikelyTitle(line)) { 213 | if (currentSection && sectionContent.length > 0) { 214 | currentSection.content = sectionContent.join('\n') 215 | sections.push(currentSection) 216 | } 217 | 218 | if (!documentTitle) documentTitle = line.trim() 219 | 220 | currentSection = { 221 | title: line.trim(), 222 | content: '', 223 | level: getHeadingLevel(line) 224 | } 225 | sectionContent = [] 226 | } else if (currentSection) { 227 | sectionContent.push(line) 228 | } 229 | } 230 | 231 | if (currentSection && sectionContent.length > 0) { 232 | currentSection.content = sectionContent.join('\n') 233 | sections.push(currentSection) 234 | } 235 | 236 | return { 237 | title: documentTitle, 238 | sections 239 | } 240 | } catch (error) { 241 | throw new Error(`解析Word文档失败: ${error.message}`) 242 | } 243 | } 244 | 245 | // ====== 文档主题总结 ====== 246 | async function summarizeDocumentTheme( 247 | docStructure: DocumentStructure, 248 | apiKey: string, 249 | modelName: string, 250 | apiURL: string 251 | ): Promise<{ 252 | result: string 253 | total_tokens: number 254 | }> { 255 | const systemPrompt = '你是一个专业的文档分析专家。请根据提供的文档目录结构,总结文档的整体框架和主题。' 256 | const userPrompt = `文档标题: ${docStructure.title}\n\n文档目录结构:\n${docStructure.sections.map((s, i) => `${i + 1}. ${s.title}`).join('\n')}\n\n请总结这份文档的主要主题和整体框架:` 257 | 258 | try { 259 | return await OpenaiGen(systemPrompt, userPrompt, apiKey, modelName, apiURL) 260 | } catch (error) { 261 | console.error('总结文档主题时出错:', error) 262 | return { 263 | result: 'error', 264 | total_tokens: null 265 | } 266 | } 267 | } 268 | 269 | // ====== 校对结果解析 ====== 270 | function parseCorrections(result: string, ragChunks?: string[]): ProofreadingCorrection[] { 271 | // 首先尝试直接解析 272 | try { 273 | const parsed = JSON.parse(result) 274 | if (Array.isArray(parsed)) { 275 | return parsed.map(item => { 276 | if (ragChunks) { 277 | return { ...item, References: [...ragChunks] } 278 | } 279 | return item 280 | }) 281 | } else { 282 | console.warn('cannot analyze the proofreading data from LLM') 283 | return [] 284 | } 285 | } catch (error) { 286 | console.warn('直接解析JSON失败,尝试清理和提取:', error) 287 | return extractCorrectionsFromText(result, ragChunks) 288 | } 289 | } 290 | 291 | function extractCorrectionsFromText(text: string, ragChunks?: string[]): ProofreadingCorrection[] { 292 | try { 293 | // 1. 清理可能的代码块标记 294 | let cleanedText = text.trim() 295 | 296 | // 移除代码块标记(```json, ```, 或者其他语言标记) 297 | cleanedText = cleanedText.replace(/^```[\w]*\n?/g, '') 298 | cleanedText = cleanedText.replace(/\n?```$/g, '') 299 | 300 | // 移除可能的 "json" 标记 301 | cleanedText = cleanedText.replace(/^json\s*/i, '') 302 | 303 | // 移除可能的解释性文字(如 "以下是JSON:", "返回结果:" 等) 304 | cleanedText = cleanedText.replace(/^.*?以下.*?[:::]\s*/, '') 305 | cleanedText = cleanedText.replace(/^.*?返回.*?[:::]\s*/, '') 306 | 307 | // 提取JSON数组(寻找第一个 [ 和最后一个 ]) 308 | const firstBracket = cleanedText.indexOf('[') 309 | const lastBracket = cleanedText.lastIndexOf(']') 310 | 311 | if (firstBracket !== -1 && lastBracket !== -1 && lastBracket > firstBracket) { 312 | const jsonString = cleanedText.substring(firstBracket, lastBracket + 1) 313 | console.log('提取到的JSON字符串:', jsonString) 314 | 315 | const parsed = JSON.parse(jsonString) 316 | if (Array.isArray(parsed)) { 317 | const corrections = parsed.map(item => { 318 | // 验证每个字段的存在性 319 | if (item.original && item.suggested && item.reason) { 320 | if (ragChunks) { 321 | return { ...item, References: [...ragChunks] } 322 | } 323 | return item 324 | } 325 | return null 326 | }).filter((item): item is ProofreadingCorrection => item !== null) 327 | 328 | if (corrections.length > 0) { 329 | console.log(`成功解析出 ${corrections.length} 个校对结果`) 330 | return corrections 331 | } 332 | } 333 | } 334 | 335 | // 2. 尝试解析单个JSON对象 336 | try { 337 | const singleObject = JSON.parse(cleanedText) 338 | if (singleObject && typeof singleObject === 'object' && !Array.isArray(singleObject)) { 339 | if (singleObject.original && singleObject.suggested && singleObject.reason) { 340 | console.log('解析到单个校对结果') 341 | return ragChunks ? [{ ...singleObject, References: [...ragChunks] }] : [singleObject] 342 | } 343 | } 344 | } catch (e) { 345 | // 单对象解析失败,继续 346 | } 347 | 348 | // 3. 如果以上都失败,尝试从文本中提取信息 349 | console.warn('JSON解析完全失败,尝试从文本中手动提取') 350 | return parseCorrectionsFromPlainText(text, ragChunks) 351 | 352 | } catch (error) { 353 | console.error('所有解析方法都失败:', error) 354 | console.error('原始文本:', text) 355 | return [] 356 | } 357 | } 358 | 359 | function parseCorrectionsFromPlainText(text: string, ragChunks?: string[]): ProofreadingCorrection[] { 360 | const corrections: ProofreadingCorrection[] = [] 361 | const lines = text.split('\n').map(line => line.trim()).filter(line => line.length > 0) 362 | 363 | console.log('尝试从纯文本中提取校对结果,共', lines.length, '行') 364 | 365 | for (let i = 0; i < lines.length; i++) { 366 | const line = lines[i] 367 | 368 | // 查找包含 "original" 的行 369 | if (/original|原文|原内容/.test(line)) { 370 | const correction: Partial = {} 371 | 372 | // 提取 original 值 373 | const originalMatch = line.match(/["'""“”]([^""“”]+)["'""“”]/) 374 | if (originalMatch) { 375 | correction.original = originalMatch[1].trim() 376 | } 377 | 378 | // 在后续行中查找 suggested 379 | if (i + 1 < lines.length) { 380 | const suggestedLine = lines[i + 1] 381 | const suggestedMatch = suggestedLine.match(/["'""“”]([^""“”]+)["'""“”]/) 382 | if (suggestedMatch) { 383 | correction.suggested = suggestedMatch[1].trim() 384 | } 385 | } 386 | 387 | // 在后续行中查找 reason 388 | if (i + 2 < lines.length) { 389 | const reasonLine = lines[i + 2] 390 | const reasonMatch = reasonLine.match(/["'""“”]([^""“”]+)["'""“”]/) 391 | if (reasonMatch) { 392 | correction.reason = reasonMatch[1].trim() 393 | } 394 | 395 | // 查找 type 396 | const typeMatch = reasonLine.match(/"type":\s*["']([^"']+)["']/) 397 | if (typeMatch) { 398 | correction.type = typeMatch[1] 399 | } 400 | } 401 | 402 | // 如果找到了所有必需字段,添加到结果中 403 | if (correction.original && correction.suggested && correction.reason) { 404 | if (ragChunks) { 405 | correction.References = [...ragChunks] 406 | } 407 | corrections.push(correction as ProofreadingCorrection) 408 | } 409 | } 410 | } 411 | 412 | console.log('从纯文本中提取到', corrections.length, '个校对结果') 413 | return corrections 414 | } 415 | 416 | // ====== RAG 查询 ====== 417 | interface QueryDocChunkOptions { 418 | maxSelectNum?: number 419 | enableDeduplication?: boolean 420 | } 421 | 422 | const DEFAULT_MAX_SELECT_NUM = 20 423 | 424 | const queryDocChunk = async ( 425 | repositoryNameList: string[], 426 | apiKey: string, 427 | apiURL: string, 428 | modelName: string, 429 | fileName: string, 430 | content: string, 431 | filter: string, 432 | selectNum: number, 433 | options: QueryDocChunkOptions = {} 434 | ): Promise => { 435 | const { maxSelectNum = DEFAULT_MAX_SELECT_NUM, enableDeduplication = true } = options 436 | 437 | if (!Array.isArray(repositoryNameList) || repositoryNameList.length === 0) { 438 | console.warn('queryDocChunk: repositoryNameList is empty or invalid') 439 | return [] 440 | } 441 | 442 | if (!apiKey || typeof apiKey !== 'string') { 443 | throw new Error('Invalid or missing apiKey') 444 | } 445 | 446 | if (!apiURL || typeof apiURL !== 'string') { 447 | throw new Error('Invalid or missing apiURL') 448 | } 449 | 450 | if (!modelName || typeof modelName !== 'string') { 451 | throw new Error('Invalid or missing modelName') 452 | } 453 | 454 | if (!content || typeof content !== 'string' || content.trim() === '') { 455 | console.warn('queryDocChunk: empty or invalid content, returning empty result') 456 | return [] 457 | } 458 | 459 | if (!Number.isInteger(selectNum) || selectNum <= 0) { 460 | console.warn(`queryDocChunk: invalid selectNum ${selectNum}, using default 1`) 461 | selectNum = 1 462 | } 463 | 464 | const effectiveSelectNum = Math.min(selectNum, maxSelectNum) 465 | const perRepoLimit = Math.min(effectiveSelectNum, 10) 466 | 467 | const chunkList: RAGQueryResult[] = [] 468 | 469 | const queries = repositoryNameList.map(repoName => 470 | queryDocuments(repoName, content.trim(), modelName, apiKey, apiURL, perRepoLimit, filter).catch( 471 | (err): RAGQueryResult[] => { 472 | console.error(`queryDocChunk: failed to query repository "${repoName}"`, err) 473 | return [] 474 | } 475 | ) 476 | ) 477 | 478 | const results = await Promise.all(queries) 479 | 480 | for (const result of results) { 481 | if (Array.isArray(result)) { 482 | chunkList.push(...result) 483 | } 484 | } 485 | 486 | let uniqueChunks = chunkList 487 | if (enableDeduplication && chunkList.length > 0) { 488 | const seen = new Set() 489 | uniqueChunks = chunkList.filter(item => { 490 | if (typeof item.text !== 'string') return false 491 | if (seen.has(item.text)) return false 492 | seen.add(item.text) 493 | return true 494 | }) 495 | } 496 | console.info('-----------------------------------RAG Query----------------------------') 497 | console.info('the unique results of query:', uniqueChunks) 498 | console.log('the proofreading content:', content) 499 | 500 | const topChunks = uniqueChunks 501 | .filter(item => typeof item.score === 'number' && typeof item.text === 'string') 502 | .sort((a, b) => b.score - a.score) 503 | .slice(0, effectiveSelectNum) 504 | console.info('the top relative result of query:', topChunks) 505 | 506 | return topChunks.map(item => item.text) 507 | } 508 | 509 | // ====== 通用RAG校对函数 ====== 510 | async function proofreadTextWithRAG( 511 | text: string, 512 | systemContext: string, 513 | apiKey: string, 514 | modelName: string, 515 | apiURL: string, 516 | repositoryNameList?: string[], 517 | fileName?: string, 518 | embeddingConfig?: ApiSettings 519 | ): Promise<{ result: ProofreadingCorrection[]; use_tokens: number }> { 520 | try { 521 | let systemPrompt = systemContext 522 | if (repositoryNameList === undefined) { 523 | console.log('use normal proof without rag:') 524 | console.log('proof content:', text) 525 | const { result, total_tokens } = await OpenaiGen( 526 | systemPrompt, 527 | `需要校对的内容:\n${text}`, 528 | apiKey, 529 | modelName, 530 | apiURL 531 | ) 532 | return { result: parseCorrections(result), use_tokens: total_tokens } 533 | } else if (repositoryNameList.length === 0) { 534 | if (repositoryNameList === undefined) { 535 | console.log('use normal proof without rag:') 536 | console.log('proof content:', text) 537 | const { result, total_tokens } = await OpenaiGen( 538 | systemPrompt, 539 | `需要校对的内容:\n${text}`, 540 | apiKey, 541 | modelName, 542 | apiURL 543 | ) 544 | return { result: parseCorrections(result), use_tokens: total_tokens } 545 | } else if (repositoryNameList.length > 0 && fileName) { 546 | const embApiKey = embeddingConfig?.apiKey || apiKey 547 | const embApiURL = embeddingConfig?.apiURL || apiURL 548 | const embModelName = embeddingConfig?.modelName || modelName 549 | console.log('------------------------setting of RAG-------------------------------------') 550 | console.log('embedding key:', embApiKey) 551 | console.log('embedding URL:', embApiURL) 552 | console.log('embedding modelName:', embModelName) 553 | 554 | const ragChunks = await queryDocChunk( 555 | repositoryNameList, 556 | embApiKey, 557 | embApiURL, 558 | embModelName, 559 | fileName, 560 | text, 561 | '', 562 | 3 563 | ) 564 | 565 | if (ragChunks.length > 0) { 566 | const ragContext = `\n${ragText}:\n${ragChunks.map((t, i) => `${i + 1}. ${t}`).join('\n')}` 567 | systemPrompt += ragContext 568 | } 569 | 570 | const { result, total_tokens } = await OpenaiGen( 571 | systemPrompt, 572 | `需要校对的内容:\n${text}`, 573 | apiKey, 574 | modelName, 575 | apiURL 576 | ) 577 | return { result: parseCorrections(result, ragChunks), use_tokens: total_tokens } 578 | } else { 579 | console.log("the proof mode don't catch any preload,please check!") 580 | throw error("the proof mode don't catch any preload,please check!") 581 | } 582 | } 583 | } catch (error) { 584 | console.error('校对文本失败:', error) 585 | return { result: [], use_tokens: 0 } 586 | } 587 | } 588 | 589 | // ====== 主校对函数 ====== 590 | export async function proofreadDocument( 591 | documentPath: string, 592 | mode: 'section' | 'sentence' | 'full', 593 | apiKey: string, 594 | modelName: string, 595 | apiURL: string, 596 | repositoryNameList?: string[], 597 | embeddingConfig?: ApiSettings, 598 | parallelSet: number = 30, // 并发限制 599 | setTimeLimit?: number // 每分钟最高发射频率 600 | ): Promise<{ proofResult: ProofreadingCorrection[]; token_usage: number }> { 601 | console.log('process mode is:', mode) 602 | console.log('process api is:', apiURL, modelName) 603 | let total_tokens = 0 // calculate the usage of tokens 604 | const option = setTimeLimit // set the limit of request per minute 605 | ? { 606 | requestsPerMinute: setTimeLimit 607 | } 608 | : undefined 609 | 610 | try { 611 | const fileName = path.basename(documentPath) 612 | 613 | if (mode === 'full') { 614 | const fullText = await mammoth.extractRawText({ path: documentPath }) // get full text 615 | const text = fullText.value.trim() // trim 616 | if (!text) 617 | return { 618 | proofResult: null, 619 | token_usage: 0 620 | } 621 | const { result, use_tokens } = await proofreadTextWithRAG( 622 | text, 623 | defaultPrompt, 624 | apiKey, 625 | modelName, 626 | apiURL, 627 | repositoryNameList, 628 | fileName, 629 | embeddingConfig 630 | ) 631 | total_tokens += use_tokens 632 | 633 | return { proofResult: result, token_usage: total_tokens } 634 | } 635 | 636 | const docStructure = await parseWordDocument(documentPath) 637 | const documentTheme = await summarizeDocumentTheme(docStructure, apiKey, modelName, apiURL) 638 | const nonEmptySections = docStructure.sections.filter(sec => sec.content.trim().length > 0) 639 | if (nonEmptySections.length === 0) 640 | return { 641 | proofResult: null, 642 | token_usage: 0 643 | } 644 | 645 | let allCorrections: ProofreadingCorrection[] = [] 646 | 647 | if (mode === 'section') { 648 | const sectionResults = await runWithLimits( 649 | nonEmptySections, 650 | parallelSet, 651 | async section => { 652 | const systemContext = `${defaultPrompt} 653 | 文档标题: ${docStructure.title} 654 | 文档主题: ${documentTheme} 655 | 当前章节标题: ${section.title}` 656 | return proofreadTextWithRAG( 657 | section.content, 658 | systemContext, 659 | apiKey, 660 | modelName, 661 | apiURL, 662 | repositoryNameList, 663 | fileName, 664 | embeddingConfig 665 | ) 666 | }, 667 | option 668 | ) 669 | let resultList: ProofreadingCorrection[][] = [] 670 | sectionResults.forEach(item => { 671 | total_tokens += item.use_tokens 672 | resultList.push(item.result) 673 | }) 674 | allCorrections = resultList.flat() // 展开二维数组,获取最后的结果数组 675 | } else if (mode === 'sentence') { 676 | const sentenceTasks: (() => Promise<{ result: ProofreadingCorrection[]; use_tokens: number }>)[] = [] 677 | for (const section of nonEmptySections) { 678 | const sentences = splitSentences(section.content) 679 | const validSentences = sentences.filter(s => s.trim().length > 0) 680 | if (validSentences.length === 0) continue 681 | 682 | for (const sentence of validSentences) { 683 | sentenceTasks.push(async () => { 684 | const systemContext = `${defaultPrompt} 685 | 文档标题: ${docStructure.title} 686 | 文档主题: ${documentTheme} 687 | 当前章节标题: ${section.title}` 688 | return proofreadTextWithRAG( 689 | sentence, 690 | systemContext, 691 | apiKey, 692 | modelName, 693 | apiURL, 694 | repositoryNameList, 695 | fileName, 696 | embeddingConfig 697 | ) 698 | }) 699 | } 700 | } 701 | 702 | if (sentenceTasks.length > 0) { 703 | const sentenceResults = await runWithLimits(sentenceTasks, parallelSet, task => task(), option) 704 | let resultList: ProofreadingCorrection[][] = [] 705 | sentenceResults.forEach(Items => { 706 | total_tokens += Items.use_tokens 707 | resultList.push(Items.result) 708 | }) 709 | allCorrections = resultList.flat() 710 | } 711 | } 712 | 713 | // 确保可序列化 714 | const serializableCorrections = allCorrections.map(correction => ({ 715 | original: correction.original, 716 | suggested: correction.suggested, 717 | reason: correction.reason, 718 | type: correction.type, 719 | ...(correction.References ? { References: correction.References } : {}) 720 | })) 721 | 722 | console.log('校对结果:', serializableCorrections) 723 | return { proofResult: serializableCorrections, token_usage: total_tokens } 724 | } catch (error) { 725 | console.error('文档校对过程中出现错误:', error) 726 | throw error 727 | } 728 | } 729 | --------------------------------------------------------------------------------