├── 首页白.jpg
├── 首页黑.jpg
├── 提示词设置.jpg
├── ssh_test.txt
├── 新版设置页面.jpg
├── 知识库页面2.jpg
├── 记录管理页面.jpg
├── api设置与并发设置.jpg
├── assets
├── apiset.jpg
├── logo.icns
├── logo.ico
├── logo.jpg
├── logo.png
├── history.jpg
└── Proofreading.jpg
├── .husky
├── commit-msg
└── pre-commit
├── src
├── renderer
│ ├── assets
│ │ ├── logo.png
│ │ ├── apiset.jpg
│ │ ├── history.jpg
│ │ ├── Proofreading.jpg
│ │ └── css
│ │ │ └── common.css
│ ├── views
│ │ ├── logs.vue
│ │ ├── OutputWord.vue
│ │ ├── Home.vue
│ │ ├── Work.vue
│ │ ├── ProofSet.vue
│ │ ├── history.vue
│ │ ├── About.vue
│ │ └── Dictionary.vue
│ ├── index.css
│ ├── renderer.ts
│ ├── stores
│ │ ├── embeddingStore.ts
│ │ ├── apiStore.ts
│ │ └── store.ts
│ ├── router
│ │ └── index.js
│ ├── electron.d.ts
│ └── App.vue
└── main
│ ├── logger.ts
│ ├── wordProcess.ts
│ ├── main.ts
│ ├── preload.ts
│ ├── database.ts
│ ├── chat.ts
│ ├── pdfUtils.ts
│ ├── lancedb.ts
│ ├── ipcHandlers.ts
│ └── proof.ts
├── .vscode
└── settings.json
├── .prettierrc
├── index.html
├── tsconfig.json
├── .eslintrc.js
├── vite.renderer.config.ts
├── vite.main.config.ts
├── .eslintrc.json
├── vite.preload.config.ts
├── LICENSE
├── forge.env.d.ts
├── .gitignore
├── .versionrc
├── changelog.config.js
├── forge.config.ts
├── package.json
├── vite.base.config.ts
├── README.md
├── lancedbNativePro.ts
└── test.html
/首页白.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/首页白.jpg
--------------------------------------------------------------------------------
/首页黑.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/首页黑.jpg
--------------------------------------------------------------------------------
/提示词设置.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/提示词设置.jpg
--------------------------------------------------------------------------------
/ssh_test.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/ssh_test.txt
--------------------------------------------------------------------------------
/新版设置页面.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/新版设置页面.jpg
--------------------------------------------------------------------------------
/知识库页面2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/知识库页面2.jpg
--------------------------------------------------------------------------------
/记录管理页面.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/记录管理页面.jpg
--------------------------------------------------------------------------------
/api设置与并发设置.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/api设置与并发设置.jpg
--------------------------------------------------------------------------------
/assets/apiset.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/assets/apiset.jpg
--------------------------------------------------------------------------------
/assets/logo.icns:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/assets/logo.icns
--------------------------------------------------------------------------------
/assets/logo.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/assets/logo.ico
--------------------------------------------------------------------------------
/assets/logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/assets/logo.jpg
--------------------------------------------------------------------------------
/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/assets/logo.png
--------------------------------------------------------------------------------
/.husky/commit-msg:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | . "$(dirname "$0")/_/husky.sh"
3 |
4 | # Lint the commit message file whose path is passed as the first argument.
5 | # "$1" is quoted so a path containing spaces is not word-split by the shell.
6 | npx commitlint --edit "$1"
--------------------------------------------------------------------------------
/assets/history.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/assets/history.jpg
--------------------------------------------------------------------------------
/assets/Proofreading.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/assets/Proofreading.jpg
--------------------------------------------------------------------------------
/.husky/pre-commit:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | . "$(dirname "$0")/_/husky.sh"
3 |
4 | npx lint-staged --allow-empty $1
5 |
--------------------------------------------------------------------------------
/src/renderer/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/src/renderer/assets/logo.png
--------------------------------------------------------------------------------
/src/renderer/assets/apiset.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/src/renderer/assets/apiset.jpg
--------------------------------------------------------------------------------
/src/renderer/assets/history.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/src/renderer/assets/history.jpg
--------------------------------------------------------------------------------
/src/renderer/assets/Proofreading.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CZ600/AutoDocxProofread/HEAD/src/renderer/assets/Proofreading.jpg
--------------------------------------------------------------------------------
/src/renderer/views/logs.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
8 |
9 |
--------------------------------------------------------------------------------
/src/renderer/views/OutputWord.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
8 |
9 |
--------------------------------------------------------------------------------
/src/renderer/views/Home.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 | 111
4 |
5 |
6 |
7 |
10 |
11 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "editor.formatOnSave": true,
3 | "editor.defaultFormatter": "esbenp.prettier-vscode",
4 | "[shellscript]": {
5 | "editor.defaultFormatter": "foxundermoon.shell-format"
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 | "printWidth": 120,
3 | "tabWidth": 2,
4 | "useTabs": false,
5 | "semi": false,
6 | "singleQuote": true,
7 | "arrowParens": "avoid",
8 | "bracketSameLine": false,
9 | "trailingComma": "none"
10 | }
11 |
--------------------------------------------------------------------------------
/src/renderer/index.css:
--------------------------------------------------------------------------------
1 | body {
2 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif;
3 | margin: 0;
4 | padding: 0;
5 | height: 100vh;
6 | overflow: hidden;
7 | }
8 |
9 | #app {
10 | height: 100%;
11 | }
12 |
--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Font Mini
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ESNext",
4 | "module": "commonjs",
5 | "allowJs": true,
6 | "skipLibCheck": true,
7 | "esModuleInterop": true,
8 | "noImplicitAny": true,
9 | "sourceMap": true,
10 | "baseUrl": ".",
11 | "outDir": "dist",
12 | "types": ["node"],
13 | "moduleResolution": "node",
14 | "resolveJsonModule": true
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/src/renderer/assets/css/common.css:
--------------------------------------------------------------------------------
1 | html,
2 | body,
3 | #app {
4 | margin: 0;
5 | padding: 0;
6 | height: 100%;
7 | }
8 |
9 | /* Dark-mode styles */
10 | .dark {
11 | background-color: #1d1e1f;
12 | color: #ffffff;
13 | }
14 |
15 | .dark ::-webkit-scrollbar {
16 | width: 6px;
17 | }
18 |
19 | .dark ::-webkit-scrollbar-thumb {
20 | background-color: #555;
21 | border-radius: 3px;
22 | }
23 |
24 | .dark ::-webkit-scrollbar-track {
25 | background-color: #2d2d2d;
26 | }
--------------------------------------------------------------------------------
/.eslintrc.js:
--------------------------------------------------------------------------------
1 | module.exports = { // NOTE(review): the repo root also contains a .eslintrc.json; confirm which config is meant to apply
2 | extends: [
3 | 'eslint:recommended',
4 | 'plugin:vue/vue3-recommended',
5 | 'plugin:@typescript-eslint/recommended',
6 | 'prettier'
7 | ],
8 | rules: {
9 | // Report indentation problems as warnings instead of errors; indent with 2 spaces
10 | 'indent': ['warn', 2],
11 | // Vue templates: warn (rather than error) when indentation deviates from 2 spaces
12 | 'vue/html-indent': ['warn', 2, {
13 | 'attribute': 1,
14 | 'baseIndent': 1,
15 | 'caseIndent': true,
16 | 'closeBracket': 0,
17 | 'alignAttributesVertical': false,
18 | 'ignores': []
19 | }]
20 | }
21 | };
--------------------------------------------------------------------------------
/src/renderer/renderer.ts:
--------------------------------------------------------------------------------
1 | import { createApp } from 'vue'
2 | import App from './App.vue'
3 | import router from './router'
4 | // Element Plus component library and its stylesheet
5 | import ElementPlus from 'element-plus'
6 | import { createPinia } from 'pinia'
7 | import 'element-plus/dist/index.css'
8 | import piniaPluginPersistedstate from 'pinia-plugin-persistedstate'
9 | // Chinese locale pack for Element Plus
10 | import zhCn from 'element-plus/es/locale/lang/zh-cn' // choose the locale you need
11 | const app = createApp(App)
12 | const pinia = createPinia()
13 | pinia.use(piniaPluginPersistedstate) // register the persisted-state plugin on this Pinia instance
14 |
15 | app.use(router)
16 | app.use(pinia)
17 | // Install Element Plus with the zh-cn locale
18 | app.use(ElementPlus, {
19 | locale: zhCn
20 | })
21 |
22 | app.mount('#app')
23 |
--------------------------------------------------------------------------------
/vite.renderer.config.ts:
--------------------------------------------------------------------------------
1 | import type { ConfigEnv, UserConfig } from 'vite'
2 | import vue from '@vitejs/plugin-vue'
3 | import { defineConfig } from 'vite'
4 | import { pluginExposeRenderer } from './vite.base.config'
5 |
6 | // https://vitejs.dev/config
7 | export default defineConfig(env => { // builds the Vite config for a renderer entry
8 | const forgeEnv = env as ConfigEnv<'renderer'> // treat env as the 'renderer' flavour of ConfigEnv
9 | const { root, mode, forgeConfigSelf } = forgeEnv
10 | const name = forgeConfigSelf.name ?? '' // fall back to an empty name when none is configured
11 |
12 | return {
13 | root,
14 | mode,
15 | base: './',
16 | build: {
17 | outDir: `.vite/renderer/${name}` // one output directory per renderer name
18 | },
19 | plugins: [pluginExposeRenderer(name), vue()],
20 | resolve: {
21 | preserveSymlinks: true
22 | },
23 | clearScreen: false
24 | } as UserConfig
25 | })
26 |
--------------------------------------------------------------------------------
/vite.main.config.ts:
--------------------------------------------------------------------------------
1 | import type { ConfigEnv, UserConfig } from 'vite'
2 | import { defineConfig, mergeConfig } from 'vite'
3 | import { getBuildConfig, getBuildDefine, external, pluginHotRestart } from './vite.base.config'
4 |
5 | // https://vitejs.dev/config
6 | export default defineConfig(env => { // builds the Vite config for the main-process entry
7 | const forgeEnv = env as ConfigEnv<'build'> // treat env as the 'build' flavour of ConfigEnv
8 | const { forgeConfigSelf } = forgeEnv
9 | const define = getBuildDefine(forgeEnv)
10 |
11 | const config: UserConfig = {
12 | build: {
13 | lib: {
14 | entry: forgeConfigSelf.entry, // entry path comes from this build's Forge config
15 | fileName: () => '[name].js',
16 | formats: ['cjs'] // emit CommonJS only
17 | },
18 | rollupOptions: {
19 | external
20 | }
21 | },
22 | plugins: [pluginHotRestart('restart')],
23 | define,
24 | resolve: {
25 | // Load the Node.js entry.
26 | mainFields: ['module', 'jsnext:main', 'jsnext']
27 | }
28 | }
29 |
30 | return mergeConfig(getBuildConfig(forgeEnv), config) // merge the shared build config with the overrides above
31 | })
32 |
--------------------------------------------------------------------------------
/.eslintrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "root": true,
3 | "env": {
4 | "browser": true,
5 | "es6": true,
6 | "node": true
7 | },
8 | "extends": [
9 | "eslint:recommended",
10 | "plugin:@typescript-eslint/eslint-recommended",
11 | "plugin:@typescript-eslint/recommended",
12 | "plugin:import/recommended",
13 | "plugin:import/electron",
14 | "plugin:import/typescript",
15 | "plugin:vue/vue3-recommended"
16 | ],
17 | "parser": "vue-eslint-parser",
18 | "parserOptions": {
19 | "ecmaVersion": 2020,
20 | "parser": "@typescript-eslint/parser",
21 | "sourceType": "module"
22 | },
23 | "rules": {
24 | "max-len": ["error", { "code": 120 }],
25 | "indent": ["error", 2, { "SwitchCase": 1 }],
26 | "semi": ["error", "never"],
27 | "quotes": ["error", "single"],
28 | "arrow-parens": ["error", "as-needed"],
29 | "brace-style": ["error", "1tbs"],
30 | "comma-dangle": ["error", "never"],
31 | "@typescript-eslint/no-explicit-any": "off"
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/src/renderer/views/Work.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
20 |
21 |
--------------------------------------------------------------------------------
/vite.preload.config.ts:
--------------------------------------------------------------------------------
1 | import type { ConfigEnv, UserConfig } from 'vite'
2 | import { defineConfig, mergeConfig } from 'vite'
3 | import { getBuildConfig, external, pluginHotRestart } from './vite.base.config'
4 |
5 | // https://vitejs.dev/config
6 | export default defineConfig(env => { // builds the Vite config for the preload script
7 | const forgeEnv = env as ConfigEnv<'build'> // treat env as the 'build' flavour of ConfigEnv
8 | const { forgeConfigSelf } = forgeEnv
9 |
10 | const config: UserConfig = {
11 | build: {
12 | rollupOptions: {
13 | external,
14 | // Preload scripts may contain Web assets, so use `build.rollupOptions.input` instead of `build.lib.entry`.
15 | input: forgeConfigSelf.entry,
16 | output: {
17 | format: 'cjs',
18 | // It should not be split chunks.
19 | inlineDynamicImports: true,
20 | entryFileNames: '[name].js',
21 | chunkFileNames: '[name].js',
22 | assetFileNames: '[name].[ext]'
23 | }
24 | }
25 | },
26 | plugins: [pluginHotRestart('reload')]
27 | }
28 |
29 | return mergeConfig(getBuildConfig(forgeEnv), config) // merge the shared build config with the overrides above
30 | })
31 |
--------------------------------------------------------------------------------
/src/main/logger.ts:
--------------------------------------------------------------------------------
1 | import fs from 'fs'
2 | import path from 'path'
3 | import { app } from 'electron'
4 |
5 | // Build the log directory and log file paths
6 | const logDir = path.join(app.getAppPath(), '..', '..', 'logs') // 'logs' folder two levels above the app path
7 | const logFile = path.join(logDir, 'main.log')
8 |
9 | // Make sure the log directory exists, creating it (and any missing parents) if needed
10 | function ensureLogDir(): void {
11 | try {
12 | if (!fs.existsSync(logDir)) {
13 | fs.mkdirSync(logDir, { recursive: true })
14 | }
15 | } catch (error) {
16 | // If the log directory cannot be created, the console is the only place left to report it
17 | console.error('Failed to create log directory:', error)
18 | }
19 | }
20 |
21 | // Append one timestamped line to the log file; failures are reported to the console, never thrown
22 | function writeLog(message: string): void {
23 | try {
24 | const timestamp = new Date().toISOString()
25 | const logMessage = `[${timestamp}] ${message}\n`
26 |
27 | ensureLogDir()
28 | fs.appendFileSync(logFile, logMessage, 'utf8')
29 | } catch (error) {
30 | // If writing the log fails, the console is the only place left to report it
31 | console.error('Failed to write to log file:', error)
32 | }
33 | }
34 |
35 | // In development, additionally expose writeLog on the global object for testing
36 | if (process.env.NODE_ENV === 'development') {
37 | Object.assign(global, { writeLog })
38 | }
39 |
40 | export { writeLog, logFile }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 CZ600
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/forge.env.d.ts:
--------------------------------------------------------------------------------
1 | export {} // Make this a module
2 |
3 | declare global {
4 |   // This allows TypeScript to pick up the magic constants that's auto-generated by Forge's Vite
5 |   // plugin that tells the Electron app where to look for the Vite-bundled app code (depending on
6 |   // whether you're running in development or production).
7 |   const MAIN_WINDOW_VITE_DEV_SERVER_URL: string
8 |   const MAIN_WINDOW_VITE_NAME: string
9 |
10 |   namespace NodeJS {
11 |     interface Process {
12 |       // Used for hot reload after preload scripts.
13 |       viteDevServers: Record<string, import('vite').ViteDevServer>;
14 |     }
15 |   }
16 |
17 |   type VitePluginConfig = ConstructorParameters<typeof import('@electron-forge/plugin-vite').VitePlugin>[0];
18 |
19 |   interface VitePluginRuntimeKeys {
20 |     VITE_DEV_SERVER_URL: `${string}_VITE_DEV_SERVER_URL`;
21 |     VITE_NAME: `${string}_VITE_NAME`;
22 |   }
23 | }
24 |
25 | declare module 'vite' {
26 |   interface ConfigEnv<K extends keyof VitePluginConfig = keyof VitePluginConfig> {
27 |     root: string;
28 |     forgeConfig: VitePluginConfig;
29 |     forgeConfigSelf: VitePluginConfig[K][number];
30 |   }
31 | }
32 |
--------------------------------------------------------------------------------
/src/renderer/views/ProofSet.vue:
--------------------------------------------------------------------------------
1 |
2 | 在本页面设置提示词
3 |
4 |
5 | {{ defaultPrompt }}
6 |
7 |
8 |
9 |
10 |
11 | 修改提示词
12 | 恢复默认设置
13 |
14 |
15 |
16 |
17 |
18 |
53 |
54 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 | lerna-debug.log*
8 |
9 | # Diagnostic reports (https://nodejs.org/api/report.html)
10 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
11 |
12 | # Runtime data
13 | pids
14 | *.pid
15 | *.seed
16 | *.pid.lock
17 | .DS_Store
18 |
19 | # Directory for instrumented libs generated by jscoverage/JSCover
20 | lib-cov
21 |
22 | # Coverage directory used by tools like istanbul
23 | coverage
24 | *.lcov
25 |
26 | # nyc test coverage
27 | .nyc_output
28 |
29 | # node-waf configuration
30 | .lock-wscript
31 |
32 | # Compiled binary addons (https://nodejs.org/api/addons.html)
33 | build/Release
34 |
35 | # Dependency directories
36 | node_modules/
37 | jspm_packages/
38 |
39 | # TypeScript v1 declaration files
40 | typings/
41 |
42 | # TypeScript cache
43 | *.tsbuildinfo
44 |
45 | # Optional npm cache directory
46 | .npm
47 |
48 | # Optional eslint cache
49 | .eslintcache
50 |
51 | # Optional REPL history
52 | .node_repl_history
53 |
54 | # Output of 'npm pack'
55 | *.tgz
56 |
57 | # Yarn Integrity file
58 | .yarn-integrity
59 |
60 | # dotenv environment variables file
61 | .env
62 | .env.test
63 |
64 | # parcel-bundler cache (https://parceljs.org/)
65 | .cache
66 |
67 | # next.js build output
68 | .next
69 |
70 | # nuxt.js build output
71 | .nuxt
72 |
73 | # vuepress build output
74 | .vuepress/dist
75 |
76 | # Serverless directories
77 | .serverless/
78 |
79 | # FuseBox cache
80 | .fusebox/
81 |
82 | # DynamoDB Local files
83 | .dynamodb/
84 |
85 | # Webpack
86 | .webpack/
87 |
88 | # Vite
89 | .vite/
90 |
91 | # Electron-Forge
92 | out/
93 |
94 | dist-electron-builder/
95 | dist/
96 |
--------------------------------------------------------------------------------
/.versionrc:
--------------------------------------------------------------------------------
1 | {
2 | "header": "# 更新日志 \n\n",
3 | "types": [
4 | {
5 | "type": "feat",
6 | "section": "✨ Features | 新功能",
7 | "hidden": false
8 | },
9 | {
10 | "type": "fix",
11 | "section": "🐛 Bug Fixes | Bug 修复",
12 | "hidden": false
13 | },
14 | {
15 | "type": "init",
16 | "section": "🎉 Init | 初始化",
17 | "hidden": true
18 | },
19 | {
20 | "type": "docs",
21 | "section": "✏️ Documentation | 文档",
22 | "hidden": false
23 | },
24 | {
25 | "type": "style",
26 | "section": "💄 Styles | 风格",
27 | "hidden": true
28 | },
29 | {
30 | "type": "refactor",
31 | "section": "♻️ Code Refactoring | 代码重构",
32 | "hidden": true
33 | },
34 | {
35 | "type": "perf",
36 | "section": "⚡ Performance Improvements | 性能优化",
37 | "hidden": true
38 | },
39 | {
40 | "type": "test",
41 | "section": "✅ Tests | 测试",
42 | "hidden": true
43 | },
44 | {
45 | "type": "revert",
46 | "section": "⏪ Revert | 回退",
47 | "hidden": true
48 | },
49 | {
50 | "type": "build",
51 | "section": "📦 Build System | 打包构建",
52 | "hidden": true
53 | },
54 | {
55 | "type": "chore",
56 | "section": "🚀 Chore | 构建/工程依赖/工具",
57 | "hidden": true
58 | },
59 | {
60 | "type": "ci",
61 | "section": "👷 Continuous Integration | CI 配置",
62 | "hidden": true
63 | }
64 | ]
65 | }
66 |
--------------------------------------------------------------------------------
/src/renderer/stores/embeddingStore.ts:
--------------------------------------------------------------------------------
1 | // stores/embeddingStore.ts
2 | import { defineStore } from 'pinia'
3 |
4 | export interface EmbeddingAPIConfig {
5 | apiURL: string
6 | apiKey: string
7 | modelName: string
8 | }
9 |
10 | /**
11 |  * Pinia store for the embedding API settings and the name of the repository
12 |  * currently being viewed. Both fields are persisted to localStorage under the
13 |  * key 'embeddingConfig'.
14 |  */
15 | export const useEmbeddingStore = defineStore('embedding', {
16 |   state: () => ({
17 |     config: {
18 |       apiURL: '',
19 |       apiKey: '',
20 |       modelName: ''
21 |     } as EmbeddingAPIConfig,
22 |     activeRepositoryName: '' // name of the repository currently being viewed
23 |   }),
24 |
25 |   getters: {
26 |     getAPIConfig: (state) => state.config,
27 |     getAPIURL: (state) => state.config.apiURL,
28 |     getAPIKey: (state) => state.config.apiKey,
29 |     getModelName: (state) => state.config.modelName,
30 |     getActiveRepositoryName: (state) => state.activeRepositoryName,
31 |     // True only when URL, key and model name are all non-empty. Wrapped in
32 |     // Boolean() so the getter yields a real boolean instead of leaking one of
33 |     // the strings (the raw `a && b && c` evaluates to '' or the model name).
34 |     isConfigured: (state): boolean =>
35 |       Boolean(state.config.apiURL && state.config.apiKey && state.config.modelName)
36 |   },
37 |
38 |   actions: {
39 |     /** Replace the whole configuration with a shallow copy of `config`. */
40 |     setConfig(config: EmbeddingAPIConfig) {
41 |       this.config = { ...config }
42 |     },
43 |
44 |     setAPIURL(url: string) {
45 |       this.config.apiURL = url
46 |     },
47 |
48 |     setAPIKey(key: string) {
49 |       this.config.apiKey = key
50 |     },
51 |
52 |     setModelName(name: string) {
53 |       this.config.modelName = name
54 |     },
55 |
56 |     setActiveRepositoryName(name: string) {
57 |       this.activeRepositoryName = name
58 |     },
59 |
60 |     /** Reset every persisted field back to an empty string. */
61 |     clearAll() {
62 |       this.config.apiURL = ''
63 |       this.config.apiKey = ''
64 |       this.config.modelName = ''
65 |       this.activeRepositoryName = ''
66 |     }
67 |   },
68 |
69 |   persist: {
70 |     key: 'embeddingConfig',
71 |     storage: localStorage,
72 |     // `pick` is the option name used by stores/apiStore.ts; the previous
73 |     // `paths` spelling is the older name of the same option. Every state key
74 |     // is listed, so the persisted data is the same either way.
75 |     pick: ['config', 'activeRepositoryName']
76 |   }
77 | })
78 |
--------------------------------------------------------------------------------
/src/renderer/router/index.js:
--------------------------------------------------------------------------------
1 | import { createRouter, createWebHistory } from 'vue-router'
2 | import Home from '../views/Home.vue' // NOTE(review): not referenced below; the 'Home' route is a pure redirect
3 | import About from '../views/About.vue'
4 | import Work from '../views/Work.vue'
5 | import APISet from '../views/APISet.vue'
6 | import Proof from '../views/Proof.vue'
7 | import ProofSet from '../views/ProofSet.vue'
8 | import History from '../views/history.vue'
9 | import Logs from '../views/logs.vue'
10 | import Dictionary from '../views/Dictionary.vue'
11 | import OutputWord from '../views/OutputWord.vue'
12 | import path from 'path' // NOTE(review): not referenced in this file (the `path:` keys below are plain property names)
13 | const routes = [
14 | {
15 | path: '/',
16 | name: 'Home',
17 | redirect: '/work/proof'
18 | },
19 | {
20 | path: '/about',
21 | name: 'About',
22 | component: About
23 | },
24 | {
25 | path: '/work',
26 | name: 'Work',
27 | component: Work,
28 | redirect: '/work/proof',
29 | children: [
30 | {
31 | path: 'api',
32 | name: 'APISet',
33 | component: APISet
34 | },
35 | {
36 | path: 'proof',
37 | name: 'Proof',
38 | component: Proof
39 | },
40 | {
41 | path: 'set',
42 | name: 'Set',
43 | component: ProofSet
44 | },
45 | {
46 | path: 'history',
47 | name: 'History',
48 | component: History
49 | },
50 | {
51 | path: 'logs',
52 | name: 'Logs',
53 | component: Logs
54 | },
55 | {
56 | path: 'dictionary',
57 | name: 'Dictionary',
58 | component: Dictionary
59 | },
60 | {
61 | path: 'ouputWord', // NOTE(review): looks like a typo of 'outputWord'; renaming changes the URL, so check callers first
62 | name: 'OutputWord',
63 | component: OutputWord
64 | }
65 | ]
66 | }
67 |
68 | // Dynamic route examples
69 | ]
70 |
71 | const router = createRouter({
72 | history: createWebHistory(), // NOTE(review): HTML5 history mode; confirm it resolves routes when the packaged app is loaded from a file:// URL
73 | routes
74 | })
75 |
76 | export default router
77 |
--------------------------------------------------------------------------------
/changelog.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | disableEmoji: false,
3 | list: ['test', 'feat', 'fix', 'chore', 'docs', 'refactor', 'style', 'ci', 'perf', 'release', 'revert', 'build'],
4 | maxMessageLength: 64,
5 | minMessageLength: 3,
6 | questions: ['type', 'scope', 'subject', 'body', 'breaking', 'issues', 'lerna'],
7 | scopes: [],
8 | types: {
9 | chore: {
10 | description: 'Chore | 构建/工程依赖/工具',
11 | emoji: '🚀', // emoji displayed for commits of this type
12 | value: 'chore'
13 | },
14 | ci: {
15 | description: 'Continuous Integration | CI 配置',
16 | emoji: '👷',
17 | value: 'ci'
18 | },
19 | docs: {
20 | description: 'Documentation | 文档',
21 | emoji: '✏️ ',
22 | value: 'docs'
23 | },
24 | feat: {
25 | description: 'Features | 新功能',
26 | emoji: '✨',
27 | value: 'feat'
28 | },
29 | fix: {
30 | description: 'Bug Fixes | Bug 修复',
31 | emoji: '🐛',
32 | value: 'fix'
33 | },
34 | perf: {
35 | description: 'Performance Improvements | 性能优化',
36 | emoji: '⚡',
37 | value: 'perf'
38 | },
39 | refactor: {
40 | description: 'Code Refactoring | 代码重构',
41 | emoji: '♻️ ',
42 | value: 'refactor'
43 | },
44 | release: {
45 | description: 'Create a release commit | 发版提交',
46 | emoji: '🏹',
47 | value: 'release'
48 | },
49 | style: {
50 | description: 'Styles | 风格',
51 | emoji: '💄',
52 | value: 'style'
53 | },
54 | revert: {
55 | description: 'Revert | 回退',
56 | emoji: '⏪',
57 | value: 'revert'
58 | },
59 | build: {
60 | description: 'Build System | 打包构建',
61 | emoji: '📦',
62 | value: 'build'
63 | },
64 | test: {
65 | description: 'Tests | 测试',
66 | emoji: '✅',
67 | value: 'test'
68 | }
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/src/renderer/stores/apiStore.ts:
--------------------------------------------------------------------------------
1 | // stores/apiStore.ts
2 | import { defineStore } from 'pinia'
3 | import { reactive } from 'vue'
4 |
5 | interface ApiSettings {
6 | id: number | null
7 | URL: string
8 | key: string
9 | name: string
10 | time: string
11 | parallel: number
12 | TimeLimit: number | null
13 | total_tokens: number
14 | customPrompt: string | null
15 | }
16 |
17 | // Default values kept in one constant so they are easy to maintain
18 | const defaultApiSettings: ApiSettings = {
19 | id: null,
20 | URL: '',
21 | key: '',
22 | name: '',
23 | time: '',
24 | parallel: 30,
25 | TimeLimit: null,
26 | total_tokens: 0,
27 | customPrompt: null
28 | }
29 |
30 | /**
31 |  * Pinia setup store holding the currently selected API configuration.
32 |  * `selectedApi` is persisted to localStorage under the key 'apiSettings'.
33 |  */
34 | export const useApiStore = defineStore(
35 |   'apiSettings',
36 |   () => {
37 |     // Initialise from a copy of the defaults so the shared constant is never mutated
38 |     const selectedApi = reactive({ ...defaultApiSettings })
39 |
40 |     /** Merge the given fields into the selected API; fields not supplied keep their current value. */
41 |     function setSelectedApi(api: Partial<ApiSettings>) {
42 |       Object.assign(selectedApi, api)
43 |     }
44 |
45 |     /** Reset every field of the selected API back to its default value. */
46 |     function clearSelectedApi() {
47 |       Object.assign(selectedApi, defaultApiSettings)
48 |     }
49 |
50 |     function setParallel(parallelSet: number) {
51 |       selectedApi.parallel = parallelSet
52 |     }
53 |
54 |     function setTimeLimit(TimeLimit: number | null) {
55 |       selectedApi.TimeLimit = TimeLimit
56 |     }
57 |
58 |     function setTotalTokens(total_tokens: number) {
59 |       selectedApi.total_tokens = total_tokens
60 |     }
61 |
62 |     /** Add `new_tokens` to the running token total. */
63 |     function addTotalTokens(new_tokens: number) {
64 |       selectedApi.total_tokens += new_tokens
65 |     }
66 |
67 |     function setCustomPrompt(prompt: string | null) {
68 |       selectedApi.customPrompt = prompt
69 |     }
70 |
71 |     return {
72 |       selectedApi,
73 |       setSelectedApi,
74 |       clearSelectedApi,
75 |       setParallel,
76 |       setTimeLimit,
77 |       setTotalTokens,
78 |       addTotalTokens,
79 |       setCustomPrompt
80 |     }
81 |   },
82 |   {
83 |     persist: {
84 |       key: 'apiSettings',
85 |       storage: localStorage,
86 |       pick: ['selectedApi'] // explicitly list the state paths to persist
87 |     }
88 |   }
89 | )
90 |
--------------------------------------------------------------------------------
/forge.config.ts:
--------------------------------------------------------------------------------
1 | import type { ForgeConfig } from '@electron-forge/shared-types'
2 | import { MakerSquirrel } from '@electron-forge/maker-squirrel'
3 | import { MakerZIP } from '@electron-forge/maker-zip'
4 | import { MakerDeb } from '@electron-forge/maker-deb'
5 | import { MakerRpm } from '@electron-forge/maker-rpm'
6 | import { VitePlugin } from '@electron-forge/plugin-vite'
7 | import { FusesPlugin } from '@electron-forge/plugin-fuses'
8 | import { FuseV1Options, FuseVersion } from '@electron/fuses'
9 | import path from 'path'
10 | import { AutoUnpackNativesPlugin } from '@electron-forge/plugin-auto-unpack-natives'
11 |
12 | const config: ForgeConfig = {
13 | packagerConfig: {
14 | asar: true,
15 | icon: path.resolve(__dirname, 'assets/logo.ico'),
16 | name: 'AutoDocxProofreading',
17 | executableName: 'DocxProofread',
18 | extraResource: ['node_modules/@lancedb/lancedb-win32-x64-msvc'] // NOTE(review): only the win32-x64 LanceDB package is copied; confirm the darwin/rpm/deb makers below are covered
19 | },
20 | rebuildConfig: {},
21 | makers: [
22 | new MakerSquirrel({
23 | name: 'AutoDocxProofreading',
24 | authors: 'CZ600',
25 | iconUrl: 'https://raw.githubusercontent.com/CZ600/AutoDocxProofread/base_url/assets/logo.ico', // NOTE(review): 'base_url' sits where a branch/ref name goes; confirm this URL resolves
26 | setupIcon: 'assets/logo.ico'
27 | }),
28 | new MakerZIP({}, ['darwin']),
29 | new MakerRpm({}),
30 | new MakerDeb({})
31 | ],
32 | plugins: [
33 | new VitePlugin({
34 | // `build` can specify multiple entry builds, which can be Main process, Preload scripts, Worker process, etc.
35 | // If you are familiar with Vite configuration, it will look really familiar.
36 | build: [
37 | {
38 | // `entry` is just an alias for `build.lib.entry` in the corresponding file of `config`.
39 | entry: 'src/main/main.ts',
40 | config: 'vite.main.config.ts'
41 | },
42 | {
43 | entry: 'src/main/preload.ts',
44 | config: 'vite.preload.config.ts'
45 | }
46 | ],
47 | renderer: [
48 | {
49 | name: 'main_window',
50 | config: 'vite.renderer.config.ts'
51 | }
52 | ]
53 | }),
54 | // Fuses are used to enable/disable various Electron functionality
55 | // at package time, before code signing the application
56 | new FusesPlugin({
57 | version: FuseVersion.V1,
58 | [FuseV1Options.RunAsNode]: false,
59 | [FuseV1Options.EnableCookieEncryption]: true,
60 | [FuseV1Options.EnableNodeOptionsEnvironmentVariable]: false,
61 | [FuseV1Options.EnableNodeCliInspectArguments]: false,
62 | [FuseV1Options.EnableEmbeddedAsarIntegrityValidation]: true,
63 | [FuseV1Options.OnlyLoadAppFromAsar]: true
64 | })
65 | ]
66 | }
67 |
68 | export default config
69 |
--------------------------------------------------------------------------------
/src/renderer/stores/store.ts:
--------------------------------------------------------------------------------
1 | // stores/store.ts
2 | import { defineStore, getActivePinia } from 'pinia'
3 |
/**
 * One proofreading suggestion as kept in the renderer's `fileInfo` store.
 */
export interface CorrectionResult {
  /** Identifier of this suggestion. */
  id: string
  /** Category label of the issue (free-form string). */
  type: string
  /** Text as it appears in the document. */
  original: string
  /** Proposed replacement text. */
  suggested: string
  /** Explanation given for the suggestion. */
  reason: string
  /** Flag recording whether the suggestion has been applied. */
  applied: boolean
}
12 |
/**
 * Pinia store describing the document currently being proofread: its path
 * and name, the proofreading model in use and the list of results.
 * All four fields are persisted to `localStorage` under the key `fileInfo`.
 */
export const fileInfoStore = defineStore('fileInfo', {
  state: () => ({
    filePath: '',
    fileName: '',
    proofModel: '',
    results: [] as CorrectionResult[]
  }),

  getters: {
    getFilePath: state => state.filePath,
    getFileName: state => state.fileName,
    getProofModel: state => state.proofModel,
    getResults: state => state.results,
    // Emptiness helpers: true while the corresponding field holds its initial value.
    isFilePathEmpty: state => !state.filePath,
    isFileNameEmpty: state => !state.fileName,
    isProofModelEmpty: state => !state.proofModel,
    isResultsEmpty: state => state.results.length === 0
  },

  actions: {
    setFilePath(filePath: string) {
      this.filePath = filePath
    },
    setFileName(fileName: string) {
      this.fileName = fileName
    },
    setProofModel(proofModel: string) {
      this.proofModel = proofModel
    },
    setCorrectResult(results: CorrectionResult[]) {
      this.results = results
    },
    /** Resets every field to its initial (empty) value. */
    clearAll() {
      this.filePath = ''
      this.fileName = ''
      this.proofModel = ''
      this.results = []
    }
  },

  // Persistence: entries must match the state field names exactly.
  // pinia-plugin-persistedstate v4 selects fields with `pick`; `paths` was the
  // v3 option name and is not part of the v4 options.
  persist: {
    key: 'fileInfo',
    storage: localStorage,
    pick: ['filePath', 'fileName', 'proofModel', 'results']
  }
})
60 |
/**
 * Pinia store for the knowledge-base view: the repository currently being
 * viewed and the embedding API settings (URL, key, model name).
 * All four fields are persisted to `localStorage` under the key `embeddingSet`.
 */
export const embeddingSet = defineStore('embeddingSet', {
  state: () => ({
    ActiveRepositoryName: '', // name of the repository currently being viewed
    apiURL: '', // embedding API settings
    apiKey: '',
    modelName: ''
  }),
  getters: {
    getActive: state => state.ActiveRepositoryName,
    getAPIURL: state => state.apiURL,
    getAPIKey: state => state.apiKey,
    getModelName: state => state.modelName
  },
  actions: {
    setActive(activeName: string) {
      this.ActiveRepositoryName = activeName
    },
    setURL(URL: string) {
      this.apiURL = URL
    },
    setKey(Key: string) {
      this.apiKey = Key
    },
    setModelName(Name: string) {
      this.modelName = Name
    },
    /** Resets every field to an empty string. */
    clearAll() {
      this.ActiveRepositoryName = ''
      this.apiKey = ''
      this.apiURL = ''
      this.modelName = ''
    }
  },
  // Persistence: entries must match the state field names exactly.
  // pinia-plugin-persistedstate v4 selects fields with `pick`; `paths` was the
  // v3 option name and is not part of the v4 options.
  // NOTE(review): `apiKey` is written to localStorage as-is — confirm this is acceptable.
  persist: {
    key: 'embeddingSet',
    storage: localStorage,
    pick: ['ActiveRepositoryName', 'apiURL', 'apiKey', 'modelName']
  }
})
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "AutoDocxProofreading",
3 | "productName": "AutoDocxProofreading",
4 | "version": "1.1.3",
5 | "description": "An intelligent long-document proofreading software powered by large language models. 基于大模型的简单的docx格式文档的校对",
6 | "main": ".vite/build/main.js",
7 | "scripts": {
8 | "start": "electron-forge start",
9 | "package": "electron-forge package",
10 | "make": "electron-forge make",
11 | "publish": "electron-forge publish",
12 | "release-major": "standard-version --release-as major",
13 | "release-minor": "standard-version --release-as minor",
14 | "release-patch": "standard-version --release-as patch",
15 | "lint:fix": "eslint --ext .js,.ts,.vue . --fix",
16 | "commit": "git add . && git-cz"
17 | },
18 | "eslintConfig": {
19 | "extends": "./.eslintrc.js"
20 | },
21 | "devDependencies": {
22 | "@commitlint/cli": "^19.3.0",
23 | "@commitlint/config-conventional": "^19.2.2",
24 | "@electron-forge/cli": "^7.4.0",
25 | "@electron-forge/maker-deb": "^7.4.0",
26 | "@electron-forge/maker-rpm": "^7.4.0",
27 | "@electron-forge/maker-squirrel": "^7.4.0",
28 | "@electron-forge/maker-wix": "^7.10.2",
29 | "@electron-forge/maker-zip": "^7.4.0",
30 | "@electron-forge/plugin-auto-unpack-natives": "^7.10.2",
31 | "@electron-forge/plugin-fuses": "^7.4.0",
32 | "@electron-forge/plugin-vite": "^7.4.0",
33 | "@electron/fuses": "^1.8.0",
34 | "@typescript-eslint/eslint-plugin": "^5.0.0",
35 | "@typescript-eslint/parser": "^5.0.0",
36 | "@vitejs/plugin-vue": "^5.0.4",
37 | "electron": "30.0.2",
38 | "eslint": "^8.0.1",
39 | "eslint-plugin-import": "^2.25.0",
40 | "eslint-plugin-vue": "^9.25.0",
41 | "git-cz": "^4.9.0",
42 | "husky": "^9.0.11",
43 | "lint-staged": "^15.2.2",
44 | "prettier": "^3.2.5",
45 | "standard-version": "^9.5.0",
46 | "ts-node": "^10.0.0",
47 | "typescript": "~4.5.4",
48 | "vite": "^5.0.12"
49 | },
50 | "keywords": [],
51 | "author": {
52 | "name": "CZ600",
53 | "url": "https://github.com/CZ600"
54 | },
55 | "license": "MIT",
56 | "dependencies": {
57 | "@google/genai": "^1.17.0",
58 | "@google/generative-ai": "^0.24.1",
59 | "@lancedb/lancedb": "^0.22.1",
60 | "@types/node": "^24.3.1",
61 | "@vueuse/core": "^14.0.0",
62 | "apache-arrow": "^18.1.0",
63 | "axios": "^1.11.0",
64 | "bottleneck": "^2.19.5",
65 | "cli-progress": "^3.12.0",
66 | "docx": "^9.5.1",
67 | "docx-preview": "^0.3.6",
68 | "docxtemplater": "^3.66.3",
69 | "electron-squirrel-startup": "^1.0.0",
70 | "element-plus": "^2.11.1",
71 | "fast-xml-parser": "^5.2.5",
72 | "file-saver": "^2.0.5",
73 | "jszip": "^3.10.1",
74 | "mammoth": "^1.11.0",
75 | "marked": "^16.3.0",
76 | "openai": "^5.19.1",
77 | "ora": "^8.2.0",
78 | "p-limit": "^7.1.1",
79 | "pdf-parse": "^2.1.7",
80 | "pinia": "^3.0.3",
81 | "pinia-plugin-persistedstate": "^4.5.0",
82 | "pizzip": "^3.2.0",
83 | "sqlite": "^5.1.1",
84 | "sqlite-vec": "^0.1.7-alpha.2",
85 | "sqlite3": "^5.1.7",
86 | "util": "^0.12.5",
87 | "uuid": "^13.0.0",
88 | "vue": "^3.4.27",
89 | "vue-demi": "^0.14.10",
90 | "vue-markdown": "^2.2.4",
91 | "vue-router": "^4.5.1"
92 | },
93 | "lint-staged": {
94 | "*.{js,ts,vue}": "npm run lint:fix"
95 | },
96 | "commitlint": {
97 | "extends": [
98 | "@commitlint/config-conventional"
99 | ]
100 | },
101 | "config": {
102 | "commitizen": {
103 | "path": "git-cz"
104 | }
105 | }
106 | }
--------------------------------------------------------------------------------
/src/main/wordProcess.ts:
--------------------------------------------------------------------------------
1 | import * as fs from 'fs-extra'
2 | import JSZip from 'jszip'
3 |
/** One literal find-and-replace rule applied to the document body text. */
interface Replacement {
  /** Literal text to look for (not a regular expression); empty values are skipped. */
  original: string
  /** Literal text inserted in place of every occurrence of `original`. */
  suggested: string
}

/** Matches one `<w:t>` text element: opening tag, raw (entity-encoded) text, closing tag. */
const W_T_ELEMENT = /(<w:t(?:\s[^>]*)?>)([^<]*)(<\/w:t>)/g

/** Escapes regular-expression metacharacters so `str` matches literally. */
function escapeRegExp(str: string): string {
  return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
}

/**
 * Decodes the five predefined XML entities into plain characters.
 * `&amp;` is decoded last so that `&amp;lt;` yields the text `&lt;`, not `<`.
 */
function decodeXmlEntities(str: string): string {
  return str
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&apos;/g, "'")
    .replace(/&amp;/g, '&')
}

/**
 * Encodes plain text as XML-safe character data.
 * `&` is encoded first so the entities produced afterwards are not re-escaped.
 */
function encodeXmlEntities(str: string): string {
  return str
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;')
}

/**
 * Replaces text in the body of a Word document. Only the content of `<w:t>`
 * elements in `word/document.xml` is touched:
 * 1. decode XML entities into readable text
 * 2. apply every replacement rule as a literal substitution
 * 3. re-encode the result as XML-safe text
 * 4. keep the surrounding XML structure exactly as it was
 *
 * A match must lie entirely inside a single `<w:t>` element. Elements whose
 * text is not changed are written back byte-for-byte.
 *
 * @param inputPath    path of the source .docx file
 * @param outputPath   path the resulting .docx file is written to
 * @param replacements replacement rules, applied in array order
 * @throws Error when `word/document.xml` is missing or empty; any read, zip or
 *         write error is logged and rethrown.
 */
export async function replaceTextInDocx(
  inputPath: string,
  outputPath: string,
  replacements: Replacement[]
): Promise<void> {
  // 1. Read the source archive.
  console.log('replace items:', replacements)
  const content = await fs.readFile(inputPath)
  const zip = await JSZip.loadAsync(content)

  // 2. Only the main document part is processed.
  const filePath = 'word/document.xml'
  const file = zip.file(filePath)
  if (!file) {
    throw new Error(`[ERROR] 正文文件不存在: ${filePath}`)
  }

  try {
    const xmlStr = await file.async('text')
    if (typeof xmlStr !== 'string' || xmlStr.trim() === '') {
      throw new Error(`[ERROR] 正文文件为空: ${filePath}`)
    }

    // Compile each rule once instead of once per text element.
    const rules = replacements
      .filter(({ original }) => Boolean(original))
      .map(({ original, suggested }) => ({
        pattern: new RegExp(escapeRegExp(original), 'g'),
        suggested
      }))

    let hasReplaced = false

    // 3. Rewrite the text of every <w:t> element.
    const newXmlStr = xmlStr.replace(
      W_T_ELEMENT,
      (match: string, openTag: string, textContent: string, closeTag: string) => {
        const plainText = decodeXmlEntities(textContent)
        console.log(`[INFO] 正在处理文本: ${plainText}`)

        let replacedText = plainText
        for (const { pattern, suggested } of rules) {
          // Function replacer: `suggested` is inserted verbatim, so sequences such
          // as `$&` or `$1` in the suggestion are not expanded as patterns.
          replacedText = replacedText.replace(pattern, () => suggested)
        }

        if (replacedText === plainText) {
          return match
        }
        hasReplaced = true
        return `${openTag}${encodeXmlEntities(replacedText)}${closeTag}`
      }
    )

    if (hasReplaced) {
      zip.file(filePath, newXmlStr)
      console.log(`[SUCCESS] 已替换正文文本: ${filePath}`)
    } else {
      console.log(`[INFO] 未找到匹配的替换内容: ${filePath}`)
    }

    // 4. Build the new DOCX archive.
    const buffer = await zip.generateAsync({
      type: 'nodebuffer',
      compression: 'DEFLATE',
      compressionOptions: { level: 6 }
    })

    // 5. Write the output file.
    await fs.writeFile(outputPath, buffer)
    console.log(`[INFO] 文档已保存至: ${outputPath}`)
  } catch (err) {
    console.error(`[FATAL] 处理正文文件失败: ${filePath}`, err)
    throw err
  }
}
--------------------------------------------------------------------------------
/vite.base.config.ts:
--------------------------------------------------------------------------------
1 | import { builtinModules } from 'node:module'
2 | import type { AddressInfo } from 'node:net'
3 | import type { ConfigEnv, Plugin, UserConfig } from 'vite'
4 | import pkg from './package.json'
5 |
/** `electron` plus every Node.js built-in module, both bare and with the `node:` prefix. */
export const builtins = ['electron', ...builtinModules.flatMap(m => [m, `node:${m}`])]

/**
 * Module ids kept external to the bundle: the built-ins, every package listed
 * under `dependencies` in package.json, and the LanceDB native packages.
 */
export const external = [
  ...builtins,
  ...Object.keys('dependencies' in pkg ? (pkg.dependencies as Record<string, unknown>) : {}),
  // LanceDB and its per-platform native packages are always external.
  '@lancedb/lancedb',
  '@lancedb/lancedb-win32-x64-msvc',
  '@lancedb/lancedb-darwin-arm64',
  '@lancedb/lancedb-darwin-x64',
  '@lancedb/lancedb-linux-arm64-gnu',
  '@lancedb/lancedb-linux-x64-gnu'
]
19 |
/**
 * Builds the Vite options shared by the main-process and preload builds.
 *
 * @param env Vite config environment; `root`, `mode` and `command` are read.
 * @returns Config that writes to `.vite/build` without emptying it, watches
 *          when `command` is `'serve'` and minifies when it is `'build'`.
 */
export function getBuildConfig(env: ConfigEnv<'build'>): UserConfig {
  const isServe = env.command === 'serve'
  const isBuild = env.command === 'build'

  const build: UserConfig['build'] = {
    // Prevent multiple builds from interfering with each other.
    emptyOutDir: false,
    // 🚧 Multiple builds may conflict.
    outDir: '.vite/build',
    watch: isServe ? {} : null,
    minify: isBuild
  }

  return {
    root: env.root,
    mode: env.mode,
    build,
    clearScreen: false
  }
}
37 |
/**
 * Maps each renderer name to the names of its two injected constants.
 *
 * @param names Renderer names, e.g. `['main_window']`.
 * @returns Object keyed by renderer name; for `main_window` the entry is
 *          `{ VITE_DEV_SERVER_URL: 'MAIN_WINDOW_VITE_DEV_SERVER_URL', VITE_NAME: 'MAIN_WINDOW_VITE_NAME' }`.
 */
export function getDefineKeys(names: string[]) {
  const entries = names.map((name): [string, VitePluginRuntimeKeys] => {
    const prefix = name.toUpperCase()
    return [
      name,
      {
        VITE_DEV_SERVER_URL: `${prefix}_VITE_DEV_SERVER_URL`,
        VITE_NAME: `${prefix}_VITE_NAME`
      }
    ]
  })

  const define: { [name: string]: VitePluginRuntimeKeys } = Object.fromEntries(entries)
  return define
}
51 |
/**
 * Builds the Vite `define` map for the named renderers in the Forge config.
 *
 * For each renderer, `<NAME>_VITE_DEV_SERVER_URL` is the JSON-encoded value of
 * the same-named environment variable while serving (and `undefined`
 * otherwise), and `<NAME>_VITE_NAME` is the JSON-encoded renderer name.
 *
 * @param env Vite config environment; `command` and `forgeConfig.renderer` are read.
 * @returns Object mapping constant names to their replacement values.
 */
export function getBuildDefine(env: ConfigEnv<'build'>) {
  const { command, forgeConfig } = env
  // Renderers without a name get no constants.
  const names = forgeConfig.renderer.filter(({ name }) => name != null).map(({ name }) => name)

  const define: Record<string, any> = {}
  for (const [name, keys] of Object.entries(getDefineKeys(names))) {
    const { VITE_DEV_SERVER_URL, VITE_NAME } = keys
    define[VITE_DEV_SERVER_URL] =
      command === 'serve' ? JSON.stringify(process.env[VITE_DEV_SERVER_URL]) : undefined
    define[VITE_NAME] = JSON.stringify(name)
  }

  return define
}
70 |
/**
 * Vite plugin that publishes a renderer's dev server to the rest of the build.
 *
 * On `configureServer` it stores the server in `process.viteDevServers[name]`
 * and, once the HTTP server is listening, writes `http://localhost:<port>`
 * to the `<NAME>_VITE_DEV_SERVER_URL` environment variable.
 *
 * @param name Renderer name, e.g. `main_window`.
 */
export function pluginExposeRenderer(name: string): Plugin {
  const devServerUrlKey = getDefineKeys([name])[name].VITE_DEV_SERVER_URL

  return {
    name: '@electron-forge/plugin-vite:expose-renderer',
    configureServer(server) {
      // Expose server for preload scripts hot reload.
      const registry = (process.viteDevServers ??= {})
      registry[name] = server

      const publishUrl = () => {
        const addressInfo = server.httpServer?.address() as AddressInfo
        // Expose env constant for main process use.
        process.env[devServerUrlKey] = `http://localhost:${addressInfo?.port}`
      }
      server.httpServer?.once('listening', publishUrl)
    }
  }
}
89 |
/**
 * Vite plugin that reacts when a bundle has been rebuilt.
 *
 * - `'reload'`: sends a `full-reload` message to every dev server stored in
 *   `process.viteDevServers` (used for preload scripts). When no server has
 *   been registered yet this is a no-op instead of a TypeError.
 * - `'restart'`: emits `rs` on `process.stdin`, which Electron Forge treats
 *   as a request to restart the main process.
 *
 * @param command Which of the two reactions to perform on `closeBundle`.
 */
export function pluginHotRestart(command: 'reload' | 'restart'): Plugin {
  return {
    name: '@electron-forge/plugin-vite:hot-restart',
    closeBundle() {
      if (command === 'reload') {
        // `process.viteDevServers` is only created once a renderer dev server starts.
        for (const server of Object.values(process.viteDevServers ?? {})) {
          // Preload scripts hot reload.
          server.ws.send({ type: 'full-reload' })
        }
      } else {
        // Main process hot restart.
        // https://github.com/electron/forge/blob/v7.2.0/packages/api/core/src/api/start.ts#L216-L223
        process.stdin.emit('data', 'rs')
      }
    }
  }
}
107 |
--------------------------------------------------------------------------------
/src/renderer/views/history.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | Detail
11 |
12 |
13 | Delete
14 |
15 |
16 |
17 |
18 |
19 | Delete All
20 |
21 |
22 |
23 |
24 | {{ detailContent }}
25 |
26 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
118 |
119 |
--------------------------------------------------------------------------------
/src/main/main.ts:
--------------------------------------------------------------------------------
1 | import { app, BrowserWindow, session } from 'electron' // app是必须引入的,
2 | import path from 'path'
3 |
4 | // 将LanceDB原生模块路径添加到PATH环境变量,确保运行时能正确加载
5 |
6 | import { registerIpcHandlers } from './ipcHandlers'
7 | import { initLanceDB } from './lancedb'
8 | // main.js 或主进程中的其他文件
9 | // main.js 或打包入口
10 |
// Browser-API stand-ins for the pdf-parse library.
// They exist only so pdf-parse can be used in the Node.js main process; each
// one is an empty class and is installed only when the global is missing.
{
  const globalScope = global as any

  if (globalScope.DOMMatrix === undefined) {
    globalScope.DOMMatrix = class DOMMatrix {}
  }

  if (globalScope.ImageData === undefined) {
    globalScope.ImageData = class ImageData {}
  }

  if (globalScope.Path2D === undefined) {
    globalScope.Path2D = class Path2D {}
  }
}
36 |
// Handle creating/removing shortcuts on Windows when installing/uninstalling:
// when electron-squirrel-startup reports that it took care of the launch, quit.
const handledBySquirrel: boolean = require('electron-squirrel-startup')
if (handledBySquirrel) {
  app.quit()
}
41 |
/**
 * Creates the main application window and loads the renderer into it.
 *
 * With a Vite dev server (`MAIN_WINDOW_VITE_DEV_SERVER_URL` set) the window
 * loads that URL and opens DevTools; otherwise it loads the built
 * `index.html` of the `MAIN_WINDOW_VITE_NAME` renderer.
 */
const createWindow = () => {
  // NOTE(review): the icon is read from `<resources>/assets/logo.ico`; confirm
  // that the packaging step actually copies `assets` into the resources folder.
  const iconPath = path.join(process.resourcesPath, 'assets', 'logo.ico')

  const mainWindow = new BrowserWindow({
    width: 1300,
    height: 1200,
    title: 'AutoDocxProofreading',
    // autoHideMenuBar: true, // would hide the menu bar
    // The same icon is used on every platform (the former Linux-only spread
    // set exactly this value again).
    icon: iconPath,

    webPreferences: {
      preload: path.join(__dirname, 'preload.js'),
      nodeIntegration: false,
      contextIsolation: true
    },
    // Window chrome: remove the default title bar ...
    titleBarStyle: 'hidden',
    // ... and style the window-controls overlay. This object is applied on all
    // platforms; the former `titleBarOverlay: true` spread for non-macOS was
    // always overwritten by it, so it has been dropped.
    titleBarOverlay: {
      color: 'rgba(255, 255, 255, 0)',
      symbolColor: '#807e85ff',
      height: 60
    }
  })

  // Load the index.html of the app.
  if (MAIN_WINDOW_VITE_DEV_SERVER_URL) {
    mainWindow.loadURL(MAIN_WINDOW_VITE_DEV_SERVER_URL)
    // Open the DevTools.
    mainWindow.webContents.openDevTools()
  } else {
    mainWindow.loadFile(path.join(__dirname, `../renderer/${MAIN_WINDOW_VITE_NAME}/index.html`))
  }
}
79 |
// Runs once Electron has finished initialising: opens the main window,
// installs the CSP header, wires macOS re-activation, exposes the LanceDB
// native module location (packaged builds only) and initialises LanceDB.
app.whenReady().then(async () => {
  console.log('app is ready')
  console.log('then will create a window')

  console.log('中文测试')
  createWindow()

  // Add a Content-Security-Policy header to every response to limit
  // cross-site scripting (XSS) and other code-injection attacks.
  session.defaultSession.webRequest.onHeadersReceived((details, callback) => {
    callback({
      responseHeaders: {
        ...details.responseHeaders,
        'Content-Security-Policy': ["script-src 'self'"]
      }
    })
  })

  app.on('activate', () => {
    // On macOS it's common to re-create a window in the app when the
    // dock icon is clicked and there are no other windows open.
    if (BrowserWindow.getAllWindows().length === 0) {
      createWindow()
    }
  })

  // A defined dev-server URL means we are running in development.
  const isDev = MAIN_WINDOW_VITE_DEV_SERVER_URL !== undefined

  // Packaged builds: the native module is expected under `<install dir>/resources`.
  // Development: nothing is configured.
  let nativeModulePath: string | undefined
  if (!isDev) {
    const installDir = path.dirname(app.getPath('exe'))
    nativeModulePath = path.join(installDir, 'resources', 'lancedb-win32-x64-msvc')
  }

  // Only touch the environment when there is a real path: assigning
  // `undefined` to a `process.env` entry stores the string "undefined".
  if (nativeModulePath !== undefined) {
    process.env.LANCEDB_NATIVE_PATH = nativeModulePath
    // `path.delimiter` is ';' on Windows, the separator previously hard-coded here.
    process.env.PATH = [nativeModulePath, process.env.PATH].filter(Boolean).join(path.delimiter)
  }

  try {
    await initLanceDB()
    console.log('LanceDB initialized successfully')
  } catch (error) {
    // Best effort: the app keeps running without LanceDB.
    console.error('Failed to initialize LanceDB:', error)
  }
})
132 |
// Quit when all windows are closed, except on macOS. There, it's common
// for applications and their menu bar to stay active until the user quits
// explicitly with Cmd + Q.
app.on('window-all-closed', () => {
  const isMac = process.platform === 'darwin'
  if (isMac) {
    return
  }
  app.quit()
})
142 |
143 | registerIpcHandlers()
144 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AutoDocxProof 智能文档校对应用
2 |
3 |
4 |
5 |
6 |
7 |
8 | 一款基于 Electron、Vue 3 和 TypeScript 构建的智能长文档校对桌面应用程序
9 |
10 |
11 | ## 📝 项目简介
12 |
13 | AutoDocxProofread(智能校对)是一款专为长文档校对而设计的桌面应用程序。它能够帮助用户有效检测 Word 文档中的错别字、标点符号错误、语法问题和文本一致性问题,并提供修改建议。
14 |
15 | 针对大模型在处理长文档时存在的遗忘和幻觉问题,软件设计了专门的架构来增强校对的准确性,并能直接导出校对后的文档。此外,软件采用并行处理架构,显著提升了大模型处理长文档的速度。新版本引入了本地知识库功能,可通过 RAG(检索增强生成)为模型校对提供参考资料。
16 |
17 | ### 更新记录
18 |
19 | - v1.1.3
20 | - 新增了请求频率限制,优化对于第三方api转接站的支持
21 | - 优化深色模式显示效果
22 | - 新增token用量统计
23 | - 优化界面效果,改善交互逻辑
24 | - v1.1.2
25 | - 修复全文润色模式中,不使用rag功能时无法正常校对的bug
26 | - 新增昼夜模式切换功能
27 | - 模型并发限制可调,可以适应不同的api供应商对于并发的限制要求
28 | - v1.1.1
29 | - 修复rag功能可用性bug
30 | - v1.1.0
31 | - 重构界面,优化使用逻辑
32 | - 提升软件可用性
33 |
34 | ### 核心功能与软件优势
35 |
36 | - **多种校对模式**:
37 | - 逐句精校:适合需要高精度校对的短文本
38 | - 逐段校正:适合长篇文献的校对
39 | - 全文润色:对整篇文档进行语言润色和优化
40 |
41 | - **智能错误识别**:
42 | - 错别字检测
43 | - 标点符号错误识别
44 | - 语法问题检测
45 |
46 | - **知识库系统**:
47 | - 创建和管理多个本地知识库
48 | - 支持PDF、word和txt文档导入作为参考材料
49 | - 基于向量数据库的RAG检索增强生成算法
50 |
51 | - **更快的处理速度和用户友好的操作体验**:
52 | - 使用并行处理的方式优化处理效率,显著提升对于长文本的校对速度
53 | - 清晰的错误展示和修改建议
54 | - 一键应用修改建议
55 | - 响应式设计,支持窗口缩放
56 |
57 | - **便捷的 API 配置管理**:
58 | - 兼容openai接口,支持多种大语言模型 API
59 | - 灵活的 API 配置管理
60 | - 支持对于并发数量和请求速度的设置
61 |
62 | - **清晰的历史记录管理**:
63 | - 清晰查看历史记录,包括时间、校对模型、校对文件路径和具体的结果
64 | - 支持对结果的批量管理
65 |
66 | ### 使用展示
67 |
68 | 用户需要先在功能设置页面选择一个大模型后再开始校对操作。在文档校对页面,首先选择需要校对的文档后,再选择校对模式,选择使用的知识库(非必选),然后开始校对。软件会将校对的结果显示在右边栏,并在文本中高亮展示,以方便查看。然后可以选择是否接受这些修改,可以导出接受修改后的文档:
69 |
70 | 
71 |
72 | 本应用可以自行设置api,兼容满足openai规范的api接口,推荐使用非推理模型,并且可以限制并发请求数量:
73 |
74 | 
75 | 
76 |
77 | 本应用还可以浏览和管理校对记录:
78 |
79 | 
80 |
81 | 知识库管理界面:
82 |
83 | 
84 |
85 | 昼夜模式切换效果:
86 |
87 | 
88 |
89 | > 注意:校对结果的准确度很大程度上取决于模型能力,软件无法保证校对结果的完全准确,还需要人工再次检验。
90 |
91 | > 提示1:结果导出功能尚不完善,无法精准地将所有结果应用到文档中,可能存在疏漏。
92 |
93 | > 提示2:全文润色功能适合较短篇幅的文档。逐句校对对token的消耗很大。
94 |
95 | ## 🛠 技术栈
96 |
97 | - **主框架**:[Electron](https://www.electronjs.org/) + [Vue 3](https://vuejs.org/) + [TypeScript](https://www.typescriptlang.org/)
98 | - **UI 组件库**:[Element Plus](https://element-plus.org/)
99 | - **构建工具**:[Vite](https://vitejs.dev/) + [Electron Forge](https://www.electronforge.io/)
100 | - **文档处理**:[Mammoth](https://github.com/mwilliamson/mammoth.js) + [Docxtemplater](https://github.com/open-xml-templating/docxtemplater)
101 | - **向量数据库**:[LanceDB](https://lancedb.com/)
102 | - **代码规范**:[ESLint](https://eslint.org/) + [Prettier](https://prettier.io/)
103 | - **版本管理**:[Standard Version](https://github.com/conventional-changelog/standard-version)
104 |
105 | ## 🚀 快速开始
106 |
107 | ### 环境要求
108 |
109 | - Node.js >= 16.x
110 | - npm 或 yarn
111 |
112 | ### 安装依赖
113 |
114 | ```bash
115 | npm install
116 | ```
117 |
118 | ### 开发模式运行
119 |
120 | ```bash
121 | npm run start
122 | ```
123 |
124 | ## 📦 项目结构
125 |
126 | ```
127 | .
128 | ├── src/
129 | │ ├── main/ # 主进程代码
130 | │ │ ├── chat.ts # AI 对话相关功能
131 | │ │ ├── database.ts # 数据库操作
132 | │ │ ├── ipcHandlers.ts # IPC 通信处理
133 | │ │ ├── lancedb.ts # 向量数据库操作
134 | │ │ ├── main.ts # 主进程入口
135 | │ │ ├── pdfUtils.ts # PDF文档处理
136 | │ │ ├── preload.ts # 预加载脚本
137 | │ │ ├── proof.ts # 文档校对核心逻辑
138 | │ │ └── wordProcess.ts # Word 文档处理
139 | │ └── renderer/ # 渲染进程代码
140 | │ ├── router/ # 路由配置
141 | │ ├── stores/ # Pinia存储目录
142 | │ ├── views/ # 页面组件
143 | │ ├── App.vue # 根组件
144 | │ └── renderer.ts # 渲染进程入口
145 | ├── assets/ # 静态资源
146 | ├── out/ # 构建输出目录
147 | └── forge.config.ts # Electron Forge 配置
148 | ```
149 |
150 | ## 🎯 使用指南
151 |
152 | ### 1. 配置 API
153 |
154 | 首次使用需要配置支持的大语言模型 API:
155 |
156 | 1. 点击导航栏中的"工作区"
157 | 2. 选择"API 设置"选项卡
158 | 3. 填写 API 地址、密钥和模型名称
159 | 4. 点击"测试连接"验证配置
160 | 5. 点击"保存配置"保存设置
161 |
162 | ### 2. 创建知识库
163 |
164 | 1. 点击导航栏中的"知识库"
165 | 2. 选择"Embedding模型"(需要选择专门的embedding模型)
166 | 3. 点击"添加知识库"按钮创建新知识库
167 | 4. 选择知识库后可添加 PDF、Word 或 TXT 文件作为参考材料
168 |
169 | ### 3. 文档校对
170 |
171 | 1. 点击导航栏中的"工作区"
172 | 2. 选择"文档校对"选项卡
173 | 3. 点击"选择 DOCX 文件"按钮选择要校对的 Word 文档
174 | 4. (可选)选择知识库以增强校对准确性
175 | 5. 选择合适的校对模式:
176 | - **逐句精校**:适合需要高精度校对的短文本
177 | - **逐段校正**:适合长篇文献的校对
178 | - **全文润色**:对整篇文档进行语言润色和优化
179 | 6. 点击"开始校正"按钮开始校对过程
180 | 7. 在右侧栏查看校对结果和修改建议
181 | 8. 点击"应用修改"按钮接受建议的修改
182 | 9. 点击"导出结果"按钮保存修改后的文档
183 |
184 | ## 🔧 开发计划
185 |
186 | 1. 大语言模型的格式化输出转word文档
187 | 2. 增强用户界面交互体验
188 | 3. 优化.docx文件的处理算法
189 |
190 | ## 📄 许可证
191 |
192 | 本项目采用 MIT 许可证 - 查看 [LICENSE](LICENSE) 文件了解详情
193 |
194 | ## 致谢
195 |
196 | 部分代码参考了 night-peiqi 的 [electron-vue3-typescript-template](https://github.com/night-peiqi/electron-vue3-typescript-template) 模板项目。
197 |
--------------------------------------------------------------------------------
/src/renderer/electron.d.ts:
--------------------------------------------------------------------------------
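1 | /**
2 | * Type declarations for the Electron API exposed by preload.ts; keep the two in sync.
3 | * They tell TypeScript which properties the preload script adds to the global `Window`
4 | * object, so that misuse is caught at compile time
5 | * instead of surfacing as a runtime error. */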
6 | import { proofreadLargeDocument, ProofreadingCorrection } from './proof'
7 | import { apiSettings } from './ipcHandlers'
8 |
/** One proofreading history record as exchanged with the main process. */
export interface proofHistory {
  /** Record id; optional in this shape. */
  id?: number
  /** Creation timestamp as a string; optional in this shape. */
  created_at?: string
  /** Path of the proofread document. */
  filePath: string
  /** URL of the API that was used. */
  apiURL: string
  /** Name of the model that was used. */
  modelName: string
  /** Proofreading result stored as a string. */
  result: string
}
17 |
// Parameter shapes for the LanceDB and document-import IPC calls.

/** Payload for inserting a piece of text into a repository. */
export interface LanceDBInsertParams {
  repositoryName: string
  fileName: string
  text: string
  /** Free-form metadata stored alongside the text. */
  metadata: Record<string, unknown>
}

/** Payload for querying a repository by text. */
export interface LanceDBQueryParams {
  queryText: string
  /** Maximum number of results to return. */
  limit: number
  filter: string
  fileName: string
}

/** Payload for updating one stored entry. */
export interface LanceDBUpdateParams {
  repositoryName: string
  id: number
  text: string
  /** Free-form metadata stored alongside the text. */
  metadata: Record<string, unknown>
}

/** Payload for deleting one stored entry. */
export interface LanceDBDeleteParams {
  repositoryName: string
  id: number
}

/** Connection settings for a model endpoint. */
export interface ModelConfig {
  modelName: string
  apiKey: string
  apiURL: string
}

/** Payload for processing a document at a known path. */
export interface PDFProcessParams {
  repositoryName: string
  filePath: string
  /** Free-form metadata attached to the document. */
  metadata: Record<string, unknown>
}

/** Payload for letting the user pick a document and then processing it. */
export interface PDFSelectAndProcessParams {
  repositoryName: string
  /** Free-form metadata attached to the document. */
  metadata: Record<string, unknown>
}

/** Payload for fetching the chunks of one document. */
export interface PDFGetChunksParams {
  documentId: string
  repositoryName: string
}
66 |
/**
 * Shape of `window.electronAPI` as exposed by the preload script.
 *
 * Every member that the preload script implements with `ipcRenderer.invoke`
 * resolves asynchronously and is therefore typed as returning a `Promise`;
 * callers must `await` the result.
 *
 * TODO: members typed `Promise<any>` still need the concrete payload type
 * returned by the matching main-process handler.
 */
/* eslint-disable @typescript-eslint/no-explicit-any */
export default interface ElectronApi {
  // test
  /** One-way message to the main process (`ipcRenderer.send`); nothing is returned. */
  message: (file: string) => void
  receiveAndReturn: (characters: string) => Promise<string>
  /** Set by the preload script to `process.version`. */
  test: string

  // Document loading
  selectDocxFile: () => Promise<string>
  readDocxFile: (filePath: string) => Promise<{
    path: string
    content: string
  }>

  // API settings management (backed by the database)
  APISettings: (URL: string, Key: string, modelName: string) => Promise<any>
  getAPISettings: () => Promise<{
    URL: string
    Key: string
    modelName: string
    parallel?: number
    TimeLimit?: number | null
  }>
  deleteOneAPI: (id: number) => Promise<{
    isSuccess: boolean
  }>
  getALLAPISettings: () => Promise<
    {
      id: number
      URL: string
      Key: string
      modelName: string
      created_at: string
    }[]
  >
  testAPI: (url: string, key: string, modelName: string) => Promise<boolean>
  selectAPISetting: (
    url: string,
    key: string,
    modelName: string,
    parallel?: number,
    TimeLimit?: number | null
  ) => Promise<boolean>

  // Document proofreading
  processDocx: (
    model: string,
    filePath: string,
    repositoryNameList?: string[],
    embeddingConfig?: apiSettings,
    setTimeLimit?: number,
    parallelSet?: number
  ) => Promise<{
    proofResult: ProofreadingCorrection[]
    token_usage: number
  }>
  exportCorrectedDocx: (config: any) => Promise<any>

  // Prompt handling
  getDefaultPrompt: () => Promise<any>
  setNewPrompt: (newPrompt: string) => Promise<any>

  // History records
  getAllHistory: () => Promise<proofHistory[]>
  deleteAllHistory: () => Promise<any>
  getHistoryById: (id: number) => Promise<any>
  deleteHistoryById: (id: number) => Promise<any>
  insertOneHistory: (filePath: string, apiURL: string, modelName: string, resultCorrect: string) => Promise<any>

  // LanceDB
  lancedbInsert: (params: LanceDBInsertParams, modelConfig: ModelConfig) => Promise<any>
  lancedbQuery: (params: LanceDBQueryParams, modelConfig: ModelConfig) => Promise<any>
  lancedbUpdate: (params: LanceDBUpdateParams, modelConfig: ModelConfig) => Promise<any>
  lancedbDelete: (params: LanceDBDeleteParams) => Promise<any>
  listRepositories: () => Promise<any>
  createRepository: (params: {
    repositoryName: string
    modelName: string
    apiKey: string
    apiURL: string
  }) => Promise<any>
  deleteRepository: (repositoryName: string) => Promise<any>
  deleteDocumentByName: (repositoryName: string, filename: string) => Promise<any>
  listFilenamesInRepository: (repositoryName: string) => Promise<any>

  // Document import (PDF, TXT and DOCX files)
  processPDF: (params: PDFProcessParams, modelConfig: ModelConfig) => Promise<any>
  selectAndProcessPDF: (repositoryName: string, modelConfig: ModelConfig) => Promise<any>
  getPDFChunks: (params: PDFGetChunksParams) => Promise<any>

  // Embedding API settings
  getEmbeddingAPI: () => Promise<{ URL: string; Key: string; modelName: string }>
  setEmbeddingAPI: (apiKey: string, apiURL: string, modelName: string) => Promise<any>
  getEnvPath: () => Promise<any>
}
/* eslint-enable @typescript-eslint/no-explicit-any */

declare global {
  interface Window {
    /** API object injected by the preload script. */
    electronAPI: ElectronApi
  }
}
164 |
--------------------------------------------------------------------------------
/src/renderer/App.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |

8 |
智能校对
9 |
10 |
11 |
12 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
108 |
109 |
--------------------------------------------------------------------------------
/src/main/preload.ts:
--------------------------------------------------------------------------------
1 | import { contextBridge, ipcRenderer } from 'electron'
2 | import { get } from 'http'
3 | import test from 'node:test'
4 | import { setNewPrompt } from './proof'
5 | import { apiSettings } from './database'
6 |
7 | console.log('this message from the preload')
8 |
9 | // contextBridge.exposeInMainWorld 是一个安全机制,它允许你在预加载脚本中定义一些函数或对象,并将它们注入到网页的全局 window 对象中。
10 | // 第一个参数 electronAPI 表示将要挂载到window上的属性名称
11 | // 第二个参数,是一个对象,包含了想要暴露给前端的函数或者值
// Expose the renderer-facing API under the key 'electronAPI'.
// Every entry forwards its arguments to the main process over a named IPC channel.
contextBridge.exposeInMainWorld('electronAPI', {
  // `message`: takes a string and sends it to the main process (one-way).
  message: (message: string) => {
    // One-way communication on the 'message' channel; the payload is the string passed in.
    ipcRenderer.send('message', message) // asynchronous send; no reply is awaited
  },
  // `receiveAndReturn`: takes a string and returns the main process's reply.
  receiveAndReturn: (message: string) => {
    // Two-way (request/response) communication through ipcRenderer.invoke(); also asynchronous.
    return ipcRenderer.invoke('receiveAndReturn', message) // resolves with what the main-process ipcMain.handle handler returns
    // invoke() returns a promise, so the renderer has to await it.
  },
  // Node.js version string of the preload process.
  test: process.version,

  selectDocxFile: () => ipcRenderer.invoke('select-docx-file'),
  // Optional: use this when the main process should read the file content.
  readDocxFile: (filePath: string) => ipcRenderer.invoke('read-docx-file', filePath),
  APISettings: (url: string, key: string, modelName: string) =>
    ipcRenderer.invoke('set-api', url, key, modelName),
  getALLAPISettings: () => ipcRenderer.invoke('get-all-api-settings', {}),
  deleteOneAPI: (id: number) => ipcRenderer.invoke('delete-one-api-setting', id),
  testAPI: (url: string, key: string, modelName: string) => ipcRenderer.invoke('test-api', url, key, modelName),
  selectAPISetting: (url: string, key: string, modelName: string, parallel?: number, TimeLimit?: number | null) =>
    ipcRenderer.invoke('selectAPISetting', url, key, modelName, parallel, TimeLimit),
  getAPISettings: () => ipcRenderer.invoke('get-api-settings', {}),
  // Document proofreading entry point.
  processDocx: (
    model: string,
    filePath: string,
    repositoryNameList?: string[],
    embeddingConfig?: apiSettings,
    setTimeLimit?: number,
    parallelSet?: number
  ) => {
    // Make sure the arguments are serializable: the array and the config object are
    // shallow-copied before being sent. A falsy `setTimeLimit` (including 0) is sent as
    // undefined, and a falsy `parallelSet` (including 0) is replaced by 30.
    const serializableParams = {
      model,
      filePath,
      repositoryNameList: repositoryNameList ? [...repositoryNameList] : undefined,
      embeddingConfig: embeddingConfig ? { ...embeddingConfig } : undefined,
      setTimeLimit: setTimeLimit || undefined,
      parallelSet: parallelSet || 30
    }

    return ipcRenderer.invoke(
      'process-docx',
      serializableParams.model,
      serializableParams.filePath,
      serializableParams.repositoryNameList,
      serializableParams.embeddingConfig,
      serializableParams.setTimeLimit,
      serializableParams.parallelSet
    )
  },

  // Export the corrections into a document file.
  exportCorrectedDocx: (config: any) => {
    // Make sure the argument is serializable: a JSON round-trip yields a plain deep copy.
    const serializableConfig = JSON.parse(JSON.stringify(config))
    return ipcRenderer.invoke('exportCorrectedDocx', serializableConfig)
  },
  // Get the default prompt.
  getDefaultPrompt: () => ipcRenderer.invoke('getDefaultPrompt'),
  // Set a new prompt.
  setNewPrompt: (prompt: string) => ipcRenderer.invoke('setPrompt', prompt),
  // Delete all history records.
  deleteAllHistory: () => ipcRenderer.invoke('deleteAllHistory'),
  // Get all history records.
  getAllHistory: () => ipcRenderer.invoke('getAllHistory'),
  // Get the history record with the given id.
  getHistoryById: (id: number) => ipcRenderer.invoke('getHistoryById', id),
  // Delete the history record with the given id.
  deleteHistoryById: (id: number) => ipcRenderer.invoke('deleteHistoryById', id),
  // Insert one history record.
  insertOneHistory: (filePath: string, apiURL: string, modelName: string, resultCorrect: string) =>
    ipcRenderer.invoke('insertOneHistory', filePath, apiURL, modelName, resultCorrect),

  // LanceDB-related channels.
  lancedbInsert: (params: any, modelConfig: any) => ipcRenderer.invoke('lancedb:insert', params, modelConfig),
  lancedbQuery: (params: any, modelConfig: any) => ipcRenderer.invoke('lancedb:query', params, modelConfig),
  lancedbUpdate: (params: any, modelConfig: any) => ipcRenderer.invoke('lancedb:update', params, modelConfig),
  lancedbDelete: (params: any) => ipcRenderer.invoke('lancedb:delete', params),
  listRepositories: () => ipcRenderer.invoke('listRepositories'),
  createRepository: (params: any) => ipcRenderer.invoke('createRepository', params),
  deleteRepository: (repositoryName: string) => ipcRenderer.invoke('deleteRepository', repositoryName),
  deleteDocumentByName: (repositoryName: string, filename: string) =>
    ipcRenderer.invoke('deleteDocumentByName', repositoryName, filename),
  listFilenamesInRepository: (repositoryName: string) =>
    ipcRenderer.invoke('listFilenamesInRepository', repositoryName),
  // PDF-processing channels.
  processPDF: (params: any, modelConfig: any) => ipcRenderer.invoke('pdf:process', params, modelConfig),
  selectAndProcessPDF: (repositoryName: string, modelConfig: any) =>
    ipcRenderer.invoke('pdf:select-and-process', repositoryName, modelConfig),
  getPDFChunks: (params: any) => ipcRenderer.invoke('pdf:get-chunks', params),
  setEmbeddingAPI: (apiKey: string, apiURL: string, modelName: string) =>
    ipcRenderer.invoke('setEmbeddingAPI', apiKey, apiURL, modelName),
  getEmbeddingAPI: () => ipcRenderer.invoke('getEmbeddingAPI'),
  getEnvPath: () => ipcRenderer.invoke('getEnvPath') // debugging aid, used to check the packaged build
})
113 |
--------------------------------------------------------------------------------
/src/main/database.ts:
--------------------------------------------------------------------------------
1 | import { app } from 'electron' // 新增导入
2 | import { Database, open } from 'sqlite'
3 | import sqlite3 from 'sqlite3'
4 | import path from 'path' // 新增导入
5 | import fs from 'fs'
6 | import { promises } from 'dns'
7 | import { b } from 'vite/dist/node/types.d-aGj9QkWt'
8 | import { getEmbedding } from './chat'
9 |
// Data type definitions (for TypeScript type safety).
/**
 * A user record with a name and an e-mail address.
 * NOTE(review): the DB class in this file does not reference this type — confirm it is still needed.
 */
export interface User {
  id?: number
  name: string
  email: string
  created_at?: string
}
17 |
/**
 * One saved API configuration; the fields mirror the columns of the `api_settings` table.
 * `id` and `created_at` are optional because the table generates them
 * (AUTOINCREMENT primary key and CURRENT_TIMESTAMP default).
 */
export interface apiSettings {
  id?: number
  apiURL: string
  apiKey: string
  modelName: string
  created_at?: string
}
25 |
/**
 * One proofreading history entry; the fields mirror the columns of the `proof_history` table.
 * `id` and `created_at` are generated by the table. `result` holds JSON text:
 * `DB.insertOneHistory` rejects values that `JSON.parse` cannot read.
 */
export interface proofHistory {
  id?: number
  filePath: string
  apiURL: string
  modelName: string
  created_at?: string
  result: string
}
34 |
/**
 * Static gateway to the application's SQLite database. It manages two tables:
 * `api_settings` (saved API configurations) and `proof_history` (proofreading results).
 * The connection is opened lazily on first use and then reused.
 */
export class DB {
  /** Open connection; assigned only after the schema has been created. */
  private static instance: Database | undefined
  /** Pending initialisation, shared so overlapping first calls open the database only once. */
  private static opening: Promise<Database> | undefined

  /** Database file location: `<userData>/data/app.db` (a sub-directory keeps the userData root clean). */
  private static get DB_PATH(): string {
    const userDataPath = app.getPath('userData')
    return path.join(userDataPath, 'data', 'app.db')
  }

  /**
   * Returns the shared connection, opening the database and creating the tables on first use.
   * If initialisation fails the error is propagated and the next call retries from scratch.
   */
  static async getInstance(): Promise<Database> {
    if (DB.instance) {
      return DB.instance
    }
    if (!DB.opening) {
      // Callers arriving while the first open is still running share the same promise.
      DB.opening = DB.openAndMigrate().finally(() => {
        DB.opening = undefined
      })
    }
    return DB.opening
  }

  /** Creates the data directory, opens the database file and ensures both tables exist. */
  private static async openAndMigrate(): Promise<Database> {
    const dbPath = DB.DB_PATH
    await fs.promises.mkdir(path.dirname(dbPath), { recursive: true })

    const db = await open({
      filename: dbPath,
      driver: sqlite3.Database
    })

    // Table holding the saved API settings.
    await db.exec(`
      CREATE TABLE IF NOT EXISTS api_settings (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        apiURL TEXT NOT NULL,
        apiKey TEXT NOT NULL,
        modelName TEXT NOT NULL,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP
      )
    `)
    // Table holding the proofreading history.
    await db.exec(`
      CREATE TABLE IF NOT EXISTS proof_history (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        filePath TEXT NOT NULL,
        apiURL TEXT NOT NULL,
        modelName TEXT NOT NULL,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        result TEXT NOT NULL
      )
    `)

    // Publish the handle only once the schema is in place, so a failed setup is retried.
    DB.instance = db
    return db
  }

  /** Number of rows in `api_settings`. */
  static async getAPISettingsCount(): Promise<number> {
    const db = await DB.getInstance()
    const row = await db.get<{ count: number }>(`SELECT COUNT(*) as count FROM api_settings`)
    return row?.count ?? 0
  }

  /** Number of rows in `proof_history`. */
  static async getHistoryCount(): Promise<number> {
    const db = await DB.getInstance()
    const row = await db.get<{ count: number }>(`SELECT COUNT(*) as count FROM proof_history`)
    return row?.count ?? 0
  }

  /**
   * Inserts one API settings row.
   * @returns the id of the new row
   * @throws the underlying database error (after logging it)
   */
  static async insertAPISetting(apiURL: string, apiKey: string, modelName: string): Promise<number> {
    const db = await DB.getInstance()
    try {
      const result = await db.run(
        `INSERT INTO api_settings (apiURL, apiKey, modelName) VALUES (?, ?, ?)`,
        apiURL,
        apiKey,
        modelName
      )
      if (result.lastID === undefined) {
        throw new Error('INSERT into api_settings did not report a row id')
      }
      return result.lastID
    } catch (error) {
      console.error('插入 API 设置失败:', error)
      throw error
    }
  }

  /**
   * Inserts one proofreading history row.
   * @param result JSON text describing the proofreading result
   * @returns the id of the new row
   * @throws Error when an argument is empty or `result` is not valid JSON; database errors are rethrown
   */
  static async insertOneHistory(filePath: string, apiURL: string, modelName: string, result: string): Promise<number> {
    const db = await DB.getInstance()
    if (!filePath || !apiURL || !modelName || !result) {
      const errorMsg = '插入历史记录参数不完整: ' + JSON.stringify({ filePath, apiURL, modelName, result: !!result })
      console.error(errorMsg)
      throw new Error(errorMsg)
    }

    // Validate that `result` is JSON before storing it.
    try {
      JSON.parse(result)
    } catch (parseError) {
      const reason = parseError instanceof Error ? parseError.message : String(parseError)
      const errorMsg = 'result参数不是有效的JSON: ' + reason
      console.error(errorMsg)
      throw new Error(errorMsg)
    }

    try {
      const res = await db.run(
        `INSERT INTO proof_history (filePath, apiURL, modelName, result) VALUES (?, ?, ?, ?)`,
        filePath,
        apiURL,
        modelName,
        result
      )
      if (res.lastID === undefined) {
        throw new Error('INSERT into proof_history did not report a row id')
      }
      return res.lastID
    } catch (error) {
      console.error('插入校对记录失败:', error)
      throw error
    }
  }

  /** Looks up one API settings row by id; resolves to null when no row matches. */
  static async getAPISettingById(id: number): Promise<apiSettings | null> {
    const db = await DB.getInstance()
    const row = await db.get<apiSettings>(`SELECT * FROM api_settings WHERE id = ?`, id)
    return row ?? null
  }

  /** Looks up one history row by id; resolves to null when no row matches. */
  static async getHistoryById(id: number): Promise<proofHistory | null> {
    const db = await DB.getInstance()
    const row = await db.get<proofHistory>(`SELECT * FROM proof_history WHERE id = ?`, id)
    return row ?? null
  }

  /** Deletes one API settings row; resolves to true when a row was removed. */
  static async deleteAPISettingById(id: number): Promise<boolean> {
    const db = await DB.getInstance()
    const result = await db.run(`DELETE FROM api_settings WHERE id = ?`, id)
    return (result.changes ?? 0) > 0
  }

  /** Deletes one history row; resolves to true when a row was removed. */
  static async deleteHistoryById(id: number): Promise<boolean> {
    const db = await DB.getInstance()
    const result = await db.run(`DELETE FROM proof_history WHERE id = ?`, id)
    return (result.changes ?? 0) > 0
  }

  /**
   * Deletes every API settings row; resolves to true when the table is empty afterwards.
   * Success is judged by the remaining row count, the same way `deleteALLHistory` does it,
   * because `changes` is only meaningful for INSERT/UPDATE/DELETE statements.
   */
  static async deleteALLSettings(): Promise<boolean> {
    const db = await DB.getInstance()
    await db.run(`DELETE FROM api_settings`)
    return (await DB.getAPISettingsCount()) === 0
  }

  /** Deletes every history row; resolves to true when the table is empty afterwards. */
  static async deleteALLHistory(): Promise<boolean> {
    const db = await DB.getInstance()
    await db.run(`DELETE FROM proof_history`)
    return (await DB.getHistoryCount()) === 0
  }

  /**
   * Returns all API settings rows, newest first.
   * Only the row count is logged, because the rows contain API keys.
   */
  static async getAllAPISettings(): Promise<apiSettings[]> {
    const db = await DB.getInstance()
    const rows = await db.all<apiSettings[]>(
      `SELECT id, apiURL, apiKey, modelName, created_at FROM api_settings ORDER BY created_at DESC`
    )
    console.log('api_settings rows loaded:', rows.length)
    return rows
  }

  /** Returns all proofreading history rows, newest first. Only the row count is logged. */
  static async getALLHistory(): Promise<proofHistory[]> {
    const db = await DB.getInstance()
    const rows = await db.all<proofHistory[]>(
      `SELECT id, filePath, apiURL, modelName, created_at, result FROM proof_history ORDER BY created_at DESC`
    )
    console.log('proof_history rows loaded:', rows.length)
    return rows
  }
}
224 |
--------------------------------------------------------------------------------
/src/main/chat.ts:
--------------------------------------------------------------------------------
1 | // 导入自 '@google/generative-ai'
2 | import {
3 | GoogleGenerativeAI,
4 | GenerationConfig,
5 | SafetySetting,
6 | HarmCategory,
7 | HarmBlockThreshold,
8 | Part
9 | } from '@google/generative-ai'
10 | import OpenAI from 'openai'
11 | import { basename } from 'path'
/**
 * Calls the Gemini API for a single-turn conversation.
 *
 * Gemini implementation; per the original author's note it is not actually called.
 *
 * @param systemPrompt - System instruction text; it is wrapped into `{ role, parts }` internally.
 * @param userPrompt - The user's question.
 * @param apiKey - Google AI API key.
 * @param modelName - Model to use, for example "gemini-1.5-flash".
 * @returns The text parts of the first response candidate, concatenated.
 * @throws Error when the API key is empty, or (prefixed with "Gemini API call failed:") when the
 *   request is blocked, returns no candidates, or the SDK call fails.
 */
export async function getGeminiResponse(
  systemPrompt: string,
  userPrompt: string,
  apiKey: string,
  modelName: string
): Promise<string> {
  if (!apiKey) {
    throw new Error('API key is missing. Please provide a valid API key.')
  }

  try {
    // The API key is supplied when the client is created.
    const genAI = new GoogleGenerativeAI(apiKey)

    // The system instruction is set directly on the model handle.
    const model = genAI.getGenerativeModel({
      model: modelName,
      systemInstruction: {
        role: 'system', // original note: could also be 'model'; instructions are usually sent as 'user'
        parts: [{ text: systemPrompt }]
      }
    })

    const generationConfig: GenerationConfig = {
      temperature: 0.9,
      topK: 1,
      topP: 1,
      maxOutputTokens: 2048
    }

    const safetySettings: SafetySetting[] = [
      {
        category: HarmCategory.HARM_CATEGORY_HARASSMENT,
        threshold: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
      },
      {
        category: HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        threshold: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
      },
      {
        category: HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        threshold: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
      },
      {
        category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        threshold: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
      }
    ]

    // Only the user prompt goes into `contents`; the system prompt is already on the model.
    const result = await model.generateContent({
      contents: [{ role: 'user', parts: [{ text: userPrompt }] }],
      generationConfig,
      safetySettings
    })

    const response = result.response

    if (response.promptFeedback?.blockReason) {
      throw new Error(`Request was blocked due to: ${response.promptFeedback.blockReason}`)
    }

    if (!response.candidates || response.candidates.length === 0) {
      throw new Error('No response candidates found.')
    }

    // Concatenate the text parts of the first candidate.
    const text = response.candidates[0].content.parts.map(part => part.text).join('')
    return text
  } catch (error) {
    console.error('An error occurred while calling the Gemini API:', error)
    // Re-throw with a more specific message; errors thrown above are wrapped here as well.
    if (error instanceof Error) {
      throw new Error(`Gemini API call failed: ${error.message}`)
    } else {
      throw new Error('An unknown error occurred during the Gemini API call.')
    }
  }
}
/**
 * Sends one system + user exchange to an OpenAI-compatible chat-completions endpoint
 * and returns the reply defensively (missing fields fall back to '' / 0).
 *
 * @param systemPrompt - Content of the system message.
 * @param userPrompt - Content of the user message.
 * @param apiKey - API key; must be non-empty.
 * @param modelName - Model identifier passed to the endpoint.
 * @param apiURL - Base URL of the OpenAI-compatible service.
 * @returns The first choice's text (or '') and the reported total token count (or 0).
 * @throws Error when the key is empty, or a wrapped "OpenAI API call failed" error otherwise.
 */
export async function OpenaiGen(
  systemPrompt: string,
  userPrompt: string,
  apiKey: string,
  modelName: string,
  apiURL: string
): Promise<{ result: string; total_tokens: number }> {
  if (!apiKey) {
    throw new Error('API key is missing. Please provide a valid API key.')
  }

  try {
    const client = new OpenAI({ apiKey, baseURL: apiURL })
    const completion = await client.chat.completions.create({
      model: modelName,
      messages: [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: userPrompt }
      ]
    })

    // Robustness check: the reply must carry a non-empty `choices` array.
    const choices = completion?.choices
    if (!Array.isArray(choices) || choices.length === 0) {
      console.error('Invalid API response:', completion)
      throw new Error('API返回了无效的响应格式,choices字段缺失或为空')
    }

    return {
      result: choices[0]?.message?.content ?? '',
      total_tokens: completion.usage?.total_tokens ?? 0
    }
  } catch (error) {
    console.error('An error occurred while calling the OpenAI-compatible API:', error)
    const detail =
      error instanceof Error
        ? `OpenAI API call failed: ${error.message}`
        : 'An unknown error occurred during the OpenAI API call.'
    throw new Error(detail)
  }
}
149 |
/**
 * Probes an OpenAI-compatible endpoint by sending a one-message chat request.
 *
 * @param apiURL - Base URL of the service.
 * @param apiKey - API key used for the probe.
 * @param modelName - Model identifier to request.
 * @returns true when the service answers with at least one choice; false on any error or
 *   malformed reply (this function never rejects).
 */
export async function testAPI(apiURL: string, apiKey: string, modelName: string): Promise<boolean> {
  try {
    const openai = new OpenAI({
      apiKey: apiKey,
      baseURL: apiURL
    })

    const chatCompletion = await openai.chat.completions.create({
      model: modelName,
      messages: [{ role: 'user', content: '你好' }]
    })

    // The reply is only accepted when it carries a non-empty `choices` array.
    if (!chatCompletion || !chatCompletion.choices || !Array.isArray(chatCompletion.choices) || chatCompletion.choices.length === 0) {
      console.error('API test failed - invalid response:', chatCompletion)
      return false
    }

    console.log('the result of connection test:', chatCompletion.choices[0].message.content)

    return true
  } catch (error) {
    // This probe talks to an OpenAI-compatible API, so the log says so (it used to name Gemini).
    console.error('An error occurred while testing the OpenAI-compatible API:', error)
    return false
  }
}
177 |
178 | // test modelname:doubao-embedding-text-240715
179 | // test url: https://ark.cn-beijing.volces.com/api/v3/
/**
 * Requests embeddings from an OpenAI-compatible embeddings endpoint.
 *
 * @param text - A single string, or a non-empty array of non-blank strings.
 * @param modelName - Embedding model identifier.
 * @param apiKey_input - API key for the service.
 * @param apiURL - Base URL of the service.
 * @returns One embedding vector for string input, or one vector per element for array input.
 * @throws Error when an argument is missing or blank, or when the API call fails or returns
 *   no data (the message names 404 / 401 / 400 failures specifically).
 */
export async function getEmbedding(
  text: string | string[],
  modelName: string,
  apiKey_input: string,
  apiURL: string
): Promise<number[] | number[][]> {
  // Argument validation.
  if (!text || (Array.isArray(text) && text.length === 0)) {
    throw new Error('Text parameter is required and cannot be empty')
  }

  if (!modelName) {
    throw new Error('Model name is required')
  }

  if (!apiKey_input) {
    throw new Error('API key is required')
  }

  if (!apiURL) {
    throw new Error('API URL is required')
  }

  // For array input every element must be a non-blank string.
  if (Array.isArray(text)) {
    for (let i = 0; i < text.length; i++) {
      if (typeof text[i] !== 'string') {
        throw new Error(`Element at index ${i} is not a string`)
      }
      if (text[i].trim() === '') {
        throw new Error(`Element at index ${i} is an empty string`)
      }
    }
  } else if (typeof text !== 'string') {
    throw new Error('Text parameter must be a string or an array of strings')
  } else if (text.trim() === '') {
    throw new Error('Text parameter cannot be an empty string')
  }

  const openai = new OpenAI({
    apiKey: apiKey_input,
    baseURL: apiURL
  })

  try {
    const response = await openai.embeddings.create({
      model: modelName,
      input: text
    })

    // The reply is only accepted when it carries a non-empty `data` array.
    if (!response || !response.data || !Array.isArray(response.data) || response.data.length === 0) {
      throw new Error('嵌入API返回了无效的响应格式,data字段缺失或为空')
    }

    // `text` was validated above as either a string or a string array, so two cases suffice.
    return Array.isArray(text) ? response.data.map(item => item.embedding) : response.data[0].embedding
  } catch (error: unknown) {
    console.error('error getting embedding:', error)

    // Read `status` / `message` defensively: the rejection value is not guaranteed to be an Error.
    const { status, message } = (error ?? {}) as { status?: number; message?: string }

    // Give more specific guidance for the common HTTP failures.
    if (status === 404) {
      throw new Error(
        `嵌入API调用失败,状态码404: 请检查API地址(${apiURL})和模型名称(${modelName})是否正确,该模型可能不支持嵌入功能`
      )
    } else if (status === 401) {
      throw new Error(`嵌入API调用失败,认证错误: API密钥无效或权限不足`)
    } else if (status === 400) {
      throw new Error(`嵌入API调用失败,请求错误: ${message}`)
    } else {
      throw new Error(`嵌入API调用失败: ${message || '未知错误'}`)
    }
  }
}
256 |
--------------------------------------------------------------------------------
/lancedbNativePro.ts:
--------------------------------------------------------------------------------
1 | 'use strict'
2 | /* tslint:disable */
3 | /* eslint-disable */
4 | /* prettier-ignore */
5 | /* auto-generated by NAPI-RS */
6 | const { existsSync, readFileSync } = require('fs');
7 | const { join } = require('path')
8 | const { platform, arch, env } = process
9 |
10 | let nativeBinding = null
11 | let localFileExisted = false
12 | let loadError = null
/**
 * Resolves where a native binding file should be loaded from.
 *
 * The LANCEDB_NATIVE_PATH environment variable takes priority: a value ending in `.node` is
 * used as the complete file path, any other value is treated as a directory and joined with
 * `filename`. Without the variable the file is looked up next to this module (`__dirname`).
 */
function getBindingPath(filename) {
  const override = env.LANCEDB_NATIVE_PATH
  if (!override) {
    // Default behaviour: resolve relative to this module.
    return join(__dirname, filename)
  }

  console.log('env path of msvc:', override)
  return override.endsWith('.node') ? override : join(override, filename)
}
30 |
/**
 * Reports whether the musl binding should be used.
 *
 * When `process.report.getReport` is available the answer is "no glibc runtime version in the
 * report header". Otherwise (the Node 10 path) the file found by `which ldd` is searched for
 * the text "musl"; if that lookup throws, true is returned.
 */
function isMusl() {
  const canReport = Boolean(process.report) && typeof process.report.getReport === 'function'
  if (canReport) {
    const { glibcVersionRuntime } = process.report.getReport().header
    return !glibcVersionRuntime
  }

  // For Node 10
  try {
    const lddPath = require('child_process').execSync('which ldd').toString().trim()
    return readFileSync(lddPath, 'utf8').includes('musl')
  } catch (e) {
    return true
  }
}
45 |
// Select and load the native binding for the current platform / architecture.
// Each branch resolves a local `.node` file through getBindingPath and loads it when it exists;
// otherwise it requires the matching `@lancedb/lancedb-*` package. A failed load is stored in
// `loadError` (except for the macOS universal attempt, which falls through to the per-arch
// branches). Unsupported platform / architecture combinations throw immediately.
switch (platform) {
  case 'android':
    switch (arch) {
      case 'arm64':
        // Changed: resolve the path with getBindingPath instead of a hard-coded path.
        const androidArm64Path = getBindingPath('lancedb.android-arm64.node')
        localFileExisted = existsSync(androidArm64Path)
        try {
          if (localFileExisted) {
            nativeBinding = require(androidArm64Path)
          } else {
            nativeBinding = require('@lancedb/lancedb-android-arm64')
          }
        } catch (e) {
          loadError = e
        }
        break
      case 'arm':
        const androidArmPath = getBindingPath('lancedb.android-arm-eabi.node')
        localFileExisted = existsSync(androidArmPath)
        try {
          if (localFileExisted) {
            nativeBinding = require(androidArmPath)
          } else {
            nativeBinding = require('@lancedb/lancedb-android-arm-eabi')
          }
        } catch (e) {
          loadError = e
        }
        break
      default:
        throw new Error(`Unsupported architecture on Android ${arch}`)
    }
    break
  case 'win32':
    switch (arch) {
      case 'x64':
        // Changed: Windows x64 uses the new path lookup.
        const winX64Path = getBindingPath('lancedb.win32-x64-msvc.node')
        localFileExisted = existsSync(winX64Path)
        try {
          if (localFileExisted) {
            nativeBinding = require(winX64Path)
          } else {
            nativeBinding = require('@lancedb/lancedb-win32-x64-msvc')
          }
        } catch (e) {
          loadError = e
        }
        break
      case 'ia32':
        const winIa32Path = getBindingPath('lancedb.win32-ia32-msvc.node')
        localFileExisted = existsSync(winIa32Path)
        try {
          if (localFileExisted) {
            nativeBinding = require(winIa32Path)
          } else {
            nativeBinding = require('@lancedb/lancedb-win32-ia32-msvc')
          }
        } catch (e) {
          loadError = e
        }
        break
      case 'arm64':
        const winArm64Path = getBindingPath('lancedb.win32-arm64-msvc.node')
        localFileExisted = existsSync(winArm64Path)
        try {
          if (localFileExisted) {
            nativeBinding = require(winArm64Path)
          } else {
            nativeBinding = require('@lancedb/lancedb-win32-arm64-msvc')
          }
        } catch (e) {
          loadError = e
        }
        break
      default:
        throw new Error(`Unsupported architecture on Windows: ${arch}`)
    }
    break
  case 'darwin':
    // Changed: macOS universal build is tried first.
    const darwinUniversalPath = getBindingPath('lancedb.darwin-universal.node')
    localFileExisted = existsSync(darwinUniversalPath)
    try {
      if (localFileExisted) {
        nativeBinding = require(darwinUniversalPath)
      } else {
        nativeBinding = require('@lancedb/lancedb-darwin-universal')
      }
      // Universal binding loaded: leave the 'darwin' case without trying per-arch builds.
      break
    } catch {}
    // The universal load failed (error intentionally ignored): fall back to the per-arch build.
    switch (arch) {
      case 'x64':
        const darwinX64Path = getBindingPath('lancedb.darwin-x64.node')
        localFileExisted = existsSync(darwinX64Path)
        try {
          if (localFileExisted) {
            nativeBinding = require(darwinX64Path)
          } else {
            nativeBinding = require('@lancedb/lancedb-darwin-x64')
          }
        } catch (e) {
          loadError = e
        }
        break
      case 'arm64':
        const darwinArm64Path = getBindingPath('lancedb.darwin-arm64.node')
        localFileExisted = existsSync(darwinArm64Path)
        try {
          if (localFileExisted) {
            nativeBinding = require(darwinArm64Path)
          } else {
            nativeBinding = require('@lancedb/lancedb-darwin-arm64')
          }
        } catch (e) {
          loadError = e
        }
        break
      default:
        throw new Error(`Unsupported architecture on macOS: ${arch}`)
    }
    break
  case 'freebsd':
    if (arch !== 'x64') {
      throw new Error(`Unsupported architecture on FreeBSD: ${arch}`)
    }
    const freebsdPath = getBindingPath('lancedb.freebsd-x64.node')
    localFileExisted = existsSync(freebsdPath)
    try {
      if (localFileExisted) {
        nativeBinding = require(freebsdPath)
      } else {
        nativeBinding = require('@lancedb/lancedb-freebsd-x64')
      }
    } catch (e) {
      loadError = e
    }
    break
  case 'linux':
    switch (arch) {
      case 'x64':
        if (isMusl()) {
          const linuxX64MuslPath = getBindingPath('lancedb.linux-x64-musl.node')
          localFileExisted = existsSync(linuxX64MuslPath)
          try {
            if (localFileExisted) {
              nativeBinding = require(linuxX64MuslPath)
            } else {
              nativeBinding = require('@lancedb/lancedb-linux-x64-musl')
            }
          } catch (e) {
            loadError = e
          }
        } else {
          const linuxX64GnuPath = getBindingPath('lancedb.linux-x64-gnu.node')
          localFileExisted = existsSync(linuxX64GnuPath)
          try {
            if (localFileExisted) {
              nativeBinding = require(linuxX64GnuPath)
            } else {
              nativeBinding = require('@lancedb/lancedb-linux-x64-gnu')
            }
          } catch (e) {
            loadError = e
          }
        }
        break
      case 'arm64':
        if (isMusl()) {
          const linuxArm64MuslPath = getBindingPath('lancedb.linux-arm64-musl.node')
          localFileExisted = existsSync(linuxArm64MuslPath)
          try {
            if (localFileExisted) {
              nativeBinding = require(linuxArm64MuslPath)
            } else {
              nativeBinding = require('@lancedb/lancedb-linux-arm64-musl')
            }
          } catch (e) {
            loadError = e
          }
        } else {
          const linuxArm64GnuPath = getBindingPath('lancedb.linux-arm64-gnu.node')
          localFileExisted = existsSync(linuxArm64GnuPath)
          try {
            if (localFileExisted) {
              nativeBinding = require(linuxArm64GnuPath)
            } else {
              nativeBinding = require('@lancedb/lancedb-linux-arm64-gnu')
            }
          } catch (e) {
            loadError = e
          }
        }
        break
      // Other Linux architectures would be modified the same way; they were omitted for
      // brevity, so they currently reach the default branch and throw.
      default:
        throw new Error(`Unsupported architecture on Linux: ${arch}`)
    }
    break
  default:
    throw new Error(`Unsupported OS: ${platform}, architecture: ${arch}`)
}
249 |
// Nothing was loaded: surface the recorded load error, or a generic failure when none exists.
if (!nativeBinding) {
  throw loadError || new Error(`Failed to load native binding`)
}

// Re-export the native classes one by one under the same names.
module.exports.Connection = nativeBinding.Connection
module.exports.JsHeaderProvider = nativeBinding.JsHeaderProvider
module.exports.Index = nativeBinding.Index
module.exports.RecordBatchIterator = nativeBinding.RecordBatchIterator
module.exports.NativeMergeInsertBuilder = nativeBinding.NativeMergeInsertBuilder
module.exports.Query = nativeBinding.Query
module.exports.VectorQuery = nativeBinding.VectorQuery
module.exports.TakeQuery = nativeBinding.TakeQuery
module.exports.JsFullTextQuery = nativeBinding.JsFullTextQuery
module.exports.Reranker = nativeBinding.Reranker
module.exports.RrfReranker = nativeBinding.RrfReranker
module.exports.Session = nativeBinding.Session
module.exports.Table = nativeBinding.Table
module.exports.TagContents = nativeBinding.TagContents
module.exports.Tags = nativeBinding.Tags
289 |
--------------------------------------------------------------------------------
/src/main/pdfUtils.ts:
--------------------------------------------------------------------------------
1 | import fs from 'fs'
2 | import path from 'path'
3 | import { promisify } from 'util'
4 | import * as lancedb from '@lancedb/lancedb'
5 | import { readFile } from 'node:fs/promises'
6 | import { insertDocument, getOrCreateTable, initLanceDB } from './lancedb'
7 |
// The uuid package is loaded through a dynamic import and its `v4` function is cached here.
let uuidv4: any

/**
 * Returns the cached `v4` function of the `uuid` package, importing the package on first use.
 */
async function initializeUUID() {
  if (uuidv4) {
    return uuidv4
  }
  const { v4 } = await import('uuid')
  uuidv4 = v4
  return uuidv4
}
18 |
19 | const stat = promisify(fs.stat)
20 |
/**
 * Checks whether `filePath` exists and is a regular file.
 *
 * @param filePath - Path to check.
 * @returns true for an existing regular file; false for directories, missing paths, and any
 *   other `stat` failure (the error is deliberately swallowed).
 */
export async function fileExists(filePath: string): Promise<boolean> {
  try {
    const stats = await fs.promises.stat(filePath)
    return stats.isFile()
  } catch {
    return false
  }
}
32 |
/**
 * Reads a PDF file and returns its extracted text.
 *
 * @param filePath - Path of the PDF file.
 * @returns The `text` field produced by the pdf-parse parser.
 * @throws Error when the file does not exist, or when reading / parsing fails
 *   (message prefixed with "Failed to extract text from PDF:").
 */
export async function extractTextFromPDF(filePath: string): Promise<string> {
  if (!(await fileExists(filePath))) {
    throw new Error(`PDF file not found at path: ${filePath}`)
  }

  try {
    // Load the parser function itself. The path used before
    // ('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') is the bundled PDF.js library, which
    // exports an object and cannot be called as `pdfParse(buffer)`.
    // NOTE(review): confirm against the installed pdf-parse version that
    // 'pdf-parse/lib/pdf-parse.js' is the parser entry.
    const pdfParse = require('pdf-parse/lib/pdf-parse.js')
    const dataBuffer = await readFile(filePath)
    const data = await pdfParse(dataBuffer)
    return data.text
  } catch (error) {
    console.error('Error extracting text from PDF:', error)
    const reason = error instanceof Error ? error.message : String(error)
    throw new Error(`Failed to extract text from PDF: ${reason}`)
  }
}
51 |
/**
 * Thresholds used to decide whether a text chunk is worth keeping.
 */
interface TextQualityConfig {
  minChunkLength: number // minimum chunk length, in characters of the trimmed text
  minWordCount: number // minimum word count
  maxPunctuationRatio: number // maximum share of punctuation characters
  minAlphanumericRatio: number // minimum share of letters / digits / CJK characters
}
61 |
// Default thresholds, used when a caller does not pass its own TextQualityConfig.
const DEFAULT_QUALITY_CONFIG: TextQualityConfig = {
  minChunkLength: 20, // at least 20 characters
  minWordCount: 3, // at least 3 words
  maxPunctuationRatio: 0.5, // punctuation may not exceed 50%
  minAlphanumericRatio: 0.3 // letters/digits must make up at least 30%
}
68 |
/**
 * Decides whether a text fragment passes the quality thresholds.
 *
 * A fragment is rejected when it is empty, shorter than `minChunkLength` after trimming,
 * has fewer than `minWordCount` words (whitespace-separated tokens plus one word per two
 * Chinese characters), contains too much punctuation, or too few letters/digits/Chinese
 * characters.
 */
function isValidChunk(text: string, config: TextQualityConfig = DEFAULT_QUALITY_CONFIG): boolean {
  if (!text) {
    return false
  }

  const body = text.trim()
  if (body.length < config.minChunkLength) {
    return false
  }

  // Number of matches of a global pattern inside the trimmed fragment.
  const countOf = (pattern: RegExp): number => (body.match(pattern) || []).length

  // Word count that works for both English and Chinese: two Chinese characters count as one word.
  const tokenCount = body.split(/\s+/).filter(token => token.length > 0).length
  const wordEstimate = tokenCount + Math.floor(countOf(/[\u4e00-\u9fa5]/g) / 2)
  if (wordEstimate < config.minWordCount) {
    return false
  }

  // Share of punctuation characters.
  const tooMuchPunctuation =
    countOf(/[.,;:!?。,、;:!?…—\-\(\)\[\]\{\}]/g) / body.length > config.maxPunctuationRatio
  if (tooMuchPunctuation) {
    return false
  }

  // Share of letters, digits and Chinese characters.
  const tooFewAlphanumerics = countOf(/[a-zA-Z0-9\u4e00-\u9fa5]/g) / body.length < config.minAlphanumericRatio
  return !tooFewAlphanumerics
}
102 |
/**
 * Normalises extracted text: removes zero-width characters, unifies line endings, keeps
 * paragraph breaks (blank lines) while joining single line breaks with a space, collapses
 * runs of spaces/tabs, and trims every line as well as the whole text.
 *
 * @param text - Raw text.
 * @returns The cleaned text; paragraphs stay separated by exactly one blank line.
 */
function normalizeText(text: string): string {
  return (
    text
      // Remove zero-width characters and the byte-order mark.
      .replace(/[\u200B-\u200D\uFEFF]/g, '')
      // Unify line endings (CRLF and lone CR both become LF).
      .replace(/\r\n?/g, '\n')
      // Keep double line breaks as paragraph separators...
      .replace(/\n\n+/g, '\n\n')
      // ...and turn every remaining single line break into a space. Lookarounds are used so
      // the neighbouring characters are not consumed; otherwise one-character lines
      // ("a\nb\nc") would keep every second line break.
      .replace(/(?<=[^\n])\n(?=[^\n])/g, ' ')
      // Collapse runs of spaces and tabs.
      .replace(/[ \t]+/g, ' ')
      // Trim each line, then the whole text.
      .split('\n')
      .map(line => line.trim())
      .join('\n')
      .trim()
  )
}
125 |
/**
 * Finds the best position at which to cut `text`, searching backwards from `maxPos`.
 *
 * Candidates are tried in priority order — paragraph break, sentence end, phrase end,
 * single line break, space — and a candidate is only accepted when it lies far enough into
 * the text (beyond 50% / 60% / 70% / 70% / 50% of `maxPos` respectively). When nothing
 * qualifies, `maxPos` itself is returned.
 *
 * @param text - Text to split.
 * @param maxPos - Latest index at which a delimiter may start.
 * @returns The index where the next chunk should begin.
 */
function findBestSplitPoint(text: string, maxPos: number): number {
  // Last occurrence of `needle` starting at or before maxPos (-1 when absent).
  const lastAt = (needle: string): number => text.lastIndexOf(needle, maxPos)
  // Latest occurrence among several delimiters.
  const latestOf = (needles: string[]): number => Math.max(...needles.map(needle => lastAt(needle)))

  // 1. Paragraph boundary (double line break).
  const paragraphBreak = lastAt('\n\n')
  if (paragraphBreak > maxPos * 0.5) {
    return paragraphBreak + 2
  }

  // 2. Sentence boundary.
  const sentenceBreak = latestOf(['。', '!', '?', '. ', '! ', '? ', '.\n', '!\n', '?\n'])
  if (sentenceBreak > maxPos * 0.6) {
    let cut = sentenceBreak + 1
    const mark = text[sentenceBreak]
    const isAsciiMark = mark === '.' || mark === '!' || mark === '?'
    // After ASCII punctuation, skip the following whitespace so the next chunk starts on text.
    for (; isAsciiMark && cut < text.length && /[\s\n]/.test(text[cut]); cut++) {
      // advance only
    }
    return cut
  }

  // 3. Phrase boundary (comma, semicolon, ...).
  const phraseBreak = latestOf([',', '、', ';', ', ', '; ', ',\n'])
  if (phraseBreak > maxPos * 0.7) {
    return phraseBreak + 1
  }

  // 4. Single line break.
  const lineBreak = lastAt('\n')
  if (lineBreak > maxPos * 0.7) {
    return lineBreak + 1
  }

  // 5. Space, as the last resort before a hard cut.
  const spaceBreak = lastAt(' ')
  if (spaceBreak > maxPos * 0.5) {
    return spaceBreak + 1
  }

  // 6. No suitable boundary found: cut at maxPos.
  return maxPos
}
192 |
/**
 * Split text into overlapping chunks, preferring natural boundaries.
 *
 * The input is normalized with `normalizeText`, then walked front to back.
 * For every window of at most `maxChunkSize` characters a split position is
 * chosen with `findBestSplitPoint`; chunks rejected by `isValidChunk` are
 * skipped (with a warning). Finally, undersized neighbours are merged by
 * `mergeSmallChunks`.
 *
 * @param text Input text.
 * @param maxChunkSize Maximum window size, in characters, for one chunk.
 * @param minChunkSize Minimum chunk size; a split point that would yield a
 *   shorter chunk is replaced by the hard window end.
 * @param overlap Number of characters shared between consecutive chunks.
 * @param qualityConfig Quality-check configuration passed to `isValidChunk`.
 * @returns The chunks in document order; `[]` for empty or whitespace-only text.
 * @throws Error if `maxChunkSize <= 0`, if `minChunkSize` is outside
 *   `[0, maxChunkSize]`, or if `overlap` is outside `[0, maxChunkSize)`.
 */
export function splitTextIntoChunks(
  text: string,
  maxChunkSize: number = 1000,
  minChunkSize: number = 100,
  overlap: number = 100,
  qualityConfig: TextQualityConfig = DEFAULT_QUALITY_CONFIG
): string[] {
  // Parameter validation
  if (maxChunkSize <= 0) {
    throw new Error('maxChunkSize must be greater than 0')
  }
  if (minChunkSize < 0 || minChunkSize > maxChunkSize) {
    throw new Error('minChunkSize must be between 0 and maxChunkSize')
  }
  if (overlap < 0 || overlap >= maxChunkSize) {
    throw new Error('overlap must be between 0 and maxChunkSize')
  }

  if (!text || text.trim().length === 0) {
    return []
  }

  const normalizedText = normalizeText(text)
  const totalLength = normalizedText.length
  const chunks: string[] = []
  let currentPos = 0

  while (currentPos < totalLength) {
    // Hard upper bound of the current window
    const targetEndPos = Math.min(currentPos + maxChunkSize, totalLength)

    let endPos: number
    if (targetEndPos >= totalLength) {
      // The rest of the text fits into one window
      endPos = totalLength
    } else {
      // Look for a natural boundary inside the window
      endPos = findBestSplitPoint(normalizedText.slice(currentPos), targetEndPos - currentPos) + currentPos

      // A boundary too close to the window start would create a tiny chunk;
      // fall back to the hard window end instead
      if (endPos - currentPos < minChunkSize && endPos < totalLength) {
        endPos = targetEndPos
      }
    }

    const chunk = normalizedText.slice(currentPos, endPos).trim()

    if (isValidChunk(chunk, qualityConfig)) {
      chunks.push(chunk)
    } else {
      console.warn(`Skipped invalid chunk at position ${currentPos}: too short or low quality`)
      // Even for a rejected chunk the cursor must move forward; advance by at
      // least one character so minChunkSize = 0 cannot stall the loop
      if (endPos <= currentPos) {
        endPos = currentPos + Math.max(1, Math.min(minChunkSize, totalLength - currentPos))
      }
    }

    // The whole text has been consumed. Stepping back by `overlap` from here
    // would only re-emit the tail of the chunk that was just produced.
    if (endPos >= totalLength) {
      break
    }

    // Next start position, stepping back by `overlap` when that still makes progress
    const nextPos = endPos - overlap
    currentPos = nextPos > currentPos ? nextPos : endPos

    // Safety stop once the cursor reaches the last character
    if (currentPos >= totalLength - 1) {
      break
    }
  }

  // Post-processing: merge undersized neighbouring chunks
  return mergeSmallChunks(chunks, minChunkSize, maxChunkSize)
}
281 |
/**
 * Merge undersized chunks into their successor.
 *
 * Walks the list once, carrying a "current" chunk. While the current chunk is
 * shorter than `minSize` and joining it with the next chunk (including the
 * one-character newline separator) still fits into `maxSize`, the two are
 * joined with `'\n'`. Otherwise the current chunk is emitted unchanged.
 *
 * @param chunks Chunks in document order.
 * @param minSize Chunks shorter than this are candidates for merging.
 * @param maxSize Upper bound for the length of a merged chunk.
 * @returns A new array; the input array is not modified.
 */
function mergeSmallChunks(chunks: string[], minSize: number, maxSize: number): string[] {
  if (chunks.length === 0) return []

  const SEPARATOR = '\n'
  const result: string[] = []
  let currentChunk = chunks[0]

  for (const nextChunk of chunks.slice(1)) {
    // Count the separator too, otherwise the merged chunk can exceed maxSize by one
    const mergedLength = currentChunk.length + SEPARATOR.length + nextChunk.length

    if (currentChunk.length < minSize && mergedLength <= maxSize) {
      currentChunk = currentChunk + SEPARATOR + nextChunk
    } else {
      result.push(currentChunk)
      currentChunk = nextChunk
    }
  }

  // Emit whatever is still being carried (an empty string is dropped)
  if (currentChunk) {
    result.push(currentChunk)
  }

  return result
}
310 |
/**
 * Extract the text of a document, split it into chunks and store every chunk
 * in the vector database via `insertDocument`.
 *
 * Supported file extensions: `.pdf`, `.txt`, `.docx` (case-insensitive).
 *
 * @param repositoryName Repository the chunks are inserted into.
 * @param filePath Path of the document to process.
 * @param documentId Id shared by all chunks; a UUID v4 is generated when empty.
 * @param chunkSize Maximum chunk size passed to `splitTextIntoChunks`.
 * @param overlap Overlap passed to `splitTextIntoChunks`.
 * @param modelName Embedding model name, forwarded to `insertDocument`.
 * @param apiKey API key, forwarded to `insertDocument`.
 * @param apiURL API base URL, forwarded to `insertDocument`.
 * @param options Optional overrides: `minChunkSize` (defaults to 30% of
 *   `chunkSize`; an explicit `0` is honoured) and a partial quality config
 *   merged over `DEFAULT_QUALITY_CONFIG`.
 * @returns Summary with the document id, file name, number of chunks, average
 *   chunk length (0 when no chunk was produced) and the per-chunk insert results.
 * @throws Error for unsupported file extensions; errors from extraction,
 *   splitting or insertion propagate unchanged.
 */
export async function processDocument(
  repositoryName: string,
  filePath: string,
  documentId: string = '',
  chunkSize: number = 1000,
  overlap: number = 100,
  modelName: string,
  apiKey: string,
  apiURL: string,
  options?: {
    minChunkSize?: number
    qualityConfig?: Partial<TextQualityConfig>
  }
) {
  // Initialize uuid and generate an id when the caller did not supply one
  const v4 = await initializeUUID()
  if (!documentId) {
    documentId = v4()
  }

  // 1. Extract text depending on the file type
  const ext = path.extname(filePath).toLowerCase()
  let text: string

  switch (ext) {
    case '.pdf':
      text = await extractTextFromPDF(filePath)
      break
    case '.txt':
      text = await extractTextFromTXT(filePath)
      break
    case '.docx':
      text = await extractTextFromDOCX(filePath)
      break
    default:
      throw new Error(`Unsupported file type: ${ext}`)
  }

  // 2. Split the text into chunks
  // `??` instead of `||`: 0 is a valid minChunkSize and must not be replaced by the default
  const minChunkSize = options?.minChunkSize ?? Math.floor(chunkSize * 0.3)
  const qualityConfig = {
    ...DEFAULT_QUALITY_CONFIG,
    ...options?.qualityConfig
  }

  const chunks = splitTextIntoChunks(text, chunkSize, minChunkSize, overlap, qualityConfig)

  // Guard against 0 / 0 = NaN when the document yields no chunk at all
  const totalChars = chunks.reduce((sum, c) => sum + c.length, 0)
  const averageChunkSize = chunks.length > 0 ? Math.round(totalChars / chunks.length) : 0

  console.log(`Document split into ${chunks.length} chunks. Average size: ${averageChunkSize} chars`)

  // 3. Base metadata shared by every chunk of this document
  const fileName = path.basename(filePath)
  const baseMetadata = {
    source: ext.substring(1),
    fileName,
    filePath,
    documentId,
    totalPages: chunks.length, // NOTE(review): holds the chunk count, not a page count
    processedAt: new Date().toISOString()
  }

  // 4. Insert the chunks one by one, in document order
  const results = []
  for (let i = 0; i < chunks.length; i++) {
    const chunkMetadata = {
      ...baseMetadata,
      chunkIndex: i,
      totalChunks: chunks.length,
      chunkId: `${documentId}-${i}`,
      chunkLength: chunks[i].length
    }

    const result = await insertDocument(repositoryName, chunks[i], fileName, chunkMetadata, modelName, apiKey, apiURL)

    results.push(result)
  }

  return {
    documentId,
    fileName,
    chunksProcessed: chunks.length,
    averageChunkSize,
    results
  }
}
401 |
/**
 * Retrieve all stored chunks of one document, ordered by `chunkIndex`.
 *
 * Chunk metadata is persisted as a JSON string (see `insertDocument`), so it
 * cannot be filtered with a `metadata.documentId` SQL expression. Rows are
 * therefore read with a plain (non-vector) query, their metadata is parsed,
 * and the `documentId` match is done in JavaScript.
 *
 * @param repositoryName Repository (table) to read from.
 * @param documentId Document id recorded in the chunk metadata.
 * @returns `{ id, text, score, metadata }` per chunk, sorted by
 *   `metadata.chunkIndex`. `score` is kept for shape compatibility and is
 *   `undefined` because no vector search is performed.
 * @throws Error if no table handle could be obtained.
 */
export async function getPDFDocumentChunks(repositoryName: string, documentId: string) {
  const tbl = await getOrCreateTable(repositoryName, 'default', 'dummy', 'dummy')
  if (!tbl) throw new Error('Documents table does not exist')

  // Metadata is normally a JSON string; tolerate already-parsed objects and bad JSON
  const parseMetadata = (raw: unknown): Record<string, any> => {
    if (raw && typeof raw === 'object') return raw as Record<string, any>
    if (typeof raw !== 'string' || raw.length === 0) return {}
    try {
      const parsed = JSON.parse(raw)
      return parsed && typeof parsed === 'object' ? parsed : {}
    } catch {
      return {}
    }
  }

  // Rows without a numeric chunkIndex sort first instead of producing NaN comparisons
  const chunkOrder = (metadata: Record<string, any>): number => {
    const index = Number(metadata.chunkIndex)
    return Number.isFinite(index) ? index : 0
  }

  // Plain scan: a vector search would need a query vector of the table's dimension
  const rows = await tbl.query().toArray()

  return rows
    .map((row: any) => ({
      id: row.id,
      text: row.text,
      score: row._distance,
      metadata: parseMetadata(row.metadata)
    }))
    .filter((chunk: any) => chunk.metadata.documentId === documentId)
    .sort((a: any, b: any) => chunkOrder(a.metadata) - chunkOrder(b.metadata))
}
422 |
/**
 * Read a plain-text file and return its content decoded as UTF-8.
 *
 * @param filePath Path of the TXT file.
 * @returns The file content as a string.
 * @throws Error if `fileExists` reports that the file is missing.
 */
export async function extractTextFromTXT(filePath: string): Promise<string> {
  const isPresent = await fileExists(filePath)
  if (!isPresent) {
    throw new Error(`TXT file not found at path: ${filePath}`)
  }

  const content = await readFile(filePath, 'utf-8')
  return content
}
433 |
/**
 * Module-level cache for the dynamically imported `mammoth` module
 * (used to read DOCX files).
 */
let mammoth: any

/**
 * Return the `mammoth` module, importing it on first use and reusing the
 * cached module object afterwards.
 */
async function initializeMammoth() {
  if (mammoth) {
    return mammoth
  }

  mammoth = await import('mammoth')
  return mammoth
}
446 |
/**
 * Extract the raw text of a DOCX file using mammoth's `extractRawText`.
 *
 * @param filePath Path of the DOCX file.
 * @returns The `value` field of mammoth's extraction result.
 * @throws Error if `fileExists` reports that the file is missing.
 */
export async function extractTextFromDOCX(filePath: string): Promise<string> {
  const isPresent = await fileExists(filePath)
  if (!isPresent) {
    throw new Error(`DOCX file not found at path: ${filePath}`)
  }

  // The module is loaded before the file is read, as in the original flow
  const docxReader = await initializeMammoth()
  const buffer = await readFile(filePath)
  const extraction = await docxReader.extractRawText({ buffer })
  return extraction.value
}
456 |
--------------------------------------------------------------------------------
/test.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | Word文档章节标题提取器
8 |
9 |
207 |
208 |
209 |
210 |
211 |
Word文档章节标题提取器
212 |
213 |
214 |
📄
215 |
点击选择或拖拽Word文档到此处
216 |
217 |
218 |
支持格式:.docx
219 |
220 |
221 |
222 |
226 |
227 |
228 | 提示:此工具会自动提取Word文档中的章节标题(H1-H6级别)。
229 | 处理过程中可能会显示一些警告信息,这些是正常的,不会影响标题提取功能。
230 |
231 |
232 |
233 |
提取的章节标题
234 |
235 |
236 |
237 |
238 |
417 |
418 |
419 |
--------------------------------------------------------------------------------
/src/renderer/views/About.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |

7 |
一款基于 Electron、Vue 3 和 TypeScript 构建的智能长文档校对桌面应用程序
8 |
9 |
10 |
11 |
12 |
13 | 📝 项目简介
14 |
15 | AutoDocxProofread(智能校对助手)是一款专为长文档校对而设计的桌面应用程序。它能够帮助用户有效检测 Word
16 | 文档中的错别字、标点符号错误、语法问题和文本一致性问题,并提供修改建议。
17 |
18 |
19 | 针对大模型在处理长文档时存在的遗忘和幻觉问题,软件设计了专门的架构来增强校对的准确性,并能直接导出校对后的文档。并且软件采用了并行处理架构,显著提升大模型处理长文档的速度。新版本引入了本地知识库功能,支持RAG功能给模型校对参考。
20 |
21 |
22 | 核心功能与软件优势
23 |
24 |
25 |
26 | - 逐句精校:适合需要高精度校对的短文本
27 | - 逐段校正:适合长篇文献的校对
28 | - 全文润色:对整篇文档进行语言润色和优化
29 |
30 |
31 |
32 |
33 | - 错别字检测
34 | - 标点符号错误识别
35 | - 语法问题检测
36 |
37 |
38 |
39 |
40 | - 创建和管理多个本地知识库
41 | - 支持PDF、Word和TXT文档导入作为参考材料
42 | - 基于向量数据库的RAG检索增强生成算法
43 |
44 |
45 |
46 |
47 | - 使用并行处理的方式优化处理效率,显著提升对于长文本的校对速度
48 | - 清晰的错误展示和修改建议
49 | - 一键应用修改建议
50 | - 响应式设计,支持窗口缩放
51 |
52 |
53 |
54 |
55 | - 兼容 OpenAI 接口,支持多种大语言模型 API
56 | - 灵活的 API 配置管理
57 |
58 |
59 |
60 |
61 | - 清晰查看历史记录,包括时间、校对模型、校对文件路径和具体的结果
62 | - 支持对结果的批量管理
63 |
64 |
65 |
66 |
67 |
69 |
71 |
73 |
74 |
75 |
76 |
77 |
78 | 🛠 技术栈
79 |
80 |
81 | Electron +
82 | Vue 3 +
83 | TypeScript
84 |
85 |
86 | Element Plus
87 |
88 |
89 | Vite +
90 | Electron Forge
91 |
92 |
93 | Mammoth
94 | +
95 | Docxtemplater
97 |
98 |
99 | LanceDB
100 |
101 |
102 | ESLint +
103 | Prettier
104 |
105 |
106 | Standard Version
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 | 🚀 快速开始
116 | 环境要求
117 |
118 | - Node.js >= 16.x
119 | - npm 或 yarn
120 |
121 |
122 | 安装依赖
123 | npm install
124 |
125 | 开发模式运行
126 | npm run start
127 |
128 |
129 |
130 |
131 | 🎯 使用指南
132 |
133 |
134 |
135 |
136 | - 点击导航栏中的"工作区"
137 | - 选择"API 设置"选项卡
138 | - 填写 API 地址、密钥和模型名称
139 | - 点击"测试连接"验证配置
140 | - 点击"保存配置"保存设置
141 | - 配置并发限制、选择是否开启请求频率限制
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 | - 点击导航栏中的"知识库"
150 | - 选择"Embedding模型"(需要选择专门的embedding模型)
151 | - 点击"添加知识库"按钮创建新知识库
152 | - 选择知识库后可添加PDF文件作为参考材料
153 |
154 |
155 |
156 |
157 |
158 |
159 | - 点击导航栏中的"工作区"
160 | - 选择"文档校对"选项卡
161 | - 点击"选择 DOCX 文件"按钮选择要校对的 Word 文档
162 | - (可选)选择知识库以增强校对准确性
163 | - 选择合适的校对模式
164 | - 点击"开始校正"按钮开始校对过程
165 | - 在右侧栏查看校对结果和修改建议
166 | - 点击"应用修改"按钮接受建议的修改
167 | - 点击"导出结果"按钮保存修改后的文档
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 | 🔧 开发计划
178 |
179 | - 大语言模型的格式化输出转 Word 文档
180 | - 增强用户界面交互体验
181 | - 优化 .docx 文件的处理算法
182 |
183 |
184 | 📖 版本情况
185 | 当前版本:v1.1.3
186 | 本软件会持续更新,最新版本可以在本项目页面上下载
187 |
188 | 项目地址:https://github.com/CZ600/AutoDocxProofread
189 |
190 | 📖 致谢
191 | 部分代码使用了night-peiqi的项目:
192 |
194 | https://github.com/night-peiqi/electron-vue3-typescript-template
195 |
196 | 📄 许可证
197 | 本项目采用 MIT 许可证 - 查看 LICENSE 文件了解详情
198 |
199 |
200 |
201 |
202 |
203 |
204 |
267 |
268 |
347 |
--------------------------------------------------------------------------------
/src/main/lancedb.ts:
--------------------------------------------------------------------------------
1 | import { app } from 'electron'
2 | import path from 'path'
3 | import { getEmbedding } from './chat'
4 | import * as arrow from 'apache-arrow'
5 |
// Cached module object of '@lancedb/lancedb'; null until the first successful import.
let lancedb: typeof import('@lancedb/lancedb') | null = null

/**
 * Return the LanceDB module, importing it dynamically on first use.
 * An import failure is logged and rethrown unchanged.
 */
async function getLanceDB() {
  if (lancedb) {
    return lancedb
  }

  try {
    lancedb = await import('@lancedb/lancedb')
  } catch (importError) {
    console.error('Failed to import LanceDB:', importError)
    throw importError
  }
  return lancedb
}
19 |
// Directory of the vector database: "vector-db" inside Electron's per-user data path.
const DB_PATH = path.join(app.getPath('userData'), 'vector-db')
// Shared connection handle for this module; null while no connection is held.
let db: lancedb.Connection | null = null
22 |
23 | // ========================
24 | // 🛠️ 工具函数
25 | // ========================
26 |
/**
 * Turn a repository name into a table name: every character outside
 * `[a-zA-Z0-9_]` becomes `_`, and the result is lower-cased.
 *
 * NOTE(review): names made only of non-ASCII characters collapse to runs of
 * underscores, so distinct names of equal length map to the same table name.
 *
 * @throws Error if `name` is empty or not a string.
 */
function sanitizeTableName(name: string): string {
  const isNonEmptyString = typeof name === 'string' && name.length > 0
  if (!isNonEmptyString) {
    throw new Error('Repository name must be a non-empty string')
  }

  const underscored = name.replace(/[^a-zA-Z0-9_]/g, '_')
  return underscored.toLowerCase()
}
36 |
/**
 * Generate a numeric id: the current epoch milliseconds multiplied by 1000,
 * plus a random integer in [0, 999].
 *
 * NOTE(review): the result is far larger than a 32-bit integer, while
 * `createTableSchema` declares the `id` column as Int32 — confirm how the
 * database handles these values.
 */
function generateId(): number {
  const timeComponent = Date.now() * 1000
  const randomComponent = Math.floor(Math.random() * 1000)
  return timeComponent + randomComponent
}
43 |
/**
 * Build the Arrow schema of a repository table.
 *
 * Columns: `id` (Int32, required), `text` and `filename` (Utf8, nullable),
 * `vector` (fixed-size list of `dimension` Float32 items, required) and
 * `metadata` (Utf8, nullable — holds a JSON string).
 *
 * @param dimension Length of the embedding vectors stored in the table.
 */
function createTableSchema(dimension: number): arrow.Schema {
  const vectorItem = new arrow.Field('item', new arrow.Float32())
  const vectorType = new arrow.FixedSizeList(dimension, vectorItem)

  const columns = [
    new arrow.Field('id', new arrow.Int32(), false),
    new arrow.Field('text', new arrow.Utf8(), true),
    new arrow.Field('filename', new arrow.Utf8(), true),
    new arrow.Field('vector', vectorType, false),
    new arrow.Field('metadata', new arrow.Utf8(), true) // JSON string
  ]

  return new arrow.Schema(columns)
}
56 |
57 | // ========================
58 | // 🔌 数据库连接管理
59 | // ========================
60 |
/**
 * Open the database connection if none is held yet and return the shared
 * handle. Calling it again while a connection exists returns that connection.
 *
 * @throws Error with the message prefix "数据库连接失败" when loading the
 *   module or connecting fails.
 */
export async function initLanceDB(): Promise<lancedb.Connection> {
  if (db) {
    return db
  }

  try {
    const driver = await getLanceDB()
    db = await driver.connect(DB_PATH)
    console.log(`✅ Connected to LanceDB at ${DB_PATH}`)
  } catch (err: any) {
    console.error('Failed to connect to LanceDB:', err)
    throw new Error(`数据库连接失败: ${err.message}`)
  }
  return db
}
77 |
/**
 * Drop the shared connection handle, if one is held.
 * Only the module-level reference is cleared; no close call is made on the
 * connection object.
 */
export async function closeLanceDB(): Promise<void> {
  if (!db) {
    return
  }

  try {
    // No explicit close is invoked here; the reference is simply released
    db = null
    console.log('✅ LanceDB connection closed')
  } catch (err) {
    console.error('Failed to close LanceDB:', err)
  }
}
92 |
93 | // ========================
94 | // 🗃️ 数据库级操作(跨表)
95 | // ========================
96 |
/**
 * List the names of all repositories (tables), omitting every table whose
 * name starts with an underscore.
 *
 * @throws Error with the message prefix "获取知识库列表失败" on failure.
 */
export async function listRepositories(): Promise<string[]> {
  try {
    await initLanceDB()
    const allTables = await db!.tableNames()

    const isVisible = (tableName: string) => !tableName.startsWith('_')
    return allTables.filter(isVisible)
  } catch (err: any) {
    console.error('Failed to list repositories:', err)
    throw new Error(`获取知识库列表失败: ${err.message}`)
  }
}
111 |
/**
 * Create an empty repository table (no rows are inserted).
 *
 * The vector dimension of the table is taken from the length of a sample
 * embedding requested from the given model.
 *
 * @param repositoryName Repository name; sanitized into the table name.
 * @param modelName Embedding model used to determine the vector dimension.
 * @param apiKey API key for the embedding request.
 * @param apiURL API base URL for the embedding request.
 * @throws Error with the message prefix "创建知识库失败" if the table already
 *   exists, the embedding request fails, or the table cannot be created.
 */
export async function createRepository(
  repositoryName: string,
  modelName: string,
  apiKey: string,
  apiURL: string
): Promise<void> {
  try {
    await initLanceDB()
    const tableName = sanitizeTableName(repositoryName)
    console.log('Creating repository:', repositoryName, 'with model:', modelName)

    // Refuse to overwrite an existing table
    const existingTables = await db!.tableNames()
    if (existingTables.includes(tableName)) {
      throw new Error(`Repository "${repositoryName}" already exists`)
    }

    // Determine the embedding dimension from a sample request
    const sampleEmbedding = await getEmbedding('Sample text for schema creation', modelName, apiKey, apiURL)
    const dimension = sampleEmbedding.length

    // Create the empty table with an explicit schema
    const schema = createTableSchema(dimension)
    await db!.createTable(tableName, [], { schema })

    console.log(`✅ Created repository: ${repositoryName} (dim=${dimension})`)
  } catch (error) {
    console.error('Failed to create repository:', error)
    // Non-Error throwables have no `.message`; fall back to their string form
    const reason = error instanceof Error ? error.message : String(error)
    throw new Error(`创建知识库失败: ${reason}`)
  }
}
149 |
/**
 * Delete a whole repository by dropping its table.
 *
 * @throws Error with the message prefix "删除知识库失败" if the table does not
 *   exist or cannot be dropped.
 */
export async function deleteRepository(repositoryName: string): Promise<void> {
  try {
    await initLanceDB()
    const tableName = sanitizeTableName(repositoryName)

    const knownTables = await db!.tableNames()
    const tableExists = knownTables.includes(tableName)
    if (!tableExists) {
      throw new Error(`Repository "${repositoryName}" does not exist`)
    }

    await db!.dropTable(tableName)
    console.log(`🗑️ Deleted repository: ${repositoryName}`)
  } catch (err: any) {
    console.error('Failed to delete repository:', err)
    throw new Error(`删除知识库失败: ${err.message}`)
  }
}
170 |
171 | // ========================
172 | // 📄 表内文档操作(单表)
173 | // ========================
174 |
/**
 * Open the table of a repository, creating it when opening fails.
 *
 * Any failure of `openTable` is treated as "table missing": a sample
 * embedding is requested to determine the vector dimension and an empty table
 * with the matching schema is created. If creation reports that the table
 * already exists, the table is opened again.
 *
 * @param repositoryName Repository name; sanitized into the table name.
 * @param modelName Embedding model used only when the table must be created.
 * @param apiKey API key for the sample embedding request.
 * @param apiURL API base URL for the sample embedding request.
 * @returns The opened or newly created table.
 * @throws Error with the message prefix "创建表失败" when creation fails for
 *   any reason other than "already exists".
 */
export async function getOrCreateTable(
  repositoryName: string,
  modelName: string,
  apiKey: string,
  apiURL: string
): Promise<lancedb.Table> {
  await initLanceDB()
  const tableName = sanitizeTableName(repositoryName)

  // Try to open first
  try {
    return await db!.openTable(tableName)
  } catch (openError) {
    // Opening failed — assume the table is missing and try to create it
    try {
      const sampleEmbedding = await getEmbedding('Sample text for dimension detection', modelName, apiKey, apiURL)
      const dimension = sampleEmbedding.length
      const schema = createTableSchema(dimension)
      return await db!.createTable(tableName, [], { schema })
    } catch (createError: any) {
      // Creation failed because the table exists after all: open it again
      if (createError.message?.includes('already exists') || createError.message?.includes('Table already exists')) {
        return await db!.openTable(tableName)
      }
      console.error('Failed to create table:', createError)
      throw new Error(`创建表失败: ${createError.message}`)
    }
  }
}
208 |
/**
 * Insert one text row into a repository, with a generated id and an embedding
 * computed for the text. `metadata` is stored as a JSON string.
 *
 * @param repositoryName Target repository.
 * @param text Text to store; must not be empty or whitespace-only.
 * @param filename File name recorded on the row; required.
 * @param metadata Arbitrary metadata, serialized with `JSON.stringify`.
 * @param modelName Embedding model name.
 * @param apiKey API key for the embedding request.
 * @param apiURL API base URL for the embedding request.
 * @returns The generated id together with the stored text, filename and metadata.
 * @throws Error for a missing filename or empty text; other failures are
 *   wrapped with the message prefix "插入文档失败".
 */
export async function insertDocument(
  repositoryName: string,
  text: string,
  filename: string,
  metadata: Record<string, any> = {},
  modelName: string,
  apiKey: string,
  apiURL: string
): Promise<{ id: number; text: string; filename: string; metadata: Record<string, any> }> {
  if (!filename) {
    throw new Error('filename is required')
  }
  const hasContent = Boolean(text) && text.trim().length > 0
  if (!hasContent) {
    throw new Error('text cannot be empty')
  }

  try {
    const table = await getOrCreateTable(repositoryName, modelName, apiKey, apiURL)
    const vector = await getEmbedding(text, modelName, apiKey, apiURL)
    const id = generateId()

    const row = {
      id,
      text,
      filename,
      vector,
      metadata: JSON.stringify(metadata)
    }
    await table.add([row])

    console.log(`📥 Inserted doc into ${repositoryName} (file: ${filename}, id: ${id})`)
    return { id, text, filename, metadata }
  } catch (err: any) {
    console.error('Failed to insert document:', err)
    throw new Error(`插入文档失败: ${err.message}`)
  }
}
250 |
/**
 * Vector-search a repository for rows similar to `queryText`.
 *
 * @param repositoryName Repository to search.
 * @param queryText Text that is embedded and used as the query vector.
 * @param modelName Embedding model name.
 * @param apiKey API key for the embedding request.
 * @param apiURL API base URL for the embedding request.
 * @param limit Maximum number of rows to return.
 * @param filter Extra SQL WHERE condition (without the filename part),
 *   e.g. "id > 100"; used verbatim.
 * @param filename Optional file filter; only its base name is compared, with
 *   single quotes doubled before it is embedded in the condition.
 * @returns Rows as `{ id, text, filename, score, meta }`, where `score` is the
 *   row's `_distance` and `meta` the parsed metadata JSON (`{}` when empty).
 * @throws Error with the message prefix "查询文档失败" on failure.
 */
export async function queryDocuments(
  repositoryName: string,
  queryText: string,
  modelName: string,
  apiKey: string,
  apiURL: string,
  limit: number = 5,
  filter: string = '',
  filename?: string
): Promise<Array<{ id: number; text: string; filename: string; score: number; meta: any }>> {
  try {
    const table = await getOrCreateTable(repositoryName, modelName, apiKey, apiURL)
    const queryVector = await getEmbedding(queryText, modelName, apiKey, apiURL)
    console.log('qureyText:', queryText)

    // Combine the optional filename condition with the caller-supplied filter
    let whereClause = filter
    if (filename) {
      const baseName = path.basename(filename)
      const quotedName = baseName.replace(/'/g, "''")
      const filenameCondition = `filename = '${quotedName}'`
      if (whereClause) {
        whereClause = `${filenameCondition} AND (${whereClause})`
      } else {
        whereClause = filenameCondition
      }
    }

    let search = table.search(queryVector).limit(limit)
    if (whereClause) {
      search = search.where(whereClause)
    }

    const rows = await search.toArray()
    const hits = rows.map((row: any) => {
      const meta = row.metadata ? JSON.parse(row.metadata) : {}
      return {
        id: row.id,
        text: row.text,
        filename: row.filename,
        score: row._distance,
        meta
      }
    })

    for (const hit of hits) {
      console.log('the query result text:', hit.text)
      console.log('the query result score:', hit.score)
    }
    console.log(`🔍 RAG查询详情:
仓库: ${repositoryName}
查询文本: ${queryText.substring(0, 50)}...
实际WHERE条件: ${whereClause}
返回结果数: ${rows.length}
首条结果分数: ${rows[0]?._distance}`)
    return hits
  } catch (err: any) {
    console.error('Failed to query documents:', err)
    throw new Error(`查询文档失败: ${err.message}`)
  }
}
309 |
/**
 * Return every row of a repository whose `filename` equals the given name
 * (plain query, no vector search).
 *
 * @param repositoryName Repository to read from.
 * @param filename Exact file name to match; single quotes are doubled before
 *   the value is embedded in the WHERE condition.
 * @returns Rows as `{ id, text, filename, meta }` with `meta` parsed from the
 *   stored JSON string (`{}` when empty).
 * @throws Error for a missing filename; other failures are wrapped with the
 *   message prefix "获取文件文档失败".
 */
export async function getDocumentsByFilename(
  repositoryName: string,
  filename: string
): Promise<Array<{ id: number; text: string; filename: string; meta: any }>> {
  if (!filename) {
    throw new Error('filename is required')
  }

  try {
    await initLanceDB()
    const table = await db!.openTable(sanitizeTableName(repositoryName))

    const quotedName = filename.replace(/'/g, "''")
    const condition = `filename = '${quotedName}'`
    const rows = await table.query().where(condition).toArray()

    return rows.map((row: any) => {
      const meta = row.metadata ? JSON.parse(row.metadata) : {}
      return { id: row.id, text: row.text, filename: row.filename, meta }
    })
  } catch (err: any) {
    console.error('Failed to get documents by filename:', err)
    throw new Error(`获取文件文档失败: ${err.message}`)
  }
}
342 |
/**
 * Delete every row of a repository whose `filename` equals the given name.
 * Marked as deprecated in the original comment.
 *
 * @returns Always `1` on success — a success marker, not the number of rows
 *   removed.
 * @throws Error for a missing filename; other failures are wrapped with the
 *   message prefix "删除文件文档失败".
 */
export async function deleteDocumentsByFilename(repositoryName: string, filename: string): Promise<number> {
  if (!filename) {
    throw new Error('filename is required')
  }

  try {
    await initLanceDB()
    const table = await db!.openTable(sanitizeTableName(repositoryName))

    const quotedName = filename.replace(/'/g, "''")
    const condition = `filename = '${quotedName}'`
    await table.delete(condition)

    console.log(`🗑️ Deleted all docs with filename: ${filename} in ${repositoryName}`)

    // The deleted-row count is not read from the delete call; 1 signals success
    const SUCCESS_MARKER = 1
    return SUCCESS_MARKER
  } catch (err: any) {
    console.error('Failed to delete documents by filename:', err)
    throw new Error(`删除文件文档失败: ${err.message}`)
  }
}
368 |
/**
 * Replace the text, embedding and metadata of one row; `filename` is left
 * untouched.
 *
 * The row's existence is verified before the embedding is requested, so a
 * wrong id does not cost an embedding API call.
 *
 * @param repositoryName Repository containing the row.
 * @param id Id of the row to update.
 * @param newText New text; a fresh embedding is computed for it.
 * @param newMeta New metadata, stored as a JSON string.
 * @param modelName Embedding model name.
 * @param apiKey API key for the embedding request.
 * @param apiURL API base URL for the embedding request.
 * @returns The id together with the new text and metadata.
 * @throws Error with the message prefix "更新文档失败" if the row does not
 *   exist or the update fails.
 */
export async function updateDocument(
  repositoryName: string,
  id: number,
  newText: string,
  newMeta: Record<string, any> = {},
  modelName: string,
  apiKey: string,
  apiURL: string
): Promise<{ id: number; text: string; meta: Record<string, any> }> {
  try {
    const table = await getOrCreateTable(repositoryName, modelName, apiKey, apiURL)
    const idCondition = `id = ${id}`

    // Make sure the row exists before spending an embedding request on it
    const existingDocs = await table.query().where(idCondition).limit(1).toArray()
    if (existingDocs.length === 0) {
      throw new Error(`Document with id ${id} not found`)
    }

    const embedding = await getEmbedding(newText, modelName, apiKey, apiURL)

    await table.update({
      where: idCondition,
      values: {
        text: newText,
        vector: embedding,
        metadata: JSON.stringify(newMeta)
      }
    })

    console.log(`✏️ Updated doc ${id} in ${repositoryName}`)
    return { id, text: newText, meta: newMeta }
  } catch (error) {
    console.error('Failed to update document:', error)
    // Non-Error throwables have no `.message`; fall back to their string form
    const reason = error instanceof Error ? error.message : String(error)
    throw new Error(`更新文档失败: ${reason}`)
  }
}
411 |
/**
 * Delete a single row by id.
 *
 * @returns The id that was passed in.
 * @throws Error with the message prefix "删除文档失败" on failure.
 */
export async function deleteDocument(repositoryName: string, id: number): Promise<{ id: number }> {
  try {
    await initLanceDB()
    const table = await db!.openTable(sanitizeTableName(repositoryName))

    const condition = `id = ${id}`
    await table.delete(condition)
    console.log(`🗑️ Deleted doc ${id} from ${repositoryName}`)

    return { id }
  } catch (err: any) {
    console.error('Failed to delete document:', err)
    throw new Error(`删除文档失败: ${err.message}`)
  }
}
430 |
/**
 * Delete every row of a repository whose `filename` equals the given name.
 *
 * @returns The filename that was passed in.
 * @throws Error for a missing filename; other failures are wrapped with the
 *   message prefix "删除文档失败".
 */
export async function deleteDocumentByName(repositoryName: string, filename: string): Promise<{ filename: string }> {
  if (!filename) {
    throw new Error('filename is required')
  }

  try {
    await initLanceDB()
    const table = await db!.openTable(sanitizeTableName(repositoryName))

    // Double single quotes so the name can sit inside a quoted SQL literal
    const quotedName = filename.replace(/'/g, "''")
    const condition = `filename = '${quotedName}'`
    await table.delete(condition)

    console.log(`🗑️ Deleted docs with filename: ${filename} from ${repositoryName}`)

    return { filename }
  } catch (err: any) {
    console.error('Failed to delete document by name:', err)
    throw new Error(`删除文档失败: ${err.message}`)
  }
}
455 |
/**
 * List the distinct, non-empty file names stored in a repository, in order of
 * first appearance in the query result.
 *
 * @param repositoryName Repository name.
 * @returns De-duplicated file names.
 * @throws Error with the message prefix "获取文件名列表失败" if the repository
 *   does not exist or the query fails.
 */
export async function listFilenamesInRepository(repositoryName: string): Promise<string[]> {
  try {
    await initLanceDB()
    const tableName = sanitizeTableName(repositoryName)

    // The table must exist
    const knownTables = await db!.tableNames()
    if (!knownTables.includes(tableName)) {
      throw new Error(`Repository "${repositoryName}" does not exist`)
    }

    const table = await db!.openTable(tableName)

    // Read only the filename column of every row
    const rows = await table.query().select('filename').toArray()

    // Collect truthy names; a Set keeps first-insertion order
    const seen = new Set<any>()
    for (const row of rows as any[]) {
      if (row.filename) {
        seen.add(row.filename)
      }
    }
    const filenames = Array.from(seen)

    console.log(`📁 Found ${filenames.length} unique filenames in ${repositoryName}`)
    return filenames
  } catch (err: any) {
    console.error('Failed to list filenames:', err)
    throw new Error(`获取文件名列表失败: ${err.message}`)
  }
}
487 |
/**
 * Return every row of a repository.
 *
 * @param repositoryName Repository name.
 * @returns Rows as `{ id, text, filename, meta }` with `meta` parsed from the
 *   stored JSON string (`{}` when empty).
 * @throws Error with the message prefix "获取所有文档失败" if the repository
 *   does not exist or the query fails.
 */
export async function getAllDocuments(
  repositoryName: string
): Promise<Array<{ id: number; text: string; filename: string; meta: any }>> {
  try {
    await initLanceDB()
    const tableName = sanitizeTableName(repositoryName)

    // The table must exist
    const knownTables = await db!.tableNames()
    if (!knownTables.includes(tableName)) {
      throw new Error(`Repository "${repositoryName}" does not exist`)
    }

    const table = await db!.openTable(tableName)
    const rows = await table.query().toArray()

    return rows.map((row: any) => {
      const meta = row.metadata ? JSON.parse(row.metadata) : {}
      return { id: row.id, text: row.text, filename: row.filename, meta }
    })
  } catch (err: any) {
    console.error('Failed to get all documents:', err)
    throw new Error(`获取所有文档失败: ${err.message}`)
  }
}
522 |
--------------------------------------------------------------------------------
/src/main/ipcHandlers.ts:
--------------------------------------------------------------------------------
1 | import { ipcMain } from 'electron'
2 | import { dialog } from 'electron'
3 | import { DB } from './database'
4 | import { testAPI } from './chat'
5 | import { proofreadDocument, getDefaultPrompt, setNewPrompt } from './proof'
6 | import { deleteDocumentByName, listFilenamesInRepository } from './lancedb'
7 | import { Mode } from '@google/genai'
8 | import * as mammoth from 'mammoth'
9 | import { replaceTextInDocx } from './wordProcess'
10 | import {
11 | deleteRepository,
12 | initLanceDB,
13 | insertDocument,
14 | queryDocuments,
15 | updateDocument,
16 | deleteDocument,
17 | listRepositories,
18 | createRepository
19 | } from './lancedb'
20 | import { processDocument, getPDFDocumentChunks } from './pdfUtils'
21 | import { list } from 'changelog.config'
22 | import { error } from 'console'
23 | import { eventNames, env } from 'process'
24 | // const { platform, arch, env } = process;
/**
 * Settings of one model API endpoint as exchanged over IPC.
 */
export interface apiSettings {
  // Base URL of the API
  apiURL: string
  // Key used to authenticate against the API
  apiKey: string
  // Name of the model to call
  modelName: string
  // NOTE(review): presumably the maximum number of parallel requests — confirm against the consumer
  parallel?: number
  // NOTE(review): presumably a request time/rate limit; null appears to mean "no limit" — confirm
  TimeLimit?: number | null
}
32 |
// Currently selected API settings, held in memory by the main process.
// Starts empty with parallel = 30 and no time limit; the IPC handlers in this
// file overwrite its fields.
let api_info: apiSettings = {
  apiURL: '',
  apiKey: '',
  modelName: '',
  parallel: 30,
  TimeLimit: null
}
40 |
41 | // 全局embedding_api变量已移除,由Pinia store管理
42 | export const registerIpcHandlers = () => {
43 | // 单向通信:接收渲染进程的消息
44 | // 监听消息,通道是message
45 | ipcMain.on('message', (event, message: string) => {
46 | console.log('Received message', message)
47 | })
48 |
49 | // 双向通信:接收渲染进程的消息,并返回结果
50 | ipcMain.handle('receiveAndReturn', (event, message: string) => {
51 | console.log('receiveAndReturn', message)
52 |
53 | // 想返回什么都可以
54 | const ret = {
55 | rawData: message,
56 | newData: `neight-peiqi${message}`
57 | }
58 | return ret
59 | })
60 | const path = require('path')
61 | const fs = require('fs')
62 | // 处理文件选择请求
63 | ipcMain.handle('select-docx-file', async () => {
64 | try {
65 | const result = await dialog.showOpenDialog({
66 | title: '选择 DOCX 文件',
67 | filters: [{ name: 'Word 文档', extensions: ['docx'] }],
68 | properties: ['openFile']
69 | })
70 |
71 | if (result.canceled || result.filePaths.length === 0) {
72 | return null
73 | }
74 |
75 | // 返回文件路径
76 | return result.filePaths[0]
77 | } catch (error) {
78 | console.error('文件选择错误:', error)
79 | throw error
80 | }
81 | })
82 |
83 | // 处理文件读取请求(可选,如果需要主进程读取文件内容)
84 | ipcMain.handle('read-docx-file', async (event, filePath) => {
85 | try {
86 | const data = await fs.promises.readFile(filePath)
87 | return {
88 | path: filePath,
89 | content: data.toString('base64')
90 | }
91 | } catch (error) {
92 | console.error('cannot read file:', error)
93 | throw error
94 | }
95 | })
96 |
97 | ipcMain.handle('set-api', async (event, URL, Key, modelName) => {
98 | try {
99 | console.log('add a new api setting:', URL, Key, modelName)
100 | api_info.apiKey = Key
101 | api_info.apiURL = URL
102 | api_info.modelName = modelName
103 | const result = await DB.insertAPISetting(URL, Key, modelName)
104 | console.log('the result of the new api setting adding:', result)
105 | if (result) {
106 | return 'success'
107 | } else {
108 | return 'error'
109 | }
110 | } catch (error) {
111 | return 'error'
112 | }
113 | })
114 | // 获取所有api设置
115 | ipcMain.handle('get-all-api-settings', async event => {
116 | return await DB.getAllAPISettings()
117 | })
118 |
119 | ipcMain.handle('delete-one-api-setting', async (event, id) => {
120 | const result = await DB.deleteAPISettingById(id)
121 | if (result) {
122 | return {
123 | isSuccess: true
124 | }
125 | } else {
126 | return {
127 | isSuccess: false
128 | }
129 | }
130 | })
131 |
132 | ipcMain.handle('test-api', async (event, URL, Key, modelName) => {
133 | if (!URL || !Key || !modelName) {
134 | console.log('Please input all the parameters!')
135 | return false
136 | } else {
137 | console.log('Testing API:', URL, Key, modelName)
138 | }
139 | const result = await testAPI(URL, Key, modelName)
140 | return result
141 | })
142 |
143 | ipcMain.handle('selectAPISetting', async (event, URL, Key, modelName, parallel = 30, TimeLimit = null) => {
144 | api_info.apiKey = Key
145 | api_info.apiURL = URL
146 | api_info.modelName = modelName
147 | api_info.parallel = parallel
148 | api_info.TimeLimit = TimeLimit
149 | console.log('Selected API:', URL, Key, modelName, parallel, TimeLimit)
150 | return true
151 | })
152 |
153 | ipcMain.handle('get-api-settings', async event => {
154 | return {
155 | URL: api_info.apiURL,
156 | Key: api_info.apiKey,
157 | modelName: api_info.modelName,
158 | parallel: api_info.parallel || 30,
159 | TimeLimit: api_info.TimeLimit
160 | }
161 | })
162 |
// Handles a document proofreading request.
// Supports RAG (knowledge repositories) and runs the model calls in parallel.
//
// `Model` selects the proofreading granularity:
//   'wordError'          -> 'sentence'
//   'ComprehensiveError' -> 'section'
//   'polish'             -> 'full'
// Validation failures resolve to `{ isSuccess: false, message }`; a successful run resolves to
// `{ proofResult, token_usage }`; an unexpected error resolves to `{ proofResult: null, token_usage: 0 }`.
ipcMain.handle(
  'process-docx',
  async (
    event,
    Model,
    filePath,
    repositoryNameList?: string[],
    embeddingConfig?: apiSettings,
    setTimeLimit?: number,
    parallelSet: number = 30
  ) => {
    try {
      console.log(
        '-----------------------------------------------processing docx file-------------------------------------------------------'
      )
      console.info('Processing settings:', Model, filePath)
      // Mask the embedding API key before logging the configuration.
      console.info(
        'embedding settings:',
        repositoryNameList,
        embeddingConfig ? { ...embeddingConfig, apiKey: '***' } : embeddingConfig
      )
      console.info('the parallel set is:', parallelSet)
      console.info('the time limit of process is:', setTimeLimit)

      if (!Model || !filePath) {
        return {
          isSuccess: false,
          message: 'Please select a model and a file!'
        }
      }

      if (!api_info.apiKey || !api_info.apiURL || !api_info.modelName) {
        return {
          isSuccess: false,
          message: 'Please select an API setting!'
        }
      }

      // A Map (rather than a plain object) so that names such as "constructor" cannot match by accident.
      const modeByModel = new Map<string, 'sentence' | 'section' | 'full'>([
        ['wordError', 'sentence'],
        ['ComprehensiveError', 'section'],
        ['polish', 'full']
      ])
      const mode = modeByModel.get(Model)
      if (!mode) {
        // Previously an unknown model fell through every branch and resolved to `undefined`.
        return {
          isSuccess: false,
          message: `Unknown proofreading model: ${Model}`
        }
      }

      console.log('will process by the model:', api_info.apiURL, api_info.modelName)
      const { proofResult, token_usage } = await proofreadDocument(
        filePath,
        mode,
        api_info.apiKey,
        api_info.modelName,
        api_info.apiURL,
        repositoryNameList,
        embeddingConfig,
        parallelSet,
        setTimeLimit
      )

      // Round-trip through JSON so the value handed back over IPC is guaranteed to be cloneable.
      try {
        return {
          proofResult: JSON.parse(JSON.stringify(proofResult)),
          token_usage: token_usage
        }
      } catch (error) {
        console.error('序列化校对结果时出错:', error)
        return {
          proofResult: null,
          token_usage: token_usage
        }
      }
    } catch (error) {
      console.error('处理文档校对请求时出错:', error)
      return {
        proofResult: null,
        token_usage: 0
      }
    }
  }
)
290 |
// Newer response envelope: a success flag, a human-readable message and an optional payload.
// `T` is the payload type; it defaults to `unknown` so the interface can also be used without a type argument.
interface ResponseData<T = unknown> {
  success: boolean
  message: string
  data?: T
}
297 |
// One correction applied by the user, as received in `config.appliedCorrections`
// by the 'exportCorrectedDocx' handler.
interface Correction {
  // Text to be replaced in the document.
  original: string
  // Replacement text.
  suggested: string
}
302 |
// Exports a corrected copy of the DOCX file.
// The copy is written next to the original with "_new" inserted before the extension
// (e.g. "report.docx" -> "report_new.docx"). Resolves to `true`; any failure is logged and rethrown.
ipcMain.handle('exportCorrectedDocx', async (event, config) => {
  try {
    // Make sure the data passed in is plain, cloneable JSON.
    const serializableConfig = JSON.parse(JSON.stringify(config))

    const filePath = serializableConfig.originalFilePath
    // Captures "the last dot + extension".
    const extensionPattern = /(\.\w+)$/
    // Without an extension the replace() would be a no-op and the output path would equal the
    // input path, so fall back to appending the suffix.
    const newPath = extensionPattern.test(filePath)
      ? filePath.replace(extensionPattern, '_new$1')
      : `${filePath}_new`
    // replaceTextInDocx is given entries keyed `origin`/`suggested`, hence the rename from `original`.
    const correctedText = serializableConfig.appliedCorrections.map((correction: Correction) => ({
      origin: correction.original,
      suggested: correction.suggested
    }))

    await replaceTextInDocx(filePath, newPath, correctedText)
    return true
  } catch (error) {
    console.error('output error:', error)
    throw error
  }
})
// Returns the default prompt.
ipcMain.handle('getDefaultPrompt', async event => {
  return await getDefaultPrompt()
})
// Sets the prompt. Note: the prompt set here is not persisted; it is only a temporary setting.
// Rejects when no prompt is supplied; otherwise resolves to true/false depending on setNewPrompt().
ipcMain.handle('setPrompt', async (event, newPrompt) => {
  if (!newPrompt) {
    throw new Error('Please input a prompt!')
  }
  const applied = await setNewPrompt(newPrompt)
  return applied ? true : false
})
// History - fetch every history record. Rejects when the database returns a falsy result.
ipcMain.handle('getAllHistory', async event => {
  const records = await DB.getALLHistory()
  if (!records) {
    throw new Error('No history found!')
  }
  return records
})
// History - delete every history record. Resolves to true, or rejects when the deletion reports failure.
ipcMain.handle('deleteAllHistory', async event => {
  const deleted = await DB.deleteALLHistory()
  if (!deleted) {
    throw new Error('delete history failed!')
  }
  return true
})
// History - look up one record by id.
// Resolves to the record, rejects when no record exists, and resolves to `undefined` when `id` is falsy.
ipcMain.handle('getHistoryById', async (event, id) => {
  if (id) {
    const result = await DB.getHistoryById(id)
    if (result) {
      return result
    } else {
      // Template literal (backticks) so the id is actually interpolated into the message.
      throw new Error(`No history found by id: ${id}`)
    }
  }
})
370 |
// History - delete one record by id. Resolves to the database result, or to `false` when the deletion throws.
ipcMain.handle('deleteHistoryById', async (event, id) => {
  try {
    // `return await` keeps a rejection inside this try/catch.
    return await DB.deleteHistoryById(id)
  } catch (error) {
    console.error('删除历史记录失败:', error)
    return false
  }
})
// History - insert one record.
// Resolves to `{ success: true, id }` on success and `{ success: false, error }` otherwise; never rejects.
// `resultCorrect` must be a JSON string; it is parsed once purely to validate it.
ipcMain.handle(
  'insertOneHistory',
  async (event, filePath: string, apiURL: string, modelName: string, resultCorrect: string) => {
    // A thrown value is not guaranteed to be an Error, so derive the message defensively.
    const describeError = (err: unknown): string => (err instanceof Error ? err.message : String(err))

    try {
      // Parameter validation
      if (!filePath || !apiURL || !modelName || !resultCorrect) {
        const errorMsg =
          '参数不完整: ' + JSON.stringify({ filePath, apiURL, modelName, resultCorrect: !!resultCorrect })
        console.error(errorMsg)
        return { success: false, error: errorMsg }
      }

      // Try to parse the JSON to make sure the payload is valid
      try {
        JSON.parse(resultCorrect)
      } catch (parseError) {
        const errorMsg = 'resultCorrect不是有效的JSON字符串: ' + describeError(parseError)
        console.error(errorMsg)
        return { success: false, error: errorMsg }
      }

      const result = await DB.insertOneHistory(filePath, apiURL, modelName, resultCorrect)
      return { success: true, id: result }
    } catch (error) {
      console.error('插入历史记录失败:', error)
      return { success: false, error: describeError(error) }
    }
  }
)
//----------------------------------------The implementation of this RAG----------------------------------------
// Vector database - insert a document.
// The `id` field of the payload is accepted but not forwarded to insertDocument().
ipcMain.handle('lancedb:insert', async (event, { repositoryName, fileName, text, id, metadata }, modelConfig) => {
  const inserted = insertDocument(
    repositoryName,
    text,
    fileName,
    metadata,
    modelConfig.modelName,
    modelConfig.apiKey,
    modelConfig.apiURL
  )
  return inserted
})
423 |
// Vector database - query documents.
// NOTE(review): proof.ts calls queryDocuments() with the repository name as the first argument,
// while this call starts with the query text — confirm against the signature in lancedb.ts.
ipcMain.handle('lancedb:query', async (event, { queryText, limit, filter, fileName }, modelConfig) => {
  const matches = queryDocuments(
    queryText,
    modelConfig.modelName,
    modelConfig.apiKey,
    modelConfig.apiURL,
    limit,
    filter,
    fileName
  )
  return matches
})
436 |
// Vector database - update a document.
ipcMain.handle('lancedb:update', async (event, { repositoryName, id, text, metadata }, modelConfig) => {
  const updated = updateDocument(
    repositoryName,
    id,
    text,
    metadata,
    modelConfig.modelName,
    modelConfig.apiKey,
    modelConfig.apiURL
  )
  return updated
})
449 |
// Vector database - delete a document.
ipcMain.handle('lancedb:delete', async (event, { repositoryName, id }) => {
  const removed = deleteDocument(repositoryName, id)
  return removed
})
// Vector database - list all tables (lancedb repositories).
ipcMain.handle('listRepositories', async event => {
  return await listRepositories()
})
// Vector database - create an empty knowledge table.
// Resolves to true; a failure is logged and rethrown to the renderer.
ipcMain.handle('createRepository', async (event, { repositoryName, modelName, apiKey, apiURL }) => {
  try {
    await createRepository(repositoryName, modelName, apiKey, apiURL)
  } catch (error) {
    console.log('error when create a empty repository:', error)
    throw error
  }
  return true
})
// Vector database - delete a whole table (a single knowledge repository).
// Resolves to true; a failure is logged and rethrown to the renderer.
ipcMain.handle('deleteRepository', async (event, repositoryName: string) => {
  try {
    // Awaited so that, if deleteRepository() is asynchronous, a rejection is caught below instead of
    // escaping as an unhandled rejection after `true` has already been returned.
    // (Awaiting a non-promise value is harmless.)
    await deleteRepository(repositoryName)
    return true
  } catch (error) {
    // Template literal so the repository name is actually interpolated.
    console.log(`failed to delete ${repositoryName} because:`, error)
    throw error
  }
})
480 | // 向量数据库 -
481 |
// IPC handler - process a PDF file (deprecated).
ipcMain.handle('pdf:process', async (event, { repositoryName, filePath }, modelConfig) => {
  try {
    const processed = await processDocument(
      repositoryName,
      filePath,
      undefined, // documentId is generated automatically
      500, // default chunk size
      50, // default overlap size
      modelConfig.modelName,
      modelConfig.apiKey,
      modelConfig.apiURL
    )
    return processed
  } catch (error) {
    console.error('Failed to process PDF:', error)
    throw error
  }
})
500 |
// IPC handler - let the user pick a document file (pdf/docx/txt) and process it.
// Resolves to `false` when the dialog is dismissed without a selection.
ipcMain.handle('pdf:select-and-process', async (event, repositoryName, modelConfig) => {
  const selection = await dialog.showOpenDialog({
    properties: ['openFile'],
    filters: [{ name: 'Document Files', extensions: ['pdf', 'docx', 'txt'] }]
  })
  const chosenPaths = selection.filePaths

  const nothingChosen = !chosenPaths || chosenPaths.length === 0
  if (nothingChosen) {
    console.log('User selected nothing!')
    return false
  }

  try {
    const processed = await processDocument(
      repositoryName,
      chosenPaths[0],
      '', // documentId is generated automatically
      500,
      50,
      modelConfig.modelName,
      modelConfig.apiKey,
      modelConfig.apiURL
    )
    return processed
  } catch (error) {
    console.error('Document processing failed:', error)
    throw error
  }
})
529 |
// IPC handler - fetch all chunks of a PDF document.
ipcMain.handle('pdf:get-chunks', async (event, { documentId, repositoryName }) => {
  const chunks = getPDFDocumentChunks(repositoryName, documentId)
  return chunks
})
// Deletes every document chunk stored under the given file name in a repository.
// Resolves to the value returned by deleteDocumentByName(); rejects with an Error when that value is falsy.
ipcMain.handle('deleteDocumentByName', async (event, repositoryName, filename) => {
  const deleteFileName = await deleteDocumentByName(repositoryName, filename)
  if (deleteFileName) {
    return deleteFileName
  }
  // Throw a real Error with an interpolated message (the previous `throw error('...')` relied on an
  // `error` function and a single-quoted string, so neither the error object nor the message was usable).
  throw new Error(`delete the file ${filename} from repository ${repositoryName} error`)
})
// Returns the de-duplicated list of file names in a repository.
ipcMain.handle('listFilenamesInRepository', async (event, repositoryName) => {
  return await listFilenamesInRepository(repositoryName)
})
548 | // 设置embedding模型 - 通过其他机制由前端Pinia store管理,不再需要此IPC处理
549 | // ipcMain.handle('setEmbeddingAPI', ...) 已移除
550 |
551 | // 获取embedding模型信息 - 通过其他机制由前端Pinia store管理,不再需要此IPC处理
552 | // ipcMain.handle('getEmbeddingAPI', ...) 已移除
// Debug-only endpoint: logs and returns env.LANCEDB_NATIVE_PATH.
ipcMain.handle('getEnvPath', async event => {
  const nativePath = env.LANCEDB_NATIVE_PATH
  console.log(' env.LANCEDB_NATIVE_PATH:', nativePath)
  return env.LANCEDB_NATIVE_PATH
})
558 | }
559 |
--------------------------------------------------------------------------------
/src/renderer/views/Dictionary.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
43 |
44 |
45 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
欢迎使用知识库管理系统
72 |
请选择或创建一个知识库开始使用
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
90 |
91 |
92 |
93 |
94 | {{ row }}
95 |
96 |
97 |
98 | 删除
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
124 |
125 |
126 |
127 |
128 |
431 |
432 |
646 |
--------------------------------------------------------------------------------
/src/main/proof.ts:
--------------------------------------------------------------------------------
1 | import * as fs from 'fs'
2 | import * as mammoth from 'mammoth'
3 | import { OpenaiGen } from './chat'
4 | import path from 'path'
5 | import { queryDocuments, getAllDocuments } from './lancedb'
6 | import { error } from 'console'
7 |
// ====== Type definitions ======
/** One correction returned by the model, in the JSON shape requested by the prompt. */
interface ProofreadingCorrection {
  /** The erroneous text as it appears in the source. */
  original: string
  /** The suggested replacement. */
  suggested: string
  /** Short explanation of the error. */
  reason: string
  /** Error category; the prompt asks for one of the listed literals, but any string is accepted. */
  type: 'Typo' | 'Punctuation' | 'Grammar' | 'Consistency' | string
  /** Knowledge-base chunks that were supplied as reference material (only set when RAG was used). */
  References?: string[]
}
16 |
/** A section of the parsed document: a heading plus the text that follows it. */
interface DocumentSection {
  /** The trimmed heading line. */
  title: string
  /** The section's body lines joined with '\n'. */
  content: string
  /** Heading depth as computed by getHeadingLevel(). */
  level: number
}
22 |
/** Result of parseWordDocument(): the document title and its sections in reading order. */
interface DocumentStructure {
  /** The first heading-like line found in the document ('' when there is none). */
  title: string
  sections: DocumentSection[]
}
27 |
/**
 * One hit returned by queryDocuments(). Within this file only `text` and `score` are read
 * (queryDocChunk sorts by `score` descending and returns `text`).
 */
interface RAGQueryResult {
  id: number
  text: string
  // presumably the source file the chunk came from — TODO confirm against lancedb.ts
  filename: string
  score: number
  meta: any
}
35 |
/** Credentials and model name for an API endpoint; used here for the optional embedding configuration. */
interface ApiSettings {
  apiKey: string
  apiURL: string
  modelName: string
}
41 |
// ====== Global prompt ======
// System prompt currently in effect. It is mutable: setNewPrompt() overwrites it, and the change
// only lives in memory.
let defaultPrompt = `
你是一个专业的中文文本校对专家。请仔细检查文本中的错别字、标点错误和语法问题。
要求:
1. 只校对错别字、标点错误、语法错误
2. 保持原文意思不变
3. 不要进行风格改写或内容扩展
4. 按照指定的JSON格式返回结果
请校对用户提供的文本,找出其中的错别字、标点错误和语法问题,并按照以下JSON格式返回:
[
{
"original": "原文错误内容(只截取原文错误的词组,不要多写,不超过15字!)",
"suggested": "建议修改内容(基于原文的修改后的内容)",
"reason": "错误原因的简短说明",
"type": "错误类型(Typo/Punctuation/Grammar/Consistency)"
}
]
如果没有任何错误,请返回空数组[]。只返回JSON数组,不要添加其他说明文字。
`
61 |
// Immutable factory-default prompt; this is what getDefaultPrompt() returns, regardless of any
// prompt set through setNewPrompt().
const realDefaultPrompt = `你是一个专业的中文文本校对专家。请仔细检查文本中的错别字、标点错误和语法问题。
要求:
1. 只校对错别字、标点错误、语法错误
2. 保持原文意思不变
3. 不要进行风格改写或内容扩展
4. 按照指定的JSON格式返回结果
请校对用户提供的文本,找出其中的错别字、标点错误和语法问题,并按照以下JSON格式返回:
[
{
"original": "原文错误内容(只截取原文错误的词组,不要多写,不超过15字!)",
"suggested": "建议修改内容(基于原文的修改后的内容)",
"reason": "错误原因的简短说明",
"type": "错误类型(Typo/Punctuation/Grammar/Consistency)"
}
]
如果没有任何错误,请返回空数组[]。只返回JSON数组,不要添加其他说明文字。`
78 |
// Header sentence placed in front of the retrieved knowledge-base chunks when they are appended
// to the system prompt (see proofreadTextWithRAG).
const ragText =
  '以下内容是校对的参考内容,请结合这些文字进行校对工作(如果是双语内容,则以校对内容的语言类型为准),校对规则遵循之前讲述的要求'
81 |
// ====== Concurrency control helpers ======
/**
 * Minimal sleep helper: the returned promise settles once the timer fires.
 * @param ms how long to wait, in milliseconds
 */
const delay = (ms: number) =>
  new Promise(wake => {
    setTimeout(wake, ms)
  })
88 |
/**
 * Runs an async processor over `items` with a cap on in-flight tasks and an optional rate limit.
 *
 * A task that rejects is logged and its slot is dropped from the output, so the returned array
 * can be shorter than `items`; the relative order of the surviving results follows `items`.
 * (A processor that legitimately resolves to `undefined` is dropped as well.)
 *
 * @param items elements to process
 * @param maxConcurrency maximum number of tasks in flight; values below 1 (or NaN) are treated as 1
 * @param processor async function handling a single element
 * @param options optional settings
 * @param options.requestsPerMinute maximum task starts per minute; enforced as a minimum gap
 *        between consecutive starts
 * @returns the results of all tasks that completed successfully
 */
export async function runWithLimits<T, R>(
  items: T[],
  maxConcurrency: number,
  processor: (item: T) => Promise<R>,
  options?: {
    requestsPerMinute?: number
  }
): Promise<R[]> {
  const results: (R | undefined)[] = new Array(items.length)
  const executing: Promise<void>[] = []

  // With a limit of 0, a negative number or NaN the pool would count as "full" while empty, and
  // Promise.race([]) never settles — the loop would hang forever. Clamp to at least one slot.
  const concurrency = maxConcurrency >= 1 ? maxConcurrency : 1

  // --- Rate-limit setup ---
  const { requestsPerMinute } = options || {}
  const hasRateLimit = typeof requestsPerMinute === 'number' && requestsPerMinute > 0

  // Minimum gap between two task starts, in milliseconds
  const minInterval = hasRateLimit ? (60 * 1000) / requestsPerMinute! : 0
  let lastRequestTime = 0 // timestamp at which the previous task was started

  for (let i = 0; i < items.length; i++) {
    // 1. Wait for a free slot in the pool (if it is full)
    if (executing.length >= concurrency) {
      await Promise.race(executing)
    }

    // 2. Then wait until the rate-limit interval has elapsed
    if (hasRateLimit) {
      const elapsedTime = Date.now() - lastRequestTime
      if (elapsedTime < minInterval) {
        await delay(minInterval - elapsedTime)
      }
      // Record the (post-delay) start time of this task
      lastRequestTime = Date.now()
    }

    const execute = async () => {
      try {
        results[i] = await processor(items[i])
      } catch (error) {
        console.error(`并发任务 ${i} 失败:`, error)
        results[i] = undefined
      }
    }

    // Each task removes itself from the pool when it settles.
    const promise: Promise<void> = execute().then(() => {
      const index = executing.indexOf(promise)
      if (index !== -1) executing.splice(index, 1)
    })
    executing.push(promise)
  }

  await Promise.all(executing)
  return results.filter((r): r is R => r !== undefined)
}
158 |
// ====== Exported prompt management ======
/**
 * Returns the built-in default prompt (not the prompt currently in effect).
 * @returns the immutable factory-default prompt text
 */
export async function getDefaultPrompt(): Promise<string> {
  return realDefaultPrompt
}
163 |
/**
 * Replaces the prompt used for subsequent proofreading runs. The change is held in memory only.
 * @param newPrompt the prompt text to use from now on
 * @returns always `true`
 */
export async function setNewPrompt(newPrompt: string): Promise<boolean> {
  defaultPrompt = newPrompt
  return true
}
168 |
// ====== Utility functions ======
/**
 * Splits text into sentences at sentence-ending punctuation.
 *
 * Recognised terminators: 。 … and both the ASCII and the full-width forms of ! and ?.
 * A trailing fragment without a terminator is returned as the last sentence.
 *
 * @param text the text to split
 * @returns trimmed, non-empty sentences, each including its terminator(s)
 */
function splitSentences(text: string): string[] {
  // The full-width !? are listed explicitly: Chinese text uses them, and without them such
  // sentences would be merged with the following one.
  const sentenceRegex = /[^。!?…!?]+[。!?…!?]+|[^。!?…!?]+$/g
  const sentences = text.match(sentenceRegex) || []
  return sentences.map(s => s.trim()).filter(s => s.length > 0)
}
175 |
/**
 * Heuristic check for whether a line looks like a heading.
 *
 * A line qualifies when, after trimming, it is non-empty, shorter than 100 characters and either
 * ends with 章/节/篇 or starts with a numbering pattern ("第…章/节/篇", "1." / "1、", "一." / "一、").
 */
function isLikelyTitle(line: string): boolean {
  const candidate = line.trim()
  if (candidate.length === 0 || candidate.length >= 100) {
    return false
  }

  const headingSuffixes = ['章', '节', '篇']
  if (headingSuffixes.some(suffix => candidate.endsWith(suffix))) {
    return true
  }

  const numberingPatterns = [
    /^第[一二三四五六七八九十\d]+[章节篇]/,
    /^[1-9][.、]\s*\S/,
    /^[一二三四五六七八九十][.、]\s*\S/
  ]
  return numberingPatterns.some(pattern => pattern.test(candidate))
}
189 |
/**
 * Estimates the depth of a heading line.
 *
 * - "第…章"                    -> 1
 * - "第…节"                    -> 2
 * - dotted numbering ("1.1 …") -> 3
 * - "1. …"                     -> 2
 * - other digit-led headings   -> 3
 * - anything else              -> 2
 *
 * @param line the heading line (surrounding whitespace is ignored)
 * @returns the heading level, 1 being the top level
 */
function getHeadingLevel(line: string): number {
  const trimmed = line.trim()
  if (/^第[一二三四五六七八九十\d]+章/.test(trimmed)) return 1
  if (/^第[一二三四五六七八九十\d]+节/.test(trimmed)) return 2
  // Must be tested before the "1." pattern: "1.1 xxx" also matches /^[1-9]\.\s*\S/, which
  // previously classified every multi-level number as level 2.
  if (/^[1-9](\.\d+)+\s*\S/.test(trimmed)) return 3
  if (/^[1-9]\.\s*\S/.test(trimmed)) return 2
  if (/^[1-9][.1-9]*\s*\S/.test(trimmed)) return 3
  return 2
}
198 |
// ====== Document parsing ======
/**
 * Extracts the raw text of a Word document and groups it into sections.
 *
 * Blank lines are discarded. A line accepted by isLikelyTitle() starts a new section; the lines
 * after it form that section's content. Text that appears before the first heading is kept as a
 * leading section with an empty title and level 0, so a document without any recognisable heading
 * still yields one section instead of nothing. Sections without content are omitted.
 *
 * @param documentPath path of the .docx file
 * @returns the document title (first heading-like line, '' if none) and its sections
 * @throws Error when the text cannot be extracted
 */
async function parseWordDocument(documentPath: string): Promise<DocumentStructure> {
  try {
    const result = await mammoth.extractRawText({ path: documentPath })
    const text = result.value
    const lines = text.split('\n').filter(line => line.trim().length > 0)

    const sections: DocumentSection[] = []
    // Starts as an untitled "preamble" section so that text before the first heading is not lost.
    let currentSection: DocumentSection = { title: '', content: '', level: 0 }
    let sectionContent: string[] = []
    let documentTitle = ''

    // Closes the section being built; sections that collected no text are dropped.
    const flushSection = () => {
      if (sectionContent.length > 0) {
        currentSection.content = sectionContent.join('\n')
        sections.push(currentSection)
      }
    }

    for (const line of lines) {
      if (isLikelyTitle(line)) {
        flushSection()

        if (!documentTitle) documentTitle = line.trim()

        currentSection = {
          title: line.trim(),
          content: '',
          level: getHeadingLevel(line)
        }
        sectionContent = []
      } else {
        sectionContent.push(line)
      }
    }

    flushSection()

    return {
      title: documentTitle,
      sections
    }
  } catch (error) {
    // A thrown value is not guaranteed to be an Error instance.
    const reason = error instanceof Error ? error.message : String(error)
    throw new Error(`解析Word文档失败: ${reason}`)
  }
}
244 |
// ====== Document theme summary ======
/**
 * Asks the model to summarise the document's theme from its title and list of section headings.
 *
 * Never rejects: when the model call fails, the error is logged and the sentinel
 * `{ result: 'error', total_tokens: 0 }` is returned.
 *
 * @param docStructure parsed document (only the title and the section titles are sent)
 * @param apiKey API key for the chat endpoint
 * @param modelName chat model name
 * @param apiURL chat endpoint URL
 * @returns the summary text and the tokens reported by OpenaiGen()
 */
async function summarizeDocumentTheme(
  docStructure: DocumentStructure,
  apiKey: string,
  modelName: string,
  apiURL: string
): Promise<{
  result: string
  total_tokens: number
}> {
  const systemPrompt = '你是一个专业的文档分析专家。请根据提供的文档目录结构,总结文档的整体框架和主题。'
  const userPrompt = `文档标题: ${docStructure.title}\n\n文档目录结构:\n${docStructure.sections.map((s, i) => `${i + 1}. ${s.title}`).join('\n')}\n\n请总结这份文档的主要主题和整体框架:`

  try {
    return await OpenaiGen(systemPrompt, userPrompt, apiKey, modelName, apiURL)
  } catch (error) {
    console.error('总结文档主题时出错:', error)
    return {
      result: 'error',
      // 0 rather than null: the declared type is `number` and callers add this to a running total.
      total_tokens: 0
    }
  }
}
268 |
// ====== Parsing of proofreading results ======
/**
 * Converts the model's reply into a list of corrections.
 *
 * The reply is first parsed as JSON. A JSON array is filtered down to entries that are objects
 * with a non-empty string `original` and a string `suggested` (an empty `suggested` is allowed —
 * it means "delete"). Anything else — invalid JSON, or valid JSON that is not an array — is
 * handed to extractCorrectionsFromText(), which knows how to recover fenced, prefixed or
 * single-object replies.
 *
 * @param result raw reply text from the model
 * @param ragChunks reference chunks used for this request; when given, a copy is attached to
 *        every correction as `References`
 * @returns the parsed corrections (possibly empty)
 */
function parseCorrections(result: string, ragChunks?: string[]): ProofreadingCorrection[] {
  let parsed: unknown
  try {
    parsed = JSON.parse(result)
  } catch (error) {
    console.warn('直接解析JSON失败,尝试清理和提取:', error)
    return extractCorrectionsFromText(result, ragChunks)
  }

  if (!Array.isArray(parsed)) {
    // e.g. a single correction object instead of an array — let the tolerant extractor handle it.
    console.warn('cannot analyze the proofreading data from LLM')
    return extractCorrectionsFromText(result, ragChunks)
  }

  const isCorrection = (item: unknown): item is ProofreadingCorrection => {
    if (typeof item !== 'object' || item === null) return false
    const record = item as Record<string, unknown>
    return typeof record.original === 'string' && record.original.length > 0 && typeof record.suggested === 'string'
  }

  return parsed.filter(isCorrection).map(item => (ragChunks ? { ...item, References: [...ragChunks] } : item))
}
290 |
/**
 * Tolerant fallback parser for replies that are not plain JSON.
 *
 * Attempts, in order:
 *   1. strip Markdown code fences / a leading "json" marker, then parse the span from the first
 *      '[' to the last ']' as an array of corrections;
 *   2. parse the span from the first '{' to the last '}' as a single correction;
 *   3. hand the original text to parseCorrectionsFromPlainText().
 * Each attempt is independent: a failure in one never prevents the next from running.
 * Slicing by brackets makes explicit preamble stripping unnecessary (stripping by pattern could
 * remove the beginning of the JSON itself when a value contained such a phrase).
 *
 * A correction is accepted when `original` and `reason` are non-empty strings and `suggested`
 * is a string (empty means "delete the original").
 *
 * @param text raw reply text from the model
 * @param ragChunks reference chunks; when given, a copy is attached to every correction
 * @returns the recovered corrections, or [] when nothing could be recovered; never throws
 */
function extractCorrectionsFromText(text: string, ragChunks?: string[]): ProofreadingCorrection[] {
  const attachReferences = (item: ProofreadingCorrection): ProofreadingCorrection =>
    ragChunks ? { ...item, References: [...ragChunks] } : item

  const isCorrection = (item: unknown): item is ProofreadingCorrection => {
    if (typeof item !== 'object' || item === null || Array.isArray(item)) return false
    const record = item as Record<string, unknown>
    return (
      typeof record.original === 'string' &&
      record.original.length > 0 &&
      typeof record.suggested === 'string' &&
      typeof record.reason === 'string' &&
      record.reason.length > 0
    )
  }

  // JSON.parse that reports failure as `undefined` instead of throwing.
  const tryParse = (candidate: string): unknown => {
    try {
      return JSON.parse(candidate)
    } catch {
      return undefined
    }
  }

  // Returns the text between the first `open` and the last `close`, inclusive.
  const sliceBetween = (source: string, open: string, close: string): string | undefined => {
    const start = source.indexOf(open)
    const end = source.lastIndexOf(close)
    return start !== -1 && end > start ? source.substring(start, end + 1) : undefined
  }

  try {
    // Remove code-fence markers (```json, ``` or another language tag) and a bare "json" marker.
    const cleanedText = text
      .trim()
      .replace(/^```[\w]*\n?/, '')
      .replace(/\n?```$/, '')
      .replace(/^json\s*/i, '')

    // 1. JSON array
    const arrayText = sliceBetween(cleanedText, '[', ']')
    if (arrayText !== undefined) {
      console.log('提取到的JSON字符串:', arrayText)
      const parsed = tryParse(arrayText)
      if (Array.isArray(parsed)) {
        const corrections = parsed.filter(isCorrection).map(attachReferences)
        if (corrections.length > 0) {
          console.log(`成功解析出 ${corrections.length} 个校对结果`)
          return corrections
        }
      }
    }

    // 2. Single JSON object
    const objectText = sliceBetween(cleanedText, '{', '}')
    if (objectText !== undefined) {
      const single = tryParse(objectText)
      if (isCorrection(single)) {
        console.log('解析到单个校对结果')
        return [attachReferences(single)]
      }
    }

    // 3. Last resort: pull the fields out of the raw text
    console.warn('JSON解析完全失败,尝试从文本中手动提取')
    return parseCorrectionsFromPlainText(text, ragChunks)
  } catch (error) {
    console.error('所有解析方法都失败:', error)
    console.error('原始文本:', text)
    return []
  }
}
358 |
/**
 * Returns the quoted value found on a "key: value" style line.
 * The text after the first colon (ASCII or full-width) is searched first, so that on a line such
 * as `"original": "abc"` the value is returned rather than the quoted key; if nothing is found
 * there, the first quoted string anywhere on the line is used.
 */
function extractQuotedValue(line: string): string | undefined {
  const quoted = /["'“”]([^"“”]+)["'“”]/
  const separatorIndex = line.search(/[::]/)
  const afterSeparator = separatorIndex === -1 ? null : line.slice(separatorIndex + 1).match(quoted)
  const match = afterSeparator ?? line.match(quoted)
  return match ? match[1].trim() : undefined
}

/**
 * Last-resort extraction of corrections from free-form text.
 *
 * Every non-empty line mentioning "original" / "原文" / "原内容" is treated as the start of a
 * correction: the quoted value on that line is `original`, the one on the next line is
 * `suggested`, and the one on the line after that is `reason`. A `"type": "..."` pair is looked
 * for on the reason line and the line following it. Only corrections with all three of
 * original / suggested / reason are kept; the lines they consumed are not scanned again.
 *
 * @param text raw reply text from the model
 * @param ragChunks reference chunks; when given, a copy is attached to every correction
 * @returns the corrections that could be recovered (possibly empty)
 */
function parseCorrectionsFromPlainText(text: string, ragChunks?: string[]): ProofreadingCorrection[] {
  const corrections: ProofreadingCorrection[] = []
  const lines = text
    .split('\n')
    .map(line => line.trim())
    .filter(line => line.length > 0)

  console.log('尝试从纯文本中提取校对结果,共', lines.length, '行')

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i]

    // Look for a line that introduces the original text
    if (!/original|原文|原内容/.test(line)) continue

    const correction: Partial<ProofreadingCorrection> = {}
    correction.original = extractQuotedValue(line)

    // suggested: expected on the next line
    if (i + 1 < lines.length) {
      correction.suggested = extractQuotedValue(lines[i + 1])
    }

    // reason: expected two lines down
    if (i + 2 < lines.length) {
      correction.reason = extractQuotedValue(lines[i + 2])
    }

    // type: on the reason line (single-line objects) or on the line after it (one field per line)
    for (const candidate of lines.slice(i + 2, i + 4)) {
      const typeMatch = candidate.match(/"type"\s*:\s*["']([^"']+)["']/)
      if (typeMatch) {
        correction.type = typeMatch[1]
        break
      }
    }

    // Keep the correction only when every required field was found
    if (correction.original && correction.suggested && correction.reason) {
      if (ragChunks) {
        correction.References = [...ragChunks]
      }
      corrections.push(correction as ProofreadingCorrection)
      // Skip the suggested/reason lines: a reason that mentions "原文" must not start a new entry.
      i += 2
    }
  }

  console.log('从纯文本中提取到', corrections.length, '个校对结果')
  return corrections
}
415 |
// ====== RAG query ======
/** Optional tuning knobs for queryDocChunk(). */
interface QueryDocChunkOptions {
  /** Upper bound applied to the requested number of chunks (defaults to DEFAULT_MAX_SELECT_NUM). */
  maxSelectNum?: number
  /** Whether chunks with identical text are collapsed into one (defaults to true). */
  enableDeduplication?: boolean
}
421 |
// Default cap on the number of chunks queryDocChunk() will return.
const DEFAULT_MAX_SELECT_NUM = 20
423 |
/**
 * Retrieves the reference chunks most relevant to `content` from one or more repositories.
 *
 * Every repository is queried in parallel; a repository whose query fails is logged and
 * contributes no results. The hits are merged, optionally de-duplicated by text, sorted by
 * `score` in descending order and truncated.
 *
 * @param repositoryNameList repositories to search; an empty or invalid list yields []
 * @param apiKey embedding API key
 * @param apiURL embedding endpoint URL
 * @param modelName embedding model name
 * @param fileName name of the document being proofread (currently not used by the query)
 * @param content text to find references for; blank content yields []
 * @param filter filter expression forwarded to queryDocuments()
 * @param selectNum number of chunks wanted; non-integer or non-positive values fall back to 1
 * @param options see QueryDocChunkOptions
 * @returns the text of the selected chunks, best match first
 * @throws Error when apiKey, apiURL or modelName is missing or not a string
 */
const queryDocChunk = async (
  repositoryNameList: string[],
  apiKey: string,
  apiURL: string,
  modelName: string,
  fileName: string,
  content: string,
  filter: string,
  selectNum: number,
  options: QueryDocChunkOptions = {}
): Promise<string[]> => {
  const { maxSelectNum = DEFAULT_MAX_SELECT_NUM, enableDeduplication = true } = options

  if (!Array.isArray(repositoryNameList) || repositoryNameList.length === 0) {
    console.warn('queryDocChunk: repositoryNameList is empty or invalid')
    return []
  }

  if (!apiKey || typeof apiKey !== 'string') {
    throw new Error('Invalid or missing apiKey')
  }

  if (!apiURL || typeof apiURL !== 'string') {
    throw new Error('Invalid or missing apiURL')
  }

  if (!modelName || typeof modelName !== 'string') {
    throw new Error('Invalid or missing modelName')
  }

  if (!content || typeof content !== 'string' || content.trim() === '') {
    console.warn('queryDocChunk: empty or invalid content, returning empty result')
    return []
  }

  // Work on a local copy instead of reassigning the parameter.
  let requestedCount = selectNum
  if (!Number.isInteger(requestedCount) || requestedCount <= 0) {
    console.warn(`queryDocChunk: invalid selectNum ${selectNum}, using default 1`)
    requestedCount = 1
  }

  const effectiveSelectNum = Math.min(requestedCount, maxSelectNum)
  // Each repository is asked for at most 10 hits.
  const perRepoLimit = Math.min(effectiveSelectNum, 10)

  const queries = repositoryNameList.map(repoName =>
    queryDocuments(repoName, content.trim(), modelName, apiKey, apiURL, perRepoLimit, filter).catch(
      (err): RAGQueryResult[] => {
        console.error(`queryDocChunk: failed to query repository "${repoName}"`, err)
        return []
      }
    )
  )

  const results = await Promise.all(queries)

  const chunkList: RAGQueryResult[] = []
  for (const result of results) {
    if (Array.isArray(result)) {
      chunkList.push(...result)
    }
  }

  let uniqueChunks = chunkList
  if (enableDeduplication && chunkList.length > 0) {
    const seen = new Set<string>()
    uniqueChunks = chunkList.filter(item => {
      if (typeof item.text !== 'string') return false
      if (seen.has(item.text)) return false
      seen.add(item.text)
      return true
    })
  }
  console.info('-----------------------------------RAG Query----------------------------')
  console.info('the unique results of query:', uniqueChunks)
  console.log('the proofreading content:', content)

  // filter() returns a fresh array, so the in-place sort does not touch uniqueChunks.
  const topChunks = uniqueChunks
    .filter(item => typeof item.score === 'number' && typeof item.text === 'string')
    .sort((a, b) => b.score - a.score)
    .slice(0, effectiveSelectNum)
  console.info('the top relative result of query:', topChunks)

  return topChunks.map(item => item.text)
}
508 |
// ====== Generic proofreading routine with optional RAG ======
/**
 * Proofreads one piece of text, optionally enriching the system prompt with knowledge-base chunks.
 *
 * - No repositories (undefined or an empty list): the text is sent with `systemContext` as is.
 * - One or more repositories: up to 3 relevant chunks are retrieved via queryDocChunk(), appended
 *   to the system prompt, and attached to every returned correction as `References`.
 *   The embedding settings default, field by field, to the chat settings.
 *
 * Never rejects: any failure is logged and reported as `{ result: [], use_tokens: 0 }`.
 *
 * @param text the text to proofread
 * @param systemContext system prompt for the model
 * @param apiKey chat API key
 * @param modelName chat model name
 * @param apiURL chat endpoint URL
 * @param repositoryNameList knowledge repositories to consult
 * @param fileName name of the document being proofread; required when repositories are given
 * @param embeddingConfig embedding API settings
 * @returns the corrections and the token count reported by the model call
 */
async function proofreadTextWithRAG(
  text: string,
  systemContext: string,
  apiKey: string,
  modelName: string,
  apiURL: string,
  repositoryNameList?: string[],
  fileName?: string,
  embeddingConfig?: ApiSettings
): Promise<{ result: ProofreadingCorrection[]; use_tokens: number }> {
  try {
    const userPrompt = `需要校对的内容:\n${text}`

    // Plain proofreading when no knowledge repository was selected.
    if (repositoryNameList === undefined || repositoryNameList.length === 0) {
      console.log('use normal proof without rag:')
      console.log('proof content:', text)
      const { result, total_tokens } = await OpenaiGen(systemContext, userPrompt, apiKey, modelName, apiURL)
      return { result: parseCorrections(result), use_tokens: total_tokens }
    }

    if (!fileName) {
      console.log("the proof mode don't catch any preload,please check!")
      throw new Error("the proof mode don't catch any preload,please check!")
    }

    const embApiKey = embeddingConfig?.apiKey || apiKey
    const embApiURL = embeddingConfig?.apiURL || apiURL
    const embModelName = embeddingConfig?.modelName || modelName
    console.log('------------------------setting of RAG-------------------------------------')
    // The embedding key is a secret and is intentionally not logged.
    console.log('embedding URL:', embApiURL)
    console.log('embedding modelName:', embModelName)

    const ragChunks = await queryDocChunk(
      repositoryNameList,
      embApiKey,
      embApiURL,
      embModelName,
      fileName,
      text,
      '',
      3
    )

    let systemPrompt = systemContext
    if (ragChunks.length > 0) {
      const ragContext = `\n${ragText}:\n${ragChunks.map((t, i) => `${i + 1}. ${t}`).join('\n')}`
      systemPrompt += ragContext
    }

    const { result, total_tokens } = await OpenaiGen(systemPrompt, userPrompt, apiKey, modelName, apiURL)
    return { result: parseCorrections(result, ragChunks), use_tokens: total_tokens }
  } catch (error) {
    console.error('校对文本失败:', error)
    return { result: [], use_tokens: 0 }
  }
}
588 |
// ====== Main proofreading entry point ======
/**
 * Proofreads a Word document.
 *
 * Modes:
 * - 'full':     the whole document text is sent in a single request.
 * - 'section':  one request per non-empty section.
 * - 'sentence': one request per sentence of every non-empty section.
 * In 'section' and 'sentence' mode the model is first asked for a summary of the document's
 * theme, which is added to every request's system prompt together with the document and section
 * titles; the tokens spent on that summary are included in `token_usage`.
 *
 * When there is nothing to proofread (empty document / no non-empty section) the function
 * resolves to `{ proofResult: null, token_usage: 0 }` — callers must be prepared for `null`.
 *
 * @param documentPath path of the .docx file
 * @param mode proofreading granularity
 * @param apiKey chat API key
 * @param modelName chat model name
 * @param apiURL chat endpoint URL
 * @param repositoryNameList knowledge repositories for RAG (omit or pass [] to disable)
 * @param embeddingConfig embedding API settings used for RAG
 * @param parallelSet maximum number of concurrent model requests
 * @param setTimeLimit maximum number of requests started per minute (no rate limit when falsy)
 * @returns the corrections and the total number of tokens used
 * @throws rethrows any error raised while reading or parsing the document
 */
export async function proofreadDocument(
  documentPath: string,
  mode: 'section' | 'sentence' | 'full',
  apiKey: string,
  modelName: string,
  apiURL: string,
  repositoryNameList?: string[],
  embeddingConfig?: ApiSettings,
  parallelSet: number = 30, // concurrency limit
  setTimeLimit?: number // maximum request starts per minute
): Promise<{ proofResult: ProofreadingCorrection[]; token_usage: number }> {
  console.log('process mode is:', mode)
  console.log('process api is:', apiURL, modelName)
  let total_tokens = 0 // running total of token usage
  const option = setTimeLimit // rate limit (requests per minute), if any
    ? {
        requestsPerMinute: setTimeLimit
      }
    : undefined

  try {
    const fileName = path.basename(documentPath)

    if (mode === 'full') {
      const fullText = await mammoth.extractRawText({ path: documentPath })
      const text = fullText.value.trim()
      if (!text)
        return {
          proofResult: null,
          token_usage: 0
        }
      const { result, use_tokens } = await proofreadTextWithRAG(
        text,
        defaultPrompt,
        apiKey,
        modelName,
        apiURL,
        repositoryNameList,
        fileName,
        embeddingConfig
      )
      total_tokens += use_tokens

      return { proofResult: result, token_usage: total_tokens }
    }

    const docStructure = await parseWordDocument(documentPath)
    const nonEmptySections = docStructure.sections.filter(sec => sec.content.trim().length > 0)
    // Checked before the summary request so that no model call is spent on an empty document.
    if (nonEmptySections.length === 0)
      return {
        proofResult: null,
        token_usage: 0
      }

    const documentTheme = await summarizeDocumentTheme(docStructure, apiKey, modelName, apiURL)
    total_tokens += documentTheme.total_tokens || 0
    // summarizeDocumentTheme() reports failure with the sentinel 'error'; do not feed that to the model.
    const themeText = documentTheme.result === 'error' ? '' : documentTheme.result

    // System prompt shared by every request of one section. The summary *text* is interpolated;
    // interpolating the summary object itself would render as "[object Object]".
    const buildSystemContext = (sectionTitle: string): string =>
      [
        defaultPrompt,
        `文档标题: ${docStructure.title}`,
        `文档主题: ${themeText}`,
        `当前章节标题: ${sectionTitle}`
      ].join('\n')

    // One unit of work: a piece of text plus the title of the section it belongs to.
    const workItems: { text: string; sectionTitle: string }[] = []
    if (mode === 'section') {
      for (const section of nonEmptySections) {
        workItems.push({ text: section.content, sectionTitle: section.title })
      }
    } else if (mode === 'sentence') {
      for (const section of nonEmptySections) {
        for (const sentence of splitSentences(section.content)) {
          if (sentence.trim().length > 0) {
            workItems.push({ text: sentence, sectionTitle: section.title })
          }
        }
      }
    }

    let allCorrections: ProofreadingCorrection[] = []
    if (workItems.length > 0) {
      const itemResults = await runWithLimits(
        workItems,
        parallelSet,
        item =>
          proofreadTextWithRAG(
            item.text,
            buildSystemContext(item.sectionTitle),
            apiKey,
            modelName,
            apiURL,
            repositoryNameList,
            fileName,
            embeddingConfig
          ),
        option
      )
      for (const item of itemResults) {
        total_tokens += item.use_tokens
      }
      // Flatten the per-request lists into one list of corrections.
      allCorrections = itemResults.map(item => item.result).flat()
    }

    // Copy only the known fields so the result is guaranteed to be serializable.
    const serializableCorrections = allCorrections.map(correction => ({
      original: correction.original,
      suggested: correction.suggested,
      reason: correction.reason,
      type: correction.type,
      ...(correction.References ? { References: correction.References } : {})
    }))

    console.log('校对结果:', serializableCorrections)
    return { proofResult: serializableCorrections, token_usage: total_tokens }
  } catch (error) {
    console.error('文档校对过程中出现错误:', error)
    throw error
  }
}
729 |
--------------------------------------------------------------------------------