├── .dockerignore
├── .github
├── FUNDING.yml
├── ISSUE_TEMPLATE
│ ├── blank.yaml
│ ├── config.yml
│ ├── 功能请求_cn.yaml
│ ├── 功能请求_en.yaml
│ ├── 问题反馈_cn.yaml
│ └── 问题反馈_en.yaml
├── dependabot.yml
├── release-drafter.yml
└── workflows
│ ├── black.format.yml
│ ├── exe-build.yml
│ ├── fork-build.yml
│ ├── fork-test.yml
│ ├── python-publish.yml
│ └── python-test.yml
├── .gitignore
├── .pre-commit-config.yaml
├── Dockerfile
├── LICENSE
├── README.md
├── app.json
├── docs
├── ADVANCED.md
├── APIS.md
├── CODE_OF_CONDUCT.md
├── README_GUI.md
├── README_ja-JP.md
├── README_ko-KR.md
├── README_zh-CN.md
├── README_zh-TW.md
└── images
│ ├── after.png
│ ├── banner.png
│ ├── before.png
│ ├── cmd.explained.png
│ ├── cmd.explained.zh.png
│ ├── gui.gif
│ └── preview.gif
├── pdf2zh
├── __init__.py
├── backend.py
├── cache.py
├── config.py
├── converter.py
├── doclayout.py
├── gui.py
├── high_level.py
├── mcp_server.py
├── pdf2zh.py
├── pdfinterp.py
└── translator.py
├── pyproject.toml
├── script
├── Dockerfile.China
├── Dockerfile.Demo
├── _pystand_static.int
└── setup.bat
├── setup.cfg
└── test
├── file
├── translate.cli.font.unknown.pdf
├── translate.cli.plain.text.pdf
└── translate.cli.text.with.figure.pdf
├── test_cache.py
├── test_converter.py
├── test_doclayout.py
└── test_translator.py
/.dockerignore:
--------------------------------------------------------------------------------
1 | .github
2 | docs
3 | .git
4 | .pre-commit-config.yaml
5 | uv.lock
6 | pdf2zh_files
7 | gui/pdf2zh_files
8 | gradio_files
9 | tmp
10 | gui/gradio_files
11 | gui/tmp
12 | # Byte-compiled / optimized / DLL files
13 | __pycache__/
14 | *.py[cod]
15 | *$py.class
16 |
17 | # C extensions
18 | *.so
19 |
20 | # Distribution / packaging
21 | .Python
22 | build/
23 | develop-eggs/
24 | dist/
25 | downloads/
26 | eggs/
27 | .eggs/
28 | lib/
29 | lib64/
30 | parts/
31 | sdist/
32 | var/
33 | wheels/
34 | share/python-wheels/
35 | *.egg-info/
36 | .installed.cfg
37 | *.egg
38 | MANIFEST
39 |
40 | # PyInstaller
41 | # Usually these files are written by a python script from a template
42 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
43 | *.manifest
44 | *.spec
45 |
46 | # Installer logs
47 | pip-log.txt
48 | pip-delete-this-directory.txt
49 |
50 | # Unit test / coverage reports
51 | htmlcov/
52 | .tox/
53 | .nox/
54 | .coverage
55 | .coverage.*
56 | .cache
57 | nosetests.xml
58 | coverage.xml
59 | *.cover
60 | *.py,cover
61 | .hypothesis/
62 | .pytest_cache/
63 | cover/
64 |
65 | # Translations
66 | *.mo
67 | *.pot
68 |
69 | # Django stuff:
70 | *.log
71 | local_settings.py
72 | db.sqlite3
73 | db.sqlite3-journal
74 |
75 | # Flask stuff:
76 | instance/
77 | .webassets-cache
78 |
79 | # Scrapy stuff:
80 | .scrapy
81 |
82 | # Sphinx documentation
83 | docs/_build/
84 |
85 | # PyBuilder
86 | .pybuilder/
87 | target/
88 |
89 | # Jupyter Notebook
90 | .ipynb_checkpoints
91 |
92 | # IPython
93 | profile_default/
94 | ipython_config.py
95 |
96 | # pyenv
97 | # For a library or package, you might want to ignore these files since the code is
98 | # intended to run in multiple environments; otherwise, check them in:
99 | # .python-version
100 |
101 | # pipenv
102 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
103 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
104 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
105 | # install all needed dependencies.
106 | #Pipfile.lock
107 |
108 | # poetry
109 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
110 | # This is especially recommended for binary packages to ensure reproducibility, and is more
111 | # commonly ignored for libraries.
112 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
113 | #poetry.lock
114 |
115 | # pdm
116 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
117 | #pdm.lock
118 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
119 | # in version control.
120 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
121 | .pdm.toml
122 | .pdm-python
123 | .pdm-build/
124 |
125 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
126 | __pypackages__/
127 |
128 | # Celery stuff
129 | celerybeat-schedule
130 | celerybeat.pid
131 |
132 | # SageMath parsed files
133 | *.sage.py
134 |
135 | # Environments
136 | .env
137 | .venv
138 | env/
139 | venv/
140 | ENV/
141 | env.bak/
142 | venv.bak/
143 |
144 | # Spyder project settings
145 | .spyderproject
146 | .spyproject
147 |
148 | # Rope project settings
149 | .ropeproject
150 |
151 | # mkdocs documentation
152 | /site
153 |
154 | # mypy
155 | .mypy_cache/
156 | .dmypy.json
157 | dmypy.json
158 |
159 | # Pyre type checker
160 | .pyre/
161 |
162 | # pytype static type analyzer
163 | .pytype/
164 |
165 | # Cython debug symbols
166 | cython_debug/
167 |
168 | # PyCharm
169 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
170 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
171 | # and can be added to the global gitignore or merged into this file. For a more nuclear
172 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
173 | .idea/
174 | .vscode
175 | .DS_Store
176 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: [Byaidu, reycn, Wybxc, hellofinch] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
12 | polar: # Replace with a single Polar username
13 | buy_me_a_coffee: # Replace with a single Buy Me a Coffee username
14 | thanks_dev: # Replace with a single thanks.dev username
15 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
16 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/blank.yaml:
--------------------------------------------------------------------------------
1 | name: Blank Issue
2 | description: Create a blank issue for discussion
3 | body:
4 | - type: checkboxes
5 | id: checks
6 | attributes:
7 | label: before ...
8 | options:
9 | - label: This issue is not a question or a bug report.
10 | required: true
11 | - type: textarea
12 | id: describe
13 | attributes:
14 | label: Add a description
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/功能请求_cn.yaml:
--------------------------------------------------------------------------------
1 | name: 功能请求
2 | description: 使用中文进行功能请求
3 | labels: ['enhancement']
4 | body:
5 | - type: textarea
6 | id: describe
7 | attributes:
8 | label: 在什么场景下,需要你请求的功能?
9 | description: 简要描述相关的使用场景
10 | validations:
11 | required: false
12 | - type: textarea
13 | id: solution
14 | attributes:
15 | label: 解决方案
16 | description: 描述你想要的解决方案
17 | validations:
18 | required: false
19 | - type: textarea
20 | id: additional
21 | attributes:
22 | label: 其他内容
23 | description: 关于该功能请求的任何其他项目。
24 | validations:
25 | required: false
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/功能请求_en.yaml:
--------------------------------------------------------------------------------
1 | name: Feature request
2 | description: Suggest an idea for this project
3 | labels: ['enhancement']
4 | body:
5 | - type: textarea
6 | id: describe
7 | attributes:
8 | label: Is your feature request related to a problem?
9 | description: A clear and concise description of what the problem is
10 | placeholder: Ex. I'm always frustrated when ...
11 | validations:
12 | required: false
13 | - type: textarea
14 | id: solution
15 | attributes:
16 | label: Describe the solution you'd like
17 | description: A clear and concise description of what you want to happen
18 | validations:
19 | required: false
20 | - type: textarea
21 | id: additional
22 | attributes:
23 | label: Additional context
24 | description: Add any other context about the feature request here.
25 | validations:
26 | required: false
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/问题反馈_cn.yaml:
--------------------------------------------------------------------------------
1 | name: 上报 Bug
2 | description: 使用中文进行 Bug 报告
3 | labels: ['bug']
4 | body:
5 | - type: checkboxes
6 | id: checks
7 | attributes:
8 | label: 在提问之前...
9 | options:
10 | - label: 我已经搜索了现有的 issues
11 | required: true
12 | - label: 我在提问题之前至少花费了 5 分钟来思考和准备
13 | required: true
14 | - label: 我已经认真且完整的阅读了 wiki
15 | required: true
16 | - label: 我已经认真检查了问题和网络环境无关(包括但不限于Google不可用,模型下载失败)
17 | required: true
18 | - type: markdown
19 | attributes:
20 | value: |
21 | 感谢您使用本项目并反馈!
22 | 请再次确认上述复选框所述的内容已经认真执行!
23 | - type: textarea
24 | id: environment
25 | attributes:
26 | label: 使用的环境
27 | description: |
28 | examples:
29 | - **OS**: Ubuntu 24.10
30 | - **Python**: 3.12.0
31 | - **pdf2zh**: 1.9.0
32 | value: |
33 | - OS:
34 | - Python:
35 | - pdf2zh:
36 | render: markdown
37 | validations:
38 | required: false
39 | - type: dropdown
40 | id: install
41 | attributes:
42 | label: 请选择安装方式
43 | options:
44 | - pip
45 | - exe
46 | - 源码
47 | - docker
48 | validations:
49 | required: true
50 | - type: textarea
51 | id: describe
52 | attributes:
53 | label: 描述你的问题
54 | description: 简要描述你的问题
55 | validations:
56 | required: true
57 | - type: textarea
58 | id: reproduce
59 | attributes:
60 | label: 如何复现
61 | description: 重现该行为的步骤
62 | value: |
63 | 1. 执行 '...'
64 | 2. 选择 '....'
65 | 3. 出现问题
66 | validations:
67 | required: false
68 | - type: textarea
69 | id: expected
70 | attributes:
71 | label: 预期行为
72 | description: 简要描述你期望得到的反馈
73 | validations:
74 | required: false
75 | - type: textarea
76 | id: logs
77 | attributes:
78 | label: 相关 Logs
79 | description: 请复制并粘贴任何相关的日志输出。
80 | render: Text
81 | validations:
82 | required: false
83 | - type: textarea
84 | id: PDFfile
85 | attributes:
86 | label: 原始PDF文件
87 | description: |
88 | 如果涉及到排版错误的问题,请一定提供原始的PDF文件,方便复现错误。
89 | validations:
90 | required: false
91 | - type: textarea
92 | id: others
93 | attributes:
94 | label: 还有别的吗?
95 | description: |
96 | 相关的配置?链接?参考资料?
97 | 任何能让我们对你所遇到的问题有更多了解的东西。
98 | validations:
99 | required: false
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/问题反馈_en.yaml:
--------------------------------------------------------------------------------
1 | name: Bug Report
2 | description: Create a report to help us improve
3 | labels: ['bug']
4 | body:
5 | - type: checkboxes
6 | id: checks
7 | attributes:
8 | label: Before you ask
9 | options:
10 | - label: I have searched the existing issues
11 | required: true
12 | - label: I spent at least 5 minutes thinking and preparing
13 | required: true
14 | - label: I have thoroughly and completely read the wiki.
15 | required: true
16 | - label: I have carefully checked the issue, and it is unrelated to the network environment.
17 | required: true
18 | - type: markdown
19 | attributes:
20 | value: |
21 | Thank you for using this project and providing feedback!
22 | - type: textarea
23 | id: environment
24 | attributes:
25 | label: Environment
26 | description: |
27 | examples:
28 | - **OS**: Ubuntu 24.10
29 | - **Python**: 3.12.0
30 | - **pdf2zh**: 1.9.0
31 | value: |
32 | - OS:
33 | - Python:
34 | - pdf2zh:
35 | render: markdown
36 | validations:
37 | required: false
38 | - type: dropdown
39 | id: install
40 | attributes:
41 | label: How to install pdf2zh
42 | options:
43 | - pip
44 | - exe
45 | - source
46 | - docker
47 | validations:
48 | required: true
49 | - type: textarea
50 | id: describe
51 | attributes:
52 | label: Describe the bug
53 | description: A clear and concise description of what the bug is.
54 | validations:
55 | required: true
56 | - type: textarea
57 | id: reproduce
58 | attributes:
59 | label: To Reproduce
60 | description: Steps to reproduce the behavior
61 | value: |
62 | 1. execute '...'
63 | 2. select '....'
64 | 3. see errors
65 | validations:
66 | required: false
67 | - type: textarea
68 | id: expected
69 | attributes:
70 | label: Expected behavior
71 | description: A clear and concise description of what you expected to happen.
72 | validations:
73 | required: false
74 | - type: textarea
75 | id: logs
76 | attributes:
77 | label: Relevant log output
78 | description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
79 | render: Text
80 | validations:
81 | required: false
82 | - type: textarea
83 | id: PDFfile
84 | attributes:
85 | label: Original PDF file
86 | description: |
87 | If the issue involves formatting errors, please provide the original PDF file to facilitate reproduction of the error.
88 | validations:
89 | required: false
90 | - type: textarea
91 | id: others
92 | attributes:
93 | label: Anything else?
94 | description: |
95 | Related configs? Links? References?
96 | Anything that will give us more context about the issue you are encountering!
97 | validations:
98 | required: false
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: github-actions
4 | directory: "/"
5 | schedule:
6 | interval: weekly
7 | # - package-ecosystem: pip
8 | # directory: "/.github/workflows"
9 | # schedule:
10 | # interval: weekly
11 | # - package-ecosystem: pip
12 | # directory: "/docs"
13 | # schedule:
14 | # interval: weekly
15 | - package-ecosystem: pip
16 | directory: "/"
17 | schedule:
18 | interval: weekly
19 | versioning-strategy: lockfile-only
20 | allow:
21 | - dependency-type: "all"
--------------------------------------------------------------------------------
/.github/release-drafter.yml:
--------------------------------------------------------------------------------
1 | name-template: 'v$RESOLVED_VERSION'
2 | tag-template: 'v$RESOLVED_VERSION'
3 | categories:
4 | - title: '🚀 Features'
5 | labels:
6 | - 'feature'
7 | - 'enhancement'
8 | - title: '🐛 Bug Fixes'
9 | labels:
10 | - 'fix'
11 | - 'bugfix'
12 | - 'bug'
13 | - title: '🧰 Maintenance'
14 | labels:
15 | - 'chore'
16 | - 'maintenance'
17 | - 'refactor'
18 | - title: '📝 Documentation'
19 | labels:
20 | - 'docs'
21 | - 'documentation'
22 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)'
23 | change-title-escapes: '\<*_&' # You can add # and @ to disable mentions
24 | version-resolver:
25 | major:
26 | labels:
27 | - 'major'
28 | minor:
29 | labels:
30 | - 'minor'
31 | patch:
32 | labels:
33 | - 'patch'
34 | default: patch
35 | template: |
36 | ## Changes
37 |
38 | $CHANGES
39 |
40 | ## Contributors
41 |
42 | $CONTRIBUTORS
43 |
44 | ## Windows Specific
45 |
46 | If you cannot open it after downloading, please install https://aka.ms/vs/17/release/vc_redist.x64.exe and try again.
47 |
48 | ## Assets
49 |
50 | - pdf2zh-v$RESOLVED_VERSION-win64.zip: pdf2zh **without** assets (font, model, etc.)
51 | - pdf2zh-v$RESOLVED_VERSION-with-assets-win64.zip: (**Recommended**) pdf2zh **with** assets (font, model, etc.)
52 |
53 | > [!NOTE]
54 | >
55 | > The version without assets will also dynamically download resources when running, but the download may fail due to network issues.
--------------------------------------------------------------------------------
/.github/workflows/black.format.yml:
--------------------------------------------------------------------------------
1 | name: Format Code with Black
2 |
3 | on: [push, pull_request]
4 |
5 | jobs:
6 | lint:
7 | runs-on: ubuntu-latest
8 | steps:
9 | - uses: actions/checkout@v4
10 | - uses: psf/black@stable
--------------------------------------------------------------------------------
/.github/workflows/exe-build.yml:
--------------------------------------------------------------------------------
1 | name: windows exe Release Workflow
2 |
3 | on:
4 | workflow_dispatch:
5 | inputs:
6 | release_version:
7 | description: 'Release Version (e.g., v1.0.0)'
8 | required: true
9 | type: string
10 | # push:
11 | # debug purpose
12 | env:
13 | WIN_EXE_PYTHON_VERSION: 3.12.9
14 | jobs:
15 | build-win64-exe:
16 | runs-on: windows-latest
17 | steps:
18 | - name: checkout babeldoc metadata
19 | uses: actions/checkout@v4
20 | with:
21 | repository: funstory-ai/BabelDOC
22 | path: babeldoctemp1234567
23 | token: ${{ secrets.GITHUB_TOKEN }}
24 | sparse-checkout: babeldoc/assets/embedding_assets_metadata.py
25 | - name: Cached Assets
26 | id: cache-assets
27 | uses: actions/cache@v4.2.2
28 | with:
29 | path: ~/.cache/babeldoc
30 | key: test-1-babeldoc-assets-${{ hashFiles('babeldoctemp1234567/babeldoc/assets/embedding_assets_metadata.py') }}
31 | - name: 检出代码
32 | uses: actions/checkout@v4
33 |
34 | - name: Setup uv with Python ${{ env.WIN_EXE_PYTHON_VERSION }}
35 | uses: astral-sh/setup-uv@f94ec6bedd8674c4426838e6b50417d36b6ab231 # v5.3.1
36 | with:
37 | python-version: ${{ env.WIN_EXE_PYTHON_VERSION }}
38 | enable-cache: true
39 | cache-dependency-glob: "pyproject.toml"
40 |
41 | - name: 执行所有任务(创建目录、下载、解压、复制文件、安装依赖)
42 | shell: pwsh
43 | run: |
44 | Write-Host "==== 删除 babeldoctemp1234567 文件夹 ===="
45 | if (Test-Path "./babeldoctemp1234567") {
46 | Remove-Item -Path "./babeldoctemp1234567" -Recurse -Force
47 | Write-Host "babeldoctemp1234567 文件夹已成功删除"
48 | } else {
49 | Write-Host "babeldoctemp1234567 文件夹不存在,无需删除"
50 | }
51 | Write-Host "==== 创建必要的目录 ===="
52 | New-Item -Path "./build" -ItemType Directory -Force
53 | New-Item -Path "./build/runtime" -ItemType Directory -Force
54 | New-Item -Path "./dep_build" -ItemType Directory -Force
55 |
56 | Write-Host "==== 复制代码到 dep_build ===="
57 | Get-ChildItem -Path "./" -Exclude "dep_build", "build" | Copy-Item -Destination "./dep_build" -Recurse -Force
58 |
59 | Write-Host "==== 下载并解压 Python ${{ env.WIN_EXE_PYTHON_VERSION }} ===="
60 | Write-Host "pythonUrl: https://www.python.org/ftp/python/${{ env.WIN_EXE_PYTHON_VERSION }}/python-${{ env.WIN_EXE_PYTHON_VERSION }}-embed-amd64.zip"
61 | $pythonUrl = "https://www.python.org/ftp/python/${{ env.WIN_EXE_PYTHON_VERSION }}/python-${{ env.WIN_EXE_PYTHON_VERSION }}-embed-amd64.zip"
62 | $pythonZip = "./dep_build/python.zip"
63 | Invoke-WebRequest -Uri $pythonUrl -OutFile $pythonZip
64 | Expand-Archive -Path $pythonZip -DestinationPath "./build/runtime" -Force
65 |
66 | Write-Host "==== 下载并解压 PyStand ===="
67 | $pystandUrl = "https://github.com/skywind3000/PyStand/releases/download/1.1.4/PyStand-v1.1.4-exe.zip"
68 | $pystandZip = "./dep_build/PyStand.zip"
69 | Invoke-WebRequest -Uri $pystandUrl -OutFile $pystandZip
70 | Expand-Archive -Path $pystandZip -DestinationPath "./dep_build/PyStand" -Force
71 |
72 | Write-Host "==== 复制 PyStand.exe 到 build 并重命名 ===="
73 | $pystandExe = "./dep_build/PyStand/PyStand-x64-CLI/PyStand.exe"
74 | $destExe = "./build/pdf2zh.exe"
75 | if (Test-Path $pystandExe) {
76 | Copy-Item -Path $pystandExe -Destination $destExe -Force
77 | } else {
78 | Write-Host "错误: PyStand.exe 未找到!"
79 | exit 1
80 | }
81 | Write-Host "==== 创建 Python venv 在 dep_build ===="
82 | uv venv ./dep_build/venv
83 |
84 | ./dep_build/venv/Scripts/activate
85 |
86 | Write-Host "==== 在 venv 环境中安装项目依赖 ===="
87 | uv pip install .
88 |
89 | Write-Host "==== 复制 venv/Lib/site-packages 到 build/ ===="
90 | Copy-Item -Path "./dep_build/venv/Lib/site-packages" -Destination "./build/site-packages" -Recurse -Force
91 |
92 | Write-Host "==== 复制 script/_pystand_static.int 到 build/ ===="
93 | $staticFile = "./script/_pystand_static.int"
94 | $destStatic = "./build/_pystand_static.int"
95 | if (Test-Path $staticFile) {
96 | Copy-Item -Path $staticFile -Destination $destStatic -Force
97 | } else {
98 | Write-Host "错误: script/_pystand_static.int 未找到!"
99 | exit 1
100 | }
101 |
102 | uv run --active babeldoc --generate-offline-assets ./build
103 |
104 | - name: Upload build artifact
105 | uses: actions/upload-artifact@v4
106 | with:
107 | name: win64-exe
108 | path: ./build
109 | if-no-files-found: error
110 | compression-level: 9
111 | include-hidden-files: true
112 |
113 | test-win64-exe:
114 | needs:
115 | - build-win64-exe
116 | runs-on: windows-latest
117 | steps:
118 | - name: 检出代码
119 | uses: actions/checkout@v4
120 |
121 | - name: Download build artifact
122 | uses: actions/download-artifact@v4
123 | with:
124 | name: win64-exe
125 | path: ./build
126 |
127 | - name: Test show version
128 | run: |
129 | ./build/pdf2zh.exe --version
130 |
131 | - name: Test - Translate a PDF file with plain text only
132 | run: |
133 | ./build/pdf2zh.exe ./test/file/translate.cli.plain.text.pdf -o ./test/file
134 |
135 | - name: Test - Translate a PDF file figure
136 | run: |
137 | ./build/pdf2zh.exe ./test/file/translate.cli.text.with.figure.pdf -o ./test/file
138 |
139 | - name: Delete offline assets and cache
140 | shell: pwsh
141 | run: |
142 | Write-Host "==== 查找并删除离线资源包 ===="
143 | $offlineAssetsPath = Get-ChildItem -Path "./build" -Filter "offline_assets_*.zip" -Recurse | Select-Object -First 1 -ExpandProperty FullName
144 | if ($offlineAssetsPath) {
145 | Write-Host "找到离线资源包: $offlineAssetsPath"
146 | Remove-Item -Path $offlineAssetsPath -Force
147 | Write-Host "已删除离线资源包"
148 | } else {
149 | Write-Host "未找到离线资源包"
150 | }
151 |
152 | Write-Host "==== 删除缓存目录 ===="
153 | $cachePath = "$env:USERPROFILE/.cache/babeldoc"
154 | if (Test-Path $cachePath) {
155 | Remove-Item -Path $cachePath -Recurse -Force
156 | Write-Host "已删除缓存目录: $cachePath"
157 | } else {
158 | Write-Host "缓存目录不存在: $cachePath"
159 | }
160 |
161 | - name: Test - Translate without offline assets
162 | run: |
163 | ./build/pdf2zh.exe ./test/file/translate.cli.plain.text.pdf -o ./test/file
164 |
165 | - name: Upload test results
166 | uses: actions/upload-artifact@v4
167 | with:
168 | name: test-results
169 | path: ./test/file/
170 |
171 |
--------------------------------------------------------------------------------
/.github/workflows/fork-build.yml:
--------------------------------------------------------------------------------
1 | name: fork-build
2 |
3 | on:
4 | workflow_dispatch:
5 | # debug purpose
6 | # push:
7 |
8 | env:
9 | REGISTRY: ghcr.io
10 | REPO_LOWER: ${{ github.repository_owner }}/${{ github.event.repository.name }}
11 | GHCR_REPO: ghcr.io/${{ github.repository }}
12 | WIN_EXE_PYTHON_VERSION: 3.12.9
13 | jobs:
14 | check-repository:
15 | name: Check if running in main repository
16 | runs-on: ubuntu-latest
17 | outputs:
18 | is_main_repo: ${{ github.repository == 'Byaidu/PDFMathTranslate' }}
19 | steps:
20 | - run: echo "Running repository check"
21 |
22 | test:
23 | uses: ./.github/workflows/python-test.yml
24 | needs: check-repository
25 | if: needs.check-repository.outputs.is_main_repo != 'true'
26 |
27 | build:
28 | strategy:
29 | fail-fast: false
30 | matrix:
31 | include:
32 | - platform: linux/amd64
33 | runner: ubuntu-latest
34 | - platform: linux/arm64
35 | runner: ubuntu-24.04-arm
36 | runs-on: ${{ matrix.runner }}
37 | needs:
38 | - check-repository
39 | - test
40 | if: needs.check-repository.outputs.is_main_repo != 'true'
41 | permissions:
42 | contents: read
43 | packages: write
44 |
45 | steps:
46 | - name: Convert to lowercase
47 | run: |
48 | echo "GHCR_REPO_LOWER=$(echo ${{ env.GHCR_REPO }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV
49 |
50 | - name: Prepare
51 | run: |
52 | platform=${{ matrix.platform }}
53 | echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
54 |
55 | - name: Checkout repository
56 | uses: actions/checkout@v4
57 |
58 | - name: Docker meta
59 | id: meta
60 | uses: docker/metadata-action@v5
61 | with:
62 | images: |
63 | ${{ env.GHCR_REPO_LOWER }}
64 |
65 | - name: Login to GHCR
66 | uses: docker/login-action@v3
67 | with:
68 | registry: ghcr.io
69 | username: ${{ github.repository_owner }}
70 | password: ${{ secrets.GITHUB_TOKEN }}
71 |
72 |
73 | - name: Set up Docker Buildx
74 | uses: docker/setup-buildx-action@v3
75 |
76 | - name: Build and push by digest
77 | id: build
78 | uses: docker/build-push-action@v6
79 | with:
80 | platforms: ${{ matrix.platform }}
81 | labels: ${{ steps.meta.outputs.labels }}
82 | outputs: type=image,name=${{ env.GHCR_REPO_LOWER }},push-by-digest=true,name-canonical=true,push=true
83 | cache-from: ${{ matrix.platform == 'linux/amd64' && 'type=gha' || '' }}
84 | cache-to: ${{ matrix.platform == 'linux/amd64' && 'type=gha,mode=max' || '' }}
85 |
86 | - name: Export digest
87 | run: |
88 | mkdir -p ${{ runner.temp }}/digests
89 | digest="${{ steps.build.outputs.digest }}"
90 | touch "${{ runner.temp }}/digests/${digest#sha256:}"
91 |
92 | - name: Upload digest
93 | uses: actions/upload-artifact@v4
94 | with:
95 | name: digests-${{ env.PLATFORM_PAIR }}
96 | path: ${{ runner.temp }}/digests/*
97 | if-no-files-found: error
98 | retention-days: 1
99 |
100 | merge:
101 | runs-on: ubuntu-latest
102 | needs:
103 | - check-repository
104 | - test
105 | - build
106 | if: needs.check-repository.outputs.is_main_repo != 'true'
107 | permissions:
108 | contents: read
109 | packages: write
110 |
111 | steps:
112 | - name: Convert to lowercase
113 | run: |
114 | echo "GHCR_REPO_LOWER=$(echo ${{ env.GHCR_REPO }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV
115 |
116 | - name: Download digests
117 | uses: actions/download-artifact@v4
118 | with:
119 | path: ${{ runner.temp }}/digests
120 | pattern: digests-*
121 | merge-multiple: true
122 |
123 | - name: Login to GHCR
124 | uses: docker/login-action@v3
125 | with:
126 | registry: ghcr.io
127 | username: ${{ github.repository_owner }}
128 | password: ${{ secrets.GITHUB_TOKEN }}
129 |
130 | - name: Set up Docker Buildx
131 | uses: docker/setup-buildx-action@v3
132 |
133 | - name: Docker meta
134 | id: meta
135 | uses: docker/metadata-action@v5
136 | with:
137 | images: |
138 | ${{ env.GHCR_REPO_LOWER }}
139 | tags: |
140 | type=raw,value=dev
141 | type=semver,pattern={{version}}
142 | type=semver,pattern={{major}}.{{minor}}
143 |
144 | - name: Create manifest list and push
145 | working-directory: ${{ runner.temp }}/digests
146 | run: |
147 | docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
148 | $(printf '${{ env.GHCR_REPO_LOWER }}@sha256:%s ' *)
149 |
150 | - name: Inspect image
151 | run: |
152 | docker buildx imagetools inspect ${{ env.GHCR_REPO_LOWER }}:${{ steps.meta.outputs.version }}
153 |
154 | build-win64-exe:
155 | runs-on: windows-latest
156 | needs:
157 | - check-repository
158 | if: needs.check-repository.outputs.is_main_repo != 'true'
159 | steps:
160 | - name: 检出代码
161 | uses: actions/checkout@v4
162 |
163 | - name: Setup uv with Python ${{ env.WIN_EXE_PYTHON_VERSION }}
164 | uses: astral-sh/setup-uv@f94ec6bedd8674c4426838e6b50417d36b6ab231 # v5.3.1
165 | with:
166 | python-version: ${{ env.WIN_EXE_PYTHON_VERSION }}
167 | enable-cache: true
168 | cache-dependency-glob: "pyproject.toml"
169 |
170 | - name: 执行所有任务(创建目录、下载、解压、复制文件、安装依赖)
171 | shell: pwsh
172 | run: |
173 | Write-Host "==== 创建必要的目录 ===="
174 | New-Item -Path "./build" -ItemType Directory -Force
175 | New-Item -Path "./build/runtime" -ItemType Directory -Force
176 | New-Item -Path "./dep_build" -ItemType Directory -Force
177 |
178 | Write-Host "==== 复制代码到 dep_build ===="
179 | Get-ChildItem -Path "./" -Exclude "dep_build", "build" | Copy-Item -Destination "./dep_build" -Recurse -Force
180 |
181 | Write-Host "==== 下载并解压 Python ${{ env.WIN_EXE_PYTHON_VERSION }} ===="
182 | Write-Host "pythonUrl: https://www.python.org/ftp/python/${{ env.WIN_EXE_PYTHON_VERSION }}/python-${{ env.WIN_EXE_PYTHON_VERSION }}-embed-amd64.zip"
183 | $pythonUrl = "https://www.python.org/ftp/python/${{ env.WIN_EXE_PYTHON_VERSION }}/python-${{ env.WIN_EXE_PYTHON_VERSION }}-embed-amd64.zip"
184 | $pythonZip = "./dep_build/python.zip"
185 | Invoke-WebRequest -Uri $pythonUrl -OutFile $pythonZip
186 | Expand-Archive -Path $pythonZip -DestinationPath "./build/runtime" -Force
187 |
188 | Write-Host "==== 下载 Visual C++ Redistributable 安装程序 ===="
189 | $vcRedistUrl = "https://aka.ms/vs/17/release/vc_redist.x64.exe"
190 | $vcRedistPath = "./build/无法运行请安装vc_redist.x64.exe"
191 | Invoke-WebRequest -Uri $vcRedistUrl -OutFile $vcRedistPath
192 | Write-Host "已下载 Visual C++ Redistributable 安装程序到: $vcRedistPath"
193 |
194 | Write-Host "==== 下载并解压 PyStand ===="
195 | $pystandUrl = "https://github.com/skywind3000/PyStand/releases/download/1.1.4/PyStand-v1.1.4-exe.zip"
196 | $pystandZip = "./dep_build/PyStand.zip"
197 | Invoke-WebRequest -Uri $pystandUrl -OutFile $pystandZip
198 | Expand-Archive -Path $pystandZip -DestinationPath "./dep_build/PyStand" -Force
199 |
200 | Write-Host "==== 复制 PyStand.exe 到 build 并重命名 ===="
201 | $pystandExe = "./dep_build/PyStand/PyStand-x64-CLI/PyStand.exe"
202 | $destExe = "./build/pdf2zh.exe"
203 | if (Test-Path $pystandExe) {
204 | Copy-Item -Path $pystandExe -Destination $destExe -Force
205 | } else {
206 | Write-Host "错误: PyStand.exe 未找到!"
207 | exit 1
208 | }
209 | Write-Host "==== 创建 Python venv 在 dep_build ===="
210 | uv venv ./dep_build/venv
211 |
212 | ./dep_build/venv/Scripts/activate
213 |
214 | Write-Host "==== 在 venv 环境中安装项目依赖 ===="
215 | uv pip install .
216 |
217 | Write-Host "==== 复制 venv/Lib/site-packages 到 build/ ===="
218 | Copy-Item -Path "./dep_build/venv/Lib/site-packages" -Destination "./build/site-packages" -Recurse -Force
219 |
220 | Write-Host "==== 复制 script/_pystand_static.int 到 build/ ===="
221 | $staticFile = "./script/_pystand_static.int"
222 | $destStatic = "./build/_pystand_static.int"
223 | if (Test-Path $staticFile) {
224 | Copy-Item -Path $staticFile -Destination $destStatic -Force
225 | } else {
226 | Write-Host "错误: script/_pystand_static.int 未找到!"
227 | exit 1
228 | }
229 |
230 | - name: Upload build artifact
231 | uses: actions/upload-artifact@v4
232 | with:
233 | name: win64-exe
234 | path: ./build
235 | if-no-files-found: error
236 | compression-level: 1
237 | include-hidden-files: true
238 |
239 | test-win64-exe:
240 | needs:
241 | - build-win64-exe
242 | - check-repository
243 | if: needs.check-repository.outputs.is_main_repo != 'true'
244 | runs-on: windows-latest
245 | steps:
246 | - name: 检出代码
247 | uses: actions/checkout@v4
248 |
249 | - name: Download build artifact
250 | uses: actions/download-artifact@v4
251 | with:
252 | name: win64-exe
253 | path: ./build
254 |
255 | - name: Test show version (online mode)
256 | run: |
257 | ./build/pdf2zh.exe --version
258 |
259 | - name: Test - Translate a PDF file with plain text only (online mode)
260 | run: |
261 | ./build/pdf2zh.exe ./test/file/translate.cli.plain.text.pdf -o ./test/file
262 |
263 | - name: Test - Translate a PDF file figure
264 | run: |
265 | ./build/pdf2zh.exe ./test/file/translate.cli.text.with.figure.pdf -o ./test/file
266 |
267 | - name: Test - Translate without offline assets (online mode)
268 | run: |
269 | ./build/pdf2zh.exe ./test/file/translate.cli.plain.text.pdf -o ./test/file
270 |
271 | - name: Upload test results
272 | uses: actions/upload-artifact@v4
273 | with:
274 | name: test-results
275 | path: ./test/file/
276 | if-no-files-found: error
277 |
278 | - name: Setup uv with Python ${{ env.WIN_EXE_PYTHON_VERSION }}
279 | uses: astral-sh/setup-uv@f94ec6bedd8674c4426838e6b50417d36b6ab231 # v5.3.1
280 | with:
281 | python-version: ${{ env.WIN_EXE_PYTHON_VERSION }}
282 | enable-cache: true
283 | cache-dependency-glob: "pyproject.toml"
284 |
285 | - name: Generate offline assets
286 | shell: pwsh
287 | run: |
288 | Write-Host "==== 生成离线资源包 ===="
289 | uv run --active babeldoc --generate-offline-assets ./build
290 |
291 | - name: Delete cache
292 | shell: pwsh
293 | run: |
294 | Write-Host "==== 删除缓存目录 ===="
295 | $cachePath = "$env:USERPROFILE/.cache/babeldoc"
296 | if (Test-Path $cachePath) {
297 | Remove-Item -Path $cachePath -Recurse -Force
298 | Write-Host "已删除缓存目录: $cachePath"
299 | } else {
300 | Write-Host "缓存目录不存在: $cachePath"
301 | }
302 |
303 | - name: Test - Translate with offline assets (offline mode)
304 | run: |
305 | Write-Host "==== 测试离线资源包 ===="
306 | New-Item -Path "./test/file/offline_result" -ItemType Directory -Force
307 | ./build/pdf2zh.exe ./test/file/translate.cli.plain.text.pdf -o ./test/file/offline_result
308 |
309 | - name: Upload offline test results
310 | uses: actions/upload-artifact@v4
311 | with:
312 | name: offline-test-results
313 | path: ./test/file/offline_result/
314 | if-no-files-found: error
315 |
316 | - name: Upload build with offline assets artifact
317 | uses: actions/upload-artifact@v4
318 | with:
319 | name: win64-exe-with-assets
320 | path: ./build
321 | if-no-files-found: error
322 | compression-level: 1
323 | include-hidden-files: true
--------------------------------------------------------------------------------
/.github/workflows/fork-test.yml:
--------------------------------------------------------------------------------
1 | name: fork-test
2 |
3 | on:
4 | push:
5 | branches: [ "main", "master" ]
6 |
7 | env:
8 | REGISTRY: ghcr.io
9 | REPO_LOWER: ${{ github.repository_owner }}/${{ github.event.repository.name }}
10 | GHCR_REPO: ghcr.io/${{ github.repository }}
11 | WIN_EXE_PYTHON_VERSION: 3.12.9
12 | jobs:
13 | check-repository:
14 | name: Check if running in main repository
15 | runs-on: ubuntu-latest
16 | outputs:
17 | is_main_repo: ${{ github.repository == 'Byaidu/PDFMathTranslate' }}
18 | steps:
19 | - run: echo "Running repository check"
20 |
21 | test:
22 | uses: ./.github/workflows/python-test.yml
23 | needs: check-repository
24 | if: needs.check-repository.outputs.is_main_repo != 'true'
--------------------------------------------------------------------------------
/.github/workflows/python-test.yml:
--------------------------------------------------------------------------------
1 | name: Test and Build Python Package
2 |
3 | on:
4 | push:
5 | branches:
6 | - '**'
7 | - '!main'
8 | - '!master'
9 | pull_request:
10 | workflow_call:
11 |
12 | jobs:
13 | build-and-test:
14 | runs-on: ${{ matrix.runner }}
15 | strategy:
16 | fail-fast: false
17 | matrix:
18 | python-version: ["3.10", "3.11", "3.12"]
19 | runner:
20 | - ubuntu-latest
21 | - ubuntu-24.04-arm
22 | steps:
23 | - name: checkout babeldoc metadata
24 | uses: actions/checkout@v4
25 | with:
26 | repository: funstory-ai/BabelDOC
27 | path: babeldoctemp1234567
28 | token: ${{ secrets.GITHUB_TOKEN }}
29 | sparse-checkout: babeldoc/assets/embedding_assets_metadata.py
30 | - name: Cached Assets
31 | id: cache-assets
32 | uses: actions/cache@v4.2.2
33 | with:
34 | path: ~/.cache/babeldoc
35 | key: test-1-babeldoc-assets-${{ hashFiles('babeldoctemp1234567/babeldoc/assets/embedding_assets_metadata.py') }}
36 | - uses: actions/checkout@v4
37 | - name: Setup uv with Python ${{ matrix.python-version }}
38 | uses: astral-sh/setup-uv@f94ec6bedd8674c4426838e6b50417d36b6ab231 # v5.3.1
39 | with:
40 | python-version: ${{ matrix.python-version }}
41 | enable-cache: true
42 | cache-dependency-glob: "pyproject.toml"
43 | - name: Install dependencies
44 | run: |
45 | uv sync
46 |
47 | - name: Test - Unit Test
48 | run: |
49 | uv run pytest .
50 |
51 | - name: Test - Translate a PDF file with plain text only
52 | run: uv run pdf2zh ./test/file/translate.cli.plain.text.pdf -o ./test/file
53 |
54 | - name: Test - Translate a PDF file figure
55 | run: uv run pdf2zh ./test/file/translate.cli.text.with.figure.pdf -o ./test/file
56 |
57 | # - name: Test - Translate a PDF file with unknown font
58 | # run:
59 | # pdf2zh ./test/file/translate.cli.font.unknown.pdf
60 |
61 | - name: Test - Start GUI and exit
62 | run: timeout 10 uv run pdf2zh -i || code=$?; if [[ $code -ne 124 && $code -ne 0 ]]; then exit $code; fi
63 |
64 | - name: Build as a package
65 | run: uv build
66 |
67 | - name: Upload test results
68 | uses: actions/upload-artifact@v4
69 | with:
70 | name: test-results-${{ matrix.python-version }}-${{ matrix.runner }}
71 | path: ./test/file/
72 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | pdf2zh_files
2 | gui/pdf2zh_files
3 | gradio_files
4 | tmp
5 | gui/gradio_files
6 | gui/tmp
7 | # Byte-compiled / optimized / DLL files
8 | __pycache__/
9 | *.py[cod]
10 | *$py.class
11 |
12 | # C extensions
13 | *.so
14 |
15 | # Distribution / packaging
16 | .Python
17 | build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | wheels/
29 | share/python-wheels/
30 | *.egg-info/
31 | .installed.cfg
32 | *.egg
33 | MANIFEST
34 |
35 | # PyInstaller
36 | # Usually these files are written by a python script from a template
37 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
38 | *.manifest
39 | *.spec
40 |
41 | # Installer logs
42 | pip-log.txt
43 | pip-delete-this-directory.txt
44 |
45 | # Unit test / coverage reports
46 | htmlcov/
47 | .tox/
48 | .nox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *.cover
55 | *.py,cover
56 | .hypothesis/
57 | .pytest_cache/
58 | cover/
59 |
60 | # Translations
61 | *.mo
62 | *.pot
63 |
64 | # Django stuff:
65 | *.log
66 | local_settings.py
67 | db.sqlite3
68 | db.sqlite3-journal
69 |
70 | # Flask stuff:
71 | instance/
72 | .webassets-cache
73 |
74 | # Scrapy stuff:
75 | .scrapy
76 |
77 | # Sphinx documentation
78 | docs/_build/
79 |
80 | # PyBuilder
81 | .pybuilder/
82 | target/
83 |
84 | # Jupyter Notebook
85 | .ipynb_checkpoints
86 |
87 | # IPython
88 | profile_default/
89 | ipython_config.py
90 |
91 | # pyenv
92 | # For a library or package, you might want to ignore these files since the code is
93 | # intended to run in multiple environments; otherwise, check them in:
94 | # .python-version
95 |
96 | # pipenv
97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
100 | # install all needed dependencies.
101 | #Pipfile.lock
102 |
103 | # poetry
104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | # This is especially recommended for binary packages to ensure reproducibility, and is more
106 | # commonly ignored for libraries.
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 |
110 | # pdm
111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | # in version control.
115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116 | .pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 |
120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121 | __pypackages__/
122 |
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 |
127 | # SageMath parsed files
128 | *.sage.py
129 |
130 | # Environments
131 | .env
132 | .venv
133 | env/
134 | venv/
135 | ENV/
136 | env.bak/
137 | venv.bak/
138 | pdf2zh-dev/
139 |
140 | # Spyder project settings
141 | .spyderproject
142 | .spyproject
143 |
144 | # Rope project settings
145 | .ropeproject
146 |
147 | # mkdocs documentation
148 | /site
149 |
150 | # mypy
151 | .mypy_cache/
152 | .dmypy.json
153 | dmypy.json
154 |
155 | # Pyre type checker
156 | .pyre/
157 |
158 | # pytype static type analyzer
159 | .pytype/
160 |
161 | # Cython debug symbols
162 | cython_debug/
163 |
164 | # PyCharm
165 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
166 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
167 | # and can be added to the global gitignore or merged into this file. For a more nuclear
168 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
169 | .idea/
170 | .vscode
171 | .DS_Store
172 | uv.lock
173 | *.pdf
174 | *.docx
175 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # See https://pre-commit.com for more information
2 | # See https://pre-commit.com/hooks.html for more hooks
3 | files: '^.*\.py$'
4 | repos:
5 | - repo: local
6 | hooks:
7 | - id: black
8 | name: black
9 | entry: black --check --diff --color
10 | language: python
11 | - id: flake8
12 | name: flake8
13 | entry: flake8 --ignore E203,E261,E501,W503,E741
14 | language: python
15 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim
2 |
3 | WORKDIR /app
4 |
5 |
6 | EXPOSE 7860
7 |
8 | ENV PYTHONUNBUFFERED=1
9 |
10 | # # Download all required fonts
11 | # ADD "https://github.com/satbyy/go-noto-universal/releases/download/v7.0/GoNotoKurrent-Regular.ttf" /app/
12 | # ADD "https://github.com/timelic/source-han-serif/releases/download/main/SourceHanSerifCN-Regular.ttf" /app/
13 | # ADD "https://github.com/timelic/source-han-serif/releases/download/main/SourceHanSerifTW-Regular.ttf" /app/
14 | # ADD "https://github.com/timelic/source-han-serif/releases/download/main/SourceHanSerifJP-Regular.ttf" /app/
15 | # ADD "https://github.com/timelic/source-han-serif/releases/download/main/SourceHanSerifKR-Regular.ttf" /app/
16 |
17 | RUN apt-get update && \
18 | apt-get install --no-install-recommends -y libgl1 libglib2.0-0 libxext6 libsm6 libxrender1 && \
19 | rm -rf /var/lib/apt/lists/*
20 |
21 | COPY pyproject.toml .
22 | RUN uv pip install --system --no-cache -r pyproject.toml && babeldoc --version && babeldoc --warmup
23 |
24 | COPY . .
25 |
26 | RUN uv pip install --system --no-cache . && uv pip install --system --no-cache -U babeldoc "pymupdf<1.25.3" && babeldoc --version && babeldoc --warmup
27 |
28 | CMD ["pdf2zh", "-i"]
29 |
--------------------------------------------------------------------------------
/app.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "PDFMathTranslate",
3 | "description": "PDF scientific paper translation and bilingual comparison.",
4 | "repository": "https://github.com/Byaidu/PDFMathTranslate"
5 | }
--------------------------------------------------------------------------------
/docs/ADVANCED.md:
--------------------------------------------------------------------------------
1 | [**Documentation**](https://github.com/Byaidu/PDFMathTranslate) > **Advanced Usage** _(current)_
2 |
3 | ---
4 |
5 |
Table of Contents
6 |
7 | - [Full / partial translation](#partial)
8 | - [Specify source and target languages](#language)
9 | - [Translate with different services](#services)
10 | - [Translate with exceptions](#exceptions)
11 | - [Multi-threads](#threads)
12 | - [Custom prompt](#prompt)
13 | - [Authorization](#auth)
14 | - [Custom configuration file](#cofig)
15 | - [Fonts Subsetting](#fonts-subset)
16 | - [Translation cache](#cache)
17 |
18 | ---
19 |
20 | Full / partial translation
21 |
22 | - Entire document
23 |
24 | ```bash
25 | pdf2zh example.pdf
26 | ```
27 |
28 | - Part of the document
29 |
30 | ```bash
31 | pdf2zh example.pdf -p 1-3,5
32 | ```
33 |
34 | [⬆️ Back to top](#toc)
35 |
36 | ---
37 |
38 | Specify source and target languages
39 |
40 | See [Google Languages Codes](https://developers.google.com/admin-sdk/directory/v1/languages), [DeepL Languages Codes](https://developers.deepl.com/docs/resources/supported-languages)
41 |
42 | ```bash
43 | pdf2zh example.pdf -li en -lo ja
44 | ```
45 |
46 | [⬆️ Back to top](#toc)
47 |
48 | ---
49 |
50 | Translate with different services
51 |
52 | We've provided a detailed table on the required [environment variables](https://chatgpt.com/share/6734a83d-9d48-800e-8a46-f57ca6e8bcb4) for each translation service. Make sure to set them before using the respective service.
53 |
54 | | **Translator** | **Service** | **Environment Variables** | **Default Values** | **Notes** |
55 | |----------------------|----------------|-----------------------------------------------------------------------|----------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
56 | | **Google (Default)** | `google` | None | N/A | None |
57 | | **Bing** | `bing` | None | N/A | None |
58 | | **DeepL** | `deepl` | `DEEPL_AUTH_KEY` | `[Your Key]` | See [DeepL](https://support.deepl.com/hc/en-us/articles/360020695820-API-Key-for-DeepL-s-API) |
59 | | **DeepLX** | `deeplx` | `DEEPLX_ENDPOINT` | `https://api.deepl.com/translate` | See [DeepLX](https://github.com/OwO-Network/DeepLX) |
60 | | **Ollama** | `ollama` | `OLLAMA_HOST`, `OLLAMA_MODEL` | `http://127.0.0.1:11434`, `gemma2` | See [Ollama](https://github.com/ollama/ollama) |
61 | | **Xinference** | `xinference` | `XINFERENCE_HOST`, `XINFERENCE_MODEL` | `http://127.0.0.1:9997`, `gemma-2-it` | See [Xinference](https://github.com/xorbitsai/inference) |
62 | | **OpenAI** | `openai` | `OPENAI_BASE_URL`, `OPENAI_API_KEY`, `OPENAI_MODEL` | `https://api.openai.com/v1`, `[Your Key]`, `gpt-4o-mini` | See [OpenAI](https://platform.openai.com/docs/overview) |
63 | | **AzureOpenAI** | `azure-openai` | `AZURE_OPENAI_BASE_URL`, `AZURE_OPENAI_API_KEY`, `AZURE_OPENAI_MODEL` | `[Your Endpoint]`, `[Your Key]`, `gpt-4o-mini` | See [Azure OpenAI](https://learn.microsoft.com/zh-cn/azure/ai-services/openai/chatgpt-quickstart?tabs=command-line%2Cjavascript-keyless%2Ctypescript-keyless%2Cpython&pivots=programming-language-python) |
64 | | **Zhipu** | `zhipu` | `ZHIPU_API_KEY`, `ZHIPU_MODEL` | `[Your Key]`, `glm-4-flash` | See [Zhipu](https://open.bigmodel.cn/dev/api/thirdparty-frame/openai-sdk) |
65 | | **ModelScope** | `ModelScope` | `MODELSCOPE_API_KEY`, `MODELSCOPE_MODEL` | `[Your Key]`, `Qwen/Qwen2.5-Coder-32B-Instruct` | See [ModelScope](https://www.modelscope.cn/docs/model-service/API-Inference/intro) |
66 | | **Silicon** | `silicon` | `SILICON_API_KEY`, `SILICON_MODEL` | `[Your Key]`, `Qwen/Qwen2.5-7B-Instruct` | See [SiliconCloud](https://docs.siliconflow.cn/quickstart) |
67 | | **Gemini** | `gemini` | `GEMINI_API_KEY`, `GEMINI_MODEL` | `[Your Key]`, `gemini-1.5-flash` | See [Gemini](https://ai.google.dev/gemini-api/docs/openai) |
68 | | **Azure** | `azure` | `AZURE_ENDPOINT`, `AZURE_API_KEY` | `https://api.translator.azure.cn`, `[Your Key]` | See [Azure](https://docs.azure.cn/en-us/ai-services/translator/text-translation-overview) |
69 | | **Tencent** | `tencent` | `TENCENTCLOUD_SECRET_ID`, `TENCENTCLOUD_SECRET_KEY` | `[Your ID]`, `[Your Key]` | See [Tencent](https://www.tencentcloud.com/products/tmt?from_qcintl=122110104) |
70 | | **Dify** | `dify` | `DIFY_API_URL`, `DIFY_API_KEY` | `[Your DIFY URL]`, `[Your Key]` | See [Dify](https://github.com/langgenius/dify). Three variables (`lang_out`, `lang_in`, and `text`) need to be defined in Dify's workflow input. |
71 | | **AnythingLLM** | `anythingllm` | `AnythingLLM_URL`, `AnythingLLM_APIKEY` | `[Your AnythingLLM URL]`, `[Your Key]` | See [anything-llm](https://github.com/Mintplex-Labs/anything-llm) |
72 | |**Argos Translate**|`argos`| | |See [argos-translate](https://github.com/argosopentech/argos-translate)|
73 | |**Grok**|`grok`| `GORK_API_KEY`, `GORK_MODEL` | `[Your GORK_API_KEY]`, `grok-2-1212` |See [Grok](https://docs.x.ai/docs/overview)|
74 | |**Groq**|`groq`| `GROQ_API_KEY`, `GROQ_MODEL` | `[Your GROQ_API_KEY]`, `llama-3-3-70b-versatile` |See [Groq](https://console.groq.com/docs/models)|
75 | |**DeepSeek**|`deepseek`| `DEEPSEEK_API_KEY`, `DEEPSEEK_MODEL` | `[Your DEEPSEEK_API_KEY]`, `deepseek-chat` |See [DeepSeek](https://www.deepseek.com/)|
76 | |**OpenAI-Liked**|`openailiked`| `OPENAILIKED_BASE_URL`, `OPENAILIKED_API_KEY`, `OPENAILIKED_MODEL` | `url`, `[Your Key]`, `model name` | None |
77 | |**Ali Qwen Translation**|`qwen-mt`| `ALI_MODEL`, `ALI_API_KEY`, `ALI_DOMAINS` | `qwen-mt-turbo`, `[Your Key]`, `scientific paper` | Traditional Chinese is not yet supported; it will be translated into Simplified Chinese. For more, see [Qwen MT](https://bailian.console.aliyun.com/?spm=5176.28197581.0.0.72e329a4HRxe99#/model-market/detail/qwen-mt-turbo) |
78 |
79 | For large language models that are compatible with the OpenAI API but not listed in the table above, you can set environment variables using the same method outlined for OpenAI in the table.
80 |
81 | Use `-s service` or `-s service:model` to specify service:
82 |
83 | ```bash
84 | pdf2zh example.pdf -s openai:gpt-4o-mini
85 | ```
86 |
87 | Or specify model with environment variables:
88 |
89 | ```bash
90 | set OPENAI_MODEL=gpt-4o-mini
91 | pdf2zh example.pdf -s openai
92 | ```
93 |
94 | For PowerShell users:
95 |
96 | ```shell
97 | $env:OPENAI_MODEL = "gpt-4o-mini"
98 | pdf2zh example.pdf -s openai
99 | ```
100 |
101 | [⬆️ Back to top](#toc)
102 |
103 | ---
104 |
105 | Translate with exceptions
106 |
107 | Use regex to specify formula fonts and characters that need to be preserved:
108 |
109 | ```bash
110 | pdf2zh example.pdf -f "(CM[^RT].*|MS.*|.*Ital)" -c "(\(|\||\)|\+|=|\d|[\u0080-\ufaff])"
111 | ```
112 |
113 | Preserve `Latex`, `Mono`, `Code`, `Italic`, `Symbol` and `Math` fonts by default:
114 |
115 | ```bash
116 | pdf2zh example.pdf -f "(CM[^R]|MS.M|XY|MT|BL|RM|EU|LA|RS|LINE|LCIRCLE|TeX-|rsfs|txsy|wasy|stmary|.*Mono|.*Code|.*Ital|.*Sym|.*Math)"
117 | ```
118 |
119 | [⬆️ Back to top](#toc)
120 |
121 | ---
122 |
123 | Multi-threads
124 |
125 | Use `-t` to specify how many threads to use in translation:
126 |
127 | ```bash
128 | pdf2zh example.pdf -t 1
129 | ```
130 |
131 | [⬆️ Back to top](#toc)
132 |
133 | ---
134 |
135 | Custom prompt
136 |
137 | Note: System prompt is currently not supported. See [this change](https://github.com/Byaidu/PDFMathTranslate/pull/637).
138 |
139 | Use `--prompt` to specify the prompt file to use with the LLM:
140 |
141 | ```bash
142 | pdf2zh example.pdf --prompt prompt.txt
143 | ```
144 |
145 | For example:
146 |
147 | ```txt
148 | You are a professional, authentic machine translation engine. Only Output the translated text, do not include any other text.
149 |
150 | Translate the following markdown source text to ${lang_out}. Keep the formula notation {v*} unchanged. Output translation directly without any additional text.
151 |
152 | Source Text: ${text}
153 |
154 | Translated Text:
155 | ```
156 |
157 | In a custom prompt file, three variables can be used:
158 |
159 | |**variables**|**comment**|
160 | |-|-|
161 | |`lang_in`|input language|
162 | |`lang_out`|output language|
163 | |`text`|text that needs to be translated|
164 |
165 | [⬆️ Back to top](#toc)
166 |
167 | ---
168 |
169 | Authorization
170 |
171 | Use `--authorized` to specify which users may access the Web UI and to customize the login page:
172 |
173 | ```bash
174 | pdf2zh example.pdf --authorized users.txt auth.html
175 | ```
176 |
177 | example users.txt
178 | Each line contains two elements, username, and password, separated by a comma.
179 |
180 | ```
181 | admin,123456
182 | user1,password1
183 | user2,abc123
184 | guest,guest123
185 | test,test123
186 | ```
187 |
188 | example auth.html
189 |
190 | ```html
191 |
192 |
193 |
194 | Simple HTML
195 |
196 |
197 | Hello, World!
198 | Welcome to my simple HTML page.
199 |
200 |
201 | ```
202 |
203 | [⬆️ Back to top](#toc)
204 |
205 | ---
206 |
207 | Custom configuration file
208 |
209 | Use `--config` to specify which file to configure the PDFMathTranslate:
210 |
211 | ```bash
212 | pdf2zh example.pdf --config config.json
213 | ```
214 |
215 | ```bash
216 | pdf2zh -i --config config.json
217 | ```
218 |
219 | example config.json
220 |
221 | ```json
222 | {
223 | "USE_MODELSCOPE": "0",
224 | "PDF2ZH_LANG_FROM": "English",
225 | "PDF2ZH_LANG_TO": "Simplified Chinese",
226 | "NOTO_FONT_PATH": "/app/SourceHanSerifCN-Regular.ttf",
227 | "translators": [
228 | {
229 | "name": "deeplx",
230 | "envs": {
231 | "DEEPLX_ENDPOINT": "http://localhost:1188/translate/",
232 | "DEEPLX_ACCESS_TOKEN": null
233 | }
234 | },
235 | {
236 | "name": "ollama",
237 | "envs": {
238 | "OLLAMA_HOST": "http://127.0.0.1:11434",
239 | "OLLAMA_MODEL": "gemma2"
240 | }
241 | }
242 | ]
243 | }
244 | ```
245 |
246 | By default, the config file is saved in the `~/.config/PDFMathTranslate/config.json`. The program will start by reading the contents of config.json, and after that it will read the contents of the environment variables. When an environment variable is available, the contents of the environment variable are used first and the file is updated.
247 |
248 | [⬆️ Back to top](#toc)
249 |
250 | ---
251 |
252 | Fonts subsetting
253 |
254 | By default, PDFMathTranslate uses font subsetting to decrease the size of output files. You can use the `--skip-subset-fonts` option to disable font subsetting when encountering compatibility issues.
255 |
256 | ```bash
257 | pdf2zh example.pdf --skip-subset-fonts
258 | ```
259 |
260 | [⬆️ Back to top](#toc)
261 |
262 | ---
263 |
264 | Translation cache
265 |
266 | PDFMathTranslate caches translated texts to increase speed and avoid unnecessary API calls for same contents. You can use `--ignore-cache` option to ignore translation cache and force retranslation.
267 |
268 | ```bash
269 | pdf2zh example.pdf --ignore-cache
270 | ```
271 |
272 | [⬆️ Back to top](#toc)
273 |
274 | ---
275 |
276 | Deployment as a public service
277 |
278 | PDFMathTranslate has added the features of **enabling partial services** and **hiding Backend information** in
279 | the configuration file. You can enable these by setting `ENABLED_SERVICES` and `HIDDEN_GRADIO_DETAILS` in the
280 | configuration file. Among them:
281 |
282 | - `ENABLED_SERVICES` allows you to choose to enable only certain options, limiting the number of available services.
283 | - `HIDDEN_GRADIO_DETAILS` will hide the real API_KEY on the web, preventing users from obtaining server-side keys.
284 |
285 | A usable configuration is as follows:
286 |
287 | ```json
288 | {
289 | "USE_MODELSCOPE": "0",
290 | "translators": [
291 | {
292 | "name": "grok",
293 | "envs": {
294 | "GORK_API_KEY": null,
295 | "GORK_MODEL": "grok-2-1212"
296 | }
297 | },
298 | {
299 | "name": "openai",
300 | "envs": {
301 | "OPENAI_BASE_URL": "https://api.openai.com/v1",
302 | "OPENAI_API_KEY": "sk-xxxx",
303 | "OPENAI_MODEL": "gpt-4o-mini"
304 | }
305 | }
306 | ],
307 | "ENABLED_SERVICES": [
308 | "OpenAI",
309 | "Grok"
310 | ],
311 | "HIDDEN_GRADIO_DETAILS": true,
312 | "PDF2ZH_LANG_FROM": "English",
313 | "PDF2ZH_LANG_TO": "Simplified Chinese",
314 | "NOTO_FONT_PATH": "/app/SourceHanSerifCN-Regular.ttf"
315 | }
316 | ```
317 |
318 | [⬆️ Back to top](#toc)
319 |
320 |
321 | ---
322 |
323 | MCP
324 |
325 | PDFMathTranslate can run as an MCP server. To use it, run `uv pip install pdf2zh` and configure `claude_desktop_config.json`. An example config is as follows:
326 |
327 | ``` json
328 | {
329 | "mcpServers": {
330 | "filesystem": {
331 | "command": "npx",
332 | "args": [
333 | "-y",
334 | "@modelcontextprotocol/server-filesystem",
335 | "/path/to/Document"
336 | ]
337 | },
338 | "translate_pdf": {
339 | "command": "uv",
340 | "args": [
341 | "run",
342 | "pdf2zh",
343 | "--mcp"
344 | ]
345 | }
346 | }
347 | }
348 | ```
349 |
350 | [filesystem](https://github.com/modelcontextprotocol/servers/tree/main/src/filesystem) is a required MCP server used to find the PDF file, and `translate_pdf` is our MCP server.
351 |
352 | To test whether the MCP server works, open Claude Desktop and tell it:
353 |
354 | ```
355 | find the `test.pdf` in my Document folder and translate it to Chinese
356 | ```
357 |
--------------------------------------------------------------------------------
/docs/APIS.md:
--------------------------------------------------------------------------------
1 | [**Documentation**](https://github.com/Byaidu/PDFMathTranslate) > **API Details** _(current)_
2 |
3 | Table of Contents
4 | This project supports two types of APIs. All methods require Redis.
5 |
6 | - [Functional calls in Python](#api-python)
7 | - [HTTP protocols](#api-http)
8 |
9 | ---
10 |
11 | Python
12 |
13 | As `pdf2zh` is an installed module in Python, we expose two methods for other programs to call in any Python scripts.
14 |
15 | For example, if you want to translate a document from English to Chinese using Google Translate, you may use the following code:
16 |
17 | ```python
18 | from pdf2zh import translate, translate_stream
19 |
20 | params = {
21 | 'lang_in': 'en',
22 | 'lang_out': 'zh',
23 | 'service': 'google',
24 | 'thread': 4,
25 | }
26 | ```
27 | Translate with files:
28 | ```python
29 | (file_mono, file_dual) = translate(files=['example.pdf'], **params)[0]
30 | ```
31 | Translate with stream:
32 | ```python
33 | with open('example.pdf', 'rb') as f:
34 | (stream_mono, stream_dual) = translate_stream(stream=f.read(), **params)
35 | ```
36 |
37 | [⬆️ Back to top](#toc)
38 |
39 | ---
40 |
41 | HTTP
42 |
43 | For more flexibility, you can communicate with the program over HTTP as follows:
44 |
45 | 1. Install and run backend
46 |
47 | ```bash
48 | pip install pdf2zh[backend]
49 | pdf2zh --flask
50 | pdf2zh --celery worker
51 | ```
52 |
53 | 2. Using HTTP protocols as follows:
54 |
55 | - Submit translate task
56 |
57 | ```bash
58 | curl http://localhost:11008/v1/translate -F "file=@example.pdf" -F "data={\"lang_in\":\"en\",\"lang_out\":\"zh\",\"service\":\"google\",\"thread\":4}"
59 | {"id":"d9894125-2f4e-45ea-9d93-1a9068d2045a"}
60 | ```
61 |
62 | - Check Progress
63 |
64 | ```bash
65 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a
66 | {"info":{"n":13,"total":506},"state":"PROGRESS"}
67 | ```
68 |
69 | - Check Progress _(if finished)_
70 |
71 | ```bash
72 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a
73 | {"state":"SUCCESS"}
74 | ```
75 |
76 | - Save monolingual file
77 |
78 | ```bash
79 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a/mono --output example-mono.pdf
80 | ```
81 |
82 | - Save bilingual file
83 |
84 | ```bash
85 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a/dual --output example-dual.pdf
86 | ```
87 |
88 | - Interrupt if running and delete the task
89 | ```bash
90 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a -X DELETE
91 | ```
92 |
93 | [⬆️ Back to top](#toc)
94 |
95 | ---
96 |
--------------------------------------------------------------------------------
/docs/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, religion, or sexual identity
10 | and orientation.
11 |
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 |
15 | ## Our Standards
16 |
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 |
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 | and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the
26 | overall community
27 |
28 | Examples of unacceptable behavior include:
29 |
30 | * The use of sexualized language or imagery, and sexual attention or
31 | advances of any kind
32 | * Trolling, insulting or derogatory comments, and personal or political attacks
33 | * Public or private harassment
34 | * Publishing others' private information, such as a physical or email
35 | address, without their explicit permission
36 | * Other conduct which could reasonably be considered inappropriate in a
37 | professional setting
38 |
39 | ## Enforcement Responsibilities
40 |
41 | Community leaders are responsible for clarifying and enforcing our standards of
42 | acceptable behavior and will take appropriate and fair corrective action in
43 | response to any behavior that they deem inappropriate, threatening, offensive,
44 | or harmful.
45 |
46 | Community leaders have the right and responsibility to remove, edit, or reject
47 | comments, commits, code, wiki edits, issues, and other contributions that are
48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
49 | decisions when appropriate.
50 |
51 | ## Scope
52 |
53 | This Code of Conduct applies within all community spaces, and also applies when
54 | an individual is officially representing the community in public spaces.
55 | Examples of representing our community include using an official e-mail address,
56 | posting via an official social media account, or acting as an appointed
57 | representative at an online or offline event.
58 |
59 | ## Enforcement
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported to the community leaders responsible for enforcement at
63 | aw@funstory.ai .
64 | All complaints will be reviewed and investigated promptly and fairly.
65 |
66 | All community leaders are obligated to respect the privacy and security of the
67 | reporter of any incident.
68 |
69 | ## Enforcement Guidelines
70 |
71 | Community leaders will follow these Community Impact Guidelines in determining
72 | the consequences for any action they deem in violation of this Code of Conduct:
73 |
74 | ### 1. Correction
75 |
76 | **Community Impact**: Use of inappropriate language or other behavior deemed
77 | unprofessional or unwelcome in the community.
78 |
79 | **Consequence**: A private, written warning from community leaders, providing
80 | clarity around the nature of the violation and an explanation of why the
81 | behavior was inappropriate. A public apology may be requested.
82 |
83 | ### 2. Warning
84 |
85 | **Community Impact**: A violation through a single incident or series
86 | of actions.
87 |
88 | **Consequence**: A warning with consequences for continued behavior. No
89 | interaction with the people involved, including unsolicited interaction with
90 | those enforcing the Code of Conduct, for a specified period of time. This
91 | includes avoiding interactions in community spaces as well as external channels
92 | like social media. Violating these terms may lead to a temporary or
93 | permanent ban.
94 |
95 | ### 3. Temporary Ban
96 |
97 | **Community Impact**: A serious violation of community standards, including
98 | sustained inappropriate behavior.
99 |
100 | **Consequence**: A temporary ban from any sort of interaction or public
101 | communication with the community for a specified period of time. No public or
102 | private interaction with the people involved, including unsolicited interaction
103 | with those enforcing the Code of Conduct, is allowed during this period.
104 | Violating these terms may lead to a permanent ban.
105 |
106 | ### 4. Permanent Ban
107 |
108 | **Community Impact**: Demonstrating a pattern of violation of community
109 | standards, including sustained inappropriate behavior, harassment of an
110 | individual, or aggression toward or disparagement of classes of individuals.
111 |
112 | **Consequence**: A permanent ban from any sort of public interaction within
113 | the community.
114 |
115 | ## Attribution
116 |
117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118 | version 2.0, available at
119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120 |
121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
122 | enforcement ladder](https://github.com/mozilla/diversity).
123 |
124 | [homepage]: https://www.contributor-covenant.org
125 |
126 | For answers to common questions about this code of conduct, see the FAQ at
127 | https://www.contributor-covenant.org/faq. Translations are available at
128 | https://www.contributor-covenant.org/translations.
129 |
--------------------------------------------------------------------------------
/docs/README_GUI.md:
--------------------------------------------------------------------------------
1 | # Interact with GUI
2 |
3 | This subfolder provides the GUI mode of `pdf2zh`.
4 |
5 | ## Usage
6 |
7 | 1. Run `pdf2zh -i`
8 |
9 | 2. Drop the PDF file into the window and click `Translate`.
10 |
11 | ### Environment Variables
12 |
13 | You can set the source and target languages using environment variables:
14 |
15 | - `PDF2ZH_LANG_FROM`: Sets the source language. Defaults to "English".
16 | - `PDF2ZH_LANG_TO`: Sets the target language. Defaults to "Simplified Chinese".
17 |
18 | ### Supported Languages
19 |
20 | The following languages are supported:
21 |
22 | - English
23 | - Simplified Chinese
24 | - Traditional Chinese
25 | - French
26 | - German
27 | - Japanese
28 | - Korean
29 | - Russian
30 | - Spanish
31 | - Italian
32 |
33 | ## Preview
34 |
35 |
36 |
37 |
38 | ## Maintenance
39 |
40 | GUI maintained by [Rongxin](https://github.com/reycn)
41 |
--------------------------------------------------------------------------------
/docs/README_ja-JP.md:
--------------------------------------------------------------------------------
1 |
35 |
36 | 科学 PDF 文書の翻訳およびバイリンガル比較ツール
37 |
38 | - 📊 数式、チャート、目次、注釈を保持 *([プレビュー](#preview))*
39 | - 🌐 [複数の言語](#language) と [多様な翻訳サービス](#services) をサポート
40 | - 🤖 [コマンドラインツール](#usage)、[インタラクティブユーザーインターフェース](#gui)、および [Docker](#docker) を提供
41 |
42 | フィードバックは [GitHub Issues](https://github.com/Byaidu/PDFMathTranslate/issues)、[Telegram グループ](https://t.me/+Z9_SgnxmsmA5NzBl) でお寄せください。
43 |
44 | 最近の更新
45 |
46 | - [2024年11月26日] CLIがオンラインファイルをサポートするようになりました *(by [@reycn](https://github.com/reycn))*
47 | - [2024年11月24日] 依存関係のサイズを削減するために [ONNX](https://github.com/onnx/onnx) サポートを追加しました *(by [@Wybxc](https://github.com/Wybxc))*
48 | - [2024年11月23日] 🌟 [公共サービス](#demo) がオンラインになりました! *(by [@Byaidu](https://github.com/Byaidu))*
49 | - [2024年11月23日] ウェブボットを防ぐためのファイアウォールを追加しました *(by [@Byaidu](https://github.com/Byaidu))*
50 | - [2024年11月22日] GUIがイタリア語をサポートし、改善されました *(by [@Byaidu](https://github.com/Byaidu), [@reycn](https://github.com/reycn))*
51 | - [2024年11月22日] デプロイされたサービスを他の人と共有できるようになりました *(by [@Zxis233](https://github.com/Zxis233))*
52 | - [2024年11月22日] Tencent翻訳をサポートしました *(by [@hellofinch](https://github.com/hellofinch))*
53 | - [2024年11月21日] GUIがバイリンガルドキュメントのダウンロードをサポートするようになりました *(by [@reycn](https://github.com/reycn))*
54 | - [2024年11月20日] 🌟 [デモ](#demo) がオンラインになりました! *(by [@reycn](https://github.com/reycn))*
55 |
56 | プレビュー
57 |
58 |
59 |
60 |
61 |
62 | 公共サービス 🌟
63 |
64 | ### 無料サービス (<https://pdf2zh.com/>)
65 |
66 | インストールなしで [公共サービス](https://pdf2zh.com/) をオンラインで試すことができます。
67 |
68 | ### デモ
69 |
70 | インストールなしで [HuggingFace上のデモ](https://huggingface.co/spaces/reycn/PDFMathTranslate-Docker), [ModelScope上のデモ](https://www.modelscope.cn/studios/AI-ModelScope/PDFMathTranslate) を試すことができます。
71 | デモの計算リソースは限られているため、乱用しないようにしてください。
72 |
73 | インストールと使用方法
74 |
75 | このプロジェクトを使用するための4つの方法を提供しています:[コマンドライン](#cmd)、[ポータブル](#portable)、[GUI](#gui)、および [Docker](#docker)。
76 |
77 | pdf2zhの実行には追加モデル(`wybxc/DocLayout-YOLO-DocStructBench-onnx`)が必要です。このモデルはModelScopeでも見つけることができます。起動時にこのモデルのダウンロードに問題がある場合は、以下の環境変数を使用してください:
78 |
79 | ```shell
80 | set HF_ENDPOINT=https://hf-mirror.com
81 | ```
82 |
83 | PowerShell ユーザーの場合:
84 | ```shell
85 | $env:HF_ENDPOINT = "https://hf-mirror.com"
86 | ```
87 |
88 | 方法1. コマンドライン
89 |
90 | 1. Pythonがインストールされていること (バージョン3.10 <= バージョン <= 3.12)
91 | 2. パッケージをインストールします:
92 |
93 | ```bash
94 | pip install pdf2zh
95 | ```
96 |
97 | 3. 翻訳を実行し、[現在の作業ディレクトリ](https://chatgpt.com/share/6745ed36-9acc-800e-8a90-59204bd13444) にファイルを生成します:
98 |
99 | ```bash
100 | pdf2zh document.pdf
101 | ```
102 |
103 | 方法2. ポータブル
104 |
105 | Python環境を事前にインストールする必要はありません
106 |
107 | [setup.bat](https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/refs/heads/main/script/setup.bat) をダウンロードしてダブルクリックして実行します
108 |
109 | 方法3. GUI
110 |
111 | 1. Pythonがインストールされていること (バージョン3.10 <= バージョン <= 3.12)
112 | 2. パッケージをインストールします:
113 |
114 | ```bash
115 | pip install pdf2zh
116 | ```
117 |
118 | 3. ブラウザで使用を開始します:
119 |
120 | ```bash
121 | pdf2zh -i
122 | ```
123 |
124 | 4. ブラウザが自動的に起動しない場合は、次のURLを開きます:
125 |
126 | ```bash
127 | http://localhost:7860/
128 | ```
129 |
130 |
131 |
132 | 詳細については、[GUIのドキュメント](./README_GUI.md) を参照してください。
133 |
134 | 方法4. Docker
135 |
136 | 1. プルして実行します:
137 |
138 | ```bash
139 | docker pull byaidu/pdf2zh
140 | docker run -d -p 7860:7860 byaidu/pdf2zh
141 | ```
142 |
143 | 2. ブラウザで開きます:
144 |
145 | ```
146 | http://localhost:7860/
147 | ```
148 |
149 | クラウドサービスでのDockerデプロイメント用:
150 |
151 |
161 |
162 | 高度なオプション
163 |
164 | コマンドラインで翻訳コマンドを実行し、現在の作業ディレクトリに翻訳されたドキュメント `example-mono.pdf` とバイリンガルドキュメント `example-dual.pdf` を生成します。デフォルトではGoogle翻訳サービスを使用します。その他のサポートされている翻訳サービスは[こちら](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#services)を参照してください。
165 |
166 |
167 |
168 |
169 | 以下の表に、参考のためにすべての高度なオプションをリストしました:
170 |
171 | | オプション | 機能 | 例 |
172 | | -------- | ------- |------- |
173 | | files | ローカルファイル | `pdf2zh ~/local.pdf` |
174 | | links | オンラインファイル | `pdf2zh http://arxiv.org/paper.pdf` |
175 | | `-i` | [GUIに入る](#gui) | `pdf2zh -i` |
176 | | `-p` | [部分的なドキュメント翻訳](#partial) | `pdf2zh example.pdf -p 1` |
177 | | `-li` | [ソース言語](#languages) | `pdf2zh example.pdf -li en` |
178 | | `-lo` | [ターゲット言語](#languages) | `pdf2zh example.pdf -lo zh` |
179 | | `-s` | [翻訳サービス](#services) | `pdf2zh example.pdf -s deepl` |
180 | | `-t` | [マルチスレッド](#threads) | `pdf2zh example.pdf -t 1` |
181 | | `-o` | 出力ディレクトリ | `pdf2zh example.pdf -o output` |
182 | | `-f`, `-c` | [例外](#exceptions) | `pdf2zh example.pdf -f "(MS.*)"` |
183 | | `--share` | [gradio公開リンクを取得] | `pdf2zh -i --share` |
184 | | `--authorized` | [ウェブ認証とカスタム認証ページの追加](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#auth) | `pdf2zh -i --authorized users.txt [auth.html]` |
185 | | `--prompt` | [カスタムビッグモデルのプロンプトを使用する] | `pdf2zh --prompt [prompt.txt]` |
186 | | `--onnx` | [カスタムDocLayout-YOLO ONNXモデルの使用] | `pdf2zh --onnx [onnx/model/path]` |
187 | | `--serverport` | [カスタムWebUIポートを使用する] | `pdf2zh --serverport 7860` |
188 | | `--dir` | [バッチ翻訳] | `pdf2zh --dir /path/to/translate/` |
189 | | `--config` | [設定ファイル](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#cofig) | `pdf2zh --config /path/to/config/config.json` |
190 | | `--serverport` | [カスタムgradioサーバーポート] | `pdf2zh --serverport 7860` |
191 |
192 | 全文または部分的なドキュメント翻訳
193 |
194 | - **全文翻訳**
195 |
196 | ```bash
197 | pdf2zh example.pdf
198 | ```
199 |
200 | - **部分翻訳**
201 |
202 | ```bash
203 | pdf2zh example.pdf -p 1-3,5
204 | ```
205 |
206 | ソース言語とターゲット言語を指定
207 |
208 | [Google Languages Codes](https://developers.google.com/admin-sdk/directory/v1/languages)、[DeepL Languages Codes](https://developers.deepl.com/docs/resources/supported-languages) を参照してください
209 |
210 | ```bash
211 | pdf2zh example.pdf -li en -lo ja
212 | ```
213 |
214 | 異なるサービスで翻訳
215 |
216 | 以下の表は、各翻訳サービスに必要な [環境変数](https://chatgpt.com/share/6734a83d-9d48-800e-8a46-f57ca6e8bcb4) を示しています。各サービスを使用する前に、これらの変数を設定してください。
217 |
218 | |**Translator**|**Service**|**Environment Variables**|**Default Values**|**Notes**|
219 | |-|-|-|-|-|
220 | |**Google (Default)**|`google`|None|N/A|None|
221 | |**Bing**|`bing`|None|N/A|None|
222 | |**DeepL**|`deepl`|`DEEPL_AUTH_KEY`|`[Your Key]`|See [DeepL](https://support.deepl.com/hc/en-us/articles/360020695820-API-Key-for-DeepL-s-API)|
223 | |**DeepLX**|`deeplx`|`DEEPLX_ENDPOINT`|`https://api.deepl.com/translate`|See [DeepLX](https://github.com/OwO-Network/DeepLX)|
224 | |**Ollama**|`ollama`|`OLLAMA_HOST`, `OLLAMA_MODEL`|`http://127.0.0.1:11434`, `gemma2`|See [Ollama](https://github.com/ollama/ollama)|
225 | |**OpenAI**|`openai`|`OPENAI_BASE_URL`, `OPENAI_API_KEY`, `OPENAI_MODEL`|`https://api.openai.com/v1`, `[Your Key]`, `gpt-4o-mini`|See [OpenAI](https://platform.openai.com/docs/overview)|
226 | |**AzureOpenAI**|`azure-openai`|`AZURE_OPENAI_BASE_URL`, `AZURE_OPENAI_API_KEY`, `AZURE_OPENAI_MODEL`|`[Your Endpoint]`, `[Your Key]`, `gpt-4o-mini`|See [Azure OpenAI](https://learn.microsoft.com/zh-cn/azure/ai-services/openai/chatgpt-quickstart?tabs=command-line%2Cjavascript-keyless%2Ctypescript-keyless%2Cpython&pivots=programming-language-python)|
227 | |**Zhipu**|`zhipu`|`ZHIPU_API_KEY`, `ZHIPU_MODEL`|`[Your Key]`, `glm-4-flash`|See [Zhipu](https://open.bigmodel.cn/dev/api/thirdparty-frame/openai-sdk)|
228 | | **ModelScope** | `modelscope` |`MODELSCOPE_API_KEY`, `MODELSCOPE_MODEL`|`[Your Key]`, `Qwen/Qwen2.5-Coder-32B-Instruct`| See [ModelScope](https://www.modelscope.cn/docs/model-service/API-Inference/intro)|
229 | |**Silicon**|`silicon`|`SILICON_API_KEY`, `SILICON_MODEL`|`[Your Key]`, `Qwen/Qwen2.5-7B-Instruct`|See [SiliconCloud](https://docs.siliconflow.cn/quickstart)|
230 | |**Gemini**|`gemini`|`GEMINI_API_KEY`, `GEMINI_MODEL`|`[Your Key]`, `gemini-1.5-flash`|See [Gemini](https://ai.google.dev/gemini-api/docs/openai)|
231 | |**Azure**|`azure`|`AZURE_ENDPOINT`, `AZURE_API_KEY`|`https://api.translator.azure.cn`, `[Your Key]`|See [Azure](https://docs.azure.cn/en-us/ai-services/translator/text-translation-overview)|
232 | |**Tencent**|`tencent`|`TENCENTCLOUD_SECRET_ID`, `TENCENTCLOUD_SECRET_KEY`|`[Your ID]`, `[Your Key]`|See [Tencent](https://www.tencentcloud.com/products/tmt?from_qcintl=122110104)|
233 | |**Dify**|`dify`|`DIFY_API_URL`, `DIFY_API_KEY`|`[Your DIFY URL]`, `[Your Key]`|See [Dify](https://github.com/langgenius/dify),Three variables, lang_out, lang_in, and text, need to be defined in Dify's workflow input.|
234 | |**AnythingLLM**|`anythingllm`|`AnythingLLM_URL`, `AnythingLLM_APIKEY`|`[Your AnythingLLM URL]`, `[Your Key]`|See [anything-llm](https://github.com/Mintplex-Labs/anything-llm)|
235 | |**Argos Translate**|`argos`| | |See [argos-translate](https://github.com/argosopentech/argos-translate)|
236 | |**Grok**|`grok`| `GORK_API_KEY`, `GORK_MODEL` | `[Your GORK_API_KEY]`, `grok-2-1212` |See [Grok](https://docs.x.ai/docs/overview)|
237 | |**DeepSeek**|`deepseek`| `DEEPSEEK_API_KEY`, `DEEPSEEK_MODEL` | `[Your DEEPSEEK_API_KEY]`, `deepseek-chat` |See [DeepSeek](https://www.deepseek.com/)|
238 | |**OpenAI-Liked**|`openailiked`| `OPENAILIKED_BASE_URL`, `OPENAILIKED_API_KEY`, `OPENAILIKED_MODEL` | `url`, `[Your Key]`, `model name` | None |
239 |
240 |
241 | 上記の表に記載されていないものの、OpenAI API と互換性のある大規模言語モデルについては、表の OpenAI と同じ方法で環境変数を設定できます。
242 |
243 | `-s service` または `-s service:model` を使用してサービスを指定します:
244 |
245 | ```bash
246 | pdf2zh example.pdf -s openai:gpt-4o-mini
247 | ```
248 |
249 | または環境変数でモデルを指定します:
250 |
251 | ```bash
252 | set OPENAI_MODEL=gpt-4o-mini
253 | pdf2zh example.pdf -s openai
254 | ```
255 |
256 | PowerShell ユーザーの場合:
257 | ```shell
258 | $env:OPENAI_MODEL = "gpt-4o-mini"
259 | pdf2zh example.pdf -s openai
260 | ```
261 |
262 | 例外を指定して翻訳
263 |
264 | 正規表現を使用して保持する必要がある数式フォントと文字を指定します:
265 |
266 | ```bash
267 | pdf2zh example.pdf -f "(CM[^RT].*|MS.*|.*Ital)" -c "(\(|\||\)|\+|=|\d|[\u0080-\ufaff])"
268 | ```
269 |
270 | デフォルトで `Latex`、`Mono`、`Code`、`Italic`、`Symbol` および `Math` フォントを保持します:
271 |
272 | ```bash
273 | pdf2zh example.pdf -f "(CM[^R]|MS.M|XY|MT|BL|RM|EU|LA|RS|LINE|LCIRCLE|TeX-|rsfs|txsy|wasy|stmary|.*Mono|.*Code|.*Ital|.*Sym|.*Math)"
274 | ```
275 |
276 | スレッド数を指定
277 |
278 | `-t` を使用して翻訳に使用するスレッド数を指定します:
279 |
280 | ```bash
281 | pdf2zh example.pdf -t 1
282 | ```
283 |
284 | カスタム プロンプト
285 |
286 | `--prompt`を使用して、LLMで使用するプロンプトを指定します:
287 |
288 | ```bash
289 | pdf2zh example.pdf --prompt prompt.txt
290 | ```
291 |
292 |
293 | `prompt.txt`の例:
294 |
295 | ```txt
296 | [
297 | {
298 | "role": "system",
299 | "content": "You are a professional,authentic machine translation engine.",
300 | },
301 | {
302 | "role": "user",
303 | "content": "Translate the following markdown source text to ${lang_out}. Keep the formula notation {{v*}} unchanged. Output translation directly without any additional text.\nSource Text: ${text}\nTranslated Text:",
304 | },
305 | ]
306 | ```
307 |
308 |
309 | カスタムプロンプトファイルでは、以下の3つの変数が使用できます。
310 |
311 | |**変数**|**内容**|
312 | |-|-|
313 | |`lang_in`|ソース言語|
314 | |`lang_out`|ターゲット言語|
315 | |`text`|翻訳するテキスト|
316 |
317 | API
318 |
319 | ### Python
320 |
321 | ```python
322 | from pdf2zh import translate, translate_stream
323 |
324 | params = {"lang_in": "en", "lang_out": "zh", "service": "google", "thread": 4}
325 | file_mono, file_dual = translate(files=["example.pdf"], **params)[0]
326 | with open("example.pdf", "rb") as f:
327 | stream_mono, stream_dual = translate_stream(stream=f.read(), **params)
328 | ```
329 |
330 | ### HTTP
331 |
332 | ```bash
333 | pip install pdf2zh[backend]
334 | pdf2zh --flask
335 | pdf2zh --celery worker
336 | ```
337 |
338 | ```bash
339 | curl http://localhost:11008/v1/translate -F "file=@example.pdf" -F "data={\"lang_in\":\"en\",\"lang_out\":\"zh\",\"service\":\"google\",\"thread\":4}"
340 | {"id":"d9894125-2f4e-45ea-9d93-1a9068d2045a"}
341 |
342 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a
343 | {"info":{"n":13,"total":506},"state":"PROGRESS"}
344 |
345 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a
346 | {"state":"SUCCESS"}
347 |
348 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a/mono --output example-mono.pdf
349 |
350 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a/dual --output example-dual.pdf
351 |
352 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a -X DELETE
353 | ```
354 |
355 | 謝辞
356 |
357 | - ドキュメントのマージ:[PyMuPDF](https://github.com/pymupdf/PyMuPDF)
358 |
359 | - ドキュメントの解析:[Pdfminer.six](https://github.com/pdfminer/pdfminer.six)
360 |
361 | - ドキュメントの抽出:[MinerU](https://github.com/opendatalab/MinerU)
362 |
363 | - ドキュメントプレビュー:[Gradio PDF](https://github.com/freddyaboulton/gradio-pdf)
364 |
365 | - マルチスレッド翻訳:[MathTranslate](https://github.com/SUSYUSTC/MathTranslate)
366 |
367 | - レイアウト解析:[DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO)
368 |
369 | - ドキュメント標準:[PDF Explained](https://zxyle.github.io/PDF-Explained/)、[PDF Cheat Sheets](https://pdfa.org/resource/pdf-cheat-sheets/)
370 |
371 | - 多言語フォント:[Go Noto Universal](https://github.com/satbyy/go-noto-universal)
372 |
373 | 貢献者
374 |
375 |
376 |
377 |
378 |
379 | 
380 |
381 | スター履歴
382 |
383 |
384 |
385 |
386 |
387 |
388 |
389 |
390 |
--------------------------------------------------------------------------------
/docs/README_zh-CN.md:
--------------------------------------------------------------------------------
1 |
35 |
36 | 科学 PDF 文档翻译及双语对照工具
37 |
38 | - 📊 保留公式、图表、目录和注释 *([预览效果](#preview))*
39 | - 🌐 支持 [多种语言](./ADVANCED.md#language) 和 [诸多翻译服务](./ADVANCED.md#services)
40 | - 🤖 提供 [命令行工具](#usage),[图形交互界面](#gui),以及 [容器化部署](#docker)
41 |
42 | 欢迎在 [GitHub Issues](https://github.com/Byaidu/PDFMathTranslate/issues) 或 [Telegram 用户群](https://t.me/+Z9_SgnxmsmA5NzBl) 中提供反馈
43 |
44 | 有关如何贡献的详细信息,请查阅 [贡献指南](https://github.com/Byaidu/PDFMathTranslate/wiki/Contribution-Guide---%E8%B4%A1%E7%8C%AE%E6%8C%87%E5%8D%97)
45 |
46 | 更新
47 |
48 | - [2025 年 2 月 22 日] 更好的发布 CI 和精心打包的 windows-amd64 exe (由 [@awwaawwa](https://github.com/awwaawwa) 提供)
49 | - [2024 年 12 月 24 日] 翻译器现在支持在 [Xinference](https://github.com/xorbitsai/inference) 上使用本地模型 _(由 [@imClumsyPanda](https://github.com/imClumsyPanda) 提供)_
50 | - [2024 年 12 月 19 日] 现在支持非 PDF/A 文档,使用 `-cp` _(由 [@reycn](https://github.com/reycn) 提供)_
51 | - [2024 年 12 月 13 日] 额外支持后端 _(由 [@YadominJinta](https://github.com/YadominJinta) 提供)_
52 | - [2024 年 12 月 10 日] 翻译器现在支持 Azure 上的 OpenAI 模型 _(由 [@yidasanqian](https://github.com/yidasanqian) 提供)_
53 |
54 | 预览
55 |
56 |
57 |
58 |
59 | 在线演示 🌟
60 |
61 | 在线服务 🌟
62 |
63 | 您可以通过以下演示尝试我们的应用程序:
64 |
65 | - [公共免费服务](https://pdf2zh.com/) 在线使用,无需安装 _(推荐)_。
66 | - [沉浸式翻译 - BabelDOC](https://app.immersivetranslate.com/babel-doc/) 每月免费 1000 页 _(推荐)_
67 | - [在 HuggingFace 上托管的演示](https://huggingface.co/spaces/reycn/PDFMathTranslate-Docker)
68 | - [在 ModelScope 上托管的演示](https://www.modelscope.cn/studios/AI-ModelScope/PDFMathTranslate) 无需安装。
69 |
70 | 请注意演示的计算资源有限,请避免滥用它们。
71 | 安装和使用
72 |
73 | ### 方法
74 |
75 | 针对不同的使用案例,我们提供不同的方法来使用我们的程序:
76 |
77 |
78 | 1. UV 安装
79 |
80 | 1. 安装 Python (3.10 <= 版本 <= 3.12)
81 | 2. 安装我们的包:
82 |
83 | ```bash
84 | pip install uv
85 | uv tool install --python 3.12 pdf2zh
86 | ```
87 |
88 | 3. 执行翻译,文件生成在 [当前工作目录](https://chatgpt.com/share/6745ed36-9acc-800e-8a90-59204bd13444):
89 |
90 | ```bash
91 | pdf2zh document.pdf
92 | ```
93 |
94 |
95 |
96 |
97 | 2. Windows exe
98 |
99 | 1. 从 [发布页面](https://github.com/Byaidu/PDFMathTranslate/releases) 下载 pdf2zh-version-win64.zip
100 |
101 | 2. 解压缩并双击 `pdf2zh.exe` 运行。
102 |
103 |
104 |
105 |
106 | 3. 图形用户界面
107 | 1. 安装 Python (3.10 <= 版本 <= 3.12)
108 | 2. 安装我们的包:
109 |
110 | ```bash
111 | pip install pdf2zh
112 | ```
113 |
114 | 3. 在浏览器中开始使用:
115 |
116 | ```bash
117 | pdf2zh -i
118 | ```
119 |
120 | 4. 如果您的浏览器没有自动启动,请访问
121 |
122 | ```bash
123 | http://localhost:7860/
124 | ```
125 |
126 |
127 |
128 | 有关更多详细信息,请参阅 [GUI 文档](./README_GUI.md)。
129 |
130 |
131 |
132 |
133 | 4. Docker
134 |
135 | 1. 拉取并运行:
136 |
137 | ```bash
138 | docker pull byaidu/pdf2zh
139 | docker run -d -p 7860:7860 byaidu/pdf2zh
140 | ```
141 |
142 | 2. 在浏览器中打开:
143 |
144 | ```
145 | http://localhost:7860/
146 | ```
147 |
148 | 对于云服务上的 docker 部署:
149 |
150 |
160 |
161 |
162 |
163 |
164 | 5. Zotero 插件
165 |
166 | 有关更多细节,请参见 [Zotero PDF2zh](https://github.com/guaguastandup/zotero-pdf2zh)。
167 |
168 |
169 |
170 |
171 | 6. 命令行
172 |
173 | 1. 已安装 Python(3.10 <= 版本 <= 3.12)
174 | 2. 安装我们的包:
175 |
176 | ```bash
177 | pip install pdf2zh
178 | ```
179 |
180 | 3. 执行翻译,文件生成在 [当前工作目录](https://chatgpt.com/share/6745ed36-9acc-800e-8a90-59204bd13444):
181 |
182 | ```bash
183 | pdf2zh document.pdf
184 | ```
185 |
186 |
187 |
188 | > [!TIP]
189 | >
190 | > - 如果你使用 Windows 并在下载后无法打开文件,请安装 [vc_redist.x64.exe](https://aka.ms/vs/17/release/vc_redist.x64.exe) 并重试。
191 | >
192 | > - 如果你无法访问 Docker Hub,请尝试在 [GitHub 容器注册中心](https://github.com/Byaidu/PDFMathTranslate/pkgs/container/pdfmathtranslate) 上使用该镜像。
193 | > ```bash
194 | > docker pull ghcr.io/byaidu/pdfmathtranslate
195 | > docker run -d -p 7860:7860 ghcr.io/byaidu/pdfmathtranslate
196 | > ```
197 |
198 | ### 无法安装?
199 |
200 | 当前程序在工作前需要一个 AI 模型 (`wybxc/DocLayout-YOLO-DocStructBench-onnx`),一些用户由于网络问题无法下载。如果你在下载此模型时遇到问题,我们提供以下环境变量的解决方法:
201 |
202 | ```shell
203 | set HF_ENDPOINT=https://hf-mirror.com
204 | ```
205 |
206 | 对于 PowerShell 用户:
207 |
208 | ```shell
209 | $env:HF_ENDPOINT = "https://hf-mirror.com"
210 | ```
211 |
212 | 如果此解决方案对您无效或您遇到其他问题,请参阅 [常见问题解答](https://github.com/Byaidu/PDFMathTranslate/wiki#-faq--%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98)。
213 |
214 |
215 | 高级选项
216 |
217 | 在命令行中执行翻译命令,在当前工作目录下生成译文文档 `example-mono.pdf` 和双语对照文档 `example-dual.pdf`,默认使用 Google 翻译服务,更多支持的服务在[这里](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#services)。
218 |
219 |
220 |
221 | 在下表中,我们列出了所有高级选项供参考:
222 |
223 | | 选项 | 功能 | 示例 |
224 | | ------------ | ------------------------------------------------------------------------------------------------------------- | ---------------------------------------------- |
225 | | files | 本地文件 | `pdf2zh ~/local.pdf` |
226 | | links | 在线文件 | `pdf2zh http://arxiv.org/paper.pdf` |
227 | | `-i` | [进入 GUI](#gui) | `pdf2zh -i` |
228 | | `-p` | [部分文档翻译](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#partial) | `pdf2zh example.pdf -p 1` |
229 | | `-li` | [源语言](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#languages) | `pdf2zh example.pdf -li en` |
230 | | `-lo` | [目标语言](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#languages) | `pdf2zh example.pdf -lo zh` |
231 | | `-s` | [翻译服务](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#services) | `pdf2zh example.pdf -s deepl` |
232 | | `-t` | [多线程](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#threads) | `pdf2zh example.pdf -t 1` |
233 | | `-o` | 输出目录 | `pdf2zh example.pdf -o output` |
234 | | `-f`, `-c` | [异常](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#exceptions) | `pdf2zh example.pdf -f "(MS.*)"` |
235 | | `-cp` | 兼容模式 | `pdf2zh example.pdf --compatible` |
236 | | `--share` | 公开链接 | `pdf2zh -i --share` |
237 | | `--authorized` | [授权](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#auth) | `pdf2zh -i --authorized users.txt [auth.html]` |
238 | | `--prompt` | [自定义提示](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#prompt) | `pdf2zh --prompt [prompt.txt]` |
239 | | `--onnx` | [使用自定义 DocLayout-YOLO ONNX 模型] | `pdf2zh --onnx [onnx/model/path]` |
240 | | `--serverport` | [使用自定义 WebUI 端口] | `pdf2zh --serverport 7860` |
241 | | `--dir` | [批量翻译] | `pdf2zh --dir /path/to/translate/` |
242 | | `--config` | [配置文件](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#cofig) | `pdf2zh --config /path/to/config/config.json` |
243 | | `--serverport` | [自定义 gradio 服务器端口] | `pdf2zh --serverport 7860` |
244 | | `--babeldoc`| 使用实验性后端 [BabelDOC](https://funstory-ai.github.io/BabelDOC/) 翻译 |`pdf2zh --babeldoc -s openai example.pdf`|
245 |
246 | 有关详细说明,请参阅我们的文档 [高级用法](./ADVANCED.md),以获取每个选项的完整列表。
247 |
248 | 二次开发 (API)
249 |
250 | 当前的 pdf2zh API 暂时已弃用。API 将在 [pdf2zh 2.0](https://github.com/Byaidu/PDFMathTranslate/issues/586)发布后重新提供。对于需要程序化访问的用户,请使用[BabelDOC](https://github.com/funstory-ai/BabelDOC)的 `babeldoc.high_level.async_translate` 函数。
251 |
252 | API 暂时弃用意味着:相关代码暂时不会被移除,但不会提供技术支持,也不会修复 bug。
253 |
254 |
257 |
258 | 待办事项
259 |
260 | - [ ] 使用基于 DocLayNet 的模型解析布局,[PaddleX](https://github.com/PaddlePaddle/PaddleX/blob/17cc27ac3842e7880ca4aad92358d3ef8555429a/paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py#L81),[PaperMage](https://github.com/allenai/papermage/blob/9cd4bb48cbedab45d0f7a455711438f1632abebe/README.md?plain=1#L102),[SAM2](https://github.com/facebookresearch/sam2)
261 |
262 | - [ ] 修复页面旋转、目录、列表格式
263 |
264 | - [ ] 修复旧论文中的像素公式
265 |
266 | - [ ] 异步重试,除了 KeyboardInterrupt
267 |
268 | - [ ] 针对西方语言的 Knuth–Plass 算法
269 |
270 | - [ ] 支持非 PDF/A 文件
271 |
272 | - [ ] [Zotero](https://github.com/zotero/zotero) 和 [Obsidian](https://github.com/obsidianmd/obsidian-releases) 的插件
273 |
274 | 致谢
275 |
276 | - [Immersive Translation](https://immersivetranslate.com) 为此项目的活跃贡献者提供每月的专业会员兑换码,详细信息请查看:[CONTRIBUTOR_REWARD.md](https://github.com/funstory-ai/BabelDOC/blob/main/docs/CONTRIBUTOR_REWARD.md)
277 |
278 | - 文档合并:[PyMuPDF](https://github.com/pymupdf/PyMuPDF)
279 |
280 | - 文档解析:[Pdfminer.six](https://github.com/pdfminer/pdfminer.six)
281 |
282 | - 文档提取:[MinerU](https://github.com/opendatalab/MinerU)
283 |
284 | - 文档预览:[Gradio PDF](https://github.com/freddyaboulton/gradio-pdf)
285 |
286 | - 多线程翻译:[MathTranslate](https://github.com/SUSYUSTC/MathTranslate)
287 |
288 | - 布局解析:[DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO)
289 |
290 | - 文档标准:[PDF Explained](https://zxyle.github.io/PDF-Explained/),[PDF Cheat Sheets](https://pdfa.org/resource/pdf-cheat-sheets/)
291 |
292 | - 多语言字体:[Go Noto Universal](https://github.com/satbyy/go-noto-universal)
293 |
294 | 贡献者
295 |
296 |
297 |
298 |
299 |
300 | 
301 |
302 | 星标历史
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
--------------------------------------------------------------------------------
/docs/README_zh-TW.md:
--------------------------------------------------------------------------------
1 |
35 |
36 | 科學 PDF 文件翻譯及雙語對照工具
37 |
38 | - 📊 保留公式、圖表、目錄和註釋 *([預覽效果](#preview))*
39 | - 🌐 支援 [多種語言](#language) 和 [諸多翻譯服務](#services)
40 | - 🤖 提供 [命令列工具](#usage)、[圖形使用者介面](#gui),以及 [容器化部署](#docker)
41 |
42 | 歡迎在 [GitHub Issues](https://github.com/Byaidu/PDFMathTranslate/issues)、[Telegram 使用者群](https://t.me/+Z9_SgnxmsmA5NzBl) 或 [QQ 使用者群](https://qm.qq.com/q/DixZCxQej0) 中提出回饋
43 |
44 | 如需瞭解如何貢獻的詳細資訊,請查閱 [貢獻指南](https://github.com/Byaidu/PDFMathTranslate/wiki/Contribution-Guide---%E8%B4%A1%E7%8C%AE%E6%8C%87%E5%8D%97)
45 |
46 | 近期更新
47 |
48 | - [Dec. 24 2024] 翻譯功能支援接入由 [Xinference](https://github.com/xorbitsai/inference) 執行的本機 LLM _(by [@imClumsyPanda](https://github.com/imClumsyPanda))_
49 | - [Nov. 26 2024] CLI 現在已支援(多個)線上 PDF 檔 *(by [@reycn](https://github.com/reycn))*
50 | - [Nov. 24 2024] 為了降低依賴大小,提供 [ONNX](https://github.com/onnx/onnx) 支援 *(by [@Wybxc](https://github.com/Wybxc))*
51 | - [Nov. 23 2024] 🌟 [免費公共服務](#demo) 上線! *(by [@Byaidu](https://github.com/Byaidu))*
52 | - [Nov. 23 2024] 新增防止網頁爬蟲的防火牆 *(by [@Byaidu](https://github.com/Byaidu))*
53 | - [Nov. 22 2024] 圖形使用者介面現已支援義大利語並進行了一些更新 *(by [@Byaidu](https://github.com/Byaidu), [@reycn](https://github.com/reycn))*
54 | - [Nov. 22 2024] 現在你可以將自己部署的服務分享給朋友 *(by [@Zxis233](https://github.com/Zxis233))*
55 | - [Nov. 22 2024] 支援騰訊翻譯 *(by [@hellofinch](https://github.com/hellofinch))*
56 | - [Nov. 21 2024] 圖形使用者介面現在支援下載雙語文件 *(by [@reycn](https://github.com/reycn))*
57 | - [Nov. 20 2024] 🌟 提供了 [線上示範](#demo)! *(by [@reycn](https://github.com/reycn))*
58 |
59 | 效果預覽
60 |
61 |
62 |
63 |
64 |
65 | 線上示範 🌟
66 |
67 | ### 免費服務 (<https://pdf2zh.com/>)
68 |
69 | 你可以立即嘗試 [免費公共服務](https://pdf2zh.com/) 而無需安裝
70 |
71 | ### 線上示範
72 |
73 | 你可以直接在 [HuggingFace 上的線上示範](https://huggingface.co/spaces/reycn/PDFMathTranslate-Docker)和[魔搭的線上示範](https://www.modelscope.cn/studios/AI-ModelScope/PDFMathTranslate)進行嘗試,無需安裝。
74 | 請注意,示範使用的運算資源有限,請勿濫用。
75 |
76 | 安裝與使用
77 |
78 | 我們提供了四種使用此專案的方法:[命令列工具](#cmd)、[便攜式安裝](#portable)、[圖形使用者介面](#gui) 與 [容器化部署](#docker)。
79 |
80 | pdf2zh 在執行時需要額外下載模型(`wybxc/DocLayout-YOLO-DocStructBench-onnx`),該模型也可在魔搭(ModelScope)上取得。如果在啟動時下載該模型時遇到問題,請使用如下環境變數:
81 | ```shell
82 | set HF_ENDPOINT=https://hf-mirror.com
83 | ```
84 |
85 | 方法一、命令列工具
86 |
87 | 1. 確保已安裝 Python,版本需大於等於 3.10 且小於等於 3.12
88 | 2. 安裝此程式:
89 |
90 | ```bash
91 | pip install pdf2zh
92 | ```
93 |
94 | 3. 執行翻譯,生成檔案位於 [目前工作目錄](https://chatgpt.com/share/6745ed36-9acc-800e-8a90-59204bd13444):
95 |
96 | ```bash
97 | pdf2zh document.pdf
98 | ```
99 |
100 | 方法二、便攜式安裝
101 |
102 | 無需預先安裝 Python 環境
103 |
104 | 下載 [setup.bat](https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/refs/heads/main/script/setup.bat) 並直接雙擊執行
105 |
106 | 方法三、圖形使用者介面
107 |
108 | 1. 確保已安裝 Python,版本需大於等於 3.10 且小於等於 3.12
109 | 2. 安裝此程式:
110 |
111 | ```bash
112 | pip install pdf2zh
113 | ```
114 |
115 | 3. 在瀏覽器中啟動使用:
116 |
117 | ```bash
118 | pdf2zh -i
119 | ```
120 |
121 | 4. 如果您的瀏覽器沒有自動開啟並跳轉,請手動在瀏覽器開啟:
122 |
123 | ```bash
124 | http://localhost:7860/
125 | ```
126 |
127 |
128 |
129 | 查看 [圖形使用者介面文件](./README_GUI.md) 以獲取詳細說明
130 |
131 | 方法四、容器化部署
132 |
133 | 1. 拉取 Docker 映像檔並執行:
134 |
135 | ```bash
136 | docker pull byaidu/pdf2zh
137 | docker run -d -p 7860:7860 byaidu/pdf2zh
138 | ```
139 |
140 | 2. 透過瀏覽器開啟:
141 |
142 | ```
143 | http://localhost:7860/
144 | ```
145 |
146 | 用於在雲服務上部署容器映像檔:
147 |
148 |
158 |
159 | 高級選項
160 |
161 | 在命令列中執行翻譯指令,並在目前工作目錄下生成譯文檔案 `example-mono.pdf` 和雙語對照檔案 `example-dual.pdf`。預設使用 Google 翻譯服務。
162 |
163 |
164 |
165 | 以下表格列出了所有高級選項,供參考:
166 |
167 | | Option | 功能 | 範例 |
168 | | -------- | ------- |------- |
169 | | files | 本機檔案 | `pdf2zh ~/local.pdf` |
170 | | links | 線上檔案 | `pdf2zh http://arxiv.org/paper.pdf` |
171 | | `-i` | [進入圖形介面](#gui) | `pdf2zh -i` |
172 | | `-p` | [僅翻譯部分文件](#partial) | `pdf2zh example.pdf -p 1` |
173 | | `-li` | [原文語言](#language) | `pdf2zh example.pdf -li en` |
174 | | `-lo` | [目標語言](#language) | `pdf2zh example.pdf -lo zh` |
175 | | `-s` | [指定翻譯服務](#services) | `pdf2zh example.pdf -s deepl` |
176 | | `-t` | [多執行緒](#threads) | `pdf2zh example.pdf -t 1` |
177 | | `-o` | 輸出目錄 | `pdf2zh example.pdf -o output` |
178 | | `-f`, `-c` | [例外規則](#exceptions) | `pdf2zh example.pdf -f "(MS.*)"` |
179 | | `--share` | [獲取 gradio 公開連結] | `pdf2zh -i --share` |
180 | | `--authorized` | [添加網頁認證及自訂認證頁面](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#auth) | `pdf2zh -i --authorized users.txt [auth.html]` |
181 | | `--prompt` | [使用自訂的大模型 Prompt] | `pdf2zh --prompt [prompt.txt]` |
182 | | `--onnx` | [使用自訂的 DocLayout-YOLO ONNX 模型] | `pdf2zh --onnx [onnx/model/path]` |
183 | | `--serverport` | [自訂 WebUI 埠號] | `pdf2zh --serverport 7860` |
184 | | `--dir` | [資料夾翻譯] | `pdf2zh --dir /path/to/translate/` |
185 |
186 | 全文或部分文件翻譯
187 |
188 | - **全文翻譯**
189 |
190 | ```bash
191 | pdf2zh example.pdf
192 | ```
193 |
194 | - **部分翻譯**
195 |
196 | ```bash
197 | pdf2zh example.pdf -p 1-3,5
198 | ```
199 |
200 | 指定原文語言與目標語言
201 |
202 | 可參考 [Google 語言代碼](https://developers.google.com/admin-sdk/directory/v1/languages)、[DeepL 語言代碼](https://developers.deepl.com/docs/resources/supported-languages)
203 |
204 | ```bash
205 | pdf2zh example.pdf -li en -lo ja
206 | ```
207 |
208 | 使用不同的翻譯服務
209 |
210 | 下表列出了每個翻譯服務所需的 [環境變數](https://chatgpt.com/share/6734a83d-9d48-800e-8a46-f57ca6e8bcb4)。在使用前,請先確保已設定好對應的變數。
211 |
212 | |**Translator**|**Service**|**Environment Variables**|**Default Values**|**Notes**|
213 | |-|-|-|-|-|
214 | |**Google (Default)**|`google`|無|N/A|無|
215 | |**Bing**|`bing`|無|N/A|無|
216 | |**DeepL**|`deepl`|`DEEPL_AUTH_KEY`|`[Your Key]`|參閱 [DeepL](https://support.deepl.com/hc/en-us/articles/360020695820-API-Key-for-DeepL-s-API)|
217 | |**DeepLX**|`deeplx`|`DEEPLX_ENDPOINT`|`https://api.deepl.com/translate`|參閱 [DeepLX](https://github.com/OwO-Network/DeepLX)|
218 | |**Ollama**|`ollama`|`OLLAMA_HOST`, `OLLAMA_MODEL`|`http://127.0.0.1:11434`, `gemma2`|參閱 [Ollama](https://github.com/ollama/ollama)|
219 | |**OpenAI**|`openai`|`OPENAI_BASE_URL`, `OPENAI_API_KEY`, `OPENAI_MODEL`|`https://api.openai.com/v1`, `[Your Key]`, `gpt-4o-mini`|參閱 [OpenAI](https://platform.openai.com/docs/overview)|
220 | |**AzureOpenAI**|`azure-openai`|`AZURE_OPENAI_BASE_URL`, `AZURE_OPENAI_API_KEY`, `AZURE_OPENAI_MODEL`|`[Your Endpoint]`, `[Your Key]`, `gpt-4o-mini`|參閱 [Azure OpenAI](https://learn.microsoft.com/zh-cn/azure/ai-services/openai/chatgpt-quickstart?tabs=command-line%2Cjavascript-keyless%2Ctypescript-keyless%2Cpython&pivots=programming-language-python)|
221 | |**Zhipu**|`zhipu`|`ZHIPU_API_KEY`, `ZHIPU_MODEL`|`[Your Key]`, `glm-4-flash`|參閱 [Zhipu](https://open.bigmodel.cn/dev/api/thirdparty-frame/openai-sdk)|
222 | | **ModelScope** | `modelscope` |`MODELSCOPE_API_KEY`, `MODELSCOPE_MODEL`|`[Your Key]`, `Qwen/Qwen2.5-Coder-32B-Instruct`| 參閱 [ModelScope](https://www.modelscope.cn/docs/model-service/API-Inference/intro)|
223 | |**Silicon**|`silicon`|`SILICON_API_KEY`, `SILICON_MODEL`|`[Your Key]`, `Qwen/Qwen2.5-7B-Instruct`|參閱 [SiliconCloud](https://docs.siliconflow.cn/quickstart)|
224 | |**Gemini**|`gemini`|`GEMINI_API_KEY`, `GEMINI_MODEL`|`[Your Key]`, `gemini-1.5-flash`|參閱 [Gemini](https://ai.google.dev/gemini-api/docs/openai)|
225 | |**Azure**|`azure`|`AZURE_ENDPOINT`, `AZURE_API_KEY`|`https://api.translator.azure.cn`, `[Your Key]`|參閱 [Azure](https://docs.azure.cn/en-us/ai-services/translator/text-translation-overview)|
226 | |**Tencent**|`tencent`|`TENCENTCLOUD_SECRET_ID`, `TENCENTCLOUD_SECRET_KEY`|`[Your ID]`, `[Your Key]`|參閱 [Tencent](https://www.tencentcloud.com/products/tmt?from_qcintl=122110104)|
227 | |**Dify**|`dify`|`DIFY_API_URL`, `DIFY_API_KEY`|`[Your DIFY URL]`, `[Your Key]`|參閱 [Dify](https://github.com/langgenius/dify),需要在 Dify 的工作流程輸入中定義三個變數:lang_out、lang_in、text。|
228 | |**AnythingLLM**|`anythingllm`|`AnythingLLM_URL`, `AnythingLLM_APIKEY`|`[Your AnythingLLM URL]`, `[Your Key]`|參閱 [anything-llm](https://github.com/Mintplex-Labs/anything-llm)|
229 | |**Argos Translate**|`argos`| | |參閱 [argos-translate](https://github.com/argosopentech/argos-translate)|
230 | |**Grok**|`grok`| `GORK_API_KEY`, `GORK_MODEL` | `[Your GORK_API_KEY]`, `grok-2-1212` |參閱 [Grok](https://docs.x.ai/docs/overview)|
231 | |**DeepSeek**|`deepseek`| `DEEPSEEK_API_KEY`, `DEEPSEEK_MODEL` | `[Your DEEPSEEK_API_KEY]`, `deepseek-chat` |參閱 [DeepSeek](https://www.deepseek.com/)|
232 | |**OpenAI-Liked**|`openailiked`| `OPENAILIKED_BASE_URL`, `OPENAILIKED_API_KEY`, `OPENAILIKED_MODEL` | `url`, `[Your Key]`, `model name` | 無 |
233 |
234 | 對於未列於上述表格、但相容 OpenAI API 的大型語言模型,可以使用與 OpenAI 相同的方式設定環境變數。
235 |
236 | 使用 `-s service` 或 `-s service:model` 指定翻譯服務:
237 |
238 | ```bash
239 | pdf2zh example.pdf -s openai:gpt-4o-mini
240 | ```
241 |
242 | 或使用環境變數指定模型:
243 |
244 | ```bash
245 | set OPENAI_MODEL=gpt-4o-mini
246 | pdf2zh example.pdf -s openai
247 | ```
248 |
249 | 指定例外規則
250 |
251 | 使用正則表達式指定需要保留的公式字體與字元:
252 |
253 | ```bash
254 | pdf2zh example.pdf -f "(CM[^RT].*|MS.*|.*Ital)" -c "(\(|\||\)|\+|=|\d|[\u0080-\ufaff])"
255 | ```
256 |
257 | 預設保留 `Latex`, `Mono`, `Code`, `Italic`, `Symbol` 以及 `Math` 字體:
258 |
259 | ```bash
260 | pdf2zh example.pdf -f "(CM[^R]|MS.M|XY|MT|BL|RM|EU|LA|RS|LINE|LCIRCLE|TeX-|rsfs|txsy|wasy|stmary|.*Mono|.*Code|.*Ital|.*Sym|.*Math)"
261 | ```
262 |
263 | 指定執行緒數量
264 |
265 | 使用 `-t` 參數指定翻譯使用的執行緒數量:
266 |
267 | ```bash
268 | pdf2zh example.pdf -t 1
269 | ```
270 |
271 | 自訂大模型 Prompt
272 |
273 | 使用 `--prompt` 指定在使用大模型翻譯時所採用的 Prompt 檔案。
274 |
275 | ```bash
276 | pdf2zh example.pdf -pr prompt.txt
277 | ```
278 |
279 | 範例 `prompt.txt` 檔案內容:
280 |
281 | ```
282 | [
283 | {
284 | "role": "system",
285 | "content": "You are a professional,authentic machine translation engine.",
286 | },
287 | {
288 | "role": "user",
289 | "content": "Translate the following markdown source text to ${lang_out}. Keep the formula notation {{v*}} unchanged. Output translation directly without any additional text.\nSource Text: ${text}\nTranslated Text:",
290 | },
291 | ]
292 | ```
293 |
294 | 在自訂 Prompt 檔案中,可以使用以下三個內建變數來傳遞參數:
295 | |**變數名稱**|**說明**|
296 | |-|-|
297 | |`lang_in`|輸入語言|
298 | |`lang_out`|輸出語言|
299 | |`text`|需要翻譯的文本|
300 |
301 | API
302 |
303 | ### Python
304 |
305 | ```python
306 | from pdf2zh import translate, translate_stream
307 |
308 | params = {"lang_in": "en", "lang_out": "zh", "service": "google", "thread": 4}
309 | file_mono, file_dual = translate(files=["example.pdf"], **params)[0]
310 | with open("example.pdf", "rb") as f:
311 | stream_mono, stream_dual = translate_stream(stream=f.read(), **params)
312 | ```
313 |
314 | ### HTTP
315 |
316 | ```bash
317 | pip install pdf2zh[backend]
318 | pdf2zh --flask
319 | pdf2zh --celery worker
320 | ```
321 |
322 | ```bash
323 | curl http://localhost:11008/v1/translate -F "file=@example.pdf" -F "data={\"lang_in\":\"en\",\"lang_out\":\"zh\",\"service\":\"google\",\"thread\":4}"
324 | {"id":"d9894125-2f4e-45ea-9d93-1a9068d2045a"}
325 |
326 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a
327 | {"info":{"n":13,"total":506},"state":"PROGRESS"}
328 |
329 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a
330 | {"state":"SUCCESS"}
331 |
332 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a/mono --output example-mono.pdf
333 |
334 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a/dual --output example-dual.pdf
335 |
336 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a -X DELETE
337 | ```
338 |
339 | 致謝
340 |
341 | - 文件合併:[PyMuPDF](https://github.com/pymupdf/PyMuPDF)
342 | - 文件解析:[Pdfminer.six](https://github.com/pdfminer/pdfminer.six)
343 | - 文件提取:[MinerU](https://github.com/opendatalab/MinerU)
344 | - 文件預覽:[Gradio PDF](https://github.com/freddyaboulton/gradio-pdf)
345 | - 多執行緒翻譯:[MathTranslate](https://github.com/SUSYUSTC/MathTranslate)
346 | - 版面解析:[DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO)
347 | - PDF 標準:[PDF Explained](https://zxyle.github.io/PDF-Explained/)、[PDF Cheat Sheets](https://pdfa.org/resource/pdf-cheat-sheets/)
348 | - 多語言字型:[Go Noto Universal](https://github.com/satbyy/go-noto-universal)
349 |
350 | 貢獻者
351 |
352 |
353 |
354 |
355 |
356 | 
357 |
358 | 星標歷史
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
--------------------------------------------------------------------------------
/docs/images/after.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/docs/images/after.png
--------------------------------------------------------------------------------
/docs/images/banner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/docs/images/banner.png
--------------------------------------------------------------------------------
/docs/images/before.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/docs/images/before.png
--------------------------------------------------------------------------------
/docs/images/cmd.explained.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/docs/images/cmd.explained.png
--------------------------------------------------------------------------------
/docs/images/cmd.explained.zh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/docs/images/cmd.explained.zh.png
--------------------------------------------------------------------------------
/docs/images/gui.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/docs/images/gui.gif
--------------------------------------------------------------------------------
/docs/images/preview.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/docs/images/preview.gif
--------------------------------------------------------------------------------
/pdf2zh/__init__.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from pdf2zh.high_level import translate, translate_stream
3 |
4 | log = logging.getLogger(__name__)
5 |
6 | __version__ = "1.9.6"
7 | __author__ = "Byaidu"
8 | __all__ = ["translate", "translate_stream"]
9 |
--------------------------------------------------------------------------------
/pdf2zh/backend.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, request, send_file
2 | from celery import Celery, Task
3 | from celery.result import AsyncResult
4 | from pdf2zh import translate_stream
5 | import tqdm
6 | import json
7 | import io
8 | from pdf2zh.doclayout import ModelInstance
9 | from pdf2zh.config import ConfigManager
10 |
# Flask application object; the HTTP routes in this module are registered on it.
flask_app = Flask("pdf2zh")
# Celery connection settings are looked up through ConfigManager. Both the
# broker URL and the result backend fall back to the same local Redis URL
# when ConfigManager has no value for the key.
flask_app.config.from_mapping(
    CELERY=dict(
        broker_url=ConfigManager.get("CELERY_BROKER", "redis://127.0.0.1:6379/0"),
        result_backend=ConfigManager.get("CELERY_RESULT", "redis://127.0.0.1:6379/0"),
    )
)
18 |
19 |
def celery_init_app(app: Flask) -> Celery:
    """Build the Celery application that belongs to ``app``.

    The Celery app is named after the Flask app, configured from
    ``app.config["CELERY"]``, made the default Celery app, and stored in
    ``app.extensions["celery"]``.

    Args:
        app: Flask application whose config holds a ``CELERY`` mapping.

    Returns:
        The configured Celery application.
    """

    # Task base class: every task invocation runs inside app.app_context().
    class FlaskTask(Task):
        def __call__(self, *args, **kwargs):
            with app.app_context():
                return self.run(*args, **kwargs)

    worker = Celery(app.name)
    worker.config_from_object(app.config["CELERY"])
    worker.Task = FlaskTask
    worker.set_default()
    worker.autodiscover_tasks()
    app.extensions["celery"] = worker
    return worker
33 |
34 |
35 | celery_app = celery_init_app(flask_app)
36 |
37 |
@celery_app.task(bind=True)
def translate_task(
    self: Task,
    stream: bytes,
    args: dict,
):
    """Translate one PDF, given as raw bytes, inside a Celery worker.

    Args:
        stream: Content of the PDF to translate.
        args: Extra keyword arguments forwarded to ``translate_stream``.

    Returns:
        The ``(doc_mono, doc_dual)`` pair produced by ``translate_stream``.
    """

    def report_progress(bar: tqdm.tqdm):
        # Publish the progress-bar counters as the task's PROGRESS state.
        progress_meta = {"n": bar.n, "total": bar.total}
        self.update_state(state="PROGRESS", meta=progress_meta)  # noqa
        print(f"Translating {bar.n} / {bar.total} pages")

    doc_mono, doc_dual = translate_stream(
        stream,
        callback=report_progress,
        model=ModelInstance.value,
        **args,
    )
    return doc_mono, doc_dual
55 |
56 |
@flask_app.route("/v1/translate", methods=["POST"])
def create_translate_tasks():
    """Queue a translation task for an uploaded PDF.

    Reads the uploaded ``file`` part and the JSON-encoded ``data`` form field,
    then hands both to ``translate_task``.

    Returns:
        A dict ``{"id": <task id>}`` identifying the queued task.
    """
    pdf_bytes = request.files["file"].stream.read()
    raw_options = request.form.get("data")
    print(raw_options)
    options = json.loads(raw_options)
    queued = translate_task.delay(pdf_bytes, options)
    return {"id": queued.id}
65 |
66 |
@flask_app.route("/v1/translate/<id>", methods=["GET"])
def get_translate_task(id: str):
    """Report the state of a translation task.

    The URL rule must declare the ``<id>`` variable: without it Flask has no
    value to pass for the required ``id`` argument of this view.

    Args:
        id: Task id taken from the URL.

    Returns:
        ``{"state": ...}``; while the task state is ``PROGRESS`` the dict also
        carries ``"info"`` with the task's current ``result.info``.
    """
    result: AsyncResult = celery_app.AsyncResult(id)
    # Read the state once so the comparison and the response agree.
    state = str(result.state)
    if state == "PROGRESS":
        return {"state": state, "info": result.info}
    return {"state": state}
74 |
75 |
@flask_app.route("/v1/translate/<id>", methods=["DELETE"])
def delete_translate_task(id: str):
    """Revoke a translation task, terminating it if it is running.

    The URL rule must declare the ``<id>`` variable: without it Flask has no
    value to pass for the required ``id`` argument of this view.

    Args:
        id: Task id taken from the URL.

    Returns:
        ``{"state": ...}`` with the task state read after the revoke request.
    """
    result: AsyncResult = celery_app.AsyncResult(id)
    result.revoke(terminate=True)
    return {"state": str(result.state)}
81 |
82 |
@flask_app.route("/v1/translate/<id>/<format>")
def get_translate_result(id: str, format: str):
    """Download the translated PDF of a finished task.

    The URL rule must declare both ``<id>`` and ``<format>``: without them
    Flask has no values to pass for this view's required arguments.

    Args:
        id: Task id taken from the URL.
        format: ``"mono"`` selects the first document of the task result; any
            other value selects the second one.

    Returns:
        The selected document as an ``application/pdf`` response, or a JSON
        error with status 400 when the task is not finished or has failed.
    """
    result = celery_app.AsyncResult(id)
    if not result.ready():
        return {"error": "task not finished"}, 400
    if not result.successful():
        return {"error": "task failed"}, 400
    doc_mono, doc_dual = result.get()
    to_send = doc_mono if format == "mono" else doc_dual
    return send_file(io.BytesIO(to_send), "application/pdf")
93 |
94 |
95 | if __name__ == "__main__":
96 | flask_app.run()
97 |
--------------------------------------------------------------------------------
/pdf2zh/cache.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import json
4 | from peewee import Model, SqliteDatabase, AutoField, CharField, TextField, SQL
5 | from typing import Optional
6 |
7 |
8 | # we don't init the database here
9 | db = SqliteDatabase(None)
10 | logger = logging.getLogger(__name__)
11 |
12 |
class _TranslationCache(Model):
    """Peewee model holding one cached translation per row.

    Bound to the module-level ``db``, which is created without a path and
    initialised later.
    """

    id = AutoField()
    # Name of the translation engine; the column is limited to 20 characters.
    translate_engine = CharField(max_length=20)
    # Engine parameters stored as text (TranslationCache writes a JSON string).
    translate_engine_params = TextField()
    original_text = TextField()
    translation = TextField()

    class Meta:
        database = db
        # One row per (engine, params, original text); inserting a duplicate
        # replaces the existing row instead of failing.
        constraints = [
            SQL(
                """
            UNIQUE (
                translate_engine,
                translate_engine_params,
                original_text
                )
            ON CONFLICT REPLACE
            """
            )
        ]
34 |
35 |
class TranslationCache:
    """Translation cache scoped to one engine name and one parameter set.

    The parameters are serialised to a JSON string with dictionary keys in
    sorted order; that string, the engine name and the original text form the
    lookup key in the ``_TranslationCache`` table.
    """

    @staticmethod
    def _sort_dict_recursively(obj):
        """Return ``obj`` with every dict, at any depth, rebuilt in key order."""
        if isinstance(obj, list):
            return [TranslationCache._sort_dict_recursively(entry) for entry in obj]
        if not isinstance(obj, dict):
            return obj
        ordered = {}
        for key in sorted(obj.keys()):
            ordered[key] = TranslationCache._sort_dict_recursively(obj[key])
        return ordered

    def __init__(self, translate_engine: str, translate_engine_params: dict = None):
        """Create a cache view for ``translate_engine`` and its parameters."""
        assert (
            len(translate_engine) < 20
        ), "current cache require translate engine name less than 20 characters"
        self.translate_engine = translate_engine
        self.replace_params(translate_engine_params)

    # Multi-threaded translation normally begins only once the cache
    # parameters are completely set up, so the parameter setters below are
    # written without any thread-safety measures.
    def replace_params(self, params: dict = None):
        """Replace the whole parameter set and refresh its serialised form."""
        self.params = {} if params is None else params
        canonical = self._sort_dict_recursively(self.params)
        self.translate_engine_params = json.dumps(canonical)

    def update_params(self, params: dict = None):
        """Merge ``params`` into the current parameter set."""
        self.params.update({} if params is None else params)
        self.replace_params(self.params)

    def add_params(self, k: str, v):
        """Set the single parameter ``k`` to ``v``."""
        self.params[k] = v
        self.replace_params(self.params)

    # peewee and the sqlite layer beneath it are thread-safe, so reading and
    # writing entries is done without locks.
    def get(self, original_text: str) -> Optional[str]:
        """Return the cached translation of ``original_text``, or ``None``."""
        row = _TranslationCache.get_or_none(
            translate_engine=self.translate_engine,
            translate_engine_params=self.translate_engine_params,
            original_text=original_text,
        )
        if row:
            return row.translation
        return None

    def set(self, original_text: str, translation: str):
        """Store a translation; failures are logged at debug level and ignored."""
        try:
            _TranslationCache.create(
                translate_engine=self.translate_engine,
                translate_engine_params=self.translate_engine_params,
                original_text=original_text,
                translation=translation,
            )
        except Exception as e:
            logger.debug(f"Error setting cache: {e}")
96 |
97 |
def init_db(remove_exists=False):
    """Point the module-level database at the on-disk cache file.

    The file is ``~/.cache/pdf2zh/cache.v1.db``; its folder is created when
    missing, and the cache table is created if it does not exist yet.

    Args:
        remove_exists: When true, delete an existing cache file first.
    """
    cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "pdf2zh")
    os.makedirs(cache_dir, exist_ok=True)
    # Database migration is not supported in this version, so the schema
    # version is part of the file name.
    db_file = os.path.join(cache_dir, "cache.v1.db")
    if remove_exists and os.path.exists(db_file):
        os.remove(db_file)
    sqlite_pragmas = {
        "journal_mode": "wal",
        "busy_timeout": 1000,
    }
    db.init(db_file, pragmas=sqlite_pragmas)
    db.create_tables([_TranslationCache], safe=True)
113 |
114 |
def init_test_db():
    """Create a throw-away SQLite database and bind the cache model to it.

    Returns:
        The connected ``SqliteDatabase``; pass it to ``clean_test_db`` when
        done so the table and the files are removed.
    """
    import tempfile

    # mkstemp creates the file atomically. The deprecated mktemp only returns
    # a name, which another process could claim before it is opened.
    # SQLite treats the resulting empty file as a new, empty database.
    fd, cache_db_path = tempfile.mkstemp(suffix=".db")
    os.close(fd)
    test_db = SqliteDatabase(
        cache_db_path,
        pragmas={
            "journal_mode": "wal",
            "busy_timeout": 1000,
        },
    )
    test_db.bind([_TranslationCache], bind_refs=False, bind_backrefs=False)
    test_db.connect()
    test_db.create_tables([_TranslationCache], safe=True)
    return test_db
130 |
131 |
def clean_test_db(test_db):
    """Drop the cache table, close ``test_db`` and delete its files.

    Args:
        test_db: Database object as returned by ``init_test_db``.
    """
    test_db.drop_tables([_TranslationCache])
    test_db.close()
    base_path = test_db.database
    # The main database file first, then the "-wal" and "-shm" side files.
    for suffix in ("", "-wal", "-shm"):
        candidate = base_path + suffix
        if os.path.exists(candidate):
            os.remove(candidate)
144 |
145 |
146 | init_db()
147 |
--------------------------------------------------------------------------------
/pdf2zh/config.py:
--------------------------------------------------------------------------------
1 | import json
2 | from pathlib import Path
3 | from threading import RLock # 改成 RLock
4 | import os
5 | import copy
6 |
7 |
class ConfigManager:
    """Process-wide configuration store backed by a JSON file.

    One shared instance holds the parsed contents of the config file, which is
    ``~/.config/PDFMathTranslate/config.json`` unless ``custome_config``
    selects another path. The public accessors are classmethods operating on
    that instance, and every mutation is written back to the file at once.
    """

    _instance = None
    # Re-entrant lock: methods that already hold it call _save_config /
    # _load_config, which acquire it again on the same thread.
    _lock = RLock()

    @classmethod
    def get_instance(cls):
        """Return the shared instance, creating it on first use."""
        # Check without the lock first; take the lock only to initialise.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = cls()
        return cls._instance

    def __init__(self):
        # Guard against repeated initialisation.
        if hasattr(self, "_initialized") and self._initialized:
            return
        self._initialized = True

        self._config_path = Path.home() / ".config" / "PDFMathTranslate" / "config.json"
        self._config_data = {}

        # No explicit locking here: the caller (get_instance) may already hold
        # the lock, and with an RLock that is harmless either way.
        self._ensure_config_exists()

    def _ensure_config_exists(self, isInit=True):
        """Load the config file, or create an empty one when it is missing.

        Args:
            isInit: When true a missing file is created with empty content;
                when false a missing file raises ``ValueError``.
        """
        if not self._config_path.exists():
            if isInit:
                self._config_path.parent.mkdir(parents=True, exist_ok=True)
                self._config_data = {}  # default config content
                self._save_config()
            else:
                raise ValueError(f"config file {self._config_path} not found!")
        else:
            self._load_config()

    def _load_config(self):
        """Read the JSON config file into memory."""
        with self._lock:
            with self._config_path.open("r", encoding="utf-8") as f:
                self._config_data = json.load(f)

    def _save_config(self):
        """Write the in-memory config to the JSON config file."""
        with self._lock:
            # Break reference cycles before serialising.
            cleaned_data = self._remove_circular_references(self._config_data)
            with self._config_path.open("w", encoding="utf-8") as f:
                json.dump(cleaned_data, f, indent=4, ensure_ascii=False)

    def _remove_circular_references(self, obj, seen=None):
        """Return a copy of ``obj`` in which reference cycles become ``None``.

        Only dicts and lists can form a cycle, so only they are tracked, and
        only while they are being traversed. Scalars are never tracked:
        ``True``, small integers and shared string constants are frequently
        one and the same object, and treating a repeat as a cycle would write
        ``null`` in place of a legitimate value. A container that is merely
        referenced from two places, without a cycle, is copied both times.
        """
        if not isinstance(obj, (dict, list)):
            return obj
        if seen is None:
            seen = set()
        obj_id = id(obj)
        if obj_id in seen:
            return None  # obj is one of its own ancestors: a real cycle
        seen.add(obj_id)
        try:
            if isinstance(obj, dict):
                return {
                    k: self._remove_circular_references(v, seen)
                    for k, v in obj.items()
                }
            return [self._remove_circular_references(i, seen) for i in obj]
        finally:
            # Leaving this container: it is no longer an ancestor.
            seen.discard(obj_id)

    @classmethod
    def custome_config(cls, file_path):
        """Make the shared instance use the config file at ``file_path``.

        Raises:
            ValueError: If ``file_path`` does not exist.
        """
        custom_path = Path(file_path)
        if not custom_path.exists():
            raise ValueError(f"Config file {custom_path} not found!")
        with cls._lock:
            instance = cls()
            instance._config_path = custom_path
            # isInit=False: raise if the file is missing, otherwise load it.
            instance._ensure_config_exists(isInit=False)
            cls._instance = instance

    @classmethod
    def get(cls, key, default=None):
        """Return the value stored for ``key``.

        Lookup order: the config data, then the environment variable named
        ``key``, then ``default``. A value taken from the environment, or a
        ``default`` that is not ``None``, is stored under ``key`` and saved.
        """
        instance = cls.get_instance()
        if key in instance._config_data:
            return instance._config_data[key]

        # Fall back to the environment and write the value back to the config.
        if key in os.environ:
            value = os.environ[key]
            instance._config_data[key] = value
            instance._save_config()
            return value

        # Persist a non-None default.
        if default is not None:
            instance._config_data[key] = default
            instance._save_config()
            return default

        # Nothing found: return the (None) default instead of raising.
        return default

    @classmethod
    def set(cls, key, value):
        """Store ``value`` under ``key`` and save."""
        instance = cls.get_instance()
        with instance._lock:
            instance._config_data[key] = value
            instance._save_config()

    @classmethod
    def get_translator_by_name(cls, name):
        """Return the ``envs`` of the translator entry called ``name``, or ``None``."""
        instance = cls.get_instance()
        translators = instance._config_data.get("translators", [])
        for translator in translators:
            if translator.get("name") == name:
                return translator["envs"]
        return None

    @classmethod
    def set_translator_by_name(cls, name, new_translator_envs):
        """Create or update the translator entry called ``name`` and save.

        A deep copy of ``new_translator_envs`` is stored.
        """
        instance = cls.get_instance()
        with instance._lock:
            translators = instance._config_data.get("translators", [])
            for translator in translators:
                if translator.get("name") == name:
                    translator["envs"] = copy.deepcopy(new_translator_envs)
                    instance._save_config()
                    return
            translators.append(
                {"name": name, "envs": copy.deepcopy(new_translator_envs)}
            )
            instance._config_data["translators"] = translators
            instance._save_config()

    @classmethod
    def get_env_by_translatername(cls, translater_name, name, default=None):
        """Return one env value of a translator, storing ``default`` if unset.

        Args:
            translater_name: Object exposing ``name`` (the translator's name)
                and ``envs`` (its default env mapping); despite the parameter
                name it is not a plain string.
            name: Env key to look up.
            default: Value stored and returned when the key is missing or its
                stored value is falsy.

        Returns:
            The stored value when it is truthy, otherwise ``default``.
        """
        instance = cls.get_instance()
        translators = instance._config_data.get("translators", [])
        for translator in translators:
            if translator.get("name") == translater_name.name:
                # .get(): an entry saved earlier may not contain this key yet;
                # that case must fall through to the default, not KeyError.
                value = translator["envs"].get(name)
                if value:
                    return value
                with instance._lock:
                    translator["envs"][name] = default
                    instance._save_config()
                    return default

        with instance._lock:
            # Look again under the lock before adding a new entry.
            translators = instance._config_data.get("translators", [])
            for translator in translators:
                if translator.get("name") == translater_name.name:
                    translator["envs"][name] = default
                    instance._save_config()
                    return default
            translators.append(
                {
                    "name": translater_name.name,
                    "envs": copy.deepcopy(translater_name.envs),
                }
            )
            instance._config_data["translators"] = translators
            instance._save_config()
            return default

    @classmethod
    def delete(cls, key):
        """Remove ``key`` from the config, if present, and save."""
        instance = cls.get_instance()
        with instance._lock:
            if key in instance._config_data:
                del instance._config_data[key]
                instance._save_config()

    @classmethod
    def clear(cls):
        """Remove every config entry and save the now-empty config."""
        instance = cls.get_instance()
        with instance._lock:
            instance._config_data = {}
            instance._save_config()

    @classmethod
    def all(cls):
        """Return the config data dict itself (not a copy)."""
        instance = cls.get_instance()
        # Read-only access; done without taking the lock.
        return instance._config_data

    @classmethod
    def remove(cls):
        """Delete the config file from disk."""
        instance = cls.get_instance()
        with instance._lock:
            os.remove(instance._config_path)
215 |
--------------------------------------------------------------------------------
/pdf2zh/doclayout.py:
--------------------------------------------------------------------------------
1 | import abc
2 | import os.path
3 |
4 | import cv2
5 | import numpy as np
6 | import ast
7 | from babeldoc.assets.assets import get_doclayout_onnx_model_path
8 |
9 | try:
10 | import onnx
11 | import onnxruntime
12 | except ImportError as e:
13 | if "DLL load failed" in str(e):
14 | raise OSError(
15 | "Microsoft Visual C++ Redistributable is not installed. "
16 | "Download it at https://aka.ms/vs/17/release/vc_redist.x64.exe"
17 | ) from e
18 | raise
19 |
20 | from huggingface_hub import hf_hub_download
21 |
22 | from pdf2zh.config import ConfigManager
23 |
24 |
class DocLayoutModel(abc.ABC):
    """Abstract interface of a document-layout detection model."""

    @staticmethod
    def load_onnx():
        """Return the model built by ``OnnxModel.from_pretrained()``."""
        return OnnxModel.from_pretrained()

    @staticmethod
    def load_available():
        """Return the available model; this delegates to ``load_onnx``."""
        return DocLayoutModel.load_onnx()

    @property
    @abc.abstractmethod
    def stride(self) -> int:
        """Stride of the model input."""
        ...

    @abc.abstractmethod
    def predict(self, image, imgsz=1024, **kwargs) -> list:
        """
        Detect the layout of one document page.

        Args:
            image: Image of the document page.
            imgsz: Size the image is resized to; has to be a multiple of the stride.
            **kwargs: Extra, implementation-specific arguments.
        """
        ...
52 |
53 |
class YoloResult:
    """Detections of one prediction: boxes ordered by confidence, plus names."""

    def __init__(self, boxes, names):
        # Wrap every raw row, then put the most confident detection first.
        detections = [YoloBox(data=row) for row in boxes]
        detections.sort(key=lambda det: det.conf, reverse=True)
        self.boxes = detections
        self.names = names
61 |
62 |
class YoloBox:
    """One detection row split into box coordinates, confidence and class."""

    def __init__(self, data):
        # First four entries: the box; last two entries: confidence, class.
        self.xyxy, self.conf, self.cls = data[:4], data[-2], data[-1]
70 |
71 |
class OnnxModel(DocLayoutModel):
    """DocLayoutModel implementation that runs an ONNX file with onnxruntime."""

    def __init__(self, model_path: str):
        """Load the ONNX file at ``model_path`` and create an inference session.

        The model's metadata properties must contain ``stride`` and ``names``
        entries holding Python literals; they are parsed with
        ``ast.literal_eval``.
        """
        self.model_path = model_path

        model = onnx.load(model_path)
        metadata = {d.key: d.value for d in model.metadata_props}
        self._stride = ast.literal_eval(metadata["stride"])
        self._names = ast.literal_eval(metadata["names"])

        self.model = onnxruntime.InferenceSession(model.SerializeToString())

    @staticmethod
    def from_pretrained():
        """Build a model from the path given by ``get_doclayout_onnx_model_path()``."""
        pth = get_doclayout_onnx_model_path()
        return OnnxModel(pth)

    @property
    def stride(self):
        """Stride read from the model metadata."""
        return self._stride

    def resize_and_pad_image(self, image, new_shape):
        """
        Resize the image to fit inside ``new_shape`` (aspect ratio kept) and pad it.

        Only the remainder of the free space modulo ``self.stride`` is added as
        padding, so the result can be smaller than ``new_shape``.

        Parameters:
        - image: Input image; its first two dimensions are read as (height, width)
        - new_shape: Target size (integer or (height, width) tuple)

        Returns:
        - Processed image
        """
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

        h, w = image.shape[:2]
        new_h, new_w = new_shape

        # Calculate scaling ratio
        r = min(new_h / h, new_w / w)
        resized_h, resized_w = int(round(h * r)), int(round(w * r))

        # Resize image
        image = cv2.resize(
            image, (resized_w, resized_h), interpolation=cv2.INTER_LINEAR
        )

        # Calculate padding size and align to stride multiple
        pad_w = (new_w - resized_w) % self.stride
        pad_h = (new_h - resized_h) % self.stride
        # Split the padding between both sides; the odd pixel goes bottom/right.
        top, bottom = pad_h // 2, pad_h - pad_h // 2
        left, right = pad_w // 2, pad_w - pad_w // 2

        # Add padding
        image = cv2.copyMakeBorder(
            image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)
        )

        return image

    def scale_boxes(self, img1_shape, boxes, img0_shape):
        """
        Rescales bounding boxes (in the format of xyxy by default) from the shape of the image they were originally
        specified in (img1_shape) to the shape of a different image (img0_shape).

        ``boxes`` is modified in place and also returned.

        Args:
            img1_shape (tuple): The shape of the image that the bounding boxes are for,
                in the format of (height, width).
            boxes: array of bounding boxes, in the format of (x1, y1, x2, y2)
                (presumably a NumPy array, as passed by ``predict`` - TODO confirm)
            img0_shape (tuple): the shape of the target image, in the format of (height, width).

        Returns:
            boxes: The scaled bounding boxes, in the format of (x1, y1, x2, y2)
        """

        # Calculate scaling ratio
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])

        # Calculate padding size
        pad_x = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1)
        pad_y = round((img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1)

        # Remove padding and scale boxes
        boxes[..., :4] = (boxes[..., :4] - [pad_x, pad_y, pad_x, pad_y]) / gain
        return boxes

    def predict(self, image, imgsz=1024, **kwargs):
        """Run the model on one page image.

        Args:
            image: Page image; indexed as (height, width, channels) below.
            imgsz: Target size handed to ``resize_and_pad_image``.
            **kwargs: Accepted but not used in this method.

        Returns:
            A one-element list holding a ``YoloResult`` whose box coordinates
            are scaled back to the size of ``image``.
        """
        # Preprocess input image
        orig_h, orig_w = image.shape[:2]
        pix = self.resize_and_pad_image(image, new_shape=imgsz)
        pix = np.transpose(pix, (2, 0, 1))  # CHW
        pix = np.expand_dims(pix, axis=0)  # BCHW
        pix = pix.astype(np.float32) / 255.0  # Normalize to [0, 1]
        new_h, new_w = pix.shape[2:]

        # Run inference
        preds = self.model.run(None, {"images": pix})[0]

        # Postprocess predictions
        # Keep rows whose value at index 4 exceeds 0.25 (presumably the
        # confidence score - TODO confirm against the model's output layout).
        preds = preds[preds[..., 4] > 0.25]
        preds[..., :4] = self.scale_boxes(
            (new_h, new_w), preds[..., :4], (orig_h, orig_w)
        )
        return [YoloResult(boxes=preds, names=self._names)]
176 |
177 |
class ModelInstance:
    # Class-level slot for a shared OnnxModel. It starts as None; the code
    # that assigns it is not part of this module.
    value: OnnxModel = None
180 |
--------------------------------------------------------------------------------
/pdf2zh/high_level.py:
--------------------------------------------------------------------------------
1 | """Functions that can be used for the most common use-cases for pdf2zh.six"""
2 |
3 | import asyncio
4 | import io
5 | import os
6 | import re
7 | import sys
8 | import tempfile
9 | import logging
10 | from asyncio import CancelledError
11 | from pathlib import Path
12 | from string import Template
13 | from typing import Any, BinaryIO, List, Optional, Dict
14 |
15 | import numpy as np
16 | import requests
17 | import tqdm
18 | from pdfminer.pdfdocument import PDFDocument
19 | from pdfminer.pdfexceptions import PDFValueError
20 | from pdfminer.pdfinterp import PDFResourceManager
21 | from pdfminer.pdfpage import PDFPage
22 | from pdfminer.pdfparser import PDFParser
23 | from pymupdf import Document, Font
24 |
25 | from pdf2zh.converter import TranslateConverter
26 | from pdf2zh.doclayout import OnnxModel
27 | from pdf2zh.pdfinterp import PDFPageInterpreterEx
28 |
29 | from pdf2zh.config import ConfigManager
30 | from babeldoc.assets.assets import get_font_and_metadata
31 |
32 | NOTO_NAME = "noto"
33 |
34 | logger = logging.getLogger(__name__)
35 |
# Language codes, one per entry, each annotated with its language name.
# NOTE(review): presumably the output languages rendered with the Noto font
# (see NOTO_NAME) - confirm against the code that reads this list.
noto_list = [
    "am",  # Amharic
    "ar",  # Arabic
    "bn",  # Bengali
    "bg",  # Bulgarian
    "chr",  # Cherokee
    "el",  # Greek
    "gu",  # Gujarati
    "iw",  # Hebrew
    "hi",  # Hindi
    "kn",  # Kannada
    "ml",  # Malayalam
    "mr",  # Marathi
    "ru",  # Russian
    "sr",  # Serbian
    "ta",  # Tamil
    "te",  # Telugu
    "th",  # Thai
    "ur",  # Urdu
    "uk",  # Ukrainian
]
57 |
58 |
def check_files(files: List[str]) -> List[str]:
    """Return the local paths in ``files`` that do not exist.

    Entries starting with ``http://`` or ``https://`` are treated as online
    files and are never reported as missing.
    """
    missing_files = []
    for candidate in files:
        is_online = candidate.startswith("http://") or candidate.startswith(
            "https://"
        )
        if is_online:
            continue
        if not os.path.exists(candidate):
            missing_files.append(candidate)
    return missing_files
68 |
69 |
def translate_patch(
    inf: BinaryIO,
    pages: Optional[list[int]] = None,
    vfont: str = "",
    vchar: str = "",
    thread: int = 0,
    doc_zh: Document = None,
    lang_in: str = "",
    lang_out: str = "",
    service: str = "",
    noto_name: str = "",
    noto: Font = None,
    callback: object = None,
    cancellation_event: asyncio.Event = None,
    model: OnnxModel = None,
    envs: Dict = None,
    prompt: Template = None,
    ignore_cache: bool = False,
    **kwarg: Any,
) -> dict:
    """Run layout detection and the translating interpreter over a PDF.

    For every selected page the page is rendered from ``doc_zh``, ``model``
    predicts its layout, the layout is rasterised into a per-pixel label map
    stored in the ``layout`` dict shared with ``TranslateConverter``, a fresh
    empty content stream is attached to the page in ``doc_zh``, and the page
    is processed by ``PDFPageInterpreterEx``.

    Args:
        inf: Binary stream of the PDF parsed with pdfminer.
        pages: Zero-based page numbers to process; all pages when empty/None.
        vfont, vchar, thread, lang_in, lang_out, service, noto_name, noto,
            envs, prompt, ignore_cache: Passed through to ``TranslateConverter``.
        doc_zh: PyMuPDF document of the same PDF; it is modified in place.
        callback: Optional callable invoked with the tqdm progress bar after
            each processed page is counted.
        cancellation_event: When set, processing stops with ``CancelledError``.
        model: Layout model whose ``predict`` is called on every page image.
        **kwarg: Accepted and ignored.

    Returns:
        The ``obj_patch`` dict that was handed to ``PDFPageInterpreterEx``.

    Raises:
        CancelledError: If ``cancellation_event`` is set before a page starts.
    """
    rsrcmgr = PDFResourceManager()
    layout = {}
    device = TranslateConverter(
        rsrcmgr,
        vfont,
        vchar,
        thread,
        layout,
        lang_in,
        lang_out,
        service,
        noto_name,
        noto,
        envs,
        prompt,
        ignore_cache,
    )

    obj_patch = {}
    interpreter = PDFPageInterpreterEx(rsrcmgr, device, obj_patch)
    if pages:
        total_pages = len(pages)
    else:
        total_pages = doc_zh.page_count

    # Layout classes whose regions are masked out with label 0.
    vcls = ["abandon", "figure", "table", "isolate_formula", "formula_caption"]

    parser = PDFParser(inf)
    doc = PDFDocument(parser)
    with tqdm.tqdm(total=total_pages) as progress:
        for pageno, page in enumerate(PDFPage.create_pages(doc)):
            if cancellation_event and cancellation_event.is_set():
                raise CancelledError("task cancelled")
            if pages and (pageno not in pages):
                continue
            progress.update()
            if callback:
                callback(progress)
            page.pageno = pageno
            pix = doc_zh[page.pageno].get_pixmap()
            # np.frombuffer replaces the deprecated binary mode of
            # np.fromstring. The reshape assumes three channels per pixel and
            # the last-axis reversal flips the channel order.
            image = np.frombuffer(pix.samples, np.uint8).reshape(
                pix.height, pix.width, 3
            )[:, :, ::-1]
            page_layout = model.predict(image, imgsz=int(pix.height / 32) * 32)[0]
            # Rather than a k-d tree, rasterise the regions into a per-pixel
            # label map, trading memory for lookup time. Label 1 is the
            # background, label i + 2 marks detected box i, label 0 marks a
            # masked-out region.
            box = np.ones((pix.height, pix.width))
            h, w = box.shape
            # First pass: paint every box whose class is not in vcls.
            for i, d in enumerate(page_layout.boxes):
                if page_layout.names[int(d.cls)] not in vcls:
                    x0, y0, x1, y1 = d.xyxy.squeeze()
                    # Grow the box by one pixel, flip the y axis and clip.
                    x0, y0, x1, y1 = (
                        np.clip(int(x0 - 1), 0, w - 1),
                        np.clip(int(h - y1 - 1), 0, h - 1),
                        np.clip(int(x1 + 1), 0, w - 1),
                        np.clip(int(h - y0 + 1), 0, h - 1),
                    )
                    box[y0:y1, x0:x1] = i + 2
            # Second pass: masked classes overwrite whatever they overlap.
            for i, d in enumerate(page_layout.boxes):
                if page_layout.names[int(d.cls)] in vcls:
                    x0, y0, x1, y1 = d.xyxy.squeeze()
                    x0, y0, x1, y1 = (
                        np.clip(int(x0 - 1), 0, w - 1),
                        np.clip(int(h - y1 - 1), 0, h - 1),
                        np.clip(int(x1 + 1), 0, w - 1),
                        np.clip(int(h - y0 + 1), 0, h - 1),
                    )
                    box[y0:y1, x0:x1] = 0
            layout[page.pageno] = box
            # Create a new xref that will hold the new instruction stream.
            page.page_xref = doc_zh.get_new_xref()  # hack: new xref for the page
            doc_zh.update_object(page.page_xref, "<<>>")
            doc_zh.update_stream(page.page_xref, b"")
            doc_zh[page.pageno].set_contents(page.page_xref)
            interpreter.process_page(page)

    device.close()
    return obj_patch
167 |
168 |
def translate_stream(
    stream: bytes,
    pages: Optional[list[int]] = None,
    lang_in: str = "",
    lang_out: str = "",
    service: str = "",
    thread: int = 0,
    vfont: str = "",
    vchar: str = "",
    callback: object = None,
    cancellation_event: asyncio.Event = None,
    model: OnnxModel = None,
    envs: Dict = None,
    prompt: Template = None,
    skip_subset_fonts: bool = False,
    ignore_cache: bool = False,
    **kwarg: Any,
):
    """Translate an in-memory PDF and return the mono and dual documents.

    The input is loaded twice: ``doc_en`` keeps the original pages and
    ``doc_zh`` is a copy that receives the fonts and the patched content
    streams produced by ``translate_patch``. ``doc_zh`` is then inserted into
    ``doc_en`` and the pages are interleaved.

    Args:
        stream: Raw bytes of the source PDF.
        skip_subset_fonts: When true, ``subset_fonts`` is not run on either
            output document.

        The remaining arguments are forwarded to ``translate_patch`` through
        ``**locals()`` (see the note at the call site).

    Returns:
        A 2-tuple ``(mono, dual)``: the serialized ``doc_zh`` and the
        serialized ``doc_en`` after ``doc_zh`` was merged into it.
    """
    font_list = [("tiro", None)]

    font_path = download_remote_fonts(lang_out.lower())
    noto_name = NOTO_NAME
    noto = Font(noto_name, font_path)
    font_list.append((noto_name, font_path))

    doc_en = Document(stream=stream)
    # ``stream`` is rebound from the input bytes to a buffer holding a re-saved copy.
    stream = io.BytesIO()
    doc_en.save(stream)
    doc_zh = Document(stream=stream)
    page_count = doc_zh.page_count
    # font_list = [("GoNotoKurrent-Regular.ttf", font_path), ("tiro", None)]
    font_id = {}
    # Register every font on every page; the xref from the last page wins per font name.
    for page in doc_zh:
        for font in font_list:
            font_id[font[0]] = page.insert_font(font[0], font[1])
    xreflen = doc_zh.xref_length()
    for xref in range(1, xreflen):
        for label in ["Resources/", ""]:  # the resources may belong to an XObject
            try:  # reading/writing an xref may fail
                font_res = doc_zh.xref_get_key(xref, f"{label}Font")
                target_key_prefix = f"{label}Font/"
                if font_res[0] == "xref":
                    # The font dictionary is an indirect reference: follow it and
                    # write the keys directly on the referenced object.
                    resource_xref_id = re.search("(\\d+) 0 R", font_res[1]).group(1)
                    # NOTE(review): rebinding ``xref`` here also affects the next
                    # ``label`` iteration for this object; confirm this is intended.
                    xref = int(resource_xref_id)
                    font_res = ("dict", doc_zh.xref_object(xref))
                    target_key_prefix = ""

                if font_res[0] == "dict":
                    for font in font_list:
                        target_key = f"{target_key_prefix}{font[0]}"
                        font_exist = doc_zh.xref_get_key(xref, target_key)
                        # Only add the font when the dictionary does not already name it.
                        if font_exist[0] == "null":
                            doc_zh.xref_set_key(
                                xref,
                                target_key,
                                f"{font_id[font[0]]} 0 R",
                            )
            except Exception:
                # Best effort: objects that cannot be read or patched are skipped.
                pass

    fp = io.BytesIO()

    doc_zh.save(fp)
    # Every local defined so far is forwarded as a keyword argument;
    # translate_patch binds the ones it names and absorbs the rest in its
    # ``**kwarg``. Renaming a local above therefore changes what is forwarded.
    obj_patch: dict = translate_patch(fp, **locals())

    for obj_id, ops_new in obj_patch.items():
        # ops_old=doc_en.xref_stream(obj_id)
        # print(obj_id)
        # print(ops_old)
        # print(ops_new.encode())
        doc_zh.update_stream(obj_id, ops_new.encode())

    doc_en.insert_file(doc_zh)
    # The inserted pages sit after the originals; move inserted page ``id`` to
    # index ``2 * id + 1`` so that it directly follows original page ``id``.
    for id in range(page_count):
        doc_en.move_page(page_count + id, id * 2 + 1)
    if not skip_subset_fonts:
        doc_zh.subset_fonts(fallback=True)
        doc_en.subset_fonts(fallback=True)
    return (
        doc_zh.write(deflate=True, garbage=3, use_objstms=1),
        doc_en.write(deflate=True, garbage=3, use_objstms=1),
    )
251 |
252 |
def convert_to_pdfa(input_path, output_path):
    """
    Convert PDF to PDF/A format

    The document is tagged as PDF/A-2B in its XMP metadata and an sRGB
    ``OutputIntent`` is added to the document root; no other conversion or
    validation is performed here.

    Args:
        input_path: Path to source PDF file
        output_path: Path to save PDF/A file
    """
    from pikepdf import Dictionary, Name, Pdf

    # The context manager closes the document even when tagging or saving fails.
    with Pdf.open(input_path) as pdf:
        # Add PDF/A conformance metadata on top of the existing document info
        with pdf.open_metadata() as meta:
            meta.load_from_docinfo(pdf.docinfo)
            meta["pdfaid:part"] = "2"
            meta["pdfaid:conformance"] = "B"

        # Create OutputIntent dictionary
        output_intent = Dictionary(
            {
                "/Type": Name("/OutputIntent"),
                "/S": Name("/GTS_PDFA1"),
                "/OutputConditionIdentifier": "sRGB IEC61966-2.1",
                "/RegistryName": "http://www.color.org",
                "/Info": "sRGB IEC61966-2.1",
            }
        )

        # Add output intent to PDF root, keeping any intents already present
        if "/OutputIntents" not in pdf.Root:
            pdf.Root.OutputIntents = [output_intent]
        else:
            pdf.Root.OutputIntents.append(output_intent)

        # Save as PDF/A
        pdf.save(output_path, linearize=True)
300 |
301 |
def translate(
    files: list[str],
    output: str = "",
    pages: Optional[list[int]] = None,
    lang_in: str = "",
    lang_out: str = "",
    service: str = "",
    thread: int = 0,
    vfont: str = "",
    vchar: str = "",
    callback: object = None,
    compatible: bool = False,
    cancellation_event: asyncio.Event = None,
    model: OnnxModel = None,
    envs: Dict = None,
    prompt: Template = None,
    skip_subset_fonts: bool = False,
    ignore_cache: bool = False,
    **kwarg: Any,
):
    """Translate each PDF in *files* and write ``-mono`` / ``-dual`` outputs.

    Args:
        files: Local paths or ``http(s)://`` URLs. URLs are downloaded to a
            temporary ``.pdf`` file first.
        output: Directory that receives ``<name>-mono.pdf`` and
            ``<name>-dual.pdf``.
        compatible: Convert each input to PDF/A (via ``convert_to_pdfa``)
            before translating.
        **kwarg: Accepted for call-site compatibility; not forwarded.

        The remaining arguments are forwarded to ``translate_stream``.

    Returns:
        A list of ``(mono_path, dual_path)`` string pairs, one per input.

    Raises:
        PDFValueError: If *files* is empty, a local file is missing, or a
            download fails.
    """
    if not files:
        raise PDFValueError("No files to process.")

    missing_files = check_files(files)

    if missing_files:
        print("The following files do not exist:", file=sys.stderr)
        for file in missing_files:
            print(f" {file}", file=sys.stderr)
        raise PDFValueError("Some files do not exist.")

    result_files = []

    for file in files:
        if isinstance(file, str) and file.startswith(("http://", "https://")):
            print("Online files detected, downloading...")
            try:
                r = requests.get(file, allow_redirects=True)
                if r.status_code != 200:
                    r.raise_for_status()
                    # A non-200 status that is not an HTTP error carries no PDF body.
                    raise PDFValueError(f"Unexpected HTTP status {r.status_code}")
                with tempfile.NamedTemporaryFile(
                    suffix=".pdf", delete=False
                ) as tmp_file:
                    print(f"Writing the file: {file}...")
                    tmp_file.write(r.content)
                    file = tmp_file.name
            except Exception as e:
                raise PDFValueError(
                    f"Errors occur in downloading the PDF file. Please check the link(s).\nError:\n{e}"
                ) from e
        filename = os.path.splitext(os.path.basename(file))[0]

        # If the commandline has specified converting to PDF/A format
        # --compatible / -cp
        if compatible:
            # Reserve a temp path and close the handle before the converter
            # writes to it; an open handle blocks writing/unlinking on Windows.
            with tempfile.NamedTemporaryFile(
                suffix="-pdfa.pdf", delete=False
            ) as tmp_pdfa:
                pdfa_path = Path(tmp_pdfa.name)
            try:
                print(f"Converting {file} to PDF/A format...")
                convert_to_pdfa(file, pdfa_path.as_posix())
                s_raw = pdfa_path.read_bytes()
            finally:
                # Remove the intermediate file even when conversion fails.
                pdfa_path.unlink(missing_ok=True)
        else:
            with open(file, "rb") as doc_raw:
                s_raw = doc_raw.read()

        # NOTE(review): this removes ANY input located under the system temp
        # directory, not only files downloaded above; confirm that callers
        # never pass temp-dir files they still need.
        temp_dir = Path(tempfile.gettempdir())
        file_path = Path(file)
        try:
            if file_path.exists() and file_path.resolve().is_relative_to(
                temp_dir.resolve()
            ):
                file_path.unlink(missing_ok=True)
                logger.debug(f"Cleaned temp file: {file_path}")
        except Exception:
            logger.warning(f"Failed to clean temp file {file_path}", exc_info=True)

        # Forward exactly the options translate_stream declares instead of
        # ``**locals()``, so local variable names here cannot leak into the call.
        s_mono, s_dual = translate_stream(
            s_raw,
            pages=pages,
            lang_in=lang_in,
            lang_out=lang_out,
            service=service,
            thread=thread,
            vfont=vfont,
            vchar=vchar,
            callback=callback,
            cancellation_event=cancellation_event,
            model=model,
            envs=envs,
            prompt=prompt,
            skip_subset_fonts=skip_subset_fonts,
            ignore_cache=ignore_cache,
        )
        file_mono = Path(output) / f"{filename}-mono.pdf"
        file_dual = Path(output) / f"{filename}-dual.pdf"
        with open(file_mono, "wb") as doc_mono, open(file_dual, "wb") as doc_dual:
            doc_mono.write(s_mono)
            doc_dual.write(s_dual)
        result_files.append((str(file_mono), str(file_dual)))

    return result_files
398 |
399 |
def download_remote_fonts(lang: str):
    """Return the POSIX path of the font file to use for language *lang*.

    The language code is lower-cased and mapped to a font file name. CJK codes
    map to a regional ``SourceHanSerif`` file; every other code (listed in
    ``noto_list`` or not) maps to ``GoNotoKurrent-Regular.ttf``. The path comes
    from the ``NOTO_FONT_PATH`` config value (default ``/app/<font name>``);
    when that path does not exist, ``get_font_and_metadata`` supplies it.
    """
    default_font = "GoNotoKurrent-Regular.ttf"
    cjk_regions = {
        "CN": ["zh-cn", "zh-hans", "zh"],
        "TW": ["zh-tw", "zh-hant"],
        "JP": ["ja"],
        "KR": ["ko"],
    }

    # Noto entries go in first so that a CJK code always takes the regional font.
    font_by_lang = dict.fromkeys(noto_list, default_font)
    for region, codes in cjk_regions.items():
        for code in codes:
            font_by_lang[code] = f"SourceHanSerif{region}-Regular.ttf"

    font_name = font_by_lang.get(lang.lower(), default_font)

    # docker
    docker_default = Path("/app", font_name).as_posix()
    font_path = ConfigManager.get("NOTO_FONT_PATH", docker_default)
    if not Path(font_path).exists():
        resolved_path, _ = get_font_and_metadata(font_name)
        font_path = resolved_path.as_posix()

    logger.info(f"use font: {font_path}")

    return font_path
426 |
--------------------------------------------------------------------------------
/pdf2zh/mcp_server.py:
--------------------------------------------------------------------------------
1 | from mcp.server import Server
2 | from mcp.server.fastmcp import FastMCP, Context
3 | from mcp.server.sse import SseServerTransport
4 | from starlette.applications import Starlette
5 | from starlette.requests import Request
6 | from starlette.routing import Mount, Route
7 | from pdf2zh import translate_stream
8 | from pdf2zh.doclayout import ModelInstance
9 | from pathlib import Path
10 |
11 | import contextlib
12 | import io
13 | import os
14 |
15 |
def create_mcp_app() -> FastMCP:
    """Build the ``pdf2zh`` FastMCP application with its ``translate_pdf`` tool."""
    mcp = FastMCP("pdf2zh")

    @mcp.tool()
    async def translate_pdf(
        file: str, lang_in: str, lang_out: str, ctx: Context
    ) -> str:
        """
        translate given pdf. Argument `file` is absolute path of input pdf,
        `lang_in` and `lang_out` is translate from and to language, and
        should be like google translate lang_code. `lang_in` can be `auto`
        if you can't determine input language.
        """

        with open(file, "rb") as source:
            pdf_bytes = source.read()
        await ctx.log(level="info", message=f"start translate {file}")

        # Anything the translation pipeline prints is captured and discarded
        # (presumably to keep stdout clean for the STDIO transport; confirm).
        with contextlib.redirect_stdout(io.StringIO()):
            translated = translate_stream(
                pdf_bytes,
                lang_in=lang_in,
                lang_out=lang_out,
                service="google",
                model=ModelInstance.value,
                thread=4,
            )
        doc_mono_bytes, doc_dual_bytes = translated
        await ctx.log(level="info", message="translate complete")

        # Outputs are written next to the input file.
        target_dir = Path(os.path.dirname(file))
        stem = os.path.splitext(os.path.basename(file))[0]
        doc_mono = target_dir / f"{stem}-mono.pdf"
        doc_dual = target_dir / f"{stem}-dual.pdf"
        for destination, payload in (
            (doc_mono, doc_mono_bytes),
            (doc_dual, doc_dual_bytes),
        ):
            with open(destination, "wb") as sink:
                sink.write(payload)
        return f"""------------
translate complete
mono pdf file: {doc_mono.absolute()}
dual pdf file: {doc_dual.absolute()}
"""

    return mcp
58 |
59 |
def create_starlette_app(mcp_server: Server, *, debug: bool = False) -> Starlette:
    """Wrap *mcp_server* in a Starlette app that serves it over SSE.

    ``/sse`` opens the event stream and runs the server on it; ``/messages/``
    receives the client's POSTed messages.
    """
    sse = SseServerTransport("/messages/")

    async def handle_sse(request: Request) -> None:
        connection = sse.connect_sse(request.scope, request.receive, request._send)
        async with connection as (read_stream, write_stream):
            init_options = mcp_server.create_initialization_options()
            await mcp_server.run(read_stream, write_stream, init_options)

    routes = [
        Route("/sse", endpoint=handle_sse),
        Mount("/messages/", app=sse.handle_post_message),
    ]
    return Starlette(debug=debug, routes=routes)
79 |
80 |
if __name__ == "__main__":
    import argparse

    mcp = create_mcp_app()
    mcp_server = mcp._mcp_server

    # Command-line options: transport selection plus the SSE bind address.
    parser = argparse.ArgumentParser(description="Run MCP SSE-based PDF2ZH server")
    parser.add_argument(
        "--sse",
        default=False,
        action="store_true",
        help="Run the server with SSE transport or STDIO",
    )
    parser.add_argument(
        "--host", type=str, default="127.0.0.1", required=False, help="Host to bind"
    )
    parser.add_argument(
        "--port", type=int, default=3001, required=False, help="Port to bind"
    )
    cli = parser.parse_args()

    # SSE needs the flag plus a usable host and port; otherwise use the default run().
    if not (cli.sse and cli.host and cli.port):
        mcp.run()
    else:
        import uvicorn

        starlette_app = create_starlette_app(mcp_server, debug=True)
        uvicorn.run(starlette_app, host=cli.host, port=cli.port)
109 |
--------------------------------------------------------------------------------
/pdf2zh/pdf2zh.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """A command line tool for extracting text and images from PDF and
3 | output it to plain text, html, xml or tags.
4 | """
5 |
6 | from __future__ import annotations
7 |
8 | import argparse
9 | import logging
10 | import sys
11 | from string import Template
12 | from typing import List, Optional
13 |
14 | from pdf2zh import __version__, log
15 | from pdf2zh.high_level import translate, download_remote_fonts
16 | from pdf2zh.doclayout import OnnxModel, ModelInstance
17 | import os
18 |
19 | from pdf2zh.config import ConfigManager
20 | from babeldoc.translation_config import TranslationConfig as YadtConfig
21 | from babeldoc.high_level import async_translate as yadt_translate
22 | from babeldoc.high_level import init as yadt_init
23 | from babeldoc.main import create_progress_handler
24 |
25 | logger = logging.getLogger(__name__)
26 |
27 |
def create_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the ``pdf2zh`` entry point.

    The module docstring is used as the program description. ``files``,
    ``--version`` and ``--debug`` are registered on the parser itself; every
    other option is registered in the "Parser" argument group.
    """
    parser = argparse.ArgumentParser(description=__doc__, add_help=True)
    # Zero or more positional inputs (nargs="*").
    parser.add_argument(
        "files",
        type=str,
        default=None,
        nargs="*",
        help="One or more paths to PDF files.",
    )
    parser.add_argument(
        "--version",
        "-v",
        action="version",
        version=f"pdf2zh v{__version__}",
    )
    parser.add_argument(
        "--debug",
        "-d",
        default=False,
        action="store_true",
        help="Use debug logging level.",
    )
    parse_params = parser.add_argument_group(
        "Parser",
        description="Used during PDF parsing",
    )
    # Kept as a raw string here; parse_args() expands it into page indices.
    parse_params.add_argument(
        "--pages",
        "-p",
        type=str,
        help="The list of page numbers to parse.",
    )
    parse_params.add_argument(
        "--vfont",
        "-f",
        type=str,
        default="",
        help="The regex to math font name of formula.",
    )
    parse_params.add_argument(
        "--vchar",
        "-c",
        type=str,
        default="",
        help="The regex to math character of formula.",
    )
    parse_params.add_argument(
        "--lang-in",
        "-li",
        type=str,
        default="en",
        help="The code of source language.",
    )
    parse_params.add_argument(
        "--lang-out",
        "-lo",
        type=str,
        default="zh",
        help="The code of target language.",
    )
    parse_params.add_argument(
        "--service",
        "-s",
        type=str,
        default="google",
        help="The service to use for translation.",
    )
    parse_params.add_argument(
        "--output",
        "-o",
        type=str,
        default="",
        help="Output directory for files.",
    )
    parse_params.add_argument(
        "--thread",
        "-t",
        type=int,
        default=4,
        help="The number of threads to execute translation.",
    )
    # Mode switches: GUI, Gradio sharing, Flask backend, Celery worker.
    parse_params.add_argument(
        "--interactive",
        "-i",
        action="store_true",
        help="Interact with GUI.",
    )
    parse_params.add_argument(
        "--share",
        action="store_true",
        help="Enable Gradio Share",
    )
    parse_params.add_argument(
        "--flask",
        action="store_true",
        help="flask",
    )
    parse_params.add_argument(
        "--celery",
        action="store_true",
        help="celery",
    )
    parse_params.add_argument(
        "--authorized",
        type=str,
        nargs="+",
        help="user name and password.",
    )
    # Path to a prompt file; main() reads it and wraps the text in a Template.
    parse_params.add_argument(
        "--prompt",
        type=str,
        help="user custom prompt.",
    )

    parse_params.add_argument(
        "--compatible",
        "-cp",
        action="store_true",
        help="Convert the PDF file into PDF/A format to improve compatibility.",
    )

    parse_params.add_argument(
        "--onnx",
        type=str,
        help="custom onnx model path.",
    )

    parse_params.add_argument(
        "--serverport",
        type=int,
        help="custom WebUI port.",
    )

    # When set, the first positional argument is treated as a directory to scan.
    parse_params.add_argument(
        "--dir",
        action="store_true",
        help="translate directory.",
    )

    parse_params.add_argument(
        "--config",
        type=str,
        help="config file.",
    )

    parse_params.add_argument(
        "--babeldoc",
        default=False,
        action="store_true",
        help="Use experimental backend babeldoc.",
    )

    parse_params.add_argument(
        "--skip-subset-fonts",
        action="store_true",
        help="Skip font subsetting. "
        "This option can improve compatibility "
        "but will increase the size of the output file.",
    )

    parse_params.add_argument(
        "--ignore-cache",
        action="store_true",
        help="Ignore cache and force retranslation.",
    )

    parse_params.add_argument(
        "--mcp", action="store_true", help="Launch pdf2zh MCP server in STDIO mode"
    )

    # Only consulted by main() when --mcp is also given.
    parse_params.add_argument(
        "--sse", action="store_true", help="Launch pdf2zh MCP server in SSE mode"
    )

    return parser
203 |
204 |
def parse_args(args: Optional[List[str]]) -> argparse.Namespace:
    """Parse *args* and expand ``--pages`` into zero-based page indices.

    ``--pages`` is a comma-separated list of 1-based numbers and inclusive
    ``start-end`` ranges. When it is given, the original string is kept in
    ``raw_pages`` and ``pages`` is replaced by the expanded index list.
    """
    namespace = create_parser().parse_args(args=args)
    if not namespace.pages:
        return namespace

    zero_based = []
    for token in namespace.pages.split(","):
        if "-" not in token:
            zero_based.append(int(token) - 1)
            continue
        first, last = token.split("-")
        zero_based += range(int(first) - 1, int(last))

    namespace.raw_pages = namespace.pages
    namespace.pages = zero_based
    return namespace
220 |
221 |
def find_all_files_in_directory(directory_path):
    """
    Collect the paths of all PDF files below a directory, searching recursively.

    A file counts as a PDF when its name ends with ".pdf", ignoring case.

    :param directory_path: str, the path to the directory to search
    :return: list of PDF file paths
    :raises ValueError: if ``directory_path`` is not a directory
    """
    if not os.path.isdir(directory_path):
        raise ValueError(f"The provided path '{directory_path}' is not a directory.")

    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory_path)
        for name in names
        if name.lower().endswith(".pdf")
    ]
244 |
245 |
def main(args: Optional[List[str]] = None) -> int:
    """Run the ``pdf2zh`` command line and return the process exit code.

    After logging, configuration and the layout model are set up, the first
    matching mode wins, in this order: GUI (``--interactive``), Flask, Celery,
    MCP server, babeldoc backend, and finally the built-in translation of the
    given files (or of every PDF under the given directory with ``--dir``).
    """
    from rich.logging import RichHandler

    logging.basicConfig(level=logging.INFO, handlers=[RichHandler()])

    # disable httpx, openai, httpcore, http11 logs
    for noisy_name in ("httpx", "openai", "httpcore", "http11"):
        noisy_logger = logging.getLogger(noisy_name)
        noisy_logger.setLevel("CRITICAL")
        noisy_logger.propagate = False

    parsed_args = parse_args(args)

    if parsed_args.config:
        ConfigManager.custome_config(parsed_args.config)

    if parsed_args.debug:
        log.setLevel(logging.DEBUG)

    # A custom ONNX model path takes precedence over the bundled/available one.
    ModelInstance.value = (
        OnnxModel(parsed_args.onnx)
        if parsed_args.onnx
        else OnnxModel.load_available()
    )

    if parsed_args.interactive:
        from pdf2zh.gui import setup_gui

        gui_args = [parsed_args.share, parsed_args.authorized]
        if parsed_args.serverport:
            gui_args.append(int(parsed_args.serverport))
        setup_gui(*gui_args)
        return 0

    if parsed_args.flask:
        from pdf2zh.backend import flask_app

        flask_app.run(port=11008)
        return 0

    if parsed_args.celery:
        from pdf2zh.backend import celery_app

        celery_app.start(argv=sys.argv[2:])
        return 0

    if parsed_args.prompt:
        # --prompt names a file; its text replaces the path as a Template.
        try:
            with open(parsed_args.prompt, "r", encoding="utf-8") as file:
                content = file.read()
            parsed_args.prompt = Template(content)
        except Exception:
            raise ValueError("prompt error.")

    if parsed_args.mcp:
        logging.getLogger("mcp").setLevel(logging.ERROR)
        from pdf2zh.mcp_server import create_mcp_app, create_starlette_app

        mcp = create_mcp_app()
        if not parsed_args.sse:
            mcp.run()
            return 0

        import uvicorn

        starlette_app = create_starlette_app(mcp._mcp_server)
        uvicorn.run(starlette_app)
        return 0

    print(parsed_args)
    if parsed_args.babeldoc:
        return yadt_main(parsed_args)

    if parsed_args.dir:
        # With --dir the first positional argument is a directory to scan for PDFs.
        parsed_args.files = find_all_files_in_directory(parsed_args.files[0])

    translate(model=ModelInstance.value, **vars(parsed_args))
    return 0
330 |
331 |
def yadt_main(parsed_args) -> int:
    """Translate the requested files with the experimental babeldoc backend.

    Args:
        parsed_args: Namespace produced by ``parse_args``. ``prompt`` may be
            a path to a prompt file or an already loaded ``Template``.
            ``raw_pages`` (when present) is the user's original ``--pages``
            string and is handed to babeldoc unchanged.

    Returns:
        ``0`` once every file has been processed.

    Raises:
        ValueError: If the prompt file cannot be read or the requested
            translation service is unknown.
    """
    import asyncio

    if parsed_args.dir:
        untranlate_file = find_all_files_in_directory(parsed_args.files[0])
    else:
        untranlate_file = parsed_args.files
    lang_in = parsed_args.lang_in
    lang_out = parsed_args.lang_out
    ignore_cache = parsed_args.ignore_cache
    outputdir = parsed_args.output or None

    # yadt require init before translate
    yadt_init()
    font_path = download_remote_fonts(lang_out.lower())

    # "--service name:model" selects a translator and, optionally, its model.
    param = parsed_args.service.split(":", 1)
    service_name = param[0]
    service_model = param[1] if len(param) > 1 else None

    envs = {}
    prompt = []

    if parsed_args.prompt:
        if isinstance(parsed_args.prompt, Template):
            # main() has already loaded the prompt file; re-opening the
            # Template as a path would always fail with "prompt error.".
            prompt = parsed_args.prompt
        else:
            try:
                with open(parsed_args.prompt, "r", encoding="utf-8") as file:
                    content = file.read()
                prompt = Template(content)
            except Exception as e:
                raise ValueError("prompt error.") from e

    # Pass the page selection through as the user typed it. Joining the
    # characters of the raw string turned "1-3" into "1,-,3" and "10" into "1,0".
    raw_pages = getattr(parsed_args, "raw_pages", None)
    if not raw_pages:
        pages_spec = ""
    elif isinstance(raw_pages, str):
        pages_spec = raw_pages
    else:
        pages_spec = ",".join(str(x) for x in raw_pages)

    from pdf2zh.translator import (
        AzureOpenAITranslator,
        GoogleTranslator,
        BingTranslator,
        DeepLTranslator,
        DeepLXTranslator,
        OllamaTranslator,
        OpenAITranslator,
        ZhipuTranslator,
        ModelScopeTranslator,
        SiliconTranslator,
        GeminiTranslator,
        AzureTranslator,
        TencentTranslator,
        DifyTranslator,
        AnythingLLMTranslator,
        XinferenceTranslator,
        ArgosTranslator,
        GrokTranslator,
        GroqTranslator,
        DeepseekTranslator,
        OpenAIlikedTranslator,
        QwenMtTranslator,
    )

    # Searched in order; the first class whose ``name`` matches is used.
    translator_classes = [
        GoogleTranslator,
        BingTranslator,
        DeepLTranslator,
        DeepLXTranslator,
        OllamaTranslator,
        XinferenceTranslator,
        AzureOpenAITranslator,
        OpenAITranslator,
        ZhipuTranslator,
        ModelScopeTranslator,
        SiliconTranslator,
        GeminiTranslator,
        AzureTranslator,
        TencentTranslator,
        DifyTranslator,
        AnythingLLMTranslator,
        ArgosTranslator,
        GrokTranslator,
        GroqTranslator,
        DeepseekTranslator,
        OpenAIlikedTranslator,
        QwenMtTranslator,
    ]
    translator_cls = next(
        (cls for cls in translator_classes if cls.name == service_name), None
    )
    if translator_cls is None:
        raise ValueError("Unsupported translation service")
    translator = translator_cls(
        lang_in,
        lang_out,
        service_model,
        envs=envs,
        prompt=prompt,
        ignore_cache=ignore_cache,
    )

    async def yadt_translate_coro(yadt_config):
        """Drive one babeldoc translation and log the result paths when it finishes."""
        progress_context, progress_handler = create_progress_handler(yadt_config)
        # Start translating
        with progress_context:
            async for event in yadt_translate(yadt_config):
                progress_handler(event)
                if yadt_config.debug:
                    logger.debug(event)
                if event["type"] == "finish":
                    result = event["translate_result"]
                    logger.info("Translation Result:")
                    logger.info(f" Original PDF: {result.original_pdf_path}")
                    logger.info(f" Time Cost: {result.total_seconds:.2f}s")
                    logger.info(f" Mono PDF: {result.mono_pdf_path or 'None'}")
                    logger.info(f" Dual PDF: {result.dual_pdf_path or 'None'}")
                    break

    for file in untranlate_file:
        # Drop quotes a shell or drag-and-drop may have left around the path.
        file = file.strip("\"'")
        yadt_config = YadtConfig(
            input_file=file,
            font=font_path,
            pages=pages_spec,
            output_dir=outputdir,
            doc_layout_model=None,
            translator=translator,
            debug=parsed_args.debug,
            lang_in=lang_in,
            lang_out=lang_out,
            no_dual=False,
            no_mono=False,
            qps=parsed_args.thread,
        )

        asyncio.run(yadt_translate_coro(yadt_config))
    return 0
462 |
463 |
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
466 |
--------------------------------------------------------------------------------
/pdf2zh/pdfinterp.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import Any, Dict, Optional, Sequence, Tuple, cast
3 | import numpy as np
4 |
5 | from pdfminer import settings
6 | from pdfminer.pdfcolor import PREDEFINED_COLORSPACE, PDFColorSpace
7 | from pdfminer.pdfdevice import PDFDevice
8 | from pdfminer.pdfinterp import (
9 | PDFPageInterpreter,
10 | PDFResourceManager,
11 | PDFContentParser,
12 | PDFInterpreterError,
13 | Color,
14 | PDFStackT,
15 | LITERAL_FORM,
16 | LITERAL_IMAGE,
17 | )
18 | from pdfminer.pdffont import PDFFont
19 | from pdfminer.pdfpage import PDFPage
20 | from pdfminer.pdftypes import (
21 | PDFObjRef,
22 | dict_value,
23 | list_value,
24 | resolve1,
25 | stream_value,
26 | )
27 | from pdfminer.psexceptions import PSEOF
28 | from pdfminer.psparser import (
29 | PSKeyword,
30 | keyword_name,
31 | literal_name,
32 | )
33 | from pdfminer.utils import (
34 | MATRIX_IDENTITY,
35 | Matrix,
36 | Rect,
37 | mult_matrix,
38 | apply_matrix_pt,
39 | )
40 |
41 | log = logging.getLogger(__name__)
42 |
43 |
def safe_float(o: Any) -> Optional[float]:
    """Convert *o* to ``float``, or return ``None`` when ``float()`` rejects it.

    Only ``TypeError`` and ``ValueError`` are treated as "not convertible".
    """
    try:
        value = float(o)
    except (TypeError, ValueError):
        value = None
    return value
49 |
50 |
51 | class PDFPageInterpreterEx(PDFPageInterpreter):
52 | """Processor for the content of a PDF page
53 |
54 | Reference: PDF Reference, Appendix A, Operator Summary
55 | """
56 |
    def __init__(
        self, rsrcmgr: PDFResourceManager, device: PDFDevice, obj_patch
    ) -> None:
        """Store the resource manager, the output device and the patch map.

        ``obj_patch`` is kept by reference, so entries written by this
        interpreter are visible to whoever supplied the mapping.
        """
        # NOTE(review): the base-class __init__ is not called here; confirm
        # that this is intentional.
        self.rsrcmgr = rsrcmgr
        self.device = device
        self.obj_patch = obj_patch
63 |
    def dup(self) -> "PDFPageInterpreterEx":
        """Return a new interpreter sharing this one's resource manager, device and patch map."""
        return self.__class__(self.rsrcmgr, self.device, self.obj_patch)
66 |
67 | def init_resources(self, resources: Dict[object, object]) -> None:
68 | # 重载设置 fontid 和 descent
69 | """Prepare the fonts and XObjects listed in the Resource attribute."""
70 | self.resources = resources
71 | self.fontmap: Dict[object, PDFFont] = {}
72 | self.fontid: Dict[PDFFont, object] = {}
73 | self.xobjmap = {}
74 | self.csmap: Dict[str, PDFColorSpace] = PREDEFINED_COLORSPACE.copy()
75 | if not resources:
76 | return
77 |
78 | def get_colorspace(spec: object) -> Optional[PDFColorSpace]:
79 | if isinstance(spec, list):
80 | name = literal_name(spec[0])
81 | else:
82 | name = literal_name(spec)
83 | if name == "ICCBased" and isinstance(spec, list) and len(spec) >= 2:
84 | return PDFColorSpace(name, stream_value(spec[1])["N"])
85 | elif name == "DeviceN" and isinstance(spec, list) and len(spec) >= 2:
86 | return PDFColorSpace(name, len(list_value(spec[1])))
87 | else:
88 | return PREDEFINED_COLORSPACE.get(name)
89 |
90 | for k, v in dict_value(resources).items():
91 | # log.debug("Resource: %r: %r", k, v)
92 | if k == "Font":
93 | for fontid, spec in dict_value(v).items():
94 | objid = None
95 | if isinstance(spec, PDFObjRef):
96 | objid = spec.objid
97 | spec = dict_value(spec)
98 | self.fontmap[fontid] = self.rsrcmgr.get_font(objid, spec)
99 | self.fontmap[fontid].descent = 0 # hack fix descent
100 | self.fontid[self.fontmap[fontid]] = fontid
101 | elif k == "ColorSpace":
102 | for csid, spec in dict_value(v).items():
103 | colorspace = get_colorspace(resolve1(spec))
104 | if colorspace is not None:
105 | self.csmap[csid] = colorspace
106 | elif k == "ProcSet":
107 | self.rsrcmgr.get_procset(list_value(v))
108 | elif k == "XObject":
109 | for xobjid, xobjstrm in dict_value(v).items():
110 | self.xobjmap[xobjid] = xobjstrm
111 |
112 | def do_S(self) -> None:
113 | # 重载过滤非公式线条
114 | """Stroke path"""
115 |
116 | def is_black(color: Color) -> bool:
117 | if isinstance(color, Tuple):
118 | return sum(color) == 0
119 | else:
120 | return color == 0
121 |
122 | if (
123 | len(self.curpath) == 2
124 | and self.curpath[0][0] == "m"
125 | and self.curpath[1][0] == "l"
126 | and apply_matrix_pt(self.ctm, self.curpath[0][-2:])[1]
127 | == apply_matrix_pt(self.ctm, self.curpath[1][-2:])[1]
128 | and is_black(self.graphicstate.scolor)
129 | ): # 独立直线,水平,黑色
130 | # print(apply_matrix_pt(self.ctm,self.curpath[0][-2:]),apply_matrix_pt(self.ctm,self.curpath[1][-2:]),self.graphicstate.scolor)
131 | self.device.paint_path(self.graphicstate, True, False, False, self.curpath)
132 | self.curpath = []
133 | return "n"
134 | else:
135 | self.curpath = []
136 |
137 | ############################################################
    # Overrides: filter out non-formula lines (F/B)
    def do_f(self) -> None:
        """Fill path using nonzero winding number rule"""
        # The paint_path call is disabled, so the fill is not sent to the
        # device; the current path is simply discarded.
        self.curpath = []
143 |
144 | def do_F(self) -> None:
145 | """Fill path using nonzero winding number rule (obsolete)"""
146 |
    def do_f_a(self) -> None:
        """Fill path using even-odd rule"""
        # The paint_path call is disabled, so the fill is not sent to the
        # device; the current path is simply discarded.
        self.curpath = []
151 |
    def do_B(self) -> None:
        """Fill and stroke path using nonzero winding number rule"""
        # The paint_path call is disabled, so nothing is sent to the device;
        # the current path is simply discarded.
        self.curpath = []
156 |
    def do_B_a(self) -> None:
        """Fill and stroke path using even-odd rule"""
        # The paint_path call is disabled, so nothing is sent to the device;
        # the current path is simply discarded.
        self.curpath = []
161 |
162 | ############################################################
    # Overrides: return the call arguments (SCN)
164 | def do_SCN(self) -> None:
165 | """Set color for stroking operations."""
166 | if self.scs:
167 | n = self.scs.ncomponents
168 | else:
169 | if settings.STRICT:
170 | raise PDFInterpreterError("No colorspace specified!")
171 | n = 1
172 | args = self.pop(n)
173 | self.graphicstate.scolor = cast(Color, args)
174 | return args
175 |
def do_scn(self) -> Sequence[object]:
    """Set color for nonstroking operations.

    Pops one operand per component of the current nonstroking colorspace
    (``self.ncs``) and stores them as ``self.graphicstate.ncolor``.  When no
    colorspace is set and strict mode is off, a single operand is popped.

    Returns:
        The popped operands.  ``execute`` writes the return value of
        argument-less handlers back into the operator stream it rebuilds,
        which is why this override returns them.

    Raises:
        PDFInterpreterError: no nonstroking colorspace is set and
            ``settings.STRICT`` is enabled.
    """
    if self.ncs:
        n = self.ncs.ncomponents
    elif settings.STRICT:
        raise PDFInterpreterError("No colorspace specified!")
    else:
        n = 1
    args = self.pop(n)
    self.graphicstate.ncolor = cast(Color, args)
    return args
187 |
def do_SC(self) -> Sequence[object]:
    """Set color for stroking operations.

    Thin alias of ``do_SCN``; returns whatever ``do_SCN`` returns (the
    operands it popped) so the caller can re-emit them.
    """
    return self.do_SCN()
191 |
def do_sc(self) -> Sequence[object]:
    """Set color for nonstroking operations.

    Thin alias of ``do_scn``; returns whatever ``do_scn`` returns (the
    operands it popped) so the caller can re-emit them.
    """
    return self.do_scn()
195 |
def do_Do(self, xobjid_arg: PDFStackT) -> None:
    """Invoke named XObject.

    Override: in addition to rendering the XObject, a replacement content
    stream for form XObjects is stored in ``self.obj_patch``, keyed by the
    XObject's object id.

    Args:
        xobjid_arg: operand naming the XObject in ``self.xobjmap``.

    Raises:
        PDFInterpreterError: the XObject is unknown and ``settings.STRICT``
            is enabled (otherwise unknown XObjects are ignored).
    """
    xobjid = literal_name(xobjid_arg)
    try:
        xobj = stream_value(self.xobjmap[xobjid])
    except KeyError:
        if settings.STRICT:
            raise PDFInterpreterError("Undefined xobject id: %r" % xobjid)
        return
    subtype = xobj.get("Subtype")
    if subtype is LITERAL_FORM and "BBox" in xobj:
        interpreter = self.dup()
        bbox = cast(Rect, list_value(xobj["BBox"]))
        matrix = cast(Matrix, list_value(xobj.get("Matrix", MATRIX_IDENTITY)))
        # According to PDF reference 1.7 section 4.9.1, XObjects in
        # earlier PDFs (prior to v1.2) use the page's Resources entry
        # instead of having their own Resources entry.
        xobjres = xobj.get("Resources")
        if xobjres:
            resources = dict_value(xobjres)
        else:
            resources = self.resources.copy()
        self.device.begin_figure(xobjid, bbox, matrix)
        ctm = mult_matrix(matrix, self.ctm)
        # An empty stream may yield None; never interpolate "None" below.
        ops_base = (
            interpreter.render_contents(
                resources,
                [xobj],
                ctm=ctm,
            )
            or ""
        )
        self.ncs = interpreter.ncs
        self.scs = interpreter.scs
        try:
            self.device.fontid = interpreter.fontid
            self.device.fontmap = interpreter.fontmap
            ops_new = self.device.end_figure(xobjid)
            # Invert the affine CTM: the 2x2 linear part first, then the
            # translation (row vector times the inverted linear part).
            # Plain ndarrays are used so no NumPy-version-specific matrix
            # helper (np.mat / np.asmatrix) is needed.
            ctm_inv = np.linalg.inv(np.array(ctm[:4]).reshape(2, 2))
            pos_inv = (-np.array(ctm[4:])) @ ctm_inv
            a, b, c, d = ctm_inv.reshape(4).tolist()
            e, f = pos_inv.tolist()
            # ops_base is isolated with q/Q; the inverse CTM is applied
            # before ops_new (presumably because the device emits ops_new
            # in page coordinates -- confirm against the converter).
            self.obj_patch[self.xobjmap[xobjid].objid] = (
                f"q {ops_base}Q {a} {b} {c} {d} {e} {f} cm {ops_new}"
            )
        except Exception:
            # Best effort by design: sometimes the form's fonts cannot be
            # attached and this section fails; the XObject is then left
            # unpatched.  Record the reason instead of hiding it entirely.
            import logging

            logging.getLogger(__name__).debug(
                "Skipping obj_patch for form xobject %r", xobjid, exc_info=True
            )
    elif subtype is LITERAL_IMAGE and "Width" in xobj and "Height" in xobj:
        self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
        self.device.render_image(xobjid, xobj)
        self.device.end_figure(xobjid)
    else:
        # unsupported xobject type.
        pass
253 |
def process_page(self, page: PDFPage) -> None:
    """Render ``page`` and record its replacement content in ``self.obj_patch``.

    Override: the page's crop box and rotation determine the initial CTM,
    the content streams are rendered, and the rebuilt operator stream is
    stored under ``page.page_xref``.  Every original content stream object
    is patched to an empty string.
    """
    x0, y0, x1, y1 = page.cropbox
    # Initial CTM per supported rotation; any other value is treated as
    # "no rotation" and only removes the crop-box offset.
    rotated = {
        90: (0, -1, 1, 0, -y0, x1),
        180: (-1, 0, 0, -1, x1, y1),
        270: (0, 1, -1, 0, y1, -x0),
    }
    ctm = rotated.get(page.rotate, (1, 0, 0, 1, -x0, -y0))

    self.device.begin_page(page, ctm)
    base_ops = self.render_contents(page.resources, page.contents, ctm=ctm)
    self.device.fontid = self.fontid
    self.device.fontmap = self.fontmap
    new_ops = self.device.end_page(page)

    # Rendering subtracted the crop-box offset to obtain real coordinates,
    # so the output adds it back with a `cm` operator.  base_ops may contain
    # images that the text in new_ops has to be drawn over; wrapping it in
    # q/Q resets the position matrix before new_ops starts.
    page_stream = f"q {base_ops}Q 1 0 0 1 {x0} {y0} cm {new_ops}"
    self.obj_patch[page.page_xref] = page_stream
    for content in page.contents:
        self.obj_patch[content.objid] = ""
279 |
def render_contents(
    self,
    resources: Dict[object, object],
    streams: Sequence[object],
    ctm: Matrix = MATRIX_IDENTITY,
) -> str:
    """Render the content streams and return the rebuilt operator stream.

    Override: unlike a plain renderer this returns the instruction stream
    produced by ``execute``.  This method may be called recursively.

    Args:
        resources: resource dictionary handed to ``init_resources``.
        streams: content streams to interpret.
        ctm: initial transformation matrix handed to ``init_state``.

    Returns:
        The operator string produced by ``execute``; an empty string when
        ``execute`` produced nothing, so callers can interpolate the result
        into a content stream without ever embedding the text "None".
    """
    self.init_resources(resources)
    self.init_state(ctm)
    return self.execute(list_value(streams)) or ""
300 |
def execute(self, streams: Sequence[object]) -> str:
    """Interpret ``streams`` and return the operators that are kept.

    Override: every operator is dispatched to its ``do_*`` handler as usual,
    and in addition the operators that are not filtered out are serialised
    (operands followed by the operator name) into the returned string.

    Args:
        streams: content streams to parse.

    Returns:
        The serialised operator stream; an empty string for an empty page.

    Raises:
        PDFInterpreterError: an unknown operator is met while
            ``settings.STRICT`` is enabled.
    """

    def serialize(operands) -> str:
        # Floats are written in fixed-point notation; everything else goes
        # through str() with single quotes stripped.
        return " ".join(
            f"{x:f}" if isinstance(x, float) else str(x).replace("'", "")
            for x in operands
        )

    try:
        parser = PDFContentParser(streams)
    except PSEOF:
        # Empty page: return an empty stream rather than None, because
        # callers interpolate the result into an f-string.
        return ""
    emitted = []
    while True:
        try:
            (_, obj) = parser.nextobject()
        except PSEOF:
            break
        if isinstance(obj, PSKeyword):
            name = keyword_name(obj)
            method = "do_%s" % name.replace("*", "_a").replace('"', "_w").replace(
                "'",
                "_q",
            )
            if hasattr(self, method):
                func = getattr(self, method)
                # Number of operands = positional parameters minus `self`.
                nargs = func.__code__.co_argcount - 1
                if nargs:
                    args = self.pop(nargs)
                    if len(args) == nargs:
                        func(*args)
                        # Drop the T-series text operators; EI is dropped
                        # too because its operand is an object (it is only
                        # used to draw horizontal rules in a few documents);
                        # the marked-content operators are dropped as well.
                        if not (
                            name[0] == "T"
                            or name in ['"', "'", "EI", "MP", "DP", "BMC", "BDC"]
                        ):
                            emitted.append(f"{serialize(args)} {name} ")
                else:
                    # Argument-less handlers may return the operands they
                    # popped themselves so they can be written back out.
                    targs = func()
                    if targs is None:
                        targs = []
                    if not (name[0] == "T" or name in ["BI", "ID", "EMC"]):
                        emitted.append(f"{serialize(targs)} {name} ")
            elif settings.STRICT:
                error_msg = "Unknown operator: %r" % name
                raise PDFInterpreterError(error_msg)
        else:
            self.push(obj)
    return "".join(emitted)
367 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "pdf2zh"
3 | version = "1.9.6"
4 | description = "Latex PDF Translator"
5 | authors = [{ name = "Byaidu", email = "byaidux@gmail.com" }]
6 | license = "AGPL-3.0"
7 | readme = "README.md"
8 | requires-python = ">=3.10,<3.13"
9 | classifiers = [
10 | "Programming Language :: Python :: 3",
11 | "Operating System :: OS Independent",
12 | ]
13 | dependencies = [
14 | "requests",
15 | # for arm64 linux wheels
16 | "pymupdf<1.25.3",
17 | "tqdm",
18 | "tenacity",
19 | "numpy",
20 | "ollama",
21 | "xinference-client",
22 | "deepl",
23 | "openai>=1.0.0",
24 | "azure-ai-translation-text<=1.0.1",
25 | "gradio",
26 | "huggingface_hub",
27 | "onnx",
28 | "onnxruntime",
29 | "opencv-python-headless",
30 | "tencentcloud-sdk-python-tmt",
31 | "pdfminer.six>=20240706",
32 | "gradio_pdf>=0.0.21",
33 | "pikepdf",
34 | "peewee>=3.17.8",
35 | "fontTools",
36 | "babeldoc>=0.1.22, <0.3.0",
37 | "rich",
38 | ]
39 |
40 | [project.optional-dependencies]
41 | backend = [
42 | "flask",
43 | "celery",
44 | "redis"
45 | ]
46 | argostranslate = [
47 | "argostranslate"
48 | ]
49 | mcp = [
50 | "mcp>=1.6.0",
51 | ]
52 |
53 | [dependency-groups]
54 | dev = [
55 | "black",
56 | "flake8",
57 | "pre-commit",
58 | "pytest",
59 | "build",
60 | "bumpver>=2024.1130",
61 | ]
62 |
63 | [project.urls]
64 | Homepage = "https://github.com/Byaidu/PDFMathTranslate"
65 |
66 | [build-system]
67 | requires = ["hatchling"]
68 | build-backend = "hatchling.build"
69 |
70 | [project.scripts]
71 | pdf2zh = "pdf2zh.pdf2zh:main"
72 |
73 | [tool.flake8]
74 | ignore = ["E203", "E261", "E501", "W503", "E741"]
75 | max-line-length = 88
76 |
77 |
78 |
79 | [bumpver]
80 | current_version = "1.9.6"
81 | version_pattern = "MAJOR.MINOR.PATCH[.PYTAGNUM]"
82 |
83 | [bumpver.file_patterns]
84 | "pyproject.toml" = [
85 | 'current_version = "{version}"',
86 | 'version = "{version}"'
87 | ]
88 | "pdf2zh/__init__.py" = [
89 | '__version__ = "{version}"'
90 | ]
91 |
--------------------------------------------------------------------------------
/script/Dockerfile.China:
--------------------------------------------------------------------------------
# Image variant that fetches its font through a ghgo.xyz-prefixed GitHub URL
# (presumably a download proxy for mainland China -- confirm).
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim

WORKDIR /app


# Port published by the container (presumably the web UI started by `pdf2zh -i`).
EXPOSE 7860

ENV PYTHONUNBUFFERED=1
# Fetch the Go Noto font into /app at build time.
ADD "https://ghgo.xyz/https://github.com/satbyy/go-noto-universal/releases/download/v7.0/GoNotoKurrent-Regular.ttf" /app
# Single layer: install libgl1, drop the apt lists, install huggingface-hub and
# download the DocLayout-YOLO ONNX model before the sources are copied in.
RUN apt-get update && \
    apt-get install --no-install-recommends -y libgl1 && \
    rm -rf /var/lib/apt/lists/* && uv pip install --system --no-cache huggingface-hub && \
    python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download('wybxc/DocLayout-YOLO-DocStructBench-onnx','doclayout_yolo_docstructbench_imgsz1024.onnx');"

# Copy the project sources and install the package itself.
COPY . .

RUN uv pip install --system --no-cache .

CMD ["pdf2zh", "-i"]
20 |
--------------------------------------------------------------------------------
/script/Dockerfile.Demo:
--------------------------------------------------------------------------------
# Demo image: installs pdf2zh from the build context and starts `pdf2zh -i`.
FROM python:3.12

WORKDIR /app

EXPOSE 7860

ENV PYTHONUNBUFFERED=1

# System dependency first, in its own layer, so that source changes do not
# invalidate it; the apt lists are removed to keep the layer small.
RUN apt-get update && \
    apt-get install --no-install-recommends -y libgl1 && \
    rm -rf /var/lib/apt/lists/*

COPY . .

RUN pip install --no-cache-dir .

# World-writable working directories, created in a single layer.
RUN mkdir -p /data /app /.cache ./gradio_files && \
    chmod 777 /data /app /.cache ./gradio_files

CMD ["pdf2zh", "-i"]
25 |
--------------------------------------------------------------------------------
/script/_pystand_static.int:
--------------------------------------------------------------------------------
import sys
import pdf2zh.pdf2zh
import os
import babeldoc.assets.assets
import pathlib

# Launcher script: restores bundled offline assets found next to this file,
# then hands control to pdf2zh's command-line entry point.

# True when the launcher was started without any argument; in that case the
# console is kept open at the end until the user presses Enter.
WAIT_FOR_INPUT = len(sys.argv) == 1
if WAIT_FOR_INPUT:
    sys.argv.append("-i")  # no arguments given: add -i automatically


def _pause_if_needed():
    """Wait for Enter when the launcher was started without arguments."""
    if WAIT_FOR_INPUT:
        input("Press Enter to continue...")


script_dir = os.path.dirname(__file__)
for entry in os.listdir(script_dir):
    if entry.startswith("offline_assets_") and entry.endswith(".zip"):
        print('find offline_assets_zip file: ', entry, ' try restore...')
        babeldoc.assets.assets.restore_offline_assets_package(pathlib.Path(script_dir))

try:
    code = pdf2zh.pdf2zh.main()
    print(f"pdf2zh.pdf2zh.main() return code: {code}")
    _pause_if_needed()
    sys.exit(code)
except Exception:
    # Show the failure, then exit with a non-zero status.
    import traceback
    traceback.print_exc()
    _pause_if_needed()
    sys.exit(1)
--------------------------------------------------------------------------------
/script/setup.bat:
--------------------------------------------------------------------------------
@echo off
rem Bootstrap script: downloads an embeddable Python into pdf2zh_dist,
rem installs pip and pdf2zh into it, then starts "pdf2zh -i".
rem Delayed expansion is enabled because the variables below are read as !VAR!.
setlocal enabledelayedexpansion

rem Download locations. HF_ENDPOINT is only set here, not used by this script
rem (presumably read by huggingface_hub at runtime -- confirm).
set PYTHON_URL=https://www.python.org/ftp/python/3.12.7/python-3.12.7-embed-amd64.zip
set PIP_URL=https://bootstrap.pypa.io/get-pip.py
set HF_ENDPOINT=https://hf-mirror.com
set PIP_MIRROR=https://mirrors.aliyun.com/pypi/simple

rem Step 1: fetch and unpack the embeddable Python unless it is already there,
rem then append "import site" to python312._pth (presumably so that installed
rem packages are importable in the embedded distribution -- confirm).
if not exist pdf2zh_dist/python.exe (
    powershell -Command "& {Invoke-WebRequest -Uri !PYTHON_URL! -OutFile python.zip}"
    powershell -Command "& {Expand-Archive -Path python.zip -DestinationPath pdf2zh_dist -Force}"
    del python.zip
    echo import site >> pdf2zh_dist/python312._pth
)
cd pdf2zh_dist

rem Step 2: install pip into the embedded Python unless it is already there.
if not exist Scripts/pip.exe (
    powershell -Command "& {Invoke-WebRequest -Uri !PIP_URL! -OutFile get-pip.py}"
    python get-pip.py
)
rem NOTE(review): "path Scripts" replaces PATH with the relative Scripts
rem directory instead of appending to it, so pip and pdf2zh below are resolved
rem from pdf2zh_dist\Scripts.
path Scripts

rem Step 3: install or upgrade setuptools and pdf2zh from the mirror, then launch.
pip install --no-warn-script-location --upgrade setuptools -i !PIP_MIRROR!
pip install --no-warn-script-location --upgrade pdf2zh -i !PIP_MIRROR!
pdf2zh -i

rem Keep the console window open after pdf2zh exits.
pause
28 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 120
3 | ignore = E203,E261,E501,W503,E741
4 | exclude = .git,build,dist,docs
--------------------------------------------------------------------------------
/test/file/translate.cli.font.unknown.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/test/file/translate.cli.font.unknown.pdf
--------------------------------------------------------------------------------
/test/file/translate.cli.plain.text.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/test/file/translate.cli.plain.text.pdf
--------------------------------------------------------------------------------
/test/file/translate.cli.text.with.figure.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/test/file/translate.cli.text.with.figure.pdf
--------------------------------------------------------------------------------
/test/test_cache.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from pdf2zh import cache
3 | import threading
4 | import multiprocessing
5 | import random
6 | import string
7 |
8 |
class TestCache(unittest.TestCase):
    """Tests for ``cache.TranslationCache`` run against the test database."""

    def setUp(self):
        # Handle returned by the cache module's test-database helper.
        self.test_db = cache.init_test_db()

    def tearDown(self):
        # Hand the handle back to the cache module for clean-up.
        cache.clean_test_db(self.test_db)

    def test_basic_set_get(self):
        """A missing entry reads as None; a stored entry reads back unchanged."""
        store = cache.TranslationCache("test_engine")

        # Nothing has been stored yet.
        self.assertIsNone(store.get("hello"))

        # Round trip.
        store.set("hello", "你好")
        self.assertEqual(store.get("hello"), "你好")

    def test_cache_overwrite(self):
        """Storing the same text twice keeps the latest translation."""
        store = cache.TranslationCache("test_engine")

        store.set("hello", "你好")
        store.set("hello", "您好")

        self.assertEqual(store.get("hello"), "您好")

    def test_non_string_params(self):
        """Dict, list and nested parameters are all accepted as engine params."""
        # Flat dictionary.
        dict_params = {"model": "gpt-3.5", "temperature": 0.7}
        dict_store = cache.TranslationCache("test_engine", dict_params)
        dict_store.set("hello", "你好")
        self.assertEqual(dict_store.get("hello"), "你好")

        # List.
        list_params = ["param1", "param2"]
        list_store = cache.TranslationCache("test_engine", list_params)
        list_store.set("hello", "你好2")
        self.assertEqual(list_store.get("hello"), "你好2")

        # Nested structure.
        nested_params = {"options": {"temp": 0.8, "models": ["a", "b"]}}
        nested_store = cache.TranslationCache("test_engine", nested_params)
        nested_store.set("hello", "你好3")
        self.assertEqual(nested_store.get("hello"), "你好3")

    def test_engine_distinction(self):
        """The same text is cached separately per translation engine."""
        first = cache.TranslationCache("engine1")
        second = cache.TranslationCache("engine2")

        first.set("hello", "你好 1")
        second.set("hello", "你好 2")

        self.assertEqual(first.get("hello"), "你好 1")
        self.assertEqual(second.get("hello"), "你好 2")

    def test_params_distinction(self):
        """The same text is cached separately per engine parameter set."""
        first = cache.TranslationCache("test_engine", {"param": "value1"})
        second = cache.TranslationCache("test_engine", {"param": "value2"})

        first.set("hello", "你好 1")
        second.set("hello", "你好 2")

        self.assertEqual(first.get("hello"), "你好 1")
        self.assertEqual(second.get("hello"), "你好 2")

    def test_consistent_param_serialization(self):
        """Key order must not affect ``translate_engine_params``; values must."""
        same_content_pairs = [
            # simple dictionary
            ({"b": 1, "a": 2}, {"a": 2, "b": 1}),
            # nested dictionary
            (
                {"outer2": {"inner2": 2, "inner1": 1}, "outer1": 3},
                {"outer1": 3, "outer2": {"inner1": 1, "inner2": 2}},
            ),
            # dictionary holding a list of dictionaries
            ({"b": [{"y": 1, "x": 2}], "a": 3}, {"a": 3, "b": [{"x": 2, "y": 1}]}),
        ]
        for left, right in same_content_pairs:
            left_store = cache.TranslationCache("test_engine", left)
            right_store = cache.TranslationCache("test_engine", right)
            self.assertEqual(
                left_store.translate_engine_params,
                right_store.translate_engine_params,
            )

        # Different values must still be told apart.
        left_store = cache.TranslationCache("test_engine", {"a": 1, "b": 2})
        right_store = cache.TranslationCache("test_engine", {"a": 2, "b": 1})
        self.assertNotEqual(
            left_store.translate_engine_params, right_store.translate_engine_params
        )

    def test_cache_with_sorted_params(self):
        """An entry written under one key order is readable under another."""
        writer_params = {"b": [{"y": 1, "x": 2}], "a": 3}
        reader_params = {"a": 3, "b": [{"x": 2, "y": 1}]}

        writer = cache.TranslationCache("test_engine", writer_params)
        writer.set("hello", "你好")

        reader = cache.TranslationCache("test_engine", reader_params)
        self.assertEqual(reader.get("hello"), "你好")

    def test_append_params(self):
        """``add_params`` adds new keys, overwrites existing ones, cache keeps working."""
        store = cache.TranslationCache("test_engine", {"initial": "value"})

        # A new key is added next to the existing one.
        store.add_params("new_param", "new_value")
        self.assertEqual(
            store.params, {"initial": "value", "new_param": "new_value"}
        )

        # The cache is usable with the extended parameters.
        store.set("hello", "你好")
        self.assertEqual(store.get("hello"), "你好")

        # An existing key is overwritten.
        store.add_params("initial", "new_value")
        self.assertEqual(
            store.params, {"initial": "new_value", "new_param": "new_value"}
        )

        # The cache is usable with the updated parameters.
        store.set("hello2", "你好2")
        self.assertEqual(store.get("hello2"), "你好2")
161 |
162 | # Sometimes the problem of "database is locked" occurs. Temporarily disable this test.
163 | # def test_thread_safety(self):
164 | # """Test thread safety of cache operations"""
165 | # cache_instance = cache.TranslationCache("test_engine")
166 | # lock = threading.Lock()
167 | # results = []
168 | # num_threads = multiprocessing.cpu_count()
169 | # items_per_thread = 100
170 |
171 | # def generate_random_text(length=10):
172 | # return "".join(
173 | # random.choices(string.ascii_letters + string.digits, k=length)
174 | # )
175 |
176 | # def worker():
177 | # thread_results = [] # 线程本地存储结果
178 | # for _ in range(items_per_thread):
179 | # text = generate_random_text()
180 | # translation = f"翻译_{text}"
181 |
182 | # # Write operation
183 | # cache_instance.set(text, translation)
184 |
185 | # # Read operation - verify our own write
186 | # result = cache_instance.get(text)
187 | # thread_results.append((text, result))
188 |
189 | # # 所有操作完成后,一次性加锁并追加结果
190 | # with lock:
191 | # results.extend(thread_results)
192 |
193 | # # Create threads equal to CPU core count
194 | # threads = []
195 | # for _ in range(num_threads):
196 | # thread = threading.Thread(target=worker)
197 | # threads.append(thread)
198 | # thread.start()
199 |
200 | # # Wait for all threads to complete
201 | # for thread in threads:
202 | # thread.join()
203 |
204 | # # Verify all operations were successful
205 | # expected_total = num_threads * items_per_thread
206 | # self.assertEqual(len(results), expected_total)
207 |
208 | # # Verify each thread got its correct value
209 | # for text, result in results:
210 | # expected = f"翻译_{text}"
211 | # self.assertEqual(result, expected)
212 |
213 |
# Run the cache tests when this module is executed directly.
if __name__ == "__main__":
    unittest.main()
216 |
--------------------------------------------------------------------------------
/test/test_converter.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from unittest.mock import Mock, patch, MagicMock
3 | from pdfminer.layout import LTPage, LTChar, LTLine
4 | from pdfminer.pdfinterp import PDFResourceManager
5 | from pdf2zh.converter import PDFConverterEx, TranslateConverter
6 |
7 |
class TestPDFConverterEx(unittest.TestCase):
    """Tests for ``PDFConverterEx`` built on a real ``PDFResourceManager``."""

    def setUp(self):
        self.rsrcmgr = PDFResourceManager()
        self.converter = PDFConverterEx(self.rsrcmgr)

    def test_begin_page(self):
        """``begin_page`` creates a current item carrying the page number."""
        page = Mock()
        page.pageno = 1
        page.cropbox = (0, 0, 100, 200)
        identity_ctm = [1, 0, 0, 1, 0, 0]

        self.converter.begin_page(page, identity_ctm)

        current = self.converter.cur_item
        self.assertIsNotNone(current)
        self.assertEqual(current.pageid, 1)

    def test_render_char(self):
        """``render_char`` returns the expected text width for a mocked font."""
        font = Mock()
        font.to_unichr.return_value = "A"
        font.char_width.return_value = 10
        font.char_disp.return_value = (0, 0)
        self.converter.cur_item = Mock()

        width = self.converter.render_char(
            (1, 2, 3, 4, 5, 6),
            font,
            fontsize=12,
            scaling=1.0,
            rise=0,
            cid=65,
            ncs=None,
            graphicstate=Mock(),
        )

        # Expected text width.
        self.assertEqual(width, 120.0)
41 |
42 |
class TestTranslateConverter(unittest.TestCase):
    """Tests for ``TranslateConverter`` configured with the "google" service."""

    def setUp(self):
        self.rsrcmgr = PDFResourceManager()
        self.layout = {1: Mock()}
        self.translator_class = Mock()
        converter_kwargs = dict(
            layout=self.layout,
            lang_in="en",
            lang_out="zh",
            service="google",
        )
        self.converter = TranslateConverter(self.rsrcmgr, **converter_kwargs)

    def test_translator_initialization(self):
        """The converter owns a translator with the expected language pair."""
        translator = self.converter.translator
        self.assertIsNotNone(translator)
        self.assertEqual(translator.lang_in, "en")
        self.assertEqual(translator.lang_out, "zh-CN")

    @patch("pdf2zh.converter.TranslateConverter.receive_layout")
    def test_receive_layout(self, mock_receive_layout):
        """The patched ``receive_layout`` is called once with the page."""
        page = LTPage(1, (0, 0, 100, 200))
        font = Mock()
        font.fontname.return_value = "mock_font"
        char = LTChar(
            matrix=(1, 2, 3, 4, 5, 6),
            font=font,
            fontsize=12,
            scaling=1.0,
            rise=0,
            text="A",
            textwidth=10,
            textdisp=(1.0, 1.0),
            ncs=Mock(),
            graphicstate=Mock(),
        )
        page.add(char)

        self.converter.receive_layout(page)

        mock_receive_layout.assert_called_once_with(page)

    def test_receive_layout_with_complex_formula(self):
        """A page with a mocked char and a line yields a non-None result."""
        page = LTPage(1, (0, 0, 500, 500))
        char = Mock()
        char.fontname.return_value = "mock_font"
        line = LTLine(0.1, (0, 0), (10, 20))
        page.add(char)
        page.add(line)

        # Layout mock: 100x100 in shape, every lookup returns -1.
        page_layout = MagicMock()
        page_layout.shape = (100, 100)
        page_layout.__getitem__.return_value = -1
        self.converter.layout = [None, page_layout]
        self.converter.thread = 1

        self.assertIsNotNone(self.converter.receive_layout(page))

    def test_invalid_translation_service(self):
        """An unknown service name is rejected with ``ValueError``."""
        with self.assertRaises(ValueError):
            TranslateConverter(
                self.rsrcmgr,
                layout=self.layout,
                lang_in="en",
                lang_out="zh",
                service="InvalidService",
            )
107 |
108 |
# Run the converter tests when this module is executed directly.
if __name__ == "__main__":
    unittest.main()
111 |
--------------------------------------------------------------------------------
/test/test_doclayout.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from unittest.mock import patch, MagicMock
3 | import numpy as np
4 | from pdf2zh.doclayout import (
5 | OnnxModel,
6 | YoloResult,
7 | YoloBox,
8 | )
9 |
10 |
class TestOnnxModel(unittest.TestCase):
    """Tests for ``OnnxModel`` with ONNX loading and the runtime session patched."""

    @patch("onnx.load")
    @patch("onnxruntime.InferenceSession")
    def setUp(self, mock_inference_session, mock_onnx_load):
        # Fake model whose metadata carries the stride and the class names.
        metadata = (("stride", "32"), ("names", "['class1', 'class2']"))
        fake_model = MagicMock()
        fake_model.metadata_props = [
            MagicMock(key=key, value=value) for key, value in metadata
        ]
        mock_onnx_load.return_value = fake_model

        # The path is never read because loading is patched.
        self.model_path = "fake_model_path.onnx"
        self.model = OnnxModel(self.model_path)

    def test_stride_property(self):
        """The stride is taken from the model metadata."""
        self.assertEqual(self.model.stride, 32)

    def test_resize_and_pad_image(self):
        """A 100x200 image is resized and padded to 512x1024."""
        source = np.ones((100, 200, 3), dtype=np.uint8)
        output = self.model.resize_and_pad_image(source, 1024)

        out_height, out_width = output.shape[0], output.shape[1]
        self.assertEqual(out_height, 512)
        self.assertEqual(out_width, 1024)

        # The output must be larger than the input in both dimensions.
        self.assertGreater(out_height - source.shape[0], 0)
        self.assertGreater(out_width - source.shape[1], 0)

    def test_scale_boxes(self):
        """Scaled boxes keep their shape and stay within the original image size."""
        model_input_shape = (1024, 1024)
        original_shape = (500, 300)
        boxes = np.array([[512, 512, 768, 768]])

        scaled = self.model.scale_boxes(model_input_shape, boxes, original_shape)

        self.assertEqual(scaled.shape, boxes.shape)
        self.assertTrue(np.all(scaled <= max(original_shape)))

    def test_predict(self):
        """``predict`` wraps the raw inference output in ``YoloResult``/``YoloBox``."""
        # Random stand-in for the inference output.
        raw_output = np.random.random((1, 300, 6))
        self.model.model.run.return_value = [raw_output]

        picture = np.ones((500, 300, 3), dtype=np.uint8)
        results = self.model.predict(picture)

        self.assertEqual(len(results), 1)
        first = results[0]
        self.assertIsInstance(first, YoloResult)
        self.assertGreater(len(first.boxes), 0)
        self.assertIsInstance(first.boxes[0], YoloBox)
72 |
73 |
class TestYoloResult(unittest.TestCase):
    """Tests for ``YoloResult``."""

    def test_yolo_result(self):
        """Both boxes are kept, ordered by descending confidence, names preserved."""
        names = ["class1", "class2"]
        raw_boxes = [
            [100, 200, 300, 400, 0.9, 0],
            [50, 100, 150, 200, 0.8, 1],
        ]

        result = YoloResult(raw_boxes, names)

        self.assertEqual(len(result.boxes), 2)
        top, runner_up = result.boxes[0], result.boxes[1]
        self.assertGreater(top.conf, runner_up.conf)
        self.assertEqual(result.names, names)
89 |
90 |
class TestYoloBox(unittest.TestCase):
    """Tests for ``YoloBox``."""

    def test_yolo_box(self):
        """Coordinates, confidence and class are read from the raw row."""
        raw = [100, 200, 300, 400, 0.9, 0]
        coords, confidence, class_id = raw[:4], raw[4], raw[5]

        box = YoloBox(raw)

        self.assertEqual(box.xyxy, coords)
        self.assertEqual(box.conf, confidence)
        self.assertEqual(box.cls, class_id)
102 |
103 |
# Run the layout-model tests when this module is executed directly.
if __name__ == "__main__":
    unittest.main()
106 |
--------------------------------------------------------------------------------
/test/test_translator.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from textwrap import dedent
3 | from unittest import mock
4 |
5 | from ollama import ResponseError as OllamaResponseError
6 |
7 | from pdf2zh import cache
8 | from pdf2zh.config import ConfigManager
9 | from pdf2zh.translator import BaseTranslator, OllamaTranslator, OpenAIlikedTranslator
10 |
11 | # Since it is necessary to test whether the functionality meets the expected requirements,
12 | # private functions and private methods are allowed to be called.
13 | # pyright: reportPrivateUsage=false
14 |
15 |
class AutoIncreaseTranslator(BaseTranslator):
    """Test double whose "translation" is a call counter rendered as text."""

    name = "auto_increase"
    n = 0

    def do_translate(self, text):
        """Ignore ``text``; bump the counter and return it as a string."""
        self.n = self.n + 1
        return str(self.n)
23 |
24 |
class TestTranslator(unittest.TestCase):
    """Cache-related tests for ``BaseTranslator`` using a counting subclass."""

    def setUp(self):
        self.test_db = cache.init_test_db()

    def tearDown(self):
        cache.clean_test_db(self.test_db)

    def test_cache(self):
        """Repeated input is served from the cache unless the cache is ignored."""
        translator = AutoIncreaseTranslator("en", "zh", "test", False)
        source = "Hello World"

        # Two calls with the same text must agree: the second one is cached.
        initial = translator.translate(source)
        repeated = translator.translate(source)
        self.assertEqual(initial, repeated)

        # Another text produces another result.
        other = translator.translate("Different Text")
        self.assertNotEqual(initial, other)

        # With the cache ignored the counter advances again.
        translator.ignore_cache = True
        uncached = translator.translate(source)
        self.assertNotEqual(initial, uncached)

    def test_add_cache_impact_parameters(self):
        """Adding a cache-impact parameter invalidates earlier cache entries."""
        translator = AutoIncreaseTranslator("en", "zh", "test", False)
        source = "Hello World"

        # A new parameter changes the cache key.
        before_param = translator.translate(source)
        translator.add_cache_impact_parameters("test", "value")
        after_param = translator.translate(source)
        self.assertNotEqual(before_param, after_param)

        # Per-call ignore_cache.
        forced_by_argument = translator.translate(source, ignore_cache=True)
        self.assertNotEqual(before_param, forced_by_argument)

        # Instance-level ignore_cache.
        translator.ignore_cache = True
        forced_by_attribute = translator.translate(source)
        self.assertNotEqual(forced_by_argument, forced_by_attribute)

        # Cache enabled again: the most recent result is returned.
        translator.ignore_cache = False
        from_cache = translator.translate(source)
        self.assertEqual(forced_by_attribute, from_cache)

        # A further parameter changes the cache key once more.
        translator.add_cache_impact_parameters("test2", "value2")
        after_second_param = translator.translate(source)
        self.assertNotEqual(after_param, after_second_param)

    def test_base_translator_throw(self):
        """The base class does not implement translation itself."""
        base = BaseTranslator("en", "zh", "test", False)
        with self.assertRaises(NotImplementedError):
            base.translate("Hello World")
84 |
85 |
class TestOpenAIlikedTranslator(unittest.TestCase):
    """Initialisation tests for ``OpenAIlikedTranslator``."""

    def setUp(self) -> None:
        base_url = "https://api.openailiked.com"
        self.default_envs = {
            "OPENAILIKED_BASE_URL": base_url,
            "OPENAILIKED_API_KEY": "test_api_key",
            "OPENAILIKED_MODEL": "test_model",
        }

    def test_missing_base_url_raises_error(self):
        """A missing OPENAILIKED_BASE_URL raises ValueError."""
        ConfigManager.clear()
        with self.assertRaises(ValueError) as raised:
            OpenAIlikedTranslator(
                lang_in="en", lang_out="zh", model="test_model", envs={}
            )
        message = str(raised.exception)
        self.assertIn("The OPENAILIKED_BASE_URL is missing.", message)

    def test_missing_model_raises_error(self):
        """A missing OPENAILIKED_MODEL raises ValueError."""
        envs_without_model = {
            "OPENAILIKED_BASE_URL": "https://api.openailiked.com",
            "OPENAILIKED_API_KEY": "test_api_key",
        }
        ConfigManager.clear()
        with self.assertRaises(ValueError) as raised:
            OpenAIlikedTranslator(
                lang_in="en", lang_out="zh", model=None, envs=envs_without_model
            )
        message = str(raised.exception)
        self.assertIn("The OPENAILIKED_MODEL is missing.", message)

    def test_initialization_with_valid_envs(self):
        """Valid environment values are taken over by the translator."""
        ConfigManager.clear()
        expected = self.default_envs
        translator = OpenAIlikedTranslator(
            lang_in="en",
            lang_out="zh",
            model=None,
            envs=expected,
        )
        for key in ("OPENAILIKED_BASE_URL", "OPENAILIKED_API_KEY"):
            self.assertEqual(translator.envs[key], expected[key])
        self.assertEqual(translator.model, expected["OPENAILIKED_MODEL"])

    def test_default_api_key_fallback(self):
        """Without OPENAILIKED_API_KEY the stored key is None."""
        envs_without_key = {
            "OPENAILIKED_BASE_URL": "https://api.openailiked.com",
            "OPENAILIKED_MODEL": "test_model",
        }
        ConfigManager.clear()
        translator = OpenAIlikedTranslator(
            lang_in="en",
            lang_out="zh",
            model=None,
            envs=envs_without_key,
        )
        self.assertEqual(
            translator.envs["OPENAILIKED_BASE_URL"],
            self.default_envs["OPENAILIKED_BASE_URL"],
        )
        self.assertIsNone(translator.envs["OPENAILIKED_API_KEY"])
153 |
154 |
class TestOllamaTranslator(unittest.TestCase):
    """Tests for OllamaTranslator's chat call and chain-of-thought stripping."""

    def test_do_translate(self):
        """do_translate sends the prompt/options to the client and strips the CoT block."""
        translator = OllamaTranslator(lang_in="en", lang_out="zh", model="test:3b")
        with mock.patch.object(translator, "client") as mock_client:
            chat_response = mock_client.chat.return_value
            # The fake reply opens with a literal <think>...</think> reasoning
            # block; without these markers "Thinking..." would be ordinary
            # text and the equality assertion below could not hold.
            chat_response.message.content = dedent(
                """\
                <think>
                Thinking...
                </think>

                天空呈现蓝色是因为...
                """
            )

            text = "The sky appears blue because of..."
            translated_result = translator.do_translate(text)
            mock_client.chat.assert_called_once_with(
                model="test:3b",
                messages=translator.prompt(text, prompt_template=None),
                options={
                    "temperature": translator.options["temperature"],
                    "num_predict": translator.options["num_predict"],
                },
            )
            self.assertEqual("天空呈现蓝色是因为...", translated_result)

            # Response error.
            # NOTE(review): this calls the mock directly, so it only checks
            # the mock's side_effect, not how do_translate reacts to the
            # error - consider asserting on translator.do_translate instead.
            mock_client.chat.side_effect = OllamaResponseError("an error status")
            with self.assertRaises(OllamaResponseError):
                mock_client.chat()

    def test_remove_cot_content(self):
        """_remove_cot_content drops a leading <think> block and nothing else."""
        fake_cot_resp_text = dedent(
            """\
            <think>

            </think>

            The sky appears blue because of..."""
        )
        removed_cot_content = OllamaTranslator._remove_cot_content(fake_cot_resp_text)
        expected_content = "The sky appears blue because of..."
        self.assertEqual(expected_content, removed_cot_content.strip())

        # Content without a CoT block must be returned unchanged.
        non_cot_content = OllamaTranslator._remove_cot_content(expected_content)
        self.assertEqual(expected_content, non_cot_content)

        # `_remove_cot_content` should not process text that's outside the
        # `<think></think>` tags: later occurrences of the closing tag are
        # part of the answer and must survive.
        fake_cot_resp_text_with_think_tag = dedent(
            """\
            <think>

            </think>

            The sky appears blue because of......
            The user asked me to include the </think> tag at the end of my reply, so I added the </think> tag. </think>"""
        )

        only_removed_cot_content = OllamaTranslator._remove_cot_content(
            fake_cot_resp_text_with_think_tag
        )
        expected_retained_content = dedent(
            """\
            The sky appears blue because of......
            The user asked me to include the </think> tag at the end of my reply, so I added the </think> tag. </think>"""
        )
        self.assertEqual(expected_retained_content, only_removed_cot_content.strip())
225 |
226 |
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
229 |
--------------------------------------------------------------------------------