├── .dockerignore ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── blank.yaml │ ├── config.yml │ ├── 功能请求_cn.yaml │ ├── 功能请求_en.yaml │ ├── 问题反馈_cn.yaml │ └── 问题反馈_en.yaml ├── dependabot.yml ├── release-drafter.yml └── workflows │ ├── black.format.yml │ ├── exe-build.yml │ ├── fork-build.yml │ ├── fork-test.yml │ ├── python-publish.yml │ └── python-test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── Dockerfile ├── LICENSE ├── README.md ├── app.json ├── docs ├── ADVANCED.md ├── APIS.md ├── CODE_OF_CONDUCT.md ├── README_GUI.md ├── README_ja-JP.md ├── README_ko-KR.md ├── README_zh-CN.md ├── README_zh-TW.md └── images │ ├── after.png │ ├── banner.png │ ├── before.png │ ├── cmd.explained.png │ ├── cmd.explained.zh.png │ ├── gui.gif │ └── preview.gif ├── pdf2zh ├── __init__.py ├── backend.py ├── cache.py ├── config.py ├── converter.py ├── doclayout.py ├── gui.py ├── high_level.py ├── mcp_server.py ├── pdf2zh.py ├── pdfinterp.py └── translator.py ├── pyproject.toml ├── script ├── Dockerfile.China ├── Dockerfile.Demo ├── _pystand_static.int └── setup.bat ├── setup.cfg └── test ├── file ├── translate.cli.font.unknown.pdf ├── translate.cli.plain.text.pdf └── translate.cli.text.with.figure.pdf ├── test_cache.py ├── test_converter.py ├── test_doclayout.py └── test_translator.py /.dockerignore: -------------------------------------------------------------------------------- 1 | .github 2 | docs 3 | .git 4 | .pre-commit-config.yaml 5 | uv.lock 6 | pdf2zh_files 7 | gui/pdf2zh_files 8 | gradio_files 9 | tmp 10 | gui/gradio_files 11 | gui/tmp 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | 17 | # C extensions 18 | *.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | build/ 23 | develop-eggs/ 24 | dist/ 25 | downloads/ 26 | eggs/ 27 | .eggs/ 28 | lib/ 29 | lib64/ 30 | parts/ 31 | sdist/ 32 | var/ 33 | wheels/ 34 | share/python-wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | MANIFEST 39 | 40 | # PyInstaller 
41 | # Usually these files are written by a python script from a template 42 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 43 | *.manifest 44 | *.spec 45 | 46 | # Installer logs 47 | pip-log.txt 48 | pip-delete-this-directory.txt 49 | 50 | # Unit test / coverage reports 51 | htmlcov/ 52 | .tox/ 53 | .nox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | *.cover 60 | *.py,cover 61 | .hypothesis/ 62 | .pytest_cache/ 63 | cover/ 64 | 65 | # Translations 66 | *.mo 67 | *.pot 68 | 69 | # Django stuff: 70 | *.log 71 | local_settings.py 72 | db.sqlite3 73 | db.sqlite3-journal 74 | 75 | # Flask stuff: 76 | instance/ 77 | .webassets-cache 78 | 79 | # Scrapy stuff: 80 | .scrapy 81 | 82 | # Sphinx documentation 83 | docs/_build/ 84 | 85 | # PyBuilder 86 | .pybuilder/ 87 | target/ 88 | 89 | # Jupyter Notebook 90 | .ipynb_checkpoints 91 | 92 | # IPython 93 | profile_default/ 94 | ipython_config.py 95 | 96 | # pyenv 97 | # For a library or package, you might want to ignore these files since the code is 98 | # intended to run in multiple environments; otherwise, check them in: 99 | # .python-version 100 | 101 | # pipenv 102 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 103 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 104 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 105 | # install all needed dependencies. 106 | #Pipfile.lock 107 | 108 | # poetry 109 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 110 | # This is especially recommended for binary packages to ensure reproducibility, and is more 111 | # commonly ignored for libraries. 
112 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 113 | #poetry.lock 114 | 115 | # pdm 116 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 117 | #pdm.lock 118 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 119 | # in version control. 120 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 121 | .pdm.toml 122 | .pdm-python 123 | .pdm-build/ 124 | 125 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 126 | __pypackages__/ 127 | 128 | # Celery stuff 129 | celerybeat-schedule 130 | celerybeat.pid 131 | 132 | # SageMath parsed files 133 | *.sage.py 134 | 135 | # Environments 136 | .env 137 | .venv 138 | env/ 139 | venv/ 140 | ENV/ 141 | env.bak/ 142 | venv.bak/ 143 | 144 | # Spyder project settings 145 | .spyderproject 146 | .spyproject 147 | 148 | # Rope project settings 149 | .ropeproject 150 | 151 | # mkdocs documentation 152 | /site 153 | 154 | # mypy 155 | .mypy_cache/ 156 | .dmypy.json 157 | dmypy.json 158 | 159 | # Pyre type checker 160 | .pyre/ 161 | 162 | # pytype static type analyzer 163 | .pytype/ 164 | 165 | # Cython debug symbols 166 | cython_debug/ 167 | 168 | # PyCharm 169 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 170 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 171 | # and can be added to the global gitignore or merged into this file. For a more nuclear 172 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
173 | .idea/ 174 | .vscode 175 | .DS_Store 176 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [Byaidu, reycn, Wybxc, hellofinch] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 12 | polar: # Replace with a single Polar username 13 | buy_me_a_coffee: # Replace with a single Buy Me a Coffee username 14 | thanks_dev: # Replace with a single thanks.dev username 15 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/blank.yaml: -------------------------------------------------------------------------------- 1 | name: Blank Issue 2 | description: Create a blank issue for discussion 3 | body: 4 | - type: checkboxes 5 | id: checks 6 | attributes: 7 | label: before ... 8 | options: 9 | - label: This issue is not about question or bug. 
10 | required: true 11 | - type: textarea 12 | id: describe 13 | attributes: 14 | label: Add a description -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/功能请求_cn.yaml: -------------------------------------------------------------------------------- 1 | name: 功能请求 2 | description: 使用中文进行功能请求 3 | labels: ['enhancement'] 4 | body: 5 | - type: textarea 6 | id: describe 7 | attributes: 8 | label: 在什么场景下,需要你请求的功能? 9 | description: 简要描述相关的使用场景 10 | validations: 11 | required: false 12 | - type: textarea 13 | id: solution 14 | attributes: 15 | label: 解决方案 16 | description: 描述你想要的解决方案 17 | validations: 18 | required: false 19 | - type: textarea 20 | id: additional 21 | attributes: 22 | label: 其他内容 23 | description: 关于该功能请求的任何其他项目。 24 | validations: 25 | required: false -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/功能请求_en.yaml: -------------------------------------------------------------------------------- 1 | name: Feature request 2 | description: Suggest an idea for this project 3 | labels: ['enhancement'] 4 | body: 5 | - type: textarea 6 | id: describe 7 | attributes: 8 | label: Is your feature request related to a problem? 9 | description: A clear and concise description of what the problem is 10 | placeholder: Ex. I'm always frustrated when ... 
11 | validations: 12 | required: false 13 | - type: textarea 14 | id: solution 15 | attributes: 16 | label: Describe the solution you'd like 17 | description: A clear and concise description of what you want to happen 18 | validations: 19 | required: false 20 | - type: textarea 21 | id: additional 22 | attributes: 23 | label: Additional context 24 | description: Add any other context about the feature request here. 25 | validations: 26 | required: false -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/问题反馈_cn.yaml: -------------------------------------------------------------------------------- 1 | name: 上报 Bug 2 | description: 使用中文进行 Bug 报告 3 | labels: ['bug'] 4 | body: 5 | - type: checkboxes 6 | id: checks 7 | attributes: 8 | label: 在提问之前... 9 | options: 10 | - label: 我已经搜索了现有的 issues 11 | required: true 12 | - label: 我在提问题之前至少花费了 5 分钟来思考和准备 13 | required: true 14 | - label: 我已经认真且完整的阅读了 wiki 15 | required: true 16 | - label: 我已经认真检查了问题和网络环境无关(包括但不限于Google不可用,模型下载失败) 17 | required: true 18 | - type: markdown 19 | attributes: 20 | value: | 21 | 感谢您使用本项目并反馈! 22 | 请再次确认上述复选框所述的内容已经认真执行! 23 | - type: textarea 24 | id: environment 25 | attributes: 26 | label: 使用的环境 27 | description: | 28 | examples: 29 | - **OS**: Ubuntu 24.10 30 | - **Python**: 3.12.0 31 | - **pdf2zh**: 1.9.0 32 | value: | 33 | - OS: 34 | - Python: 35 | - pdf2zh: 36 | render: markdown 37 | validations: 38 | required: false 39 | - type: dropdown 40 | id: install 41 | attributes: 42 | label: 请选择安装方式 43 | options: 44 | - pip 45 | - exe 46 | - 源码 47 | - docker 48 | validations: 49 | required: true 50 | - type: textarea 51 | id: describe 52 | attributes: 53 | label: 描述你的问题 54 | description: 简要描述你的问题 55 | validations: 56 | required: true 57 | - type: textarea 58 | id: reproduce 59 | attributes: 60 | label: 如何复现 61 | description: 重现该行为的步骤 62 | value: | 63 | 1. 执行 '...' 64 | 2. 选择 '....' 65 | 3. 
出现问题 66 | validations: 67 | required: false 68 | - type: textarea 69 | id: expected 70 | attributes: 71 | label: 预期行为 72 | description: 简要描述你期望得到的反馈 73 | validations: 74 | required: false 75 | - type: textarea 76 | id: logs 77 | attributes: 78 | label: 相关 Logs 79 | description: 请复制并粘贴任何相关的日志输出。 80 | render: Text 81 | validations: 82 | required: false 83 | - type: textarea 84 | id: PDFfile 85 | attributes: 86 | label: 原始PDF文件 87 | description: | 88 | 如果涉及到排版错误的问题,请一定提供原始的PDF文件,方便复现错误。 89 | validations: 90 | required: false 91 | - type: textarea 92 | id: others 93 | attributes: 94 | label: 还有别的吗? 95 | description: | 96 | 相关的配置?链接?参考资料? 97 | 任何能让我们对你所遇到的问题有更多了解的东西。 98 | validations: 99 | required: false -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/问题反馈_en.yaml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: Create a report to help us improve 3 | labels: ['bug'] 4 | body: 5 | - type: checkboxes 6 | id: checks 7 | attributes: 8 | label: Before you ask 9 | options: 10 | - label: I have searched the existing issues 11 | required: true 12 | - label: I spent at least 5 minutes thinking and preparing 13 | required: true 14 | - label: I have thoroughly and completely read the wiki. 15 | required: true 16 | - label: I have carefully checked the issue, and it is unrelated to the network environment. 17 | required: true 18 | - type: markdown 19 | attributes: 20 | value: | 21 | Thank you for using this project and providing feedback! 
22 | - type: textarea 23 | id: environment 24 | attributes: 25 | label: Environment 26 | description: | 27 | examples: 28 | - **OS**: Ubuntu 24.10 29 | - **Python**: 3.12.0 30 | - **pdf2zh**: 1.9.0 31 | value: | 32 | - OS: 33 | - Python: 34 | - pdf2zh: 35 | render: markdown 36 | validations: 37 | required: false 38 | - type: dropdown 39 | id: install 40 | attributes: 41 | label: How did you install pdf2zh 42 | options: 43 | - pip 44 | - exe 45 | - source 46 | - docker 47 | validations: 48 | required: true 49 | - type: textarea 50 | id: describe 51 | attributes: 52 | label: Describe the bug 53 | description: A clear and concise description of what the bug is. 54 | validations: 55 | required: true 56 | - type: textarea 57 | id: reproduce 58 | attributes: 59 | label: To Reproduce 60 | description: Steps to reproduce the behavior 61 | value: | 62 | 1. execute '...' 63 | 2. select '....' 64 | 3. see errors 65 | validations: 66 | required: false 67 | - type: textarea 68 | id: expected 69 | attributes: 70 | label: Expected behavior 71 | description: A clear and concise description of what you expected to happen. 72 | validations: 73 | required: false 74 | - type: textarea 75 | id: logs 76 | attributes: 77 | label: Relevant log output 78 | description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. 79 | render: Text 80 | validations: 81 | required: false 82 | - type: textarea 83 | id: PDFfile 84 | attributes: 85 | label: Original PDF file 86 | description: | 87 | If the issue involves formatting errors, please provide the original PDF file to facilitate reproduction of the error. 88 | validations: 89 | required: false 90 | - type: textarea 91 | id: others 92 | attributes: 93 | label: Anything else? 94 | description: | 95 | Related configs? Links? References? 96 | Anything that will give us more context about the issue you are encountering! 
97 | validations: 98 | required: false -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: "/" 5 | schedule: 6 | interval: weekly 7 | # - package-ecosystem: pip 8 | # directory: "/.github/workflows" 9 | # schedule: 10 | # interval: weekly 11 | # - package-ecosystem: pip 12 | # directory: "/docs" 13 | # schedule: 14 | # interval: weekly 15 | - package-ecosystem: pip 16 | directory: "/" 17 | schedule: 18 | interval: weekly 19 | versioning-strategy: lockfile-only 20 | allow: 21 | - dependency-type: "all" -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name-template: 'v$RESOLVED_VERSION' 2 | tag-template: 'v$RESOLVED_VERSION' 3 | categories: 4 | - title: '🚀 Features' 5 | labels: 6 | - 'feature' 7 | - 'enhancement' 8 | - title: '🐛 Bug Fixes' 9 | labels: 10 | - 'fix' 11 | - 'bugfix' 12 | - 'bug' 13 | - title: '🧰 Maintenance' 14 | labels: 15 | - 'chore' 16 | - 'maintenance' 17 | - 'refactor' 18 | - title: '📝 Documentation' 19 | labels: 20 | - 'docs' 21 | - 'documentation' 22 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)' 23 | change-title-escapes: '\<*_&' # You can add # and @ to disable mentions 24 | version-resolver: 25 | major: 26 | labels: 27 | - 'major' 28 | minor: 29 | labels: 30 | - 'minor' 31 | patch: 32 | labels: 33 | - 'patch' 34 | default: patch 35 | template: | 36 | ## Changes 37 | 38 | $CHANGES 39 | 40 | ## Contributors 41 | 42 | $CONTRIBUTORS 43 | 44 | ## Windows Specific 45 | 46 | If you cannot open it after downloading, please install https://aka.ms/vs/17/release/vc_redist.x64.exe and try again. 
47 | 48 | ## Assets 49 | 50 | - pdf2zh-v$RESOLVED_VERSION-win64.zip: pdf2zh **without** assets(font, model, etc.) 51 | - pdf2zh-v$RESOLVED_VERSION-with-assets-win64.zip: (**Recommended**) pdf2zh **with** assets(font, model, etc.) 52 | 53 | > [!NOTE] 54 | > 55 | > The version without assets will also dynamically download resources when running, but the download may fail due to network issues. -------------------------------------------------------------------------------- /.github/workflows/black.format.yml: -------------------------------------------------------------------------------- 1 | name: Format Code with Black 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: psf/black@stable -------------------------------------------------------------------------------- /.github/workflows/exe-build.yml: -------------------------------------------------------------------------------- 1 | name: windows exe Release Workflow 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | release_version: 7 | description: 'Release Version (e.g., v1.0.0)' 8 | required: true 9 | type: string 10 | # push: 11 | # debug purpose 12 | env: 13 | WIN_EXE_PYTHON_VERSION: 3.12.9 14 | jobs: 15 | build-win64-exe: 16 | runs-on: windows-latest 17 | steps: 18 | - name: checkout babeldoc metadata 19 | uses: actions/checkout@v4 20 | with: 21 | repository: funstory-ai/BabelDOC 22 | path: babeldoctemp1234567 23 | token: ${{ secrets.GITHUB_TOKEN }} 24 | sparse-checkout: babeldoc/assets/embedding_assets_metadata.py 25 | - name: Cached Assets 26 | id: cache-assets 27 | uses: actions/cache@v4.2.2 28 | with: 29 | path: ~/.cache/babeldoc 30 | key: test-1-babeldoc-assets-${{ hashFiles('babeldoctemp1234567/babeldoc/assets/embedding_assets_metadata.py') }} 31 | - name: 检出代码 32 | uses: actions/checkout@v4 33 | 34 | - name: Setup uv with Python ${{ env.WIN_EXE_PYTHON_VERSION }} 35 | uses: 
astral-sh/setup-uv@f94ec6bedd8674c4426838e6b50417d36b6ab231 # v5.3.1 36 | with: 37 | python-version: ${{ env.WIN_EXE_PYTHON_VERSION }} 38 | enable-cache: true 39 | cache-dependency-glob: "pyproject.toml" 40 | 41 | - name: 执行所有任务(创建目录、下载、解压、复制文件、安装依赖) 42 | shell: pwsh 43 | run: | 44 | Write-Host "==== 删除 babeldoctemp1234567 文件夹 ====" 45 | if (Test-Path "./babeldoctemp1234567") { 46 | Remove-Item -Path "./babeldoctemp1234567" -Recurse -Force 47 | Write-Host "babeldoctemp1234567 文件夹已成功删除" 48 | } else { 49 | Write-Host "babeldoctemp1234567 文件夹不存在,无需删除" 50 | } 51 | Write-Host "==== 创建必要的目录 ====" 52 | New-Item -Path "./build" -ItemType Directory -Force 53 | New-Item -Path "./build/runtime" -ItemType Directory -Force 54 | New-Item -Path "./dep_build" -ItemType Directory -Force 55 | 56 | Write-Host "==== 复制代码到 dep_build ====" 57 | Get-ChildItem -Path "./" -Exclude "dep_build", "build" | Copy-Item -Destination "./dep_build" -Recurse -Force 58 | 59 | Write-Host "==== 下载并解压 Python ${{ env.WIN_EXE_PYTHON_VERSION }} ====" 60 | Write-Host "pythonUrl: https://www.python.org/ftp/python/${{ env.WIN_EXE_PYTHON_VERSION }}/python-${{ env.WIN_EXE_PYTHON_VERSION }}-embed-amd64.zip" 61 | $pythonUrl = "https://www.python.org/ftp/python/${{ env.WIN_EXE_PYTHON_VERSION }}/python-${{ env.WIN_EXE_PYTHON_VERSION }}-embed-amd64.zip" 62 | $pythonZip = "./dep_build/python.zip" 63 | Invoke-WebRequest -Uri $pythonUrl -OutFile $pythonZip 64 | Expand-Archive -Path $pythonZip -DestinationPath "./build/runtime" -Force 65 | 66 | Write-Host "==== 下载并解压 PyStand ====" 67 | $pystandUrl = "https://github.com/skywind3000/PyStand/releases/download/1.1.4/PyStand-v1.1.4-exe.zip" 68 | $pystandZip = "./dep_build/PyStand.zip" 69 | Invoke-WebRequest -Uri $pystandUrl -OutFile $pystandZip 70 | Expand-Archive -Path $pystandZip -DestinationPath "./dep_build/PyStand" -Force 71 | 72 | Write-Host "==== 复制 PyStand.exe 到 build 并重命名 ====" 73 | $pystandExe = "./dep_build/PyStand/PyStand-x64-CLI/PyStand.exe" 74 | $destExe = 
"./build/pdf2zh.exe" 75 | if (Test-Path $pystandExe) { 76 | Copy-Item -Path $pystandExe -Destination $destExe -Force 77 | } else { 78 | Write-Host "错误: PyStand.exe 未找到!" 79 | exit 1 80 | } 81 | Write-Host "==== 创建 Python venv 在 dep_build ====" 82 | uv venv ./dep_build/venv 83 | 84 | ./dep_build/venv/Scripts/activate 85 | 86 | Write-Host "==== 在 venv 环境中安装项目依赖 ====" 87 | uv pip install . 88 | 89 | Write-Host "==== 复制 venv/Lib/site-packages 到 build/ ====" 90 | Copy-Item -Path "./dep_build/venv/Lib/site-packages" -Destination "./build/site-packages" -Recurse -Force 91 | 92 | Write-Host "==== 复制 script/_pystand_static.int 到 build/ ====" 93 | $staticFile = "./script/_pystand_static.int" 94 | $destStatic = "./build/_pystand_static.int" 95 | if (Test-Path $staticFile) { 96 | Copy-Item -Path $staticFile -Destination $destStatic -Force 97 | } else { 98 | Write-Host "错误: script/_pystand_static.int 未找到!" 99 | exit 1 100 | } 101 | 102 | uv run --active babeldoc --generate-offline-assets ./build 103 | 104 | - name: Upload build artifact 105 | uses: actions/upload-artifact@v4 106 | with: 107 | name: win64-exe 108 | path: ./build 109 | if-no-files-found: error 110 | compression-level: 9 111 | include-hidden-files: true 112 | 113 | test-win64-exe: 114 | needs: 115 | - build-win64-exe 116 | runs-on: windows-latest 117 | steps: 118 | - name: 检出代码 119 | uses: actions/checkout@v4 120 | 121 | - name: Download build artifact 122 | uses: actions/download-artifact@v4 123 | with: 124 | name: win64-exe 125 | path: ./build 126 | 127 | - name: Test show version 128 | run: | 129 | ./build/pdf2zh.exe --version 130 | 131 | - name: Test - Translate a PDF file with plain text only 132 | run: | 133 | ./build/pdf2zh.exe ./test/file/translate.cli.plain.text.pdf -o ./test/file 134 | 135 | - name: Test - Translate a PDF file figure 136 | run: | 137 | ./build/pdf2zh.exe ./test/file/translate.cli.text.with.figure.pdf -o ./test/file 138 | 139 | - name: Delete offline assets and cache 140 | shell: pwsh 141 
| run: | 142 | Write-Host "==== 查找并删除离线资源包 ====" 143 | $offlineAssetsPath = Get-ChildItem -Path "./build" -Filter "offline_assets_*.zip" -Recurse | Select-Object -First 1 -ExpandProperty FullName 144 | if ($offlineAssetsPath) { 145 | Write-Host "找到离线资源包: $offlineAssetsPath" 146 | Remove-Item -Path $offlineAssetsPath -Force 147 | Write-Host "已删除离线资源包" 148 | } else { 149 | Write-Host "未找到离线资源包" 150 | } 151 | 152 | Write-Host "==== 删除缓存目录 ====" 153 | $cachePath = "$env:USERPROFILE/.cache/babeldoc" 154 | if (Test-Path $cachePath) { 155 | Remove-Item -Path $cachePath -Recurse -Force 156 | Write-Host "已删除缓存目录: $cachePath" 157 | } else { 158 | Write-Host "缓存目录不存在: $cachePath" 159 | } 160 | 161 | - name: Test - Translate without offline assets 162 | run: | 163 | ./build/pdf2zh.exe ./test/file/translate.cli.plain.text.pdf -o ./test/file 164 | 165 | - name: Upload test results 166 | uses: actions/upload-artifact@v4 167 | with: 168 | name: test-results 169 | path: ./test/file/ 170 | 171 | -------------------------------------------------------------------------------- /.github/workflows/fork-build.yml: -------------------------------------------------------------------------------- 1 | name: fork-build 2 | 3 | on: 4 | workflow_dispatch: 5 | # debug purpose 6 | # push: 7 | 8 | env: 9 | REGISTRY: ghcr.io 10 | REPO_LOWER: ${{ github.repository_owner }}/${{ github.event.repository.name }} 11 | GHCR_REPO: ghcr.io/${{ github.repository }} 12 | WIN_EXE_PYTHON_VERSION: 3.12.9 13 | jobs: 14 | check-repository: 15 | name: Check if running in main repository 16 | runs-on: ubuntu-latest 17 | outputs: 18 | is_main_repo: ${{ github.repository == 'Byaidu/PDFMathTranslate' }} 19 | steps: 20 | - run: echo "Running repository check" 21 | 22 | test: 23 | uses: ./.github/workflows/python-test.yml 24 | needs: check-repository 25 | if: needs.check-repository.outputs.is_main_repo != 'true' 26 | 27 | build: 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | include: 32 | - platform: linux/amd64 
33 | runner: ubuntu-latest 34 | - platform: linux/arm64 35 | runner: ubuntu-24.04-arm 36 | runs-on: ${{ matrix.runner }} 37 | needs: 38 | - check-repository 39 | - test 40 | if: needs.check-repository.outputs.is_main_repo != 'true' 41 | permissions: 42 | contents: read 43 | packages: write 44 | 45 | steps: 46 | - name: Convert to lowercase 47 | run: | 48 | echo "GHCR_REPO_LOWER=$(echo ${{ env.GHCR_REPO }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV 49 | 50 | - name: Prepare 51 | run: | 52 | platform=${{ matrix.platform }} 53 | echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV 54 | 55 | - name: Checkout repository 56 | uses: actions/checkout@v4 57 | 58 | - name: Docker meta 59 | id: meta 60 | uses: docker/metadata-action@v5 61 | with: 62 | images: | 63 | ${{ env.GHCR_REPO_LOWER }} 64 | 65 | - name: Login to GHCR 66 | uses: docker/login-action@v3 67 | with: 68 | registry: ghcr.io 69 | username: ${{ github.repository_owner }} 70 | password: ${{ secrets.GITHUB_TOKEN }} 71 | 72 | 73 | - name: Set up Docker Buildx 74 | uses: docker/setup-buildx-action@v3 75 | 76 | - name: Build and push by digest 77 | id: build 78 | uses: docker/build-push-action@v6 79 | with: 80 | platforms: ${{ matrix.platform }} 81 | labels: ${{ steps.meta.outputs.labels }} 82 | outputs: type=image,name=${{ env.GHCR_REPO_LOWER }},push-by-digest=true,name-canonical=true,push=true 83 | cache-from: ${{ matrix.platform == 'linux/amd64' && 'type=gha' || '' }} 84 | cache-to: ${{ matrix.platform == 'linux/amd64' && 'type=gha,mode=max' || '' }} 85 | 86 | - name: Export digest 87 | run: | 88 | mkdir -p ${{ runner.temp }}/digests 89 | digest="${{ steps.build.outputs.digest }}" 90 | touch "${{ runner.temp }}/digests/${digest#sha256:}" 91 | 92 | - name: Upload digest 93 | uses: actions/upload-artifact@v4 94 | with: 95 | name: digests-${{ env.PLATFORM_PAIR }} 96 | path: ${{ runner.temp }}/digests/* 97 | if-no-files-found: error 98 | retention-days: 1 99 | 100 | merge: 101 | runs-on: ubuntu-latest 102 | 
needs: 103 | - check-repository 104 | - test 105 | - build 106 | if: needs.check-repository.outputs.is_main_repo != 'true' 107 | permissions: 108 | contents: read 109 | packages: write 110 | 111 | steps: 112 | - name: Convert to lowercase 113 | run: | 114 | echo "GHCR_REPO_LOWER=$(echo ${{ env.GHCR_REPO }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV 115 | 116 | - name: Download digests 117 | uses: actions/download-artifact@v4 118 | with: 119 | path: ${{ runner.temp }}/digests 120 | pattern: digests-* 121 | merge-multiple: true 122 | 123 | - name: Login to GHCR 124 | uses: docker/login-action@v3 125 | with: 126 | registry: ghcr.io 127 | username: ${{ github.repository_owner }} 128 | password: ${{ secrets.GITHUB_TOKEN }} 129 | 130 | - name: Set up Docker Buildx 131 | uses: docker/setup-buildx-action@v3 132 | 133 | - name: Docker meta 134 | id: meta 135 | uses: docker/metadata-action@v5 136 | with: 137 | images: | 138 | ${{ env.GHCR_REPO_LOWER }} 139 | tags: | 140 | type=raw,value=dev 141 | type=semver,pattern={{version}} 142 | type=semver,pattern={{major}}.{{minor}} 143 | 144 | - name: Create manifest list and push 145 | working-directory: ${{ runner.temp }}/digests 146 | run: | 147 | docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) 
| join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ 148 | $(printf '${{ env.GHCR_REPO_LOWER }}@sha256:%s ' *) 149 | 150 | - name: Inspect image 151 | run: | 152 | docker buildx imagetools inspect ${{ env.GHCR_REPO_LOWER }}:${{ steps.meta.outputs.version }} 153 | 154 | build-win64-exe: 155 | runs-on: windows-latest 156 | needs: 157 | - check-repository 158 | if: needs.check-repository.outputs.is_main_repo != 'true' 159 | steps: 160 | - name: 检出代码 161 | uses: actions/checkout@v4 162 | 163 | - name: Setup uv with Python ${{ env.WIN_EXE_PYTHON_VERSION }} 164 | uses: astral-sh/setup-uv@f94ec6bedd8674c4426838e6b50417d36b6ab231 # v5.3.1 165 | with: 166 | python-version: ${{ env.WIN_EXE_PYTHON_VERSION }} 167 | enable-cache: true 168 | cache-dependency-glob: "pyproject.toml" 169 | 170 | - name: 执行所有任务(创建目录、下载、解压、复制文件、安装依赖) 171 | shell: pwsh 172 | run: | 173 | Write-Host "==== 创建必要的目录 ====" 174 | New-Item -Path "./build" -ItemType Directory -Force 175 | New-Item -Path "./build/runtime" -ItemType Directory -Force 176 | New-Item -Path "./dep_build" -ItemType Directory -Force 177 | 178 | Write-Host "==== 复制代码到 dep_build ====" 179 | Get-ChildItem -Path "./" -Exclude "dep_build", "build" | Copy-Item -Destination "./dep_build" -Recurse -Force 180 | 181 | Write-Host "==== 下载并解压 Python ${{ env.WIN_EXE_PYTHON_VERSION }} ====" 182 | Write-Host "pythonUrl: https://www.python.org/ftp/python/${{ env.WIN_EXE_PYTHON_VERSION }}/python-${{ env.WIN_EXE_PYTHON_VERSION }}-embed-amd64.zip" 183 | $pythonUrl = "https://www.python.org/ftp/python/${{ env.WIN_EXE_PYTHON_VERSION }}/python-${{ env.WIN_EXE_PYTHON_VERSION }}-embed-amd64.zip" 184 | $pythonZip = "./dep_build/python.zip" 185 | Invoke-WebRequest -Uri $pythonUrl -OutFile $pythonZip 186 | Expand-Archive -Path $pythonZip -DestinationPath "./build/runtime" -Force 187 | 188 | Write-Host "==== 下载 Visual C++ Redistributable 安装程序 ====" 189 | $vcRedistUrl = "https://aka.ms/vs/17/release/vc_redist.x64.exe" 190 | $vcRedistPath = 
"./build/无法运行请安装vc_redist.x64.exe" 191 | Invoke-WebRequest -Uri $vcRedistUrl -OutFile $vcRedistPath 192 | Write-Host "已下载 Visual C++ Redistributable 安装程序到: $vcRedistPath" 193 | 194 | Write-Host "==== 下载并解压 PyStand ====" 195 | $pystandUrl = "https://github.com/skywind3000/PyStand/releases/download/1.1.4/PyStand-v1.1.4-exe.zip" 196 | $pystandZip = "./dep_build/PyStand.zip" 197 | Invoke-WebRequest -Uri $pystandUrl -OutFile $pystandZip 198 | Expand-Archive -Path $pystandZip -DestinationPath "./dep_build/PyStand" -Force 199 | 200 | Write-Host "==== 复制 PyStand.exe 到 build 并重命名 ====" 201 | $pystandExe = "./dep_build/PyStand/PyStand-x64-CLI/PyStand.exe" 202 | $destExe = "./build/pdf2zh.exe" 203 | if (Test-Path $pystandExe) { 204 | Copy-Item -Path $pystandExe -Destination $destExe -Force 205 | } else { 206 | Write-Host "错误: PyStand.exe 未找到!" 207 | exit 1 208 | } 209 | Write-Host "==== 创建 Python venv 在 dep_build ====" 210 | uv venv ./dep_build/venv 211 | 212 | ./dep_build/venv/Scripts/activate 213 | 214 | Write-Host "==== 在 venv 环境中安装项目依赖 ====" 215 | uv pip install . 216 | 217 | Write-Host "==== 复制 venv/Lib/site-packages 到 build/ ====" 218 | Copy-Item -Path "./dep_build/venv/Lib/site-packages" -Destination "./build/site-packages" -Recurse -Force 219 | 220 | Write-Host "==== 复制 script/_pystand_static.int 到 build/ ====" 221 | $staticFile = "./script/_pystand_static.int" 222 | $destStatic = "./build/_pystand_static.int" 223 | if (Test-Path $staticFile) { 224 | Copy-Item -Path $staticFile -Destination $destStatic -Force 225 | } else { 226 | Write-Host "错误: script/_pystand_static.int 未找到!" 
227 | exit 1 228 | } 229 | 230 | - name: Upload build artifact 231 | uses: actions/upload-artifact@v4 232 | with: 233 | name: win64-exe 234 | path: ./build 235 | if-no-files-found: error 236 | compression-level: 1 237 | include-hidden-files: true 238 | 239 | test-win64-exe: 240 | needs: 241 | - build-win64-exe 242 | - check-repository 243 | if: needs.check-repository.outputs.is_main_repo != 'true' 244 | runs-on: windows-latest 245 | steps: 246 | - name: 检出代码 247 | uses: actions/checkout@v4 248 | 249 | - name: Download build artifact 250 | uses: actions/download-artifact@v4 251 | with: 252 | name: win64-exe 253 | path: ./build 254 | 255 | - name: Test show version (online mode) 256 | run: | 257 | ./build/pdf2zh.exe --version 258 | 259 | - name: Test - Translate a PDF file with plain text only (online mode) 260 | run: | 261 | ./build/pdf2zh.exe ./test/file/translate.cli.plain.text.pdf -o ./test/file 262 | 263 | - name: Test - Translate a PDF file figure 264 | run: | 265 | ./build/pdf2zh.exe ./test/file/translate.cli.text.with.figure.pdf -o ./test/file 266 | 267 | - name: Test - Translate without offline assets (online mode) 268 | run: | 269 | ./build/pdf2zh.exe ./test/file/translate.cli.plain.text.pdf -o ./test/file 270 | 271 | - name: Upload test results 272 | uses: actions/upload-artifact@v4 273 | with: 274 | name: test-results 275 | path: ./test/file/ 276 | if-no-files-found: error 277 | 278 | - name: Setup uv with Python ${{ env.WIN_EXE_PYTHON_VERSION }} 279 | uses: astral-sh/setup-uv@f94ec6bedd8674c4426838e6b50417d36b6ab231 # v5.3.1 280 | with: 281 | python-version: ${{ env.WIN_EXE_PYTHON_VERSION }} 282 | enable-cache: true 283 | cache-dependency-glob: "pyproject.toml" 284 | 285 | - name: Generate offline assets 286 | shell: pwsh 287 | run: | 288 | Write-Host "==== 生成离线资源包 ====" 289 | uv run --active babeldoc --generate-offline-assets ./build 290 | 291 | - name: Delete cache 292 | shell: pwsh 293 | run: | 294 | Write-Host "==== 删除缓存目录 ====" 295 | $cachePath = 
"$env:USERPROFILE/.cache/babeldoc" 296 | if (Test-Path $cachePath) { 297 | Remove-Item -Path $cachePath -Recurse -Force 298 | Write-Host "已删除缓存目录: $cachePath" 299 | } else { 300 | Write-Host "缓存目录不存在: $cachePath" 301 | } 302 | 303 | - name: Test - Translate with offline assets (offline mode) 304 | run: | 305 | Write-Host "==== 测试离线资源包 ====" 306 | New-Item -Path "./test/file/offline_result" -ItemType Directory -Force 307 | ./build/pdf2zh.exe ./test/file/translate.cli.plain.text.pdf -o ./test/file/offline_result 308 | 309 | - name: Upload offline test results 310 | uses: actions/upload-artifact@v4 311 | with: 312 | name: offline-test-results 313 | path: ./test/file/offline_result/ 314 | if-no-files-found: error 315 | 316 | - name: Upload build with offline assets artifact 317 | uses: actions/upload-artifact@v4 318 | with: 319 | name: win64-exe-with-assets 320 | path: ./build 321 | if-no-files-found: error 322 | compression-level: 1 323 | include-hidden-files: true -------------------------------------------------------------------------------- /.github/workflows/fork-test.yml: -------------------------------------------------------------------------------- 1 | name: fork-test 2 | 3 | on: 4 | push: 5 | branches: [ "main", "master" ] 6 | 7 | env: 8 | REGISTRY: ghcr.io 9 | REPO_LOWER: ${{ github.repository_owner }}/${{ github.event.repository.name }} 10 | GHCR_REPO: ghcr.io/${{ github.repository }} 11 | WIN_EXE_PYTHON_VERSION: 3.12.9 12 | jobs: 13 | check-repository: 14 | name: Check if running in main repository 15 | runs-on: ubuntu-latest 16 | outputs: 17 | is_main_repo: ${{ github.repository == 'Byaidu/PDFMathTranslate' }} 18 | steps: 19 | - run: echo "Running repository check" 20 | 21 | test: 22 | uses: ./.github/workflows/python-test.yml 23 | needs: check-repository 24 | if: needs.check-repository.outputs.is_main_repo != 'true' -------------------------------------------------------------------------------- /.github/workflows/python-test.yml: 
-------------------------------------------------------------------------------- 1 | name: Test and Build Python Package 2 | 3 | on: 4 | push: 5 | branches: 6 | - '**' 7 | - '!main' 8 | - '!master' 9 | pull_request: 10 | workflow_call: 11 | 12 | jobs: 13 | build-and-test: 14 | runs-on: ${{ matrix.runner }} 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: ["3.10", "3.11", "3.12"] 19 | runner: 20 | - ubuntu-latest 21 | - ubuntu-24.04-arm 22 | steps: 23 | - name: checkout babeldoc metadata 24 | uses: actions/checkout@v4 25 | with: 26 | repository: funstory-ai/BabelDOC 27 | path: babeldoctemp1234567 28 | token: ${{ secrets.GITHUB_TOKEN }} 29 | sparse-checkout: babeldoc/assets/embedding_assets_metadata.py 30 | - name: Cached Assets 31 | id: cache-assets 32 | uses: actions/cache@v4.2.2 33 | with: 34 | path: ~/.cache/babeldoc 35 | key: test-1-babeldoc-assets-${{ hashFiles('babeldoctemp1234567/babeldoc/assets/embedding_assets_metadata.py') }} 36 | - uses: actions/checkout@v4 37 | - name: Setup uv with Python ${{ matrix.python-version }} 38 | uses: astral-sh/setup-uv@f94ec6bedd8674c4426838e6b50417d36b6ab231 # v5.3.1 39 | with: 40 | python-version: ${{ matrix.python-version }} 41 | enable-cache: true 42 | cache-dependency-glob: "pyproject.toml" 43 | - name: Install dependencies 44 | run: | 45 | uv sync 46 | 47 | - name: Test - Unit Test 48 | run: | 49 | uv run pytest . 
50 | 51 | - name: Test - Translate a PDF file with plain text only 52 | run: uv run pdf2zh ./test/file/translate.cli.plain.text.pdf -o ./test/file 53 | 54 | - name: Test - Translate a PDF file figure 55 | run: uv run pdf2zh ./test/file/translate.cli.text.with.figure.pdf -o ./test/file 56 | 57 | # - name: Test - Translate a PDF file with unknown font 58 | # run: 59 | # pdf2zh ./test/file/translate.cli.font.unknown.pdf 60 | 61 | - name: Test - Start GUI and exit 62 | run: timeout 10 uv run pdf2zh -i || code=$?; if [[ $code -ne 124 && $code -ne 0 ]]; then exit $code; fi 63 | 64 | - name: Build as a package 65 | run: uv build 66 | 67 | - name: Upload test results 68 | uses: actions/upload-artifact@v4 69 | with: 70 | name: test-results-${{ matrix.python-version }}-${{ matrix.runner }} 71 | path: ./test/file/ 72 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | pdf2zh_files 2 | gui/pdf2zh_files 3 | gradio_files 4 | tmp 5 | gui/gradio_files 6 | gui/tmp 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | *.py,cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | cover/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | .pybuilder/ 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | # For a library or package, you might want to ignore these files since the code is 93 | # intended to run in multiple environments; otherwise, check them in: 94 | # .python-version 95 | 96 | # pipenv 97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 100 | # install all needed dependencies. 101 | #Pipfile.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | pdf2zh-dev/ 139 | 140 | # Spyder project settings 141 | .spyderproject 142 | .spyproject 143 | 144 | # Rope project settings 145 | .ropeproject 146 | 147 | # mkdocs documentation 148 | /site 149 | 150 | # mypy 151 | .mypy_cache/ 152 | .dmypy.json 153 | dmypy.json 154 | 155 | # Pyre type checker 156 | .pyre/ 157 | 158 | # pytype static type analyzer 159 | .pytype/ 160 | 161 | # Cython debug symbols 162 | cython_debug/ 163 | 164 | # PyCharm 165 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 166 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 167 | # and can be added to the global gitignore or merged into this file. For a more nuclear 168 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
169 | .idea/ 170 | .vscode 171 | .DS_Store 172 | uv.lock 173 | *.pdf 174 | *.docx 175 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | files: '^.*\.py$' 4 | repos: 5 | - repo: local 6 | hooks: 7 | - id: black 8 | name: black 9 | entry: black --check --diff --color 10 | language: python 11 | - id: flake8 12 | name: flake8 13 | entry: flake8 --ignore E203,E261,E501,W503,E741 14 | language: python 15 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim 2 | 3 | WORKDIR /app 4 | 5 | 6 | EXPOSE 7860 7 | 8 | ENV PYTHONUNBUFFERED=1 9 | 10 | # # Download all required fonts 11 | # ADD "https://github.com/satbyy/go-noto-universal/releases/download/v7.0/GoNotoKurrent-Regular.ttf" /app/ 12 | # ADD "https://github.com/timelic/source-han-serif/releases/download/main/SourceHanSerifCN-Regular.ttf" /app/ 13 | # ADD "https://github.com/timelic/source-han-serif/releases/download/main/SourceHanSerifTW-Regular.ttf" /app/ 14 | # ADD "https://github.com/timelic/source-han-serif/releases/download/main/SourceHanSerifJP-Regular.ttf" /app/ 15 | # ADD "https://github.com/timelic/source-han-serif/releases/download/main/SourceHanSerifKR-Regular.ttf" /app/ 16 | 17 | RUN apt-get update && \ 18 | apt-get install --no-install-recommends -y libgl1 libglib2.0-0 libxext6 libsm6 libxrender1 && \ 19 | rm -rf /var/lib/apt/lists/* 20 | 21 | COPY pyproject.toml . 22 | RUN uv pip install --system --no-cache -r pyproject.toml && babeldoc --version && babeldoc --warmup 23 | 24 | COPY . . 25 | 26 | RUN uv pip install --system --no-cache . 
&& uv pip install --system --no-cache -U babeldoc "pymupdf<1.25.3" && babeldoc --version && babeldoc --warmup 27 | 28 | CMD ["pdf2zh", "-i"] 29 | -------------------------------------------------------------------------------- /app.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "PDFMathTranslate", 3 | "description": "PDF scientific paper translation and bilingual comparison.", 4 | "repository": "https://github.com/Byaidu/PDFMathTranslate" 5 | } -------------------------------------------------------------------------------- /docs/ADVANCED.md: -------------------------------------------------------------------------------- 1 | [**Documentation**](https://github.com/Byaidu/PDFMathTranslate) > **Advanced Usage** _(current)_ 2 | 3 | --- 4 | 5 |

Table of Contents

6 | 7 | - [Full / partial translation](#partial) 8 | - [Specify source and target languages](#language) 9 | - [Translate with different services](#services) 10 | - [Translate with exceptions](#exceptions) 11 | - [Multi-threads](#threads) 12 | - [Custom prompt](#prompt) 13 | - [Authorization](#auth) 14 | - [Custom configuration file](#cofig) 15 | - [Fonts Subsetting](#fonts-subset) 16 | - [Translation cache](#cache) 17 | 18 | --- 19 | 20 |

Full / partial translation

21 | 22 | - Entire document 23 | 24 | ```bash 25 | pdf2zh example.pdf 26 | ``` 27 | 28 | - Part of the document 29 | 30 | ```bash 31 | pdf2zh example.pdf -p 1-3,5 32 | ``` 33 | 34 | [⬆️ Back to top](#toc) 35 | 36 | --- 37 | 38 |

Specify source and target languages

39 | 40 | See [Google Languages Codes](https://developers.google.com/admin-sdk/directory/v1/languages), [DeepL Languages Codes](https://developers.deepl.com/docs/resources/supported-languages) 41 | 42 | ```bash 43 | pdf2zh example.pdf -li en -lo ja 44 | ``` 45 | 46 | [⬆️ Back to top](#toc) 47 | 48 | --- 49 | 50 |

Translate with different services

51 | 52 | We've provided a detailed table on the required [environment variables](https://chatgpt.com/share/6734a83d-9d48-800e-8a46-f57ca6e8bcb4) for each translation service. Make sure to set them before using the respective service. 53 | 54 | | **Translator** | **Service** | **Environment Variables** | **Default Values** | **Notes** | 55 | |----------------------|----------------|-----------------------------------------------------------------------|----------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 56 | | **Google (Default)** | `google` | None | N/A | None | 57 | | **Bing** | `bing` | None | N/A | None | 58 | | **DeepL** | `deepl` | `DEEPL_AUTH_KEY` | `[Your Key]` | See [DeepL](https://support.deepl.com/hc/en-us/articles/360020695820-API-Key-for-DeepL-s-API) | 59 | | **DeepLX** | `deeplx` | `DEEPLX_ENDPOINT` | `https://api.deepl.com/translate` | See [DeepLX](https://github.com/OwO-Network/DeepLX) | 60 | | **Ollama** | `ollama` | `OLLAMA_HOST`, `OLLAMA_MODEL` | `http://127.0.0.1:11434`, `gemma2` | See [Ollama](https://github.com/ollama/ollama) | 61 | | **Xinference** | `xinference` | `XINFERENCE_HOST`, `XINFERENCE_MODEL` | `http://127.0.0.1:9997`, `gemma-2-it` | See [Xinference](https://github.com/xorbitsai/inference) | 62 | | **OpenAI** | `openai` | `OPENAI_BASE_URL`, `OPENAI_API_KEY`, `OPENAI_MODEL` | `https://api.openai.com/v1`, `[Your Key]`, `gpt-4o-mini` | See [OpenAI](https://platform.openai.com/docs/overview) | 63 | | **AzureOpenAI** | `azure-openai` | `AZURE_OPENAI_BASE_URL`, `AZURE_OPENAI_API_KEY`, `AZURE_OPENAI_MODEL` | `[Your Endpoint]`, `[Your Key]`, `gpt-4o-mini` | See [Azure 
OpenAI](https://learn.microsoft.com/zh-cn/azure/ai-services/openai/chatgpt-quickstart?tabs=command-line%2Cjavascript-keyless%2Ctypescript-keyless%2Cpython&pivots=programming-language-python) | 64 | | **Zhipu** | `zhipu` | `ZHIPU_API_KEY`, `ZHIPU_MODEL` | `[Your Key]`, `glm-4-flash` | See [Zhipu](https://open.bigmodel.cn/dev/api/thirdparty-frame/openai-sdk) | 65 | | **ModelScope** | `ModelScope` | `MODELSCOPE_API_KEY`, `MODELSCOPE_MODEL` | `[Your Key]`, `Qwen/Qwen2.5-Coder-32B-Instruct` | See [ModelScope](https://www.modelscope.cn/docs/model-service/API-Inference/intro) | 66 | | **Silicon** | `silicon` | `SILICON_API_KEY`, `SILICON_MODEL` | `[Your Key]`, `Qwen/Qwen2.5-7B-Instruct` | See [SiliconCloud](https://docs.siliconflow.cn/quickstart) | 67 | | **Gemini** | `gemini` | `GEMINI_API_KEY`, `GEMINI_MODEL` | `[Your Key]`, `gemini-1.5-flash` | See [Gemini](https://ai.google.dev/gemini-api/docs/openai) | 68 | | **Azure** | `azure` | `AZURE_ENDPOINT`, `AZURE_API_KEY` | `https://api.translator.azure.cn`, `[Your Key]` | See [Azure](https://docs.azure.cn/en-us/ai-services/translator/text-translation-overview) | 69 | | **Tencent** | `tencent` | `TENCENTCLOUD_SECRET_ID`, `TENCENTCLOUD_SECRET_KEY` | `[Your ID]`, `[Your Key]` | See [Tencent](https://www.tencentcloud.com/products/tmt?from_qcintl=122110104) | 70 | | **Dify** | `dify` | `DIFY_API_URL`, `DIFY_API_KEY` | `[Your DIFY URL]`, `[Your Key]` | See [Dify](https://github.com/langgenius/dify),Three variables, lang_out, lang_in, and text, need to be defined in Dify's workflow input. 
| 71 | | **AnythingLLM** | `anythingllm` | `AnythingLLM_URL`, `AnythingLLM_APIKEY` | `[Your AnythingLLM URL]`, `[Your Key]` | See [anything-llm](https://github.com/Mintplex-Labs/anything-llm) | 72 | |**Argos Translate**|`argos`| | |See [argos-translate](https://github.com/argosopentech/argos-translate)| 73 | |**Grok**|`grok`| `GORK_API_KEY`, `GORK_MODEL` | `[Your GORK_API_KEY]`, `grok-2-1212` |See [Grok](https://docs.x.ai/docs/overview)| 74 | |**Groq**|`groq`| `GROQ_API_KEY`, `GROQ_MODEL` | `[Your GROQ_API_KEY]`, `llama-3-3-70b-versatile` |See [Groq](https://console.groq.com/docs/models)| 75 | |**DeepSeek**|`deepseek`| `DEEPSEEK_API_KEY`, `DEEPSEEK_MODEL` | `[Your DEEPSEEK_API_KEY]`, `deepseek-chat` |See [DeepSeek](https://www.deepseek.com/)| 76 | |**OpenAI-Liked**|`openailiked`| `OPENAILIKED_BASE_URL`, `OPENAILIKED_API_KEY`, `OPENAILIKED_MODEL` | `url`, `[Your Key]`, `model name` | None | 77 | |**Ali Qwen Translation**|`qwen-mt`| `ALI_MODEL`, `ALI_API_KEY`, `ALI_DOMAINS` | `qwen-mt-turbo`, `[Your Key]`, `scientific paper` | Tranditional Chinese are not yet supported, it will be translated into Simplified Chinese. More see [Qwen MT](https://bailian.console.aliyun.com/?spm=5176.28197581.0.0.72e329a4HRxe99#/model-market/detail/qwen-mt-turbo) | 78 | 79 | For large language models that are compatible with the OpenAI API but not listed in the table above, you can set environment variables using the same method outlined for OpenAI in the table. 80 | 81 | Use `-s service` or `-s service:model` to specify service: 82 | 83 | ```bash 84 | pdf2zh example.pdf -s openai:gpt-4o-mini 85 | ``` 86 | 87 | Or specify model with environment variables: 88 | 89 | ```bash 90 | set OPENAI_MODEL=gpt-4o-mini 91 | pdf2zh example.pdf -s openai 92 | ``` 93 | 94 | For PowerShell user: 95 | 96 | ```shell 97 | $env:OPENAI_MODEL = gpt-4o-mini 98 | pdf2zh example.pdf -s openai 99 | ``` 100 | 101 | [⬆️ Back to top](#toc) 102 | 103 | --- 104 | 105 |

Translate with exceptions

106 | 107 | Use regex to specify formula fonts and characters that need to be preserved: 108 | 109 | ```bash 110 | pdf2zh example.pdf -f "(CM[^RT].*|MS.*|.*Ital)" -c "(\(|\||\)|\+|=|\d|[\u0080-\ufaff])" 111 | ``` 112 | 113 | Preserve `Latex`, `Mono`, `Code`, `Italic`, `Symbol` and `Math` fonts by default: 114 | 115 | ```bash 116 | pdf2zh example.pdf -f "(CM[^R]|MS.M|XY|MT|BL|RM|EU|LA|RS|LINE|LCIRCLE|TeX-|rsfs|txsy|wasy|stmary|.*Mono|.*Code|.*Ital|.*Sym|.*Math)" 117 | ``` 118 | 119 | [⬆️ Back to top](#toc) 120 | 121 | --- 122 | 123 |

Multi-threads

124 | 125 | Use `-t` to specify how many threads to use in translation: 126 | 127 | ```bash 128 | pdf2zh example.pdf -t 1 129 | ``` 130 | 131 | [⬆️ Back to top](#toc) 132 | 133 | --- 134 | 135 |

Custom prompt

136 | 137 | Note: System prompt is currently not supported. See [this change](https://github.com/Byaidu/PDFMathTranslate/pull/637). 138 | 139 | Use `--prompt` to specify which prompt to use in llm: 140 | 141 | ```bash 142 | pdf2zh example.pdf --prompt prompt.txt 143 | ``` 144 | 145 | For example: 146 | 147 | ```txt 148 | You are a professional, authentic machine translation engine. Only Output the translated text, do not include any other text. 149 | 150 | Translate the following markdown source text to ${lang_out}. Keep the formula notation {v*} unchanged. Output translation directly without any additional text. 151 | 152 | Source Text: ${text} 153 | 154 | Translated Text: 155 | ``` 156 | 157 | In a custom prompt file, three variables can be used. 158 | 159 | |**variables**|**comment**| 160 | |-|-| 161 | |`lang_in`|input language| 162 | |`lang_out`|output language| 163 | |`text`|text to be translated| 164 | 165 | [⬆️ Back to top](#toc) 166 | 167 | --- 168 | 169 |

Authorization

170 | 171 | Use `--authorized` to specify which user to use Web UI and custom the login page: 172 | 173 | ```bash 174 | pdf2zh example.pdf --authorized users.txt auth.html 175 | ``` 176 | 177 | example users.txt 178 | Each line contains two elements, username, and password, separated by a comma. 179 | 180 | ``` 181 | admin,123456 182 | user1,password1 183 | user2,abc123 184 | guest,guest123 185 | test,test123 186 | ``` 187 | 188 | example auth.html 189 | 190 | ```html 191 | 192 | 193 | 194 | Simple HTML 195 | 196 | 197 |

Hello, World!

198 |

Welcome to my simple HTML page.

199 | 200 | 201 | ``` 202 | 203 | [⬆️ Back to top](#toc) 204 | 205 | --- 206 | 207 |

Custom configuration file

208 | 209 | Use `--config` to specify which file to configure the PDFMathTranslate: 210 | 211 | ```bash 212 | pdf2zh example.pdf --config config.json 213 | ``` 214 | 215 | ```bash 216 | pdf2zh -i --config config.json 217 | ``` 218 | 219 | example config.json 220 | 221 | ```json 222 | { 223 | "USE_MODELSCOPE": "0", 224 | "PDF2ZH_LANG_FROM": "English", 225 | "PDF2ZH_LANG_TO": "Simplified Chinese", 226 | "NOTO_FONT_PATH": "/app/SourceHanSerifCN-Regular.ttf", 227 | "translators": [ 228 | { 229 | "name": "deeplx", 230 | "envs": { 231 | "DEEPLX_ENDPOINT": "http://localhost:1188/translate/", 232 | "DEEPLX_ACCESS_TOKEN": null 233 | } 234 | }, 235 | { 236 | "name": "ollama", 237 | "envs": { 238 | "OLLAMA_HOST": "http://127.0.0.1:11434", 239 | "OLLAMA_MODEL": "gemma2" 240 | } 241 | } 242 | ] 243 | } 244 | ``` 245 | 246 | By default, the config file is saved in the `~/.config/PDFMathTranslate/config.json`. The program will start by reading the contents of config.json, and after that it will read the contents of the environment variables. When an environment variable is available, the contents of the environment variable are used first and the file is updated. 247 | 248 | [⬆️ Back to top](#toc) 249 | 250 | --- 251 | 252 |

Fonts subsetting

253 | 254 | By default, PDFMathTranslate uses font subsetting to decrease the size of output files. You can use the `--skip-subset-fonts` option to disable font subsetting when encountering compatibility issues. 255 | 256 | ```bash 257 | pdf2zh example.pdf --skip-subset-fonts 258 | ``` 259 | 260 | [⬆️ Back to top](#toc) 261 | 262 | --- 263 | 264 |

Translation cache

265 | 266 | PDFMathTranslate caches translated texts to increase speed and avoid unnecessary API calls for same contents. You can use `--ignore-cache` option to ignore translation cache and force retranslation. 267 | 268 | ```bash 269 | pdf2zh example.pdf --ignore-cache 270 | ``` 271 | 272 | [⬆️ Back to top](#toc) 273 | 274 | --- 275 | 276 |

Deployment as a public service

277 | 278 | PDFMathTranslate has added the features of **enabling partial services** and **hiding Backend information** in 279 | the configuration file. You can enable these by setting `ENABLED_SERVICES` and `HIDDEN_GRADIO_DETAILS` in the 280 | configuration file. Among them: 281 | 282 | - `ENABLED_SERVICES` allows you to choose to enable only certain options, limiting the number of available services. 283 | - `HIDDEN_GRADIO_DETAILS` will hide the real API_KEY on the web, preventing users from obtaining server-side keys. 284 | 285 | A usable configuration is as follows: 286 | 287 | ```json 288 | { 289 | "USE_MODELSCOPE": "0", 290 | "translators": [ 291 | { 292 | "name": "grok", 293 | "envs": { 294 | "GORK_API_KEY": null, 295 | "GORK_MODEL": "grok-2-1212" 296 | } 297 | }, 298 | { 299 | "name": "openai", 300 | "envs": { 301 | "OPENAI_BASE_URL": "https://api.openai.com/v1", 302 | "OPENAI_API_KEY": "sk-xxxx", 303 | "OPENAI_MODEL": "gpt-4o-mini" 304 | } 305 | } 306 | ], 307 | "ENABLED_SERVICES": [ 308 | "OpenAI", 309 | "Grok" 310 | ], 311 | "HIDDEN_GRADIO_DETAILS": true, 312 | "PDF2ZH_LANG_FROM": "English", 313 | "PDF2ZH_LANG_TO": "Simplified Chinese", 314 | "NOTO_FONT_PATH": "/app/SourceHanSerifCN-Regular.ttf" 315 | } 316 | ``` 317 | 318 | [⬆️ Back to top](#toc) 319 | 320 | 321 | --- 322 | 323 |

MCP

324 | 325 | PDFMathTranslate can run as an MCP server. To use this, you need to run `uv pip install pdf2zh` and configure `claude_desktop_config.json`. An example config is as follows: 326 | 327 | ``` json 328 | { 329 | "mcpServers": { 330 | "filesystem": { 331 | "command": "npx", 332 | "args": [ 333 | "-y", 334 | "@modelcontextprotocol/server-filesystem", 335 | "/path/to/Document" 336 | ] 337 | }, 338 | "translate_pdf": { 339 | "command": "uv", 340 | "args": [ 341 | "run", 342 | "pdf2zh", 343 | "--mcp" 344 | ] 345 | } 346 | } 347 | } 348 | ``` 349 | 350 | [filesystem](https://github.com/modelcontextprotocol/servers/tree/main/src/filesystem) is a required MCP server used to find the PDF file, and `translate_pdf` is our MCP server. 351 | 352 | To test if the MCP server works, you can open Claude Desktop and tell it: 353 | 354 | ``` 355 | find the `test.pdf` in my Document folder and translate it to Chinese 356 | ``` 357 | -------------------------------------------------------------------------------- /docs/APIS.md: -------------------------------------------------------------------------------- 1 | [**Documentation**](https://github.com/Byaidu/PDFMathTranslate) > **API Details** _(current)_ 2 | 3 |

Table of Contents

4 | This project supports two types of APIs (all methods require Redis): 5 | 6 | - [Functional calls in Python](#api-python) 7 | - [HTTP protocols](#api-http) 8 | 9 | --- 10 | 11 |

Python

12 | 13 | As `pdf2zh` is an installed module in Python, we expose two methods for other programs to call in any Python script. 14 | 15 | For example, if you want to translate a document from English to Chinese using Google Translate, you may use the following code: 16 | 17 | ```python 18 | from pdf2zh import translate, translate_stream 19 | 20 | params = { 21 | 'lang_in': 'en', 22 | 'lang_out': 'zh', 23 | 'service': 'google', 24 | 'thread': 4, 25 | } 26 | ``` 27 | Translate with files: 28 | ```python 29 | (file_mono, file_dual) = translate(files=['example.pdf'], **params)[0] 30 | ``` 31 | Translate with stream: 32 | ```python 33 | with open('example.pdf', 'rb') as f: 34 | (stream_mono, stream_dual) = translate_stream(stream=f.read(), **params) 35 | ``` 36 | 37 | [⬆️ Back to top](#toc) 38 | 39 | --- 40 | 41 |

HTTP

42 | 43 | In a more flexible way, you can communicate with the program using HTTP protocols, if: 44 | 45 | 1. Install and run backend 46 | 47 | ```bash 48 | pip install pdf2zh[backend] 49 | pdf2zh --flask 50 | pdf2zh --celery worker 51 | ``` 52 | 53 | 2. Using HTTP protocols as follows: 54 | 55 | - Submit translate task 56 | 57 | ```bash 58 | curl http://localhost:11008/v1/translate -F "file=@example.pdf" -F "data={\"lang_in\":\"en\",\"lang_out\":\"zh\",\"service\":\"google\",\"thread\":4}" 59 | {"id":"d9894125-2f4e-45ea-9d93-1a9068d2045a"} 60 | ``` 61 | 62 | - Check Progress 63 | 64 | ```bash 65 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a 66 | {"info":{"n":13,"total":506},"state":"PROGRESS"} 67 | ``` 68 | 69 | - Check Progress _(if finished)_ 70 | 71 | ```bash 72 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a 73 | {"state":"SUCCESS"} 74 | ``` 75 | 76 | - Save monolingual file 77 | 78 | ```bash 79 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a/mono --output example-mono.pdf 80 | ``` 81 | 82 | - Save bilingual file 83 | 84 | ```bash 85 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a/dual --output example-dual.pdf 86 | ``` 87 | 88 | - Interrupt if running and delete the task 89 | ```bash 90 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a -X DELETE 91 | ``` 92 | 93 | [⬆️ Back to top](#toc) 94 | 95 | --- 96 | -------------------------------------------------------------------------------- /docs/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | 
identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 
50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | aw@funstory.ai . 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. 
Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /docs/README_GUI.md: -------------------------------------------------------------------------------- 1 | # Interact with GUI 2 | 3 | This subfolder provides the GUI mode of `pdf2zh`. 4 | 5 | ## Usage 6 | 7 | 1. Run `pdf2zh -i` 8 | 9 | 2. Drop the PDF file into the window and click `Translate`. 
10 | 11 | ### Environment Variables 12 | 13 | You can set the source and target languages using environment variables: 14 | 15 | - `PDF2ZH_LANG_FROM`: Sets the source language. Defaults to "English". 16 | - `PDF2ZH_LANG_TO`: Sets the target language. Defaults to "Simplified Chinese". 17 | 18 | ### Supported Languages 19 | 20 | The following languages are supported: 21 | 22 | - English 23 | - Simplified Chinese 24 | - Traditional Chinese 25 | - French 26 | - German 27 | - Japanese 28 | - Korean 29 | - Russian 30 | - Spanish 31 | - Italian 32 | 33 | ## Preview 34 | 35 | 36 | 37 | 38 | ## Maintenance 39 | 40 | GUI maintained by [Rongxin](https://github.com/reycn) 41 | -------------------------------------------------------------------------------- /docs/README_ja-JP.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | [English](../README.md) | [简体中文](README_zh-CN.md) | [繁體中文](README_zh-TW.md) | 日本語 4 | 5 | PDF2ZH 6 | 7 |

PDFMathTranslate

8 | 9 |

10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 |

31 | 32 | Byaidu%2FPDFMathTranslate | Trendshift 33 | 34 |
35 | 36 | 科学 PDF 文書の翻訳およびバイリンガル比較ツール 37 | 38 | - 📊 数式、チャート、目次、注釈を保持 *([プレビュー](#preview))* 39 | - 🌐 [複数の言語](#language) と [多様な翻訳サービス](#services) をサポート 40 | - 🤖 [コマンドラインツール](#usage)、[インタラクティブユーザーインターフェース](#gui)、および [Docker](#docker) を提供 41 | 42 | フィードバックは [GitHub Issues](https://github.com/Byaidu/PDFMathTranslate/issues) または [Telegram グループ](https://t.me/+Z9_SgnxmsmA5NzBl) でお願いします 43 | 44 |

最近の更新

45 | 46 | - [2024年11月26日] CLIがオンラインファイルをサポートするようになりました *(by [@reycn](https://github.com/reycn))* 47 | - [2024年11月24日] 依存関係のサイズを削減するために [ONNX](https://github.com/onnx/onnx) サポートを追加しました *(by [@Wybxc](https://github.com/Wybxc))* 48 | - [2024年11月23日] 🌟 [公共サービス](#demo) がオンラインになりました! *(by [@Byaidu](https://github.com/Byaidu))* 49 | - [2024年11月23日] ウェブボットを防ぐためのファイアウォールを追加しました *(by [@Byaidu](https://github.com/Byaidu))* 50 | - [2024年11月22日] GUIがイタリア語をサポートし、改善されました *(by [@Byaidu](https://github.com/Byaidu), [@reycn](https://github.com/reycn))* 51 | - [2024年11月22日] デプロイされたサービスを他の人と共有できるようになりました *(by [@Zxis233](https://github.com/Zxis233))* 52 | - [2024年11月22日] Tencent翻訳をサポートしました *(by [@hellofinch](https://github.com/hellofinch))* 53 | - [2024年11月21日] GUIがバイリンガルドキュメントのダウンロードをサポートするようになりました *(by [@reycn](https://github.com/reycn))* 54 | - [2024年11月20日] 🌟 [デモ](#demo) がオンラインになりました! *(by [@reycn](https://github.com/reycn))* 55 | 56 |

プレビュー

57 | 58 |
59 | 60 |
61 | 62 |

公共サービス 🌟

63 | 64 | ### 無料サービス (<https://pdf2zh.com/>) 65 | 66 | インストールなしで [公共サービス](https://pdf2zh.com/) をオンラインで試すことができます。 67 | 68 | ### デモ 69 | 70 | インストールなしで [HuggingFace上のデモ](https://huggingface.co/spaces/reycn/PDFMathTranslate-Docker), [ModelScope上のデモ](https://www.modelscope.cn/studios/AI-ModelScope/PDFMathTranslate) を試すことができます。 71 | デモの計算リソースは限られているため、乱用しないようにしてください。 72 | 73 |

インストールと使用方法

74 | 75 | このプロジェクトを使用するための4つの方法を提供しています:[コマンドライン](#cmd)、[ポータブル](#portable)、[GUI](#gui)、および [Docker](#docker)。 76 | 77 | pdf2zhの実行には追加モデル(`wybxc/DocLayout-YOLO-DocStructBench-onnx`)が必要です。このモデルはModelScopeでも見つけることができます。起動時にこのモデルのダウンロードに問題がある場合は、以下の環境変数を使用してください: 78 | 79 | ```shell 80 | set HF_ENDPOINT=https://hf-mirror.com 81 | ``` 82 | 83 | For PowerShell users: 84 | ```shell 85 | $env:HF_ENDPOINT = "https://hf-mirror.com" 86 | ``` 87 | 88 |

方法1. コマンドライン

89 | 90 | 1. Pythonがインストールされていること (バージョン3.10 <= バージョン <= 3.12) 91 | 2. パッケージをインストールします: 92 | 93 | ```bash 94 | pip install pdf2zh 95 | ``` 96 | 97 | 3. 翻訳を実行し、[現在の作業ディレクトリ](https://chatgpt.com/share/6745ed36-9acc-800e-8a90-59204bd13444) にファイルを生成します: 98 | 99 | ```bash 100 | pdf2zh document.pdf 101 | ``` 102 | 103 |

方法2. ポータブル

104 | 105 | Python環境を事前にインストールする必要はありません 106 | 107 | [setup.bat](https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/refs/heads/main/script/setup.bat) をダウンロードしてダブルクリックして実行します 108 | 109 |

方法3. GUI

110 | 111 | 1. Pythonがインストールされていること (バージョン3.10 <= バージョン <= 3.12) 112 | 2. パッケージをインストールします: 113 | 114 | ```bash 115 | pip install pdf2zh 116 | ``` 117 | 118 | 3. ブラウザで使用を開始します: 119 | 120 | ```bash 121 | pdf2zh -i 122 | ``` 123 | 124 | 4. ブラウザが自動的に起動しない場合は、次のURLを開きます: 125 | 126 | ```bash 127 | http://localhost:7860/ 128 | ``` 129 | 130 | 131 | 132 | 詳細については、[GUIのドキュメント](./README_GUI.md) を参照してください。 133 | 134 |

方法4. Docker

135 | 136 | 1. プルして実行します: 137 | 138 | ```bash 139 | docker pull byaidu/pdf2zh 140 | docker run -d -p 7860:7860 byaidu/pdf2zh 141 | ``` 142 | 143 | 2. ブラウザで開きます: 144 | 145 | ``` 146 | http://localhost:7860/ 147 | ``` 148 | 149 | クラウドサービスでのDockerデプロイメント用: 150 | 151 |
152 | 153 | Deploy 154 | 155 | Deploy to Koyeb 156 | 157 | Deploy on Zeabur 158 | 159 | Deploy to Koyeb 160 |
161 | 162 |

高度なオプション

163 | 164 | コマンドラインで翻訳コマンドを実行し、現在の作業ディレクトリに翻訳されたドキュメント `example-mono.pdf` とバイリンガルドキュメント `example-dual.pdf` を生成します。デフォルトではGoogle翻訳サービスを使用します。More supported translation services can be found [HERE](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#services). 165 | 166 | 167 | cmd 168 | 169 | 以下の表に、参考のためにすべての高度なオプションをリストしました: 170 | 171 | | オプション | 機能 | 例 | 172 | | -------- | ------- |------- | 173 | | files | ローカルファイル | `pdf2zh ~/local.pdf` | 174 | | links | オンラインファイル | `pdf2zh http://arxiv.org/paper.pdf` | 175 | | `-i` | [GUIに入る](#gui) | `pdf2zh -i` | 176 | | `-p` | [部分的なドキュメント翻訳](#partial) | `pdf2zh example.pdf -p 1` | 177 | | `-li` | [ソース言語](#languages) | `pdf2zh example.pdf -li en` | 178 | | `-lo` | [ターゲット言語](#languages) | `pdf2zh example.pdf -lo zh` | 179 | | `-s` | [翻訳サービス](#services) | `pdf2zh example.pdf -s deepl` | 180 | | `-t` | [マルチスレッド](#threads) | `pdf2zh example.pdf -t 1` | 181 | | `-o` | 出力ディレクトリ | `pdf2zh example.pdf -o output` | 182 | | `-f`, `-c` | [例外](#exceptions) | `pdf2zh example.pdf -f "(MS.*)"` | 183 | | `--share` | [gradio公開リンクを取得] | `pdf2zh -i --share` | 184 | | `--authorized` | [[ウェブ認証とカスタム認証ページの追加](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#auth)] | `pdf2zh -i --authorized users.txt [auth.html]` | 185 | | `--prompt` | [カスタムビッグモデルのプロンプトを使用する] | `pdf2zh --prompt [prompt.txt]` | 186 | | `--onnx` | [カスタムDocLayout-YOLO ONNXモデルの使用] | `pdf2zh --onnx [onnx/model/path]` | 187 | | `--serverport` | [カスタムWebUIポートを使用する] | `pdf2zh --serverport 7860` | 188 | | `--dir` | [batch translate] | `pdf2zh --dir /path/to/translate/` | 189 | | `--config` | [configuration file](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#cofig) | `pdf2zh --config /path/to/config/config.json` | 190 | | `--serverport` | [custom gradio server port] | `pdf2zh --serverport 7860` | 191 | 192 |

全文または部分的なドキュメント翻訳

193 | 194 | - **全文翻訳** 195 | 196 | ```bash 197 | pdf2zh example.pdf 198 | ``` 199 | 200 | - **部分翻訳** 201 | 202 | ```bash 203 | pdf2zh example.pdf -p 1-3,5 204 | ``` 205 | 206 |

ソース言語とターゲット言語を指定

207 | 208 | [Google Languages Codes](https://developers.google.com/admin-sdk/directory/v1/languages)、[DeepL Languages Codes](https://developers.deepl.com/docs/resources/supported-languages) を参照してください 209 | 210 | ```bash 211 | pdf2zh example.pdf -li en -lo ja 212 | ``` 213 | 214 |

異なるサービスで翻訳

215 | 216 | 以下の表は、各翻訳サービスに必要な [環境変数](https://chatgpt.com/share/6734a83d-9d48-800e-8a46-f57ca6e8bcb4) を示しています。各サービスを使用する前に、これらの変数を設定してください。 217 | 218 | |**Translator**|**Service**|**Environment Variables**|**Default Values**|**Notes**| 219 | |-|-|-|-|-| 220 | |**Google (Default)**|`google`|None|N/A|None| 221 | |**Bing**|`bing`|None|N/A|None| 222 | |**DeepL**|`deepl`|`DEEPL_AUTH_KEY`|`[Your Key]`|See [DeepL](https://support.deepl.com/hc/en-us/articles/360020695820-API-Key-for-DeepL-s-API)| 223 | |**DeepLX**|`deeplx`|`DEEPLX_ENDPOINT`|`https://api.deepl.com/translate`|See [DeepLX](https://github.com/OwO-Network/DeepLX)| 224 | |**Ollama**|`ollama`|`OLLAMA_HOST`, `OLLAMA_MODEL`|`http://127.0.0.1:11434`, `gemma2`|See [Ollama](https://github.com/ollama/ollama)| 225 | |**OpenAI**|`openai`|`OPENAI_BASE_URL`, `OPENAI_API_KEY`, `OPENAI_MODEL`|`https://api.openai.com/v1`, `[Your Key]`, `gpt-4o-mini`|See [OpenAI](https://platform.openai.com/docs/overview)| 226 | |**AzureOpenAI**|`azure-openai`|`AZURE_OPENAI_BASE_URL`, `AZURE_OPENAI_API_KEY`, `AZURE_OPENAI_MODEL`|`[Your Endpoint]`, `[Your Key]`, `gpt-4o-mini`|See [Azure OpenAI](https://learn.microsoft.com/zh-cn/azure/ai-services/openai/chatgpt-quickstart?tabs=command-line%2Cjavascript-keyless%2Ctypescript-keyless%2Cpython&pivots=programming-language-python)| 227 | |**Zhipu**|`zhipu`|`ZHIPU_API_KEY`, `ZHIPU_MODEL`|`[Your Key]`, `glm-4-flash`|See [Zhipu](https://open.bigmodel.cn/dev/api/thirdparty-frame/openai-sdk)| 228 | | **ModelScope** | `modelscope` |`MODELSCOPE_API_KEY`, `MODELSCOPE_MODEL`|`[Your Key]`, `Qwen/Qwen2.5-Coder-32B-Instruct`| See [ModelScope](https://www.modelscope.cn/docs/model-service/API-Inference/intro)| 229 | |**Silicon**|`silicon`|`SILICON_API_KEY`, `SILICON_MODEL`|`[Your Key]`, `Qwen/Qwen2.5-7B-Instruct`|See [SiliconCloud](https://docs.siliconflow.cn/quickstart)| 230 | |**Gemini**|`gemini`|`GEMINI_API_KEY`, `GEMINI_MODEL`|`[Your Key]`, `gemini-1.5-flash`|See 
[Gemini](https://ai.google.dev/gemini-api/docs/openai)| 231 | |**Azure**|`azure`|`AZURE_ENDPOINT`, `AZURE_API_KEY`|`https://api.translator.azure.cn`, `[Your Key]`|See [Azure](https://docs.azure.cn/en-us/ai-services/translator/text-translation-overview)| 232 | |**Tencent**|`tencent`|`TENCENTCLOUD_SECRET_ID`, `TENCENTCLOUD_SECRET_KEY`|`[Your ID]`, `[Your Key]`|See [Tencent](https://www.tencentcloud.com/products/tmt?from_qcintl=122110104)| 233 | |**Dify**|`dify`|`DIFY_API_URL`, `DIFY_API_KEY`|`[Your DIFY URL]`, `[Your Key]`|See [Dify](https://github.com/langgenius/dify),Three variables, lang_out, lang_in, and text, need to be defined in Dify's workflow input.| 234 | |**AnythingLLM**|`anythingllm`|`AnythingLLM_URL`, `AnythingLLM_APIKEY`|`[Your AnythingLLM URL]`, `[Your Key]`|See [anything-llm](https://github.com/Mintplex-Labs/anything-llm)| 235 | |**Argos Translate**|`argos`| | |See [argos-translate](https://github.com/argosopentech/argos-translate)| 236 | |**Grok**|`grok`| `GORK_API_KEY`, `GORK_MODEL` | `[Your GORK_API_KEY]`, `grok-2-1212` |See [Grok](https://docs.x.ai/docs/overview)| 237 | |**DeepSeek**|`deepseek`| `DEEPSEEK_API_KEY`, `DEEPSEEK_MODEL` | `[Your DEEPSEEK_API_KEY]`, `deepseek-chat` |See [DeepSeek](https://www.deepseek.com/)| 238 | |**OpenAI-Liked**|`openailiked`| `OPENAILIKED_BASE_URL`, `OPENAILIKED_API_KEY`, `OPENAILIKED_MODEL` | `url`, `[Your Key]`, `model name` | None | 239 | 240 | (need Japanese translation) 241 | For large language models that are compatible with the OpenAI API but not listed in the table above, you can set environment variables using the same method outlined for OpenAI in the table.
242 | 243 | `-s service` または `-s service:model` を使用してサービスを指定します: 244 | 245 | ```bash 246 | pdf2zh example.pdf -s openai:gpt-4o-mini 247 | ``` 248 | 249 | または環境変数でモデルを指定します: 250 | 251 | ```bash 252 | set OPENAI_MODEL=gpt-4o-mini 253 | pdf2zh example.pdf -s openai 254 | ``` 255 | 256 | For PowerShell users: 257 | ```shell 258 | $env:OPENAI_MODEL = "gpt-4o-mini" 259 | pdf2zh example.pdf -s openai 260 | ``` 261 | 262 |

例外を指定して翻訳

263 | 264 | 正規表現を使用して保持する必要がある数式フォントと文字を指定します: 265 | 266 | ```bash 267 | pdf2zh example.pdf -f "(CM[^RT].*|MS.*|.*Ital)" -c "(\(|\||\)|\+|=|\d|[\u0080-\ufaff])" 268 | ``` 269 | 270 | デフォルトで `Latex`、`Mono`、`Code`、`Italic`、`Symbol` および `Math` フォントを保持します: 271 | 272 | ```bash 273 | pdf2zh example.pdf -f "(CM[^R]|MS.M|XY|MT|BL|RM|EU|LA|RS|LINE|LCIRCLE|TeX-|rsfs|txsy|wasy|stmary|.*Mono|.*Code|.*Ital|.*Sym|.*Math)" 274 | ``` 275 | 276 |

スレッド数を指定

277 | 278 | `-t` を使用して翻訳に使用するスレッド数を指定します: 279 | 280 | ```bash 281 | pdf2zh example.pdf -t 1 282 | ``` 283 | 284 |

カスタム プロンプト

285 | 286 | `--prompt`を使用して、LLMで使用するプロンプトを指定します: 287 | 288 | ```bash 289 | pdf2zh example.pdf -pr prompt.txt 290 | ``` 291 | 292 | 293 | `prompt.txt`の例: 294 | 295 | ```txt 296 | [ 297 | { 298 | "role": "system", 299 | "content": "You are a professional,authentic machine translation engine.", 300 | }, 301 | { 302 | "role": "user", 303 | "content": "Translate the following markdown source text to ${lang_out}. Keep the formula notation {{v*}} unchanged. Output translation directly without any additional text.\nSource Text: ${text}\nTranslated Text:", 304 | }, 305 | ] 306 | ``` 307 | 308 | 309 | カスタムプロンプトファイルでは、以下の3つの変数が使用できます。 310 | 311 | |**変数**|**内容**| 312 | |-|-| 313 | |`lang_in`|ソース言語| 314 | |`lang_out`|ターゲット言語| 315 | |`text`|翻訳するテキスト| 316 | 317 |

API

318 | 319 | ### Python 320 | 321 | ```python 322 | from pdf2zh import translate, translate_stream 323 | 324 | params = {"lang_in": "en", "lang_out": "zh", "service": "google", "thread": 4} 325 | file_mono, file_dual = translate(files=["example.pdf"], **params)[0] 326 | with open("example.pdf", "rb") as f: 327 | stream_mono, stream_dual = translate_stream(stream=f.read(), **params) 328 | ``` 329 | 330 | ### HTTP 331 | 332 | ```bash 333 | pip install pdf2zh[backend] 334 | pdf2zh --flask 335 | pdf2zh --celery worker 336 | ``` 337 | 338 | ```bash 339 | curl http://localhost:11008/v1/translate -F "file=@example.pdf" -F "data={\"lang_in\":\"en\",\"lang_out\":\"zh\",\"service\":\"google\",\"thread\":4}" 340 | {"id":"d9894125-2f4e-45ea-9d93-1a9068d2045a"} 341 | 342 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a 343 | {"info":{"n":13,"total":506},"state":"PROGRESS"} 344 | 345 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a 346 | {"state":"SUCCESS"} 347 | 348 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a/mono --output example-mono.pdf 349 | 350 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a/dual --output example-dual.pdf 351 | 352 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a -X DELETE 353 | ``` 354 | 355 |

謝辞

356 | 357 | - ドキュメントのマージ:[PyMuPDF](https://github.com/pymupdf/PyMuPDF) 358 | 359 | - ドキュメントの解析:[Pdfminer.six](https://github.com/pdfminer/pdfminer.six) 360 | 361 | - ドキュメントの抽出:[MinerU](https://github.com/opendatalab/MinerU) 362 | 363 | - ドキュメントプレビュー:[Gradio PDF](https://github.com/freddyaboulton/gradio-pdf) 364 | 365 | - マルチスレッド翻訳:[MathTranslate](https://github.com/SUSYUSTC/MathTranslate) 366 | 367 | - レイアウト解析:[DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO) 368 | 369 | - ドキュメント標準:[PDF Explained](https://zxyle.github.io/PDF-Explained/)、[PDF Cheat Sheets](https://pdfa.org/resource/pdf-cheat-sheets/) 370 | 371 | - 多言語フォント:[Go Noto Universal](https://github.com/satbyy/go-noto-universal) 372 | 373 |

貢献者

374 | 375 | 376 | 377 | 378 | 379 | ![Alt](https://repobeats.axiom.co/api/embed/dfa7583da5332a11468d686fbd29b92320a6a869.svg "Repobeats analytics image") 380 | 381 |

スター履歴

382 | 383 | 384 | 385 | 386 | 387 | Star History Chart 388 | 389 | 390 | -------------------------------------------------------------------------------- /docs/README_zh-CN.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | [English](../README.md) | 简体中文 | [繁體中文](README_zh-TW.md) | [日本語](README_ja-JP.md) 4 | 5 | PDF2ZH 6 | 7 |

PDFMathTranslate

8 | 9 |

10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 |

31 | 32 | Byaidu%2FPDFMathTranslate | Trendshift 33 | 34 |
35 | 36 | 科学 PDF 文档翻译及双语对照工具 37 | 38 | - 📊 保留公式、图表、目录和注释 *([预览效果](#preview))* 39 | - 🌐 支持 [多种语言](./ADVANCED.md#language) 和 [诸多翻译服务](./ADVANCED.md#services) 40 | - 🤖 提供 [命令行工具](#usage),[图形交互界面](#gui),以及 [容器化部署](#docker) 41 | 42 | 欢迎在 [GitHub Issues](https://github.com/Byaidu/PDFMathTranslate/issues) 或 [Telegram 用户群](https://t.me/+Z9_SgnxmsmA5NzBl) 中提供反馈 43 | 44 | 有关如何贡献的详细信息,请查阅 [贡献指南](https://github.com/Byaidu/PDFMathTranslate/wiki/Contribution-Guide---%E8%B4%A1%E7%8C%AE%E6%8C%87%E5%8D%97) 45 | 46 |

更新

47 | 48 | - [2025 年 2 月 22 日] 更好的发布 CI 和精心打包的 windows-amd64 exe (由 [@awwaawwa](https://github.com/awwaawwa) 提供) 49 | - [2024 年 12 月 24 日] 翻译器现在支持在 [Xinference](https://github.com/xorbitsai/inference) 上使用本地模型 _(由 [@imClumsyPanda](https://github.com/imClumsyPanda) 提供)_ 50 | - [2024 年 12 月 19 日] 现在支持非 PDF/A 文档,使用 `-cp` _(由 [@reycn](https://github.com/reycn) 提供)_ 51 | - [2024 年 12 月 13 日] 额外支持后端 _(由 [@YadominJinta](https://github.com/YadominJinta) 提供)_ 52 | - [2024 年 12 月 10 日] 翻译器现在支持 Azure 上的 OpenAI 模型 _(由 [@yidasanqian](https://github.com/yidasanqian) 提供)_ 53 | 54 |

预览

55 |
56 | 57 |
58 | 59 |

在线演示 🌟

60 | 61 |

在线服务 🌟

62 | 63 | 您可以通过以下演示尝试我们的应用程序: 64 | 65 | - [公共免费服务](https://pdf2zh.com/) 在线使用,无需安装 _(推荐)_。 66 | - [沉浸式翻译 - BabelDOC](https://app.immersivetranslate.com/babel-doc/) 每月免费 1000 页 _(推荐)_ 67 | - [在 HuggingFace 上托管的演示](https://huggingface.co/spaces/reycn/PDFMathTranslate-Docker) 68 | - [在 ModelScope 上托管的演示](https://www.modelscope.cn/studios/AI-ModelScope/PDFMathTranslate) 无需安装。 69 | 70 | 请注意演示的计算资源有限,请避免滥用它们。 71 |

安装和使用

72 | 73 | ### 方法 74 | 75 | 针对不同的使用案例,我们提供不同的方法来使用我们的程序: 76 | 77 |
78 | 1. UV 安装 79 | 80 | 1. 安装 Python (3.10 <= 版本 <= 3.12) 81 | 2. 安装我们的包: 82 | 83 | ```bash 84 | pip install uv 85 | uv tool install --python 3.12 pdf2zh 86 | ``` 87 | 88 | 3. 执行翻译,文件生成在 [当前工作目录](https://chatgpt.com/share/6745ed36-9acc-800e-8a90-59204bd13444): 89 | 90 | ```bash 91 | pdf2zh document.pdf 92 | ``` 93 | 94 |
95 | 96 |
97 | 2. Windows exe 98 | 99 | 1. 从 [发布页面](https://github.com/Byaidu/PDFMathTranslate/releases) 下载 pdf2zh-version-win64.zip 100 | 101 | 2. 解压缩并双击 `pdf2zh.exe` 运行。 102 | 103 |
104 | 105 |
106 | 3. 图形用户界面 107 | 1. 安装 Python (3.10 <= 版本 <= 3.12) 108 | 2. 安装我们的包: 109 | 110 | ```bash 111 | pip install pdf2zh 112 | ``` 113 | 114 | 3. 在浏览器中开始使用: 115 | 116 | ```bash 117 | pdf2zh -i 118 | ``` 119 | 120 | 4. 如果您的浏览器没有自动启动,请访问 121 | 122 | ```bash 123 | http://localhost:7860/ 124 | ``` 125 | 126 | 127 | 128 | 有关更多详细信息,请参阅 [GUI 文档](./README_GUI.md)。 129 | 130 |
131 | 132 |
133 | 4. Docker 134 | 135 | 1. 拉取并运行: 136 | 137 | ```bash 138 | docker pull byaidu/pdf2zh 139 | docker run -d -p 7860:7860 byaidu/pdf2zh 140 | ``` 141 | 142 | 2. 在浏览器中打开: 143 | 144 | ``` 145 | http://localhost:7860/ 146 | ``` 147 | 148 | 对于云服务上的 docker 部署: 149 | 150 |
151 | 152 | 部署 153 | 154 | 部署到 Koyeb 155 | 156 | 在 Zeabur 上部署 157 | 158 | 部署到 Koyeb 159 |
160 | 161 |
162 | 163 |
164 | 5. Zotero 插件 165 | 166 | 有关更多细节,请参见 [Zotero PDF2zh](https://github.com/guaguastandup/zotero-pdf2zh)。 167 | 168 |
169 | 170 |
171 | 6. 命令行 172 | 173 | 1. 已安装 Python(3.10 <= 版本 <= 3.12) 174 | 2. 安装我们的包: 175 | 176 | ```bash 177 | pip install pdf2zh 178 | ``` 179 | 180 | 3. 执行翻译,文件生成在 [当前工作目录](https://chatgpt.com/share/6745ed36-9acc-800e-8a90-59204bd13444): 181 | 182 | ```bash 183 | pdf2zh document.pdf 184 | ``` 185 | 186 |
187 | 188 | > [!TIP] 189 | > 190 | > - 如果你使用 Windows 并在下载后无法打开文件,请安装 [vc_redist.x64.exe](https://aka.ms/vs/17/release/vc_redist.x64.exe) 并重试。 191 | > 192 | > - 如果你无法访问 Docker Hub,请尝试在 [GitHub 容器注册中心](https://github.com/Byaidu/PDFMathTranslate/pkgs/container/pdfmathtranslate) 上使用该镜像。 193 | > ```bash 194 | > docker pull ghcr.io/byaidu/pdfmathtranslate 195 | > docker run -d -p 7860:7860 ghcr.io/byaidu/pdfmathtranslate 196 | > ``` 197 | 198 | ### 无法安装? 199 | 200 | 当前程序在工作前需要一个 AI 模型 (`wybxc/DocLayout-YOLO-DocStructBench-onnx`),一些用户由于网络问题无法下载。如果你在下载此模型时遇到问题,我们提供以下环境变量的解决方法: 201 | 202 | ```shell 203 | set HF_ENDPOINT=https://hf-mirror.com 204 | ``` 205 | 206 | 对于 PowerShell 用户: 207 | 208 | ```shell 209 | $env:HF_ENDPOINT = https://hf-mirror.com 210 | ``` 211 | 212 | 如果此解决方案对您无效或您遇到其他问题,请参阅 [常见问题解答](https://github.com/Byaidu/PDFMathTranslate/wiki#-faq--%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98)。 213 | 214 | 215 |

高级选项

216 | 217 | 在命令行中执行翻译命令,在当前工作目录下生成译文文档 `example-mono.pdf` 和双语对照文档 `example-dual.pdf`,默认使用 Google 翻译服务,更多支持的服务在[这里](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#services))。 218 | 219 | cmd 220 | 221 | 在下表中,我们列出了所有高级选项供参考: 222 | 223 | | 选项 | 功能 | 示例 | 224 | | ------------ | ------------------------------------------------------------------------------------------------------------- | ---------------------------------------------- | 225 | | files | 本地文件 | `pdf2zh ~/local.pdf` | 226 | | links | 在线文件 | `pdf2zh http://arxiv.org/paper.pdf` | 227 | | `-i` | [进入 GUI](#gui) | `pdf2zh -i` | 228 | | `-p` | [部分文档翻译](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#partial) | `pdf2zh example.pdf -p 1` | 229 | | `-li` | [源语言](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#languages) | `pdf2zh example.pdf -li en` | 230 | | `-lo` | [目标语言](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#languages) | `pdf2zh example.pdf -lo zh` | 231 | | `-s` | [翻译服务](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#services) | `pdf2zh example.pdf -s deepl` | 232 | | `-t` | [多线程](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#threads) | `pdf2zh example.pdf -t 1` | 233 | | `-o` | 输出目录 | `pdf2zh example.pdf -o output` | 234 | | `-f`, `-c` | [异常](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#exceptions) | `pdf2zh example.pdf -f "(MS.*)"` | 235 | | `-cp` | 兼容模式 | `pdf2zh example.pdf --compatible` | 236 | | `--share` | 公开链接 | `pdf2zh -i --share` | 237 | | `--authorized` | [授权](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#auth) | `pdf2zh -i --authorized users.txt [auth.html]` | 238 | | `--prompt` | [自定义提示](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#prompt) | `pdf2zh --prompt [prompt.txt]` | 239 | | `--onnx` | [使用自定义 DocLayout-YOLO ONNX 模型] | `pdf2zh --onnx [onnx/model/path]` | 240 | | `--serverport` 
| [使用自定义 WebUI 端口] | `pdf2zh --serverport 7860` | 241 | | `--dir` | [批量翻译] | `pdf2zh --dir /path/to/translate/` | 242 | | `--config` | [配置文件](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#cofig) | `pdf2zh --config /path/to/config/config.json` | 243 | | `--serverport` | [自定义 gradio 服务器端口] | `pdf2zh --serverport 7860` | 244 | | `--babeldoc`| 使用实验性后端 [BabelDOC](https://funstory-ai.github.io/BabelDOC/) 翻译 |`pdf2zh --babeldoc -s openai example.pdf`| 245 | 246 | 有关详细说明,请参阅我们的文档 [高级用法](./ADVANCED.md),以获取每个选项的完整列表。 247 | 248 |

二次开发 (API)

249 | 250 | 当前的 pdf2zh API 暂时已弃用。API 将在 [pdf2zh 2.0](https://github.com/Byaidu/PDFMathTranslate/issues/586)发布后重新提供。对于需要程序化访问的用户,请使用[BabelDOC](https://github.com/funstory-ai/BabelDOC)的 `babeldoc.high_level.async_translate` 函数。 251 | 252 | API 暂时弃用意味着:相关代码暂时不会被移除,但不会提供技术支持,也不会修复 bug。 253 | 254 | 257 | 258 |

待办事项

259 | 260 | - [ ] 使用基于 DocLayNet 的模型解析布局,[PaddleX](https://github.com/PaddlePaddle/PaddleX/blob/17cc27ac3842e7880ca4aad92358d3ef8555429a/paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py#L81),[PaperMage](https://github.com/allenai/papermage/blob/9cd4bb48cbedab45d0f7a455711438f1632abebe/README.md?plain=1#L102),[SAM2](https://github.com/facebookresearch/sam2) 261 | 262 | - [ ] 修复页面旋转、目录、列表格式 263 | 264 | - [ ] 修复旧论文中的像素公式 265 | 266 | - [ ] 异步重试,除了 KeyboardInterrupt 267 | 268 | - [ ] 针对西方语言的 Knuth–Plass 算法 269 | 270 | - [ ] 支持非 PDF/A 文件 271 | 272 | - [ ] [Zotero](https://github.com/zotero/zotero) 和 [Obsidian](https://github.com/obsidianmd/obsidian-releases) 的插件 273 | 274 |

致谢

275 | 276 | - [Immersive Translation](https://immersivetranslate.com) 为此项目的活跃贡献者提供每月的专业会员兑换码,详细信息请查看:[CONTRIBUTOR_REWARD.md](https://github.com/funstory-ai/BabelDOC/blob/main/docs/CONTRIBUTOR_REWARD.md) 277 | 278 | - 文档合并:[PyMuPDF](https://github.com/pymupdf/PyMuPDF) 279 | 280 | - 文档解析:[Pdfminer.six](https://github.com/pdfminer/pdfminer.six) 281 | 282 | - 文档提取:[MinerU](https://github.com/opendatalab/MinerU) 283 | 284 | - 文档预览:[Gradio PDF](https://github.com/freddyaboulton/gradio-pdf) 285 | 286 | - 多线程翻译:[MathTranslate](https://github.com/SUSYUSTC/MathTranslate) 287 | 288 | - 布局解析:[DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO) 289 | 290 | - 文档标准:[PDF Explained](https://zxyle.github.io/PDF-Explained/),[PDF Cheat Sheets](https://pdfa.org/resource/pdf-cheat-sheets/) 291 | 292 | - 多语言字体:[Go Noto Universal](https://github.com/satbyy/go-noto-universal) 293 | 294 |

贡献者

295 | 296 | 297 | 298 | 299 | 300 | ![Alt](https://repobeats.axiom.co/api/embed/dfa7583da5332a11468d686fbd29b92320a6a869.svg "Repobeats analytics image") 301 | 302 |

星标历史

303 | 304 | 305 | 306 | 307 | 308 | 星标历史图表 309 | 310 | 311 | -------------------------------------------------------------------------------- /docs/README_zh-TW.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | [English](../README.md) | [简体中文](README_zh-CN.md) | 繁體中文 | [日本語](README_ja-JP.md) 4 | 5 | PDF2ZH 6 | 7 |

PDFMathTranslate

8 | 9 |

10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 |

31 | 32 | Byaidu%2FPDFMathTranslate | Trendshift 33 | 34 |
35 | 36 | 科學 PDF 文件翻譯及雙語對照工具 37 | 38 | - 📊 保留公式、圖表、目錄和註釋 *([預覽效果](#preview))* 39 | - 🌐 支援 [多種語言](#language) 和 [諸多翻譯服務](#services) 40 | - 🤖 提供 [命令列工具](#usage)、[圖形使用者介面](#gui),以及 [容器化部署](#docker) 41 | 42 | 歡迎在 [GitHub Issues](https://github.com/Byaidu/PDFMathTranslate/issues)、[Telegram 使用者群](https://t.me/+Z9_SgnxmsmA5NzBl) 或 [QQ 使用者群](https://qm.qq.com/q/DixZCxQej0) 中提出回饋 43 | 44 | 如需瞭解如何貢獻的詳細資訊,請查閱 [貢獻指南](https://github.com/Byaidu/PDFMathTranslate/wiki/Contribution-Guide---%E8%B4%A1%E7%8C%AE%E6%8C%87%E5%8D%97) 45 | 46 |

近期更新

47 | 48 | - [Dec. 24 2024] 翻譯功能支援接入由 [Xinference](https://github.com/xorbitsai/inference) 執行的本機 LLM _(by [@imClumsyPanda](https://github.com/imClumsyPanda))_ 49 | - [Nov. 26 2024] CLI 現在已支援(多個)線上 PDF 檔 *(by [@reycn](https://github.com/reycn))* 50 | - [Nov. 24 2024] 為了降低依賴大小,提供 [ONNX](https://github.com/onnx/onnx) 支援 *(by [@Wybxc](https://github.com/Wybxc))* 51 | - [Nov. 23 2024] 🌟 [免費公共服務](#demo) 上線! *(by [@Byaidu](https://github.com/Byaidu))* 52 | - [Nov. 23 2024] 新增防止網頁爬蟲的防火牆 *(by [@Byaidu](https://github.com/Byaidu))* 53 | - [Nov. 22 2024] 圖形使用者介面現已支援義大利語並進行了一些更新 *(by [@Byaidu](https://github.com/Byaidu), [@reycn](https://github.com/reycn))* 54 | - [Nov. 22 2024] 現在你可以將自己部署的服務分享給朋友 *(by [@Zxis233](https://github.com/Zxis233))* 55 | - [Nov. 22 2024] 支援騰訊翻譯 *(by [@hellofinch](https://github.com/hellofinch))* 56 | - [Nov. 21 2024] 圖形使用者介面現在支援下載雙語文件 *(by [@reycn](https://github.com/reycn))* 57 | - [Nov. 20 2024] 🌟 提供了 [線上示範](#demo)! *(by [@reycn](https://github.com/reycn))* 58 | 59 |

效果預覽

60 | 61 |
62 | 63 |
64 | 65 |

線上示範 🌟

66 | 67 | ### 免費服務 (<https://pdf2zh.com/>) 68 | 69 | 你可以立即嘗試 [免費公共服務](https://pdf2zh.com/) 而無需安裝 70 | 71 | ### 線上示範 72 | 73 | 你可以直接在 [HuggingFace 上的線上示範](https://huggingface.co/spaces/reycn/PDFMathTranslate-Docker)和[魔搭的線上示範](https://www.modelscope.cn/studios/AI-ModelScope/PDFMathTranslate)進行嘗試,無需安裝。 74 | 請注意,示範使用的運算資源有限,請勿濫用。 75 | 76 |

安裝與使用

77 | 78 | 我們提供了四種使用此專案的方法:[命令列工具](#cmd)、[便攜式安裝](#portable)、[圖形使用者介面](#gui) 與 [容器化部署](#docker)。 79 | 80 | pdf2zh 在執行時需要額外下載模型(`wybxc/DocLayout-YOLO-DocStructBench-onnx`),該模型也可在魔搭(ModelScope)上取得。如果在啟動時下載該模型時遇到問題,請使用如下環境變數: 81 | ```shell 82 | set HF_ENDPOINT=https://hf-mirror.com 83 | ``` 84 | 85 |

方法一、命令列工具

86 | 87 | 1. 確保已安裝 Python(3.10 <= 版本 <= 3.12) 88 | 2. 安裝此程式: 89 | 90 | ```bash 91 | pip install pdf2zh 92 | ``` 93 | 94 | 3. 執行翻譯,生成檔案位於 [目前工作目錄](https://chatgpt.com/share/6745ed36-9acc-800e-8a90-59204bd13444): 95 | 96 | ```bash 97 | pdf2zh document.pdf 98 | ``` 99 | 100 |

方法二、便攜式安裝

101 | 102 | 無需預先安裝 Python 環境 103 | 104 | 下載 [setup.bat](https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/refs/heads/main/script/setup.bat) 並直接雙擊執行 105 | 106 |

方法三、圖形使用者介面

107 | 108 | 1. 確保已安裝 Python(3.10 <= 版本 <= 3.12) 109 | 2. 安裝此程式: 110 | 111 | ```bash 112 | pip install pdf2zh 113 | ``` 114 | 115 | 3. 在瀏覽器中啟動使用: 116 | 117 | ```bash 118 | pdf2zh -i 119 | ``` 120 | 121 | 4. 如果您的瀏覽器沒有自動開啟並跳轉,請手動在瀏覽器開啟: 122 | 123 | ```bash 124 | http://localhost:7860/ 125 | ``` 126 | 127 | 128 | 129 | 查看 [documentation for GUI](./README_GUI.md) 以獲取詳細說明 130 | 131 |

方法四、容器化部署

132 | 133 | 1. 拉取 Docker 映像檔並執行: 134 | 135 | ```bash 136 | docker pull byaidu/pdf2zh 137 | docker run -d -p 7860:7860 byaidu/pdf2zh 138 | ``` 139 | 140 | 2. 透過瀏覽器開啟: 141 | 142 | ``` 143 | http://localhost:7860/ 144 | ``` 145 | 146 | 用於在雲服務上部署容器映像檔: 147 | 148 |
149 | 150 | Deploy 151 | 152 | Deploy to Koyeb 153 | 154 | Deploy on Zeabur 155 | 156 | Deploy to Koyeb 157 |
158 | 159 |

高級選項

160 | 161 | 在命令列中執行翻譯指令,並在目前工作目錄下生成譯文檔案 `example-mono.pdf` 和雙語對照檔案 `example-dual.pdf`。預設使用 Google 翻譯服務。 162 | 163 | cmd 164 | 165 | 以下表格列出了所有高級選項,供參考: 166 | 167 | | Option | 功能 | 範例 | 168 | | -------- | ------- |------- | 169 | | files | 本機檔案 | `pdf2zh ~/local.pdf` | 170 | | links | 線上檔案 | `pdf2zh http://arxiv.org/paper.pdf` | 171 | | `-i` | [進入圖形介面](#gui) | `pdf2zh -i` | 172 | | `-p` | [僅翻譯部分文件](#partial) | `pdf2zh example.pdf -p 1` | 173 | | `-li` | [原文語言](#language) | `pdf2zh example.pdf -li en` | 174 | | `-lo` | [目標語言](#language) | `pdf2zh example.pdf -lo zh` | 175 | | `-s` | [指定翻譯服務](#services) | `pdf2zh example.pdf -s deepl` | 176 | | `-t` | [多執行緒](#threads) | `pdf2zh example.pdf -t 1` | 177 | | `-o` | 輸出目錄 | `pdf2zh example.pdf -o output` | 178 | | `-f`, `-c` | [例外規則](#exceptions) | `pdf2zh example.pdf -f "(MS.*)"` | 179 | | `--share` | [獲取 gradio 公開連結] | `pdf2zh -i --share` | 180 | | `--authorized` | [[添加網頁認證及自訂認證頁面](https://github.com/Byaidu/PDFMathTranslate/blob/main/docs/ADVANCED.md#auth)] | `pdf2zh -i --authorized users.txt [auth.html]` | 181 | | `--prompt` | [使用自訂的大模型 Prompt] | `pdf2zh --prompt [prompt.txt]` | 182 | | `--onnx` | [使用自訂的 DocLayout-YOLO ONNX 模型] | `pdf2zh --onnx [onnx/model/path]` | 183 | | `--serverport` | [自訂 WebUI 埠號] | `pdf2zh --serverport 7860` | 184 | | `--dir` | [資料夾翻譯] | `pdf2zh --dir /path/to/translate/` | 185 | 186 |

全文或部分文件翻譯

187 | 188 | - **全文翻譯** 189 | 190 | ```bash 191 | pdf2zh example.pdf 192 | ``` 193 | 194 | - **部分翻譯** 195 | 196 | ```bash 197 | pdf2zh example.pdf -p 1-3,5 198 | ``` 199 | 200 |

指定原文語言與目標語言

201 | 202 | 可參考 [Google 語言代碼](https://developers.google.com/admin-sdk/directory/v1/languages)、[DeepL 語言代碼](https://developers.deepl.com/docs/resources/supported-languages) 203 | 204 | ```bash 205 | pdf2zh example.pdf -li en -lo ja 206 | ``` 207 | 208 |

使用不同的翻譯服務

209 | 210 | 下表列出了每個翻譯服務所需的 [環境變數](https://chatgpt.com/share/6734a83d-9d48-800e-8a46-f57ca6e8bcb4)。在使用前,請先確保已設定好對應的變數。 211 | 212 | |**Translator**|**Service**|**Environment Variables**|**Default Values**|**Notes**| 213 | |-|-|-|-|-| 214 | |**Google (Default)**|`google`|無|N/A|無| 215 | |**Bing**|`bing`|無|N/A|無| 216 | |**DeepL**|`deepl`|`DEEPL_AUTH_KEY`|`[Your Key]`|參閱 [DeepL](https://support.deepl.com/hc/en-us/articles/360020695820-API-Key-for-DeepL-s-API)| 217 | |**DeepLX**|`deeplx`|`DEEPLX_ENDPOINT`|`https://api.deepl.com/translate`|參閱 [DeepLX](https://github.com/OwO-Network/DeepLX)| 218 | |**Ollama**|`ollama`|`OLLAMA_HOST`, `OLLAMA_MODEL`|`http://127.0.0.1:11434`, `gemma2`|參閱 [Ollama](https://github.com/ollama/ollama)| 219 | |**OpenAI**|`openai`|`OPENAI_BASE_URL`, `OPENAI_API_KEY`, `OPENAI_MODEL`|`https://api.openai.com/v1`, `[Your Key]`, `gpt-4o-mini`|參閱 [OpenAI](https://platform.openai.com/docs/overview)| 220 | |**AzureOpenAI**|`azure-openai`|`AZURE_OPENAI_BASE_URL`, `AZURE_OPENAI_API_KEY`, `AZURE_OPENAI_MODEL`|`[Your Endpoint]`, `[Your Key]`, `gpt-4o-mini`|參閱 [Azure OpenAI](https://learn.microsoft.com/zh-cn/azure/ai-services/openai/chatgpt-quickstart?tabs=command-line%2Cjavascript-keyless%2Ctypescript-keyless%2Cpython&pivots=programming-language-python)| 221 | |**Zhipu**|`zhipu`|`ZHIPU_API_KEY`, `ZHIPU_MODEL`|`[Your Key]`, `glm-4-flash`|參閱 [Zhipu](https://open.bigmodel.cn/dev/api/thirdparty-frame/openai-sdk)| 222 | | **ModelScope** | `modelscope` |`MODELSCOPE_API_KEY`, `MODELSCOPE_MODEL`|`[Your Key]`, `Qwen/Qwen2.5-Coder-32B-Instruct`| 參閱 [ModelScope](https://www.modelscope.cn/docs/model-service/API-Inference/intro)| 223 | |**Silicon**|`silicon`|`SILICON_API_KEY`, `SILICON_MODEL`|`[Your Key]`, `Qwen/Qwen2.5-7B-Instruct`|參閱 [SiliconCloud](https://docs.siliconflow.cn/quickstart)| 224 | |**Gemini**|`gemini`|`GEMINI_API_KEY`, `GEMINI_MODEL`|`[Your Key]`, `gemini-1.5-flash`|參閱 [Gemini](https://ai.google.dev/gemini-api/docs/openai)| 225 | 
|**Azure**|`azure`|`AZURE_ENDPOINT`, `AZURE_API_KEY`|`https://api.translator.azure.cn`, `[Your Key]`|參閱 [Azure](https://docs.azure.cn/en-us/ai-services/translator/text-translation-overview)| 226 | |**Tencent**|`tencent`|`TENCENTCLOUD_SECRET_ID`, `TENCENTCLOUD_SECRET_KEY`|`[Your ID]`, `[Your Key]`|參閱 [Tencent](https://www.tencentcloud.com/products/tmt?from_qcintl=122110104)| 227 | |**Dify**|`dify`|`DIFY_API_URL`, `DIFY_API_KEY`|`[Your DIFY URL]`, `[Your Key]`|參閱 [Dify](https://github.com/langgenius/dify),需要在 Dify 的工作流程輸入中定義三個變數:lang_out、lang_in、text。| 228 | |**AnythingLLM**|`anythingllm`|`AnythingLLM_URL`, `AnythingLLM_APIKEY`|`[Your AnythingLLM URL]`, `[Your Key]`|參閱 [anything-llm](https://github.com/Mintplex-Labs/anything-llm)| 229 | |**Argos Translate**|`argos`| | |參閱 [argos-translate](https://github.com/argosopentech/argos-translate)| 230 | |**Grok**|`grok`| `GORK_API_KEY`, `GORK_MODEL` | `[Your GORK_API_KEY]`, `grok-2-1212` |參閱 [Grok](https://docs.x.ai/docs/overview)| 231 | |**DeepSeek**|`deepseek`| `DEEPSEEK_API_KEY`, `DEEPSEEK_MODEL` | `[Your DEEPSEEK_API_KEY]`, `deepseek-chat` |參閱 [DeepSeek](https://www.deepseek.com/)| 232 | |**OpenAI-Liked**|`openailiked`| `OPENAILIKED_BASE_URL`, `OPENAILIKED_API_KEY`, `OPENAILIKED_MODEL` | `url`, `[Your Key]`, `model name` | 無 | 233 | 234 | 對於不在上述表格中,但相容 OpenAI API 的大型語言模型,可以使用與 OpenAI 相同的方式設定環境變數。 235 | 236 | 使用 `-s service` 或 `-s service:model` 指定翻譯服務: 237 | 238 | ```bash 239 | pdf2zh example.pdf -s openai:gpt-4o-mini 240 | ``` 241 | 242 | 或使用環境變數指定模型: 243 | 244 | ```bash 245 | set OPENAI_MODEL=gpt-4o-mini 246 | pdf2zh example.pdf -s openai 247 | ``` 248 | 249 |

指定例外規則

250 | 251 | 使用正則表達式指定需要保留的公式字體與字元: 252 | 253 | ```bash 254 | pdf2zh example.pdf -f "(CM[^RT].*|MS.*|.*Ital)" -c "(\(|\||\)|\+|=|\d|[\u0080-\ufaff])" 255 | ``` 256 | 257 | 預設保留 `Latex`, `Mono`, `Code`, `Italic`, `Symbol` 以及 `Math` 字體: 258 | 259 | ```bash 260 | pdf2zh example.pdf -f "(CM[^R]|MS.M|XY|MT|BL|RM|EU|LA|RS|LINE|LCIRCLE|TeX-|rsfs|txsy|wasy|stmary|.*Mono|.*Code|.*Ital|.*Sym|.*Math)" 261 | ``` 262 | 263 |

指定執行緒數量

264 | 265 | 使用 `-t` 參數指定翻譯使用的執行緒數量: 266 | 267 | ```bash 268 | pdf2zh example.pdf -t 1 269 | ``` 270 | 271 |

自訂大模型 Prompt

272 | 273 | 使用 `--prompt` 指定在使用大模型翻譯時所採用的 Prompt 檔案。 274 | 275 | ```bash 276 | pdf2zh example.pdf --prompt prompt.txt 277 | ``` 278 | 279 | 範例 `prompt.txt` 檔案內容: 280 | 281 | ``` 282 | [ 283 | { 284 | "role": "system", 285 | "content": "You are a professional,authentic machine translation engine.", 286 | }, 287 | { 288 | "role": "user", 289 | "content": "Translate the following markdown source text to ${lang_out}. Keep the formula notation {{v*}} unchanged. Output translation directly without any additional text.\nSource Text: ${text}\nTranslated Text:", 290 | }, 291 | ] 292 | ``` 293 | 294 | 在自訂 Prompt 檔案中,可以使用以下三個內建變數來傳遞參數: 295 | |**變數名稱**|**說明**| 296 | |-|-| 297 | |`lang_in`|輸入語言| 298 | |`lang_out`|輸出語言| 299 | |`text`|需要翻譯的文本| 300 | 301 |

API

302 | 303 | ### Python 304 | 305 | ```python 306 | from pdf2zh import translate, translate_stream 307 | 308 | params = {"lang_in": "en", "lang_out": "zh", "service": "google", "thread": 4} 309 | file_mono, file_dual = translate(files=["example.pdf"], **params)[0] 310 | with open("example.pdf", "rb") as f: 311 | stream_mono, stream_dual = translate_stream(stream=f.read(), **params) 312 | ``` 313 | 314 | ### HTTP 315 | 316 | ```bash 317 | pip install pdf2zh[backend] 318 | pdf2zh --flask 319 | pdf2zh --celery worker 320 | ``` 321 | 322 | ```bash 323 | curl http://localhost:11008/v1/translate -F "file=@example.pdf" -F "data={\"lang_in\":\"en\",\"lang_out\":\"zh\",\"service\":\"google\",\"thread\":4}" 324 | {"id":"d9894125-2f4e-45ea-9d93-1a9068d2045a"} 325 | 326 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a 327 | {"info":{"n":13,"total":506},"state":"PROGRESS"} 328 | 329 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a 330 | {"state":"SUCCESS"} 331 | 332 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a/mono --output example-mono.pdf 333 | 334 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a/dual --output example-dual.pdf 335 | 336 | curl http://localhost:11008/v1/translate/d9894125-2f4e-45ea-9d93-1a9068d2045a -X DELETE 337 | ``` 338 | 339 |

致謝

340 | 341 | - 文件合併:[PyMuPDF](https://github.com/pymupdf/PyMuPDF) 342 | - 文件解析:[Pdfminer.six](https://github.com/pdfminer/pdfminer.six) 343 | - 文件提取:[MinerU](https://github.com/opendatalab/MinerU) 344 | - 文件預覽:[Gradio PDF](https://github.com/freddyaboulton/gradio-pdf) 345 | - 多執行緒翻譯:[MathTranslate](https://github.com/SUSYUSTC/MathTranslate) 346 | - 版面解析:[DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO) 347 | - PDF 標準:[PDF Explained](https://zxyle.github.io/PDF-Explained/)、[PDF Cheat Sheets](https://pdfa.org/resource/pdf-cheat-sheets/) 348 | - 多語言字型:[Go Noto Universal](https://github.com/satbyy/go-noto-universal) 349 | 350 |

貢獻者

351 | 352 | 353 | 354 | 355 | 356 | ![Alt](https://repobeats.axiom.co/api/embed/dfa7583da5332a11468d686fbd29b92320a6a869.svg "Repobeats analytics image") 357 | 358 |

星標歷史

359 | 360 | 361 | 362 | 363 | 364 | Star History Chart 365 | 366 | -------------------------------------------------------------------------------- /docs/images/after.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/docs/images/after.png -------------------------------------------------------------------------------- /docs/images/banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/docs/images/banner.png -------------------------------------------------------------------------------- /docs/images/before.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/docs/images/before.png -------------------------------------------------------------------------------- /docs/images/cmd.explained.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/docs/images/cmd.explained.png -------------------------------------------------------------------------------- /docs/images/cmd.explained.zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/docs/images/cmd.explained.zh.png -------------------------------------------------------------------------------- /docs/images/gui.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/docs/images/gui.gif 
-------------------------------------------------------------------------------- /docs/images/preview.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/docs/images/preview.gif -------------------------------------------------------------------------------- /pdf2zh/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pdf2zh.high_level import translate, translate_stream 3 | 4 | log = logging.getLogger(__name__) 5 | 6 | __version__ = "1.9.6" 7 | __author__ = "Byaidu" 8 | __all__ = ["translate", "translate_stream"] 9 | -------------------------------------------------------------------------------- /pdf2zh/backend.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, send_file 2 | from celery import Celery, Task 3 | from celery.result import AsyncResult 4 | from pdf2zh import translate_stream 5 | import tqdm 6 | import json 7 | import io 8 | from pdf2zh.doclayout import ModelInstance 9 | from pdf2zh.config import ConfigManager 10 | 11 | flask_app = Flask("pdf2zh") 12 | flask_app.config.from_mapping( 13 | CELERY=dict( 14 | broker_url=ConfigManager.get("CELERY_BROKER", "redis://127.0.0.1:6379/0"), 15 | result_backend=ConfigManager.get("CELERY_RESULT", "redis://127.0.0.1:6379/0"), 16 | ) 17 | ) 18 | 19 | 20 | def celery_init_app(app: Flask) -> Celery: 21 | class FlaskTask(Task): 22 | def __call__(self, *args, **kwargs): 23 | with app.app_context(): 24 | return self.run(*args, **kwargs) 25 | 26 | celery_app = Celery(app.name) 27 | celery_app.config_from_object(app.config["CELERY"]) 28 | celery_app.Task = FlaskTask 29 | celery_app.set_default() 30 | celery_app.autodiscover_tasks() 31 | app.extensions["celery"] = celery_app 32 | return celery_app 33 | 34 | 35 | celery_app = celery_init_app(flask_app) 36 | 
37 | 38 | @celery_app.task(bind=True) 39 | def translate_task( 40 | self: Task, 41 | stream: bytes, 42 | args: dict, 43 | ): 44 | def progress_bar(t: tqdm.tqdm): 45 | self.update_state(state="PROGRESS", meta={"n": t.n, "total": t.total}) # noqa 46 | print(f"Translating {t.n} / {t.total} pages") 47 | 48 | doc_mono, doc_dual = translate_stream( 49 | stream, 50 | callback=progress_bar, 51 | model=ModelInstance.value, 52 | **args, 53 | ) 54 | return doc_mono, doc_dual 55 | 56 | 57 | @flask_app.route("/v1/translate", methods=["POST"]) 58 | def create_translate_tasks(): 59 | file = request.files["file"] 60 | stream = file.stream.read() 61 | print(request.form.get("data")) 62 | args = json.loads(request.form.get("data")) 63 | task = translate_task.delay(stream, args) 64 | return {"id": task.id} 65 | 66 | 67 | @flask_app.route("/v1/translate/", methods=["GET"]) 68 | def get_translate_task(id: str): 69 | result: AsyncResult = celery_app.AsyncResult(id) 70 | if str(result.state) == "PROGRESS": 71 | return {"state": str(result.state), "info": result.info} 72 | else: 73 | return {"state": str(result.state)} 74 | 75 | 76 | @flask_app.route("/v1/translate/", methods=["DELETE"]) 77 | def delete_translate_task(id: str): 78 | result: AsyncResult = celery_app.AsyncResult(id) 79 | result.revoke(terminate=True) 80 | return {"state": str(result.state)} 81 | 82 | 83 | @flask_app.route("/v1/translate//") 84 | def get_translate_result(id: str, format: str): 85 | result = celery_app.AsyncResult(id) 86 | if not result.ready(): 87 | return {"error": "task not finished"}, 400 88 | if not result.successful(): 89 | return {"error": "task failed"}, 400 90 | doc_mono, doc_dual = result.get() 91 | to_send = doc_mono if format == "mono" else doc_dual 92 | return send_file(io.BytesIO(to_send), "application/pdf") 93 | 94 | 95 | if __name__ == "__main__": 96 | flask_app.run() 97 | -------------------------------------------------------------------------------- /pdf2zh/cache.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import json 4 | from peewee import Model, SqliteDatabase, AutoField, CharField, TextField, SQL 5 | from typing import Optional 6 | 7 | 8 | # we don't init the database here 9 | db = SqliteDatabase(None) 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class _TranslationCache(Model): 14 | id = AutoField() 15 | translate_engine = CharField(max_length=20) 16 | translate_engine_params = TextField() 17 | original_text = TextField() 18 | translation = TextField() 19 | 20 | class Meta: 21 | database = db 22 | constraints = [ 23 | SQL( 24 | """ 25 | UNIQUE ( 26 | translate_engine, 27 | translate_engine_params, 28 | original_text 29 | ) 30 | ON CONFLICT REPLACE 31 | """ 32 | ) 33 | ] 34 | 35 | 36 | class TranslationCache: 37 | @staticmethod 38 | def _sort_dict_recursively(obj): 39 | if isinstance(obj, dict): 40 | return { 41 | k: TranslationCache._sort_dict_recursively(v) 42 | for k in sorted(obj.keys()) 43 | for v in [obj[k]] 44 | } 45 | elif isinstance(obj, list): 46 | return [TranslationCache._sort_dict_recursively(item) for item in obj] 47 | return obj 48 | 49 | def __init__(self, translate_engine: str, translate_engine_params: dict = None): 50 | assert ( 51 | len(translate_engine) < 20 52 | ), "current cache require translate engine name less than 20 characters" 53 | self.translate_engine = translate_engine 54 | self.replace_params(translate_engine_params) 55 | 56 | # The program typically starts multi-threaded translation 57 | # only after cache parameters are fully configured, 58 | # so thread safety doesn't need to be considered here. 
59 | def replace_params(self, params: dict = None): 60 | if params is None: 61 | params = {} 62 | self.params = params 63 | params = self._sort_dict_recursively(params) 64 | self.translate_engine_params = json.dumps(params) 65 | 66 | def update_params(self, params: dict = None): 67 | if params is None: 68 | params = {} 69 | self.params.update(params) 70 | self.replace_params(self.params) 71 | 72 | def add_params(self, k: str, v): 73 | self.params[k] = v 74 | self.replace_params(self.params) 75 | 76 | # Since peewee and the underlying sqlite are thread-safe, 77 | # get and set operations don't need locks. 78 | def get(self, original_text: str) -> Optional[str]: 79 | result = _TranslationCache.get_or_none( 80 | translate_engine=self.translate_engine, 81 | translate_engine_params=self.translate_engine_params, 82 | original_text=original_text, 83 | ) 84 | return result.translation if result else None 85 | 86 | def set(self, original_text: str, translation: str): 87 | try: 88 | _TranslationCache.create( 89 | translate_engine=self.translate_engine, 90 | translate_engine_params=self.translate_engine_params, 91 | original_text=original_text, 92 | translation=translation, 93 | ) 94 | except Exception as e: 95 | logger.debug(f"Error setting cache: {e}") 96 | 97 | 98 | def init_db(remove_exists=False): 99 | cache_folder = os.path.join(os.path.expanduser("~"), ".cache", "pdf2zh") 100 | os.makedirs(cache_folder, exist_ok=True) 101 | # The current version does not support database migration, so add the version number to the file name. 
102 | cache_db_path = os.path.join(cache_folder, "cache.v1.db") 103 | if remove_exists and os.path.exists(cache_db_path): 104 | os.remove(cache_db_path) 105 | db.init( 106 | cache_db_path, 107 | pragmas={ 108 | "journal_mode": "wal", 109 | "busy_timeout": 1000, 110 | }, 111 | ) 112 | db.create_tables([_TranslationCache], safe=True) 113 | 114 | 115 | def init_test_db(): 116 | import tempfile 117 | 118 | cache_db_path = tempfile.mktemp(suffix=".db") 119 | test_db = SqliteDatabase( 120 | cache_db_path, 121 | pragmas={ 122 | "journal_mode": "wal", 123 | "busy_timeout": 1000, 124 | }, 125 | ) 126 | test_db.bind([_TranslationCache], bind_refs=False, bind_backrefs=False) 127 | test_db.connect() 128 | test_db.create_tables([_TranslationCache], safe=True) 129 | return test_db 130 | 131 | 132 | def clean_test_db(test_db): 133 | test_db.drop_tables([_TranslationCache]) 134 | test_db.close() 135 | db_path = test_db.database 136 | if os.path.exists(db_path): 137 | os.remove(test_db.database) 138 | wal_path = db_path + "-wal" 139 | if os.path.exists(wal_path): 140 | os.remove(wal_path) 141 | shm_path = db_path + "-shm" 142 | if os.path.exists(shm_path): 143 | os.remove(shm_path) 144 | 145 | 146 | init_db() 147 | -------------------------------------------------------------------------------- /pdf2zh/config.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | from threading import RLock # 改成 RLock 4 | import os 5 | import copy 6 | 7 | 8 | class ConfigManager: 9 | _instance = None 10 | _lock = RLock() # 用 RLock 替换 Lock,允许在同一个线程中重复获取锁 11 | 12 | @classmethod 13 | def get_instance(cls): 14 | """获取单例实例""" 15 | # 先判断是否存在实例,如果不存在再加锁进行初始化 16 | if cls._instance is None: 17 | with cls._lock: 18 | if cls._instance is None: 19 | cls._instance = cls() 20 | return cls._instance 21 | 22 | def __init__(self): 23 | # 防止重复初始化 24 | if hasattr(self, "_initialized") and self._initialized: 25 | return 26 | 
self._initialized = True 27 | 28 | self._config_path = Path.home() / ".config" / "PDFMathTranslate" / "config.json" 29 | self._config_data = {} 30 | 31 | # 这里不要再加锁,因为外层可能已经加了锁 (get_instance), RLock也无妨 32 | self._ensure_config_exists() 33 | 34 | def _ensure_config_exists(self, isInit=True): 35 | """确保配置文件存在,如果不存在则创建默认配置""" 36 | # 这里也不需要显式再次加锁,原因同上,方法体中再调用 _load_config(), 37 | # 而 _load_config() 内部会加锁。因为 RLock 是可重入的,不会阻塞。 38 | if not self._config_path.exists(): 39 | if isInit: 40 | self._config_path.parent.mkdir(parents=True, exist_ok=True) 41 | self._config_data = {} # 默认配置内容 42 | self._save_config() 43 | else: 44 | raise ValueError(f"config file {self._config_path} not found!") 45 | else: 46 | self._load_config() 47 | 48 | def _load_config(self): 49 | """从 config.json 中加载配置""" 50 | with self._lock: # 加锁确保线程安全 51 | with self._config_path.open("r", encoding="utf-8") as f: 52 | self._config_data = json.load(f) 53 | 54 | def _save_config(self): 55 | """保存配置到 config.json""" 56 | with self._lock: # 加锁确保线程安全 57 | # 移除循环引用并写入 58 | cleaned_data = self._remove_circular_references(self._config_data) 59 | with self._config_path.open("w", encoding="utf-8") as f: 60 | json.dump(cleaned_data, f, indent=4, ensure_ascii=False) 61 | 62 | def _remove_circular_references(self, obj, seen=None): 63 | """递归移除循环引用""" 64 | if seen is None: 65 | seen = set() 66 | obj_id = id(obj) 67 | if obj_id in seen: 68 | return None # 遇到已处理过的对象,视为循环引用 69 | seen.add(obj_id) 70 | 71 | if isinstance(obj, dict): 72 | return { 73 | k: self._remove_circular_references(v, seen) for k, v in obj.items() 74 | } 75 | elif isinstance(obj, list): 76 | return [self._remove_circular_references(i, seen) for i in obj] 77 | return obj 78 | 79 | @classmethod 80 | def custome_config(cls, file_path): 81 | """使用自定义路径加载配置文件""" 82 | custom_path = Path(file_path) 83 | if not custom_path.exists(): 84 | raise ValueError(f"Config file {custom_path} not found!") 85 | # 加锁 86 | with cls._lock: 87 | instance = cls() 88 | 
instance._config_path = custom_path 89 | # 此处传 isInit=False,若不存在则报错;若存在则正常 _load_config() 90 | instance._ensure_config_exists(isInit=False) 91 | cls._instance = instance 92 | 93 | @classmethod 94 | def get(cls, key, default=None): 95 | """获取配置值""" 96 | instance = cls.get_instance() 97 | # 读取时,加锁或不加锁都行。但为了统一,我们在修改配置前后都要加锁。 98 | # get 只要最终需要保存,则会加锁 -> _save_config() 99 | if key in instance._config_data: 100 | return instance._config_data[key] 101 | 102 | # 若环境变量中存在该 key,则使用环境变量并写回 config 103 | if key in os.environ: 104 | value = os.environ[key] 105 | instance._config_data[key] = value 106 | instance._save_config() 107 | return value 108 | 109 | # 若 default 不为 None,则设置并保存 110 | if default is not None: 111 | instance._config_data[key] = default 112 | instance._save_config() 113 | return default 114 | 115 | # 找不到则抛出异常 116 | # raise KeyError(f"{key} is not found in config file or environment variables.") 117 | return default 118 | 119 | @classmethod 120 | def set(cls, key, value): 121 | """设置配置值并保存""" 122 | instance = cls.get_instance() 123 | with instance._lock: 124 | instance._config_data[key] = value 125 | instance._save_config() 126 | 127 | @classmethod 128 | def get_translator_by_name(cls, name): 129 | """根据 name 获取对应的 translator 配置""" 130 | instance = cls.get_instance() 131 | translators = instance._config_data.get("translators", []) 132 | for translator in translators: 133 | if translator.get("name") == name: 134 | return translator["envs"] 135 | return None 136 | 137 | @classmethod 138 | def set_translator_by_name(cls, name, new_translator_envs): 139 | """根据 name 设置或更新 translator 配置""" 140 | instance = cls.get_instance() 141 | with instance._lock: 142 | translators = instance._config_data.get("translators", []) 143 | for translator in translators: 144 | if translator.get("name") == name: 145 | translator["envs"] = copy.deepcopy(new_translator_envs) 146 | instance._save_config() 147 | return 148 | translators.append( 149 | {"name": name, "envs": 
copy.deepcopy(new_translator_envs)} 150 | ) 151 | instance._config_data["translators"] = translators 152 | instance._save_config() 153 | 154 | @classmethod 155 | def get_env_by_translatername(cls, translater_name, name, default=None): 156 | """根据 name 获取对应的 translator 配置""" 157 | instance = cls.get_instance() 158 | translators = instance._config_data.get("translators", []) 159 | for translator in translators: 160 | if translator.get("name") == translater_name.name: 161 | if translator["envs"][name]: 162 | return translator["envs"][name] 163 | else: 164 | with instance._lock: 165 | translator["envs"][name] = default 166 | instance._save_config() 167 | return default 168 | 169 | with instance._lock: 170 | translators = instance._config_data.get("translators", []) 171 | for translator in translators: 172 | if translator.get("name") == translater_name.name: 173 | translator["envs"][name] = default 174 | instance._save_config() 175 | return default 176 | translators.append( 177 | { 178 | "name": translater_name.name, 179 | "envs": copy.deepcopy(translater_name.envs), 180 | } 181 | ) 182 | instance._config_data["translators"] = translators 183 | instance._save_config() 184 | return default 185 | 186 | @classmethod 187 | def delete(cls, key): 188 | """删除配置值并保存""" 189 | instance = cls.get_instance() 190 | with instance._lock: 191 | if key in instance._config_data: 192 | del instance._config_data[key] 193 | instance._save_config() 194 | 195 | @classmethod 196 | def clear(cls): 197 | """删除配置值并保存""" 198 | instance = cls.get_instance() 199 | with instance._lock: 200 | instance._config_data = {} 201 | instance._save_config() 202 | 203 | @classmethod 204 | def all(cls): 205 | """返回所有配置项""" 206 | instance = cls.get_instance() 207 | # 这里只做读取操作,一般可不加锁。不过为了保险也可以加锁。 208 | return instance._config_data 209 | 210 | @classmethod 211 | def remove(cls): 212 | instance = cls.get_instance() 213 | with instance._lock: 214 | os.remove(instance._config_path) 215 | 
-------------------------------------------------------------------------------- /pdf2zh/doclayout.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import os.path 3 | 4 | import cv2 5 | import numpy as np 6 | import ast 7 | from babeldoc.assets.assets import get_doclayout_onnx_model_path 8 | 9 | try: 10 | import onnx 11 | import onnxruntime 12 | except ImportError as e: 13 | if "DLL load failed" in str(e): 14 | raise OSError( 15 | "Microsoft Visual C++ Redistributable is not installed. " 16 | "Download it at https://aka.ms/vs/17/release/vc_redist.x64.exe" 17 | ) from e 18 | raise 19 | 20 | from huggingface_hub import hf_hub_download 21 | 22 | from pdf2zh.config import ConfigManager 23 | 24 | 25 | class DocLayoutModel(abc.ABC): 26 | @staticmethod 27 | def load_onnx(): 28 | model = OnnxModel.from_pretrained() 29 | return model 30 | 31 | @staticmethod 32 | def load_available(): 33 | return DocLayoutModel.load_onnx() 34 | 35 | @property 36 | @abc.abstractmethod 37 | def stride(self) -> int: 38 | """Stride of the model input.""" 39 | pass 40 | 41 | @abc.abstractmethod 42 | def predict(self, image, imgsz=1024, **kwargs) -> list: 43 | """ 44 | Predict the layout of a document page. 45 | 46 | Args: 47 | image: The image of the document page. 48 | imgsz: Resize the image to this size. Must be a multiple of the stride. 49 | **kwargs: Additional arguments. 
50 | """ 51 | pass 52 | 53 | 54 | class YoloResult: 55 | """Helper class to store detection results from ONNX model.""" 56 | 57 | def __init__(self, boxes, names): 58 | self.boxes = [YoloBox(data=d) for d in boxes] 59 | self.boxes.sort(key=lambda x: x.conf, reverse=True) 60 | self.names = names 61 | 62 | 63 | class YoloBox: 64 | """Helper class to store detection results from ONNX model.""" 65 | 66 | def __init__(self, data): 67 | self.xyxy = data[:4] 68 | self.conf = data[-2] 69 | self.cls = data[-1] 70 | 71 | 72 | class OnnxModel(DocLayoutModel): 73 | def __init__(self, model_path: str): 74 | self.model_path = model_path 75 | 76 | model = onnx.load(model_path) 77 | metadata = {d.key: d.value for d in model.metadata_props} 78 | self._stride = ast.literal_eval(metadata["stride"]) 79 | self._names = ast.literal_eval(metadata["names"]) 80 | 81 | self.model = onnxruntime.InferenceSession(model.SerializeToString()) 82 | 83 | @staticmethod 84 | def from_pretrained(): 85 | pth = get_doclayout_onnx_model_path() 86 | return OnnxModel(pth) 87 | 88 | @property 89 | def stride(self): 90 | return self._stride 91 | 92 | def resize_and_pad_image(self, image, new_shape): 93 | """ 94 | Resize and pad the image to the specified size, ensuring dimensions are multiples of stride. 
95 | 96 | Parameters: 97 | - image: Input image 98 | - new_shape: Target size (integer or (height, width) tuple) 99 | - stride: Padding alignment stride, default 32 100 | 101 | Returns: 102 | - Processed image 103 | """ 104 | if isinstance(new_shape, int): 105 | new_shape = (new_shape, new_shape) 106 | 107 | h, w = image.shape[:2] 108 | new_h, new_w = new_shape 109 | 110 | # Calculate scaling ratio 111 | r = min(new_h / h, new_w / w) 112 | resized_h, resized_w = int(round(h * r)), int(round(w * r)) 113 | 114 | # Resize image 115 | image = cv2.resize( 116 | image, (resized_w, resized_h), interpolation=cv2.INTER_LINEAR 117 | ) 118 | 119 | # Calculate padding size and align to stride multiple 120 | pad_w = (new_w - resized_w) % self.stride 121 | pad_h = (new_h - resized_h) % self.stride 122 | top, bottom = pad_h // 2, pad_h - pad_h // 2 123 | left, right = pad_w // 2, pad_w - pad_w // 2 124 | 125 | # Add padding 126 | image = cv2.copyMakeBorder( 127 | image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114) 128 | ) 129 | 130 | return image 131 | 132 | def scale_boxes(self, img1_shape, boxes, img0_shape): 133 | """ 134 | Rescales bounding boxes (in the format of xyxy by default) from the shape of the image they were originally 135 | specified in (img1_shape) to the shape of a different image (img0_shape). 136 | 137 | Args: 138 | img1_shape (tuple): The shape of the image that the bounding boxes are for, 139 | in the format of (height, width). 140 | boxes (torch.Tensor): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2) 141 | img0_shape (tuple): the shape of the target image, in the format of (height, width). 
142 | 143 | Returns: 144 | boxes (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2) 145 | """ 146 | 147 | # Calculate scaling ratio 148 | gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) 149 | 150 | # Calculate padding size 151 | pad_x = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1) 152 | pad_y = round((img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1) 153 | 154 | # Remove padding and scale boxes 155 | boxes[..., :4] = (boxes[..., :4] - [pad_x, pad_y, pad_x, pad_y]) / gain 156 | return boxes 157 | 158 | def predict(self, image, imgsz=1024, **kwargs): 159 | # Preprocess input image 160 | orig_h, orig_w = image.shape[:2] 161 | pix = self.resize_and_pad_image(image, new_shape=imgsz) 162 | pix = np.transpose(pix, (2, 0, 1)) # CHW 163 | pix = np.expand_dims(pix, axis=0) # BCHW 164 | pix = pix.astype(np.float32) / 255.0 # Normalize to [0, 1] 165 | new_h, new_w = pix.shape[2:] 166 | 167 | # Run inference 168 | preds = self.model.run(None, {"images": pix})[0] 169 | 170 | # Postprocess predictions 171 | preds = preds[preds[..., 4] > 0.25] 172 | preds[..., :4] = self.scale_boxes( 173 | (new_h, new_w), preds[..., :4], (orig_h, orig_w) 174 | ) 175 | return [YoloResult(boxes=preds, names=self._names)] 176 | 177 | 178 | class ModelInstance: 179 | value: OnnxModel = None 180 | -------------------------------------------------------------------------------- /pdf2zh/high_level.py: -------------------------------------------------------------------------------- 1 | """Functions that can be used for the most common use-cases for pdf2zh.six""" 2 | 3 | import asyncio 4 | import io 5 | import os 6 | import re 7 | import sys 8 | import tempfile 9 | import logging 10 | from asyncio import CancelledError 11 | from pathlib import Path 12 | from string import Template 13 | from typing import Any, BinaryIO, List, Optional, Dict 14 | 15 | import numpy as np 16 | import requests 17 | import tqdm 18 | from pdfminer.pdfdocument import 
PDFDocument 19 | from pdfminer.pdfexceptions import PDFValueError 20 | from pdfminer.pdfinterp import PDFResourceManager 21 | from pdfminer.pdfpage import PDFPage 22 | from pdfminer.pdfparser import PDFParser 23 | from pymupdf import Document, Font 24 | 25 | from pdf2zh.converter import TranslateConverter 26 | from pdf2zh.doclayout import OnnxModel 27 | from pdf2zh.pdfinterp import PDFPageInterpreterEx 28 | 29 | from pdf2zh.config import ConfigManager 30 | from babeldoc.assets.assets import get_font_and_metadata 31 | 32 | NOTO_NAME = "noto" 33 | 34 | logger = logging.getLogger(__name__) 35 | 36 | noto_list = [ 37 | "am", # Amharic 38 | "ar", # Arabic 39 | "bn", # Bengali 40 | "bg", # Bulgarian 41 | "chr", # Cherokee 42 | "el", # Greek 43 | "gu", # Gujarati 44 | "iw", # Hebrew 45 | "hi", # Hindi 46 | "kn", # Kannada 47 | "ml", # Malayalam 48 | "mr", # Marathi 49 | "ru", # Russian 50 | "sr", # Serbian 51 | "ta", # Tamil 52 | "te", # Telugu 53 | "th", # Thai 54 | "ur", # Urdu 55 | "uk", # Ukrainian 56 | ] 57 | 58 | 59 | def check_files(files: List[str]) -> List[str]: 60 | files = [ 61 | f for f in files if not f.startswith("http://") 62 | ] # exclude online files, http 63 | files = [ 64 | f for f in files if not f.startswith("https://") 65 | ] # exclude online files, https 66 | missing_files = [file for file in files if not os.path.exists(file)] 67 | return missing_files 68 | 69 | 70 | def translate_patch( 71 | inf: BinaryIO, 72 | pages: Optional[list[int]] = None, 73 | vfont: str = "", 74 | vchar: str = "", 75 | thread: int = 0, 76 | doc_zh: Document = None, 77 | lang_in: str = "", 78 | lang_out: str = "", 79 | service: str = "", 80 | noto_name: str = "", 81 | noto: Font = None, 82 | callback: object = None, 83 | cancellation_event: asyncio.Event = None, 84 | model: OnnxModel = None, 85 | envs: Dict = None, 86 | prompt: Template = None, 87 | ignore_cache: bool = False, 88 | **kwarg: Any, 89 | ) -> None: 90 | rsrcmgr = PDFResourceManager() 91 | layout = {} 92 | device 
= TranslateConverter( 93 | rsrcmgr, 94 | vfont, 95 | vchar, 96 | thread, 97 | layout, 98 | lang_in, 99 | lang_out, 100 | service, 101 | noto_name, 102 | noto, 103 | envs, 104 | prompt, 105 | ignore_cache, 106 | ) 107 | 108 | assert device is not None 109 | obj_patch = {} 110 | interpreter = PDFPageInterpreterEx(rsrcmgr, device, obj_patch) 111 | if pages: 112 | total_pages = len(pages) 113 | else: 114 | total_pages = doc_zh.page_count 115 | 116 | parser = PDFParser(inf) 117 | doc = PDFDocument(parser) 118 | with tqdm.tqdm(total=total_pages) as progress: 119 | for pageno, page in enumerate(PDFPage.create_pages(doc)): 120 | if cancellation_event and cancellation_event.is_set(): 121 | raise CancelledError("task cancelled") 122 | if pages and (pageno not in pages): 123 | continue 124 | progress.update() 125 | if callback: 126 | callback(progress) 127 | page.pageno = pageno 128 | pix = doc_zh[page.pageno].get_pixmap() 129 | image = np.fromstring(pix.samples, np.uint8).reshape( 130 | pix.height, pix.width, 3 131 | )[:, :, ::-1] 132 | page_layout = model.predict(image, imgsz=int(pix.height / 32) * 32)[0] 133 | # kdtree 是不可能 kdtree 的,不如直接渲染成图片,用空间换时间 134 | box = np.ones((pix.height, pix.width)) 135 | h, w = box.shape 136 | vcls = ["abandon", "figure", "table", "isolate_formula", "formula_caption"] 137 | for i, d in enumerate(page_layout.boxes): 138 | if page_layout.names[int(d.cls)] not in vcls: 139 | x0, y0, x1, y1 = d.xyxy.squeeze() 140 | x0, y0, x1, y1 = ( 141 | np.clip(int(x0 - 1), 0, w - 1), 142 | np.clip(int(h - y1 - 1), 0, h - 1), 143 | np.clip(int(x1 + 1), 0, w - 1), 144 | np.clip(int(h - y0 + 1), 0, h - 1), 145 | ) 146 | box[y0:y1, x0:x1] = i + 2 147 | for i, d in enumerate(page_layout.boxes): 148 | if page_layout.names[int(d.cls)] in vcls: 149 | x0, y0, x1, y1 = d.xyxy.squeeze() 150 | x0, y0, x1, y1 = ( 151 | np.clip(int(x0 - 1), 0, w - 1), 152 | np.clip(int(h - y1 - 1), 0, h - 1), 153 | np.clip(int(x1 + 1), 0, w - 1), 154 | np.clip(int(h - y0 + 1), 0, h - 1), 155 
def translate_stream(
    stream: bytes,
    pages: Optional[list[int]] = None,
    lang_in: str = "",
    lang_out: str = "",
    service: str = "",
    thread: int = 0,
    vfont: str = "",
    vchar: str = "",
    callback: object = None,
    cancellation_event: asyncio.Event = None,
    model: OnnxModel = None,
    envs: Dict = None,
    prompt: Template = None,
    skip_subset_fonts: bool = False,
    ignore_cache: bool = False,
    **kwarg: Any,
):
    """Translate an in-memory PDF and return the mono and dual documents.

    The input is loaded as ``doc_en`` and re-saved into a second document,
    ``doc_zh``.  The output fonts are inserted into every page of ``doc_zh``
    and registered in every font resource dictionary that can be reached,
    then ``translate_patch`` produces replacement content streams which are
    written back into ``doc_zh``.  Finally ``doc_zh`` is appended to
    ``doc_en`` and its pages are moved so each translated page follows its
    original.

    Args:
        stream: Raw bytes of the source PDF.
        pages, lang_in, lang_out, service, thread, vfont, vchar, callback,
            cancellation_event, model, envs, prompt, ignore_cache: Not used
            directly here (except ``lang_out`` for font selection); they
            reach ``translate_patch`` through ``**locals()``.
        skip_subset_fonts: When true, ``subset_fonts`` is not called on the
            two output documents.
        **kwarg: Extra keyword arguments, forwarded as part of ``locals()``.

    Returns:
        A ``(mono, dual)`` tuple: the serialized translated document and the
        serialized original-plus-translated document.
    """
    # NOTE: every local defined before the ``translate_patch`` call below is
    # forwarded to it via ``**locals()``; do not rename or add locals above
    # that call without checking ``translate_patch``'s signature.
    font_list = [("tiro", None)]

    font_path = download_remote_fonts(lang_out.lower())
    noto_name = NOTO_NAME
    noto = Font(noto_name, font_path)
    font_list.append((noto_name, font_path))

    doc_en = Document(stream=stream)
    stream = io.BytesIO()
    doc_en.save(stream)
    doc_zh = Document(stream=stream)
    page_count = doc_zh.page_count
    font_id = {}
    for page in doc_zh:
        for font in font_list:
            font_id[font[0]] = page.insert_font(font[0], font[1])
    xreflen = doc_zh.xref_length()
    for xref in range(1, xreflen):
        # The font dictionary may sit under "Resources/" or, for
        # XObject-based resources, directly on the object.
        for label in ["Resources/", ""]:
            try:  # reading or writing an xref may fail
                font_res = doc_zh.xref_get_key(xref, f"{label}Font")
                target_key_prefix = f"{label}Font/"
                if font_res[0] == "xref":
                    # The font dictionary is an indirect object: follow the
                    # reference and write the font keys into it directly.
                    # NOTE(review): rebinding ``xref`` here also affects the
                    # remaining ``label`` iteration for this object -
                    # confirm that this is intended.
                    resource_xref_id = re.search("(\\d+) 0 R", font_res[1]).group(1)
                    xref = int(resource_xref_id)
                    font_res = ("dict", doc_zh.xref_object(xref))
                    target_key_prefix = ""

                if font_res[0] == "dict":
                    for font in font_list:
                        target_key = f"{target_key_prefix}{font[0]}"
                        font_exist = doc_zh.xref_get_key(xref, target_key)
                        if font_exist[0] == "null":
                            doc_zh.xref_set_key(
                                xref,
                                target_key,
                                f"{font_id[font[0]]} 0 R",
                            )
            except Exception:
                # Still best effort, but leave a trace instead of silently
                # discarding the failure.
                logger.debug(
                    "Skipped font registration for xref %s (label %r)",
                    xref,
                    label,
                    exc_info=True,
                )

    fp = io.BytesIO()

    doc_zh.save(fp)
    obj_patch: dict = translate_patch(fp, **locals())

    # Replace each patched object's stream with the new instruction stream.
    for obj_id, ops_new in obj_patch.items():
        doc_zh.update_stream(obj_id, ops_new.encode())

    # Append the translated pages, then move translated page i so that it
    # sits right after original page i.
    doc_en.insert_file(doc_zh)
    for page_index in range(page_count):
        doc_en.move_page(page_count + page_index, page_index * 2 + 1)
    if not skip_subset_fonts:
        doc_zh.subset_fonts(fallback=True)
        doc_en.subset_fonts(fallback=True)
    return (
        doc_zh.write(deflate=True, garbage=3, use_objstms=1),
        doc_en.write(deflate=True, garbage=3, use_objstms=1),
    )
def translate(
    files: list[str],
    output: str = "",
    pages: Optional[list[int]] = None,
    lang_in: str = "",
    lang_out: str = "",
    service: str = "",
    thread: int = 0,
    vfont: str = "",
    vchar: str = "",
    callback: object = None,
    compatible: bool = False,
    cancellation_event: asyncio.Event = None,
    model: OnnxModel = None,
    envs: Dict = None,
    prompt: Template = None,
    skip_subset_fonts: bool = False,
    ignore_cache: bool = False,
    **kwarg: Any,
):
    """Translate each PDF in *files* and write mono and dual outputs.

    Entries starting with ``http://`` or ``https://`` are downloaded to a
    temporary file first.  With ``compatible`` set, the input is converted
    with ``convert_to_pdfa`` before translation.  Inputs that live inside
    the system temp directory are deleted once their bytes have been read.

    Args:
        files: Paths or URLs of the PDFs to translate.
        output: Directory that receives ``<name>-mono.pdf`` and
            ``<name>-dual.pdf``.
        compatible: Convert the input with ``convert_to_pdfa`` first.
        **kwarg: Extra keyword arguments, forwarded with the other locals.

    All remaining parameters are passed on to ``translate_stream``.

    Returns:
        A list of ``(mono_path, dual_path)`` string pairs, one per input.

    Raises:
        PDFValueError: If *files* is empty, a file is missing, or a
            download fails.
    """
    if not files:
        raise PDFValueError("No files to process.")

    missing_files = check_files(files)

    if missing_files:
        print("The following files do not exist:", file=sys.stderr)
        for file in missing_files:
            print(f" {file}", file=sys.stderr)
        raise PDFValueError("Some files do not exist.")

    result_files = []

    for file in files:
        if isinstance(file, str) and file.startswith(("http://", "https://")):
            print("Online files detected, downloading...")
            try:
                r = requests.get(file, allow_redirects=True)
                r.raise_for_status()
                if r.status_code != 200:
                    # Previously a non-200 success left ``file`` as the URL
                    # and failed later in open(); report it as a download
                    # error instead.
                    raise PDFValueError(f"Unexpected HTTP status {r.status_code}")
                with tempfile.NamedTemporaryFile(
                    suffix=".pdf", delete=False
                ) as tmp_file:
                    print(f"Writing the file: {file}...")
                    tmp_file.write(r.content)
                    file = tmp_file.name
            except Exception as e:
                raise PDFValueError(
                    f"Errors occur in downloading the PDF file. Please check the link(s).\nError:\n{e}"
                ) from e
        filename = os.path.splitext(os.path.basename(file))[0]

        # If the commandline has specified converting to PDF/A format
        # --compatible / -cp
        if compatible:
            # Only the reserved path is needed.  Close the handle before the
            # converter writes to it: an open temporary file cannot be
            # reopened or unlinked on every platform.
            with tempfile.NamedTemporaryFile(
                suffix="-pdfa.pdf", delete=False
            ) as tmp_pdfa:
                pass
            try:
                print(f"Converting {file} to PDF/A format...")
                convert_to_pdfa(file, tmp_pdfa.name)
                with open(tmp_pdfa.name, "rb") as doc_raw:
                    s_raw = doc_raw.read()
            finally:
                # Remove the intermediate file even when conversion fails.
                os.unlink(tmp_pdfa.name)
        else:
            with open(file, "rb") as doc_raw:
                s_raw = doc_raw.read()

        # Inputs located in the temp directory (e.g. the download above) are
        # removed once their content is in memory.
        temp_dir = Path(tempfile.gettempdir())
        file_path = Path(file)
        try:
            if file_path.exists() and file_path.resolve().is_relative_to(
                temp_dir.resolve()
            ):
                file_path.unlink(missing_ok=True)
                logger.debug(f"Cleaned temp file: {file_path}")
        except Exception:
            logger.warning(f"Failed to clean temp file {file_path}", exc_info=True)

        # NOTE: all locals are forwarded as keyword arguments, so no local
        # in this function may be named ``stream`` (the positional
        # parameter of ``translate_stream``).
        s_mono, s_dual = translate_stream(
            s_raw,
            **locals(),
        )
        file_mono = Path(output) / f"{filename}-mono.pdf"
        file_dual = Path(output) / f"{filename}-dual.pdf"
        with open(file_mono, "wb") as doc_mono:
            doc_mono.write(s_mono)
        with open(file_dual, "wb") as doc_dual:
            doc_dual.write(s_dual)
        result_files.append((str(file_mono), str(file_dual)))

    return result_files
# docker 418 | font_path = ConfigManager.get("NOTO_FONT_PATH", Path("/app", font_name).as_posix()) 419 | if not Path(font_path).exists(): 420 | font_path, _ = get_font_and_metadata(font_name) 421 | font_path = font_path.as_posix() 422 | 423 | logger.info(f"use font: {font_path}") 424 | 425 | return font_path 426 | -------------------------------------------------------------------------------- /pdf2zh/mcp_server.py: -------------------------------------------------------------------------------- 1 | from mcp.server import Server 2 | from mcp.server.fastmcp import FastMCP, Context 3 | from mcp.server.sse import SseServerTransport 4 | from starlette.applications import Starlette 5 | from starlette.requests import Request 6 | from starlette.routing import Mount, Route 7 | from pdf2zh import translate_stream 8 | from pdf2zh.doclayout import ModelInstance 9 | from pathlib import Path 10 | 11 | import contextlib 12 | import io 13 | import os 14 | 15 | 16 | def create_mcp_app() -> FastMCP: 17 | mcp = FastMCP("pdf2zh") 18 | 19 | @mcp.tool() 20 | async def translate_pdf( 21 | file: str, lang_in: str, lang_out: str, ctx: Context 22 | ) -> str: 23 | """ 24 | translate given pdf. Argument `file` is absolute path of input pdf, 25 | `lang_in` and `lang_out` is translate from and to language, and 26 | should be like google translate lang_code. `lang_in` can be `auto` 27 | if you can't determine input language. 
def create_starlette_app(mcp_server: Server, *, debug: bool = False) -> Starlette:
    """Build a Starlette application serving *mcp_server* over SSE.

    ``/sse`` is routed to a handler that opens an SSE connection and runs
    the server on the resulting read/write streams; ``/messages/`` is
    mounted on the transport's ``handle_post_message``.

    Args:
        mcp_server: The server object whose ``run`` is driven per connection.
        debug: Passed through to ``Starlette``.
    """
    transport = SseServerTransport("/messages/")

    async def handle_sse(request: Request) -> None:
        connection = transport.connect_sse(
            request.scope, request.receive, request._send
        )
        async with connection as (read_stream, write_stream):
            await mcp_server.run(
                read_stream,
                write_stream,
                mcp_server.create_initialization_options(),
            )

    routes = [
        Route("/sse", endpoint=handle_sse),
        Mount("/messages/", app=transport.handle_post_message),
    ]
    return Starlette(debug=debug, routes=routes)
default="127.0.0.1", required=False, help="Host to bind" 96 | ) 97 | parser.add_argument( 98 | "--port", type=int, default=3001, required=False, help="Port to bind" 99 | ) 100 | 101 | args = parser.parse_args() 102 | if args.sse and args.host and args.port: 103 | import uvicorn 104 | 105 | starlette_app = create_starlette_app(mcp_server, debug=True) 106 | uvicorn.run(starlette_app, host=args.host, port=args.port) 107 | else: 108 | mcp.run() 109 | -------------------------------------------------------------------------------- /pdf2zh/pdf2zh.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """A command line tool for extracting text and images from PDF and 3 | output it to plain text, html, xml or tags. 4 | """ 5 | 6 | from __future__ import annotations 7 | 8 | import argparse 9 | import logging 10 | import sys 11 | from string import Template 12 | from typing import List, Optional 13 | 14 | from pdf2zh import __version__, log 15 | from pdf2zh.high_level import translate, download_remote_fonts 16 | from pdf2zh.doclayout import OnnxModel, ModelInstance 17 | import os 18 | 19 | from pdf2zh.config import ConfigManager 20 | from babeldoc.translation_config import TranslationConfig as YadtConfig 21 | from babeldoc.high_level import async_translate as yadt_translate 22 | from babeldoc.high_level import init as yadt_init 23 | from babeldoc.main import create_progress_handler 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | def create_parser() -> argparse.ArgumentParser: 29 | parser = argparse.ArgumentParser(description=__doc__, add_help=True) 30 | parser.add_argument( 31 | "files", 32 | type=str, 33 | default=None, 34 | nargs="*", 35 | help="One or more paths to PDF files.", 36 | ) 37 | parser.add_argument( 38 | "--version", 39 | "-v", 40 | action="version", 41 | version=f"pdf2zh v{__version__}", 42 | ) 43 | parser.add_argument( 44 | "--debug", 45 | "-d", 46 | default=False, 47 | 
action="store_true", 48 | help="Use debug logging level.", 49 | ) 50 | parse_params = parser.add_argument_group( 51 | "Parser", 52 | description="Used during PDF parsing", 53 | ) 54 | parse_params.add_argument( 55 | "--pages", 56 | "-p", 57 | type=str, 58 | help="The list of page numbers to parse.", 59 | ) 60 | parse_params.add_argument( 61 | "--vfont", 62 | "-f", 63 | type=str, 64 | default="", 65 | help="The regex to math font name of formula.", 66 | ) 67 | parse_params.add_argument( 68 | "--vchar", 69 | "-c", 70 | type=str, 71 | default="", 72 | help="The regex to math character of formula.", 73 | ) 74 | parse_params.add_argument( 75 | "--lang-in", 76 | "-li", 77 | type=str, 78 | default="en", 79 | help="The code of source language.", 80 | ) 81 | parse_params.add_argument( 82 | "--lang-out", 83 | "-lo", 84 | type=str, 85 | default="zh", 86 | help="The code of target language.", 87 | ) 88 | parse_params.add_argument( 89 | "--service", 90 | "-s", 91 | type=str, 92 | default="google", 93 | help="The service to use for translation.", 94 | ) 95 | parse_params.add_argument( 96 | "--output", 97 | "-o", 98 | type=str, 99 | default="", 100 | help="Output directory for files.", 101 | ) 102 | parse_params.add_argument( 103 | "--thread", 104 | "-t", 105 | type=int, 106 | default=4, 107 | help="The number of threads to execute translation.", 108 | ) 109 | parse_params.add_argument( 110 | "--interactive", 111 | "-i", 112 | action="store_true", 113 | help="Interact with GUI.", 114 | ) 115 | parse_params.add_argument( 116 | "--share", 117 | action="store_true", 118 | help="Enable Gradio Share", 119 | ) 120 | parse_params.add_argument( 121 | "--flask", 122 | action="store_true", 123 | help="flask", 124 | ) 125 | parse_params.add_argument( 126 | "--celery", 127 | action="store_true", 128 | help="celery", 129 | ) 130 | parse_params.add_argument( 131 | "--authorized", 132 | type=str, 133 | nargs="+", 134 | help="user name and password.", 135 | ) 136 | parse_params.add_argument( 137 
| "--prompt", 138 | type=str, 139 | help="user custom prompt.", 140 | ) 141 | 142 | parse_params.add_argument( 143 | "--compatible", 144 | "-cp", 145 | action="store_true", 146 | help="Convert the PDF file into PDF/A format to improve compatibility.", 147 | ) 148 | 149 | parse_params.add_argument( 150 | "--onnx", 151 | type=str, 152 | help="custom onnx model path.", 153 | ) 154 | 155 | parse_params.add_argument( 156 | "--serverport", 157 | type=int, 158 | help="custom WebUI port.", 159 | ) 160 | 161 | parse_params.add_argument( 162 | "--dir", 163 | action="store_true", 164 | help="translate directory.", 165 | ) 166 | 167 | parse_params.add_argument( 168 | "--config", 169 | type=str, 170 | help="config file.", 171 | ) 172 | 173 | parse_params.add_argument( 174 | "--babeldoc", 175 | default=False, 176 | action="store_true", 177 | help="Use experimental backend babeldoc.", 178 | ) 179 | 180 | parse_params.add_argument( 181 | "--skip-subset-fonts", 182 | action="store_true", 183 | help="Skip font subsetting. 
" 184 | "This option can improve compatibility " 185 | "but will increase the size of the output file.", 186 | ) 187 | 188 | parse_params.add_argument( 189 | "--ignore-cache", 190 | action="store_true", 191 | help="Ignore cache and force retranslation.", 192 | ) 193 | 194 | parse_params.add_argument( 195 | "--mcp", action="store_true", help="Launch pdf2zh MCP server in STDIO mode" 196 | ) 197 | 198 | parse_params.add_argument( 199 | "--sse", action="store_true", help="Launch pdf2zh MCP server in SSE mode" 200 | ) 201 | 202 | return parser 203 | 204 | 205 | def parse_args(args: Optional[List[str]]) -> argparse.Namespace: 206 | parsed_args = create_parser().parse_args(args=args) 207 | 208 | if parsed_args.pages: 209 | pages = [] 210 | for p in parsed_args.pages.split(","): 211 | if "-" in p: 212 | start, end = p.split("-") 213 | pages.extend(range(int(start) - 1, int(end))) 214 | else: 215 | pages.append(int(p) - 1) 216 | parsed_args.raw_pages = parsed_args.pages 217 | parsed_args.pages = pages 218 | 219 | return parsed_args 220 | 221 | 222 | def find_all_files_in_directory(directory_path): 223 | """ 224 | Recursively search all PDF files in the given directory and return their paths as a list. 
225 | 226 | :param directory_path: str, the path to the directory to search 227 | :return: list of PDF file paths 228 | """ 229 | # Check if the provided path is a directory 230 | if not os.path.isdir(directory_path): 231 | raise ValueError(f"The provided path '{directory_path}' is not a directory.") 232 | 233 | file_paths = [] 234 | 235 | # Walk through the directory recursively 236 | for root, _, files in os.walk(directory_path): 237 | for file in files: 238 | # Check if the file is a PDF 239 | if file.lower().endswith(".pdf"): 240 | # Append the full file path to the list 241 | file_paths.append(os.path.join(root, file)) 242 | 243 | return file_paths 244 | 245 | 246 | def main(args: Optional[List[str]] = None) -> int: 247 | from rich.logging import RichHandler 248 | 249 | logging.basicConfig(level=logging.INFO, handlers=[RichHandler()]) 250 | 251 | # disable httpx, openai, httpcore, http11 logs 252 | logging.getLogger("httpx").setLevel("CRITICAL") 253 | logging.getLogger("httpx").propagate = False 254 | logging.getLogger("openai").setLevel("CRITICAL") 255 | logging.getLogger("openai").propagate = False 256 | logging.getLogger("httpcore").setLevel("CRITICAL") 257 | logging.getLogger("httpcore").propagate = False 258 | logging.getLogger("http11").setLevel("CRITICAL") 259 | logging.getLogger("http11").propagate = False 260 | 261 | parsed_args = parse_args(args) 262 | 263 | if parsed_args.config: 264 | ConfigManager.custome_config(parsed_args.config) 265 | 266 | if parsed_args.debug: 267 | log.setLevel(logging.DEBUG) 268 | 269 | if parsed_args.onnx: 270 | ModelInstance.value = OnnxModel(parsed_args.onnx) 271 | else: 272 | ModelInstance.value = OnnxModel.load_available() 273 | 274 | if parsed_args.interactive: 275 | from pdf2zh.gui import setup_gui 276 | 277 | if parsed_args.serverport: 278 | setup_gui( 279 | parsed_args.share, parsed_args.authorized, int(parsed_args.serverport) 280 | ) 281 | else: 282 | setup_gui(parsed_args.share, parsed_args.authorized) 283 | 
def yadt_main(parsed_args) -> int:
    """Translate the requested files with the babeldoc backend.

    Args:
        parsed_args: The namespace produced by ``parse_args``.  Reads
            ``dir``, ``files``, ``lang_in``, ``lang_out``, ``ignore_cache``,
            ``output``, ``service`` (``name`` or ``name:model``), ``prompt``,
            ``debug``, ``thread`` and, when present, ``raw_pages``.

    Returns:
        0 after every file has been processed.

    Raises:
        ValueError: If the prompt file cannot be loaded or the service name
            matches no known translator.
    """
    if parsed_args.dir:
        untranlate_file = find_all_files_in_directory(parsed_args.files[0])
    else:
        untranlate_file = parsed_args.files
    lang_in = parsed_args.lang_in
    lang_out = parsed_args.lang_out
    ignore_cache = parsed_args.ignore_cache
    outputdir = parsed_args.output or None

    # yadt requires init before translate
    yadt_init()
    font_path = download_remote_fonts(lang_out.lower())

    # "name:model" selects a model; a bare "name" leaves the model as None.
    service_name, separator, model_part = parsed_args.service.partition(":")
    service_model = model_part if separator else None

    envs = {}
    prompt = []

    if parsed_args.prompt:
        if isinstance(parsed_args.prompt, Template):
            # main() replaces the prompt path with the loaded Template
            # before dispatching here; opening it again would always fail.
            prompt = parsed_args.prompt
        else:
            try:
                with open(parsed_args.prompt, "r", encoding="utf-8") as prompt_file:
                    prompt = Template(prompt_file.read())
            except Exception as e:
                raise ValueError("prompt error.") from e

    from pdf2zh.translator import (
        AzureOpenAITranslator,
        GoogleTranslator,
        BingTranslator,
        DeepLTranslator,
        DeepLXTranslator,
        OllamaTranslator,
        OpenAITranslator,
        ZhipuTranslator,
        ModelScopeTranslator,
        SiliconTranslator,
        GeminiTranslator,
        AzureTranslator,
        TencentTranslator,
        DifyTranslator,
        AnythingLLMTranslator,
        XinferenceTranslator,
        ArgosTranslator,
        GrokTranslator,
        GroqTranslator,
        DeepseekTranslator,
        OpenAIlikedTranslator,
        QwenMtTranslator,
    )

    translator_classes = (
        GoogleTranslator,
        BingTranslator,
        DeepLTranslator,
        DeepLXTranslator,
        OllamaTranslator,
        XinferenceTranslator,
        AzureOpenAITranslator,
        OpenAITranslator,
        ZhipuTranslator,
        ModelScopeTranslator,
        SiliconTranslator,
        GeminiTranslator,
        AzureTranslator,
        TencentTranslator,
        DifyTranslator,
        AnythingLLMTranslator,
        ArgosTranslator,
        GrokTranslator,
        GroqTranslator,
        DeepseekTranslator,
        OpenAIlikedTranslator,
        QwenMtTranslator,
    )
    for translator_cls in translator_classes:
        if service_name == translator_cls.name:
            translator = translator_cls(
                lang_in,
                lang_out,
                service_model,
                envs=envs,
                prompt=prompt,
                ignore_cache=ignore_cache,
            )
            break
    else:
        raise ValueError("Unsupported translation service")

    # parse_args stores the untouched --pages string in raw_pages.  Pass it
    # through as-is: joining its characters would turn "12" into "1,2".
    raw_pages = getattr(parsed_args, "raw_pages", None)
    if isinstance(raw_pages, str):
        pages = raw_pages
    else:
        pages = ",".join(str(x) for x in raw_pages or [])

    import asyncio

    async def yadt_translate_coro(yadt_config):
        progress_context, progress_handler = create_progress_handler(yadt_config)
        # Start translating
        with progress_context:
            async for event in yadt_translate(yadt_config):
                progress_handler(event)
                if yadt_config.debug:
                    logger.debug(event)
                if event["type"] == "finish":
                    result = event["translate_result"]
                    logger.info("Translation Result:")
                    logger.info(f" Original PDF: {result.original_pdf_path}")
                    logger.info(f" Time Cost: {result.total_seconds:.2f}s")
                    logger.info(f" Mono PDF: {result.mono_pdf_path or 'None'}")
                    logger.info(f" Dual PDF: {result.dual_pdf_path or 'None'}")
                    break

    for file in untranlate_file:
        # Drop quotes that may surround a path given on the command line.
        file = file.strip("\"'")
        yadt_config = YadtConfig(
            input_file=file,
            font=font_path,
            pages=pages,
            output_dir=outputdir,
            doc_layout_model=None,
            translator=translator,
            debug=parsed_args.debug,
            lang_in=lang_in,
            lang_out=lang_out,
            no_dual=False,
            no_mono=False,
            qps=parsed_args.thread,
        )
        asyncio.run(yadt_translate_coro(yadt_config))
    return 0
def safe_float(o: Any) -> Optional[float]:
    """Return ``float(o)``, or ``None`` when *o* cannot be converted.

    ``OverflowError`` is handled as well: ``float()`` raises it for integers
    too large to represent, which would otherwise escape this helper.
    """
    try:
        return float(o)
    except (TypeError, ValueError, OverflowError):
        return None
"Font": 93 | for fontid, spec in dict_value(v).items(): 94 | objid = None 95 | if isinstance(spec, PDFObjRef): 96 | objid = spec.objid 97 | spec = dict_value(spec) 98 | self.fontmap[fontid] = self.rsrcmgr.get_font(objid, spec) 99 | self.fontmap[fontid].descent = 0 # hack fix descent 100 | self.fontid[self.fontmap[fontid]] = fontid 101 | elif k == "ColorSpace": 102 | for csid, spec in dict_value(v).items(): 103 | colorspace = get_colorspace(resolve1(spec)) 104 | if colorspace is not None: 105 | self.csmap[csid] = colorspace 106 | elif k == "ProcSet": 107 | self.rsrcmgr.get_procset(list_value(v)) 108 | elif k == "XObject": 109 | for xobjid, xobjstrm in dict_value(v).items(): 110 | self.xobjmap[xobjid] = xobjstrm 111 | 112 | def do_S(self) -> None: 113 | # 重载过滤非公式线条 114 | """Stroke path""" 115 | 116 | def is_black(color: Color) -> bool: 117 | if isinstance(color, Tuple): 118 | return sum(color) == 0 119 | else: 120 | return color == 0 121 | 122 | if ( 123 | len(self.curpath) == 2 124 | and self.curpath[0][0] == "m" 125 | and self.curpath[1][0] == "l" 126 | and apply_matrix_pt(self.ctm, self.curpath[0][-2:])[1] 127 | == apply_matrix_pt(self.ctm, self.curpath[1][-2:])[1] 128 | and is_black(self.graphicstate.scolor) 129 | ): # 独立直线,水平,黑色 130 | # print(apply_matrix_pt(self.ctm,self.curpath[0][-2:]),apply_matrix_pt(self.ctm,self.curpath[1][-2:]),self.graphicstate.scolor) 131 | self.device.paint_path(self.graphicstate, True, False, False, self.curpath) 132 | self.curpath = [] 133 | return "n" 134 | else: 135 | self.curpath = [] 136 | 137 | ############################################################ 138 | # 重载过滤非公式线条(F/B) 139 | def do_f(self) -> None: 140 | """Fill path using nonzero winding number rule""" 141 | # self.device.paint_path(self.graphicstate, False, True, False, self.curpath) 142 | self.curpath = [] 143 | 144 | def do_F(self) -> None: 145 | """Fill path using nonzero winding number rule (obsolete)""" 146 | 147 | def do_f_a(self) -> None: 148 | """Fill path 
using even-odd rule""" 149 | # self.device.paint_path(self.graphicstate, False, True, True, self.curpath) 150 | self.curpath = [] 151 | 152 | def do_B(self) -> None: 153 | """Fill and stroke path using nonzero winding number rule""" 154 | # self.device.paint_path(self.graphicstate, True, True, False, self.curpath) 155 | self.curpath = [] 156 | 157 | def do_B_a(self) -> None: 158 | """Fill and stroke path using even-odd rule""" 159 | # self.device.paint_path(self.graphicstate, True, True, True, self.curpath) 160 | self.curpath = [] 161 | 162 | ############################################################ 163 | # 重载返回调用参数(SCN) 164 | def do_SCN(self) -> None: 165 | """Set color for stroking operations.""" 166 | if self.scs: 167 | n = self.scs.ncomponents 168 | else: 169 | if settings.STRICT: 170 | raise PDFInterpreterError("No colorspace specified!") 171 | n = 1 172 | args = self.pop(n) 173 | self.graphicstate.scolor = cast(Color, args) 174 | return args 175 | 176 | def do_scn(self) -> None: 177 | """Set color for nonstroking operations""" 178 | if self.ncs: 179 | n = self.ncs.ncomponents 180 | else: 181 | if settings.STRICT: 182 | raise PDFInterpreterError("No colorspace specified!") 183 | n = 1 184 | args = self.pop(n) 185 | self.graphicstate.ncolor = cast(Color, args) 186 | return args 187 | 188 | def do_SC(self) -> None: 189 | """Set color for stroking operations""" 190 | return self.do_SCN() 191 | 192 | def do_sc(self) -> None: 193 | """Set color for nonstroking operations""" 194 | return self.do_scn() 195 | 196 | def do_Do(self, xobjid_arg: PDFStackT) -> None: 197 | # 重载设置 xobj 的 obj_patch 198 | """Invoke named XObject""" 199 | xobjid = literal_name(xobjid_arg) 200 | try: 201 | xobj = stream_value(self.xobjmap[xobjid]) 202 | except KeyError: 203 | if settings.STRICT: 204 | raise PDFInterpreterError("Undefined xobject id: %r" % xobjid) 205 | return 206 | # log.debug("Processing xobj: %r", xobj) 207 | subtype = xobj.get("Subtype") 208 | if subtype is LITERAL_FORM 
and "BBox" in xobj: 209 | interpreter = self.dup() 210 | bbox = cast(Rect, list_value(xobj["BBox"])) 211 | matrix = cast(Matrix, list_value(xobj.get("Matrix", MATRIX_IDENTITY))) 212 | # According to PDF reference 1.7 section 4.9.1, XObjects in 213 | # earlier PDFs (prior to v1.2) use the page's Resources entry 214 | # instead of having their own Resources entry. 215 | xobjres = xobj.get("Resources") 216 | if xobjres: 217 | resources = dict_value(xobjres) 218 | else: 219 | resources = self.resources.copy() 220 | self.device.begin_figure(xobjid, bbox, matrix) 221 | ctm = mult_matrix(matrix, self.ctm) 222 | ops_base = interpreter.render_contents( 223 | resources, 224 | [xobj], 225 | ctm=ctm, 226 | ) 227 | self.ncs = interpreter.ncs 228 | self.scs = interpreter.scs 229 | try: # 有的时候 form 字体加不上这里会烂掉 230 | self.device.fontid = interpreter.fontid 231 | self.device.fontmap = interpreter.fontmap 232 | ops_new = self.device.end_figure(xobjid) 233 | ctm_inv = np.linalg.inv(np.array(ctm[:4]).reshape(2, 2)) 234 | np_version = np.__version__ 235 | if np_version.split(".")[0] >= "2": 236 | pos_inv = -np.asmatrix(ctm[4:]) * ctm_inv 237 | else: 238 | pos_inv = -np.mat(ctm[4:]) * ctm_inv 239 | a, b, c, d = ctm_inv.reshape(4).tolist() 240 | e, f = pos_inv.tolist()[0] 241 | self.obj_patch[self.xobjmap[xobjid].objid] = ( 242 | f"q {ops_base}Q {a} {b} {c} {d} {e} {f} cm {ops_new}" 243 | ) 244 | except Exception: 245 | pass 246 | elif subtype is LITERAL_IMAGE and "Width" in xobj and "Height" in xobj: 247 | self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY) 248 | self.device.render_image(xobjid, xobj) 249 | self.device.end_figure(xobjid) 250 | else: 251 | # unsupported xobject type. 
252 | pass 253 | 254 | def process_page(self, page: PDFPage) -> None: 255 | # 重载设置 page 的 obj_patch 256 | # log.debug("Processing page: %r", page) 257 | # print(page.mediabox,page.cropbox) 258 | # (x0, y0, x1, y1) = page.mediabox 259 | (x0, y0, x1, y1) = page.cropbox 260 | if page.rotate == 90: 261 | ctm = (0, -1, 1, 0, -y0, x1) 262 | elif page.rotate == 180: 263 | ctm = (-1, 0, 0, -1, x1, y1) 264 | elif page.rotate == 270: 265 | ctm = (0, 1, -1, 0, y1, -x0) 266 | else: 267 | ctm = (1, 0, 0, 1, -x0, -y0) 268 | self.device.begin_page(page, ctm) 269 | ops_base = self.render_contents(page.resources, page.contents, ctm=ctm) 270 | self.device.fontid = self.fontid 271 | self.device.fontmap = self.fontmap 272 | ops_new = self.device.end_page(page) 273 | # 上面渲染的时候会根据 cropbox 减掉页面偏移得到真实坐标,这里输出的时候需要用 cm 把页面偏移加回来 274 | self.obj_patch[page.page_xref] = ( 275 | f"q {ops_base}Q 1 0 0 1 {x0} {y0} cm {ops_new}" # ops_base 里可能有图,需要让 ops_new 里的文字覆盖在上面,使用 q/Q 重置位置矩阵 276 | ) 277 | for obj in page.contents: 278 | self.obj_patch[obj.objid] = "" 279 | 280 | def render_contents( 281 | self, 282 | resources: Dict[object, object], 283 | streams: Sequence[object], 284 | ctm: Matrix = MATRIX_IDENTITY, 285 | ) -> None: 286 | # 重载返回指令流 287 | """Render the content streams. 288 | 289 | This method may be called recursively. 
290 | """ 291 | # log.debug( 292 | # "render_contents: resources=%r, streams=%r, ctm=%r", 293 | # resources, 294 | # streams, 295 | # ctm, 296 | # ) 297 | self.init_resources(resources) 298 | self.init_state(ctm) 299 | return self.execute(list_value(streams)) 300 | 301 | def execute(self, streams: Sequence[object]) -> None: 302 | # 重载返回指令流 303 | ops = "" 304 | try: 305 | parser = PDFContentParser(streams) 306 | except PSEOF: 307 | # empty page 308 | return 309 | while True: 310 | try: 311 | (_, obj) = parser.nextobject() 312 | except PSEOF: 313 | break 314 | if isinstance(obj, PSKeyword): 315 | name = keyword_name(obj) 316 | method = "do_%s" % name.replace("*", "_a").replace('"', "_w").replace( 317 | "'", 318 | "_q", 319 | ) 320 | if hasattr(self, method): 321 | func = getattr(self, method) 322 | nargs = func.__code__.co_argcount - 1 323 | if nargs: 324 | args = self.pop(nargs) 325 | # log.debug("exec: %s %r", name, args) 326 | if len(args) == nargs: 327 | func(*args) 328 | if not ( 329 | name[0] == "T" 330 | or name in ['"', "'", "EI", "MP", "DP", "BMC", "BDC"] 331 | ): # 过滤 T 系列文字指令,因为 EI 的参数是 obj 所以也需要过滤(只在少数文档中画横线时使用),过滤 marked 系列指令 332 | p = " ".join( 333 | [ 334 | ( 335 | f"{x:f}" 336 | if isinstance(x, float) 337 | else str(x).replace("'", "") 338 | ) 339 | for x in args 340 | ] 341 | ) 342 | ops += f"{p} {name} " 343 | else: 344 | # log.debug("exec: %s", name) 345 | targs = func() 346 | if targs is None: 347 | targs = [] 348 | if not (name[0] == "T" or name in ["BI", "ID", "EMC"]): 349 | p = " ".join( 350 | [ 351 | ( 352 | f"{x:f}" 353 | if isinstance(x, float) 354 | else str(x).replace("'", "") 355 | ) 356 | for x in targs 357 | ] 358 | ) 359 | ops += f"{p} {name} " 360 | elif settings.STRICT: 361 | error_msg = "Unknown operator: %r" % name 362 | raise PDFInterpreterError(error_msg) 363 | else: 364 | self.push(obj) 365 | # print('REV DATA',ops) 366 | return ops 367 | -------------------------------------------------------------------------------- 
/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "pdf2zh" 3 | version = "1.9.6" 4 | description = "Latex PDF Translator" 5 | authors = [{ name = "Byaidu", email = "byaidux@gmail.com" }] 6 | license = "AGPL-3.0" 7 | readme = "README.md" 8 | requires-python = ">=3.10,<3.13" 9 | classifiers = [ 10 | "Programming Language :: Python :: 3", 11 | "Operating System :: OS Independent", 12 | ] 13 | dependencies = [ 14 | "requests", 15 | # for arm64 linux wheels 16 | "pymupdf<1.25.3", 17 | "tqdm", 18 | "tenacity", 19 | "numpy", 20 | "ollama", 21 | "xinference-client", 22 | "deepl", 23 | "openai>=1.0.0", 24 | "azure-ai-translation-text<=1.0.1", 25 | "gradio", 26 | "huggingface_hub", 27 | "onnx", 28 | "onnxruntime", 29 | "opencv-python-headless", 30 | "tencentcloud-sdk-python-tmt", 31 | "pdfminer.six>=20240706", 32 | "gradio_pdf>=0.0.21", 33 | "pikepdf", 34 | "peewee>=3.17.8", 35 | "fontTools", 36 | "babeldoc>=0.1.22, <0.3.0", 37 | "rich", 38 | ] 39 | 40 | [project.optional-dependencies] 41 | backend = [ 42 | "flask", 43 | "celery", 44 | "redis" 45 | ] 46 | argostranslate = [ 47 | "argostranslate" 48 | ] 49 | mcp = [ 50 | "mcp>=1.6.0", 51 | ] 52 | 53 | [dependency-groups] 54 | dev = [ 55 | "black", 56 | "flake8", 57 | "pre-commit", 58 | "pytest", 59 | "build", 60 | "bumpver>=2024.1130", 61 | ] 62 | 63 | [project.urls] 64 | Homepage = "https://github.com/Byaidu/PDFMathTranslate" 65 | 66 | [build-system] 67 | requires = ["hatchling"] 68 | build-backend = "hatchling.build" 69 | 70 | [project.scripts] 71 | pdf2zh = "pdf2zh.pdf2zh:main" 72 | 73 | [tool.flake8] 74 | ignore = ["E203", "E261", "E501", "W503", "E741"] 75 | max-line-length = 88 76 | 77 | 78 | 79 | [bumpver] 80 | current_version = "1.9.6" 81 | version_pattern = "MAJOR.MINOR.PATCH[.PYTAGNUM]" 82 | 83 | [bumpver.file_patterns] 84 | "pyproject.toml" = [ 85 | 'current_version = "{version}"', 86 | 'version = "{version}"' 87 | ] 88 | "pdf2zh/__init__.py"
= [ 89 | '__version__ = "{version}"' 90 | ] 91 | -------------------------------------------------------------------------------- /script/Dockerfile.China: -------------------------------------------------------------------------------- 1 | FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim 2 | 3 | WORKDIR /app 4 | 5 | 6 | EXPOSE 7860 7 | 8 | ENV PYTHONUNBUFFERED=1 9 | ADD "https://ghgo.xyz/https://github.com/satbyy/go-noto-universal/releases/download/v7.0/GoNotoKurrent-Regular.ttf" /app 10 | RUN apt-get update && \ 11 | apt-get install --no-install-recommends -y libgl1 && \ 12 | rm -rf /var/lib/apt/lists/* && uv pip install --system --no-cache huggingface-hub && \ 13 | python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download('wybxc/DocLayout-YOLO-DocStructBench-onnx','doclayout_yolo_docstructbench_imgsz1024.onnx');" 14 | 15 | COPY . . 16 | 17 | RUN uv pip install --system --no-cache . 18 | 19 | CMD ["pdf2zh", "-i"] 20 | -------------------------------------------------------------------------------- /script/Dockerfile.Demo: -------------------------------------------------------------------------------- 1 | FROM python:3.12 2 | 3 | WORKDIR /app 4 | 5 | COPY . . 6 | 7 | EXPOSE 7860 8 | 9 | ENV PYTHONUNBUFFERED=1 10 | 11 | RUN apt-get update && apt-get install -y libgl1 12 | 13 | RUN pip install . 
14 | 15 | RUN mkdir -p /data 16 | RUN chmod 777 /data 17 | RUN mkdir -p /app 18 | RUN chmod 777 /app 19 | RUN mkdir -p /.cache 20 | RUN chmod 777 /.cache 21 | RUN mkdir -p ./gradio_files 22 | RUN chmod 777 ./gradio_files 23 | 24 | CMD ["pdf2zh", "-i"] 25 | -------------------------------------------------------------------------------- /script/_pystand_static.int: -------------------------------------------------------------------------------- 1 | import sys 2 | import pdf2zh.pdf2zh 3 | import os 4 | import babeldoc.assets.assets 5 | import pathlib 6 | 7 | WAIT_FOR_INPUT = False 8 | if len(sys.argv) == 1: 9 | sys.argv.append("-i") # 无参数时自动添加 -i 参数 10 | WAIT_FOR_INPUT = True 11 | 12 | files = os.listdir(os.path.dirname(__file__)) 13 | for file in files: 14 | if file.endswith(".zip") and file.startswith("offline_assets_"): 15 | print('find offline_assets_zip file: ', file, ' try restore...') 16 | babeldoc.assets.assets.restore_offline_assets_package(pathlib.Path(os.path.dirname(__file__))) 17 | 18 | try: 19 | code = pdf2zh.pdf2zh.main() 20 | print(f"pdf2zh.pdf2zh.main() return code: {code}") 21 | if WAIT_FOR_INPUT: 22 | input("Press Enter to continue...") 23 | sys.exit(code) 24 | except Exception: 25 | import traceback 26 | traceback.print_exc() 27 | if WAIT_FOR_INPUT: 28 | input("Press Enter to continue...") 29 | sys.exit(1) -------------------------------------------------------------------------------- /script/setup.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | setlocal enabledelayedexpansion 3 | 4 | set PYTHON_URL=https://www.python.org/ftp/python/3.12.7/python-3.12.7-embed-amd64.zip 5 | set PIP_URL=https://bootstrap.pypa.io/get-pip.py 6 | set HF_ENDPOINT=https://hf-mirror.com 7 | set PIP_MIRROR=https://mirrors.aliyun.com/pypi/simple 8 | 9 | if not exist pdf2zh_dist/python.exe ( 10 | powershell -Command "& {Invoke-WebRequest -Uri !PYTHON_URL! 
-OutFile python.zip}" 11 | powershell -Command "& {Expand-Archive -Path python.zip -DestinationPath pdf2zh_dist -Force}" 12 | del python.zip 13 | echo import site >> pdf2zh_dist/python312._pth 14 | ) 15 | cd pdf2zh_dist 16 | 17 | if not exist Scripts/pip.exe ( 18 | powershell -Command "& {Invoke-WebRequest -Uri !PIP_URL! -OutFile get-pip.py}" 19 | python get-pip.py 20 | ) 21 | path Scripts 22 | 23 | pip install --no-warn-script-location --upgrade setuptools -i !PIP_MIRROR! 24 | pip install --no-warn-script-location --upgrade pdf2zh -i !PIP_MIRROR! 25 | pdf2zh -i 26 | 27 | pause 28 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | ignore = E203,E261,E501,W503,E741 4 | exclude = .git,build,dist,docs -------------------------------------------------------------------------------- /test/file/translate.cli.font.unknown.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/test/file/translate.cli.font.unknown.pdf -------------------------------------------------------------------------------- /test/file/translate.cli.plain.text.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/test/file/translate.cli.plain.text.pdf -------------------------------------------------------------------------------- /test/file/translate.cli.text.with.figure.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Byaidu/PDFMathTranslate/02abebf091bb7bce73761113554b93027833a606/test/file/translate.cli.text.with.figure.pdf -------------------------------------------------------------------------------- 
/test/test_cache.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from pdf2zh import cache 3 | import threading 4 | import multiprocessing 5 | import random 6 | import string 7 | 8 | 9 | class TestCache(unittest.TestCase): 10 | def setUp(self): 11 | self.test_db = cache.init_test_db() 12 | 13 | def tearDown(self): 14 | # Clean up 15 | cache.clean_test_db(self.test_db) 16 | 17 | def test_basic_set_get(self): 18 | """Test basic set and get operations""" 19 | cache_instance = cache.TranslationCache("test_engine") 20 | 21 | # Test get with non-existent entry 22 | result = cache_instance.get("hello") 23 | self.assertIsNone(result) 24 | 25 | # Test set and get 26 | cache_instance.set("hello", "你好") 27 | result = cache_instance.get("hello") 28 | self.assertEqual(result, "你好") 29 | 30 | def test_cache_overwrite(self): 31 | """Test that cache entries can be overwritten""" 32 | cache_instance = cache.TranslationCache("test_engine") 33 | 34 | # Set initial translation 35 | cache_instance.set("hello", "你好") 36 | 37 | # Overwrite with new translation 38 | cache_instance.set("hello", "您好") 39 | 40 | # Verify the new translation is returned 41 | result = cache_instance.get("hello") 42 | self.assertEqual(result, "您好") 43 | 44 | def test_non_string_params(self): 45 | """Test that non-string parameters are automatically converted to JSON""" 46 | params = {"model": "gpt-3.5", "temperature": 0.7} 47 | cache_instance = cache.TranslationCache("test_engine", params) 48 | 49 | # Test that params are converted to JSON string internally 50 | cache_instance.set("hello", "你好") 51 | result = cache_instance.get("hello") 52 | self.assertEqual(result, "你好") 53 | 54 | # Test with different param types 55 | array_params = ["param1", "param2"] 56 | cache_instance2 = cache.TranslationCache("test_engine", array_params) 57 | cache_instance2.set("hello", "你好2") 58 | self.assertEqual(cache_instance2.get("hello"), "你好2") 59 | 60 | # Test with 
nested structures 61 | nested_params = {"options": {"temp": 0.8, "models": ["a", "b"]}} 62 | cache_instance3 = cache.TranslationCache("test_engine", nested_params) 63 | cache_instance3.set("hello", "你好3") 64 | self.assertEqual(cache_instance3.get("hello"), "你好3") 65 | 66 | def test_engine_distinction(self): 67 | """Test that cache distinguishes between different translation engines""" 68 | cache1 = cache.TranslationCache("engine1") 69 | cache2 = cache.TranslationCache("engine2") 70 | 71 | # Set same text with different engines 72 | cache1.set("hello", "你好 1") 73 | cache2.set("hello", "你好 2") 74 | 75 | # Verify each engine gets its own translation 76 | self.assertEqual(cache1.get("hello"), "你好 1") 77 | self.assertEqual(cache2.get("hello"), "你好 2") 78 | 79 | def test_params_distinction(self): 80 | """Test that cache distinguishes between different engine parameters""" 81 | params1 = {"param": "value1"} 82 | params2 = {"param": "value2"} 83 | cache1 = cache.TranslationCache("test_engine", params1) 84 | cache2 = cache.TranslationCache("test_engine", params2) 85 | 86 | # Set same text with different parameters 87 | cache1.set("hello", "你好 1") 88 | cache2.set("hello", "你好 2") 89 | 90 | # Verify each parameter set gets its own translation 91 | self.assertEqual(cache1.get("hello"), "你好 1") 92 | self.assertEqual(cache2.get("hello"), "你好 2") 93 | 94 | def test_consistent_param_serialization(self): 95 | """Test that dictionary parameters are consistently serialized regardless of key order""" 96 | # Test simple dictionary 97 | params1 = {"b": 1, "a": 2} 98 | params2 = {"a": 2, "b": 1} 99 | cache1 = cache.TranslationCache("test_engine", params1) 100 | cache2 = cache.TranslationCache("test_engine", params2) 101 | self.assertEqual(cache1.translate_engine_params, cache2.translate_engine_params) 102 | 103 | # Test nested dictionary 104 | params1 = {"outer2": {"inner2": 2, "inner1": 1}, "outer1": 3} 105 | params2 = {"outer1": 3, "outer2": {"inner1": 1, "inner2": 2}} 106 | cache1 = 
cache.TranslationCache("test_engine", params1) 107 | cache2 = cache.TranslationCache("test_engine", params2) 108 | self.assertEqual(cache1.translate_engine_params, cache2.translate_engine_params) 109 | 110 | # Test dictionary with list of dictionaries 111 | params1 = {"b": [{"y": 1, "x": 2}], "a": 3} 112 | params2 = {"a": 3, "b": [{"x": 2, "y": 1}]} 113 | cache1 = cache.TranslationCache("test_engine", params1) 114 | cache2 = cache.TranslationCache("test_engine", params2) 115 | self.assertEqual(cache1.translate_engine_params, cache2.translate_engine_params) 116 | 117 | # Test that different values still produce different results 118 | params1 = {"a": 1, "b": 2} 119 | params2 = {"a": 2, "b": 1} 120 | cache1 = cache.TranslationCache("test_engine", params1) 121 | cache2 = cache.TranslationCache("test_engine", params2) 122 | self.assertNotEqual( 123 | cache1.translate_engine_params, cache2.translate_engine_params 124 | ) 125 | 126 | def test_cache_with_sorted_params(self): 127 | """Test that cache works correctly with sorted parameters""" 128 | params1 = {"b": [{"y": 1, "x": 2}], "a": 3} 129 | params2 = {"a": 3, "b": [{"x": 2, "y": 1}]} 130 | 131 | # Both caches should work with the same key 132 | cache1 = cache.TranslationCache("test_engine", params1) 133 | cache1.set("hello", "你好") 134 | 135 | cache2 = cache.TranslationCache("test_engine", params2) 136 | self.assertEqual(cache2.get("hello"), "你好") 137 | 138 | def test_append_params(self): 139 | """Test the append_params method""" 140 | cache_instance = cache.TranslationCache("test_engine", {"initial": "value"}) 141 | 142 | # Test appending new parameter 143 | cache_instance.add_params("new_param", "new_value") 144 | self.assertEqual( 145 | cache_instance.params, {"initial": "value", "new_param": "new_value"} 146 | ) 147 | 148 | # Test that cache with appended params works correctly 149 | cache_instance.set("hello", "你好") 150 | self.assertEqual(cache_instance.get("hello"), "你好") 151 | 152 | # Test overwriting existing 
parameter 153 | cache_instance.add_params("initial", "new_value") 154 | self.assertEqual( 155 | cache_instance.params, {"initial": "new_value", "new_param": "new_value"} 156 | ) 157 | 158 | # Cache should work with updated params 159 | cache_instance.set("hello2", "你好2") 160 | self.assertEqual(cache_instance.get("hello2"), "你好2") 161 | 162 | # Sometimes the problem of "database is locked" occurs. Temporarily disable this test. 163 | # def test_thread_safety(self): 164 | # """Test thread safety of cache operations""" 165 | # cache_instance = cache.TranslationCache("test_engine") 166 | # lock = threading.Lock() 167 | # results = [] 168 | # num_threads = multiprocessing.cpu_count() 169 | # items_per_thread = 100 170 | 171 | # def generate_random_text(length=10): 172 | # return "".join( 173 | # random.choices(string.ascii_letters + string.digits, k=length) 174 | # ) 175 | 176 | # def worker(): 177 | # thread_results = [] # 线程本地存储结果 178 | # for _ in range(items_per_thread): 179 | # text = generate_random_text() 180 | # translation = f"翻译_{text}" 181 | 182 | # # Write operation 183 | # cache_instance.set(text, translation) 184 | 185 | # # Read operation - verify our own write 186 | # result = cache_instance.get(text) 187 | # thread_results.append((text, result)) 188 | 189 | # # 所有操作完成后,一次性加锁并追加结果 190 | # with lock: 191 | # results.extend(thread_results) 192 | 193 | # # Create threads equal to CPU core count 194 | # threads = [] 195 | # for _ in range(num_threads): 196 | # thread = threading.Thread(target=worker) 197 | # threads.append(thread) 198 | # thread.start() 199 | 200 | # # Wait for all threads to complete 201 | # for thread in threads: 202 | # thread.join() 203 | 204 | # # Verify all operations were successful 205 | # expected_total = num_threads * items_per_thread 206 | # self.assertEqual(len(results), expected_total) 207 | 208 | # # Verify each thread got its correct value 209 | # for text, result in results: 210 | # expected = f"翻译_{text}" 211 | # 
self.assertEqual(result, expected) 212 | 213 | 214 | if __name__ == "__main__": 215 | unittest.main() 216 | -------------------------------------------------------------------------------- /test/test_converter.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest.mock import Mock, patch, MagicMock 3 | from pdfminer.layout import LTPage, LTChar, LTLine 4 | from pdfminer.pdfinterp import PDFResourceManager 5 | from pdf2zh.converter import PDFConverterEx, TranslateConverter 6 | 7 | 8 | class TestPDFConverterEx(unittest.TestCase): 9 | def setUp(self): 10 | self.rsrcmgr = PDFResourceManager() 11 | self.converter = PDFConverterEx(self.rsrcmgr) 12 | 13 | def test_begin_page(self): 14 | mock_page = Mock() 15 | mock_page.pageno = 1 16 | mock_page.cropbox = (0, 0, 100, 200) 17 | mock_ctm = [1, 0, 0, 1, 0, 0] 18 | self.converter.begin_page(mock_page, mock_ctm) 19 | self.assertIsNotNone(self.converter.cur_item) 20 | self.assertEqual(self.converter.cur_item.pageid, 1) 21 | 22 | def test_render_char(self): 23 | mock_matrix = (1, 2, 3, 4, 5, 6) 24 | mock_font = Mock() 25 | mock_font.to_unichr.return_value = "A" 26 | mock_font.char_width.return_value = 10 27 | mock_font.char_disp.return_value = (0, 0) 28 | graphic_state = Mock() 29 | self.converter.cur_item = Mock() 30 | result = self.converter.render_char( 31 | mock_matrix, 32 | mock_font, 33 | fontsize=12, 34 | scaling=1.0, 35 | rise=0, 36 | cid=65, 37 | ncs=None, 38 | graphicstate=graphic_state, 39 | ) 40 | self.assertEqual(result, 120.0) # Expected text width 41 | 42 | 43 | class TestTranslateConverter(unittest.TestCase): 44 | def setUp(self): 45 | self.rsrcmgr = PDFResourceManager() 46 | self.layout = {1: Mock()} 47 | self.translator_class = Mock() 48 | self.converter = TranslateConverter( 49 | self.rsrcmgr, 50 | layout=self.layout, 51 | lang_in="en", 52 | lang_out="zh", 53 | service="google", 54 | ) 55 | 56 | def test_translator_initialization(self): 57 | 
self.assertIsNotNone(self.converter.translator) 58 | self.assertEqual(self.converter.translator.lang_in, "en") 59 | self.assertEqual(self.converter.translator.lang_out, "zh-CN") 60 | 61 | @patch("pdf2zh.converter.TranslateConverter.receive_layout") 62 | def test_receive_layout(self, mock_receive_layout): 63 | mock_page = LTPage(1, (0, 0, 100, 200)) 64 | mock_font = Mock() 65 | mock_font.fontname.return_value = "mock_font" 66 | mock_page.add( 67 | LTChar( 68 | matrix=(1, 2, 3, 4, 5, 6), 69 | font=mock_font, 70 | fontsize=12, 71 | scaling=1.0, 72 | rise=0, 73 | text="A", 74 | textwidth=10, 75 | textdisp=(1.0, 1.0), 76 | ncs=Mock(), 77 | graphicstate=Mock(), 78 | ) 79 | ) 80 | self.converter.receive_layout(mock_page) 81 | mock_receive_layout.assert_called_once_with(mock_page) 82 | 83 | def test_receive_layout_with_complex_formula(self): 84 | ltpage = LTPage(1, (0, 0, 500, 500)) 85 | ltchar = Mock() 86 | ltchar.fontname.return_value = "mock_font" 87 | ltline = LTLine(0.1, (0, 0), (10, 20)) 88 | ltpage.add(ltchar) 89 | ltpage.add(ltline) 90 | mock_layout = MagicMock() 91 | mock_layout.shape = (100, 100) 92 | mock_layout.__getitem__.return_value = -1 93 | self.converter.layout = [None, mock_layout] 94 | self.converter.thread = 1 95 | result = self.converter.receive_layout(ltpage) 96 | self.assertIsNotNone(result) 97 | 98 | def test_invalid_translation_service(self): 99 | with self.assertRaises(ValueError): 100 | TranslateConverter( 101 | self.rsrcmgr, 102 | layout=self.layout, 103 | lang_in="en", 104 | lang_out="zh", 105 | service="InvalidService", 106 | ) 107 | 108 | 109 | if __name__ == "__main__": 110 | unittest.main() 111 | -------------------------------------------------------------------------------- /test/test_doclayout.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest.mock import patch, MagicMock 3 | import numpy as np 4 | from pdf2zh.doclayout import ( 5 | OnnxModel, 6 | YoloResult, 7 | 
YoloBox, 8 | ) 9 | 10 | 11 | class TestOnnxModel(unittest.TestCase): 12 | @patch("onnx.load") 13 | @patch("onnxruntime.InferenceSession") 14 | def setUp(self, mock_inference_session, mock_onnx_load): 15 | # Mock ONNX model metadata 16 | mock_model = MagicMock() 17 | mock_model.metadata_props = [ 18 | MagicMock(key="stride", value="32"), 19 | MagicMock(key="names", value="['class1', 'class2']"), 20 | ] 21 | mock_onnx_load.return_value = mock_model 22 | 23 | # Initialize OnnxModel with a fake path 24 | self.model_path = "fake_model_path.onnx" 25 | self.model = OnnxModel(self.model_path) 26 | 27 | def test_stride_property(self): 28 | # Test that stride is correctly set from model metadata 29 | self.assertEqual(self.model.stride, 32) 30 | 31 | def test_resize_and_pad_image(self): 32 | # Create a dummy image (100x200) 33 | image = np.ones((100, 200, 3), dtype=np.uint8) 34 | resized_image = self.model.resize_and_pad_image(image, 1024) 35 | 36 | # Validate the output shape 37 | self.assertEqual(resized_image.shape[0], 512) 38 | self.assertEqual(resized_image.shape[1], 1024) 39 | 40 | # Check that padding has been added 41 | padded_height = resized_image.shape[0] - image.shape[0] 42 | padded_width = resized_image.shape[1] - image.shape[1] 43 | self.assertGreater(padded_height, 0) 44 | self.assertGreater(padded_width, 0) 45 | 46 | def test_scale_boxes(self): 47 | img1_shape = (1024, 1024) # Model input shape 48 | img0_shape = (500, 300) # Original image shape 49 | boxes = np.array([[512, 512, 768, 768]]) # Example bounding box 50 | 51 | scaled_boxes = self.model.scale_boxes(img1_shape, boxes, img0_shape) 52 | 53 | # Verify the output is scaled correctly 54 | self.assertEqual(scaled_boxes.shape, boxes.shape) 55 | self.assertTrue(np.all(scaled_boxes <= max(img0_shape))) 56 | 57 | def test_predict(self): 58 | # Mock model inference output 59 | mock_output = np.random.random((1, 300, 6)) 60 | self.model.model.run.return_value = [mock_output] 61 | 62 | # Create a dummy image 63 
| image = np.ones((500, 300, 3), dtype=np.uint8) 64 | 65 | results = self.model.predict(image) 66 | 67 | # Validate predictions 68 | self.assertEqual(len(results), 1) 69 | self.assertIsInstance(results[0], YoloResult) 70 | self.assertGreater(len(results[0].boxes), 0) 71 | self.assertIsInstance(results[0].boxes[0], YoloBox) 72 | 73 | 74 | class TestYoloResult(unittest.TestCase): 75 | def test_yolo_result(self): 76 | # Example prediction data 77 | boxes = [ 78 | [100, 200, 300, 400, 0.9, 0], 79 | [50, 100, 150, 200, 0.8, 1], 80 | ] 81 | names = ["class1", "class2"] 82 | 83 | result = YoloResult(boxes, names) 84 | 85 | # Validate the number of boxes and their order by confidence 86 | self.assertEqual(len(result.boxes), 2) 87 | self.assertGreater(result.boxes[0].conf, result.boxes[1].conf) 88 | self.assertEqual(result.names, names) 89 | 90 | 91 | class TestYoloBox(unittest.TestCase): 92 | def test_yolo_box(self): 93 | # Example box data 94 | box_data = [100, 200, 300, 400, 0.9, 0] 95 | 96 | box = YoloBox(box_data) 97 | 98 | # Validate box properties 99 | self.assertEqual(box.xyxy, box_data[:4]) 100 | self.assertEqual(box.conf, box_data[4]) 101 | self.assertEqual(box.cls, box_data[5]) 102 | 103 | 104 | if __name__ == "__main__": 105 | unittest.main() 106 | -------------------------------------------------------------------------------- /test/test_translator.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from textwrap import dedent 3 | from unittest import mock 4 | 5 | from ollama import ResponseError as OllamaResponseError 6 | 7 | from pdf2zh import cache 8 | from pdf2zh.config import ConfigManager 9 | from pdf2zh.translator import BaseTranslator, OllamaTranslator, OpenAIlikedTranslator 10 | 11 | # Since it is necessary to test whether the functionality meets the expected requirements, 12 | # private functions and private methods are allowed to be called. 
# pyright: reportPrivateUsage=false


class AutoIncreaseTranslator(BaseTranslator):
    """Stub translator that returns a per-instance call counter as text.

    Each real call to ``do_translate`` yields a new value, so the tests can
    tell a cached answer (same value) from a fresh translation (new value).
    """

    name = "auto_increase"
    # Class-level start value; ``self.n += 1`` rebinds it on the instance.
    n = 0

    def do_translate(self, text):
        """Ignore ``text`` and return the incremented counter as a string."""
        self.n += 1
        return str(self.n)


class TestTranslator(unittest.TestCase):
    """Cache behaviour of ``BaseTranslator.translate``."""

    def setUp(self):
        self.test_db = cache.init_test_db()

    def tearDown(self):
        cache.clean_test_db(self.test_db)

    def test_cache(self):
        """Repeated input hits the cache unless ``ignore_cache`` is set."""
        translator = AutoIncreaseTranslator("en", "zh", "test", False)
        # First translation should be cached
        text = "Hello World"
        first_result = translator.translate(text)

        # Second translation should return the same result from cache
        second_result = translator.translate(text)
        self.assertEqual(first_result, second_result)

        # Different input should give different result
        different_text = "Different Text"
        different_result = translator.translate(different_text)
        self.assertNotEqual(first_result, different_result)

        # Test cache with ignore_cache=True
        translator.ignore_cache = True
        no_cache_result = translator.translate(text)
        self.assertNotEqual(first_result, no_cache_result)

    def test_add_cache_impact_parameters(self):
        """Adding a cache-impact parameter invalidates earlier cache hits."""
        translator = AutoIncreaseTranslator("en", "zh", "test", False)

        # Test cache with added parameters
        text = "Hello World"
        first_result = translator.translate(text)
        translator.add_cache_impact_parameters("test", "value")
        second_result = translator.translate(text)
        self.assertNotEqual(first_result, second_result)

        # Test cache with ignore_cache=True
        no_cache_result1 = translator.translate(text, ignore_cache=True)
        self.assertNotEqual(first_result, no_cache_result1)

        translator.ignore_cache = True
        no_cache_result2 = translator.translate(text)
        self.assertNotEqual(no_cache_result1, no_cache_result2)

        # Test cache with ignore_cache=False
        translator.ignore_cache = False
        cache_result = translator.translate(text)
        self.assertEqual(no_cache_result2, cache_result)

        # Test cache with another parameter
        translator.add_cache_impact_parameters("test2", "value2")
        another_result = translator.translate(text)
        self.assertNotEqual(second_result, another_result)

    def test_base_translator_throw(self):
        """The base class has no ``do_translate`` implementation of its own."""
        translator = BaseTranslator("en", "zh", "test", False)
        with self.assertRaises(NotImplementedError):
            translator.translate("Hello World")


class TestOpenAIlikedTranslator(unittest.TestCase):
    """Construction-time validation of ``OpenAIlikedTranslator``."""

    def setUp(self) -> None:
        # Every test starts from a cleared ConfigManager -- presumably so that
        # previously stored configuration cannot stand in for a missing env.
        ConfigManager.clear()
        self.default_envs = {
            "OPENAILIKED_BASE_URL": "https://api.openailiked.com",
            "OPENAILIKED_API_KEY": "test_api_key",
            "OPENAILIKED_MODEL": "test_model",
        }

    def test_missing_base_url_raises_error(self):
        """A missing OPENAILIKED_BASE_URL raises ValueError."""
        with self.assertRaises(ValueError) as context:
            OpenAIlikedTranslator(
                lang_in="en", lang_out="zh", model="test_model", envs={}
            )
        self.assertIn("The OPENAILIKED_BASE_URL is missing.", str(context.exception))

    def test_missing_model_raises_error(self):
        """A missing OPENAILIKED_MODEL raises ValueError."""
        envs_without_model = {
            "OPENAILIKED_BASE_URL": "https://api.openailiked.com",
            "OPENAILIKED_API_KEY": "test_api_key",
        }
        with self.assertRaises(ValueError) as context:
            OpenAIlikedTranslator(
                lang_in="en", lang_out="zh", model=None, envs=envs_without_model
            )
        self.assertIn("The OPENAILIKED_MODEL is missing.", str(context.exception))

    def test_initialization_with_valid_envs(self):
        """A complete set of envs is taken over by the translator."""
        translator = OpenAIlikedTranslator(
            lang_in="en",
            lang_out="zh",
            model=None,
            envs=self.default_envs,
        )
        self.assertEqual(
            translator.envs["OPENAILIKED_BASE_URL"],
            self.default_envs["OPENAILIKED_BASE_URL"],
        )
        self.assertEqual(
            translator.envs["OPENAILIKED_API_KEY"],
            self.default_envs["OPENAILIKED_API_KEY"],
        )
        self.assertEqual(translator.model, self.default_envs["OPENAILIKED_MODEL"])

    def test_default_api_key_fallback(self):
        """A missing OPENAILIKED_API_KEY falls back to its default (None here)."""
        envs_without_key = {
            "OPENAILIKED_BASE_URL": "https://api.openailiked.com",
            "OPENAILIKED_MODEL": "test_model",
        }
        translator = OpenAIlikedTranslator(
            lang_in="en",
            lang_out="zh",
            model=None,
            envs=envs_without_key,
        )
        self.assertEqual(
            translator.envs["OPENAILIKED_BASE_URL"],
            self.default_envs["OPENAILIKED_BASE_URL"],
        )
        self.assertIsNone(translator.envs["OPENAILIKED_API_KEY"])


class TestOllamaTranslator(unittest.TestCase):
    """``OllamaTranslator`` request shape and chain-of-thought stripping."""

    def test_do_translate(self):
        """``do_translate`` sends one chat request and drops the CoT section."""
        translator = OllamaTranslator(lang_in="en", lang_out="zh", model="test:3b")
        with mock.patch.object(translator, "client") as mock_client:
            chat_response = mock_client.chat.return_value
            # The leading <think>...</think> section is the part the
            # translator is expected to remove from the reply.
            chat_response.message.content = dedent(
                """\
                <think>
                Thinking...
                </think>

                天空呈现蓝色是因为...
                """
            )

            text = "The sky appears blue because of..."
            translated_result = translator.do_translate(text)
            mock_client.chat.assert_called_once_with(
                model="test:3b",
                messages=translator.prompt(text, prompt_template=None),
                options={
                    "temperature": translator.options["temperature"],
                    "num_predict": translator.options["num_predict"],
                },
            )
            self.assertEqual("天空呈现蓝色是因为...", translated_result)

            # response error
            # NOTE(review): this calls the mock directly, so it only checks the
            # mock's side_effect; consider going through
            # translator.do_translate(text) once its error handling is confirmed.
            mock_client.chat.side_effect = OllamaResponseError("an error status")
            with self.assertRaises(OllamaResponseError):
                mock_client.chat()

    def test_remove_cot_content(self):
        """Only the leading ``<think>`` section is removed from a reply."""
        fake_cot_resp_text = dedent(
            """\
            <think>

            </think>

            The sky appears blue because of..."""
        )
        removed_cot_content = OllamaTranslator._remove_cot_content(fake_cot_resp_text)
        expected_content = "The sky appears blue because of..."
        self.assertEqual(expected_content, removed_cot_content.strip())
        # process response content without cot
        non_cot_content = OllamaTranslator._remove_cot_content(expected_content)
        self.assertEqual(expected_content, non_cot_content)

        # `_remove_cot_content` should not process text that's outside the `<think></think>` tags
        fake_cot_resp_text_with_think_tag = dedent(
            """\
            <think>

            </think>

            The sky appears blue because of......
            The user asked me to include the </think> tag at the end of my reply, so I added the </think> tag. </think>"""
        )

        only_removed_cot_content = OllamaTranslator._remove_cot_content(
            fake_cot_resp_text_with_think_tag
        )
        # Later </think> occurrences belong to the answer and must survive.
        expected_not_retain_cot_content = dedent(
            """\
            The sky appears blue because of......
            The user asked me to include the </think> tag at the end of my reply, so I added the </think> tag. </think>"""
        )
        self.assertEqual(
            expected_not_retain_cot_content, only_removed_cot_content.strip()
        )


if __name__ == "__main__":
    unittest.main()