├── .gitattributes ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bugreport.md │ ├── featurerequest.md │ └── question.md ├── PULL_REQUEST_TEMPLATE.md ├── labeler.yml └── workflows │ ├── build-docker.yml │ ├── build.yml │ ├── coverage-comment.yml │ ├── labeler.yml │ ├── release-test.yml │ ├── test.yml │ ├── typos.yml │ └── upload-gh-pages.yml ├── .gitignore ├── .pre-commit-config.yaml ├── Dockerfile ├── LGPL_LICENSE ├── LICENSE ├── Makefile ├── README.md ├── _typos.toml ├── build_util ├── check_release_build.py ├── codesign.bash ├── create_venv_and_generate_licenses.bash ├── merge_update_infos.py ├── modify_pyinstaller.bash └── process_voicevox_resource.bash ├── default.csv ├── default_setting.yml ├── docs ├── VOICEVOX音声合成エンジンとの連携.md ├── api │ └── .gitkeep ├── licenses │ ├── cuda │ │ └── EULA.txt │ ├── cudnn │ │ └── LICENSE │ ├── open_jtalk │ │ ├── COPYING │ │ ├── mecab-naist-jdic │ │ │ └── COPYING │ │ └── mecab │ │ │ └── COPYING │ └── world │ │ └── LICENSE.txt └── res │ └── マルチエンジン概念図.svg ├── engine_manifest.json ├── engine_manifest_assets ├── dependency_licenses.json ├── downloadable_libraries.json ├── icon.png ├── terms_of_service.md └── update_infos.json ├── generate_licenses.py ├── get_cost_candidates.py ├── make_docs.py ├── poetry.lock ├── presets.yaml ├── pyproject.toml ├── requirements-dev.txt ├── requirements-license.txt ├── requirements-test.txt ├── requirements.txt ├── run.py ├── run.spec ├── setup.cfg ├── speaker_info ├── 35b2c544-660e-401e-b503-0e14c635303a │ ├── icons │ │ └── 8.png │ ├── metas.json │ ├── policy.md │ ├── portrait.png │ ├── portraits │ │ └── 8.png │ └── voice_samples │ │ ├── 8_001.wav │ │ ├── 8_002.wav │ │ └── 8_003.wav ├── 388f246b-8c41-4ac1-8e2d-5d79f3ff56d9 │ ├── icons │ │ ├── 1.png │ │ ├── 3.png │ │ ├── 5.png │ │ └── 7.png │ ├── metas.json │ ├── policy.md │ ├── portrait.png │ ├── portraits │ │ └── 3.png │ └── voice_samples │ │ ├── 1_001.wav │ │ ├── 1_002.wav │ │ ├── 1_003.wav │ │ ├── 3_001.wav │ │ ├── 3_002.wav │ │ ├── 3_003.wav 
│ │ ├── 5_001.wav │ │ ├── 5_002.wav │ │ ├── 5_003.wav │ │ ├── 7_001.wav │ │ ├── 7_002.wav │ │ └── 7_003.wav ├── 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff │ ├── icons │ │ ├── 0.png │ │ ├── 2.png │ │ ├── 4.png │ │ └── 6.png │ ├── metas.json │ ├── policy.md │ ├── portrait.png │ ├── portraits │ │ ├── 0.png │ │ ├── 2.png │ │ ├── 4.png │ │ └── 6.png │ └── voice_samples │ │ ├── 0_001.wav │ │ ├── 0_002.wav │ │ ├── 0_003.wav │ │ ├── 2_001.wav │ │ ├── 2_002.wav │ │ ├── 2_003.wav │ │ ├── 4_001.wav │ │ ├── 4_002.wav │ │ ├── 4_003.wav │ │ ├── 6_001.wav │ │ ├── 6_002.wav │ │ └── 6_003.wav └── b1a81618-b27b-40d2-b0ea-27a9ad408c4b │ ├── icons │ └── 9.png │ ├── metas.json │ ├── policy.md │ ├── portrait.png │ └── voice_samples │ ├── 9_001.wav │ ├── 9_002.wav │ └── 9_003.wav ├── test ├── __init__.py ├── presets-test-1.yaml ├── presets-test-2.yaml ├── presets-test-3.yaml ├── presets-test-4.yaml ├── test_acoustic_feature_extractor.py ├── test_connect_base64_waves.py ├── test_full_context_label.py ├── test_kana_parser.py ├── test_mock_synthesis_engine.py ├── test_mora_list.py ├── test_mora_to_text.py ├── test_preset.py ├── test_synthesis_engine.py ├── test_synthesis_engine_base.py ├── test_user_dict.py ├── test_user_dict_model.py └── test_word_types.py ├── ui_template └── ui.html └── voicevox_engine ├── __init__.py ├── acoustic_feature_extractor.py ├── cancellable_engine.py ├── dev ├── core │ ├── __init__.py │ └── mock.py └── synthesis_engine │ ├── __init__.py │ └── mock.py ├── engine_manifest ├── EngineManifest.py ├── EngineManifestLoader.py └── __init__.py ├── full_context_label.py ├── kana_parser.py ├── metas ├── Metas.py ├── MetasStore.py └── __init__.py ├── model.py ├── mora_list.py ├── morphing.py ├── part_of_speech_data.py ├── preset ├── Preset.py ├── PresetError.py ├── PresetManager.py └── __init__.py ├── setting ├── Setting.py ├── SettingLoader.py └── __init__.py ├── synthesis_engine ├── __init__.py ├── core_wrapper.py ├── make_synthesis_engines.py ├── synthesis_engine.py └── 
synthesis_engine_base.py ├── user_dict.py └── utility ├── __init__.py ├── connect_base64_waves.py ├── mutex_utility.py └── path_utility.py /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | *.png -text 3 | *.wav -text -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @VOICEVOX/maintainer 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bugreport.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: 不具合の報告 4 | labels: バグ 5 | --- 6 | 7 | ## 不具合の内容 8 | 9 | 10 | 11 | ### 現象・ログ 12 | 13 | 14 | 15 | ### 再現手順 16 | 17 | 18 | 19 | ### 期待動作 20 | 21 | 22 | 23 | ## VOICEVOXのバージョン 24 | 25 | 0.?.0 26 | 27 | 28 | 29 | ## OSの種類/ディストリ/バージョン 30 | 31 | 32 | 33 | - [ ] Windows 34 | - [ ] macOS 35 | - [ ] Linux 36 | 37 | 44 | 45 | ## その他 46 | 47 | 48 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/featurerequest.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: 機能要望・改善提案 4 | labels: 機能向上 5 | --- 6 | 7 | ## 内容 8 | 9 | 10 | 11 | 12 | ### Pros 良くなる点 13 | 14 | 15 | 16 | ### Cons 悪くなる点 17 | 18 | 19 | 20 | ### 実現方法 21 | 22 | 23 | 24 | ## VOICEVOXのバージョン 25 | 26 | 0.?.0 27 | 28 | 29 | 30 | ## OSの種類/ディストリ/バージョン 31 | 32 | 33 | 34 | - [ ] Windows 35 | - [ ] macOS 36 | - [ ] Linux 37 | 38 | 45 | 46 | ## その他 47 | 48 | 49 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question 3 | about: 質問 (既存のIssueや一般事例を良く調べてからしてください) 4 | labels: 要議論 5 | --- 6 | 7 | ## 質問の内容 8 | 
9 | 10 | 11 | ## VOICEVOXのバージョン 12 | 13 | 0.?.0 14 | 15 | 16 | 17 | ## OSの種類/ディストリ/バージョン 18 | 19 | 20 | 21 | - [ ] Windows 22 | - [ ] macOS 23 | - [ ] Linux 24 | 25 | 32 | 33 | ## その他 34 | 35 | 36 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## 内容 2 | 3 | 6 | 7 | ## 関連 Issue 8 | 9 | 17 | 18 | ## スクリーンショット・動画など 19 | 20 | 23 | 24 | ## その他 25 | -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | 'OS:mac': 2 | - '\[x\] macOS' 3 | 'OS:linux': 4 | - '\[x\] Linux' 5 | 'OS:win': 6 | - '\[x\] Windows' 7 | -------------------------------------------------------------------------------- /.github/workflows/build-docker.yml: -------------------------------------------------------------------------------- 1 | name: build-docker 2 | on: 3 | push: 4 | branches: 5 | - master 6 | release: 7 | types: 8 | - created 9 | workflow_dispatch: 10 | inputs: 11 | version: 12 | description: "バージョン情報(A.BB.C / A.BB.C-preview.D)" 13 | required: true 14 | 15 | env: 16 | IMAGE_NAME: ${{ secrets.DOCKERHUB_USERNAME }}/voicevox_engine 17 | PYTHON_VERSION: "3.8.10" 18 | VOICEVOX_RESOURCE_VERSION: "0.14.1" 19 | VOICEVOX_CORE_VERSION: "0.14.2" 20 | VOICEVOX_ENGINE_VERSION: 21 | |- # releaseタグ名か、workflow_dispatchでのバージョン名か、latestが入る 22 | ${{ github.event.release.tag_name || github.event.inputs.version || 'latest' }} 23 | 24 | jobs: 25 | build-docker: 26 | runs-on: ${{ matrix.os }} 27 | 28 | strategy: 29 | matrix: 30 | os: [ubuntu-latest] 31 | tag: 32 | - "" 33 | - cpu 34 | - cpu-ubuntu20.04 35 | - nvidia 36 | - nvidia-ubuntu20.04 37 | - cpu-ubuntu18.04 38 | - nvidia-ubuntu18.04 39 | include: 40 | # Ubuntu 20.04 41 | - tag: "" 42 | target: runtime-env 43 | base_image: ubuntu:20.04 44 | base_runtime_image: ubuntu:20.04 
45 | voicevox_core_asset_prefix: voicevox_core-linux-x64-cpu 46 | onnxruntime_url: https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-1.13.1.tgz 47 | - tag: cpu 48 | target: runtime-env 49 | base_image: ubuntu:20.04 50 | base_runtime_image: ubuntu:20.04 51 | voicevox_core_asset_prefix: voicevox_core-linux-x64-cpu 52 | onnxruntime_url: https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-1.13.1.tgz 53 | - tag: cpu-ubuntu20.04 54 | target: runtime-env 55 | base_image: ubuntu:20.04 56 | base_runtime_image: ubuntu:20.04 57 | voicevox_core_asset_prefix: voicevox_core-linux-x64-cpu 58 | onnxruntime_url: https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-1.13.1.tgz 59 | - tag: nvidia 60 | target: runtime-nvidia-env 61 | base_image: ubuntu:20.04 62 | base_runtime_image: nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu20.04 63 | voicevox_core_asset_prefix: voicevox_core-linux-x64-gpu 64 | onnxruntime_url: https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-gpu-1.13.1.tgz 65 | - tag: nvidia-ubuntu20.04 66 | target: runtime-nvidia-env 67 | base_image: ubuntu:20.04 68 | base_runtime_image: nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu20.04 69 | voicevox_core_asset_prefix: voicevox_core-linux-x64-gpu 70 | onnxruntime_url: https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-gpu-1.13.1.tgz 71 | # Ubuntu 18.04 72 | - tag: cpu-ubuntu18.04 73 | target: runtime-env 74 | base_image: ubuntu:18.04 75 | base_runtime_image: ubuntu:18.04 76 | voicevox_core_asset_prefix: voicevox_core-linux-x64-cpu 77 | onnxruntime_url: https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-1.13.1.tgz 78 | - tag: nvidia-ubuntu18.04 79 | target: runtime-nvidia-env 80 | base_image: ubuntu:18.04 81 | base_runtime_image: nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu18.04 82 | voicevox_core_asset_prefix: 
voicevox_core-linux-x64-gpu 83 | onnxruntime_url: https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-gpu-1.13.1.tgz 84 | 85 | steps: 86 | - uses: actions/checkout@v3 87 | 88 | - name: Setup Docker Buildx 89 | id: buildx 90 | uses: docker/setup-buildx-action@v2 91 | 92 | - name: Login to DockerHub 93 | uses: docker/login-action@v2 94 | with: 95 | username: ${{ secrets.DOCKERHUB_USERNAME }} 96 | password: ${{ secrets.DOCKERHUB_TOKEN }} 97 | 98 | # Download VOICEVOX RESOURCE 99 | - name: Prepare VOICEVOX RESOURCE cache 100 | uses: actions/cache@v3 101 | id: voicevox-resource-cache 102 | with: 103 | key: voicevox-resource-${{ env.VOICEVOX_RESOURCE_VERSION }} 104 | path: download/resource 105 | 106 | - name: Checkout VOICEVOX RESOURCE 107 | if: steps.voicevox-resource-cache.outputs.cache-hit != 'true' 108 | uses: actions/checkout@v3 109 | with: 110 | repository: VOICEVOX/voicevox_resource 111 | ref: ${{ env.VOICEVOX_RESOURCE_VERSION }} 112 | path: download/resource 113 | 114 | # Merge VOICEVOX RESOURCE 115 | - name: Merge VOICEVOX RESOURCE 116 | shell: bash 117 | env: 118 | DOWNLOAD_RESOURCE_PATH: download/resource 119 | run: bash build_util/process_voicevox_resource.bash 120 | 121 | - name: Build and Deploy Docker image 122 | uses: docker/build-push-action@v3 123 | env: 124 | IMAGE_TAG: 125 | |- # If it's a release, add the version, otherwise add the `latest` 126 | ${{ ( 127 | matrix.tag != '' && ( 128 | format('{0}:{1}-{2}', env.IMAGE_NAME, matrix.tag, env.VOICEVOX_ENGINE_VERSION) 129 | ) || format('{0}:{1}', env.IMAGE_NAME, env.VOICEVOX_ENGINE_VERSION) 130 | ) }} 131 | VOICEVOX_CORE_ASSET_NAME: ${{ matrix.voicevox_core_asset_prefix }}-${{ env.VOICEVOX_CORE_VERSION }} 132 | with: 133 | context: . 
134 | builder: ${{ steps.buildx.outputs.name }} 135 | file: ./Dockerfile 136 | build-args: | 137 | BASE_IMAGE=${{ matrix.base_image }} 138 | BASE_RUNTIME_IMAGE=${{ matrix.base_runtime_image }} 139 | PYTHON_VERSION=${{ env.PYTHON_VERSION }} 140 | VOICEVOX_ENGINE_VERSION=${{ env.VOICEVOX_ENGINE_VERSION }} 141 | VOICEVOX_CORE_ASSET_NAME=${{ env.VOICEVOX_CORE_ASSET_NAME }} 142 | VOICEVOX_CORE_VERSION=${{ env.VOICEVOX_CORE_VERSION }} 143 | VOICEVOX_RESOURCE_VERSION=${{ env.VOICEVOX_RESOURCE_VERSION }} 144 | ONNXRUNTIME_URL=${{ matrix.onnxruntime_url }} 145 | target: ${{ matrix.target }} 146 | push: true 147 | tags: ${{ env.IMAGE_TAG }} 148 | cache-from: type=registry,ref=${{ env.IMAGE_TAG }}-buildcache 149 | cache-to: type=registry,ref=${{ env.IMAGE_TAG }}-buildcache,mode=max 150 | -------------------------------------------------------------------------------- /.github/workflows/coverage-comment.yml: -------------------------------------------------------------------------------- 1 | name: Coverage Report Comment 2 | 3 | on: 4 | workflow_run: 5 | workflows: 6 | - test 7 | types: 8 | - completed 9 | workflow_dispatch: 10 | 11 | jobs: 12 | comment: 13 | runs-on: ubuntu-latest 14 | if: github.event.workflow_run.event == 'pull_request' && github.event.workflow_run.conclusion == 'success' 15 | steps: 16 | - name: Download coverage report 17 | uses: actions/github-script@v5.0.0 18 | with: 19 | script: | 20 | const artifacts = await github.rest.actions.listWorkflowRunArtifacts({ 21 | owner: context.repo.owner, 22 | repo: context.repo.repo, 23 | run_id: ${{ github.event.workflow_run.id }}, 24 | }) 25 | const matchArtifact = artifacts.data.artifacts.filter((artifact) => { 26 | return artifact.name == 'report' 27 | })[0] 28 | const download = await github.rest.actions.downloadArtifact({ 29 | owner: context.repo.owner, 30 | repo: context.repo.repo, 31 | artifact_id: matchArtifact.id, 32 | archive_format: 'zip', 33 | }) 34 | const fs = require('fs') 35 | 
fs.writeFileSync('${{github.workspace}}/report.zip', Buffer.from(download.data)) 36 | 37 | - name: Unzip report 38 | run: unzip report.zip 39 | 40 | - name: Comment coverage result to Pull Requests 41 | uses: actions/github-script@v5.0.0 42 | with: 43 | github-token: ${{ secrets.GITHUB_TOKEN }} 44 | script: | 45 | const fs = require('fs') 46 | const baseReport = fs.readFileSync('report.txt', 'utf8').toString().split('\n') 47 | let report = '' 48 | for (let i = 0; i < baseReport.length; i++) { 49 | const line = baseReport[i].split(' ').filter(v => v) 50 | if (i === 1 && line.length === 1) { 51 | report += "|:---|---:|---:|---:|\n" 52 | } else if (line.length === 1) { 53 | continue 54 | } else { 55 | if (i !== 0 && line.length === 4) { 56 | const parcent = Number(line[3].replace("%", "")) 57 | let color = 'green' 58 | if (parcent < 50) { 59 | color = 'red' 60 | } else if (parcent < 90) { 61 | color = 'orange' 62 | } 63 | line[3] = `![coverage-${parcent}%](https://img.shields.io/badge/coverage-${parcent}%25-${color}.svg)` 64 | } 65 | report += "|" + line.join("|") + "|\n" 66 | } 67 | if (line[0] === 'TOTAL') break 68 | } 69 | 70 | const issue_number = Number(fs.readFileSync('pr_num.txt')) 71 | const body = `## Coverage Result\n\n
\nResultを開く\n\n${report}\n
` 72 | 73 | let listComments = await github.rest.issues.listComments({ 74 | issue_number, 75 | owner: context.repo.owner, 76 | repo: context.repo.repo, 77 | }) 78 | listComments = listComments.data.filter((comment) => { 79 | return comment.body.includes('Coverage Result') && comment.user.login.includes('github-actions') 80 | }) 81 | 82 | if (listComments.length === 0) { 83 | github.rest.issues.createComment({ 84 | issue_number, 85 | owner: context.repo.owner, 86 | repo: context.repo.repo, 87 | body, 88 | }) 89 | } else { 90 | github.rest.issues.updateComment({ 91 | comment_id: listComments[0].id, 92 | owner: context.repo.owner, 93 | repo: context.repo.repo, 94 | body, 95 | }) 96 | } 97 | -------------------------------------------------------------------------------- /.github/workflows/labeler.yml: -------------------------------------------------------------------------------- 1 | name: Issue Labeler 2 | on: 3 | issues: 4 | types: [opened] 5 | 6 | jobs: 7 | triage: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: github/issue-labeler@v2.0 11 | with: 12 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 13 | configuration-path: .github/labeler.yml 14 | enable-versioned-regex: 0 15 | -------------------------------------------------------------------------------- /.github/workflows/release-test.yml: -------------------------------------------------------------------------------- 1 | name: Test Release Build 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | version: 7 | type: string 8 | required: true 9 | repo_url: 10 | type: string 11 | required: false 12 | workflow_dispatch: 13 | inputs: 14 | version: 15 | type: string 16 | description: "テストしたいタグ名" 17 | required: true 18 | repo_url: 19 | type: string 20 | description: "リポジトリのURL(省略可能)" 21 | required: false 22 | 23 | env: 24 | REPO_URL: 25 | |- # repo_url指定時はrepo_urlを、それ以外はgithubのリポジトリURLを使用 26 | ${{ (github.event.inputs || inputs).repo_url || format('{0}/{1}', github.server_url, github.repository) }} 27 | VERSION: |- # 
version指定時はversionを、それ以外はタグ名を使用 28 | ${{ (github.event.inputs || inputs).version }} 29 | 30 | jobs: 31 | test: 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | include: 36 | - os: ubuntu-20.04 37 | target: linux-cpu 38 | - os: ubuntu-20.04 39 | target: linux-nvidia 40 | - os: macos-11 41 | target: macos-x64 42 | - os: windows-2019 43 | target: windows-cpu 44 | - os: windows-2019 45 | target: windows-nvidia 46 | - os: windows-2019 47 | target: windows-directml 48 | 49 | runs-on: ${{ matrix.os }} 50 | 51 | steps: 52 | - name: declare variables 53 | id: vars 54 | shell: bash 55 | run: | 56 | echo "release_url=${{ env.REPO_URL }}/releases/download/${{ env.VERSION }}" >> $GITHUB_OUTPUT 57 | echo "package_name=voicevox_engine-${{ matrix.target }}-${{ env.VERSION }}" >> $GITHUB_OUTPUT 58 | 59 | - uses: actions/checkout@v2 60 | 61 | - uses: actions/setup-python@v2 62 | with: 63 | python-version: "3.8.10" 64 | cache: pip 65 | 66 | - name: Download 67 | shell: bash -euxv {0} 68 | run: | 69 | mkdir -p download 70 | curl -L -o "download/list.txt" "${{ steps.vars.outputs.release_url }}/${{ steps.vars.outputs.package_name }}.7z.txt" 71 | cat "download/list.txt" | xargs -I '%' curl -L -o "download/%" "${{ steps.vars.outputs.release_url }}/%" 72 | 7z x "download/$(head -n1 download/list.txt)" 73 | mv ${{ matrix.target }} dist/ 74 | 75 | - name: chmod +x 76 | if: startsWith(matrix.target, 'linux') || startsWith(matrix.target, 'macos') 77 | shell: bash 78 | run: chmod +x dist/run 79 | 80 | - name: Install libsndfile1 81 | if: startsWith(matrix.target, 'linux') 82 | run: | 83 | sudo apt-get update 84 | sudo apt-get install libsndfile1 85 | 86 | - name: Install requirements 87 | run: | 88 | pip install -r requirements-test.txt 89 | 90 | - name: Test 91 | shell: bash 92 | run: python build_util/check_release_build.py --dist_dir dist/ 93 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: 
-------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: 4 | push: 5 | pull_request: 6 | branches: 7 | - "**" 8 | workflow_dispatch: 9 | 10 | jobs: 11 | test: 12 | runs-on: ${{ matrix.os }} 13 | strategy: 14 | matrix: 15 | os: [ubuntu-20.04, windows-latest] # [ubuntu-20.04, macos-latest, windows-latest] 16 | python: ["3.8.10"] 17 | include: 18 | - os: ubuntu-20.04 19 | path: ~/.cache/pip 20 | # - os: macos-latest 21 | # path: ~/Library/Caches/pip 22 | - os: windows-latest 23 | path: ~\AppData\Local\pip\Cache 24 | 25 | steps: 26 | - uses: actions/checkout@v3 27 | 28 | - name: Set up Python ${{ matrix.python }} 29 | uses: actions/setup-python@v4 30 | with: 31 | python-version: ${{ matrix.python }} 32 | cache: pip 33 | 34 | - name: Install libraries for ubuntu 35 | if: matrix.os == 'ubuntu-20.04' 36 | run: sudo apt-get install libsndfile1 37 | 38 | - name: Install dependencies 39 | run: | 40 | python -m pip install --upgrade pip setuptools wheel 41 | python -m pip install -r requirements-test.txt 42 | 43 | - run: pysen run lint 44 | 45 | - name: Run pytest and get coverage 46 | run: | 47 | coverage run --omit=test/* -m pytest 48 | 49 | - name: Submit coverage to Coveralls 50 | if: matrix.os == 'ubuntu-20.04' 51 | run: coveralls --service=github 52 | env: 53 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 54 | 55 | - name: Create coverage result 56 | if: github.event_name == 'pull_request' && matrix.os == 'ubuntu-20.04' 57 | run: | 58 | mkdir report 59 | coverage report > report/report.txt 60 | echo ${{ github.event.number }} > report/pr_num.txt 61 | 62 | - name: Upload coverage result 63 | if: github.event_name == 'pull_request' && matrix.os == 'ubuntu-20.04' 64 | uses: actions/upload-artifact@v3 65 | with: 66 | name: report 67 | path: report/ 68 | 69 | - name: Check licenses 70 | shell: bash 71 | run: | 72 | OUTPUT_LICENSE_JSON_PATH=/dev/null \ 73 | bash build_util/create_venv_and_generate_licenses.bash 74 | 
-------------------------------------------------------------------------------- /.github/workflows/typos.yml: -------------------------------------------------------------------------------- 1 | name: Check typos 2 | 3 | on: 4 | push: 5 | pull_request: 6 | branches: 7 | - "**" 8 | workflow_dispatch: 9 | 10 | jobs: 11 | typos: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | 17 | - name: typos-action 18 | uses: crate-ci/typos@v1.12.12 19 | -------------------------------------------------------------------------------- /.github/workflows/upload-gh-pages.yml: -------------------------------------------------------------------------------- 1 | name: upload-docs 2 | 3 | on: 4 | push: 5 | branches: 6 | - "master" 7 | 8 | env: 9 | PYTHON_VERSION: "3.8.10" 10 | PUBLISH_DIR: "./docs/api" 11 | PUBLISH_BRANCH: "gh-pages" 12 | DESTINATION_DIR: "api" 13 | 14 | jobs: 15 | upload-doc: 16 | runs-on: ubuntu-20.04 17 | steps: 18 | - uses: actions/checkout@v2 19 | 20 | - name: Setup Python 21 | id: setup-python 22 | uses: actions/setup-python@v2 23 | with: 24 | python-version: ${{ env.PYTHON_VERSION }} 25 | cache: pip 26 | 27 | - name: Install libraries for ubuntu 28 | run: sudo apt-get install libsndfile1 29 | 30 | - name: Install Python dependencies 31 | shell: bash 32 | run: | 33 | pip install -r requirements.txt 34 | 35 | - name: Make documents 36 | shell: bash 37 | run: | 38 | python make_docs.py 39 | 40 | - name: Deploy to GitHub Pages 41 | uses: peaceiris/actions-gh-pages@v3 42 | with: 43 | github_token: ${{ secrets.GITHUB_TOKEN }} 44 | publish_dir: ${{ env.PUBLISH_DIR }} 45 | publish_branch: ${{ env.PUBLISH_BRANCH }} 46 | destination_dir: ${{ env.DESTINATION_DIR }} 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # VOICEVOX specifics 2 | ## Artifacts of nuitka 3 | *.dist 4 | *.build 5 | /build 6 | /cache 7 | 
## Artifact of generating licenses 8 | /licenses.json 9 | licenses_venv/ 10 | 11 | # Copied from `https://github.com/github/gitignore/blob/main/Python.gitignore` @2022-01-10 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | 17 | # C extensions 18 | *.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | build/ 23 | develop-eggs/ 24 | dist/ 25 | downloads/ 26 | eggs/ 27 | .eggs/ 28 | lib/ 29 | lib64/ 30 | parts/ 31 | sdist/ 32 | var/ 33 | wheels/ 34 | share/python-wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | MANIFEST 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | cover/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | db.sqlite3-journal 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | .pybuilder/ 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | # For a library or package, you might want to ignore these files since the code is 92 | # intended to run in multiple environments; otherwise, check them in: 93 | .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 
100 | Pipfile.lock 101 | 102 | # poetry 103 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 104 | # This is especially recommended for binary packages to ensure reproducibility, and is more 105 | # commonly ignored for libraries. 106 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 107 | # poetry.lock 108 | 109 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 110 | __pypackages__/ 111 | 112 | # Celery stuff 113 | celerybeat-schedule 114 | celerybeat.pid 115 | 116 | # SageMath parsed files 117 | *.sage.py 118 | 119 | # Environments 120 | .env 121 | .venv 122 | env/ 123 | venv/ 124 | ENV/ 125 | env.bak/ 126 | venv.bak/ 127 | 128 | # Spyder project settings 129 | .spyderproject 130 | .spyproject 131 | 132 | # Rope project settings 133 | .ropeproject 134 | 135 | # mkdocs documentation 136 | /site 137 | 138 | # mypy 139 | .mypy_cache/ 140 | .dmypy.json 141 | dmypy.json 142 | 143 | # Pyre type checker 144 | .pyre/ 145 | 146 | # pytype static type analyzer 147 | .pytype/ 148 | 149 | # Cython debug symbols 150 | cython_debug/ 151 | 152 | # PyCharm 153 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 154 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 155 | # and can be added to the global gitignore or merged into this file. For a more nuclear 156 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
157 | .idea/ 158 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: local 5 | hooks: 6 | - id: pysen-lint 7 | name: pysen-lint 8 | entry: pysen run lint 9 | language: python 10 | types: [file, python] 11 | stages: [push] 12 | pass_filenames: false 13 | -------------------------------------------------------------------------------- /LGPL_LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. 
The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. 
You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 
108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 
145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | LGPL v3 と、ソースコードの公開が不要な別ライセンスのデュアルライセンスです。 2 | 3 | 1. LGPL v3 4 | 5 | LGPL_LICENSEを参照してください。 6 | 7 | 2. 
ソースコードの公開が不要な別ライセンス 8 | 9 | 別ライセンスを取得したい場合は、ヒホ(twitter: @hiho_karuta)に求めてください。 10 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CMD= 2 | NOCACHE= 3 | 4 | ARGS:= 5 | ifeq ($(NOCACHE),1) 6 | ARGS:=$(ARGS) --no-cache 7 | endif 8 | 9 | # Ubuntu 20.04 10 | .PHONY: build-linux-docker-ubuntu20.04 11 | build-linux-docker-ubuntu20.04: 12 | docker buildx build . \ 13 | -t voicevox/voicevox_engine:cpu-ubuntu20.04-latest \ 14 | --target runtime-env \ 15 | --progress plain \ 16 | --build-arg BASE_IMAGE=ubuntu:20.04 \ 17 | --build-arg BASE_RUNTIME_IMAGE=ubuntu:20.04 \ 18 | --build-arg ONNXRUNTIME_URL=https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-1.13.1.tgz \ 19 | --build-arg VOICEVOX_CORE_LIBRARY_NAME=libcore_cpu_x64.so $(ARGS) 20 | 21 | .PHONY: run-linux-docker-ubuntu20.04 22 | run-linux-docker-ubuntu20.04: 23 | docker run --rm -it \ 24 | -p '127.0.0.1:50021:50021' $(ARGS) \ 25 | voicevox/voicevox_engine:cpu-ubuntu20.04-latest $(CMD) 26 | 27 | .PHONY: build-linux-docker-nvidia-ubuntu20.04 28 | build-linux-docker-nvidia-ubuntu20.04: 29 | docker buildx build . 
\ 30 | -t voicevox/voicevox_engine:nvidia-ubuntu20.04-latest \ 31 | --target runtime-nvidia-env \ 32 | --progress plain \ 33 | --build-arg BASE_IMAGE=ubuntu:20.04 \ 34 | --build-arg BASE_RUNTIME_IMAGE=nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu20.04 \ 35 | --build-arg ONNXRUNTIME_URL=https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-gpu-1.13.1.tgz \ 36 | --build-arg VOICEVOX_CORE_LIBRARY_NAME=libcore_gpu_x64_nvidia.so $(ARGS) 37 | 38 | .PHONY: run-linux-docker-nvidia-ubuntu20.04 39 | run-linux-docker-nvidia-ubuntu20.04: 40 | docker run --rm -it \ 41 | --gpus all \ 42 | -p '127.0.0.1:50021:50021' $(ARGS) \ 43 | voicevox/voicevox_engine:nvidia-ubuntu20.04-latest $(CMD) 44 | 45 | 46 | # Ubuntu 18.04 47 | .PHONY: build-linux-docker-ubuntu18.04 48 | build-linux-docker-ubuntu18.04: 49 | docker buildx build . \ 50 | -t voicevox/voicevox_engine:cpu-ubuntu18.04-latest \ 51 | --target runtime-env \ 52 | --progress plain \ 53 | --build-arg BASE_IMAGE=ubuntu:18.04 \ 54 | --build-arg BASE_RUNTIME_IMAGE=ubuntu:18.04 \ 55 | --build-arg ONNXRUNTIME_URL=https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-1.13.1.tgz \ 56 | --build-arg VOICEVOX_CORE_LIBRARY_NAME=libcore_cpu_x64.so $(ARGS) 57 | 58 | .PHONY: run-linux-docker-ubuntu18.04 59 | run-linux-docker-ubuntu18.04: 60 | docker run --rm -it \ 61 | -p '127.0.0.1:50021:50021' $(ARGS) \ 62 | voicevox/voicevox_engine:cpu-ubuntu18.04-latest $(CMD) 63 | 64 | .PHONY: build-linux-docker-nvidia-ubuntu18.04 65 | build-linux-docker-nvidia-ubuntu18.04: 66 | docker buildx build . 
\ 67 | -t voicevox/voicevox_engine:nvidia-ubuntu18.04-latest \ 68 | --target runtime-nvidia-env \ 69 | --progress plain \ 70 | --build-arg BASE_IMAGE=ubuntu:18.04 \ 71 | --build-arg BASE_RUNTIME_IMAGE=nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu18.04 \ 72 | --build-arg ONNXRUNTIME_URL=https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-gpu-1.13.1.tgz \ 73 | --build-arg VOICEVOX_CORE_LIBRARY_NAME=libcore_gpu_x64_nvidia.so $(ARGS) 74 | 75 | .PHONY: run-linux-docker-nvidia-ubuntu18.04 76 | run-linux-docker-nvidia-ubuntu18.04: 77 | docker run --rm -it \ 78 | --gpus all \ 79 | -p '127.0.0.1:50021:50021' $(ARGS) \ 80 | voicevox/voicevox_engine:nvidia-ubuntu18.04-latest $(CMD) 81 | 82 | 83 | # VOICEVOX Core env for test 84 | .PHONY: build-linux-docker-download-core-env-ubuntu18.04 85 | build-linux-docker-download-core-env-ubuntu18.04: 86 | docker buildx build . \ 87 | -t voicevox/voicevox_engine:download-core-env-ubuntu18.04 \ 88 | --target download-core-env \ 89 | --progress plain \ 90 | --build-arg BASE_IMAGE=ubuntu:18.04 $(ARGS) 91 | 92 | .PHONY: run-linux-docker-download-core-env-ubuntu18.04 93 | run-linux-docker-download-core-env-ubuntu18.04: 94 | docker run --rm -it $(ARGS) \ 95 | voicevox/voicevox_engine:download-core-env-ubuntu18.04 $(CMD) 96 | 97 | 98 | # ONNX Runtime env for test 99 | .PHONY: build-linux-docker-download-onnxruntime-env-ubuntu18.04 100 | build-linux-docker-download-onnxruntime-env-ubuntu18.04: 101 | docker buildx build . 
\ 102 | -t voicevox/voicevox_engine:download-onnxruntime-env-ubuntu18.04 \ 103 | --target download-onnxruntime-env \ 104 | --progress plain \ 105 | --build-arg BASE_IMAGE=ubuntu:18.04 $(ARGS) 106 | 107 | .PHONY: run-linux-docker-download-onnxruntime-env-ubuntu18.04 108 | run-linux-docker-download-onnxruntime-env-ubuntu18.04: 109 | docker run --rm -it $(ARGS) \ 110 | voicevox/voicevox_engine:download-onnxruntime-env-ubuntu18.04 $(CMD) 111 | 112 | 113 | # Python env for test 114 | .PHONY: build-linux-docker-compile-python-env 115 | build-linux-docker-compile-python-env: 116 | docker buildx build . \ 117 | -t voicevox/voicevox_engine:compile-python-env \ 118 | --target compile-python-env \ 119 | --progress plain \ 120 | --build-arg BASE_IMAGE=ubuntu:20.04 $(ARGS) 121 | 122 | .PHONY: run-linux-docker-compile-python-env 123 | run-linux-docker-compile-python-env: 124 | docker run --rm -it $(ARGS) \ 125 | voicevox/voicevox_engine:compile-python-env $(CMD) 126 | -------------------------------------------------------------------------------- /_typos.toml: -------------------------------------------------------------------------------- 1 | # Files for typos 2 | # Instruction: https://github.com/marketplace/actions/typos-action#getting-started 3 | 4 | [default.extend-identifiers] 5 | 6 | [default.extend-words] 7 | ba="ba" # 7zコマンドの-baオプション 8 | datas="datas" # PyInstallerの引数 9 | 10 | [files] 11 | extend-exclude = ["package-lock.json", "src/store/project.ts", "*.svg"] 12 | -------------------------------------------------------------------------------- /build_util/check_release_build.py: -------------------------------------------------------------------------------- 1 | """ 2 | ビルド結果をテストする 3 | """ 4 | import argparse 5 | import json 6 | import time 7 | from io import BytesIO 8 | from pathlib import Path 9 | from subprocess import Popen 10 | from urllib.parse import urlencode 11 | from urllib.request import Request, urlopen 12 | 13 | import soundfile 14 | 15 | base_url = 
def test_release_build(dist_dir: Path) -> None:
    """Smoke-test a built engine by launching it and calling its HTTP API.

    The executable ``run`` (or ``run.exe`` when ``run`` does not exist) inside
    ``dist_dir`` is started as a child process with ``dist_dir`` as its working
    directory.  After a fixed start-up wait, the ``version``, ``audio_query``,
    ``synthesis`` and ``engine_manifest`` endpoints under ``base_url`` are
    exercised in turn.

    The child process is always terminated before returning, including when a
    check fails, so a failed run does not leave an engine process behind.

    Args:
        dist_dir: Directory that contains the built executable.

    Raises:
        AssertionError: If a response is empty, the manifest has no ``uuid``
            key, or the engine process has already exited.
        urllib.error.URLError: If a request to the engine fails.
    """
    run_file = dist_dir / "run"
    if not run_file.exists():
        run_file = dist_dir / "run.exe"

    # Launch the engine.
    process = Popen([run_file.absolute()], cwd=dist_dir)
    try:
        time.sleep(120)  # Fixed wait for the engine to finish starting up.

        # Version endpoint: any non-empty body is accepted.
        req = Request(base_url + "version")
        with urlopen(req) as res:
            assert len(res.read()) > 0

        # Text -> audio query.
        text = "こんにちは、音声合成の世界へようこそ"
        req = Request(
            base_url + "audio_query?" + urlencode({"speaker": "1", "text": text}),
            method="POST",
        )
        with urlopen(req) as res:
            query = json.loads(res.read().decode("utf-8"))

        # Audio query -> waveform; the query is posted back unchanged as JSON.
        req = Request(base_url + "synthesis?speaker=1", method="POST")
        req.add_header("Content-Type", "application/json")
        req.data = json.dumps(query).encode("utf-8")
        with urlopen(req) as res:
            wave = res.read()
        # Reading the bytes with soundfile raises if they are not decodable audio.
        soundfile.read(BytesIO(wave))

        # Engine manifest.
        req = Request(base_url + "engine_manifest", method="GET")
        with urlopen(req) as res:
            manifest = json.loads(res.read().decode("utf-8"))
            assert "uuid" in manifest

        # The engine must still be running after all requests.
        assert process.poll() is None
    finally:
        # Stop the engine on success and on failure alike.
        process.terminate()
# Sign the given file.
#   $1: path of the file to sign.
# Reads $CERT_PATH (certificate file) and $CERT_PASSWORD from the environment
# of the script. TARGET and SIGNTOOL are assigned without `local`, so they
# remain set in the calling shell after the function returns.
function codesign() {
    TARGET="$1"
    # Search the App Certification Kit directory for signtool.exe and take the
    # last path in version-sort order.
    SIGNTOOL=$(find "C:/Program Files (x86)/Windows Kits/10/App Certification Kit" -name "signtool.exe" | sort -V | tail -n 1)
    # SHA256 file digest and timestamp digest, timestamped via the DigiCert server.
    powershell "& '$SIGNTOOL' sign /fd SHA256 /td SHA256 /tr http://timestamp.digicert.com /f $CERT_PATH /p $CERT_PASSWORD '$TARGET'"
}

# Check whether the given file is already signed.
#   $1: path of the file to verify.
# Returns 1 when the `signtool verify /pa` command fails.
function is_signed() {
    TARGET="$1"
    # Same signtool.exe lookup as in codesign.
    SIGNTOOL=$(find "C:/Program Files (x86)/Windows Kits/10/App Certification Kit" -name "signtool.exe" | sort -V | tail -n 1)
    powershell "& '$SIGNTOOL' verify /pa '$TARGET'" || return 1
}
def merge_json_string(src: str, dst: str) -> str:
    """
    Merge two update-history JSON strings, using ``src`` as the base.

    Both inputs are JSON arrays of objects that carry a ``"version"`` key and
    list-valued fields.  The result contains exactly the entries of ``src``,
    in ``src`` order.

    Entries with the same version have their list fields combined; items that
    already appear are not added again.
    >>> src = '[{"version": "0.0.1", "a": ["a1"], "b": ["b1", "b2"]}]'
    >>> dst = '[{"version": "0.0.1", "a": ["a2"], "b": ["b1", "b3"]}]'
    >>> merge_json_string(src, dst)
    '[{"version": "0.0.1", "a": ["a1", "a2"], "b": ["b1", "b2", "b3"]}]'

    Versions that exist only in ``dst`` are ignored.
    >>> src = '[{"version": "1"}]'
    >>> dst = '[{"version": "1"}, {"version": "2"}]'
    >>> merge_json_string(src, dst)
    '[{"version": "1"}]'

    A field that is missing from the matching ``dst`` entry is treated as empty.
    >>> src = '[{"version": "1", "a": ["a1"]}]'
    >>> dst = '[{"version": "1"}]'
    >>> merge_json_string(src, dst)
    '[{"version": "1", "a": ["a1"]}]'
    """
    src_json: List[Dict[str, Union[str, List[str]]]] = json.loads(src)
    dst_json: List[Dict[str, Union[str, List[str]]]] = json.loads(dst)

    for src_item in src_json:
        for dst_item in dst_json:
            if src_item["version"] != dst_item["version"]:
                continue

            for key in src_item:
                if key == "version":
                    continue

                # Append only the items that are not present yet.
                # OrderedDict.fromkeys drops duplicates and keeps first-seen order.
                src_item[key] = list(
                    OrderedDict.fromkeys(src_item[key] + dst_item.get(key, []))
                )

    return json.dumps(src_json)


def merge_update_infos(src_path: Path, dst_path: Path, output_path: Path) -> None:
    """
    Merge two update-history JSON files and write the result to a file.

    Args:
        src_path: Base file; its entries determine the entries of the output.
        dst_path: File whose same-version entries are merged into the base.
        output_path: Destination file.  It may be the same path as ``src_path``
            because both inputs are fully read before anything is written.
    """
    src = src_path.read_text(encoding="utf-8")
    dst = dst_path.read_text(encoding="utf-8")
    merged = merge_json_string(src, dst)
    # Write with the same explicit encoding used for reading, not the locale default.
    output_path.write_text(merged, encoding="utf-8")
-v DOWNLOAD_RESOURCE_PATH ]; then 4 | echo "DOWNLOAD_RESOURCE_PATHが未定義です" 5 | exit 1 6 | fi 7 | 8 | rm -r speaker_info 9 | cp -r $DOWNLOAD_RESOURCE_PATH/character_info speaker_info 10 | 11 | # .png_largeファイルを消去する 12 | rm speaker_info/*/icons/*.png_large 13 | 14 | # マニフェスト 15 | jq -s '.[0] * .[1]' engine_manifest.json $DOWNLOAD_RESOURCE_PATH/engine/engine_manifest.json \ 16 | > engine_manifest.json.tmp 17 | mv engine_manifest.json.tmp engine_manifest.json 18 | 19 | python build_util/merge_update_infos.py \ 20 | engine_manifest_assets/update_infos.json \ 21 | $DOWNLOAD_RESOURCE_PATH/engine/engine_manifest_assets/update_infos.json \ 22 | engine_manifest_assets/update_infos.json 23 | 24 | for f in $(ls $DOWNLOAD_RESOURCE_PATH/engine/engine_manifest_assets/* | grep -v update_infos.json); do 25 | cp $f ./engine_manifest_assets/ 26 | done 27 | -------------------------------------------------------------------------------- /default.csv: -------------------------------------------------------------------------------- 1 | 朱司,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,アカシ,アカシ,1/3,C1 2 | 青山,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,アオヤマ,アオヤマ,2/4,C1 3 | 雨晴,1350,1350,7000,名詞,固有名詞,人名,姓,*,*,*,アメハレ,アメハレ,2/4,C1 4 | 櫻歌,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,オウカ,オーカ,1/3,C1 5 | 音街,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,オトマチ,オトマチ,2/4,C1 6 | 春日部,1350,1350,8600,名詞,固有名詞,人名,姓,*,*,*,カスカベ,カスカベ,0/4,C1 7 | 麒ヶ島,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,キガシマ,キガシマ,2/4,C1 8 | 紲星,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,キズナ,キズナ,1/3,C1 9 | 九州,1350,1350,8600,名詞,固有名詞,人名,姓,*,*,*,キュウシュウ,キュウシュウ,1/4,C1 10 | キョウコ,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,キョオコ,キョオコ,1/3,C1 11 | 玄野,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,クロノ,クロノ,1/3,C1 12 | 剣崎,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,ケンザキ,ケンザキ,1/4,C1 13 | 後鬼,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ゴキ,ゴキ,1/2,C1 14 | 虎太郎,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,コタロウ,コタロー,4/4,C1 15 | 琴葉,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,コトノハ,コトノハ,0/4,C1 16 | 小夜,1351,1351,2200,名詞,固有名詞,人名,名,*,*,*,サヨ,サヨ,1/2,C1 17 | 四国,1350,1350,2200,名詞,固有名詞,人名,姓,*,*,*,シコク,シコク,1/3,C1 18 | 
白上,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,シラカミ,シラカミ,4/4,C1 19 | ずんだもん,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ズンダモン,ズンダモン,1/5,C1 20 | そら,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ソラ,ソラ,1/2,C1 21 | 宗麟,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ソウリン,ソウリン,1/4,C1 22 | タイプT,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,タイプティー,タイプティー,4/5,C1 23 | 波音,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ナミネ,ナミネ,0/3,C1 24 | 武宏,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,タケヒロ,タケヒロ,2/4,C1 25 | ちび式じい,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,チビシキジー,チビシキジー,5/6,C1 26 | 月読,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ツクヨミ,ツクヨミ,0/4,C1 27 | つむぎ,1351,1351,7450,名詞,固有名詞,人名,名,*,*,*,ツムギ,ツムギ,0/3,C1 28 | ナースロボ,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ナースロボ,ナースロボ,4/5,C1 29 | No.7,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ナンバーセブン,ナンバーセブン,5/7,C1 30 | はう,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,ハウ,ハウ,1/2,C1 31 | 桜乃,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ハルノ,ハルノ,1/3,C1 32 | ひまり,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ヒマリ,ヒマリ,0/3,C1 33 | 紅桜,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ベニザクラ,ベニザクラ,3/5,C1 34 | 聖騎士,1350,1350,8600,名詞,固有名詞,人名,姓,*,*,*,ホーリーナイト,ホーリーナイト,5/7,C1 35 | WhiteCUL,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ホワイトカル,ホワイトカル,5/6,C1 36 | ミコ,1351,1351,3900,名詞,固有名詞,人名,名,*,*,*,ミコ,ミコ,1/2,C1 37 | 水奈瀬,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ミナセ,ミナセ,2/3,C1 38 | 冥鳴,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,メイメイ,メイメイ,1/4,C1 39 | 鳴花,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,メイカ,メイカ,1/3,C1 40 | めたん,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,メタン,メタン,1/3,C1 41 | 雌雄,1351,1351,8600,名詞,固有名詞,人名,名,*,*,*,メスオ,メスオ,0/3,C1 42 | もち子さん,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,モチコサン,モチコサン,1/5,C1 43 | モチノ,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,モチノ,モチノ,0/3,C1 44 | 結月,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ユヅキ,ユヅキ,1/3,C1 45 | 弓鶴,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ユヅル,ユヅル,0/3,C1 46 | リツ,1351,1351,3900,名詞,固有名詞,人名,名,*,*,*,リツ,リツ,1/2,C1 47 | 六花,1351,1351,4900,名詞,固有名詞,人名,名,*,*,*,リッカ,リッカ,1/3,C1 48 | 龍星,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,リュウセイ,リュウセイ,1/4,C1 49 | 雀松,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ワカマツ,ワカマツ,2/4,C1 50 | COEIROINK,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエイロインク,コエイロインク,5/7,C1 51 | coeiroink,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエイロインク,コエイロインク,5/7,C1 52 | 
CoeFont,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエフォント,コエフォント,3/5,C1 53 | coefont,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエフォント,コエフォント,3/5,C1 54 | TALQu,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,トーク,トーク,0/3,C1 55 | talqu,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,トーク,トーク,0/3,C1 56 | VOICEVOX,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,ボイスボックス,ボイスボックス,4/7,C1 57 | voicevox,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,ボイスボックス,ボイスボックス,4/7,C1 -------------------------------------------------------------------------------- /default_setting.yml: -------------------------------------------------------------------------------- 1 | allow_origin: null 2 | cors_policy_mode: localapps 3 | -------------------------------------------------------------------------------- /docs/VOICEVOX音声合成エンジンとの連携.md: -------------------------------------------------------------------------------- 1 | メモ書き程度ですが、どういう方針で開発を進めているかを紹介します。 2 | 3 | - バージョンが上がっても、`/audio_query`で返ってくる値をそのまま`/synthesis`に POST すれば音声合成できるようにする予定です 4 | - `AudioQuery`のパラメータは増えますが、なるべくデフォルト値で以前と変わらない音声が生成されるようにします 5 | - バージョン 0.7 から音声スタイルが実装されました。スタイルの情報は`/speakers`から取得できます 6 | - スタイルの情報にある`style_id`を`speaker`に指定することで、今まで通り音声合成ができます 7 | - style_id の指定先が speaker なのは互換性のためです 8 | -------------------------------------------------------------------------------- /docs/api/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/docs/api/.gitkeep -------------------------------------------------------------------------------- /docs/licenses/open_jtalk/COPYING: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------- */ 2 | /* The Japanese TTS System "Open JTalk" */ 3 | /* developed by HTS Working Group */ 4 | /* http://open-jtalk.sourceforge.net/ */ 5 | /* ----------------------------------------------------------------- */ 6 | /* */ 7 | /* Copyright (c) 2008-2016 
Nagoya Institute of Technology */ 8 | /* Department of Computer Science */ 9 | /* */ 10 | /* All rights reserved. */ 11 | /* */ 12 | /* Redistribution and use in source and binary forms, with or */ 13 | /* without modification, are permitted provided that the following */ 14 | /* conditions are met: */ 15 | /* */ 16 | /* - Redistributions of source code must retain the above copyright */ 17 | /* notice, this list of conditions and the following disclaimer. */ 18 | /* - Redistributions in binary form must reproduce the above */ 19 | /* copyright notice, this list of conditions and the following */ 20 | /* disclaimer in the documentation and/or other materials provided */ 21 | /* with the distribution. */ 22 | /* - Neither the name of the HTS working group nor the names of its */ 23 | /* contributors may be used to endorse or promote products derived */ 24 | /* from this software without specific prior written permission. */ 25 | /* */ 26 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 27 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 28 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 29 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 30 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 31 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 32 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 33 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 34 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 35 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 36 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 37 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 38 | /* POSSIBILITY OF SUCH DAMAGE. 
*/ 39 | /* ----------------------------------------------------------------- */ 40 | -------------------------------------------------------------------------------- /docs/licenses/open_jtalk/mecab-naist-jdic/COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009, Nara Institute of Science and Technology, Japan. 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | Redistributions of source code must retain the above copyright notice, 10 | this list of conditions and the following disclaimer. 11 | Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | Neither the name of the Nara Institute of Science and Technology 15 | (NAIST) nor the names of its contributors may be used to endorse or 16 | promote products derived from this software without specific prior 17 | written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 23 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 26 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 27 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 28 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | 31 | /* ----------------------------------------------------------------- */ 32 | /* The Japanese TTS System "Open JTalk" */ 33 | /* developed by HTS Working Group */ 34 | /* http://open-jtalk.sourceforge.net/ */ 35 | /* ----------------------------------------------------------------- */ 36 | /* */ 37 | /* Copyright (c) 2008-2016 Nagoya Institute of Technology */ 38 | /* Department of Computer Science */ 39 | /* */ 40 | /* All rights reserved. */ 41 | /* */ 42 | /* Redistribution and use in source and binary forms, with or */ 43 | /* without modification, are permitted provided that the following */ 44 | /* conditions are met: */ 45 | /* */ 46 | /* - Redistributions of source code must retain the above copyright */ 47 | /* notice, this list of conditions and the following disclaimer. */ 48 | /* - Redistributions in binary form must reproduce the above */ 49 | /* copyright notice, this list of conditions and the following */ 50 | /* disclaimer in the documentation and/or other materials provided */ 51 | /* with the distribution. */ 52 | /* - Neither the name of the HTS working group nor the names of its */ 53 | /* contributors may be used to endorse or promote products derived */ 54 | /* from this software without specific prior written permission. */ 55 | /* */ 56 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 57 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 58 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 59 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 60 | /* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 61 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 62 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 63 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 64 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 65 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 66 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 67 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 68 | /* POSSIBILITY OF SUCH DAMAGE. */ 69 | /* ----------------------------------------------------------------- */ 70 | -------------------------------------------------------------------------------- /docs/licenses/open_jtalk/mecab/COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2001-2008, Taku Kudo 2 | Copyright (c) 2004-2008, Nippon Telegraph and Telephone Corporation 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above 9 | copyright notice, this list of conditions and the 10 | following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above 13 | copyright notice, this list of conditions and the 14 | following disclaimer in the documentation and/or other 15 | materials provided with the distribution. 16 | 17 | * Neither the name of the Nippon Telegraph and Telegraph Corporation 18 | nor the names of its contributors may be used to endorse or 19 | promote products derived from this software without specific 20 | prior written permission. 
21 | 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED 23 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 28 | TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 29 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | /* ----------------------------------------------------------------- */ 32 | /* The Japanese TTS System "Open JTalk" */ 33 | /* developed by HTS Working Group */ 34 | /* http://open-jtalk.sourceforge.net/ */ 35 | /* ----------------------------------------------------------------- */ 36 | /* */ 37 | /* Copyright (c) 2008-2016 Nagoya Institute of Technology */ 38 | /* Department of Computer Science */ 39 | /* */ 40 | /* All rights reserved. */ 41 | /* */ 42 | /* Redistribution and use in source and binary forms, with or */ 43 | /* without modification, are permitted provided that the following */ 44 | /* conditions are met: */ 45 | /* */ 46 | /* - Redistributions of source code must retain the above copyright */ 47 | /* notice, this list of conditions and the following disclaimer. */ 48 | /* - Redistributions in binary form must reproduce the above */ 49 | /* copyright notice, this list of conditions and the following */ 50 | /* disclaimer in the documentation and/or other materials provided */ 51 | /* with the distribution. 
*/ 52 | /* - Neither the name of the HTS working group nor the names of its */ 53 | /* contributors may be used to endorse or promote products derived */ 54 | /* from this software without specific prior written permission. */ 55 | /* */ 56 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 57 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 58 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 59 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 60 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 61 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 62 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 63 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 64 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 65 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 66 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 67 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 68 | /* POSSIBILITY OF SUCH DAMAGE. */ 69 | /* ----------------------------------------------------------------- */ 70 | -------------------------------------------------------------------------------- /docs/licenses/world/LICENSE.txt: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------- */ 2 | /* WORLD: High-quality speech analysis, */ 3 | /* manipulation and synthesis system */ 4 | /* developed by M. Morise */ 5 | /* http://www.kisc.meiji.ac.jp/~mmorise/world/english/ */ 6 | /* ----------------------------------------------------------------- */ 7 | /* */ 8 | /* Copyright (c) 2010 M. Morise */ 9 | /* */ 10 | /* All rights reserved. 
*/ 11 | /* */ 12 | /* Redistribution and use in source and binary forms, with or */ 13 | /* without modification, are permitted provided that the following */ 14 | /* conditions are met: */ 15 | /* */ 16 | /* - Redistributions of source code must retain the above copyright */ 17 | /* notice, this list of conditions and the following disclaimer. */ 18 | /* - Redistributions in binary form must reproduce the above */ 19 | /* copyright notice, this list of conditions and the following */ 20 | /* disclaimer in the documentation and/or other materials provided */ 21 | /* with the distribution. */ 22 | /* - Neither the name of the M. Morise nor the names of its */ 23 | /* contributors may be used to endorse or promote products derived */ 24 | /* from this software without specific prior written permission. */ 25 | /* */ 26 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 27 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 28 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 29 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 30 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 31 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 32 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 33 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 34 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 35 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 36 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 37 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 38 | /* POSSIBILITY OF SUCH DAMAGE. 
*/ 39 | /* ----------------------------------------------------------------- */ 40 | -------------------------------------------------------------------------------- /engine_manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "manifest_version": "0.13.1", 3 | "name": "DUMMY Engine", 4 | "brand_name": "DUMMY", 5 | "uuid": "c7b58856-bd56-4aa1-afb7-b8415f824b06", 6 | "version": "999.999.999", 7 | "url": "https://github.com/VOICEVOX/voicevox_engine", 8 | "command": "run", 9 | "port": 50021, 10 | "icon": "engine_manifest_assets/icon.png", 11 | "default_sampling_rate": 24000, 12 | "terms_of_service": "engine_manifest_assets/terms_of_service.md", 13 | "update_infos": "engine_manifest_assets/update_infos.json", 14 | "dependency_licenses": "engine_manifest_assets/dependency_licenses.json", 15 | "downloadable_libraries_path": null, 16 | "downloadable_libraries_url": null, 17 | "supported_features": { 18 | "adjust_mora_pitch": { 19 | "type": "bool", 20 | "value": true, 21 | "name": "モーラごとの音高の調整" 22 | }, 23 | "adjust_phoneme_length": { 24 | "type": "bool", 25 | "value": true, 26 | "name": "音素ごとの長さの調整" 27 | }, 28 | "adjust_speed_scale": { 29 | "type": "bool", 30 | "value": true, 31 | "name": "全体の話速の調整" 32 | }, 33 | "adjust_pitch_scale": { 34 | "type": "bool", 35 | "value": true, 36 | "name": "全体の音高の調整" 37 | }, 38 | "adjust_intonation_scale": { 39 | "type": "bool", 40 | "value": true, 41 | "name": "全体の抑揚の調整" 42 | }, 43 | "adjust_volume_scale": { 44 | "type": "bool", 45 | "value": true, 46 | "name": "全体の音量の調整" 47 | }, 48 | "interrogative_upspeak": { 49 | "type": "bool", 50 | "value": true, 51 | "name": "疑問文の自動調整" 52 | }, 53 | "synthesis_morphing" : { 54 | "type": "bool", 55 | "value": true, 56 | "name": "2人の話者でモーフィングした音声を合成" 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /engine_manifest_assets/dependency_licenses.json: 
-------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "dummy library", 4 | "version": "0.0.1", 5 | "license": "dummy license", 6 | "text": "dummy license text" 7 | } 8 | ] -------------------------------------------------------------------------------- /engine_manifest_assets/downloadable_libraries.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "download_url": "", 4 | "bytes": "1000", 5 | "speaker": { 6 | "name": "dummy1", 7 | "speaker_uuid": "dummy1", 8 | "styles": [ 9 | { 10 | "name": "style1", 11 | "id": 0 12 | }, 13 | { 14 | "name": "style2", 15 | "id": 1 16 | } 17 | ], 18 | "version": "0.0.1" 19 | }, 20 | "speaker_info": { 21 | "policy": "", 22 | "portrait": "", 23 | "style_infos": [ 24 | { 25 | "id": 0, 26 | "icon": "", 27 | "voice_samples": [ 28 | "", 29 | "", 30 | "" 31 | ] 32 | }, 33 | { 34 | "id": 1, 35 | "icon": "", 36 | "voice_samples": [ 37 | "", 38 | "", 39 | "" 40 | ] 41 | } 42 | ] 43 | } 44 | } 45 | ] 46 | -------------------------------------------------------------------------------- /engine_manifest_assets/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/engine_manifest_assets/icon.png -------------------------------------------------------------------------------- /engine_manifest_assets/terms_of_service.md: -------------------------------------------------------------------------------- 1 | dummy terms of service -------------------------------------------------------------------------------- /engine_manifest_assets/update_infos.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "version": "0.14.3", 4 | "descriptions": [ 5 | "キャラクター「†聖騎士 紅桜†」「雀松朱司」「麒ヶ島宗麟」を追加", 6 | "同時書き込みで辞書が破損する問題を修正" 7 | ], 8 | "contributors": ["Hiroshiba"] 9 | }, 10 | { 11 |
def get_candidates(
    naist_jdic_path: Path,
    pos: str,
    pos_detail_1: str,
    pos_detail_2: str,
    pos_detail_3: str,
) -> List[int]:
    """Compute the 11 cost candidates for one part of speech in naist-jdic.

    The dictionary CSV is scanned row by row. Every row whose part-of-speech
    columns (5th to 8th) equal ``(pos, pos_detail_1, pos_detail_2,
    pos_detail_3)`` contributes its cost (4th column) to the statistics.

    The returned list is laid out as follows:

    - element 1: minimum cost minus 1
    - elements 2-6: five evenly spaced values from the 1% quantile up to the
      most frequent cost (mode), both ends included
    - elements 7-10: four evenly spaced values from just above the mode up to
      the 99% quantile (included)
    - element 11: maximum cost plus 1

    Intermediate values are truncated toward zero with ``int()``.

    Args:
        naist_jdic_path: Path to ``naist-jdic.csv`` (read as UTF-8).
        pos: Part of speech to match.
        pos_detail_1: First part-of-speech subcategory to match.
        pos_detail_2: Second part-of-speech subcategory to match.
        pos_detail_3: Third part-of-speech subcategory to match.

    Returns:
        A list of 11 plain ``int`` cost candidates.

    Raises:
        ValueError: If a non-blank row does not have exactly 15 columns, if a
            matching row has a non-integer cost, or if no row matches the
            requested part of speech.
    """
    # The tool expects exactly this many comma-separated columns per row.
    n_columns = 15
    target = [pos, pos_detail_1, pos_detail_2, pos_detail_3]

    costs: List[int] = []
    with naist_jdic_path.open(encoding="utf-8") as f:
        for line_number, raw_line in enumerate(f, start=1):
            line = raw_line.rstrip("\r\n")
            if not line:
                # Blank lines (e.g. a trailing one at EOF) carry no entry.
                continue
            fields = line.split(",")
            if len(fields) != n_columns:
                raise ValueError(
                    f"{naist_jdic_path}:{line_number}: expected {n_columns} "
                    f"comma-separated columns, got {len(fields)}"
                )
            if fields[4:8] == target:
                costs.append(int(fields[3]))

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not costs:
        raise ValueError(
            "no dictionary entry matches part of speech "
            f"{pos},{pos_detail_1},{pos_detail_2},{pos_detail_3}"
        )

    cost_min = min(costs) - 1
    cost_max = max(costs) + 1
    cost_mode = statistics.mode(costs)
    # Truncate the interpolated quantiles to integers once, up front.
    cost_1per = int(np.quantile(costs, 0.01))
    cost_99per = int(np.quantile(costs, 0.99))

    lower = [int(cost_1per + (cost_mode - cost_1per) * i / 4) for i in range(5)]
    # Starts at 1 so the mode, already the last element of `lower`, is not repeated.
    upper = [int(cost_mode + (cost_99per - cost_mode) * i / 4) for i in range(1, 5)]
    return [cost_min] + lower + upper + [cost_max]
run.generate_app( 11 | synthesis_engines={"mock": MockSynthesisEngine(speakers=core.metas())}, 12 | latest_core_version="mock", 13 | setting_loader=SettingLoader(USER_SETTING_PATH), 14 | ) 15 | with open("docs/api/index.html", "w") as f: 16 | f.write( 17 | """ 18 | 19 | 20 | voicevox_engine API Document 21 | 22 | 23 | 24 | 25 |
26 | 27 | 30 | 31 | """ 32 | % json.dumps(app.openapi()) 33 | ) 34 | -------------------------------------------------------------------------------- /presets.yaml: -------------------------------------------------------------------------------- 1 | - id: 1 2 | name: サンプルプリセット 3 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 4 | style_id: 0 5 | speedScale: 1 6 | pitchScale: 0 7 | intonationScale: 1 8 | volumeScale: 1 9 | prePhonemeLength: 0.1 10 | postPhonemeLength: 0.1 11 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.pysen] 2 | version = "0.10.3" 3 | 4 | [tool.pysen.lint] 5 | enable_black = true 6 | enable_flake8 = true 7 | enable_isort = true 8 | enable_mypy = false # TODO: eliminate errors and enable at CI 9 | mypy_preset = "entry" # TODO: "strict" 10 | line_length = 88 11 | py_version = "py38" 12 | isort_known_first_party = ["voicevox_engine"] 13 | isort_known_third_party = ["numpy"] 14 | [[tool.pysen.lint.mypy_targets]] 15 | paths = [".", "voicevox_engine/"] 16 | 17 | [tool.black] # automatically generated by pysen 18 | line-length = 88 19 | target-version = ["py38"] 20 | 21 | 22 | [tool.isort] # automatically generated by pysen 23 | default_section = "THIRDPARTY" 24 | ensure_newline_before_comments = true 25 | force_grid_wrap = 0 26 | force_single_line = false 27 | include_trailing_comma = true 28 | known_first_party = ["voicevox_engine"] 29 | known_third_party = ["numpy"] 30 | line_length = 88 31 | multi_line_output = 3 32 | use_parentheses = true 33 | 34 | [tool.poetry] 35 | name = "voicevox_engine" 36 | version = "0.0.0" 37 | description = "" 38 | authors = ["Hiroshiba "] 39 | 40 | [tool.poetry.dependencies] 41 | python = "~3.8,>=3.8.1" 42 | numpy = "^1.20.0" 43 | fastapi = "^0.70.0" 44 | python-multipart = "^0.0.5" 45 | uvicorn = "^0.15.0" 46 | aiofiles = "^0.7.0" 47 | soundfile = "^0.10.3.post1" 48 | scipy 
= "^1.7.1" 49 | pyyaml = "^6.0" 50 | pyworld = "^0.3.0" 51 | appdirs = "^1.4.4" 52 | requests = "^2.28.1" 53 | jinja2 = "^3.1.2" 54 | pyopenjtalk = {git = "https://github.com/VOICEVOX/pyopenjtalk", rev = "f4ade29ef9a4f43d8605103cb5bacc29e0b2ccae"} 55 | 56 | [tool.poetry.group.dev.dependencies] 57 | cython = "^0.29.24" 58 | pyinstaller = "^5.3" 59 | pre-commit = "^2.16.0" 60 | atomicwrites = "^1.4.0" 61 | colorama = "^0.4.4" 62 | poetry = "^1.3.1" 63 | 64 | [tool.poetry.group.test.dependencies] 65 | pysen = "~0.10.3" 66 | black = "^22.12.0" 67 | flake8-bugbear = "^23.1.0" 68 | flake8 = "^6.0.0" 69 | isort = "^5.12.0" 70 | mypy = "~0.991" 71 | pytest = "^6.2.5" 72 | coveralls = "^3.2.0" 73 | 74 | [tool.poetry.group.license.dependencies] 75 | pip-licenses = "^3.5.3" 76 | 77 | [build-system] 78 | requires = ["poetry-core"] 79 | build-backend = "poetry.core.masonry.api" 80 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | aiofiles==0.7.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 2 | altgraph==0.17.3 ; python_full_version >= "3.8.1" and python_version < "3.9" 3 | anyio==3.6.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 4 | appdirs==1.4.4 ; python_full_version >= "3.8.1" and python_version < "3.9" 5 | asgiref==3.6.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 6 | atomicwrites==1.4.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 7 | attrs==22.2.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 8 | cachecontrol[filecache]==0.12.11 ; python_full_version >= "3.8.1" and python_version < "3.9" 9 | certifi==2022.12.7 ; python_full_version >= "3.8.1" and python_version < "3.9" 10 | cffi==1.15.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 11 | cfgv==3.3.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 12 | charset-normalizer==2.1.1 ; 
python_full_version >= "3.8.1" and python_version < "3.9" 13 | cleo==2.0.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 14 | click==8.0.4 ; python_full_version >= "3.8.1" and python_version < "3.9" 15 | colorama==0.4.4 ; python_full_version >= "3.8.1" and python_version < "3.9" 16 | crashtest==0.4.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 17 | cryptography==39.0.0 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "linux" 18 | cython==0.29.24 ; python_full_version >= "3.8.1" and python_version < "3.9" 19 | distlib==0.3.6 ; python_full_version >= "3.8.1" and python_version < "3.9" 20 | dulwich==0.20.50 ; python_full_version >= "3.8.1" and python_version < "3.9" 21 | fastapi==0.70.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 22 | filelock==3.8.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 23 | future==0.18.2 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "win32" 24 | h11==0.14.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 25 | html5lib==1.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 26 | identify==2.5.11 ; python_full_version >= "3.8.1" and python_version < "3.9" 27 | idna==3.4 ; python_full_version >= "3.8.1" and python_version < "3.9" 28 | importlib-metadata==4.13.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 29 | importlib-resources==5.10.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 30 | jaraco-classes==3.2.3 ; python_full_version >= "3.8.1" and python_version < "3.9" 31 | jeepney==0.8.0 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "linux" 32 | jinja2==3.1.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 33 | jsonschema==4.17.3 ; python_full_version >= "3.8.1" and python_version < "3.9" 34 | keyring==23.13.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 35 | lockfile==0.12.2 ; python_full_version >= "3.8.1" and 
python_version < "3.9" 36 | macholib==1.16.2 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "darwin" 37 | markupsafe==2.1.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 38 | more-itertools==9.0.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 39 | msgpack==1.0.4 ; python_full_version >= "3.8.1" and python_version < "3.9" 40 | nodeenv==1.7.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 41 | numpy==1.20.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 42 | packaging==22.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 43 | pefile==2022.5.30 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "win32" 44 | pexpect==4.8.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 45 | pkginfo==1.9.4 ; python_full_version >= "3.8.1" and python_version < "3.9" 46 | pkgutil-resolve-name==1.3.10 ; python_full_version >= "3.8.1" and python_version < "3.9" 47 | platformdirs==2.6.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 48 | poetry-core==1.4.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 49 | poetry-plugin-export==1.2.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 50 | poetry==1.3.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 51 | pre-commit==2.16.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 52 | ptyprocess==0.7.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 53 | pycparser==2.21 ; python_full_version >= "3.8.1" and python_version < "3.9" 54 | pydantic==1.10.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 55 | pyinstaller-hooks-contrib==2022.14 ; python_full_version >= "3.8.1" and python_version < "3.9" 56 | pyinstaller==5.3 ; python_full_version >= "3.8.1" and python_version < "3.9" 57 | pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@f4ade29ef9a4f43d8605103cb5bacc29e0b2ccae ; python_full_version >= "3.8.1" and python_version < 
"3.9" 58 | pyrsistent==0.19.3 ; python_full_version >= "3.8.1" and python_version < "3.9" 59 | python-multipart==0.0.5 ; python_full_version >= "3.8.1" and python_version < "3.9" 60 | pywin32-ctypes==0.2.0 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "win32" 61 | pyworld==0.3.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 62 | pyyaml==6.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 63 | rapidfuzz==2.13.7 ; python_full_version >= "3.8.1" and python_version < "3.9" 64 | requests-toolbelt==0.10.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 65 | requests==2.28.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 66 | scipy==1.7.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 67 | secretstorage==3.3.3 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "linux" 68 | setuptools==65.6.3 ; python_full_version >= "3.8.1" and python_version < "3.9" 69 | shellingham==1.5.0.post1 ; python_full_version >= "3.8.1" and python_version < "3.9" 70 | six==1.16.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 71 | sniffio==1.3.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 72 | soundfile==0.10.3.post1 ; python_full_version >= "3.8.1" and python_version < "3.9" 73 | starlette==0.16.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 74 | toml==0.10.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 75 | tomli==2.0.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 76 | tomlkit==0.11.6 ; python_full_version >= "3.8.1" and python_version < "3.9" 77 | tqdm==4.64.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 78 | trove-classifiers==2022.12.22 ; python_full_version >= "3.8.1" and python_version < "3.9" 79 | typing-extensions==4.4.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 80 | urllib3==1.26.13 ; python_full_version >= "3.8.1" and python_version < "3.9" 81 | 
uvicorn==0.15.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 82 | virtualenv==20.17.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 83 | webencodings==0.5.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 84 | xattr==0.10.1 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "darwin" 85 | zipp==3.11.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 86 | -------------------------------------------------------------------------------- /requirements-license.txt: -------------------------------------------------------------------------------- 1 | aiofiles==0.7.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 2 | anyio==3.6.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 3 | appdirs==1.4.4 ; python_full_version >= "3.8.1" and python_version < "3.9" 4 | asgiref==3.6.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 5 | certifi==2022.12.7 ; python_full_version >= "3.8.1" and python_version < "3.9" 6 | cffi==1.15.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 7 | charset-normalizer==2.1.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 8 | click==8.0.4 ; python_full_version >= "3.8.1" and python_version < "3.9" 9 | colorama==0.4.4 ; python_full_version >= "3.8.1" and python_version < "3.9" and platform_system == "Windows" 10 | cython==0.29.24 ; python_full_version >= "3.8.1" and python_version < "3.9" 11 | fastapi==0.70.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 12 | h11==0.14.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 13 | idna==3.4 ; python_full_version >= "3.8.1" and python_version < "3.9" 14 | jinja2==3.1.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 15 | markupsafe==2.1.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 16 | numpy==1.20.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 17 | pip-licenses==3.5.5 ; python_full_version >= "3.8.1" and 
python_version < "3.9" 18 | ptable==0.9.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 19 | pycparser==2.21 ; python_full_version >= "3.8.1" and python_version < "3.9" 20 | pydantic==1.10.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 21 | pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@f4ade29ef9a4f43d8605103cb5bacc29e0b2ccae ; python_full_version >= "3.8.1" and python_version < "3.9" 22 | python-multipart==0.0.5 ; python_full_version >= "3.8.1" and python_version < "3.9" 23 | pyworld==0.3.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 24 | pyyaml==6.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 25 | requests==2.28.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 26 | scipy==1.7.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 27 | six==1.16.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 28 | sniffio==1.3.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 29 | soundfile==0.10.3.post1 ; python_full_version >= "3.8.1" and python_version < "3.9" 30 | starlette==0.16.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 31 | tqdm==4.64.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 32 | typing-extensions==4.4.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 33 | urllib3==1.26.13 ; python_full_version >= "3.8.1" and python_version < "3.9" 34 | uvicorn==0.15.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 35 | -------------------------------------------------------------------------------- /requirements-test.txt: -------------------------------------------------------------------------------- 1 | aiofiles==0.7.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 2 | anyio==3.6.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 3 | appdirs==1.4.4 ; python_full_version >= "3.8.1" and python_version < "3.9" 4 | asgiref==3.6.0 ; python_full_version >= "3.8.1" and python_version < 
"3.9" 5 | atomicwrites==1.4.0 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "win32" 6 | attrs==22.2.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 7 | black==22.12.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 8 | certifi==2022.12.7 ; python_full_version >= "3.8.1" and python_version < "3.9" 9 | cffi==1.15.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 10 | charset-normalizer==2.1.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 11 | click==8.0.4 ; python_full_version >= "3.8.1" and python_version < "3.9" 12 | colorama==0.4.4 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "win32" or python_full_version >= "3.8.1" and python_version < "3.9" and platform_system == "Windows" 13 | colorlog==4.8.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 14 | coverage==5.5 ; python_full_version >= "3.8.1" and python_version < "3.9" 15 | coveralls==3.2.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 16 | cython==0.29.24 ; python_full_version >= "3.8.1" and python_version < "3.9" 17 | dacite==1.7.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 18 | docopt==0.6.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 19 | fastapi==0.70.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 20 | flake8-bugbear==23.1.20 ; python_full_version >= "3.8.1" and python_version < "3.9" 21 | flake8==6.0.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 22 | gitdb==4.0.10 ; python_full_version >= "3.8.1" and python_version < "3.9" 23 | gitpython==3.1.29 ; python_full_version >= "3.8.1" and python_version < "3.9" 24 | h11==0.14.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 25 | idna==3.4 ; python_full_version >= "3.8.1" and python_version < "3.9" 26 | iniconfig==1.1.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 27 | isort==5.12.0 ; python_full_version >= "3.8.1" and 
python_version < "3.9" 28 | jinja2==3.1.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 29 | markupsafe==2.1.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 30 | mccabe==0.7.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 31 | mypy-extensions==0.4.3 ; python_full_version >= "3.8.1" and python_version < "3.9" 32 | mypy==0.991 ; python_full_version >= "3.8.1" and python_version < "3.9" 33 | numpy==1.20.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 34 | packaging==22.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 35 | pathspec==0.10.3 ; python_full_version >= "3.8.1" and python_version < "3.9" 36 | platformdirs==2.6.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 37 | pluggy==1.0.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 38 | py==1.11.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 39 | pycodestyle==2.10.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 40 | pycparser==2.21 ; python_full_version >= "3.8.1" and python_version < "3.9" 41 | pydantic==1.10.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 42 | pyflakes==3.0.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 43 | pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@f4ade29ef9a4f43d8605103cb5bacc29e0b2ccae ; python_full_version >= "3.8.1" and python_version < "3.9" 44 | pysen==0.10.3 ; python_full_version >= "3.8.1" and python_version < "3.9" 45 | pytest==6.2.5 ; python_full_version >= "3.8.1" and python_version < "3.9" 46 | python-multipart==0.0.5 ; python_full_version >= "3.8.1" and python_version < "3.9" 47 | pyworld==0.3.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 48 | pyyaml==6.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 49 | requests==2.28.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 50 | scipy==1.7.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 51 | six==1.16.0 ; 
python_full_version >= "3.8.1" and python_version < "3.9" 52 | smmap==5.0.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 53 | sniffio==1.3.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 54 | soundfile==0.10.3.post1 ; python_full_version >= "3.8.1" and python_version < "3.9" 55 | starlette==0.16.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 56 | toml==0.10.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 57 | tomli==2.0.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 58 | tomlkit==0.11.6 ; python_full_version >= "3.8.1" and python_version < "3.9" 59 | tqdm==4.64.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 60 | typing-extensions==4.4.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 61 | unidiff==0.7.4 ; python_full_version >= "3.8.1" and python_version < "3.9" 62 | urllib3==1.26.13 ; python_full_version >= "3.8.1" and python_version < "3.9" 63 | uvicorn==0.15.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 64 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiofiles==0.7.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 2 | anyio==3.6.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 3 | appdirs==1.4.4 ; python_full_version >= "3.8.1" and python_version < "3.9" 4 | asgiref==3.6.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 5 | certifi==2022.12.7 ; python_full_version >= "3.8.1" and python_version < "3.9" 6 | cffi==1.15.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 7 | charset-normalizer==2.1.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 8 | click==8.0.4 ; python_full_version >= "3.8.1" and python_version < "3.9" 9 | colorama==0.4.4 ; python_full_version >= "3.8.1" and python_version < "3.9" and platform_system == "Windows" 10 | cython==0.29.24 ; 
python_full_version >= "3.8.1" and python_version < "3.9" 11 | fastapi==0.70.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 12 | h11==0.14.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 13 | idna==3.4 ; python_full_version >= "3.8.1" and python_version < "3.9" 14 | jinja2==3.1.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 15 | markupsafe==2.1.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 16 | numpy==1.20.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 17 | pycparser==2.21 ; python_full_version >= "3.8.1" and python_version < "3.9" 18 | pydantic==1.10.2 ; python_full_version >= "3.8.1" and python_version < "3.9" 19 | pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@f4ade29ef9a4f43d8605103cb5bacc29e0b2ccae ; python_full_version >= "3.8.1" and python_version < "3.9" 20 | python-multipart==0.0.5 ; python_full_version >= "3.8.1" and python_version < "3.9" 21 | pyworld==0.3.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 22 | pyyaml==6.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 23 | requests==2.28.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 24 | scipy==1.7.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 25 | six==1.16.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 26 | sniffio==1.3.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 27 | soundfile==0.10.3.post1 ; python_full_version >= "3.8.1" and python_version < "3.9" 28 | starlette==0.16.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 29 | tqdm==4.64.1 ; python_full_version >= "3.8.1" and python_version < "3.9" 30 | typing-extensions==4.4.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 31 | urllib3==1.26.13 ; python_full_version >= "3.8.1" and python_version < "3.9" 32 | uvicorn==0.15.0 ; python_full_version >= "3.8.1" and python_version < "3.9" 33 | 
-------------------------------------------------------------------------------- /run.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | # このファイルはPyInstallerによって自動生成されたもので、それをカスタマイズして使用しています。 3 | from PyInstaller.utils.hooks import collect_data_files 4 | import os 5 | 6 | datas = [ 7 | ('engine_manifest_assets', 'engine_manifest_assets'), 8 | ('speaker_info', 'speaker_info'), 9 | ('engine_manifest.json', '.'), 10 | ('default.csv', '.'), 11 | ('licenses.json', '.'), 12 | ('presets.yaml', '.'), 13 | ('default_setting.yml', '.'), 14 | ('ui_template', 'ui_template'), 15 | ('model', 'model'), 16 | ] 17 | datas += collect_data_files('pyopenjtalk') 18 | 19 | # コアとONNX Runtimeはバイナリであるが、`binaries`に加えると 20 | # 依存関係のパスがPyInstallerに書き換えらるので、`datas`に加える 21 | # 参考: https://github.com/VOICEVOX/voicevox_engine/pull/446#issuecomment-1210052318 22 | libcore_path = os.environ.get('LIBCORE_PATH') 23 | if libcore_path: 24 | print('LIBCORE_PATH is found:', libcore_path) 25 | if not os.path.isfile(libcore_path): 26 | raise Exception("LIBCORE_PATH was found, but it is not file!") 27 | datas += [(libcore_path, ".")] 28 | 29 | libonnxruntime_path = os.environ.get('LIBONNXRUNTIME_PATH') 30 | if libonnxruntime_path: 31 | print('LIBONNXRUNTIME_PATH is found:', libonnxruntime_path) 32 | if not os.path.isfile(libonnxruntime_path): 33 | raise Exception("LIBCORE_PATH was found, but it is not file!") 34 | datas += [(libonnxruntime_path, ".")] 35 | 36 | 37 | block_cipher = None 38 | 39 | 40 | a = Analysis( 41 | ['run.py'], 42 | pathex=[], 43 | binaries=[], 44 | datas=datas, 45 | hiddenimports=[], 46 | hookspath=[], 47 | hooksconfig={}, 48 | runtime_hooks=[], 49 | excludes=[], 50 | win_no_prefer_redirects=False, 51 | win_private_assemblies=False, 52 | cipher=block_cipher, 53 | noarchive=False, 54 | ) 55 | 56 | pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) 57 | 58 | exe = EXE( 59 | pyz, 60 | a.scripts, 61 | [], 
62 | exclude_binaries=True, 63 | name='run', 64 | debug=False, 65 | bootloader_ignore_signals=False, 66 | strip=False, 67 | upx=True, 68 | console=True, 69 | disable_windowed_traceback=False, 70 | argv_emulation=False, 71 | target_arch=None, 72 | codesign_identity=None, 73 | entitlements_file=None, 74 | ) 75 | 76 | coll = COLLECT( 77 | exe, 78 | a.binaries, 79 | a.zipfiles, 80 | a.datas, 81 | strip=False, 82 | upx=True, 83 | upx_exclude=[], 84 | name='run', 85 | ) 86 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # automatically generated by pysen 3 | # e203: black treats : as a binary operator 4 | # e231: black doesn't put a space after , 5 | # e501: black may exceed the line-length to follow other style rules 6 | # w503 or w504: either one needs to be disabled to select w error codes 7 | # ignore = E203,E231,E501,W503 8 | max-line-length = 88 9 | select = B,B950,C,E,F,W 10 | # e741: do not use variables named 'I', 'O', or 'l' 11 | ignore = E203,W503,E741 12 | 13 | 14 | [mypy] 15 | # automatically generated by pysen 16 | check_untyped_defs = True 17 | disallow_any_decorated = False 18 | disallow_any_generics = True 19 | disallow_any_unimported = False 20 | disallow_incomplete_defs = True 21 | disallow_subclassing_any = True 22 | disallow_untyped_calls = False 23 | disallow_untyped_decorators = False 24 | disallow_untyped_defs = False 25 | ignore_errors = False 26 | ignore_missing_imports = True 27 | no_implicit_optional = True 28 | python_version = 3.7 29 | show_error_codes = True 30 | strict_equality = True 31 | strict_optional = True 32 | warn_redundant_casts = True 33 | warn_return_any = False 34 | warn_unreachable = True 35 | warn_unused_configs = True 36 | warn_unused_ignores = True 37 | 38 | -------------------------------------------------------------------------------- 
/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/icons/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/icons/8.png -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/metas.json: -------------------------------------------------------------------------------- 1 | { 2 | "supported_features": { "permitted_synthesis_morphing": "NOTHING" } 3 | } 4 | -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/policy.md: -------------------------------------------------------------------------------- 1 | dummy3 policy 2 | 3 | https://voicevox.hiroshiba.jp/ 4 | -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portrait.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portrait.png -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portraits/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portraits/8.png -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_001.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_001.wav -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_002.wav -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_003.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/1.png -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/3.png -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/5.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/5.png -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/7.png -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/metas.json: -------------------------------------------------------------------------------- 1 | { 2 | "supported_features": { "permitted_synthesis_morphing": "SELF_ONLY" } 3 | } 4 | -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/policy.md: -------------------------------------------------------------------------------- 1 | dummy2 policy 2 | 3 | https://voicevox.hiroshiba.jp/ 4 | -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portrait.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portrait.png -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portraits/3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portraits/3.png -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_001.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_002.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_003.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_001.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_002.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_002.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_003.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_001.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_002.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_003.wav 
-------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_001.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_002.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_003.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/0.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/2.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/4.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/6.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/metas.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/policy.md: -------------------------------------------------------------------------------- 1 | dummy1 policy 2 | 3 | https://voicevox.hiroshiba.jp/ 4 | -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portrait.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portrait.png -------------------------------------------------------------------------------- 
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/0.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/2.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/4.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/6.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_001.wav 
-------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_002.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_003.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_001.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_002.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_003.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_003.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_001.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_002.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_003.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_001.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_002.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_002.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_003.wav -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/icons/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/icons/9.png -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/metas.json: -------------------------------------------------------------------------------- 1 | { 2 | "supported_features": { "permitted_synthesis_morphing": "ALL" } 3 | } 4 | -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/policy.md: -------------------------------------------------------------------------------- 1 | dummy4 policy 2 | 3 | https://voicevox.hiroshiba.jp/ 4 | -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/portrait.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/portrait.png -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_001.wav -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_002.wav -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_003.wav -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/test/__init__.py -------------------------------------------------------------------------------- /test/presets-test-1.yaml: -------------------------------------------------------------------------------- 1 | - id: 1 2 | name: test 3 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 4 | 
style_id: 0 5 | speedScale: 1 6 | pitchScale: 0 7 | intonationScale: 1 8 | volumeScale: 1 9 | prePhonemeLength: 0.1 10 | postPhonemeLength: 0.1 11 | 12 | - id: 2 13 | name: test2 14 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 15 | style_id: 2 16 | speedScale: 1.5 17 | pitchScale: 0 18 | intonationScale: 1 19 | volumeScale: 0.7 20 | prePhonemeLength: 0.5 21 | postPhonemeLength: 0.5 22 | -------------------------------------------------------------------------------- /test/presets-test-2.yaml: -------------------------------------------------------------------------------- 1 | - id: 1 2 | name: test 3 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 4 | style_id: not_int 5 | speedScale: 1 6 | pitchScale: 0 7 | intonationScale: 1 8 | volumeScale: 1 9 | prePhonemeLength: 0.1 10 | postPhonemeLength: 0.1 11 | 12 | - id: 2 13 | name: test2 14 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 15 | style_id: 2 16 | speedScale: 1.5 17 | pitchScale: 0 18 | intonationScale: 1 19 | volumeScale: 0.7 20 | prePhonemeLength: 0.5 21 | postPhonemeLength: 0.5 22 | -------------------------------------------------------------------------------- /test/presets-test-3.yaml: -------------------------------------------------------------------------------- 1 | - id: 1 2 | name: test 3 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 4 | style_id: 0 5 | speedScale: 1 6 | pitchScale: 0 7 | intonationScale: 1 8 | volumeScale: 1 9 | prePhonemeLength: 0.1 10 | postPhonemeLength: 0.1 11 | 12 | - id: 1 13 | name: test2 14 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 15 | style_id: 2 16 | speedScale: 1.5 17 | pitchScale: 0 18 | intonationScale: 1 19 | volumeScale: 0.7 20 | prePhonemeLength: 0.5 21 | postPhonemeLength: 0.5 22 | -------------------------------------------------------------------------------- /test/presets-test-4.yaml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/test/presets-test-4.yaml -------------------------------------------------------------------------------- /test/test_connect_base64_waves.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import io 3 | from unittest import TestCase 4 | 5 | import numpy as np 6 | import numpy.testing 7 | import soundfile 8 | from scipy.signal import resample 9 | 10 | from voicevox_engine.utility import ConnectBase64WavesException, connect_base64_waves 11 | 12 | 13 | def generate_sine_wave_ndarray( 14 | seconds: float, samplerate: int, frequency: float 15 | ) -> np.ndarray: 16 | x = np.linspace(0, seconds, int(seconds * samplerate), endpoint=False) 17 | wave = np.sin(2 * np.pi * frequency * x).astype(np.float32) 18 | 19 | return wave 20 | 21 | 22 | def encode_bytes(wave_ndarray: np.ndarray, samplerate: int) -> bytes: 23 | wave_bio = io.BytesIO() 24 | soundfile.write( 25 | file=wave_bio, 26 | data=wave_ndarray, 27 | samplerate=samplerate, 28 | format="WAV", 29 | subtype="FLOAT", 30 | ) 31 | wave_bio.seek(0) 32 | 33 | return wave_bio.getvalue() 34 | 35 | 36 | def generate_sine_wave_bytes( 37 | seconds: float, samplerate: int, frequency: float 38 | ) -> bytes: 39 | wave_ndarray = generate_sine_wave_ndarray(seconds, samplerate, frequency) 40 | return encode_bytes(wave_ndarray, samplerate) 41 | 42 | 43 | def encode_base64(wave_bytes: bytes) -> str: 44 | return base64.standard_b64encode(wave_bytes).decode("utf-8") 45 | 46 | 47 | def generate_sine_wave_base64(seconds: float, samplerate: int, frequency: float) -> str: 48 | wave_bytes = generate_sine_wave_bytes(seconds, samplerate, frequency) 49 | wave_base64 = encode_base64(wave_bytes) 50 | return wave_base64 51 | 52 | 53 | class TestConnectBase64Waves(TestCase): 54 | def test_connect(self): 55 | samplerate = 1000 56 | wave = generate_sine_wave_ndarray( 57 | seconds=2, 
samplerate=samplerate, frequency=10 58 | ) 59 | wave_base64 = encode_base64(encode_bytes(wave, samplerate=samplerate)) 60 | 61 | wave_x2_ref = np.concatenate([wave, wave]) 62 | 63 | wave_x2, _ = connect_base64_waves(waves=[wave_base64, wave_base64]) 64 | 65 | self.assertEqual(wave_x2_ref.shape, wave_x2.shape) 66 | 67 | self.assertTrue((wave_x2_ref == wave_x2).all()) 68 | 69 | def test_no_wave_error(self): 70 | self.assertRaises(ConnectBase64WavesException, connect_base64_waves, waves=[]) 71 | 72 | def test_invalid_base64_error(self): 73 | wave_1000hz = generate_sine_wave_base64( 74 | seconds=2, samplerate=1000, frequency=10 75 | ) 76 | wave_1000hz_broken = wave_1000hz[1:] # remove head 1 char 77 | 78 | self.assertRaises( 79 | ConnectBase64WavesException, 80 | connect_base64_waves, 81 | waves=[ 82 | wave_1000hz_broken, 83 | ], 84 | ) 85 | 86 | def test_invalid_wave_file_error(self): 87 | wave_1000hz = generate_sine_wave_bytes(seconds=2, samplerate=1000, frequency=10) 88 | wave_1000hz_broken_bytes = wave_1000hz[1:] # remove head 1 byte 89 | wave_1000hz_broken = encode_base64(wave_1000hz_broken_bytes) 90 | 91 | self.assertRaises( 92 | ConnectBase64WavesException, 93 | connect_base64_waves, 94 | waves=[ 95 | wave_1000hz_broken, 96 | ], 97 | ) 98 | 99 | def test_different_frequency(self): 100 | wave_24000hz = generate_sine_wave_ndarray( 101 | seconds=1, samplerate=24000, frequency=10 102 | ) 103 | wave_1000hz = generate_sine_wave_ndarray( 104 | seconds=2, samplerate=1000, frequency=10 105 | ) 106 | wave_24000_base64 = encode_base64(encode_bytes(wave_24000hz, samplerate=24000)) 107 | wave_1000_base64 = encode_base64(encode_bytes(wave_1000hz, samplerate=1000)) 108 | 109 | wave_1000hz_to2400hz = resample(wave_1000hz, 24000 * len(wave_1000hz) // 1000) 110 | wave_x2_ref = np.concatenate([wave_24000hz, wave_1000hz_to2400hz]) 111 | 112 | wave_x2, _ = connect_base64_waves(waves=[wave_24000_base64, wave_1000_base64]) 113 | 114 | self.assertEqual(wave_x2_ref.shape, wave_x2.shape) 
115 | numpy.testing.assert_array_almost_equal(wave_x2_ref, wave_x2) 116 | 117 | def test_different_channels(self): 118 | wave_1000hz = generate_sine_wave_ndarray( 119 | seconds=2, samplerate=1000, frequency=10 120 | ) 121 | wave_2ch_1000hz = np.array([wave_1000hz, wave_1000hz]).T 122 | wave_1ch_base64 = encode_base64(encode_bytes(wave_1000hz, samplerate=1000)) 123 | wave_2ch_base64 = encode_base64(encode_bytes(wave_2ch_1000hz, samplerate=1000)) 124 | 125 | wave_x2_ref = np.concatenate([wave_2ch_1000hz, wave_2ch_1000hz]) 126 | 127 | wave_x2, _ = connect_base64_waves(waves=[wave_1ch_base64, wave_2ch_base64]) 128 | 129 | self.assertEqual(wave_x2_ref.shape, wave_x2.shape) 130 | self.assertTrue((wave_x2_ref == wave_x2).all()) 131 | -------------------------------------------------------------------------------- /test/test_mock_synthesis_engine.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from voicevox_engine.dev.synthesis_engine import MockSynthesisEngine 4 | from voicevox_engine.kana_parser import create_kana 5 | from voicevox_engine.model import AccentPhrase, AudioQuery, Mora 6 | 7 | 8 | class TestMockSynthesisEngine(TestCase): 9 | def setUp(self): 10 | super().setUp() 11 | 12 | self.accent_phrases_hello_hiho = [ 13 | AccentPhrase( 14 | moras=[ 15 | Mora( 16 | text="コ", 17 | consonant="k", 18 | consonant_length=0.0, 19 | vowel="o", 20 | vowel_length=0.0, 21 | pitch=0.0, 22 | ), 23 | Mora( 24 | text="ン", 25 | consonant=None, 26 | consonant_length=None, 27 | vowel="N", 28 | vowel_length=0.0, 29 | pitch=0.0, 30 | ), 31 | Mora( 32 | text="ニ", 33 | consonant="n", 34 | consonant_length=0.0, 35 | vowel="i", 36 | vowel_length=0.0, 37 | pitch=0.0, 38 | ), 39 | Mora( 40 | text="チ", 41 | consonant="ch", 42 | consonant_length=0.0, 43 | vowel="i", 44 | vowel_length=0.0, 45 | pitch=0.0, 46 | ), 47 | Mora( 48 | text="ワ", 49 | consonant="w", 50 | consonant_length=0.0, 51 | vowel="a", 52 | 
vowel_length=0.0, 53 | pitch=0.0, 54 | ), 55 | ], 56 | accent=5, 57 | pause_mora=Mora( 58 | text="、", 59 | consonant=None, 60 | consonant_length=None, 61 | vowel="pau", 62 | vowel_length=0.0, 63 | pitch=0.0, 64 | ), 65 | ), 66 | AccentPhrase( 67 | moras=[ 68 | Mora( 69 | text="ヒ", 70 | consonant="h", 71 | consonant_length=0.0, 72 | vowel="i", 73 | vowel_length=0.0, 74 | pitch=0.0, 75 | ), 76 | Mora( 77 | text="ホ", 78 | consonant="h", 79 | consonant_length=0.0, 80 | vowel="o", 81 | vowel_length=0.0, 82 | pitch=0.0, 83 | ), 84 | Mora( 85 | text="デ", 86 | consonant="d", 87 | consonant_length=0.0, 88 | vowel="e", 89 | vowel_length=0.0, 90 | pitch=0.0, 91 | ), 92 | Mora( 93 | text="ス", 94 | consonant="s", 95 | consonant_length=0.0, 96 | vowel="U", 97 | vowel_length=0.0, 98 | pitch=0.0, 99 | ), 100 | ], 101 | accent=1, 102 | pause_mora=None, 103 | ), 104 | ] 105 | self.engine = MockSynthesisEngine(speakers="", supported_devices="") 106 | 107 | def test_replace_phoneme_length(self): 108 | self.assertEqual( 109 | self.engine.replace_phoneme_length( 110 | accent_phrases=self.accent_phrases_hello_hiho, 111 | speaker_id=0, 112 | ), 113 | self.accent_phrases_hello_hiho, 114 | ) 115 | 116 | def test_replace_mora_pitch(self): 117 | self.assertEqual( 118 | self.engine.replace_mora_pitch( 119 | accent_phrases=self.accent_phrases_hello_hiho, 120 | speaker_id=0, 121 | ), 122 | self.accent_phrases_hello_hiho, 123 | ) 124 | 125 | def test_synthesis(self): 126 | self.engine.synthesis( 127 | AudioQuery( 128 | accent_phrases=self.accent_phrases_hello_hiho, 129 | speedScale=1, 130 | pitchScale=0, 131 | intonationScale=1, 132 | volumeScale=1, 133 | prePhonemeLength=0.1, 134 | postPhonemeLength=0.1, 135 | outputSamplingRate=24000, 136 | outputStereo=False, 137 | kana=create_kana(self.accent_phrases_hello_hiho), 138 | ), 139 | speaker_id=0, 140 | ) 141 | -------------------------------------------------------------------------------- /test/test_mora_list.py: 
-------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from voicevox_engine.mora_list import openjtalk_mora2text 4 | 5 | 6 | class TestOpenJTalkMoraList(TestCase): 7 | def test_mora2text(self): 8 | self.assertEqual("ッ", openjtalk_mora2text["cl"]) 9 | self.assertEqual("ティ", openjtalk_mora2text["ti"]) 10 | self.assertEqual("トゥ", openjtalk_mora2text["tu"]) 11 | self.assertEqual("ディ", openjtalk_mora2text["di"]) 12 | # GitHub issue #60 13 | self.assertEqual("ギェ", openjtalk_mora2text["gye"]) 14 | self.assertEqual("イェ", openjtalk_mora2text["ye"]) 15 | 16 | def test_mora2text_injective(self): 17 | """異なるモーラが同じ読みがなに対応しないか確認する""" 18 | values = list(openjtalk_mora2text.values()) 19 | uniq_values = list(set(values)) 20 | self.assertCountEqual(values, uniq_values) 21 | -------------------------------------------------------------------------------- /test/test_mora_to_text.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | # TODO: import from voicevox_engine.synthesis_engine.mora 4 | from voicevox_engine.synthesis_engine.synthesis_engine_base import mora_to_text 5 | 6 | 7 | class TestMoraToText(TestCase): 8 | def test_voice(self): 9 | self.assertEqual(mora_to_text("a"), "ア") 10 | self.assertEqual(mora_to_text("i"), "イ") 11 | self.assertEqual(mora_to_text("ka"), "カ") 12 | self.assertEqual(mora_to_text("N"), "ン") 13 | self.assertEqual(mora_to_text("cl"), "ッ") 14 | self.assertEqual(mora_to_text("gye"), "ギェ") 15 | self.assertEqual(mora_to_text("ye"), "イェ") 16 | self.assertEqual(mora_to_text("wo"), "ウォ") 17 | 18 | def test_unvoice(self): 19 | self.assertEqual(mora_to_text("A"), "ア") 20 | self.assertEqual(mora_to_text("I"), "イ") 21 | self.assertEqual(mora_to_text("kA"), "カ") 22 | self.assertEqual(mora_to_text("gyE"), "ギェ") 23 | self.assertEqual(mora_to_text("yE"), "イェ") 24 | self.assertEqual(mora_to_text("wO"), "ウォ") 25 | 26 | def 
test_invalid_mora(self): 27 | """変なモーラが来ても例外を投げない""" 28 | self.assertEqual(mora_to_text("x"), "x") 29 | self.assertEqual(mora_to_text(""), "") 30 | -------------------------------------------------------------------------------- /test/test_user_dict_model.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from unittest import TestCase 3 | 4 | from pydantic import ValidationError 5 | 6 | from voicevox_engine.kana_parser import parse_kana 7 | from voicevox_engine.model import UserDictWord 8 | 9 | 10 | class TestUserDictWords(TestCase): 11 | def setUp(self): 12 | self.test_model = { 13 | "surface": "テスト", 14 | "priority": 0, 15 | "part_of_speech": "名詞", 16 | "part_of_speech_detail_1": "固有名詞", 17 | "part_of_speech_detail_2": "一般", 18 | "part_of_speech_detail_3": "*", 19 | "inflectional_type": "*", 20 | "inflectional_form": "*", 21 | "stem": "*", 22 | "yomi": "テスト", 23 | "pronunciation": "テスト", 24 | "accent_type": 0, 25 | "accent_associative_rule": "*", 26 | } 27 | 28 | def test_valid_word(self): 29 | test_value = deepcopy(self.test_model) 30 | try: 31 | UserDictWord(**test_value) 32 | except ValidationError as e: 33 | self.fail(f"Unexpected Validation Error\n{str(e)}") 34 | 35 | def test_convert_to_zenkaku(self): 36 | test_value = deepcopy(self.test_model) 37 | test_value["surface"] = "test" 38 | self.assertEqual(UserDictWord(**test_value).surface, "test") 39 | 40 | def test_count_mora(self): 41 | test_value = deepcopy(self.test_model) 42 | self.assertEqual(UserDictWord(**test_value).mora_count, 3) 43 | 44 | def test_count_mora_x(self): 45 | test_value = deepcopy(self.test_model) 46 | for s in [chr(i) for i in range(12449, 12533)]: 47 | if s in ["ァ", "ィ", "ゥ", "ェ", "ォ", "ッ", "ャ", "ュ", "ョ", "ヮ"]: 48 | continue 49 | for x in "ァィゥェォャュョ": 50 | expected_count = 0 51 | test_value["pronunciation"] = s + x 52 | for accent_phrase in parse_kana( 53 | test_value["pronunciation"] + "'", 54 | ): 55 | expected_count 
+= len(accent_phrase.moras) 56 | with self.subTest(s=s, x=x): 57 | self.assertEqual( 58 | UserDictWord(**test_value).mora_count, 59 | expected_count, 60 | ) 61 | 62 | def test_count_mora_xwa(self): 63 | test_value = deepcopy(self.test_model) 64 | test_value["pronunciation"] = "クヮンセイ" 65 | expected_count = 0 66 | for accent_phrase in parse_kana( 67 | test_value["pronunciation"] + "'", 68 | ): 69 | expected_count += len(accent_phrase.moras) 70 | self.assertEqual( 71 | UserDictWord(**test_value).mora_count, 72 | expected_count, 73 | ) 74 | 75 | def test_invalid_pronunciation_not_katakana(self): 76 | test_value = deepcopy(self.test_model) 77 | test_value["pronunciation"] = "ぼいぼ" 78 | with self.assertRaises(ValidationError): 79 | UserDictWord(**test_value) 80 | 81 | def test_invalid_pronunciation_invalid_sutegana(self): 82 | test_value = deepcopy(self.test_model) 83 | test_value["pronunciation"] = "アィウェォ" 84 | with self.assertRaises(ValidationError): 85 | UserDictWord(**test_value) 86 | 87 | def test_invalid_pronunciation_invalid_xwa(self): 88 | test_value = deepcopy(self.test_model) 89 | test_value["pronunciation"] = "アヮ" 90 | with self.assertRaises(ValidationError): 91 | UserDictWord(**test_value) 92 | 93 | def test_count_mora_voiced_sound(self): 94 | test_value = deepcopy(self.test_model) 95 | test_value["pronunciation"] = "ボイボ" 96 | self.assertEqual(UserDictWord(**test_value).mora_count, 3) 97 | 98 | def test_invalid_accent_type(self): 99 | test_value = deepcopy(self.test_model) 100 | test_value["accent_type"] = 4 101 | with self.assertRaises(ValidationError): 102 | UserDictWord(**test_value) 103 | 104 | def test_invalid_accent_type_2(self): 105 | test_value = deepcopy(self.test_model) 106 | test_value["accent_type"] = -1 107 | with self.assertRaises(ValidationError): 108 | UserDictWord(**test_value) 109 | -------------------------------------------------------------------------------- /test/test_word_types.py: 
-------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from voicevox_engine.model import WordTypes 4 | from voicevox_engine.part_of_speech_data import part_of_speech_data 5 | 6 | 7 | class TestWordTypes(TestCase): 8 | def test_word_types(self): 9 | self.assertCountEqual(list(WordTypes), list(part_of_speech_data.keys())) 10 | -------------------------------------------------------------------------------- /ui_template/ui.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | VOICEVOX Engine 設定 6 | 10 | 11 | 17 | 22 | 23 | 24 | 25 |
26 |
27 | 30 | 31 |
32 | 33 | 44 |
45 |

46 | allまたはlocalappsを指定。allはすべてを許可します。 47 |

48 |

49 | localappsはオリジン間リソース共有ポリシーを、app://.とlocalhost関連に限定します。 50 |

51 |

52 | その他のオリジンはallow_originオプションで追加できます。デフォルトはlocalapps。 53 |

54 |
55 |
56 | 57 |
58 | 59 | 65 |
66 | 許可するオリジンを指定します。複数指定する場合は、直後にスペースで区切って追加できます。 67 |
68 |
69 | 70 | 108 | 109 | 117 |
118 |
class BasePhoneme(object):
    """
    Base class shared by the concrete phoneme classes.

    An instance holds one phoneme label together with the start and end
    time of the span it occupies. Subclasses provide the class-level
    phoneme inventory and implement ``convert``.

    Attributes
    ----------
    phoneme_list : Sequence[str]
        Inventory of valid phoneme labels
    num_phoneme : int
        Number of entries in ``phoneme_list``
    space_phoneme : str
        Label that stands for a pause (comma)
    """

    phoneme_list: Sequence[str]
    num_phoneme: int
    space_phoneme: str

    def __init__(
        self,
        phoneme: str,
        start: float,
        end: float,
    ):
        self.phoneme = phoneme
        # Both times are stored rounded to two decimal places.
        self.start = numpy.round(start, decimals=2)
        self.end = numpy.round(end, decimals=2)

    def __repr__(self):
        return f"Phoneme(phoneme='{self.phoneme}', start={self.start}, end={self.end})"

    def __eq__(self, o: object):
        return isinstance(o, BasePhoneme) and (
            self.phoneme == o.phoneme and self.start == o.start and self.end == o.end
        )

    def verify(self):
        """
        Assert that the label belongs to this class's ``phoneme_list``.

        Raises
        ------
        AssertionError
            If the label is not part of the inventory
        """
        assert self.phoneme in self.phoneme_list, f"{self.phoneme} is not defined."

    @property
    def phoneme_id(self):
        """
        Index of the label inside ``phoneme_list``.

        Returns
        -------
        id : int
            Position of ``self.phoneme`` in ``phoneme_list``
        """
        return self.phoneme_list.index(self.phoneme)

    @property
    def duration(self):
        """
        Length of the phoneme span.

        Returns
        -------
        duration : float
            ``end - start``
        """
        return self.end - self.start

    @property
    def onehot(self):
        """
        One-hot encoding of the label.

        Returns
        -------
        onehot : numpy.ndarray
            Boolean array of length ``num_phoneme`` that is True only at
            index ``phoneme_id``
        """
        array = numpy.zeros(self.num_phoneme, dtype=bool)
        array[self.phoneme_id] = True
        return array

    @classmethod
    def parse(cls, s: str):
        """
        Build a phoneme from one whitespace-separated "start end label" line.

        Parameters
        ----------
        s : str
            Line to parse

        Returns
        -------
        phoneme : BasePhoneme
            Instance of ``cls`` built from the three fields

        Examples
        --------
        >>> BasePhoneme.parse('1.7425000 1.9125000 o:')
        Phoneme(phoneme='o:', start=1.74, end=1.91)
        """
        words = s.split()
        return cls(
            start=float(words[0]),
            end=float(words[1]),
            phoneme=words[2],
        )

    @classmethod
    def _replace_edge_silence(cls, phonemes: List["BasePhoneme"]) -> List["BasePhoneme"]:
        """
        Rewrite a leading and/or trailing ``sil`` label to ``space_phoneme``.

        The elements are modified in place and the same list is returned.
        An empty list is returned unchanged instead of raising IndexError.
        """
        if not phonemes:
            return phonemes
        # For a single-element list both edges are the same object; the second
        # pass is then a no-op because the label no longer contains "sil".
        for edge in (phonemes[0], phonemes[-1]):
            if "sil" in edge.phoneme:
                edge.phoneme = cls.space_phoneme
        return phonemes

    @classmethod
    @abstractmethod
    def convert(cls, phonemes: List["BasePhoneme"]) -> List["BasePhoneme"]:
        """Normalize a freshly parsed phoneme list; implemented by subclasses."""
        raise NotImplementedError

    @classmethod
    def load_lab_list(cls, path: Path):
        """
        Read a lab file.

        Parameters
        ----------
        path : Path
            Path of the lab file to read

        Returns
        -------
        phonemes : List[BasePhoneme]
            Parsed, converted and verified phonemes (empty for an empty file)
        """
        # Skip blank and whitespace-only lines; parse() needs three fields.
        phonemes = [cls.parse(s) for s in path.read_text().split("\n") if s.strip()]
        phonemes = cls.convert(phonemes)

        for phoneme in phonemes:
            phoneme.verify()
        return phonemes

    @classmethod
    def save_lab_list(cls, phonemes: List["BasePhoneme"], path: Path):
        """
        Write a list of phonemes in lab-file format.

        Each phoneme becomes one tab-separated "start end label" line with
        the times printed with two decimals.

        Parameters
        ----------
        phonemes : List[BasePhoneme]
            Phonemes to write
        path : Path
            Destination lab file
        """
        text = "\n".join(
            [
                f"{numpy.round(p.start, decimals=2):.2f}\t"
                f"{numpy.round(p.end, decimals=2):.2f}\t"
                f"{p.phoneme}"
                for p in phonemes
            ]
        )
        path.write_text(text)


class JvsPhoneme(BasePhoneme):
    """
    Phoneme set of the JVS (Japanese versatile speech) corpus.

    Attributes
    ----------
    phoneme_list : Sequence[str]
        Inventory of valid phoneme labels
    num_phoneme : int
        Number of entries in ``phoneme_list``
    space_phoneme : str
        Label that stands for a pause (comma)
    """

    phoneme_list = (
        "pau",
        "I",
        "N",
        "U",
        "a",
        "b",
        "by",
        "ch",
        "cl",
        "d",
        "dy",
        "e",
        "f",
        "g",
        "gy",
        "h",
        "hy",
        "i",
        "j",
        "k",
        "ky",
        "m",
        "my",
        "n",
        "ny",
        "o",
        "p",
        "py",
        "r",
        "ry",
        "s",
        "sh",
        "t",
        "ts",
        "u",
        "v",
        "w",
        "y",
        "z",
    )
    num_phoneme = len(phoneme_list)
    space_phoneme = "pau"

    @classmethod
    def convert(cls, phonemes: List["JvsPhoneme"]) -> List["JvsPhoneme"]:
        """
        Replace a leading and trailing sil (silent) with space_phoneme (pau).

        Parameters
        ----------
        phonemes : List[JvsPhoneme]
            Phonemes to convert (modified in place)

        Returns
        -------
        phonemes : List[JvsPhoneme]
            The same list after conversion
        """
        return cls._replace_edge_silence(phonemes)


class OjtPhoneme(BasePhoneme):
    """
    Phoneme set used by OpenJTalk.

    Attributes
    ----------
    phoneme_list : Sequence[str]
        Inventory of valid phoneme labels
    num_phoneme : int
        Number of entries in ``phoneme_list``
    space_phoneme : str
        Label that stands for a pause (comma)
    """

    phoneme_list = (
        "pau",
        "A",
        "E",
        "I",
        "N",
        "O",
        "U",
        "a",
        "b",
        "by",
        "ch",
        "cl",
        "d",
        "dy",
        "e",
        "f",
        "g",
        "gw",
        "gy",
        "h",
        "hy",
        "i",
        "j",
        "k",
        "kw",
        "ky",
        "m",
        "my",
        "n",
        "ny",
        "o",
        "p",
        "py",
        "r",
        "ry",
        "s",
        "sh",
        "t",
        "ts",
        "ty",
        "u",
        "v",
        "w",
        "y",
        "z",
    )
    num_phoneme = len(phoneme_list)
    space_phoneme = "pau"

    @classmethod
    def convert(cls, phonemes: List["OjtPhoneme"]) -> List["OjtPhoneme"]:
        """
        Replace a leading and trailing sil (silent) with space_phoneme (pau).

        Parameters
        ----------
        phonemes : List[OjtPhoneme]
            Phonemes to convert (modified in place)

        Returns
        -------
        phonemes : List[OjtPhoneme]
            The same list after conversion
        """
        return cls._replace_edge_silence(phonemes)


class PhonemeType(str, Enum):
    jvs = "jvs"
    openjtalk = "openjtalk"


# Maps each PhonemeType member to the class implementing that phoneme set.
phoneme_type_to_class = {
    PhonemeType.jvs: JvsPhoneme,
    PhonemeType.openjtalk: OjtPhoneme,
}
class CancellableEngine:
    """
    Speech synthesis that can be cancelled when the client disconnects.

    After construction, audio is synthesized through ``_synthesis_impl``
    (note that it takes more arguments than the regular engine).

    Attributes
    ----------
    watch_con_list: List[Tuple[Request, Process]]
        The Request is used to watch the connection and the Process is the
        one killed when that connection drops.
        A tuple is appended when a client request starts and removed when
        the connection is lost or synthesis finishes.
    procs_and_cons: queue.Queue[Tuple[Process, Connection]]
        Processes that are ready to synthesize
        (processes that are currently synthesizing are not in here)
    """

    def __init__(self, args: argparse.Namespace) -> None:
        """
        Initialize the state, start ``args.init_processes`` worker processes
        and store them in ``procs_and_cons``.

        Raises
        ------
        HTTPException
            (404) if ``args.enable_cancellable_synthesis`` is falsy
        """
        self.args = args
        if not self.args.enable_cancellable_synthesis:
            raise HTTPException(
                status_code=404,
                detail="実験的機能はデフォルトで無効になっています。使用するには引数を指定してください。",
            )

        self.watch_con_list: List[Tuple[Request, Process]] = []
        self.procs_and_cons: queue.Queue[Tuple[Process, Connection]] = queue.Queue()
        for _ in range(self.args.init_processes):
            self.procs_and_cons.put(self.start_new_proc())

    def start_new_proc(
        self,
    ) -> Tuple[Process, Connection]:
        """
        Start a new worker process and return it.

        Returns
        -------
        ret_proc: Process
            The newly started (daemon) process
        sub_proc_con1: Connection
            Parent-side end of the duplex Pipe used to talk to ret_proc
        """
        sub_proc_con1, sub_proc_con2 = Pipe(True)
        ret_proc = Process(
            target=start_synthesis_subprocess,
            kwargs={
                "args": self.args,
                "sub_proc_con": sub_proc_con2,
            },
            daemon=True,
        )
        ret_proc.start()
        return ret_proc, sub_proc_con1

    def finalize_con(
        self,
        req: Request,
        proc: Process,
        sub_proc_con: Optional[Connection],
    ) -> None:
        """
        Clean up after a request has finished or its connection was lost.

        Removes the entry from ``watch_con_list`` and tops ``procs_and_cons``
        back up: a process that is still alive is put back as-is, otherwise
        a freshly started process is put in its place.

        Parameters
        ----------
        req: fastapi.Request
            The request object received when the connection was established
            https://fastapi.tiangolo.com/advanced/using-request-directly/
        proc: Process
            The process that was synthesizing
        sub_proc_con: Connection, optional
            The Pipe to that process.
            When None, the process is not reused and is closed instead
        """
        try:
            self.watch_con_list.remove((req, proc))
        except ValueError:
            # Already removed (e.g. finalized from the other code path).
            pass

        reusable_con: Optional[Connection] = None
        try:
            if proc.is_alive() and sub_proc_con is not None:
                reusable_con = sub_proc_con
            else:
                proc.close()
        except ValueError:
            # multiprocessing raises ValueError from is_alive() on an already
            # closed Process and from close() on a still running one; either
            # way this process is not handed back to the pool.
            pass

        if reusable_con is not None:
            # The process is still alive, so reuse it.
            self.procs_and_cons.put((proc, reusable_con))
        else:
            # The process is gone, so start a replacement.
            self.procs_and_cons.put(self.start_new_proc())

    def _synthesis_impl(
        self,
        query: AudioQuery,
        speaker_id: int,
        request: Request,
        core_version: Optional[str],
    ) -> str:
        """
        Synthesize audio in a worker process.

        Compared with the regular engine this additionally needs the request,
        and it returns a file name instead of the waveform.

        Parameters
        ----------
        query: AudioQuery
        speaker_id: int
        request: fastapi.Request
            The request object received when the connection was established
            https://fastapi.tiangolo.com/advanced/using-request-directly/
        core_version: str

        Returns
        -------
        f_name: str
            Name of the generated audio file (the worker sends an empty
            string when the requested core version is unknown)

        Raises
        ------
        HTTPException
            (422) if the worker's end of the pipe was already closed
        """
        proc, sub_proc_con1 = self.procs_and_cons.get()
        self.watch_con_list.append((request, proc))
        try:
            sub_proc_con1.send((query, speaker_id, core_version))
            f_name = sub_proc_con1.recv()
        except EOFError:
            # No finalize_con() here: when the worker was killed by
            # catch_disconnection(), that coroutine already finalized it.
            raise HTTPException(status_code=422, detail="既にサブプロセスは終了されています")
        except Exception:
            self.finalize_con(request, proc, sub_proc_con1)
            raise

        self.finalize_con(request, proc, sub_proc_con1)
        return f_name

    async def catch_disconnection(self):
        """
        Coroutine that watches the connections.

        Once per second, every watched request is checked; when its client
        has disconnected, the associated worker process is terminated and
        replaced via ``finalize_con``.
        """
        while True:
            await asyncio.sleep(1)
            # Iterate over a snapshot: finalize_con() removes entries from
            # watch_con_list, and mutating a list while iterating it skips
            # elements.
            for req, proc in list(self.watch_con_list):
                if not await req.is_disconnected():
                    continue
                if (req, proc) not in self.watch_con_list:
                    # The request was finalized while we were awaiting; its
                    # process may already be back in the pool, so leave it.
                    continue
                try:
                    if proc.is_alive():
                        proc.terminate()
                        proc.join()
                    proc.close()
                except ValueError:
                    pass
                finally:
                    self.finalize_con(req, proc, None)


def start_synthesis_subprocess(
    args: argparse.Namespace,
    sub_proc_con: Connection,
):
    """
    Entry point of a synthesis worker process.
    Defined at module level because the target has to be picklable.

    Loops forever: receives ``(query, speaker_id, core_version)`` over the
    pipe, synthesizes, writes the result to a temporary WAV file and sends
    the file name back. An empty string is sent when ``core_version`` is
    not available.

    Parameters
    ----------
    args: argparse.Namespace
        The namespace created at start-up, passed through unchanged
    sub_proc_con: Connection
        Pipe used to communicate with the main process
    """

    synthesis_engines = make_synthesis_engines(
        use_gpu=args.use_gpu,
        voicelib_dirs=args.voicelib_dir,
        voicevox_dir=args.voicevox_dir,
        runtime_dirs=args.runtime_dir,
        cpu_num_threads=args.cpu_num_threads,
        enable_mock=args.enable_mock,
    )
    assert len(synthesis_engines) != 0, "音声合成エンジンがありません。"
    latest_core_version = str(max(LooseVersion(ver) for ver in synthesis_engines))
    while True:
        try:
            query, speaker_id, core_version = sub_proc_con.recv()
            if core_version is None:
                _engine = synthesis_engines[latest_core_version]
            elif core_version in synthesis_engines:
                _engine = synthesis_engines[core_version]
            else:
                # Requested version not found: report it with an empty name.
                sub_proc_con.send("")
                continue
            wave = _engine._synthesis_impl(query, speaker_id)
            # delete=False: the file must outlive this process so the parent
            # can read it by name.
            with NamedTemporaryFile(delete=False) as f:
                soundfile.write(
                    file=f, data=wave, samplerate=query.outputSamplingRate, format="WAV"
                )
            sub_proc_con.send(f.name)
        except Exception:
            sub_proc_con.close()
            raise
DUMMY_TEXT = "これはダミーのテキストです"


def initialize(path: str, use_gpu: bool, *args: Any) -> None:
    """Mock of the core's initialize(); accepts the arguments and does nothing."""
    pass


def yukarin_s_forward(length: int, **kwargs: Any) -> np.ndarray:
    """
    Mock of yukarin_s_forward().

    Parameters
    ----------
    length : int
        Number of values to return

    Returns
    -------
    np.ndarray
        Array of shape ``(length,)`` filled with 0.2
    """
    logger = getLogger("uvicorn")  # used from inside FastAPI / Uvicorn
    logger.info(
        "Sorry, yukarin_s_forward() is a mock. Return values are incorrect.",
    )
    return np.ones(length) / 5


def yukarin_sa_forward(length: int, **kwargs: Any) -> np.ndarray:
    """
    Mock of yukarin_sa_forward().

    Parameters
    ----------
    length : int
        Number of values per row

    Returns
    -------
    np.ndarray
        Array of shape ``(1, length)`` filled with 5.0
    """
    logger = getLogger("uvicorn")  # used from inside FastAPI / Uvicorn
    logger.info(
        "Sorry, yukarin_sa_forward() is a mock. Return values are incorrect.",
    )
    return np.ones((1, length)) * 5


def decode_forward(length: int, **kwargs: Any) -> np.ndarray:
    """
    Return synthesized waveform data as a NumPy array. It always reads out
    the same fixed sentence (DUMMY_TEXT).
    See also: docstring of SynthesisEngine [Mock]

    Parameters
    ----------
    length : int
        Frame length (not used by this mock)

    Returns
    -------
    wave : np.ndarray
        Synthesized waveform, resampled to 24 kHz

    Note
    -------
    The synthesis done here does not reflect any tuning (pitch etc.),
    and the same fixed sentence is read regardless of the input.

    # Output of pyopenjtalk.tts()
    dtype=np.float64, 16 bit, mono 48000 Hz

    # About the resampling
    The output is converted to 24 kHz to match the non-mock decode_forward.
    """
    logger = getLogger("uvicorn")  # used from inside FastAPI / Uvicorn
    logger.info(
        "Sorry, decode_forward() is a mock. Return values are incorrect.",
    )
    wave, sr = tts(DUMMY_TEXT)
    # Derive the target length from the rate tts() actually reports instead
    # of assuming 48000; the result is identical when sr == 48000.
    wave = resample(
        wave.astype("int16"),
        24000 * len(wave) // sr,
    )
    return wave


def metas() -> str:
    """Return the mock speaker metadata as a JSON string."""
    return json.dumps(
        [
            {
                "name": "dummy1",
                "styles": [
                    {"name": "style0", "id": 0},
                    {"name": "style1", "id": 2},
                    {"name": "style2", "id": 4},
                    {"name": "style3", "id": 6},
                ],
                "speaker_uuid": "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff",
                "version": "mock",
            },
            {
                "name": "dummy2",
                "styles": [
                    {"name": "style0", "id": 1},
                    {"name": "style1", "id": 3},
                    {"name": "style2", "id": 5},
                    {"name": "style3", "id": 7},
                ],
                "speaker_uuid": "388f246b-8c41-4ac1-8e2d-5d79f3ff56d9",
                "version": "mock",
            },
            {
                "name": "dummy3",
                "styles": [
                    {"name": "style0", "id": 8},
                ],
                "speaker_uuid": "35b2c544-660e-401e-b503-0e14c635303a",
                "version": "mock",
            },
            {
                "name": "dummy4",
                "styles": [
                    {"name": "style0", "id": 9},
                ],
                "speaker_uuid": "b1a81618-b27b-40d2-b0ea-27a9ad408c4b",
                "version": "mock",
            },
        ]
    )


def supported_devices() -> str:
    """Return the mock device support table (CPU only) as a JSON string."""
    return json.dumps(
        {
            "cpu": True,
            "cuda": False,
        }
    )
class MockSynthesisEngine(SynthesisEngineBase):
    """
    SynthesisEngine [Mock]

    Stand-in engine that synthesizes with pyopenjtalk instead of the
    VOICEVOX core.
    """

    def __init__(
        self,
        speakers: str,
        supported_devices: Optional[str] = None,
    ):
        """
        __init__ [Mock]

        Parameters
        ----------
        speakers : str
            Value exposed through the ``speakers`` property
        supported_devices : Optional[str]
            Value exposed through the ``supported_devices`` property
        """
        super().__init__()

        self._speakers = speakers
        self._supported_devices = supported_devices
        self.default_sampling_rate = 24000

    @property
    def speakers(self) -> str:
        return self._speakers

    @property
    def supported_devices(self) -> Optional[str]:
        return self._supported_devices

    def replace_phoneme_length(
        self, accent_phrases: List[AccentPhrase], speaker_id: int
    ) -> List[AccentPhrase]:
        """
        replace_phoneme_length: return the given accent_phrases unchanged [Mock]

        Parameters
        ----------
        accent_phrases : List[AccentPhrase]
            List of accent phrases
        speaker_id : int
            Speaker

        Returns
        -------
        List[AccentPhrase]
            The same list of accent phrases (unchanged)
        """
        return accent_phrases

    def replace_mora_pitch(
        self, accent_phrases: List[AccentPhrase], speaker_id: int
    ) -> List[AccentPhrase]:
        """
        replace_mora_pitch: return the given accent_phrases unchanged [Mock]

        Parameters
        ----------
        accent_phrases : List[AccentPhrase]
            List of accent phrases
        speaker_id : int
            Speaker

        Returns
        -------
        List[AccentPhrase]
            The same list of accent phrases (unchanged)
        """
        return accent_phrases

    def _synthesis_impl(self, query: AudioQuery, speaker_id: int) -> np.ndarray:
        """
        synthesis: synthesize audio without using the voicevox core [Mock]

        Parameters
        ----------
        query : AudioQuery
            JSON obtained from the /audio_query API
        speaker_id : int
            Speaker

        Returns
        -------
        wave [npt.NDArray[np.int16]]
            Waveform data as a NumPy array
        """
        # recall text in katakana
        flatten_moras = to_flatten_moras(query.accent_phrases)
        kana_text = "".join([mora.text for mora in flatten_moras])

        wave = self.forward(kana_text)

        # volume
        wave *= query.volumeScale

        return wave.astype("int16")

    def forward(self, text: str, **kwargs: Dict[str, Any]) -> np.ndarray:
        """
        forward: tts via pyopenjtalk.tts()
        See also: docstring of SynthesisEngine [Mock]

        Parameters
        ----------
        text : str
            Input string (e.g. the sentence to read, written in katakana)

        Returns
        -------
        wave : np.ndarray
            Waveform data resampled to 24 kHz (the cast to int16 happens in
            ``_synthesis_impl``, not here)

        Note
        -------
        The synthesis done here does not reflect any tuning (pitch etc.).

        # Output of pyopenjtalk.tts()
        dtype=np.float64, 16 bit, mono 48000 Hz

        # About the resampling
        The output is converted to 24 kHz to match the non-mock
        implementation (decode_forward).
        """
        logger = getLogger("uvicorn")  # used from inside FastAPI / Uvicorn
        logger.info("[Mock] input text: %s", text)
        wave, sr = tts(text)
        # Use the rate reported by tts() rather than assuming 48000; the
        # result is identical when sr == 48000.
        wave = resample(wave, 24000 * len(wave) // sr)
        return wave
class EngineManifestLoader:
    """
    Builds an ``EngineManifest`` from a manifest JSON file, inlining the
    asset files that the manifest references relative to ``root_dir``.
    """

    def __init__(self, manifest_path: Path, root_dir: Path):
        """
        Parameters
        ----------
        manifest_path : Path
            Path of the manifest JSON file
        root_dir : Path
            Directory against which file names inside the manifest are resolved
        """
        self.manifest_path = manifest_path
        self.root_dir = root_dir

    def load_manifest(self) -> EngineManifest:
        """
        Read the manifest JSON and the files it points to.

        The icon is embedded as base64 text, the terms of service as plain
        text, and the update-info and dependency-license JSON files as lists
        of ``UpdateInfo`` / ``LicenseInfo``.

        Returns
        -------
        EngineManifest
            The fully populated manifest

        Raises
        ------
        KeyError
            If a required manifest key is missing
        OSError
            If the manifest or a referenced file cannot be read
        """
        # Keep the raw dict under its own name instead of rebinding one
        # variable from dict to model.
        raw = json.loads(self.manifest_path.read_text(encoding="utf-8"))
        root_dir = self.root_dir

        return EngineManifest(
            manifest_version=raw["manifest_version"],
            name=raw["name"],
            brand_name=raw["brand_name"],
            uuid=raw["uuid"],
            url=raw["url"],
            default_sampling_rate=raw["default_sampling_rate"],
            icon=b64encode((root_dir / raw["icon"]).read_bytes()).decode("utf-8"),
            terms_of_service=(root_dir / raw["terms_of_service"]).read_text(
                encoding="utf-8"
            ),
            update_infos=[
                UpdateInfo(**update_info)
                for update_info in json.loads(
                    (root_dir / raw["update_infos"]).read_text(encoding="utf-8")
                )
            ],
            dependency_licenses=[
                LicenseInfo(**license_info)
                for license_info in json.loads(
                    (root_dir / raw["dependency_licenses"]).read_text(
                        encoding="utf-8"
                    )
                )
            ],
            # EngineManifest declares both fields as Optional, so a manifest
            # that omits them must not fail with KeyError.
            downloadable_libraries_path=raw.get("downloadable_libraries_path"),
            downloadable_libraries_url=raw.get("downloadable_libraries_url"),
            # Each feature entry is an object; only its "value" is kept.
            supported_features={
                key: item["value"]
                for key, item in raw["supported_features"].items()
            },
        )
# Maximum number of mora-matching iterations allowed in
# _text_to_accent_phrase before it raises INFINITE_LOOP.
LOOP_LIMIT = 300
# Prefix that marks the following kana as having an unvoiced vowel.
UNVOICE_SYMBOL = "_"
# Marks the accent position inside a phrase.
ACCENT_SYMBOL = "'"
# Phrase delimiter without a pause.
NOPAUSE_DELIMITER = "/"
# Phrase delimiter that also inserts a pause mora.
PAUSE_DELIMITER = "、"
# Interrogative mark; parse_kana only accepts it at the end of a phrase.
# NOTE(review): the name suggests a full-width question mark - confirm that
# the literal below has the intended code point.
WIDE_INTERROGATION_MARK = "?"

# Lookup table from kana text (optionally prefixed with UNVOICE_SYMBOL) to a
# template Mora whose lengths and pitch are all zero.
text2mora_with_unvoice = {}
for text, (consonant, vowel) in openjtalk_text2mora.items():
    # Voiced entry: an empty consonant string is stored as None.
    text2mora_with_unvoice[text] = Mora(
        text=text,
        consonant=consonant if len(consonant) > 0 else None,
        consonant_length=0 if len(consonant) > 0 else None,
        vowel=vowel,
        vowel_length=0,
        pitch=0,
        is_interrogative=False,
    )
    # Only the five plain vowels get an unvoiced variant. Its key carries the
    # UNVOICE_SYMBOL prefix, its text does not, and its vowel is upper-cased.
    if vowel in ["a", "i", "u", "e", "o"]:
        text2mora_with_unvoice[UNVOICE_SYMBOL + text] = Mora(
            text=text,
            consonant=consonant if len(consonant) > 0 else None,
            consonant_length=0 if len(consonant) > 0 else None,
            vowel=vowel.upper(),
            vowel_length=0,
            pitch=0,
            is_interrogative=False,
        )
def parse_kana(text: str) -> List[AccentPhrase]:
    """
    Parse AquesTalk-like kana notation into accent phrases whose lengths and
    pitches are not yet specified.

    The text is split at PAUSE_DELIMITER / NOPAUSE_DELIMITER. A phrase may
    end with WIDE_INTERROGATION_MARK, which is stripped and recorded as
    ``is_interrogative``. A phrase followed by PAUSE_DELIMITER receives a
    pause mora.

    Raises ParseKanaError for empty input, an empty phrase, an interrogation
    mark that is not at the end of its phrase, or any error reported by
    ``_text_to_accent_phrase``.
    """
    if not text:
        raise ParseKanaError(ParseKanaErrorCode.EMPTY_PHRASE, position=1)

    delimiters = (PAUSE_DELIMITER, NOPAUSE_DELIMITER)
    accent_phrases: List[AccentPhrase] = []
    segment_start = 0
    text_length = len(text)

    # The extra index `text_length` acts as a sentinel closing the last phrase.
    for cursor in range(text_length + 1):
        at_end = cursor == text_length
        if not at_end and text[cursor] not in delimiters:
            continue

        segment = text[segment_start:cursor]
        if not segment:
            raise ParseKanaError(
                ParseKanaErrorCode.EMPTY_PHRASE,
                position=str(len(accent_phrases) + 1),
            )
        segment_start = cursor + 1

        has_question = WIDE_INTERROGATION_MARK in segment
        if has_question:
            # The mark is only allowed as the very last character.
            if WIDE_INTERROGATION_MARK in segment[:-1]:
                raise ParseKanaError(
                    ParseKanaErrorCode.INTERROGATION_MARK_NOT_AT_END, text=segment
                )
            segment = segment.replace(WIDE_INTERROGATION_MARK, "")

        parsed: AccentPhrase = _text_to_accent_phrase(segment)
        if not at_end and text[cursor] == PAUSE_DELIMITER:
            parsed.pause_mora = Mora(
                text="、",
                consonant=None,
                consonant_length=None,
                vowel="pau",
                vowel_length=0,
                pitch=0,
            )
        parsed.is_interrogative = has_question

        accent_phrases.append(parsed)

    return accent_phrases
class CoreSpeaker(BaseModel):
    """
    コアに含まれるスピーカー情報
    """

    # Speaker information provided by the core.
    # NOTE(review): the docstring above is kept verbatim because pydantic
    # presumably exposes model docstrings in the generated schema - confirm
    # before translating it.

    name: str = Field(title="名前")
    speaker_uuid: str = Field(title="スピーカーのUUID")
    styles: List[SpeakerStyle] = Field(title="スピーカースタイルの一覧")
    # `title` must be passed by keyword: the first positional argument of
    # Field() is the default value, so the previous positional call made
    # `version` optional with the title text as its default.
    version: str = Field(title="スピーカーのバージョン")
class StyleInfo(BaseModel):
    """
    スタイルの追加情報
    """

    # Additional per-style information; per the field titles, the binary
    # assets are carried as base64-encoded strings.

    # Style ID.
    id: int = Field(title="スタイルID")
    # Base64-encoded icon of this style.
    icon: str = Field(title="当該スタイルのアイコンをbase64エンコードしたもの")
    # Base64-encoded portrait.png of this style; the only Optional field.
    portrait: Optional[str] = Field(title="当該スタイルのportrait.pngをbase64エンコードしたもの")
    # Base64-encoded wav voice samples.
    voice_samples: List[str] = Field(title="voice_sampleのwavファイルをbase64エンコードしたもの")
def construct_lookup(speakers: List[Speaker]) -> Dict[int, Tuple[Speaker, StyleInfo]]:
    """
    Build a lookup table from style ID to ``(speaker, style)``.

    Every entry of ``speaker.styles`` is registered under its ``id`` together
    with the speaker that owns it. If the same ID occurs more than once, the
    last occurrence wins.

    NOTE(review): the stored style objects come straight from
    ``speaker.styles``; confirm that the ``StyleInfo`` in the return
    annotation is the intended type.
    """

    return {
        style.id: (speaker, style)
        for speaker in speakers
        for style in speaker.styles
    }
import Metas, MetasStore 2 | 3 | __all__ = [ 4 | "Metas", 5 | "MetasStore", 6 | ] 7 | -------------------------------------------------------------------------------- /voicevox_engine/mora_list.py: -------------------------------------------------------------------------------- 1 | """ 2 | 以下のモーラ対応表はOpenJTalkのソースコードから取得し、 3 | カタカナ表記とモーラが一対一対応するように改造した。 4 | ライセンス表記: 5 | ----------------------------------------------------------------- 6 | The Japanese TTS System "Open JTalk" 7 | developed by HTS Working Group 8 | http://open-jtalk.sourceforge.net/ 9 | ----------------------------------------------------------------- 10 | 11 | Copyright (c) 2008-2014 Nagoya Institute of Technology 12 | Department of Computer Science 13 | 14 | All rights reserved. 15 | 16 | Redistribution and use in source and binary forms, with or 17 | without modification, are permitted provided that the following 18 | conditions are met: 19 | 20 | - Redistributions of source code must retain the above copyright 21 | notice, this list of conditions and the following disclaimer. 22 | - Redistributions in binary form must reproduce the above 23 | copyright notice, this list of conditions and the following 24 | disclaimer in the documentation and/or other materials provided 25 | with the distribution. 26 | - Neither the name of the HTS working group nor the names of its 27 | contributors may be used to endorse or promote products derived 28 | from this software without specific prior written permission. 29 | 30 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 31 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 32 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 33 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 34 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS 35 | BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 36 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 37 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 38 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 39 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 40 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 41 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 42 | POSSIBILITY OF SUCH DAMAGE. 43 | """ 44 | _mora_list_minimum = [ 45 | ["ヴォ", "v", "o"], 46 | ["ヴェ", "v", "e"], 47 | ["ヴィ", "v", "i"], 48 | ["ヴァ", "v", "a"], 49 | ["ヴ", "v", "u"], 50 | ["ン", "", "N"], 51 | ["ワ", "w", "a"], 52 | ["ロ", "r", "o"], 53 | ["レ", "r", "e"], 54 | ["ル", "r", "u"], 55 | ["リョ", "ry", "o"], 56 | ["リュ", "ry", "u"], 57 | ["リャ", "ry", "a"], 58 | ["リェ", "ry", "e"], 59 | ["リ", "r", "i"], 60 | ["ラ", "r", "a"], 61 | ["ヨ", "y", "o"], 62 | ["ユ", "y", "u"], 63 | ["ヤ", "y", "a"], 64 | ["モ", "m", "o"], 65 | ["メ", "m", "e"], 66 | ["ム", "m", "u"], 67 | ["ミョ", "my", "o"], 68 | ["ミュ", "my", "u"], 69 | ["ミャ", "my", "a"], 70 | ["ミェ", "my", "e"], 71 | ["ミ", "m", "i"], 72 | ["マ", "m", "a"], 73 | ["ポ", "p", "o"], 74 | ["ボ", "b", "o"], 75 | ["ホ", "h", "o"], 76 | ["ペ", "p", "e"], 77 | ["ベ", "b", "e"], 78 | ["ヘ", "h", "e"], 79 | ["プ", "p", "u"], 80 | ["ブ", "b", "u"], 81 | ["フォ", "f", "o"], 82 | ["フェ", "f", "e"], 83 | ["フィ", "f", "i"], 84 | ["ファ", "f", "a"], 85 | ["フ", "f", "u"], 86 | ["ピョ", "py", "o"], 87 | ["ピュ", "py", "u"], 88 | ["ピャ", "py", "a"], 89 | ["ピェ", "py", "e"], 90 | ["ピ", "p", "i"], 91 | ["ビョ", "by", "o"], 92 | ["ビュ", "by", "u"], 93 | ["ビャ", "by", "a"], 94 | ["ビェ", "by", "e"], 95 | ["ビ", "b", "i"], 96 | ["ヒョ", "hy", "o"], 97 | ["ヒュ", "hy", "u"], 98 | ["ヒャ", "hy", "a"], 99 | ["ヒェ", "hy", "e"], 100 | ["ヒ", "h", "i"], 101 | ["パ", "p", "a"], 102 | ["バ", "b", "a"], 103 | ["ハ", "h", "a"], 104 | ["ノ", "n", "o"], 105 | ["ネ", "n", "e"], 106 | ["ヌ", 
"n", "u"], 107 | ["ニョ", "ny", "o"], 108 | ["ニュ", "ny", "u"], 109 | ["ニャ", "ny", "a"], 110 | ["ニェ", "ny", "e"], 111 | ["ニ", "n", "i"], 112 | ["ナ", "n", "a"], 113 | ["ドゥ", "d", "u"], 114 | ["ド", "d", "o"], 115 | ["トゥ", "t", "u"], 116 | ["ト", "t", "o"], 117 | ["デョ", "dy", "o"], 118 | ["デュ", "dy", "u"], 119 | ["デャ", "dy", "a"], 120 | ["デェ", "dy", "e"], 121 | ["ディ", "d", "i"], 122 | ["デ", "d", "e"], 123 | ["テョ", "ty", "o"], 124 | ["テュ", "ty", "u"], 125 | ["テャ", "ty", "a"], 126 | ["ティ", "t", "i"], 127 | ["テ", "t", "e"], 128 | ["ツォ", "ts", "o"], 129 | ["ツェ", "ts", "e"], 130 | ["ツィ", "ts", "i"], 131 | ["ツァ", "ts", "a"], 132 | ["ツ", "ts", "u"], 133 | ["ッ", "", "cl"], 134 | ["チョ", "ch", "o"], 135 | ["チュ", "ch", "u"], 136 | ["チャ", "ch", "a"], 137 | ["チェ", "ch", "e"], 138 | ["チ", "ch", "i"], 139 | ["ダ", "d", "a"], 140 | ["タ", "t", "a"], 141 | ["ゾ", "z", "o"], 142 | ["ソ", "s", "o"], 143 | ["ゼ", "z", "e"], 144 | ["セ", "s", "e"], 145 | ["ズィ", "z", "i"], 146 | ["ズ", "z", "u"], 147 | ["スィ", "s", "i"], 148 | ["ス", "s", "u"], 149 | ["ジョ", "j", "o"], 150 | ["ジュ", "j", "u"], 151 | ["ジャ", "j", "a"], 152 | ["ジェ", "j", "e"], 153 | ["ジ", "j", "i"], 154 | ["ショ", "sh", "o"], 155 | ["シュ", "sh", "u"], 156 | ["シャ", "sh", "a"], 157 | ["シェ", "sh", "e"], 158 | ["シ", "sh", "i"], 159 | ["ザ", "z", "a"], 160 | ["サ", "s", "a"], 161 | ["ゴ", "g", "o"], 162 | ["コ", "k", "o"], 163 | ["ゲ", "g", "e"], 164 | ["ケ", "k", "e"], 165 | ["グヮ", "gw", "a"], 166 | ["グ", "g", "u"], 167 | ["クヮ", "kw", "a"], 168 | ["ク", "k", "u"], 169 | ["ギョ", "gy", "o"], 170 | ["ギュ", "gy", "u"], 171 | ["ギャ", "gy", "a"], 172 | ["ギェ", "gy", "e"], 173 | ["ギ", "g", "i"], 174 | ["キョ", "ky", "o"], 175 | ["キュ", "ky", "u"], 176 | ["キャ", "ky", "a"], 177 | ["キェ", "ky", "e"], 178 | ["キ", "k", "i"], 179 | ["ガ", "g", "a"], 180 | ["カ", "k", "a"], 181 | ["オ", "", "o"], 182 | ["エ", "", "e"], 183 | ["ウォ", "w", "o"], 184 | ["ウェ", "w", "e"], 185 | ["ウィ", "w", "i"], 186 | ["ウ", "", "u"], 187 | ["イェ", "y", "e"], 188 | ["イ", "", "i"], 189 | ["ア", "", "a"], 
# FIXME: ndarray type hint, https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder/blob/2b64f86197573497c685c785c6e0e743f407b63e/pyworld/pyworld.pyx#L398 # noqa
@dataclass(frozen=True)
class MorphingParameter:
    """
    Immutable bundle of the WORLD analysis results needed to morph a base
    voice towards a target voice (built by ``create_morphing_parameter``).
    """

    # Sampling rate that was passed to the WORLD analysis calls.
    fs: int
    # Frame period used for the f0 analysis and later for synthesis.
    # NOTE(review): presumably milliseconds, as in pyworld - confirm.
    frame_period: float
    # f0 contour of the base wave (first result of pw.harvest).
    base_f0: np.ndarray
    # Aperiodicity of the base wave (pw.d4c).
    base_aperiodicity: np.ndarray
    # Spectral envelope of the base wave (pw.cheaptrick).
    base_spectrogram: np.ndarray
    # Spectral envelope of the target wave, resized to base_spectrogram's shape.
    target_spectrogram: np.ndarray
def is_synthesis_morphing_permitted(
    speaker_lookup: Dict[int, Tuple[Speaker, StyleInfo]],
    base_speaker: int,
    target_speaker: int,
) -> bool:
    """
    Return whether morphing between the two given styles is permitted.

    Parameters
    ----------
    speaker_lookup : Dict[int, Tuple[Speaker, StyleInfo]]
        Table from style ID to ``(speaker, style)``
    base_speaker : int
        Style ID of the base speaker
    target_speaker : int
        Style ID of the target speaker

    Returns
    -------
    bool
        False if either speaker forbids morphing; for SELF_ONLY on either
        side, True only when both styles belong to the same speaker UUID;
        otherwise True only when both sides are ALL.

    Raises
    ------
    SpeakerNotFoundError
        If either style ID is missing from ``speaker_lookup``. The base
        speaker is reported first when both are missing.
    """

    # Use .get() so that an unknown ID reaches the SpeakerNotFoundError
    # below; plain indexing raised KeyError and left this check unreachable.
    base_speaker_data = speaker_lookup.get(base_speaker)
    target_speaker_data = speaker_lookup.get(target_speaker)

    if base_speaker_data is None or target_speaker_data is None:
        raise SpeakerNotFoundError(
            base_speaker if base_speaker_data is None else target_speaker
        )

    base_speaker_info, _ = base_speaker_data
    target_speaker_info, _ = target_speaker_data

    base_speaker_uuid = base_speaker_info.speaker_uuid
    target_speaker_uuid = target_speaker_info.speaker_uuid

    base_speaker_morphing_info: SpeakerSupportPermittedSynthesisMorphing = (
        base_speaker_info.supported_features.permitted_synthesis_morphing
    )

    target_speaker_morphing_info: SpeakerSupportPermittedSynthesisMorphing = (
        target_speaker_info.supported_features.permitted_synthesis_morphing
    )

    # Forbidden by either side -> False
    if (
        base_speaker_morphing_info == SpeakerSupportPermittedSynthesisMorphing.NOTHING
        or target_speaker_morphing_info
        == SpeakerSupportPermittedSynthesisMorphing.NOTHING
    ):
        return False
    # Restricted to the same speaker by either side -> compare speaker UUIDs
    if (
        base_speaker_morphing_info == SpeakerSupportPermittedSynthesisMorphing.SELF_ONLY
        or target_speaker_morphing_info
        == SpeakerSupportPermittedSynthesisMorphing.SELF_ONLY
    ):
        return base_speaker_uuid == target_speaker_uuid
    # Defensive final check that both sides explicitly allow everything
    return (
        base_speaker_morphing_info == SpeakerSupportPermittedSynthesisMorphing.ALL
        and target_speaker_morphing_info == SpeakerSupportPermittedSynthesisMorphing.ALL
    )
def synthesis_morphing(
    morph_param: MorphingParameter,
    morph_rate: float,
    output_fs: int,
    output_stereo: bool = False,
) -> np.ndarray:
    """
    Generate morphed audio from the given parameters at the given ratio.

    Parameters
    ----------
    morph_param : MorphingParameter
        Parameters created by `synthesis_morphing_parameter` or
        `create_morphing_parameter`

    morph_rate : float
        Morphing ratio.
        0.0 gives the base speaker, 1.0 approaches the target speaker.

    output_fs : int
        Sampling rate of the returned audio; the wave is resampled when it
        differs from ``morph_param.fs``

    output_stereo : bool
        When True, the mono wave is duplicated into two columns

    Returns
    -------
    generated : np.ndarray
        The morphed audio

    Raises
    -------
    ValueError
        If morph_rate is outside [0, 1]
    """

    if morph_rate > 1.0 or morph_rate < 0.0:
        raise ValueError("morph_rateは0.0から1.0の範囲で指定してください")

    # Linear blend of the two spectral envelopes.
    base_weight = 1.0 - morph_rate
    blended_spectrogram = (
        morph_param.base_spectrogram * base_weight
        + morph_param.target_spectrogram * morph_rate
    )

    # Pitch and aperiodicity always come from the base speaker.
    wave = pw.synthesize(
        morph_param.base_f0,
        blended_spectrogram,
        morph_param.base_aperiodicity,
        morph_param.fs,
        morph_param.frame_period,
    )

    # TODO: share this with the resampling done in synthesis_engine.py
    source_fs = morph_param.fs
    if output_fs != source_fs:
        wave = resample(wave, output_fs * len(wave) // source_fs)

    if not output_stereo:
        return wave
    return np.array([wave, wave]).T
Dict[WordTypes, PartOfSpeechDetail] = { 14 | WordTypes.PROPER_NOUN: PartOfSpeechDetail( 15 | part_of_speech="名詞", 16 | part_of_speech_detail_1="固有名詞", 17 | part_of_speech_detail_2="一般", 18 | part_of_speech_detail_3="*", 19 | context_id=1348, 20 | cost_candidates=[ 21 | -988, 22 | 3488, 23 | 4768, 24 | 6048, 25 | 7328, 26 | 8609, 27 | 8734, 28 | 8859, 29 | 8984, 30 | 9110, 31 | 14176, 32 | ], 33 | accent_associative_rules=[ 34 | "*", 35 | "C1", 36 | "C2", 37 | "C3", 38 | "C4", 39 | "C5", 40 | ], 41 | ), 42 | WordTypes.COMMON_NOUN: PartOfSpeechDetail( 43 | part_of_speech="名詞", 44 | part_of_speech_detail_1="一般", 45 | part_of_speech_detail_2="*", 46 | part_of_speech_detail_3="*", 47 | context_id=1345, 48 | cost_candidates=[ 49 | -4445, 50 | 49, 51 | 1473, 52 | 2897, 53 | 4321, 54 | 5746, 55 | 6554, 56 | 7362, 57 | 8170, 58 | 8979, 59 | 15001, 60 | ], 61 | accent_associative_rules=[ 62 | "*", 63 | "C1", 64 | "C2", 65 | "C3", 66 | "C4", 67 | "C5", 68 | ], 69 | ), 70 | WordTypes.VERB: PartOfSpeechDetail( 71 | part_of_speech="動詞", 72 | part_of_speech_detail_1="自立", 73 | part_of_speech_detail_2="*", 74 | part_of_speech_detail_3="*", 75 | context_id=642, 76 | cost_candidates=[ 77 | 3100, 78 | 6160, 79 | 6360, 80 | 6561, 81 | 6761, 82 | 6962, 83 | 7414, 84 | 7866, 85 | 8318, 86 | 8771, 87 | 13433, 88 | ], 89 | accent_associative_rules=[ 90 | "*", 91 | ], 92 | ), 93 | WordTypes.ADJECTIVE: PartOfSpeechDetail( 94 | part_of_speech="形容詞", 95 | part_of_speech_detail_1="自立", 96 | part_of_speech_detail_2="*", 97 | part_of_speech_detail_3="*", 98 | context_id=20, 99 | cost_candidates=[ 100 | 1527, 101 | 3266, 102 | 3561, 103 | 3857, 104 | 4153, 105 | 4449, 106 | 5149, 107 | 5849, 108 | 6549, 109 | 7250, 110 | 10001, 111 | ], 112 | accent_associative_rules=[ 113 | "*", 114 | ], 115 | ), 116 | WordTypes.SUFFIX: PartOfSpeechDetail( 117 | part_of_speech="名詞", 118 | part_of_speech_detail_1="接尾", 119 | part_of_speech_detail_2="一般", 120 | part_of_speech_detail_3="*", 121 | context_id=1358, 
    def __init__(
        self,
        preset_path: Path,
    ) -> None:
        """
        Create a manager for the preset YAML file at ``preset_path``.

        Nothing is read here; the file is loaded by ``load_presets``.

        Parameters
        ----------
        preset_path : Path
            Path of the preset YAML file
        """
        # Presets from the most recent successful load.
        self.presets: List[Preset] = []
        # Modification time of the file at the last load; load_presets
        # compares it with the current st_mtime to skip re-reading.
        self.last_modified_time: float = 0
        self.preset_path = preset_path
プリセットのYAMLファイルを読み込む 23 | 24 | Returns 25 | ------- 26 | ret: List[Preset] 27 | プリセットのリスト 28 | """ 29 | 30 | # 設定ファイルのタイムスタンプを確認 31 | try: 32 | _last_modified_time = self.preset_path.stat().st_mtime 33 | if _last_modified_time == self.last_modified_time: 34 | return self.presets 35 | except OSError: 36 | raise PresetError("プリセットの設定ファイルが見つかりません") 37 | 38 | with open(self.preset_path, mode="r", encoding="utf-8") as f: 39 | obj = yaml.safe_load(f) 40 | if obj is None: 41 | raise PresetError("プリセットの設定ファイルが空の内容です") 42 | 43 | try: 44 | _presets = parse_obj_as(List[Preset], obj) 45 | except ValidationError: 46 | raise PresetError("プリセットの設定ファイルにミスがあります") 47 | 48 | # idが一意か確認 49 | if len([preset.id for preset in _presets]) != len( 50 | {preset.id for preset in _presets} 51 | ): 52 | raise PresetError("プリセットのidに重複があります") 53 | 54 | self.presets = _presets 55 | self.last_modified_time = _last_modified_time 56 | return self.presets 57 | 58 | def add_preset(self, preset: Preset): 59 | """ 60 | YAMLファイルに新規のプリセットを追加する 61 | 62 | Parameters 63 | ---------- 64 | preset : Preset 65 | 追加するプリセットを渡す 66 | 67 | Returns 68 | ------- 69 | ret: int 70 | 追加したプリセットのプリセットID 71 | """ 72 | 73 | # 手動でファイルが更新されているかも知れないので、最新のYAMLファイルを読み直す 74 | self.load_presets() 75 | 76 | # IDが0未満、または存在するIDなら新しいIDを決定し、配列に追加 77 | if preset.id < 0 or preset.id in {preset.id for preset in self.presets}: 78 | preset.id = max([preset.id for preset in self.presets]) + 1 79 | self.presets.append(preset) 80 | 81 | # ファイルに書き込み 82 | try: 83 | with open(self.preset_path, mode="w", encoding="utf-8") as f: 84 | yaml.safe_dump( 85 | [preset.dict() for preset in self.presets], 86 | f, 87 | allow_unicode=True, 88 | sort_keys=False, 89 | ) 90 | except Exception as err: 91 | self.presets.pop() 92 | if isinstance(err, FileNotFoundError): 93 | raise PresetError("プリセットの設定ファイルに書き込み失敗しました") 94 | else: 95 | raise err 96 | 97 | return preset.id 98 | 99 | def update_preset(self, preset: Preset): 100 | """ 101 | YAMLファイルのプリセットを更新する 102 | 103 
def delete_preset(self, id: int):
    """
    Delete a preset from the YAML file.

    Parameters
    ----------
    id: int
        ID of the preset to delete

    Returns
    -------
    ret: int
        ID of the preset that was deleted

    Raises
    ------
    PresetError
        If no preset with the given id exists, or if the preset file path
        does not exist when it is opened for writing.
    """

    # The file may have been edited by hand, so reload the latest YAML first
    self.load_presets()

    # Locate the preset with the requested id and remove it from memory
    for index, candidate in enumerate(self.presets):
        if candidate.id == id:
            removed = self.presets.pop(index)
            break
    else:
        raise PresetError("削除対象のプリセットが存在しません")

    # Write to file
    try:
        with open(self.preset_path, mode="w", encoding="utf-8") as f:
            yaml.safe_dump(
                [p.dict() for p in self.presets],
                f,
                allow_unicode=True,
                sort_keys=False,
            )
    except Exception as err:
        # Restore the removed preset on any write failure (not only a
        # missing file), matching add_preset / update_preset
        self.presets.insert(index, removed)
        if isinstance(err, FileNotFoundError):
            raise PresetError("プリセットの設定ファイルに書き込み失敗しました") from err
        raise

    return id
def dump_setting_file(self, settings: Setting) -> None:
    """
    Write the given settings to the setting file as YAML.

    Parameters
    ----------
    settings : Setting
        Settings to persist; serialized via ``settings.dict()``.
    """
    settings_dict = settings.dict()

    # The destination directory may not exist yet; without this, open()
    # below fails with FileNotFoundError
    self.setting_file_path.parent.mkdir(parents=True, exist_ok=True)

    with open(self.setting_file_path, mode="w", encoding="utf-8") as f:
        yaml.safe_dump(settings_dict, f)
def make_synthesis_engines(
    use_gpu: bool,
    voicelib_dirs: Optional[List[Path]] = None,
    voicevox_dir: Optional[Path] = None,
    runtime_dirs: Optional[List[Path]] = None,
    cpu_num_threads: Optional[int] = None,
    enable_mock: bool = True,
    load_all_models: bool = False,
) -> Dict[str, SynthesisEngineBase]:
    """
    Load the voice libraries and build the synthesis engines.

    Parameters
    ----------
    use_gpu: bool
        Whether the voice library should use the GPU
    voicelib_dirs: List[Path], optional, default=None
        Directories that contain the voice library itself.
        The passed list is not modified.
    voicevox_dir: Path, optional, default=None
        Directory containing a compiled voicevox or voicevox_engine.
        When given, it is appended to both voicelib_dirs and runtime_dirs.
    runtime_dirs: List[Path], optional, default=None
        Directories containing the libraries used by the core.
        When None, voicevox_dir is used if given, otherwise engine_root().
        The passed list is not modified.
    cpu_num_threads: int, optional, default=None
        Number of CPU threads the voice library uses for inference.
        None is treated as 0, which leaves the decision to the library.
    enable_mock: bool, optional, default=True
        When True, no core is loaded and only the mock engine is registered
        under version "0.0.0". When False, cores are loaded from
        voicelib_dirs and from the user's "core_libraries" directory.
    load_all_models: bool, optional, default=False
        Whether to load every model at startup

    Returns
    -------
    Dict[str, SynthesisEngineBase]
        Synthesis engines keyed by core version string
    """
    if cpu_num_threads == 0 or cpu_num_threads is None:
        print(
            "Warning: cpu_num_threads is set to 0. "
            + "( The library leaves the decision to the synthesis runtime )",
            file=sys.stderr,
        )
        cpu_num_threads = 0

    if voicevox_dir is not None:
        # Build new lists instead of appending in place, so the caller's
        # list arguments are never mutated
        voicelib_dirs = list(voicelib_dirs or []) + [voicevox_dir]
        runtime_dirs = list(runtime_dirs or []) + [voicevox_dir]
    else:
        root_dir = engine_root()
        if voicelib_dirs is None:
            voicelib_dirs = [root_dir]
        if runtime_dirs is None:
            runtime_dirs = [root_dir]

    voicelib_dirs = [p.expanduser() for p in voicelib_dirs]
    runtime_dirs = [p.expanduser() for p in runtime_dirs]

    load_runtime_lib(runtime_dirs)

    synthesis_engines: Dict[str, SynthesisEngineBase] = {}

    if not enable_mock:

        def load_core_library(core_dir: Path, suppress_error: bool = False):
            """
            Load the core found in the given directory.
            A user directory may not contain a core at all, so callers can
            suppress errors for such directories.
            """
            try:
                core = CoreWrapper(use_gpu, core_dir, cpu_num_threads, load_all_models)
                metas = json.loads(core.metas())
                core_version = metas[0]["version"]
                if core_version in synthesis_engines:
                    # The first core loaded for a given version wins
                    print(
                        "Warning: Core loading is skipped because of version duplication.",
                        file=sys.stderr,
                    )
                else:
                    synthesis_engines[core_version] = SynthesisEngine(core=core)
            except Exception:
                if not suppress_error:
                    raise

        for core_dir in voicelib_dirs:
            load_core_library(core_dir)

        # Load cores located in the user directory
        core_libraries_dir = get_save_dir() / "core_libraries"
        # parents=True: the save directory itself may not exist yet
        core_libraries_dir.mkdir(parents=True, exist_ok=True)
        user_voicelib_dirs = [core_libraries_dir]
        user_voicelib_dirs.extend(
            path for path in core_libraries_dir.glob("*") if path.is_dir()
        )

        for core_dir in user_voicelib_dirs:
            load_core_library(core_dir, suppress_error=True)

    else:
        # Register the mock engine
        from ..dev.core import metas as mock_metas
        from ..dev.core import supported_devices as mock_supported_devices
        from ..dev.synthesis_engine import MockSynthesisEngine

        if "0.0.0" not in synthesis_engines:
            synthesis_engines["0.0.0"] = MockSynthesisEngine(
                speakers=mock_metas(), supported_devices=mock_supported_devices()
            )

    return synthesis_engines
def mutex_wrapper(lock: threading.Lock):
    """
    Build a decorator that runs the decorated function while holding ``lock``.

    Parameters
    ----------
    lock : threading.Lock
        Lock acquired before each call and released afterwards, including
        when the call raises.

    Returns
    -------
    wrap : callable
        Decorator whose result forwards all positional and keyword
        arguments to the wrapped function and returns its result.
    """
    # Local import so this block does not depend on a file-level import
    import functools

    def wrap(f):
        # Preserve __name__, __doc__ etc. of the decorated function
        @functools.wraps(f)
        def func(*args, **kw):
            with lock:
                return f(*args, **kw)

        return func

    return wrap
def is_development() -> bool:
    """
    Tell whether the engine is running as a development build.

    The environment counts as development when the code has not been
    compiled with Nuitka or bundled with PyInstaller.
    """
    # A Nuitka build has __compiled__ in the module globals
    built_with_nuitka = "__compiled__" in globals()
    # A PyInstaller build has sys.frozen set
    built_with_pyinstaller = bool(getattr(sys, "frozen", False))
    return not (built_with_nuitka or built_with_pyinstaller)