├── .gitattributes ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bugreport.md │ ├── featurerequest.md │ └── question.md ├── PULL_REQUEST_TEMPLATE.md ├── labeler.yml └── workflows │ ├── build-docker.yml │ ├── build.yml │ ├── coverage-comment.yml │ ├── labeler.yml │ ├── release-test-docker.yml │ ├── release-test.yml │ ├── test.yml │ ├── typos.yml │ └── upload-gh-pages.yml ├── .gitignore ├── .pre-commit-config.yaml ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── _typos.toml ├── build_util ├── check_release_build.py ├── codesign.bash ├── codesign_sv.bash ├── create_venv_and_generate_licenses.bash ├── merge_update_infos.py ├── modify_pyinstaller.bash └── process_voicevox_resource.bash ├── default.csv ├── default_setting.yml ├── docs ├── VOICEVOX音声合成エンジンとの連携.md ├── api │ └── .gitkeep ├── licenses │ ├── cuda │ │ └── EULA.txt │ ├── cudnn │ │ └── LICENSE │ ├── open_jtalk │ │ ├── COPYING │ │ ├── mecab-naist-jdic │ │ │ └── COPYING │ │ └── mecab │ │ │ └── COPYING │ └── world │ │ └── LICENSE.txt └── res │ └── マルチエンジン概念図.svg ├── engine_manifest.json ├── engine_manifest_assets ├── dependency_licenses.json ├── downloadable_libraries.json ├── icon.png ├── terms_of_service.md └── update_infos.json ├── generate_licenses.py ├── get_cost_candidates.py ├── library_info └── .gitkeep ├── make_docs.py ├── poetry.lock ├── presets.yaml ├── pyproject.toml ├── requirements-dev.txt ├── requirements-license.txt ├── requirements-test.txt ├── requirements.txt ├── run.py ├── run.spec ├── setup.cfg ├── speaker_info ├── 35b2c544-660e-401e-b503-0e14c635303a │ ├── icons │ │ └── 8.png │ ├── metas.json │ ├── policy.md │ ├── portrait.png │ ├── portraits │ │ └── 8.png │ └── voice_samples │ │ ├── 8_001.wav │ │ ├── 8_002.wav │ │ └── 8_003.wav ├── 388f246b-8c41-4ac1-8e2d-5d79f3ff56d9 │ ├── icons │ │ ├── 1.png │ │ ├── 3.png │ │ ├── 5.png │ │ └── 7.png │ ├── metas.json │ ├── policy.md │ ├── portrait.png │ ├── portraits │ │ └── 3.png │ └── voice_samples │ │ ├── 1_001.wav │ │ ├── 1_002.wav │ │ ├── 
1_003.wav │ │ ├── 3_001.wav │ │ ├── 3_002.wav │ │ ├── 3_003.wav │ │ ├── 5_001.wav │ │ ├── 5_002.wav │ │ ├── 5_003.wav │ │ ├── 7_001.wav │ │ ├── 7_002.wav │ │ └── 7_003.wav ├── 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff │ ├── icons │ │ ├── 0.png │ │ ├── 2.png │ │ ├── 4.png │ │ └── 6.png │ ├── metas.json │ ├── policy.md │ ├── portrait.png │ ├── portraits │ │ ├── 0.png │ │ ├── 2.png │ │ ├── 4.png │ │ └── 6.png │ └── voice_samples │ │ ├── 0_001.wav │ │ ├── 0_002.wav │ │ ├── 0_003.wav │ │ ├── 2_001.wav │ │ ├── 2_002.wav │ │ ├── 2_003.wav │ │ ├── 4_001.wav │ │ ├── 4_002.wav │ │ ├── 4_003.wav │ │ ├── 6_001.wav │ │ ├── 6_002.wav │ │ └── 6_003.wav └── b1a81618-b27b-40d2-b0ea-27a9ad408c4b │ ├── icons │ └── 9.png │ ├── metas.json │ ├── policy.md │ ├── portrait.png │ └── voice_samples │ ├── 9_001.wav │ ├── 9_002.wav │ └── 9_003.wav ├── test ├── .gitignore ├── __init__.py ├── e2e │ ├── conftest.py │ └── test_validate_version.py ├── presets-test-1.yaml ├── presets-test-2.yaml ├── presets-test-3.yaml ├── presets-test-4.yaml ├── setting-test-load-1.yaml ├── setting-test-load-2.yaml ├── setting-test-load-3.yaml ├── test_acoustic_feature_extractor.py ├── test_connect_base64_waves.py ├── test_core_version_utility.py ├── test_downloadable_library.py ├── test_full_context_label.py ├── test_kana_parser.py ├── test_mock_synthesis_engine.py ├── test_mora_list.py ├── test_mora_to_text.py ├── test_preset.py ├── test_setting.py ├── test_sv_models.py ├── test_synthesis_engine.py ├── test_synthesis_engine_base.py ├── test_user_dict.py ├── test_user_dict_model.py ├── test_word_types.py ├── testdata │ ├── model │ │ ├── b351e601-3e98-40d4-ac1d-19529d932c22 │ │ │ ├── decoder_model.onnx │ │ │ ├── embedder_model.onnx │ │ │ └── variance_model.onnx │ │ └── libraries.json │ └── speaker_info │ │ └── 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff │ │ ├── icons │ │ └── 0.png │ │ ├── policy.md │ │ ├── portrait.png │ │ └── voice_samples │ │ └── 0_001.wav └── vvlib_manifest.json ├── ui_template └── ui.html └── 
voicevox_engine ├── __init__.py ├── acoustic_feature_extractor.py ├── cancellable_engine.py ├── dev ├── core │ ├── __init__.py │ └── mock.py └── synthesis_engine │ ├── __init__.py │ └── mock.py ├── downloadable_library.py ├── engine_manifest ├── EngineManifest.py ├── EngineManifestLoader.py └── __init__.py ├── full_context_label.py ├── kana_parser.py ├── metas ├── Metas.py ├── MetasStore.py └── __init__.py ├── model.py ├── mora_list.py ├── morphing.py ├── part_of_speech_data.py ├── preset ├── Preset.py ├── PresetError.py ├── PresetManager.py └── __init__.py ├── setting ├── Setting.py ├── SettingLoader.py └── __init__.py ├── sv_model.py ├── synthesis_engine ├── __init__.py ├── core_wrapper.py ├── make_synthesis_engines.py ├── synthesis_engine.py └── synthesis_engine_base.py ├── user_dict.py └── utility ├── __init__.py ├── connect_base64_waves.py ├── copy_model_and_info.py ├── core_version_utility.py ├── mutex_utility.py └── path_utility.py /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | *.png -text 3 | *.wav -text -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @VOICEVOX/maintainer 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bugreport.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: 不具合の報告 4 | labels: バグ 5 | --- 6 | 7 | ## 不具合の内容 8 | 9 | 10 | 11 | ### 現象・ログ 12 | 13 | 14 | 15 | ### 再現手順 16 | 17 | 18 | 19 | ### 期待動作 20 | 21 | 22 | 23 | ## VOICEVOXのバージョン 24 | 25 | 0.?.0 26 | 27 | 28 | 29 | ## OSの種類/ディストリ/バージョン 30 | 31 | 32 | 33 | - [ ] Windows 34 | - [ ] macOS 35 | - [ ] Linux 36 | 37 | 44 | 45 | ## その他 46 | 47 | 48 | -------------------------------------------------------------------------------- 
/.github/ISSUE_TEMPLATE/featurerequest.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: 機能要望・改善提案 4 | labels: 機能向上 5 | --- 6 | 7 | ## 内容 8 | 9 | 10 | 11 | 12 | ### Pros 良くなる点 13 | 14 | 15 | 16 | ### Cons 悪くなる点 17 | 18 | 19 | 20 | ### 実現方法 21 | 22 | 23 | 24 | ## VOICEVOXのバージョン 25 | 26 | 0.?.0 27 | 28 | 29 | 30 | ## OSの種類/ディストリ/バージョン 31 | 32 | 33 | 34 | - [ ] Windows 35 | - [ ] macOS 36 | - [ ] Linux 37 | 38 | 45 | 46 | ## その他 47 | 48 | 49 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question 3 | about: 質問 (既存のIssueや一般事例を良く調べてからしてください) 4 | labels: 要議論 5 | --- 6 | 7 | ## 質問の内容 8 | 9 | 10 | 11 | ## VOICEVOXのバージョン 12 | 13 | 0.?.0 14 | 15 | 16 | 17 | ## OSの種類/ディストリ/バージョン 18 | 19 | 20 | 21 | - [ ] Windows 22 | - [ ] macOS 23 | - [ ] Linux 24 | 25 | 32 | 33 | ## その他 34 | 35 | 36 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## 内容 2 | 3 | 6 | 7 | ## 関連 Issue 8 | 9 | 17 | 18 | ## スクリーンショット・動画など 19 | 20 | 23 | 24 | ## その他 25 | -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | 'OS:mac': 2 | - '\[x\] macOS' 3 | 'OS:linux': 4 | - '\[x\] Linux' 5 | 'OS:win': 6 | - '\[x\] Windows' 7 | -------------------------------------------------------------------------------- /.github/workflows/build-docker.yml: -------------------------------------------------------------------------------- 1 | name: build-docker 2 | on: 3 | push: 4 | branches: 5 | - master 6 | release: 7 | types: 8 | - created 9 | workflow_dispatch: 10 | inputs: 11 | version: 12 | 
description: "バージョン情報(A.BB.C / A.BB.C-preview.D)" 13 | required: true 14 | 15 | env: 16 | IMAGE_NAME: ${{ vars.DOCKERHUB_USERNAME }}/sharevox_engine 17 | PYTHON_VERSION: "3.11.3" 18 | VOICEVOX_RESOURCE_VERSION: "0.3.0-preview.3" 19 | VOICEVOX_CORE_VERSION: "0.2.1" 20 | 21 | defaults: 22 | run: 23 | shell: bash 24 | 25 | jobs: 26 | config: # 全 jobs で利用する定数の定義. `env` が利用できないコンテキストでも利用できる. 27 | runs-on: ubuntu-latest 28 | outputs: 29 | version_or_latest: ${{ steps.vars.outputs.version_or_latest }} 30 | steps: 31 | - name: declare variables 32 | id: vars 33 | run: | 34 | : # releaseタグ名か、workflow_dispatchでのバージョン名か、latestが入る 35 | echo "version_or_latest=${{ github.event.release.tag_name || github.event.inputs.version || 'latest' }}" >> $GITHUB_OUTPUT 36 | 37 | build-docker: 38 | needs: [ config ] 39 | runs-on: ${{ matrix.os }} 40 | 41 | strategy: 42 | matrix: 43 | os: [ ubuntu-latest ] 44 | tag: 45 | - "" 46 | - cpu 47 | - cpu-ubuntu20.04 48 | - nvidia 49 | - nvidia-ubuntu20.04 50 | include: 51 | # Ubuntu 20.04 52 | - tag: "" 53 | target: runtime-env 54 | base_image: ubuntu:20.04 55 | base_runtime_image: ubuntu:20.04 56 | onnxruntime_version: 1.12.1 57 | platforms: linux/amd64,linux/arm64/v8 58 | - tag: cpu 59 | target: runtime-env 60 | base_image: ubuntu:20.04 61 | base_runtime_image: ubuntu:20.04 62 | onnxruntime_version: 1.12.1 63 | platforms: linux/amd64,linux/arm64/v8 64 | - tag: cpu-ubuntu20.04 65 | target: runtime-env 66 | base_image: ubuntu:20.04 67 | base_runtime_image: ubuntu:20.04 68 | onnxruntime_version: 1.12.1 69 | platforms: linux/amd64,linux/arm64/v8 70 | - tag: nvidia 71 | target: runtime-nvidia-env 72 | base_image: ubuntu:20.04 73 | base_runtime_image: nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04 74 | onnxruntime_version: 1.12.1 75 | platforms: linux/amd64 76 | - tag: nvidia-ubuntu20.04 77 | target: runtime-nvidia-env 78 | base_image: ubuntu:20.04 79 | base_runtime_image: nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04 80 | onnxruntime_version: 1.12.1 81 
| platforms: linux/amd64 82 | 83 | steps: 84 | - uses: actions/checkout@v3 85 | 86 | - name: Setup QEMU 87 | uses: docker/setup-qemu-action@v2 88 | 89 | - name: Setup Docker Buildx 90 | id: buildx 91 | uses: docker/setup-buildx-action@v2 92 | 93 | - name: Login to DockerHub 94 | uses: docker/login-action@v2 95 | with: 96 | username: ${{ vars.DOCKERHUB_USERNAME }} 97 | password: ${{ secrets.DOCKERHUB_TOKEN }} 98 | 99 | # Download VOICEVOX RESOURCE 100 | - name: Prepare VOICEVOX RESOURCE cache 101 | uses: actions/cache@v3 102 | id: voicevox-resource-cache 103 | with: 104 | key: voicevox-resource-${{ env.VOICEVOX_RESOURCE_VERSION }} 105 | path: download/resource 106 | 107 | - name: Checkout VOICEVOX RESOURCE 108 | if: steps.voicevox-resource-cache.outputs.cache-hit != 'true' 109 | uses: actions/checkout@v3 110 | with: 111 | repository: SHAREVOX/sharevox_resource 112 | ref: ${{ env.VOICEVOX_RESOURCE_VERSION }} 113 | path: download/resource 114 | 115 | # Merge VOICEVOX RESOURCE 116 | - name: Merge VOICEVOX RESOURCE 117 | env: 118 | DOWNLOAD_RESOURCE_PATH: download/resource 119 | run: bash build_util/process_voicevox_resource.bash 120 | 121 | - name: Build and Deploy Docker image 122 | uses: docker/build-push-action@v3 123 | env: 124 | IMAGE_TAG: 125 | |- # If it's a release, add the version, otherwise add the `latest` 126 | ${{ ( 127 | matrix.tag != '' && ( 128 | format('{0}:{1}-{2}', env.IMAGE_NAME, matrix.tag, needs.config.outputs.version_or_latest) 129 | ) || format('{0}:{1}', env.IMAGE_NAME, needs.config.outputs.version_or_latest) 130 | ) }} 131 | IMAGE_CACHE_FROM: 132 | |- # Always use the `latest` buildcache. 
:latest-buildcache or :{tag}-latest-buildcache 133 | ${{ ( 134 | matrix.tag != '' && ( 135 | format('type=registry,ref={0}:{1}-latest-buildcache', env.IMAGE_NAME, matrix.tag) 136 | ) || format('type=registry,ref={0}:latest-buildcache', env.IMAGE_NAME) 137 | ) }} 138 | IMAGE_CACHE_TO: 139 | |- # If it's a release, do not create buildcache, otherwise create the `latest` buildcache. :latest-buildcache or :{tag}-latest-buildcache 140 | ${{ ( 141 | needs.config.outputs.version_or_latest == 'latest' && ( 142 | matrix.tag != '' && ( 143 | format('type=registry,ref={0}:{1}-latest-buildcache,mode=max', env.IMAGE_NAME, matrix.tag) 144 | ) || format('type=registry,ref={0}:latest-buildcache,mode=max', env.IMAGE_NAME) 145 | ) || '' 146 | ) }} 147 | with: 148 | context: . 149 | builder: ${{ steps.buildx.outputs.name }} 150 | file: ./Dockerfile 151 | build-args: | 152 | BASE_IMAGE=${{ matrix.base_image }} 153 | BASE_RUNTIME_IMAGE=${{ matrix.base_runtime_image }} 154 | PYTHON_VERSION=${{ env.PYTHON_VERSION }} 155 | VOICEVOX_ENGINE_VERSION=${{ needs.config.outputs.version_or_latest }} 156 | VOICEVOX_CORE_VERSION=${{ env.VOICEVOX_CORE_VERSION }} 157 | VOICEVOX_RESOURCE_VERSION=${{ env.VOICEVOX_RESOURCE_VERSION }} 158 | USE_GPU=${{ matrix.target == 'runtime-nvidia-env' }} 159 | ONNXRUNTIME_VERSION=${{ matrix.onnxruntime_version }} 160 | target: ${{ matrix.target }} 161 | push: true 162 | tags: ${{ env.IMAGE_TAG }} 163 | cache-from: ${{ env.IMAGE_CACHE_FROM }} 164 | cache-to: ${{ env.IMAGE_CACHE_TO }} 165 | platforms: ${{ matrix.platforms }} 166 | 167 | run-release-test-workflow: 168 | # version が指定されている場合のみ実行する 169 | if: needs.config.outputs.version_or_latest != 'latest' 170 | needs: [ config, build-docker ] 171 | uses: ./.github/workflows/release-test-docker.yml 172 | with: 173 | version: ${{ needs.config.outputs.version_or_latest }} 174 | repo_url: ${{ format('{0}/{1}', github.server_url, github.repository) }} # このリポジトリのURL 175 | 
-------------------------------------------------------------------------------- /.github/workflows/coverage-comment.yml: -------------------------------------------------------------------------------- 1 | name: Coverage Report Comment 2 | 3 | on: 4 | workflow_run: 5 | workflows: 6 | - test 7 | types: 8 | - completed 9 | workflow_dispatch: 10 | 11 | defaults: 12 | run: 13 | shell: bash 14 | 15 | jobs: 16 | comment: 17 | runs-on: ubuntu-latest 18 | if: github.event.workflow_run.event == 'pull_request' && github.event.workflow_run.conclusion == 'success' 19 | steps: 20 | - name: Download coverage report 21 | uses: actions/github-script@v5.0.0 22 | with: 23 | script: | 24 | const artifacts = await github.rest.actions.listWorkflowRunArtifacts({ 25 | owner: context.repo.owner, 26 | repo: context.repo.repo, 27 | run_id: ${{ github.event.workflow_run.id }}, 28 | }) 29 | const matchArtifact = artifacts.data.artifacts.filter((artifact) => { 30 | return artifact.name == 'report' 31 | })[0] 32 | const download = await github.rest.actions.downloadArtifact({ 33 | owner: context.repo.owner, 34 | repo: context.repo.repo, 35 | artifact_id: matchArtifact.id, 36 | archive_format: 'zip', 37 | }) 38 | const fs = require('fs') 39 | fs.writeFileSync('${{github.workspace}}/report.zip', Buffer.from(download.data)) 40 | 41 | - name: Unzip report 42 | run: unzip report.zip 43 | 44 | - name: Comment coverage result to Pull Requests 45 | uses: actions/github-script@v5.0.0 46 | with: 47 | github-token: ${{ secrets.GITHUB_TOKEN }} 48 | script: | 49 | const fs = require('fs') 50 | const baseReport = fs.readFileSync('report.txt', 'utf8').toString().split('\n') 51 | let report = '' 52 | for (let i = 0; i < baseReport.length; i++) { 53 | const line = baseReport[i].split(' ').filter(v => v) 54 | if (i === 1 && line.length === 1) { 55 | report += "|:---|---:|---:|---:|\n" 56 | } else if (line.length === 1) { 57 | continue 58 | } else { 59 | if (i !== 0 && line.length === 4) { 60 | const parcent = 
Number(line[3].replace("%", "")) 61 | let color = 'green' 62 | if (parcent < 50) { 63 | color = 'red' 64 | } else if (parcent < 90) { 65 | color = 'orange' 66 | } 67 | line[3] = `![coverage-${parcent}%](https://img.shields.io/badge/coverage-${parcent}%25-${color}.svg)` 68 | } 69 | report += "|" + line.join("|") + "|\n" 70 | } 71 | if (line[0] === 'TOTAL') break 72 | } 73 | 74 | const issue_number = Number(fs.readFileSync('pr_num.txt')) 75 | const body = `## Coverage Result\n\n
\nResultを開く\n\n${report}\n
` 76 | 77 | let listComments = await github.rest.issues.listComments({ 78 | issue_number, 79 | owner: context.repo.owner, 80 | repo: context.repo.repo, 81 | }) 82 | listComments = listComments.data.filter((comment) => { 83 | return comment.body.includes('Coverage Result') && comment.user.login.includes('github-actions') 84 | }) 85 | 86 | if (listComments.length === 0) { 87 | github.rest.issues.createComment({ 88 | issue_number, 89 | owner: context.repo.owner, 90 | repo: context.repo.repo, 91 | body, 92 | }) 93 | } else { 94 | github.rest.issues.updateComment({ 95 | comment_id: listComments[0].id, 96 | owner: context.repo.owner, 97 | repo: context.repo.repo, 98 | body, 99 | }) 100 | } 101 | -------------------------------------------------------------------------------- /.github/workflows/labeler.yml: -------------------------------------------------------------------------------- 1 | name: Issue Labeler 2 | on: 3 | issues: 4 | types: [opened] 5 | defaults: 6 | run: 7 | shell: bash 8 | 9 | jobs: 10 | triage: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: github/issue-labeler@v2.0 14 | with: 15 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 16 | configuration-path: .github/labeler.yml 17 | enable-versioned-regex: 0 18 | -------------------------------------------------------------------------------- /.github/workflows/release-test-docker.yml: -------------------------------------------------------------------------------- 1 | name: Test Docker Release Build 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | version: 7 | type: string 8 | required: true 9 | repo_url: 10 | type: string 11 | required: false 12 | workflow_dispatch: 13 | inputs: 14 | version: 15 | type: string 16 | description: "テストしたいタグ名" 17 | required: true 18 | 19 | env: 20 | IMAGE_NAME: ${{ vars.DOCKERHUB_USERNAME }}/voicevox_engine 21 | VERSION: |- # version指定時はversionを、それ以外はタグ名を使用 22 | ${{ (github.event.inputs || inputs).version }} 23 | 24 | defaults: 25 | run: 26 | shell: bash 27 | 28 | jobs: 29 | 
test: 30 | runs-on: [ ubuntu-20.04 ] 31 | strategy: 32 | fail-fast: false 33 | matrix: 34 | tag: 35 | - "" 36 | - cpu 37 | - cpu-ubuntu20.04 38 | 39 | steps: 40 | - uses: actions/checkout@v3 41 | 42 | # 43 | # Setup Python Environment 44 | # 45 | - uses: actions/setup-python@v4 46 | with: 47 | python-version: "3.11.3" 48 | cache: pip 49 | 50 | - name: Install libsndfile1 51 | run: | 52 | sudo apt-get update 53 | sudo apt-get install libsndfile1 54 | 55 | - name: Install requirements 56 | run: | 57 | pip install -r requirements-test.txt 58 | 59 | # 60 | # Setup Docker Environment 61 | # 62 | - name: Declare variables 63 | id: docker_vars 64 | run: | 65 | if [ "${{ matrix.tag }}" != "" ]; then 66 | echo "image_tag=${{ env.IMAGE_NAME }}:${{ matrix.tag }}-${{ env.VERSION }}" >> $GITHUB_OUTPUT 67 | else 68 | echo "image_tag=${{ env.IMAGE_NAME }}:${{ env.VERSION }}" >> $GITHUB_OUTPUT 69 | fi 70 | 71 | - name: Docker pull 72 | run: docker pull "${{ steps.docker_vars.outputs.image_tag }}" 73 | 74 | - name: Docker run 75 | run: docker run -d -p 50021:50021 "${{ steps.docker_vars.outputs.image_tag }}" 76 | 77 | # Docker コンテナが起動してから、レスポンスが返ってくるまで待機する 78 | # リトライは10回まで `/version` にアクセスしてレスポンスのステータスコードをチェック 79 | # - ステータスコードが `200` の場合は正常終了します 80 | # - ステータスコードが `200` 以外の場合は、5秒間スリープしてリトライします 81 | - name: Wait for container to start 82 | run: | 83 | set +e # curlのエラーを無視する 84 | 85 | url="http://127.0.0.1:50021/version" 86 | max_attempts=10 87 | sleep_interval=5 88 | 89 | for i in $(seq 1 $max_attempts); do 90 | status=$(curl -o /dev/null -s -w '%{http_code}\n' $url) 91 | if [ $status -eq 200 ]; then 92 | echo "Container is ready! 
Response status code: $status" 93 | exit 0 94 | else 95 | echo "Attempt $i/$max_attempts: Response status code $status" 96 | sleep $sleep_interval 97 | fi 98 | done 99 | exit 1 100 | 101 | - name: Test 102 | run: python build_util/check_release_build.py --skip_run_process --dist_dir dist/ 103 | -------------------------------------------------------------------------------- /.github/workflows/release-test.yml: -------------------------------------------------------------------------------- 1 | name: Test Release Build 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | version: 7 | type: string 8 | required: true 9 | repo_url: 10 | type: string 11 | required: false 12 | workflow_dispatch: 13 | inputs: 14 | version: 15 | type: string 16 | description: "テストしたいタグ名" 17 | required: true 18 | repo_url: 19 | type: string 20 | description: "リポジトリのURL(省略可能)" 21 | required: false 22 | 23 | env: 24 | REPO_URL: 25 | |- # repo_url指定時はrepo_urlを、それ以外はgithubのリポジトリURLを使用 26 | ${{ (github.event.inputs || inputs).repo_url || format('{0}/{1}', github.server_url, github.repository) }} 27 | VERSION: |- # version指定時はversionを、それ以外はタグ名を使用 28 | ${{ (github.event.inputs || inputs).version }} 29 | 30 | defaults: 31 | run: 32 | shell: bash 33 | 34 | jobs: 35 | test: 36 | strategy: 37 | fail-fast: false 38 | matrix: 39 | include: 40 | - os: ubuntu-20.04 41 | target: linux-cpu 42 | - os: ubuntu-20.04 43 | target: linux-nvidia 44 | - os: macos-11 45 | target: macos-x64 46 | - os: windows-2019 47 | target: windows-cpu 48 | - os: windows-2019 49 | target: windows-nvidia 50 | - os: windows-2019 51 | target: windows-directml 52 | 53 | runs-on: ${{ matrix.os }} 54 | 55 | steps: 56 | - name: declare variables 57 | id: vars 58 | run: | 59 | echo "release_url=${{ env.REPO_URL }}/releases/download/${{ env.VERSION }}" >> $GITHUB_OUTPUT 60 | echo "package_name=sharevox_engine-${{ matrix.target }}-${{ env.VERSION }}" >> $GITHUB_OUTPUT 61 | 62 | - uses: actions/checkout@v2 63 | 64 | - uses: actions/setup-python@v2 
65 | with: 66 | python-version: "3.11.3" 67 | cache: pip 68 | 69 | - name: Download 70 | run: | 71 | mkdir -p download 72 | curl -L -o "download/list.txt" "${{ steps.vars.outputs.release_url }}/${{ steps.vars.outputs.package_name }}.7z.txt" 73 | cat "download/list.txt" | xargs -I '%' curl -L -o "download/%" "${{ steps.vars.outputs.release_url }}/%" 74 | 7z x "download/$(head -n1 download/list.txt)" 75 | mv ${{ matrix.target }} dist/ 76 | 77 | - name: chmod +x 78 | if: startsWith(matrix.target, 'linux') || startsWith(matrix.target, 'macos') 79 | run: chmod +x dist/run 80 | 81 | - name: Install libsndfile1 82 | if: startsWith(matrix.target, 'linux') 83 | run: | 84 | sudo apt-get update 85 | sudo apt-get install libsndfile1 86 | 87 | - name: Install requirements 88 | run: | 89 | pip install -r requirements-test.txt 90 | 91 | - name: Test 92 | run: python build_util/check_release_build.py --dist_dir dist/ 93 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: 4 | push: 5 | pull_request: 6 | branches: 7 | - "**" 8 | workflow_dispatch: 9 | 10 | defaults: 11 | run: 12 | shell: bash 13 | 14 | jobs: 15 | test: 16 | runs-on: ${{ matrix.os }} 17 | strategy: 18 | matrix: 19 | os: [ubuntu-20.04, windows-latest] # [ubuntu-20.04, macos-latest, windows-latest] 20 | python: ["3.11.3"] 21 | 22 | steps: 23 | - uses: actions/checkout@v3 24 | 25 | - name: Set up Python ${{ matrix.python }} 26 | uses: actions/setup-python@v4 27 | with: 28 | python-version: ${{ matrix.python }} 29 | cache: pip 30 | 31 | - name: Install libraries for ubuntu 32 | if: matrix.os == 'ubuntu-20.04' 33 | run: | 34 | sudo apt-get update 35 | sudo apt-get install libsndfile1 36 | 37 | - name: Install dependencies 38 | run: | 39 | python -m pip install --upgrade pip setuptools wheel 40 | python -m pip install -r requirements-test.txt 41 | 42 | - 
name: Run poetry and check 43 | run: | 44 | poetry export --without-hashes -o requirements.txt.check 45 | poetry export --without-hashes --with dev -o requirements-dev.txt.check 46 | poetry export --without-hashes --with test -o requirements-test.txt.check 47 | poetry export --without-hashes --with license -o requirements-license.txt.check 48 | 49 | diff -q requirements.txt requirements.txt.check || \ 50 | diff -q requirements-dev.txt requirements-dev.txt.check || \ 51 | diff -q requirements-test.txt requirements-test.txt.check || \ 52 | diff -q requirements-license.txt requirements-license.txt.check > /dev/null 53 | if [ $? = 1 ]; then 54 | echo "poetry export has some diff" 55 | exit 1 56 | fi 57 | 58 | - run: pysen run lint 59 | 60 | - name: Run pytest and get coverage 61 | run: | 62 | coverage run --omit=test/* -m pytest 63 | 64 | - name: Submit coverage to Coveralls 65 | if: matrix.os == 'ubuntu-20.04' 66 | run: coveralls --service=github 67 | env: 68 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 69 | 70 | - name: Create coverage result 71 | if: github.event_name == 'pull_request' && matrix.os == 'ubuntu-20.04' 72 | run: | 73 | mkdir report 74 | coverage report > report/report.txt 75 | echo ${{ github.event.number }} > report/pr_num.txt 76 | 77 | - name: Upload coverage result 78 | if: github.event_name == 'pull_request' && matrix.os == 'ubuntu-20.04' 79 | uses: actions/upload-artifact@v3 80 | with: 81 | name: report 82 | path: report/ 83 | 84 | - name: Check licenses 85 | shell: bash 86 | run: | 87 | OUTPUT_LICENSE_JSON_PATH=/dev/null \ 88 | bash build_util/create_venv_and_generate_licenses.bash 89 | -------------------------------------------------------------------------------- /.github/workflows/typos.yml: -------------------------------------------------------------------------------- 1 | name: Check typos 2 | 3 | on: 4 | push: 5 | pull_request: 6 | branches: 7 | - "**" 8 | workflow_dispatch: 9 | 10 | defaults: 11 | run: 12 | shell: bash 13 | 14 | jobs: 15 
| typos: 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - uses: actions/checkout@v3 20 | 21 | - name: typos-action 22 | uses: crate-ci/typos@v1.12.12 23 | -------------------------------------------------------------------------------- /.github/workflows/upload-gh-pages.yml: -------------------------------------------------------------------------------- 1 | name: upload-docs 2 | 3 | on: 4 | push: 5 | branches: 6 | - "master" 7 | 8 | env: 9 | PYTHON_VERSION: "3.11.3" 10 | PUBLISH_DIR: "./docs/api" 11 | PUBLISH_BRANCH: "gh-pages" 12 | DESTINATION_DIR: "api" 13 | 14 | defaults: 15 | run: 16 | shell: bash 17 | 18 | jobs: 19 | upload-doc: 20 | runs-on: ubuntu-20.04 21 | steps: 22 | - uses: actions/checkout@v2 23 | 24 | - name: Setup Python 25 | id: setup-python 26 | uses: actions/setup-python@v2 27 | with: 28 | python-version: ${{ env.PYTHON_VERSION }} 29 | cache: pip 30 | 31 | - name: Install libraries for ubuntu 32 | run: | 33 | sudo apt-get update 34 | sudo apt-get install libsndfile1 35 | 36 | - name: Install Python dependencies 37 | run: | 38 | pip install -r requirements.txt 39 | 40 | - name: Make documents 41 | run: | 42 | python make_docs.py 43 | 44 | - name: Deploy to GitHub Pages 45 | uses: peaceiris/actions-gh-pages@v3 46 | with: 47 | github_token: ${{ secrets.GITHUB_TOKEN }} 48 | publish_dir: ${{ env.PUBLISH_DIR }} 49 | publish_branch: ${{ env.PUBLISH_BRANCH }} 50 | destination_dir: ${{ env.DESTINATION_DIR }} 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # VOICEVOX specifics 2 | ## Artifacts of nuitka 3 | *.dist 4 | *.build 5 | /build 6 | /cache 7 | ## Artifact of generating licenses 8 | /licenses.json 9 | licenses_venv/ 10 | 11 | # Copied from `https://github.com/github/gitignore/blob/main/Python.gitignore` @2022-01-10 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | 
17 | # C extensions 18 | *.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | build/ 23 | develop-eggs/ 24 | dist/ 25 | downloads/ 26 | eggs/ 27 | .eggs/ 28 | lib/ 29 | lib64/ 30 | parts/ 31 | sdist/ 32 | var/ 33 | wheels/ 34 | share/python-wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | MANIFEST 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | cover/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | db.sqlite3-journal 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | .pybuilder/ 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | # For a library or package, you might want to ignore these files since the code is 92 | # intended to run in multiple environments; otherwise, check them in: 93 | .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | Pipfile.lock 101 | 102 | # poetry 103 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 104 | # This is especially recommended for binary packages to ensure reproducibility, and is more 105 | # commonly ignored for libraries. 
106 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 107 | # poetry.lock 108 | 109 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 110 | __pypackages__/ 111 | 112 | # Celery stuff 113 | celerybeat-schedule 114 | celerybeat.pid 115 | 116 | # SageMath parsed files 117 | *.sage.py 118 | 119 | # Environments 120 | .env 121 | .venv 122 | env/ 123 | venv/ 124 | ENV/ 125 | env.bak/ 126 | venv.bak/ 127 | 128 | # Spyder project settings 129 | .spyderproject 130 | .spyproject 131 | 132 | # Rope project settings 133 | .ropeproject 134 | 135 | # mkdocs documentation 136 | /site 137 | 138 | # mypy 139 | .mypy_cache/ 140 | .dmypy.json 141 | dmypy.json 142 | 143 | # Pyre type checker 144 | .pyre/ 145 | 146 | # pytype static type analyzer 147 | .pytype/ 148 | 149 | # Cython debug symbols 150 | cython_debug/ 151 | 152 | # PyCharm 153 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 154 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 155 | # and can be added to the global gitignore or merged into this file. For a more nuclear 156 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
157 | .idea/ 158 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: local 5 | hooks: 6 | - id: pysen-lint 7 | name: pysen-lint 8 | entry: pysen run lint 9 | language: python 10 | types: [file, python] 11 | stages: [push] 12 | pass_filenames: false 13 | - id: poetry-export 14 | name: poetry-export 15 | entry: poetry export --without-hashes -o requirements.txt 16 | language: python 17 | stages: [push] 18 | pass_filenames: false 19 | - id: poetry-export-dev 20 | name: poetry-export-dev 21 | entry: poetry export --without-hashes --with dev -o requirements-dev.txt 22 | language: python 23 | stages: [push] 24 | pass_filenames: false 25 | - id: poetry-export-test 26 | name: poetry-export-test 27 | entry: poetry export --without-hashes --with test -o requirements-test.txt 28 | language: python 29 | stages: [push] 30 | pass_filenames: false 31 | - id: poetry-export-license 32 | name: poetry-export-license 33 | entry: poetry export --without-hashes --with license -o requirements-license.txt 34 | language: python 35 | stages: [push] 36 | pass_filenames: false 37 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CMD= 2 | NOCACHE= 3 | 4 | ARGS:= 5 | ifeq ($(NOCACHE),1) 6 | ARGS:=$(ARGS) --no-cache 7 | endif 8 | 9 | # Ubuntu 20.04 10 | .PHONY: build-linux-docker-ubuntu20.04 11 | build-linux-docker-ubuntu20.04: 12 | docker buildx build . 
\ 13 | -t sharevox/sharevox_engine:cpu-ubuntu20.04-latest \ 14 | --target runtime-env \ 15 | --progress plain \ 16 | --build-arg BASE_IMAGE=ubuntu:20.04 \ 17 | --build-arg BASE_RUNTIME_IMAGE=ubuntu:20.04 \ 18 | --build-arg ONNXRUNTIME_URL=https://github.com/microsoft/onnxruntime/releases/download/v1.12.1/onnxruntime-linux-x64-1.12.1.tgz \ 19 | --build-arg VOICEVOX_CORE_LIBRARY_NAME=libcore_cpu_x64.so $(ARGS) 20 | 21 | .PHONY: run-linux-docker-ubuntu20.04 22 | run-linux-docker-ubuntu20.04: 23 | docker run --rm -it \ 24 | -p '127.0.0.1:50021:50021' $(ARGS) \ 25 | sharevox/sharevox_engine:cpu-ubuntu20.04-latest $(CMD) 26 | 27 | .PHONY: build-linux-docker-nvidia-ubuntu20.04 28 | build-linux-docker-nvidia-ubuntu20.04: 29 | docker buildx build . \ 30 | -t sharevox/sharevox_engine:nvidia-ubuntu20.04-latest \ 31 | --target runtime-nvidia-env \ 32 | --progress plain \ 33 | --build-arg BASE_IMAGE=ubuntu:20.04 \ 34 | --build-arg BASE_RUNTIME_IMAGE=nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04 \ 35 | --build-arg ONNXRUNTIME_URL=https://github.com/microsoft/onnxruntime/releases/download/v1.12.1/onnxruntime-linux-x64-gpu-1.12.1.tgz \ 36 | --build-arg VOICEVOX_CORE_LIBRARY_NAME=libcore_gpu_x64_nvidia.so $(ARGS) 37 | 38 | .PHONY: run-linux-docker-nvidia-ubuntu20.04 39 | run-linux-docker-nvidia-ubuntu20.04: 40 | docker run --rm -it \ 41 | --gpus all \ 42 | -p '127.0.0.1:50021:50021' $(ARGS) \ 43 | sharevox/sharevox_engine:nvidia-ubuntu20.04-latest $(CMD) 44 | 45 | 46 | # Ubuntu 18.04 47 | .PHONY: build-linux-docker-ubuntu18.04 48 | build-linux-docker-ubuntu18.04: 49 | docker buildx build . 
\ 50 | -t sharevox/sharevox_engine:cpu-ubuntu18.04-latest \ 51 | --target runtime-env \ 52 | --progress plain \ 53 | --build-arg BASE_IMAGE=ubuntu:18.04 \ 54 | --build-arg BASE_RUNTIME_IMAGE=ubuntu:18.04 \ 55 | --build-arg ONNXRUNTIME_URL=https://github.com/microsoft/onnxruntime/releases/download/v1.12.1/onnxruntime-linux-x64-1.12.1.tgz \ 56 | --build-arg VOICEVOX_CORE_LIBRARY_NAME=libcore_cpu_x64.so $(ARGS) 57 | 58 | .PHONY: run-linux-docker-ubuntu18.04 59 | run-linux-docker-ubuntu18.04: 60 | docker run --rm -it \ 61 | -p '127.0.0.1:50021:50021' $(ARGS) \ 62 | sharevox/sharevox_engine:cpu-ubuntu18.04-latest $(CMD) 63 | 64 | .PHONY: build-linux-docker-nvidia-ubuntu18.04 65 | build-linux-docker-nvidia-ubuntu18.04: 66 | docker buildx build . \ 67 | -t sharevox/sharevox_engine:nvidia-ubuntu18.04-latest \ 68 | --target runtime-nvidia-env \ 69 | --progress plain \ 70 | --build-arg BASE_IMAGE=ubuntu:18.04 \ 71 | --build-arg BASE_RUNTIME_IMAGE=nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu18.04 \ 72 | --build-arg ONNXRUNTIME_URL=https://github.com/microsoft/onnxruntime/releases/download/v1.12.1/onnxruntime-linux-x64-gpu-1.12.1.tgz \ 73 | --build-arg VOICEVOX_CORE_LIBRARY_NAME=libcore_gpu_x64_nvidia.so $(ARGS) 74 | 75 | .PHONY: run-linux-docker-nvidia-ubuntu18.04 76 | run-linux-docker-nvidia-ubuntu18.04: 77 | docker run --rm -it \ 78 | --gpus all \ 79 | -p '127.0.0.1:50021:50021' $(ARGS) \ 80 | sharevox/sharevox_engine:nvidia-ubuntu18.04-latest $(CMD) 81 | 82 | 83 | # VOICEVOX Core env for test 84 | .PHONY: build-linux-docker-download-core-env-ubuntu18.04 85 | build-linux-docker-download-core-env-ubuntu18.04: 86 | docker buildx build . 
\ 87 | -t sharevox/sharevox_engine:download-core-env-ubuntu18.04 \ 88 | --target download-core-env \ 89 | --progress plain \ 90 | --build-arg BASE_IMAGE=ubuntu:18.04 $(ARGS) 91 | 92 | .PHONY: run-linux-docker-download-core-env-ubuntu18.04 93 | run-linux-docker-download-core-env-ubuntu18.04: 94 | docker run --rm -it $(ARGS) \ 95 | sharevox/sharevox_engine:download-core-env-ubuntu18.04 $(CMD) 96 | 97 | 98 | # ONNX Runtime env for test 99 | .PHONY: build-linux-docker-download-onnxruntime-env-ubuntu18.04 100 | build-linux-docker-download-onnxruntime-env-ubuntu18.04: 101 | docker buildx build . \ 102 | -t sharevox/sharevox_engine:download-onnxruntime-env-ubuntu18.04 \ 103 | --target download-onnxruntime-env \ 104 | --progress plain \ 105 | --build-arg BASE_IMAGE=ubuntu:18.04 $(ARGS) 106 | 107 | .PHONY: run-linux-docker-download-onnxruntime-env-ubuntu18.04 108 | run-linux-docker-download-onnxruntime-env-ubuntu18.04: 109 | docker run --rm -it $(ARGS) \ 110 | sharevox/sharevox_engine:download-onnxruntime-env-ubuntu18.04 $(CMD) 111 | 112 | 113 | # Python env for test 114 | .PHONY: build-linux-docker-compile-python-env 115 | build-linux-docker-compile-python-env: 116 | docker buildx build . 
def test_release_build(dist_dir: Path, skip_run_process: bool) -> None:
    """Smoke-test a built engine through its HTTP API.

    Unless ``skip_run_process`` is set, the ``run`` binary (or ``run.exe``
    when ``run`` does not exist) inside ``dist_dir`` is launched first and
    terminated when the checks are over. The checks request, relative to
    ``base_url``: ``version``, ``speakers``, ``audio_query``, ``synthesis``
    and ``engine_manifest``.

    Args:
        dist_dir: Directory that contains the built ``run`` / ``run.exe``.
        skip_run_process: When true, no process is launched; the requests
            are simply sent to ``base_url``.

    Raises:
        AssertionError: If a response body is empty, the manifest has no
            ``uuid`` key, or the launched process exited before the checks
            finished. Errors raised by ``urlopen``, ``json.loads`` or
            ``soundfile.read`` propagate unchanged.
    """
    run_file = dist_dir / "run"
    if not run_file.exists():
        run_file = dist_dir / "run.exe"

    # Launch the engine
    process = None
    if not skip_run_process:
        process = Popen([run_file.absolute()], cwd=dist_dir)

    # Everything after the launch runs under try/finally so that a failing
    # check (or an interrupt while waiting) never leaves the engine running.
    try:
        if process is not None:
            time.sleep(60)  # wait for the engine to come up

        # Version endpoint
        req = Request(base_url + "version")
        with urlopen(req) as res:
            assert len(res.read()) > 0

        # Speaker list endpoint
        req = Request(base_url + "speakers")
        with urlopen(req) as res:
            res_text = res.read().decode("utf-8")
        assert len(res_text) > 0
        res_json = json.loads(res_text)
        # Use the first style of the first speaker for the synthesis checks
        speaker = str(res_json[0]["styles"][0]["id"])

        # Text -> query
        text = "こんにちは、音声合成の世界へようこそ"
        req = Request(
            base_url + "audio_query?" + urlencode({"speaker": speaker, "text": text}),
            method="POST",
        )
        with urlopen(req) as res:
            query = json.loads(res.read().decode("utf-8"))

        # Query -> audio
        req = Request(base_url + f"synthesis?speaker={speaker}", method="POST")
        req.add_header("Content-Type", "application/json")
        req.data = json.dumps(query).encode("utf-8")
        with urlopen(req) as res:
            wave = res.read()
        # Parse the returned bytes with soundfile; its result is not inspected
        soundfile.read(BytesIO(wave))

        # Engine manifest
        req = Request(base_url + "engine_manifest", method="GET")
        with urlopen(req) as res:
            manifest = json.loads(res.read().decode("utf-8"))
        assert "uuid" in manifest

        if process is not None:
            # The engine must still be alive after serving every request
            assert process.poll() is None
    finally:
        # Stop the engine regardless of how the checks ended
        if process is not None:
            process.terminate()
-v CERT_PASSWORD ]; then 10 | echo "CERT_PASSWORDが未定義です" 11 | exit 1 12 | fi 13 | 14 | if [ $# -ne 1 ]; then 15 | echo "引数の数が一致しません" 16 | exit 1 17 | fi 18 | target_file_glob="$1" 19 | 20 | # 証明書 21 | CERT_PATH=cert.pfx 22 | echo -n "$CERT_BASE64" | base64 -d - > $CERT_PATH 23 | 24 | # 指定ファイルに署名する 25 | function codesign() { 26 | TARGET="$1" 27 | SIGNTOOL=$(find "C:/Program Files (x86)/Windows Kits/10/App Certification Kit" -name "signtool.exe" | sort -V | tail -n 1) 28 | powershell "& '$SIGNTOOL' sign /fd SHA256 /td SHA256 /tr http://timestamp.digicert.com /f $CERT_PATH /p $CERT_PASSWORD '$TARGET'" 29 | } 30 | 31 | # 指定ファイルが署名されているか 32 | function is_signed() { 33 | TARGET="$1" 34 | SIGNTOOL=$(find "C:/Program Files (x86)/Windows Kits/10/App Certification Kit" -name "signtool.exe" | sort -V | tail -n 1) 35 | powershell "& '$SIGNTOOL' verify /pa '$TARGET'" || return 1 36 | } 37 | 38 | # 署名されていなければ署名 39 | ls $target_file_glob | while read target_file; do 40 | if is_signed "$target_file"; then 41 | echo "署名済み: $target_file" 42 | else 43 | echo "署名: $target_file" 44 | codesign "$target_file" 45 | fi 46 | done 47 | 48 | # 証明書を消去 49 | rm $CERT_PATH 50 | -------------------------------------------------------------------------------- /build_util/codesign_sv.bash: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # !!! コードサイニング証明書を取り扱うので取り扱い注意 !!! 
3 | 4 | set -eu 5 | 6 | if [ $# -ne 1 ]; then 7 | echo "引数の数が一致しません" 8 | exit 1 9 | fi 10 | target_file_glob="$1" 11 | 12 | # 指定ファイルに署名する 13 | function codesign() { 14 | TARGET="$1" 15 | SIGNTOOL=$(find "C:/Program Files (x86)/Windows Kits/10/App Certification Kit" -name "signtool.exe" | sort -V | tail -n 1) 16 | powershell "& '$SIGNTOOL' sign /n 'Open Source Developer, Yuto Ashida' /fd sha1 /t http://time.certum.pl/ '$TARGET'" 17 | powershell "& '$SIGNTOOL' sign /n 'Open Source Developer, Yuto Ashida' /fd sha256 /td sha256 /tr http://time.certum.pl/ /as '$TARGET'" 18 | } 19 | 20 | # 指定ファイルが署名されているか 21 | function is_signed() { 22 | TARGET="$1" 23 | SIGNTOOL=$(find "C:/Program Files (x86)/Windows Kits/10/App Certification Kit" -name "signtool.exe" | sort -V | tail -n 1) 24 | powershell "& '$SIGNTOOL' verify /pa '$TARGET'" || return 1 25 | } 26 | 27 | # 署名されていなければ署名 28 | # shellcheck disable=SC2012,SC2086 29 | ls $target_file_glob | while read -r target_file; do 30 | if is_signed "$target_file"; then 31 | echo "署名済み: $target_file" 32 | else 33 | echo "署名: $target_file" 34 | codesign "$target_file" 35 | fi 36 | done 37 | -------------------------------------------------------------------------------- /build_util/create_venv_and_generate_licenses.bash: -------------------------------------------------------------------------------- 1 | # 仮想環境を作ってrequirements.txtをインストールし、ライセンス一覧を生成する 2 | 3 | set -eux 4 | 5 | if [ ! 
def merge_json_string(src: str, dst: str) -> str:
    """Merge two update-info JSON arrays and return the result as JSON text.

    Both arguments are JSON arrays of objects that carry a ``"version"``
    key; every other value is a list. The result contains exactly the
    entries of ``src``. For each ``dst`` entry with the same version, the
    ``dst`` list is appended to the ``src`` list for every key that ``src``
    has, dropping duplicates while keeping first-seen order.

    Entries with the same version have their elements combined:
    >>> src = '[{"version": "0.0.1", "a": ["a1"], "b": ["b1", "b2"]}]'
    >>> dst = '[{"version": "0.0.1", "a": ["a2"], "b": ["b1", "b3"]}]'
    >>> merge_json_string(src, dst)
    '[{"version": "0.0.1", "a": ["a1", "a2"], "b": ["b1", "b2", "b3"]}]'

    Versions that exist only in ``dst`` are ignored:
    >>> src = '[{"version": "1"}]'
    >>> dst = '[{"version": "1"}, {"version": "2"}]'
    >>> merge_json_string(src, dst)
    '[{"version": "1"}]'

    A key missing from the matching ``dst`` entry leaves ``src`` as it is:
    >>> merge_json_string('[{"version": "1", "a": ["x"]}]', '[{"version": "1"}]')
    '[{"version": "1", "a": ["x"]}]'
    """
    src_json: List[Dict[str, Union[str, List[str]]]] = json.loads(src)
    dst_json: List[Dict[str, Union[str, List[str]]]] = json.loads(dst)

    for src_item in src_json:
        for dst_item in dst_json:
            if src_item["version"] != dst_item["version"]:
                continue

            for key in src_item:
                if key == "version":
                    continue

                # Append only values not seen yet, preserving order.
                # dst may lack the key entirely; treat that as "nothing to add"
                # instead of failing with KeyError.
                src_item[key] = list(
                    OrderedDict.fromkeys(src_item[key] + dst_item.get(key, []))
                )

    return json.dumps(src_json)
src_path.read_text(encoding="utf-8") 46 | dst = dst_path.read_text(encoding="utf-8") 47 | merged = merge_json_string(src, dst) 48 | output_path.write_text(merged) 49 | 50 | 51 | if __name__ == "__main__": 52 | parser = argparse.ArgumentParser() 53 | parser.add_argument("src_path", type=Path) 54 | parser.add_argument("dst_path", type=Path) 55 | parser.add_argument("output_path", type=Path) 56 | args = parser.parse_args() 57 | merge_update_infos(args.src_path, args.dst_path, args.output_path) 58 | -------------------------------------------------------------------------------- /build_util/modify_pyinstaller.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # PyInstallerをカスタマイズしてから再インストールする 4 | # 良いGPUが自動的に選択されるようにしている 5 | # https://github.com/VOICEVOX/voicevox_engine/issues/502 6 | 7 | set -eux 8 | 9 | pyinstaller_version=$(pyinstaller -v) 10 | tempdir=$(mktemp -dt modify_pyinstaller.XXXXXXXX) 11 | trap 'rm -rf "$tempdir"' EXIT 12 | git clone https://github.com/pyinstaller/pyinstaller.git "$tempdir" -b "v$pyinstaller_version" --depth 1 13 | cat > "$tempdir/bootloader/src/symbols.c" << EOF 14 | #ifdef _WIN32 15 | #include <windows.h> 16 | 17 | // https://docs.nvidia.com/gameworks/content/technologies/desktop/optimus.htm 18 | __declspec(dllexport) DWORD NvOptimusEnablement = 0x00000001; 19 | 20 | // https://gpuopen.com/learn/amdpowerxpressrequesthighperformance/ 21 | __declspec(dllexport) DWORD AmdPowerXpressRequestHighPerformance = 0x00000001; 22 | #endif 23 | EOF 24 | (cd "$tempdir/bootloader" && python ./waf all) 25 | pip install -U "$tempdir" 26 | -------------------------------------------------------------------------------- /build_util/process_voicevox_resource.bash: -------------------------------------------------------------------------------- 1 | set -eux 2 | 3 | if [ ! 
-v DOWNLOAD_RESOURCE_PATH ]; then 4 | echo "DOWNLOAD_RESOURCE_PATHが未定義です" 5 | exit 1 6 | fi 7 | 8 | rm -r speaker_info 9 | cp -r $DOWNLOAD_RESOURCE_PATH/character_info speaker_info 10 | 11 | # キャラクター情報の前処理をする 12 | python $DOWNLOAD_RESOURCE_PATH/scripts/clean_character_info.py \ 13 | --character_info_dir speaker_info/ 14 | 15 | rm -r library_info 16 | cp -r $DOWNLOAD_RESOURCE_PATH/library_info library_info 17 | 18 | # ライブラリ情報の前処理をする 19 | # キャラクター情報の前処理スクリプトを流用 20 | python $DOWNLOAD_RESOURCE_PATH/scripts/clean_character_info.py \ 21 | --character_info_dir library_info/ 22 | 23 | # マニフェスト 24 | jq -s '.[0] * .[1]' engine_manifest.json $DOWNLOAD_RESOURCE_PATH/engine/engine_manifest.json \ 25 | > engine_manifest.json.tmp 26 | mv engine_manifest.json.tmp engine_manifest.json 27 | 28 | python build_util/merge_update_infos.py \ 29 | engine_manifest_assets/update_infos.json \ 30 | $DOWNLOAD_RESOURCE_PATH/engine/engine_manifest_assets/update_infos.json \ 31 | engine_manifest_assets/update_infos.json 32 | 33 | for f in $(ls $DOWNLOAD_RESOURCE_PATH/engine/engine_manifest_assets/* | grep -v update_infos.json); do 34 | cp $f ./engine_manifest_assets/ 35 | done 36 | -------------------------------------------------------------------------------- /default.csv: -------------------------------------------------------------------------------- 1 | 朱司,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,アカシ,アカシ,1/3,C1 2 | 青山,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,アオヤマ,アオヤマ,2/4,C1 3 | 雨晴,1350,1350,7000,名詞,固有名詞,人名,姓,*,*,*,アメハレ,アメハレ,2/4,C1 4 | アル,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,アル,アル,1/2,C1 5 | うさぎ,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ウサギ,ウサギ,0/3,C1 6 | 櫻歌,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,オウカ,オーカ,1/3,C1 7 | 音街,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,オトマチ,オトマチ,2/4,C1 8 | 春日部,1350,1350,8600,名詞,固有名詞,人名,姓,*,*,*,カスカベ,カスカベ,0/4,C1 9 | 麒ヶ島,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,キガシマ,キガシマ,2/4,C1 10 | 紲星,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,キズナ,キズナ,1/3,C1 11 | 九州,1350,1350,8600,名詞,固有名詞,人名,姓,*,*,*,キュウシュウ,キュウシュウ,1/4,C1 12 | 
キョウコ,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,キョオコ,キョオコ,1/3,C1 13 | 玄野,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,クロノ,クロノ,1/3,C1 14 | 剣崎,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,ケンザキ,ケンザキ,1/4,C1 15 | 後鬼,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ゴキ,ゴキ,1/2,C1 16 | 虎太郎,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,コタロウ,コタロー,4/4,C1 17 | 琴葉,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,コトノハ,コトノハ,0/4,C1 18 | 小夜,1351,1351,2200,名詞,固有名詞,人名,名,*,*,*,サヨ,サヨ,1/2,C1 19 | 四国,1350,1350,2200,名詞,固有名詞,人名,姓,*,*,*,シコク,シコク,1/3,C1 20 | 白上,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,シラカミ,シラカミ,4/4,C1 21 | ずんだもん,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ズンダモン,ズンダモン,1/5,C1 22 | そら,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ソラ,ソラ,1/2,C1 23 | 宗麟,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ソウリン,ソウリン,1/4,C1 24 | タイプT,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,タイプティー,タイプティー,4/5,C1 25 | 中国,1350,1350,8600,名詞,固有名詞,人名,姓,*,*,*,チュウゴク,チュウゴク,1/4,C1 26 | 波音,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ナミネ,ナミネ,0/3,C1 27 | 武宏,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,タケヒロ,タケヒロ,2/4,C1 28 | ちび式じい,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,チビシキジー,チビシキジー,5/6,C1 29 | 月読,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ツクヨミ,ツクヨミ,0/4,C1 30 | つむぎ,1351,1351,7450,名詞,固有名詞,人名,名,*,*,*,ツムギ,ツムギ,0/3,C1 31 | ナースロボ,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ナースロボ,ナースロボ,4/5,C1 32 | ナナ,1351,1351,8600,名詞,固有名詞,人名,名,*,*,*,ナナ,ナナ,1/2,C1 33 | No.7,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ナンバーセブン,ナンバーセブン,5/7,C1 34 | 猫使,1350,1350,2200,名詞,固有名詞,人名,姓,*,*,*,ネコツカ,ネコツカ,2/4,C1 35 | はう,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,ハウ,ハウ,1/2,C1 36 | 春歌,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ハルカ,ハルカ,1/3,C1 37 | 桜乃,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ハルノ,ハルノ,1/3,C1 38 | ビィ,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ビー,ビー,1/2,C1 39 | ひまり,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ヒマリ,ヒマリ,0/3,C1 40 | 紅桜,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ベニザクラ,ベニザクラ,3/5,C1 41 | 聖騎士,1350,1350,8600,名詞,固有名詞,人名,姓,*,*,*,ホーリーナイト,ホーリーナイト,5/7,C1 42 | WhiteCUL,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ホワイトカル,ホワイトカル,5/6,C1 43 | ミコ,1351,1351,3900,名詞,固有名詞,人名,名,*,*,*,ミコ,ミコ,1/2,C1 44 | 水奈瀬,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ミナセ,ミナセ,2/3,C1 45 | 冥鳴,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,メイメイ,メイメイ,1/4,C1 46 | 
鳴花,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,メイカ,メイカ,1/3,C1 47 | めたん,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,メタン,メタン,1/3,C1 48 | 雌雄,1351,1351,8600,名詞,固有名詞,人名,名,*,*,*,メスオ,メスオ,0/3,C1 49 | もち子さん,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,モチコサン,モチコサン,1/5,C1 50 | モチノ,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,モチノ,モチノ,0/3,C1 51 | 結月,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ユヅキ,ユヅキ,1/3,C1 52 | 弓鶴,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ユヅル,ユヅル,0/3,C1 53 | リツ,1351,1351,3900,名詞,固有名詞,人名,名,*,*,*,リツ,リツ,1/2,C1 54 | 六花,1351,1351,4900,名詞,固有名詞,人名,名,*,*,*,リッカ,リッカ,1/3,C1 55 | 龍星,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,リュウセイ,リュウセイ,1/4,C1 56 | 雀松,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ワカマツ,ワカマツ,2/4,C1 57 | COEIROINK,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエイロインク,コエイロインク,5/7,C1 58 | coeiroink,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエイロインク,コエイロインク,5/7,C1 59 | CoeFont,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエフォント,コエフォント,3/5,C1 60 | coefont,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエフォント,コエフォント,3/5,C1 61 | TALQu,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,トーク,トーク,0/3,C1 62 | talqu,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,トーク,トーク,0/3,C1 63 | VOICEVOX,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,ボイスボックス,ボイスボックス,4/7,C1 64 | voicevox,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,ボイスボックス,ボイスボックス,4/7,C1 65 | SHAREVOX,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,シェアボックス,シェアボックス,3/6,C1 66 | sharevox,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,シェアボックス,シェアボックス,3/6,C1 67 | 小春音,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,コハルネ,コハルネ,0/4,C1 68 | アミ,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,アミ,アミ,1/2,C1 69 | あみたろ,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,アミタロ,アミタロ,3/4,C1 70 | 白痴ー,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ハクチイ,ハクチー,1/4,C1 71 | つくよみちゃん,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ツクヨミチャン,ツクヨミチャン,0/6,C1 72 | らごぱすたん,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ラゴパスタン,ラゴパスタン,1/6,C1 73 | らごぱすブラック,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ラゴパスブラック,ラゴパスブラック,6/8,C1 74 | らごぱすホワイト,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ラゴパスホワイト,ラゴパスホワイト,7/8,C1 75 | ついなちゃん,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ツイナチャン,ツイナチャン,1/5,C1 76 | 風花ゆき,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,カザハナユキ,カザハナユキ,5/6,C1 77 | 風花,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,カザハナ,カザハナ,0/4,C1 78 | 
安倍広葉,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,アベヒロハ,アベヒロハ,3/5,C1 79 | 安倍,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,アベ,アベ,0/2,C1 80 | 鈴乃,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,スズノ,スズノ,0/3,C1 -------------------------------------------------------------------------------- /default_setting.yml: -------------------------------------------------------------------------------- 1 | allow_origin: null 2 | cors_policy_mode: localapps 3 | -------------------------------------------------------------------------------- /docs/VOICEVOX音声合成エンジンとの連携.md: -------------------------------------------------------------------------------- 1 | メモ書き程度ですが、どういう方針で開発を進めているかを紹介します。 2 | 3 | - バージョンが上がっても、`/audio_query`で返ってくる値をそのまま`/synthesis`に POST すれば音声合成できるようにする予定です 4 | - `AudioQuery`のパラメータは増えますが、なるべくデフォルト値で以前と変わらない音声が生成されるようにします 5 | - バージョン 0.7 から音声スタイルが実装されました。スタイルの情報は`/speakers`から取得できます 6 | - スタイルの情報にある`style_id`を`speaker`に指定することで、今まで通り音声合成ができます 7 | - style_id の指定先が speaker なのは互換性のためです 8 | -------------------------------------------------------------------------------- /docs/api/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/docs/api/.gitkeep -------------------------------------------------------------------------------- /docs/licenses/open_jtalk/COPYING: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------- */ 2 | /* The Japanese TTS System "Open JTalk" */ 3 | /* developed by HTS Working Group */ 4 | /* http://open-jtalk.sourceforge.net/ */ 5 | /* ----------------------------------------------------------------- */ 6 | /* */ 7 | /* Copyright (c) 2008-2016 Nagoya Institute of Technology */ 8 | /* Department of Computer Science */ 9 | /* */ 10 | /* All rights reserved. 
*/ 11 | /* */ 12 | /* Redistribution and use in source and binary forms, with or */ 13 | /* without modification, are permitted provided that the following */ 14 | /* conditions are met: */ 15 | /* */ 16 | /* - Redistributions of source code must retain the above copyright */ 17 | /* notice, this list of conditions and the following disclaimer. */ 18 | /* - Redistributions in binary form must reproduce the above */ 19 | /* copyright notice, this list of conditions and the following */ 20 | /* disclaimer in the documentation and/or other materials provided */ 21 | /* with the distribution. */ 22 | /* - Neither the name of the HTS working group nor the names of its */ 23 | /* contributors may be used to endorse or promote products derived */ 24 | /* from this software without specific prior written permission. */ 25 | /* */ 26 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 27 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 28 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 29 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 30 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 31 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 32 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 33 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 34 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 35 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 36 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 37 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 38 | /* POSSIBILITY OF SUCH DAMAGE. 
*/ 39 | /* ----------------------------------------------------------------- */ 40 | -------------------------------------------------------------------------------- /docs/licenses/open_jtalk/mecab-naist-jdic/COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009, Nara Institute of Science and Technology, Japan. 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | Redistributions of source code must retain the above copyright notice, 10 | this list of conditions and the following disclaimer. 11 | Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | Neither the name of the Nara Institute of Science and Technology 15 | (NAIST) nor the names of its contributors may be used to endorse or 16 | promote products derived from this software without specific prior 17 | written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 23 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 26 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 27 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 28 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | 31 | /* ----------------------------------------------------------------- */ 32 | /* The Japanese TTS System "Open JTalk" */ 33 | /* developed by HTS Working Group */ 34 | /* http://open-jtalk.sourceforge.net/ */ 35 | /* ----------------------------------------------------------------- */ 36 | /* */ 37 | /* Copyright (c) 2008-2016 Nagoya Institute of Technology */ 38 | /* Department of Computer Science */ 39 | /* */ 40 | /* All rights reserved. */ 41 | /* */ 42 | /* Redistribution and use in source and binary forms, with or */ 43 | /* without modification, are permitted provided that the following */ 44 | /* conditions are met: */ 45 | /* */ 46 | /* - Redistributions of source code must retain the above copyright */ 47 | /* notice, this list of conditions and the following disclaimer. */ 48 | /* - Redistributions in binary form must reproduce the above */ 49 | /* copyright notice, this list of conditions and the following */ 50 | /* disclaimer in the documentation and/or other materials provided */ 51 | /* with the distribution. */ 52 | /* - Neither the name of the HTS working group nor the names of its */ 53 | /* contributors may be used to endorse or promote products derived */ 54 | /* from this software without specific prior written permission. */ 55 | /* */ 56 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 57 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 58 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 59 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 60 | /* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 61 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 62 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 63 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 64 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 65 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 66 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 67 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 68 | /* POSSIBILITY OF SUCH DAMAGE. */ 69 | /* ----------------------------------------------------------------- */ 70 | -------------------------------------------------------------------------------- /docs/licenses/open_jtalk/mecab/COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2001-2008, Taku Kudo 2 | Copyright (c) 2004-2008, Nippon Telegraph and Telephone Corporation 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above 9 | copyright notice, this list of conditions and the 10 | following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above 13 | copyright notice, this list of conditions and the 14 | following disclaimer in the documentation and/or other 15 | materials provided with the distribution. 16 | 17 | * Neither the name of the Nippon Telegraph and Telegraph Corporation 18 | nor the names of its contributors may be used to endorse or 19 | promote products derived from this software without specific 20 | prior written permission. 
21 | 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED 23 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 28 | TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 29 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | /* ----------------------------------------------------------------- */ 32 | /* The Japanese TTS System "Open JTalk" */ 33 | /* developed by HTS Working Group */ 34 | /* http://open-jtalk.sourceforge.net/ */ 35 | /* ----------------------------------------------------------------- */ 36 | /* */ 37 | /* Copyright (c) 2008-2016 Nagoya Institute of Technology */ 38 | /* Department of Computer Science */ 39 | /* */ 40 | /* All rights reserved. */ 41 | /* */ 42 | /* Redistribution and use in source and binary forms, with or */ 43 | /* without modification, are permitted provided that the following */ 44 | /* conditions are met: */ 45 | /* */ 46 | /* - Redistributions of source code must retain the above copyright */ 47 | /* notice, this list of conditions and the following disclaimer. */ 48 | /* - Redistributions in binary form must reproduce the above */ 49 | /* copyright notice, this list of conditions and the following */ 50 | /* disclaimer in the documentation and/or other materials provided */ 51 | /* with the distribution. 
*/ 52 | /* - Neither the name of the HTS working group nor the names of its */ 53 | /* contributors may be used to endorse or promote products derived */ 54 | /* from this software without specific prior written permission. */ 55 | /* */ 56 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 57 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 58 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 59 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 60 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 61 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 62 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 63 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 64 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 65 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 66 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 67 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 68 | /* POSSIBILITY OF SUCH DAMAGE. */ 69 | /* ----------------------------------------------------------------- */ 70 | -------------------------------------------------------------------------------- /docs/licenses/world/LICENSE.txt: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------- */ 2 | /* WORLD: High-quality speech analysis, */ 3 | /* manipulation and synthesis system */ 4 | /* developed by M. Morise */ 5 | /* http://www.kisc.meiji.ac.jp/~mmorise/world/english/ */ 6 | /* ----------------------------------------------------------------- */ 7 | /* */ 8 | /* Copyright (c) 2010 M. Morise */ 9 | /* */ 10 | /* All rights reserved. 
*/ 11 | /* */ 12 | /* Redistribution and use in source and binary forms, with or */ 13 | /* without modification, are permitted provided that the following */ 14 | /* conditions are met: */ 15 | /* */ 16 | /* - Redistributions of source code must retain the above copyright */ 17 | /* notice, this list of conditions and the following disclaimer. */ 18 | /* - Redistributions in binary form must reproduce the above */ 19 | /* copyright notice, this list of conditions and the following */ 20 | /* disclaimer in the documentation and/or other materials provided */ 21 | /* with the distribution. */ 22 | /* - Neither the name of the M. Morise nor the names of its */ 23 | /* contributors may be used to endorse or promote products derived */ 24 | /* from this software without specific prior written permission. */ 25 | /* */ 26 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 27 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 28 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 29 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 30 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 31 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 32 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 33 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 34 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 35 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 36 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 37 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 38 | /* POSSIBILITY OF SUCH DAMAGE. 
*/ 39 | /* ----------------------------------------------------------------- */ 40 | -------------------------------------------------------------------------------- /engine_manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "manifest_version": "0.13.1", 3 | "name": "DUMMY SHAREVOX Engine", 4 | "brand_name": "SHAREVOX", 5 | "uuid": "8446e858-4181-44e2-ac58-4192c8b48048", 6 | "version": "999.999.999", 7 | "url": "https://github.com/SHAREVOX/sharevox_engine", 8 | "command": "run", 9 | "port": 50025, 10 | "icon": "engine_manifest_assets/icon.png", 11 | "default_sampling_rate": 48000, 12 | "terms_of_service": "engine_manifest_assets/terms_of_service.md", 13 | "update_infos": "engine_manifest_assets/update_infos.json", 14 | "dependency_licenses": "engine_manifest_assets/dependency_licenses.json", 15 | "supported_vvlib_manifest_version": "0.15.0", 16 | "supported_features": { 17 | "adjust_mora_pitch": { 18 | "type": "bool", 19 | "value": true, 20 | "name": "モーラごとの音高の調整" 21 | }, 22 | "adjust_phoneme_length": { 23 | "type": "bool", 24 | "value": true, 25 | "name": "音素ごとの長さの調整" 26 | }, 27 | "adjust_speed_scale": { 28 | "type": "bool", 29 | "value": true, 30 | "name": "全体の話速の調整" 31 | }, 32 | "adjust_pitch_scale": { 33 | "type": "bool", 34 | "value": true, 35 | "name": "全体の音高の調整" 36 | }, 37 | "adjust_intonation_scale": { 38 | "type": "bool", 39 | "value": true, 40 | "name": "全体の抑揚の調整" 41 | }, 42 | "adjust_volume_scale": { 43 | "type": "bool", 44 | "value": true, 45 | "name": "全体の音量の調整" 46 | }, 47 | "interrogative_upspeak": { 48 | "type": "bool", 49 | "value": true, 50 | "name": "疑問文の自動調整" 51 | }, 52 | "synthesis_morphing" : { 53 | "type": "bool", 54 | "value": true, 55 | "name": "2人の話者でモーフィングした音声を合成" 56 | }, 57 | "manage_library": { 58 | "type": "bool", 59 | "value": true, 60 | "name": "音声ライブラリのインストール・アンインストール" 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- 
/engine_manifest_assets/dependency_licenses.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "dummy library", 4 | "version": "0.0.1", 5 | "license": "dummy license", 6 | "text": "dummy license text" 7 | } 8 | ] -------------------------------------------------------------------------------- /engine_manifest_assets/downloadable_libraries.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "Dummy Library", 4 | "uuid": "2bb8bccf-1c3f-4bc9-959a-f388e37af3ad", 5 | "version": "0.0.1", 6 | "download_url": "https://github.com/VOICEVOX/voicevox_engine/archive/d7cf31c058bc83e1abf8e14d4231a06409c4cc2d.zip", 7 | "bytes": 1000, 8 | "speakers": [ 9 | { 10 | "speaker": { 11 | "name": "dummy1", 12 | "speaker_uuid": "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff", 13 | "styles": [ 14 | { 15 | "name": "style1", 16 | "id": 0 17 | }, 18 | { 19 | "name": "style2", 20 | "id": 2 21 | } 22 | ], 23 | "version": "0.0.1" 24 | }, 25 | "speaker_info": { 26 | "policy": "", 27 | "portrait": "", 28 | "style_infos": [ 29 | { 30 | "id": 0, 31 | "icon": "", 32 | "voice_samples": ["", "", ""] 33 | }, 34 | { 35 | "id": 2, 36 | "icon": "", 37 | "voice_samples": ["", "", ""] 38 | } 39 | ] 40 | } 41 | } 42 | ] 43 | } 44 | ] 45 | -------------------------------------------------------------------------------- /engine_manifest_assets/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/engine_manifest_assets/icon.png -------------------------------------------------------------------------------- /engine_manifest_assets/terms_of_service.md: -------------------------------------------------------------------------------- 1 | dummy terms of service --------------------------------------------------------------------------------
/engine_manifest_assets/update_infos.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "version": "0.2.0", 4 | "descriptions": [ 5 | "新しい音声合成手法への移行", 6 | "VOICEVOX 0.14の追従", 7 | "Rust版コアへ移行", 8 | "エンジンが起動しないことがある問題の修正", 9 | "コードサイニング証明書の付与" 10 | ], 11 | "contributors": [ 12 | "PickledChair", 13 | "qryxip" 14 | ] 15 | }, 16 | { 17 | "version": "0.1.7", 18 | "descriptions": [ 19 | "異常終了する問題の修正", 20 | "デバッグログの削除" 21 | ], 22 | "contributors": [] 23 | }, 24 | { 25 | "version": "0.1.6", 26 | "descriptions": [ 27 | "出力音声の長さや各母音・子音の長さが想定と異なっていた問題の修正", 28 | "エンジン関連の一時フォルダ・ファイルが残る問題の修正", 29 | "Docker版エンジンの提供" 30 | ], 31 | "contributors": [] 32 | }, 33 | { 34 | "version": "0.1.5", 35 | "descriptions": [ 36 | "音声ライブラリの上書きインストールができるように変更" 37 | ], 38 | "contributors": [ 39 | "task4233" 40 | ] 41 | }, 42 | { 43 | "version": "0.1.4", 44 | "descriptions": [ 45 | "複数スタイルある音声ライブラリをインストールした際の挙動の修正", 46 | "同じキャラを含む音声ライブラリをインストールした際、起動時にエラーが発生するのを修正" 47 | ], 48 | "contributors": [] 49 | }, 50 | { 51 | "version": "0.1.3", 52 | "descriptions": [ 53 | "Windowsにおいて、ユーザー名に日本語が含まれる際に起動できない問題を修正" 54 | ], 55 | "contributors": [ 56 | "shigobu" 57 | ] 58 | }, 59 | { 60 | "version": "0.1.2", 61 | "descriptions": [ 62 | "前後無音が適用されない問題の修正" 63 | ], 64 | "contributors": [] 65 | }, 66 | { 67 | "version": "0.1.1", 68 | "descriptions": [ 69 | "Linux GPU版が起動できない問題を修正", 70 | "「あ・い・う・え・お・を・ん」が調整・出力できない問題を修正" 71 | ], 72 | "contributors": [] 73 | }, 74 | { 75 | "version": "0.1.0", 76 | "descriptions": [ 77 | "ファーストリリース" 78 | ], 79 | "contributors": [] 80 | } 81 | ] 82 | -------------------------------------------------------------------------------- /get_cost_candidates.py: -------------------------------------------------------------------------------- 1 | """ 2 | voicevox_engine/part_of_speech_data.pyのcost_candidatesを計算するプログラムです。 3 | 引数のnaist_jdic_pathには、open_jtalkのsrc/mecab-naist-jdic/naist-jdic.csvを指定してください。 4 | 5 | 実行例: 6 | python 
def get_candidates(
    naist_jdic_path: Path,
    pos: str,
    pos_detail_1: str,
    pos_detail_2: str,
    pos_detail_3: str,
) -> List[int]:
    """Compute the 11 ``cost_candidates`` values for one part of speech.

    Reads the comma-separated dictionary at ``naist_jdic_path`` (opened as
    UTF-8) and collects the cost (4th field) of every entry whose 5th-8th
    fields equal ``pos``, ``pos_detail_1``, ``pos_detail_2`` and
    ``pos_detail_3`` respectively.

    Args:
        naist_jdic_path: Path to the dictionary CSV; every non-blank line
            must consist of exactly 15 comma-separated fields.
        pos: Part of speech to match (5th field).
        pos_detail_1: First part-of-speech detail to match (6th field).
        pos_detail_2: Second part-of-speech detail to match (7th field).
        pos_detail_3: Third part-of-speech detail to match (8th field).

    Returns:
        A list of 11 ints, in this order:

        * index 0: the minimum matching cost minus 1;
        * indices 1-5: five evenly spaced steps from the 1% quantile
          (index 1) up to the most frequent cost (index 5);
        * indices 6-9: four evenly spaced steps from just above the most
          frequent cost up to the 99% quantile (index 9);
        * index 10: the maximum matching cost plus 1.

        Quantiles and intermediate steps are converted to integers by
        truncation.

    Raises:
        AssertionError: If no entry matches the requested part of speech.
        ValueError: If a non-blank line does not have exactly 15 fields, or
            the cost of a matching entry is not an integer.
    """
    target = (pos, pos_detail_1, pos_detail_2, pos_detail_3)
    costs: List[int] = []
    with naist_jdic_path.open(encoding="utf-8") as f:
        for line in f:
            # A blank line (e.g. a trailing empty line) carries no entry;
            # skip it instead of failing the 15-field unpack below.
            if not line.strip():
                continue
            # The unpack deliberately names all 15 fields so that a malformed
            # row is reported (ValueError) rather than silently misread.
            (
                _,
                _,
                _,
                raw_cost,
                row_pos,
                row_pos_detail_1,
                row_pos_detail_2,
                row_pos_detail_3,
                _,
                _,
                _,
                _,
                _,
                _,
                _,
            ) = line.split(",")
            if (
                row_pos,
                row_pos_detail_1,
                row_pos_detail_2,
                row_pos_detail_3,
            ) == target:
                costs.append(int(raw_cost))

    # Kept as an assertion so the failure type seen by callers is unchanged;
    # the message now says what was searched for and where.
    assert costs, f"no entries with part of speech {target} found in {naist_jdic_path}"

    cost_min = min(costs) - 1
    cost_1per = np.quantile(costs, 0.01).astype(np.int64)
    cost_mode = statistics.mode(costs)
    cost_99per = np.quantile(costs, 0.99).astype(np.int64)
    cost_max = max(costs) + 1

    # Lower half: 1% quantile -> mode in four equal steps (five values).
    lower = [int(cost_1per + (cost_mode - cost_1per) * i / 4) for i in range(5)]
    # Upper half: mode -> 99% quantile in four equal steps; the mode itself is
    # already the last value of the lower half, so start from step 1.
    upper = [int(cost_mode + (cost_99per - cost_mode) * i / 4) for i in range(1, 5)]
    return [cost_min] + lower + upper + [cost_max]
parser.parse_args() 83 | print( 84 | get_candidates( 85 | naist_jdic_path=args.naist_jdic_path, 86 | pos=args.pos, 87 | pos_detail_1=args.pos_detail_1, 88 | pos_detail_2=args.pos_detail_2, 89 | pos_detail_3=args.pos_detail_3, 90 | ) 91 | ) 92 | -------------------------------------------------------------------------------- /library_info/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/library_info/.gitkeep -------------------------------------------------------------------------------- /make_docs.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from voicevox_engine.dev.core import mock as core 5 | from voicevox_engine.dev.synthesis_engine.mock import MockSynthesisEngine 6 | from voicevox_engine.setting import USER_SETTING_PATH, SettingLoader 7 | from voicevox_engine.utility.path_utility import get_save_dir 8 | 9 | if __name__ == "__main__": 10 | import run 11 | 12 | os.makedirs(get_save_dir() / "speaker_info", exist_ok=True) 13 | app = run.generate_app( 14 | synthesis_engines={"mock": MockSynthesisEngine(speakers=core.metas())}, 15 | latest_core_version="mock", 16 | setting_loader=SettingLoader(USER_SETTING_PATH), 17 | ) 18 | with open("docs/api/index.html", "w") as f: 19 | f.write( 20 | """ 21 | 22 | 23 | sharevox_engine API Document 24 | 25 | 26 | 27 | 28 |
29 | 30 | 33 | 34 | """ 35 | % json.dumps(app.openapi()) 36 | ) 37 | -------------------------------------------------------------------------------- /presets.yaml: -------------------------------------------------------------------------------- 1 | - id: 1 2 | name: サンプルプリセット 3 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 4 | style_id: 0 5 | speedScale: 1 6 | pitchScale: 0 7 | intonationScale: 1 8 | volumeScale: 1 9 | prePhonemeLength: 0.1 10 | postPhonemeLength: 0.1 11 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.pysen] 2 | version = "0.10.5" 3 | 4 | [tool.pysen.lint] 5 | enable_black = true 6 | enable_flake8 = true 7 | enable_isort = true 8 | enable_mypy = false # TODO: eliminate errors and enable at CI 9 | mypy_preset = "entry" # TODO: "strict" 10 | line_length = 88 11 | py_version = "py311" 12 | isort_known_first_party = ["voicevox_engine"] 13 | isort_known_third_party = ["numpy"] 14 | [[tool.pysen.lint.mypy_targets]] 15 | paths = [".", "voicevox_engine/"] 16 | 17 | [tool.black] # automatically generated by pysen 18 | # pysen ignores and overwrites any modifications 19 | line-length = 88 20 | target-version = ["py310", "py311"] 21 | 22 | 23 | [tool.isort] # automatically generated by pysen 24 | # pysen ignores and overwrites any modifications 25 | default_section = "THIRDPARTY" 26 | ensure_newline_before_comments = true 27 | force_grid_wrap = 0 28 | force_single_line = false 29 | include_trailing_comma = true 30 | known_first_party = ["voicevox_engine"] 31 | known_third_party = ["numpy"] 32 | line_length = 88 33 | multi_line_output = 3 34 | use_parentheses = true 35 | 36 | [tool.poetry] 37 | name = "voicevox_engine" 38 | version = "0.0.0" 39 | description = "" 40 | authors = ["Hiroshiba "] 41 | 42 | [tool.poetry.dependencies] 43 | python = "~3.11" 44 | numpy = "^1.20.0" 45 | fastapi = "^0.70.0" 46 | 
python-multipart = "^0.0.5" 47 | uvicorn = "^0.15.0" 48 | aiofiles = "^0.7.0" 49 | soundfile = "^0.10.3.post1" 50 | scipy = "^1.7.1" 51 | pyyaml = "^6.0" 52 | pyworld = "^0.3.0" 53 | requests = "^2.28.1" 54 | jinja2 = "^3.1.2" 55 | pyopenjtalk = {git = "https://github.com/VOICEVOX/pyopenjtalk", rev = "b35fc89fe42948a28e33aed886ea145a51113f88"} 56 | semver = "^3.0.0" 57 | platformdirs = "^3.10.0" 58 | 59 | [tool.poetry.group.dev.dependencies] 60 | cython = "^0.29.34,>=0.29.33" # NOTE: for Python 3.11 61 | pyinstaller = "^5.6,<5.7.0" # NOTE: 5.7.0 or higher will fail to build the bootloader. 62 | pre-commit = "^2.16.0" 63 | atomicwrites = "^1.4.0" 64 | colorama = "^0.4.4" 65 | poetry = "^1.3.1" 66 | 67 | [tool.poetry.group.test.dependencies] 68 | pysen = "~0.10.5" 69 | black = "^22.12.0" 70 | flake8-bugbear = "^23.1.0" 71 | flake8 = "^6.0.0" 72 | isort = "^5.12.0" 73 | mypy = "~0.991" 74 | pytest = "^6.2.5" 75 | coveralls = "^3.2.0" 76 | poetry = "^1.3.1" 77 | 78 | [tool.poetry.group.license.dependencies] 79 | pip-licenses = "^4.2.0" 80 | 81 | [build-system] 82 | requires = ["poetry-core"] 83 | build-backend = "poetry.core.masonry.api" 84 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | aiofiles==0.7.0 ; python_version >= "3.11" and python_version < "3.12" 2 | altgraph==0.17.3 ; python_version >= "3.11" and python_version < "3.12" 3 | anyio==3.7.1 ; python_version >= "3.11" and python_version < "3.12" 4 | asgiref==3.7.2 ; python_version >= "3.11" and python_version < "3.12" 5 | atomicwrites==1.4.1 ; python_version >= "3.11" and python_version < "3.12" 6 | attrs==23.1.0 ; python_version >= "3.11" and python_version < "3.12" 7 | build==0.10.0 ; python_version >= "3.11" and python_version < "3.12" 8 | cachecontrol[filecache]==0.13.1 ; python_version >= "3.11" and python_version < "3.12" 9 | certifi==2023.7.22 ; python_version >= 
"3.11" and python_version < "3.12" 10 | cffi==1.15.1 ; python_version >= "3.11" and python_version < "3.12" 11 | cfgv==3.4.0 ; python_version >= "3.11" and python_version < "3.12" 12 | charset-normalizer==3.2.0 ; python_version >= "3.11" and python_version < "3.12" 13 | cleo==2.0.1 ; python_version >= "3.11" and python_version < "3.12" 14 | click==8.1.7 ; python_version >= "3.11" and python_version < "3.12" 15 | colorama==0.4.6 ; python_version >= "3.11" and python_version < "3.12" 16 | crashtest==0.4.1 ; python_version >= "3.11" and python_version < "3.12" 17 | cryptography==41.0.3 ; python_version >= "3.11" and python_version < "3.12" and sys_platform == "linux" 18 | cython==0.29.36 ; python_version >= "3.11" and python_version < "3.12" 19 | distlib==0.3.7 ; python_version >= "3.11" and python_version < "3.12" 20 | dulwich==0.21.5 ; python_version >= "3.11" and python_version < "3.12" 21 | fastapi==0.70.1 ; python_version >= "3.11" and python_version < "3.12" 22 | filelock==3.12.2 ; python_version >= "3.11" and python_version < "3.12" 23 | h11==0.14.0 ; python_version >= "3.11" and python_version < "3.12" 24 | identify==2.5.27 ; python_version >= "3.11" and python_version < "3.12" 25 | idna==3.4 ; python_version >= "3.11" and python_version < "3.12" 26 | importlib-metadata==6.8.0 ; python_version >= "3.11" and python_version < "3.12" 27 | installer==0.7.0 ; python_version >= "3.11" and python_version < "3.12" 28 | jaraco-classes==3.3.0 ; python_version >= "3.11" and python_version < "3.12" 29 | jeepney==0.8.0 ; python_version >= "3.11" and python_version < "3.12" and sys_platform == "linux" 30 | jinja2==3.1.2 ; python_version >= "3.11" and python_version < "3.12" 31 | jsonschema==4.17.3 ; python_version >= "3.11" and python_version < "3.12" 32 | keyring==24.2.0 ; python_version >= "3.11" and python_version < "3.12" 33 | macholib==1.16.2 ; python_version >= "3.11" and python_version < "3.12" and sys_platform == "darwin" 34 | markupsafe==2.1.3 ; python_version >= 
"3.11" and python_version < "3.12" 35 | more-itertools==10.1.0 ; python_version >= "3.11" and python_version < "3.12" 36 | msgpack==1.0.5 ; python_version >= "3.11" and python_version < "3.12" 37 | nodeenv==1.8.0 ; python_version >= "3.11" and python_version < "3.12" 38 | numpy==1.25.2 ; python_version >= "3.11" and python_version < "3.12" 39 | packaging==23.1 ; python_version >= "3.11" and python_version < "3.12" 40 | pefile==2023.2.7 ; python_version >= "3.11" and python_version < "3.12" and sys_platform == "win32" 41 | pexpect==4.8.0 ; python_version >= "3.11" and python_version < "3.12" 42 | pkginfo==1.9.6 ; python_version >= "3.11" and python_version < "3.12" 43 | platformdirs==3.10.0 ; python_version >= "3.11" and python_version < "3.12" 44 | poetry-core==1.7.0 ; python_version >= "3.11" and python_version < "3.12" 45 | poetry-plugin-export==1.5.0 ; python_version >= "3.11" and python_version < "3.12" 46 | poetry==1.6.1 ; python_version >= "3.11" and python_version < "3.12" 47 | pre-commit==2.21.0 ; python_version >= "3.11" and python_version < "3.12" 48 | ptyprocess==0.7.0 ; python_version >= "3.11" and python_version < "3.12" 49 | pycparser==2.21 ; python_version >= "3.11" and python_version < "3.12" 50 | pydantic==1.10.12 ; python_version >= "3.11" and python_version < "3.12" 51 | pyinstaller-hooks-contrib==2023.7 ; python_version >= "3.11" and python_version < "3.12" 52 | pyinstaller==5.6.2 ; python_version >= "3.11" and python_version < "3.12" 53 | pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@b35fc89fe42948a28e33aed886ea145a51113f88 ; python_version >= "3.11" and python_version < "3.12" 54 | pyproject-hooks==1.0.0 ; python_version >= "3.11" and python_version < "3.12" 55 | pyrsistent==0.19.3 ; python_version >= "3.11" and python_version < "3.12" 56 | python-multipart==0.0.5 ; python_version >= "3.11" and python_version < "3.12" 57 | pywin32-ctypes==0.2.2 ; python_version >= "3.11" and python_version < "3.12" and sys_platform == "win32" 58 | 
pyworld==0.3.4 ; python_version >= "3.11" and python_version < "3.12" 59 | pyyaml==6.0.1 ; python_version >= "3.11" and python_version < "3.12" 60 | rapidfuzz==2.15.1 ; python_version >= "3.11" and python_version < "3.12" 61 | requests-toolbelt==1.0.0 ; python_version >= "3.11" and python_version < "3.12" 62 | requests==2.31.0 ; python_version >= "3.11" and python_version < "3.12" 63 | scipy==1.11.2 ; python_version >= "3.11" and python_version < "3.12" 64 | secretstorage==3.3.3 ; python_version >= "3.11" and python_version < "3.12" and sys_platform == "linux" 65 | semver==3.0.1 ; python_version >= "3.11" and python_version < "3.12" 66 | setuptools==68.1.2 ; python_version >= "3.11" and python_version < "3.12" 67 | shellingham==1.5.3 ; python_version >= "3.11" and python_version < "3.12" 68 | six==1.16.0 ; python_version >= "3.11" and python_version < "3.12" 69 | sniffio==1.3.0 ; python_version >= "3.11" and python_version < "3.12" 70 | soundfile==0.10.3.post1 ; python_version >= "3.11" and python_version < "3.12" 71 | starlette==0.16.0 ; python_version >= "3.11" and python_version < "3.12" 72 | tomlkit==0.12.1 ; python_version >= "3.11" and python_version < "3.12" 73 | tqdm==4.66.1 ; python_version >= "3.11" and python_version < "3.12" 74 | trove-classifiers==2023.8.7 ; python_version >= "3.11" and python_version < "3.12" 75 | typing-extensions==4.7.1 ; python_version >= "3.11" and python_version < "3.12" 76 | urllib3==2.0.4 ; python_version >= "3.11" and python_version < "3.12" 77 | uvicorn==0.15.0 ; python_version >= "3.11" and python_version < "3.12" 78 | virtualenv==20.24.3 ; python_version >= "3.11" and python_version < "3.12" 79 | xattr==0.10.1 ; python_version >= "3.11" and python_version < "3.12" and sys_platform == "darwin" 80 | zipp==3.16.2 ; python_version >= "3.11" and python_version < "3.12" 81 | -------------------------------------------------------------------------------- /requirements-license.txt: 
-------------------------------------------------------------------------------- 1 | aiofiles==0.7.0 ; python_version >= "3.11" and python_version < "3.12" 2 | anyio==3.7.1 ; python_version >= "3.11" and python_version < "3.12" 3 | asgiref==3.7.2 ; python_version >= "3.11" and python_version < "3.12" 4 | certifi==2023.7.22 ; python_version >= "3.11" and python_version < "3.12" 5 | cffi==1.15.1 ; python_version >= "3.11" and python_version < "3.12" 6 | charset-normalizer==3.2.0 ; python_version >= "3.11" and python_version < "3.12" 7 | click==8.1.7 ; python_version >= "3.11" and python_version < "3.12" 8 | colorama==0.4.6 ; python_version >= "3.11" and python_version < "3.12" and platform_system == "Windows" 9 | cython==0.29.36 ; python_version >= "3.11" and python_version < "3.12" 10 | fastapi==0.70.1 ; python_version >= "3.11" and python_version < "3.12" 11 | h11==0.14.0 ; python_version >= "3.11" and python_version < "3.12" 12 | idna==3.4 ; python_version >= "3.11" and python_version < "3.12" 13 | jinja2==3.1.2 ; python_version >= "3.11" and python_version < "3.12" 14 | markupsafe==2.1.3 ; python_version >= "3.11" and python_version < "3.12" 15 | numpy==1.25.2 ; python_version >= "3.11" and python_version < "3.12" 16 | pip-licenses==4.3.2 ; python_version >= "3.11" and python_version < "3.12" 17 | platformdirs==3.10.0 ; python_version >= "3.11" and python_version < "3.12" 18 | prettytable==3.8.0 ; python_version >= "3.11" and python_version < "3.12" 19 | pycparser==2.21 ; python_version >= "3.11" and python_version < "3.12" 20 | pydantic==1.10.12 ; python_version >= "3.11" and python_version < "3.12" 21 | pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@b35fc89fe42948a28e33aed886ea145a51113f88 ; python_version >= "3.11" and python_version < "3.12" 22 | python-multipart==0.0.5 ; python_version >= "3.11" and python_version < "3.12" 23 | pyworld==0.3.4 ; python_version >= "3.11" and python_version < "3.12" 24 | pyyaml==6.0.1 ; python_version >= "3.11" and 
python_version < "3.12" 25 | requests==2.31.0 ; python_version >= "3.11" and python_version < "3.12" 26 | scipy==1.11.2 ; python_version >= "3.11" and python_version < "3.12" 27 | semver==3.0.1 ; python_version >= "3.11" and python_version < "3.12" 28 | six==1.16.0 ; python_version >= "3.11" and python_version < "3.12" 29 | sniffio==1.3.0 ; python_version >= "3.11" and python_version < "3.12" 30 | soundfile==0.10.3.post1 ; python_version >= "3.11" and python_version < "3.12" 31 | starlette==0.16.0 ; python_version >= "3.11" and python_version < "3.12" 32 | tqdm==4.66.1 ; python_version >= "3.11" and python_version < "3.12" 33 | typing-extensions==4.7.1 ; python_version >= "3.11" and python_version < "3.12" 34 | urllib3==2.0.4 ; python_version >= "3.11" and python_version < "3.12" 35 | uvicorn==0.15.0 ; python_version >= "3.11" and python_version < "3.12" 36 | wcwidth==0.2.6 ; python_version >= "3.11" and python_version < "3.12" 37 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiofiles==0.7.0 ; python_version >= "3.11" and python_version < "3.12" 2 | anyio==3.7.1 ; python_version >= "3.11" and python_version < "3.12" 3 | asgiref==3.7.2 ; python_version >= "3.11" and python_version < "3.12" 4 | certifi==2023.7.22 ; python_version >= "3.11" and python_version < "3.12" 5 | cffi==1.15.1 ; python_version >= "3.11" and python_version < "3.12" 6 | charset-normalizer==3.2.0 ; python_version >= "3.11" and python_version < "3.12" 7 | click==8.1.7 ; python_version >= "3.11" and python_version < "3.12" 8 | colorama==0.4.6 ; python_version >= "3.11" and python_version < "3.12" and platform_system == "Windows" 9 | cython==0.29.36 ; python_version >= "3.11" and python_version < "3.12" 10 | fastapi==0.70.1 ; python_version >= "3.11" and python_version < "3.12" 11 | h11==0.14.0 ; python_version >= "3.11" and python_version < "3.12" 12 | 
idna==3.4 ; python_version >= "3.11" and python_version < "3.12" 13 | jinja2==3.1.2 ; python_version >= "3.11" and python_version < "3.12" 14 | markupsafe==2.1.3 ; python_version >= "3.11" and python_version < "3.12" 15 | numpy==1.25.2 ; python_version >= "3.11" and python_version < "3.12" 16 | platformdirs==3.10.0 ; python_version >= "3.11" and python_version < "3.12" 17 | pycparser==2.21 ; python_version >= "3.11" and python_version < "3.12" 18 | pydantic==1.10.12 ; python_version >= "3.11" and python_version < "3.12" 19 | pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@b35fc89fe42948a28e33aed886ea145a51113f88 ; python_version >= "3.11" and python_version < "3.12" 20 | python-multipart==0.0.5 ; python_version >= "3.11" and python_version < "3.12" 21 | pyworld==0.3.4 ; python_version >= "3.11" and python_version < "3.12" 22 | pyyaml==6.0.1 ; python_version >= "3.11" and python_version < "3.12" 23 | requests==2.31.0 ; python_version >= "3.11" and python_version < "3.12" 24 | scipy==1.11.2 ; python_version >= "3.11" and python_version < "3.12" 25 | semver==3.0.1 ; python_version >= "3.11" and python_version < "3.12" 26 | six==1.16.0 ; python_version >= "3.11" and python_version < "3.12" 27 | sniffio==1.3.0 ; python_version >= "3.11" and python_version < "3.12" 28 | soundfile==0.10.3.post1 ; python_version >= "3.11" and python_version < "3.12" 29 | starlette==0.16.0 ; python_version >= "3.11" and python_version < "3.12" 30 | tqdm==4.66.1 ; python_version >= "3.11" and python_version < "3.12" 31 | typing-extensions==4.7.1 ; python_version >= "3.11" and python_version < "3.12" 32 | urllib3==2.0.4 ; python_version >= "3.11" and python_version < "3.12" 33 | uvicorn==0.15.0 ; python_version >= "3.11" and python_version < "3.12" 34 | -------------------------------------------------------------------------------- /run.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | # 
このファイルはPyInstallerによって自動生成されたもので、それをカスタマイズして使用しています。 3 | from PyInstaller.utils.hooks import collect_data_files 4 | import os 5 | 6 | datas = [ 7 | ('engine_manifest_assets', 'engine_manifest_assets'), 8 | ('speaker_info', 'speaker_info'), 9 | ('engine_manifest.json', '.'), 10 | ('default.csv', '.'), 11 | ('licenses.json', '.'), 12 | ('presets.yaml', '.'), 13 | ('default_setting.yml', '.'), 14 | ('ui_template', 'ui_template'), 15 | ('library_info', 'library_info'), 16 | ] 17 | datas += collect_data_files('pyopenjtalk') 18 | 19 | core_model_dir_path = os.environ.get('CORE_MODEL_DIR_PATH') 20 | if core_model_dir_path: 21 | print('CORE_MODEL_DIR_PATH is found:', core_model_dir_path) 22 | if not os.path.isdir(core_model_dir_path): 23 | raise Exception("CORE_MODEL_DIR_PATH was found, but it is not directory!") 24 | datas += [(core_model_dir_path, "model")] 25 | 26 | # コアとONNX Runtimeはバイナリであるが、`binaries`に加えると 27 | # 依存関係のパスがPyInstallerに書き換えらるので、`datas`に加える 28 | # 参考: https://github.com/VOICEVOX/voicevox_engine/pull/446#issuecomment-1210052318 29 | libcore_path = os.environ.get('LIBCORE_PATH') 30 | if libcore_path: 31 | print('LIBCORE_PATH is found:', libcore_path) 32 | if not os.path.isfile(libcore_path): 33 | raise Exception("LIBCORE_PATH was found, but it is not file!") 34 | datas += [(libcore_path, ".")] 35 | 36 | libonnxruntime_path = os.environ.get('LIBONNXRUNTIME_PATH') 37 | if libonnxruntime_path: 38 | print('LIBONNXRUNTIME_PATH is found:', libonnxruntime_path) 39 | if not os.path.isfile(libonnxruntime_path): 40 | raise Exception("LIBCORE_PATH was found, but it is not file!") 41 | datas += [(libonnxruntime_path, ".")] 42 | 43 | 44 | block_cipher = None 45 | 46 | 47 | a = Analysis( 48 | ['run.py'], 49 | pathex=[], 50 | binaries=[], 51 | datas=datas, 52 | hiddenimports=[], 53 | hookspath=[], 54 | hooksconfig={}, 55 | runtime_hooks=[], 56 | excludes=[], 57 | win_no_prefer_redirects=False, 58 | win_private_assemblies=False, 59 | cipher=block_cipher, 60 | 
noarchive=False, 61 | ) 62 | 63 | pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) 64 | 65 | exe = EXE( 66 | pyz, 67 | a.scripts, 68 | [], 69 | exclude_binaries=True, 70 | name='run', 71 | debug=False, 72 | bootloader_ignore_signals=False, 73 | strip=False, 74 | upx=True, 75 | console=True, 76 | disable_windowed_traceback=False, 77 | argv_emulation=False, 78 | target_arch=None, 79 | codesign_identity=None, 80 | entitlements_file=None, 81 | ) 82 | 83 | coll = COLLECT( 84 | exe, 85 | a.binaries, 86 | a.zipfiles, 87 | a.datas, 88 | strip=False, 89 | upx=True, 90 | upx_exclude=[], 91 | name='run', 92 | ) 93 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # automatically generated by pysen 3 | # pysen ignores and overwrites any modifications 4 | # e203: black treats : as a binary operator 5 | # e231: black doesn't put a space after , 6 | # e501: black may exceed the line-length to follow other style rules 7 | # w503 or w504: either one needs to be disabled to select w error codes 8 | ignore = E203,E231,E501,W503 9 | max-line-length = 88 10 | select = B,B950,C,E,F,W 11 | 12 | [mypy] 13 | # automatically generated by pysen 14 | # pysen ignores and overwrites any modifications 15 | check_untyped_defs = True 16 | disallow_any_decorated = False 17 | disallow_any_generics = False 18 | disallow_any_unimported = False 19 | disallow_incomplete_defs = True 20 | disallow_subclassing_any = True 21 | disallow_untyped_calls = False 22 | disallow_untyped_decorators = False 23 | disallow_untyped_defs = False 24 | ignore_errors = False 25 | ignore_missing_imports = True 26 | no_implicit_optional = True 27 | python_version = 3.10 28 | show_error_codes = True 29 | strict_equality = True 30 | strict_optional = True 31 | warn_redundant_casts = True 32 | warn_return_any = False 33 | warn_unreachable = True 34 | warn_unused_configs = True 35 | 
warn_unused_ignores = False 36 | 37 | -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/icons/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/icons/8.png -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/metas.json: -------------------------------------------------------------------------------- 1 | { 2 | "supported_features": { "permitted_synthesis_morphing": "NOTHING" } 3 | } 4 | -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/policy.md: -------------------------------------------------------------------------------- 1 | dummy3 policy 2 | 3 | https://voicevox.hiroshiba.jp/ 4 | -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portrait.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portrait.png -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portraits/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portraits/8.png -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_001.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_001.wav -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_002.wav -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_003.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/1.png -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/3.png -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/5.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/5.png -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/7.png -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/metas.json: -------------------------------------------------------------------------------- 1 | { 2 | "supported_features": { "permitted_synthesis_morphing": "SELF_ONLY" } 3 | } 4 | -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/policy.md: -------------------------------------------------------------------------------- 1 | dummy2 policy 2 | 3 | https://voicevox.hiroshiba.jp/ 4 | -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portrait.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portrait.png -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portraits/3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portraits/3.png -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_001.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_002.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_003.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_001.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_002.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_002.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_003.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_001.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_002.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_003.wav -------------------------------------------------------------------------------- 
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_001.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_002.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_003.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/0.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/2.png 
-------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/4.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/6.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/metas.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/policy.md: -------------------------------------------------------------------------------- 1 | dummy1 policy 2 | 3 | https://voicevox.hiroshiba.jp/ 4 | -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portrait.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portrait.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/0.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/2.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/4.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/6.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_001.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_002.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_002.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_003.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_001.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_002.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_003.wav -------------------------------------------------------------------------------- 
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_001.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_002.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_003.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_001.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_002.wav 
-------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_003.wav -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/icons/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/icons/9.png -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/metas.json: -------------------------------------------------------------------------------- 1 | { 2 | "supported_features": { "permitted_synthesis_morphing": "ALL" } 3 | } 4 | -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/policy.md: -------------------------------------------------------------------------------- 1 | dummy4 policy 2 | 3 | https://voicevox.hiroshiba.jp/ 4 | -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/portrait.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/portrait.png -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_001.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_001.wav -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_002.wav -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_003.wav -------------------------------------------------------------------------------- /test/.gitignore: -------------------------------------------------------------------------------- 1 | model 2 | speaker_info 3 | 4 | !testdata/model 5 | !testdata/speaker_info -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/test/__init__.py -------------------------------------------------------------------------------- /test/e2e/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | from fastapi.testclient import TestClient 5 | from run import generate_app 6 | 7 | from voicevox_engine.setting import 
SettingLoader 8 | from voicevox_engine.synthesis_engine import make_synthesis_engines 9 | from voicevox_engine.utility.core_version_utility import get_latest_core_version 10 | 11 | 12 | @pytest.fixture(scope="session") 13 | def client(): 14 | synthesis_engines = make_synthesis_engines(use_gpu=False) 15 | latest_core_version = get_latest_core_version(versions=synthesis_engines.keys()) 16 | setting_loader = SettingLoader(Path("./default_setting.yml")) 17 | 18 | return TestClient( 19 | generate_app( 20 | synthesis_engines=synthesis_engines, 21 | latest_core_version=latest_core_version, 22 | setting_loader=setting_loader, 23 | ) 24 | ) 25 | -------------------------------------------------------------------------------- /test/e2e/test_validate_version.py: -------------------------------------------------------------------------------- 1 | from fastapi.testclient import TestClient 2 | 3 | from voicevox_engine import __version__ 4 | 5 | 6 | def test_fetch_version_success(client: TestClient): 7 | response = client.get("/version") 8 | assert response.status_code == 200 9 | assert response.json() == __version__ 10 | -------------------------------------------------------------------------------- /test/presets-test-1.yaml: -------------------------------------------------------------------------------- 1 | - id: 1 2 | name: test 3 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 4 | style_id: 0 5 | speedScale: 1 6 | pitchScale: 0 7 | intonationScale: 1 8 | volumeScale: 1 9 | prePhonemeLength: 0.1 10 | postPhonemeLength: 0.1 11 | 12 | - id: 2 13 | name: test2 14 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 15 | style_id: 2 16 | speedScale: 1.5 17 | pitchScale: 0 18 | intonationScale: 1 19 | volumeScale: 0.7 20 | prePhonemeLength: 0.5 21 | postPhonemeLength: 0.5 22 | -------------------------------------------------------------------------------- /test/presets-test-2.yaml: -------------------------------------------------------------------------------- 1 | - id: 1 
2 | name: test 3 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 4 | style_id: not_int 5 | speedScale: 1 6 | pitchScale: 0 7 | intonationScale: 1 8 | volumeScale: 1 9 | prePhonemeLength: 0.1 10 | postPhonemeLength: 0.1 11 | 12 | - id: 2 13 | name: test2 14 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 15 | style_id: 2 16 | speedScale: 1.5 17 | pitchScale: 0 18 | intonationScale: 1 19 | volumeScale: 0.7 20 | prePhonemeLength: 0.5 21 | postPhonemeLength: 0.5 22 | -------------------------------------------------------------------------------- /test/presets-test-3.yaml: -------------------------------------------------------------------------------- 1 | - id: 1 2 | name: test 3 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 4 | style_id: 0 5 | speedScale: 1 6 | pitchScale: 0 7 | intonationScale: 1 8 | volumeScale: 1 9 | prePhonemeLength: 0.1 10 | postPhonemeLength: 0.1 11 | 12 | - id: 1 13 | name: test2 14 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 15 | style_id: 2 16 | speedScale: 1.5 17 | pitchScale: 0 18 | intonationScale: 1 19 | volumeScale: 0.7 20 | prePhonemeLength: 0.5 21 | postPhonemeLength: 0.5 22 | -------------------------------------------------------------------------------- /test/presets-test-4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/test/presets-test-4.yaml -------------------------------------------------------------------------------- /test/setting-test-load-1.yaml: -------------------------------------------------------------------------------- 1 | allow_origin: null 2 | cors_policy_mode: localapps 3 | -------------------------------------------------------------------------------- /test/setting-test-load-2.yaml: -------------------------------------------------------------------------------- 1 | allow_origin: null 2 | cors_policy_mode: all 3 | 
-------------------------------------------------------------------------------- /test/setting-test-load-3.yaml: -------------------------------------------------------------------------------- 1 | allow_origin: "192.168.254.255 192.168.255.255" 2 | cors_policy_mode: localapps 3 | -------------------------------------------------------------------------------- /test/test_connect_base64_waves.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import io 3 | from unittest import TestCase 4 | 5 | import numpy as np 6 | import numpy.testing 7 | import soundfile 8 | from scipy.signal import resample 9 | 10 | from voicevox_engine.utility import ConnectBase64WavesException, connect_base64_waves 11 | 12 | 13 | def generate_sine_wave_ndarray( 14 | seconds: float, samplerate: int, frequency: float 15 | ) -> np.ndarray: 16 | x = np.linspace(0, seconds, int(seconds * samplerate), endpoint=False) 17 | wave = np.sin(2 * np.pi * frequency * x).astype(np.float32) 18 | 19 | return wave 20 | 21 | 22 | def encode_bytes(wave_ndarray: np.ndarray, samplerate: int) -> bytes: 23 | wave_bio = io.BytesIO() 24 | soundfile.write( 25 | file=wave_bio, 26 | data=wave_ndarray, 27 | samplerate=samplerate, 28 | format="WAV", 29 | subtype="FLOAT", 30 | ) 31 | wave_bio.seek(0) 32 | 33 | return wave_bio.getvalue() 34 | 35 | 36 | def generate_sine_wave_bytes( 37 | seconds: float, samplerate: int, frequency: float 38 | ) -> bytes: 39 | wave_ndarray = generate_sine_wave_ndarray(seconds, samplerate, frequency) 40 | return encode_bytes(wave_ndarray, samplerate) 41 | 42 | 43 | def encode_base64(wave_bytes: bytes) -> str: 44 | return base64.standard_b64encode(wave_bytes).decode("utf-8") 45 | 46 | 47 | def generate_sine_wave_base64(seconds: float, samplerate: int, frequency: float) -> str: 48 | wave_bytes = generate_sine_wave_bytes(seconds, samplerate, frequency) 49 | wave_base64 = encode_base64(wave_bytes) 50 | return wave_base64 51 | 52 | 53 | class 
TestConnectBase64Waves(TestCase): 54 | def test_connect(self): 55 | samplerate = 1000 56 | wave = generate_sine_wave_ndarray( 57 | seconds=2, samplerate=samplerate, frequency=10 58 | ) 59 | wave_base64 = encode_base64(encode_bytes(wave, samplerate=samplerate)) 60 | 61 | wave_x2_ref = np.concatenate([wave, wave]) 62 | 63 | wave_x2, _ = connect_base64_waves(waves=[wave_base64, wave_base64]) 64 | 65 | self.assertEqual(wave_x2_ref.shape, wave_x2.shape) 66 | 67 | self.assertTrue((wave_x2_ref == wave_x2).all()) 68 | 69 | def test_no_wave_error(self): 70 | self.assertRaises(ConnectBase64WavesException, connect_base64_waves, waves=[]) 71 | 72 | def test_invalid_base64_error(self): 73 | wave_1000hz = generate_sine_wave_base64( 74 | seconds=2, samplerate=1000, frequency=10 75 | ) 76 | wave_1000hz_broken = wave_1000hz[1:] # remove head 1 char 77 | 78 | self.assertRaises( 79 | ConnectBase64WavesException, 80 | connect_base64_waves, 81 | waves=[ 82 | wave_1000hz_broken, 83 | ], 84 | ) 85 | 86 | def test_invalid_wave_file_error(self): 87 | wave_1000hz = generate_sine_wave_bytes(seconds=2, samplerate=1000, frequency=10) 88 | wave_1000hz_broken_bytes = wave_1000hz[1:] # remove head 1 byte 89 | wave_1000hz_broken = encode_base64(wave_1000hz_broken_bytes) 90 | 91 | self.assertRaises( 92 | ConnectBase64WavesException, 93 | connect_base64_waves, 94 | waves=[ 95 | wave_1000hz_broken, 96 | ], 97 | ) 98 | 99 | def test_different_frequency(self): 100 | wave_24000hz = generate_sine_wave_ndarray( 101 | seconds=1, samplerate=24000, frequency=10 102 | ) 103 | wave_1000hz = generate_sine_wave_ndarray( 104 | seconds=2, samplerate=1000, frequency=10 105 | ) 106 | wave_24000_base64 = encode_base64(encode_bytes(wave_24000hz, samplerate=24000)) 107 | wave_1000_base64 = encode_base64(encode_bytes(wave_1000hz, samplerate=1000)) 108 | 109 | wave_1000hz_to2400hz = resample(wave_1000hz, 24000 * len(wave_1000hz) // 1000) 110 | wave_x2_ref = np.concatenate([wave_24000hz, wave_1000hz_to2400hz]) 111 | 112 | 
wave_x2, _ = connect_base64_waves(waves=[wave_24000_base64, wave_1000_base64]) 113 | 114 | self.assertEqual(wave_x2_ref.shape, wave_x2.shape) 115 | numpy.testing.assert_array_almost_equal(wave_x2_ref, wave_x2) 116 | 117 | def test_different_channels(self): 118 | wave_1000hz = generate_sine_wave_ndarray( 119 | seconds=2, samplerate=1000, frequency=10 120 | ) 121 | wave_2ch_1000hz = np.array([wave_1000hz, wave_1000hz]).T 122 | wave_1ch_base64 = encode_base64(encode_bytes(wave_1000hz, samplerate=1000)) 123 | wave_2ch_base64 = encode_base64(encode_bytes(wave_2ch_1000hz, samplerate=1000)) 124 | 125 | wave_x2_ref = np.concatenate([wave_2ch_1000hz, wave_2ch_1000hz]) 126 | 127 | wave_x2, _ = connect_base64_waves(waves=[wave_1ch_base64, wave_2ch_base64]) 128 | 129 | self.assertEqual(wave_x2_ref.shape, wave_x2.shape) 130 | self.assertTrue((wave_x2_ref == wave_x2).all()) 131 | -------------------------------------------------------------------------------- /test/test_core_version_utility.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from voicevox_engine.utility import get_latest_core_version, parse_core_version 4 | 5 | 6 | class TestCoreVersion(TestCase): 7 | def test_parse_core_version(self): 8 | parse_core_version("0.0.0") 9 | parse_core_version("0.1.0") 10 | parse_core_version("0.10.0") 11 | parse_core_version("0.10.0-preview.1") 12 | parse_core_version("0.14.0") 13 | parse_core_version("0.14.0-preview.1") 14 | parse_core_version("0.14.0-preview.10") 15 | 16 | def test_get_latest_core_version(self): 17 | self.assertEqual( 18 | get_latest_core_version( 19 | versions=[ 20 | "0.0.0", 21 | "0.1.0", 22 | "0.10.0", 23 | "0.10.0-preview.1", 24 | "0.14.0", 25 | "0.14.0-preview.1", 26 | "0.14.0-preview.10", 27 | ] 28 | ), 29 | "0.14.0", 30 | ) 31 | 32 | self.assertEqual( 33 | get_latest_core_version( 34 | versions=[ 35 | "0.14.0", 36 | "0.15.0-preview.1", 37 | ] 38 | ), 39 | "0.15.0-preview.1", 40 | ) 
class TestMockSynthesisEngine(TestCase):
    """Exercises MockSynthesisEngine with a fixed two-phrase fixture."""

    @staticmethod
    def _blank_mora(text, consonant, vowel):
        """Build a Mora whose lengths and pitch are all zero.

        A mora without a consonant gets ``consonant_length=None``.
        """
        return Mora(
            text=text,
            consonant=consonant,
            consonant_length=None if consonant is None else 0.0,
            vowel=vowel,
            vowel_length=0.0,
            pitch=0.0,
        )

    def setUp(self):
        super().setUp()

        mora = self._blank_mora
        first_phrase = AccentPhrase(
            moras=[
                mora("コ", "k", "o"),
                mora("ン", None, "N"),
                mora("ニ", "n", "i"),
                mora("チ", "ch", "i"),
                mora("ワ", "w", "a"),
            ],
            accent=5,
            pause_mora=mora("、", None, "pau"),
        )
        second_phrase = AccentPhrase(
            moras=[
                mora("ヒ", "h", "i"),
                mora("ホ", "h", "o"),
                mora("デ", "d", "e"),
                mora("ス", "s", "U"),
            ],
            accent=1,
            pause_mora=None,
        )
        self.accent_phrases_hello_hiho = [first_phrase, second_phrase]
        self.engine = MockSynthesisEngine(speakers="", supported_devices="")

    def test_replace_phoneme_length(self):
        # The mock is expected to hand the phrases back unchanged.
        replaced = self.engine.replace_phoneme_length(
            accent_phrases=self.accent_phrases_hello_hiho,
            speaker_id=0,
        )
        self.assertEqual(replaced, self.accent_phrases_hello_hiho)

    def test_replace_mora_pitch(self):
        # The mock is expected to hand the phrases back unchanged.
        replaced = self.engine.replace_mora_pitch(
            accent_phrases=self.accent_phrases_hello_hiho,
            speaker_id=0,
        )
        self.assertEqual(replaced, self.accent_phrases_hello_hiho)

    def test_synthesis(self):
        # Only checks that synthesis runs; the waveform itself is not inspected.
        query = AudioQuery(
            accent_phrases=self.accent_phrases_hello_hiho,
            speedScale=1,
            pitchScale=0,
            intonationScale=1,
            volumeScale=1,
            prePhonemeLength=0.1,
            postPhonemeLength=0.1,
            outputSamplingRate=24000,
            outputStereo=False,
            kana=create_kana(self.accent_phrases_hello_hiho),
        )
        self.engine.synthesis(query, speaker_id=0)
class TestSettingLoader(TestCase):
    """Round-trip tests for SettingLoader against fixture files and a temp dir."""

    def setUp(self):
        self.tmp_dir = TemporaryDirectory()
        self.tmp_dir_path = Path(self.tmp_dir.name)

    def tearDown(self):
        self.tmp_dir.cleanup()

    def _assert_loaded(self, loader, expected):
        """Load through *loader* and compare the settings dict with *expected*."""
        self.assertEqual(loader.load_setting_file().dict(), expected)

    def test_loading_1(self):
        # Uses a file named not_exist.yaml (presumably absent).
        loader = SettingLoader(Path("not_exist.yaml"))
        self._assert_loaded(
            loader,
            {"allow_origin": None, "cors_policy_mode": CorsPolicyMode.localapps},
        )

    def test_loading_2(self):
        loader = SettingLoader(setting_file_path=Path("test/setting-test-load-1.yaml"))
        self._assert_loaded(
            loader,
            {"allow_origin": None, "cors_policy_mode": CorsPolicyMode.localapps},
        )

    def test_loading_3(self):
        loader = SettingLoader(setting_file_path=Path("test/setting-test-load-2.yaml"))
        self._assert_loaded(
            loader,
            {"allow_origin": None, "cors_policy_mode": "all"},
        )

    def test_loading_4(self):
        loader = SettingLoader(setting_file_path=Path("test/setting-test-load-3.yaml"))
        self._assert_loaded(
            loader,
            {
                "allow_origin": "192.168.254.255 192.168.255.255",
                "cors_policy_mode": CorsPolicyMode.localapps,
            },
        )

    def test_dump(self):
        dump_path = Path(self.tmp_dir_path / "setting-test-dump.yaml")
        loader = SettingLoader(setting_file_path=dump_path)
        loader.dump_setting_file(Setting(cors_policy_mode=CorsPolicyMode.localapps))

        # The dump must create the file and load back to the same settings.
        self.assertTrue(loader.setting_file_path.is_file())
        self._assert_loaded(
            loader,
            {"allow_origin": None, "cors_policy_mode": CorsPolicyMode.localapps},
        )
class TestUserDictWords(TestCase):
    """Validation and mora-count tests for the UserDictWord model."""

    def setUp(self):
        self.test_model = {
            "surface": "テスト",
            "priority": 0,
            "part_of_speech": "名詞",
            "part_of_speech_detail_1": "固有名詞",
            "part_of_speech_detail_2": "一般",
            "part_of_speech_detail_3": "*",
            "inflectional_type": "*",
            "inflectional_form": "*",
            "stem": "*",
            "yomi": "テスト",
            "pronunciation": "テスト",
            "accent_type": 0,
            "accent_associative_rule": "*",
        }

    def _payload(self, **overrides):
        """Return a private copy of the base payload with *overrides* applied."""
        payload = deepcopy(self.test_model)
        payload.update(overrides)
        return payload

    @staticmethod
    def _parsed_mora_count(pronunciation):
        """Count moras by parsing *pronunciation* with a trailing accent mark."""
        total = 0
        for accent_phrase in parse_kana(pronunciation + "'"):
            total += len(accent_phrase.moras)
        return total

    def _assert_rejected(self, **overrides):
        """Assert that building a word from the overridden payload fails validation."""
        payload = self._payload(**overrides)
        with self.assertRaises(ValidationError):
            UserDictWord(**payload)

    def test_valid_word(self):
        payload = self._payload()
        try:
            UserDictWord(**payload)
        except ValidationError as e:
            self.fail(f"Unexpected Validation Error\n{str(e)}")

    def test_convert_to_zenkaku(self):
        word = UserDictWord(**self._payload(surface="test"))
        self.assertEqual(word.surface, "test")

    def test_count_mora(self):
        self.assertEqual(UserDictWord(**self._payload()).mora_count, 3)

    def test_count_mora_x(self):
        skipped = ["ァ", "ィ", "ゥ", "ェ", "ォ", "ッ", "ャ", "ュ", "ョ", "ヮ"]
        for code_point in range(12449, 12533):
            s = chr(code_point)
            if s in skipped:
                continue
            for x in "ァィゥェォャュョ":
                pronunciation = s + x
                # The reference count is computed outside the subTest on purpose,
                # so a parser failure aborts the test instead of one sub-case.
                expected_count = self._parsed_mora_count(pronunciation)
                with self.subTest(s=s, x=x):
                    self.assertEqual(
                        UserDictWord(
                            **self._payload(pronunciation=pronunciation)
                        ).mora_count,
                        expected_count,
                    )

    def test_count_mora_xwa(self):
        pronunciation = "クヮンセイ"
        expected_count = self._parsed_mora_count(pronunciation)
        self.assertEqual(
            UserDictWord(**self._payload(pronunciation=pronunciation)).mora_count,
            expected_count,
        )

    def test_invalid_pronunciation_not_katakana(self):
        self._assert_rejected(pronunciation="ぼいぼ")

    def test_invalid_pronunciation_invalid_sutegana(self):
        self._assert_rejected(pronunciation="アィウェォ")

    def test_invalid_pronunciation_invalid_xwa(self):
        self._assert_rejected(pronunciation="アヮ")

    def test_count_mora_voiced_sound(self):
        word = UserDictWord(**self._payload(pronunciation="ボイボ"))
        self.assertEqual(word.mora_count, 3)

    def test_invalid_accent_type(self):
        self._assert_rejected(accent_type=4)

    def test_invalid_accent_type_2(self):
        self._assert_rejected(accent_type=-1)
-------------------------------------------------------------------------------- /test/testdata/model/b351e601-3e98-40d4-ac1d-19529d932c22/decoder_model.onnx: -------------------------------------------------------------------------------- 1 | decoder_model -------------------------------------------------------------------------------- /test/testdata/model/b351e601-3e98-40d4-ac1d-19529d932c22/embedder_model.onnx: -------------------------------------------------------------------------------- 1 | embedder_model -------------------------------------------------------------------------------- /test/testdata/model/b351e601-3e98-40d4-ac1d-19529d932c22/variance_model.onnx: -------------------------------------------------------------------------------- 1 | variance_model -------------------------------------------------------------------------------- /test/testdata/model/libraries.json: -------------------------------------------------------------------------------- 1 | { 2 | "official": true, 3 | "35b2c544-660e-401e-b503-0e14c635303a": true 4 | } -------------------------------------------------------------------------------- /test/testdata/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SHAREVOX/sharevox_engine/f33c7e8164d205f7de1d494d4229967c2dcac8cb/test/testdata/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/0.png -------------------------------------------------------------------------------- /test/testdata/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/policy.md: -------------------------------------------------------------------------------- 1 | dummy policy -------------------------------------------------------------------------------- /test/testdata/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portrait.png: -------------------------------------------------------------------------------- 1 | portrait.png 
-------------------------------------------------------------------------------- /test/testdata/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_001.wav: -------------------------------------------------------------------------------- 1 | voice_sample1 -------------------------------------------------------------------------------- /test/vvlib_manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "manifest_version": "0.15.0", 3 | "name": "Test vvlib", 4 | "version": "0.0.1", 5 | "uuid": "2bb8bccf-1c3f-4bc9-959a-f388e37af3ad", 6 | "engine_name": "Test Engine", 7 | "brand_name": "Test", 8 | "engine_uuid": "c7b58856-bd56-4aa1-afb7-b8415f824b06" 9 | } -------------------------------------------------------------------------------- /ui_template/ui.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | VOICEVOX Engine 設定 6 | 10 | 11 | 17 | 22 | 23 | 24 | 25 |
26 |
27 | 30 | 31 |
32 | 33 | 44 |
45 |

46 | allまたはlocalappsを指定。allはすべてを許可します。 47 |

48 |

49 | localappsはオリジン間リソース共有ポリシーを、app://.とlocalhost関連に限定します。 50 |

51 |

52 | その他のオリジンはallow_originオプションで追加できます。デフォルトはlocalapps。 53 |

54 |
55 |
56 | 57 |
58 | 59 | 65 |
66 | 許可するオリジンを指定します。複数指定する場合は、直後にスペースで区切って追加できます。 67 |
68 |
69 | 70 | 108 | 109 | 117 |
118 |
119 | 120 | 121 | -------------------------------------------------------------------------------- /voicevox_engine/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "latest" 2 | -------------------------------------------------------------------------------- /voicevox_engine/dev/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .mock import ( 2 | decode_forward, 3 | initialize, 4 | metas, 5 | supported_devices, 6 | yukarin_s_forward, 7 | yukarin_sa_forward, 8 | ) 9 | 10 | __all__ = [ 11 | "decode_forward", 12 | "initialize", 13 | "yukarin_s_forward", 14 | "yukarin_sa_forward", 15 | "metas", 16 | "supported_devices", 17 | ] 18 | -------------------------------------------------------------------------------- /voicevox_engine/dev/core/mock.py: -------------------------------------------------------------------------------- 1 | import json 2 | from logging import getLogger 3 | from typing import Any, Dict, List 4 | 5 | import numpy as np 6 | from pyopenjtalk import tts 7 | from scipy.signal import resample 8 | 9 | DUMMY_TEXT = "これはダミーのテキストです" 10 | 11 | 12 | def initialize(path: str, use_gpu: bool, *args: List[Any]) -> None: 13 | pass 14 | 15 | 16 | def yukarin_s_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray: 17 | logger = getLogger("uvicorn") # FastAPI / Uvicorn 内からの利用のため 18 | logger.info( 19 | "Sorry, yukarin_s_forward() is a mock. Return values are incorrect.", 20 | ) 21 | return np.ones(length) / 5 22 | 23 | 24 | def yukarin_sa_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray: 25 | logger = getLogger("uvicorn") # FastAPI / Uvicorn 内からの利用のため 26 | logger.info( 27 | "Sorry, yukarin_sa_forward() is a mock. 
def decode_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:
    """
    Return a synthesized waveform as a NumPy array [Mock].

    The mock always reads out the fixed sentence ``DUMMY_TEXT``; ``length`` and
    the keyword arguments are accepted for signature compatibility only.

    Parameters
    ----------
    length : int
        Frame length (unused by the mock)

    Returns
    -------
    wave : np.ndarray
        Waveform resampled to 24 kHz

    Note
    -------
    No tuning (pitch etc.) is reflected in the synthesized audio.

    # Output of pyopenjtalk.tts() (as documented by the original author)
    dtype=np.float64, 16 bit, mono 48000 Hz

    # Why resample
    The output is converted to 24 kHz to match the non-mock decode_forward.
    """
    logger = getLogger("uvicorn")  # used from inside FastAPI / Uvicorn
    logger.info(
        "Sorry, decode_forward() is a mock. Return values are incorrect.",
    )
    wave, sr = tts(DUMMY_TEXT)
    # Derive the output length from the rate tts() actually reported instead of
    # assuming 48000 Hz, so the result stays 24 kHz if that rate ever differs.
    target_length = int(24000 * len(wave) // sr)
    wave = resample(wave.astype("int16"), target_length)
    return wave


def metas() -> str:
    """Return the mock speaker metadata as a JSON string (four dummy speakers)."""
    return json.dumps(
        [
            {
                "name": "dummy1",
                "styles": [
                    {"name": "style0", "id": 0},
                    {"name": "style1", "id": 2},
                    {"name": "style2", "id": 4},
                    {"name": "style3", "id": 6},
                ],
                "speaker_uuid": "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff",
                "version": "mock",
            },
            {
                "name": "dummy2",
                "styles": [
                    {"name": "style0", "id": 1},
                    {"name": "style1", "id": 3},
                    {"name": "style2", "id": 5},
                    {"name": "style3", "id": 7},
                ],
                "speaker_uuid": "388f246b-8c41-4ac1-8e2d-5d79f3ff56d9",
                "version": "mock",
            },
            {
                "name": "dummy3",
                "styles": [
                    {"name": "style0", "id": 8},
                ],
                "speaker_uuid": "35b2c544-660e-401e-b503-0e14c635303a",
                "version": "mock",
            },
            {
                "name": "dummy4",
                "styles": [
                    {"name": "style0", "id": 9},
                ],
                "speaker_uuid": "b1a81618-b27b-40d2-b0ea-27a9ad408c4b",
                "version": "mock",
            },
        ]
    )
class MockSynthesisEngine(SynthesisEngineBase):
    """
    SynthesisEngine [Mock]

    Synthesizes through pyopenjtalk.tts() instead of the real core.
    """

    def __init__(
        self,
        speakers: str,
        supported_devices: Optional[str] = None,
    ):
        """
        __init__ [Mock]

        Parameters
        ----------
        speakers : str
            Value exposed unchanged through the ``speakers`` property
        supported_devices : Optional[str]
            Value exposed unchanged through the ``supported_devices`` property
        """
        super().__init__()

        self._speakers = speakers
        self._supported_devices = supported_devices
        self.default_sampling_rate = 24000

    @property
    def speakers(self) -> str:
        return self._speakers

    @property
    def supported_devices(self) -> Optional[str]:
        return self._supported_devices

    def replace_phoneme_length(
        self, accent_phrases: List[AccentPhrase], speaker_id: int
    ) -> List[AccentPhrase]:
        """
        replace_phoneme_length: return the input accent_phrases unchanged [Mock]

        Parameters
        ----------
        accent_phrases : List[AccentPhrase]
            List of accent phrases
        speaker_id : int
            Speaker (unused by the mock)

        Returns
        -------
        List[AccentPhrase]
            The same list of accent phrases (not modified)
        """
        return accent_phrases

    def replace_mora_pitch(
        self, accent_phrases: List[AccentPhrase], speaker_id: int
    ) -> List[AccentPhrase]:
        """
        replace_mora_pitch: return the input accent_phrases unchanged [Mock]

        Parameters
        ----------
        accent_phrases : List[AccentPhrase]
            List of accent phrases
        speaker_id : int
            Speaker (unused by the mock)

        Returns
        -------
        List[AccentPhrase]
            The same list of accent phrases (not modified)
        """
        return accent_phrases

    def _synthesis_impl(self, query: AudioQuery, speaker_id: int) -> np.ndarray:
        """
        synthesis: synthesize speech without using voicevox core [Mock]

        Parameters
        ----------
        query : AudioQuery
            The json obtained from the /audio_query API
        speaker_id : int
            Speaker (unused by the mock)

        Returns
        -------
        wave [npt.NDArray[np.int16]]
            Waveform, scaled by ``query.volumeScale`` and cast to int16
        """
        # recall text in katakana
        flatten_moras = to_flatten_moras(query.accent_phrases)
        kana_text = "".join([mora.text for mora in flatten_moras])

        wave = self.forward(kana_text)

        # volume
        wave *= query.volumeScale

        return wave.astype("int16")

    def forward(self, text: str, **kwargs: Dict[str, Any]) -> np.ndarray:
        """
        forward: tts via pyopenjtalk.tts() [Mock]

        Parameters
        ----------
        text : str
            Input string (e.g. the sentence to read out, written in katakana)

        Returns
        -------
        wave : np.ndarray
            Waveform resampled to 24 kHz. It is not cast to int16 here;
            ``_synthesis_impl`` performs that cast.

        Note
        -------
        No tuning (pitch etc.) is reflected in the synthesized audio.

        # Output of pyopenjtalk.tts() (as documented by the original author)
        dtype=np.float64, 16 bit, mono 48000 Hz

        # Why resample
        The output is converted to 24 kHz to match the non-mock
        implementation (decode_forward).
        """
        logger = getLogger("uvicorn")  # used from inside FastAPI / Uvicorn
        # Lazy %-style arguments: the message is only formatted if it is emitted.
        logger.info("[Mock] input text: %s", text)
        wave, sr = tts(text)
        # Use the rate reported by tts() rather than assuming 48000 Hz.
        wave = resample(wave, int(24000 * len(wave) // sr))
        return wave
-------------------------------------------------------------------------------- /voicevox_engine/engine_manifest/EngineManifest.py: -------------------------------------------------------------------------------- 1 | # マルチエンジン環境下においては、エンジンのバージョンがエディタのバージョンより 2 | # 古くなる可能性が十分に考えられる。その場合、エディタ側がEngineManifestの情報不足によって 3 | # エラーを吐いて表示が崩壊する可能性がある。これを防止するため、EngineManifest関連の定義を 4 | # 変更する際は、Optionalにする必要があることに留意しなければならない。 5 | 6 | from typing import List, Optional 7 | 8 | from pydantic import BaseModel, Field 9 | 10 | 11 | class UpdateInfo(BaseModel): 12 | """ 13 | エンジンのアップデート情報 14 | """ 15 | 16 | version: str = Field(title="エンジンのバージョン名") 17 | descriptions: List[str] = Field(title="アップデートの詳細についての説明") 18 | contributors: Optional[List[str]] = Field(title="貢献者名") 19 | 20 | 21 | class LicenseInfo(BaseModel): 22 | """ 23 | 依存ライブラリのライセンス情報 24 | """ 25 | 26 | name: str = Field(title="依存ライブラリ名") 27 | version: Optional[str] = Field(title="依存ライブラリのバージョン") 28 | license: Optional[str] = Field(title="依存ライブラリのライセンス名") 29 | text: str = Field(title="依存ライブラリのライセンス本文") 30 | 31 | 32 | class SupportedFeatures(BaseModel): 33 | """ 34 | エンジンが持つ機能の一覧 35 | """ 36 | 37 | adjust_mora_pitch: bool = Field(title="モーラごとの音高の調整") 38 | adjust_phoneme_length: bool = Field(title="音素ごとの長さの調整") 39 | adjust_speed_scale: bool = Field(title="全体の話速の調整") 40 | adjust_pitch_scale: bool = Field(title="全体の音高の調整") 41 | adjust_intonation_scale: bool = Field(title="全体の抑揚の調整") 42 | adjust_volume_scale: bool = Field(title="全体の音量の調整") 43 | interrogative_upspeak: bool = Field(title="疑問文の自動調整") 44 | synthesis_morphing: bool = Field(title="2人の話者でモーフィングした音声を合成") 45 | manage_library: Optional[bool] = Field(title="音声ライブラリのインストール・アンインストール") 46 | 47 | 48 | class EngineManifest(BaseModel): 49 | """ 50 | エンジン自体に関する情報 51 | """ 52 | 53 | manifest_version: str = Field(title="マニフェストのバージョン") 54 | name: str = Field(title="エンジン名") 55 | brand_name: str = Field(title="ブランド名") 56 | uuid: str = Field(title="エンジンのUUID") 57 | url: str = 
class EngineManifestLoader:
    """Builds an EngineManifest from a manifest JSON file and the assets it names."""

    def __init__(self, manifest_path: Path, root_dir: Path):
        self.manifest_path = manifest_path
        self.root_dir = root_dir

    def load_manifest(self) -> EngineManifest:
        """Read the manifest JSON and resolve the asset paths it references.

        Asset paths in the manifest are resolved relative to ``root_dir``.
        """
        raw = json.loads(self.manifest_path.read_text(encoding="utf-8"))
        root = self.root_dir

        # Entries are evaluated top to bottom, matching the original lookup and
        # file-read order.
        fields = {
            "manifest_version": raw["manifest_version"],
            "name": raw["name"],
            "brand_name": raw["brand_name"],
            "uuid": raw["uuid"],
            "url": raw["url"],
            "default_sampling_rate": raw["default_sampling_rate"],
            "icon": b64encode((root / raw["icon"]).read_bytes()).decode("utf-8"),
            "terms_of_service": (root / raw["terms_of_service"]).read_text("utf-8"),
            "update_infos": [
                UpdateInfo(**entry)
                for entry in json.loads(
                    (root / raw["update_infos"]).read_text("utf-8")
                )
            ],
            # Manifests without supported_vvlib_manifest_version are accepted:
            # .get() yields None when the key is missing.
            "supported_vvlib_manifest_version": raw.get(
                "supported_vvlib_manifest_version"
            ),
            "dependency_licenses": [
                LicenseInfo(**entry)
                for entry in json.loads(
                    (root / raw["dependency_licenses"]).read_text("utf-8")
                )
            ],
            # Each feature entry is an object; only its "value" member is kept.
            "supported_features": {
                feature: detail["value"]
                for feature, detail in raw["supported_features"].items()
            },
        }
        return EngineManifest(**fields)
# Lookup table from kana text to a template Mora. Moras whose vowel is one of
# a/i/u/e/o get a second entry, prefixed with UNVOICE_SYMBOL, whose vowel is
# upper-cased.
text2mora_with_unvoice = {}
for text, (consonant, vowel) in openjtalk_text2mora.items():
    text2mora_with_unvoice[text] = Mora(
        text=text,
        consonant=consonant if len(consonant) > 0 else None,
        consonant_length=0 if len(consonant) > 0 else None,
        vowel=vowel,
        vowel_length=0,
        pitch=0,
        is_interrogative=False,
    )
    if vowel in ["a", "i", "u", "e", "o"]:
        text2mora_with_unvoice[UNVOICE_SYMBOL + text] = Mora(
            text=text,
            consonant=consonant if len(consonant) > 0 else None,
            consonant_length=0 if len(consonant) > 0 else None,
            vowel=vowel.upper(),
            vowel_length=0,
            pitch=0,
            is_interrogative=False,
        )


def _text_to_accent_phrase(phrase: str) -> AccentPhrase:
    """
    Build an AccentPhrase from a kana reading using longest match.

    Runs in O(N^2) for an input of length N.

    Raises
    ------
    ParseKanaError
        ACCENT_TOP if the accent mark comes before any mora, ACCENT_TWICE if it
        appears twice, UNKNOWN_TEXT if no table entry matches, INFINITE_LOOP
        once the outer loop exceeds LOOP_LIMIT iterations, and ACCENT_NOTFOUND
        if the phrase has no accent mark.
    """
    accent_index: Optional[int] = None
    moras: List[Mora] = []

    base_index = 0  # parse start position; text to its right is pushed to stack
    stack = ""  # pending text
    matched_text: Optional[str] = None  # last kana matched inside the pending text

    outer_loop = 0
    while base_index < len(phrase):
        outer_loop += 1
        if phrase[base_index] == ACCENT_SYMBOL:
            if len(moras) == 0:
                raise ParseKanaError(ParseKanaErrorCode.ACCENT_TOP, text=phrase)
            if accent_index is not None:
                raise ParseKanaError(ParseKanaErrorCode.ACCENT_TWICE, text=phrase)
            # The accent is the number of moras seen so far (1-based position).
            accent_index = len(moras)
            base_index += 1
            continue
        for watch_index in range(base_index, len(phrase)):
            if phrase[watch_index] == ACCENT_SYMBOL:
                break
            # ordinary character
            stack += phrase[watch_index]
            if stack in text2mora_with_unvoice:
                # Keep extending: a later, longer match overwrites this one.
                matched_text = stack
        # push mora
        if matched_text is None:
            raise ParseKanaError(ParseKanaErrorCode.UNKNOWN_TEXT, text=stack)
        else:
            # Deep copy so callers can mutate the mora without touching the table.
            moras.append(text2mora_with_unvoice[matched_text].copy(deep=True))
            base_index += len(matched_text)
            stack = ""
            matched_text = None
        if outer_loop > LOOP_LIMIT:
            raise ParseKanaError(ParseKanaErrorCode.INFINITE_LOOP)
    if accent_index is None:
        raise ParseKanaError(ParseKanaErrorCode.ACCENT_NOTFOUND, text=phrase)
    else:
        return AccentPhrase(moras=moras, accent=accent_index, pause_mora=None)


def parse_kana(text: str) -> List[AccentPhrase]:
    """
    Parse an AquesTalk-like kana reading into accent phrases whose lengths and
    pitches are left unspecified.

    Phrases are separated by PAUSE_DELIMITER or NOPAUSE_DELIMITER; a phrase
    followed by PAUSE_DELIMITER receives a pause mora. WIDE_INTERROGATION_MARK
    is only allowed as the last character of a phrase and marks it as
    interrogative.

    Raises
    ------
    ParseKanaError
        EMPTY_PHRASE for empty input or an empty phrase,
        INTERROGATION_MARK_NOT_AT_END for a misplaced interrogation mark, plus
        anything raised by ``_text_to_accent_phrase``.
    """

    parsed_results: List[AccentPhrase] = []
    phrase_base = 0
    if len(text) == 0:
        # Passed as a string, matching the other EMPTY_PHRASE raise below.
        raise ParseKanaError(ParseKanaErrorCode.EMPTY_PHRASE, position="1")

    # i == len(text) is a virtual delimiter that flushes the last phrase.
    for i in range(len(text) + 1):
        if i == len(text) or text[i] in [PAUSE_DELIMITER, NOPAUSE_DELIMITER]:
            phrase = text[phrase_base:i]
            if len(phrase) == 0:
                raise ParseKanaError(
                    ParseKanaErrorCode.EMPTY_PHRASE,
                    position=str(len(parsed_results) + 1),
                )
            phrase_base = i + 1

            is_interrogative = WIDE_INTERROGATION_MARK in phrase
            if is_interrogative:
                if WIDE_INTERROGATION_MARK in phrase[:-1]:
                    raise ParseKanaError(
                        ParseKanaErrorCode.INTERROGATION_MARK_NOT_AT_END, text=phrase
                    )
                phrase = phrase.replace(WIDE_INTERROGATION_MARK, "")

            accent_phrase: AccentPhrase = _text_to_accent_phrase(phrase)
            if i < len(text) and text[i] == PAUSE_DELIMITER:
                accent_phrase.pause_mora = Mora(
                    text="、",
                    consonant=None,
                    consonant_length=None,
                    vowel="pau",
                    vowel_length=0,
                    pitch=0,
                )
            accent_phrase.is_interrogative = is_interrogative

            parsed_results.append(accent_phrase)

    return parsed_results
class SpeakerStyle(BaseModel):
    """
    スピーカーのスタイル情報
    """

    name: str = Field(title="スタイル名")
    id: int = Field(title="スタイルID")


class SpeakerSupportPermittedSynthesisMorphing(str, Enum):
    ALL = "ALL"  # everything permitted
    SELF_ONLY = "SELF_ONLY"  # permitted only within the same speaker
    NOTHING = "NOTHING"  # everything forbidden

    @classmethod
    def _missing_(cls, value: object) -> "SpeakerSupportPermittedSynthesisMorphing":
        # Any value that is not a member (including None) resolves to ALL.
        return SpeakerSupportPermittedSynthesisMorphing.ALL


class SpeakerSupportedFeatures(BaseModel):
    """
    話者の対応機能の情報
    """

    # The default goes through _missing_ above, so it evaluates to ALL.
    permitted_synthesis_morphing: SpeakerSupportPermittedSynthesisMorphing = Field(
        title="モーフィング機能への対応", default=SpeakerSupportPermittedSynthesisMorphing(None)
    )


class CoreSpeaker(BaseModel):
    """
    コアに含まれるスピーカー情報
    """

    name: str = Field(title="名前")
    speaker_uuid: str = Field(title="スピーカーのUUID")
    styles: List[SpeakerStyle] = Field(title="スピーカースタイルの一覧")
    # The label was previously passed positionally, which made it the field's
    # default value instead of its title. Passed as title= like every sibling
    # field; the field is now required, as the other fields are.
    version: str = Field(title="スピーカーのバージョン")


class EngineSpeaker(BaseModel):
    """
    エンジンに含まれるスピーカー情報
    """

    supported_features: SpeakerSupportedFeatures = Field(
        title="スピーカーの対応機能", default_factory=SpeakerSupportedFeatures
    )


class Speaker(CoreSpeaker, EngineSpeaker):
    """
    スピーカー情報
    """

    pass
class SpeakerInfo(BaseModel):
    """
    Additional information about a speaker.
    """

    # Contents of the speaker's policy.md
    policy: str = Field(title="policy.md")
    # portrait.png, base64-encoded
    portrait: str = Field(title="portrait.pngをbase64エンコードしたもの")
    # Additional information for each of the speaker's styles
    style_infos: List[StyleInfo] = Field(title="スタイルの追加情報")
def construct_lookup(speakers: List[Speaker]) -> Dict[int, Tuple[Speaker, StyleInfo]]:
    """
    Build a lookup table keyed by style ID.

    Every entry of each speaker's `styles` contributes one item:
    `style.id -> (speaker, style)`. When the same style ID occurs more than
    once, the last occurrence wins.

    NOTE(review): the return annotation names `StyleInfo`, but the second
    tuple element is whatever `speaker.styles` yields - confirm the
    intended type.
    """

    return {
        style.id: (speaker, style)
        for speaker in speakers
        for style in speaker.styles
    }
15 | 16 | Redistribution and use in source and binary forms, with or 17 | without modification, are permitted provided that the following 18 | conditions are met: 19 | 20 | - Redistributions of source code must retain the above copyright 21 | notice, this list of conditions and the following disclaimer. 22 | - Redistributions in binary form must reproduce the above 23 | copyright notice, this list of conditions and the following 24 | disclaimer in the documentation and/or other materials provided 25 | with the distribution. 26 | - Neither the name of the HTS working group nor the names of its 27 | contributors may be used to endorse or promote products derived 28 | from this software without specific prior written permission. 29 | 30 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 31 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 32 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 33 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 34 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS 35 | BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 36 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 37 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 38 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 39 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 40 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 41 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 42 | POSSIBILITY OF SUCH DAMAGE. 
43 | """ 44 | _mora_list_minimum = [ 45 | ["ヴォ", "v", "o"], 46 | ["ヴェ", "v", "e"], 47 | ["ヴィ", "v", "i"], 48 | ["ヴァ", "v", "a"], 49 | ["ヴ", "v", "u"], 50 | ["ン", "", "N"], 51 | ["ワ", "w", "a"], 52 | ["ロ", "r", "o"], 53 | ["レ", "r", "e"], 54 | ["ル", "r", "u"], 55 | ["リョ", "ry", "o"], 56 | ["リュ", "ry", "u"], 57 | ["リャ", "ry", "a"], 58 | ["リェ", "ry", "e"], 59 | ["リ", "r", "i"], 60 | ["ラ", "r", "a"], 61 | ["ヨ", "y", "o"], 62 | ["ユ", "y", "u"], 63 | ["ヤ", "y", "a"], 64 | ["モ", "m", "o"], 65 | ["メ", "m", "e"], 66 | ["ム", "m", "u"], 67 | ["ミョ", "my", "o"], 68 | ["ミュ", "my", "u"], 69 | ["ミャ", "my", "a"], 70 | ["ミェ", "my", "e"], 71 | ["ミ", "m", "i"], 72 | ["マ", "m", "a"], 73 | ["ポ", "p", "o"], 74 | ["ボ", "b", "o"], 75 | ["ホ", "h", "o"], 76 | ["ペ", "p", "e"], 77 | ["ベ", "b", "e"], 78 | ["ヘ", "h", "e"], 79 | ["プ", "p", "u"], 80 | ["ブ", "b", "u"], 81 | ["フォ", "f", "o"], 82 | ["フェ", "f", "e"], 83 | ["フィ", "f", "i"], 84 | ["ファ", "f", "a"], 85 | ["フ", "f", "u"], 86 | ["ピョ", "py", "o"], 87 | ["ピュ", "py", "u"], 88 | ["ピャ", "py", "a"], 89 | ["ピェ", "py", "e"], 90 | ["ピ", "p", "i"], 91 | ["ビョ", "by", "o"], 92 | ["ビュ", "by", "u"], 93 | ["ビャ", "by", "a"], 94 | ["ビェ", "by", "e"], 95 | ["ビ", "b", "i"], 96 | ["ヒョ", "hy", "o"], 97 | ["ヒュ", "hy", "u"], 98 | ["ヒャ", "hy", "a"], 99 | ["ヒェ", "hy", "e"], 100 | ["ヒ", "h", "i"], 101 | ["パ", "p", "a"], 102 | ["バ", "b", "a"], 103 | ["ハ", "h", "a"], 104 | ["ノ", "n", "o"], 105 | ["ネ", "n", "e"], 106 | ["ヌ", "n", "u"], 107 | ["ニョ", "ny", "o"], 108 | ["ニュ", "ny", "u"], 109 | ["ニャ", "ny", "a"], 110 | ["ニェ", "ny", "e"], 111 | ["ニ", "n", "i"], 112 | ["ナ", "n", "a"], 113 | ["ドゥ", "d", "u"], 114 | ["ド", "d", "o"], 115 | ["トゥ", "t", "u"], 116 | ["ト", "t", "o"], 117 | ["デョ", "dy", "o"], 118 | ["デュ", "dy", "u"], 119 | ["デャ", "dy", "a"], 120 | ["デェ", "dy", "e"], 121 | ["ディ", "d", "i"], 122 | ["デ", "d", "e"], 123 | ["テョ", "ty", "o"], 124 | ["テュ", "ty", "u"], 125 | ["テャ", "ty", "a"], 126 | ["ティ", "t", "i"], 127 | ["テ", "t", "e"], 128 | ["ツォ", "ts", "o"], 129 | 
["ツェ", "ts", "e"], 130 | ["ツィ", "ts", "i"], 131 | ["ツァ", "ts", "a"], 132 | ["ツ", "ts", "u"], 133 | ["ッ", "", "cl"], 134 | ["チョ", "ch", "o"], 135 | ["チュ", "ch", "u"], 136 | ["チャ", "ch", "a"], 137 | ["チェ", "ch", "e"], 138 | ["チ", "ch", "i"], 139 | ["ダ", "d", "a"], 140 | ["タ", "t", "a"], 141 | ["ゾ", "z", "o"], 142 | ["ソ", "s", "o"], 143 | ["ゼ", "z", "e"], 144 | ["セ", "s", "e"], 145 | ["ズィ", "z", "i"], 146 | ["ズ", "z", "u"], 147 | ["スィ", "s", "i"], 148 | ["ス", "s", "u"], 149 | ["ジョ", "j", "o"], 150 | ["ジュ", "j", "u"], 151 | ["ジャ", "j", "a"], 152 | ["ジェ", "j", "e"], 153 | ["ジ", "j", "i"], 154 | ["ショ", "sh", "o"], 155 | ["シュ", "sh", "u"], 156 | ["シャ", "sh", "a"], 157 | ["シェ", "sh", "e"], 158 | ["シ", "sh", "i"], 159 | ["ザ", "z", "a"], 160 | ["サ", "s", "a"], 161 | ["ゴ", "g", "o"], 162 | ["コ", "k", "o"], 163 | ["ゲ", "g", "e"], 164 | ["ケ", "k", "e"], 165 | ["グヮ", "gw", "a"], 166 | ["グ", "g", "u"], 167 | ["クヮ", "kw", "a"], 168 | ["ク", "k", "u"], 169 | ["ギョ", "gy", "o"], 170 | ["ギュ", "gy", "u"], 171 | ["ギャ", "gy", "a"], 172 | ["ギェ", "gy", "e"], 173 | ["ギ", "g", "i"], 174 | ["キョ", "ky", "o"], 175 | ["キュ", "ky", "u"], 176 | ["キャ", "ky", "a"], 177 | ["キェ", "ky", "e"], 178 | ["キ", "k", "i"], 179 | ["ガ", "g", "a"], 180 | ["カ", "k", "a"], 181 | ["オ", "", "o"], 182 | ["エ", "", "e"], 183 | ["ウォ", "w", "o"], 184 | ["ウェ", "w", "e"], 185 | ["ウィ", "w", "i"], 186 | ["ウ", "", "u"], 187 | ["イェ", "y", "e"], 188 | ["イ", "", "i"], 189 | ["ア", "", "a"], 190 | ] 191 | _mora_list_additional = [ 192 | ["ヴョ", "by", "o"], 193 | ["ヴュ", "by", "u"], 194 | ["ヴャ", "by", "a"], 195 | ["ヲ", "", "o"], 196 | ["ヱ", "", "e"], 197 | ["ヰ", "", "i"], 198 | ["ヮ", "w", "a"], 199 | ["ョ", "y", "o"], 200 | ["ュ", "y", "u"], 201 | ["ヅ", "z", "u"], 202 | ["ヂ", "j", "i"], 203 | ["ヶ", "k", "e"], 204 | ["ャ", "y", "a"], 205 | ["ォ", "", "o"], 206 | ["ェ", "", "e"], 207 | ["ゥ", "", "u"], 208 | ["ィ", "", "i"], 209 | ["ァ", "", "a"], 210 | ] 211 | 212 | openjtalk_mora2text = { 213 | consonant + vowel: text for [text, consonant, 
# FIXME: ndarray type hint, https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder/blob/2b64f86197573497c685c785c6e0e743f407b63e/pyworld/pyworld.pyx#L398  # noqa
@dataclass(frozen=True)
class MorphingParameter:
    """
    Immutable bundle of the WORLD analysis results used for morphing.

    Built by `create_morphing_parameter` and consumed by `synthesis_morphing`.
    """

    # Sampling rate handed to the pyworld analysis and synthesis calls
    fs: int
    # Frame period handed to pw.harvest and pw.synthesize
    # (create_morphing_parameter uses 1.0; presumably milliseconds - confirm
    # against the pyworld documentation)
    frame_period: float
    # F0 contour returned by pw.harvest for the base wave
    base_f0: np.ndarray
    # Aperiodicity returned by pw.d4c for the base wave
    base_aperiodicity: np.ndarray
    # Spectrogram returned by pw.cheaptrick for the base wave
    base_spectrogram: np.ndarray
    # Spectrogram returned by pw.cheaptrick for the target wave, resized to
    # the shape of base_spectrogram
    target_spectrogram: np.ndarray
def is_synthesis_morphing_permitted(
    speaker_lookup: Dict[int, Tuple[Speaker, StyleInfo]],
    base_speaker: int,
    target_speaker: int,
) -> bool:
    """
    Return whether morphing between the two given styles is permitted.

    Parameters
    ----------
    speaker_lookup : Dict[int, Tuple[Speaker, StyleInfo]]
        Table from style ID to its `(speaker, style)` pair
        (see `construct_lookup`).
    base_speaker : int
        Style ID of the base side.
    target_speaker : int
        Style ID of the target side.

    Returns
    -------
    bool
        False if either speaker's policy is NOTHING; if either is SELF_ONLY,
        True only when both IDs belong to the same speaker UUID; otherwise
        True only when both policies are ALL.

    Raises
    ------
    SpeakerNotFoundError
        If either ID is missing from `speaker_lookup`. The error carries the
        missing ID (the base ID when both are missing).
    """

    # Use .get(): plain indexing raises KeyError for an unknown ID, which
    # made the None check below (and SpeakerNotFoundError) unreachable.
    base_speaker_data = speaker_lookup.get(base_speaker)
    target_speaker_data = speaker_lookup.get(target_speaker)

    if base_speaker_data is None or target_speaker_data is None:
        raise SpeakerNotFoundError(
            base_speaker if base_speaker_data is None else target_speaker
        )

    base_speaker_info, _ = base_speaker_data
    target_speaker_info, _ = target_speaker_data

    base_speaker_uuid = base_speaker_info.speaker_uuid
    target_speaker_uuid = target_speaker_info.speaker_uuid

    base_speaker_morphing_info: SpeakerSupportPermittedSynthesisMorphing = (
        base_speaker_info.supported_features.permitted_synthesis_morphing
    )

    target_speaker_morphing_info: SpeakerSupportPermittedSynthesisMorphing = (
        target_speaker_info.supported_features.permitted_synthesis_morphing
    )

    # Forbidden on either side: never permitted
    if (
        base_speaker_morphing_info == SpeakerSupportPermittedSynthesisMorphing.NOTHING
        or target_speaker_morphing_info
        == SpeakerSupportPermittedSynthesisMorphing.NOTHING
    ):
        return False
    # Restricted to the same speaker on either side: compare speaker UUIDs
    if (
        base_speaker_morphing_info == SpeakerSupportPermittedSynthesisMorphing.SELF_ONLY
        or target_speaker_morphing_info
        == SpeakerSupportPermittedSynthesisMorphing.SELF_ONLY
    ):
        return base_speaker_uuid == target_speaker_uuid
    # Defensive final check that both sides explicitly allow everything
    return (
        base_speaker_morphing_info == SpeakerSupportPermittedSynthesisMorphing.ALL
        and target_speaker_morphing_info == SpeakerSupportPermittedSynthesisMorphing.ALL
    )
`synthesis_morphing_parameter`または`create_morphing_parameter`で作成したパラメータ 169 | 170 | morph_rate : float 171 | モーフィングの割合 172 | 0.0でベースの話者、1.0でターゲットの話者に近づきます。 173 | 174 | Returns 175 | ------- 176 | generated : np.ndarray 177 | モーフィングした音声 178 | 179 | Raises 180 | ------- 181 | ValueError 182 | morph_rate ∈ [0, 1] 183 | """ 184 | 185 | if morph_rate < 0.0 or morph_rate > 1.0: 186 | raise ValueError("morph_rateは0.0から1.0の範囲で指定してください") 187 | 188 | morph_spectrogram = ( 189 | morph_param.base_spectrogram * (1.0 - morph_rate) 190 | + morph_param.target_spectrogram * morph_rate 191 | ) 192 | 193 | y_h = pw.synthesize( 194 | morph_param.base_f0, 195 | morph_spectrogram, 196 | morph_param.base_aperiodicity, 197 | morph_param.fs, 198 | morph_param.frame_period, 199 | ) 200 | 201 | # TODO: synthesis_engine.py でのリサンプル処理と共通化する 202 | if output_fs != morph_param.fs: 203 | y_h = resample(y_h, output_fs * len(y_h) // morph_param.fs) 204 | 205 | if output_stereo: 206 | y_h = np.array([y_h, y_h]).T 207 | 208 | return y_h 209 | -------------------------------------------------------------------------------- /voicevox_engine/part_of_speech_data.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | from .model import ( 4 | USER_DICT_MAX_PRIORITY, 5 | USER_DICT_MIN_PRIORITY, 6 | PartOfSpeechDetail, 7 | WordTypes, 8 | ) 9 | 10 | MIN_PRIORITY = USER_DICT_MIN_PRIORITY 11 | MAX_PRIORITY = USER_DICT_MAX_PRIORITY 12 | 13 | part_of_speech_data: Dict[WordTypes, PartOfSpeechDetail] = { 14 | WordTypes.PROPER_NOUN: PartOfSpeechDetail( 15 | part_of_speech="名詞", 16 | part_of_speech_detail_1="固有名詞", 17 | part_of_speech_detail_2="一般", 18 | part_of_speech_detail_3="*", 19 | context_id=1348, 20 | cost_candidates=[ 21 | -988, 22 | 3488, 23 | 4768, 24 | 6048, 25 | 7328, 26 | 8609, 27 | 8734, 28 | 8859, 29 | 8984, 30 | 9110, 31 | 14176, 32 | ], 33 | accent_associative_rules=[ 34 | "*", 35 | "C1", 36 | "C2", 37 | "C3", 38 | "C4", 39 | "C5", 40 | 
], 41 | ), 42 | WordTypes.COMMON_NOUN: PartOfSpeechDetail( 43 | part_of_speech="名詞", 44 | part_of_speech_detail_1="一般", 45 | part_of_speech_detail_2="*", 46 | part_of_speech_detail_3="*", 47 | context_id=1345, 48 | cost_candidates=[ 49 | -4445, 50 | 49, 51 | 1473, 52 | 2897, 53 | 4321, 54 | 5746, 55 | 6554, 56 | 7362, 57 | 8170, 58 | 8979, 59 | 15001, 60 | ], 61 | accent_associative_rules=[ 62 | "*", 63 | "C1", 64 | "C2", 65 | "C3", 66 | "C4", 67 | "C5", 68 | ], 69 | ), 70 | WordTypes.VERB: PartOfSpeechDetail( 71 | part_of_speech="動詞", 72 | part_of_speech_detail_1="自立", 73 | part_of_speech_detail_2="*", 74 | part_of_speech_detail_3="*", 75 | context_id=642, 76 | cost_candidates=[ 77 | 3100, 78 | 6160, 79 | 6360, 80 | 6561, 81 | 6761, 82 | 6962, 83 | 7414, 84 | 7866, 85 | 8318, 86 | 8771, 87 | 13433, 88 | ], 89 | accent_associative_rules=[ 90 | "*", 91 | ], 92 | ), 93 | WordTypes.ADJECTIVE: PartOfSpeechDetail( 94 | part_of_speech="形容詞", 95 | part_of_speech_detail_1="自立", 96 | part_of_speech_detail_2="*", 97 | part_of_speech_detail_3="*", 98 | context_id=20, 99 | cost_candidates=[ 100 | 1527, 101 | 3266, 102 | 3561, 103 | 3857, 104 | 4153, 105 | 4449, 106 | 5149, 107 | 5849, 108 | 6549, 109 | 7250, 110 | 10001, 111 | ], 112 | accent_associative_rules=[ 113 | "*", 114 | ], 115 | ), 116 | WordTypes.SUFFIX: PartOfSpeechDetail( 117 | part_of_speech="名詞", 118 | part_of_speech_detail_1="接尾", 119 | part_of_speech_detail_2="一般", 120 | part_of_speech_detail_3="*", 121 | context_id=1358, 122 | cost_candidates=[ 123 | 4399, 124 | 5373, 125 | 6041, 126 | 6710, 127 | 7378, 128 | 8047, 129 | 9440, 130 | 10834, 131 | 12228, 132 | 13622, 133 | 15847, 134 | ], 135 | accent_associative_rules=[ 136 | "*", 137 | "C1", 138 | "C2", 139 | "C3", 140 | "C4", 141 | "C5", 142 | ], 143 | ), 144 | } 145 | -------------------------------------------------------------------------------- /voicevox_engine/preset/Preset.py: 
class PresetManager:
    """
    Manage the presets stored in a YAML file.

    An in-memory copy of the presets is kept and refreshed whenever the
    file's modification time changes. Every mutating method writes the whole
    list back to the file and rolls the in-memory list back if that fails.
    """

    def __init__(
        self,
        preset_path: Path,
    ):
        self.presets: List[Preset] = []
        self.last_modified_time = 0
        self.preset_path = preset_path

    def _write_presets(self) -> None:
        """Write the current in-memory presets to the YAML file."""
        with open(self.preset_path, mode="w", encoding="utf-8") as f:
            yaml.safe_dump(
                [preset.dict() for preset in self.presets],
                f,
                allow_unicode=True,
                sort_keys=False,
            )

    def load_presets(self) -> List[Preset]:
        """
        Load the preset YAML file.

        The file is only re-read when its modification time differs from the
        one recorded at the previous successful load.

        Returns
        -------
        ret: List[Preset]
            List of presets

        Raises
        ------
        PresetError
            If the file cannot be stat'ed, is empty, fails validation, or
            contains duplicate preset IDs.
        """

        # Check the timestamp of the settings file
        try:
            _last_modified_time = self.preset_path.stat().st_mtime
        except OSError as err:
            raise PresetError("プリセットの設定ファイルが見つかりません") from err
        if _last_modified_time == self.last_modified_time:
            return self.presets

        with open(self.preset_path, mode="r", encoding="utf-8") as f:
            obj = yaml.safe_load(f)
            if obj is None:
                raise PresetError("プリセットの設定ファイルが空の内容です")

        try:
            _presets = parse_obj_as(List[Preset], obj)
        except ValidationError as err:
            raise PresetError("プリセットの設定ファイルにミスがあります") from err

        # Make sure the IDs are unique
        if len(_presets) != len({loaded.id for loaded in _presets}):
            raise PresetError("プリセットのidに重複があります")

        self.presets = _presets
        self.last_modified_time = _last_modified_time
        return self.presets

    def add_preset(self, preset: Preset) -> int:
        """
        Add a new preset to the YAML file.

        Parameters
        ----------
        preset : Preset
            Preset to add. Its `id` is reassigned when it is negative or
            already in use.

        Returns
        -------
        ret: int
            Preset ID of the added preset

        Raises
        ------
        PresetError
            If loading fails, or the file cannot be opened for writing.
        """

        # The file may have been edited by hand, so reload the latest YAML
        self.load_presets()

        # Pick a new ID when the given one is negative or already taken.
        # default=-1 makes the first ID 0 when there are no presets yet
        # (max() of an empty sequence would raise ValueError).
        existing_ids = {existing.id for existing in self.presets}
        if preset.id < 0 or preset.id in existing_ids:
            preset.id = max(existing_ids, default=-1) + 1
        self.presets.append(preset)

        # Write to the file, undoing the append on any failure
        try:
            self._write_presets()
        except FileNotFoundError as err:
            self.presets.pop()
            raise PresetError("プリセットの設定ファイルに書き込み失敗しました") from err
        except Exception:
            self.presets.pop()
            raise

        return preset.id

    def update_preset(self, preset: Preset) -> int:
        """
        Update a preset in the YAML file.

        Parameters
        ----------
        preset : Preset
            Preset to store; it replaces the preset with the same `id`.

        Returns
        -------
        ret: int
            Preset ID of the updated preset

        Raises
        ------
        PresetError
            If loading fails, no preset with that ID exists, or the file
            cannot be opened for writing.
        """

        # The file may have been edited by hand, so reload the latest YAML
        self.load_presets()

        # Find the preset with the same ID and swap it in
        for index, current in enumerate(self.presets):
            if current.id == preset.id:
                prev_index, prev_preset = index, current
                self.presets[index] = preset
                break
        else:
            raise PresetError("更新先のプリセットが存在しません")

        # Write to the file, restoring the previous preset on any failure
        try:
            self._write_presets()
        except FileNotFoundError as err:
            self.presets[prev_index] = prev_preset
            raise PresetError("プリセットの設定ファイルに書き込み失敗しました") from err
        except Exception:
            self.presets[prev_index] = prev_preset
            raise

        return preset.id

    def delete_preset(self, id: int) -> int:
        """
        Delete a preset from the YAML file.

        Parameters
        ----------
        id: int
            Preset ID of the preset to delete

        Returns
        -------
        ret: int
            Preset ID of the deleted preset

        Raises
        ------
        PresetError
            If loading fails, no preset with that ID exists, or the file
            cannot be opened for writing.
        """

        # The file may have been edited by hand, so reload the latest YAML
        self.load_presets()

        # Find the preset with the given ID and remove it
        for index, current in enumerate(self.presets):
            if current.id == id:
                removed_index, removed = index, self.presets.pop(index)
                break
        else:
            raise PresetError("削除対象のプリセットが存在しません")

        # Write to the file. The removed preset is put back on *any* failure
        # (previously only on FileNotFoundError), so the in-memory list never
        # diverges from the file, matching add_preset / update_preset.
        try:
            self._write_presets()
        except FileNotFoundError as err:
            self.presets.insert(removed_index, removed)
            raise PresetError("プリセットの設定ファイルに書き込み失敗しました") from err
        except Exception:
            self.presets.insert(removed_index, removed)
            raise

        return id
class Setting(BaseModel):
    """
    Engine settings.
    """

    # CORS (resource sharing) policy mode
    cors_policy_mode: CorsPolicyMode = Field(title="リソース共有ポリシー")
    # Origin to allow; optional
    allow_origin: Optional[str] = Field(title="許可するオリジン")

    class Config:
        # pydantic option: populate the model with enum *values* rather than
        # enum members
        use_enum_values = True
f) 34 | -------------------------------------------------------------------------------- /voicevox_engine/setting/__init__.py: -------------------------------------------------------------------------------- 1 | from .Setting import CorsPolicyMode, Setting 2 | from .SettingLoader import USER_SETTING_PATH, SettingLoader 3 | 4 | __all__ = [ 5 | "USER_SETTING_PATH", 6 | "CorsPolicyMode", 7 | "Setting", 8 | "SettingLoader", 9 | ] 10 | -------------------------------------------------------------------------------- /voicevox_engine/sv_model.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import json 3 | import os 4 | import shutil 5 | from pathlib import Path 6 | from typing import List 7 | 8 | from voicevox_engine.model import SVModelInfo 9 | 10 | # テストでstored_dirを切り替えたいのでmodel_dirは利用しない 11 | from voicevox_engine.utility import get_save_dir 12 | 13 | save_dir = get_save_dir() 14 | 15 | 16 | def get_all_sv_models(stored_dir: Path = save_dir) -> List[str]: 17 | """ 18 | 保存されているsv_modelsの情報をListで返却する。 19 | 20 | - libraries.jsonを読み込み、key(str)のみをListに詰めて返却する 21 | - bool値は使わないので返却しない 22 | - permission deniedされたら 23 | """ 24 | 25 | libraries = None 26 | try: 27 | with open(stored_dir / "model" / "libraries.json", "r") as f: 28 | libraries = json.load(f) 29 | except Exception as e: 30 | raise e 31 | return list(libraries.keys()) 32 | 33 | 34 | def register_sv_model( 35 | sv_model: SVModelInfo, 36 | stored_dir: Path = save_dir, 37 | ): 38 | """ 39 | 送られた単一のSVModelを保存する。返り値はない。 40 | """ 41 | 42 | try: 43 | # 既存のsv_modelsとUUIDの重複があった場合も全て新しく作り直す 44 | # 以下のディレクトリを作成する 45 | # - /model/${uuid} 46 | # - /speaker_info/${uuid} 47 | # - /speaker_info/${uuid}/icons/ 48 | # - /speaker_info/${uuid}/voice_samples/ 49 | # この後の処理で何らかのExceptionが起きた場合、上記のディレクトリを全て削除する 50 | 51 | # 意味的にはmodelsの方が正しそうだけど、実際に保存されたディレクトリ名はmodelだったので 52 | model_uuid_dir = stored_dir / "model" / sv_model.uuid 53 | already_exists = 
os.path.exists(model_uuid_dir) 54 | if already_exists: 55 | os.rename(model_uuid_dir, f"{model_uuid_dir}.old") 56 | os.makedirs(model_uuid_dir) 57 | 58 | # variance_model, embedder_model, decoder_modelは 59 | # それぞれbase64デコードしてから/model/${uuid}/*.onnxに保存する 60 | with open(model_uuid_dir / "variance_model.onnx", "wb") as f: 61 | f.write(base64.b64decode(sv_model.variance_model.encode("utf-8"))) 62 | with open(model_uuid_dir / "embedder_model.onnx", "wb") as f: 63 | f.write(base64.b64decode(sv_model.embedder_model.encode("utf-8"))) 64 | with open(model_uuid_dir / "decoder_model.onnx", "wb") as f: 65 | f.write(base64.b64decode(sv_model.decoder_model.encode("utf-8"))) 66 | 67 | # metasは/model/${uuid}/metas.jsonに保存する 68 | with open(model_uuid_dir / "metas.json", "w", encoding="utf-8") as f: 69 | json.dump([meta.dict() for meta in sv_model.metas], f, ensure_ascii=False) 70 | 71 | # model_config.jsonは/model/${uuid}/model_config.jsonに保存する 72 | with open(model_uuid_dir / "model_config.json", "w", encoding="utf-8") as f: 73 | json.dump(sv_model.model_config.dict(), f, ensure_ascii=False) 74 | 75 | # 異常なUUIDを含んでいないか確認する 76 | assert len(sv_model.metas) == len(sv_model.speaker_infos) 77 | for meta in sv_model.metas: 78 | assert meta.speaker_uuid in sv_model.speaker_infos.keys() 79 | 80 | # speaker_infos 81 | for speaker_uuid, speaker_info in sv_model.speaker_infos.items(): 82 | speaker_info_dir = stored_dir / "speaker_info" / speaker_uuid 83 | 84 | # 既にモデルが存在していた場合はrenameしておく 85 | if already_exists and os.path.exists(speaker_info_dir): 86 | os.rename(speaker_info_dir, f"{speaker_info_dir}.old") 87 | 88 | os.makedirs(speaker_info_dir / "icons") 89 | os.makedirs(speaker_info_dir / "voice_samples") 90 | 91 | # - policy => /speaker_info/${speaker_uuid}/policy.md 92 | with open(speaker_info_dir / "policy.md", "w", encoding="utf-8") as f: 93 | f.write(speaker_info.policy) 94 | 95 | # - portrait => base64デコードして/speaker_info/${speaker_uuid}/portrait.pngに保存 96 | with open(speaker_info_dir 
/ "portrait.png", "wb") as f: 97 | f.write(base64.b64decode(speaker_info.portrait.encode("utf-8"))) 98 | 99 | # TODO: metas.jsonもSV Model API経由で渡せるようにする 100 | # - metas => 空のjsonを保存 101 | with open(speaker_info_dir / "metas.json", "w") as f: 102 | f.write(json.dumps({})) 103 | 104 | # - style_infosは、iconとvoiceをbase64デコードして以下の通り保存する 105 | # - id => iconとvoice_samplesの保存に使う 106 | # - icon => /speaker_info/${uuid}/icons/${id}.png 107 | # - voice_samples => /speaker_info/${uuid}/voice_samples/${id}_00{index}.wav 108 | for style_info in speaker_info.style_infos: 109 | with open( 110 | speaker_info_dir / "icons" / f"{style_info.id}.png", "wb" 111 | ) as f: 112 | f.write(base64.b64decode(style_info.icon.encode("utf-8"))) 113 | for idx, voice_sample in enumerate(style_info.voice_samples): 114 | # 既存の採番は1-indexedなので 115 | with open( 116 | speaker_info_dir 117 | / "voice_samples" 118 | / f"{style_info.id}_00{idx+1}.wav", 119 | "wb", 120 | ) as f: 121 | f.write(base64.b64decode(voice_sample.encode("utf-8"))) 122 | 123 | # 最後にlibraries.jsonに追記する 124 | # 2回ロックをかけるよりも1回のrwロックの方が整合性が保たれて良い 125 | with open(stored_dir / "model" / "libraries.json", "r+", encoding="utf-8") as f: 126 | libraries = json.load(f) 127 | libraries[sv_model.uuid] = True 128 | f.seek(0) 129 | json.dump(libraries, f, ensure_ascii=False) 130 | 131 | # backupを削除する 132 | if already_exists: 133 | shutil.rmtree(f"{model_uuid_dir}.old") 134 | for speaker_uuid in sv_model.speaker_infos.keys(): 135 | speaker_info_dir = stored_dir / "speaker_info" / speaker_uuid 136 | # 新しく追加されるspeaker_info_dirに.oldは存在しないはずなので、exists checkをする 137 | if os.path.exists(f"{speaker_info_dir}.old"): 138 | shutil.rmtree(f"{speaker_info_dir}.old") 139 | 140 | # backupを削除する 141 | if already_exists: 142 | shutil.rmtree(f"{model_uuid_dir}.old") 143 | for speaker_uuid in sv_model.speaker_infos.keys(): 144 | speaker_info_dir = stored_dir / "speaker_info" / speaker_uuid 145 | # 新しく追加されるspeaker_info_dirに.oldは存在しないはずなので、exists checkをする 146 | if 
def make_synthesis_engines(
    use_gpu: bool,
    voicelib_dirs: Optional[List[Path]] = None,
    sharevox_dir: Optional[Path] = None,
    runtime_dirs: Optional[List[Path]] = None,
    cpu_num_threads: Optional[int] = None,
    enable_mock: bool = True,
    load_all_models: bool = False,
) -> Dict[str, SynthesisEngineBase]:
    """
    Load the voice libraries and build the synthesis engines.

    Parameters
    ----------
    use_gpu: bool
        Whether the voice library should use the GPU
    voicelib_dirs: List[Path], optional, default=None
        Directories that contain the voice library itself.
        The list passed by the caller is never modified.
    sharevox_dir: Path, optional, default=None
        Directory of a compiled sharevox or sharevox_engine.
        When given, it is appended to both voicelib_dirs and runtime_dirs.
    runtime_dirs: List[Path], optional, default=None
        Directories that contain the libraries used by the core.
        When None, sharevox_dir is used if given, otherwise engine_root().
        The list passed by the caller is never modified.
    cpu_num_threads: int, optional, default=None
        Number of CPU threads the voice library uses for inference.
        None is treated as 0 and a warning is printed to stderr.
    enable_mock: bool, optional, default=True
        When True, no core is loaded and only the mock engine is registered.
        When False, the cores found in voicelib_dirs and in the user's
        core_libraries directory are loaded instead.
    load_all_models: bool, optional, default=False
        Whether to load every model at start-up

    Returns
    -------
    synthesis_engines: Dict[str, SynthesisEngineBase]
        Engines keyed by core version (the mock is registered as "0.0.0")
    """
    if cpu_num_threads == 0 or cpu_num_threads is None:
        print(
            "Warning: cpu_num_threads is set to 0. "
            + "( The library leaves the decision to the synthesis runtime )",
            file=sys.stderr,
        )
        cpu_num_threads = 0

    if sharevox_dir is not None:
        # Build new lists instead of calling append() so that the lists
        # owned by the caller are left untouched.
        voicelib_dirs = list(voicelib_dirs or ()) + [sharevox_dir]
        runtime_dirs = list(runtime_dirs or ()) + [sharevox_dir]
    else:
        root_dir = engine_root()
        if voicelib_dirs is None:
            voicelib_dirs = [root_dir]
        if runtime_dirs is None:
            runtime_dirs = [root_dir]

    voicelib_dirs = [p.expanduser() for p in voicelib_dirs]
    runtime_dirs = [p.expanduser() for p in runtime_dirs]

    load_runtime_lib(runtime_dirs)
    synthesis_engines: Dict[str, SynthesisEngineBase] = {}

    if not enable_mock:

        def load_core_library(core_dir: Path, suppress_error: bool = False) -> None:
            """
            Load the core located in the given directory.
            A user directory may not contain a core at all, so errors can be
            suppressed for those.
            """
            try:
                core = CoreWrapper(use_gpu, core_dir, cpu_num_threads, load_all_models)
                metas = json.loads(core.metas())
                core_version = metas[0]["version"]
                print(f"Info: Loading core {core_version}.")
                if core_version in synthesis_engines:
                    # The first core registered for a version wins.
                    print(
                        "Warning: Core loading is skipped because of version duplication.",
                        file=sys.stderr,
                    )
                else:
                    synthesis_engines[core_version] = SynthesisEngine(core=core)
            except Exception:
                if not suppress_error:
                    raise

        for core_dir in voicelib_dirs:
            load_core_library(core_dir)

        # Load the cores stored in the user directory.
        core_libraries_dir = get_save_dir() / "core_libraries"
        # parents=True: the save directory itself may not exist yet.
        core_libraries_dir.mkdir(parents=True, exist_ok=True)
        user_voicelib_dirs = [core_libraries_dir]
        user_voicelib_dirs.extend(
            path for path in core_libraries_dir.glob("*") if path.is_dir()
        )

        for core_dir in user_voicelib_dirs:
            load_core_library(core_dir, suppress_error=True)

    else:
        # Register the mock engine.
        from ..dev.core import metas as mock_metas
        from ..dev.core import supported_devices as mock_supported_devices
        from ..dev.synthesis_engine import MockSynthesisEngine

        if "0.0.0" not in synthesis_engines:
            print("Info: Loading mock.")
            synthesis_engines["0.0.0"] = MockSynthesisEngine(
                speakers=mock_metas(), supported_devices=mock_supported_devices()
            )

    return synthesis_engines
def connect_base64_waves(waves: List[str]) -> Tuple[np.ndarray, int]:
    """
    Decode base64-encoded wav clips and join them into a single waveform.

    Every clip is brought to the highest sampling rate found among the
    inputs. If any clip decodes to a 2-D array, 1-D clips are duplicated
    into two identical columns before concatenation.

    Parameters
    ----------
    waves: list[str]
        Base64-encoded wav data

    Returns
    -------
    (np.ndarray, int)
        The concatenated waveform and the sampling rate it uses
    """
    decoded = decode_base64_waves(waves)

    target_rate = max(rate for _, rate in decoded)
    target_ndim = max(samples.ndim for samples, _ in decoded)
    assert 0 < target_ndim <= 2

    def _conform(samples: np.ndarray, rate: int) -> np.ndarray:
        # Resample first, then widen, in the same order for every clip.
        if rate != target_rate:
            samples = resample(samples, target_rate * len(samples) // rate)
        if samples.ndim < target_ndim:
            samples = np.array([samples, samples]).T
        return samples

    pieces = [_conform(samples, rate) for samples, rate in decoded]
    return np.concatenate(pieces), target_rate
def mutex_wrapper(lock: threading.Lock):
    """
    Build a decorator that runs the decorated function while holding `lock`.

    Parameters
    ----------
    lock: threading.Lock
        Lock acquired before each call and released afterwards, including
        when the call raises

    Returns
    -------
    wrap
        Decorator that returns a wrapper with the same call signature and
        return value as the decorated function
    """
    # Imported locally so this block does not depend on a file-level import.
    from functools import wraps

    def wrap(f):
        # wraps() keeps __name__, __doc__ and __wrapped__ of the original,
        # which the plain closure used to discard.
        @wraps(f)
        def func(*args, **kw):
            with lock:
                return f(*args, **kw)

        return func

    return wrap
--------------------------------------------------------------------------------