├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bugreport.md │ ├── featurerequest.md │ └── question.md ├── PULL_REQUEST_TEMPLATE.md ├── labeler.yml └── workflows │ ├── build.yml │ ├── coverage-comment.yml │ ├── labeler.yml │ ├── release-test.yml │ ├── release_latest_dev.yml │ ├── test.yml │ ├── typos.yml │ └── upload-gh-pages.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── _typos.toml ├── build_util ├── check_release_build.py ├── codesign.bash ├── create_venv_and_generate_licenses.bash ├── macos │ ├── build_util_macos │ │ ├── __init__.py │ │ └── shlib_tools.py │ ├── copy_missing_dylibs.py │ └── fix_rpaths.py ├── merge_update_infos.py └── process_voicevox_resource.bash ├── default.csv ├── default_setting.yml ├── docs ├── VOICEVOX音声合成エンジンとの連携.md ├── api │ └── .gitkeep ├── licenses │ ├── cublas │ │ └── License.txt │ ├── cuda │ │ └── EULA.txt │ ├── cuda_runtime │ │ └── License.txt │ ├── cudnn │ │ ├── LICENSE │ │ └── License.txt │ ├── cufft │ │ └── License.txt │ ├── cupti │ │ └── License.txt │ ├── curand │ │ └── License.txt │ ├── cusolver │ │ └── License.txt │ ├── cusparse │ │ └── License.txt │ ├── mpg123 │ │ └── COPYING │ ├── nccl │ │ └── License.txt │ ├── nvrtc │ │ └── License.txt │ ├── nvtx │ │ └── License.txt │ ├── open_jtalk │ │ ├── COPYING │ │ ├── mecab-naist-jdic │ │ │ └── COPYING │ │ └── mecab │ │ │ └── COPYING │ └── world │ │ └── LICENSE.txt └── res │ └── マルチエンジン概念図.svg ├── engine_manifest.json ├── engine_manifest_assets ├── dependency_licenses.json ├── downloadable_libraries.json ├── icon.png ├── terms_of_service.md └── update_infos.json ├── generate_licenses.py ├── get_cost_candidates.py ├── make_docs.py ├── nuitka-config.yaml ├── poetry.lock ├── presets.yaml ├── pyproject.toml ├── requirements-dev.txt ├── requirements-license.txt ├── requirements-test.txt ├── requirements.txt ├── run.py ├── setup.cfg ├── speaker_info ├── 35b2c544-660e-401e-b503-0e14c635303a │ ├── icons │ │ └── 8.png │ ├── metas.json │ ├── policy.md │ ├── 
portrait.png │ ├── portraits │ │ └── 8.png │ └── voice_samples │ │ ├── 8_001.wav │ │ ├── 8_002.wav │ │ └── 8_003.wav ├── 388f246b-8c41-4ac1-8e2d-5d79f3ff56d9 │ ├── icons │ │ ├── 1.png │ │ ├── 3.png │ │ ├── 5.png │ │ └── 7.png │ ├── metas.json │ ├── policy.md │ ├── portrait.png │ ├── portraits │ │ └── 3.png │ └── voice_samples │ │ ├── 1_001.wav │ │ ├── 1_002.wav │ │ ├── 1_003.wav │ │ ├── 3_001.wav │ │ ├── 3_002.wav │ │ ├── 3_003.wav │ │ ├── 5_001.wav │ │ ├── 5_002.wav │ │ ├── 5_003.wav │ │ ├── 7_001.wav │ │ ├── 7_002.wav │ │ └── 7_003.wav ├── 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff │ ├── icons │ │ ├── 0.png │ │ ├── 2.png │ │ ├── 4.png │ │ └── 6.png │ ├── metas.json │ ├── policy.md │ ├── portrait.png │ ├── portraits │ │ ├── 0.png │ │ ├── 2.png │ │ ├── 4.png │ │ └── 6.png │ └── voice_samples │ │ ├── 0_001.wav │ │ ├── 0_002.wav │ │ ├── 0_003.wav │ │ ├── 2_001.wav │ │ ├── 2_002.wav │ │ ├── 2_003.wav │ │ ├── 4_001.wav │ │ ├── 4_002.wav │ │ ├── 4_003.wav │ │ ├── 6_001.wav │ │ ├── 6_002.wav │ │ └── 6_003.wav └── b1a81618-b27b-40d2-b0ea-27a9ad408c4b │ ├── icons │ └── 9.png │ ├── metas.json │ ├── policy.md │ ├── portrait.png │ └── voice_samples │ ├── 9_001.wav │ ├── 9_002.wav │ └── 9_003.wav ├── test ├── __init__.py ├── e2e │ ├── conftest.py │ └── test_validate_version.py ├── presets-test-1.yaml ├── presets-test-2.yaml ├── presets-test-3.yaml ├── presets-test-4.yaml ├── setting-test-load-1.yaml ├── setting-test-load-2.yaml ├── setting-test-load-3.yaml ├── test_acoustic_feature_extractor.py ├── test_connect_base64_waves.py ├── test_core_version_utility.py ├── test_full_context_label.py ├── test_kana_parser.py ├── test_library_manager.py ├── test_mock_synthesis_engine.py ├── test_mora_list.py ├── test_mora_to_text.py ├── test_preset.py ├── test_setting.py ├── test_synthesis_engine.py ├── test_synthesis_engine_base.py ├── test_user_dict.py ├── test_user_dict_model.py ├── test_word_types.py └── vvlib_manifest.json ├── ui_template └── ui.html └── voicevox_engine ├── __init__.py ├── 
acoustic_feature_extractor.py ├── bridge_config ├── BridgeConfig.py ├── BridgeConfigLoader.py └── __init__.py ├── cancellable_engine.py ├── dev ├── core │ ├── __init__.py │ └── mock.py └── synthesis_engine │ ├── __init__.py │ └── mock.py ├── engine_manifest ├── EngineManifest.py ├── EngineManifestLoader.py └── __init__.py ├── full_context_label.py ├── kana_parser.py ├── library_manager.py ├── metas ├── Metas.py ├── MetasStore.py └── __init__.py ├── model.py ├── mora_list.py ├── morphing.py ├── part_of_speech_data.py ├── preset ├── Preset.py ├── PresetError.py ├── PresetManager.py └── __init__.py ├── setting ├── Setting.py ├── SettingLoader.py └── __init__.py ├── synthesis_engine ├── __init__.py ├── core_wrapper.py ├── make_synthesis_engines.py ├── synthesis_engine.py ├── synthesis_engine_base.py └── synthesis_engine_espnet.py ├── user_dict.py └── utility ├── __init__.py ├── connect_base64_waves.py ├── core_version_utility.py ├── mutex_utility.py ├── path_utility.py └── save_dir.py /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | *.png -text 3 | *.wav -text -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bugreport.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: 不具合の報告 4 | labels: バグ 5 | --- 6 | 7 | ## 不具合の内容 8 | 9 | 10 | 11 | ### 現象・ログ 12 | 13 | 14 | 15 | ### 再現手順 16 | 17 | 18 | 19 | ### 期待動作 20 | 21 | 22 | 23 | ## VOICEVOXのバージョン 24 | 25 | 0.?.0 26 | 27 | 28 | 29 | ## OSの種類/ディストリ/バージョン 30 | 31 | 32 | 33 | - [ ] Windows 34 | - [ ] macOS 35 | - [ ] Linux 36 | 37 | 44 | 45 | ## その他 46 | 47 | 48 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/featurerequest.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: 機能要望・改善提案 4 | labels: 機能向上 5 
| --- 6 | 7 | ## 内容 8 | 9 | 10 | 11 | 12 | ### Pros 良くなる点 13 | 14 | 15 | 16 | ### Cons 悪くなる点 17 | 18 | 19 | 20 | ### 実現方法 21 | 22 | 23 | 24 | ## VOICEVOXのバージョン 25 | 26 | 0.?.0 27 | 28 | 29 | 30 | ## OSの種類/ディストリ/バージョン 31 | 32 | 33 | 34 | - [ ] Windows 35 | - [ ] macOS 36 | - [ ] Linux 37 | 38 | 45 | 46 | ## その他 47 | 48 | 49 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question 3 | about: 質問 (既存のIssueや一般事例を良く調べてからしてください) 4 | labels: 要議論 5 | --- 6 | 7 | ## 質問の内容 8 | 9 | 10 | 11 | ## VOICEVOXのバージョン 12 | 13 | 0.?.0 14 | 15 | 16 | 17 | ## OSの種類/ディストリ/バージョン 18 | 19 | 20 | 21 | - [ ] Windows 22 | - [ ] macOS 23 | - [ ] Linux 24 | 25 | 32 | 33 | ## その他 34 | 35 | 36 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## 内容 2 | 3 | 6 | 7 | ## 関連 Issue 8 | 9 | 17 | 18 | ## スクリーンショット・動画など 19 | 20 | 23 | 24 | ## その他 25 | -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | 'OS:mac': 2 | - '\[x\] macOS' 3 | 'OS:linux': 4 | - '\[x\] Linux' 5 | 'OS:win': 6 | - '\[x\] Windows' 7 | -------------------------------------------------------------------------------- /.github/workflows/coverage-comment.yml: -------------------------------------------------------------------------------- 1 | name: Coverage Report Comment 2 | 3 | on: 4 | workflow_run: 5 | workflows: 6 | - test 7 | types: 8 | - completed 9 | workflow_dispatch: 10 | 11 | defaults: 12 | run: 13 | shell: bash 14 | 15 | jobs: 16 | comment: 17 | runs-on: ubuntu-latest 18 | if: github.event.workflow_run.event == 'pull_request' && github.event.workflow_run.conclusion == 'success' 
19 | steps: 20 | - name: Download coverage report 21 | uses: actions/github-script@v5.0.0 22 | with: 23 | script: | 24 | const artifacts = await github.rest.actions.listWorkflowRunArtifacts({ 25 | owner: context.repo.owner, 26 | repo: context.repo.repo, 27 | run_id: ${{ github.event.workflow_run.id }}, 28 | }) 29 | const matchArtifact = artifacts.data.artifacts.filter((artifact) => { 30 | return artifact.name == 'report' 31 | })[0] 32 | const download = await github.rest.actions.downloadArtifact({ 33 | owner: context.repo.owner, 34 | repo: context.repo.repo, 35 | artifact_id: matchArtifact.id, 36 | archive_format: 'zip', 37 | }) 38 | const fs = require('fs') 39 | fs.writeFileSync('${{github.workspace}}/report.zip', Buffer.from(download.data)) 40 | 41 | - name: Unzip report 42 | run: unzip report.zip 43 | 44 | - name: Comment coverage result to Pull Requests 45 | uses: actions/github-script@v5.0.0 46 | with: 47 | github-token: ${{ secrets.GITHUB_TOKEN }} 48 | script: | 49 | const fs = require('fs') 50 | const baseReport = fs.readFileSync('report.txt', 'utf8').toString().split('\n') 51 | let report = '' 52 | for (let i = 0; i < baseReport.length; i++) { 53 | const line = baseReport[i].split(' ').filter(v => v) 54 | if (i === 1 && line.length === 1) { 55 | report += "|:---|---:|---:|---:|\n" 56 | } else if (line.length === 1) { 57 | continue 58 | } else { 59 | if (i !== 0 && line.length === 4) { 60 | const parcent = Number(line[3].replace("%", "")) 61 | let color = 'green' 62 | if (parcent < 50) { 63 | color = 'red' 64 | } else if (parcent < 90) { 65 | color = 'orange' 66 | } 67 | line[3] = `![coverage-${parcent}%](https://img.shields.io/badge/coverage-${parcent}%25-${color}.svg)` 68 | } 69 | report += "|" + line.join("|") + "|\n" 70 | } 71 | if (line[0] === 'TOTAL') break 72 | } 73 | 74 | const issue_number = Number(fs.readFileSync('pr_num.txt')) 75 | const body = `## Coverage Result\n\n
\nResultを開く\n\n${report}\n
` 76 | 77 | let listComments = await github.rest.issues.listComments({ 78 | issue_number, 79 | owner: context.repo.owner, 80 | repo: context.repo.repo, 81 | }) 82 | listComments = listComments.data.filter((comment) => { 83 | return comment.body.includes('Coverage Result') && comment.user.login.includes('github-actions') 84 | }) 85 | 86 | if (listComments.length === 0) { 87 | github.rest.issues.createComment({ 88 | issue_number, 89 | owner: context.repo.owner, 90 | repo: context.repo.repo, 91 | body, 92 | }) 93 | } else { 94 | github.rest.issues.updateComment({ 95 | comment_id: listComments[0].id, 96 | owner: context.repo.owner, 97 | repo: context.repo.repo, 98 | body, 99 | }) 100 | } 101 | -------------------------------------------------------------------------------- /.github/workflows/labeler.yml: -------------------------------------------------------------------------------- 1 | name: Issue Labeler 2 | on: 3 | issues: 4 | types: [opened] 5 | defaults: 6 | run: 7 | shell: bash 8 | 9 | jobs: 10 | triage: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: github/issue-labeler@v2.0 14 | with: 15 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 16 | configuration-path: .github/labeler.yml 17 | enable-versioned-regex: 0 18 | -------------------------------------------------------------------------------- /.github/workflows/release-test.yml: -------------------------------------------------------------------------------- 1 | name: Test Release Build 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | version: 7 | type: string 8 | required: true 9 | repo_url: 10 | type: string 11 | required: false 12 | workflow_dispatch: 13 | inputs: 14 | version: 15 | type: string 16 | description: "テストしたいタグ名" 17 | required: true 18 | repo_url: 19 | type: string 20 | description: "リポジトリのURL(省略可能)" 21 | required: false 22 | 23 | env: 24 | REPO_URL: 25 | |- # repo_url指定時はrepo_urlを、それ以外はgithubのリポジトリURLを使用 26 | ${{ (github.event.inputs || inputs).repo_url || format('{0}/{1}', github.server_url, 
github.repository) }} 27 | VERSION: |- # version指定時はversionを、それ以外はタグ名を使用 28 | ${{ (github.event.inputs || inputs).version }} 29 | 30 | defaults: 31 | run: 32 | shell: bash 33 | 34 | jobs: 35 | test: 36 | strategy: 37 | fail-fast: false 38 | matrix: 39 | include: 40 | #- os: ubuntu-20.04 41 | # target: linux-cpu 42 | #- os: ubuntu-20.04 43 | # target: linux-nvidia 44 | #- os: macos-11 45 | # target: macos-x64 46 | - os: windows-2019 47 | target: windows-cpu 48 | #- os: windows-2019 49 | # target: windows-nvidia 50 | #- os: windows-2019 51 | # target: windows-directml 52 | 53 | runs-on: ${{ matrix.os }} 54 | 55 | steps: 56 | - name: declare variables 57 | id: vars 58 | run: | 59 | echo "release_url=${{ env.REPO_URL }}/releases/download/${{ env.VERSION }}" >> $GITHUB_OUTPUT 60 | echo "package_name=voicevox_engine-${{ matrix.target }}-${{ env.VERSION }}" >> $GITHUB_OUTPUT 61 | 62 | - uses: actions/checkout@v2 63 | 64 | - uses: actions/setup-python@v2 65 | with: 66 | python-version: "3.11.3" 67 | cache: pip 68 | 69 | - name: Download 70 | run: | 71 | mkdir -p download 72 | curl -L -o "download/list.txt" "${{ steps.vars.outputs.release_url }}/${{ steps.vars.outputs.package_name }}.7z.txt" 73 | cat "download/list.txt" | xargs -I '%' curl -L -o "download/%" "${{ steps.vars.outputs.release_url }}/%" 74 | 7z x "download/$(head -n1 download/list.txt)" 75 | mv ${{ matrix.target }} dist/ 76 | 77 | - name: chmod +x 78 | if: startsWith(matrix.target, 'linux') || startsWith(matrix.target, 'macos') 79 | run: chmod +x dist/run 80 | 81 | - name: Install requirements 82 | run: | 83 | pip install -r requirements-test.txt 84 | 85 | - name: Test 86 | run: python build_util/check_release_build.py --dist_dir dist/ 87 | -------------------------------------------------------------------------------- /.github/workflows/release_latest_dev.yml: -------------------------------------------------------------------------------- 1 | name: Release latest dev build 2 | 3 | # 
masterブランチが更新されるたびに開発版をビルドしてデプロイする。 4 | # バージョン(タグ)は最新リリースのバージョンを`X.Y.Z`としたときの`X.Y+1.0-dev`。 5 | 6 | on: 7 | push: 8 | branches: 9 | - master 10 | paths-ignore: 11 | - "docs/**" 12 | - "test/**" 13 | 14 | jobs: 15 | latest-dev-build: 16 | runs-on: ubuntu-latest 17 | if: github.repository_owner == 'VOICEVOX' 18 | steps: 19 | - name: Trigger workflow_dispatch 20 | uses: actions/github-script@v6 21 | with: 22 | github-token: ${{ secrets.GITHUB_TOKEN }} 23 | script: | 24 | const latest_release = await github.rest.repos.getLatestRelease({ 25 | owner: context.repo.owner, 26 | repo: context.repo.repo 27 | }); 28 | const split_version = latest_release.data.tag_name.split('.'); 29 | const dev_version = `${split_version[0]}.${parseInt(split_version[1]) + 1}.0-dev`; 30 | github.rest.actions.createWorkflowDispatch({ 31 | owner: context.repo.owner, 32 | repo: context.repo.repo, 33 | workflow_id: 'build.yml', 34 | ref: 'master', 35 | inputs: { 36 | version: dev_version, 37 | prerelease: true 38 | } 39 | }) 40 | console.log(`Triggered workflow_dispatch for ${dev_version}`); 41 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: 4 | push: 5 | pull_request: 6 | branches: 7 | - "**" 8 | workflow_dispatch: 9 | 10 | defaults: 11 | run: 12 | shell: bash 13 | 14 | jobs: 15 | test: 16 | runs-on: ${{ matrix.os }} 17 | strategy: 18 | matrix: 19 | os: [ubuntu-20.04, windows-latest] # [ubuntu-20.04, macos-latest, windows-latest] 20 | python: ["3.11.3"] 21 | 22 | steps: 23 | - uses: actions/checkout@v3 24 | 25 | - name: Set up Python ${{ matrix.python }} 26 | uses: actions/setup-python@v4 27 | with: 28 | python-version: ${{ matrix.python }} 29 | cache: pip 30 | 31 | - name: Install dependencies 32 | run: | 33 | python -m pip install --upgrade pip setuptools wheel 34 | python -m pip install -r requirements-test.txt 35 | 36 | - 
run: pysen run lint 37 | 38 | - name: Run pytest and get coverage 39 | run: | 40 | coverage run --omit=test/* -m pytest 41 | 42 | - name: Submit coverage to Coveralls 43 | if: matrix.os == 'ubuntu-20.04' 44 | run: coveralls --service=github 45 | env: 46 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 47 | 48 | - name: Create coverage result 49 | if: github.event_name == 'pull_request' && matrix.os == 'ubuntu-20.04' 50 | run: | 51 | mkdir report 52 | coverage report > report/report.txt 53 | echo ${{ github.event.number }} > report/pr_num.txt 54 | 55 | - name: Upload coverage result 56 | if: github.event_name == 'pull_request' && matrix.os == 'ubuntu-20.04' 57 | uses: actions/upload-artifact@v3 58 | with: 59 | name: report 60 | path: report/ 61 | 62 | - name: Check licenses 63 | shell: bash 64 | run: | 65 | OUTPUT_LICENSE_JSON_PATH=/dev/null \ 66 | bash build_util/create_venv_and_generate_licenses.bash 67 | -------------------------------------------------------------------------------- /.github/workflows/typos.yml: -------------------------------------------------------------------------------- 1 | name: Check typos 2 | 3 | on: 4 | push: 5 | pull_request: 6 | branches: 7 | - "**" 8 | workflow_dispatch: 9 | 10 | defaults: 11 | run: 12 | shell: bash 13 | 14 | jobs: 15 | typos: 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - uses: actions/checkout@v3 20 | 21 | - name: typos-action 22 | uses: crate-ci/typos@v1.12.12 23 | -------------------------------------------------------------------------------- /.github/workflows/upload-gh-pages.yml: -------------------------------------------------------------------------------- 1 | name: upload-docs 2 | 3 | on: 4 | push: 5 | branches: 6 | - "master" 7 | 8 | env: 9 | PYTHON_VERSION: "3.11.3" 10 | PUBLISH_DIR: "./docs/api" 11 | PUBLISH_BRANCH: "gh-pages" 12 | DESTINATION_DIR: "api" 13 | 14 | defaults: 15 | run: 16 | shell: bash 17 | 18 | jobs: 19 | upload-doc: 20 | runs-on: ubuntu-20.04 21 | steps: 22 | - uses: actions/checkout@v2 
23 | 24 | - name: Setup Python 25 | id: setup-python 26 | uses: actions/setup-python@v4 27 | with: 28 | python-version: ${{ env.PYTHON_VERSION }} 29 | cache: pip 30 | 31 | - name: Install Python dependencies 32 | run: | 33 | pip install -r requirements.txt 34 | 35 | - name: Make documents 36 | run: | 37 | python make_docs.py 38 | 39 | - name: Deploy to GitHub Pages 40 | uses: peaceiris/actions-gh-pages@v3 41 | with: 42 | github_token: ${{ secrets.GITHUB_TOKEN }} 43 | publish_dir: ${{ env.PUBLISH_DIR }} 44 | publish_branch: ${{ env.PUBLISH_BRANCH }} 45 | destination_dir: ${{ env.DESTINATION_DIR }} 46 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # VOICEVOX specifics 2 | ## Artifacts of nuitka 3 | *.dist 4 | *.build 5 | /build 6 | /cache 7 | ## Artifact of generating licenses 8 | /licenses.json 9 | licenses_venv/ 10 | 11 | # Copied from `https://github.com/github/gitignore/blob/main/Python.gitignore` @2022-01-10 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | 17 | # C extensions 18 | *.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | build/ 23 | develop-eggs/ 24 | dist/ 25 | downloads/ 26 | eggs/ 27 | .eggs/ 28 | lib/ 29 | lib64/ 30 | parts/ 31 | sdist/ 32 | var/ 33 | wheels/ 34 | share/python-wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | MANIFEST 39 | 40 | # PyInstaller 41 | # Usually these files are written by a python script from a template 42 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
43 | *.manifest 44 | *.spec 45 | 46 | # Installer logs 47 | pip-log.txt 48 | pip-delete-this-directory.txt 49 | 50 | # Unit test / coverage reports 51 | htmlcov/ 52 | .tox/ 53 | .nox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | *.cover 60 | *.py,cover 61 | .hypothesis/ 62 | .pytest_cache/ 63 | cover/ 64 | 65 | # Translations 66 | *.mo 67 | *.pot 68 | 69 | # Django stuff: 70 | *.log 71 | local_settings.py 72 | db.sqlite3 73 | db.sqlite3-journal 74 | 75 | # Flask stuff: 76 | instance/ 77 | .webassets-cache 78 | 79 | # Scrapy stuff: 80 | .scrapy 81 | 82 | # Sphinx documentation 83 | docs/_build/ 84 | 85 | # PyBuilder 86 | .pybuilder/ 87 | target/ 88 | 89 | # Jupyter Notebook 90 | .ipynb_checkpoints 91 | 92 | # IPython 93 | profile_default/ 94 | ipython_config.py 95 | 96 | # pyenv 97 | # For a library or package, you might want to ignore these files since the code is 98 | # intended to run in multiple environments; otherwise, check them in: 99 | .python-version 100 | 101 | # pipenv 102 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 103 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 104 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 105 | # install all needed dependencies. 106 | Pipfile.lock 107 | 108 | # poetry 109 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 110 | # This is especially recommended for binary packages to ensure reproducibility, and is more 111 | # commonly ignored for libraries. 112 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 113 | # poetry.lock 114 | 115 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | # pytype static type analyzer 153 | .pytype/ 154 | 155 | # Cython debug symbols 156 | cython_debug/ 157 | 158 | # PyCharm 159 | # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can 160 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 161 | # and can be added to the global gitignore or merged into this file. For a more nuclear 162 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
163 | .idea/ 164 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: local 5 | hooks: 6 | - id: pysen-lint 7 | name: pysen-lint 8 | entry: pysen run lint 9 | language: python 10 | types: [file, python] 11 | stages: [push] 12 | pass_filenames: false 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Bridge Plugin 2 | 3 | Copyright (c) 2021 Hiroshiba 4 | Copyright (c) 2021 VOICEVOX 5 | Copyright (c) 2022 VOICEVOX-Bridge 6 | 7 | 8 | 本ソフトウェアのリポジトリにPull Requestを送る目的に限り、本ソフトウェアのソースコードの使用、複製、配布等を行うことを許可します。 9 | 10 | 11 | 商用・非商用を問わず、バイナリ形式の本ソフトウェアの利用及び再配布を許可します。 12 | ただし、以下を条件とします。 13 | 14 | - 本ソフトウェアによって読み込まれる音声合成モデルは、本ソフトウェアを通じた音声合成化を許諾している提供者の音声を元に作られている必要があります。 15 | 16 | - 再配布を行う場合、本ソフトウェアのファイルに関しては、再配布後も本ライセンスを適用する必要があります。 17 | 18 | 19 | 本ソフトウェアは「現状のままで」で提供され、明示的、暗黙的かどうかに拘らずあらゆる保証はないものとします。ここで言う保証は、市販性、特定用途への適合性、権利の侵害がないこと等を含みますが、これらに限定されません。 20 | 製作者は、契約行為、不法行為、またはそれ以外であろうと、ソフトウェアに起因または関連し、あるいはソフトウェアの使用またはその他の扱いによって生じる一切の請求、損害、その他の義務について何らの責任も負わないものとします。 -------------------------------------------------------------------------------- /_typos.toml: -------------------------------------------------------------------------------- 1 | # Files for typos 2 | # Instruction: https://github.com/marketplace/actions/typos-action#getting-started 3 | 4 | [default.extend-identifiers] 5 | 6 | [default.extend-words] 7 | ba="ba" # 7zコマンドの-baオプション 8 | datas="datas" # PyInstallerの引数 9 | 10 | [files] 11 | extend-exclude = ["package-lock.json", "src/store/project.ts", "*.svg"] 12 | -------------------------------------------------------------------------------- 
"""
Smoke-test a release build.

Launches the built engine executable, exercises a handful of HTTP endpoints
against it, and verifies that the process is still alive afterwards.
"""
import argparse
import json
import time
from io import BytesIO
from pathlib import Path
from subprocess import Popen
from urllib.parse import urlencode
from urllib.request import Request, urlopen

import soundfile

base_url = "http://127.0.0.1:50021/"


def test_release_build(dist_dir: Path) -> None:
    """Start the built engine from *dist_dir* and check its basic endpoints.

    The executable is ``dist_dir / "run"``; when that file does not exist,
    ``dist_dir / "run.exe"`` is used instead. After a fixed startup wait the
    ``version``, ``audio_query``, ``synthesis`` and ``engine_manifest``
    endpoints under ``base_url`` are requested in turn.

    Args:
        dist_dir: Directory containing the built executable. It is also used
            as the working directory of the launched process.

    Raises:
        AssertionError: If a response is empty, the manifest has no ``uuid``
            key, or the engine process has exited by the end of the checks.
        urllib.error.URLError: If an endpoint cannot be reached.
    """
    run_file = dist_dir / "run"
    if not run_file.exists():
        run_file = dist_dir / "run.exe"

    # Launch the engine.
    process = Popen([run_file.absolute()], cwd=dist_dir)
    try:
        time.sleep(120)  # Fixed wait for the engine to finish starting up.

        # Version endpoint: any non-empty body is accepted.
        req = Request(base_url + "version")
        with urlopen(req) as res:
            assert len(res.read()) > 0

        # Text -> audio query.
        text = "こんにちは、音声合成の世界へようこそ"
        req = Request(
            base_url + "audio_query?" + urlencode({"style_id": "1", "text": text}),
            method="POST",
        )
        with urlopen(req) as res:
            query = json.loads(res.read().decode("utf-8"))

        # Audio query -> waveform.
        req = Request(base_url + "synthesis?style_id=1", method="POST")
        req.add_header("Content-Type", "application/json")
        req.data = json.dumps(query).encode("utf-8")
        with urlopen(req) as res:
            wave = res.read()
        # Decoding the response checks that it is a readable audio file.
        soundfile.read(BytesIO(wave))

        # Engine manifest.
        req = Request(base_url + "engine_manifest", method="GET")
        with urlopen(req) as res:
            manifest = json.loads(res.read().decode("utf-8"))
            assert "uuid" in manifest

        # The engine must still be running after all requests.
        assert process.poll() is None
    finally:
        # Stop the engine on every exit path so that a failed check does not
        # leave the child process running.
        process.terminate()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--dist_dir", type=Path, default=Path("dist/"))
    args = parser.parse_args()
    test_release_build(dist_dir=args.dist_dir)
# !!! Handle with care: this script works with the code-signing certificate !!!
#
# Usage: codesign.bash <target_file_glob>
# Requires the CERT_BASE64 (base64-encoded certificate) and CERT_PASSWORD
# environment variables.

set -eu

if [ ! -v CERT_BASE64 ]; then
    echo "CERT_BASE64が未定義です"
    exit 1
fi
if [ ! -v CERT_PASSWORD ]; then
    echo "CERT_PASSWORDが未定義です"
    exit 1
fi

if [ $# -ne 1 ]; then
    echo "引数の数が一致しません"
    exit 1
fi
target_file_glob="$1"

# Certificate file
CERT_PATH=cert.pfx
# Delete the decoded certificate on every exit path. With `set -e`, a failed
# signing step aborts the script, so a trailing `rm` alone would be skipped
# and the certificate would be left on disk.
trap 'rm -f "$CERT_PATH"' EXIT
echo -n "$CERT_BASE64" | base64 -d - > "$CERT_PATH"

# Sign the given file
function codesign() {
    TARGET="$1"
    SIGNTOOL=$(find "C:/Program Files (x86)/Windows Kits/10/App Certification Kit" -name "signtool.exe" | sort -V | tail -n 1)
    powershell "& '$SIGNTOOL' sign /fd SHA256 /td SHA256 /tr http://timestamp.digicert.com /f $CERT_PATH /p $CERT_PASSWORD '$TARGET'"
}

# Succeeds when the given file already carries a valid signature
function is_signed() {
    TARGET="$1"
    SIGNTOOL=$(find "C:/Program Files (x86)/Windows Kits/10/App Certification Kit" -name "signtool.exe" | sort -V | tail -n 1)
    powershell "& '$SIGNTOOL' verify /pa '$TARGET'" || return 1
}

# Sign every matching file that is not signed yet.
# $target_file_glob is deliberately unquoted so that the shell expands it.
ls $target_file_glob | while read -r target_file; do
    if is_signed "$target_file"; then
        echo "署名済み: $target_file"
    else
        echo "署名: $target_file"
        codesign "$target_file"
    fi
done

# The certificate is removed by the EXIT trap installed above.
"""
Tools for inspecting and rewriting shared libraries on macOS.
"""

import subprocess
from pathlib import Path
from typing import List


def get_dylib_paths(base_path: Path) -> List[Path]:
    """Return every ``*.dylib`` file found under *base_path*, recursively."""
    return list(base_path.glob("**/*.dylib"))


def get_rpaths(shared_lib_path: Path) -> List[Path]:
    """Return the library paths that ``otool -L`` reports for a shared library.

    The first output line is skipped, and the first space-separated token of
    every remaining line is taken as a path.

    Args:
        shared_lib_path: The shared library to inspect.

    Returns:
        The reported paths, excluding any entry whose name (up to its first
        ``.``) equals that of *shared_lib_path* itself.
    """
    proc = subprocess.run(["otool", "-L", str(shared_lib_path)], stdout=subprocess.PIPE)
    output = proc.stdout.decode("utf-8")
    paths = [
        Path(line.lstrip().split(" ", maxsplit=1)[0])
        for line in output.splitlines()[1:]
    ]
    # An entry with the same library name as the shared library itself is its
    # install ID rather than an rpath, so it is excluded.
    return [
        path
        for path in paths
        if path.name.split(".")[0] != shared_lib_path.name.split(".")[0]
    ]


def is_distributable_rpath(rpath: Path) -> bool:
    """Return whether *rpath* is independent of packages installed on the dev machine.

    Args:
        rpath: A library path as returned by :func:`get_rpaths`.

    Returns:
        ``True`` when the path starts with one of the prefixes that are safe
        to ship, ``False`` otherwise.
    """
    # rpaths starting with the following prefixes are safe to distribute:
    # - Placeholders, resolved automatically at run time
    #   - @executable_path/
    #   - @loader_path/
    #   - @rpath/
    # - Directories holding the system's standard libraries
    #   - /usr/lib/
    #   - /System/Library/Frameworks/
    #   - /System/Library/PrivateFrameworks/
    DISTRIBUTABLE_PREFIXES = (
        "@executable_path/",
        "@loader_path/",
        "@rpath/",
        "/usr/lib/",
        "/System/Library/Frameworks/",
        "/System/Library/PrivateFrameworks/",
    )
    # str.startswith accepts a tuple and is true if any prefix matches.
    return str(rpath).startswith(DISTRIBUTABLE_PREFIXES)


def change_rpath(
    old_rpath: Path, new_rpath: Path, dylib_path: Path, base_path: Path
) -> None:
    """Rewrite one rpath of a dylib to point inside the distribution.

    Runs ``install_name_tool -change`` on *dylib_path*, replacing *old_rpath*
    with ``@rpath/`` followed by *new_rpath* expressed relative to
    *base_path*.

    Args:
        old_rpath: The rpath currently recorded in the dylib.
        new_rpath: The new target; must be located under *base_path*.
        dylib_path: The dylib file to modify.
        base_path: Directory that *new_rpath* is made relative to.

    Raises:
        ValueError: If *new_rpath* is not located under *base_path*.
    """
    relative_new_rpath = new_rpath.relative_to(base_path)
    subprocess.run(
        [
            "install_name_tool",
            "-change",
            old_rpath,
            "@rpath/" + str(relative_new_rpath),
            dylib_path,
        ]
    )


class SharedLib:
    """Information about one shared library: its path and its rpaths."""

    __path: Path
    __rpaths: List[Path]

    def __init__(self, shared_lib_path: Path):
        """Record the path and read the library's rpaths via :func:`get_rpaths`."""
        self.__path = shared_lib_path
        self.__rpaths = get_rpaths(shared_lib_path)

    @property
    def path(self) -> Path:
        """Path of the shared library file."""
        return self.__path

    def get_non_distributable_rpaths(self) -> List[Path]:
        """Return the rpaths that depend on the development environment."""
        return [rpath for rpath in self.__rpaths if not is_distributable_rpath(rpath)]
"""
Script that resolves missing .dylib files in a distribution.

The rpaths of every .dylib file under the ``base_directory`` argument are
checked. When an rpath points to a .dylib file that does not exist under
``base_directory``, that file is copied from the location the rpath points to
into the top level of ``base_directory``.
"""

import argparse
import shutil
import sys
from pathlib import Path
from typing import List, Set

from build_util_macos.shlib_tools import SharedLib, get_dylib_paths

parser = argparse.ArgumentParser()
parser.add_argument(
    "base_directory", help="copy the missing dylibs under base_directory", type=str
)
args = parser.parse_args()
base_dir_path = Path(args.base_directory)

# Path.is_dir() is already False for a path that does not exist.
if not base_dir_path.is_dir():
    print("could not find the directory:", str(base_dir_path), file=sys.stderr)
    # sys.exit is used because the bare exit() builtin is only injected by the
    # site module and is not guaranteed to be available.
    sys.exit(1)

# Every dylib found by searching all subdirectories of base_dir_path.
dylib_paths: List[Path] = get_dylib_paths(base_dir_path)
# File names of all those dylibs (a set, because it is only used for lookups).
dylib_names: Set[str] = {path.name for path in dylib_paths}

# Dylibs that have at least one rpath depending on the development environment.
non_distributable_dylibs: List[SharedLib] = []
for dylib_path in dylib_paths:
    lib = SharedLib(dylib_path)
    if lib.get_non_distributable_rpaths():
        non_distributable_dylibs.append(lib)

# Union of all rpaths that depend on the development environment.
non_distributable_rpaths: Set[Path] = set()
for dylib in non_distributable_dylibs:
    non_distributable_rpaths.update(dylib.get_non_distributable_rpaths())

# Dylibs referenced by those rpaths whose file name is absent under base_dir_path.
external_dylib_paths: List[Path] = [
    rpath for rpath in non_distributable_rpaths if rpath.name not in dylib_names
]

# Copy the missing dylibs into the top level of base_dir_path.
for dylib_path in external_dylib_paths:
    shutil.copy(dylib_path, base_dir_path, follow_symlinks=True)
"""
Script that fixes the rpaths of the .dylib files in a distribution so that
they are valid in any user environment.

The rpaths of every .dylib file under the ``base_directory`` argument are
checked. An rpath that depends on the development environment (i.e. is not
guaranteed to exist on the machine the product is distributed to) is changed
to point, by relative path, to the .dylib file under ``base_directory``.
(This assumes that no .dylib file is missing under ``base_directory``.)
"""

import argparse
import sys
from pathlib import Path
from typing import Dict, List, Set

from build_util_macos.shlib_tools import SharedLib, change_rpath, get_dylib_paths

parser = argparse.ArgumentParser()
parser.add_argument(
    "base_directory", help="fix the rpaths of the dylibs under base_directory", type=str
)
args = parser.parse_args()
base_dir_path = Path(args.base_directory)

# Path.is_dir() is already False for a path that does not exist.
if not base_dir_path.is_dir():
    print("could not find the directory:", str(base_dir_path), file=sys.stderr)
    # sys.exit is used because the bare exit() builtin is only injected by the
    # site module and is not guaranteed to be available.
    sys.exit(1)

# Every dylib found by searching all subdirectories of base_dir_path.
internal_dylib_paths: List[Path] = get_dylib_paths(base_dir_path)
# File name -> bundled dylib. If several bundled dylibs share a file name, the
# first one discovered is kept.
internal_dylib_by_name: Dict[str, Path] = {}
for internal_dylib_path in internal_dylib_paths:
    internal_dylib_by_name.setdefault(internal_dylib_path.name, internal_dylib_path)

# Dylibs that have at least one rpath depending on the development environment.
non_distributable_dylibs: List[SharedLib] = []
for internal_dylib_path in internal_dylib_paths:
    lib = SharedLib(internal_dylib_path)
    if lib.get_non_distributable_rpaths():
        non_distributable_dylibs.append(lib)

# Union of all rpaths that depend on the development environment.
non_distributable_rpaths: Set[Path] = set()
for dylib in non_distributable_dylibs:
    non_distributable_rpaths.update(dylib.get_non_distributable_rpaths())

# Dylibs referenced by those rpaths whose file name is absent under base_dir_path.
external_dylib_paths: List[Path] = [
    rpath
    for rpath in non_distributable_rpaths
    if rpath.name not in internal_dylib_by_name
]

# If dylibs are missing under base_dir_path, list them and stop.
if external_dylib_paths:
    print(
        f"following dylibs not found under base_dir_path ({str(base_dir_path)}):",
        file=sys.stderr,
    )
    for path in external_dylib_paths:
        print(f"\t{path.name}", file=sys.stderr)
    sys.exit(1)

# Re-point every development-environment rpath at the same-named dylib under
# base_dir_path. The lookup cannot fail: any rpath without a bundled
# counterpart made the script exit above.
for dylib in non_distributable_dylibs:
    for rpath in dylib.get_non_distributable_rpaths():
        change_rpath(
            rpath, internal_dylib_by_name[rpath.name], dylib.path, base_dir_path
        )
"""
Merge update histories.
"""

import argparse
import json
from collections import OrderedDict
from pathlib import Path
from typing import Dict, List, Union


def merge_json_string(src: str, dst: str) -> str:
    """
    Merge the update-info JSON array ``dst`` into ``src`` and return the result as JSON.

    Entries are matched on their ``"version"`` value; the result always has
    exactly the entries and keys of ``src``.

    Elements are combined when the versions are equal
    >>> src = '[{"version": "0.0.1", "a": ["a1"], "b": ["b1", "b2"]}]'
    >>> dst = '[{"version": "0.0.1", "a": ["a2"], "b": ["b1", "b3"]}]'
    >>> merge_json_string(src, dst)
    '[{"version": "0.0.1", "a": ["a1", "a2"], "b": ["b1", "b2", "b3"]}]'

    Versions that do not exist in src are ignored
    >>> src = '[{"version": "1"}]'
    >>> dst = '[{"version": "1"}, {"version": "2"}]'
    >>> merge_json_string(src, dst)
    '[{"version": "1"}]'

    A key that the matching dst entry does not have is left as it is
    >>> merge_json_string('[{"version": "1", "a": ["x"]}]', '[{"version": "1"}]')
    '[{"version": "1", "a": ["x"]}]'
    """
    src_json: List[Dict[str, Union[str, List[str]]]] = json.loads(src)
    dst_json: List[Dict[str, Union[str, List[str]]]] = json.loads(dst)

    for src_item in src_json:
        for dst_item in dst_json:
            if src_item["version"] != dst_item["version"]:
                continue

            for key in src_item:
                if key == "version":
                    continue

                # Append only the values that are not present yet, keeping the
                # order of first appearance. A key missing from dst_item
                # contributes nothing instead of raising KeyError.
                src_item[key] = list(
                    OrderedDict.fromkeys(src_item[key] + dst_item.get(key, []))
                )

    return json.dumps(src_json)


def merge_update_infos(src_path: Path, dst_path: Path, output_path: Path) -> None:
    """Merge the update-info files ``src_path`` and ``dst_path`` into ``output_path``.

    All three files are handled as UTF-8. ``output_path`` may be the same file
    as ``src_path``; both inputs are read completely before anything is written.
    """
    src = src_path.read_text(encoding="utf-8")
    dst = dst_path.read_text(encoding="utf-8")
    merged = merge_json_string(src, dst)
    # Written with the same explicit encoding that is used for reading, so the
    # result does not depend on the platform's default encoding.
    output_path.write_text(merged, encoding="utf-8")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("src_path", type=Path)
    parser.add_argument("dst_path", type=Path)
    parser.add_argument("output_path", type=Path)
    args = parser.parse_args()
    merge_update_infos(args.src_path, args.dst_path, args.output_path)
merge_update_infos(src_path: Path, dst_path: Path, output_path: Path) -> None: 45 | src = src_path.read_text(encoding="utf-8") 46 | dst = dst_path.read_text(encoding="utf-8") 47 | merged = merge_json_string(src, dst) 48 | output_path.write_text(merged) 49 | 50 | 51 | if __name__ == "__main__": 52 | parser = argparse.ArgumentParser() 53 | parser.add_argument("src_path", type=Path) 54 | parser.add_argument("dst_path", type=Path) 55 | parser.add_argument("output_path", type=Path) 56 | args = parser.parse_args() 57 | merge_update_infos(args.src_path, args.dst_path, args.output_path) 58 | -------------------------------------------------------------------------------- /build_util/process_voicevox_resource.bash: -------------------------------------------------------------------------------- 1 | set -eux 2 | 3 | if [ ! -v DOWNLOAD_RESOURCE_PATH ]; then 4 | echo "DOWNLOAD_RESOURCE_PATHが未定義です" 5 | exit 1 6 | fi 7 | 8 | rm -r speaker_info 9 | cp -r $DOWNLOAD_RESOURCE_PATH/character_info speaker_info 10 | 11 | python $DOWNLOAD_RESOURCE_PATH/scripts/clean_character_info.py \ 12 | --character_info_dir speaker_info/ 13 | 14 | # マニフェスト 15 | jq -s '.[0] * .[1]' engine_manifest.json $DOWNLOAD_RESOURCE_PATH/engine/engine_manifest.json \ 16 | > engine_manifest.json.tmp 17 | mv engine_manifest.json.tmp engine_manifest.json 18 | 19 | python build_util/merge_update_infos.py \ 20 | engine_manifest_assets/update_infos.json \ 21 | $DOWNLOAD_RESOURCE_PATH/engine/engine_manifest_assets/update_infos.json \ 22 | engine_manifest_assets/update_infos.json 23 | 24 | for f in $(ls $DOWNLOAD_RESOURCE_PATH/engine/engine_manifest_assets/* | grep -v update_infos.json); do 25 | cp $f ./engine_manifest_assets/ 26 | done 27 | -------------------------------------------------------------------------------- /default.csv: -------------------------------------------------------------------------------- 1 | 朱司,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,アカシ,アカシ,1/3,C1 2 | 
青山,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,アオヤマ,アオヤマ,2/4,C1 3 | 雨晴,1350,1350,7000,名詞,固有名詞,人名,姓,*,*,*,アメハレ,アメハレ,2/4,C1 4 | アル,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,アル,アル,1/2,C1 5 | うさぎ,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ウサギ,ウサギ,0/3,C1 6 | 櫻歌,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,オウカ,オーカ,1/3,C1 7 | 音街,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,オトマチ,オトマチ,2/4,C1 8 | 春日部,1350,1350,8600,名詞,固有名詞,人名,姓,*,*,*,カスカベ,カスカベ,0/4,C1 9 | 麒ヶ島,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,キガシマ,キガシマ,2/4,C1 10 | 紲星,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,キズナ,キズナ,1/3,C1 11 | 九州,1350,1350,8600,名詞,固有名詞,人名,姓,*,*,*,キュウシュウ,キュウシュウ,1/4,C1 12 | キョウコ,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,キョオコ,キョオコ,1/3,C1 13 | 玄野,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,クロノ,クロノ,1/3,C1 14 | 剣崎,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,ケンザキ,ケンザキ,1/4,C1 15 | 後鬼,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ゴキ,ゴキ,1/2,C1 16 | 虎太郎,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,コタロウ,コタロー,4/4,C1 17 | 琴葉,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,コトノハ,コトノハ,0/4,C1 18 | 小夜,1351,1351,2200,名詞,固有名詞,人名,名,*,*,*,サヨ,サヨ,1/2,C1 19 | 四国,1350,1350,2200,名詞,固有名詞,人名,姓,*,*,*,シコク,シコク,1/3,C1 20 | 白上,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,シラカミ,シラカミ,4/4,C1 21 | ずんだもん,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ズンダモン,ズンダモン,1/5,C1 22 | そら,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ソラ,ソラ,1/2,C1 23 | 宗麟,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ソウリン,ソウリン,1/4,C1 24 | タイプT,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,タイプティー,タイプティー,4/5,C1 25 | 中国,1350,1350,8600,名詞,固有名詞,人名,姓,*,*,*,チュウゴク,チュウゴク,1/4,C1 26 | 波音,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ナミネ,ナミネ,0/3,C1 27 | 武宏,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,タケヒロ,タケヒロ,2/4,C1 28 | ちび式じい,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,チビシキジー,チビシキジー,5/6,C1 29 | 月読,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ツクヨミ,ツクヨミ,0/4,C1 30 | つむぎ,1351,1351,7450,名詞,固有名詞,人名,名,*,*,*,ツムギ,ツムギ,0/3,C1 31 | ナースロボ,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ナースロボ,ナースロボ,4/5,C1 32 | ナナ,1351,1351,8600,名詞,固有名詞,人名,名,*,*,*,ナナ,ナナ,1/2,C1 33 | No.7,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ナンバーセブン,ナンバーセブン,5/7,C1 34 | 猫使,1350,1350,2200,名詞,固有名詞,人名,姓,*,*,*,ネコツカ,ネコツカ,2/4,C1 35 | はう,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,ハウ,ハウ,1/2,C1 36 | 
春歌,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ハルカ,ハルカ,1/3,C1 37 | 桜乃,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ハルノ,ハルノ,1/3,C1 38 | ビィ,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ビー,ビー,1/2,C1 39 | ひまり,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ヒマリ,ヒマリ,0/3,C1 40 | 紅桜,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ベニザクラ,ベニザクラ,3/5,C1 41 | 聖騎士,1350,1350,8600,名詞,固有名詞,人名,姓,*,*,*,ホーリーナイト,ホーリーナイト,5/7,C1 42 | WhiteCUL,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ホワイトカル,ホワイトカル,5/6,C1 43 | ミコ,1351,1351,3900,名詞,固有名詞,人名,名,*,*,*,ミコ,ミコ,1/2,C1 44 | 水奈瀬,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ミナセ,ミナセ,2/3,C1 45 | 冥鳴,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,メイメイ,メイメイ,1/4,C1 46 | 鳴花,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,メイカ,メイカ,1/3,C1 47 | めたん,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,メタン,メタン,1/3,C1 48 | 雌雄,1351,1351,8600,名詞,固有名詞,人名,名,*,*,*,メスオ,メスオ,0/3,C1 49 | もち子さん,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,モチコサン,モチコサン,1/5,C1 50 | モチノ,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,モチノ,モチノ,0/3,C1 51 | 結月,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ユヅキ,ユヅキ,1/3,C1 52 | 弓鶴,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ユヅル,ユヅル,0/3,C1 53 | リツ,1351,1351,3900,名詞,固有名詞,人名,名,*,*,*,リツ,リツ,1/2,C1 54 | 六花,1351,1351,4900,名詞,固有名詞,人名,名,*,*,*,リッカ,リッカ,1/3,C1 55 | 龍星,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,リュウセイ,リュウセイ,1/4,C1 56 | 雀松,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ワカマツ,ワカマツ,2/4,C1 57 | COEIROINK,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエイロインク,コエイロインク,5/7,C1 58 | coeiroink,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエイロインク,コエイロインク,5/7,C1 59 | CoeFont,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエフォント,コエフォント,3/5,C1 60 | coefont,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエフォント,コエフォント,3/5,C1 61 | Nemo,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,ネモ,ネモ,1/2,C1 62 | nemo,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,ネモ,ネモ,1/2,C1 63 | TALQu,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,トーク,トーク,0/3,C1 64 | talqu,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,トーク,トーク,0/3,C1 65 | VOICEVOX,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,ボイスボックス,ボイスボックス,4/7,C1 66 | voicevox,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,ボイスボックス,ボイスボックス,4/7,C1 67 | -------------------------------------------------------------------------------- /default_setting.yml: 
-------------------------------------------------------------------------------- 1 | allow_origin: null 2 | cors_policy_mode: localapps 3 | -------------------------------------------------------------------------------- /docs/VOICEVOX音声合成エンジンとの連携.md: -------------------------------------------------------------------------------- 1 | メモ書き程度ですが、どういう方針で開発を進めているかを紹介します。 2 | 3 | - バージョンが上がっても、`/audio_query`で返ってくる値をそのまま`/synthesis`に POST すれば音声合成できるようにする予定です 4 | - `AudioQuery`のパラメータは増えますが、なるべくデフォルト値で以前と変わらない音声が生成されるようにします 5 | - バージョン 0.7 から音声スタイルが実装されました。スタイルの情報は`/speakers`から取得できます 6 | -------------------------------------------------------------------------------- /docs/api/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/docs/api/.gitkeep -------------------------------------------------------------------------------- /docs/licenses/nccl/License.txt: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | * Neither the name of NVIDIA CORPORATION, Lawrence Berkeley National 13 | Laboratory, the U.S. Department of Energy, nor the names of their 14 | contributors may be used to endorse or promote products derived 15 | from this software without specific prior written permission. 
16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 18 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 20 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 21 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 22 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 23 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 24 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 25 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | The U.S. Department of Energy funded the development of this software 30 | under subcontract 7078610 with Lawrence Berkeley National Laboratory. 31 | 32 | -------------------------------------------------------------------------------- /docs/licenses/open_jtalk/COPYING: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------- */ 2 | /* The Japanese TTS System "Open JTalk" */ 3 | /* developed by HTS Working Group */ 4 | /* http://open-jtalk.sourceforge.net/ */ 5 | /* ----------------------------------------------------------------- */ 6 | /* */ 7 | /* Copyright (c) 2008-2016 Nagoya Institute of Technology */ 8 | /* Department of Computer Science */ 9 | /* */ 10 | /* All rights reserved. */ 11 | /* */ 12 | /* Redistribution and use in source and binary forms, with or */ 13 | /* without modification, are permitted provided that the following */ 14 | /* conditions are met: */ 15 | /* */ 16 | /* - Redistributions of source code must retain the above copyright */ 17 | /* notice, this list of conditions and the following disclaimer. 
*/ 18 | /* - Redistributions in binary form must reproduce the above */ 19 | /* copyright notice, this list of conditions and the following */ 20 | /* disclaimer in the documentation and/or other materials provided */ 21 | /* with the distribution. */ 22 | /* - Neither the name of the HTS working group nor the names of its */ 23 | /* contributors may be used to endorse or promote products derived */ 24 | /* from this software without specific prior written permission. */ 25 | /* */ 26 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 27 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 28 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 29 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 30 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 31 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 32 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 33 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 34 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 35 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 36 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 37 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 38 | /* POSSIBILITY OF SUCH DAMAGE. */ 39 | /* ----------------------------------------------------------------- */ 40 | -------------------------------------------------------------------------------- /docs/licenses/open_jtalk/mecab-naist-jdic/COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009, Nara Institute of Science and Technology, Japan. 2 | 3 | All rights reserved. 
4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | Redistributions of source code must retain the above copyright notice, 10 | this list of conditions and the following disclaimer. 11 | Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | Neither the name of the Nara Institute of Science and Technology 15 | (NAIST) nor the names of its contributors may be used to endorse or 16 | promote products derived from this software without specific prior 17 | written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 23 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 26 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 27 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 28 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | 31 | /* ----------------------------------------------------------------- */ 32 | /* The Japanese TTS System "Open JTalk" */ 33 | /* developed by HTS Working Group */ 34 | /* http://open-jtalk.sourceforge.net/ */ 35 | /* ----------------------------------------------------------------- */ 36 | /* */ 37 | /* Copyright (c) 2008-2016 Nagoya Institute of Technology */ 38 | /* Department of Computer Science */ 39 | /* */ 40 | /* All rights reserved. */ 41 | /* */ 42 | /* Redistribution and use in source and binary forms, with or */ 43 | /* without modification, are permitted provided that the following */ 44 | /* conditions are met: */ 45 | /* */ 46 | /* - Redistributions of source code must retain the above copyright */ 47 | /* notice, this list of conditions and the following disclaimer. */ 48 | /* - Redistributions in binary form must reproduce the above */ 49 | /* copyright notice, this list of conditions and the following */ 50 | /* disclaimer in the documentation and/or other materials provided */ 51 | /* with the distribution. */ 52 | /* - Neither the name of the HTS working group nor the names of its */ 53 | /* contributors may be used to endorse or promote products derived */ 54 | /* from this software without specific prior written permission. */ 55 | /* */ 56 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 57 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 58 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 59 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 60 | /* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 61 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 62 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 63 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 64 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 65 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 66 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 67 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 68 | /* POSSIBILITY OF SUCH DAMAGE. */ 69 | /* ----------------------------------------------------------------- */ 70 | -------------------------------------------------------------------------------- /docs/licenses/open_jtalk/mecab/COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2001-2008, Taku Kudo 2 | Copyright (c) 2004-2008, Nippon Telegraph and Telephone Corporation 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are 6 | permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above 9 | copyright notice, this list of conditions and the 10 | following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above 13 | copyright notice, this list of conditions and the 14 | following disclaimer in the documentation and/or other 15 | materials provided with the distribution. 16 | 17 | * Neither the name of the Nippon Telegraph and Telegraph Corporation 18 | nor the names of its contributors may be used to endorse or 19 | promote products derived from this software without specific 20 | prior written permission. 
21 | 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED 23 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 28 | TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 29 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | /* ----------------------------------------------------------------- */ 32 | /* The Japanese TTS System "Open JTalk" */ 33 | /* developed by HTS Working Group */ 34 | /* http://open-jtalk.sourceforge.net/ */ 35 | /* ----------------------------------------------------------------- */ 36 | /* */ 37 | /* Copyright (c) 2008-2016 Nagoya Institute of Technology */ 38 | /* Department of Computer Science */ 39 | /* */ 40 | /* All rights reserved. */ 41 | /* */ 42 | /* Redistribution and use in source and binary forms, with or */ 43 | /* without modification, are permitted provided that the following */ 44 | /* conditions are met: */ 45 | /* */ 46 | /* - Redistributions of source code must retain the above copyright */ 47 | /* notice, this list of conditions and the following disclaimer. */ 48 | /* - Redistributions in binary form must reproduce the above */ 49 | /* copyright notice, this list of conditions and the following */ 50 | /* disclaimer in the documentation and/or other materials provided */ 51 | /* with the distribution. 
*/ 52 | /* - Neither the name of the HTS working group nor the names of its */ 53 | /* contributors may be used to endorse or promote products derived */ 54 | /* from this software without specific prior written permission. */ 55 | /* */ 56 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 57 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 58 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 59 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 60 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 61 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 62 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 63 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 64 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 65 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 66 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 67 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 68 | /* POSSIBILITY OF SUCH DAMAGE. */ 69 | /* ----------------------------------------------------------------- */ 70 | -------------------------------------------------------------------------------- /docs/licenses/world/LICENSE.txt: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------- */ 2 | /* WORLD: High-quality speech analysis, */ 3 | /* manipulation and synthesis system */ 4 | /* developed by M. Morise */ 5 | /* http://www.kisc.meiji.ac.jp/~mmorise/world/english/ */ 6 | /* ----------------------------------------------------------------- */ 7 | /* */ 8 | /* Copyright (c) 2010 M. Morise */ 9 | /* */ 10 | /* All rights reserved. 
*/ 11 | /* */ 12 | /* Redistribution and use in source and binary forms, with or */ 13 | /* without modification, are permitted provided that the following */ 14 | /* conditions are met: */ 15 | /* */ 16 | /* - Redistributions of source code must retain the above copyright */ 17 | /* notice, this list of conditions and the following disclaimer. */ 18 | /* - Redistributions in binary form must reproduce the above */ 19 | /* copyright notice, this list of conditions and the following */ 20 | /* disclaimer in the documentation and/or other materials provided */ 21 | /* with the distribution. */ 22 | /* - Neither the name of the M. Morise nor the names of its */ 23 | /* contributors may be used to endorse or promote products derived */ 24 | /* from this software without specific prior written permission. */ 25 | /* */ 26 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 27 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 28 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 29 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 30 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 31 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 32 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 33 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 34 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 35 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 36 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 37 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 38 | /* POSSIBILITY OF SUCH DAMAGE. 
*/ 39 | /* ----------------------------------------------------------------- */ 40 | -------------------------------------------------------------------------------- /engine_manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "manifest_version": "0.13.1", 3 | "name": "DUMMY Engine", 4 | "brand_name": "DUMMY", 5 | "uuid": "41d9d6cb-6682-4baa-80b6-875547f71d86", 6 | "version": "999.999.999", 7 | "url": "https://github.com/voicevox-bridge/bridge-plugin", 8 | "command": "run", 9 | "port": 50021, 10 | "icon": "engine_manifest_assets/icon.png", 11 | "default_sampling_rate": 44100, 12 | "terms_of_service": "engine_manifest_assets/terms_of_service.md", 13 | "update_infos": "engine_manifest_assets/update_infos.json", 14 | "dependency_licenses": "engine_manifest_assets/dependency_licenses.json", 15 | "supported_vvlib_manifest_version": "0.15.0", 16 | "supported_features": { 17 | "adjust_mora_pitch": { 18 | "type": "bool", 19 | "value": false, 20 | "name": "モーラごとの音高の調整" 21 | }, 22 | "adjust_phoneme_length": { 23 | "type": "bool", 24 | "value": false, 25 | "name": "音素ごとの長さの調整" 26 | }, 27 | "adjust_speed_scale": { 28 | "type": "bool", 29 | "value": true, 30 | "name": "全体の話速の調整" 31 | }, 32 | "adjust_pitch_scale": { 33 | "type": "bool", 34 | "value": true, 35 | "name": "全体の音高の調整" 36 | }, 37 | "adjust_intonation_scale": { 38 | "type": "bool", 39 | "value": true, 40 | "name": "全体の抑揚の調整" 41 | }, 42 | "adjust_volume_scale": { 43 | "type": "bool", 44 | "value": true, 45 | "name": "全体の音量の調整" 46 | }, 47 | "interrogative_upspeak": { 48 | "type": "bool", 49 | "value": false, 50 | "name": "疑問文の自動調整" 51 | }, 52 | "synthesis_morphing" : { 53 | "type": "bool", 54 | "value": false, 55 | "name": "2人の話者でモーフィングした音声を合成" 56 | }, 57 | "manage_library": { 58 | "type": "bool", 59 | "value": false, 60 | "name": "音声ライブラリのインストール・アンインストール" 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- 
/engine_manifest_assets/dependency_licenses.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "dummy library", 4 | "version": "0.0.1", 5 | "license": "dummy license", 6 | "text": "dummy license text" 7 | } 8 | ] -------------------------------------------------------------------------------- /engine_manifest_assets/downloadable_libraries.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "Dummy Library", 4 | "uuid": "2bb8bccf-1c3f-4bc9-959a-f388e37af3ad", 5 | "version": "0.0.1", 6 | "download_url": "https://github.com/VOICEVOX/voicevox_engine/archive/d7cf31c058bc83e1abf8e14d4231a06409c4cc2d.zip", 7 | "bytes": 1000, 8 | "speakers": [ 9 | { 10 | "speaker": { 11 | "name": "dummy1", 12 | "speaker_uuid": "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff", 13 | "styles": [ 14 | { 15 | "name": "style1", 16 | "id": 0 17 | }, 18 | { 19 | "name": "style2", 20 | "id": 2 21 | } 22 | ], 23 | "version": "0.0.1" 24 | }, 25 | "speaker_info": { 26 | "policy": "", 27 | "portrait": "", 28 | "style_infos": [ 29 | { 30 | "id": 0, 31 | "icon": "", 32 | "voice_samples": ["", "", ""] 33 | }, 34 | { 35 | "id": 2, 36 | "icon": "", 37 | "voice_samples": ["", "", ""] 38 | } 39 | ] 40 | } 41 | } 42 | ] 43 | } 44 | ] 45 | -------------------------------------------------------------------------------- /engine_manifest_assets/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/engine_manifest_assets/icon.png -------------------------------------------------------------------------------- /engine_manifest_assets/terms_of_service.md: -------------------------------------------------------------------------------- 1 | dummy teams of service -------------------------------------------------------------------------------- 
/engine_manifest_assets/update_infos.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "version": "0.14.5", 4 | "descriptions": [ 5 | "キャラクター「中国うさぎ」を追加", 6 | "キャラクター「波音リツ」「もち子さん」のスタイルを追加" 7 | ], 8 | "contributors": [] 9 | }, 10 | { 11 | "version": "0.14.4", 12 | "descriptions": [ 13 | "キャラクター「春歌ナナ」「猫使アル」「猫使ビィ」を追加", 14 | "バグ修正" 15 | ], 16 | "contributors": ["Hiroshiba"] 17 | }, 18 | { 19 | "version": "0.14.3", 20 | "descriptions": [ 21 | "キャラクター「†聖騎士 紅桜†」「雀松朱司」「麒ヶ島宗麟」を追加", 22 | "同時書き込みで辞書が破損する問題を修正" 23 | ], 24 | "contributors": ["Hiroshiba"] 25 | }, 26 | { 27 | "version": "0.14.2", 28 | "descriptions": ["DirectML版の生成が遅い問題を修正"], 29 | "contributors": [] 30 | }, 31 | { 32 | "version": "0.14.1", 33 | "descriptions": ["AquesTalkライクな記法で生成した音声のバグを修正"], 34 | "contributors": [] 35 | }, 36 | { 37 | "version": "0.14.0", 38 | "descriptions": [ 39 | "コアをRust言語に移行", 40 | "セキュリティアップデート", 41 | "スタイルごとに異なる立ち絵の提供を可能に", 42 | "VVPPファイルの提供", 43 | "設定GUIの提供", 44 | "プリセットの保存", 45 | "モーフィングAPIの仕様変更", 46 | "DirectML利用時に適したGPUを自動選択", 47 | "開発環境の向上", 48 | "バグ修正" 49 | ], 50 | "contributors": [ 51 | "aoirint", 52 | "Appletigerv", 53 | "haru3me", 54 | "Hiroshiba", 55 | "ksk001100", 56 | "masinc", 57 | "misogihagi", 58 | "My-MC", 59 | "nebocco", 60 | "PickledChair", 61 | "qryxip", 62 | "qwerty2501", 63 | "sabonerune", 64 | "sarisia", 65 | "Segu-g", 66 | "sevenc-nanashi", 67 | "shigobu", 68 | "smly", 69 | "takana-v", 70 | "ts-klassen", 71 | "whiteball", 72 | "y-chan" 73 | ] 74 | } 75 | ] 76 | -------------------------------------------------------------------------------- /get_cost_candidates.py: -------------------------------------------------------------------------------- 1 | """ 2 | voicevox_engine/part_of_speech_data.pyのcost_candidatesを計算するプログラムです。 3 | 引数のnaist_jdic_pathには、open_jtalkのsrc/mecab-naist-jdic/naist-jdic.csvを指定してください。 4 | 5 | 実行例: 6 | python get_cost_candidates.py --naist_jdic_path=/path/to/naist-jdic.csv \ 7 | --pos=名詞 \ 8 | 
def get_candidates(
    naist_jdic_path: Path,
    pos: str,
    pos_detail_1: str,
    pos_detail_2: str,
    pos_detail_3: str,
) -> List[int]:
    """Compute the 11 cost candidates for one part-of-speech category.

    The dictionary CSV is scanned for rows whose four part-of-speech columns
    (column indices 4-7) equal the requested values, and the costs of those
    rows (column index 3) are summarised as follows:

    - element 0 is the minimum cost minus 1;
    - elements 1-5 step evenly from the 1% quantile to the mode;
    - elements 6-9 step evenly from the mode to the 99% quantile;
    - element 10 is the maximum cost plus 1.

    Args:
        naist_jdic_path: Path to the UTF-8 encoded dictionary CSV.
        pos: Part of speech to match.
        pos_detail_1: First part-of-speech detail to match.
        pos_detail_2: Second part-of-speech detail to match.
        pos_detail_3: Third part-of-speech detail to match.

    Returns:
        A list of 11 integer cost candidates in ascending positional order.

    Raises:
        ValueError: If a non-blank row has fewer than 8 columns, a cost is
            not an integer, or no row matches the requested part of speech.
    """
    # Local import so this function does not depend on the file's import block.
    import csv

    target = (pos, pos_detail_1, pos_detail_2, pos_detail_3)
    costs = []
    # newline="" lets the csv module do its own newline handling, and
    # csv.reader copes with quoted fields that themselves contain commas,
    # which a plain str.split(",") would break apart.
    with naist_jdic_path.open(encoding="utf-8", newline="") as f:
        reader = csv.reader(f)
        for row in reader:
            if not row:
                # Blank line (e.g. trailing newline at end of file).
                continue
            if len(row) < 8:
                raise ValueError(
                    f"{naist_jdic_path}:{reader.line_num}: expected at least "
                    f"8 columns, got {len(row)}"
                )
            if tuple(row[4:8]) == target:
                costs.append(int(row[3]))

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not costs:
        raise ValueError(
            f"no entries in {naist_jdic_path} match part of speech {target}"
        )

    cost_min = min(costs) - 1
    cost_1per = np.quantile(costs, 0.01).astype(np.int64)
    cost_mode = statistics.mode(costs)
    cost_99per = np.quantile(costs, 0.99).astype(np.int64)
    cost_max = max(costs) + 1
    # int() truncates toward zero; kept as-is so generated values stay stable.
    return (
        [cost_min]
        + [int(cost_1per + (cost_mode - cost_1per) * i / 4) for i in range(5)]
        + [int(cost_mode + (cost_99per - cost_mode) * i / 4) for i in range(1, 5)]
        + [cost_max]
    )
naist_jdic_path=args.naist_jdic_path, 86 | pos=args.pos, 87 | pos_detail_1=args.pos_detail_1, 88 | pos_detail_2=args.pos_detail_2, 89 | pos_detail_3=args.pos_detail_3, 90 | ) 91 | ) 92 | -------------------------------------------------------------------------------- /make_docs.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from voicevox_engine.dev.core import mock as core 4 | from voicevox_engine.dev.synthesis_engine.mock import MockSynthesisEngine 5 | from voicevox_engine.preset import PresetManager 6 | from voicevox_engine.setting import USER_SETTING_PATH, SettingLoader 7 | from voicevox_engine.utility import engine_root 8 | 9 | if __name__ == "__main__": 10 | import run 11 | 12 | app = run.generate_app( 13 | synthesis_engines={"mock": MockSynthesisEngine(speakers=core.metas())}, 14 | latest_core_version="mock", 15 | setting_loader=SettingLoader(USER_SETTING_PATH), 16 | preset_manager=PresetManager( # FIXME: impl MockPresetManager 17 | preset_path=engine_root() / "presets.yaml", 18 | ), 19 | ) 20 | with open("docs/api/index.html", "w") as f: 21 | f.write( 22 | """ 23 | 24 | 25 | voicevox_engine API Document 26 | 27 | 28 | 29 | 30 |
31 | 32 | 35 | 36 | """ 37 | % json.dumps(app.openapi()) 38 | ) 39 | -------------------------------------------------------------------------------- /nuitka-config.yaml: -------------------------------------------------------------------------------- 1 | - module-name: 'librosa.decompose' 2 | anti-bloat: 3 | - description: 'remove sklearn dependency' 4 | replacements_plain: 5 | 'import sklearn.decomposition': '' 6 | 7 | - module-name: 'librosa.segment' 8 | anti-bloat: 9 | - description: 'remove sklearn dependency' 10 | replacements_plain: 11 | 'import sklearn.cluster': '' 12 | 'import sklearn.feature_extraction': '' 13 | 'import sklearn.neighbors': '' 14 | 'import sklearn': '' 15 | 16 | - module-name: 'librosa.util.utils' 17 | anti-bloat: 18 | - description: 'remove numba dependency' 19 | replacements_plain: 20 | 'import numba': '' 21 | '@numba.jit': '#' 22 | 23 | - module-name: 'librosa.util.matching' 24 | anti-bloat: 25 | - description: 'remove numba dependency' 26 | replacements_plain: 27 | 'import numba': '' 28 | '@numba.jit': '#' 29 | 'numba.uint32': 'np.uint' 30 | 31 | - module-name: 'librosa.filters' 32 | anti-bloat: 33 | - description: 'remove numba dependency' 34 | replacements_plain: 35 | 'from numba import jit': '' 36 | '@jit': '#' 37 | 38 | - module-name: 'librosa.core.audio' 39 | anti-bloat: 40 | - description: 'remove numba dependency' 41 | replacements_plain: 42 | '@jit': '#' 43 | 'from numba import jit': '' 44 | 45 | - module-name: 'librosa.core.spectrum' 46 | anti-bloat: 47 | - description: 'remove numba dependency' 48 | replacements_plain: 49 | 'from numba import jit': '' 50 | '@jit': '#' 51 | 52 | - module-name: 'librosa.core.constantq' 53 | anti-bloat: 54 | - description: 'remove numba dependency' 55 | replacements_plain: 56 | 'from numba import jit': '' 57 | '@jit': '#' 58 | 59 | - module-name: 'librosa.sequence' 60 | anti-bloat: 61 | - description: 'remove numba dependency' 62 | replacements_plain: 63 | 'from numba import jit': '' 64 | 
'@jit': '#' 65 | 66 | - module-name: 'librosa.feature.utils' 67 | anti-bloat: 68 | - description: 'remove numba dependency' 69 | replacements_plain: 70 | 'from numba import jit': '' 71 | '@jit': '#' 72 | 73 | - module-name: 'resampy.interpn' 74 | anti-bloat: 75 | - description: 'remove numba dependency' 76 | replacements_plain: 77 | 'from numba import guvectorize, jit, prange': | 78 | def guvectorize(*args, **kwargs): 79 | return lambda f: f 80 | 81 | def jit(*args, **kwargs): 82 | return lambda f: f 83 | 'prange(': 'range(' 84 | 85 | - module-name: 'resampy.core' 86 | anti-bloat: 87 | - description: 'remove numba dependency' 88 | replacements_plain: 89 | 'import numba': '' 90 | 'numba.TypingError': 'Exception' 91 | 92 | - module-name: 'tacotron_cleaner.cleaners' 93 | anti-bloat: 94 | - description: 'remove unidecode dependency' 95 | replacements_plain: 96 | 'from unidecode import unidecode': '' 97 | 'def convert_to_ascii(text):': '' 98 | 'return unidecode(text)': '' 99 | 100 | - module-name: 'espnet2.gan_tts.vits.monotonic_align' 101 | anti-bloat: 102 | - description: 'remove numba dependency' 103 | replacements_plain: 104 | 'from numba import njit': '' 105 | 'from numba import prange': '' 106 | '@njit': '#' 107 | 'prange(': 'range(' 108 | 109 | - module-name: 'espnet' 110 | data-files: 111 | patterns: 112 | - 'version.txt' 113 | 114 | - module-name: 'librosa' 115 | data-files: 116 | dirs: 117 | - 'util/example_data' 118 | 119 | - module-name: 'resampy' 120 | data-files: 121 | dirs: 122 | - 'data' 123 | 124 | - module-name: 'jamo' 125 | data-files: 126 | dirs: 127 | - 'data' -------------------------------------------------------------------------------- /presets.yaml: -------------------------------------------------------------------------------- 1 | - id: 1 2 | name: サンプルプリセット 3 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 4 | style_id: 0 5 | speedScale: 1 6 | pitchScale: 0 7 | intonationScale: 1 8 | volumeScale: 1 9 | prePhonemeLength: 0.1 10 | 
postPhonemeLength: 0.1 11 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.pysen] 2 | version = "0.10.3" 3 | 4 | [tool.pysen.lint] 5 | enable_black = true 6 | enable_flake8 = true 7 | enable_isort = true 8 | enable_mypy = false # TODO: eliminate errors and enable at CI 9 | mypy_preset = "entry" # TODO: "strict" 10 | line_length = 88 11 | py_version = "py310" # TODO: update to py311 after pysen supports Python 3.11 12 | isort_known_first_party = ["voicevox_engine"] 13 | isort_known_third_party = ["numpy"] 14 | [[tool.pysen.lint.mypy_targets]] 15 | paths = [".", "voicevox_engine/"] 16 | 17 | [tool.black] # automatically generated by pysen 18 | # pysen ignores and overwrites any modifications 19 | line-length = 88 20 | target-version = ["py310"] # TODO: update to py311 after pysen supports Python 3.11 21 | 22 | 23 | [tool.isort] # automatically generated by pysen 24 | # pysen ignores and overwrites any modifications 25 | default_section = "THIRDPARTY" 26 | ensure_newline_before_comments = true 27 | force_grid_wrap = 0 28 | force_single_line = false 29 | include_trailing_comma = true 30 | known_first_party = ["voicevox_engine"] 31 | known_third_party = ["numpy"] 32 | line_length = 88 33 | multi_line_output = 3 34 | use_parentheses = true 35 | 36 | [tool.poetry] 37 | name = "voicevox_engine" 38 | version = "0.0.0" 39 | description = "" 40 | authors = ["Hiroshiba "] 41 | 42 | [tool.poetry.dependencies] 43 | python = "~3.11" 44 | numpy = "^1.20.0" 45 | fastapi = "^0.103.2" 46 | python-multipart = "^0.0.5" 47 | uvicorn = "^0.15.0" 48 | aiofiles = "^0.7.0" 49 | soundfile = "^0.12.1" 50 | scipy = "^1.7.1" 51 | pyyaml = "^6.0" 52 | pyworld = "^0.3.0" 53 | requests = "^2.28.1" 54 | jinja2 = "^3.1.2" 55 | pyopenjtalk = {git = "https://github.com/VOICEVOX/pyopenjtalk", rev = "b35fc89fe42948a28e33aed886ea145a51113f88"} 56 | espnet = {git = 
"https://github.com/voicevox-bridge/espnet", rev = "ff992af2279a03405257a844c48bf83f47805b1b"} 57 | semver = "^3.0.1" 58 | platformdirs = "^3.10.0" 59 | typeguard = "2.13.3" 60 | joblib = "1.2.0" 61 | 62 | [tool.poetry.group.dev.dependencies] 63 | cython = "^0.29.34,>=0.29.33" # NOTE: for Python 3.11 64 | pre-commit = "^2.16.0" 65 | atomicwrites = "^1.4.0" 66 | colorama = "^0.4.4" 67 | poetry = "^1.3.1" 68 | nuitka = "^1.3.7" 69 | 70 | [tool.poetry.group.test.dependencies] 71 | pysen = "~0.10.3" 72 | black = "^22.12.0" 73 | flake8-bugbear = "^23.1.0" 74 | flake8 = "^6.0.0" 75 | isort = "^5.12.0" 76 | mypy = "^1.6.0" 77 | pytest = "^6.2.5" 78 | coveralls = "^3.2.0" 79 | poetry = "^1.3.1" 80 | httpx = "^0.25.0" # NOTE: required by fastapi.testclient.TestClient 81 | 82 | [tool.poetry.group.license.dependencies] 83 | pip-licenses = "^4.2.0" 84 | 85 | [build-system] 86 | requires = ["poetry-core"] 87 | build-backend = "poetry.core.masonry.api" 88 | -------------------------------------------------------------------------------- /requirements-license.txt: -------------------------------------------------------------------------------- 1 | aiofiles==0.7.0 ; python_version >= "3.11" and python_version < "3.12" 2 | anyio==3.7.1 ; python_version >= "3.11" and python_version < "3.12" 3 | asgiref==3.7.2 ; python_version >= "3.11" and python_version < "3.12" 4 | audioread==3.0.0 ; python_version >= "3.11" and python_version < "3.12" 5 | certifi==2023.7.22 ; python_version >= "3.11" and python_version < "3.12" 6 | cffi==1.15.1 ; python_version >= "3.11" and python_version < "3.12" 7 | charset-normalizer==3.2.0 ; python_version >= "3.11" and python_version < "3.12" 8 | ci-sdr==0.0.2 ; python_version >= "3.11" and python_version < "3.12" 9 | click==8.1.7 ; python_version >= "3.11" and python_version < "3.12" 10 | colorama==0.4.6 ; python_version >= "3.11" and python_version < "3.12" and platform_system == "Windows" 11 | configargparse==1.7 ; python_version >= "3.11" and 
python_version < "3.12" 12 | ctc-segmentation==1.7.4 ; python_version >= "3.11" and python_version < "3.12" 13 | cython==0.29.36 ; python_version >= "3.11" and python_version < "3.12" 14 | decorator==5.1.1 ; python_version >= "3.11" and python_version < "3.12" 15 | distance==0.1.3 ; python_version >= "3.11" and python_version < "3.12" 16 | einops==0.6.1 ; python_version >= "3.11" and python_version < "3.12" 17 | espnet @ git+https://github.com/voicevox-bridge/espnet@ff992af2279a03405257a844c48bf83f47805b1b ; python_version >= "3.11" and python_version < "3.12" 18 | espnet-tts-frontend==0.0.3 ; python_version >= "3.11" and python_version < "3.12" 19 | fastapi==0.103.2 ; python_version >= "3.11" and python_version < "3.12" 20 | filelock==3.12.4 ; python_version >= "3.11" and python_version < "3.12" 21 | g2p-en==2.1.0 ; python_version >= "3.11" and python_version < "3.12" 22 | h11==0.14.0 ; python_version >= "3.11" and python_version < "3.12" 23 | h5py==3.9.0 ; python_version >= "3.11" and python_version < "3.12" 24 | humanfriendly==10.0 ; python_version >= "3.11" and python_version < "3.12" 25 | idna==3.4 ; python_version >= "3.11" and python_version < "3.12" 26 | inflect==7.0.0 ; python_version >= "3.11" and python_version < "3.12" 27 | jaconv==0.3.4 ; python_version >= "3.11" and python_version < "3.12" 28 | jamo==0.4.1 ; python_version >= "3.11" and python_version < "3.12" 29 | jinja2==3.1.2 ; python_version >= "3.11" and python_version < "3.12" 30 | joblib==1.2.0 ; python_version >= "3.11" and python_version < "3.12" 31 | librosa==0.9.2 ; python_version >= "3.11" and python_version < "3.12" 32 | llvmlite==0.40.1 ; python_version >= "3.11" and python_version < "3.12" 33 | markupsafe==2.1.3 ; python_version >= "3.11" and python_version < "3.12" 34 | mpmath==1.3.0 ; python_version >= "3.11" and python_version < "3.12" 35 | networkx==3.1 ; python_version >= "3.11" and python_version < "3.12" 36 | nltk==3.8.1 ; python_version >= "3.11" and python_version < "3.12" 37 | 
numba==0.57.1 ; python_version >= "3.11" and python_version < "3.12" 38 | numpy==1.24.4 ; python_version >= "3.11" and python_version < "3.12" 39 | packaging==23.1 ; python_version >= "3.11" and python_version < "3.12" 40 | pip-licenses==4.3.2 ; python_version >= "3.11" and python_version < "3.12" 41 | platformdirs==3.10.0 ; python_version >= "3.11" and python_version < "3.12" 42 | pooch==1.7.0 ; python_version >= "3.11" and python_version < "3.12" 43 | prettytable==3.9.0 ; python_version >= "3.11" and python_version < "3.12" 44 | pycparser==2.21 ; python_version >= "3.11" and python_version < "3.12" 45 | pydantic==1.10.12 ; python_version >= "3.11" and python_version < "3.12" 46 | pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@b35fc89fe42948a28e33aed886ea145a51113f88 ; python_version >= "3.11" and python_version < "3.12" 47 | pypinyin==0.44.0 ; python_version >= "3.11" and python_version < "3.12" 48 | pyreadline3==3.4.1 ; sys_platform == "win32" and python_version >= "3.11" and python_version < "3.12" 49 | python-multipart==0.0.5 ; python_version >= "3.11" and python_version < "3.12" 50 | pytorch-wpe==0.0.1 ; python_version >= "3.11" and python_version < "3.12" 51 | pyworld==0.3.4 ; python_version >= "3.11" and python_version < "3.12" 52 | pyyaml==6.0.1 ; python_version >= "3.11" and python_version < "3.12" 53 | regex==2023.8.8 ; python_version >= "3.11" and python_version < "3.12" 54 | requests==2.31.0 ; python_version >= "3.11" and python_version < "3.12" 55 | resampy==0.4.2 ; python_version >= "3.11" and python_version < "3.12" 56 | scikit-learn==1.3.0 ; python_version >= "3.11" and python_version < "3.12" 57 | scipy==1.11.2 ; python_version >= "3.11" and python_version < "3.12" 58 | semver==3.0.1 ; python_version >= "3.11" and python_version < "3.12" 59 | sentencepiece==0.1.99 ; python_version >= "3.11" and python_version < "3.12" 60 | setuptools==68.2.2 ; python_version >= "3.11" and python_version < "3.12" 61 | six==1.16.0 ; python_version >= 
"3.11" and python_version < "3.12" 62 | sniffio==1.3.0 ; python_version >= "3.11" and python_version < "3.12" 63 | soundfile==0.12.1 ; python_version >= "3.11" and python_version < "3.12" 64 | starlette==0.27.0 ; python_version >= "3.11" and python_version < "3.12" 65 | sympy==1.12 ; python_version >= "3.11" and python_version < "3.12" 66 | threadpoolctl==3.2.0 ; python_version >= "3.11" and python_version < "3.12" 67 | torch-complex==0.4.3 ; python_version >= "3.11" and python_version < "3.12" 68 | torch==2.0.1 ; python_version >= "3.11" and python_version < "3.12" 69 | tqdm==4.66.1 ; python_version >= "3.11" and python_version < "3.12" 70 | typeguard==2.13.3 ; python_version >= "3.11" and python_version < "3.12" 71 | typing-extensions==4.7.1 ; python_version >= "3.11" and python_version < "3.12" 72 | unidecode==1.3.6 ; python_version >= "3.11" and python_version < "3.12" 73 | urllib3==2.0.4 ; python_version >= "3.11" and python_version < "3.12" 74 | uvicorn==0.15.0 ; python_version >= "3.11" and python_version < "3.12" 75 | wcwidth==0.2.6 ; python_version >= "3.11" and python_version < "3.12" 76 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiofiles==0.7.0 ; python_version >= "3.11" and python_version < "3.12" 2 | anyio==3.7.1 ; python_version >= "3.11" and python_version < "3.12" 3 | asgiref==3.7.2 ; python_version >= "3.11" and python_version < "3.12" 4 | audioread==3.0.0 ; python_version >= "3.11" and python_version < "3.12" 5 | certifi==2023.7.22 ; python_version >= "3.11" and python_version < "3.12" 6 | cffi==1.15.1 ; python_version >= "3.11" and python_version < "3.12" 7 | charset-normalizer==3.2.0 ; python_version >= "3.11" and python_version < "3.12" 8 | ci-sdr==0.0.2 ; python_version >= "3.11" and python_version < "3.12" 9 | click==8.1.7 ; python_version >= "3.11" and python_version < "3.12" 10 | colorama==0.4.6 ; 
python_version >= "3.11" and python_version < "3.12" and platform_system == "Windows" 11 | configargparse==1.7 ; python_version >= "3.11" and python_version < "3.12" 12 | ctc-segmentation==1.7.4 ; python_version >= "3.11" and python_version < "3.12" 13 | cython==0.29.36 ; python_version >= "3.11" and python_version < "3.12" 14 | decorator==5.1.1 ; python_version >= "3.11" and python_version < "3.12" 15 | distance==0.1.3 ; python_version >= "3.11" and python_version < "3.12" 16 | einops==0.6.1 ; python_version >= "3.11" and python_version < "3.12" 17 | espnet @ git+https://github.com/voicevox-bridge/espnet@ff992af2279a03405257a844c48bf83f47805b1b ; python_version >= "3.11" and python_version < "3.12" 18 | espnet-tts-frontend==0.0.3 ; python_version >= "3.11" and python_version < "3.12" 19 | fastapi==0.103.2 ; python_version >= "3.11" and python_version < "3.12" 20 | filelock==3.12.4 ; python_version >= "3.11" and python_version < "3.12" 21 | g2p-en==2.1.0 ; python_version >= "3.11" and python_version < "3.12" 22 | h11==0.14.0 ; python_version >= "3.11" and python_version < "3.12" 23 | h5py==3.9.0 ; python_version >= "3.11" and python_version < "3.12" 24 | humanfriendly==10.0 ; python_version >= "3.11" and python_version < "3.12" 25 | idna==3.4 ; python_version >= "3.11" and python_version < "3.12" 26 | inflect==7.0.0 ; python_version >= "3.11" and python_version < "3.12" 27 | jaconv==0.3.4 ; python_version >= "3.11" and python_version < "3.12" 28 | jamo==0.4.1 ; python_version >= "3.11" and python_version < "3.12" 29 | jinja2==3.1.2 ; python_version >= "3.11" and python_version < "3.12" 30 | joblib==1.2.0 ; python_version >= "3.11" and python_version < "3.12" 31 | librosa==0.9.2 ; python_version >= "3.11" and python_version < "3.12" 32 | llvmlite==0.40.1 ; python_version >= "3.11" and python_version < "3.12" 33 | markupsafe==2.1.3 ; python_version >= "3.11" and python_version < "3.12" 34 | mpmath==1.3.0 ; python_version >= "3.11" and python_version < "3.12" 35 | 
networkx==3.1 ; python_version >= "3.11" and python_version < "3.12" 36 | nltk==3.8.1 ; python_version >= "3.11" and python_version < "3.12" 37 | numba==0.57.1 ; python_version >= "3.11" and python_version < "3.12" 38 | numpy==1.24.4 ; python_version >= "3.11" and python_version < "3.12" 39 | packaging==23.1 ; python_version >= "3.11" and python_version < "3.12" 40 | platformdirs==3.10.0 ; python_version >= "3.11" and python_version < "3.12" 41 | pooch==1.7.0 ; python_version >= "3.11" and python_version < "3.12" 42 | pycparser==2.21 ; python_version >= "3.11" and python_version < "3.12" 43 | pydantic==1.10.12 ; python_version >= "3.11" and python_version < "3.12" 44 | pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@b35fc89fe42948a28e33aed886ea145a51113f88 ; python_version >= "3.11" and python_version < "3.12" 45 | pypinyin==0.44.0 ; python_version >= "3.11" and python_version < "3.12" 46 | pyreadline3==3.4.1 ; sys_platform == "win32" and python_version >= "3.11" and python_version < "3.12" 47 | python-multipart==0.0.5 ; python_version >= "3.11" and python_version < "3.12" 48 | pytorch-wpe==0.0.1 ; python_version >= "3.11" and python_version < "3.12" 49 | pyworld==0.3.4 ; python_version >= "3.11" and python_version < "3.12" 50 | pyyaml==6.0.1 ; python_version >= "3.11" and python_version < "3.12" 51 | regex==2023.8.8 ; python_version >= "3.11" and python_version < "3.12" 52 | requests==2.31.0 ; python_version >= "3.11" and python_version < "3.12" 53 | resampy==0.4.2 ; python_version >= "3.11" and python_version < "3.12" 54 | scikit-learn==1.3.0 ; python_version >= "3.11" and python_version < "3.12" 55 | scipy==1.11.2 ; python_version >= "3.11" and python_version < "3.12" 56 | semver==3.0.1 ; python_version >= "3.11" and python_version < "3.12" 57 | sentencepiece==0.1.99 ; python_version >= "3.11" and python_version < "3.12" 58 | setuptools==68.2.2 ; python_version >= "3.11" and python_version < "3.12" 59 | six==1.16.0 ; python_version >= "3.11" and 
python_version < "3.12" 60 | sniffio==1.3.0 ; python_version >= "3.11" and python_version < "3.12" 61 | soundfile==0.12.1 ; python_version >= "3.11" and python_version < "3.12" 62 | starlette==0.27.0 ; python_version >= "3.11" and python_version < "3.12" 63 | sympy==1.12 ; python_version >= "3.11" and python_version < "3.12" 64 | threadpoolctl==3.2.0 ; python_version >= "3.11" and python_version < "3.12" 65 | torch-complex==0.4.3 ; python_version >= "3.11" and python_version < "3.12" 66 | torch==2.0.1 ; python_version >= "3.11" and python_version < "3.12" 67 | tqdm==4.66.1 ; python_version >= "3.11" and python_version < "3.12" 68 | typeguard==2.13.3 ; python_version >= "3.11" and python_version < "3.12" 69 | typing-extensions==4.7.1 ; python_version >= "3.11" and python_version < "3.12" 70 | unidecode==1.3.6 ; python_version >= "3.11" and python_version < "3.12" 71 | urllib3==2.0.4 ; python_version >= "3.11" and python_version < "3.12" 72 | uvicorn==0.15.0 ; python_version >= "3.11" and python_version < "3.12" 73 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # automatically generated by pysen 3 | # pysen ignores and overwrites any modifications 4 | # e203: black treats : as a binary operator 5 | # e231: black doesn't put a space after , 6 | # e501: black may exceed the line-length to follow other style rules 7 | # w503 or w504: either one needs to be disabled to select w error codes 8 | ignore = E203,E231,E501,W503 9 | max-line-length = 88 10 | select = B,B950,C,E,F,W 11 | 12 | [mypy] 13 | # automatically generated by pysen 14 | # pysen ignores and overwrites any modifications 15 | check_untyped_defs = True 16 | disallow_any_decorated = False 17 | disallow_any_generics = False 18 | disallow_any_unimported = False 19 | disallow_incomplete_defs = True 20 | disallow_subclassing_any = True 21 | disallow_untyped_calls = False 
22 | disallow_untyped_decorators = False 23 | disallow_untyped_defs = False 24 | ignore_errors = False 25 | ignore_missing_imports = True 26 | no_implicit_optional = True 27 | python_version = 3.10 28 | show_error_codes = True 29 | strict_equality = True 30 | strict_optional = True 31 | warn_redundant_casts = True 32 | warn_return_any = False 33 | warn_unreachable = True 34 | warn_unused_configs = True 35 | warn_unused_ignores = False 36 | 37 | -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/icons/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/icons/8.png -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/metas.json: -------------------------------------------------------------------------------- 1 | { 2 | "supported_features": { "permitted_synthesis_morphing": "NOTHING" } 3 | } 4 | -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/policy.md: -------------------------------------------------------------------------------- 1 | dummy3 policy 2 | 3 | https://voicevox.hiroshiba.jp/ 4 | -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portrait.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portrait.png -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portraits/8.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portraits/8.png -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_001.wav -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_002.wav -------------------------------------------------------------------------------- /speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_003.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/1.png -------------------------------------------------------------------------------- 
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/3.png -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/5.png -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/7.png -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/metas.json: -------------------------------------------------------------------------------- 1 | { 2 | "supported_features": { "permitted_synthesis_morphing": "SELF_ONLY" } 3 | } 4 | -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/policy.md: -------------------------------------------------------------------------------- 1 | dummy2 policy 2 | 3 | https://voicevox.hiroshiba.jp/ 4 | -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portrait.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portrait.png -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portraits/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portraits/3.png -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_001.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_002.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_003.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_001.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_001.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_002.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_003.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_001.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_002.wav 
-------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_003.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_001.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_002.wav -------------------------------------------------------------------------------- /speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_003.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/0.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/2.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/4.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/6.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/metas.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/policy.md: -------------------------------------------------------------------------------- 1 | dummy1 policy 2 | 3 | https://voicevox.hiroshiba.jp/ 4 | -------------------------------------------------------------------------------- 
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portrait.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portrait.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/0.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/2.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/4.png -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/6.png -------------------------------------------------------------------------------- 
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_001.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_002.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_003.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_001.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_002.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_002.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_003.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_001.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_002.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_003.wav -------------------------------------------------------------------------------- 
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_001.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_002.wav -------------------------------------------------------------------------------- /speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_003.wav -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/icons/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/icons/9.png -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/metas.json: -------------------------------------------------------------------------------- 1 | { 2 | "supported_features": { "permitted_synthesis_morphing": "ALL" } 3 | } 4 | -------------------------------------------------------------------------------- 
/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/policy.md: -------------------------------------------------------------------------------- 1 | dummy4 policy 2 | 3 | https://voicevox.hiroshiba.jp/ 4 | -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/portrait.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/portrait.png -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_001.wav -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_002.wav -------------------------------------------------------------------------------- /speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_003.wav -------------------------------------------------------------------------------- /test/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/test/__init__.py -------------------------------------------------------------------------------- /test/e2e/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | from fastapi.testclient import TestClient 5 | from run import generate_app 6 | 7 | from voicevox_engine.bridge_config import BridgeConfigLoader 8 | from voicevox_engine.preset import PresetManager 9 | from voicevox_engine.setting import SettingLoader 10 | from voicevox_engine.synthesis_engine import make_synthesis_engines 11 | from voicevox_engine.utility.core_version_utility import get_latest_core_version 12 | 13 | 14 | @pytest.fixture(scope="session") 15 | def client(): 16 | bridge_config_loader = BridgeConfigLoader(Path("./invalid")) 17 | synthesis_engines = make_synthesis_engines( 18 | use_gpu=False, bridge_config_loader=bridge_config_loader 19 | ) 20 | latest_core_version = get_latest_core_version(versions=synthesis_engines.keys()) 21 | setting_loader = SettingLoader(Path("./default_setting.yml")) 22 | preset_manager = PresetManager( # FIXME: impl MockPresetManager 23 | preset_path=Path("./presets.yaml"), 24 | ) 25 | 26 | return TestClient( 27 | generate_app( 28 | synthesis_engines=synthesis_engines, 29 | latest_core_version=latest_core_version, 30 | setting_loader=setting_loader, 31 | preset_manager=preset_manager, 32 | ) 33 | ) 34 | -------------------------------------------------------------------------------- /test/e2e/test_validate_version.py: -------------------------------------------------------------------------------- 1 | from fastapi.testclient import TestClient 2 | 3 | # from voicevox_engine import __version__ 4 | 5 | 6 | def test_fetch_version_success(client: TestClient): 7 | response = client.get("/version") 8 
| assert response.status_code == 200 9 | # Bridge PluginはVersion取得が特殊なため、一旦コメントアウト 10 | # assert response.json() == __version__ 11 | -------------------------------------------------------------------------------- /test/presets-test-1.yaml: -------------------------------------------------------------------------------- 1 | - id: 1 2 | name: test 3 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 4 | style_id: 0 5 | speedScale: 1 6 | pitchScale: 0 7 | intonationScale: 1 8 | volumeScale: 1 9 | prePhonemeLength: 0.1 10 | postPhonemeLength: 0.1 11 | 12 | - id: 2 13 | name: test2 14 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 15 | style_id: 2 16 | speedScale: 1.5 17 | pitchScale: 0 18 | intonationScale: 1 19 | volumeScale: 0.7 20 | prePhonemeLength: 0.5 21 | postPhonemeLength: 0.5 22 | -------------------------------------------------------------------------------- /test/presets-test-2.yaml: -------------------------------------------------------------------------------- 1 | - id: 1 2 | name: test 3 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 4 | style_id: not_int 5 | speedScale: 1 6 | pitchScale: 0 7 | intonationScale: 1 8 | volumeScale: 1 9 | prePhonemeLength: 0.1 10 | postPhonemeLength: 0.1 11 | 12 | - id: 2 13 | name: test2 14 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 15 | style_id: 2 16 | speedScale: 1.5 17 | pitchScale: 0 18 | intonationScale: 1 19 | volumeScale: 0.7 20 | prePhonemeLength: 0.5 21 | postPhonemeLength: 0.5 22 | -------------------------------------------------------------------------------- /test/presets-test-3.yaml: -------------------------------------------------------------------------------- 1 | - id: 1 2 | name: test 3 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 4 | style_id: 0 5 | speedScale: 1 6 | pitchScale: 0 7 | intonationScale: 1 8 | volumeScale: 1 9 | prePhonemeLength: 0.1 10 | postPhonemeLength: 0.1 11 | 12 | - id: 1 13 | name: test2 14 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff 15 | 
style_id: 2 16 | speedScale: 1.5 17 | pitchScale: 0 18 | intonationScale: 1 19 | volumeScale: 0.7 20 | prePhonemeLength: 0.5 21 | postPhonemeLength: 0.5 22 | -------------------------------------------------------------------------------- /test/presets-test-4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/test/presets-test-4.yaml -------------------------------------------------------------------------------- /test/setting-test-load-1.yaml: -------------------------------------------------------------------------------- 1 | allow_origin: null 2 | cors_policy_mode: localapps 3 | -------------------------------------------------------------------------------- /test/setting-test-load-2.yaml: -------------------------------------------------------------------------------- 1 | allow_origin: null 2 | cors_policy_mode: all 3 | -------------------------------------------------------------------------------- /test/setting-test-load-3.yaml: -------------------------------------------------------------------------------- 1 | allow_origin: "192.168.254.255 192.168.255.255" 2 | cors_policy_mode: localapps 3 | -------------------------------------------------------------------------------- /test/test_acoustic_feature_extractor.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from voicevox_engine.acoustic_feature_extractor import OjtPhoneme 4 | 5 | 6 | class TestOjtPhoneme(TestCase): 7 | def setUp(self): 8 | super().setUp() 9 | str_hello_hiho = "sil k o N n i ch i w a pau h i h o d e s U sil" 10 | self.ojt_hello_hiho = OjtPhoneme.convert( 11 | [OjtPhoneme(s, i, i + 1) for i, s in enumerate(str_hello_hiho.split())] 12 | ) 13 | 14 | def test_repr_(self): 15 | self.assertEqual( 16 | self.ojt_hello_hiho[1].__repr__(), "Phoneme(phoneme='k', start=1, end=2)" 17 | ) 
18 | self.assertEqual( 19 | self.ojt_hello_hiho[10].__repr__(), 20 | "Phoneme(phoneme='pau', start=10, end=11)", 21 | ) 22 | 23 | def test_phoneme_list(self): 24 | self.assertEqual(OjtPhoneme.phoneme_list[1], "A") 25 | self.assertEqual(OjtPhoneme.phoneme_list[14], "e") 26 | self.assertEqual(OjtPhoneme.phoneme_list[26], "m") 27 | self.assertEqual(OjtPhoneme.phoneme_list[38], "ts") 28 | self.assertEqual(OjtPhoneme.phoneme_list[41], "v") 29 | 30 | def test_const(self): 31 | self.assertEqual(OjtPhoneme.num_phoneme, 45) 32 | self.assertEqual(OjtPhoneme.space_phoneme, "pau") 33 | 34 | def test_convert(self): 35 | ojt_str_hello_hiho = " ".join([p.phoneme for p in self.ojt_hello_hiho]) 36 | self.assertEqual( 37 | ojt_str_hello_hiho, "pau k o N n i ch i w a pau h i h o d e s U pau" 38 | ) 39 | 40 | def test_equal(self): 41 | # ojt_hello_hihoの10番目の"a"と比較 42 | true_ojt_phoneme = OjtPhoneme("a", 9, 10) 43 | 44 | false_ojt_phoneme_1 = OjtPhoneme("k", 9, 10) 45 | false_ojt_phoneme_2 = OjtPhoneme("a", 10, 11) 46 | self.assertTrue(self.ojt_hello_hiho[9] == true_ojt_phoneme) 47 | self.assertFalse(self.ojt_hello_hiho[9] == false_ojt_phoneme_1) 48 | self.assertFalse(self.ojt_hello_hiho[9] == false_ojt_phoneme_2) 49 | 50 | def test_phoneme_id(self): 51 | ojt_str_hello_hiho = " ".join([str(p.phoneme_id) for p in self.ojt_hello_hiho]) 52 | self.assertEqual( 53 | ojt_str_hello_hiho, "0 23 30 4 28 21 10 21 42 7 0 19 21 19 30 12 14 35 6 0" 54 | ) 55 | 56 | def test_onehot(self): 57 | phoneme_id_list = [ 58 | 0, 59 | 23, 60 | 30, 61 | 4, 62 | 28, 63 | 21, 64 | 10, 65 | 21, 66 | 42, 67 | 7, 68 | 0, 69 | 19, 70 | 21, 71 | 19, 72 | 30, 73 | 12, 74 | 14, 75 | 35, 76 | 6, 77 | 0, 78 | ] 79 | for i, phoneme in enumerate(self.ojt_hello_hiho): 80 | for j in range(OjtPhoneme.num_phoneme): 81 | if phoneme_id_list[i] == j: 82 | self.assertEqual(phoneme.onehot[j], True) 83 | else: 84 | self.assertEqual(phoneme.onehot[j], False) 85 | 
-------------------------------------------------------------------------------- /test/test_connect_base64_waves.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import io 3 | from unittest import TestCase 4 | 5 | import numpy as np 6 | import numpy.testing 7 | import soundfile 8 | from scipy.signal import resample 9 | 10 | from voicevox_engine.utility import ConnectBase64WavesException, connect_base64_waves 11 | 12 | 13 | def generate_sine_wave_ndarray( 14 | seconds: float, samplerate: int, frequency: float 15 | ) -> np.ndarray: 16 | x = np.linspace(0, seconds, int(seconds * samplerate), endpoint=False) 17 | wave = np.sin(2 * np.pi * frequency * x).astype(np.float32) 18 | 19 | return wave 20 | 21 | 22 | def encode_bytes(wave_ndarray: np.ndarray, samplerate: int) -> bytes: 23 | wave_bio = io.BytesIO() 24 | soundfile.write( 25 | file=wave_bio, 26 | data=wave_ndarray, 27 | samplerate=samplerate, 28 | format="WAV", 29 | subtype="FLOAT", 30 | ) 31 | wave_bio.seek(0) 32 | 33 | return wave_bio.getvalue() 34 | 35 | 36 | def generate_sine_wave_bytes( 37 | seconds: float, samplerate: int, frequency: float 38 | ) -> bytes: 39 | wave_ndarray = generate_sine_wave_ndarray(seconds, samplerate, frequency) 40 | return encode_bytes(wave_ndarray, samplerate) 41 | 42 | 43 | def encode_base64(wave_bytes: bytes) -> str: 44 | return base64.standard_b64encode(wave_bytes).decode("utf-8") 45 | 46 | 47 | def generate_sine_wave_base64(seconds: float, samplerate: int, frequency: float) -> str: 48 | wave_bytes = generate_sine_wave_bytes(seconds, samplerate, frequency) 49 | wave_base64 = encode_base64(wave_bytes) 50 | return wave_base64 51 | 52 | 53 | class TestConnectBase64Waves(TestCase): 54 | def test_connect(self): 55 | samplerate = 1000 56 | wave = generate_sine_wave_ndarray( 57 | seconds=2, samplerate=samplerate, frequency=10 58 | ) 59 | wave_base64 = encode_base64(encode_bytes(wave, samplerate=samplerate)) 60 | 61 | wave_x2_ref = 
np.concatenate([wave, wave]) 62 | 63 | wave_x2, _ = connect_base64_waves(waves=[wave_base64, wave_base64]) 64 | 65 | self.assertEqual(wave_x2_ref.shape, wave_x2.shape) 66 | 67 | self.assertTrue((wave_x2_ref == wave_x2).all()) 68 | 69 | def test_no_wave_error(self): 70 | self.assertRaises(ConnectBase64WavesException, connect_base64_waves, waves=[]) 71 | 72 | def test_invalid_base64_error(self): 73 | wave_1000hz = generate_sine_wave_base64( 74 | seconds=2, samplerate=1000, frequency=10 75 | ) 76 | wave_1000hz_broken = wave_1000hz[1:] # remove head 1 char 77 | 78 | self.assertRaises( 79 | ConnectBase64WavesException, 80 | connect_base64_waves, 81 | waves=[ 82 | wave_1000hz_broken, 83 | ], 84 | ) 85 | 86 | def test_invalid_wave_file_error(self): 87 | wave_1000hz = generate_sine_wave_bytes(seconds=2, samplerate=1000, frequency=10) 88 | wave_1000hz_broken_bytes = wave_1000hz[1:] # remove head 1 byte 89 | wave_1000hz_broken = encode_base64(wave_1000hz_broken_bytes) 90 | 91 | self.assertRaises( 92 | ConnectBase64WavesException, 93 | connect_base64_waves, 94 | waves=[ 95 | wave_1000hz_broken, 96 | ], 97 | ) 98 | 99 | def test_different_frequency(self): 100 | wave_24000hz = generate_sine_wave_ndarray( 101 | seconds=1, samplerate=24000, frequency=10 102 | ) 103 | wave_1000hz = generate_sine_wave_ndarray( 104 | seconds=2, samplerate=1000, frequency=10 105 | ) 106 | wave_24000_base64 = encode_base64(encode_bytes(wave_24000hz, samplerate=24000)) 107 | wave_1000_base64 = encode_base64(encode_bytes(wave_1000hz, samplerate=1000)) 108 | 109 | wave_1000hz_to2400hz = resample(wave_1000hz, 24000 * len(wave_1000hz) // 1000) 110 | wave_x2_ref = np.concatenate([wave_24000hz, wave_1000hz_to2400hz]) 111 | 112 | wave_x2, _ = connect_base64_waves(waves=[wave_24000_base64, wave_1000_base64]) 113 | 114 | self.assertEqual(wave_x2_ref.shape, wave_x2.shape) 115 | numpy.testing.assert_array_almost_equal(wave_x2_ref, wave_x2) 116 | 117 | def test_different_channels(self): 118 | wave_1000hz = 
generate_sine_wave_ndarray( 119 | seconds=2, samplerate=1000, frequency=10 120 | ) 121 | wave_2ch_1000hz = np.array([wave_1000hz, wave_1000hz]).T 122 | wave_1ch_base64 = encode_base64(encode_bytes(wave_1000hz, samplerate=1000)) 123 | wave_2ch_base64 = encode_base64(encode_bytes(wave_2ch_1000hz, samplerate=1000)) 124 | 125 | wave_x2_ref = np.concatenate([wave_2ch_1000hz, wave_2ch_1000hz]) 126 | 127 | wave_x2, _ = connect_base64_waves(waves=[wave_1ch_base64, wave_2ch_base64]) 128 | 129 | self.assertEqual(wave_x2_ref.shape, wave_x2.shape) 130 | self.assertTrue((wave_x2_ref == wave_x2).all()) 131 | -------------------------------------------------------------------------------- /test/test_core_version_utility.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from voicevox_engine.utility import get_latest_core_version, parse_core_version 4 | 5 | 6 | class TestCoreVersion(TestCase): 7 | def test_parse_core_version(self): 8 | parse_core_version("0.0.0") 9 | parse_core_version("0.1.0") 10 | parse_core_version("0.10.0") 11 | parse_core_version("0.10.0-preview.1") 12 | parse_core_version("0.14.0") 13 | parse_core_version("0.14.0-preview.1") 14 | parse_core_version("0.14.0-preview.10") 15 | 16 | def test_get_latest_core_version(self): 17 | self.assertEqual( 18 | get_latest_core_version( 19 | versions=[ 20 | "0.0.0", 21 | "0.1.0", 22 | "0.10.0", 23 | "0.10.0-preview.1", 24 | "0.14.0", 25 | "0.14.0-preview.1", 26 | "0.14.0-preview.10", 27 | ] 28 | ), 29 | "0.14.0", 30 | ) 31 | 32 | self.assertEqual( 33 | get_latest_core_version( 34 | versions=[ 35 | "0.14.0", 36 | "0.15.0-preview.1", 37 | ] 38 | ), 39 | "0.15.0-preview.1", 40 | ) 41 | -------------------------------------------------------------------------------- /test/test_mock_synthesis_engine.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from 
voicevox_engine.dev.synthesis_engine import MockSynthesisEngine 4 | from voicevox_engine.kana_parser import create_kana 5 | from voicevox_engine.model import AccentPhrase, AudioQuery, Mora 6 | 7 | 8 | class TestMockSynthesisEngine(TestCase): 9 | def setUp(self): 10 | super().setUp() 11 | 12 | self.accent_phrases_hello_hiho = [ 13 | AccentPhrase( 14 | moras=[ 15 | Mora( 16 | text="コ", 17 | consonant="k", 18 | consonant_length=0.0, 19 | vowel="o", 20 | vowel_length=0.0, 21 | pitch=0.0, 22 | ), 23 | Mora( 24 | text="ン", 25 | consonant=None, 26 | consonant_length=None, 27 | vowel="N", 28 | vowel_length=0.0, 29 | pitch=0.0, 30 | ), 31 | Mora( 32 | text="ニ", 33 | consonant="n", 34 | consonant_length=0.0, 35 | vowel="i", 36 | vowel_length=0.0, 37 | pitch=0.0, 38 | ), 39 | Mora( 40 | text="チ", 41 | consonant="ch", 42 | consonant_length=0.0, 43 | vowel="i", 44 | vowel_length=0.0, 45 | pitch=0.0, 46 | ), 47 | Mora( 48 | text="ワ", 49 | consonant="w", 50 | consonant_length=0.0, 51 | vowel="a", 52 | vowel_length=0.0, 53 | pitch=0.0, 54 | ), 55 | ], 56 | accent=5, 57 | pause_mora=Mora( 58 | text="、", 59 | consonant=None, 60 | consonant_length=None, 61 | vowel="pau", 62 | vowel_length=0.0, 63 | pitch=0.0, 64 | ), 65 | ), 66 | AccentPhrase( 67 | moras=[ 68 | Mora( 69 | text="ヒ", 70 | consonant="h", 71 | consonant_length=0.0, 72 | vowel="i", 73 | vowel_length=0.0, 74 | pitch=0.0, 75 | ), 76 | Mora( 77 | text="ホ", 78 | consonant="h", 79 | consonant_length=0.0, 80 | vowel="o", 81 | vowel_length=0.0, 82 | pitch=0.0, 83 | ), 84 | Mora( 85 | text="デ", 86 | consonant="d", 87 | consonant_length=0.0, 88 | vowel="e", 89 | vowel_length=0.0, 90 | pitch=0.0, 91 | ), 92 | Mora( 93 | text="ス", 94 | consonant="s", 95 | consonant_length=0.0, 96 | vowel="U", 97 | vowel_length=0.0, 98 | pitch=0.0, 99 | ), 100 | ], 101 | accent=1, 102 | pause_mora=None, 103 | ), 104 | ] 105 | self.engine = MockSynthesisEngine(speakers="", supported_devices="") 106 | 107 | def test_replace_phoneme_length(self): 108 | 
self.assertEqual( 109 | self.engine.replace_phoneme_length( 110 | accent_phrases=self.accent_phrases_hello_hiho, 111 | style_id=0, 112 | ), 113 | self.accent_phrases_hello_hiho, 114 | ) 115 | 116 | def test_replace_mora_pitch(self): 117 | self.assertEqual( 118 | self.engine.replace_mora_pitch( 119 | accent_phrases=self.accent_phrases_hello_hiho, 120 | style_id=0, 121 | ), 122 | self.accent_phrases_hello_hiho, 123 | ) 124 | 125 | def test_synthesis(self): 126 | self.engine.synthesis( 127 | AudioQuery( 128 | accent_phrases=self.accent_phrases_hello_hiho, 129 | speedScale=1, 130 | pitchScale=0, 131 | intonationScale=1, 132 | volumeScale=1, 133 | prePhonemeLength=0.1, 134 | postPhonemeLength=0.1, 135 | outputSamplingRate=24000, 136 | outputStereo=False, 137 | kana=create_kana(self.accent_phrases_hello_hiho), 138 | ), 139 | style_id=0, 140 | ) 141 | -------------------------------------------------------------------------------- /test/test_mora_list.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from voicevox_engine.mora_list import openjtalk_mora2text 4 | 5 | 6 | class TestOpenJTalkMoraList(TestCase): 7 | def test_mora2text(self): 8 | self.assertEqual("ッ", openjtalk_mora2text["cl"]) 9 | self.assertEqual("ティ", openjtalk_mora2text["ti"]) 10 | self.assertEqual("トゥ", openjtalk_mora2text["tu"]) 11 | self.assertEqual("ディ", openjtalk_mora2text["di"]) 12 | # GitHub issue #60 13 | self.assertEqual("ギェ", openjtalk_mora2text["gye"]) 14 | self.assertEqual("イェ", openjtalk_mora2text["ye"]) 15 | 16 | def test_mora2text_injective(self): 17 | """異なるモーラが同じ読みがなに対応しないか確認する""" 18 | values = list(openjtalk_mora2text.values()) 19 | uniq_values = list(set(values)) 20 | self.assertCountEqual(values, uniq_values) 21 | -------------------------------------------------------------------------------- /test/test_mora_to_text.py: -------------------------------------------------------------------------------- 1 | from 
unittest import TestCase 2 | 3 | # TODO: import from voicevox_engine.synthesis_engine.mora 4 | from voicevox_engine.synthesis_engine.synthesis_engine_base import mora_to_text 5 | 6 | 7 | class TestMoraToText(TestCase): 8 | def test_voice(self): 9 | self.assertEqual(mora_to_text("a"), "ア") 10 | self.assertEqual(mora_to_text("i"), "イ") 11 | self.assertEqual(mora_to_text("ka"), "カ") 12 | self.assertEqual(mora_to_text("N"), "ン") 13 | self.assertEqual(mora_to_text("cl"), "ッ") 14 | self.assertEqual(mora_to_text("gye"), "ギェ") 15 | self.assertEqual(mora_to_text("ye"), "イェ") 16 | self.assertEqual(mora_to_text("wo"), "ウォ") 17 | 18 | def test_unvoice(self): 19 | self.assertEqual(mora_to_text("A"), "ア") 20 | self.assertEqual(mora_to_text("I"), "イ") 21 | self.assertEqual(mora_to_text("kA"), "カ") 22 | self.assertEqual(mora_to_text("gyE"), "ギェ") 23 | self.assertEqual(mora_to_text("yE"), "イェ") 24 | self.assertEqual(mora_to_text("wO"), "ウォ") 25 | 26 | def test_invalid_mora(self): 27 | """変なモーラが来ても例外を投げない""" 28 | self.assertEqual(mora_to_text("x"), "x") 29 | self.assertEqual(mora_to_text(""), "") 30 | -------------------------------------------------------------------------------- /test/test_setting.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from tempfile import TemporaryDirectory 3 | from unittest import TestCase 4 | 5 | from voicevox_engine.setting import CorsPolicyMode, Setting, SettingLoader 6 | 7 | 8 | class TestSettingLoader(TestCase): 9 | def setUp(self): 10 | self.tmp_dir = TemporaryDirectory() 11 | self.tmp_dir_path = Path(self.tmp_dir.name) 12 | 13 | def test_loading_1(self): 14 | setting_loader = SettingLoader(Path("not_exist.yaml")) 15 | settings = setting_loader.load_setting_file() 16 | 17 | self.assertEqual( 18 | settings.dict(), 19 | {"allow_origin": None, "cors_policy_mode": CorsPolicyMode.localapps}, 20 | ) 21 | 22 | def test_loading_2(self): 23 | setting_loader = SettingLoader( 24 | 
setting_file_path=Path("test/setting-test-load-1.yaml") 25 | ) 26 | settings = setting_loader.load_setting_file() 27 | 28 | self.assertEqual( 29 | settings.dict(), 30 | {"allow_origin": None, "cors_policy_mode": CorsPolicyMode.localapps}, 31 | ) 32 | 33 | def test_loading_3(self): 34 | setting_loader = SettingLoader( 35 | setting_file_path=Path("test/setting-test-load-2.yaml") 36 | ) 37 | settings = setting_loader.load_setting_file() 38 | 39 | self.assertEqual( 40 | settings.dict(), 41 | {"allow_origin": None, "cors_policy_mode": "all"}, 42 | ) 43 | 44 | def test_loading_4(self): 45 | setting_loader = SettingLoader( 46 | setting_file_path=Path("test/setting-test-load-3.yaml") 47 | ) 48 | settings = setting_loader.load_setting_file() 49 | 50 | self.assertEqual( 51 | settings.dict(), 52 | { 53 | "allow_origin": "192.168.254.255 192.168.255.255", 54 | "cors_policy_mode": CorsPolicyMode.localapps, 55 | }, 56 | ) 57 | 58 | def test_dump(self): 59 | setting_loader = SettingLoader( 60 | setting_file_path=Path(self.tmp_dir_path / "setting-test-dump.yaml") 61 | ) 62 | settings = Setting(cors_policy_mode=CorsPolicyMode.localapps) 63 | setting_loader.dump_setting_file(settings) 64 | 65 | self.assertTrue(setting_loader.setting_file_path.is_file()) 66 | self.assertEqual( 67 | setting_loader.load_setting_file().dict(), 68 | {"allow_origin": None, "cors_policy_mode": CorsPolicyMode.localapps}, 69 | ) 70 | 71 | def tearDown(self): 72 | self.tmp_dir.cleanup() 73 | -------------------------------------------------------------------------------- /test/test_user_dict_model.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from unittest import TestCase 3 | 4 | from pydantic import ValidationError 5 | 6 | from voicevox_engine.kana_parser import parse_kana 7 | from voicevox_engine.model import UserDictWord 8 | 9 | 10 | class TestUserDictWords(TestCase): 11 | def setUp(self): 12 | self.test_model = { 13 | "surface": 
"テスト", 14 | "priority": 0, 15 | "part_of_speech": "名詞", 16 | "part_of_speech_detail_1": "固有名詞", 17 | "part_of_speech_detail_2": "一般", 18 | "part_of_speech_detail_3": "*", 19 | "inflectional_type": "*", 20 | "inflectional_form": "*", 21 | "stem": "*", 22 | "yomi": "テスト", 23 | "pronunciation": "テスト", 24 | "accent_type": 0, 25 | "accent_associative_rule": "*", 26 | } 27 | 28 | def test_valid_word(self): 29 | test_value = deepcopy(self.test_model) 30 | try: 31 | UserDictWord(**test_value) 32 | except ValidationError as e: 33 | self.fail(f"Unexpected Validation Error\n{str(e)}") 34 | 35 | def test_convert_to_zenkaku(self): 36 | test_value = deepcopy(self.test_model) 37 | test_value["surface"] = "test" 38 | self.assertEqual(UserDictWord(**test_value).surface, "test") 39 | 40 | def test_count_mora(self): 41 | test_value = deepcopy(self.test_model) 42 | self.assertEqual(UserDictWord(**test_value).mora_count, 3) 43 | 44 | def test_count_mora_x(self): 45 | test_value = deepcopy(self.test_model) 46 | for s in [chr(i) for i in range(12449, 12533)]: 47 | if s in ["ァ", "ィ", "ゥ", "ェ", "ォ", "ッ", "ャ", "ュ", "ョ", "ヮ"]: 48 | continue 49 | for x in "ァィゥェォャュョ": 50 | expected_count = 0 51 | test_value["pronunciation"] = s + x 52 | for accent_phrase in parse_kana( 53 | test_value["pronunciation"] + "'", 54 | ): 55 | expected_count += len(accent_phrase.moras) 56 | with self.subTest(s=s, x=x): 57 | self.assertEqual( 58 | UserDictWord(**test_value).mora_count, 59 | expected_count, 60 | ) 61 | 62 | def test_count_mora_xwa(self): 63 | test_value = deepcopy(self.test_model) 64 | test_value["pronunciation"] = "クヮンセイ" 65 | expected_count = 0 66 | for accent_phrase in parse_kana( 67 | test_value["pronunciation"] + "'", 68 | ): 69 | expected_count += len(accent_phrase.moras) 70 | self.assertEqual( 71 | UserDictWord(**test_value).mora_count, 72 | expected_count, 73 | ) 74 | 75 | def test_invalid_pronunciation_not_katakana(self): 76 | test_value = deepcopy(self.test_model) 77 | 
test_value["pronunciation"] = "ぼいぼ" 78 | with self.assertRaises(ValidationError): 79 | UserDictWord(**test_value) 80 | 81 | def test_invalid_pronunciation_invalid_sutegana(self): 82 | test_value = deepcopy(self.test_model) 83 | test_value["pronunciation"] = "アィウェォ" 84 | with self.assertRaises(ValidationError): 85 | UserDictWord(**test_value) 86 | 87 | def test_invalid_pronunciation_invalid_xwa(self): 88 | test_value = deepcopy(self.test_model) 89 | test_value["pronunciation"] = "アヮ" 90 | with self.assertRaises(ValidationError): 91 | UserDictWord(**test_value) 92 | 93 | def test_count_mora_voiced_sound(self): 94 | test_value = deepcopy(self.test_model) 95 | test_value["pronunciation"] = "ボイボ" 96 | self.assertEqual(UserDictWord(**test_value).mora_count, 3) 97 | 98 | def test_invalid_accent_type(self): 99 | test_value = deepcopy(self.test_model) 100 | test_value["accent_type"] = 4 101 | with self.assertRaises(ValidationError): 102 | UserDictWord(**test_value) 103 | 104 | def test_invalid_accent_type_2(self): 105 | test_value = deepcopy(self.test_model) 106 | test_value["accent_type"] = -1 107 | with self.assertRaises(ValidationError): 108 | UserDictWord(**test_value) 109 | -------------------------------------------------------------------------------- /test/test_word_types.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from voicevox_engine.model import WordTypes 4 | from voicevox_engine.part_of_speech_data import part_of_speech_data 5 | 6 | 7 | class TestWordTypes(TestCase): 8 | def test_word_types(self): 9 | self.assertCountEqual(list(WordTypes), list(part_of_speech_data.keys())) 10 | -------------------------------------------------------------------------------- /test/vvlib_manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "manifest_version": "0.15.0", 3 | "name": "Test vvlib", 4 | "version": "0.0.1", 5 | "uuid": 
"2bb8bccf-1c3f-4bc9-959a-f388e37af3ad", 6 | "engine_name": "Test Engine", 7 | "brand_name": "Test", 8 | "engine_uuid": "c7b58856-bd56-4aa1-afb7-b8415f824b06" 9 | } -------------------------------------------------------------------------------- /ui_template/ui.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | VOICEVOX Engine 設定 6 | 10 | 11 | 17 | 22 | 23 | 24 | 25 |
26 |
27 | 30 | 31 |
32 | 33 | 44 |
45 |

46 | allまたはlocalappsを指定。allはすべてを許可します。 47 |

48 |

49 | localappsはオリジン間リソース共有ポリシーを、app://.とlocalhost関連に限定します。 50 |

51 |

52 | その他のオリジンはallow_originオプションで追加できます。デフォルトはlocalapps。 53 |

54 |
55 |
56 | 57 |
58 | 59 | 65 |
66 | 許可するオリジンを指定します。複数指定する場合は、直後にスペースで区切って追加できます。 67 |
68 |
69 | 70 | 108 | 109 | 117 |
118 |
# (ui_template/ui.html ends here; its last three numbered lines carry no text in this dump.)
# --------------------------------------------------------------------------------
# /voicevox_engine/__init__.py:
# --------------------------------------------------------------------------------
__version__ = "latest"

# --------------------------------------------------------------------------------
# /voicevox_engine/acoustic_feature_extractor.py:
# --------------------------------------------------------------------------------
from typing import List

import numpy


class OjtPhoneme:
    """
    One OpenJTalk phoneme together with its start and end time.

    Attributes
    ----------
    phoneme_list : Sequence[str]
        Every known phoneme symbol; the position of a symbol is its phoneme id
    num_phoneme : int
        Number of entries in phoneme_list
    space_phoneme : str
        Phoneme that stands for a pause
    """

    phoneme_list = (
        "pau",
        "A",
        "E",
        "I",
        "N",
        "O",
        "U",
        "a",
        "b",
        "by",
        "ch",
        "cl",
        "d",
        "dy",
        "e",
        "f",
        "g",
        "gw",
        "gy",
        "h",
        "hy",
        "i",
        "j",
        "k",
        "kw",
        "ky",
        "m",
        "my",
        "n",
        "ny",
        "o",
        "p",
        "py",
        "r",
        "ry",
        "s",
        "sh",
        "t",
        "ts",
        "ty",
        "u",
        "v",
        "w",
        "y",
        "z",
    )
    num_phoneme = len(phoneme_list)
    space_phoneme = "pau"

    def __init__(
        self,
        phoneme: str,
        start: float,
        end: float,
    ):
        """
        Store the phoneme symbol and its boundaries.

        Parameters
        ----------
        phoneme : str
            Phoneme symbol
        start : float
            Start time; stored rounded to two decimals
        end : float
            End time; stored rounded to two decimals
        """
        self.phoneme = phoneme
        self.start = numpy.round(start, decimals=2)
        self.end = numpy.round(end, decimals=2)

    def __repr__(self):
        return f"Phoneme(phoneme='{self.phoneme}', start={self.start}, end={self.end})"

    def __eq__(self, o: object):
        return isinstance(o, OjtPhoneme) and (
            self.phoneme == o.phoneme and self.start == o.start and self.end == o.end
        )

    @property
    def phoneme_id(self):
        """
        Index of this phoneme inside phoneme_list.

        Returns
        -------
        id : int
            Position of self.phoneme in phoneme_list

        Raises
        ------
        ValueError
            If self.phoneme is not a member of phoneme_list
        """
        return self.phoneme_list.index(self.phoneme)

    @property
    def onehot(self):
        """
        One-hot encoding of this phoneme.

        Returns
        -------
        onehot : numpy.ndarray
            Boolean array of length num_phoneme whose only True entry is at phoneme_id
        """
        array = numpy.zeros(self.num_phoneme, dtype=bool)
        array[self.phoneme_id] = True
        return array

    @classmethod
    def convert(cls, phonemes: List["OjtPhoneme"]) -> List["OjtPhoneme"]:
        """
        Replace a leading and a trailing "sil" phoneme with space_phoneme ("pau").

        The given objects are modified in place and the same list is returned.

        Parameters
        ----------
        phonemes : List[OjtPhoneme]
            Phonemes to convert; may be empty

        Returns
        -------
        phonemes : List[OjtPhoneme]
            The converted phonemes
        """
        # An empty list has no first/last element to inspect; indexing it would raise.
        if not phonemes:
            return phonemes
        if "sil" in phonemes[0].phoneme:
            phonemes[0].phoneme = cls.space_phoneme
        if "sil" in phonemes[-1].phoneme:
            phonemes[-1].phoneme = cls.space_phoneme
        return phonemes

# --------------------------------------------------------------------------------
# /voicevox_engine/bridge_config/BridgeConfig.py:
# --------------------------------------------------------------------------------
from pathlib import Path
from typing import Any, Dict, Iterable, List, Literal, Optional, Union

import numpy as np
import torch
from espnet2.bin.tts_inference import Text2Speech
from espnet2.text.token_id_converter import TokenIDConverter
from pydantic import BaseModel, Extra, Field

from ..metas.Metas import Speaker, SpeakerStyle


class TTSInferenceInitArgs(BaseModel):
    """
    Parameters passed when constructing espnet2.bin.tts_inference.Text2Speech.
    """

    # The path-like fields default to None, so they are declared Optional explicitly.
    train_config: Optional[Union[Path, str]] = None
    model_file: Optional[Union[Path, str]] = None
    threshold: float = 0.5
    minlenratio: float = 0.0
    maxlenratio: float = 10.0
    use_teacher_forcing: bool = False
    use_att_constraint: bool = False
    backward_window: int = 1
    forward_window: int = 3
    speed_control_alpha: float = 1.0
    noise_scale: float = 0.667
    noise_scale_dur: float = 0.8
    vocoder_config: Optional[Union[Path, str]] = None
    vocoder_file: Optional[Union[Path, str]] = None
    dtype: str = "float32"
    device: str = "cpu"  # overwritten by the use_gpu argument
    seed: int = 777
    always_fix_seed: bool = False


class TTSInferenceCallArgs(BaseModel):
    """
    Parameters passed when calling espnet2.bin.tts_inference.Text2Speech.
    """

    class Config:
        # torch.Tensor / np.ndarray are not pydantic-native types.
        arbitrary_types_allowed = True

    speech: Optional[Union[torch.Tensor, np.ndarray]] = None
    durations: Optional[Union[torch.Tensor, np.ndarray]] = None
    spembs: Optional[Union[torch.Tensor, np.ndarray]] = None
    sids: Optional[Union[torch.Tensor, np.ndarray]] = None
    lids: Optional[Union[torch.Tensor, np.ndarray]] = None
    decode_conf: Optional[Dict[str, Any]] = None


class TokenIDConverterInitArgs(BaseModel):
    """
    Parameters passed when constructing espnet2.text.token_id_converter.TokenIDConverter.
    """

    token_list: Union[Path, str, Iterable[str]]
    # NOTE(review): the default is an empty string as it appears here; confirm it
    # was not an angle-bracketed token lost when this text was extracted.
    unk_symbol: str = ""


class StyleConfig(SpeakerStyle):
    """
    Configuration format of a single style.
    """

    class Config:
        arbitrary_types_allowed = True
        extra = Extra.ignore

    g2p: Literal["pyopenjtalk_accent_with_pause", "pyopenjtalk_prosody"] = Field(
        title="g2pの設定"
    )
    tts_inference_init_args: TTSInferenceInitArgs = Field(
        title="Text2Speechクラス初期化時の引数", default=TTSInferenceInitArgs()
    )
    tts_inference_call_args: TTSInferenceCallArgs = Field(
        title="Text2Speechクラス呼び出し時の引数", default=TTSInferenceCallArgs()
    )
    token_id_converter_init_args: TokenIDConverterInitArgs = Field(
        title="TokenIDConverterクラス初期化時の引数",
    )
    # The two instance fields below are marked "used internally" by their titles.
    text2speech: Optional[Text2Speech] = Field(
        title="Text2Speechクラスのインスタンス(内部で使用)", default=None
    )
    token_id_converter: Optional[TokenIDConverter] = Field(
        title="TokenIDConverterクラスのインスタンス(内部で使用)", default=None
    )


class SpeakerConfig(Speaker):
    """
    Configuration format of a speaker.
    """

    styles: List[StyleConfig] = Field(title="スタイルの設定")


class BridgeConfig(BaseModel, extra=Extra.ignore):
    """
    Configuration format of the engine.
    """

    host: str = Field(title="エンジンのホスト", default="127.0.0.1")
    port: int = Field(title="エンジンのポート番号", default=50021)
    speakers: List[SpeakerConfig] = Field(title="スピーカー情報")
    engine_version: str = Field(title="エンジンのバージョン")
    sampling_rate: int = Field(title="出力サンプリングレート")

# --------------------------------------------------------------------------------
# /voicevox_engine/bridge_config/BridgeConfigLoader.py:
# --------------------------------------------------------------------------------
import yaml

from ..engine_manifest import EngineManifestLoader
from .BridgeConfig import BridgeConfig


class BridgeConfigLoader:
    """Load bridge_config.yaml from a directory and merge in engine manifest values."""

    def __init__(self, config_file_dir) -> None:
        """
        Parameters
        ----------
        config_file_dir
            Directory containing bridge_config.yaml; combined with the file name via "/"
        """
        self.config_file_path = config_file_dir / "bridge_config.yaml"

    def load_config_file(self) -> BridgeConfig:
        """
        Read the YAML file and build a BridgeConfig.

        The port, engine_version and sampling_rate entries are always replaced by
        the values EngineManifestLoader reports.

        Returns
        -------
        BridgeConfig
            The validated configuration

        Raises
        ------
        FileNotFoundError
            If bridge_config.yaml does not exist
        """
        if not self.config_file_path.is_file():
            # Include the path so the failure can be diagnosed from the message alone.
            raise FileNotFoundError(
                f"bridge config file not found: {self.config_file_path}"
            )
        config = yaml.safe_load(self.config_file_path.read_text(encoding="utf-8"))

        (
            engine_version,
            port,
            sampling_rate,
        ) = EngineManifestLoader().load_info_for_bridge_config()

        config["port"] = port
        config["engine_version"] = engine_version
        config["sampling_rate"] = sampling_rate

        setting = BridgeConfig(**config)

        return setting

# --------------------------------------------------------------------------------
# /voicevox_engine/bridge_config/__init__.py:
# --------------------------------------------------------------------------------
from .BridgeConfig import BridgeConfig
from .BridgeConfigLoader import BridgeConfigLoader

__all__ = [
    "BridgeConfig",
    "BridgeConfigLoader",
]

# --------------------------------------------------------------------------------
# /voicevox_engine/cancellable_engine.py:
# --------------------------------------------------------------------------------
import asyncio
import queue
import sys
from multiprocessing import Pipe, Process

if sys.platform == "win32":
    from multiprocessing.connection import PipeConnection as ConnectionType
else:
    from multiprocessing.connection import Connection as ConnectionType

from pathlib import Path
from tempfile import NamedTemporaryFile

import soundfile

# FIXME: remove FastAPI dependency
from fastapi import HTTPException, Request

from .bridge_config import BridgeConfigLoader
from .model import AudioQuery
from .synthesis_engine import make_synthesis_engines
from .utility import get_latest_core_version


class CancellableEngine:
    """
    Speech synthesis that can be cancelled.

    After construction, synthesis is performed through _synthesis_impl
    (note that it takes more arguments than the regular engine).

    For the parameters use_gpu and enable_mock see make_synthesis_engines.

    Attributes
    ----------
    watch_con_list: list[tuple[Request, Process]]
        Request is used to watch the connection, Process is used to kill the
        worker when the connection drops.
        A tuple is added when a client request starts and removed when the
        connection is closed or synthesis finishes.
    procs_and_cons: queue.Queue[tuple[Process, ConnectionType]]
        Worker processes that are ready for synthesis
        (processes that are currently synthesizing are not in it)
    """

    def __init__(
        self,
        init_processes: int,
        use_gpu: bool,
        enable_mock: bool,
        bridge_config_dir: Path,
    ) -> None:
        """
        Initialize the attributes, then start init_processes worker processes
        and store them in procs_and_cons.
        """

        self.use_gpu = use_gpu
        self.enable_mock = enable_mock
        self.bridge_config_dir = bridge_config_dir

        self.watch_con_list: list[tuple[Request, Process]] = []

        procs_and_cons: queue.Queue[tuple[Process, ConnectionType]] = queue.Queue()
        for _ in range(init_processes):
            procs_and_cons.put(self.start_new_proc())
        self.procs_and_cons = procs_and_cons

    def start_new_proc(
        self,
    ) -> tuple[Process, ConnectionType]:
        """
        Start a new worker process and return it.

        Returns
        -------
        ret_proc: Process
            The new process
        sub_proc_con1: ConnectionType
            Pipe end used to talk to ret_proc
        """
        sub_proc_con1, sub_proc_con2 = Pipe(True)
        ret_proc = Process(
            target=start_synthesis_subprocess,
            kwargs={
                "use_gpu": self.use_gpu,
                "enable_mock": self.enable_mock,
                "bridge_config_dir": self.bridge_config_dir,
                "sub_proc_con": sub_proc_con2,
            },
            daemon=True,
        )
        ret_proc.start()
        return ret_proc, sub_proc_con1

    def finalize_con(
        self,
        req: Request,
        proc: Process,
        sub_proc_con: ConnectionType | None,
    ) -> None:
        """
        Clean up after a request has ended.

        Removes the entry from watch_con_list and settles the process:
        a live process is put back into procs_and_cons as is, otherwise a
        newly started one is put there instead.

        Parameters
        ----------
        req: fastapi.Request
            The object received when the connection was established
            https://fastapi.tiangolo.com/advanced/using-request-directly/
        proc: Process
            The process that performed the synthesis
        sub_proc_con: ConnectionType, optional
            Pipe to that process.
            When None, the process is not reused and is closed
        """
        try:
            self.watch_con_list.remove((req, proc))
        except ValueError:
            pass
        try:
            if not proc.is_alive() or sub_proc_con is None:
                proc.close()
                # Route into the replacement branch below.
                raise ValueError
            # The process is still alive, so reuse it
            self.procs_and_cons.put((proc, sub_proc_con))
        except ValueError:
            # The process is gone, so create a new one
            self.procs_and_cons.put(self.start_new_proc())

    def _synthesis_impl(
        self,
        query: AudioQuery,
        style_id: int,
        request: Request,
        core_version: str | None,
    ) -> str:
        """
        Run speech synthesis in a worker process.

        Compared with the regular engine an additional request argument is
        required, and the return value is a file name.

        Parameters
        ----------
        query: AudioQuery
        style_id: int
        request: fastapi.Request
            The object received when the connection was established
            https://fastapi.tiangolo.com/advanced/using-request-directly/
        core_version: str

        Returns
        -------
        f_name: str
            Name of the generated audio file

        Raises
        ------
        HTTPException
            With status 422 when the pipe reports EOF
        """
        proc, sub_proc_con1 = self.procs_and_cons.get()
        self.watch_con_list.append((request, proc))
        try:
            sub_proc_con1.send((query, style_id, core_version))
            f_name = sub_proc_con1.recv()
        except EOFError:
            raise HTTPException(status_code=422, detail="既にサブプロセスは終了されています")
        except Exception:
            self.finalize_con(request, proc, sub_proc_con1)
            raise

        self.finalize_con(request, proc, sub_proc_con1)
        return f_name

    async def catch_disconnection(self):
        """
        Coroutine that watches the connections.

        Once per second every watched request is checked; when it is
        disconnected, its process is terminated and finalize_con is called.
        """
        while True:
            await asyncio.sleep(1)
            # Iterate over a snapshot: finalize_con removes entries from
            # watch_con_list, and the await below lets other code change it too.
            for req, proc in list(self.watch_con_list):
                if await req.is_disconnected():
                    try:
                        if proc.is_alive():
                            proc.terminate()
                            proc.join()
                        proc.close()
                    except ValueError:
                        pass
                    finally:
                        self.finalize_con(req, proc, None)


def start_synthesis_subprocess(
    use_gpu: bool,
    enable_mock: bool,
    bridge_config_dir: Path,
    sub_proc_con: ConnectionType,
) -> None:
    """
    Entry point of the worker process that performs speech synthesis.
    Defined at module level because of pickling.

    For the arguments use_gpu and enable_mock see make_synthesis_engines.

    Parameters
    ----------
    sub_proc_con: ConnectionType
        Pipe used to talk to the main process
    """

    synthesis_engines = make_synthesis_engines(
        use_gpu=use_gpu,
        enable_mock=enable_mock,
        bridge_config=BridgeConfigLoader(bridge_config_dir),
    )
    assert len(synthesis_engines) != 0, "音声合成エンジンがありません。"
    latest_core_version = get_latest_core_version(versions=synthesis_engines.keys())
    while True:
        try:
            query, style_id, core_version = sub_proc_con.recv()
            if core_version is None:
                _engine = synthesis_engines[latest_core_version]
            elif core_version in synthesis_engines:
                _engine = synthesis_engines[core_version]
            else:
                # Version not found: reply with an empty file name
                sub_proc_con.send("")
                continue
            wave = _engine._synthesis_impl(query, style_id)
            # delete=False: the file has to outlive this block, only its name is sent back.
            with NamedTemporaryFile(delete=False) as f:
                soundfile.write(
                    file=f, data=wave, samplerate=query.outputSamplingRate, format="WAV"
                )
            sub_proc_con.send(f.name)
        except Exception:
            sub_proc_con.close()
            raise

# --------------------------------------------------------------------------------
# /voicevox_engine/dev/core/__init__.py:
# --------------------------------------------------------------------------------
from .mock import (
    decode_forward,
    initialize,
    metas,
    supported_devices,
    yukarin_s_forward,
    yukarin_sa_forward,
)

__all__ = [
    "decode_forward",
    "initialize",
    "yukarin_s_forward",
    "yukarin_sa_forward",
    "metas",
    "supported_devices",
]

# --------------------------------------------------------------------------------
# /voicevox_engine/dev/core/mock.py:
# --------------------------------------------------------------------------------
import json
from logging import getLogger
from typing import Any, Dict, List

import numpy as np
from pyopenjtalk import tts
from scipy.signal import resample

DUMMY_TEXT = "これはダミーのテキストです"


def initialize(path: str, use_gpu: bool, *args: List[Any]) -> None:
    """Do nothing [Mock]."""
    pass


def yukarin_s_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:
    """Return an array of `length` values, all 0.2 [Mock]."""
    logger = getLogger("uvicorn")  # because this is used from inside FastAPI / Uvicorn
    logger.info(
        "Sorry, yukarin_s_forward() is a mock. Return values are incorrect.",
    )
    return np.ones(length) / 5


def yukarin_sa_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:
    """Return an array of shape (1, length), all values 5 [Mock]."""
    logger = getLogger("uvicorn")  # because this is used from inside FastAPI / Uvicorn
    logger.info(
        "Sorry, yukarin_sa_forward() is a mock. Return values are incorrect.",
    )
    return np.ones((1, length)) * 5


def decode_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:
    """
    Return synthesized waveform data as a NumPy array. It always reads out the
    same fixed sentence (DUMMY_TEXT).
    See also: docstring of SynthesisEngine [Mock]

    Parameters
    ----------
    length : int
        Frame length (not used by this mock)

    Returns
    -------
    wave : np.ndarray
        The synthesized waveform data

    Note
    -------
    The synthesis done here does not reflect voice tuning (pitch etc.).
    It also reads the same fixed sentence regardless of the input.

    # Output specification of pyopenjtalk.tts()
    dtype=np.float64, 16 bit, mono 48000 Hz

    # About resample
    The output is converted to 24kHz to match the non-mock decode_forward.
    """
    logger = getLogger("uvicorn")  # because this is used from inside FastAPI / Uvicorn
    logger.info(
        "Sorry, decode_forward() is a mock. Return values are incorrect.",
    )
    # The returned sampling rate is not needed; 48000 Hz is assumed below.
    wave, _sr = tts(DUMMY_TEXT)
    wave = resample(
        wave.astype("int16"),
        24000 * len(wave) // 48000,
    )
    return wave


def metas() -> str:
    """Return the JSON text describing the four dummy speakers [Mock]."""
    return json.dumps(
        [
            {
                "name": "dummy1",
                "styles": [
                    {"name": "style0", "id": 0},
                    {"name": "style1", "id": 2},
                    {"name": "style2", "id": 4},
                    {"name": "style3", "id": 6},
                ],
                "speaker_uuid": "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff",
                "version": "mock",
                "supported_features": {},
            },
            {
                "name": "dummy2",
                "styles": [
                    {"name": "style0", "id": 1},
                    {"name": "style1", "id": 3},
                    {"name": "style2", "id": 5},
                    {"name": "style3", "id": 7},
                ],
                "speaker_uuid": "388f246b-8c41-4ac1-8e2d-5d79f3ff56d9",
                "version": "mock",
                "supported_features": {},
            },
            {
                "name": "dummy3",
                "styles": [
                    {"name": "style0", "id": 8},
                ],
                "speaker_uuid": "35b2c544-660e-401e-b503-0e14c635303a",
                "version": "mock",
                "supported_features": {},
            },
            {
                "name": "dummy4",
                "styles": [
                    {"name": "style0", "id": 9},
                ],
                "speaker_uuid": "b1a81618-b27b-40d2-b0ea-27a9ad408c4b",
                "version": "mock",
                "supported_features": {},
            },
        ]
    )


def supported_devices() -> str:
    """Return the JSON text of the device support table: cpu only [Mock]."""
    return json.dumps(
        {
            "cpu": True,
            "cuda": False,
        }
    )

# --------------------------------------------------------------------------------
# /voicevox_engine/dev/synthesis_engine/__init__.py:
# --------------------------------------------------------------------------------
from .mock import MockSynthesisEngine

__all__ = ["MockSynthesisEngine"]

# --------------------------------------------------------------------------------
# /voicevox_engine/dev/synthesis_engine/mock.py:
# --------------------------------------------------------------------------------
from logging import getLogger
from typing import Any, Dict, List, Optional

import numpy as np
from pyopenjtalk import tts
from scipy.signal import resample

from ...model import AccentPhrase, AudioQuery
from ...synthesis_engine import SynthesisEngineBase
from ...synthesis_engine.synthesis_engine import to_flatten_moras


class MockSynthesisEngine(SynthesisEngineBase):
    """
    SynthesisEngine [Mock]
    """

    def __init__(
        self,
        speakers: str,
        supported_devices: Optional[str] = None,
    ):
        """
        __init__ [Mock]
        """
        super().__init__()

        self._speakers = speakers
        self._supported_devices = supported_devices
        self.default_sampling_rate = 24000

    @property
    def speakers(self) -> str:
        return self._speakers

    @property
    def supported_devices(self) -> Optional[str]:
        return self._supported_devices

    def replace_phoneme_length(
        self, accent_phrases: List[AccentPhrase], style_id: int
    ) -> List[AccentPhrase]:
        """
        replace_phoneme_length: return the input accent_phrases unchanged [Mock]

        Parameters
        ----------
        accent_phrases : List[AccentPhrase]
            List of accent phrases
        style_id : int
            Style ID

        Returns
        -------
        List[AccentPhrase]
            List of accent phrases (unchanged)
        """
        return accent_phrases

    def replace_mora_pitch(
        self, accent_phrases: List[AccentPhrase], style_id: int
    ) -> List[AccentPhrase]:
        """
        replace_mora_pitch: return the input accent_phrases unchanged [Mock]

        Parameters
        ----------
        accent_phrases : List[AccentPhrase]
            List of accent phrases
        style_id : int
            Style ID

        Returns
        -------
        List[AccentPhrase]
            List of accent phrases (unchanged)
        """
        return accent_phrases

    def _synthesis_impl(self, query: AudioQuery, style_id: int) -> np.ndarray:
        """
        synthesis: synthesize speech without using voicevox core [Mock]

        Parameters
        ----------
        query : AudioQuery
            JSON obtained from the /audio_query API
        style_id : int
            Style ID

        Returns
        -------
        wave [npt.NDArray[np.int16]]
            Waveform data as a NumPy array
        """
        # recall text in katakana
        flatten_moras = to_flatten_moras(query.accent_phrases)
        kana_text = "".join([mora.text for mora in flatten_moras])

        wave = self.forward(kana_text)

        # volume
        wave *= query.volumeScale

        return wave.astype("int16")

    def forward(self, text: str, **kwargs: Dict[str, Any]) -> np.ndarray:
        """
        forward: tts via pyopenjtalk.tts()
        See also: docstring of SynthesisEngine [Mock]

        Parameters
        ----------
        text : str
            Input string (e.g. the sentence to read, written in katakana)

        Returns
        -------
        wave : np.ndarray
            Resampled waveform data; the caller converts it to int16

        Note
        -------
        The synthesis done here does not reflect voice tuning (pitch etc.).

        # Output specification of pyopenjtalk.tts()
        dtype=np.float64, 16 bit, mono 48000 Hz

        # About resample
        The output is converted to 24kHz to match the non-mock
        implementation (decode_forward).
        """
        logger = getLogger("uvicorn")  # because this is used from inside FastAPI / Uvicorn
        # Pass the argument separately so formatting only happens if the record is emitted.
        logger.info("[Mock] input text: %s", text)
        # The returned sampling rate is not needed; 48000 Hz is assumed below.
        wave, _sr = tts(text)
        wave = resample(wave, 24000 * len(wave) // 48000)
        return wave

# --------------------------------------------------------------------------------
# /voicevox_engine/engine_manifest/EngineManifest.py:
# --------------------------------------------------------------------------------
# In a multi-engine environment it is quite possible that the engine version is
# older than the editor version. In that case the editor may raise errors and
# its display may break because EngineManifest lacks information. To prevent
# this, keep in mind that anything changed in the EngineManifest definitions
# has to be made Optional.

from typing import List, Optional

from pydantic import BaseModel, Field


class UpdateInfo(BaseModel):
    """
    Update information of the engine
    """

    version: str = Field(title="エンジンのバージョン名")
    descriptions: List[str] = Field(title="アップデートの詳細についての説明")
    contributors: Optional[List[str]] = Field(title="貢献者名")


class LicenseInfo(BaseModel):
    """
    License information of a dependency
    """

    name: str = Field(title="依存ライブラリ名")
    version: Optional[str] = Field(title="依存ライブラリのバージョン")
    license: Optional[str] = Field(title="依存ライブラリのライセンス名")
    text: str = Field(title="依存ライブラリのライセンス本文")


class SupportedFeatures(BaseModel):
    """
    List of the features the engine has
    """

    adjust_mora_pitch: bool = Field(title="モーラごとの音高の調整")
    adjust_phoneme_length: bool = Field(title="音素ごとの長さの調整")
    adjust_speed_scale: bool = Field(title="全体の話速の調整")
    adjust_pitch_scale: bool = Field(title="全体の音高の調整")
    adjust_intonation_scale: bool = Field(title="全体の抑揚の調整")
    adjust_volume_scale: bool = Field(title="全体の音量の調整")
    interrogative_upspeak: bool = Field(title="疑問文の自動調整")
    synthesis_morphing: bool = Field(title="2人の話者でモーフィングした音声を合成")
    manage_library: Optional[bool] = Field(title="音声ライブラリのインストール・アンインストール")


class EngineManifest(BaseModel):
    """
    Information about the engine itself
    """

    manifest_version: str = Field(title="マニフェストのバージョン")
    name: str = Field(title="エンジン名")
    brand_name: str = Field(title="ブランド名")
    uuid: str = Field(title="エンジンのUUID")
    url: str = Field(title="エンジンのURL")
    icon: str = Field(title="エンジンのアイコンをBASE64エンコードしたもの")
    default_sampling_rate: int = Field(title="デフォルトのサンプリング周波数")
    terms_of_service: str = Field(title="エンジンの利用規約")
    update_infos: List[UpdateInfo] = Field(title="エンジンのアップデート情報")
    dependency_licenses: List[LicenseInfo] = Field(title="依存関係のライセンス情報")
    supported_vvlib_manifest_version: Optional[str] = Field(
        title="エンジンが対応するvvlibのバージョン"
    )
    supported_features: SupportedFeatures = Field(title="エンジンが持つ機能")

# --------------------------------------------------------------------------------
# /voicevox_engine/engine_manifest/EngineManifestLoader.py:
# --------------------------------------------------------------------------------
import json
from base64 import b64encode
from pathlib import Path
from typing import Tuple

from ..utility import engine_root
from .EngineManifest import EngineManifest, LicenseInfo, UpdateInfo


class EngineManifestLoader:
    """Read engine_manifest.json and the asset files it points to."""

    def __init__(
        self,
        manifest_path: Path = engine_root() / "engine_manifest.json",  # noqa: B008
        root_dir: Path = engine_root(),  # noqa: B008
    ):
        """
        Parameters
        ----------
        manifest_path : Path
            Location of the manifest JSON file
        root_dir : Path
            Directory against which asset paths in the manifest are resolved
        """
        self.manifest_path = manifest_path
        self.root_dir = root_dir

    def _read_raw_manifest(self) -> dict:
        """Parse the manifest file (UTF-8 JSON) and return the decoded object."""
        return json.loads(self.manifest_path.read_text(encoding="utf-8"))

    def _read_asset_text(self, relative_path: str) -> str:
        """Return the UTF-8 text of an asset file located under root_dir."""
        return (self.root_dir / relative_path).read_text("utf-8")

    def load_manifest(self) -> EngineManifest:
        """
        Build the EngineManifest model.

        The icon is embedded base64-encoded, the terms of service as text, and
        update infos / dependency licenses are loaded from their JSON files.

        Returns
        -------
        EngineManifest
            The populated manifest
        """
        raw = self._read_raw_manifest()

        return EngineManifest(
            manifest_version=raw["manifest_version"],
            name=raw["name"],
            brand_name=raw["brand_name"],
            uuid=raw["uuid"],
            url=raw["url"],
            default_sampling_rate=raw["default_sampling_rate"],
            icon=b64encode((self.root_dir / raw["icon"]).read_bytes()).decode("utf-8"),
            terms_of_service=self._read_asset_text(raw["terms_of_service"]),
            update_infos=[
                UpdateInfo(**entry)
                for entry in json.loads(self._read_asset_text(raw["update_infos"]))
            ],
            # For engine manifests without supported_vvlib_manifest_version,
            # use get(), which yields None when the key is absent
            supported_vvlib_manifest_version=raw.get(
                "supported_vvlib_manifest_version"
            ),
            dependency_licenses=[
                LicenseInfo(**entry)
                for entry in json.loads(
                    self._read_asset_text(raw["dependency_licenses"])
                )
            ],
            supported_features={
                feature: detail["value"]
                for feature, detail in raw["supported_features"].items()
            },
        )

    def load_info_for_bridge_config(self) -> Tuple[str, int, int]:
        """
        Return the manifest's "version", "port" and "default_sampling_rate"
        entries, in that order.
        """
        raw = self._read_raw_manifest()
        return raw["version"], raw["port"], raw["default_sampling_rate"]

    def load_version(self) -> str:
        """Return the manifest's "version" entry."""
        return self._read_raw_manifest()["version"]

# --------------------------------------------------------------------------------
# /voicevox_engine/engine_manifest/__init__.py:
# --------------------------------------------------------------------------------
from .EngineManifest import EngineManifest
from .EngineManifestLoader import EngineManifestLoader

__all__ = [
    "EngineManifest",
    "EngineManifestLoader",
]

# --------------------------------------------------------------------------------
# /voicevox_engine/kana_parser.py:
# --------------------------------------------------------------------------------
from typing import List, Optional

from .model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode
from .mora_list import openjtalk_text2mora

# Symbols and limits used by the kana parser in this module.
LOOP_LIMIT = 300
UNVOICE_SYMBOL = "_"
ACCENT_SYMBOL = "'"
NOPAUSE_DELIMITER = "/"
PAUSE_DELIMITER = "、"
# NOTE(review): the name says "wide" but the literal is the ASCII question mark
# as it appears here; confirm the character was not width-normalized.
WIDE_INTERROGATION_MARK = "?"
def _make_blank_mora(kana: str, consonant: str, vowel: str) -> Mora:
    """
    Create a Mora for ``kana`` with zero lengths and zero pitch.

    An empty ``consonant`` means the mora has no consonant, in which case
    both the consonant and its length are stored as None.
    """
    has_consonant = len(consonant) > 0
    return Mora(
        text=kana,
        consonant=consonant if has_consonant else None,
        consonant_length=0 if has_consonant else None,
        vowel=vowel,
        vowel_length=0,
        pitch=0,
        is_interrogative=False,
    )


# Lookup table from kana text to a template Mora.
# For the five plain vowels an extra entry prefixed with UNVOICE_SYMBOL is
# registered whose vowel is upper-cased (the unvoiced form).
text2mora_with_unvoice = {}
for text, (consonant, vowel) in openjtalk_text2mora.items():
    text2mora_with_unvoice[text] = _make_blank_mora(text, consonant, vowel)
    if vowel in ("a", "i", "u", "e", "o"):
        text2mora_with_unvoice[UNVOICE_SYMBOL + text] = _make_blank_mora(
            text, consonant, vowel.upper()
        )
def parse_kana(text: str) -> List[AccentPhrase]:
    """
    Parse AquesTalk-like kana notation into accent phrases.

    The text is split at PAUSE_DELIMITER and NOPAUSE_DELIMITER; every piece
    is converted with ``_text_to_accent_phrase``. A piece followed by
    PAUSE_DELIMITER gets a pause mora, and a piece ending with
    WIDE_INTERROGATION_MARK is flagged as interrogative.

    Parameters
    ----------
    text : str
        Kana notation to parse

    Returns
    -------
    ret: List[AccentPhrase]
        Accent phrases in input order

    Raises
    ------
    ParseKanaError
        When the text or one of its phrases is empty, or when the
        interrogation mark appears anywhere but at the end of a phrase
        (errors raised by ``_text_to_accent_phrase`` propagate as well)
    """

    parsed_results: List[AccentPhrase] = []
    phrase_base = 0
    if len(text) == 0:
        # position is passed as a string, consistent with the raise below
        raise ParseKanaError(ParseKanaErrorCode.EMPTY_PHRASE, position="1")

    # Iterate one index past the end so the final phrase is flushed too.
    for i in range(len(text) + 1):
        at_end = i == len(text)
        if not at_end and text[i] not in [PAUSE_DELIMITER, NOPAUSE_DELIMITER]:
            continue

        phrase = text[phrase_base:i]
        if len(phrase) == 0:
            raise ParseKanaError(
                ParseKanaErrorCode.EMPTY_PHRASE,
                position=str(len(parsed_results) + 1),
            )
        phrase_base = i + 1

        is_interrogative = WIDE_INTERROGATION_MARK in phrase
        if is_interrogative:
            # The mark is only allowed as the very last character.
            if WIDE_INTERROGATION_MARK in phrase[:-1]:
                raise ParseKanaError(
                    ParseKanaErrorCode.INTERROGATION_MARK_NOT_AT_END, text=phrase
                )
            phrase = phrase.replace(WIDE_INTERROGATION_MARK, "")

        accent_phrase: AccentPhrase = _text_to_accent_phrase(phrase)
        if not at_end and text[i] == PAUSE_DELIMITER:
            accent_phrase.pause_mora = Mora(
                text="、",
                consonant=None,
                consonant_length=None,
                vowel="pau",
                vowel_length=0,
                pitch=0,
            )
        accent_phrase.is_interrogative = is_interrogative

        parsed_results.append(accent_phrase)

    return parsed_results
class CoreSpeaker(BaseModel):
    """
    Speaker information contained in the core.
    """

    # Speaker name
    name: str = Field(title="名前")
    # UUID of the speaker
    speaker_uuid: str = Field(title="スピーカーのUUID")
    # List of the speaker's styles
    styles: List[SpeakerStyle] = Field(title="スピーカースタイルの一覧")
    # Version of the speaker.
    # The description must be given as ``title=``: the first positional
    # argument of ``Field`` is the default value, so passing the text
    # positionally made it the default of ``version`` instead of its title.
    version: str = Field(title="スピーカーのバージョン")
class MetasStore:
    """
    Manages the meta information of speakers and styles.
    """

    def __init__(self, engine: "SynthesisEngineBase") -> None:
        """
        Collect the engine-side metas of every speaker in ``engine.speakers``
        (a JSON string), keyed by speaker UUID.
        """
        loaded: Dict[str, EngineSpeaker] = {}
        for entry in json.loads(engine.speakers):
            uuid = entry["speaker_uuid"]
            loaded[uuid] = EngineSpeaker(
                supported_features=entry["supported_features"]
            )
        self._loaded_metas: Dict[str, EngineSpeaker] = loaded

    def speaker_engine_metas(self, speaker_uuid: str) -> EngineSpeaker:
        """Return the engine-side metas stored for ``speaker_uuid``."""
        return self.loaded_metas[speaker_uuid]

    def combine_metas(self, core_metas: List[CoreSpeaker]) -> List[Speaker]:
        """
        Merge the stored engine-side metas into the given core metas.

        core_metas: models of the JSON returned by the core's metas()
        """
        combined: List[Speaker] = []
        for core_meta in core_metas:
            engine_meta = self.speaker_engine_metas(core_meta.speaker_uuid)
            combined.append(Speaker(**engine_meta.dict(), **core_meta.dict()))
        return combined

    # FIXME: take List[CoreSpeaker] instead of the engine so that the
    # circular import caused by SynthesisEngineBase can be removed
    def load_combined_metas(self, engine: "SynthesisEngineBase") -> List[Speaker]:
        """
        Return metas holding both core and engine information for the
        speakers of the given engine.
        """
        raw_speakers = json.loads(engine.speakers)
        core_metas = [CoreSpeaker(**raw) for raw in raw_speakers]
        return self.combine_metas(core_metas)

    @property
    def loaded_metas(self) -> Dict[str, EngineSpeaker]:
        """Engine-side metas keyed by speaker UUID."""
        return self._loaded_metas


def construct_lookup(speakers: List[Speaker]) -> Dict[int, Tuple[Speaker, StyleInfo]]:
    """
    Build a lookup table keyed by style ID.

    Each value is the tuple ``(speaker, style)``: the speaker owning the
    style and the style entry taken from ``speaker.styles``. When a style ID
    occurs more than once, the last occurrence wins.
    """
    return {
        style.id: (speaker, style)
        for speaker in speakers
        for style in speaker.styles
    }
22 | - Redistributions in binary form must reproduce the above 23 | copyright notice, this list of conditions and the following 24 | disclaimer in the documentation and/or other materials provided 25 | with the distribution. 26 | - Neither the name of the HTS working group nor the names of its 27 | contributors may be used to endorse or promote products derived 28 | from this software without specific prior written permission. 29 | 30 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 31 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 32 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 33 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 34 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS 35 | BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 36 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 37 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 38 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 39 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 40 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 41 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 42 | POSSIBILITY OF SUCH DAMAGE. 
43 | """ 44 | _mora_list_minimum = [ 45 | ["ヴォ", "v", "o"], 46 | ["ヴェ", "v", "e"], 47 | ["ヴィ", "v", "i"], 48 | ["ヴァ", "v", "a"], 49 | ["ヴ", "v", "u"], 50 | ["ン", "", "N"], 51 | ["ワ", "w", "a"], 52 | ["ロ", "r", "o"], 53 | ["レ", "r", "e"], 54 | ["ル", "r", "u"], 55 | ["リョ", "ry", "o"], 56 | ["リュ", "ry", "u"], 57 | ["リャ", "ry", "a"], 58 | ["リェ", "ry", "e"], 59 | ["リ", "r", "i"], 60 | ["ラ", "r", "a"], 61 | ["ヨ", "y", "o"], 62 | ["ユ", "y", "u"], 63 | ["ヤ", "y", "a"], 64 | ["モ", "m", "o"], 65 | ["メ", "m", "e"], 66 | ["ム", "m", "u"], 67 | ["ミョ", "my", "o"], 68 | ["ミュ", "my", "u"], 69 | ["ミャ", "my", "a"], 70 | ["ミェ", "my", "e"], 71 | ["ミ", "m", "i"], 72 | ["マ", "m", "a"], 73 | ["ポ", "p", "o"], 74 | ["ボ", "b", "o"], 75 | ["ホ", "h", "o"], 76 | ["ペ", "p", "e"], 77 | ["ベ", "b", "e"], 78 | ["ヘ", "h", "e"], 79 | ["プ", "p", "u"], 80 | ["ブ", "b", "u"], 81 | ["フォ", "f", "o"], 82 | ["フェ", "f", "e"], 83 | ["フィ", "f", "i"], 84 | ["ファ", "f", "a"], 85 | ["フ", "f", "u"], 86 | ["ピョ", "py", "o"], 87 | ["ピュ", "py", "u"], 88 | ["ピャ", "py", "a"], 89 | ["ピェ", "py", "e"], 90 | ["ピ", "p", "i"], 91 | ["ビョ", "by", "o"], 92 | ["ビュ", "by", "u"], 93 | ["ビャ", "by", "a"], 94 | ["ビェ", "by", "e"], 95 | ["ビ", "b", "i"], 96 | ["ヒョ", "hy", "o"], 97 | ["ヒュ", "hy", "u"], 98 | ["ヒャ", "hy", "a"], 99 | ["ヒェ", "hy", "e"], 100 | ["ヒ", "h", "i"], 101 | ["パ", "p", "a"], 102 | ["バ", "b", "a"], 103 | ["ハ", "h", "a"], 104 | ["ノ", "n", "o"], 105 | ["ネ", "n", "e"], 106 | ["ヌ", "n", "u"], 107 | ["ニョ", "ny", "o"], 108 | ["ニュ", "ny", "u"], 109 | ["ニャ", "ny", "a"], 110 | ["ニェ", "ny", "e"], 111 | ["ニ", "n", "i"], 112 | ["ナ", "n", "a"], 113 | ["ドゥ", "d", "u"], 114 | ["ド", "d", "o"], 115 | ["トゥ", "t", "u"], 116 | ["ト", "t", "o"], 117 | ["デョ", "dy", "o"], 118 | ["デュ", "dy", "u"], 119 | ["デャ", "dy", "a"], 120 | ["デェ", "dy", "e"], 121 | ["ディ", "d", "i"], 122 | ["デ", "d", "e"], 123 | ["テョ", "ty", "o"], 124 | ["テュ", "ty", "u"], 125 | ["テャ", "ty", "a"], 126 | ["ティ", "t", "i"], 127 | ["テ", "t", "e"], 128 | ["ツォ", "ts", "o"], 129 | 
["ツェ", "ts", "e"], 130 | ["ツィ", "ts", "i"], 131 | ["ツァ", "ts", "a"], 132 | ["ツ", "ts", "u"], 133 | ["ッ", "", "cl"], 134 | ["チョ", "ch", "o"], 135 | ["チュ", "ch", "u"], 136 | ["チャ", "ch", "a"], 137 | ["チェ", "ch", "e"], 138 | ["チ", "ch", "i"], 139 | ["ダ", "d", "a"], 140 | ["タ", "t", "a"], 141 | ["ゾ", "z", "o"], 142 | ["ソ", "s", "o"], 143 | ["ゼ", "z", "e"], 144 | ["セ", "s", "e"], 145 | ["ズィ", "z", "i"], 146 | ["ズ", "z", "u"], 147 | ["スィ", "s", "i"], 148 | ["ス", "s", "u"], 149 | ["ジョ", "j", "o"], 150 | ["ジュ", "j", "u"], 151 | ["ジャ", "j", "a"], 152 | ["ジェ", "j", "e"], 153 | ["ジ", "j", "i"], 154 | ["ショ", "sh", "o"], 155 | ["シュ", "sh", "u"], 156 | ["シャ", "sh", "a"], 157 | ["シェ", "sh", "e"], 158 | ["シ", "sh", "i"], 159 | ["ザ", "z", "a"], 160 | ["サ", "s", "a"], 161 | ["ゴ", "g", "o"], 162 | ["コ", "k", "o"], 163 | ["ゲ", "g", "e"], 164 | ["ケ", "k", "e"], 165 | ["グヮ", "gw", "a"], 166 | ["グ", "g", "u"], 167 | ["クヮ", "kw", "a"], 168 | ["ク", "k", "u"], 169 | ["ギョ", "gy", "o"], 170 | ["ギュ", "gy", "u"], 171 | ["ギャ", "gy", "a"], 172 | ["ギェ", "gy", "e"], 173 | ["ギ", "g", "i"], 174 | ["キョ", "ky", "o"], 175 | ["キュ", "ky", "u"], 176 | ["キャ", "ky", "a"], 177 | ["キェ", "ky", "e"], 178 | ["キ", "k", "i"], 179 | ["ガ", "g", "a"], 180 | ["カ", "k", "a"], 181 | ["オ", "", "o"], 182 | ["エ", "", "e"], 183 | ["ウォ", "w", "o"], 184 | ["ウェ", "w", "e"], 185 | ["ウィ", "w", "i"], 186 | ["ウ", "", "u"], 187 | ["イェ", "y", "e"], 188 | ["イ", "", "i"], 189 | ["ア", "", "a"], 190 | ] 191 | _mora_list_additional = [ 192 | ["ヴョ", "by", "o"], 193 | ["ヴュ", "by", "u"], 194 | ["ヴャ", "by", "a"], 195 | ["ヲ", "", "o"], 196 | ["ヱ", "", "e"], 197 | ["ヰ", "", "i"], 198 | ["ヮ", "w", "a"], 199 | ["ョ", "y", "o"], 200 | ["ュ", "y", "u"], 201 | ["ヅ", "z", "u"], 202 | ["ヂ", "j", "i"], 203 | ["ヶ", "k", "e"], 204 | ["ャ", "y", "a"], 205 | ["ォ", "", "o"], 206 | ["ェ", "", "e"], 207 | ["ゥ", "", "u"], 208 | ["ィ", "", "i"], 209 | ["ァ", "", "a"], 210 | ] 211 | 212 | openjtalk_mora2text = { 213 | consonant + vowel: text for [text, consonant, 
vowel] in _mora_list_minimum 214 | } 215 | openjtalk_text2mora = { 216 | text: (consonant, vowel) 217 | for [text, consonant, vowel] in _mora_list_minimum + _mora_list_additional 218 | } 219 | -------------------------------------------------------------------------------- /voicevox_engine/morphing.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from dataclasses import dataclass 3 | from itertools import chain 4 | from typing import Dict, List, Tuple 5 | 6 | import numpy as np 7 | import pyworld as pw 8 | from scipy.signal import resample 9 | 10 | from .metas.Metas import Speaker, SpeakerSupportPermittedSynthesisMorphing, StyleInfo 11 | from .metas.MetasStore import construct_lookup 12 | from .model import AudioQuery, MorphableTargetInfo, StyleIdNotFoundError 13 | from .synthesis_engine import SynthesisEngine 14 | 15 | 16 | # FIXME: ndarray type hint, https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder/blob/2b64f86197573497c685c785c6e0e743f407b63e/pyworld/pyworld.pyx#L398 # noqa 17 | @dataclass(frozen=True) 18 | class MorphingParameter: 19 | fs: int 20 | frame_period: float 21 | base_f0: np.ndarray 22 | base_aperiodicity: np.ndarray 23 | base_spectrogram: np.ndarray 24 | target_spectrogram: np.ndarray 25 | 26 | 27 | def create_morphing_parameter( 28 | base_wave: np.ndarray, 29 | target_wave: np.ndarray, 30 | fs: int, 31 | ) -> MorphingParameter: 32 | frame_period = 1.0 33 | base_f0, base_time_axis = pw.harvest(base_wave, fs, frame_period=frame_period) 34 | base_spectrogram = pw.cheaptrick(base_wave, base_f0, base_time_axis, fs) 35 | base_aperiodicity = pw.d4c(base_wave, base_f0, base_time_axis, fs) 36 | 37 | target_f0, morph_time_axis = pw.harvest(target_wave, fs, frame_period=frame_period) 38 | target_spectrogram = pw.cheaptrick(target_wave, target_f0, morph_time_axis, fs) 39 | target_spectrogram.resize(base_spectrogram.shape) 40 | 41 | return MorphingParameter( 42 | fs=fs, 43 | 
def is_synthesis_morphing_permitted(
    speaker_lookup: Dict[int, Tuple[Speaker, StyleInfo]],
    base_speaker: int,
    target_speaker: int,
) -> bool:
    """
    Return whether morphing between the two given style IDs is permitted.

    Parameters
    ----------
    speaker_lookup : Dict[int, Tuple[Speaker, StyleInfo]]
        Table from style ID to ``(speaker, style)``
    base_speaker : int
        Style ID of the base side
    target_speaker : int
        Style ID of the target side

    Returns
    -------
    ret: bool
        True when both speakers permit the morphing

    Raises
    ------
    StyleIdNotFoundError
        When either style ID is missing from ``speaker_lookup``
        (the base ID is reported first when both are missing)
    """

    # Use get(): plain indexing raises KeyError for an unknown ID, which
    # would bypass the StyleIdNotFoundError handling below.
    base_speaker_data = speaker_lookup.get(base_speaker)
    target_speaker_data = speaker_lookup.get(target_speaker)

    if base_speaker_data is None or target_speaker_data is None:
        raise StyleIdNotFoundError(
            base_speaker if base_speaker_data is None else target_speaker
        )

    base_speaker_info, _ = base_speaker_data
    target_speaker_info, _ = target_speaker_data

    base_speaker_uuid = base_speaker_info.speaker_uuid
    target_speaker_uuid = target_speaker_info.speaker_uuid

    base_speaker_morphing_info: SpeakerSupportPermittedSynthesisMorphing = (
        base_speaker_info.supported_features.permitted_synthesis_morphing
    )

    target_speaker_morphing_info: SpeakerSupportPermittedSynthesisMorphing = (
        target_speaker_info.supported_features.permitted_synthesis_morphing
    )

    # Forbidden by either side -> not permitted
    if (
        base_speaker_morphing_info == SpeakerSupportPermittedSynthesisMorphing.NOTHING
        or target_speaker_morphing_info
        == SpeakerSupportPermittedSynthesisMorphing.NOTHING
    ):
        return False
    # Either side allows only itself -> permitted only within the same speaker
    if (
        base_speaker_morphing_info == SpeakerSupportPermittedSynthesisMorphing.SELF_ONLY
        or target_speaker_morphing_info
        == SpeakerSupportPermittedSynthesisMorphing.SELF_ONLY
    ):
        return base_speaker_uuid == target_speaker_uuid
    # Just in case, confirm that both sides explicitly allow everything
    return (
        base_speaker_morphing_info == SpeakerSupportPermittedSynthesisMorphing.ALL
        and target_speaker_morphing_info == SpeakerSupportPermittedSynthesisMorphing.ALL
    )
`synthesis_morphing_parameter`または`create_morphing_parameter`で作成したパラメータ 167 | 168 | morph_rate : float 169 | モーフィングの割合 170 | 0.0でベースの話者、1.0でターゲットの話者に近づきます。 171 | 172 | Returns 173 | ------- 174 | generated : np.ndarray 175 | モーフィングした音声 176 | 177 | Raises 178 | ------- 179 | ValueError 180 | morph_rate ∈ [0, 1] 181 | """ 182 | 183 | if morph_rate < 0.0 or morph_rate > 1.0: 184 | raise ValueError("morph_rateは0.0から1.0の範囲で指定してください") 185 | 186 | morph_spectrogram = ( 187 | morph_param.base_spectrogram * (1.0 - morph_rate) 188 | + morph_param.target_spectrogram * morph_rate 189 | ) 190 | 191 | y_h = pw.synthesize( 192 | morph_param.base_f0, 193 | morph_spectrogram, 194 | morph_param.base_aperiodicity, 195 | morph_param.fs, 196 | morph_param.frame_period, 197 | ) 198 | 199 | # TODO: synthesis_engine.py でのリサンプル処理と共通化する 200 | if output_fs != morph_param.fs: 201 | y_h = resample(y_h, output_fs * len(y_h) // morph_param.fs) 202 | 203 | if output_stereo: 204 | y_h = np.array([y_h, y_h]).T 205 | 206 | return y_h 207 | -------------------------------------------------------------------------------- /voicevox_engine/part_of_speech_data.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | from .model import ( 4 | USER_DICT_MAX_PRIORITY, 5 | USER_DICT_MIN_PRIORITY, 6 | PartOfSpeechDetail, 7 | WordTypes, 8 | ) 9 | 10 | MIN_PRIORITY = USER_DICT_MIN_PRIORITY 11 | MAX_PRIORITY = USER_DICT_MAX_PRIORITY 12 | 13 | part_of_speech_data: Dict[WordTypes, PartOfSpeechDetail] = { 14 | WordTypes.PROPER_NOUN: PartOfSpeechDetail( 15 | part_of_speech="名詞", 16 | part_of_speech_detail_1="固有名詞", 17 | part_of_speech_detail_2="一般", 18 | part_of_speech_detail_3="*", 19 | context_id=1348, 20 | cost_candidates=[ 21 | -988, 22 | 3488, 23 | 4768, 24 | 6048, 25 | 7328, 26 | 8609, 27 | 8734, 28 | 8859, 29 | 8984, 30 | 9110, 31 | 14176, 32 | ], 33 | accent_associative_rules=[ 34 | "*", 35 | "C1", 36 | "C2", 37 | "C3", 38 | "C4", 39 | "C5", 40 | 
], 41 | ), 42 | WordTypes.COMMON_NOUN: PartOfSpeechDetail( 43 | part_of_speech="名詞", 44 | part_of_speech_detail_1="一般", 45 | part_of_speech_detail_2="*", 46 | part_of_speech_detail_3="*", 47 | context_id=1345, 48 | cost_candidates=[ 49 | -4445, 50 | 49, 51 | 1473, 52 | 2897, 53 | 4321, 54 | 5746, 55 | 6554, 56 | 7362, 57 | 8170, 58 | 8979, 59 | 15001, 60 | ], 61 | accent_associative_rules=[ 62 | "*", 63 | "C1", 64 | "C2", 65 | "C3", 66 | "C4", 67 | "C5", 68 | ], 69 | ), 70 | WordTypes.VERB: PartOfSpeechDetail( 71 | part_of_speech="動詞", 72 | part_of_speech_detail_1="自立", 73 | part_of_speech_detail_2="*", 74 | part_of_speech_detail_3="*", 75 | context_id=642, 76 | cost_candidates=[ 77 | 3100, 78 | 6160, 79 | 6360, 80 | 6561, 81 | 6761, 82 | 6962, 83 | 7414, 84 | 7866, 85 | 8318, 86 | 8771, 87 | 13433, 88 | ], 89 | accent_associative_rules=[ 90 | "*", 91 | ], 92 | ), 93 | WordTypes.ADJECTIVE: PartOfSpeechDetail( 94 | part_of_speech="形容詞", 95 | part_of_speech_detail_1="自立", 96 | part_of_speech_detail_2="*", 97 | part_of_speech_detail_3="*", 98 | context_id=20, 99 | cost_candidates=[ 100 | 1527, 101 | 3266, 102 | 3561, 103 | 3857, 104 | 4153, 105 | 4449, 106 | 5149, 107 | 5849, 108 | 6549, 109 | 7250, 110 | 10001, 111 | ], 112 | accent_associative_rules=[ 113 | "*", 114 | ], 115 | ), 116 | WordTypes.SUFFIX: PartOfSpeechDetail( 117 | part_of_speech="名詞", 118 | part_of_speech_detail_1="接尾", 119 | part_of_speech_detail_2="一般", 120 | part_of_speech_detail_3="*", 121 | context_id=1358, 122 | cost_candidates=[ 123 | 4399, 124 | 5373, 125 | 6041, 126 | 6710, 127 | 7378, 128 | 8047, 129 | 9440, 130 | 10834, 131 | 12228, 132 | 13622, 133 | 15847, 134 | ], 135 | accent_associative_rules=[ 136 | "*", 137 | "C1", 138 | "C2", 139 | "C3", 140 | "C4", 141 | "C5", 142 | ], 143 | ), 144 | } 145 | -------------------------------------------------------------------------------- /voicevox_engine/preset/Preset.py: 
class PresetManager:
    """
    Loads, caches and persists the presets stored in a YAML file.
    """

    def __init__(
        self,
        preset_path: Path,
    ):
        self.presets = []
        # mtime of the file at the last successful load (0 = never loaded)
        self.last_modified_time = 0
        self.preset_path = preset_path

    def _write_presets(self) -> None:
        """
        Dump the in-memory presets to the YAML file.

        Raises
        ------
        PresetError
            When the file cannot be opened because its path does not exist
        """
        try:
            with open(self.preset_path, mode="w", encoding="utf-8") as f:
                yaml.safe_dump(
                    [preset.dict() for preset in self.presets],
                    f,
                    allow_unicode=True,
                    sort_keys=False,
                )
        except FileNotFoundError as err:
            raise PresetError("プリセットの設定ファイルに書き込み失敗しました") from err

    def load_presets(self):
        """
        Load the preset YAML file.

        The file is parsed again only when its modification time differs
        from the one recorded at the previous load.

        Returns
        -------
        ret: List[Preset]
            List of presets

        Raises
        ------
        PresetError
            When the file is missing, empty, invalid, or has duplicate IDs
        """

        # Check the timestamp of the preset file
        try:
            _last_modified_time = self.preset_path.stat().st_mtime
        except OSError as err:
            raise PresetError("プリセットの設定ファイルが見つかりません") from err
        if _last_modified_time == self.last_modified_time:
            return self.presets

        with open(self.preset_path, mode="r", encoding="utf-8") as f:
            obj = yaml.safe_load(f)
        if obj is None:
            raise PresetError("プリセットの設定ファイルが空の内容です")

        try:
            _presets = parse_obj_as(List[Preset], obj)
        except ValidationError as err:
            raise PresetError("プリセットの設定ファイルにミスがあります") from err

        # Make sure every id is unique
        preset_ids = [preset.id for preset in _presets]
        if len(preset_ids) != len(set(preset_ids)):
            raise PresetError("プリセットのidに重複があります")

        self.presets = _presets
        self.last_modified_time = _last_modified_time
        return self.presets

    def add_preset(self, preset: Preset):
        """
        Add a new preset to the YAML file.

        Parameters
        ----------
        preset : Preset
            Preset to add. Its ``id`` is overwritten when it is negative or
            already in use.

        Returns
        -------
        ret: int
            ID of the added preset
        """

        # The file may have been edited by hand, so reload the latest content
        self.load_presets()

        # A negative or already used ID is replaced by (largest ID + 1);
        # ``default=-1`` makes the first preset of an empty file get ID 0
        # instead of failing on max() of an empty sequence.
        existing_ids = {existing.id for existing in self.presets}
        if preset.id < 0 or preset.id in existing_ids:
            preset.id = max(existing_ids, default=-1) + 1
        self.presets.append(preset)

        # Write to the file; undo the in-memory change on any failure
        try:
            self._write_presets()
        except Exception:
            self.presets.pop()
            raise

        return preset.id

    def update_preset(self, preset: Preset):
        """
        Update a preset in the YAML file.

        Parameters
        ----------
        preset : Preset
            Preset carrying the new values; matched by its ``id``

        Returns
        -------
        ret: int
            ID of the updated preset

        Raises
        ------
        PresetError
            When no preset with the given ID exists
        """

        # The file may have been edited by hand, so reload the latest content
        self.load_presets()

        # Look for the ID and swap the entry in place
        for index, current in enumerate(self.presets):
            if current.id == preset.id:
                previous = current
                self.presets[index] = preset
                break
        else:
            raise PresetError("更新先のプリセットが存在しません")

        # Write to the file; restore the previous entry on any failure
        try:
            self._write_presets()
        except Exception:
            self.presets[index] = previous
            raise

        return preset.id

    def delete_preset(self, id: int):
        """
        Delete a preset from the YAML file.

        Parameters
        ----------
        id: int
            ID of the preset to delete

        Returns
        -------
        ret: int
            ID of the deleted preset

        Raises
        ------
        PresetError
            When no preset with the given ID exists
        """

        # The file may have been edited by hand, so reload the latest content
        self.load_presets()

        # Look for the ID and remove the entry
        for index, current in enumerate(self.presets):
            if current.id == id:
                removed = self.presets.pop(index)
                break
        else:
            raise PresetError("削除対象のプリセットが存在しません")

        # Write to the file; put the entry back on any failure so that the
        # in-memory list never diverges from the file (same policy as
        # add_preset / update_preset)
        try:
            self._write_presets()
        except Exception:
            self.presets.insert(index, removed)
            raise

        return id
class SettingLoader:
    """
    Load and persist the engine settings stored as a YAML file.
    """

    def __init__(self, setting_file_path: Path) -> None:
        """
        Parameters
        ----------
        setting_file_path : Path
            Location of the user setting file to read from and write to
        """
        self.setting_file_path = setting_file_path

    def load_setting_file(self) -> Setting:
        """
        Read the settings, falling back to the bundled defaults.

        The default setting file is used when the user setting file does not
        exist or is empty.

        Returns
        -------
        ret: Setting
            Settings built from the loaded YAML mapping
        """
        setting = None
        if self.setting_file_path.is_file():
            setting = yaml.safe_load(self.setting_file_path.read_text(encoding="utf-8"))

        # yaml.safe_load returns None for an empty document; treat an empty
        # user file like a missing one instead of failing on None["..."].
        if setting is None:
            setting = yaml.safe_load(DEFAULT_SETTING_PATH.read_text(encoding="utf-8"))

        return Setting(
            cors_policy_mode=setting["cors_policy_mode"],
            # allow_origin is Optional on the model, so a missing key is valid
            allow_origin=setting.get("allow_origin"),
        )

    def dump_setting_file(self, settings: Setting) -> None:
        """
        Write the given settings to the user setting file as YAML.

        Parameters
        ----------
        settings : Setting
            Settings to persist
        """
        settings_dict = settings.dict()

        # The save directory may not exist yet on first run
        self.setting_file_path.parent.mkdir(parents=True, exist_ok=True)

        with open(self.setting_file_path, mode="w", encoding="utf-8") as f:
            yaml.safe_dump(settings_dict, f)
load_all_models: bool, optional, default=False 28 | 起動時に全てのモデルを読み込むかどうか 29 | """ 30 | synthesis_engines = {} 31 | try: 32 | _synthesis_engine = SynthesisEngineESPNet( 33 | bridge_config_loader=bridge_config_loader, 34 | use_gpu=use_gpu, 35 | load_all_models=load_all_models, 36 | ) 37 | synthesis_engines[_synthesis_engine.engine_version] = _synthesis_engine 38 | except Exception: 39 | if not enable_mock: 40 | raise 41 | traceback.print_exc() 42 | print( 43 | "Notice: mock-library will be used.", 44 | file=sys.stderr, 45 | ) 46 | 47 | from ..dev.core import metas as mock_metas 48 | from ..dev.core import supported_devices as mock_supported_devices 49 | from ..dev.synthesis_engine import MockSynthesisEngine 50 | 51 | if "0.0.0" not in synthesis_engines: 52 | synthesis_engines["0.0.0"] = MockSynthesisEngine( 53 | speakers=mock_metas(), supported_devices=mock_supported_devices() 54 | ) 55 | 56 | return synthesis_engines 57 | -------------------------------------------------------------------------------- /voicevox_engine/utility/__init__.py: -------------------------------------------------------------------------------- 1 | from .connect_base64_waves import ( 2 | ConnectBase64WavesException, 3 | connect_base64_waves, 4 | decode_base64_waves, 5 | ) 6 | from .core_version_utility import get_latest_core_version, parse_core_version 7 | from .mutex_utility import mutex_wrapper 8 | from .path_utility import delete_file, engine_root 9 | from .save_dir import get_save_dir 10 | 11 | __all__ = [ 12 | "ConnectBase64WavesException", 13 | "connect_base64_waves", 14 | "decode_base64_waves", 15 | "get_latest_core_version", 16 | "parse_core_version", 17 | "delete_file", 18 | "engine_root", 19 | "get_save_dir", 20 | "mutex_wrapper", 21 | ] 22 | -------------------------------------------------------------------------------- /voicevox_engine/utility/connect_base64_waves.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import io 3 
class ConnectBase64WavesException(Exception):
    """
    Raised when base64-encoded wav data cannot be decoded or read.
    """

    def __init__(self, message: str):
        # Pass the message to Exception so str(exc) and tracebacks show it
        super().__init__(message)
        self.message = message


def decode_base64_waves(waves: List[str]) -> List[Tuple[np.ndarray, int]]:
    """
    Decode multiple base64-encoded wav payloads.

    Parameters
    ----------
    waves: list[str]
        List of base64-encoded wav data

    Returns
    -------
    waves_nparray_sr: List[Tuple[np.ndarray, int]]
        List of (waveform as NumPy array, sampling rate) tuples

    Raises
    ------
    ConnectBase64WavesException
        Raised when the list is empty, when base64 decoding fails, or when
        the decoded bytes cannot be read as audio
    """
    if not waves:
        raise ConnectBase64WavesException("wavファイルが含まれていません")

    waves_nparray_sr = []
    for wave in waves:
        try:
            wav_bin = base64.standard_b64decode(wave)
        except ValueError as err:
            raise ConnectBase64WavesException("base64デコードに失敗しました") from err
        try:
            _data = soundfile.read(io.BytesIO(wav_bin))
        except Exception as err:
            raise ConnectBase64WavesException(
                "wavファイルを読み込めませんでした"
            ) from err
        waves_nparray_sr.append(_data)

    return waves_nparray_sr


def connect_base64_waves(waves: List[str]) -> Tuple[np.ndarray, int]:
    """
    Decode base64-encoded wav payloads and concatenate them into one waveform.

    Every waveform is resampled to the highest sampling rate found among the
    inputs, and 1-D waveforms are duplicated into two columns when any input
    is 2-D, so that all pieces can be concatenated.

    Parameters
    ----------
    waves: list[str]
        List of base64-encoded wav data

    Returns
    -------
    ret: Tuple[np.ndarray, int]
        (concatenated waveform, sampling rate of the result)

    Raises
    ------
    ConnectBase64WavesException
        Propagated from decode_base64_waves
    """
    waves_nparray_sr = decode_base64_waves(waves)

    max_sampling_rate = max(sr for _, sr in waves_nparray_sr)
    # Array rank of the widest input: 1 for 1-D data, 2 for 2-D data
    max_channels = max(x.ndim for x, _ in waves_nparray_sr)
    assert 0 < max_channels <= 2

    waves_nparray_list = []
    for nparray, sr in waves_nparray_sr:
        if sr != max_sampling_rate:
            # Scale the sample count by the ratio of the sampling rates
            nparray = resample(nparray, max_sampling_rate * len(nparray) // sr)
        if nparray.ndim < max_channels:
            # Duplicate the 1-D signal into two identical columns
            nparray = np.array([nparray, nparray]).T
        waves_nparray_list.append(nparray)

    return np.concatenate(waves_nparray_list), max_sampling_rate
typing import Iterable 2 | 3 | from semver.version import Version 4 | 5 | 6 | def parse_core_version(version: str) -> Version: 7 | return Version.parse(version) 8 | 9 | 10 | def get_latest_core_version(versions: Iterable[str]) -> str: 11 | if len(versions) == 0: 12 | raise Exception("versions must be non-empty.") 13 | 14 | return str(max(map(parse_core_version, versions))) 15 | -------------------------------------------------------------------------------- /voicevox_engine/utility/mutex_utility.py: -------------------------------------------------------------------------------- 1 | import threading 2 | 3 | 4 | def mutex_wrapper(lock: threading.Lock): 5 | def wrap(f): 6 | def func(*args, **kw): 7 | lock.acquire() 8 | try: 9 | return f(*args, **kw) 10 | finally: 11 | lock.release() 12 | 13 | return func 14 | 15 | return wrap 16 | -------------------------------------------------------------------------------- /voicevox_engine/utility/path_utility.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import traceback 4 | from pathlib import Path 5 | 6 | # コンパイル済環境でchdirをした場合、root_dirが書き換わるので、初期値を保存しておく 7 | _root_dir = Path(sys.argv[0]).parent.resolve(strict=True) 8 | 9 | 10 | def engine_root() -> Path: 11 | if is_development(): 12 | root_dir = Path(__file__).parents[2] 13 | 14 | # Nuitka/Pyinstallerでビルドされている場合 15 | else: 16 | root_dir = Path(str(_root_dir)) 17 | 18 | return root_dir.resolve(strict=True) 19 | 20 | 21 | def is_development() -> bool: 22 | """ 23 | 開発版かどうか判定する関数 24 | Nuitka/Pyinstallerでコンパイルされていない場合は開発環境とする。 25 | """ 26 | # nuitkaビルドをした際はグローバルに__compiled__が含まれる 27 | if "__compiled__" in globals(): 28 | return False 29 | 30 | # pyinstallerでビルドをした際はsys.frozenが設定される 31 | elif getattr(sys, "frozen", False): 32 | return False 33 | 34 | return True 35 | 36 | 37 | def delete_file(file_path: str) -> None: 38 | try: 39 | os.remove(file_path) 40 | except OSError: 41 | traceback.print_exc() 42 | 
def get_save_dir():
    """
    Return the per-user data directory for this engine.

    The directory name is the ``name`` field of the loaded engine manifest,
    resolved through ``platformdirs.user_data_dir``.
    """
    # TODO: the behaviour here is suspicious and needs to be checked properly
    # NOTE(review): the TypeError fallback looks like it covers the case where
    # the imported name is a module rather than the class — confirm.
    try:
        loader = EngineManifestLoader()
        manifest = loader.load_manifest()
        app_name = manifest.name
    except TypeError:
        loader = EngineManifestLoader.EngineManifestLoader()
        manifest = loader.load_manifest()
        app_name = manifest.name

    save_dir = Path(user_data_dir(app_name))
    return save_dir