├── .gitattributes
├── .github
├── ISSUE_TEMPLATE
│ ├── bugreport.md
│ ├── featurerequest.md
│ └── question.md
├── PULL_REQUEST_TEMPLATE.md
├── labeler.yml
└── workflows
│ ├── build.yml
│ ├── coverage-comment.yml
│ ├── labeler.yml
│ ├── release-test.yml
│ ├── release_latest_dev.yml
│ ├── test.yml
│ ├── typos.yml
│ └── upload-gh-pages.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── _typos.toml
├── build_util
├── check_release_build.py
├── codesign.bash
├── create_venv_and_generate_licenses.bash
├── macos
│ ├── build_util_macos
│ │ ├── __init__.py
│ │ └── shlib_tools.py
│ ├── copy_missing_dylibs.py
│ └── fix_rpaths.py
├── merge_update_infos.py
└── process_voicevox_resource.bash
├── default.csv
├── default_setting.yml
├── docs
├── VOICEVOX音声合成エンジンとの連携.md
├── api
│ └── .gitkeep
├── licenses
│ ├── cublas
│ │ └── License.txt
│ ├── cuda
│ │ └── EULA.txt
│ ├── cuda_runtime
│ │ └── License.txt
│ ├── cudnn
│ │ ├── LICENSE
│ │ └── License.txt
│ ├── cufft
│ │ └── License.txt
│ ├── cupti
│ │ └── License.txt
│ ├── curand
│ │ └── License.txt
│ ├── cusolver
│ │ └── License.txt
│ ├── cusparse
│ │ └── License.txt
│ ├── mpg123
│ │ └── COPYING
│ ├── nccl
│ │ └── License.txt
│ ├── nvrtc
│ │ └── License.txt
│ ├── nvtx
│ │ └── License.txt
│ ├── open_jtalk
│ │ ├── COPYING
│ │ ├── mecab-naist-jdic
│ │ │ └── COPYING
│ │ └── mecab
│ │ │ └── COPYING
│ └── world
│ │ └── LICENSE.txt
└── res
│ └── マルチエンジン概念図.svg
├── engine_manifest.json
├── engine_manifest_assets
├── dependency_licenses.json
├── downloadable_libraries.json
├── icon.png
├── terms_of_service.md
└── update_infos.json
├── generate_licenses.py
├── get_cost_candidates.py
├── make_docs.py
├── nuitka-config.yaml
├── poetry.lock
├── presets.yaml
├── pyproject.toml
├── requirements-dev.txt
├── requirements-license.txt
├── requirements-test.txt
├── requirements.txt
├── run.py
├── setup.cfg
├── speaker_info
├── 35b2c544-660e-401e-b503-0e14c635303a
│ ├── icons
│ │ └── 8.png
│ ├── metas.json
│ ├── policy.md
│ ├── portrait.png
│ ├── portraits
│ │ └── 8.png
│ └── voice_samples
│ │ ├── 8_001.wav
│ │ ├── 8_002.wav
│ │ └── 8_003.wav
├── 388f246b-8c41-4ac1-8e2d-5d79f3ff56d9
│ ├── icons
│ │ ├── 1.png
│ │ ├── 3.png
│ │ ├── 5.png
│ │ └── 7.png
│ ├── metas.json
│ ├── policy.md
│ ├── portrait.png
│ ├── portraits
│ │ └── 3.png
│ └── voice_samples
│ │ ├── 1_001.wav
│ │ ├── 1_002.wav
│ │ ├── 1_003.wav
│ │ ├── 3_001.wav
│ │ ├── 3_002.wav
│ │ ├── 3_003.wav
│ │ ├── 5_001.wav
│ │ ├── 5_002.wav
│ │ ├── 5_003.wav
│ │ ├── 7_001.wav
│ │ ├── 7_002.wav
│ │ └── 7_003.wav
├── 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff
│ ├── icons
│ │ ├── 0.png
│ │ ├── 2.png
│ │ ├── 4.png
│ │ └── 6.png
│ ├── metas.json
│ ├── policy.md
│ ├── portrait.png
│ ├── portraits
│ │ ├── 0.png
│ │ ├── 2.png
│ │ ├── 4.png
│ │ └── 6.png
│ └── voice_samples
│ │ ├── 0_001.wav
│ │ ├── 0_002.wav
│ │ ├── 0_003.wav
│ │ ├── 2_001.wav
│ │ ├── 2_002.wav
│ │ ├── 2_003.wav
│ │ ├── 4_001.wav
│ │ ├── 4_002.wav
│ │ ├── 4_003.wav
│ │ ├── 6_001.wav
│ │ ├── 6_002.wav
│ │ └── 6_003.wav
└── b1a81618-b27b-40d2-b0ea-27a9ad408c4b
│ ├── icons
│ └── 9.png
│ ├── metas.json
│ ├── policy.md
│ ├── portrait.png
│ └── voice_samples
│ ├── 9_001.wav
│ ├── 9_002.wav
│ └── 9_003.wav
├── test
├── __init__.py
├── e2e
│ ├── conftest.py
│ └── test_validate_version.py
├── presets-test-1.yaml
├── presets-test-2.yaml
├── presets-test-3.yaml
├── presets-test-4.yaml
├── setting-test-load-1.yaml
├── setting-test-load-2.yaml
├── setting-test-load-3.yaml
├── test_acoustic_feature_extractor.py
├── test_connect_base64_waves.py
├── test_core_version_utility.py
├── test_full_context_label.py
├── test_kana_parser.py
├── test_library_manager.py
├── test_mock_synthesis_engine.py
├── test_mora_list.py
├── test_mora_to_text.py
├── test_preset.py
├── test_setting.py
├── test_synthesis_engine.py
├── test_synthesis_engine_base.py
├── test_user_dict.py
├── test_user_dict_model.py
├── test_word_types.py
└── vvlib_manifest.json
├── ui_template
└── ui.html
└── voicevox_engine
├── __init__.py
├── acoustic_feature_extractor.py
├── bridge_config
├── BridgeConfig.py
├── BridgeConfigLoader.py
└── __init__.py
├── cancellable_engine.py
├── dev
├── core
│ ├── __init__.py
│ └── mock.py
└── synthesis_engine
│ ├── __init__.py
│ └── mock.py
├── engine_manifest
├── EngineManifest.py
├── EngineManifestLoader.py
└── __init__.py
├── full_context_label.py
├── kana_parser.py
├── library_manager.py
├── metas
├── Metas.py
├── MetasStore.py
└── __init__.py
├── model.py
├── mora_list.py
├── morphing.py
├── part_of_speech_data.py
├── preset
├── Preset.py
├── PresetError.py
├── PresetManager.py
└── __init__.py
├── setting
├── Setting.py
├── SettingLoader.py
└── __init__.py
├── synthesis_engine
├── __init__.py
├── core_wrapper.py
├── make_synthesis_engines.py
├── synthesis_engine.py
├── synthesis_engine_base.py
└── synthesis_engine_espnet.py
├── user_dict.py
└── utility
├── __init__.py
├── connect_base64_waves.py
├── core_version_utility.py
├── mutex_utility.py
├── path_utility.py
└── save_dir.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto
2 | *.png -text
3 | *.wav -text
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bugreport.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug Report
3 | about: 不具合の報告
4 | labels: バグ
5 | ---
6 |
7 | ## 不具合の内容
8 |
9 |
10 |
11 | ### 現象・ログ
12 |
13 |
14 |
15 | ### 再現手順
16 |
17 |
18 |
19 | ### 期待動作
20 |
21 |
22 |
23 | ## VOICEVOXのバージョン
24 |
25 | 0.?.0
26 |
27 |
28 |
29 | ## OSの種類/ディストリ/バージョン
30 |
31 |
32 |
33 | - [ ] Windows
34 | - [ ] macOS
35 | - [ ] Linux
36 |
37 |
44 |
45 | ## その他
46 |
47 |
48 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/featurerequest.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature Request
3 | about: 機能要望・改善提案
4 | labels: 機能向上
5 | ---
6 |
7 | ## 内容
8 |
9 |
10 |
11 |
12 | ### Pros 良くなる点
13 |
14 |
15 |
16 | ### Cons 悪くなる点
17 |
18 |
19 |
20 | ### 実現方法
21 |
22 |
23 |
24 | ## VOICEVOXのバージョン
25 |
26 | 0.?.0
27 |
28 |
29 |
30 | ## OSの種類/ディストリ/バージョン
31 |
32 |
33 |
34 | - [ ] Windows
35 | - [ ] macOS
36 | - [ ] Linux
37 |
38 |
45 |
46 | ## その他
47 |
48 |
49 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/question.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Question
3 | about: 質問 (既存のIssueや一般事例を良く調べてからしてください)
4 | labels: 要議論
5 | ---
6 |
7 | ## 質問の内容
8 |
9 |
10 |
11 | ## VOICEVOXのバージョン
12 |
13 | 0.?.0
14 |
15 |
16 |
17 | ## OSの種類/ディストリ/バージョン
18 |
19 |
20 |
21 | - [ ] Windows
22 | - [ ] macOS
23 | - [ ] Linux
24 |
25 |
32 |
33 | ## その他
34 |
35 |
36 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | ## 内容
2 |
3 |
6 |
7 | ## 関連 Issue
8 |
9 |
17 |
18 | ## スクリーンショット・動画など
19 |
20 |
23 |
24 | ## その他
25 |
--------------------------------------------------------------------------------
/.github/labeler.yml:
--------------------------------------------------------------------------------
1 | 'OS:mac':
2 | - '\[x\] macOS'
3 | 'OS:linux':
4 | - '\[x\] Linux'
5 | 'OS:win':
6 | - '\[x\] Windows'
7 |
--------------------------------------------------------------------------------
/.github/workflows/coverage-comment.yml:
--------------------------------------------------------------------------------
1 | name: Coverage Report Comment
2 |
3 | on:
4 | workflow_run:
5 | workflows:
6 | - test
7 | types:
8 | - completed
9 | workflow_dispatch:
10 |
11 | defaults:
12 | run:
13 | shell: bash
14 |
15 | jobs:
16 | comment:
17 | runs-on: ubuntu-latest
18 | if: github.event.workflow_run.event == 'pull_request' && github.event.workflow_run.conclusion == 'success'
19 | steps:
20 | - name: Download coverage report
21 | uses: actions/github-script@v5.0.0
22 | with:
23 | script: |
24 | const artifacts = await github.rest.actions.listWorkflowRunArtifacts({
25 | owner: context.repo.owner,
26 | repo: context.repo.repo,
27 | run_id: ${{ github.event.workflow_run.id }},
28 | })
29 | const matchArtifact = artifacts.data.artifacts.filter((artifact) => {
30 | return artifact.name == 'report'
31 | })[0]
32 | const download = await github.rest.actions.downloadArtifact({
33 | owner: context.repo.owner,
34 | repo: context.repo.repo,
35 | artifact_id: matchArtifact.id,
36 | archive_format: 'zip',
37 | })
38 | const fs = require('fs')
39 | fs.writeFileSync('${{github.workspace}}/report.zip', Buffer.from(download.data))
40 |
41 | - name: Unzip report
42 | run: unzip report.zip
43 |
44 | - name: Comment coverage result to Pull Requests
45 | uses: actions/github-script@v5.0.0
46 | with:
47 | github-token: ${{ secrets.GITHUB_TOKEN }}
48 | script: |
49 | const fs = require('fs')
50 | const baseReport = fs.readFileSync('report.txt', 'utf8').toString().split('\n')
51 | let report = ''
52 | for (let i = 0; i < baseReport.length; i++) {
53 | const line = baseReport[i].split(' ').filter(v => v)
54 | if (i === 1 && line.length === 1) {
55 | report += "|:---|---:|---:|---:|\n"
56 | } else if (line.length === 1) {
57 | continue
58 | } else {
59 | if (i !== 0 && line.length === 4) {
60 | const parcent = Number(line[3].replace("%", ""))
61 | let color = 'green'
62 | if (parcent < 50) {
63 | color = 'red'
64 | } else if (parcent < 90) {
65 | color = 'orange'
66 | }
67 | line[3] = ``
68 | }
69 | report += "|" + line.join("|") + "|\n"
70 | }
71 | if (line[0] === 'TOTAL') break
72 | }
73 |
74 | const issue_number = Number(fs.readFileSync('pr_num.txt'))
75 | const body = `## Coverage Result\n\n\nResultを開く
\n\n${report}\n `
76 |
77 | let listComments = await github.rest.issues.listComments({
78 | issue_number,
79 | owner: context.repo.owner,
80 | repo: context.repo.repo,
81 | })
82 | listComments = listComments.data.filter((comment) => {
83 | return comment.body.includes('Coverage Result') && comment.user.login.includes('github-actions')
84 | })
85 |
86 | if (listComments.length === 0) {
87 | github.rest.issues.createComment({
88 | issue_number,
89 | owner: context.repo.owner,
90 | repo: context.repo.repo,
91 | body,
92 | })
93 | } else {
94 | github.rest.issues.updateComment({
95 | comment_id: listComments[0].id,
96 | owner: context.repo.owner,
97 | repo: context.repo.repo,
98 | body,
99 | })
100 | }
101 |
--------------------------------------------------------------------------------
/.github/workflows/labeler.yml:
--------------------------------------------------------------------------------
1 | name: Issue Labeler
2 | on:
3 | issues:
4 | types: [opened]
5 | defaults:
6 | run:
7 | shell: bash
8 |
9 | jobs:
10 | triage:
11 | runs-on: ubuntu-latest
12 | steps:
13 | - uses: github/issue-labeler@v2.0
14 | with:
15 | repo-token: "${{ secrets.GITHUB_TOKEN }}"
16 | configuration-path: .github/labeler.yml
17 | enable-versioned-regex: 0
18 |
--------------------------------------------------------------------------------
/.github/workflows/release-test.yml:
--------------------------------------------------------------------------------
1 | name: Test Release Build
2 |
3 | on:
4 | workflow_call:
5 | inputs:
6 | version:
7 | type: string
8 | required: true
9 | repo_url:
10 | type: string
11 | required: false
12 | workflow_dispatch:
13 | inputs:
14 | version:
15 | type: string
16 | description: "テストしたいタグ名"
17 | required: true
18 | repo_url:
19 | type: string
20 | description: "リポジトリのURL(省略可能)"
21 | required: false
22 |
23 | env:
24 | REPO_URL:
25 | |- # repo_url指定時はrepo_urlを、それ以外はgithubのリポジトリURLを使用
26 | ${{ (github.event.inputs || inputs).repo_url || format('{0}/{1}', github.server_url, github.repository) }}
27 | VERSION: |- # version指定時はversionを、それ以外はタグ名を使用
28 | ${{ (github.event.inputs || inputs).version }}
29 |
30 | defaults:
31 | run:
32 | shell: bash
33 |
34 | jobs:
35 | test:
36 | strategy:
37 | fail-fast: false
38 | matrix:
39 | include:
40 | #- os: ubuntu-20.04
41 | # target: linux-cpu
42 | #- os: ubuntu-20.04
43 | # target: linux-nvidia
44 | #- os: macos-11
45 | # target: macos-x64
46 | - os: windows-2019
47 | target: windows-cpu
48 | #- os: windows-2019
49 | # target: windows-nvidia
50 | #- os: windows-2019
51 | # target: windows-directml
52 |
53 | runs-on: ${{ matrix.os }}
54 |
55 | steps:
56 | - name: declare variables
57 | id: vars
58 | run: |
59 | echo "release_url=${{ env.REPO_URL }}/releases/download/${{ env.VERSION }}" >> $GITHUB_OUTPUT
60 | echo "package_name=voicevox_engine-${{ matrix.target }}-${{ env.VERSION }}" >> $GITHUB_OUTPUT
61 |
62 | - uses: actions/checkout@v2
63 |
64 | - uses: actions/setup-python@v2
65 | with:
66 | python-version: "3.11.3"
67 | cache: pip
68 |
69 | - name: Download
70 | run: |
71 | mkdir -p download
72 | curl -L -o "download/list.txt" "${{ steps.vars.outputs.release_url }}/${{ steps.vars.outputs.package_name }}.7z.txt"
73 | cat "download/list.txt" | xargs -I '%' curl -L -o "download/%" "${{ steps.vars.outputs.release_url }}/%"
74 | 7z x "download/$(head -n1 download/list.txt)"
75 | mv ${{ matrix.target }} dist/
76 |
77 | - name: chmod +x
78 | if: startsWith(matrix.target, 'linux') || startsWith(matrix.target, 'macos')
79 | run: chmod +x dist/run
80 |
81 | - name: Install requirements
82 | run: |
83 | pip install -r requirements-test.txt
84 |
85 | - name: Test
86 | run: python build_util/check_release_build.py --dist_dir dist/
87 |
--------------------------------------------------------------------------------
/.github/workflows/release_latest_dev.yml:
--------------------------------------------------------------------------------
1 | name: Release latest dev build
2 |
3 | # masterブランチが更新されるたびに開発版をビルドしてデプロイする。
4 | # バージョン(タグ)は最新リリースのバージョンを`X.Y.Z`としたときの`X.Y+1.0-dev`。
5 |
6 | on:
7 | push:
8 | branches:
9 | - master
10 | paths-ignore:
11 | - "docs/**"
12 | - "test/**"
13 |
14 | jobs:
15 | latest-dev-build:
16 | runs-on: ubuntu-latest
17 | if: github.repository_owner == 'VOICEVOX'
18 | steps:
19 | - name: Trigger workflow_dispatch
20 | uses: actions/github-script@v6
21 | with:
22 | github-token: ${{ secrets.GITHUB_TOKEN }}
23 | script: |
24 | const latest_release = await github.rest.repos.getLatestRelease({
25 | owner: context.repo.owner,
26 | repo: context.repo.repo
27 | });
28 | const split_version = latest_release.data.tag_name.split('.');
29 | const dev_version = `${split_version[0]}.${parseInt(split_version[1]) + 1}.0-dev`;
30 | github.rest.actions.createWorkflowDispatch({
31 | owner: context.repo.owner,
32 | repo: context.repo.repo,
33 | workflow_id: 'build.yml',
34 | ref: 'master',
35 | inputs: {
36 | version: dev_version,
37 | prerelease: true
38 | }
39 | })
40 | console.log(`Triggered workflow_dispatch for ${dev_version}`);
41 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: test
2 |
3 | on:
4 | push:
5 | pull_request:
6 | branches:
7 | - "**"
8 | workflow_dispatch:
9 |
10 | defaults:
11 | run:
12 | shell: bash
13 |
14 | jobs:
15 | test:
16 | runs-on: ${{ matrix.os }}
17 | strategy:
18 | matrix:
19 | os: [ubuntu-20.04, windows-latest] # [ubuntu-20.04, macos-latest, windows-latest]
20 | python: ["3.11.3"]
21 |
22 | steps:
23 | - uses: actions/checkout@v3
24 |
25 | - name: Set up Python ${{ matrix.python }}
26 | uses: actions/setup-python@v4
27 | with:
28 | python-version: ${{ matrix.python }}
29 | cache: pip
30 |
31 | - name: Install dependencies
32 | run: |
33 | python -m pip install --upgrade pip setuptools wheel
34 | python -m pip install -r requirements-test.txt
35 |
36 | - run: pysen run lint
37 |
38 | - name: Run pytest and get coverage
39 | run: |
40 | coverage run --omit=test/* -m pytest
41 |
42 | - name: Submit coverage to Coveralls
43 | if: matrix.os == 'ubuntu-20.04'
44 | run: coveralls --service=github
45 | env:
46 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
47 |
48 | - name: Create coverage result
49 | if: github.event_name == 'pull_request' && matrix.os == 'ubuntu-20.04'
50 | run: |
51 | mkdir report
52 | coverage report > report/report.txt
53 | echo ${{ github.event.number }} > report/pr_num.txt
54 |
55 | - name: Upload coverage result
56 | if: github.event_name == 'pull_request' && matrix.os == 'ubuntu-20.04'
57 | uses: actions/upload-artifact@v3
58 | with:
59 | name: report
60 | path: report/
61 |
62 | - name: Check licenses
63 | shell: bash
64 | run: |
65 | OUTPUT_LICENSE_JSON_PATH=/dev/null \
66 | bash build_util/create_venv_and_generate_licenses.bash
67 |
--------------------------------------------------------------------------------
/.github/workflows/typos.yml:
--------------------------------------------------------------------------------
1 | name: Check typos
2 |
3 | on:
4 | push:
5 | pull_request:
6 | branches:
7 | - "**"
8 | workflow_dispatch:
9 |
10 | defaults:
11 | run:
12 | shell: bash
13 |
14 | jobs:
15 | typos:
16 | runs-on: ubuntu-latest
17 |
18 | steps:
19 | - uses: actions/checkout@v3
20 |
21 | - name: typos-action
22 | uses: crate-ci/typos@v1.12.12
23 |
--------------------------------------------------------------------------------
/.github/workflows/upload-gh-pages.yml:
--------------------------------------------------------------------------------
1 | name: upload-docs
2 |
3 | on:
4 | push:
5 | branches:
6 | - "master"
7 |
8 | env:
9 | PYTHON_VERSION: "3.11.3"
10 | PUBLISH_DIR: "./docs/api"
11 | PUBLISH_BRANCH: "gh-pages"
12 | DESTINATION_DIR: "api"
13 |
14 | defaults:
15 | run:
16 | shell: bash
17 |
18 | jobs:
19 | upload-doc:
20 | runs-on: ubuntu-20.04
21 | steps:
22 | - uses: actions/checkout@v2
23 |
24 | - name: Setup Python
25 | id: setup-python
26 | uses: actions/setup-python@v4
27 | with:
28 | python-version: ${{ env.PYTHON_VERSION }}
29 | cache: pip
30 |
31 | - name: Install Python dependencies
32 | run: |
33 | pip install -r requirements.txt
34 |
35 | - name: Make documents
36 | run: |
37 | python make_docs.py
38 |
39 | - name: Deploy to GitHub Pages
40 | uses: peaceiris/actions-gh-pages@v3
41 | with:
42 | github_token: ${{ secrets.GITHUB_TOKEN }}
43 | publish_dir: ${{ env.PUBLISH_DIR }}
44 | publish_branch: ${{ env.PUBLISH_BRANCH }}
45 | destination_dir: ${{ env.DESTINATION_DIR }}
46 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # VOICEVOX specifics
2 | ## Artifacts of nuitka
3 | *.dist
4 | *.build
5 | /build
6 | /cache
7 | ## Artifact of generating licenses
8 | /licenses.json
9 | licenses_venv/
10 |
11 | # Copied from `https://github.com/github/gitignore/blob/main/Python.gitignore` @2022-01-10
12 | # Byte-compiled / optimized / DLL files
13 | __pycache__/
14 | *.py[cod]
15 | *$py.class
16 |
17 | # C extensions
18 | *.so
19 |
20 | # Distribution / packaging
21 | .Python
22 | build/
23 | develop-eggs/
24 | dist/
25 | downloads/
26 | eggs/
27 | .eggs/
28 | lib/
29 | lib64/
30 | parts/
31 | sdist/
32 | var/
33 | wheels/
34 | share/python-wheels/
35 | *.egg-info/
36 | .installed.cfg
37 | *.egg
38 | MANIFEST
39 |
40 | # PyInstaller
41 | # Usually these files are written by a python script from a template
42 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
43 | *.manifest
44 | *.spec
45 |
46 | # Installer logs
47 | pip-log.txt
48 | pip-delete-this-directory.txt
49 |
50 | # Unit test / coverage reports
51 | htmlcov/
52 | .tox/
53 | .nox/
54 | .coverage
55 | .coverage.*
56 | .cache
57 | nosetests.xml
58 | coverage.xml
59 | *.cover
60 | *.py,cover
61 | .hypothesis/
62 | .pytest_cache/
63 | cover/
64 |
65 | # Translations
66 | *.mo
67 | *.pot
68 |
69 | # Django stuff:
70 | *.log
71 | local_settings.py
72 | db.sqlite3
73 | db.sqlite3-journal
74 |
75 | # Flask stuff:
76 | instance/
77 | .webassets-cache
78 |
79 | # Scrapy stuff:
80 | .scrapy
81 |
82 | # Sphinx documentation
83 | docs/_build/
84 |
85 | # PyBuilder
86 | .pybuilder/
87 | target/
88 |
89 | # Jupyter Notebook
90 | .ipynb_checkpoints
91 |
92 | # IPython
93 | profile_default/
94 | ipython_config.py
95 |
96 | # pyenv
97 | # For a library or package, you might want to ignore these files since the code is
98 | # intended to run in multiple environments; otherwise, check them in:
99 | .python-version
100 |
101 | # pipenv
102 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
103 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
104 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
105 | # install all needed dependencies.
106 | Pipfile.lock
107 |
108 | # poetry
109 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
110 | # This is especially recommended for binary packages to ensure reproducibility, and is more
111 | # commonly ignored for libraries.
112 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
113 | # poetry.lock
114 |
115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
116 | __pypackages__/
117 |
118 | # Celery stuff
119 | celerybeat-schedule
120 | celerybeat.pid
121 |
122 | # SageMath parsed files
123 | *.sage.py
124 |
125 | # Environments
126 | .env
127 | .venv
128 | env/
129 | venv/
130 | ENV/
131 | env.bak/
132 | venv.bak/
133 |
134 | # Spyder project settings
135 | .spyderproject
136 | .spyproject
137 |
138 | # Rope project settings
139 | .ropeproject
140 |
141 | # mkdocs documentation
142 | /site
143 |
144 | # mypy
145 | .mypy_cache/
146 | .dmypy.json
147 | dmypy.json
148 |
149 | # Pyre type checker
150 | .pyre/
151 |
152 | # pytype static type analyzer
153 | .pytype/
154 |
155 | # Cython debug symbols
156 | cython_debug/
157 |
158 | # PyCharm
159 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
160 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
161 | # and can be added to the global gitignore or merged into this file. For a more nuclear
162 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
163 | .idea/
164 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # See https://pre-commit.com for more information
2 | # See https://pre-commit.com/hooks.html for more hooks
3 | repos:
4 | - repo: local
5 | hooks:
6 | - id: pysen-lint
7 | name: pysen-lint
8 | entry: pysen run lint
9 | language: python
10 | types: [file, python]
11 | stages: [push]
12 | pass_filenames: false
13 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Bridge Plugin
2 |
3 | Copyright (c) 2021 Hiroshiba
4 | Copyright (c) 2021 VOICEVOX
5 | Copyright (c) 2022 VOICEVOX-Bridge
6 |
7 |
8 | 本ソフトウェアのリポジトリにPull Requestを送る目的に限り、本ソフトウェアのソースコードの使用、複製、配布等を行うことを許可します。
9 |
10 |
11 | 商用・非商用を問わず、バイナリ形式の本ソフトウェアの利用及び再配布を許可します。
12 | ただし、以下を条件とします。
13 |
14 | - 本ソフトウェアによって読み込まれる音声合成モデルは、本ソフトウェアを通じた音声合成化を許諾している提供者の音声を元に作られている必要があります。
15 |
16 | - 再配布を行う場合、本ソフトウェアのファイルに関しては、再配布後も本ライセンスを適用する必要があります。
17 |
18 |
19 | 本ソフトウェアは「現状のままで」で提供され、明示的、暗黙的かどうかに拘らずあらゆる保証はないものとします。ここで言う保証は、市販性、特定用途への適合性、権利の侵害がないこと等を含みますが、これらに限定されません。
20 | 製作者は、契約行為、不法行為、またはそれ以外であろうと、ソフトウェアに起因または関連し、あるいはソフトウェアの使用またはその他の扱いによって生じる一切の請求、損害、その他の義務について何らの責任も負わないものとします。
--------------------------------------------------------------------------------
/_typos.toml:
--------------------------------------------------------------------------------
1 | # Files for typos
2 | # Instruction: https://github.com/marketplace/actions/typos-action#getting-started
3 |
4 | [default.extend-identifiers]
5 |
6 | [default.extend-words]
7 | ba="ba" # 7zコマンドの-baオプション
8 | datas="datas" # PyInstallerの引数
9 |
10 | [files]
11 | extend-exclude = ["package-lock.json", "src/store/project.ts", "*.svg"]
12 |
--------------------------------------------------------------------------------
/build_util/check_release_build.py:
--------------------------------------------------------------------------------
1 | """
2 | ビルド結果をテストする
3 | """
4 | import argparse
5 | import json
6 | import time
7 | from io import BytesIO
8 | from pathlib import Path
9 | from subprocess import Popen
10 | from urllib.parse import urlencode
11 | from urllib.request import Request, urlopen
12 |
13 | import soundfile
14 |
# Base URL that every HTTP request in this script is sent to (trailing slash included,
# so endpoint names are appended directly).
base_url = "http://127.0.0.1:50021/"
16 |
17 |
def test_release_build(dist_dir: Path) -> None:
    """Smoke-test a release build by launching it and calling its HTTP API.

    Starts ``run`` (or ``run.exe`` when ``run`` does not exist) from
    ``dist_dir``, waits a fixed time, then requests ``version``,
    ``audio_query``, ``synthesis`` and ``engine_manifest`` under ``base_url``.
    The launched process is always terminated before returning, including
    when one of the checks fails.

    Args:
        dist_dir: Directory that contains the built executable; it is also
            used as the working directory of the launched process.

    Raises:
        AssertionError: If the version response is empty, the manifest has no
            ``uuid`` key, or the process is no longer running after the checks.

    Exceptions raised by ``urlopen``, ``json.loads`` or ``soundfile.read``
    propagate to the caller unchanged.
    """
    run_file = dist_dir / "run"
    if not run_file.exists():
        run_file = dist_dir / "run.exe"

    # Launch the build
    process = Popen([run_file.absolute()], cwd=dist_dir)
    try:
        time.sleep(120)  # fixed wait before the first request

        # Version endpoint: only checks that the body is non-empty
        req = Request(base_url + "version")
        with urlopen(req) as res:
            assert len(res.read()) > 0

        # Text -> query
        text = "こんにちは、音声合成の世界へようこそ"
        req = Request(
            base_url + "audio_query?" + urlencode({"style_id": "1", "text": text}),
            method="POST",
        )
        with urlopen(req) as res:
            query = json.loads(res.read().decode("utf-8"))

        # Query -> audio; the response must be readable by soundfile
        req = Request(base_url + "synthesis?style_id=1", method="POST")
        req.add_header("Content-Type", "application/json")
        req.data = json.dumps(query).encode("utf-8")
        with urlopen(req) as res:
            wave = res.read()
        soundfile.read(BytesIO(wave))

        # Engine manifest
        req = Request(base_url + "engine_manifest", method="GET")
        with urlopen(req) as res:
            manifest = json.loads(res.read().decode("utf-8"))
            assert "uuid" in manifest

        # Confirm the process is still running after all requests
        assert process.poll() is None
    finally:
        # Stop the child on every exit path so a failed check does not leave
        # the launched process running in the background.
        process.terminate()
61 |
if __name__ == "__main__":
    # Command-line entry point: --dist_dir selects the directory to test
    # (defaults to "dist/").
    cli = argparse.ArgumentParser()
    cli.add_argument("--dist_dir", type=Path, default=Path("dist/"))
    options = cli.parse_args()
    test_release_build(dist_dir=options.dist_dir)
67 |
--------------------------------------------------------------------------------
/build_util/codesign.bash:
--------------------------------------------------------------------------------
# !!! Handles a code-signing certificate: treat with care !!!
#
# Expects CERT_BASE64 and CERT_PASSWORD to be set and takes exactly one
# argument: a glob of target files. Every listed file that signtool does not
# already verify is signed.

set -eu

if [ ! -v CERT_BASE64 ]; then
    echo "CERT_BASE64が未定義です"
    exit 1
fi
if [ ! -v CERT_PASSWORD ]; then
    echo "CERT_PASSWORDが未定義です"
    exit 1
fi

if [ $# -ne 1 ]; then
    echo "引数の数が一致しません"
    exit 1
fi
target_file_glob="$1"

# Certificate: decoded from CERT_BASE64 into a .pfx file
CERT_PATH=cert.pfx
# Delete the decoded certificate on every exit path. With `set -e` a failed
# signing step aborts the script, so a trailing `rm` alone would leave the
# certificate on disk.
trap 'rm -f "$CERT_PATH"' EXIT
echo -n "$CERT_BASE64" | base64 -d - > $CERT_PATH

# Sign the given file
function codesign() {
    TARGET="$1"
    # Pick the last signtool.exe in version-sort order under the Windows Kits directory
    SIGNTOOL=$(find "C:/Program Files (x86)/Windows Kits/10/App Certification Kit" -name "signtool.exe" | sort -V | tail -n 1)
    powershell "& '$SIGNTOOL' sign /fd SHA256 /td SHA256 /tr http://timestamp.digicert.com /f $CERT_PATH /p $CERT_PASSWORD '$TARGET'"
}

# Return success when the given file passes `signtool verify`
function is_signed() {
    TARGET="$1"
    SIGNTOOL=$(find "C:/Program Files (x86)/Windows Kits/10/App Certification Kit" -name "signtool.exe" | sort -V | tail -n 1)
    powershell "& '$SIGNTOOL' verify /pa '$TARGET'" || return 1
}

# Sign each file that is not signed yet
ls $target_file_glob | while read target_file; do
    if is_signed "$target_file"; then
        echo "署名済み: $target_file"
    else
        echo "署名: $target_file"
        codesign "$target_file"
    fi
done

# The certificate file is removed by the EXIT trap installed above.
50 |
--------------------------------------------------------------------------------
/build_util/create_venv_and_generate_licenses.bash:
--------------------------------------------------------------------------------
# Create a virtual environment, install requirements-license.txt into it,
# and write the output of generate_licenses.py to OUTPUT_LICENSE_JSON_PATH.

set -eux

# The output path must be supplied by the caller through the environment
if [ ! -v OUTPUT_LICENSE_JSON_PATH ]; then
    echo "OUTPUT_LICENSE_JSON_PATHが未定義です"
    exit 1
fi

VENV_PATH="licenses_venv"

python -m venv $VENV_PATH
# The activate script lives under Scripts/ when that directory exists,
# otherwise under bin/
if [ -d "$VENV_PATH/Scripts" ]; then
    source $VENV_PATH/Scripts/activate
else
    source $VENV_PATH/bin/activate
fi

pip install -r requirements-license.txt
python generate_licenses.py >$OUTPUT_LICENSE_JSON_PATH

deactivate

# Remove the temporary virtual environment once the list has been generated
rm -rf $VENV_PATH
25 |
--------------------------------------------------------------------------------
/build_util/macos/build_util_macos/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/build_util/macos/build_util_macos/__init__.py
--------------------------------------------------------------------------------
/build_util/macos/build_util_macos/shlib_tools.py:
--------------------------------------------------------------------------------
1 | """
2 | macOSにおいて共有ライブラリを操作するためのツールをまとめたモジュール
3 | """
4 |
5 | import subprocess
6 | from pathlib import Path
7 | from typing import List
8 |
9 |
def get_dylib_paths(base_path: Path) -> List[Path]:
    """Return every ``*.dylib`` file located in base_path or any directory below it."""
    found: List[Path] = []
    found.extend(base_path.rglob("*.dylib"))
    return found
13 |
14 |
def get_rpaths(shared_lib_path: Path) -> List[Path]:
    """Return the paths that ``otool -L`` lists for the given shared library.

    The first output line is skipped, and for every remaining line the text
    up to the first space (after stripping leading whitespace) is taken as a
    path. Entries whose file name, up to its first ".", equals that of
    shared_lib_path are dropped: such an entry is the library's own install
    ID rather than a dependency.

    The exit status of ``otool`` is not checked.
    """
    completed = subprocess.run(
        ["otool", "-L", str(shared_lib_path)], stdout=subprocess.PIPE
    )
    listing = completed.stdout.decode("utf-8").splitlines()[1:]

    own_stem = shared_lib_path.name.split(".")[0]
    dependencies: List[Path] = []
    for entry in listing:
        first_token = entry.lstrip().split(" ", maxsplit=1)[0]
        candidate = Path(first_token)
        if candidate.name.split(".")[0] == own_stem:
            # Same library name as the file itself: install ID, not a dependency
            continue
        dependencies.append(candidate)
    return dependencies
30 |
31 |
def is_distributable_rpath(rpath: Path) -> bool:
    """Return whether rpath is independent of packages installed in the dev environment.

    A path is considered distributable when its string form starts with one
    of these prefixes:

    - placeholders that are resolved automatically at run time:
      ``@executable_path/``, ``@loader_path/``, ``@rpath/``
    - directories holding the system's standard libraries:
      ``/usr/lib/``, ``/System/Library/Frameworks/``,
      ``/System/Library/PrivateFrameworks/``

    Args:
        rpath: Path to classify.

    Returns:
        True if rpath starts with one of the prefixes above, otherwise False.
    """
    DISTRIBUTABLE_PREFIXES = (
        "@executable_path/",
        "@loader_path/",
        "@rpath/",
        "/usr/lib/",
        "/System/Library/Frameworks/",
        "/System/Library/PrivateFrameworks/",
    )
    # str.startswith accepts a tuple and returns True if any prefix matches
    return str(rpath).startswith(DISTRIBUTABLE_PREFIXES)
61 |
62 |
def change_rpath(old_rpath: Path, new_rpath: Path, dylib_path: Path, base_path: Path):
    """Point the reference old_rpath inside dylib_path at new_rpath via ``@rpath/``.

    new_rpath is first made relative to base_path, then
    ``install_name_tool -change`` is run on dylib_path to replace old_rpath
    with ``@rpath/<relative new_rpath>``. The exit status of the tool is not
    checked.

    Raises:
        ValueError: If new_rpath is not located under base_path.
    """
    rpath_target = "@rpath/" + str(new_rpath.relative_to(base_path))
    command = ["install_name_tool", "-change", old_rpath, rpath_target, dylib_path]
    subprocess.run(command)
75 |
76 |
class SharedLib:
    """A shared library's path together with the paths get_rpaths reports for it."""

    # Location of the shared library file
    __path: Path
    # Result of get_rpaths for that file, computed once in __init__
    __rpaths: List[Path]

    def __init__(self, shared_lib_path: Path):
        self.__path = shared_lib_path
        self.__rpaths = get_rpaths(shared_lib_path)

    @property
    def path(self) -> Path:
        """Path of the shared library this object describes."""
        return self.__path

    def get_non_distributable_rpaths(self) -> List[Path]:
        """Return the stored paths for which is_distributable_rpath is false,
        i.e. the ones that depend on the development environment."""
        dev_only: List[Path] = []
        for candidate in self.__rpaths:
            if is_distributable_rpath(candidate):
                continue
            dev_only.append(candidate)
        return dev_only
94 |
--------------------------------------------------------------------------------
/build_util/macos/copy_missing_dylibs.py:
--------------------------------------------------------------------------------
1 | """
2 | 配布物内の.dylibファイルの不足を解消するためのスクリプト
3 | 引数で指定したbase_directory以下にある.dylibファイルのrpathをチェックし、
4 | rpathの指す.dylibファイルがbase_directory以下に存在しなかった場合、
5 | rpathの指している場所からその.dylibファイルをbase_directory直下へとコピーする。
6 | """
7 |
8 | import argparse
9 | import shutil
10 | import sys
11 | from pathlib import Path
12 | from typing import List, Set
13 |
14 | from build_util_macos.shlib_tools import SharedLib, get_dylib_paths
15 |
parser = argparse.ArgumentParser()
parser.add_argument(
    "base_directory", help="copy the missing dylibs under base_directory", type=str
)
args = parser.parse_args()
base_dir_path = Path(args.base_directory)

if not (base_dir_path.exists() and base_dir_path.is_dir()):
    print("could not find the directory:", str(base_dir_path), file=sys.stderr)
    # sys.exit rather than the builtin exit(): the latter is injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    sys.exit(1)

# dylibs obtained by searching every subdirectory below base_dir_path
dylib_paths: List[Path] = get_dylib_paths(base_dir_path)
# file names of all of those dylibs
dylib_names: List[str] = [path.name for path in dylib_paths]

# dylibs that have at least one rpath depending on the development environment
non_distributable_dylibs: List[SharedLib] = [
    lib for lib in map(SharedLib, dylib_paths) if lib.get_non_distributable_rpaths()
]

# the set of rpaths that depend on the development environment
non_distributable_rpaths: Set[Path] = set()
for dylib in non_distributable_dylibs:
    non_distributable_rpaths.update(dylib.get_non_distributable_rpaths())

# dylibs referenced by an rpath whose file name is not present under base_dir_path
external_dylib_paths: List[Path] = [
    rpath for rpath in non_distributable_rpaths if rpath.name not in dylib_names
]

# copy the missing dylibs directly into base_dir_path
for dylib_path in external_dylib_paths:
    shutil.copy(dylib_path, base_dir_path, follow_symlinks=True)
54 |
--------------------------------------------------------------------------------
/build_util/macos/fix_rpaths.py:
--------------------------------------------------------------------------------
1 | """
2 | 配布物内の.dylibファイルのrpathをどのようなユーザー環境においても有効になるように修正するスクリプト
3 | 引数で指定したbase_directory以下にある.dylibファイルのrpathをチェックし、
4 | 開発環境に依存した(配布先の環境に存在することが保証されていない)rpathであった場合、
5 | base_directory以下の.dylibファイルを相対パスで指すように変更する。
6 | (base_directory以下の.dylibファイルに不足がないことを前提とする。)
7 | """
8 |
9 | import argparse
10 | import sys
11 | from pathlib import Path
12 | from typing import List, Set
13 |
14 | from build_util_macos.shlib_tools import SharedLib, change_rpath, get_dylib_paths
15 |
parser = argparse.ArgumentParser()
parser.add_argument(
    "base_directory", help="fix the rpaths of the dylibs under base_directory", type=str
)
args = parser.parse_args()
base_dir_path = Path(args.base_directory)

if not (base_dir_path.exists() and base_dir_path.is_dir()):
    print("could not find the directory:", str(base_dir_path), file=sys.stderr)
    # sys.exit rather than the builtin exit(): the latter is injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    sys.exit(1)

# dylibs obtained by searching every subdirectory below base_dir_path
internal_dylib_paths: List[Path] = get_dylib_paths(base_dir_path)
# file names of all of those dylibs
internal_dylib_names: List[str] = [path.name for path in internal_dylib_paths]

# dylibs that have at least one rpath depending on the development environment
non_distributable_dylibs: List[SharedLib] = [
    lib
    for lib in map(SharedLib, internal_dylib_paths)
    if lib.get_non_distributable_rpaths()
]

# the set of rpaths that depend on the development environment
non_distributable_rpaths: Set[Path] = set()
for dylib in non_distributable_dylibs:
    non_distributable_rpaths.update(dylib.get_non_distributable_rpaths())

# dylibs referenced by an rpath whose file name is not present under base_dir_path
external_dylib_paths: List[Path] = [
    rpath
    for rpath in non_distributable_rpaths
    if rpath.name not in internal_dylib_names
]

# if dylibs are missing under base_dir_path, list them and stop
if external_dylib_paths:
    print(
        f"following dylibs not found under base_dir_path ({str(base_dir_path)}):",
        file=sys.stderr,
    )
    for path in external_dylib_paths:
        print(f"\t{path.name}", file=sys.stderr)
    sys.exit(1)

# repoint every development-environment rpath at the dylib with the same
# file name under base_dir_path
for dylib in non_distributable_dylibs:
    for rpath in dylib.get_non_distributable_rpaths():
        for internal_dylib_path in internal_dylib_paths:
            if internal_dylib_path.name == rpath.name:
                change_rpath(rpath, internal_dylib_path, dylib.path, base_dir_path)
67 |
--------------------------------------------------------------------------------
/build_util/merge_update_infos.py:
--------------------------------------------------------------------------------
1 | """
2 | 更新履歴をマージする。
3 | """
4 |
5 | import argparse
6 | import json
7 | from collections import OrderedDict
8 | from pathlib import Path
9 | from typing import Dict, List, Union
10 |
11 |
def merge_json_string(src: str, dst: str) -> str:
    """
    Merge two update-history JSON strings and return the merged JSON string.

    Both arguments are JSON arrays of objects that each carry a "version"
    key. The result contains the entries of ``src``; entries of ``dst`` only
    contribute values to the ``src`` entry with the same version.

    Entries with the same version have their elements combined
    >>> src = '[{"version": "0.0.1", "a": ["a1"], "b": ["b1", "b2"]}]'
    >>> dst = '[{"version": "0.0.1", "a": ["a2"], "b": ["b1", "b3"]}]'
    >>> merge_json_string(src, dst)
    '[{"version": "0.0.1", "a": ["a1", "a2"], "b": ["b1", "b2", "b3"]}]'

    Versions that do not exist in src are ignored
    >>> src = '[{"version": "1"}]'
    >>> dst = '[{"version": "1"}, {"version": "2"}]'
    >>> merge_json_string(src, dst)
    '[{"version": "1"}]'

    Keys missing from the matching dst entry are left untouched
    >>> src = '[{"version": "1", "a": ["a1"]}]'
    >>> dst = '[{"version": "1"}]'
    >>> merge_json_string(src, dst)
    '[{"version": "1", "a": ["a1"]}]'
    """
    src_json: List[Dict[str, Union[str, List[str]]]] = json.loads(src)
    dst_json: List[Dict[str, Union[str, List[str]]]] = json.loads(dst)

    for src_item in src_json:
        for dst_item in dst_json:
            if src_item["version"] != dst_item["version"]:
                continue

            for key in src_item:
                # "version" is the join key; a key absent from dst has
                # nothing to contribute (and used to raise KeyError).
                if key == "version" or key not in dst_item:
                    continue

                # Append only the values that are not already present,
                # keeping first-seen order.
                src_item[key] = list(
                    OrderedDict.fromkeys(src_item[key] + dst_item[key])
                )

    return json.dumps(src_json)
42 |
43 |
def merge_update_infos(src_path: Path, dst_path: Path, output_path: Path) -> None:
    """Merge the update-history files at ``src_path`` and ``dst_path`` into ``output_path``.

    Both inputs are read as UTF-8 text, merged with ``merge_json_string``,
    and the resulting JSON string is written to ``output_path``.
    ``output_path`` may be the same file as one of the inputs, because both
    inputs are fully read before anything is written.
    """
    src = src_path.read_text(encoding="utf-8")
    dst = dst_path.read_text(encoding="utf-8")
    merged = merge_json_string(src, dst)
    # Write with the same explicit encoding used for reading instead of
    # relying on the platform default.
    output_path.write_text(merged, encoding="utf-8")
49 |
50 |
if __name__ == "__main__":
    # Command-line entry point: three positional paths, in the order
    # source, destination, output.
    cli = argparse.ArgumentParser()
    for positional in ("src_path", "dst_path", "output_path"):
        cli.add_argument(positional, type=Path)
    options = cli.parse_args()
    merge_update_infos(options.src_path, options.dst_path, options.output_path)
58 |
--------------------------------------------------------------------------------
/build_util/process_voicevox_resource.bash:
--------------------------------------------------------------------------------
set -eux

# Abort early when the location of the downloaded resources is not provided.
if [ ! -v DOWNLOAD_RESOURCE_PATH ]; then
    echo "DOWNLOAD_RESOURCE_PATHが未定義です"
    exit 1
fi

# Replace the bundled speaker_info with the downloaded character_info.
rm -r speaker_info
cp -r "$DOWNLOAD_RESOURCE_PATH/character_info" speaker_info

python "$DOWNLOAD_RESOURCE_PATH/scripts/clean_character_info.py" \
    --character_info_dir speaker_info/

# Manifest: merge the downloaded manifest into the local one.
# Written to a temporary file first because engine_manifest.json is also an input.
jq -s '.[0] * .[1]' engine_manifest.json "$DOWNLOAD_RESOURCE_PATH/engine/engine_manifest.json" \
    > engine_manifest.json.tmp
mv engine_manifest.json.tmp engine_manifest.json

# Update history: merged by the helper script instead of being overwritten.
python build_util/merge_update_infos.py \
    engine_manifest_assets/update_infos.json \
    "$DOWNLOAD_RESOURCE_PATH/engine/engine_manifest_assets/update_infos.json" \
    engine_manifest_assets/update_infos.json

# Copy every other manifest asset. update_infos.json is skipped because it
# was merged above. A quoted glob is used instead of parsing `ls` output so
# that paths containing whitespace are handled correctly.
for f in "$DOWNLOAD_RESOURCE_PATH"/engine/engine_manifest_assets/*; do
    # An unmatched glob stays literal; skip it rather than failing in cp.
    [ -e "$f" ] || continue
    case "${f##*/}" in
        *update_infos.json*) continue ;;
    esac
    cp "$f" ./engine_manifest_assets/
done
27 |
--------------------------------------------------------------------------------
/default.csv:
--------------------------------------------------------------------------------
1 | 朱司,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,アカシ,アカシ,1/3,C1
2 | 青山,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,アオヤマ,アオヤマ,2/4,C1
3 | 雨晴,1350,1350,7000,名詞,固有名詞,人名,姓,*,*,*,アメハレ,アメハレ,2/4,C1
4 | アル,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,アル,アル,1/2,C1
5 | うさぎ,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ウサギ,ウサギ,0/3,C1
6 | 櫻歌,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,オウカ,オーカ,1/3,C1
7 | 音街,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,オトマチ,オトマチ,2/4,C1
8 | 春日部,1350,1350,8600,名詞,固有名詞,人名,姓,*,*,*,カスカベ,カスカベ,0/4,C1
9 | 麒ヶ島,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,キガシマ,キガシマ,2/4,C1
10 | 紲星,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,キズナ,キズナ,1/3,C1
11 | 九州,1350,1350,8600,名詞,固有名詞,人名,姓,*,*,*,キュウシュウ,キュウシュウ,1/4,C1
12 | キョウコ,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,キョオコ,キョオコ,1/3,C1
13 | 玄野,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,クロノ,クロノ,1/3,C1
14 | 剣崎,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,ケンザキ,ケンザキ,1/4,C1
15 | 後鬼,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ゴキ,ゴキ,1/2,C1
16 | 虎太郎,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,コタロウ,コタロー,4/4,C1
17 | 琴葉,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,コトノハ,コトノハ,0/4,C1
18 | 小夜,1351,1351,2200,名詞,固有名詞,人名,名,*,*,*,サヨ,サヨ,1/2,C1
19 | 四国,1350,1350,2200,名詞,固有名詞,人名,姓,*,*,*,シコク,シコク,1/3,C1
20 | 白上,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,シラカミ,シラカミ,4/4,C1
21 | ずんだもん,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ズンダモン,ズンダモン,1/5,C1
22 | そら,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ソラ,ソラ,1/2,C1
23 | 宗麟,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ソウリン,ソウリン,1/4,C1
24 | タイプT,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,タイプティー,タイプティー,4/5,C1
25 | 中国,1350,1350,8600,名詞,固有名詞,人名,姓,*,*,*,チュウゴク,チュウゴク,1/4,C1
26 | 波音,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ナミネ,ナミネ,0/3,C1
27 | 武宏,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,タケヒロ,タケヒロ,2/4,C1
28 | ちび式じい,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,チビシキジー,チビシキジー,5/6,C1
29 | 月読,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ツクヨミ,ツクヨミ,0/4,C1
30 | つむぎ,1351,1351,7450,名詞,固有名詞,人名,名,*,*,*,ツムギ,ツムギ,0/3,C1
31 | ナースロボ,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ナースロボ,ナースロボ,4/5,C1
32 | ナナ,1351,1351,8600,名詞,固有名詞,人名,名,*,*,*,ナナ,ナナ,1/2,C1
33 | No.7,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ナンバーセブン,ナンバーセブン,5/7,C1
34 | 猫使,1350,1350,2200,名詞,固有名詞,人名,姓,*,*,*,ネコツカ,ネコツカ,2/4,C1
35 | はう,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,ハウ,ハウ,1/2,C1
36 | 春歌,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ハルカ,ハルカ,1/3,C1
37 | 桜乃,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ハルノ,ハルノ,1/3,C1
38 | ビィ,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ビー,ビー,1/2,C1
39 | ひまり,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ヒマリ,ヒマリ,0/3,C1
40 | 紅桜,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ベニザクラ,ベニザクラ,3/5,C1
41 | 聖騎士,1350,1350,8600,名詞,固有名詞,人名,姓,*,*,*,ホーリーナイト,ホーリーナイト,5/7,C1
42 | WhiteCUL,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ホワイトカル,ホワイトカル,5/6,C1
43 | ミコ,1351,1351,3900,名詞,固有名詞,人名,名,*,*,*,ミコ,ミコ,1/2,C1
44 | 水奈瀬,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ミナセ,ミナセ,2/3,C1
45 | 冥鳴,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,メイメイ,メイメイ,1/4,C1
46 | 鳴花,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,メイカ,メイカ,1/3,C1
47 | めたん,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,メタン,メタン,1/3,C1
48 | 雌雄,1351,1351,8600,名詞,固有名詞,人名,名,*,*,*,メスオ,メスオ,0/3,C1
49 | もち子さん,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,モチコサン,モチコサン,1/5,C1
50 | モチノ,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,モチノ,モチノ,0/3,C1
51 | 結月,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ユヅキ,ユヅキ,1/3,C1
52 | 弓鶴,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ユヅル,ユヅル,0/3,C1
53 | リツ,1351,1351,3900,名詞,固有名詞,人名,名,*,*,*,リツ,リツ,1/2,C1
54 | 六花,1351,1351,4900,名詞,固有名詞,人名,名,*,*,*,リッカ,リッカ,1/3,C1
55 | 龍星,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,リュウセイ,リュウセイ,1/4,C1
56 | 雀松,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ワカマツ,ワカマツ,2/4,C1
57 | COEIROINK,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエイロインク,コエイロインク,5/7,C1
58 | coeiroink,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエイロインク,コエイロインク,5/7,C1
59 | CoeFont,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエフォント,コエフォント,3/5,C1
60 | coefont,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエフォント,コエフォント,3/5,C1
61 | Nemo,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,ネモ,ネモ,1/2,C1
62 | nemo,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,ネモ,ネモ,1/2,C1
63 | TALQu,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,トーク,トーク,0/3,C1
64 | talqu,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,トーク,トーク,0/3,C1
65 | VOICEVOX,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,ボイスボックス,ボイスボックス,4/7,C1
66 | voicevox,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,ボイスボックス,ボイスボックス,4/7,C1
67 |
--------------------------------------------------------------------------------
/default_setting.yml:
--------------------------------------------------------------------------------
1 | allow_origin: null
2 | cors_policy_mode: localapps
3 |
--------------------------------------------------------------------------------
/docs/VOICEVOX音声合成エンジンとの連携.md:
--------------------------------------------------------------------------------
1 | メモ書き程度ですが、どういう方針で開発を進めているかを紹介します。
2 |
3 | - バージョンが上がっても、`/audio_query`で返ってくる値をそのまま`/synthesis`に POST すれば音声合成できるようにする予定です
4 | - `AudioQuery`のパラメータは増えますが、なるべくデフォルト値で以前と変わらない音声が生成されるようにします
5 | - バージョン 0.7 から音声スタイルが実装されました。スタイルの情報は`/speakers`から取得できます
6 |
--------------------------------------------------------------------------------
/docs/api/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/docs/api/.gitkeep
--------------------------------------------------------------------------------
/docs/licenses/nccl/License.txt:
--------------------------------------------------------------------------------
1 |
2 | Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions
6 | are met:
7 | * Redistributions of source code must retain the above copyright
8 | notice, this list of conditions and the following disclaimer.
9 | * Redistributions in binary form must reproduce the above copyright
10 | notice, this list of conditions and the following disclaimer in the
11 | documentation and/or other materials provided with the distribution.
12 | * Neither the name of NVIDIA CORPORATION, Lawrence Berkeley National
13 | Laboratory, the U.S. Department of Energy, nor the names of their
14 | contributors may be used to endorse or promote products derived
15 | from this software without specific prior written permission.
16 |
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
18 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
21 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
25 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
29 | The U.S. Department of Energy funded the development of this software
30 | under subcontract 7078610 with Lawrence Berkeley National Laboratory.
31 |
32 |
--------------------------------------------------------------------------------
/docs/licenses/open_jtalk/COPYING:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The Japanese TTS System "Open JTalk" */
3 | /* developed by HTS Working Group */
4 | /* http://open-jtalk.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2008-2016 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* All rights reserved. */
11 | /* */
12 | /* Redistribution and use in source and binary forms, with or */
13 | /* without modification, are permitted provided that the following */
14 | /* conditions are met: */
15 | /* */
16 | /* - Redistributions of source code must retain the above copyright */
17 | /* notice, this list of conditions and the following disclaimer. */
18 | /* - Redistributions in binary form must reproduce the above */
19 | /* copyright notice, this list of conditions and the following */
20 | /* disclaimer in the documentation and/or other materials provided */
21 | /* with the distribution. */
22 | /* - Neither the name of the HTS working group nor the names of its */
23 | /* contributors may be used to endorse or promote products derived */
24 | /* from this software without specific prior written permission. */
25 | /* */
26 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
27 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
28 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
29 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
30 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
31 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
32 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
33 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
34 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
35 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
36 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
37 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
38 | /* POSSIBILITY OF SUCH DAMAGE. */
39 | /* ----------------------------------------------------------------- */
40 |
--------------------------------------------------------------------------------
/docs/licenses/open_jtalk/mecab-naist-jdic/COPYING:
--------------------------------------------------------------------------------
1 | Copyright (c) 2009, Nara Institute of Science and Technology, Japan.
2 |
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are
7 | met:
8 |
9 | Redistributions of source code must retain the above copyright notice,
10 | this list of conditions and the following disclaimer.
11 | Redistributions in binary form must reproduce the above copyright
12 | notice, this list of conditions and the following disclaimer in the
13 | documentation and/or other materials provided with the distribution.
14 | Neither the name of the Nara Institute of Science and Technology
15 | (NAIST) nor the names of its contributors may be used to endorse or
16 | promote products derived from this software without specific prior
17 | written permission.
18 |
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | /* ----------------------------------------------------------------- */
32 | /* The Japanese TTS System "Open JTalk" */
33 | /* developed by HTS Working Group */
34 | /* http://open-jtalk.sourceforge.net/ */
35 | /* ----------------------------------------------------------------- */
36 | /* */
37 | /* Copyright (c) 2008-2016 Nagoya Institute of Technology */
38 | /* Department of Computer Science */
39 | /* */
40 | /* All rights reserved. */
41 | /* */
42 | /* Redistribution and use in source and binary forms, with or */
43 | /* without modification, are permitted provided that the following */
44 | /* conditions are met: */
45 | /* */
46 | /* - Redistributions of source code must retain the above copyright */
47 | /* notice, this list of conditions and the following disclaimer. */
48 | /* - Redistributions in binary form must reproduce the above */
49 | /* copyright notice, this list of conditions and the following */
50 | /* disclaimer in the documentation and/or other materials provided */
51 | /* with the distribution. */
52 | /* - Neither the name of the HTS working group nor the names of its */
53 | /* contributors may be used to endorse or promote products derived */
54 | /* from this software without specific prior written permission. */
55 | /* */
56 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
57 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
58 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
59 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
60 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
61 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
62 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
63 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
64 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
65 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
66 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
67 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
68 | /* POSSIBILITY OF SUCH DAMAGE. */
69 | /* ----------------------------------------------------------------- */
70 |
--------------------------------------------------------------------------------
/docs/licenses/open_jtalk/mecab/COPYING:
--------------------------------------------------------------------------------
1 | Copyright (c) 2001-2008, Taku Kudo
2 | Copyright (c) 2004-2008, Nippon Telegraph and Telephone Corporation
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without modification, are
6 | permitted provided that the following conditions are met:
7 |
8 | * Redistributions of source code must retain the above
9 | copyright notice, this list of conditions and the
10 | following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above
13 | copyright notice, this list of conditions and the
14 | following disclaimer in the documentation and/or other
15 | materials provided with the distribution.
16 |
17 | * Neither the name of the Nippon Telegraph and Telegraph Corporation
18 | nor the names of its contributors may be used to endorse or
19 | promote products derived from this software without specific
20 | prior written permission.
21 |
22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
23 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
28 | TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | /* ----------------------------------------------------------------- */
32 | /* The Japanese TTS System "Open JTalk" */
33 | /* developed by HTS Working Group */
34 | /* http://open-jtalk.sourceforge.net/ */
35 | /* ----------------------------------------------------------------- */
36 | /* */
37 | /* Copyright (c) 2008-2016 Nagoya Institute of Technology */
38 | /* Department of Computer Science */
39 | /* */
40 | /* All rights reserved. */
41 | /* */
42 | /* Redistribution and use in source and binary forms, with or */
43 | /* without modification, are permitted provided that the following */
44 | /* conditions are met: */
45 | /* */
46 | /* - Redistributions of source code must retain the above copyright */
47 | /* notice, this list of conditions and the following disclaimer. */
48 | /* - Redistributions in binary form must reproduce the above */
49 | /* copyright notice, this list of conditions and the following */
50 | /* disclaimer in the documentation and/or other materials provided */
51 | /* with the distribution. */
52 | /* - Neither the name of the HTS working group nor the names of its */
53 | /* contributors may be used to endorse or promote products derived */
54 | /* from this software without specific prior written permission. */
55 | /* */
56 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
57 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
58 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
59 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
60 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
61 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
62 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
63 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
64 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
65 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
66 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
67 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
68 | /* POSSIBILITY OF SUCH DAMAGE. */
69 | /* ----------------------------------------------------------------- */
70 |
--------------------------------------------------------------------------------
/docs/licenses/world/LICENSE.txt:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* WORLD: High-quality speech analysis, */
3 | /* manipulation and synthesis system */
4 | /* developed by M. Morise */
5 | /* http://www.kisc.meiji.ac.jp/~mmorise/world/english/ */
6 | /* ----------------------------------------------------------------- */
7 | /* */
8 | /* Copyright (c) 2010 M. Morise */
9 | /* */
10 | /* All rights reserved. */
11 | /* */
12 | /* Redistribution and use in source and binary forms, with or */
13 | /* without modification, are permitted provided that the following */
14 | /* conditions are met: */
15 | /* */
16 | /* - Redistributions of source code must retain the above copyright */
17 | /* notice, this list of conditions and the following disclaimer. */
18 | /* - Redistributions in binary form must reproduce the above */
19 | /* copyright notice, this list of conditions and the following */
20 | /* disclaimer in the documentation and/or other materials provided */
21 | /* with the distribution. */
22 | /* - Neither the name of the M. Morise nor the names of its */
23 | /* contributors may be used to endorse or promote products derived */
24 | /* from this software without specific prior written permission. */
25 | /* */
26 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
27 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
28 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
29 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
30 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
31 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
32 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
33 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
34 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
35 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
36 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
37 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
38 | /* POSSIBILITY OF SUCH DAMAGE. */
39 | /* ----------------------------------------------------------------- */
40 |
--------------------------------------------------------------------------------
/engine_manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "manifest_version": "0.13.1",
3 | "name": "DUMMY Engine",
4 | "brand_name": "DUMMY",
5 | "uuid": "41d9d6cb-6682-4baa-80b6-875547f71d86",
6 | "version": "999.999.999",
7 | "url": "https://github.com/voicevox-bridge/bridge-plugin",
8 | "command": "run",
9 | "port": 50021,
10 | "icon": "engine_manifest_assets/icon.png",
11 | "default_sampling_rate": 44100,
12 | "terms_of_service": "engine_manifest_assets/terms_of_service.md",
13 | "update_infos": "engine_manifest_assets/update_infos.json",
14 | "dependency_licenses": "engine_manifest_assets/dependency_licenses.json",
15 | "supported_vvlib_manifest_version": "0.15.0",
16 | "supported_features": {
17 | "adjust_mora_pitch": {
18 | "type": "bool",
19 | "value": false,
20 | "name": "モーラごとの音高の調整"
21 | },
22 | "adjust_phoneme_length": {
23 | "type": "bool",
24 | "value": false,
25 | "name": "音素ごとの長さの調整"
26 | },
27 | "adjust_speed_scale": {
28 | "type": "bool",
29 | "value": true,
30 | "name": "全体の話速の調整"
31 | },
32 | "adjust_pitch_scale": {
33 | "type": "bool",
34 | "value": true,
35 | "name": "全体の音高の調整"
36 | },
37 | "adjust_intonation_scale": {
38 | "type": "bool",
39 | "value": true,
40 | "name": "全体の抑揚の調整"
41 | },
42 | "adjust_volume_scale": {
43 | "type": "bool",
44 | "value": true,
45 | "name": "全体の音量の調整"
46 | },
47 | "interrogative_upspeak": {
48 | "type": "bool",
49 | "value": false,
50 | "name": "疑問文の自動調整"
51 | },
52 | "synthesis_morphing" : {
53 | "type": "bool",
54 | "value": false,
55 | "name": "2人の話者でモーフィングした音声を合成"
56 | },
57 | "manage_library": {
58 | "type": "bool",
59 | "value": false,
60 | "name": "音声ライブラリのインストール・アンインストール"
61 | }
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/engine_manifest_assets/dependency_licenses.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "name": "dummy library",
4 | "version": "0.0.1",
5 | "license": "dummy license",
6 | "text": "dummy license text"
7 | }
8 | ]
--------------------------------------------------------------------------------
/engine_manifest_assets/downloadable_libraries.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "name": "Dummy Library",
4 | "uuid": "2bb8bccf-1c3f-4bc9-959a-f388e37af3ad",
5 | "version": "0.0.1",
6 | "download_url": "https://github.com/VOICEVOX/voicevox_engine/archive/d7cf31c058bc83e1abf8e14d4231a06409c4cc2d.zip",
7 | "bytes": 1000,
8 | "speakers": [
9 | {
10 | "speaker": {
11 | "name": "dummy1",
12 | "speaker_uuid": "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff",
13 | "styles": [
14 | {
15 | "name": "style1",
16 | "id": 0
17 | },
18 | {
19 | "name": "style2",
20 | "id": 2
21 | }
22 | ],
23 | "version": "0.0.1"
24 | },
25 | "speaker_info": {
26 | "policy": "",
27 | "portrait": "",
28 | "style_infos": [
29 | {
30 | "id": 0,
31 | "icon": "",
32 | "voice_samples": ["", "", ""]
33 | },
34 | {
35 | "id": 2,
36 | "icon": "",
37 | "voice_samples": ["", "", ""]
38 | }
39 | ]
40 | }
41 | }
42 | ]
43 | }
44 | ]
45 |
--------------------------------------------------------------------------------
/engine_manifest_assets/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/engine_manifest_assets/icon.png
--------------------------------------------------------------------------------
/engine_manifest_assets/terms_of_service.md:
--------------------------------------------------------------------------------
1 | dummy terms of service
--------------------------------------------------------------------------------
/engine_manifest_assets/update_infos.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "version": "0.14.5",
4 | "descriptions": [
5 | "キャラクター「中国うさぎ」を追加",
6 | "キャラクター「波音リツ」「もち子さん」のスタイルを追加"
7 | ],
8 | "contributors": []
9 | },
10 | {
11 | "version": "0.14.4",
12 | "descriptions": [
13 | "キャラクター「春歌ナナ」「猫使アル」「猫使ビィ」を追加",
14 | "バグ修正"
15 | ],
16 | "contributors": ["Hiroshiba"]
17 | },
18 | {
19 | "version": "0.14.3",
20 | "descriptions": [
21 | "キャラクター「†聖騎士 紅桜†」「雀松朱司」「麒ヶ島宗麟」を追加",
22 | "同時書き込みで辞書が破損する問題を修正"
23 | ],
24 | "contributors": ["Hiroshiba"]
25 | },
26 | {
27 | "version": "0.14.2",
28 | "descriptions": ["DirectML版の生成が遅い問題を修正"],
29 | "contributors": []
30 | },
31 | {
32 | "version": "0.14.1",
33 | "descriptions": ["AquesTalkライクな記法で生成した音声のバグを修正"],
34 | "contributors": []
35 | },
36 | {
37 | "version": "0.14.0",
38 | "descriptions": [
39 | "コアをRust言語に移行",
40 | "セキュリティアップデート",
41 | "スタイルごとに異なる立ち絵の提供を可能に",
42 | "VVPPファイルの提供",
43 | "設定GUIの提供",
44 | "プリセットの保存",
45 | "モーフィングAPIの仕様変更",
46 | "DirectML利用時に適したGPUを自動選択",
47 | "開発環境の向上",
48 | "バグ修正"
49 | ],
50 | "contributors": [
51 | "aoirint",
52 | "Appletigerv",
53 | "haru3me",
54 | "Hiroshiba",
55 | "ksk001100",
56 | "masinc",
57 | "misogihagi",
58 | "My-MC",
59 | "nebocco",
60 | "PickledChair",
61 | "qryxip",
62 | "qwerty2501",
63 | "sabonerune",
64 | "sarisia",
65 | "Segu-g",
66 | "sevenc-nanashi",
67 | "shigobu",
68 | "smly",
69 | "takana-v",
70 | "ts-klassen",
71 | "whiteball",
72 | "y-chan"
73 | ]
74 | }
75 | ]
76 |
--------------------------------------------------------------------------------
/get_cost_candidates.py:
--------------------------------------------------------------------------------
1 | """
2 | voicevox_engine/part_of_speech_data.pyのcost_candidatesを計算するプログラムです。
3 | 引数のnaist_jdic_pathには、open_jtalkのsrc/mecab-naist-jdic/naist-jdic.csvを指定してください。
4 |
5 | 実行例:
6 | python get_cost_candidates.py --naist_jdic_path=/path/to/naist-jdic.csv \
7 | --pos=名詞 \
8 | --pos_detail_1=固有名詞 \
9 | --pos_detail_2=一般 \
10 | --pos_detail_3=*
11 |
12 | cost_candidatesの値の詳細は以下の通りです。
13 | - 1番目の値はnaist_jdic内の同一品詞の最小コストから1を引いたもの、11番目の値は最大コストに1を足したものです。
14 | - 2番目の値はnaist_jdic内の同一品詞のコストの下位1%、10番目の値は99%の値です。
15 | - 6番目の値はnaist_jdic内の同一品詞のコストの最頻値です。
16 | - 2番目から6番目、6番目から10番目までの値は一定割合で増加するようになっています。
17 | """
18 |
19 | import argparse
20 | import statistics
21 | from pathlib import Path
22 | from typing import List
23 |
24 | import numpy as np
25 |
26 |
def get_candidates(
    naist_jdic_path: Path,
    pos: str,
    pos_detail_1: str,
    pos_detail_2: str,
    pos_detail_3: str,
) -> List[int]:
    """Derive the 11 cost candidates for one part-of-speech category.

    The dictionary file is read line by line; every line must split on ","
    into exactly 15 fields. The 4th field is the cost and the 5th-8th fields
    are the part of speech and its three detail levels. Only rows whose four
    part-of-speech fields equal the requested ones contribute their cost.

    Args:
        naist_jdic_path: Path to the dictionary CSV, opened as UTF-8.
        pos: Part of speech to select.
        pos_detail_1: First part-of-speech detail to select.
        pos_detail_2: Second part-of-speech detail to select.
        pos_detail_3: Third part-of-speech detail to select.

    Returns:
        Eleven integers:
        index 0 is the smallest matching cost minus 1;
        indices 1-5 step in four equal increments from the 1% quantile to
        the mode of the matching costs;
        indices 6-9 continue in four equal increments from the mode to the
        99% quantile (the mode itself is not repeated);
        index 10 is the largest matching cost plus 1.

    Raises:
        ValueError: A line does not have exactly 15 comma-separated fields,
            or the cost of a matching row is not an integer literal.
        AssertionError: No row matches the requested part of speech.
    """
    wanted = (pos, pos_detail_1, pos_detail_2, pos_detail_3)
    matched_costs = []
    with naist_jdic_path.open(encoding="utf-8") as dic_file:
        for record in dic_file:
            # Unpacking into exactly 15 targets doubles as a field-count check.
            (
                _, _, _, raw_cost,
                row_pos, row_detail_1, row_detail_2, row_detail_3,
                _, _, _, _, _, _, _,
            ) = record.split(",")
            if (row_pos, row_detail_1, row_detail_2, row_detail_3) != wanted:
                continue
            # The cost is parsed only for rows that are actually selected.
            matched_costs.append(int(raw_cost))
    assert len(matched_costs) > 0

    lowest = min(matched_costs) - 1
    highest = max(matched_costs) + 1
    most_common = statistics.mode(matched_costs)
    # Quantiles are truncated to integers before any interpolation happens.
    q01 = np.quantile(matched_costs, 0.01).astype(np.int64)
    q99 = np.quantile(matched_costs, 0.99).astype(np.int64)

    # step 0..4 runs from the 1% quantile up to (and including) the mode.
    up_to_mode = [
        int(q01 + (most_common - q01) * step / 4) for step in range(5)
    ]
    # step 1..4 runs from just above the mode up to the 99% quantile.
    past_mode = [
        int(most_common + (q99 - most_common) * step / 4) for step in range(1, 5)
    ]
    return [lowest, *up_to_mode, *past_mode, highest]
73 |
74 |
if __name__ == "__main__":
    # Command-line entry point: print the cost candidates for one
    # part-of-speech category of the given dictionary CSV.
    #
    # Every option is passed straight to get_candidates() and none has a
    # usable default, so each one is required. A missing option is then
    # reported as an argparse usage error instead of surfacing later as an
    # AttributeError on None (path) or as a failed assertion (no row can
    # match a None part of speech).
    parser = argparse.ArgumentParser()
    parser.add_argument("--naist_jdic_path", type=Path, required=True)
    parser.add_argument("--pos", type=str, required=True)
    parser.add_argument("--pos_detail_1", type=str, required=True)
    parser.add_argument("--pos_detail_2", type=str, required=True)
    parser.add_argument("--pos_detail_3", type=str, required=True)
    args = parser.parse_args()
    print(
        get_candidates(
            naist_jdic_path=args.naist_jdic_path,
            pos=args.pos,
            pos_detail_1=args.pos_detail_1,
            pos_detail_2=args.pos_detail_2,
            pos_detail_3=args.pos_detail_3,
        )
    )
92 |
--------------------------------------------------------------------------------
/make_docs.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | from voicevox_engine.dev.core import mock as core
4 | from voicevox_engine.dev.synthesis_engine.mock import MockSynthesisEngine
5 | from voicevox_engine.preset import PresetManager
6 | from voicevox_engine.setting import USER_SETTING_PATH, SettingLoader
7 | from voicevox_engine.utility import engine_root
8 |
if __name__ == "__main__":
    # Script entry point: build the application with a mock synthesis engine
    # and write a static HTML page that renders its OpenAPI schema.
    import run

    app = run.generate_app(
        synthesis_engines={"mock": MockSynthesisEngine(speakers=core.metas())},
        latest_core_version="mock",
        setting_loader=SettingLoader(USER_SETTING_PATH),
        preset_manager=PresetManager(  # FIXME: impl MockPresetManager
            preset_path=engine_root() / "presets.yaml",
        ),
    )

    # The template must contain exactly one "%s": it receives the OpenAPI
    # schema as a JSON literal, which is also a valid JavaScript expression.
    # Without the placeholder the "%" operator below raises TypeError.
    # NOTE(review): the markup is a minimal ReDoc standalone page rebuilt
    # around the visible title; confirm it against the project's intended
    # template (favicon, ReDoc options, bundle URL).
    template = """<!DOCTYPE html>
<html>
<head>
    <title>voicevox_engine API Document</title>
    <meta charset="utf-8">
</head>
<body>
    <div id="redoc-container"></div>
    <script src="https://cdn.redoc.ly/redoc/latest/bundles/redoc.standalone.js"></script>
    <script>
        Redoc.init(%s, {}, document.getElementById("redoc-container"));
    </script>
</body>
</html>"""

    # Write as UTF-8 explicitly so the file matches the charset declared in
    # the page regardless of the platform's default encoding.
    with open("docs/api/index.html", "w", encoding="utf-8") as f:
        f.write(template % json.dumps(app.openapi()))
39 |
--------------------------------------------------------------------------------
/nuitka-config.yaml:
--------------------------------------------------------------------------------
1 | - module-name: 'librosa.decompose'
2 | anti-bloat:
3 | - description: 'remove sklearn dependency'
4 | replacements_plain:
5 | 'import sklearn.decomposition': ''
6 |
7 | - module-name: 'librosa.segment'
8 | anti-bloat:
9 | - description: 'remove sklearn dependency'
10 | replacements_plain:
11 | 'import sklearn.cluster': ''
12 | 'import sklearn.feature_extraction': ''
13 | 'import sklearn.neighbors': ''
14 | 'import sklearn': ''
15 |
16 | - module-name: 'librosa.util.utils'
17 | anti-bloat:
18 | - description: 'remove numba dependency'
19 | replacements_plain:
20 | 'import numba': ''
21 | '@numba.jit': '#'
22 |
23 | - module-name: 'librosa.util.matching'
24 | anti-bloat:
25 | - description: 'remove numba dependency'
26 | replacements_plain:
27 | 'import numba': ''
28 | '@numba.jit': '#'
29 | 'numba.uint32': 'np.uint'
30 |
31 | - module-name: 'librosa.filters'
32 | anti-bloat:
33 | - description: 'remove numba dependency'
34 | replacements_plain:
35 | 'from numba import jit': ''
36 | '@jit': '#'
37 |
38 | - module-name: 'librosa.core.audio'
39 | anti-bloat:
40 | - description: 'remove numba dependency'
41 | replacements_plain:
42 | '@jit': '#'
43 | 'from numba import jit': ''
44 |
45 | - module-name: 'librosa.core.spectrum'
46 | anti-bloat:
47 | - description: 'remove numba dependency'
48 | replacements_plain:
49 | 'from numba import jit': ''
50 | '@jit': '#'
51 |
52 | - module-name: 'librosa.core.constantq'
53 | anti-bloat:
54 | - description: 'remove numba dependency'
55 | replacements_plain:
56 | 'from numba import jit': ''
57 | '@jit': '#'
58 |
59 | - module-name: 'librosa.sequence'
60 | anti-bloat:
61 | - description: 'remove numba dependency'
62 | replacements_plain:
63 | 'from numba import jit': ''
64 | '@jit': '#'
65 |
66 | - module-name: 'librosa.feature.utils'
67 | anti-bloat:
68 | - description: 'remove numba dependency'
69 | replacements_plain:
70 | 'from numba import jit': ''
71 | '@jit': '#'
72 |
73 | - module-name: 'resampy.interpn'
74 | anti-bloat:
75 | - description: 'remove numba dependency'
76 | replacements_plain:
77 | 'from numba import guvectorize, jit, prange': |
78 | def guvectorize(*args, **kwargs):
79 | return lambda f: f
80 |
81 | def jit(*args, **kwargs):
82 | return lambda f: f
83 | 'prange(': 'range('
84 |
85 | - module-name: 'resampy.core'
86 | anti-bloat:
87 | - description: 'remove numba dependency'
88 | replacements_plain:
89 | 'import numba': ''
90 | 'numba.TypingError': 'Exception'
91 |
92 | - module-name: 'tacotron_cleaner.cleaners'
93 | anti-bloat:
94 | - description: 'remove unidecode dependency'
95 | replacements_plain:
96 | 'from unidecode import unidecode': ''
97 | 'def convert_to_ascii(text):': ''
98 | 'return unidecode(text)': ''
99 |
100 | - module-name: 'espnet2.gan_tts.vits.monotonic_align'
101 | anti-bloat:
102 | - description: 'remove numba dependency'
103 | replacements_plain:
104 | 'from numba import njit': ''
105 | 'from numba import prange': ''
106 | '@njit': '#'
107 | 'prange(': 'range('
108 |
109 | - module-name: 'espnet'
110 | data-files:
111 | patterns:
112 | - 'version.txt'
113 |
114 | - module-name: 'librosa'
115 | data-files:
116 | dirs:
117 | - 'util/example_data'
118 |
119 | - module-name: 'resampy'
120 | data-files:
121 | dirs:
122 | - 'data'
123 |
124 | - module-name: 'jamo'
125 | data-files:
126 | dirs:
127 | - 'data'
--------------------------------------------------------------------------------
/presets.yaml:
--------------------------------------------------------------------------------
1 | - id: 1
2 | name: サンプルプリセット
3 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff
4 | style_id: 0
5 | speedScale: 1
6 | pitchScale: 0
7 | intonationScale: 1
8 | volumeScale: 1
9 | prePhonemeLength: 0.1
10 | postPhonemeLength: 0.1
11 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.pysen]
2 | version = "0.10.3"
3 |
4 | [tool.pysen.lint]
5 | enable_black = true
6 | enable_flake8 = true
7 | enable_isort = true
8 | enable_mypy = false # TODO: eliminate errors and enable at CI
9 | mypy_preset = "entry" # TODO: "strict"
10 | line_length = 88
11 | py_version = "py310" # TODO: update to py311 after pysen supports Python 3.11
12 | isort_known_first_party = ["voicevox_engine"]
13 | isort_known_third_party = ["numpy"]
14 | [[tool.pysen.lint.mypy_targets]]
15 | paths = [".", "voicevox_engine/"]
16 |
17 | [tool.black] # automatically generated by pysen
18 | # pysen ignores and overwrites any modifications
19 | line-length = 88
20 | target-version = ["py310"] # TODO: update to py311 after pysen supports Python 3.11
21 |
22 |
23 | [tool.isort] # automatically generated by pysen
24 | # pysen ignores and overwrites any modifications
25 | default_section = "THIRDPARTY"
26 | ensure_newline_before_comments = true
27 | force_grid_wrap = 0
28 | force_single_line = false
29 | include_trailing_comma = true
30 | known_first_party = ["voicevox_engine"]
31 | known_third_party = ["numpy"]
32 | line_length = 88
33 | multi_line_output = 3
34 | use_parentheses = true
35 |
36 | [tool.poetry]
37 | name = "voicevox_engine"
38 | version = "0.0.0"
39 | description = ""
40 | authors = ["Hiroshiba "]
41 |
42 | [tool.poetry.dependencies]
43 | python = "~3.11"
44 | numpy = "^1.20.0"
45 | fastapi = "^0.103.2"
46 | python-multipart = "^0.0.5"
47 | uvicorn = "^0.15.0"
48 | aiofiles = "^0.7.0"
49 | soundfile = "^0.12.1"
50 | scipy = "^1.7.1"
51 | pyyaml = "^6.0"
52 | pyworld = "^0.3.0"
53 | requests = "^2.28.1"
54 | jinja2 = "^3.1.2"
55 | pyopenjtalk = {git = "https://github.com/VOICEVOX/pyopenjtalk", rev = "b35fc89fe42948a28e33aed886ea145a51113f88"}
56 | espnet = {git = "https://github.com/voicevox-bridge/espnet", rev = "ff992af2279a03405257a844c48bf83f47805b1b"}
57 | semver = "^3.0.1"
58 | platformdirs = "^3.10.0"
59 | typeguard = "2.13.3"
60 | joblib = "1.2.0"
61 |
62 | [tool.poetry.group.dev.dependencies]
63 | cython = "^0.29.34,>=0.29.33" # NOTE: for Python 3.11
64 | pre-commit = "^2.16.0"
65 | atomicwrites = "^1.4.0"
66 | colorama = "^0.4.4"
67 | poetry = "^1.3.1"
68 | nuitka = "^1.3.7"
69 |
70 | [tool.poetry.group.test.dependencies]
71 | pysen = "~0.10.3"
72 | black = "^22.12.0"
73 | flake8-bugbear = "^23.1.0"
74 | flake8 = "^6.0.0"
75 | isort = "^5.12.0"
76 | mypy = "^1.6.0"
77 | pytest = "^6.2.5"
78 | coveralls = "^3.2.0"
79 | poetry = "^1.3.1"
80 | httpx = "^0.25.0" # NOTE: required by fastapi.testclient.TestClient
81 |
82 | [tool.poetry.group.license.dependencies]
83 | pip-licenses = "^4.2.0"
84 |
85 | [build-system]
86 | requires = ["poetry-core"]
87 | build-backend = "poetry.core.masonry.api"
88 |
--------------------------------------------------------------------------------
/requirements-license.txt:
--------------------------------------------------------------------------------
1 | aiofiles==0.7.0 ; python_version >= "3.11" and python_version < "3.12"
2 | anyio==3.7.1 ; python_version >= "3.11" and python_version < "3.12"
3 | asgiref==3.7.2 ; python_version >= "3.11" and python_version < "3.12"
4 | audioread==3.0.0 ; python_version >= "3.11" and python_version < "3.12"
5 | certifi==2023.7.22 ; python_version >= "3.11" and python_version < "3.12"
6 | cffi==1.15.1 ; python_version >= "3.11" and python_version < "3.12"
7 | charset-normalizer==3.2.0 ; python_version >= "3.11" and python_version < "3.12"
8 | ci-sdr==0.0.2 ; python_version >= "3.11" and python_version < "3.12"
9 | click==8.1.7 ; python_version >= "3.11" and python_version < "3.12"
10 | colorama==0.4.6 ; python_version >= "3.11" and python_version < "3.12" and platform_system == "Windows"
11 | configargparse==1.7 ; python_version >= "3.11" and python_version < "3.12"
12 | ctc-segmentation==1.7.4 ; python_version >= "3.11" and python_version < "3.12"
13 | cython==0.29.36 ; python_version >= "3.11" and python_version < "3.12"
14 | decorator==5.1.1 ; python_version >= "3.11" and python_version < "3.12"
15 | distance==0.1.3 ; python_version >= "3.11" and python_version < "3.12"
16 | einops==0.6.1 ; python_version >= "3.11" and python_version < "3.12"
17 | espnet @ git+https://github.com/voicevox-bridge/espnet@ff992af2279a03405257a844c48bf83f47805b1b ; python_version >= "3.11" and python_version < "3.12"
18 | espnet-tts-frontend==0.0.3 ; python_version >= "3.11" and python_version < "3.12"
19 | fastapi==0.103.2 ; python_version >= "3.11" and python_version < "3.12"
20 | filelock==3.12.4 ; python_version >= "3.11" and python_version < "3.12"
21 | g2p-en==2.1.0 ; python_version >= "3.11" and python_version < "3.12"
22 | h11==0.14.0 ; python_version >= "3.11" and python_version < "3.12"
23 | h5py==3.9.0 ; python_version >= "3.11" and python_version < "3.12"
24 | humanfriendly==10.0 ; python_version >= "3.11" and python_version < "3.12"
25 | idna==3.4 ; python_version >= "3.11" and python_version < "3.12"
26 | inflect==7.0.0 ; python_version >= "3.11" and python_version < "3.12"
27 | jaconv==0.3.4 ; python_version >= "3.11" and python_version < "3.12"
28 | jamo==0.4.1 ; python_version >= "3.11" and python_version < "3.12"
29 | jinja2==3.1.2 ; python_version >= "3.11" and python_version < "3.12"
30 | joblib==1.2.0 ; python_version >= "3.11" and python_version < "3.12"
31 | librosa==0.9.2 ; python_version >= "3.11" and python_version < "3.12"
32 | llvmlite==0.40.1 ; python_version >= "3.11" and python_version < "3.12"
33 | markupsafe==2.1.3 ; python_version >= "3.11" and python_version < "3.12"
34 | mpmath==1.3.0 ; python_version >= "3.11" and python_version < "3.12"
35 | networkx==3.1 ; python_version >= "3.11" and python_version < "3.12"
36 | nltk==3.8.1 ; python_version >= "3.11" and python_version < "3.12"
37 | numba==0.57.1 ; python_version >= "3.11" and python_version < "3.12"
38 | numpy==1.24.4 ; python_version >= "3.11" and python_version < "3.12"
39 | packaging==23.1 ; python_version >= "3.11" and python_version < "3.12"
40 | pip-licenses==4.3.2 ; python_version >= "3.11" and python_version < "3.12"
41 | platformdirs==3.10.0 ; python_version >= "3.11" and python_version < "3.12"
42 | pooch==1.7.0 ; python_version >= "3.11" and python_version < "3.12"
43 | prettytable==3.9.0 ; python_version >= "3.11" and python_version < "3.12"
44 | pycparser==2.21 ; python_version >= "3.11" and python_version < "3.12"
45 | pydantic==1.10.12 ; python_version >= "3.11" and python_version < "3.12"
46 | pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@b35fc89fe42948a28e33aed886ea145a51113f88 ; python_version >= "3.11" and python_version < "3.12"
47 | pypinyin==0.44.0 ; python_version >= "3.11" and python_version < "3.12"
48 | pyreadline3==3.4.1 ; sys_platform == "win32" and python_version >= "3.11" and python_version < "3.12"
49 | python-multipart==0.0.5 ; python_version >= "3.11" and python_version < "3.12"
50 | pytorch-wpe==0.0.1 ; python_version >= "3.11" and python_version < "3.12"
51 | pyworld==0.3.4 ; python_version >= "3.11" and python_version < "3.12"
52 | pyyaml==6.0.1 ; python_version >= "3.11" and python_version < "3.12"
53 | regex==2023.8.8 ; python_version >= "3.11" and python_version < "3.12"
54 | requests==2.31.0 ; python_version >= "3.11" and python_version < "3.12"
55 | resampy==0.4.2 ; python_version >= "3.11" and python_version < "3.12"
56 | scikit-learn==1.3.0 ; python_version >= "3.11" and python_version < "3.12"
57 | scipy==1.11.2 ; python_version >= "3.11" and python_version < "3.12"
58 | semver==3.0.1 ; python_version >= "3.11" and python_version < "3.12"
59 | sentencepiece==0.1.99 ; python_version >= "3.11" and python_version < "3.12"
60 | setuptools==68.2.2 ; python_version >= "3.11" and python_version < "3.12"
61 | six==1.16.0 ; python_version >= "3.11" and python_version < "3.12"
62 | sniffio==1.3.0 ; python_version >= "3.11" and python_version < "3.12"
63 | soundfile==0.12.1 ; python_version >= "3.11" and python_version < "3.12"
64 | starlette==0.27.0 ; python_version >= "3.11" and python_version < "3.12"
65 | sympy==1.12 ; python_version >= "3.11" and python_version < "3.12"
66 | threadpoolctl==3.2.0 ; python_version >= "3.11" and python_version < "3.12"
67 | torch-complex==0.4.3 ; python_version >= "3.11" and python_version < "3.12"
68 | torch==2.0.1 ; python_version >= "3.11" and python_version < "3.12"
69 | tqdm==4.66.1 ; python_version >= "3.11" and python_version < "3.12"
70 | typeguard==2.13.3 ; python_version >= "3.11" and python_version < "3.12"
71 | typing-extensions==4.7.1 ; python_version >= "3.11" and python_version < "3.12"
72 | unidecode==1.3.6 ; python_version >= "3.11" and python_version < "3.12"
73 | urllib3==2.0.4 ; python_version >= "3.11" and python_version < "3.12"
74 | uvicorn==0.15.0 ; python_version >= "3.11" and python_version < "3.12"
75 | wcwidth==0.2.6 ; python_version >= "3.11" and python_version < "3.12"
76 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | aiofiles==0.7.0 ; python_version >= "3.11" and python_version < "3.12"
2 | anyio==3.7.1 ; python_version >= "3.11" and python_version < "3.12"
3 | asgiref==3.7.2 ; python_version >= "3.11" and python_version < "3.12"
4 | audioread==3.0.0 ; python_version >= "3.11" and python_version < "3.12"
5 | certifi==2023.7.22 ; python_version >= "3.11" and python_version < "3.12"
6 | cffi==1.15.1 ; python_version >= "3.11" and python_version < "3.12"
7 | charset-normalizer==3.2.0 ; python_version >= "3.11" and python_version < "3.12"
8 | ci-sdr==0.0.2 ; python_version >= "3.11" and python_version < "3.12"
9 | click==8.1.7 ; python_version >= "3.11" and python_version < "3.12"
10 | colorama==0.4.6 ; python_version >= "3.11" and python_version < "3.12" and platform_system == "Windows"
11 | configargparse==1.7 ; python_version >= "3.11" and python_version < "3.12"
12 | ctc-segmentation==1.7.4 ; python_version >= "3.11" and python_version < "3.12"
13 | cython==0.29.36 ; python_version >= "3.11" and python_version < "3.12"
14 | decorator==5.1.1 ; python_version >= "3.11" and python_version < "3.12"
15 | distance==0.1.3 ; python_version >= "3.11" and python_version < "3.12"
16 | einops==0.6.1 ; python_version >= "3.11" and python_version < "3.12"
17 | espnet @ git+https://github.com/voicevox-bridge/espnet@ff992af2279a03405257a844c48bf83f47805b1b ; python_version >= "3.11" and python_version < "3.12"
18 | espnet-tts-frontend==0.0.3 ; python_version >= "3.11" and python_version < "3.12"
19 | fastapi==0.103.2 ; python_version >= "3.11" and python_version < "3.12"
20 | filelock==3.12.4 ; python_version >= "3.11" and python_version < "3.12"
21 | g2p-en==2.1.0 ; python_version >= "3.11" and python_version < "3.12"
22 | h11==0.14.0 ; python_version >= "3.11" and python_version < "3.12"
23 | h5py==3.9.0 ; python_version >= "3.11" and python_version < "3.12"
24 | humanfriendly==10.0 ; python_version >= "3.11" and python_version < "3.12"
25 | idna==3.4 ; python_version >= "3.11" and python_version < "3.12"
26 | inflect==7.0.0 ; python_version >= "3.11" and python_version < "3.12"
27 | jaconv==0.3.4 ; python_version >= "3.11" and python_version < "3.12"
28 | jamo==0.4.1 ; python_version >= "3.11" and python_version < "3.12"
29 | jinja2==3.1.2 ; python_version >= "3.11" and python_version < "3.12"
30 | joblib==1.2.0 ; python_version >= "3.11" and python_version < "3.12"
31 | librosa==0.9.2 ; python_version >= "3.11" and python_version < "3.12"
32 | llvmlite==0.40.1 ; python_version >= "3.11" and python_version < "3.12"
33 | markupsafe==2.1.3 ; python_version >= "3.11" and python_version < "3.12"
34 | mpmath==1.3.0 ; python_version >= "3.11" and python_version < "3.12"
35 | networkx==3.1 ; python_version >= "3.11" and python_version < "3.12"
36 | nltk==3.8.1 ; python_version >= "3.11" and python_version < "3.12"
37 | numba==0.57.1 ; python_version >= "3.11" and python_version < "3.12"
38 | numpy==1.24.4 ; python_version >= "3.11" and python_version < "3.12"
39 | packaging==23.1 ; python_version >= "3.11" and python_version < "3.12"
40 | platformdirs==3.10.0 ; python_version >= "3.11" and python_version < "3.12"
41 | pooch==1.7.0 ; python_version >= "3.11" and python_version < "3.12"
42 | pycparser==2.21 ; python_version >= "3.11" and python_version < "3.12"
43 | pydantic==1.10.12 ; python_version >= "3.11" and python_version < "3.12"
44 | pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@b35fc89fe42948a28e33aed886ea145a51113f88 ; python_version >= "3.11" and python_version < "3.12"
45 | pypinyin==0.44.0 ; python_version >= "3.11" and python_version < "3.12"
46 | pyreadline3==3.4.1 ; sys_platform == "win32" and python_version >= "3.11" and python_version < "3.12"
47 | python-multipart==0.0.5 ; python_version >= "3.11" and python_version < "3.12"
48 | pytorch-wpe==0.0.1 ; python_version >= "3.11" and python_version < "3.12"
49 | pyworld==0.3.4 ; python_version >= "3.11" and python_version < "3.12"
50 | pyyaml==6.0.1 ; python_version >= "3.11" and python_version < "3.12"
51 | regex==2023.8.8 ; python_version >= "3.11" and python_version < "3.12"
52 | requests==2.31.0 ; python_version >= "3.11" and python_version < "3.12"
53 | resampy==0.4.2 ; python_version >= "3.11" and python_version < "3.12"
54 | scikit-learn==1.3.0 ; python_version >= "3.11" and python_version < "3.12"
55 | scipy==1.11.2 ; python_version >= "3.11" and python_version < "3.12"
56 | semver==3.0.1 ; python_version >= "3.11" and python_version < "3.12"
57 | sentencepiece==0.1.99 ; python_version >= "3.11" and python_version < "3.12"
58 | setuptools==68.2.2 ; python_version >= "3.11" and python_version < "3.12"
59 | six==1.16.0 ; python_version >= "3.11" and python_version < "3.12"
60 | sniffio==1.3.0 ; python_version >= "3.11" and python_version < "3.12"
61 | soundfile==0.12.1 ; python_version >= "3.11" and python_version < "3.12"
62 | starlette==0.27.0 ; python_version >= "3.11" and python_version < "3.12"
63 | sympy==1.12 ; python_version >= "3.11" and python_version < "3.12"
64 | threadpoolctl==3.2.0 ; python_version >= "3.11" and python_version < "3.12"
65 | torch-complex==0.4.3 ; python_version >= "3.11" and python_version < "3.12"
66 | torch==2.0.1 ; python_version >= "3.11" and python_version < "3.12"
67 | tqdm==4.66.1 ; python_version >= "3.11" and python_version < "3.12"
68 | typeguard==2.13.3 ; python_version >= "3.11" and python_version < "3.12"
69 | typing-extensions==4.7.1 ; python_version >= "3.11" and python_version < "3.12"
70 | unidecode==1.3.6 ; python_version >= "3.11" and python_version < "3.12"
71 | urllib3==2.0.4 ; python_version >= "3.11" and python_version < "3.12"
72 | uvicorn==0.15.0 ; python_version >= "3.11" and python_version < "3.12"
73 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | # automatically generated by pysen
3 | # pysen ignores and overwrites any modifications
4 | # e203: black treats : as a binary operator
5 | # e231: black doesn't put a space after ,
6 | # e501: black may exceed the line-length to follow other style rules
7 | # w503 or w504: either one needs to be disabled to select w error codes
8 | ignore = E203,E231,E501,W503
9 | max-line-length = 88
10 | select = B,B950,C,E,F,W
11 |
12 | [mypy]
13 | # automatically generated by pysen
14 | # pysen ignores and overwrites any modifications
15 | check_untyped_defs = True
16 | disallow_any_decorated = False
17 | disallow_any_generics = False
18 | disallow_any_unimported = False
19 | disallow_incomplete_defs = True
20 | disallow_subclassing_any = True
21 | disallow_untyped_calls = False
22 | disallow_untyped_decorators = False
23 | disallow_untyped_defs = False
24 | ignore_errors = False
25 | ignore_missing_imports = True
26 | no_implicit_optional = True
27 | python_version = 3.10
28 | show_error_codes = True
29 | strict_equality = True
30 | strict_optional = True
31 | warn_redundant_casts = True
32 | warn_return_any = False
33 | warn_unreachable = True
34 | warn_unused_configs = True
35 | warn_unused_ignores = False
36 |
37 |
--------------------------------------------------------------------------------
/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/icons/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/icons/8.png
--------------------------------------------------------------------------------
/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/metas.json:
--------------------------------------------------------------------------------
1 | {
2 | "supported_features": { "permitted_synthesis_morphing": "NOTHING" }
3 | }
4 |
--------------------------------------------------------------------------------
/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/policy.md:
--------------------------------------------------------------------------------
1 | dummy3 policy
2 |
3 | https://voicevox.hiroshiba.jp/
4 |
--------------------------------------------------------------------------------
/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portrait.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portrait.png
--------------------------------------------------------------------------------
/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portraits/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portraits/8.png
--------------------------------------------------------------------------------
/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_001.wav
--------------------------------------------------------------------------------
/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_002.wav
--------------------------------------------------------------------------------
/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_003.wav
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/1.png
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/3.png
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/5.png
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/7.png
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/metas.json:
--------------------------------------------------------------------------------
1 | {
2 | "supported_features": { "permitted_synthesis_morphing": "SELF_ONLY" }
3 | }
4 |
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/policy.md:
--------------------------------------------------------------------------------
1 | dummy2 policy
2 |
3 | https://voicevox.hiroshiba.jp/
4 |
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portrait.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portrait.png
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portraits/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portraits/3.png
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_001.wav
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_002.wav
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_003.wav
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_001.wav
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_002.wav
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_003.wav
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_001.wav
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_002.wav
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_003.wav
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_001.wav
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_002.wav
--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_003.wav
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/0.png
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/2.png
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/4.png
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/6.png
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/metas.json:
--------------------------------------------------------------------------------
1 | {}
2 |
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/policy.md:
--------------------------------------------------------------------------------
1 | dummy1 policy
2 |
3 | https://voicevox.hiroshiba.jp/
4 |
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portrait.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portrait.png
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/0.png
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/2.png
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/4.png
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/6.png
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_001.wav
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_002.wav
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_003.wav
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_001.wav
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_002.wav
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_003.wav
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_001.wav
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_002.wav
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_003.wav
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_001.wav
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_002.wav
--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_003.wav
--------------------------------------------------------------------------------
/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/icons/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/icons/9.png
--------------------------------------------------------------------------------
/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/metas.json:
--------------------------------------------------------------------------------
1 | {
2 | "supported_features": { "permitted_synthesis_morphing": "ALL" }
3 | }
4 |
--------------------------------------------------------------------------------
/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/policy.md:
--------------------------------------------------------------------------------
1 | dummy4 policy
2 |
3 | https://voicevox.hiroshiba.jp/
4 |
--------------------------------------------------------------------------------
/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/portrait.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/portrait.png
--------------------------------------------------------------------------------
/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_001.wav
--------------------------------------------------------------------------------
/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_002.wav
--------------------------------------------------------------------------------
/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_003.wav
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/test/__init__.py
--------------------------------------------------------------------------------
/test/e2e/conftest.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pytest
4 | from fastapi.testclient import TestClient
5 | from run import generate_app
6 |
7 | from voicevox_engine.bridge_config import BridgeConfigLoader
8 | from voicevox_engine.preset import PresetManager
9 | from voicevox_engine.setting import SettingLoader
10 | from voicevox_engine.synthesis_engine import make_synthesis_engines
11 | from voicevox_engine.utility.core_version_utility import get_latest_core_version
12 |
13 |
@pytest.fixture(scope="session")
def client():
    """Provide one ``TestClient`` for the whole test session.

    The client wraps the application returned by ``generate_app``, wired up
    with CPU-only synthesis engines, the settings read from
    ``./default_setting.yml`` and the presets read from ``./presets.yaml``.
    """
    # NOTE(review): "./invalid" is presumably a path that does not exist so
    # that no real bridge configuration is picked up -- confirm.
    engines = make_synthesis_engines(
        use_gpu=False,
        bridge_config_loader=BridgeConfigLoader(Path("./invalid")),
    )
    app = generate_app(
        synthesis_engines=engines,
        latest_core_version=get_latest_core_version(versions=engines.keys()),
        setting_loader=SettingLoader(Path("./default_setting.yml")),
        # FIXME: impl MockPresetManager
        preset_manager=PresetManager(preset_path=Path("./presets.yaml")),
    )
    return TestClient(app)
34 |
--------------------------------------------------------------------------------
/test/e2e/test_validate_version.py:
--------------------------------------------------------------------------------
1 | from fastapi.testclient import TestClient
2 |
3 | # from voicevox_engine import __version__
4 |
5 |
def test_fetch_version_success(client: TestClient):
    """Check that ``GET /version`` answers with HTTP status 200."""
    version_response = client.get("/version")
    assert version_response.status_code == 200
    # The Bridge Plugin obtains its version in a special way, so the
    # comparison of the response body is commented out for now:
    # assert version_response.json() == __version__
11 |
--------------------------------------------------------------------------------
/test/presets-test-1.yaml:
--------------------------------------------------------------------------------
1 | - id: 1
2 | name: test
3 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff
4 | style_id: 0
5 | speedScale: 1
6 | pitchScale: 0
7 | intonationScale: 1
8 | volumeScale: 1
9 | prePhonemeLength: 0.1
10 | postPhonemeLength: 0.1
11 |
12 | - id: 2
13 | name: test2
14 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff
15 | style_id: 2
16 | speedScale: 1.5
17 | pitchScale: 0
18 | intonationScale: 1
19 | volumeScale: 0.7
20 | prePhonemeLength: 0.5
21 | postPhonemeLength: 0.5
22 |
--------------------------------------------------------------------------------
/test/presets-test-2.yaml:
--------------------------------------------------------------------------------
1 | - id: 1
2 | name: test
3 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff
4 | style_id: not_int
5 | speedScale: 1
6 | pitchScale: 0
7 | intonationScale: 1
8 | volumeScale: 1
9 | prePhonemeLength: 0.1
10 | postPhonemeLength: 0.1
11 |
12 | - id: 2
13 | name: test2
14 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff
15 | style_id: 2
16 | speedScale: 1.5
17 | pitchScale: 0
18 | intonationScale: 1
19 | volumeScale: 0.7
20 | prePhonemeLength: 0.5
21 | postPhonemeLength: 0.5
22 |
--------------------------------------------------------------------------------
/test/presets-test-3.yaml:
--------------------------------------------------------------------------------
1 | - id: 1
2 | name: test
3 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff
4 | style_id: 0
5 | speedScale: 1
6 | pitchScale: 0
7 | intonationScale: 1
8 | volumeScale: 1
9 | prePhonemeLength: 0.1
10 | postPhonemeLength: 0.1
11 |
12 | - id: 1
13 | name: test2
14 | speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff
15 | style_id: 2
16 | speedScale: 1.5
17 | pitchScale: 0
18 | intonationScale: 1
19 | volumeScale: 0.7
20 | prePhonemeLength: 0.5
21 | postPhonemeLength: 0.5
22 |
--------------------------------------------------------------------------------
/test/presets-test-4.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/voicevox-bridge/bridge-plugin/5ecbf000c0ac592048a42156d5831f69eb1457c4/test/presets-test-4.yaml
--------------------------------------------------------------------------------
/test/setting-test-load-1.yaml:
--------------------------------------------------------------------------------
1 | allow_origin: null
2 | cors_policy_mode: localapps
3 |
--------------------------------------------------------------------------------
/test/setting-test-load-2.yaml:
--------------------------------------------------------------------------------
1 | allow_origin: null
2 | cors_policy_mode: all
3 |
--------------------------------------------------------------------------------
/test/setting-test-load-3.yaml:
--------------------------------------------------------------------------------
1 | allow_origin: "192.168.254.255 192.168.255.255"
2 | cors_policy_mode: localapps
3 |
--------------------------------------------------------------------------------
/test/test_acoustic_feature_extractor.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from voicevox_engine.acoustic_feature_extractor import OjtPhoneme
4 |
5 |
class TestOjtPhoneme(TestCase):
    """Unit tests for ``OjtPhoneme`` driven by one fixed phoneme sequence."""

    def setUp(self):
        """Build the converted phoneme list shared by every test."""
        super().setUp()
        labels = "sil k o N n i ch i w a pau h i h o d e s U sil".split()
        # Token number ``index`` is given start=index and end=index + 1.
        raw_phonemes = [
            OjtPhoneme(label, index, index + 1) for index, label in enumerate(labels)
        ]
        self.ojt_hello_hiho = OjtPhoneme.convert(raw_phonemes)

    def test_repr_(self):
        """``repr`` shows the phoneme label together with start and end."""
        expected_by_index = {
            1: "Phoneme(phoneme='k', start=1, end=2)",
            10: "Phoneme(phoneme='pau', start=10, end=11)",
        }
        for index, expected in expected_by_index.items():
            self.assertEqual(repr(self.ojt_hello_hiho[index]), expected)

    def test_phoneme_list(self):
        """Spot-check a few positions of ``OjtPhoneme.phoneme_list``."""
        for index, label in ((1, "A"), (14, "e"), (26, "m"), (38, "ts"), (41, "v")):
            self.assertEqual(OjtPhoneme.phoneme_list[index], label)

    def test_const(self):
        """Check the class-level constants."""
        self.assertEqual(OjtPhoneme.num_phoneme, 45)
        self.assertEqual(OjtPhoneme.space_phoneme, "pau")

    def test_convert(self):
        """After ``convert`` the leading and trailing "sil" read "pau"."""
        joined_labels = " ".join(p.phoneme for p in self.ojt_hello_hiho)
        self.assertEqual(
            joined_labels, "pau k o N n i ch i w a pau h i h o d e s U pau"
        )

    def test_equal(self):
        """``==`` holds only when label, start and end all match."""
        # Compare against the 10th element of ojt_hello_hiho, the phoneme "a".
        target = self.ojt_hello_hiho[9]

        self.assertTrue(target == OjtPhoneme("a", 9, 10))
        self.assertFalse(target == OjtPhoneme("k", 9, 10))
        self.assertFalse(target == OjtPhoneme("a", 10, 11))

    def test_phoneme_id(self):
        """``phoneme_id`` yields the expected id for every element."""
        joined_ids = " ".join(str(p.phoneme_id) for p in self.ojt_hello_hiho)
        self.assertEqual(
            joined_ids, "0 23 30 4 28 21 10 21 42 7 0 19 21 19 30 12 14 35 6 0"
        )

    def test_onehot(self):
        """``onehot`` is True at the phoneme's id and False everywhere else."""
        expected_ids = [
            0, 23, 30, 4, 28, 21, 10, 21, 42, 7,
            0, 19, 21, 19, 30, 12, 14, 35, 6, 0,
        ]  # fmt: skip
        for position, phoneme in enumerate(self.ojt_hello_hiho):
            hot_index = expected_ids[position]
            for slot in range(OjtPhoneme.num_phoneme):
                self.assertEqual(phoneme.onehot[slot], slot == hot_index)
85 |
--------------------------------------------------------------------------------
/test/test_connect_base64_waves.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import io
3 | from unittest import TestCase
4 |
5 | import numpy as np
6 | import numpy.testing
7 | import soundfile
8 | from scipy.signal import resample
9 |
10 | from voicevox_engine.utility import ConnectBase64WavesException, connect_base64_waves
11 |
12 |
def generate_sine_wave_ndarray(
    seconds: float, samplerate: int, frequency: float
) -> np.ndarray:
    """Return a float32 sine wave sampled on ``[0, seconds)``.

    The number of samples is ``int(seconds * samplerate)``; the end point
    ``seconds`` itself is not sampled.
    """
    sample_count = int(seconds * samplerate)
    timeline = np.linspace(0, seconds, sample_count, endpoint=False)
    return np.sin(2 * np.pi * frequency * timeline).astype(np.float32)
20 |
21 |
def encode_bytes(wave_ndarray: np.ndarray, samplerate: int) -> bytes:
    """Encode a waveform as WAV (subtype ``FLOAT``) bytes, fully in memory."""
    with io.BytesIO() as buffer:
        soundfile.write(
            file=buffer,
            data=wave_ndarray,
            samplerate=samplerate,
            format="WAV",
            subtype="FLOAT",
        )
        # getvalue() returns the whole buffer regardless of the stream
        # position, so no rewind is needed before reading it out.
        return buffer.getvalue()
34 |
35 |
def generate_sine_wave_bytes(
    seconds: float, samplerate: int, frequency: float
) -> bytes:
    """Generate a sine wave and return it encoded by ``encode_bytes``."""
    return encode_bytes(
        generate_sine_wave_ndarray(seconds, samplerate, frequency), samplerate
    )
41 |
42 |
def encode_base64(wave_bytes: bytes) -> str:
    """Return *wave_bytes* as standard-alphabet Base64 text."""
    encoded = base64.b64encode(wave_bytes)
    return str(encoded, "utf-8")
45 |
46 |
def generate_sine_wave_base64(seconds: float, samplerate: int, frequency: float) -> str:
    """Generate a sine wave and return its encoded bytes as Base64 text."""
    return encode_base64(generate_sine_wave_bytes(seconds, samplerate, frequency))
51 |
52 |
class TestConnectBase64Waves(TestCase):
    """Tests for ``connect_base64_waves`` using synthetic sine waves."""

    def test_connect(self):
        """Joining a wave with itself equals concatenating its samples."""
        rate = 1000
        sine = generate_sine_wave_ndarray(seconds=2, samplerate=rate, frequency=10)
        sine_b64 = encode_base64(encode_bytes(sine, samplerate=rate))

        expected = np.concatenate([sine, sine])
        actual, _ = connect_base64_waves(waves=[sine_b64, sine_b64])

        self.assertEqual(expected.shape, actual.shape)
        self.assertTrue((expected == actual).all())

    def test_no_wave_error(self):
        """An empty wave list raises ``ConnectBase64WavesException``."""
        with self.assertRaises(ConnectBase64WavesException):
            connect_base64_waves(waves=[])

    def test_invalid_base64_error(self):
        """Base64 text with its first character removed is rejected."""
        valid_b64 = generate_sine_wave_base64(
            seconds=2, samplerate=1000, frequency=10
        )
        broken_b64 = valid_b64[1:]  # drop the first character

        with self.assertRaises(ConnectBase64WavesException):
            connect_base64_waves(waves=[broken_b64])

    def test_invalid_wave_file_error(self):
        """Valid Base64 of WAV data missing its first byte is rejected."""
        valid_bytes = generate_sine_wave_bytes(
            seconds=2, samplerate=1000, frequency=10
        )
        broken_b64 = encode_base64(valid_bytes[1:])  # drop the first byte

        with self.assertRaises(ConnectBase64WavesException):
            connect_base64_waves(waves=[broken_b64])

    def test_different_frequency(self):
        """Waves with different sampling rates are joined after resampling."""
        high_rate, low_rate = 24000, 1000
        wave_high = generate_sine_wave_ndarray(
            seconds=1, samplerate=high_rate, frequency=10
        )
        wave_low = generate_sine_wave_ndarray(
            seconds=2, samplerate=low_rate, frequency=10
        )
        high_b64 = encode_base64(encode_bytes(wave_high, samplerate=high_rate))
        low_b64 = encode_base64(encode_bytes(wave_low, samplerate=low_rate))

        # Reference: resample the 1000 Hz wave to 24000 Hz before joining.
        upsampled_length = high_rate * len(wave_low) // low_rate
        wave_low_upsampled = resample(wave_low, upsampled_length)
        expected = np.concatenate([wave_high, wave_low_upsampled])

        actual, _ = connect_base64_waves(waves=[high_b64, low_b64])

        self.assertEqual(expected.shape, actual.shape)
        numpy.testing.assert_array_almost_equal(expected, actual)

    def test_different_channels(self):
        """A mono wave followed by a stereo wave yields a two-channel result."""
        mono = generate_sine_wave_ndarray(seconds=2, samplerate=1000, frequency=10)
        # Stereo signal whose two channels both carry the mono samples.
        stereo = np.array([mono, mono]).T
        mono_b64 = encode_base64(encode_bytes(mono, samplerate=1000))
        stereo_b64 = encode_base64(encode_bytes(stereo, samplerate=1000))

        expected = np.concatenate([stereo, stereo])
        actual, _ = connect_base64_waves(waves=[mono_b64, stereo_b64])

        self.assertEqual(expected.shape, actual.shape)
        self.assertTrue((expected == actual).all())
131 |
--------------------------------------------------------------------------------
/test/test_core_version_utility.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from voicevox_engine.utility import get_latest_core_version, parse_core_version
4 |
5 |
class TestCoreVersion(TestCase):
    """Tests for the core-version helpers."""

    def test_parse_core_version(self):
        """Every sample version string must parse without raising."""
        sample_versions = (
            "0.0.0",
            "0.1.0",
            "0.10.0",
            "0.10.0-preview.1",
            "0.14.0",
            "0.14.0-preview.1",
            "0.14.0-preview.10",
        )
        for version in sample_versions:
            parse_core_version(version)

    def test_get_latest_core_version(self):
        """The release wins over its own previews; a newer preview wins over an older release."""
        cases = [
            (
                [
                    "0.0.0",
                    "0.1.0",
                    "0.10.0",
                    "0.10.0-preview.1",
                    "0.14.0",
                    "0.14.0-preview.1",
                    "0.14.0-preview.10",
                ],
                "0.14.0",
            ),
            (["0.14.0", "0.15.0-preview.1"], "0.15.0-preview.1"),
        ]
        for versions, expected_latest in cases:
            self.assertEqual(
                get_latest_core_version(versions=versions), expected_latest
            )
41 |
--------------------------------------------------------------------------------
/test/test_mock_synthesis_engine.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from voicevox_engine.dev.synthesis_engine import MockSynthesisEngine
4 | from voicevox_engine.kana_parser import create_kana
5 | from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
6 |
7 |
class TestMockSynthesisEngine(TestCase):
    """Tests that exercise ``MockSynthesisEngine`` with two accent phrases."""

    @staticmethod
    def _mora(text, consonant, vowel):
        """Build a ``Mora`` whose pitch and lengths are all zero.

        ``consonant_length`` is ``None`` when ``consonant`` is ``None`` and
        ``0.0`` otherwise.
        """
        return Mora(
            text=text,
            consonant=consonant,
            consonant_length=None if consonant is None else 0.0,
            vowel=vowel,
            vowel_length=0.0,
            pitch=0.0,
        )

    def setUp(self):
        """Create the accent-phrase fixture and the engine under test."""
        super().setUp()
        mora = self._mora

        self.accent_phrases_hello_hiho = [
            AccentPhrase(
                moras=[
                    mora("コ", "k", "o"),
                    mora("ン", None, "N"),
                    mora("ニ", "n", "i"),
                    mora("チ", "ch", "i"),
                    mora("ワ", "w", "a"),
                ],
                accent=5,
                pause_mora=mora("、", None, "pau"),
            ),
            AccentPhrase(
                moras=[
                    mora("ヒ", "h", "i"),
                    mora("ホ", "h", "o"),
                    mora("デ", "d", "e"),
                    mora("ス", "s", "U"),
                ],
                accent=1,
                pause_mora=None,
            ),
        ]
        self.engine = MockSynthesisEngine(speakers="", supported_devices="")

    def test_replace_phoneme_length(self):
        """The returned phrases compare equal to the phrases passed in."""
        phrases = self.accent_phrases_hello_hiho
        replaced = self.engine.replace_phoneme_length(
            accent_phrases=phrases, style_id=0
        )
        self.assertEqual(replaced, phrases)

    def test_replace_mora_pitch(self):
        """The returned phrases compare equal to the phrases passed in."""
        phrases = self.accent_phrases_hello_hiho
        replaced = self.engine.replace_mora_pitch(accent_phrases=phrases, style_id=0)
        self.assertEqual(replaced, phrases)

    def test_synthesis(self):
        """``synthesis`` runs on a full ``AudioQuery``; the result is not inspected."""
        phrases = self.accent_phrases_hello_hiho
        query = AudioQuery(
            accent_phrases=phrases,
            speedScale=1,
            pitchScale=0,
            intonationScale=1,
            volumeScale=1,
            prePhonemeLength=0.1,
            postPhonemeLength=0.1,
            outputSamplingRate=24000,
            outputStereo=False,
            kana=create_kana(phrases),
        )
        self.engine.synthesis(query, style_id=0)
141 |
--------------------------------------------------------------------------------
/test/test_mora_list.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from voicevox_engine.mora_list import openjtalk_mora2text
4 |
5 |
class TestOpenJTalkMoraList(TestCase):
    """Checks on the ``openjtalk_mora2text`` lookup table."""

    def test_mora2text(self):
        """Spot-check selected entries of the table."""
        expected_text_by_mora = {
            "cl": "ッ",
            "ti": "ティ",
            "tu": "トゥ",
            "di": "ディ",
            # GitHub issue #60
            "gye": "ギェ",
            "ye": "イェ",
        }
        for mora, expected_text in expected_text_by_mora.items():
            self.assertEqual(expected_text, openjtalk_mora2text[mora])

    def test_mora2text_injective(self):
        """Check that different moras never map to the same reading."""
        texts = list(openjtalk_mora2text.values())
        distinct_texts = list(set(texts))
        self.assertCountEqual(texts, distinct_texts)
21 |
--------------------------------------------------------------------------------
/test/test_mora_to_text.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | # TODO: import from voicevox_engine.synthesis_engine.mora
4 | from voicevox_engine.synthesis_engine.synthesis_engine_base import mora_to_text
5 |
6 |
class TestMoraToText(TestCase):
    """Tests for ``mora_to_text``."""

    def test_voice(self):
        """Voiced (lower-case vowel) moras map to their katakana."""
        cases = [
            ("a", "ア"),
            ("i", "イ"),
            ("ka", "カ"),
            ("N", "ン"),
            ("cl", "ッ"),
            ("gye", "ギェ"),
            ("ye", "イェ"),
            ("wo", "ウォ"),
        ]
        for mora, katakana in cases:
            self.assertEqual(mora_to_text(mora), katakana)

    def test_unvoice(self):
        """Unvoiced (upper-case vowel) moras map to the same katakana."""
        cases = [
            ("A", "ア"),
            ("I", "イ"),
            ("kA", "カ"),
            ("gyE", "ギェ"),
            ("yE", "イェ"),
            ("wO", "ウォ"),
        ]
        for mora, katakana in cases:
            self.assertEqual(mora_to_text(mora), katakana)

    def test_invalid_mora(self):
        """Unknown moras are returned unchanged instead of raising."""
        for mora in ("x", ""):
            self.assertEqual(mora_to_text(mora), mora)
30 |
--------------------------------------------------------------------------------
/test/test_setting.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from tempfile import TemporaryDirectory
3 | from unittest import TestCase
4 |
5 | from voicevox_engine.setting import CorsPolicyMode, Setting, SettingLoader
6 |
7 |
class TestSettingLoader(TestCase):
    """Load/dump tests for SettingLoader."""

    def setUp(self):
        # Scratch directory for the dump test; removed again in tearDown.
        self.tmp_dir = TemporaryDirectory()
        self.tmp_dir_path = Path(self.tmp_dir.name)

    def tearDown(self):
        self.tmp_dir.cleanup()

    def test_loading_1(self):
        """Loading a path that does not exist."""
        loader = SettingLoader(Path("not_exist.yaml"))
        loaded = loader.load_setting_file().dict()

        expected = {"allow_origin": None, "cors_policy_mode": CorsPolicyMode.localapps}
        self.assertEqual(loaded, expected)

    def test_loading_2(self):
        """Loading test/setting-test-load-1.yaml."""
        loader = SettingLoader(setting_file_path=Path("test/setting-test-load-1.yaml"))
        loaded = loader.load_setting_file().dict()

        expected = {"allow_origin": None, "cors_policy_mode": CorsPolicyMode.localapps}
        self.assertEqual(loaded, expected)

    def test_loading_3(self):
        """Loading test/setting-test-load-2.yaml."""
        loader = SettingLoader(setting_file_path=Path("test/setting-test-load-2.yaml"))
        loaded = loader.load_setting_file().dict()

        expected = {"allow_origin": None, "cors_policy_mode": "all"}
        self.assertEqual(loaded, expected)

    def test_loading_4(self):
        """Loading test/setting-test-load-3.yaml."""
        loader = SettingLoader(setting_file_path=Path("test/setting-test-load-3.yaml"))
        loaded = loader.load_setting_file().dict()

        expected = {
            "allow_origin": "192.168.254.255 192.168.255.255",
            "cors_policy_mode": CorsPolicyMode.localapps,
        }
        self.assertEqual(loaded, expected)

    def test_dump(self):
        """A dumped setting file exists on disk and loads back."""
        dump_path = self.tmp_dir_path / "setting-test-dump.yaml"
        loader = SettingLoader(setting_file_path=dump_path)
        loader.dump_setting_file(Setting(cors_policy_mode=CorsPolicyMode.localapps))

        self.assertTrue(loader.setting_file_path.is_file())
        expected = {"allow_origin": None, "cors_policy_mode": CorsPolicyMode.localapps}
        self.assertEqual(loader.load_setting_file().dict(), expected)
73 |
--------------------------------------------------------------------------------
/test/test_user_dict_model.py:
--------------------------------------------------------------------------------
1 | from copy import deepcopy
2 | from unittest import TestCase
3 |
4 | from pydantic import ValidationError
5 |
6 | from voicevox_engine.kana_parser import parse_kana
7 | from voicevox_engine.model import UserDictWord
8 |
9 |
class TestUserDictWords(TestCase):
    """Validation and mora-counting tests for the UserDictWord model."""

    def setUp(self):
        # Baseline payload; every test deep-copies it before changing fields.
        self.test_model = {
            "surface": "テスト",
            "priority": 0,
            "part_of_speech": "名詞",
            "part_of_speech_detail_1": "固有名詞",
            "part_of_speech_detail_2": "一般",
            "part_of_speech_detail_3": "*",
            "inflectional_type": "*",
            "inflectional_form": "*",
            "stem": "*",
            "yomi": "テスト",
            "pronunciation": "テスト",
            "accent_type": 0,
            "accent_associative_rule": "*",
        }

    def test_valid_word(self):
        """The baseline payload validates without error."""
        test_value = deepcopy(self.test_model)
        try:
            UserDictWord(**test_value)
        except ValidationError as e:
            self.fail(f"Unexpected Validation Error\n{str(e)}")

    def test_convert_to_zenkaku(self):
        """An ASCII surface is expected to come back in full-width form."""
        test_value = deepcopy(self.test_model)
        test_value["surface"] = "test"
        # Expected value is full-width "test" (U+FF54 U+FF45 U+FF53 U+FF54).
        # It is spelled with escapes so that Unicode normalization of this file
        # cannot collapse it into the ASCII input, which would make the
        # assertion unable to detect a missing conversion.
        # NOTE(review): assumes the model maps ASCII to the U+FF01.. full-width
        # block, as the test name indicates — confirm against the validator.
        self.assertEqual(
            UserDictWord(**test_value).surface, "\uff54\uff45\uff53\uff54"
        )

    def test_count_mora(self):
        """The baseline pronunciation counts as three moras."""
        test_value = deepcopy(self.test_model)
        self.assertEqual(UserDictWord(**test_value).mora_count, 3)

    def test_count_mora_x(self):
        """mora_count agrees with parse_kana for kana + small-kana pairs."""
        test_value = deepcopy(self.test_model)
        # Code points 12449..12532 (U+30A1..U+30F4).
        for s in [chr(i) for i in range(12449, 12533)]:
            # Small kana cannot lead a pair, so they are skipped as the first
            # character.
            if s in ["ァ", "ィ", "ゥ", "ェ", "ォ", "ッ", "ャ", "ュ", "ョ", "ヮ"]:
                continue
            for x in "ァィゥェォャュョ":
                expected_count = 0
                test_value["pronunciation"] = s + x
                # The trailing "'" is appended before handing the text to
                # parse_kana; the reference count is the total number of moras.
                for accent_phrase in parse_kana(
                    test_value["pronunciation"] + "'",
                ):
                    expected_count += len(accent_phrase.moras)
                with self.subTest(s=s, x=x):
                    self.assertEqual(
                        UserDictWord(**test_value).mora_count,
                        expected_count,
                    )

    def test_count_mora_xwa(self):
        """mora_count agrees with parse_kana for a word containing "クヮ"."""
        test_value = deepcopy(self.test_model)
        test_value["pronunciation"] = "クヮンセイ"
        expected_count = 0
        for accent_phrase in parse_kana(
            test_value["pronunciation"] + "'",
        ):
            expected_count += len(accent_phrase.moras)
        self.assertEqual(
            UserDictWord(**test_value).mora_count,
            expected_count,
        )

    def test_invalid_pronunciation_not_katakana(self):
        """A hiragana pronunciation is rejected."""
        test_value = deepcopy(self.test_model)
        test_value["pronunciation"] = "ぼいぼ"
        with self.assertRaises(ValidationError):
            UserDictWord(**test_value)

    def test_invalid_pronunciation_invalid_sutegana(self):
        """A pronunciation with misplaced small kana is rejected."""
        test_value = deepcopy(self.test_model)
        test_value["pronunciation"] = "アィウェォ"
        with self.assertRaises(ValidationError):
            UserDictWord(**test_value)

    def test_invalid_pronunciation_invalid_xwa(self):
        """A pronunciation with a misplaced "ヮ" is rejected."""
        test_value = deepcopy(self.test_model)
        test_value["pronunciation"] = "アヮ"
        with self.assertRaises(ValidationError):
            UserDictWord(**test_value)

    def test_count_mora_voiced_sound(self):
        """Voiced kana count as one mora each."""
        test_value = deepcopy(self.test_model)
        test_value["pronunciation"] = "ボイボ"
        self.assertEqual(UserDictWord(**test_value).mora_count, 3)

    def test_invalid_accent_type(self):
        """An accent_type of 4 on the three-mora baseline is rejected."""
        test_value = deepcopy(self.test_model)
        test_value["accent_type"] = 4
        with self.assertRaises(ValidationError):
            UserDictWord(**test_value)

    def test_invalid_accent_type_2(self):
        """A negative accent_type is rejected."""
        test_value = deepcopy(self.test_model)
        test_value["accent_type"] = -1
        with self.assertRaises(ValidationError):
            UserDictWord(**test_value)
109 |
--------------------------------------------------------------------------------
/test/test_word_types.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from voicevox_engine.model import WordTypes
4 | from voicevox_engine.part_of_speech_data import part_of_speech_data
5 |
6 |
class TestWordTypes(TestCase):
    """Consistency check between WordTypes and part_of_speech_data."""

    def test_word_types(self):
        """WordTypes members and part_of_speech_data keys coincide."""
        declared_types = list(WordTypes)
        configured_types = list(part_of_speech_data.keys())
        self.assertCountEqual(declared_types, configured_types)
10 |
--------------------------------------------------------------------------------
/test/vvlib_manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "manifest_version": "0.15.0",
3 | "name": "Test vvlib",
4 | "version": "0.0.1",
5 | "uuid": "2bb8bccf-1c3f-4bc9-959a-f388e37af3ad",
6 | "engine_name": "Test Engine",
7 | "brand_name": "Test",
8 | "engine_uuid": "c7b58856-bd56-4aa1-afb7-b8415f824b06"
9 | }
--------------------------------------------------------------------------------
/ui_template/ui.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | VOICEVOX Engine 設定
6 |
10 |
11 |
17 |
22 |
23 |
24 |
25 |
119 |
120 |
121 |
--------------------------------------------------------------------------------
/voicevox_engine/__init__.py:
--------------------------------------------------------------------------------
# Version string of the voicevox_engine package.
# NOTE(review): "latest" looks like a development placeholder — confirm whether
# release builds overwrite this value.
__version__ = "latest"
2 |
--------------------------------------------------------------------------------
/voicevox_engine/acoustic_feature_extractor.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import numpy
4 |
5 |
class OjtPhoneme:
    """
    A phoneme from the OpenJTalk phoneme set, with start and end times.

    Attributes
    ----------
    phoneme_list : Sequence[str]
        All known phonemes
    num_phoneme : int
        Number of entries in phoneme_list
    space_phoneme : str
        Phoneme that stands for a pause
    """

    phoneme_list = (
        "pau",
        "A",
        "E",
        "I",
        "N",
        "O",
        "U",
        "a",
        "b",
        "by",
        "ch",
        "cl",
        "d",
        "dy",
        "e",
        "f",
        "g",
        "gw",
        "gy",
        "h",
        "hy",
        "i",
        "j",
        "k",
        "kw",
        "ky",
        "m",
        "my",
        "n",
        "ny",
        "o",
        "p",
        "py",
        "r",
        "ry",
        "s",
        "sh",
        "t",
        "ts",
        "ty",
        "u",
        "v",
        "w",
        "y",
        "z",
    )
    num_phoneme = len(phoneme_list)
    space_phoneme = "pau"

    def __init__(
        self,
        phoneme: str,
        start: float,
        end: float,
    ):
        """
        Parameters
        ----------
        phoneme : str
            Phoneme symbol
        start : float
            Start time; stored rounded to two decimals
        end : float
            End time; stored rounded to two decimals
        """
        self.phoneme = phoneme
        self.start = numpy.round(start, decimals=2)
        self.end = numpy.round(end, decimals=2)

    def __repr__(self):
        return f"Phoneme(phoneme='{self.phoneme}', start={self.start}, end={self.end})"

    def __eq__(self, o: object):
        # Equal only to another OjtPhoneme with the same symbol and the same
        # (already rounded) start and end.
        return isinstance(o, OjtPhoneme) and (
            self.phoneme == o.phoneme and self.start == o.start and self.end == o.end
        )

    @property
    def phoneme_id(self):
        """
        Index of this phoneme within phoneme_list.

        Returns
        -------
        id : int
            The phoneme id

        Raises
        ------
        ValueError
            If the phoneme is not in phoneme_list
        """
        return self.phoneme_list.index(self.phoneme)

    @property
    def onehot(self):
        """
        One-hot encoding of this phoneme.

        Returns
        -------
        onehot : numpy.ndarray
            Boolean array of length num_phoneme, True only at phoneme_id
        """
        array = numpy.zeros(self.num_phoneme, dtype=bool)
        array[self.phoneme_id] = True
        return array

    @classmethod
    def convert(cls, phonemes: List["OjtPhoneme"]) -> List["OjtPhoneme"]:
        """
        Replace a leading and trailing sil (silence) with space_phoneme (pau).

        The phoneme objects are modified in place and the same list is returned.

        Parameters
        ----------
        phonemes : List[OjtPhoneme]
            Phonemes to convert; may be empty

        Returns
        -------
        phonemes : List[OjtPhoneme]
            The converted phonemes
        """
        # An empty list has no first or last element to inspect.
        if not phonemes:
            return phonemes
        for edge in (phonemes[0], phonemes[-1]):
            if "sil" in edge.phoneme:
                edge.phoneme = cls.space_phoneme
        return phonemes
131 |
--------------------------------------------------------------------------------
/voicevox_engine/bridge_config/BridgeConfig.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Any, Dict, Iterable, List, Literal, Optional, Union
3 |
4 | import numpy as np
5 | import torch
6 | from espnet2.bin.tts_inference import Text2Speech
7 | from espnet2.text.token_id_converter import TokenIDConverter
8 | from pydantic import BaseModel, Extra, Field
9 |
10 | from ..metas.Metas import Speaker, SpeakerStyle
11 |
12 |
# Keyword arguments for constructing espnet2.bin.tts_inference.Text2Speech.
# The class docstring is kept verbatim: pydantic emits it as the schema
# description, so it is runtime text.
class TTSInferenceInitArgs(BaseModel):
    """
    espnet2.bin.tts_inference.Text2Speechの初期化時に渡すパラメータ
    """

    # NOTE(review): the four path fields default to None although their
    # annotation is not Optional; this relies on pydantic's implicit-Optional
    # handling of None defaults — confirm for the pinned pydantic version.
    train_config: Union[Path, str] = None
    model_file: Union[Path, str] = None
    threshold: float = 0.5
    minlenratio: float = 0.0
    maxlenratio: float = 10.0
    use_teacher_forcing: bool = False
    use_att_constraint: bool = False
    backward_window: int = 1
    forward_window: int = 3
    speed_control_alpha: float = 1.0
    noise_scale: float = 0.667
    noise_scale_dur: float = 0.8
    vocoder_config: Union[Path, str] = None
    vocoder_file: Union[Path, str] = None
    dtype: str = "float32"
    device: str = "cpu"  # overridden by the use_gpu argument
    seed: int = 777
    always_fix_seed: bool = False
36 |
37 |
# Keyword arguments for calling an espnet2.bin.tts_inference.Text2Speech
# instance. The class docstring is kept verbatim: pydantic emits it as the
# schema description, so it is runtime text.
class TTSInferenceCallArgs(BaseModel):
    """
    espnet2.bin.tts_inference.Text2Speechの呼び出し時に渡すパラメータ
    """

    class Config:
        # torch.Tensor and np.ndarray are not pydantic-native types, so
        # arbitrary types have to be allowed for the fields below.
        arbitrary_types_allowed = True

    # Every argument is optional and defaults to None.
    speech: Optional[Union[torch.Tensor, np.ndarray]] = None
    durations: Optional[Union[torch.Tensor, np.ndarray]] = None
    spembs: Optional[Union[torch.Tensor, np.ndarray]] = None
    sids: Optional[Union[torch.Tensor, np.ndarray]] = None
    lids: Optional[Union[torch.Tensor, np.ndarray]] = None
    decode_conf: Optional[Dict[str, Any]] = None
52 |
53 |
# Arguments for espnet2.text.token_id_converter.TokenIDConverter.
# The class docstring is kept verbatim: pydantic emits it as the schema
# description, so it is runtime text.
class TokenIDConverterInitArgs(BaseModel):
    """
    espnet2.text.token_id_converter.TokenIDConverterの呼び出し時に渡すパラメータ
    """

    # Required: a path to a token list, or the tokens themselves.
    token_list: Union[Path, str, Iterable[str]]
    # NOTE(review): the default is an empty string here — confirm that this is
    # the intended unknown-token symbol.
    unk_symbol: str = ""
61 |
62 |
# Per-style configuration: SpeakerStyle plus the g2p mode, the ESPnet argument
# bundles, and slots for the instantiated Text2Speech / TokenIDConverter.
# The class docstring and Field titles are kept verbatim: pydantic emits them
# in the generated schema, so they are runtime text.
class StyleConfig(SpeakerStyle):
    """
    スタイルの設定のフォーマット
    """

    class Config:
        # Text2Speech and TokenIDConverter are not pydantic-native types.
        arbitrary_types_allowed = True
        # Keys not declared on the model are dropped instead of rejected.
        extra = Extra.ignore

    # Grapheme-to-phoneme mode; required, restricted to the two literals.
    g2p: Literal["pyopenjtalk_accent_with_pause", "pyopenjtalk_prosody"] = Field(
        title="g2pの設定"
    )
    # Arguments for constructing Text2Speech; defaults to an all-default bundle.
    tts_inference_init_args: TTSInferenceInitArgs = Field(
        title="Text2Speechクラス初期化時の引数", default=TTSInferenceInitArgs()
    )
    # Arguments for calling Text2Speech; defaults to an all-default bundle.
    tts_inference_call_args: TTSInferenceCallArgs = Field(
        title="Text2Speechクラス呼び出し時の引数", default=TTSInferenceCallArgs()
    )
    # Arguments for constructing TokenIDConverter; required.
    token_id_converter_init_args: TokenIDConverterInitArgs = Field(
        title="TokenIDConverterクラス初期化時の引数",
    )
    # Text2Speech instance (the title marks it as internal use); None by default.
    text2speech: Optional[Text2Speech] = Field(
        title="Text2Speechクラスのインスタンス(内部で使用)", default=None
    )
    # TokenIDConverter instance (the title marks it as internal use); None by
    # default.
    token_id_converter: Optional[TokenIDConverter] = Field(
        title="TokenIDConverterクラスのインスタンス(内部で使用)", default=None
    )
90 |
91 |
# Speaker entry whose styles are StyleConfig objects.
# The class docstring and Field title are kept verbatim: pydantic emits them in
# the generated schema, so they are runtime text.
class SpeakerConfig(Speaker):
    """
    スピーカーの設定のフォーマット
    """

    # Declares ``styles`` as a list of StyleConfig; required.
    styles: List[StyleConfig] = Field(title="スタイルの設定")
98 |
99 |
# Top-level engine configuration model; undeclared keys are ignored
# (extra=Extra.ignore).
# The class docstring and Field titles are kept verbatim: pydantic emits them
# in the generated schema, so they are runtime text.
class BridgeConfig(BaseModel, extra=Extra.ignore):
    """
    エンジンの設定のフォーマット
    """

    # Engine host; defaults to 127.0.0.1.
    host: str = Field(title="エンジンのホスト", default="127.0.0.1")
    # Engine port number; defaults to 50021.
    port: int = Field(title="エンジンのポート番号", default=50021)
    # Speaker information; required.
    speakers: List[SpeakerConfig] = Field(title="スピーカー情報")
    # Engine version; required.
    engine_version: str = Field(title="エンジンのバージョン")
    # Output sampling rate; required.
    sampling_rate: int = Field(title="出力サンプリングレート")
110 |
--------------------------------------------------------------------------------
/voicevox_engine/bridge_config/BridgeConfigLoader.py:
--------------------------------------------------------------------------------
1 | import yaml
2 |
3 | from ..engine_manifest import EngineManifestLoader
4 | from .BridgeConfig import BridgeConfig
5 |
6 |
class BridgeConfigLoader:
    """Loads ``bridge_config.yaml`` from a directory and builds a BridgeConfig."""

    def __init__(self, config_file_dir) -> None:
        """
        Parameters
        ----------
        config_file_dir : path-like supporting ``/``
            Directory that contains ``bridge_config.yaml``
        """
        self.config_file_path = config_file_dir / "bridge_config.yaml"

    def load_config_file(self) -> BridgeConfig:
        """
        Read the YAML file and merge in values taken from the engine manifest.

        ``port``, ``engine_version`` and ``sampling_rate`` always come from
        ``EngineManifestLoader().load_info_for_bridge_config()`` and override
        whatever the YAML file contains for those keys.

        Returns
        -------
        BridgeConfig
            The validated configuration

        Raises
        ------
        FileNotFoundError
            If the configuration file does not exist
        """
        if not self.config_file_path.is_file():
            # Name the path so that a missing file can be diagnosed.
            raise FileNotFoundError(
                f"bridge config file not found: {self.config_file_path}"
            )

        config = yaml.safe_load(self.config_file_path.read_text(encoding="utf-8"))
        if config is None:
            # safe_load returns None for an empty document. Start from an empty
            # mapping so that model validation reports the missing fields
            # instead of failing on item assignment below.
            config = {}

        (
            engine_version,
            port,
            sampling_rate,
        ) = EngineManifestLoader().load_info_for_bridge_config()

        config["port"] = port
        config["engine_version"] = engine_version
        config["sampling_rate"] = sampling_rate

        return BridgeConfig(**config)
30 |
--------------------------------------------------------------------------------
/voicevox_engine/bridge_config/__init__.py:
--------------------------------------------------------------------------------
1 | from .BridgeConfig import BridgeConfig
2 | from .BridgeConfigLoader import BridgeConfigLoader
3 |
4 | __all__ = [
5 | "BridgeConfig",
6 | "BridgeConfigLoader",
7 | ]
8 |
--------------------------------------------------------------------------------
/voicevox_engine/cancellable_engine.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import queue
3 | import sys
4 | from multiprocessing import Pipe, Process
5 |
6 | if sys.platform == "win32":
7 | from multiprocessing.connection import PipeConnection as ConnectionType
8 | else:
9 | from multiprocessing.connection import Connection as ConnectionType
10 |
11 | from pathlib import Path
12 | from tempfile import NamedTemporaryFile
13 |
14 | import soundfile
15 |
16 | # FIXME: remove FastAPI dependency
17 | from fastapi import HTTPException, Request
18 |
19 | from .bridge_config import BridgeConfigLoader
20 | from .model import AudioQuery
21 | from .synthesis_engine import make_synthesis_engines
22 | from .utility import get_latest_core_version
23 |
24 |
class CancellableEngine:
    """
    Speech synthesis with cancellation support.

    Synthesis is delegated to worker subprocesses so that a worker can be
    killed when the requesting client disconnects. Note that synthesis here
    takes more arguments than the regular engine.

    For the parameters use_gpu and enable_mock, see make_synthesis_engines.

    Attributes
    ----------
    watch_con_list: list[tuple[Request, Process]]
        The Request is used to watch the connection; the Process is the worker
        to kill when that connection is lost.
        A tuple is appended when a client request starts synthesis and removed
        when the connection is lost or synthesis finishes.
    procs_and_cons: queue.Queue[tuple[Process, ConnectionType]]
        Workers that are ready to synthesize
        (workers that are currently synthesizing are not in the queue)
    """

    def __init__(
        self,
        init_processes: int,
        use_gpu: bool,
        enable_mock: bool,
        bridge_config_dir: Path,
    ) -> None:
        """
        Initialize the state and start init_processes workers, which are
        stored in procs_and_cons.
        """

        self.use_gpu = use_gpu
        self.enable_mock = enable_mock
        self.bridge_config_dir = bridge_config_dir

        self.watch_con_list: list[tuple[Request, Process]] = []

        procs_and_cons: queue.Queue[tuple[Process, ConnectionType]] = queue.Queue()
        for _ in range(init_processes):
            procs_and_cons.put(self.start_new_proc())
        self.procs_and_cons = procs_and_cons

    def start_new_proc(
        self,
    ) -> tuple[Process, ConnectionType]:
        """
        Start a new worker process.

        Returns
        -------
        ret_proc: Process
            The newly started process
        sub_proc_con1: ConnectionType
            Pipe end for talking to ret_proc
        """
        sub_proc_con1, sub_proc_con2 = Pipe(True)
        ret_proc = Process(
            target=start_synthesis_subprocess,
            kwargs={
                "use_gpu": self.use_gpu,
                "enable_mock": self.enable_mock,
                "bridge_config_dir": self.bridge_config_dir,
                "sub_proc_con": sub_proc_con2,
            },
            daemon=True,
        )
        ret_proc.start()
        return ret_proc, sub_proc_con1

    def finalize_con(
        self,
        req: Request,
        proc: Process,
        sub_proc_con: ConnectionType | None,
    ) -> None:
        """
        Clean up after a request has finished or its connection was lost.

        Removes the entry from watch_con_list and settles the worker: a live
        worker is put back into procs_and_cons, otherwise a freshly started
        one is put there instead.

        Parameters
        ----------
        req: fastapi.Request
            The request object received when the connection was established
            https://fastapi.tiangolo.com/advanced/using-request-directly/
        proc: Process
            The worker that was synthesizing
        sub_proc_con: ConnectionType, optional
            Pipe to that worker.
            When None, the worker is not reused and is closed.
        """
        try:
            self.watch_con_list.remove((req, proc))
        except ValueError:
            # Already removed; nothing to do.
            pass
        try:
            # is_alive() and close() raise ValueError on a closed or
            # still-running process; the explicit raise funnels the
            # "not reusable" case into the same replacement branch.
            if not proc.is_alive() or sub_proc_con is None:
                proc.close()
                raise ValueError
            # The worker is still alive, so reuse it.
            self.procs_and_cons.put((proc, sub_proc_con))
        except ValueError:
            # The worker is gone, so start a replacement.
            self.procs_and_cons.put(self.start_new_proc())

    def _synthesis_impl(
        self,
        query: AudioQuery,
        style_id: int,
        request: Request,
        core_version: str | None,
    ) -> str:
        """
        Synthesize speech in a worker process.

        Compared with the regular engine this additionally needs the request,
        and it returns a file name instead of waveform data.

        Parameters
        ----------
        query: AudioQuery
        style_id: int
        request: fastapi.Request
            The request object received when the connection was established
            https://fastapi.tiangolo.com/advanced/using-request-directly/
        core_version: str

        Returns
        -------
        f_name: str
            Name of the generated audio file

        Raises
        ------
        HTTPException
            With status 422 when the pipe to the worker reports EOF
        """
        proc, sub_proc_con1 = self.procs_and_cons.get()
        self.watch_con_list.append((request, proc))
        try:
            sub_proc_con1.send((query, style_id, core_version))
            f_name = sub_proc_con1.recv()
        except EOFError:
            # No finalize_con here: when the worker was killed by
            # catch_disconnection, that coroutine already finalized the entry.
            raise HTTPException(status_code=422, detail="既にサブプロセスは終了されています")
        except Exception:
            self.finalize_con(request, proc, sub_proc_con1)
            raise

        self.finalize_con(request, proc, sub_proc_con1)
        return f_name

    async def catch_disconnection(self):
        """
        Coroutine that watches connections and kills the worker of any
        request whose client has disconnected. Polls once per second, forever.
        """
        while True:
            await asyncio.sleep(1)
            # Iterate over a snapshot: finalize_con removes entries from
            # watch_con_list, and removing from a list while iterating it
            # makes the loop skip the following entry.
            for con in tuple(self.watch_con_list):
                req, proc = con
                if not await req.is_disconnected():
                    continue
                # The entry may have been finalized while this coroutine was
                # suspended; in that case its worker must not be touched.
                if con not in self.watch_con_list:
                    continue
                try:
                    if proc.is_alive():
                        proc.terminate()
                        proc.join()
                    proc.close()
                except ValueError:
                    # The process object was already closed.
                    pass
                finally:
                    self.finalize_con(req, proc, None)
189 |
190 |
def start_synthesis_subprocess(
    use_gpu: bool,
    enable_mock: bool,
    bridge_config_dir: Path,
    sub_proc_con: ConnectionType,
) -> None:
    """
    Entry point of a synthesis worker subprocess.
    Defined at module level so that it can be pickled.

    For the arguments use_gpu and enable_mock, see make_synthesis_engines.

    Each request received on the pipe is a (query, style_id, core_version)
    tuple. The reply is the name of a temporary WAV file, or an empty string
    when the requested core version is not available.

    Parameters
    ----------
    sub_proc_con: ConnectionType
        Pipe for talking to the main process
    """

    engines_by_version = make_synthesis_engines(
        use_gpu=use_gpu,
        enable_mock=enable_mock,
        bridge_config=BridgeConfigLoader(bridge_config_dir),
    )
    assert len(engines_by_version) != 0, "音声合成エンジンがありません。"
    newest_version = get_latest_core_version(versions=engines_by_version.keys())
    while True:
        try:
            query, style_id, core_version = sub_proc_con.recv()
            if core_version is not None and core_version not in engines_by_version:
                # Version-not-found error: reply with an empty file name.
                sub_proc_con.send("")
                continue
            selected_version = (
                newest_version if core_version is None else core_version
            )
            wave = engines_by_version[selected_version]._synthesis_impl(
                query, style_id
            )
            # delete=False: the file must outlive this block, because only its
            # name is sent back.
            with NamedTemporaryFile(delete=False) as wav_file:
                soundfile.write(
                    file=wav_file,
                    data=wave,
                    samplerate=query.outputSamplingRate,
                    format="WAV",
                )
            sub_proc_con.send(wav_file.name)
        except Exception:
            # Close the pipe before re-raising so that the parent's recv()
            # sees EOF.
            sub_proc_con.close()
            raise
236 |
--------------------------------------------------------------------------------
/voicevox_engine/dev/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .mock import (
2 | decode_forward,
3 | initialize,
4 | metas,
5 | supported_devices,
6 | yukarin_s_forward,
7 | yukarin_sa_forward,
8 | )
9 |
10 | __all__ = [
11 | "decode_forward",
12 | "initialize",
13 | "yukarin_s_forward",
14 | "yukarin_sa_forward",
15 | "metas",
16 | "supported_devices",
17 | ]
18 |
--------------------------------------------------------------------------------
/voicevox_engine/dev/core/mock.py:
--------------------------------------------------------------------------------
1 | import json
2 | from logging import getLogger
3 | from typing import Any, Dict, List
4 |
5 | import numpy as np
6 | from pyopenjtalk import tts
7 | from scipy.signal import resample
8 |
9 | DUMMY_TEXT = "これはダミーのテキストです"
10 |
11 |
def initialize(path: str, use_gpu: bool, *args: List[Any]) -> None:
    """No-op stand-in for core initialization [Mock]; all arguments are ignored."""
    return None
14 |
15 |
def yukarin_s_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:
    """Return a constant array of ``length`` values, each 0.2 [Mock].

    Keyword arguments are accepted and ignored.
    """
    # The "uvicorn" logger is used because this runs inside FastAPI / Uvicorn.
    getLogger("uvicorn").info(
        "Sorry, yukarin_s_forward() is a mock. Return values are incorrect.",
    )
    return np.full(length, 1 / 5)
22 |
23 |
def yukarin_sa_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:
    """Return a constant ``(1, length)`` array filled with 5.0 [Mock].

    Keyword arguments are accepted and ignored.
    """
    # The "uvicorn" logger is used because this runs inside FastAPI / Uvicorn.
    getLogger("uvicorn").info(
        "Sorry, yukarin_sa_forward() is a mock. Return values are incorrect.",
    )
    return np.full((1, length), 5.0)
30 |
31 |
def decode_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:
    """
    Return synthesized waveform data as a NumPy array; the spoken text is
    always the fixed DUMMY_TEXT.
    See also the SynthesisEngine docstring [Mock]

    Parameters
    ----------
    length : int
        Frame length (not used by this mock)

    Returns
    -------
    wave : np.ndarray
        The synthesized waveform

    Note
    -------
    Tuning (pitch and so on) is not reflected in this synthesis,
    and the same fixed text is spoken regardless of the input.

    # Output format of pyopenjtalk.tts()
    dtype=np.float64, 16 bit, mono 48000 Hz

    # About the resampling
    The output is converted to 24kHz to match the non-mock decode_forward.
    """
    # The "uvicorn" logger is used because this runs inside FastAPI / Uvicorn.
    getLogger("uvicorn").info(
        "Sorry, decode_forward() is a mock. Return values are incorrect.",
    )
    raw_wave, _sampling_rate = tts(DUMMY_TEXT)
    # Halve the sample count: 48000 Hz in, 24000 Hz out.
    target_samples = 24000 * len(raw_wave) // 48000
    return resample(raw_wave.astype("int16"), target_samples)
68 |
69 |
def metas() -> str:
    """Return the JSON text describing the four dummy speakers [Mock]."""
    # (speaker name, speaker uuid, style ids in style0, style1, ... order)
    speaker_table = (
        ("dummy1", "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff", (0, 2, 4, 6)),
        ("dummy2", "388f246b-8c41-4ac1-8e2d-5d79f3ff56d9", (1, 3, 5, 7)),
        ("dummy3", "35b2c544-660e-401e-b503-0e14c635303a", (8,)),
        ("dummy4", "b1a81618-b27b-40d2-b0ea-27a9ad408c4b", (9,)),
    )
    speakers = [
        {
            "name": speaker_name,
            "styles": [
                {"name": f"style{position}", "id": style_id}
                for position, style_id in enumerate(style_ids)
            ],
            "speaker_uuid": speaker_uuid,
            "version": "mock",
            "supported_features": {},
        }
        for speaker_name, speaker_uuid, style_ids in speaker_table
    ]
    return json.dumps(speakers)
117 |
118 |
def supported_devices() -> str:
    """Return the JSON text of the device flags: cpu true, cuda false [Mock]."""
    device_flags = dict(cpu=True, cuda=False)
    return json.dumps(device_flags)
126 |
--------------------------------------------------------------------------------
/voicevox_engine/dev/synthesis_engine/__init__.py:
--------------------------------------------------------------------------------
1 | from .mock import MockSynthesisEngine
2 |
3 | __all__ = ["MockSynthesisEngine"]
4 |
--------------------------------------------------------------------------------
/voicevox_engine/dev/synthesis_engine/mock.py:
--------------------------------------------------------------------------------
1 | from logging import getLogger
2 | from typing import Any, Dict, List, Optional
3 |
4 | import numpy as np
5 | from pyopenjtalk import tts
6 | from scipy.signal import resample
7 |
8 | from ...model import AccentPhrase, AudioQuery
9 | from ...synthesis_engine import SynthesisEngineBase
10 | from ...synthesis_engine.synthesis_engine import to_flatten_moras
11 |
12 |
class MockSynthesisEngine(SynthesisEngineBase):
    """
    SynthesisEngine [Mock]
    """

    def __init__(
        self,
        speakers: str,
        supported_devices: Optional[str] = None,
    ):
        """
        __init__ [Mock]

        Stores the given speakers / supported_devices values unchanged and
        sets default_sampling_rate to 24000.
        """
        super().__init__()

        self.default_sampling_rate = 24000
        self._supported_devices = supported_devices
        self._speakers = speakers

    @property
    def speakers(self) -> str:
        return self._speakers

    @property
    def supported_devices(self) -> Optional[str]:
        return self._supported_devices

    def replace_phoneme_length(
        self, accent_phrases: List[AccentPhrase], style_id: int
    ) -> List[AccentPhrase]:
        """
        replace_phoneme_length: return accent_phrases as given, unchanged [Mock]

        Parameters
        ----------
        accent_phrases : List[AccentPhrase]
            Accent phrases
        style_id : int
            Style ID

        Returns
        -------
        List[AccentPhrase]
            The same accent phrases (no changes)
        """
        return accent_phrases

    def replace_mora_pitch(
        self, accent_phrases: List[AccentPhrase], style_id: int
    ) -> List[AccentPhrase]:
        """
        replace_mora_pitch: return accent_phrases as given, unchanged [Mock]

        Parameters
        ----------
        accent_phrases : List[AccentPhrase]
            Accent phrases
        style_id : int
            Style ID

        Returns
        -------
        List[AccentPhrase]
            The same accent phrases (no changes)
        """
        return accent_phrases

    def _synthesis_impl(self, query: AudioQuery, style_id: int) -> np.ndarray:
        """
        synthesis: synthesize speech without using voicevox core [Mock]

        Parameters
        ----------
        query : AudioQuery
            The json obtained from the /audio_query API
        style_id : int
            Style ID

        Returns
        -------
        wave [npt.NDArray[np.int16]]
            Waveform data as a NumPy array
        """
        # Rebuild the katakana text from the mora texts of every accent phrase.
        kana_text = "".join(
            mora.text for mora in to_flatten_moras(query.accent_phrases)
        )

        waveform = self.forward(kana_text)

        # Apply the volume scale in place before the int16 conversion.
        waveform *= query.volumeScale

        return waveform.astype("int16")

    def forward(self, text: str, **kwargs: Dict[str, Any]) -> np.ndarray:
        """
        forward: tts via pyopenjtalk.tts()
        See also the SynthesisEngine docstring [Mock]

        Parameters
        ----------
        text : str
            Input text (e.g. the sentence to read out, written in katakana)

        Returns
        -------
        wave [npt.NDArray[np.int16]]
            Waveform data as a NumPy array

        Note
        -------
        Tuning (pitch and so on) is not reflected in this synthesis.

        # Output format of pyopenjtalk.tts()
        dtype=np.float64, 16 bit, mono 48000 Hz

        # About the resampling
        The output is converted to 24kHz to match the non-mock
        implementation (decode_forward).
        """
        # The "uvicorn" logger is used because this runs inside FastAPI / Uvicorn.
        getLogger("uvicorn").info("[Mock] input text: %s" % text)
        raw_wave, _sampling_rate = tts(text)
        # Halve the sample count: 48000 Hz in, 24000 Hz out.
        target_samples = 24000 * len(raw_wave) // 48000
        return resample(raw_wave, target_samples)
137 |
--------------------------------------------------------------------------------
/voicevox_engine/engine_manifest/EngineManifest.py:
--------------------------------------------------------------------------------
1 | # マルチエンジン環境下においては、エンジンのバージョンがエディタのバージョンより
2 | # 古くなる可能性が十分に考えられる。その場合、エディタ側がEngineManifestの情報不足によって
3 | # エラーを吐いて表示が崩壊する可能性がある。これを防止するため、EngineManifest関連の定義を
4 | # 変更する際は、Optionalにする必要があることに留意しなければならない。
5 |
6 | from typing import List, Optional
7 |
8 | from pydantic import BaseModel, Field
9 |
10 |
class UpdateInfo(BaseModel):
    """
    Update information for the engine.
    """

    version: str = Field(title="エンジンのバージョン名")  # engine version name
    descriptions: List[str] = Field(title="アップデートの詳細についての説明")  # update details
    contributors: Optional[List[str]] = Field(title="貢献者名")  # contributor names
19 |
20 |
class LicenseInfo(BaseModel):
    """
    License information for a dependency library.
    """

    name: str = Field(title="依存ライブラリ名")  # library name
    version: Optional[str] = Field(title="依存ライブラリのバージョン")  # library version
    license: Optional[str] = Field(title="依存ライブラリのライセンス名")  # license name
    text: str = Field(title="依存ライブラリのライセンス本文")  # full license text
30 |
31 |
class SupportedFeatures(BaseModel):
    """
    List of features the engine provides.
    """

    adjust_mora_pitch: bool = Field(title="モーラごとの音高の調整")  # per-mora pitch
    adjust_phoneme_length: bool = Field(title="音素ごとの長さの調整")  # per-phoneme length
    adjust_speed_scale: bool = Field(title="全体の話速の調整")  # overall speed
    adjust_pitch_scale: bool = Field(title="全体の音高の調整")  # overall pitch
    adjust_intonation_scale: bool = Field(title="全体の抑揚の調整")  # overall intonation
    adjust_volume_scale: bool = Field(title="全体の音量の調整")  # overall volume
    interrogative_upspeak: bool = Field(title="疑問文の自動調整")  # auto-adjust questions
    synthesis_morphing: bool = Field(title="2人の話者でモーフィングした音声を合成")  # two-speaker morphing
    # Install / uninstall of voice libraries. Optional, unlike the flags above.
    manage_library: Optional[bool] = Field(title="音声ライブラリのインストール・アンインストール")
46 |
47 |
class EngineManifest(BaseModel):
    """
    Information about the engine itself.
    """

    manifest_version: str = Field(title="マニフェストのバージョン")  # manifest version
    name: str = Field(title="エンジン名")  # engine name
    brand_name: str = Field(title="ブランド名")  # brand name
    uuid: str = Field(title="エンジンのUUID")  # engine UUID
    url: str = Field(title="エンジンのURL")  # engine URL
    icon: str = Field(title="エンジンのアイコンをBASE64エンコードしたもの")  # base64-encoded icon
    default_sampling_rate: int = Field(title="デフォルトのサンプリング周波数")  # default sampling rate
    terms_of_service: str = Field(title="エンジンの利用規約")  # terms of service text
    update_infos: List[UpdateInfo] = Field(title="エンジンのアップデート情報")  # update history
    dependency_licenses: List[LicenseInfo] = Field(title="依存関係のライセンス情報")  # dependency licenses
    # vvlib manifest version the engine supports; Optional so it may be absent.
    supported_vvlib_manifest_version: Optional[str] = Field(
        title="エンジンが対応するvvlibのバージョン"
    )
    supported_features: SupportedFeatures = Field(title="エンジンが持つ機能")  # engine features
67 |
--------------------------------------------------------------------------------
/voicevox_engine/engine_manifest/EngineManifestLoader.py:
--------------------------------------------------------------------------------
1 | import json
2 | from base64 import b64encode
3 | from pathlib import Path
4 | from typing import Tuple
5 |
6 | from ..utility import engine_root
7 | from .EngineManifest import EngineManifest, LicenseInfo, UpdateInfo
8 |
9 |
class EngineManifestLoader:
    """
    Reads the engine manifest JSON and the asset files it refers to.

    ``manifest_path`` is the JSON file itself; ``root_dir`` is the directory
    against which the asset paths stored in the manifest are resolved.
    """

    def __init__(
        self,
        manifest_path: Path = engine_root() / "engine_manifest.json",  # noqa: B008
        root_dir: Path = engine_root(),  # noqa: B008
    ):
        self.manifest_path = manifest_path
        self.root_dir = root_dir

    def _read_raw_manifest(self):
        """Parse the manifest file (UTF-8 JSON) and return the decoded object."""
        return json.loads(self.manifest_path.read_text(encoding="utf-8"))

    def _read_json_asset(self, relative_path):
        """Parse a UTF-8 JSON file located relative to ``root_dir``."""
        return json.loads((self.root_dir / relative_path).read_text("utf-8"))

    def load_manifest(self) -> EngineManifest:
        """Build an ``EngineManifest`` from the manifest and its asset files."""
        raw = self._read_raw_manifest()

        # Plain values copied straight from the JSON.
        manifest_version = raw["manifest_version"]
        name = raw["name"]
        brand_name = raw["brand_name"]
        uuid = raw["uuid"]
        url = raw["url"]
        default_sampling_rate = raw["default_sampling_rate"]

        # Values stored as paths in the JSON: load the referenced files.
        icon_bytes = (self.root_dir / raw["icon"]).read_bytes()
        icon = b64encode(icon_bytes).decode("utf-8")
        terms_of_service = (self.root_dir / raw["terms_of_service"]).read_text(
            "utf-8"
        )
        update_infos = [
            UpdateInfo(**entry) for entry in self._read_json_asset(raw["update_infos"])
        ]
        # Manifests without supported_vvlib_manifest_version are allowed,
        # so use get(), which yields None when the key is absent.
        supported_vvlib_manifest_version = raw.get("supported_vvlib_manifest_version")
        dependency_licenses = [
            LicenseInfo(**entry)
            for entry in self._read_json_asset(raw["dependency_licenses"])
        ]
        # Each feature entry is an object; only its "value" member is kept.
        supported_features = {
            key: item["value"] for key, item in raw["supported_features"].items()
        }

        return EngineManifest(
            manifest_version=manifest_version,
            name=name,
            brand_name=brand_name,
            uuid=uuid,
            url=url,
            default_sampling_rate=default_sampling_rate,
            icon=icon,
            terms_of_service=terms_of_service,
            update_infos=update_infos,
            supported_vvlib_manifest_version=supported_vvlib_manifest_version,
            dependency_licenses=dependency_licenses,
            supported_features=supported_features,
        )

    def load_info_for_bridge_config(self) -> Tuple[str, int, int]:
        """Return ``(version, port, default_sampling_rate)`` from the manifest."""
        raw = self._read_raw_manifest()
        version = raw["version"]
        port = raw["port"]
        default_sampling_rate = raw["default_sampling_rate"]
        return version, port, default_sampling_rate

    def load_version(self) -> str:
        """Return the ``version`` entry of the manifest."""
        return self._read_raw_manifest()["version"]
66 |
--------------------------------------------------------------------------------
/voicevox_engine/engine_manifest/__init__.py:
--------------------------------------------------------------------------------
1 | from .EngineManifest import EngineManifest
2 | from .EngineManifestLoader import EngineManifestLoader
3 |
4 | __all__ = [
5 | "EngineManifest",
6 | "EngineManifestLoader",
7 | ]
8 |
--------------------------------------------------------------------------------
/voicevox_engine/kana_parser.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional
2 |
3 | from .model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode
4 | from .mora_list import openjtalk_text2mora
5 |
6 | LOOP_LIMIT = 300
7 | UNVOICE_SYMBOL = "_"
8 | ACCENT_SYMBOL = "'"
9 | NOPAUSE_DELIMITER = "/"
10 | PAUSE_DELIMITER = "、"
11 | WIDE_INTERROGATION_MARK = "?"
12 |
# Lookup table: kana text -> Mora template with lengths and pitch set to 0.
# For moras ending in a/i/u/e/o an extra entry keyed by UNVOICE_SYMBOL + text
# is added whose vowel is upper-cased (the unvoiced form).
text2mora_with_unvoice = {}
for text, (consonant, vowel) in openjtalk_text2mora.items():
    text2mora_with_unvoice[text] = Mora(
        text=text,
        consonant=consonant or None,
        consonant_length=0 if consonant else None,
        vowel=vowel,
        vowel_length=0,
        pitch=0,
        is_interrogative=False,
    )
    if vowel not in ("a", "i", "u", "e", "o"):
        continue
    text2mora_with_unvoice[UNVOICE_SYMBOL + text] = Mora(
        text=text,
        consonant=consonant or None,
        consonant_length=0 if consonant else None,
        vowel=vowel.upper(),
        vowel_length=0,
        pitch=0,
        is_interrogative=False,
    )
34 |
35 |
def _text_to_accent_phrase(phrase: str) -> AccentPhrase:
    """
    Build an AccentPhrase from a kana reading using longest match.

    The cost is O(N^2) for an input of length N, because every mora is found
    by scanning from the current position up to the next accent symbol (or
    the end of the phrase).
    """
    accent_index: Optional[int] = None
    moras: List[Mora] = []

    cursor = 0  # position in ``phrase`` where the next mora starts
    iterations = 0
    while cursor < len(phrase):
        iterations += 1

        if phrase[cursor] == ACCENT_SYMBOL:
            # The accent mark must follow a mora and may appear only once.
            if not moras:
                raise ParseKanaError(ParseKanaErrorCode.ACCENT_TOP, text=phrase)
            if accent_index is not None:
                raise ParseKanaError(ParseKanaErrorCode.ACCENT_TWICE, text=phrase)
            accent_index = len(moras)
            cursor += 1
            continue

        pending = ""  # characters collected so far from ``cursor``
        longest_match: Optional[str] = None  # longest known kana seen in ``pending``
        for char in phrase[cursor:]:
            if char == ACCENT_SYMBOL:
                break
            pending += char
            if pending in text2mora_with_unvoice:
                longest_match = pending

        if longest_match is None:
            raise ParseKanaError(ParseKanaErrorCode.UNKNOWN_TEXT, text=pending)

        # Copy the template so each parsed mora can be modified independently.
        moras.append(text2mora_with_unvoice[longest_match].copy(deep=True))
        cursor += len(longest_match)

        if iterations > LOOP_LIMIT:
            raise ParseKanaError(ParseKanaErrorCode.INFINITE_LOOP)

    if accent_index is None:
        raise ParseKanaError(ParseKanaErrorCode.ACCENT_NOTFOUND, text=phrase)
    return AccentPhrase(moras=moras, accent=accent_index, pause_mora=None)
80 |
81 |
def parse_kana(text: str) -> List[AccentPhrase]:
    """
    Parse an AquesTalk-like kana reading into accent phrases whose lengths and
    pitches are left unspecified.

    Phrases are separated by PAUSE_DELIMITER (a pause mora is attached to the
    preceding phrase) or NOPAUSE_DELIMITER. A phrase may end with
    WIDE_INTERROGATION_MARK, which marks it as interrogative.

    Raises
    ------
    ParseKanaError
        EMPTY_PHRASE when the text or any phrase is empty (``position`` is the
        1-based phrase number, always passed as a string);
        INTERROGATION_MARK_NOT_AT_END when the mark appears anywhere but at the
        end of a phrase; plus whatever ``_text_to_accent_phrase`` raises.
    """
    if len(text) == 0:
        # Same argument type as the per-phrase check below.
        raise ParseKanaError(ParseKanaErrorCode.EMPTY_PHRASE, position=str(1))

    parsed_results: List[AccentPhrase] = []
    phrase_base = 0

    # Iterate one past the end so the final phrase is flushed as well.
    for i in range(len(text) + 1):
        if i == len(text) or text[i] in [PAUSE_DELIMITER, NOPAUSE_DELIMITER]:
            phrase = text[phrase_base:i]
            if len(phrase) == 0:
                raise ParseKanaError(
                    ParseKanaErrorCode.EMPTY_PHRASE,
                    position=str(len(parsed_results) + 1),
                )
            phrase_base = i + 1

            is_interrogative = WIDE_INTERROGATION_MARK in phrase
            if is_interrogative:
                # The mark is only allowed as the last character of a phrase.
                if WIDE_INTERROGATION_MARK in phrase[:-1]:
                    raise ParseKanaError(
                        ParseKanaErrorCode.INTERROGATION_MARK_NOT_AT_END, text=phrase
                    )
                phrase = phrase.replace(WIDE_INTERROGATION_MARK, "")

            accent_phrase: AccentPhrase = _text_to_accent_phrase(phrase)
            if i < len(text) and text[i] == PAUSE_DELIMITER:
                accent_phrase.pause_mora = Mora(
                    text="、",
                    consonant=None,
                    consonant_length=None,
                    vowel="pau",
                    vowel_length=0,
                    pitch=0,
                )
            accent_phrase.is_interrogative = is_interrogative

            parsed_results.append(accent_phrase)

    return parsed_results
125 |
126 |
def create_kana(accent_phrases: List[AccentPhrase]) -> str:
    """
    Serialize accent phrases back into the kana notation.

    Each mora contributes its text, prefixed with UNVOICE_SYMBOL when its vowel
    is upper-case (A/I/U/E/O). ACCENT_SYMBOL follows the mora whose 1-based
    position equals ``phrase.accent``. Interrogative phrases get
    WIDE_INTERROGATION_MARK appended, and every phrase except the last is
    followed by PAUSE_DELIMITER or NOPAUSE_DELIMITER depending on whether it
    has a pause mora.
    """
    pieces: List[str] = []
    last_index = len(accent_phrases) - 1
    for phrase_index, phrase in enumerate(accent_phrases):
        for position, mora in enumerate(phrase.moras, start=1):
            if mora.vowel in ["A", "I", "U", "E", "O"]:
                pieces.append(UNVOICE_SYMBOL)
            pieces.append(mora.text)
            if position == phrase.accent:
                pieces.append(ACCENT_SYMBOL)

        if phrase.is_interrogative:
            pieces.append(WIDE_INTERROGATION_MARK)

        if phrase_index < last_index:
            pieces.append(
                NOPAUSE_DELIMITER if phrase.pause_mora is None else PAUSE_DELIMITER
            )
    return "".join(pieces)
147 |
--------------------------------------------------------------------------------
/voicevox_engine/metas/Metas.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | from typing import List, Optional
3 |
4 | from pydantic import BaseModel, Field
5 |
6 |
class SpeakerStyle(BaseModel):
    """
    Style information of a speaker.
    """

    name: str = Field(title="スタイル名")  # style name
    id: int = Field(title="スタイルID")  # style ID
14 |
15 |
class SpeakerSupportPermittedSynthesisMorphing(str, Enum):
    """
    Morphing permission declared by a speaker.

    Looking up a value that is not one of the members (including ``None``)
    yields ``ALL`` instead of raising ``ValueError``.
    """

    ALL = "ALL"  # everything is permitted
    SELF_ONLY = "SELF_ONLY"  # permitted only within the same speaker
    NOTHING = "NOTHING"  # everything is prohibited

    @classmethod
    def _missing_(cls, value: object) -> "SpeakerSupportPermittedSynthesisMorphing":
        # Fallback used by Enum when ``value`` matches no member.
        return cls.ALL
24 |
25 |
class SpeakerSupportedFeatures(BaseModel):
    """
    Features supported by a speaker.
    """

    # Morphing permission. The default is obtained by calling the enum with
    # None, which goes through the enum's _missing_ hook and therefore yields ALL.
    permitted_synthesis_morphing: SpeakerSupportPermittedSynthesisMorphing = Field(
        title="モーフィング機能への対応", default=SpeakerSupportPermittedSynthesisMorphing(None)
    )
34 |
35 |
class CoreSpeaker(BaseModel):
    """
    Speaker information contained in the core.
    """

    name: str = Field(title="名前")  # speaker name
    speaker_uuid: str = Field(title="スピーカーのUUID")  # speaker UUID
    styles: List[SpeakerStyle] = Field(title="スピーカースタイルの一覧")  # list of styles
    # The label has to be passed as ``title=``. Given positionally it is taken
    # as the field's default value, which made ``version`` silently default to
    # the label text and left the schema without a title.
    version: str = Field(title="スピーカーのバージョン")  # speaker version
45 |
46 |
class EngineSpeaker(BaseModel):
    """
    Speaker information contained in the engine.
    """

    # Features the speaker supports; a default SpeakerSupportedFeatures is
    # created when the value is not supplied.
    supported_features: SpeakerSupportedFeatures = Field(
        title="スピーカーの対応機能", default_factory=SpeakerSupportedFeatures
    )
55 |
56 |
class Speaker(CoreSpeaker, EngineSpeaker):
    """
    Speaker information: the fields of CoreSpeaker and EngineSpeaker combined.
    """

    pass
63 |
64 |
class StyleInfo(BaseModel):
    """
    Additional information about a style.
    """

    id: int = Field(title="スタイルID")  # style ID
    icon: str = Field(title="当該スタイルのアイコンをbase64エンコードしたもの")  # base64-encoded icon
    # base64-encoded portrait.png of this style; optional
    portrait: Optional[str] = Field(title="当該スタイルのportrait.pngをbase64エンコードしたもの")
    # base64-encoded voice sample wav files
    voice_samples: List[str] = Field(title="voice_sampleのwavファイルをbase64エンコードしたもの")
74 |
75 |
class SpeakerInfo(BaseModel):
    """
    Additional information about a speaker.
    """

    policy: str = Field(title="policy.md")  # contents of policy.md
    portrait: str = Field(title="portrait.pngをbase64エンコードしたもの")  # base64-encoded portrait.png
    style_infos: List[StyleInfo] = Field(title="スタイルの追加情報")  # per-style additional info
84 |
--------------------------------------------------------------------------------
/voicevox_engine/metas/MetasStore.py:
--------------------------------------------------------------------------------
1 | import json
2 | from typing import TYPE_CHECKING, Dict, List, Tuple
3 |
4 | from voicevox_engine.metas.Metas import CoreSpeaker, EngineSpeaker, Speaker, StyleInfo
5 |
6 | if TYPE_CHECKING:
7 | from voicevox_engine.synthesis_engine.synthesis_engine_base import (
8 | SynthesisEngineBase,
9 | )
10 |
11 |
class MetasStore:
    """
    Manages meta information about speakers and styles.
    """

    def __init__(self, engine: "SynthesisEngineBase") -> None:
        """Index the engine-side metadata of every speaker by speaker UUID."""
        loaded: Dict[str, EngineSpeaker] = {}
        for speaker in json.loads(engine.speakers):
            speaker_uuid = speaker["speaker_uuid"]
            loaded[speaker_uuid] = EngineSpeaker(
                supported_features=speaker["supported_features"]
            )
        self._loaded_metas: Dict[str, EngineSpeaker] = loaded

    @property
    def loaded_metas(self) -> Dict[str, EngineSpeaker]:
        """Mapping from speaker UUID to the engine-side metadata."""
        return self._loaded_metas

    def speaker_engine_metas(self, speaker_uuid: str) -> EngineSpeaker:
        """Return the engine-side metadata for ``speaker_uuid`` (KeyError if unknown)."""
        return self.loaded_metas[speaker_uuid]

    def combine_metas(self, core_metas: List[CoreSpeaker]) -> List[Speaker]:
        """
        Return the given core metas with the engine-side information added.
        core_metas: models of the JSON returned by the core's metas()
        """
        combined: List[Speaker] = []
        for core_meta in core_metas:
            engine_meta = self.speaker_engine_metas(core_meta.speaker_uuid)
            combined.append(Speaker(**engine_meta.dict(), **core_meta.dict()))
        return combined

    # FIXME: fix the circular import caused by SynthesisEngineBase by passing
    # List[CoreSpeaker] instead of the engine
    def load_combined_metas(self, engine: "SynthesisEngineBase") -> List[Speaker]:
        """
        Return metas containing both core and engine information for the given engine.
        """
        raw_speakers = json.loads(engine.speakers)
        core_metas = [CoreSpeaker(**raw_speaker) for raw_speaker in raw_speakers]
        return self.combine_metas(core_metas)
55 |
56 |
def construct_lookup(speakers: List[Speaker]) -> Dict[int, Tuple[Speaker, StyleInfo]]:
    """
    Build a lookup table ``{style.id: (speaker, style)}`` covering every style
    of every given speaker. When a style id occurs more than once, the last
    occurrence wins.
    """
    return {
        style.id: (speaker, style)
        for speaker in speakers
        for style in speaker.styles
    }
67 |
--------------------------------------------------------------------------------
/voicevox_engine/metas/__init__.py:
--------------------------------------------------------------------------------
1 | from . import Metas, MetasStore
2 |
3 | __all__ = [
4 | "Metas",
5 | "MetasStore",
6 | ]
7 |
--------------------------------------------------------------------------------
/voicevox_engine/mora_list.py:
--------------------------------------------------------------------------------
1 | """
2 | 以下のモーラ対応表はOpenJTalkのソースコードから取得し、
3 | カタカナ表記とモーラが一対一対応するように改造した。
4 | ライセンス表記:
5 | -----------------------------------------------------------------
6 | The Japanese TTS System "Open JTalk"
7 | developed by HTS Working Group
8 | http://open-jtalk.sourceforge.net/
9 | -----------------------------------------------------------------
10 |
11 | Copyright (c) 2008-2014 Nagoya Institute of Technology
12 | Department of Computer Science
13 |
14 | All rights reserved.
15 |
16 | Redistribution and use in source and binary forms, with or
17 | without modification, are permitted provided that the following
18 | conditions are met:
19 |
20 | - Redistributions of source code must retain the above copyright
21 | notice, this list of conditions and the following disclaimer.
22 | - Redistributions in binary form must reproduce the above
23 | copyright notice, this list of conditions and the following
24 | disclaimer in the documentation and/or other materials provided
25 | with the distribution.
26 | - Neither the name of the HTS working group nor the names of its
27 | contributors may be used to endorse or promote products derived
28 | from this software without specific prior written permission.
29 |
30 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
31 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
32 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
33 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
34 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
35 | BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
36 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
37 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
38 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
39 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
40 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
41 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
42 | POSSIBILITY OF SUCH DAMAGE.
43 | """
44 | _mora_list_minimum = [
45 | ["ヴォ", "v", "o"],
46 | ["ヴェ", "v", "e"],
47 | ["ヴィ", "v", "i"],
48 | ["ヴァ", "v", "a"],
49 | ["ヴ", "v", "u"],
50 | ["ン", "", "N"],
51 | ["ワ", "w", "a"],
52 | ["ロ", "r", "o"],
53 | ["レ", "r", "e"],
54 | ["ル", "r", "u"],
55 | ["リョ", "ry", "o"],
56 | ["リュ", "ry", "u"],
57 | ["リャ", "ry", "a"],
58 | ["リェ", "ry", "e"],
59 | ["リ", "r", "i"],
60 | ["ラ", "r", "a"],
61 | ["ヨ", "y", "o"],
62 | ["ユ", "y", "u"],
63 | ["ヤ", "y", "a"],
64 | ["モ", "m", "o"],
65 | ["メ", "m", "e"],
66 | ["ム", "m", "u"],
67 | ["ミョ", "my", "o"],
68 | ["ミュ", "my", "u"],
69 | ["ミャ", "my", "a"],
70 | ["ミェ", "my", "e"],
71 | ["ミ", "m", "i"],
72 | ["マ", "m", "a"],
73 | ["ポ", "p", "o"],
74 | ["ボ", "b", "o"],
75 | ["ホ", "h", "o"],
76 | ["ペ", "p", "e"],
77 | ["ベ", "b", "e"],
78 | ["ヘ", "h", "e"],
79 | ["プ", "p", "u"],
80 | ["ブ", "b", "u"],
81 | ["フォ", "f", "o"],
82 | ["フェ", "f", "e"],
83 | ["フィ", "f", "i"],
84 | ["ファ", "f", "a"],
85 | ["フ", "f", "u"],
86 | ["ピョ", "py", "o"],
87 | ["ピュ", "py", "u"],
88 | ["ピャ", "py", "a"],
89 | ["ピェ", "py", "e"],
90 | ["ピ", "p", "i"],
91 | ["ビョ", "by", "o"],
92 | ["ビュ", "by", "u"],
93 | ["ビャ", "by", "a"],
94 | ["ビェ", "by", "e"],
95 | ["ビ", "b", "i"],
96 | ["ヒョ", "hy", "o"],
97 | ["ヒュ", "hy", "u"],
98 | ["ヒャ", "hy", "a"],
99 | ["ヒェ", "hy", "e"],
100 | ["ヒ", "h", "i"],
101 | ["パ", "p", "a"],
102 | ["バ", "b", "a"],
103 | ["ハ", "h", "a"],
104 | ["ノ", "n", "o"],
105 | ["ネ", "n", "e"],
106 | ["ヌ", "n", "u"],
107 | ["ニョ", "ny", "o"],
108 | ["ニュ", "ny", "u"],
109 | ["ニャ", "ny", "a"],
110 | ["ニェ", "ny", "e"],
111 | ["ニ", "n", "i"],
112 | ["ナ", "n", "a"],
113 | ["ドゥ", "d", "u"],
114 | ["ド", "d", "o"],
115 | ["トゥ", "t", "u"],
116 | ["ト", "t", "o"],
117 | ["デョ", "dy", "o"],
118 | ["デュ", "dy", "u"],
119 | ["デャ", "dy", "a"],
120 | ["デェ", "dy", "e"],
121 | ["ディ", "d", "i"],
122 | ["デ", "d", "e"],
123 | ["テョ", "ty", "o"],
124 | ["テュ", "ty", "u"],
125 | ["テャ", "ty", "a"],
126 | ["ティ", "t", "i"],
127 | ["テ", "t", "e"],
128 | ["ツォ", "ts", "o"],
129 | ["ツェ", "ts", "e"],
130 | ["ツィ", "ts", "i"],
131 | ["ツァ", "ts", "a"],
132 | ["ツ", "ts", "u"],
133 | ["ッ", "", "cl"],
134 | ["チョ", "ch", "o"],
135 | ["チュ", "ch", "u"],
136 | ["チャ", "ch", "a"],
137 | ["チェ", "ch", "e"],
138 | ["チ", "ch", "i"],
139 | ["ダ", "d", "a"],
140 | ["タ", "t", "a"],
141 | ["ゾ", "z", "o"],
142 | ["ソ", "s", "o"],
143 | ["ゼ", "z", "e"],
144 | ["セ", "s", "e"],
145 | ["ズィ", "z", "i"],
146 | ["ズ", "z", "u"],
147 | ["スィ", "s", "i"],
148 | ["ス", "s", "u"],
149 | ["ジョ", "j", "o"],
150 | ["ジュ", "j", "u"],
151 | ["ジャ", "j", "a"],
152 | ["ジェ", "j", "e"],
153 | ["ジ", "j", "i"],
154 | ["ショ", "sh", "o"],
155 | ["シュ", "sh", "u"],
156 | ["シャ", "sh", "a"],
157 | ["シェ", "sh", "e"],
158 | ["シ", "sh", "i"],
159 | ["ザ", "z", "a"],
160 | ["サ", "s", "a"],
161 | ["ゴ", "g", "o"],
162 | ["コ", "k", "o"],
163 | ["ゲ", "g", "e"],
164 | ["ケ", "k", "e"],
165 | ["グヮ", "gw", "a"],
166 | ["グ", "g", "u"],
167 | ["クヮ", "kw", "a"],
168 | ["ク", "k", "u"],
169 | ["ギョ", "gy", "o"],
170 | ["ギュ", "gy", "u"],
171 | ["ギャ", "gy", "a"],
172 | ["ギェ", "gy", "e"],
173 | ["ギ", "g", "i"],
174 | ["キョ", "ky", "o"],
175 | ["キュ", "ky", "u"],
176 | ["キャ", "ky", "a"],
177 | ["キェ", "ky", "e"],
178 | ["キ", "k", "i"],
179 | ["ガ", "g", "a"],
180 | ["カ", "k", "a"],
181 | ["オ", "", "o"],
182 | ["エ", "", "e"],
183 | ["ウォ", "w", "o"],
184 | ["ウェ", "w", "e"],
185 | ["ウィ", "w", "i"],
186 | ["ウ", "", "u"],
187 | ["イェ", "y", "e"],
188 | ["イ", "", "i"],
189 | ["ア", "", "a"],
190 | ]
191 | _mora_list_additional = [
192 | ["ヴョ", "by", "o"],
193 | ["ヴュ", "by", "u"],
194 | ["ヴャ", "by", "a"],
195 | ["ヲ", "", "o"],
196 | ["ヱ", "", "e"],
197 | ["ヰ", "", "i"],
198 | ["ヮ", "w", "a"],
199 | ["ョ", "y", "o"],
200 | ["ュ", "y", "u"],
201 | ["ヅ", "z", "u"],
202 | ["ヂ", "j", "i"],
203 | ["ヶ", "k", "e"],
204 | ["ャ", "y", "a"],
205 | ["ォ", "", "o"],
206 | ["ェ", "", "e"],
207 | ["ゥ", "", "u"],
208 | ["ィ", "", "i"],
209 | ["ァ", "", "a"],
210 | ]
211 |
212 | openjtalk_mora2text = {
213 | consonant + vowel: text for [text, consonant, vowel] in _mora_list_minimum
214 | }
215 | openjtalk_text2mora = {
216 | text: (consonant, vowel)
217 | for [text, consonant, vowel] in _mora_list_minimum + _mora_list_additional
218 | }
219 |
--------------------------------------------------------------------------------
/voicevox_engine/morphing.py:
--------------------------------------------------------------------------------
1 | from copy import deepcopy
2 | from dataclasses import dataclass
3 | from itertools import chain
4 | from typing import Dict, List, Tuple
5 |
6 | import numpy as np
7 | import pyworld as pw
8 | from scipy.signal import resample
9 |
10 | from .metas.Metas import Speaker, SpeakerSupportPermittedSynthesisMorphing, StyleInfo
11 | from .metas.MetasStore import construct_lookup
12 | from .model import AudioQuery, MorphableTargetInfo, StyleIdNotFoundError
13 | from .synthesis_engine import SynthesisEngine
14 |
15 |
# FIXME: ndarray type hint, https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder/blob/2b64f86197573497c685c785c6e0e743f407b63e/pyworld/pyworld.pyx#L398 # noqa
@dataclass(frozen=True)
class MorphingParameter:
    """Immutable set of values from which a morphed voice is synthesized."""

    fs: int  # sampling rate used for the pyworld analysis and synthesis calls
    frame_period: float  # frame period given to pw.harvest / pw.synthesize
    base_f0: np.ndarray  # F0 of the base voice (from pw.harvest)
    base_aperiodicity: np.ndarray  # aperiodicity of the base voice (from pw.d4c)
    base_spectrogram: np.ndarray  # spectrogram of the base voice (from pw.cheaptrick)
    target_spectrogram: np.ndarray  # target spectrogram, resized to the base's shape
25 |
26 |
def create_morphing_parameter(
    base_wave: np.ndarray,
    target_wave: np.ndarray,
    fs: int,
) -> MorphingParameter:
    """
    Analyse both waveforms with pyworld and bundle what morphing needs.

    The base voice contributes F0, spectrogram and aperiodicity; the target
    voice contributes only its spectrogram, resized in place to the shape of
    the base spectrogram. A frame period of 1.0 is used throughout.
    """
    frame_period = 1.0

    # Base voice: F0 contour first, then the features derived from it.
    f0_of_base, times_of_base = pw.harvest(base_wave, fs, frame_period=frame_period)
    spectrogram_of_base = pw.cheaptrick(base_wave, f0_of_base, times_of_base, fs)
    aperiodicity_of_base = pw.d4c(base_wave, f0_of_base, times_of_base, fs)

    # Target voice: only the spectrogram is kept.
    f0_of_target, times_of_target = pw.harvest(
        target_wave, fs, frame_period=frame_period
    )
    spectrogram_of_target = pw.cheaptrick(
        target_wave, f0_of_target, times_of_target, fs
    )
    # Make both spectrograms the same shape so they can be blended element-wise.
    spectrogram_of_target.resize(spectrogram_of_base.shape)

    return MorphingParameter(
        fs=fs,
        frame_period=frame_period,
        base_f0=f0_of_base,
        base_aperiodicity=aperiodicity_of_base,
        base_spectrogram=spectrogram_of_base,
        target_spectrogram=spectrogram_of_target,
    )
49 |
50 |
def get_morphable_targets(
    speakers: List[Speaker],
    base_speakers: List[int],
) -> List[Dict[int, MorphableTargetInfo]]:
    """
    For each base style, report which styles it may be morphed with.

    speakers: information on all speakers
    base_speakers: base speakers (style IDs) to check for morphability

    Returns one dict per entry of ``base_speakers`` (same order), mapping every
    style id found in ``speakers`` to a MorphableTargetInfo.
    """
    speaker_lookup = construct_lookup(speakers)
    target_style_ids = [
        style.id for speaker in speakers for style in speaker.styles
    ]

    return [
        {
            target_style_id: MorphableTargetInfo(
                is_morphable=is_synthesis_morphing_permitted(
                    speaker_lookup=speaker_lookup,
                    base_speaker=base_style_id,
                    target_speaker=target_style_id,
                )
            )
            for target_style_id in target_style_ids
        }
        for base_style_id in base_speakers
    ]
75 |
76 |
def is_synthesis_morphing_permitted(
    speaker_lookup: Dict[int, Tuple[Speaker, StyleInfo]],
    base_speaker: int,
    target_speaker: int,
) -> bool:
    """
    Return whether morphing between the two given style ids is permitted.

    Parameters
    ----------
    speaker_lookup : Dict[int, Tuple[Speaker, StyleInfo]]
        Table from style id to ``(speaker, style)``, as built by
        ``construct_lookup``.
    base_speaker : int
        Style id of the base voice.
    target_speaker : int
        Style id of the target voice.

    Raises
    ------
    StyleIdNotFoundError
        When either style id is missing from ``speaker_lookup``. The base id is
        reported if it is missing, otherwise the target id.
    """
    # Use get(): plain indexing would raise KeyError before the check below
    # could turn a missing id into StyleIdNotFoundError.
    base_speaker_data = speaker_lookup.get(base_speaker)
    target_speaker_data = speaker_lookup.get(target_speaker)

    if base_speaker_data is None or target_speaker_data is None:
        raise StyleIdNotFoundError(
            base_speaker if base_speaker_data is None else target_speaker
        )

    base_speaker_info, _ = base_speaker_data
    target_speaker_info, _ = target_speaker_data

    base_speaker_morphing_info: SpeakerSupportPermittedSynthesisMorphing = (
        base_speaker_info.supported_features.permitted_synthesis_morphing
    )
    target_speaker_morphing_info: SpeakerSupportPermittedSynthesisMorphing = (
        target_speaker_info.supported_features.permitted_synthesis_morphing
    )
    permissions = (base_speaker_morphing_info, target_speaker_morphing_info)

    # Prohibited by either side: not permitted.
    if SpeakerSupportPermittedSynthesisMorphing.NOTHING in permissions:
        return False
    # Either side allows only itself: permitted only within the same speaker.
    if SpeakerSupportPermittedSynthesisMorphing.SELF_ONLY in permissions:
        return base_speaker_info.speaker_uuid == target_speaker_info.speaker_uuid
    # Just in case, confirm that both sides explicitly permit everything.
    return all(
        permission == SpeakerSupportPermittedSynthesisMorphing.ALL
        for permission in permissions
    )
128 |
129 |
def synthesis_morphing_parameter(
    engine: SynthesisEngine,
    query: AudioQuery,
    base_speaker: int,
    target_speaker: int,
) -> MorphingParameter:
    """
    Synthesize the query with both styles and derive the morphing parameters.

    The caller's query is not modified: a deep copy is forced to mono output at
    the engine's default sampling rate before synthesis.
    """
    query = deepcopy(query)

    # Mono, because the result is fed to WORLD.
    query.outputStereo = False
    # To work around a defect, run WORLD at the default sampling rate; the
    # conversion to the requested rate happens afterwards.
    query.outputSamplingRate = engine.default_sampling_rate

    base_wave, target_wave = (
        engine.synthesis(query=query, style_id=style_id).astype("float")
        for style_id in (base_speaker, target_speaker)
    )

    return create_morphing_parameter(
        base_wave=base_wave,
        target_wave=target_wave,
        fs=query.outputSamplingRate,
    )
152 |
153 |
def synthesis_morphing(
    morph_param: MorphingParameter,
    morph_rate: float,
    output_fs: int,
    output_stereo: bool = False,
) -> np.ndarray:
    """
    Generate audio morphed at the given ratio from the given parameters.

    Parameters
    ----------
    morph_param : MorphingParameter
        Parameters created by `synthesis_morphing_parameter` or
        `create_morphing_parameter`.

    morph_rate : float
        Morphing ratio; 0.0 is the base speaker, 1.0 approaches the target
        speaker.

    output_fs : int
        Sampling rate of the returned audio; the result is resampled when it
        differs from ``morph_param.fs``.

    output_stereo : bool
        When true, the mono result is duplicated into two columns.

    Returns
    -------
    generated : np.ndarray
        The morphed audio.

    Raises
    -------
    ValueError
        When morph_rate is below 0.0 or above 1.0.
    """
    if morph_rate < 0.0 or morph_rate > 1.0:
        raise ValueError("morph_rateは0.0から1.0の範囲で指定してください")

    # Linear blend of the two spectrograms.
    base_part = morph_param.base_spectrogram * (1.0 - morph_rate)
    target_part = morph_param.target_spectrogram * morph_rate
    blended_spectrogram = base_part + target_part

    # F0 and aperiodicity always come from the base voice.
    generated = pw.synthesize(
        morph_param.base_f0,
        blended_spectrogram,
        morph_param.base_aperiodicity,
        morph_param.fs,
        morph_param.frame_period,
    )

    # TODO: share this with the resampling done in synthesis_engine.py
    if output_fs != morph_param.fs:
        resampled_length = output_fs * len(generated) // morph_param.fs
        generated = resample(generated, resampled_length)

    if output_stereo:
        generated = np.array([generated, generated]).T

    return generated
207 |
--------------------------------------------------------------------------------
/voicevox_engine/part_of_speech_data.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 |
3 | from .model import (
4 | USER_DICT_MAX_PRIORITY,
5 | USER_DICT_MIN_PRIORITY,
6 | PartOfSpeechDetail,
7 | WordTypes,
8 | )
9 |
# Local aliases for the user-dictionary priority bounds defined in .model.
MIN_PRIORITY = USER_DICT_MIN_PRIORITY
MAX_PRIORITY = USER_DICT_MAX_PRIORITY

# Dictionary parameters for each word type.
# NOTE(review): each cost_candidates list has 11 entries; presumably one per
# priority level between MIN_PRIORITY and MAX_PRIORITY - confirm against the
# code that indexes it.
part_of_speech_data: Dict[WordTypes, PartOfSpeechDetail] = {
    WordTypes.PROPER_NOUN: PartOfSpeechDetail(
        part_of_speech="名詞",
        part_of_speech_detail_1="固有名詞",
        part_of_speech_detail_2="一般",
        part_of_speech_detail_3="*",
        context_id=1348,
        cost_candidates=[
            -988,
            3488,
            4768,
            6048,
            7328,
            8609,
            8734,
            8859,
            8984,
            9110,
            14176,
        ],
        accent_associative_rules=[
            "*",
            "C1",
            "C2",
            "C3",
            "C4",
            "C5",
        ],
    ),
    WordTypes.COMMON_NOUN: PartOfSpeechDetail(
        part_of_speech="名詞",
        part_of_speech_detail_1="一般",
        part_of_speech_detail_2="*",
        part_of_speech_detail_3="*",
        context_id=1345,
        cost_candidates=[
            -4445,
            49,
            1473,
            2897,
            4321,
            5746,
            6554,
            7362,
            8170,
            8979,
            15001,
        ],
        accent_associative_rules=[
            "*",
            "C1",
            "C2",
            "C3",
            "C4",
            "C5",
        ],
    ),
    WordTypes.VERB: PartOfSpeechDetail(
        part_of_speech="動詞",
        part_of_speech_detail_1="自立",
        part_of_speech_detail_2="*",
        part_of_speech_detail_3="*",
        context_id=642,
        cost_candidates=[
            3100,
            6160,
            6360,
            6561,
            6761,
            6962,
            7414,
            7866,
            8318,
            8771,
            13433,
        ],
        accent_associative_rules=[
            "*",
        ],
    ),
    WordTypes.ADJECTIVE: PartOfSpeechDetail(
        part_of_speech="形容詞",
        part_of_speech_detail_1="自立",
        part_of_speech_detail_2="*",
        part_of_speech_detail_3="*",
        context_id=20,
        cost_candidates=[
            1527,
            3266,
            3561,
            3857,
            4153,
            4449,
            5149,
            5849,
            6549,
            7250,
            10001,
        ],
        accent_associative_rules=[
            "*",
        ],
    ),
    WordTypes.SUFFIX: PartOfSpeechDetail(
        part_of_speech="名詞",
        part_of_speech_detail_1="接尾",
        part_of_speech_detail_2="一般",
        part_of_speech_detail_3="*",
        context_id=1358,
        cost_candidates=[
            4399,
            5373,
            6041,
            6710,
            7378,
            8047,
            9440,
            10834,
            12228,
            13622,
            15847,
        ],
        accent_associative_rules=[
            "*",
            "C1",
            "C2",
            "C3",
            "C4",
            "C5",
        ],
    ),
}
145 |
--------------------------------------------------------------------------------
/voicevox_engine/preset/Preset.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel, Field
2 |
3 |
class Preset(BaseModel):
    """
    Preset information.
    """

    id: int = Field(title="プリセットID")  # preset ID
    name: str = Field(title="プリセット名")  # preset name
    speaker_uuid: str = Field(title="スピーカーのUUID")  # speaker UUID
    style_id: int = Field(title="スタイルID")  # style ID
    speedScale: float = Field(title="全体の話速")  # overall speech speed
    pitchScale: float = Field(title="全体の音高")  # overall pitch
    intonationScale: float = Field(title="全体の抑揚")  # overall intonation
    volumeScale: float = Field(title="全体の音量")  # overall volume
    prePhonemeLength: float = Field(title="音声の前の無音時間")  # silence before the audio
    postPhonemeLength: float = Field(title="音声の後の無音時間")  # silence after the audio
19 |
--------------------------------------------------------------------------------
/voicevox_engine/preset/PresetError.py:
--------------------------------------------------------------------------------
class PresetError(Exception):
    """Error raised when the preset file cannot be read, validated or updated."""
3 |
--------------------------------------------------------------------------------
/voicevox_engine/preset/PresetManager.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import List
3 |
4 | import yaml
5 | from pydantic import ValidationError, parse_obj_as
6 |
7 | from .Preset import Preset
8 | from .PresetError import PresetError
9 |
10 |
class PresetManager:
    """
    Manage the presets stored in a YAML file.

    The parsed presets are cached in memory together with the file's
    modification time, so the file is parsed again only after that time has
    changed. Every mutating method re-reads the file first, rewrites it
    afterwards and restores the in-memory list if the write fails.
    """

    def __init__(
        self,
        preset_path: Path,
    ):
        """
        Parameters
        ----------
        preset_path : Path
            Path of the preset YAML file
        """
        self.presets = []
        # Modification time of the file when it was last parsed (0 = never)
        self.last_modified_time = 0
        self.preset_path = preset_path

    def _write_presets(self):
        """Overwrite the YAML file with the presets currently held in memory."""
        with open(self.preset_path, mode="w", encoding="utf-8") as f:
            yaml.safe_dump(
                [preset.dict() for preset in self.presets],
                f,
                allow_unicode=True,
                sort_keys=False,
            )

    def load_presets(self):
        """
        Load the preset YAML file.

        Returns
        -------
        ret: List[Preset]
            List of presets

        Raises
        ------
        PresetError
            If the file cannot be stat'ed, is empty, fails validation or
            contains duplicated preset IDs
        """

        # Check the timestamp of the file; unchanged means the cache is valid
        try:
            _last_modified_time = self.preset_path.stat().st_mtime
        except OSError as err:
            raise PresetError("プリセットの設定ファイルが見つかりません") from err
        if _last_modified_time == self.last_modified_time:
            return self.presets

        with open(self.preset_path, mode="r", encoding="utf-8") as f:
            obj = yaml.safe_load(f)
        if obj is None:
            raise PresetError("プリセットの設定ファイルが空の内容です")

        try:
            _presets = parse_obj_as(List[Preset], obj)
        except ValidationError as err:
            raise PresetError("プリセットの設定ファイルにミスがあります") from err

        # Make sure every ID is unique
        if len(_presets) != len({preset.id for preset in _presets}):
            raise PresetError("プリセットのidに重複があります")

        self.presets = _presets
        self.last_modified_time = _last_modified_time
        return self.presets

    def add_preset(self, preset: Preset):
        """
        Add a new preset to the YAML file.

        Parameters
        ----------
        preset : Preset
            Preset to add; its ``id`` is reassigned in place when it is
            negative or already taken

        Returns
        -------
        ret: int
            Preset ID of the added preset

        Raises
        ------
        PresetError
            If the file cannot be loaded, or the write fails with
            FileNotFoundError
        """

        # The file may have been edited by hand, so re-read the latest YAML
        self.load_presets()

        # A negative or already used ID is replaced by a fresh one.
        # default=-1 makes the first preset of an empty list get ID 0
        # instead of max() failing on an empty sequence.
        existing_ids = {existing.id for existing in self.presets}
        if preset.id < 0 or preset.id in existing_ids:
            preset.id = max(existing_ids, default=-1) + 1
        self.presets.append(preset)

        # Write to the file; undo the append on any failure
        try:
            self._write_presets()
        except FileNotFoundError as err:
            self.presets.pop()
            raise PresetError("プリセットの設定ファイルに書き込み失敗しました") from err
        except Exception:
            self.presets.pop()
            raise

        return preset.id

    def update_preset(self, preset: Preset):
        """
        Update a preset in the YAML file.

        Parameters
        ----------
        preset : Preset
            Preset carrying the new values; the stored preset with the same
            ``id`` is replaced

        Returns
        -------
        ret: int
            Preset ID of the updated preset

        Raises
        ------
        PresetError
            If no preset with that ID exists, the file cannot be loaded, or
            the write fails with FileNotFoundError
        """

        # The file may have been edited by hand, so re-read the latest YAML
        self.load_presets()

        # Look for the ID and swap the new preset in
        for index, current in enumerate(self.presets):
            if current.id == preset.id:
                previous = current
                self.presets[index] = preset
                break
        else:
            raise PresetError("更新先のプリセットが存在しません")

        # Write to the file; restore the previous preset on any failure
        try:
            self._write_presets()
        except FileNotFoundError as err:
            self.presets[index] = previous
            raise PresetError("プリセットの設定ファイルに書き込み失敗しました") from err
        except Exception:
            self.presets[index] = previous
            raise

        return preset.id

    def delete_preset(self, id: int):
        """
        Delete a preset from the YAML file.

        Parameters
        ----------
        id: int
            Preset ID of the preset to delete

        Returns
        -------
        ret: int
            Preset ID of the deleted preset

        Raises
        ------
        PresetError
            If no preset with that ID exists, the file cannot be loaded, or
            the write fails with FileNotFoundError
        """

        # The file may have been edited by hand, so re-read the latest YAML
        self.load_presets()

        # Look for the ID and take the preset out of the list
        for index, current in enumerate(self.presets):
            if current.id == id:
                removed = self.presets.pop(index)
                break
        else:
            raise PresetError("削除対象のプリセットが存在しません")

        # Write to the file; put the preset back on any failure so the cached
        # list stays consistent, as add_preset and update_preset already do
        try:
            self._write_presets()
        except FileNotFoundError as err:
            self.presets.insert(index, removed)
            raise PresetError("プリセットの設定ファイルに書き込み失敗しました") from err
        except Exception:
            self.presets.insert(index, removed)
            raise

        return id
189 |
--------------------------------------------------------------------------------
/voicevox_engine/preset/__init__.py:
--------------------------------------------------------------------------------
1 | from .Preset import Preset
2 | from .PresetError import PresetError
3 | from .PresetManager import PresetManager
4 |
# Names exported by `from <this package> import *`; all are imported above.
__all__ = [
    "Preset",
    "PresetManager",
    "PresetError",
]
10 |
--------------------------------------------------------------------------------
/voicevox_engine/setting/Setting.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | from typing import Optional
3 |
4 | from pydantic import BaseModel, Field
5 |
6 |
class CorsPolicyMode(str, Enum):
    """
    CORS permission mode.
    """

    all = "all"  # allow requests from every origin
    localapps = "localapps"  # allow requests from local applications
14 |
15 |
class Setting(BaseModel):
    """
    Engine settings.
    """

    # Resource-sharing (CORS) policy
    cors_policy_mode: CorsPolicyMode = Field(title="リソース共有ポリシー")
    # Origin to allow; may be None
    allow_origin: Optional[str] = Field(title="許可するオリジン")

    class Config:
        # pydantic option: populate enum-typed fields with the enum's value
        # rather than the enum member
        use_enum_values = True
26 |
--------------------------------------------------------------------------------
/voicevox_engine/setting/SettingLoader.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import yaml
4 |
5 | from ..utility import engine_root, get_save_dir
6 | from .Setting import Setting
7 |
# Settings file under engine_root(); SettingLoader.load_setting_file reads it
# when the loader's own settings file does not exist.
DEFAULT_SETTING_PATH: Path = engine_root() / "default_setting.yml"
# Settings file under get_save_dir(); both paths are computed at import time.
USER_SETTING_PATH: Path = get_save_dir() / "setting.yml"
10 |
11 |
class SettingLoader:
    """Read and write the engine settings kept in a YAML file."""

    def __init__(self, setting_file_path: Path) -> None:
        """Remember the path of the settings file to load from and dump to."""
        self.setting_file_path = setting_file_path

    def load_setting_file(self) -> Setting:
        """
        Load the settings.

        Reads ``self.setting_file_path`` when it is an existing file and
        ``DEFAULT_SETTING_PATH`` otherwise, then builds a ``Setting`` from the
        ``cors_policy_mode`` and ``allow_origin`` keys of the parsed YAML.
        """
        source_path = (
            self.setting_file_path
            if self.setting_file_path.is_file()
            else DEFAULT_SETTING_PATH
        )
        raw = yaml.safe_load(source_path.read_text(encoding="utf-8"))

        return Setting(
            cors_policy_mode=raw["cors_policy_mode"],
            allow_origin=raw["allow_origin"],
        )

    def dump_setting_file(self, settings: Setting) -> None:
        """Write ``settings`` as YAML to ``self.setting_file_path``."""
        # Serialise before opening, so the file is not truncated if this fails
        payload = settings.dict()

        with open(self.setting_file_path, mode="w", encoding="utf-8") as out:
            yaml.safe_dump(payload, out)
34 |
--------------------------------------------------------------------------------
/voicevox_engine/setting/__init__.py:
--------------------------------------------------------------------------------
1 | from .Setting import CorsPolicyMode, Setting
2 | from .SettingLoader import USER_SETTING_PATH, SettingLoader
3 |
4 | __all__ = [
5 | "USER_SETTING_PATH",
6 | "CorsPolicyMode",
7 | "Setting",
8 | "SettingLoader",
9 | ]
10 |
--------------------------------------------------------------------------------
/voicevox_engine/synthesis_engine/__init__.py:
--------------------------------------------------------------------------------
1 | from .core_wrapper import CoreWrapper, load_runtime_lib
2 | from .make_synthesis_engines import make_synthesis_engines
3 | from .synthesis_engine import SynthesisEngine
4 | from .synthesis_engine_base import SynthesisEngineBase
5 |
# Names exported by `from <this package> import *`; all are imported above.
__all__ = [
    "CoreWrapper",
    "load_runtime_lib",
    "make_synthesis_engines",
    "SynthesisEngine",
    "SynthesisEngineBase",
]
13 |
--------------------------------------------------------------------------------
/voicevox_engine/synthesis_engine/make_synthesis_engines.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import traceback
3 | from typing import Dict
4 |
5 | from ..bridge_config import BridgeConfigLoader
6 | from .synthesis_engine_base import SynthesisEngineBase
7 | from .synthesis_engine_espnet import SynthesisEngineESPNet
8 |
9 |
def make_synthesis_engines(
    use_gpu: bool,
    bridge_config_loader: BridgeConfigLoader,
    enable_mock: bool = True,
    load_all_models: bool = False,
) -> Dict[str, SynthesisEngineBase]:
    """
    Load the voice library and create the speech synthesis engines.

    Parameters
    ----------
    use_gpu: bool
        Whether the voice library should use the GPU
    bridge_config_loader: BridgeConfigLoader
        Passed through to the engine constructor
    enable_mock: bool, optional, default=True
        Whether to fall back to the mock engine when loading fails
    load_all_models: bool, optional, default=False
        Whether to load every model at start-up

    Returns
    -------
    Dict[str, SynthesisEngineBase]
        Engines keyed by version string; the mock engine is stored under
        "0.0.0"
    """
    engines: Dict[str, SynthesisEngineBase] = {}
    try:
        engine = SynthesisEngineESPNet(
            bridge_config_loader=bridge_config_loader,
            use_gpu=use_gpu,
            load_all_models=load_all_models,
        )
        engines[engine.engine_version] = engine
    except Exception:
        # Without the mock fallback the original failure is propagated as is
        if not enable_mock:
            raise
        traceback.print_exc()
        print("Notice: mock-library will be used.", file=sys.stderr)

        # Imported lazily: the mock modules are needed only on this path
        from ..dev.core import metas as mock_metas
        from ..dev.core import supported_devices as mock_supported_devices
        from ..dev.synthesis_engine import MockSynthesisEngine

        mock_version = "0.0.0"
        if mock_version not in engines:
            engines[mock_version] = MockSynthesisEngine(
                speakers=mock_metas(),
                supported_devices=mock_supported_devices(),
            )

    return engines
57 |
--------------------------------------------------------------------------------
/voicevox_engine/utility/__init__.py:
--------------------------------------------------------------------------------
1 | from .connect_base64_waves import (
2 | ConnectBase64WavesException,
3 | connect_base64_waves,
4 | decode_base64_waves,
5 | )
6 | from .core_version_utility import get_latest_core_version, parse_core_version
7 | from .mutex_utility import mutex_wrapper
8 | from .path_utility import delete_file, engine_root
9 | from .save_dir import get_save_dir
10 |
# Names exported by `from <this package> import *`; all are imported above.
__all__ = [
    "ConnectBase64WavesException",
    "connect_base64_waves",
    "decode_base64_waves",
    "get_latest_core_version",
    "parse_core_version",
    "delete_file",
    "engine_root",
    "get_save_dir",
    "mutex_wrapper",
]
22 |
--------------------------------------------------------------------------------
/voicevox_engine/utility/connect_base64_waves.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import io
3 | from typing import List, Tuple
4 |
5 | import numpy as np
6 | import soundfile
7 | from scipy.signal import resample
8 |
9 |
class ConnectBase64WavesException(Exception):
    """
    Error raised while decoding or joining base64-encoded wav data.

    Parameters
    ----------
    message: str
        Description of the failure; available as ``message`` and through
        ``str()`` / ``args``
    """

    def __init__(self, message: str):
        # Forward the message to Exception so that str(exc), exc.args and
        # tracebacks carry it instead of being empty.
        super().__init__(message)
        self.message = message
13 |
14 |
def decode_base64_waves(waves: List[str]) -> List[Tuple[np.ndarray, int]]:
    """
    Decode several base64-encoded wav payloads.

    Parameters
    ----------
    waves: list[str]
        List of base64-encoded wav data

    Returns
    -------
    waves_nparray_sr: List[Tuple[np.ndarray, int]]
        List of (waveform as a NumPy array, sampling rate) tuples

    Raises
    ------
    ConnectBase64WavesException
        If the list is empty, an item is not valid base64, or the decoded
        bytes cannot be read by ``soundfile``
    """
    if len(waves) == 0:
        raise ConnectBase64WavesException("wavファイルが含まれていません")

    decoded = []
    for encoded in waves:
        try:
            raw_bytes = base64.standard_b64decode(encoded)
        except ValueError:
            raise ConnectBase64WavesException("base64デコードに失敗しました")

        try:
            samples_and_rate = soundfile.read(io.BytesIO(raw_bytes))
        except Exception:
            raise ConnectBase64WavesException("wavファイルを読み込めませんでした")

        decoded.append(samples_and_rate)

    return decoded
43 |
44 |
def connect_base64_waves(waves: List[str]) -> Tuple[np.ndarray, int]:
    """
    Decode base64-encoded wav data and concatenate it into one waveform.

    Every waveform whose sampling rate differs from the highest one is
    resampled to that rate. When any decoded array is 2-D, each 1-D array is
    duplicated into two columns before concatenation.

    Parameters
    ----------
    waves: list[str]
        List of base64-encoded wav data

    Returns
    -------
    Tuple[np.ndarray, int]
        (concatenated waveform, sampling rate of the result)
    """
    decoded = decode_base64_waves(waves)

    target_rate = max(rate for _, rate in decoded)
    max_channels = max(samples.ndim for samples, _ in decoded)
    assert 0 < max_channels <= 2

    pieces = []
    for samples, rate in decoded:
        if rate != target_rate:
            # Scale the sample count by the ratio of the two rates
            samples = resample(samples, target_rate * len(samples) // rate)
        if samples.ndim < max_channels:
            # 1-D input: stack two copies and transpose to (n, 2)
            samples = np.array([samples, samples]).T
        pieces.append(samples)

    return np.concatenate(pieces), target_rate
61 |
--------------------------------------------------------------------------------
/voicevox_engine/utility/core_version_utility.py:
--------------------------------------------------------------------------------
1 | from typing import Iterable
2 |
3 | from semver.version import Version
4 |
5 |
def parse_core_version(version: str) -> Version:
    """Parse *version* with ``Version.parse`` and return the resulting ``Version``."""
    parsed = Version.parse(version)
    return parsed
8 |
9 |
def get_latest_core_version(versions: Iterable[str]) -> str:
    """
    Return the highest version among *versions* as a string.

    Parameters
    ----------
    versions: Iterable[str]
        Version strings accepted by ``parse_core_version``; any iterable
        works, including iterators and generators

    Returns
    -------
    str
        String form of the greatest parsed version

    Raises
    ------
    Exception
        If *versions* yields no items
    """
    # Materialise first: the annotation allows any iterable, but an iterator
    # has no len() and could not be checked for emptiness without consuming it.
    candidates = list(versions)
    if not candidates:
        raise Exception("versions must be non-empty.")

    return str(max(map(parse_core_version, candidates)))
15 |
--------------------------------------------------------------------------------
/voicevox_engine/utility/mutex_utility.py:
--------------------------------------------------------------------------------
1 | import threading
2 |
3 |
def mutex_wrapper(lock: threading.Lock):
    """
    Build a decorator that runs the decorated function while holding *lock*.

    Parameters
    ----------
    lock: threading.Lock
        Lock acquired before every call and released afterwards, also when
        the call raises

    Returns
    -------
    A decorator; the function it returns forwards all positional and keyword
    arguments and the return value of the decorated function, and keeps its
    ``__name__``, ``__doc__`` and other metadata.
    """
    # Local import: keeps this block self-contained without touching the
    # module's import section.
    import functools

    def wrap(f):
        @functools.wraps(f)
        def func(*args, **kw):
            with lock:
                return f(*args, **kw)

        return func

    return wrap
16 |
--------------------------------------------------------------------------------
/voicevox_engine/utility/path_utility.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import traceback
4 | from pathlib import Path
5 |
# In a compiled build, calling chdir would change what root_dir resolves to,
# so the initial value is saved here at import time.
_root_dir = Path(sys.argv[0]).parent.resolve(strict=True)
8 |
9 |
def engine_root() -> Path:
    """
    Return the resolved root directory of the engine.

    In development this is the directory two levels above this file's
    directory; in a Nuitka/PyInstaller build it is the directory saved in
    ``_root_dir``. ``resolve(strict=True)`` raises if the path does not exist.
    """
    base = (
        Path(__file__).parents[2]
        if is_development()
        else Path(str(_root_dir))  # built with Nuitka/PyInstaller
    )
    return base.resolve(strict=True)
19 |
20 |
def is_development() -> bool:
    """
    Tell whether this is a development (non-compiled) run.

    Returns False when the code was built with Nuitka or PyInstaller and
    True otherwise.
    """
    # A Nuitka build puts __compiled__ into the module globals
    built_with_nuitka = "__compiled__" in globals()
    # A PyInstaller build sets sys.frozen
    built_with_pyinstaller = getattr(sys, "frozen", False)

    return not (built_with_nuitka or built_with_pyinstaller)
35 |
36 |
def delete_file(file_path: str) -> None:
    """Remove *file_path*; on ``OSError`` print the traceback instead of raising."""
    try:
        os.unlink(file_path)
    except OSError:
        traceback.print_exc()
42 |
--------------------------------------------------------------------------------
/voicevox_engine/utility/save_dir.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from platformdirs import user_data_dir
4 |
5 | from ..engine_manifest import EngineManifestLoader
6 |
7 |
def get_save_dir():
    """
    Return the per-user data directory of this application.

    The application name is the ``name`` of the loaded engine manifest; the
    result is ``platformdirs.user_data_dir`` for that name, wrapped in a
    ``Path``.
    """
    # TODO: the behaviour here is suspicious, so check it properly
    try:
        app_name = EngineManifestLoader().load_manifest().name
    except TypeError:
        # NOTE(review): presumably covers the case where the imported
        # EngineManifestLoader is the module rather than the class (calling a
        # module raises TypeError) — confirm against the engine_manifest package.
        app_name = EngineManifestLoader.EngineManifestLoader().load_manifest().name
    return Path(user_data_dir(app_name))
15 |
--------------------------------------------------------------------------------