├── .gitattributes
├── .github
    ├── CODEOWNERS
    ├── ISSUE_TEMPLATE
    │   ├── bugreport.md
    │   ├── featurerequest.md
    │   └── question.md
    ├── PULL_REQUEST_TEMPLATE.md
    ├── labeler.yml
    └── workflows
    │   ├── build-docker.yml
    │   ├── build.yml
    │   ├── coverage-comment.yml
    │   ├── labeler.yml
    │   ├── release-test.yml
    │   ├── test.yml
    │   ├── typos.yml
    │   └── upload-gh-pages.yml
├── .gitignore
├── .pre-commit-config.yaml
├── Dockerfile
├── LGPL_LICENSE
├── LICENSE
├── Makefile
├── README.md
├── _typos.toml
├── build_util
    ├── check_release_build.py
    ├── codesign.bash
    ├── create_venv_and_generate_licenses.bash
    ├── merge_update_infos.py
    ├── modify_pyinstaller.bash
    └── process_voicevox_resource.bash
├── default.csv
├── default_setting.yml
├── docs
    ├── VOICEVOX音声合成エンジンとの連携.md
    ├── api
    │   └── .gitkeep
    ├── licenses
    │   ├── cuda
    │   │   └── EULA.txt
    │   ├── cudnn
    │   │   └── LICENSE
    │   ├── open_jtalk
    │   │   ├── COPYING
    │   │   ├── mecab-naist-jdic
    │   │   │   └── COPYING
    │   │   └── mecab
    │   │   │   └── COPYING
    │   └── world
    │   │   └── LICENSE.txt
    └── res
    │   └── マルチエンジン概念図.svg
├── engine_manifest.json
├── engine_manifest_assets
    ├── dependency_licenses.json
    ├── downloadable_libraries.json
    ├── icon.png
    ├── terms_of_service.md
    └── update_infos.json
├── generate_licenses.py
├── get_cost_candidates.py
├── make_docs.py
├── poetry.lock
├── presets.yaml
├── pyproject.toml
├── requirements-dev.txt
├── requirements-license.txt
├── requirements-test.txt
├── requirements.txt
├── run.py
├── run.spec
├── setup.cfg
├── speaker_info
    ├── 35b2c544-660e-401e-b503-0e14c635303a
    │   ├── icons
    │   │   └── 8.png
    │   ├── metas.json
    │   ├── policy.md
    │   ├── portrait.png
    │   ├── portraits
    │   │   └── 8.png
    │   └── voice_samples
    │   │   ├── 8_001.wav
    │   │   ├── 8_002.wav
    │   │   └── 8_003.wav
    ├── 388f246b-8c41-4ac1-8e2d-5d79f3ff56d9
    │   ├── icons
    │   │   ├── 1.png
    │   │   ├── 3.png
    │   │   ├── 5.png
    │   │   └── 7.png
    │   ├── metas.json
    │   ├── policy.md
    │   ├── portrait.png
    │   ├── portraits
    │   │   └── 3.png
    │   └── voice_samples
    │   │   ├── 1_001.wav
    │   │   ├── 1_002.wav
    │   │   ├── 1_003.wav
    │   │   ├── 3_001.wav
    │   │   ├── 3_002.wav
    │   │   ├── 3_003.wav
    │   │   ├── 5_001.wav
    │   │   ├── 5_002.wav
    │   │   ├── 5_003.wav
    │   │   ├── 7_001.wav
    │   │   ├── 7_002.wav
    │   │   └── 7_003.wav
    ├── 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff
    │   ├── icons
    │   │   ├── 0.png
    │   │   ├── 2.png
    │   │   ├── 4.png
    │   │   └── 6.png
    │   ├── metas.json
    │   ├── policy.md
    │   ├── portrait.png
    │   ├── portraits
    │   │   ├── 0.png
    │   │   ├── 2.png
    │   │   ├── 4.png
    │   │   └── 6.png
    │   └── voice_samples
    │   │   ├── 0_001.wav
    │   │   ├── 0_002.wav
    │   │   ├── 0_003.wav
    │   │   ├── 2_001.wav
    │   │   ├── 2_002.wav
    │   │   ├── 2_003.wav
    │   │   ├── 4_001.wav
    │   │   ├── 4_002.wav
    │   │   ├── 4_003.wav
    │   │   ├── 6_001.wav
    │   │   ├── 6_002.wav
    │   │   └── 6_003.wav
    └── b1a81618-b27b-40d2-b0ea-27a9ad408c4b
    │   ├── icons
    │       └── 9.png
    │   ├── metas.json
    │   ├── policy.md
    │   ├── portrait.png
    │   └── voice_samples
    │       ├── 9_001.wav
    │       ├── 9_002.wav
    │       └── 9_003.wav
├── test
    ├── __init__.py
    ├── presets-test-1.yaml
    ├── presets-test-2.yaml
    ├── presets-test-3.yaml
    ├── presets-test-4.yaml
    ├── test_acoustic_feature_extractor.py
    ├── test_connect_base64_waves.py
    ├── test_full_context_label.py
    ├── test_kana_parser.py
    ├── test_mock_synthesis_engine.py
    ├── test_mora_list.py
    ├── test_mora_to_text.py
    ├── test_preset.py
    ├── test_synthesis_engine.py
    ├── test_synthesis_engine_base.py
    ├── test_user_dict.py
    ├── test_user_dict_model.py
    └── test_word_types.py
├── ui_template
    └── ui.html
└── voicevox_engine
    ├── __init__.py
    ├── acoustic_feature_extractor.py
    ├── cancellable_engine.py
    ├── dev
        ├── core
        │   ├── __init__.py
        │   └── mock.py
        └── synthesis_engine
        │   ├── __init__.py
        │   └── mock.py
    ├── engine_manifest
        ├── EngineManifest.py
        ├── EngineManifestLoader.py
        └── __init__.py
    ├── full_context_label.py
    ├── kana_parser.py
    ├── metas
        ├── Metas.py
        ├── MetasStore.py
        └── __init__.py
    ├── model.py
    ├── mora_list.py
    ├── morphing.py
    ├── part_of_speech_data.py
    ├── preset
        ├── Preset.py
        ├── PresetError.py
        ├── PresetManager.py
        └── __init__.py
    ├── setting
        ├── Setting.py
        ├── SettingLoader.py
        └── __init__.py
    ├── synthesis_engine
        ├── __init__.py
        ├── core_wrapper.py
        ├── make_synthesis_engines.py
        ├── synthesis_engine.py
        └── synthesis_engine_base.py
    ├── user_dict.py
    └── utility
        ├── __init__.py
        ├── connect_base64_waves.py
        ├── mutex_utility.py
        └── path_utility.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto
2 | *.png -text
3 | *.wav -text


--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @VOICEVOX/maintainer
2 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bugreport.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug Report
 3 | about: 不具合の報告
 4 | labels: バグ
 5 | ---
 6 | 
 7 | ## 不具合の内容
 8 | 
 9 | <!-- 概要はここに記載してください -->
10 | 
11 | ### 現象・ログ
12 | 
13 | <!-- ここに記載してください -->
14 | 
15 | ### 再現手順
16 | 
17 | <!-- 最小の構成でできると、なおよい -->
18 | 
19 | ### 期待動作
20 | 
21 | <!-- 正しいと思う動作が明確であれば記載してください -->
22 | 
23 | ## VOICEVOXのバージョン
24 | 
25 | 0.?.0
26 | 
27 | <!-- "ヘルプ" → "アップデート情報" で確認できます -->
28 | 
29 | ## OSの種類/ディストリ/バージョン
30 | 
31 | <!-- チェックするには [ ] を [x] に変更してください -->
32 | 
33 | - [ ] Windows
34 | - [ ] macOS
35 | - [ ] Linux
36 | 
37 | <!--
38 | なるべく詳しく書いてください 記述例:
39 | *   Windows 10 Pro 64bit (10.0.10586)
40 | *   macOS Sierra
41 | *   Linux fedora 23 64bit
42 | *   Others
43 | -->
44 | 
45 | ## その他
46 | 
47 | <!-- 関連して何か気がついたこと、気になることがあればココに書いてください -->
48 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/featurerequest.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature Request
 3 | about: 機能要望・改善提案
 4 | labels: 機能向上
 5 | ---
 6 | 
 7 | ## 内容
 8 | 
 9 | <!-- ここに要望する機能を記載してください -->
10 | <!-- その機能が必要な理由や、具体例も -->
11 | 
12 | ### Pros 良くなる点
13 | 
14 | <!-- 改善される状態など -->
15 | 
16 | ### Cons 悪くなる点
17 | 
18 | <!-- ないことが望ましいが、もしあるなら -->
19 | 
20 | ### 実現方法
21 | 
22 | <!-- 実現方法について検討済みであるなら -->
23 | 
24 | ## VOICEVOXのバージョン
25 | 
26 | 0.?.0
27 | 
28 | <!-- "ヘルプ" → "アップデート情報" で確認できます -->
29 | 
30 | ## OSの種類/ディストリ/バージョン
31 | 
32 | <!-- チェックするには [ ] を [x] に変更してください -->
33 | 
34 | - [ ] Windows
35 | - [ ] macOS
36 | - [ ] Linux
37 | 
38 | <!--
39 | なるべく詳しく書いてください 記述例:
40 | *   Windows 10 Pro 64bit (10.0.10586)
41 | *   macOS Sierra
42 | *   Linux fedora 23 64bit
43 | *   Others
44 | -->
45 | 
46 | ## その他
47 | 
48 | <!-- 関連して何か気がついたこと、気になることがあればココに書いてください -->
49 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/question.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Question
 3 | about: 質問 (既存のIssueや一般事例を良く調べてからしてください)
 4 | labels: 要議論
 5 | ---
 6 | 
 7 | ## 質問の内容
 8 | 
 9 | <!-- ここに記載してください -->
10 | 
11 | ## VOICEVOXのバージョン
12 | 
13 | 0.?.0
14 | 
15 | <!-- "ヘルプ" → "アップデート情報" で確認できます -->
16 | 
17 | ## OSの種類/ディストリ/バージョン
18 | 
19 | <!-- チェックするには [ ] を [x] に変更してください -->
20 | 
21 | - [ ] Windows
22 | - [ ] macOS
23 | - [ ] Linux
24 | 
25 | <!--
26 | なるべく詳しく書いてください 記述例:
27 | *   Windows 10 Pro 64bit (10.0.10586)
28 | *   macOS Sierra
29 | *   Linux fedora 23 64bit
30 | *   Others
31 | -->
32 | 
33 | ## その他
34 | 
35 | <!-- 関連して何か気がついたこと、気になることがあればココに書いてください -->
36 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | ## 内容
 2 | 
 3 | <!--
 4 | プルリクエストの内容説明を端的に記載してください。
 5 | -->
 6 | 
 7 | ## 関連 Issue
 8 | 
 9 | <!--
10 | 関連するIssue番号を記載してください。
11 | 番号の前に"close"を書くと自動的にIssueが閉じられます。
12 | 
13 | （例）
14 | ref #0
15 | close #0
16 | -->
17 | 
18 | ## スクリーンショット・動画など
19 | 
20 | <!--
21 | UIを変更した際は、変更がわかるような動画・スクリーンショットがあると助かります。
22 | -->
23 | 
24 | ## その他
25 | 


--------------------------------------------------------------------------------
/.github/labeler.yml:
--------------------------------------------------------------------------------
1 | 'OS：mac':
2 |     - '\[x\] macOS'
3 | 'OS：linux':
4 |     - '\[x\] Linux'
5 | 'OS：win':
6 |     - '\[x\] Windows'
7 | 


--------------------------------------------------------------------------------
/.github/workflows/build-docker.yml:
--------------------------------------------------------------------------------
  1 | name: build-docker
  2 | on:
  3 |   push:
  4 |     branches:
  5 |       - master
  6 |   release:
  7 |     types:
  8 |       - created
  9 |   workflow_dispatch:
 10 |     inputs:
 11 |       version:
 12 |         description: "バージョン情報（A.BB.C / A.BB.C-preview.D）"
 13 |         required: true
 14 | 
 15 | env:
 16 |   IMAGE_NAME: ${{ secrets.DOCKERHUB_USERNAME }}/voicevox_engine
 17 |   PYTHON_VERSION: "3.8.10"
 18 |   VOICEVOX_RESOURCE_VERSION: "0.14.1"
 19 |   VOICEVOX_CORE_VERSION: "0.14.2"
 20 |   VOICEVOX_ENGINE_VERSION:
 21 |     |- # releaseタグ名か、workflow_dispatchでのバージョン名か、latestが入る
 22 |     ${{ github.event.release.tag_name || github.event.inputs.version || 'latest' }}
 23 | 
 24 | jobs:
 25 |   build-docker:
 26 |     runs-on: ${{ matrix.os }}
 27 | 
 28 |     strategy:
 29 |       matrix:
 30 |         os: [ubuntu-latest]
 31 |         tag:
 32 |           - ""
 33 |           - cpu
 34 |           - cpu-ubuntu20.04
 35 |           - nvidia
 36 |           - nvidia-ubuntu20.04
 37 |           - cpu-ubuntu18.04
 38 |           - nvidia-ubuntu18.04
 39 |         include:
 40 |           # Ubuntu 20.04
 41 |           - tag: ""
 42 |             target: runtime-env
 43 |             base_image: ubuntu:20.04
 44 |             base_runtime_image: ubuntu:20.04
 45 |             voicevox_core_asset_prefix: voicevox_core-linux-x64-cpu
 46 |             onnxruntime_url: https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-1.13.1.tgz
 47 |           - tag: cpu
 48 |             target: runtime-env
 49 |             base_image: ubuntu:20.04
 50 |             base_runtime_image: ubuntu:20.04
 51 |             voicevox_core_asset_prefix: voicevox_core-linux-x64-cpu
 52 |             onnxruntime_url: https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-1.13.1.tgz
 53 |           - tag: cpu-ubuntu20.04
 54 |             target: runtime-env
 55 |             base_image: ubuntu:20.04
 56 |             base_runtime_image: ubuntu:20.04
 57 |             voicevox_core_asset_prefix: voicevox_core-linux-x64-cpu
 58 |             onnxruntime_url: https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-1.13.1.tgz
 59 |           - tag: nvidia
 60 |             target: runtime-nvidia-env
 61 |             base_image: ubuntu:20.04
 62 |             base_runtime_image: nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu20.04
 63 |             voicevox_core_asset_prefix: voicevox_core-linux-x64-gpu
 64 |             onnxruntime_url: https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-gpu-1.13.1.tgz
 65 |           - tag: nvidia-ubuntu20.04
 66 |             target: runtime-nvidia-env
 67 |             base_image: ubuntu:20.04
 68 |             base_runtime_image: nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu20.04
 69 |             voicevox_core_asset_prefix: voicevox_core-linux-x64-gpu
 70 |             onnxruntime_url: https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-gpu-1.13.1.tgz
 71 |           # Ubuntu 18.04
 72 |           - tag: cpu-ubuntu18.04
 73 |             target: runtime-env
 74 |             base_image: ubuntu:18.04
 75 |             base_runtime_image: ubuntu:18.04
 76 |             voicevox_core_asset_prefix: voicevox_core-linux-x64-cpu
 77 |             onnxruntime_url: https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-1.13.1.tgz
 78 |           - tag: nvidia-ubuntu18.04
 79 |             target: runtime-nvidia-env
 80 |             base_image: ubuntu:18.04
 81 |             base_runtime_image: nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu18.04
 82 |             voicevox_core_asset_prefix: voicevox_core-linux-x64-gpu
 83 |             onnxruntime_url: https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-gpu-1.13.1.tgz
 84 | 
 85 |     steps:
 86 |       - uses: actions/checkout@v3
 87 | 
 88 |       - name: Setup Docker Buildx
 89 |         id: buildx
 90 |         uses: docker/setup-buildx-action@v2
 91 | 
 92 |       - name: Login to DockerHub
 93 |         uses: docker/login-action@v2
 94 |         with:
 95 |           username: ${{ secrets.DOCKERHUB_USERNAME }}
 96 |           password: ${{ secrets.DOCKERHUB_TOKEN }}
 97 | 
 98 |       # Download VOICEVOX RESOURCE
 99 |       - name: Prepare VOICEVOX RESOURCE cache
100 |         uses: actions/cache@v3
101 |         id: voicevox-resource-cache
102 |         with:
103 |           key: voicevox-resource-${{ env.VOICEVOX_RESOURCE_VERSION }}
104 |           path: download/resource
105 | 
106 |       - name: Checkout VOICEVOX RESOURCE
107 |         if: steps.voicevox-resource-cache.outputs.cache-hit != 'true'
108 |         uses: actions/checkout@v3
109 |         with:
110 |           repository: VOICEVOX/voicevox_resource
111 |           ref: ${{ env.VOICEVOX_RESOURCE_VERSION }}
112 |           path: download/resource
113 | 
114 |       # Merge VOICEVOX RESOURCE
115 |       - name: Merge VOICEVOX RESOURCE
116 |         shell: bash
117 |         env:
118 |           DOWNLOAD_RESOURCE_PATH: download/resource
119 |         run: bash build_util/process_voicevox_resource.bash
120 | 
121 |       - name: Build and Deploy Docker image
122 |         uses: docker/build-push-action@v3
123 |         env:
124 |           IMAGE_TAG:
 125 |             |- # Prefix the version with matrix.tag when the tag is non-empty; the version is the release tag, the workflow_dispatch input, or `latest`
126 |             ${{ (
127 |               matrix.tag != '' && (
128 |                 format('{0}:{1}-{2}', env.IMAGE_NAME, matrix.tag, env.VOICEVOX_ENGINE_VERSION)
129 |               ) || format('{0}:{1}', env.IMAGE_NAME, env.VOICEVOX_ENGINE_VERSION)
130 |             ) }}
131 |           VOICEVOX_CORE_ASSET_NAME: ${{ matrix.voicevox_core_asset_prefix }}-${{ env.VOICEVOX_CORE_VERSION }}
132 |         with:
133 |           context: .
134 |           builder: ${{ steps.buildx.outputs.name }}
135 |           file: ./Dockerfile
136 |           build-args: |
137 |             BASE_IMAGE=${{ matrix.base_image }}
138 |             BASE_RUNTIME_IMAGE=${{ matrix.base_runtime_image }}
139 |             PYTHON_VERSION=${{ env.PYTHON_VERSION }}
140 |             VOICEVOX_ENGINE_VERSION=${{ env.VOICEVOX_ENGINE_VERSION }}
141 |             VOICEVOX_CORE_ASSET_NAME=${{ env.VOICEVOX_CORE_ASSET_NAME }}
142 |             VOICEVOX_CORE_VERSION=${{ env.VOICEVOX_CORE_VERSION }}
143 |             VOICEVOX_RESOURCE_VERSION=${{ env.VOICEVOX_RESOURCE_VERSION }}
144 |             ONNXRUNTIME_URL=${{ matrix.onnxruntime_url }}
145 |           target: ${{ matrix.target }}
146 |           push: true
147 |           tags: ${{ env.IMAGE_TAG }}
148 |           cache-from: type=registry,ref=${{ env.IMAGE_TAG }}-buildcache
149 |           cache-to: type=registry,ref=${{ env.IMAGE_TAG }}-buildcache,mode=max
150 | 


--------------------------------------------------------------------------------
/.github/workflows/coverage-comment.yml:
--------------------------------------------------------------------------------
 1 | name: Coverage Report Comment
 2 | 
 3 | on:
 4 |   workflow_run:
 5 |     workflows:
 6 |       - test
 7 |     types:
 8 |       - completed
 9 |   workflow_dispatch:
10 | 
11 | jobs:
12 |   comment:
13 |     runs-on: ubuntu-latest
14 |     if: github.event.workflow_run.event == 'pull_request' && github.event.workflow_run.conclusion == 'success'
15 |     steps:
16 |       - name: Download coverage report
17 |         uses: actions/github-script@v5.0.0
18 |         with:
19 |           script: |
20 |             const artifacts = await github.rest.actions.listWorkflowRunArtifacts({
21 |                owner: context.repo.owner,
22 |                repo: context.repo.repo,
23 |                run_id: ${{ github.event.workflow_run.id }},
24 |             })
25 |             const matchArtifact = artifacts.data.artifacts.filter((artifact) => {
26 |               return artifact.name == 'report'
27 |             })[0]
28 |             const download = await github.rest.actions.downloadArtifact({
29 |                owner: context.repo.owner,
30 |                repo: context.repo.repo,
31 |                artifact_id: matchArtifact.id,
32 |                archive_format: 'zip',
33 |             })
34 |             const fs = require('fs')
35 |             fs.writeFileSync('${{github.workspace}}/report.zip', Buffer.from(download.data))
36 | 
37 |       - name: Unzip report
38 |         run: unzip report.zip
39 | 
40 |       - name: Comment coverage result to Pull Requests
41 |         uses: actions/github-script@v5.0.0
42 |         with:
43 |           github-token: ${{ secrets.GITHUB_TOKEN }}
44 |           script: |
45 |             const fs = require('fs')
46 |             const baseReport = fs.readFileSync('report.txt', 'utf8').toString().split('\n')
47 |             let report = ''
48 |             for (let i = 0; i < baseReport.length; i++) {
49 |               const line = baseReport[i].split(' ').filter(v => v)
50 |               if (i === 1 && line.length === 1) {
51 |                 report += "|:---|---:|---:|---:|\n"
52 |               } else if (line.length === 1) {
53 |                 continue
54 |               } else {
55 |                 if (i !== 0 && line.length === 4) {
56 |                   const parcent = Number(line[3].replace("%", ""))
57 |                   let color = 'green'
58 |                   if (parcent < 50) {
59 |                     color = 'red'
60 |                   } else if (parcent < 90) {
61 |                     color = 'orange'
62 |                   }
63 |                   line[3] = `![coverage-${parcent}%](https://img.shields.io/badge/coverage-${parcent}%25-${color}.svg)`
64 |                 }
65 |                 report += "|" + line.join("|") + "|\n"
66 |               }
67 |               if (line[0] === 'TOTAL') break
68 |             }
69 | 
70 |             const issue_number = Number(fs.readFileSync('pr_num.txt'))
71 |             const body = `## Coverage Result\n\n<details>\n<summary>Resultを開く</summary>\n\n${report}\n</details>`
72 | 
73 |             let listComments = await github.rest.issues.listComments({
74 |               issue_number,
75 |               owner: context.repo.owner,
76 |               repo: context.repo.repo,
77 |             })
78 |             listComments = listComments.data.filter((comment) => {
79 |               return comment.body.includes('Coverage Result') && comment.user.login.includes('github-actions')
80 |             })
81 | 
82 |             if (listComments.length === 0) {
83 |               github.rest.issues.createComment({
84 |                 issue_number,
85 |                 owner: context.repo.owner,
86 |                 repo: context.repo.repo,
87 |                 body,
88 |               })
89 |             } else {
90 |               github.rest.issues.updateComment({
91 |                 comment_id: listComments[0].id,
92 |                 owner: context.repo.owner,
93 |                 repo: context.repo.repo,
94 |                 body,
95 |               })
96 |             }
97 | 


--------------------------------------------------------------------------------
/.github/workflows/labeler.yml:
--------------------------------------------------------------------------------
 1 | name: Issue Labeler
 2 | on:
 3 |   issues:
 4 |     types: [opened]
 5 | 
 6 | jobs:
 7 |   triage:
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |     - uses: github/issue-labeler@v2.0
11 |       with:
12 |         repo-token: "${{ secrets.GITHUB_TOKEN }}"
13 |         configuration-path: .github/labeler.yml
14 |         enable-versioned-regex: 0
15 | 


--------------------------------------------------------------------------------
/.github/workflows/release-test.yml:
--------------------------------------------------------------------------------
 1 | name: Test Release Build
 2 | 
 3 | on:
 4 |   workflow_call:
 5 |     inputs:
 6 |       version:
 7 |         type: string
 8 |         required: true
 9 |       repo_url:
10 |         type: string
11 |         required: false
12 |   workflow_dispatch:
13 |     inputs:
14 |       version:
15 |         type: string
16 |         description: "テストしたいタグ名"
17 |         required: true
18 |       repo_url:
19 |         type: string
20 |         description: "リポジトリのURL（省略可能）"
21 |         required: false
22 | 
23 | env:
24 |   REPO_URL:
25 |     |- # repo_url指定時はrepo_urlを、それ以外はgithubのリポジトリURLを使用
26 |     ${{ (github.event.inputs || inputs).repo_url || format('{0}/{1}', github.server_url, github.repository) }}
 27 |   VERSION: |- # workflow_dispatch と workflow_call のどちらの場合も、入力された version を使用
28 |     ${{ (github.event.inputs || inputs).version }}
29 | 
30 | jobs:
31 |   test:
32 |     strategy:
33 |       fail-fast: false
34 |       matrix:
35 |         include:
36 |           - os: ubuntu-20.04
37 |             target: linux-cpu
38 |           - os: ubuntu-20.04
39 |             target: linux-nvidia
40 |           - os: macos-11
41 |             target: macos-x64
42 |           - os: windows-2019
43 |             target: windows-cpu
44 |           - os: windows-2019
45 |             target: windows-nvidia
46 |           - os: windows-2019
47 |             target: windows-directml
48 | 
49 |     runs-on: ${{ matrix.os }}
50 | 
51 |     steps:
52 |       - name: declare variables
53 |         id: vars
54 |         shell: bash
55 |         run: |
56 |           echo "release_url=${{ env.REPO_URL }}/releases/download/${{ env.VERSION }}" >> $GITHUB_OUTPUT
57 |           echo "package_name=voicevox_engine-${{ matrix.target }}-${{ env.VERSION }}" >> $GITHUB_OUTPUT
58 | 
59 |       - uses: actions/checkout@v2
60 | 
61 |       - uses: actions/setup-python@v2
62 |         with:
63 |           python-version: "3.8.10"
64 |           cache: pip
65 | 
66 |       - name: Download
67 |         shell: bash -euxv {0}
68 |         run: |
69 |           mkdir -p download
70 |           curl -L -o "download/list.txt" "${{ steps.vars.outputs.release_url }}/${{ steps.vars.outputs.package_name }}.7z.txt"
71 |           cat "download/list.txt" | xargs -I '%' curl -L -o "download/%" "${{ steps.vars.outputs.release_url }}/%"
72 |           7z x "download/$(head -n1 download/list.txt)"
73 |           mv ${{ matrix.target }} dist/
74 | 
75 |       - name: chmod +x
76 |         if: startsWith(matrix.target, 'linux') || startsWith(matrix.target, 'macos')
77 |         shell: bash
78 |         run: chmod +x dist/run
79 | 
80 |       - name: Install libsndfile1
81 |         if: startsWith(matrix.target, 'linux')
82 |         run: |
83 |           sudo apt-get update
84 |           sudo apt-get install libsndfile1
85 | 
86 |       - name: Install requirements
87 |         run: |
88 |           pip install -r requirements-test.txt
89 | 
90 |       - name: Test
91 |         shell: bash
92 |         run: python build_util/check_release_build.py --dist_dir dist/
93 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: test
 2 | 
 3 | on:
 4 |   push:
 5 |   pull_request:
 6 |     branches:
 7 |       - "**"
 8 |   workflow_dispatch:
 9 | 
10 | jobs:
11 |   test:
12 |     runs-on: ${{ matrix.os }}
13 |     strategy:
14 |       matrix:
15 |         os: [ubuntu-20.04, windows-latest] # [ubuntu-20.04, macos-latest, windows-latest]
16 |         python: ["3.8.10"]
17 |         include:
18 |           - os: ubuntu-20.04
19 |             path: ~/.cache/pip
20 |           # - os: macos-latest
21 |           #   path: ~/Library/Caches/pip
22 |           - os: windows-latest
23 |             path: ~\AppData\Local\pip\Cache
24 | 
25 |     steps:
26 |       - uses: actions/checkout@v3
27 | 
28 |       - name: Set up Python ${{ matrix.python }}
29 |         uses: actions/setup-python@v4
30 |         with:
31 |           python-version: ${{ matrix.python }}
32 |           cache: pip
33 | 
34 |       - name: Install libraries for ubuntu
35 |         if: matrix.os == 'ubuntu-20.04'
36 |         run: sudo apt-get install libsndfile1
37 | 
38 |       - name: Install dependencies
39 |         run: |
40 |           python -m pip install --upgrade pip setuptools wheel
41 |           python -m pip install -r requirements-test.txt
42 | 
43 |       - run: pysen run lint
44 | 
45 |       - name: Run pytest and get coverage
46 |         run: |
47 |           coverage run --omit=test/* -m pytest
48 | 
49 |       - name: Submit coverage to Coveralls
50 |         if: matrix.os == 'ubuntu-20.04'
51 |         run: coveralls --service=github
52 |         env:
53 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
54 | 
55 |       - name: Create coverage result
56 |         if: github.event_name == 'pull_request' && matrix.os == 'ubuntu-20.04'
57 |         run: |
58 |           mkdir report
59 |           coverage report > report/report.txt
60 |           echo ${{ github.event.number }} > report/pr_num.txt
61 | 
62 |       - name: Upload coverage result
63 |         if: github.event_name == 'pull_request' && matrix.os == 'ubuntu-20.04'
64 |         uses: actions/upload-artifact@v3
65 |         with:
66 |           name: report
67 |           path: report/
68 | 
69 |       - name: Check licenses
70 |         shell: bash
71 |         run: |
72 |           OUTPUT_LICENSE_JSON_PATH=/dev/null \
73 |           bash build_util/create_venv_and_generate_licenses.bash
74 | 


--------------------------------------------------------------------------------
/.github/workflows/typos.yml:
--------------------------------------------------------------------------------
 1 | name: Check typos
 2 | 
 3 | on:
 4 |   push:
 5 |   pull_request:
 6 |     branches:
 7 |       - "**"
 8 |   workflow_dispatch:
 9 | 
10 | jobs:
11 |   typos:
12 |     runs-on: ubuntu-latest
13 | 
14 |     steps:
15 |       - uses: actions/checkout@v3
16 | 
17 |       - name: typos-action
18 |         uses: crate-ci/typos@v1.12.12
19 | 


--------------------------------------------------------------------------------
/.github/workflows/upload-gh-pages.yml:
--------------------------------------------------------------------------------
 1 | name: upload-docs
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - "master"
 7 | 
 8 | env:
 9 |   PYTHON_VERSION: "3.8.10"
10 |   PUBLISH_DIR: "./docs/api"
11 |   PUBLISH_BRANCH: "gh-pages"
12 |   DESTINATION_DIR: "api"
13 | 
14 | jobs:
15 |   upload-doc:
16 |     runs-on: ubuntu-20.04
17 |     steps:
18 |       - uses: actions/checkout@v2
19 | 
20 |       - name: Setup Python
21 |         id: setup-python
22 |         uses: actions/setup-python@v2
23 |         with:
24 |           python-version: ${{ env.PYTHON_VERSION }}
25 |           cache: pip
26 | 
27 |       - name: Install libraries for ubuntu
28 |         run: sudo apt-get install libsndfile1
29 | 
30 |       - name: Install Python dependencies
31 |         shell: bash
32 |         run: |
33 |           pip install -r requirements.txt
34 | 
35 |       - name: Make documents
36 |         shell: bash
37 |         run: |
38 |           python make_docs.py
39 | 
40 |       - name: Deploy to GitHub Pages
41 |         uses: peaceiris/actions-gh-pages@v3
42 |         with:
43 |           github_token: ${{ secrets.GITHUB_TOKEN }}
44 |           publish_dir: ${{ env.PUBLISH_DIR }}
45 |           publish_branch: ${{ env.PUBLISH_BRANCH }}
46 |           destination_dir: ${{ env.DESTINATION_DIR }}
47 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # VOICEVOX specifics
  2 | ## Artifacts of nuitka
  3 | *.dist
  4 | *.build
  5 | /build
  6 | /cache
  7 | ## Artifact of generating licenses
  8 | /licenses.json
  9 | licenses_venv/
 10 | 
 11 | # Copied from `https://github.com/github/gitignore/blob/main/Python.gitignore` @2022-01-10
 12 | # Byte-compiled / optimized / DLL files
 13 | __pycache__/
 14 | *.py[cod]
 15 | *$py.class
 16 | 
 17 | # C extensions
 18 | *.so
 19 | 
 20 | # Distribution / packaging
 21 | .Python
 22 | build/
 23 | develop-eggs/
 24 | dist/
 25 | downloads/
 26 | eggs/
 27 | .eggs/
 28 | lib/
 29 | lib64/
 30 | parts/
 31 | sdist/
 32 | var/
 33 | wheels/
 34 | share/python-wheels/
 35 | *.egg-info/
 36 | .installed.cfg
 37 | *.egg
 38 | MANIFEST
 39 | 
 40 | # Installer logs
 41 | pip-log.txt
 42 | pip-delete-this-directory.txt
 43 | 
 44 | # Unit test / coverage reports
 45 | htmlcov/
 46 | .tox/
 47 | .nox/
 48 | .coverage
 49 | .coverage.*
 50 | .cache
 51 | nosetests.xml
 52 | coverage.xml
 53 | *.cover
 54 | *.py,cover
 55 | .hypothesis/
 56 | .pytest_cache/
 57 | cover/
 58 | 
 59 | # Translations
 60 | *.mo
 61 | *.pot
 62 | 
 63 | # Django stuff:
 64 | *.log
 65 | local_settings.py
 66 | db.sqlite3
 67 | db.sqlite3-journal
 68 | 
 69 | # Flask stuff:
 70 | instance/
 71 | .webassets-cache
 72 | 
 73 | # Scrapy stuff:
 74 | .scrapy
 75 | 
 76 | # Sphinx documentation
 77 | docs/_build/
 78 | 
 79 | # PyBuilder
 80 | .pybuilder/
 81 | target/
 82 | 
 83 | # Jupyter Notebook
 84 | .ipynb_checkpoints
 85 | 
 86 | # IPython
 87 | profile_default/
 88 | ipython_config.py
 89 | 
 90 | # pyenv
 91 | #   For a library or package, you might want to ignore these files since the code is
 92 | #   intended to run in multiple environments; otherwise, check them in:
 93 | .python-version
 94 | 
 95 | # pipenv
 96 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 97 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 98 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 99 | #   install all needed dependencies.
100 | Pipfile.lock
101 | 
102 | # poetry
103 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
104 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
105 | #   commonly ignored for libraries.
106 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
107 | # poetry.lock
108 | 
109 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
110 | __pypackages__/
111 | 
112 | # Celery stuff
113 | celerybeat-schedule
114 | celerybeat.pid
115 | 
116 | # SageMath parsed files
117 | *.sage.py
118 | 
119 | # Environments
120 | .env
121 | .venv
122 | env/
123 | venv/
124 | ENV/
125 | env.bak/
126 | venv.bak/
127 | 
128 | # Spyder project settings
129 | .spyderproject
130 | .spyproject
131 | 
132 | # Rope project settings
133 | .ropeproject
134 | 
135 | # mkdocs documentation
136 | /site
137 | 
138 | # mypy
139 | .mypy_cache/
140 | .dmypy.json
141 | dmypy.json
142 | 
143 | # Pyre type checker
144 | .pyre/
145 | 
146 | # pytype static type analyzer
147 | .pytype/
148 | 
149 | # Cython debug symbols
150 | cython_debug/
151 | 
152 | # PyCharm
153 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
154 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
155 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
156 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
157 | .idea/
158 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # See https://pre-commit.com for more information
 2 | # See https://pre-commit.com/hooks.html for more hooks
 3 | repos:
 4 |   - repo: local
 5 |     hooks:
 6 |       - id: pysen-lint
 7 |         name: pysen-lint
 8 |         entry: pysen run lint
 9 |         language: python
10 |         types: [file, python]
11 |         stages: [push]
12 |         pass_filenames: false
13 | 


--------------------------------------------------------------------------------
/LGPL_LICENSE:
--------------------------------------------------------------------------------
  1 |                    GNU LESSER GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 | 
  9 |   This version of the GNU Lesser General Public License incorporates
 10 | the terms and conditions of version 3 of the GNU General Public
 11 | License, supplemented by the additional permissions listed below.
 12 | 
 13 |   0. Additional Definitions.
 14 | 
 15 |   As used herein, "this License" refers to version 3 of the GNU Lesser
 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
 17 | General Public License.
 18 | 
 19 |   "The Library" refers to a covered work governed by this License,
 20 | other than an Application or a Combined Work as defined below.
 21 | 
 22 |   An "Application" is any work that makes use of an interface provided
 23 | by the Library, but which is not otherwise based on the Library.
 24 | Defining a subclass of a class defined by the Library is deemed a mode
 25 | of using an interface provided by the Library.
 26 | 
 27 |   A "Combined Work" is a work produced by combining or linking an
 28 | Application with the Library.  The particular version of the Library
 29 | with which the Combined Work was made is also called the "Linked
 30 | Version".
 31 | 
 32 |   The "Minimal Corresponding Source" for a Combined Work means the
 33 | Corresponding Source for the Combined Work, excluding any source code
 34 | for portions of the Combined Work that, considered in isolation, are
 35 | based on the Application, and not on the Linked Version.
 36 | 
 37 |   The "Corresponding Application Code" for a Combined Work means the
 38 | object code and/or source code for the Application, including any data
 39 | and utility programs needed for reproducing the Combined Work from the
 40 | Application, but excluding the System Libraries of the Combined Work.
 41 | 
 42 |   1. Exception to Section 3 of the GNU GPL.
 43 | 
 44 |   You may convey a covered work under sections 3 and 4 of this License
 45 | without being bound by section 3 of the GNU GPL.
 46 | 
 47 |   2. Conveying Modified Versions.
 48 | 
 49 |   If you modify a copy of the Library, and, in your modifications, a
 50 | facility refers to a function or data to be supplied by an Application
 51 | that uses the facility (other than as an argument passed when the
 52 | facility is invoked), then you may convey a copy of the modified
 53 | version:
 54 | 
 55 |    a) under this License, provided that you make a good faith effort to
 56 |    ensure that, in the event an Application does not supply the
 57 |    function or data, the facility still operates, and performs
 58 |    whatever part of its purpose remains meaningful, or
 59 | 
 60 |    b) under the GNU GPL, with none of the additional permissions of
 61 |    this License applicable to that copy.
 62 | 
 63 |   3. Object Code Incorporating Material from Library Header Files.
 64 | 
 65 |   The object code form of an Application may incorporate material from
 66 | a header file that is part of the Library.  You may convey such object
 67 | code under terms of your choice, provided that, if the incorporated
 68 | material is not limited to numerical parameters, data structure
 69 | layouts and accessors, or small macros, inline functions and templates
 70 | (ten or fewer lines in length), you do both of the following:
 71 | 
 72 |    a) Give prominent notice with each copy of the object code that the
 73 |    Library is used in it and that the Library and its use are
 74 |    covered by this License.
 75 | 
 76 |    b) Accompany the object code with a copy of the GNU GPL and this license
 77 |    document.
 78 | 
 79 |   4. Combined Works.
 80 | 
 81 |   You may convey a Combined Work under terms of your choice that,
 82 | taken together, effectively do not restrict modification of the
 83 | portions of the Library contained in the Combined Work and reverse
 84 | engineering for debugging such modifications, if you also do each of
 85 | the following:
 86 | 
 87 |    a) Give prominent notice with each copy of the Combined Work that
 88 |    the Library is used in it and that the Library and its use are
 89 |    covered by this License.
 90 | 
 91 |    b) Accompany the Combined Work with a copy of the GNU GPL and this license
 92 |    document.
 93 | 
 94 |    c) For a Combined Work that displays copyright notices during
 95 |    execution, include the copyright notice for the Library among
 96 |    these notices, as well as a reference directing the user to the
 97 |    copies of the GNU GPL and this license document.
 98 | 
 99 |    d) Do one of the following:
100 | 
101 |        0) Convey the Minimal Corresponding Source under the terms of this
102 |        License, and the Corresponding Application Code in a form
103 |        suitable for, and under terms that permit, the user to
104 |        recombine or relink the Application with a modified version of
105 |        the Linked Version to produce a modified Combined Work, in the
106 |        manner specified by section 6 of the GNU GPL for conveying
107 |        Corresponding Source.
108 | 
109 |        1) Use a suitable shared library mechanism for linking with the
110 |        Library.  A suitable mechanism is one that (a) uses at run time
111 |        a copy of the Library already present on the user's computer
112 |        system, and (b) will operate properly with a modified version
113 |        of the Library that is interface-compatible with the Linked
114 |        Version.
115 | 
116 |    e) Provide Installation Information, but only if you would otherwise
117 |    be required to provide such information under section 6 of the
118 |    GNU GPL, and only to the extent that such information is
119 |    necessary to install and execute a modified version of the
120 |    Combined Work produced by recombining or relinking the
121 |    Application with a modified version of the Linked Version. (If
122 |    you use option 4d0, the Installation Information must accompany
123 |    the Minimal Corresponding Source and Corresponding Application
124 |    Code. If you use option 4d1, you must provide the Installation
125 |    Information in the manner specified by section 6 of the GNU GPL
126 |    for conveying Corresponding Source.)
127 | 
128 |   5. Combined Libraries.
129 | 
130 |   You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 | 
136 |    a) Accompany the combined library with a copy of the same work based
137 |    on the Library, uncombined with any other library facilities,
138 |    conveyed under the terms of this License.
139 | 
140 |    b) Give prominent notice with the combined library that part of it
141 |    is a work based on the Library, and explaining where to find the
142 |    accompanying uncombined form of the same work.
143 | 
144 |   6. Revised Versions of the GNU Lesser General Public License.
145 | 
146 |   The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 | 
151 |   Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 | 
161 |   If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | LGPL v3 と、ソースコードの公開が不要な別ライセンスのデュアルライセンスです。
 2 | 
 3 | 1. LGPL v3
 4 | 
 5 | LGPL_LICENSEを参照してください。
 6 | 
 7 | 2. ソースコードの公開が不要な別ライセンス
 8 | 
 9 | 別ライセンスを取得したい場合は、ヒホ（twitter: @hiho_karuta）に求めてください。
10 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | CMD=
  2 | NOCACHE=
  3 | 
  4 | ARGS:=
  5 | ifeq ($(NOCACHE),1)
  6 | 	ARGS:=$(ARGS) --no-cache
  7 | endif
  8 | 
  9 | # Ubuntu 20.04
 10 | .PHONY: build-linux-docker-ubuntu20.04
 11 | build-linux-docker-ubuntu20.04:
 12 | 	docker buildx build . \
 13 | 		-t voicevox/voicevox_engine:cpu-ubuntu20.04-latest \
 14 | 		--target runtime-env \
 15 | 		--progress plain \
 16 | 		--build-arg BASE_IMAGE=ubuntu:20.04 \
 17 | 		--build-arg BASE_RUNTIME_IMAGE=ubuntu:20.04 \
 18 | 		--build-arg ONNXRUNTIME_URL=https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-1.13.1.tgz \
 19 | 		--build-arg VOICEVOX_CORE_LIBRARY_NAME=libcore_cpu_x64.so $(ARGS)
 20 | 
 21 | .PHONY: run-linux-docker-ubuntu20.04
 22 | run-linux-docker-ubuntu20.04:
 23 | 	docker run --rm -it \
 24 | 		-p '127.0.0.1:50021:50021' $(ARGS) \
 25 | 		voicevox/voicevox_engine:cpu-ubuntu20.04-latest $(CMD)
 26 | 
 27 | .PHONY: build-linux-docker-nvidia-ubuntu20.04
 28 | build-linux-docker-nvidia-ubuntu20.04:
 29 | 	docker buildx build . \
 30 | 		-t voicevox/voicevox_engine:nvidia-ubuntu20.04-latest \
 31 | 		--target runtime-nvidia-env \
 32 | 		--progress plain \
 33 | 		--build-arg BASE_IMAGE=ubuntu:20.04 \
 34 | 		--build-arg BASE_RUNTIME_IMAGE=nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu20.04 \
 35 | 		--build-arg ONNXRUNTIME_URL=https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-gpu-1.13.1.tgz \
 36 | 		--build-arg VOICEVOX_CORE_LIBRARY_NAME=libcore_gpu_x64_nvidia.so $(ARGS)
 37 | 
 38 | .PHONY: run-linux-docker-nvidia-ubuntu20.04
 39 | run-linux-docker-nvidia-ubuntu20.04:
 40 | 	docker run --rm -it \
 41 | 		--gpus all \
 42 | 		-p '127.0.0.1:50021:50021' $(ARGS) \
 43 | 		voicevox/voicevox_engine:nvidia-ubuntu20.04-latest $(CMD)
 44 | 
 45 | 
 46 | # Ubuntu 18.04
 47 | .PHONY: build-linux-docker-ubuntu18.04
 48 | build-linux-docker-ubuntu18.04:
 49 | 	docker buildx build . \
 50 | 		-t voicevox/voicevox_engine:cpu-ubuntu18.04-latest \
 51 | 		--target runtime-env \
 52 | 		--progress plain \
 53 | 		--build-arg BASE_IMAGE=ubuntu:18.04 \
 54 | 		--build-arg BASE_RUNTIME_IMAGE=ubuntu:18.04 \
 55 | 		--build-arg ONNXRUNTIME_URL=https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-1.13.1.tgz \
 56 | 		--build-arg VOICEVOX_CORE_LIBRARY_NAME=libcore_cpu_x64.so $(ARGS)
 57 | 
 58 | .PHONY: run-linux-docker-ubuntu18.04
 59 | run-linux-docker-ubuntu18.04:
 60 | 	docker run --rm -it \
 61 | 		-p '127.0.0.1:50021:50021' $(ARGS) \
 62 | 		voicevox/voicevox_engine:cpu-ubuntu18.04-latest $(CMD)
 63 | 
 64 | .PHONY: build-linux-docker-nvidia-ubuntu18.04
 65 | build-linux-docker-nvidia-ubuntu18.04:
 66 | 	docker buildx build . \
 67 | 		-t voicevox/voicevox_engine:nvidia-ubuntu18.04-latest \
 68 | 		--target runtime-nvidia-env \
 69 | 		--progress plain \
 70 | 		--build-arg BASE_IMAGE=ubuntu:18.04 \
 71 | 		--build-arg BASE_RUNTIME_IMAGE=nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu18.04 \
 72 | 		--build-arg ONNXRUNTIME_URL=https://github.com/microsoft/onnxruntime/releases/download/v1.13.1/onnxruntime-linux-x64-gpu-1.13.1.tgz \
 73 | 		--build-arg VOICEVOX_CORE_LIBRARY_NAME=libcore_gpu_x64_nvidia.so $(ARGS)
 74 | 
 75 | .PHONY: run-linux-docker-nvidia-ubuntu18.04
 76 | run-linux-docker-nvidia-ubuntu18.04:
 77 | 	docker run --rm -it \
 78 | 		--gpus all \
 79 | 		-p '127.0.0.1:50021:50021' $(ARGS) \
 80 | 		voicevox/voicevox_engine:nvidia-ubuntu18.04-latest $(CMD)
 81 | 
 82 | 
 83 | # VOICEVOX Core env for test
 84 | .PHONY: build-linux-docker-download-core-env-ubuntu18.04
 85 | build-linux-docker-download-core-env-ubuntu18.04:
 86 | 	docker buildx build . \
 87 | 		-t voicevox/voicevox_engine:download-core-env-ubuntu18.04 \
 88 | 		--target download-core-env \
 89 | 		--progress plain \
 90 | 		--build-arg BASE_IMAGE=ubuntu:18.04 $(ARGS)
 91 | 
 92 | .PHONY: run-linux-docker-download-core-env-ubuntu18.04
 93 | run-linux-docker-download-core-env-ubuntu18.04:
 94 | 	docker run --rm -it $(ARGS) \
 95 | 		voicevox/voicevox_engine:download-core-env-ubuntu18.04 $(CMD)
 96 | 
 97 | 
 98 | # ONNX Runtime env for test
 99 | .PHONY: build-linux-docker-download-onnxruntime-env-ubuntu18.04
100 | build-linux-docker-download-onnxruntime-env-ubuntu18.04:
101 | 	docker buildx build . \
102 | 		-t voicevox/voicevox_engine:download-onnxruntime-env-ubuntu18.04 \
103 | 		--target download-onnxruntime-env \
104 | 		--progress plain \
105 | 		--build-arg BASE_IMAGE=ubuntu:18.04 $(ARGS)
106 | 
107 | .PHONY: run-linux-docker-download-onnxruntime-env-ubuntu18.04
108 | run-linux-docker-download-onnxruntime-env-ubuntu18.04:
109 | 	docker run --rm -it $(ARGS) \
110 | 		voicevox/voicevox_engine:download-onnxruntime-env-ubuntu18.04 $(CMD)
111 | 
112 | 
113 | # Python env for test
114 | .PHONY: build-linux-docker-compile-python-env
115 | build-linux-docker-compile-python-env:
116 | 	docker buildx build . \
117 | 		-t voicevox/voicevox_engine:compile-python-env \
118 | 		--target compile-python-env \
119 | 		--progress plain \
120 | 		--build-arg BASE_IMAGE=ubuntu:20.04 $(ARGS)
121 | 
122 | .PHONY: run-linux-docker-compile-python-env
123 | run-linux-docker-compile-python-env:
124 | 	docker run --rm -it $(ARGS) \
125 | 		voicevox/voicevox_engine:compile-python-env $(CMD)
126 | 


--------------------------------------------------------------------------------
/_typos.toml:
--------------------------------------------------------------------------------
 1 | # Files for typos
 2 | # Instruction:  https://github.com/marketplace/actions/typos-action#getting-started
 3 | 
 4 | [default.extend-identifiers]
 5 | 
 6 | [default.extend-words]
 7 | ba="ba" # 7zコマンドの-baオプション
 8 | datas="datas" # PyInstallerの引数
 9 | 
10 | [files]
11 | extend-exclude = ["package-lock.json", "src/store/project.ts", "*.svg"]
12 | 


--------------------------------------------------------------------------------
/build_util/check_release_build.py:
--------------------------------------------------------------------------------
 1 | """
 2 | ビルド結果をテストする
 3 | """
 4 | import argparse
 5 | import json
 6 | import time
 7 | from io import BytesIO
 8 | from pathlib import Path
 9 | from subprocess import Popen
10 | from urllib.parse import urlencode
11 | from urllib.request import Request, urlopen
12 | 
13 | import soundfile
14 | 
15 | base_url = "http://localhost:50021/"
16 | 
17 | 
def test_release_build(dist_dir: Path) -> None:
    """
    Smoke-test the built engine executable found in ``dist_dir``.

    Launches ``run`` (or ``run.exe`` when ``run`` does not exist) with
    ``dist_dir`` as the working directory, waits a fixed time for start-up,
    then exercises the ``version``, ``audio_query``, ``synthesis`` and
    ``engine_manifest`` endpoints under ``base_url``.

    Parameters
    ----------
    dist_dir : Path
        Directory that contains the built executable.

    Raises
    ------
    AssertionError
        If the version response is empty, the manifest has no ``uuid`` key,
        or the engine process has exited by the end of the checks.
    """
    run_file = dist_dir / "run"
    if not run_file.exists():
        run_file = dist_dir / "run.exe"

    # Launch the engine
    process = Popen([run_file.absolute()], cwd=dist_dir)
    try:
        time.sleep(120)  # Fixed wait for the engine to start

        # Version retrieval test
        req = Request(base_url + "version")
        with urlopen(req) as res:
            assert len(res.read()) > 0

        # Text -> query
        text = "こんにちは、音声合成の世界へようこそ"
        req = Request(
            base_url + "audio_query?" + urlencode({"speaker": "1", "text": text}),
            method="POST",
        )
        with urlopen(req) as res:
            query = json.loads(res.read().decode("utf-8"))

        # Query -> audio
        req = Request(base_url + "synthesis?speaker=1", method="POST")
        req.add_header("Content-Type", "application/json")
        req.data = json.dumps(query).encode("utf-8")
        with urlopen(req) as res:
            wave = res.read()
        # Decoding the response checks that it is readable audio data
        soundfile.read(BytesIO(wave))

        # Engine manifest
        req = Request(base_url + "engine_manifest", method="GET")
        with urlopen(req) as res:
            manifest = json.loads(res.read().decode("utf-8"))
            assert "uuid" in manifest

        # Confirm that the process is still running
        assert process.poll() is None
    finally:
        # Stop the engine on every path, so that a failed check does not
        # leave the launched process running
        process.terminate()
60 | 
61 | 
if __name__ == "__main__":
    # Command-line entry point: --dist_dir selects the build output to test
    cli = argparse.ArgumentParser()
    cli.add_argument("--dist_dir", type=Path, default=Path("dist/"))
    test_release_build(dist_dir=cli.parse_args().dist_dir)
67 | 


--------------------------------------------------------------------------------
/build_util/codesign.bash:
--------------------------------------------------------------------------------
 1 | # !!! コードサイニング証明書を取り扱うので取り扱い注意 !!!
 2 | 
 3 | set -eu
 4 | 
 5 | if [ ! -v CERT_BASE64 ]; then
 6 |     echo "CERT_BASE64が未定義です"
 7 |     exit 1
 8 | fi
 9 | if [ ! -v CERT_PASSWORD ]; then
10 |     echo "CERT_PASSWORDが未定義です"
11 |     exit 1
12 | fi
13 | 
14 | if [ $# -ne 1 ]; then
15 |     echo "引数の数が一致しません"
16 |     exit 1
17 | fi
18 | target_file_glob="$1"
19 | 
20 | # 証明書
21 | CERT_PATH=cert.pfx
22 | echo -n "$CERT_BASE64" | base64 -d - > $CERT_PATH
23 | 
24 | # 指定ファイルに署名する
25 | function codesign() {
26 |     TARGET="$1"
27 |     SIGNTOOL=$(find "C:/Program Files (x86)/Windows Kits/10/App Certification Kit" -name "signtool.exe" | sort -V | tail -n 1)
28 |     powershell "& '$SIGNTOOL' sign /fd SHA256 /td SHA256 /tr http://timestamp.digicert.com /f $CERT_PATH /p $CERT_PASSWORD '$TARGET'"
29 | }
30 | 
31 | # 指定ファイルが署名されているか
32 | function is_signed() {
33 |     TARGET="$1"
34 |     SIGNTOOL=$(find "C:/Program Files (x86)/Windows Kits/10/App Certification Kit" -name "signtool.exe" | sort -V | tail -n 1)
35 |     powershell "& '$SIGNTOOL' verify /pa '$TARGET'" || return 1
36 | }
37 | 
38 | # 署名されていなければ署名
39 | ls $target_file_glob | while read target_file; do
40 |     if is_signed "$target_file"; then
41 |         echo "署名済み: $target_file"
42 |     else
43 |         echo "署名: $target_file"
44 |         codesign "$target_file"
45 |     fi
46 | done
47 | 
48 | # 証明書を消去
49 | rm $CERT_PATH
50 | 


--------------------------------------------------------------------------------
/build_util/create_venv_and_generate_licenses.bash:
--------------------------------------------------------------------------------
 1 | # 仮想環境を作ってrequirements.txtをインストールし、ライセンス一覧を生成する
 2 | 
 3 | set -eux
 4 | 
 5 | if [ ! -v OUTPUT_LICENSE_JSON_PATH ]; then
 6 |     echo "OUTPUT_LICENSE_JSON_PATHが未定義です"
 7 |     exit 1
 8 | fi
 9 | 
10 | VENV_PATH="licenses_venv"
11 | 
12 | python -m venv $VENV_PATH
13 | if [ -d "$VENV_PATH/Scripts" ]; then
14 |     source $VENV_PATH/Scripts/activate
15 | else
16 |     source $VENV_PATH/bin/activate
17 | fi
18 | 
19 | pip install -r requirements-license.txt
20 | python generate_licenses.py >$OUTPUT_LICENSE_JSON_PATH
21 | 
22 | deactivate
23 | 
24 | rm -rf $VENV_PATH
25 | 


--------------------------------------------------------------------------------
/build_util/merge_update_infos.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 更新履歴をマージする。
 3 | """
 4 | 
 5 | import argparse
 6 | import json
 7 | from collections import OrderedDict
 8 | from pathlib import Path
 9 | from typing import Dict, List, Union
10 | 
11 | 
def merge_json_string(src: str, dst: str) -> str:
    """
    Merge the entries of ``dst`` into the same-version entries of ``src``.

    Both arguments are JSON arrays of objects that carry a "version" key;
    every other value is expected to be a list of strings. The result is the
    ``src`` array, serialized back to a JSON string.

    When the versions match, the lists are concatenated (``src`` first) and
    only the first occurrence of each element is kept.
    >>> src = '[{"version": "0.0.1", "a": ["a1"], "b": ["b1", "b2"]}]'
    >>> dst = '[{"version": "0.0.1", "a": ["a2"], "b": ["b1", "b3"]}]'
    >>> merge_json_string(src, dst)
    '[{"version": "0.0.1", "a": ["a1", "a2"], "b": ["b1", "b2", "b3"]}]'

    Versions that exist only in ``dst`` are ignored.
    >>> src = '[{"version": "1"}]'
    >>> dst = '[{"version": "1"}, {"version": "2"}]'
    >>> merge_json_string(src, dst)
    '[{"version": "1"}]'

    A key that the matching ``dst`` entry lacks contributes nothing.
    >>> src = '[{"version": "1", "a": ["a1"]}]'
    >>> dst = '[{"version": "1"}]'
    >>> merge_json_string(src, dst)
    '[{"version": "1", "a": ["a1"]}]'
    """
    src_json: List[Dict[str, Union[str, List[str]]]] = json.loads(src)
    dst_json: List[Dict[str, Union[str, List[str]]]] = json.loads(dst)

    for src_item in src_json:
        for dst_item in dst_json:
            if src_item["version"] != dst_item["version"]:
                continue

            for key in src_item:
                if key == "version":
                    continue

                # Append only the elements that are not present yet.
                # A key missing from dst is treated as an empty list
                # instead of raising KeyError.
                src_item[key] = list(
                    OrderedDict.fromkeys(src_item[key] + dst_item.get(key, []))
                )

    return json.dumps(src_json)
42 | 
43 | 
def merge_update_infos(src_path: Path, dst_path: Path, output_path: Path) -> None:
    """
    Merge two update-history JSON files and write the result.

    Parameters
    ----------
    src_path : Path
        File whose entries form the base of the result.
    dst_path : Path
        File whose same-version entries are merged into the base.
    output_path : Path
        File that receives the merged JSON; may be the same as ``src_path``
        because both inputs are read before anything is written.
    """
    src = src_path.read_text(encoding="utf-8")
    dst = dst_path.read_text(encoding="utf-8")
    merged = merge_json_string(src, dst)
    # Write with the same explicit encoding used for reading, rather than
    # the platform default
    output_path.write_text(merged, encoding="utf-8")
49 | 
50 | 
if __name__ == "__main__":
    # Command-line entry point: three positional paths (source, destination, output)
    cli = argparse.ArgumentParser()
    for positional in ("src_path", "dst_path", "output_path"):
        cli.add_argument(positional, type=Path)
    options = cli.parse_args()
    merge_update_infos(options.src_path, options.dst_path, options.output_path)
58 | 


--------------------------------------------------------------------------------
/build_util/modify_pyinstaller.bash:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # PyInstallerをカスタマイズしてから再インストールする
 4 | # 良いGPUが自動的に選択されるようにしている
 5 | # https://github.com/VOICEVOX/voicevox_engine/issues/502
 6 | 
 7 | set -eux
 8 | 
 9 | pyinstaller_version=$(pyinstaller -v)
10 | tempdir=$(mktemp -dt modify_pyinstaller.XXXXXXXX)
11 | trap 'rm -rf "$tempdir"' EXIT
12 | git clone https://github.com/pyinstaller/pyinstaller.git "$tempdir" -b "v$pyinstaller_version" --depth 1
13 | cat > "$tempdir/bootloader/src/symbols.c" << EOF
14 | #ifdef _WIN32
15 | #include <windows.h>
16 | 
17 | // https://docs.nvidia.com/gameworks/content/technologies/desktop/optimus.htm
18 | __declspec(dllexport) DWORD NvOptimusEnablement = 0x00000001;
19 | 
20 | // https://gpuopen.com/learn/amdpowerxpressrequesthighperformance/
21 | __declspec(dllexport) DWORD AmdPowerXpressRequestHighPerformance = 0x00000001;
22 | #endif
23 | EOF
24 | (cd "$tempdir/bootloader" && python ./waf all)
25 | pip install -U "$tempdir"
26 | 


--------------------------------------------------------------------------------
/build_util/process_voicevox_resource.bash:
--------------------------------------------------------------------------------
 1 | set -eux
 2 | 
 3 | if [ ! -v DOWNLOAD_RESOURCE_PATH ]; then
 4 |     echo "DOWNLOAD_RESOURCE_PATHが未定義です"
 5 |     exit 1
 6 | fi
 7 | 
 8 | rm -r speaker_info
 9 | cp -r $DOWNLOAD_RESOURCE_PATH/character_info speaker_info
10 | 
11 | # .png_largeファイルを消去する
12 | rm speaker_info/*/icons/*.png_large
13 | 
14 | # マニフェスト
15 | jq -s '.[0] * .[1]' engine_manifest.json $DOWNLOAD_RESOURCE_PATH/engine/engine_manifest.json \
16 |     > engine_manifest.json.tmp
17 | mv engine_manifest.json.tmp engine_manifest.json
18 | 
19 | python build_util/merge_update_infos.py \
20 |     engine_manifest_assets/update_infos.json \
21 |     $DOWNLOAD_RESOURCE_PATH/engine/engine_manifest_assets/update_infos.json \
22 |     engine_manifest_assets/update_infos.json
23 | 
24 | for f in $(ls $DOWNLOAD_RESOURCE_PATH/engine/engine_manifest_assets/* | grep -v update_infos.json); do
25 |     cp $f ./engine_manifest_assets/
26 | done
27 | 


--------------------------------------------------------------------------------
/default.csv:
--------------------------------------------------------------------------------
 1 | 朱司,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,アカシ,アカシ,1/3,C1
 2 | 青山,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,アオヤマ,アオヤマ,2/4,C1
 3 | 雨晴,1350,1350,7000,名詞,固有名詞,人名,姓,*,*,*,アメハレ,アメハレ,2/4,C1
 4 | 櫻歌,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,オウカ,オーカ,1/3,C1
 5 | 音街,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,オトマチ,オトマチ,2/4,C1
 6 | 春日部,1350,1350,8600,名詞,固有名詞,人名,姓,*,*,*,カスカベ,カスカベ,0/4,C1
 7 | 麒ヶ島,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,キガシマ,キガシマ,2/4,C1
 8 | 紲星,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,キズナ,キズナ,1/3,C1
 9 | 九州,1350,1350,8600,名詞,固有名詞,人名,姓,*,*,*,キュウシュウ,キュウシュウ,1/4,C1
10 | キョウコ,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,キョオコ,キョオコ,1/3,C1
11 | 玄野,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,クロノ,クロノ,1/3,C1
12 | 剣崎,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,ケンザキ,ケンザキ,1/4,C1
13 | 後鬼,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ゴキ,ゴキ,1/2,C1
14 | 虎太郎,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,コタロウ,コタロー,4/4,C1
15 | 琴葉,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,コトノハ,コトノハ,0/4,C1
16 | 小夜,1351,1351,2200,名詞,固有名詞,人名,名,*,*,*,サヨ,サヨ,1/2,C1
17 | 四国,1350,1350,2200,名詞,固有名詞,人名,姓,*,*,*,シコク,シコク,1/3,C1
18 | 白上,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,シラカミ,シラカミ,4/4,C1
19 | ずんだもん,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ズンダモン,ズンダモン,1/5,C1
20 | そら,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ソラ,ソラ,1/2,C1
21 | 宗麟,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ソウリン,ソウリン,1/4,C1
22 | タイプＴ,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,タイプティー,タイプティー,4/5,C1
23 | 波音,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ナミネ,ナミネ,0/3,C1
24 | 武宏,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,タケヒロ,タケヒロ,2/4,C1
25 | ちび式じい,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,チビシキジー,チビシキジー,5/6,C1
26 | 月読,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ツクヨミ,ツクヨミ,0/4,C1
27 | つむぎ,1351,1351,7450,名詞,固有名詞,人名,名,*,*,*,ツムギ,ツムギ,0/3,C1
28 | ナースロボ,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ナースロボ,ナースロボ,4/5,C1
29 | Ｎｏ．７,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ナンバーセブン,ナンバーセブン,5/7,C1
30 | はう,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,ハウ,ハウ,1/2,C1
31 | 桜乃,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ハルノ,ハルノ,1/3,C1
32 | ひまり,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ヒマリ,ヒマリ,0/3,C1
33 | 紅桜,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,ベニザクラ,ベニザクラ,3/5,C1
34 | 聖騎士,1350,1350,8600,名詞,固有名詞,人名,姓,*,*,*,ホーリーナイト,ホーリーナイト,5/7,C1
35 | ＷｈｉｔｅＣＵＬ,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ホワイトカル,ホワイトカル,5/6,C1
36 | ミコ,1351,1351,3900,名詞,固有名詞,人名,名,*,*,*,ミコ,ミコ,1/2,C1
37 | 水奈瀬,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ミナセ,ミナセ,2/3,C1
38 | 冥鳴,1350,1350,5000,名詞,固有名詞,人名,姓,*,*,*,メイメイ,メイメイ,1/4,C1
39 | 鳴花,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,メイカ,メイカ,1/3,C1
40 | めたん,1351,1351,7000,名詞,固有名詞,人名,名,*,*,*,メタン,メタン,1/3,C1
41 | 雌雄,1351,1351,8600,名詞,固有名詞,人名,名,*,*,*,メスオ,メスオ,0/3,C1
42 | もち子さん,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,モチコサン,モチコサン,1/5,C1
43 | モチノ,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,モチノ,モチノ,0/3,C1
44 | 結月,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ユヅキ,ユヅキ,1/3,C1
45 | 弓鶴,1351,1351,0,名詞,固有名詞,人名,名,*,*,*,ユヅル,ユヅル,0/3,C1
46 | リツ,1351,1351,3900,名詞,固有名詞,人名,名,*,*,*,リツ,リツ,1/2,C1
47 | 六花,1351,1351,4900,名詞,固有名詞,人名,名,*,*,*,リッカ,リッカ,1/3,C1
48 | 龍星,1351,1351,5000,名詞,固有名詞,人名,名,*,*,*,リュウセイ,リュウセイ,1/4,C1
49 | 雀松,1350,1350,0,名詞,固有名詞,人名,姓,*,*,*,ワカマツ,ワカマツ,2/4,C1
50 | ＣＯＥＩＲＯＩＮＫ,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエイロインク,コエイロインク,5/7,C1
51 | ｃｏｅｉｒｏｉｎｋ,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエイロインク,コエイロインク,5/7,C1
52 | ＣｏｅＦｏｎｔ,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエフォント,コエフォント,3/5,C1
53 | ｃｏｅｆｏｎｔ,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,コエフォント,コエフォント,3/5,C1
54 | ＴＡＬＱｕ,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,トーク,トーク,0/3,C1
55 | ｔａｌｑｕ,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,トーク,トーク,0/3,C1
56 | ＶＯＩＣＥＶＯＸ,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,ボイスボックス,ボイスボックス,4/7,C1
57 | ｖｏｉｃｅｖｏｘ,1348,1348,0,名詞,固有名詞,一般,*,*,*,*,ボイスボックス,ボイスボックス,4/7,C1


--------------------------------------------------------------------------------
/default_setting.yml:
--------------------------------------------------------------------------------
1 | allow_origin: null
2 | cors_policy_mode: localapps
3 | 


--------------------------------------------------------------------------------
/docs/VOICEVOX音声合成エンジンとの連携.md:
--------------------------------------------------------------------------------
1 | メモ書き程度ですが、どういう方針で開発を進めているかを紹介します。
2 | 
3 | - バージョンが上がっても、`/audio_query`で返ってくる値をそのまま`/synthesis`に POST すれば音声合成できるようにする予定です
4 |   - `AudioQuery`のパラメータは増えますが、なるべくデフォルト値で以前と変わらない音声が生成されるようにします
5 | - バージョン 0.7 から音声スタイルが実装されました。スタイルの情報は`/speakers`から取得できます
6 |   - スタイルの情報にある`style_id`を`speaker`に指定することで、今まで通り音声合成ができます
7 |     - style_id の指定先が speaker なのは互換性のためです
8 | 


--------------------------------------------------------------------------------
/docs/api/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/docs/api/.gitkeep


--------------------------------------------------------------------------------
/docs/licenses/open_jtalk/COPYING:
--------------------------------------------------------------------------------
 1 | /* ----------------------------------------------------------------- */
 2 | /*           The Japanese TTS System "Open JTalk"                    */
 3 | /*           developed by HTS Working Group                          */
 4 | /*           http://open-jtalk.sourceforge.net/                      */
 5 | /* ----------------------------------------------------------------- */
 6 | /*                                                                   */
 7 | /*  Copyright (c) 2008-2016  Nagoya Institute of Technology          */
 8 | /*                           Department of Computer Science          */
 9 | /*                                                                   */
10 | /* All rights reserved.                                              */
11 | /*                                                                   */
12 | /* Redistribution and use in source and binary forms, with or        */
13 | /* without modification, are permitted provided that the following   */
14 | /* conditions are met:                                               */
15 | /*                                                                   */
16 | /* - Redistributions of source code must retain the above copyright  */
17 | /*   notice, this list of conditions and the following disclaimer.   */
18 | /* - Redistributions in binary form must reproduce the above         */
19 | /*   copyright notice, this list of conditions and the following     */
20 | /*   disclaimer in the documentation and/or other materials provided */
21 | /*   with the distribution.                                          */
22 | /* - Neither the name of the HTS working group nor the names of its  */
23 | /*   contributors may be used to endorse or promote products derived */
24 | /*   from this software without specific prior written permission.   */
25 | /*                                                                   */
26 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
27 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
28 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
29 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
30 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
31 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
32 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
33 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
34 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
35 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
36 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
37 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
38 | /* POSSIBILITY OF SUCH DAMAGE.                                       */
39 | /* ----------------------------------------------------------------- */
40 | 


--------------------------------------------------------------------------------
/docs/licenses/open_jtalk/mecab-naist-jdic/COPYING:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2009, Nara Institute of Science and Technology, Japan.
 2 | 
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are
 7 | met:
 8 | 
 9 | Redistributions of source code must retain the above copyright notice,
10 | this list of conditions and the following disclaimer.
11 | Redistributions in binary form must reproduce the above copyright
12 | notice, this list of conditions and the following disclaimer in the
13 | documentation and/or other materials provided with the distribution.
14 | Neither the name of the Nara Institute of Science and Technology
15 | (NAIST) nor the names of its contributors may be used to endorse or
16 | promote products derived from this software without specific prior
17 | written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 
31 | /* ----------------------------------------------------------------- */
32 | /*           The Japanese TTS System "Open JTalk"                    */
33 | /*           developed by HTS Working Group                          */
34 | /*           http://open-jtalk.sourceforge.net/                      */
35 | /* ----------------------------------------------------------------- */
36 | /*                                                                   */
37 | /*  Copyright (c) 2008-2016  Nagoya Institute of Technology          */
38 | /*                           Department of Computer Science          */
39 | /*                                                                   */
40 | /* All rights reserved.                                              */
41 | /*                                                                   */
42 | /* Redistribution and use in source and binary forms, with or        */
43 | /* without modification, are permitted provided that the following   */
44 | /* conditions are met:                                               */
45 | /*                                                                   */
46 | /* - Redistributions of source code must retain the above copyright  */
47 | /*   notice, this list of conditions and the following disclaimer.   */
48 | /* - Redistributions in binary form must reproduce the above         */
49 | /*   copyright notice, this list of conditions and the following     */
50 | /*   disclaimer in the documentation and/or other materials provided */
51 | /*   with the distribution.                                          */
52 | /* - Neither the name of the HTS working group nor the names of its  */
53 | /*   contributors may be used to endorse or promote products derived */
54 | /*   from this software without specific prior written permission.   */
55 | /*                                                                   */
56 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
57 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
58 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
59 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
60 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
61 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
62 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
63 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
64 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
65 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
66 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
67 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
68 | /* POSSIBILITY OF SUCH DAMAGE.                                       */
69 | /* ----------------------------------------------------------------- */
70 | 


--------------------------------------------------------------------------------
/docs/licenses/open_jtalk/mecab/COPYING:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2001-2008, Taku Kudo
 2 | Copyright (c) 2004-2008, Nippon Telegraph and Telephone Corporation
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without modification, are
 6 | permitted provided that the following conditions are met:
 7 | 
 8 |  * Redistributions of source code must retain the above
 9 |    copyright notice, this list of conditions and the
10 |    following disclaimer.
11 | 
12 |  * Redistributions in binary form must reproduce the above
13 |    copyright notice, this list of conditions and the
14 |    following disclaimer in the documentation and/or other
15 |    materials provided with the distribution.
16 | 
17 |  * Neither the name of the Nippon Telegraph and Telegraph Corporation
18 |    nor the names of its contributors may be used to endorse or
19 |    promote products derived from this software without specific
20 |    prior written permission.
21 | 
22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
23 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
28 | TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 
31 | /* ----------------------------------------------------------------- */
32 | /*           The Japanese TTS System "Open JTalk"                    */
33 | /*           developed by HTS Working Group                          */
34 | /*           http://open-jtalk.sourceforge.net/                      */
35 | /* ----------------------------------------------------------------- */
36 | /*                                                                   */
37 | /*  Copyright (c) 2008-2016  Nagoya Institute of Technology          */
38 | /*                           Department of Computer Science          */
39 | /*                                                                   */
40 | /* All rights reserved.                                              */
41 | /*                                                                   */
42 | /* Redistribution and use in source and binary forms, with or        */
43 | /* without modification, are permitted provided that the following   */
44 | /* conditions are met:                                               */
45 | /*                                                                   */
46 | /* - Redistributions of source code must retain the above copyright  */
47 | /*   notice, this list of conditions and the following disclaimer.   */
48 | /* - Redistributions in binary form must reproduce the above         */
49 | /*   copyright notice, this list of conditions and the following     */
50 | /*   disclaimer in the documentation and/or other materials provided */
51 | /*   with the distribution.                                          */
52 | /* - Neither the name of the HTS working group nor the names of its  */
53 | /*   contributors may be used to endorse or promote products derived */
54 | /*   from this software without specific prior written permission.   */
55 | /*                                                                   */
56 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
57 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
58 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
59 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
60 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
61 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
62 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
63 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
64 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
65 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
66 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
67 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
68 | /* POSSIBILITY OF SUCH DAMAGE.                                       */
69 | /* ----------------------------------------------------------------- */
70 | 


--------------------------------------------------------------------------------
/docs/licenses/world/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | /* ----------------------------------------------------------------- */
 2 | /*           WORLD: High-quality speech analysis,                    */
 3 | /*           manipulation and synthesis system                       */
 4 | /*           developed by M. Morise                                  */
 5 | /*           http://www.kisc.meiji.ac.jp/~mmorise/world/english/     */
 6 | /* ----------------------------------------------------------------- */
 7 | /*                                                                   */
 8 | /*  Copyright (c) 2010  M. Morise                                    */
 9 | /*                                                                   */
10 | /* All rights reserved.                                              */
11 | /*                                                                   */
12 | /* Redistribution and use in source and binary forms, with or        */
13 | /* without modification, are permitted provided that the following   */
14 | /* conditions are met:                                               */
15 | /*                                                                   */
16 | /* - Redistributions of source code must retain the above copyright  */
17 | /*   notice, this list of conditions and the following disclaimer.   */
18 | /* - Redistributions in binary form must reproduce the above         */
19 | /*   copyright notice, this list of conditions and the following     */
20 | /*   disclaimer in the documentation and/or other materials provided */
21 | /*   with the distribution.                                          */
22 | /* - Neither the name of the M. Morise nor the names of its          */
23 | /*   contributors may be used to endorse or promote products derived */
24 | /*   from this software without specific prior written permission.   */
25 | /*                                                                   */
26 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
27 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
28 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
29 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
30 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
31 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
32 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
33 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
34 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
35 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
36 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
37 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
38 | /* POSSIBILITY OF SUCH DAMAGE.                                       */
39 | /* ----------------------------------------------------------------- */
40 | 


--------------------------------------------------------------------------------
/engine_manifest.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "manifest_version": "0.13.1",
 3 |     "name": "DUMMY Engine",
 4 |     "brand_name": "DUMMY",
 5 |     "uuid": "c7b58856-bd56-4aa1-afb7-b8415f824b06",
 6 |     "version": "999.999.999",
 7 |     "url": "https://github.com/VOICEVOX/voicevox_engine",
 8 |     "command": "run",
 9 |     "port": 50021,
10 |     "icon": "engine_manifest_assets/icon.png",
11 |     "default_sampling_rate": 24000,
12 |     "terms_of_service": "engine_manifest_assets/terms_of_service.md",
13 |     "update_infos": "engine_manifest_assets/update_infos.json",
14 |     "dependency_licenses": "engine_manifest_assets/dependency_licenses.json",
15 |     "downloadable_libraries_path": null,
16 |     "downloadable_libraries_url": null,
17 |     "supported_features": {
18 |         "adjust_mora_pitch": {
19 |             "type": "bool",
20 |             "value": true,
21 |             "name": "モーラごとの音高の調整"
22 |         },
23 |         "adjust_phoneme_length": {
24 |             "type": "bool",
25 |             "value": true,
26 |             "name": "音素ごとの長さの調整"
27 |         },
28 |         "adjust_speed_scale": {
29 |             "type": "bool",
30 |             "value": true,
31 |             "name": "全体の話速の調整"
32 |         },
33 |         "adjust_pitch_scale": {
34 |             "type": "bool",
35 |             "value": true,
36 |             "name": "全体の音高の調整"
37 |         },
38 |         "adjust_intonation_scale": {
39 |             "type": "bool",
40 |             "value": true,
41 |             "name": "全体の抑揚の調整"
42 |         },
43 |         "adjust_volume_scale": {
44 |             "type": "bool",
45 |             "value": true,
46 |             "name": "全体の音量の調整"
47 |         },
48 |         "interrogative_upspeak": {
49 |             "type": "bool",
50 |             "value": true,
51 |             "name": "疑問文の自動調整"
52 |         },
53 |         "synthesis_morphing" : {
54 |             "type": "bool",
55 |             "value": true,
56 |             "name": "2人の話者でモーフィングした音声を合成"
57 |         }
58 |     }
59 | }
60 | 


--------------------------------------------------------------------------------
/engine_manifest_assets/dependency_licenses.json:
--------------------------------------------------------------------------------
1 | [
2 |     {
3 |         "name": "dummy library",
4 |         "version": "0.0.1",
5 |         "license": "dummy license",
6 |         "text": "dummy license text"
7 |     }
8 | ]


--------------------------------------------------------------------------------
/engine_manifest_assets/downloadable_libraries.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "download_url": "",
 4 |     "bytes": "1000",
 5 |     "speaker": {
 6 |       "name": "dummy1",
 7 |       "speaker_uuid": "dummy1",
 8 |       "styles": [
 9 |         {
10 |           "name": "style1",
11 |           "id": 0
12 |         },
13 |         {
14 |           "name": "style2",
15 |           "id": 1
16 |         }
17 |       ],
18 |       "version": "0.0.1"
19 |     },
20 |     "speaker_info": {
21 |       "policy": "",
22 |       "portrait": "",
23 |       "style_infos": [
24 |         {
25 |           "id": 0,
26 |           "icon": "",
27 |           "voice_samples": [
28 |             "",
29 |             "",
30 |             ""
31 |           ]
32 |         },
33 |         {
34 |           "id": 1,
35 |           "icon": "",
36 |           "voice_samples": [
37 |             "",
38 |             "",
39 |             ""
40 |           ]
41 |         }
42 |       ]
43 |     }
44 |   }
45 | ]
46 | 


--------------------------------------------------------------------------------
/engine_manifest_assets/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/engine_manifest_assets/icon.png


--------------------------------------------------------------------------------
/engine_manifest_assets/terms_of_service.md:
--------------------------------------------------------------------------------
1 | dummy terms of service


--------------------------------------------------------------------------------
/engine_manifest_assets/update_infos.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "version": "0.14.3",
 4 |     "descriptions": [
 5 |       "キャラクター「†聖騎士 紅桜†」「雀松朱司」「麒ヶ島宗麟」を追加",
 6 |       "同時書き込みで辞書が破損する問題を修正"
 7 |     ],
 8 |     "contributors": ["Hiroshiba"]
 9 |   },
10 |   {
11 |     "version": "0.14.2",
12 |     "descriptions": ["DirectML版の生成が遅い問題を修正"],
13 |     "contributors": []
14 |   },
15 |   {
16 |     "version": "0.14.1",
17 |     "descriptions": ["AquesTalkライクな記法で生成した音声のバグを修正"],
18 |     "contributors": []
19 |   },
20 |   {
21 |     "version": "0.14.0",
22 |     "descriptions": [
23 |       "コアをRust言語に移行",
24 |       "セキュリティアップデート",
25 |       "スタイルごとに異なる立ち絵の提供を可能に",
26 |       "VVPPファイルの提供",
27 |       "設定GUIの提供",
28 |       "プリセットの保存",
29 |       "モーフィングAPIの仕様変更",
30 |       "DirectML利用時に適したGPUを自動選択",
31 |       "開発環境の向上",
32 |       "バグ修正"
33 |     ],
34 |     "contributors": [
35 |       "aoirint",
36 |       "Appletigerv",
37 |       "haru3me",
38 |       "Hiroshiba",
39 |       "ksk001100",
40 |       "masinc",
41 |       "misogihagi",
42 |       "My-MC",
43 |       "nebocco",
44 |       "PickledChair",
45 |       "qryxip",
46 |       "qwerty2501",
47 |       "sabonerune",
48 |       "sarisia",
49 |       "Segu-g",
50 |       "sevenc-nanashi",
51 |       "shigobu",
52 |       "smly",
53 |       "takana-v",
54 |       "ts-klassen",
55 |       "whiteball",
56 |       "y-chan"
57 |     ]
58 |   }
59 | ]
60 | 


--------------------------------------------------------------------------------
/get_cost_candidates.py:
--------------------------------------------------------------------------------
 1 | """
 2 | voicevox_engine/part_of_speech_data.pyのcost_candidatesを計算するプログラムです。
 3 | 引数のnaist_jdic_pathには、open_jtalkのsrc/mecab-naist-jdic/naist-jdic.csvを指定してください。
 4 | 
 5 | 実行例:
 6 | python get_cost_candidates.py --naist_jdic_path=/path/to/naist-jdic.csv \
 7 |     --pos=名詞 \
 8 |     --pos_detail_1=固有名詞 \
 9 |     --pos_detail_2=一般 \
10 |     --pos_detail_3=*
11 | 
12 | cost_candidatesの値の詳細は以下の通りです。
13 | - 1番目の値はnaist_jdic内の同一品詞の最小コストから1を引いたもの、11番目の値は最大コストに1を足したものです。
14 | - 2番目の値はnaist_jdic内の同一品詞のコストの下位1%、10番目の値は99%の値です。
15 | - 6番目の値はnaist_jdic内の同一品詞のコストの最頻値です。
16 | - 2番目から6番目、6番目から10番目までの値は一定割合で増加するようになっています。
17 | """
18 | 
19 | import argparse
20 | import statistics
21 | from pathlib import Path
22 | from typing import List
23 | 
24 | import numpy as np
25 | 
26 | 
def get_candidates(
    naist_jdic_path: Path,
    pos: str,
    pos_detail_1: str,
    pos_detail_2: str,
    pos_detail_3: str,
) -> List[int]:
    """Compute the 11 cost candidates for one part of speech.

    The dictionary file is read as UTF-8, one comma-separated row per line.
    Each row must have exactly 15 fields; field 4 is the cost and fields 5-8
    are compared, as exact strings, with the requested part of speech.

    Args:
        naist_jdic_path: Path to the dictionary CSV file.
        pos: Part of speech to select (field 5).
        pos_detail_1: First part-of-speech detail (field 6).
        pos_detail_2: Second part-of-speech detail (field 7).
        pos_detail_3: Third part-of-speech detail (field 8).

    Returns:
        A list of 11 ints:
        ``[min - 1]``, then five values stepping evenly from the 1% quantile
        to the mode, then four values stepping evenly from the mode to the
        99% quantile, then ``[max + 1]``. Interpolated values are truncated
        with ``int()``.

    Raises:
        ValueError: If a non-blank row does not have 15 fields, if a selected
            cost is not an integer, or if no row matches the requested part
            of speech.
    """
    target = (pos, pos_detail_1, pos_detail_2, pos_detail_3)
    costs = []
    with naist_jdic_path.open(encoding="utf-8") as f:
        for line_number, line in enumerate(f, start=1):
            # A blank line (e.g. a trailing newline at EOF) carries no entry.
            if not line.strip():
                continue
            fields = line.split(",")
            if len(fields) != 15:
                raise ValueError(
                    f"{naist_jdic_path}:{line_number}: "
                    f"expected 15 fields, got {len(fields)}"
                )
            if tuple(fields[4:8]) == target:
                costs.append(int(fields[3]))

    # Explicit check instead of `assert`, so it survives `python -O` and
    # tells the user which part of speech was not found.
    if not costs:
        raise ValueError(f"no entries found for part of speech {target}")

    cost_min = min(costs) - 1
    cost_1per = np.quantile(costs, 0.01).astype(np.int64)
    cost_mode = statistics.mode(costs)
    cost_99per = np.quantile(costs, 0.99).astype(np.int64)
    cost_max = max(costs) + 1
    return (
        [cost_min]
        + [int(cost_1per + (cost_mode - cost_1per) * i / 4) for i in range(5)]
        + [int(cost_mode + (cost_99per - cost_mode) * i / 4) for i in range(1, 5)]
        + [cost_max]
    )
73 | 
74 | 
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Every option is mandatory: a missing one would otherwise be passed on as
    # None and fail later with an unrelated error instead of a usage message.
    parser.add_argument("--naist_jdic_path", type=Path, required=True)
    parser.add_argument("--pos", type=str, required=True)
    parser.add_argument("--pos_detail_1", type=str, required=True)
    parser.add_argument("--pos_detail_2", type=str, required=True)
    parser.add_argument("--pos_detail_3", type=str, required=True)
    args = parser.parse_args()
    # Print the computed candidate list to stdout.
    print(
        get_candidates(
            naist_jdic_path=args.naist_jdic_path,
            pos=args.pos,
            pos_detail_1=args.pos_detail_1,
            pos_detail_2=args.pos_detail_2,
            pos_detail_3=args.pos_detail_3,
        )
    )
92 | 


--------------------------------------------------------------------------------
/make_docs.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | from voicevox_engine.dev.core import mock as core
 4 | from voicevox_engine.dev.synthesis_engine.mock import MockSynthesisEngine
 5 | from voicevox_engine.setting import USER_SETTING_PATH, SettingLoader
 6 | 
 7 | if __name__ == "__main__":
 8 |     import run
 9 | 
10 |     app = run.generate_app(
11 |         synthesis_engines={"mock": MockSynthesisEngine(speakers=core.metas())},
12 |         latest_core_version="mock",
13 |         setting_loader=SettingLoader(USER_SETTING_PATH),
14 |     )
15 |     with open("docs/api/index.html", "w") as f:
16 |         f.write(
17 |             """<!DOCTYPE html>
18 | <html lang="ja">
19 | <head>
20 |     <title>voicevox_engine API Document</title>
21 |     <meta charset="utf-8">
22 |     <link rel="shortcut icon" href="https://voicevox.hiroshiba.jp/favicon-32x32.png">
23 | </head>
24 | <body>
25 |     <div id="redoc-container"></div>
26 |     <script src="https://cdn.jsdelivr.net/npm/redoc/bundles/redoc.standalone.js"></script>
27 |     <script>
28 |         Redoc.init(%s, {"hideHostname": true}, document.getElementById("redoc-container"));
29 |     </script>
30 | </body>
31 | </html>"""
32 |             % json.dumps(app.openapi())
33 |         )
34 | 


--------------------------------------------------------------------------------
/presets.yaml:
--------------------------------------------------------------------------------
 1 | - id: 1
 2 |   name: サンプルプリセット
 3 |   speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff
 4 |   style_id: 0
 5 |   speedScale: 1
 6 |   pitchScale: 0
 7 |   intonationScale: 1
 8 |   volumeScale: 1
 9 |   prePhonemeLength: 0.1
10 |   postPhonemeLength: 0.1
11 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.pysen]
 2 | version = "0.10.3"
 3 | 
 4 | [tool.pysen.lint]
 5 | enable_black = true
 6 | enable_flake8 = true
 7 | enable_isort = true
 8 | enable_mypy = false    # TODO: eliminate errors and enable at CI
 9 | mypy_preset = "entry"  # TODO: "strict"
10 | line_length = 88
11 | py_version = "py38"
12 | isort_known_first_party = ["voicevox_engine"]
13 | isort_known_third_party = ["numpy"]
14 | [[tool.pysen.lint.mypy_targets]]
15 |   paths = [".", "voicevox_engine/"]
16 | 
17 | [tool.black] # automatically generated by pysen
18 | line-length = 88
19 | target-version = ["py38"]
20 | 
21 | 
22 | [tool.isort] # automatically generated by pysen
23 | default_section = "THIRDPARTY"
24 | ensure_newline_before_comments = true
25 | force_grid_wrap = 0
26 | force_single_line = false
27 | include_trailing_comma = true
28 | known_first_party = ["voicevox_engine"]
29 | known_third_party = ["numpy"]
30 | line_length = 88
31 | multi_line_output = 3
32 | use_parentheses = true
33 | 
34 | [tool.poetry]
35 | name = "voicevox_engine"
36 | version = "0.0.0"
37 | description = ""
38 | authors = ["Hiroshiba <hihokaruta@gmail.com>"]
39 | 
40 | [tool.poetry.dependencies]
41 | python = "~3.8,>=3.8.1"
42 | numpy = "^1.20.0"
43 | fastapi = "^0.70.0"
44 | python-multipart = "^0.0.5"
45 | uvicorn = "^0.15.0"
46 | aiofiles = "^0.7.0"
47 | soundfile = "^0.10.3.post1"
48 | scipy = "^1.7.1"
49 | pyyaml = "^6.0"
50 | pyworld = "^0.3.0"
51 | appdirs = "^1.4.4"
52 | requests = "^2.28.1"
53 | jinja2 = "^3.1.2"
54 | pyopenjtalk = {git = "https://github.com/VOICEVOX/pyopenjtalk", rev = "f4ade29ef9a4f43d8605103cb5bacc29e0b2ccae"}
55 | 
56 | [tool.poetry.group.dev.dependencies]
57 | cython = "^0.29.24"
58 | pyinstaller = "^5.3"
59 | pre-commit = "^2.16.0"
60 | atomicwrites = "^1.4.0"
61 | colorama = "^0.4.4"
62 | poetry = "^1.3.1"
63 | 
64 | [tool.poetry.group.test.dependencies]
65 | pysen = "~0.10.3"
66 | black = "^22.12.0"
67 | flake8-bugbear = "^23.1.0"
68 | flake8 = "^6.0.0"
69 | isort = "^5.12.0"
70 | mypy = "~0.991"
71 | pytest = "^6.2.5"
72 | coveralls = "^3.2.0"
73 | 
74 | [tool.poetry.group.license.dependencies]
75 | pip-licenses = "^3.5.3"
76 | 
77 | [build-system]
78 | requires = ["poetry-core"]
79 | build-backend = "poetry.core.masonry.api"
80 | 


--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
 1 | aiofiles==0.7.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
 2 | altgraph==0.17.3 ; python_full_version >= "3.8.1" and python_version < "3.9"
 3 | anyio==3.6.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
 4 | appdirs==1.4.4 ; python_full_version >= "3.8.1" and python_version < "3.9"
 5 | asgiref==3.6.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
 6 | atomicwrites==1.4.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
 7 | attrs==22.2.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
 8 | cachecontrol[filecache]==0.12.11 ; python_full_version >= "3.8.1" and python_version < "3.9"
 9 | certifi==2022.12.7 ; python_full_version >= "3.8.1" and python_version < "3.9"
10 | cffi==1.15.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
11 | cfgv==3.3.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
12 | charset-normalizer==2.1.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
13 | cleo==2.0.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
14 | click==8.0.4 ; python_full_version >= "3.8.1" and python_version < "3.9"
15 | colorama==0.4.4 ; python_full_version >= "3.8.1" and python_version < "3.9"
16 | crashtest==0.4.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
17 | cryptography==39.0.0 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "linux"
18 | cython==0.29.24 ; python_full_version >= "3.8.1" and python_version < "3.9"
19 | distlib==0.3.6 ; python_full_version >= "3.8.1" and python_version < "3.9"
20 | dulwich==0.20.50 ; python_full_version >= "3.8.1" and python_version < "3.9"
21 | fastapi==0.70.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
22 | filelock==3.8.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
23 | future==0.18.2 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "win32"
24 | h11==0.14.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
25 | html5lib==1.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
26 | identify==2.5.11 ; python_full_version >= "3.8.1" and python_version < "3.9"
27 | idna==3.4 ; python_full_version >= "3.8.1" and python_version < "3.9"
28 | importlib-metadata==4.13.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
29 | importlib-resources==5.10.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
30 | jaraco-classes==3.2.3 ; python_full_version >= "3.8.1" and python_version < "3.9"
31 | jeepney==0.8.0 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "linux"
32 | jinja2==3.1.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
33 | jsonschema==4.17.3 ; python_full_version >= "3.8.1" and python_version < "3.9"
34 | keyring==23.13.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
35 | lockfile==0.12.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
36 | macholib==1.16.2 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "darwin"
37 | markupsafe==2.1.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
38 | more-itertools==9.0.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
39 | msgpack==1.0.4 ; python_full_version >= "3.8.1" and python_version < "3.9"
40 | nodeenv==1.7.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
41 | numpy==1.20.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
42 | packaging==22.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
43 | pefile==2022.5.30 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "win32"
44 | pexpect==4.8.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
45 | pkginfo==1.9.4 ; python_full_version >= "3.8.1" and python_version < "3.9"
46 | pkgutil-resolve-name==1.3.10 ; python_full_version >= "3.8.1" and python_version < "3.9"
47 | platformdirs==2.6.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
48 | poetry-core==1.4.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
49 | poetry-plugin-export==1.2.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
50 | poetry==1.3.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
51 | pre-commit==2.16.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
52 | ptyprocess==0.7.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
53 | pycparser==2.21 ; python_full_version >= "3.8.1" and python_version < "3.9"
54 | pydantic==1.10.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
55 | pyinstaller-hooks-contrib==2022.14 ; python_full_version >= "3.8.1" and python_version < "3.9"
56 | pyinstaller==5.3 ; python_full_version >= "3.8.1" and python_version < "3.9"
57 | pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@f4ade29ef9a4f43d8605103cb5bacc29e0b2ccae ; python_full_version >= "3.8.1" and python_version < "3.9"
58 | pyrsistent==0.19.3 ; python_full_version >= "3.8.1" and python_version < "3.9"
59 | python-multipart==0.0.5 ; python_full_version >= "3.8.1" and python_version < "3.9"
60 | pywin32-ctypes==0.2.0 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "win32"
61 | pyworld==0.3.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
62 | pyyaml==6.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
63 | rapidfuzz==2.13.7 ; python_full_version >= "3.8.1" and python_version < "3.9"
64 | requests-toolbelt==0.10.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
65 | requests==2.28.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
66 | scipy==1.7.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
67 | secretstorage==3.3.3 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "linux"
68 | setuptools==65.6.3 ; python_full_version >= "3.8.1" and python_version < "3.9"
69 | shellingham==1.5.0.post1 ; python_full_version >= "3.8.1" and python_version < "3.9"
70 | six==1.16.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
71 | sniffio==1.3.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
72 | soundfile==0.10.3.post1 ; python_full_version >= "3.8.1" and python_version < "3.9"
73 | starlette==0.16.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
74 | toml==0.10.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
75 | tomli==2.0.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
76 | tomlkit==0.11.6 ; python_full_version >= "3.8.1" and python_version < "3.9"
77 | tqdm==4.64.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
78 | trove-classifiers==2022.12.22 ; python_full_version >= "3.8.1" and python_version < "3.9"
79 | typing-extensions==4.4.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
80 | urllib3==1.26.13 ; python_full_version >= "3.8.1" and python_version < "3.9"
81 | uvicorn==0.15.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
82 | virtualenv==20.17.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
83 | webencodings==0.5.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
84 | xattr==0.10.1 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "darwin"
85 | zipp==3.11.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
86 | 


--------------------------------------------------------------------------------
/requirements-license.txt:
--------------------------------------------------------------------------------
 1 | aiofiles==0.7.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
 2 | anyio==3.6.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
 3 | appdirs==1.4.4 ; python_full_version >= "3.8.1" and python_version < "3.9"
 4 | asgiref==3.6.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
 5 | certifi==2022.12.7 ; python_full_version >= "3.8.1" and python_version < "3.9"
 6 | cffi==1.15.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
 7 | charset-normalizer==2.1.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
 8 | click==8.0.4 ; python_full_version >= "3.8.1" and python_version < "3.9"
 9 | colorama==0.4.4 ; python_full_version >= "3.8.1" and python_version < "3.9" and platform_system == "Windows"
10 | cython==0.29.24 ; python_full_version >= "3.8.1" and python_version < "3.9"
11 | fastapi==0.70.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
12 | h11==0.14.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
13 | idna==3.4 ; python_full_version >= "3.8.1" and python_version < "3.9"
14 | jinja2==3.1.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
15 | markupsafe==2.1.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
16 | numpy==1.20.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
17 | pip-licenses==3.5.5 ; python_full_version >= "3.8.1" and python_version < "3.9"
18 | ptable==0.9.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
19 | pycparser==2.21 ; python_full_version >= "3.8.1" and python_version < "3.9"
20 | pydantic==1.10.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
21 | pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@f4ade29ef9a4f43d8605103cb5bacc29e0b2ccae ; python_full_version >= "3.8.1" and python_version < "3.9"
22 | python-multipart==0.0.5 ; python_full_version >= "3.8.1" and python_version < "3.9"
23 | pyworld==0.3.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
24 | pyyaml==6.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
25 | requests==2.28.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
26 | scipy==1.7.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
27 | six==1.16.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
28 | sniffio==1.3.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
29 | soundfile==0.10.3.post1 ; python_full_version >= "3.8.1" and python_version < "3.9"
30 | starlette==0.16.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
31 | tqdm==4.64.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
32 | typing-extensions==4.4.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
33 | urllib3==1.26.13 ; python_full_version >= "3.8.1" and python_version < "3.9"
34 | uvicorn==0.15.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
35 | 


--------------------------------------------------------------------------------
/requirements-test.txt:
--------------------------------------------------------------------------------
 1 | aiofiles==0.7.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
 2 | anyio==3.6.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
 3 | appdirs==1.4.4 ; python_full_version >= "3.8.1" and python_version < "3.9"
 4 | asgiref==3.6.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
 5 | atomicwrites==1.4.0 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "win32"
 6 | attrs==22.2.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
 7 | black==22.12.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
 8 | certifi==2022.12.7 ; python_full_version >= "3.8.1" and python_version < "3.9"
 9 | cffi==1.15.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
10 | charset-normalizer==2.1.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
11 | click==8.0.4 ; python_full_version >= "3.8.1" and python_version < "3.9"
12 | colorama==0.4.4 ; python_full_version >= "3.8.1" and python_version < "3.9" and sys_platform == "win32" or python_full_version >= "3.8.1" and python_version < "3.9" and platform_system == "Windows"
13 | colorlog==4.8.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
14 | coverage==5.5 ; python_full_version >= "3.8.1" and python_version < "3.9"
15 | coveralls==3.2.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
16 | cython==0.29.24 ; python_full_version >= "3.8.1" and python_version < "3.9"
17 | dacite==1.7.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
18 | docopt==0.6.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
19 | fastapi==0.70.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
20 | flake8-bugbear==23.1.20 ; python_full_version >= "3.8.1" and python_version < "3.9"
21 | flake8==6.0.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
22 | gitdb==4.0.10 ; python_full_version >= "3.8.1" and python_version < "3.9"
23 | gitpython==3.1.29 ; python_full_version >= "3.8.1" and python_version < "3.9"
24 | h11==0.14.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
25 | idna==3.4 ; python_full_version >= "3.8.1" and python_version < "3.9"
26 | iniconfig==1.1.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
27 | isort==5.12.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
28 | jinja2==3.1.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
29 | markupsafe==2.1.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
30 | mccabe==0.7.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
31 | mypy-extensions==0.4.3 ; python_full_version >= "3.8.1" and python_version < "3.9"
32 | mypy==0.991 ; python_full_version >= "3.8.1" and python_version < "3.9"
33 | numpy==1.20.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
34 | packaging==22.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
35 | pathspec==0.10.3 ; python_full_version >= "3.8.1" and python_version < "3.9"
36 | platformdirs==2.6.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
37 | pluggy==1.0.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
38 | py==1.11.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
39 | pycodestyle==2.10.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
40 | pycparser==2.21 ; python_full_version >= "3.8.1" and python_version < "3.9"
41 | pydantic==1.10.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
42 | pyflakes==3.0.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
43 | pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@f4ade29ef9a4f43d8605103cb5bacc29e0b2ccae ; python_full_version >= "3.8.1" and python_version < "3.9"
44 | pysen==0.10.3 ; python_full_version >= "3.8.1" and python_version < "3.9"
45 | pytest==6.2.5 ; python_full_version >= "3.8.1" and python_version < "3.9"
46 | python-multipart==0.0.5 ; python_full_version >= "3.8.1" and python_version < "3.9"
47 | pyworld==0.3.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
48 | pyyaml==6.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
49 | requests==2.28.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
50 | scipy==1.7.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
51 | six==1.16.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
52 | smmap==5.0.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
53 | sniffio==1.3.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
54 | soundfile==0.10.3.post1 ; python_full_version >= "3.8.1" and python_version < "3.9"
55 | starlette==0.16.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
56 | toml==0.10.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
57 | tomli==2.0.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
58 | tomlkit==0.11.6 ; python_full_version >= "3.8.1" and python_version < "3.9"
59 | tqdm==4.64.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
60 | typing-extensions==4.4.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
61 | unidiff==0.7.4 ; python_full_version >= "3.8.1" and python_version < "3.9"
62 | urllib3==1.26.13 ; python_full_version >= "3.8.1" and python_version < "3.9"
63 | uvicorn==0.15.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
64 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | aiofiles==0.7.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
 2 | anyio==3.6.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
 3 | appdirs==1.4.4 ; python_full_version >= "3.8.1" and python_version < "3.9"
 4 | asgiref==3.6.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
 5 | certifi==2022.12.7 ; python_full_version >= "3.8.1" and python_version < "3.9"
 6 | cffi==1.15.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
 7 | charset-normalizer==2.1.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
 8 | click==8.0.4 ; python_full_version >= "3.8.1" and python_version < "3.9"
 9 | colorama==0.4.4 ; python_full_version >= "3.8.1" and python_version < "3.9" and platform_system == "Windows"
10 | cython==0.29.24 ; python_full_version >= "3.8.1" and python_version < "3.9"
11 | fastapi==0.70.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
12 | h11==0.14.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
13 | idna==3.4 ; python_full_version >= "3.8.1" and python_version < "3.9"
14 | jinja2==3.1.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
15 | markupsafe==2.1.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
16 | numpy==1.20.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
17 | pycparser==2.21 ; python_full_version >= "3.8.1" and python_version < "3.9"
18 | pydantic==1.10.2 ; python_full_version >= "3.8.1" and python_version < "3.9"
19 | pyopenjtalk @ git+https://github.com/VOICEVOX/pyopenjtalk@f4ade29ef9a4f43d8605103cb5bacc29e0b2ccae ; python_full_version >= "3.8.1" and python_version < "3.9"
20 | python-multipart==0.0.5 ; python_full_version >= "3.8.1" and python_version < "3.9"
21 | pyworld==0.3.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
22 | pyyaml==6.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
23 | requests==2.28.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
24 | scipy==1.7.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
25 | six==1.16.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
26 | sniffio==1.3.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
27 | soundfile==0.10.3.post1 ; python_full_version >= "3.8.1" and python_version < "3.9"
28 | starlette==0.16.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
29 | tqdm==4.64.1 ; python_full_version >= "3.8.1" and python_version < "3.9"
30 | typing-extensions==4.4.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
31 | urllib3==1.26.13 ; python_full_version >= "3.8.1" and python_version < "3.9"
32 | uvicorn==0.15.0 ; python_full_version >= "3.8.1" and python_version < "3.9"
33 | 


--------------------------------------------------------------------------------
/run.spec:
--------------------------------------------------------------------------------
 1 | # -*- mode: python ; coding: utf-8 -*-
 2 | # このファイルはPyInstallerによって自動生成されたもので、それをカスタマイズして使用しています。
 3 | from PyInstaller.utils.hooks import collect_data_files
 4 | import os
 5 | 
 6 | datas = [
 7 |     ('engine_manifest_assets', 'engine_manifest_assets'),
 8 |     ('speaker_info', 'speaker_info'),
 9 |     ('engine_manifest.json', '.'),
10 |     ('default.csv', '.'),
11 |     ('licenses.json', '.'),
12 |     ('presets.yaml', '.'),
13 |     ('default_setting.yml', '.'),
14 |     ('ui_template', 'ui_template'),
15 |     ('model', 'model'),
16 | ]
17 | datas += collect_data_files('pyopenjtalk')
18 | 
19 | # コアとONNX Runtimeはバイナリであるが、`binaries`に加えると
20 | # 依存関係のパスがPyInstallerに書き換えらるので、`datas`に加える
21 | # 参考: https://github.com/VOICEVOX/voicevox_engine/pull/446#issuecomment-1210052318
22 | libcore_path = os.environ.get('LIBCORE_PATH')
23 | if libcore_path:
24 |     print('LIBCORE_PATH is found:', libcore_path)
25 |     if not os.path.isfile(libcore_path):
26 |         raise Exception("LIBCORE_PATH was found, but it is not file!")
27 |     datas += [(libcore_path, ".")]
28 | 
29 | libonnxruntime_path = os.environ.get('LIBONNXRUNTIME_PATH')
30 | if libonnxruntime_path:
31 |     print('LIBONNXRUNTIME_PATH is found:', libonnxruntime_path)
32 |     if not os.path.isfile(libonnxruntime_path):
33 |         raise Exception("LIBCORE_PATH was found, but it is not file!")
34 |     datas += [(libonnxruntime_path, ".")]
35 | 
36 | 
37 | block_cipher = None
38 | 
39 | 
40 | a = Analysis(
41 |     ['run.py'],
42 |     pathex=[],
43 |     binaries=[],
44 |     datas=datas,
45 |     hiddenimports=[],
46 |     hookspath=[],
47 |     hooksconfig={},
48 |     runtime_hooks=[],
49 |     excludes=[],
50 |     win_no_prefer_redirects=False,
51 |     win_private_assemblies=False,
52 |     cipher=block_cipher,
53 |     noarchive=False,
54 | )
55 | 
56 | pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
57 | 
58 | exe = EXE(
59 |     pyz,
60 |     a.scripts,
61 |     [],
62 |     exclude_binaries=True,
63 |     name='run',
64 |     debug=False,
65 |     bootloader_ignore_signals=False,
66 |     strip=False,
67 |     upx=True,
68 |     console=True,
69 |     disable_windowed_traceback=False,
70 |     argv_emulation=False,
71 |     target_arch=None,
72 |     codesign_identity=None,
73 |     entitlements_file=None,
74 | )
75 | 
76 | coll = COLLECT(
77 |     exe,
78 |     a.binaries,
79 |     a.zipfiles,
80 |     a.datas,
81 |     strip=False,
82 |     upx=True,
83 |     upx_exclude=[],
84 |     name='run',
85 | )
86 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | # automatically generated by pysen
 3 | # e203: black treats : as a binary operator
 4 | # e231: black doesn't put a space after ,
 5 | # e501: black may exceed the line-length to follow other style rules
 6 | # w503 or w504: either one needs to be disabled to select w error codes
 7 | # ignore = E203,E231,E501,W503
 8 | max-line-length = 88
 9 | select = B,B950,C,E,F,W
10 | # e741: do not use variables named 'I', 'O', or 'l'
11 | ignore = E203,W503,E741
12 | 
13 | 
14 | [mypy]
15 | # automatically generated by pysen
16 | check_untyped_defs = True
17 | disallow_any_decorated = False
18 | disallow_any_generics = True
19 | disallow_any_unimported = False
20 | disallow_incomplete_defs = True
21 | disallow_subclassing_any = True
22 | disallow_untyped_calls = False
23 | disallow_untyped_decorators = False
24 | disallow_untyped_defs = False
25 | ignore_errors = False
26 | ignore_missing_imports = True
27 | no_implicit_optional = True
28 | python_version = 3.7
29 | show_error_codes = True
30 | strict_equality = True
31 | strict_optional = True
32 | warn_redundant_casts = True
33 | warn_return_any = False
34 | warn_unreachable = True
35 | warn_unused_configs = True
36 | warn_unused_ignores = True
37 | 
38 | 


--------------------------------------------------------------------------------
/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/icons/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/icons/8.png


--------------------------------------------------------------------------------
/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/metas.json:
--------------------------------------------------------------------------------
1 | {
2 |   "supported_features": { "permitted_synthesis_morphing": "NOTHING" }
3 | }
4 | 


--------------------------------------------------------------------------------
/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/policy.md:
--------------------------------------------------------------------------------
1 | dummy3 policy
2 | 
3 | https://voicevox.hiroshiba.jp/
4 | 


--------------------------------------------------------------------------------
/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portrait.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portrait.png


--------------------------------------------------------------------------------
/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portraits/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/portraits/8.png


--------------------------------------------------------------------------------
/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_001.wav


--------------------------------------------------------------------------------
/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_002.wav


--------------------------------------------------------------------------------
/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/35b2c544-660e-401e-b503-0e14c635303a/voice_samples/8_003.wav


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/1.png


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/3.png


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/5.png


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/icons/7.png


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/metas.json:
--------------------------------------------------------------------------------
1 | {
2 |   "supported_features": { "permitted_synthesis_morphing": "SELF_ONLY" }
3 | }
4 | 


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/policy.md:
--------------------------------------------------------------------------------
1 | dummy2 policy
2 | 
3 | https://voicevox.hiroshiba.jp/
4 | 


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portrait.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portrait.png


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portraits/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/portraits/3.png


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_001.wav


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_002.wav


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/1_003.wav


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_001.wav


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_002.wav


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/3_003.wav


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_001.wav


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_002.wav


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/5_003.wav


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_001.wav


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_002.wav


--------------------------------------------------------------------------------
/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/388f246b-8c41-4ac1-8e2d-5d79f3ff56d9/voice_samples/7_003.wav


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/0.png


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/2.png


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/4.png


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/icons/6.png


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/metas.json:
--------------------------------------------------------------------------------
1 | {}
2 | 


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/policy.md:
--------------------------------------------------------------------------------
1 | dummy1 policy
2 | 
3 | https://voicevox.hiroshiba.jp/
4 | 


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portrait.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portrait.png


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/0.png


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/2.png


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/4.png


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/portraits/6.png


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_001.wav


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_002.wav


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/0_003.wav


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_001.wav


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_002.wav


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/2_003.wav


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_001.wav


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_002.wav


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/4_003.wav


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_001.wav


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_002.wav


--------------------------------------------------------------------------------
/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff/voice_samples/6_003.wav


--------------------------------------------------------------------------------
/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/icons/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/icons/9.png


--------------------------------------------------------------------------------
/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/metas.json:
--------------------------------------------------------------------------------
1 | {
2 |   "supported_features": { "permitted_synthesis_morphing": "ALL" }
3 | }
4 | 


--------------------------------------------------------------------------------
/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/policy.md:
--------------------------------------------------------------------------------
1 | dummy4 policy
2 | 
3 | https://voicevox.hiroshiba.jp/
4 | 


--------------------------------------------------------------------------------
/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/portrait.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/portrait.png


--------------------------------------------------------------------------------
/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_001.wav


--------------------------------------------------------------------------------
/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_002.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_002.wav


--------------------------------------------------------------------------------
/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_003.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/speaker_info/b1a81618-b27b-40d2-b0ea-27a9ad408c4b/voice_samples/9_003.wav


--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/test/__init__.py


--------------------------------------------------------------------------------
/test/presets-test-1.yaml:
--------------------------------------------------------------------------------
 1 | - id: 1
 2 |   name: test
 3 |   speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff
 4 |   style_id: 0
 5 |   speedScale: 1
 6 |   pitchScale: 0
 7 |   intonationScale: 1
 8 |   volumeScale: 1
 9 |   prePhonemeLength: 0.1
10 |   postPhonemeLength: 0.1
11 | 
12 | - id: 2
13 |   name: test2
14 |   speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff
15 |   style_id: 2
16 |   speedScale: 1.5
17 |   pitchScale: 0
18 |   intonationScale: 1
19 |   volumeScale: 0.7
20 |   prePhonemeLength: 0.5
21 |   postPhonemeLength: 0.5
22 | 


--------------------------------------------------------------------------------
/test/presets-test-2.yaml:
--------------------------------------------------------------------------------
 1 | - id: 1
 2 |   name: test
 3 |   speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff
 4 |   style_id: not_int
 5 |   speedScale: 1
 6 |   pitchScale: 0
 7 |   intonationScale: 1
 8 |   volumeScale: 1
 9 |   prePhonemeLength: 0.1
10 |   postPhonemeLength: 0.1
11 | 
12 | - id: 2
13 |   name: test2
14 |   speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff
15 |   style_id: 2
16 |   speedScale: 1.5
17 |   pitchScale: 0
18 |   intonationScale: 1
19 |   volumeScale: 0.7
20 |   prePhonemeLength: 0.5
21 |   postPhonemeLength: 0.5
22 | 


--------------------------------------------------------------------------------
/test/presets-test-3.yaml:
--------------------------------------------------------------------------------
 1 | - id: 1
 2 |   name: test
 3 |   speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff
 4 |   style_id: 0
 5 |   speedScale: 1
 6 |   pitchScale: 0
 7 |   intonationScale: 1
 8 |   volumeScale: 1
 9 |   prePhonemeLength: 0.1
10 |   postPhonemeLength: 0.1
11 | 
12 | - id: 1
13 |   name: test2
14 |   speaker_uuid: 7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff
15 |   style_id: 2
16 |   speedScale: 1.5
17 |   pitchScale: 0
18 |   intonationScale: 1
19 |   volumeScale: 0.7
20 |   prePhonemeLength: 0.5
21 |   postPhonemeLength: 0.5
22 | 


--------------------------------------------------------------------------------
/test/presets-test-4.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shirowanisan/voicevox_engine/5e91729a2c1910886e3e710ba3f44e509c96bc89/test/presets-test-4.yaml


--------------------------------------------------------------------------------
/test/test_connect_base64_waves.py:
--------------------------------------------------------------------------------
  1 | import base64
  2 | import io
  3 | from unittest import TestCase
  4 | 
  5 | import numpy as np
  6 | import numpy.testing
  7 | import soundfile
  8 | from scipy.signal import resample
  9 | 
 10 | from voicevox_engine.utility import ConnectBase64WavesException, connect_base64_waves
 11 | 
 12 | 
 13 | def generate_sine_wave_ndarray(
 14 |     seconds: float, samplerate: int, frequency: float
 15 | ) -> np.ndarray:  # build a 1-D float32 sine wave of int(seconds * samplerate) samples
 16 |     x = np.linspace(0, seconds, int(seconds * samplerate), endpoint=False)  # sample times in [0, seconds)
 17 |     wave = np.sin(2 * np.pi * frequency * x).astype(np.float32)  # sin(2*pi*f*t), cast to float32
 18 | 
 19 |     return wave
 20 | 
 21 | 
 22 | def encode_bytes(wave_ndarray: np.ndarray, samplerate: int) -> bytes:  # serialize samples to WAV bytes in memory
 23 |     wave_bio = io.BytesIO()  # in-memory target, nothing is written to disk
 24 |     soundfile.write(
 25 |         file=wave_bio,
 26 |         data=wave_ndarray,
 27 |         samplerate=samplerate,
 28 |         format="WAV",
 29 |         subtype="FLOAT",  # floating-point sample subtype
 30 |     )
 31 |     wave_bio.seek(0)  # rewind the buffer; getvalue() below returns the full contents either way
 32 | 
 33 |     return wave_bio.getvalue()
 34 | 
 35 | 
 36 | def generate_sine_wave_bytes(
 37 |     seconds: float, samplerate: int, frequency: float
 38 | ) -> bytes:  # sine wave from generate_sine_wave_ndarray, encoded as WAV bytes
 39 |     wave_ndarray = generate_sine_wave_ndarray(seconds, samplerate, frequency)
 40 |     return encode_bytes(wave_ndarray, samplerate)
 41 | 
 42 | 
 43 | def encode_base64(wave_bytes: bytes) -> str:  # standard-alphabet base64 of the bytes, returned as str
 44 |     return base64.standard_b64encode(wave_bytes).decode("utf-8")
 45 | 
 46 | 
 47 | def generate_sine_wave_base64(seconds: float, samplerate: int, frequency: float) -> str:  # sine wave -> WAV bytes -> base64 str
 48 |     wave_bytes = generate_sine_wave_bytes(seconds, samplerate, frequency)
 49 |     wave_base64 = encode_base64(wave_bytes)
 50 |     return wave_base64
 51 | 
 52 | 
 53 | class TestConnectBase64Waves(TestCase):  # exercises connect_base64_waves on base64-encoded WAV inputs
 54 |     def test_connect(self):  # two copies of one wave must come back as their exact concatenation
 55 |         samplerate = 1000
 56 |         wave = generate_sine_wave_ndarray(
 57 |             seconds=2, samplerate=samplerate, frequency=10
 58 |         )
 59 |         wave_base64 = encode_base64(encode_bytes(wave, samplerate=samplerate))
 60 | 
 61 |         wave_x2_ref = np.concatenate([wave, wave])  # reference: the wave followed by itself
 62 | 
 63 |         wave_x2, _ = connect_base64_waves(waves=[wave_base64, wave_base64])  # second return value is not checked
 64 | 
 65 |         self.assertEqual(wave_x2_ref.shape, wave_x2.shape)
 66 | 
 67 |         self.assertTrue((wave_x2_ref == wave_x2).all())  # exact element-wise equality
 68 | 
 69 |     def test_no_wave_error(self):  # an empty input list must raise ConnectBase64WavesException
 70 |         self.assertRaises(ConnectBase64WavesException, connect_base64_waves, waves=[])
 71 | 
 72 |     def test_invalid_base64_error(self):  # a damaged base64 string must raise ConnectBase64WavesException
 73 |         wave_1000hz = generate_sine_wave_base64(
 74 |             seconds=2, samplerate=1000, frequency=10
 75 |         )
 76 |         wave_1000hz_broken = wave_1000hz[1:]  # remove head 1 char
 77 | 
 78 |         self.assertRaises(
 79 |             ConnectBase64WavesException,
 80 |             connect_base64_waves,
 81 |             waves=[
 82 |                 wave_1000hz_broken,
 83 |             ],
 84 |         )
 85 | 
 86 |     def test_invalid_wave_file_error(self):  # valid base64 wrapping a damaged WAV must raise as well
 87 |         wave_1000hz = generate_sine_wave_bytes(seconds=2, samplerate=1000, frequency=10)
 88 |         wave_1000hz_broken_bytes = wave_1000hz[1:]  # remove head 1 byte
 89 |         wave_1000hz_broken = encode_base64(wave_1000hz_broken_bytes)  # the base64 layer itself stays well-formed
 90 | 
 91 |         self.assertRaises(
 92 |             ConnectBase64WavesException,
 93 |             connect_base64_waves,
 94 |             waves=[
 95 |                 wave_1000hz_broken,
 96 |             ],
 97 |         )
 98 | 
 99 |     def test_different_frequency(self):  # inputs with different sampling rates (24000 Hz, then 1000 Hz)
100 |         wave_24000hz = generate_sine_wave_ndarray(
101 |             seconds=1, samplerate=24000, frequency=10
102 |         )
103 |         wave_1000hz = generate_sine_wave_ndarray(
104 |             seconds=2, samplerate=1000, frequency=10
105 |         )
106 |         wave_24000_base64 = encode_base64(encode_bytes(wave_24000hz, samplerate=24000))
107 |         wave_1000_base64 = encode_base64(encode_bytes(wave_1000hz, samplerate=1000))
108 | 
109 |         wave_1000hz_to2400hz = resample(wave_1000hz, 24000 * len(wave_1000hz) // 1000)  # 24x the sample count, i.e. 24000 Hz (the name says "2400hz")
110 |         wave_x2_ref = np.concatenate([wave_24000hz, wave_1000hz_to2400hz])  # reference: first wave, then the resampled second wave
111 | 
112 |         wave_x2, _ = connect_base64_waves(waves=[wave_24000_base64, wave_1000_base64])
113 | 
114 |         self.assertEqual(wave_x2_ref.shape, wave_x2.shape)
115 |         numpy.testing.assert_array_almost_equal(wave_x2_ref, wave_x2)  # approximate comparison, unlike the exact checks in the other tests
116 | 
117 |     def test_different_channels(self):  # a mono input followed by a stereo input
118 |         wave_1000hz = generate_sine_wave_ndarray(
119 |             seconds=2, samplerate=1000, frequency=10
120 |         )
121 |         wave_2ch_1000hz = np.array([wave_1000hz, wave_1000hz]).T  # shape (samples, 2): the same wave in both channels
122 |         wave_1ch_base64 = encode_base64(encode_bytes(wave_1000hz, samplerate=1000))
123 |         wave_2ch_base64 = encode_base64(encode_bytes(wave_2ch_1000hz, samplerate=1000))
124 | 
125 |         wave_x2_ref = np.concatenate([wave_2ch_1000hz, wave_2ch_1000hz])  # reference treats the mono input as two identical channels
126 | 
127 |         wave_x2, _ = connect_base64_waves(waves=[wave_1ch_base64, wave_2ch_base64])
128 | 
129 |         self.assertEqual(wave_x2_ref.shape, wave_x2.shape)
130 |         self.assertTrue((wave_x2_ref == wave_x2).all())  # exact element-wise equality
131 | 


--------------------------------------------------------------------------------
/test/test_mock_synthesis_engine.py:
--------------------------------------------------------------------------------
  1 | from unittest import TestCase
  2 | 
  3 | from voicevox_engine.dev.synthesis_engine import MockSynthesisEngine
  4 | from voicevox_engine.kana_parser import create_kana
  5 | from voicevox_engine.model import AccentPhrase, AudioQuery, Mora
  6 | 
  7 | 
  8 | class TestMockSynthesisEngine(TestCase):  # checks on MockSynthesisEngine using a fixed accent-phrase fixture
  9 |     def setUp(self):  # builds the shared fixture and a fresh engine before each test
 10 |         super().setUp()
 11 | 
 12 |         self.accent_phrases_hello_hiho = [  # two phrases; every length and pitch value is 0.0 (or None)
 13 |             AccentPhrase(  # first phrase: moras コ/ン/ニ/チ/ワ followed by a pause mora
 14 |                 moras=[
 15 |                     Mora(
 16 |                         text="コ",
 17 |                         consonant="k",
 18 |                         consonant_length=0.0,
 19 |                         vowel="o",
 20 |                         vowel_length=0.0,
 21 |                         pitch=0.0,
 22 |                     ),
 23 |                     Mora(
 24 |                         text="ン",
 25 |                         consonant=None,
 26 |                         consonant_length=None,
 27 |                         vowel="N",
 28 |                         vowel_length=0.0,
 29 |                         pitch=0.0,
 30 |                     ),
 31 |                     Mora(
 32 |                         text="ニ",
 33 |                         consonant="n",
 34 |                         consonant_length=0.0,
 35 |                         vowel="i",
 36 |                         vowel_length=0.0,
 37 |                         pitch=0.0,
 38 |                     ),
 39 |                     Mora(
 40 |                         text="チ",
 41 |                         consonant="ch",
 42 |                         consonant_length=0.0,
 43 |                         vowel="i",
 44 |                         vowel_length=0.0,
 45 |                         pitch=0.0,
 46 |                     ),
 47 |                     Mora(
 48 |                         text="ワ",
 49 |                         consonant="w",
 50 |                         consonant_length=0.0,
 51 |                         vowel="a",
 52 |                         vowel_length=0.0,
 53 |                         pitch=0.0,
 54 |                     ),
 55 |                 ],
 56 |                 accent=5,
 57 |                 pause_mora=Mora(  # pause after the first phrase, written as "、" with vowel "pau"
 58 |                     text="、",
 59 |                     consonant=None,
 60 |                     consonant_length=None,
 61 |                     vowel="pau",
 62 |                     vowel_length=0.0,
 63 |                     pitch=0.0,
 64 |                 ),
 65 |             ),
 66 |             AccentPhrase(  # second phrase: moras ヒ/ホ/デ/ス with no pause mora
 67 |                 moras=[
 68 |                     Mora(
 69 |                         text="ヒ",
 70 |                         consonant="h",
 71 |                         consonant_length=0.0,
 72 |                         vowel="i",
 73 |                         vowel_length=0.0,
 74 |                         pitch=0.0,
 75 |                     ),
 76 |                     Mora(
 77 |                         text="ホ",
 78 |                         consonant="h",
 79 |                         consonant_length=0.0,
 80 |                         vowel="o",
 81 |                         vowel_length=0.0,
 82 |                         pitch=0.0,
 83 |                     ),
 84 |                     Mora(
 85 |                         text="デ",
 86 |                         consonant="d",
 87 |                         consonant_length=0.0,
 88 |                         vowel="e",
 89 |                         vowel_length=0.0,
 90 |                         pitch=0.0,
 91 |                     ),
 92 |                     Mora(
 93 |                         text="ス",
 94 |                         consonant="s",
 95 |                         consonant_length=0.0,
 96 |                         vowel="U",
 97 |                         vowel_length=0.0,
 98 |                         pitch=0.0,
 99 |                     ),
100 |                 ],
101 |                 accent=1,
102 |                 pause_mora=None,
103 |             ),
104 |         ]
105 |         self.engine = MockSynthesisEngine(speakers="", supported_devices="")  # empty strings are passed for both arguments
106 | 
107 |     def test_replace_phoneme_length(self):  # the result must compare equal to the fixture that was passed in
108 |         self.assertEqual(
109 |             self.engine.replace_phoneme_length(
110 |                 accent_phrases=self.accent_phrases_hello_hiho,
111 |                 speaker_id=0,
112 |             ),
113 |             self.accent_phrases_hello_hiho,
114 |         )
115 | 
116 |     def test_replace_mora_pitch(self):  # the result must compare equal to the fixture that was passed in
117 |         self.assertEqual(
118 |             self.engine.replace_mora_pitch(
119 |                 accent_phrases=self.accent_phrases_hello_hiho,
120 |                 speaker_id=0,
121 |             ),
122 |             self.accent_phrases_hello_hiho,
123 |         )
124 | 
125 |     def test_synthesis(self):  # no assertion on the output; the test passes if synthesis() does not raise
126 |         self.engine.synthesis(
127 |             AudioQuery(
128 |                 accent_phrases=self.accent_phrases_hello_hiho,
129 |                 speedScale=1,
130 |                 pitchScale=0,
131 |                 intonationScale=1,
132 |                 volumeScale=1,
133 |                 prePhonemeLength=0.1,
134 |                 postPhonemeLength=0.1,
135 |                 outputSamplingRate=24000,
136 |                 outputStereo=False,
137 |                 kana=create_kana(self.accent_phrases_hello_hiho),  # kana text generated from the same fixture
138 |             ),
139 |             speaker_id=0,
140 |         )
141 | 


--------------------------------------------------------------------------------
/test/test_mora_list.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | 
 3 | from voicevox_engine.mora_list import openjtalk_mora2text
 4 | 
 5 | 
 6 | class TestOpenJTalkMoraList(TestCase):  # checks on the openjtalk_mora2text mapping
 7 |     def test_mora2text(self):  # spot-checks individual mora -> katakana entries
 8 |         self.assertEqual("ッ", openjtalk_mora2text["cl"])
 9 |         self.assertEqual("ティ", openjtalk_mora2text["ti"])
10 |         self.assertEqual("トゥ", openjtalk_mora2text["tu"])
11 |         self.assertEqual("ディ", openjtalk_mora2text["di"])
12 |         # GitHub issue #60
13 |         self.assertEqual("ギェ", openjtalk_mora2text["gye"])
14 |         self.assertEqual("イェ", openjtalk_mora2text["ye"])
15 | 
16 |     def test_mora2text_injective(self):
17 |         """Check that no two different moras map to the same kana reading."""
18 |         values = list(openjtalk_mora2text.values())
19 |         uniq_values = list(set(values))  # duplicates collapse here
20 |         self.assertCountEqual(values, uniq_values)  # fails if any value occurred more than once
21 | 


--------------------------------------------------------------------------------
/test/test_mora_to_text.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | 
 3 | # TODO: import from voicevox_engine.synthesis_engine.mora
 4 | from voicevox_engine.synthesis_engine.synthesis_engine_base import mora_to_text
 5 | 
 6 | 
 7 | class TestMoraToText(TestCase):  # checks mora_to_text: mora string -> katakana text
 8 |     def test_voice(self):  # moras written with lowercase vowels (plus "N" and "cl")
 9 |         self.assertEqual(mora_to_text("a"), "ア")
10 |         self.assertEqual(mora_to_text("i"), "イ")
11 |         self.assertEqual(mora_to_text("ka"), "カ")
12 |         self.assertEqual(mora_to_text("N"), "ン")
13 |         self.assertEqual(mora_to_text("cl"), "ッ")
14 |         self.assertEqual(mora_to_text("gye"), "ギェ")
15 |         self.assertEqual(mora_to_text("ye"), "イェ")
16 |         self.assertEqual(mora_to_text("wo"), "ウォ")
17 | 
18 |     def test_unvoice(self):  # an uppercase final vowel must give the same katakana as its lowercase form
19 |         self.assertEqual(mora_to_text("A"), "ア")
20 |         self.assertEqual(mora_to_text("I"), "イ")
21 |         self.assertEqual(mora_to_text("kA"), "カ")
22 |         self.assertEqual(mora_to_text("gyE"), "ギェ")
23 |         self.assertEqual(mora_to_text("yE"), "イェ")
24 |         self.assertEqual(mora_to_text("wO"), "ウォ")
25 | 
26 |     def test_invalid_mora(self):
27 |         """An unrecognized mora must not raise an exception."""
28 |         self.assertEqual(mora_to_text("x"), "x")  # unknown input is returned unchanged
29 |         self.assertEqual(mora_to_text(""), "")  # the empty string is returned unchanged too
30 | 


--------------------------------------------------------------------------------
/test/test_user_dict_model.py:
--------------------------------------------------------------------------------
  1 | from copy import deepcopy
  2 | from unittest import TestCase
  3 | 
  4 | from pydantic import ValidationError
  5 | 
  6 | from voicevox_engine.kana_parser import parse_kana
  7 | from voicevox_engine.model import UserDictWord
  8 | 
  9 | 
 10 | class TestUserDictWords(TestCase):  # validation and mora-count checks on the UserDictWord model
 11 |     def setUp(self):  # base keyword arguments; each test deep-copies and tweaks them
 12 |         self.test_model = {
 13 |             "surface": "テスト",
 14 |             "priority": 0,
 15 |             "part_of_speech": "名詞",
 16 |             "part_of_speech_detail_1": "固有名詞",
 17 |             "part_of_speech_detail_2": "一般",
 18 |             "part_of_speech_detail_3": "*",
 19 |             "inflectional_type": "*",
 20 |             "inflectional_form": "*",
 21 |             "stem": "*",
 22 |             "yomi": "テスト",
 23 |             "pronunciation": "テスト",
 24 |             "accent_type": 0,
 25 |             "accent_associative_rule": "*",
 26 |         }
 27 | 
 28 |     def test_valid_word(self):  # the unmodified base arguments must construct without ValidationError
 29 |         test_value = deepcopy(self.test_model)
 30 |         try:
 31 |             UserDictWord(**test_value)
 32 |         except ValidationError as e:
 33 |             self.fail(f"Unexpected Validation Error\n{str(e)}")
 34 | 
 35 |     def test_convert_to_zenkaku(self):  # an ASCII surface must be stored as full-width characters
 36 |         test_value = deepcopy(self.test_model)
 37 |         test_value["surface"] = "test"
 38 |         self.assertEqual(UserDictWord(**test_value).surface, "ｔｅｓｔ")
 39 | 
 40 |     def test_count_mora(self):  # the base pronunciation "テスト" must count as 3 moras
 41 |         test_value = deepcopy(self.test_model)
 42 |         self.assertEqual(UserDictWord(**test_value).mora_count, 3)
 43 | 
 44 |     def test_count_mora_x(self):  # kana + small-kana pairs: mora_count must agree with parse_kana
 45 |         test_value = deepcopy(self.test_model)
 46 |         for s in [chr(i) for i in range(12449, 12533)]:  # code points U+30A1 (ァ) through U+30F4 (ヴ)
 47 |             if s in ["ァ", "ィ", "ゥ", "ェ", "ォ", "ッ", "ャ", "ュ", "ョ", "ヮ"]:
 48 |                 continue  # these characters are not used as the leading character
 49 |             for x in "ァィゥェォャュョ":
 50 |                 expected_count = 0
 51 |                 test_value["pronunciation"] = s + x
 52 |                 for accent_phrase in parse_kana(
 53 |                     test_value["pronunciation"] + "'",
 54 |                 ):
 55 |                     expected_count += len(accent_phrase.moras)  # total moras across all parsed phrases
 56 |                 with self.subTest(s=s, x=x):  # report each (s, x) pair as its own sub-test
 57 |                     self.assertEqual(
 58 |                         UserDictWord(**test_value).mora_count,
 59 |                         expected_count,
 60 |                     )
 61 | 
 62 |     def test_count_mora_xwa(self):  # same comparison for a pronunciation containing small "ヮ"
 63 |         test_value = deepcopy(self.test_model)
 64 |         test_value["pronunciation"] = "クヮンセイ"
 65 |         expected_count = 0
 66 |         for accent_phrase in parse_kana(
 67 |             test_value["pronunciation"] + "'",
 68 |         ):
 69 |             expected_count += len(accent_phrase.moras)
 70 |         self.assertEqual(
 71 |             UserDictWord(**test_value).mora_count,
 72 |             expected_count,
 73 |         )
 74 | 
 75 |     def test_invalid_pronunciation_not_katakana(self):  # a hiragana pronunciation must be rejected
 76 |         test_value = deepcopy(self.test_model)
 77 |         test_value["pronunciation"] = "ぼいぼ"
 78 |         with self.assertRaises(ValidationError):
 79 |             UserDictWord(**test_value)
 80 | 
 81 |     def test_invalid_pronunciation_invalid_sutegana(self):  # the small kana in "アィウェォ" must be rejected
 82 |         test_value = deepcopy(self.test_model)
 83 |         test_value["pronunciation"] = "アィウェォ"
 84 |         with self.assertRaises(ValidationError):
 85 |             UserDictWord(**test_value)
 86 | 
 87 |     def test_invalid_pronunciation_invalid_xwa(self):  # small "ヮ" directly after "ア" must be rejected
 88 |         test_value = deepcopy(self.test_model)
 89 |         test_value["pronunciation"] = "アヮ"
 90 |         with self.assertRaises(ValidationError):
 91 |             UserDictWord(**test_value)
 92 | 
 93 |     def test_count_mora_voiced_sound(self):  # "ボイボ" must count as 3 moras
 94 |         test_value = deepcopy(self.test_model)
 95 |         test_value["pronunciation"] = "ボイボ"
 96 |         self.assertEqual(UserDictWord(**test_value).mora_count, 3)
 97 | 
 98 |     def test_invalid_accent_type(self):  # accent_type 4 with the 3-mora "テスト" must be rejected (presumably out of range; confirm in the model)
 99 |         test_value = deepcopy(self.test_model)
100 |         test_value["accent_type"] = 4
101 |         with self.assertRaises(ValidationError):
102 |             UserDictWord(**test_value)
103 | 
104 |     def test_invalid_accent_type_2(self):  # a negative accent_type must be rejected
105 |         test_value = deepcopy(self.test_model)
106 |         test_value["accent_type"] = -1
107 |         with self.assertRaises(ValidationError):
108 |             UserDictWord(**test_value)
109 | 


--------------------------------------------------------------------------------
/test/test_word_types.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | 
 3 | from voicevox_engine.model import WordTypes
 4 | from voicevox_engine.part_of_speech_data import part_of_speech_data
 5 | 
 6 | 
 7 | class TestWordTypes(TestCase):  # keeps WordTypes and part_of_speech_data in sync
 8 |     def test_word_types(self):  # WordTypes members and part_of_speech_data keys must match, in any order
 9 |         self.assertCountEqual(list(WordTypes), list(part_of_speech_data.keys()))
10 | 


--------------------------------------------------------------------------------
/ui_template/ui.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="ja">
  3 |     <head>
  4 |         <meta charset="utf-8" />
  5 |         <title>VOICEVOX Engine 設定</title>
  6 |         <link
  7 |             rel="shortcut icon"
  8 |             href="https://voicevox.hiroshiba.jp/favicon-32x32.png"
  9 |         />
 10 | 
 11 |         <link
 12 |             href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css"
 13 |             rel="stylesheet"
 14 |             integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC"
 15 |             crossorigin="anonymous"
 16 |         />
 17 |         <script
 18 |             src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js"
 19 |             integrity="sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM"
 20 |             crossorigin="anonymous"
 21 |         ></script>
 22 |     </head>
 23 | 
 24 |     <body>
 25 |         <div class="container p-3">
 26 |             <form method="post">
 27 |                 <div class="alert alert-warning" role="alert">
 28 |                     設定の変更の更新にはエンジンの再起動が必要です。
 29 |                 </div>
 30 | 
 31 |                 <div class="mb-3">
 32 |                     <label class="form-label">CORS Policy Mode</label>
 33 |                     <select
 34 |                         class="form-select"
 35 |                         aria-label="cors_policy_mode"
 36 |                         name="cors_policy_mode"
 37 |                     >
 38 |                         <option selected value="{{ cors_policy_mode }}">
 39 |                             現在値: {{ cors_policy_mode }}
 40 |                         </option>
 41 |                         <option value="localapps">localapps</option>
 42 |                         <option value="all">all</option>
 43 |                     </select>
 44 |                     <div class="form-text">
 45 |                         <p class="mb-1">
 46 |                             allまたはlocalappsを指定。allはすべてを許可します。
 47 |                         </p>
 48 |                         <p class="mb-1">
 49 |                             localappsはオリジン間リソース共有ポリシーを、app://.とlocalhost関連に限定します。
 50 |                         </p>
 51 |                         <p>
 52 |                             その他のオリジンはallow_originオプションで追加できます。デフォルトはlocalapps。
 53 |                         </p>
 54 |                     </div>
 55 |                 </div>
 56 | 
 57 |                 <div class="mb-3">
 58 |                     <label class="form-label">Allow Origin</label>
 59 |                     <input
 60 |                         class="form-control"
 61 |                         type="text"
 62 |                         name="allow_origin"
 63 |                         value="{{ allow_origin }}"
 64 |                     />
 65 |                     <div class="form-text">
 66 |                         許可するオリジンを指定します。複数指定する場合は、直後にスペースで区切って追加できます。
 67 |                     </div>
 68 |                 </div>
 69 | 
 70 |                 <div
 71 |                     class="modal fade"
 72 |                     id="submitModal"
 73 |                     tabindex="-1"
 74 |                     aria-labelledby="submitModalLabel"
 75 |                     aria-hidden="true"
 76 |                 >
 77 |                     <div class="modal-dialog">
 78 |                         <div class="modal-content">
 79 |                             <div class="modal-header">
 80 |                                 <h5 class="modal-title" id="submitModalLabel">
 81 |                                     設定の保存
 82 |                                 </h5>
 83 |                                 <button
 84 |                                     type="button"
 85 |                                     class="btn-close"
 86 |                                     data-bs-dismiss="modal"
 87 |                                     aria-label="Close"
 88 |                                 ></button>
 89 |                             </div>
 90 |                             <div class="modal-body">
 91 |                                 設定を保存します。よろしいですか？
 92 |                             </div>
 93 |                             <div class="modal-footer">
 94 |                                 <button
 95 |                                     type="button"
 96 |                                     class="btn btn-secondary"
 97 |                                     data-bs-dismiss="modal"
 98 |                                 >
 99 |                                     キャンセル
100 |                                 </button>
101 |                                 <button type="submit" class="btn btn-primary">
102 |                                     保存
103 |                                 </button>
104 |                             </div>
105 |                         </div>
106 |                     </div>
107 |                 </div>
108 | 
109 |                 <button
110 |                     type="button"
111 |                     class="btn btn-primary"
112 |                     data-bs-toggle="modal"
113 |                     data-bs-target="#submitModal"
114 |                 >
115 |                     保存
116 |                 </button>
117 |             </form>
118 |         </div>
119 |     </body>
120 | </html>
121 | 


--------------------------------------------------------------------------------
/voicevox_engine/__init__.py:
--------------------------------------------------------------------------------
# Version string of the voicevox_engine package; the value committed in source is "latest".
__version__ = "latest"
2 | 


--------------------------------------------------------------------------------
/voicevox_engine/acoustic_feature_extractor.py:
--------------------------------------------------------------------------------
  1 | from abc import abstractmethod
  2 | from enum import Enum
  3 | from pathlib import Path
  4 | from typing import List, Sequence
  5 | 
  6 | import numpy
  7 | 
  8 | 
  9 | class BasePhoneme(object):
 10 |     """
 11 |     音素の応用クラス群の抽象基底クラス
 12 | 
 13 |     Attributes
 14 |     ----------
 15 |     phoneme_list : Sequence[str]
 16 |         音素のリスト
 17 |     num_phoneme : int
 18 |         音素リストの要素数
 19 |     space_phoneme : str
 20 |         読点に値する音素
 21 |     """
 22 | 
 23 |     phoneme_list: Sequence[str]
 24 |     num_phoneme: int
 25 |     space_phoneme: str
 26 | 
 27 |     def __init__(
 28 |         self,
 29 |         phoneme: str,
 30 |         start: float,
 31 |         end: float,
 32 |     ):
 33 |         self.phoneme = phoneme
 34 |         self.start = numpy.round(start, decimals=2)
 35 |         self.end = numpy.round(end, decimals=2)
 36 | 
 37 |     def __repr__(self):
 38 |         return f"Phoneme(phoneme='{self.phoneme}', start={self.start}, end={self.end})"
 39 | 
 40 |     def __eq__(self, o: object):
 41 |         return isinstance(o, BasePhoneme) and (
 42 |             self.phoneme == o.phoneme and self.start == o.start and self.end == o.end
 43 |         )
 44 | 
 45 |     def verify(self):
 46 |         """
 47 |         音素クラスとして、データが正しいかassertする
 48 |         """
 49 |         assert self.phoneme in self.phoneme_list, f"{self.phoneme} is not defined."
 50 | 
 51 |     @property
 52 |     def phoneme_id(self):
 53 |         """
 54 |         phoneme_id (phoneme list内でのindex)を取得する
 55 |         Returns
 56 |         -------
 57 |         id : int
 58 |             phoneme_idを返す
 59 |         """
 60 |         return self.phoneme_list.index(self.phoneme)
 61 | 
 62 |     @property
 63 |     def duration(self):
 64 |         """
 65 |         音素継続期間を取得する
 66 |         Returns
 67 |         -------
 68 |         duration : int
 69 |             音素継続期間を返す
 70 |         """
 71 |         return self.end - self.start
 72 | 
 73 |     @property
 74 |     def onehot(self):
 75 |         """
 76 |         phoneme listの長さ分の0埋め配列のうち、phoneme id番目がTrue(1)の配列を返す
 77 |         Returns
 78 |         -------
 79 |         onehot : numpu.ndarray
 80 |             関数内で変更された配列を返す
 81 |         """
 82 |         array = numpy.zeros(self.num_phoneme, dtype=bool)
 83 |         array[self.phoneme_id] = True
 84 |         return array
 85 | 
 86 |     @classmethod
 87 |     def parse(cls, s: str):
 88 |         """
 89 |         文字列をパースして音素クラスを作る
 90 |         Parameters
 91 |         ----------
 92 |         s : str
 93 |             パースしたい文字列
 94 | 
 95 |         Returns
 96 |         -------
 97 |         phoneme : BasePhoneme
 98 |             パース結果を用いた音素クラスを返す
 99 | 
100 |         Examples
101 |         --------
102 |         >>> BasePhoneme.parse('1.7425000 1.9125000 o:')
103 |         Phoneme(phoneme='o:', start=1.74, end=1.91)
104 |         """
105 |         words = s.split()
106 |         return cls(
107 |             start=float(words[0]),
108 |             end=float(words[1]),
109 |             phoneme=words[2],
110 |         )
111 | 
112 |     @classmethod
113 |     @abstractmethod
114 |     def convert(cls, phonemes: List["BasePhoneme"]) -> List["BasePhoneme"]:
115 |         raise NotImplementedError
116 | 
117 |     @classmethod
118 |     def load_lab_list(cls, path: Path):
119 |         """
120 |         labファイルを読み込む
121 |         Parameters
122 |         ----------
123 |         path : Path
124 |             読み込みたいlabファイルのパス
125 | 
126 |         Returns
127 |         -------
128 |         phonemes : List[BasePhoneme]
129 |             パース結果を用いた音素クラスを返す
130 |         """
131 |         phonemes = [cls.parse(s) for s in path.read_text().split("\n") if len(s) > 0]
132 |         phonemes = cls.convert(phonemes)
133 | 
134 |         for phoneme in phonemes:
135 |             phoneme.verify()
136 |         return phonemes
137 | 
138 |     @classmethod
139 |     def save_lab_list(cls, phonemes: List["BasePhoneme"], path: Path):
140 |         """
141 |         音素クラスのリストをlabファイル形式で保存する
142 |         Parameters
143 |         ----------
144 |         phonemes : List[BasePhoneme]
145 |             保存したい音素クラスのリスト
146 |         path : Path
147 |             labファイルの保存先パス
148 |         """
149 |         text = "\n".join(
150 |             [
151 |                 f"{numpy.round(p.start, decimals=2):.2f}\t"
152 |                 f"{numpy.round(p.end, decimals=2):.2f}\t"
153 |                 f"{p.phoneme}"
154 |                 for p in phonemes
155 |             ]
156 |         )
157 |         path.write_text(text)
158 | 
159 | 
class JvsPhoneme(BasePhoneme):
    """
    Phoneme set of the JVS (Japanese versatile speech) corpus.

    Attributes
    ----------
    phoneme_list : Sequence[str]
        List of valid phonemes
    num_phoneme : int
        Number of elements in the phoneme list
    space_phoneme : str
        Phoneme that stands for a pause (comma)
    """

    phoneme_list = (
        "pau",
        "I",
        "N",
        "U",
        "a",
        "b",
        "by",
        "ch",
        "cl",
        "d",
        "dy",
        "e",
        "f",
        "g",
        "gy",
        "h",
        "hy",
        "i",
        "j",
        "k",
        "ky",
        "m",
        "my",
        "n",
        "ny",
        "o",
        "p",
        "py",
        "r",
        "ry",
        "s",
        "sh",
        "t",
        "ts",
        "u",
        "v",
        "w",
        "y",
        "z",
    )
    num_phoneme = len(phoneme_list)
    space_phoneme = "pau"

    @classmethod
    def convert(cls, phonemes: List["JvsPhoneme"]) -> List["JvsPhoneme"]:
        """
        Replace a leading and trailing sil (silent) with space_phoneme (pau).

        The list is modified in place and also returned.

        Parameters
        ----------
        phonemes : List[JvsPhoneme]
            Phonemes to convert

        Returns
        -------
        phonemes : List[JvsPhoneme]
            The converted phonemes
        """
        # An empty list (e.g. from an empty lab file) has no edges to rewrite;
        # indexing it would raise IndexError.
        if not phonemes:
            return phonemes
        if "sil" in phonemes[0].phoneme:
            phonemes[0].phoneme = cls.space_phoneme
        if "sil" in phonemes[-1].phoneme:
            phonemes[-1].phoneme = cls.space_phoneme
        return phonemes
237 | 
238 | 
class OjtPhoneme(BasePhoneme):
    """
    Phoneme set used by OpenJTalk.

    Attributes
    ----------
    phoneme_list : Sequence[str]
        List of valid phonemes
    num_phoneme : int
        Number of elements in the phoneme list
    space_phoneme : str
        Phoneme that stands for a pause (comma)
    """

    phoneme_list = (
        "pau",
        "A",
        "E",
        "I",
        "N",
        "O",
        "U",
        "a",
        "b",
        "by",
        "ch",
        "cl",
        "d",
        "dy",
        "e",
        "f",
        "g",
        "gw",
        "gy",
        "h",
        "hy",
        "i",
        "j",
        "k",
        "kw",
        "ky",
        "m",
        "my",
        "n",
        "ny",
        "o",
        "p",
        "py",
        "r",
        "ry",
        "s",
        "sh",
        "t",
        "ts",
        "ty",
        "u",
        "v",
        "w",
        "y",
        "z",
    )
    num_phoneme = len(phoneme_list)
    space_phoneme = "pau"

    @classmethod
    def convert(cls, phonemes: List["OjtPhoneme"]) -> List["OjtPhoneme"]:
        """
        Replace a leading and trailing sil (silent) with space_phoneme (pau).

        The list is modified in place and also returned.

        Parameters
        ----------
        phonemes : List[OjtPhoneme]
            Phonemes to convert

        Returns
        -------
        phonemes : List[OjtPhoneme]
            The converted phonemes
        """
        # An empty list (e.g. from an empty lab file) has no edges to rewrite;
        # indexing it would raise IndexError.
        if not phonemes:
            return phonemes
        if "sil" in phonemes[0].phoneme:
            phonemes[0].phoneme = cls.space_phoneme
        if "sil" in phonemes[-1].phoneme:
            phonemes[-1].phoneme = cls.space_phoneme
        return phonemes
322 | 
323 | 
class PhonemeType(str, Enum):
    """
    Identifier of a phoneme set; a str-based enum whose member values equal their names.
    """

    jvs = "jvs"
    openjtalk = "openjtalk"
327 | 
328 | 
# Lookup table from a PhonemeType member to the phoneme class implementing it.
phoneme_type_to_class = {
    PhonemeType.jvs: JvsPhoneme,
    PhonemeType.openjtalk: OjtPhoneme,
}
333 | 


--------------------------------------------------------------------------------
/voicevox_engine/cancellable_engine.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import asyncio
  3 | import queue
  4 | from distutils.version import LooseVersion
  5 | from multiprocessing import Pipe, Process
  6 | from multiprocessing.connection import Connection
  7 | from tempfile import NamedTemporaryFile
  8 | from typing import List, Optional, Tuple
  9 | 
 10 | import soundfile
 11 | 
 12 | # FIXME: remove FastAPI dependency
 13 | from fastapi import HTTPException, Request
 14 | 
 15 | from .model import AudioQuery
 16 | from .synthesis_engine import make_synthesis_engines
 17 | 
 18 | 
 19 | class CancellableEngine:
 20 |     """
 21 |     音声合成のキャンセル機能に関するクラス
 22 |     初期化後は、synthesis関数で音声合成できる
 23 |     （オリジナルと比べ引数が増えているので注意）
 24 | 
 25 |     Attributes
 26 |     ----------
 27 |     watch_con_list: List[Tuple[Request, Process]]
 28 |         Requestは接続の監視に使用され、Processは通信切断時のプロセスキルに使用される
 29 |         クライアントから接続があるとListにTupleが追加される
 30 |         接続が切断、もしくは音声合成が終了すると削除される
 31 |     procs_and_cons: queue.Queue[Tuple[Process, Connection]]
 32 |         音声合成の準備が終わっているプロセスのList
 33 |         （音声合成中のプロセスは入っていない）
 34 |     """
 35 | 
 36 |     def __init__(self, args: argparse.Namespace) -> None:
 37 |         """
 38 |         変数の初期化を行う
 39 |         また、args.init_processesの数だけプロセスを起動し、procs_and_consに格納する
 40 |         """
 41 |         self.args = args
 42 |         if not self.args.enable_cancellable_synthesis:
 43 |             raise HTTPException(
 44 |                 status_code=404,
 45 |                 detail="実験的機能はデフォルトで無効になっています。使用するには引数を指定してください。",
 46 |             )
 47 | 
 48 |         self.watch_con_list: List[Tuple[Request, Process]] = []
 49 |         self.procs_and_cons: queue.Queue[Tuple[Process, Connection]] = queue.Queue()
 50 |         for _ in range(self.args.init_processes):
 51 |             self.procs_and_cons.put(self.start_new_proc())
 52 | 
 53 |     def start_new_proc(
 54 |         self,
 55 |     ) -> Tuple[Process, Connection]:
 56 |         """
 57 |         新しく開始したプロセスを返す関数
 58 | 
 59 |         Returns
 60 |         -------
 61 |         ret_proc: Process
 62 |             新規のプロセス
 63 |         sub_proc_con1: Connection
 64 |             ret_procのプロセスと通信するためのPipe
 65 |         """
 66 |         sub_proc_con1, sub_proc_con2 = Pipe(True)
 67 |         ret_proc = Process(
 68 |             target=start_synthesis_subprocess,
 69 |             kwargs={
 70 |                 "args": self.args,
 71 |                 "sub_proc_con": sub_proc_con2,
 72 |             },
 73 |             daemon=True,
 74 |         )
 75 |         ret_proc.start()
 76 |         return ret_proc, sub_proc_con1
 77 | 
 78 |     def finalize_con(
 79 |         self,
 80 |         req: Request,
 81 |         proc: Process,
 82 |         sub_proc_con: Optional[Connection],
 83 |     ) -> None:
 84 |         """
 85 |         接続が切断された時の処理を行う関数
 86 |         watch_con_listからの削除、プロセスの後処理を行う
 87 |         プロセスが生きている場合はそのままprocs_and_consに加える
 88 |         死んでいる場合は新しく生成したものをprocs_and_consに加える
 89 | 
 90 |         Parameters
 91 |         ----------
 92 |         req: fastapi.Request
 93 |             接続確立時に受け取ったものをそのまま渡せばよい
 94 |             https://fastapi.tiangolo.com/advanced/using-request-directly/
 95 |         proc: Process
 96 |             音声合成を行っていたプロセス
 97 |         sub_proc_con: Connection, optional
 98 |             音声合成を行っていたプロセスとのPipe
 99 |             指定されていない場合、プロセスは再利用されず終了される
100 |         """
101 |         try:
102 |             self.watch_con_list.remove((req, proc))
103 |         except ValueError:
104 |             pass
105 |         try:
106 |             if not proc.is_alive() or sub_proc_con is None:
107 |                 proc.close()
108 |                 raise ValueError
109 |             # プロセスが死んでいない場合は再利用する
110 |             self.procs_and_cons.put((proc, sub_proc_con))
111 |         except ValueError:
112 |             # プロセスが死んでいるので新しく作り直す
113 |             self.procs_and_cons.put(self.start_new_proc())
114 | 
115 |     def _synthesis_impl(
116 |         self,
117 |         query: AudioQuery,
118 |         speaker_id: int,
119 |         request: Request,
120 |         core_version: Optional[str],
121 |     ) -> str:
122 |         """
123 |         音声合成を行う関数
124 |         通常エンジンの引数に比べ、requestが必要になっている
125 |         また、返り値がファイル名になっている
126 | 
127 |         Parameters
128 |         ----------
129 |         query: AudioQuery
130 |         speaker_id: int
131 |         request: fastapi.Request
132 |             接続確立時に受け取ったものをそのまま渡せばよい
133 |             https://fastapi.tiangolo.com/advanced/using-request-directly/
134 |         core_version: str
135 | 
136 |         Returns
137 |         -------
138 |         f_name: str
139 |             生成された音声ファイルの名前
140 |         """
141 |         proc, sub_proc_con1 = self.procs_and_cons.get()
142 |         self.watch_con_list.append((request, proc))
143 |         try:
144 |             sub_proc_con1.send((query, speaker_id, core_version))
145 |             f_name = sub_proc_con1.recv()
146 |         except EOFError:
147 |             raise HTTPException(status_code=422, detail="既にサブプロセスは終了されています")
148 |         except Exception:
149 |             self.finalize_con(request, proc, sub_proc_con1)
150 |             raise
151 | 
152 |         self.finalize_con(request, proc, sub_proc_con1)
153 |         return f_name
154 | 
155 |     async def catch_disconnection(self):
156 |         """
157 |         接続監視を行うコルーチン
158 |         """
159 |         while True:
160 |             await asyncio.sleep(1)
161 |             for con in self.watch_con_list:
162 |                 req, proc = con
163 |                 if await req.is_disconnected():
164 |                     try:
165 |                         if proc.is_alive():
166 |                             proc.terminate()
167 |                             proc.join()
168 |                         proc.close()
169 |                     except ValueError:
170 |                         pass
171 |                     finally:
172 |                         self.finalize_con(req, proc, None)
173 | 
174 | 
def start_synthesis_subprocess(
    args: argparse.Namespace,
    sub_proc_con: Connection,
):
    """
    Entry point of a synthesis worker process.
    Defined at module level so that it can be pickled.

    Loops forever: receives ``(query, speaker_id, core_version)`` from the
    pipe, synthesizes the audio into a temporary WAV file and sends the file
    name back. An empty string is sent when the requested core version is
    not available.

    Parameters
    ----------
    args: argparse.Namespace
        The namespace created at start-up, passed through unchanged
    sub_proc_con: Connection
        Pipe end used to talk to the main process
    """

    engines = make_synthesis_engines(
        use_gpu=args.use_gpu,
        voicelib_dirs=args.voicelib_dir,
        voicevox_dir=args.voicevox_dir,
        runtime_dirs=args.runtime_dir,
        cpu_num_threads=args.cpu_num_threads,
        enable_mock=args.enable_mock,
    )
    assert len(engines) != 0, "音声合成エンジンがありません。"
    newest_version = str(max(LooseVersion(version) for version in engines))

    while True:
        try:
            query, speaker_id, core_version = sub_proc_con.recv()

            # Unknown version: report it with an empty file name.
            if core_version is not None and core_version not in engines:
                sub_proc_con.send("")
                continue

            selected = newest_version if core_version is None else core_version
            wave = engines[selected]._synthesis_impl(query, speaker_id)

            # delete=False: the file must outlive this block, only its name is sent.
            with NamedTemporaryFile(delete=False) as wav_file:
                soundfile.write(
                    file=wav_file,
                    data=wave,
                    samplerate=query.outputSamplingRate,
                    format="WAV",
                )
            sub_proc_con.send(wav_file.name)
        except Exception:
            sub_proc_con.close()
            raise
221 | 


--------------------------------------------------------------------------------
/voicevox_engine/dev/core/__init__.py:
--------------------------------------------------------------------------------
 1 | from .mock import (
 2 |     decode_forward,
 3 |     initialize,
 4 |     metas,
 5 |     supported_devices,
 6 |     yukarin_s_forward,
 7 |     yukarin_sa_forward,
 8 | )
 9 | 
10 | __all__ = [
11 |     "decode_forward",
12 |     "initialize",
13 |     "yukarin_s_forward",
14 |     "yukarin_sa_forward",
15 |     "metas",
16 |     "supported_devices",
17 | ]
18 | 


--------------------------------------------------------------------------------
/voicevox_engine/dev/core/mock.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from logging import getLogger
  3 | from typing import Any, Dict, List
  4 | 
  5 | import numpy as np
  6 | from pyopenjtalk import tts
  7 | from scipy.signal import resample
  8 | 
# Fixed sentence that decode_forward() passes to tts(), whatever its arguments are.
DUMMY_TEXT = "これはダミーのテキストです"
 10 | 
 11 | 
 12 | def initialize(path: str, use_gpu: bool, *args: List[Any]) -> None:
 13 |     pass
 14 | 
 15 | 
 16 | def yukarin_s_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:
 17 |     logger = getLogger("uvicorn")  # FastAPI / Uvicorn 内からの利用のため
 18 |     logger.info(
 19 |         "Sorry, yukarin_s_forward() is a mock. Return values are incorrect.",
 20 |     )
 21 |     return np.ones(length) / 5
 22 | 
 23 | 
 24 | def yukarin_sa_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:
 25 |     logger = getLogger("uvicorn")  # FastAPI / Uvicorn 内からの利用のため
 26 |     logger.info(
 27 |         "Sorry, yukarin_sa_forward() is a mock. Return values are incorrect.",
 28 |     )
 29 |     return np.ones((1, length)) * 5
 30 | 
 31 | 
 32 | def decode_forward(length: int, **kwargs: Dict[str, Any]) -> np.ndarray:
 33 |     """
 34 |     合成音声の波形データをNumPy配列で返します。ただし、常に固定の文言を読み上げます（DUMMY_TEXT）
 35 |     参照→SynthesisEngine のdocstring [Mock]
 36 | 
 37 |     Parameters
 38 |     ----------
 39 |     length : int
 40 |         フレームの長さ
 41 | 
 42 |     Returns
 43 |     -------
 44 |     wave : np.ndarray
 45 |         音声合成した波形データ
 46 | 
 47 |     Note
 48 |     -------
 49 |         ここで行う音声合成では、調声（ピッチ等）を反映しない
 50 |         また、入力内容によらず常に固定の文言を読み上げる
 51 | 
 52 |         # pyopenjtalk.tts()の出力仕様
 53 |         dtype=np.float64, 16 bit, mono 48000 Hz
 54 | 
 55 |         # resampleの説明
 56 |         非モックdecode_forwardと合わせるために、出力を24kHzに変換した。
 57 |     """
 58 |     logger = getLogger("uvicorn")  # FastAPI / Uvicorn 内からの利用のため
 59 |     logger.info(
 60 |         "Sorry, decode_forward() is a mock. Return values are incorrect.",
 61 |     )
 62 |     wave, sr = tts(DUMMY_TEXT)
 63 |     wave = resample(
 64 |         wave.astype("int16"),
 65 |         24000 * len(wave) // 48000,
 66 |     )
 67 |     return wave
 68 | 
 69 | 
 70 | def metas() -> str:
 71 |     return json.dumps(
 72 |         [
 73 |             {
 74 |                 "name": "dummy1",
 75 |                 "styles": [
 76 |                     {"name": "style0", "id": 0},
 77 |                     {"name": "style1", "id": 2},
 78 |                     {"name": "style2", "id": 4},
 79 |                     {"name": "style3", "id": 6},
 80 |                 ],
 81 |                 "speaker_uuid": "7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff",
 82 |                 "version": "mock",
 83 |             },
 84 |             {
 85 |                 "name": "dummy2",
 86 |                 "styles": [
 87 |                     {"name": "style0", "id": 1},
 88 |                     {"name": "style1", "id": 3},
 89 |                     {"name": "style2", "id": 5},
 90 |                     {"name": "style3", "id": 7},
 91 |                 ],
 92 |                 "speaker_uuid": "388f246b-8c41-4ac1-8e2d-5d79f3ff56d9",
 93 |                 "version": "mock",
 94 |             },
 95 |             {
 96 |                 "name": "dummy3",
 97 |                 "styles": [
 98 |                     {"name": "style0", "id": 8},
 99 |                 ],
100 |                 "speaker_uuid": "35b2c544-660e-401e-b503-0e14c635303a",
101 |                 "version": "mock",
102 |             },
103 |             {
104 |                 "name": "dummy4",
105 |                 "styles": [
106 |                     {"name": "style0", "id": 9},
107 |                 ],
108 |                 "speaker_uuid": "b1a81618-b27b-40d2-b0ea-27a9ad408c4b",
109 |                 "version": "mock",
110 |             },
111 |         ]
112 |     )
113 | 
114 | 
def supported_devices() -> str:
    """Return the mock device support table as a JSON string: cpu yes, cuda no."""
    device_support = dict(cpu=True, cuda=False)
    return json.dumps(device_support)
122 | 


--------------------------------------------------------------------------------
/voicevox_engine/dev/synthesis_engine/__init__.py:
--------------------------------------------------------------------------------
1 | from .mock import MockSynthesisEngine
2 | 
3 | __all__ = ["MockSynthesisEngine"]
4 | 


--------------------------------------------------------------------------------
/voicevox_engine/dev/synthesis_engine/mock.py:
--------------------------------------------------------------------------------
  1 | from logging import getLogger
  2 | from typing import Any, Dict, List, Optional
  3 | 
  4 | import numpy as np
  5 | from pyopenjtalk import tts
  6 | from scipy.signal import resample
  7 | 
  8 | from ...model import AccentPhrase, AudioQuery
  9 | from ...synthesis_engine import SynthesisEngineBase
 10 | from ...synthesis_engine.synthesis_engine import to_flatten_moras
 11 | 
 12 | 
 13 | class MockSynthesisEngine(SynthesisEngineBase):
 14 |     """
 15 |     SynthesisEngine [Mock]
 16 |     """
 17 | 
 18 |     def __init__(
 19 |         self,
 20 |         speakers: str,
 21 |         supported_devices: Optional[str] = None,
 22 |     ):
 23 |         """
 24 |         __init__ [Mock]
 25 |         """
 26 |         super().__init__()
 27 | 
 28 |         self._speakers = speakers
 29 |         self._supported_devices = supported_devices
 30 |         self.default_sampling_rate = 24000
 31 | 
 32 |     @property
 33 |     def speakers(self) -> str:
 34 |         return self._speakers
 35 | 
 36 |     @property
 37 |     def supported_devices(self) -> Optional[str]:
 38 |         return self._supported_devices
 39 | 
 40 |     def replace_phoneme_length(
 41 |         self, accent_phrases: List[AccentPhrase], speaker_id: int
 42 |     ) -> List[AccentPhrase]:
 43 |         """
 44 |         replace_phoneme_length 入力accent_phrasesを変更せずにそのまま返します [Mock]
 45 | 
 46 |         Parameters
 47 |         ----------
 48 |         accent_phrases : List[AccentPhrase]
 49 |             フレーズ句のリスト
 50 |         speaker_id : int
 51 |             話者
 52 | 
 53 |         Returns
 54 |         -------
 55 |         List[AccentPhrase]
 56 |             フレーズ句のリスト（変更なし）
 57 |         """
 58 |         return accent_phrases
 59 | 
 60 |     def replace_mora_pitch(
 61 |         self, accent_phrases: List[AccentPhrase], speaker_id: int
 62 |     ) -> List[AccentPhrase]:
 63 |         """
 64 |         replace_mora_pitch 入力accent_phrasesを変更せずにそのまま返します [Mock]
 65 | 
 66 |         Parameters
 67 |         ----------
 68 |         accent_phrases : List[AccentPhrase]
 69 |             フレーズ句のリスト
 70 |         speaker_id : int
 71 |             話者
 72 | 
 73 |         Returns
 74 |         -------
 75 |         List[AccentPhrase]
 76 |             フレーズ句のリスト（変更なし）
 77 |         """
 78 |         return accent_phrases
 79 | 
 80 |     def _synthesis_impl(self, query: AudioQuery, speaker_id: int) -> np.ndarray:
 81 |         """
 82 |         synthesis voicevox coreを使わずに、音声合成する [Mock]
 83 | 
 84 |         Parameters
 85 |         ----------
 86 |         query : AudioQuery
 87 |             /audio_query APIで得たjson
 88 |         speaker_id : int
 89 |             話者
 90 | 
 91 |         Returns
 92 |         -------
 93 |         wave [npt.NDArray[np.int16]]
 94 |             音声波形データをNumPy配列で返します
 95 |         """
 96 |         # recall text in katakana
 97 |         flatten_moras = to_flatten_moras(query.accent_phrases)
 98 |         kana_text = "".join([mora.text for mora in flatten_moras])
 99 | 
100 |         wave = self.forward(kana_text)
101 | 
102 |         # volume
103 |         wave *= query.volumeScale
104 | 
105 |         return wave.astype("int16")
106 | 
107 |     def forward(self, text: str, **kwargs: Dict[str, Any]) -> np.ndarray:
108 |         """
109 |         forward tts via pyopenjtalk.tts()
110 |         参照→SynthesisEngine のdocstring [Mock]
111 | 
112 |         Parameters
113 |         ----------
114 |         text : str
115 |             入力文字列（例：読み上げたい文章をカタカナにした文字列、等）
116 | 
117 |         Returns
118 |         -------
119 |         wave [npt.NDArray[np.int16]]
120 |             音声波形データをNumPy配列で返します
121 | 
122 |         Note
123 |         -------
124 |         ここで行う音声合成では、調声（ピッチ等）を反映しない
125 | 
126 |         # pyopenjtalk.tts()の出力仕様
127 |         dtype=np.float64, 16 bit, mono 48000 Hz
128 | 
129 |         # resampleの説明
130 |         非モック実装（decode_forward）と合わせるために、出力を24kHzに変換した。
131 |         """
132 |         logger = getLogger("uvicorn")  # FastAPI / Uvicorn 内からの利用のため
133 |         logger.info("[Mock] input text: %s" % text)
134 |         wave, sr = tts(text)
135 |         wave = resample(wave, 24000 * len(wave) // 48000)
136 |         return wave
137 | 


--------------------------------------------------------------------------------
/voicevox_engine/engine_manifest/EngineManifest.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Optional
 2 | 
 3 | from pydantic import BaseModel, Field
 4 | 
 5 | 
# Pydantic model describing one entry of the engine's update history.
# NOTE(review): the docstring and Field titles are left in Japanese because
# pydantic presumably publishes them in the generated schema — confirm before
# translating.
class UpdateInfo(BaseModel):
    """
    エンジンのアップデート情報
    """

    # Engine version name
    version: str = Field(title="エンジンのバージョン名")
    # Descriptions of what changed in this update
    descriptions: List[str] = Field(title="アップデートの詳細についての説明")
    # Contributor names
    contributors: Optional[List[str]] = Field(title="貢献者名")
14 | 
15 | 
# Pydantic model holding the license information of one dependency library.
# NOTE(review): the docstring and Field titles are left in Japanese because
# pydantic presumably publishes them in the generated schema — confirm before
# translating.
class LicenseInfo(BaseModel):
    """
    依存ライブラリのライセンス情報
    """

    # Name of the dependency library
    name: str = Field(title="依存ライブラリ名")
    # Version of the dependency library
    version: Optional[str] = Field(title="依存ライブラリのバージョン")
    # License name of the dependency library
    license: Optional[str] = Field(title="依存ライブラリのライセンス名")
    # Full license text of the dependency library
    text: str = Field(title="依存ライブラリのライセンス本文")
25 | 
26 | 
# Pydantic model listing, as boolean flags, the features the engine has.
# NOTE(review): the docstring and Field titles are left in Japanese because
# pydantic presumably publishes them in the generated schema — confirm before
# translating.
class SupportedFeatures(BaseModel):
    """
    エンジンが持つ機能の一覧
    """

    # Per-mora pitch adjustment
    adjust_mora_pitch: bool = Field(title="モーラごとの音高の調整")
    # Per-phoneme length adjustment
    adjust_phoneme_length: bool = Field(title="音素ごとの長さの調整")
    # Overall speech-speed adjustment
    adjust_speed_scale: bool = Field(title="全体の話速の調整")
    # Overall pitch adjustment
    adjust_pitch_scale: bool = Field(title="全体の音高の調整")
    # Overall intonation adjustment
    adjust_intonation_scale: bool = Field(title="全体の抑揚の調整")
    # Overall volume adjustment
    adjust_volume_scale: bool = Field(title="全体の音量の調整")
    # Automatic adjustment of interrogative sentences
    interrogative_upspeak: bool = Field(title="疑問文の自動調整")
    # Synthesis of audio morphed between two speakers
    synthesis_morphing: bool = Field(title="2人の話者でモーフィングした音声を合成")
40 | 
41 | 
# Pydantic model carrying the information about the engine itself.
# NOTE(review): the docstring and Field titles are left in Japanese because
# pydantic presumably publishes them in the generated schema — confirm before
# translating.
class EngineManifest(BaseModel):
    """
    エンジン自体に関する情報
    """

    # Manifest version
    manifest_version: str = Field(title="マニフェストのバージョン")
    # Engine name
    name: str = Field(title="エンジン名")
    # Brand name
    brand_name: str = Field(title="ブランド名")
    # Engine UUID
    uuid: str = Field(title="エンジンのUUID")
    # Engine URL
    url: str = Field(title="エンジンのURL")
    # Engine icon, BASE64-encoded
    icon: str = Field(title="エンジンのアイコンをBASE64エンコードしたもの")
    # Default sampling frequency
    default_sampling_rate: int = Field(title="デフォルトのサンプリング周波数")
    # Engine terms of service
    terms_of_service: str = Field(title="エンジンの利用規約")
    # Engine update information
    update_infos: List[UpdateInfo] = Field(title="エンジンのアップデート情報")
    # License information of the dependencies
    dependency_licenses: List[LicenseInfo] = Field(title="依存関係のライセンス情報")
    # Local JSON path used to obtain downloadable voice-library information
    downloadable_libraries_path: Optional[str] = Field(
        title="ダウンロード可能な音声ライブラリ情報を取得するためのローカルjsonパス"
    )
    # API URL used to obtain downloadable voice-library information
    downloadable_libraries_url: Optional[str] = Field(
        title="ダウンロード可能な音声ライブラリ情報を取得するためのAPIのURL"
    )
    # Features the engine has
    supported_features: SupportedFeatures = Field(title="エンジンが持つ機能")
64 | 


--------------------------------------------------------------------------------
/voicevox_engine/engine_manifest/EngineManifestLoader.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from base64 import b64encode
 3 | from pathlib import Path
 4 | 
 5 | from .EngineManifest import EngineManifest, LicenseInfo, UpdateInfo
 6 | 
 7 | 
 8 | class EngineManifestLoader:
 9 |     def __init__(self, manifest_path: Path, root_dir: Path):
10 |         self.manifest_path = manifest_path
11 |         self.root_dir = root_dir
12 | 
13 |     def load_manifest(self) -> EngineManifest:
14 |         manifest = json.loads(self.manifest_path.read_text(encoding="utf-8"))
15 | 
16 |         manifest = EngineManifest(
17 |             manifest_version=manifest["manifest_version"],
18 |             name=manifest["name"],
19 |             brand_name=manifest["brand_name"],
20 |             uuid=manifest["uuid"],
21 |             url=manifest["url"],
22 |             default_sampling_rate=manifest["default_sampling_rate"],
23 |             icon=b64encode((self.root_dir / manifest["icon"]).read_bytes()).decode(
24 |                 "utf-8"
25 |             ),
26 |             terms_of_service=(self.root_dir / manifest["terms_of_service"]).read_text(
27 |                 "utf-8"
28 |             ),
29 |             update_infos=[
30 |                 UpdateInfo(**update_info)
31 |                 for update_info in json.loads(
32 |                     (self.root_dir / manifest["update_infos"]).read_text("utf-8")
33 |                 )
34 |             ],
35 |             dependency_licenses=[
36 |                 LicenseInfo(**license_info)
37 |                 for license_info in json.loads(
38 |                     (self.root_dir / manifest["dependency_licenses"]).read_text("utf-8")
39 |                 )
40 |             ],
41 |             downloadable_libraries_path=manifest["downloadable_libraries_path"],
42 |             downloadable_libraries_url=manifest["downloadable_libraries_url"],
43 |             supported_features={
44 |                 key: item["value"]
45 |                 for key, item in manifest["supported_features"].items()
46 |             },
47 |         )
48 |         return manifest
49 | 


--------------------------------------------------------------------------------
/voicevox_engine/engine_manifest/__init__.py:
--------------------------------------------------------------------------------
1 | from .EngineManifest import EngineManifest
2 | from .EngineManifestLoader import EngineManifestLoader
3 | 
# Names exported by `from <this package> import *`.
__all__ = [
    "EngineManifest",
    "EngineManifestLoader",
]
8 | 


--------------------------------------------------------------------------------
/voicevox_engine/kana_parser.py:
--------------------------------------------------------------------------------
  1 | from typing import List, Optional
  2 | 
  3 | from .model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode
  4 | from .mora_list import openjtalk_text2mora
  5 | 
# Upper bound on outer-loop iterations in _text_to_accent_phrase; exceeding it
# raises ParseKanaError(INFINITE_LOOP).
LOOP_LIMIT = 300
# Prefix placed before a kana to mark its vowel as unvoiced.
UNVOICE_SYMBOL = "_"
# Placed after the mora that carries the accent.
ACCENT_SYMBOL = "'"
# Separates accent phrases without inserting a pause mora.
NOPAUSE_DELIMITER = "/"
# Separates accent phrases and attaches a pause mora to the preceding phrase.
PAUSE_DELIMITER = "、"
# Full-width question mark; marks an accent phrase as interrogative.
WIDE_INTERROGATION_MARK = "？"
 12 | 
 13 | text2mora_with_unvoice = {}
 14 | for text, (consonant, vowel) in openjtalk_text2mora.items():
 15 |     text2mora_with_unvoice[text] = Mora(
 16 |         text=text,
 17 |         consonant=consonant if len(consonant) > 0 else None,
 18 |         consonant_length=0 if len(consonant) > 0 else None,
 19 |         vowel=vowel,
 20 |         vowel_length=0,
 21 |         pitch=0,
 22 |         is_interrogative=False,
 23 |     )
 24 |     if vowel in ["a", "i", "u", "e", "o"]:
 25 |         text2mora_with_unvoice[UNVOICE_SYMBOL + text] = Mora(
 26 |             text=text,
 27 |             consonant=consonant if len(consonant) > 0 else None,
 28 |             consonant_length=0 if len(consonant) > 0 else None,
 29 |             vowel=vowel.upper(),
 30 |             vowel_length=0,
 31 |             pitch=0,
 32 |             is_interrogative=False,
 33 |         )
 34 | 
 35 | 
 36 | def _text_to_accent_phrase(phrase: str) -> AccentPhrase:
 37 |     """
 38 |     longest matchにより読み仮名からAccentPhraseを生成
 39 |     入力長Nに対し計算量O(N^2)
 40 |     """
 41 |     accent_index: Optional[int] = None
 42 |     moras: List[Mora] = []
 43 | 
 44 |     base_index = 0  # パース開始位置。ここから右の文字列をstackに詰めていく。
 45 |     stack = ""  # 保留中の文字列
 46 |     matched_text: Optional[str] = None  # 保留中の文字列内で最後にマッチした仮名
 47 | 
 48 |     outer_loop = 0
 49 |     while base_index < len(phrase):
 50 |         outer_loop += 1
 51 |         if phrase[base_index] == ACCENT_SYMBOL:
 52 |             if len(moras) == 0:
 53 |                 raise ParseKanaError(ParseKanaErrorCode.ACCENT_TOP, text=phrase)
 54 |             if accent_index is not None:
 55 |                 raise ParseKanaError(ParseKanaErrorCode.ACCENT_TWICE, text=phrase)
 56 |             accent_index = len(moras)
 57 |             base_index += 1
 58 |             continue
 59 |         for watch_index in range(base_index, len(phrase)):
 60 |             if phrase[watch_index] == ACCENT_SYMBOL:
 61 |                 break
 62 |             # 普通の文字の場合
 63 |             stack += phrase[watch_index]
 64 |             if stack in text2mora_with_unvoice:
 65 |                 matched_text = stack
 66 |         # push mora
 67 |         if matched_text is None:
 68 |             raise ParseKanaError(ParseKanaErrorCode.UNKNOWN_TEXT, text=stack)
 69 |         else:
 70 |             moras.append(text2mora_with_unvoice[matched_text].copy(deep=True))
 71 |             base_index += len(matched_text)
 72 |             stack = ""
 73 |             matched_text = None
 74 |         if outer_loop > LOOP_LIMIT:
 75 |             raise ParseKanaError(ParseKanaErrorCode.INFINITE_LOOP)
 76 |     if accent_index is None:
 77 |         raise ParseKanaError(ParseKanaErrorCode.ACCENT_NOTFOUND, text=phrase)
 78 |     else:
 79 |         return AccentPhrase(moras=moras, accent=accent_index, pause_mora=None)
 80 | 
 81 | 
 82 | def parse_kana(text: str) -> List[AccentPhrase]:
 83 |     """
 84 |     AquesTalkライクな読み仮名をパースして音長・音高未指定のaccent phraseに変換
 85 |     """
 86 | 
 87 |     parsed_results: List[AccentPhrase] = []
 88 |     phrase_base = 0
 89 |     if len(text) == 0:
 90 |         raise ParseKanaError(ParseKanaErrorCode.EMPTY_PHRASE, position=1)
 91 | 
 92 |     for i in range(len(text) + 1):
 93 |         if i == len(text) or text[i] in [PAUSE_DELIMITER, NOPAUSE_DELIMITER]:
 94 |             phrase = text[phrase_base:i]
 95 |             if len(phrase) == 0:
 96 |                 raise ParseKanaError(
 97 |                     ParseKanaErrorCode.EMPTY_PHRASE,
 98 |                     position=str(len(parsed_results) + 1),
 99 |                 )
100 |             phrase_base = i + 1
101 | 
102 |             is_interrogative = WIDE_INTERROGATION_MARK in phrase
103 |             if is_interrogative:
104 |                 if WIDE_INTERROGATION_MARK in phrase[:-1]:
105 |                     raise ParseKanaError(
106 |                         ParseKanaErrorCode.INTERROGATION_MARK_NOT_AT_END, text=phrase
107 |                     )
108 |                 phrase = phrase.replace(WIDE_INTERROGATION_MARK, "")
109 | 
110 |             accent_phrase: AccentPhrase = _text_to_accent_phrase(phrase)
111 |             if i < len(text) and text[i] == PAUSE_DELIMITER:
112 |                 accent_phrase.pause_mora = Mora(
113 |                     text="、",
114 |                     consonant=None,
115 |                     consonant_length=None,
116 |                     vowel="pau",
117 |                     vowel_length=0,
118 |                     pitch=0,
119 |                 )
120 |             accent_phrase.is_interrogative = is_interrogative
121 | 
122 |             parsed_results.append(accent_phrase)
123 | 
124 |     return parsed_results
125 | 
126 | 
def create_kana(accent_phrases: List[AccentPhrase]) -> str:
    """
    Serialize accent phrases into the kana notation understood by parse_kana.

    Unvoiced moras (upper-case vowel) get the unvoice prefix, the accent mark
    follows the accented mora, interrogative phrases end with the wide
    question mark, and phrases are joined with the pause / no-pause delimiter.
    """
    pieces: List[str] = []
    last_index = len(accent_phrases) - 1

    for phrase_index, phrase in enumerate(accent_phrases):
        # phrase.accent is compared against the 1-based mora number.
        for mora_number, mora in enumerate(phrase.moras, start=1):
            if mora.vowel in ("A", "I", "U", "E", "O"):
                pieces.append(UNVOICE_SYMBOL)
            pieces.append(mora.text)
            if mora_number == phrase.accent:
                pieces.append(ACCENT_SYMBOL)

        if phrase.is_interrogative:
            pieces.append(WIDE_INTERROGATION_MARK)

        # No delimiter after the final phrase.
        if phrase_index < last_index:
            pieces.append(
                NOPAUSE_DELIMITER if phrase.pause_mora is None else PAUSE_DELIMITER
            )

    return "".join(pieces)
147 | 


--------------------------------------------------------------------------------
/voicevox_engine/metas/Metas.py:
--------------------------------------------------------------------------------
 1 | from enum import Enum
 2 | from typing import List, Optional
 3 | 
 4 | from pydantic import BaseModel, Field
 5 | 
 6 | 
# Pydantic model describing one style of a speaker.
# NOTE(review): the docstring and Field titles are left in Japanese because
# pydantic presumably publishes them in the generated schema — confirm before
# translating.
class SpeakerStyle(BaseModel):
    """
    スピーカーのスタイル情報
    """

    # Style name
    name: str = Field(title="スタイル名")
    # Style ID
    id: int = Field(title="スタイルID")
14 | 
15 | 
# String-valued enum of the morphing permission levels a speaker can declare.
# No class docstring is added on purpose, so the class `__doc__` stays as it was.
class SpeakerSupportPermittedSynthesisMorphing(str, Enum):
    ALL = "ALL"  # morphing permitted with everyone
    SELF_ONLY = "SELF_ONLY"  # morphing permitted only within the same speaker
    NOTHING = "NOTHING"  # morphing prohibited entirely

    @classmethod
    def _missing_(cls, value: object) -> "SpeakerSupportPermittedSynthesisMorphing":
        # Any value that is not a member value (including None) resolves to ALL.
        return cls.ALL
24 | 
25 | 
# Pydantic model describing which optional features a speaker supports.
# NOTE(review): the docstring and Field titles are left in Japanese because
# pydantic presumably publishes them in the generated schema — confirm before
# translating.
class SpeakerSupportedFeatures(BaseModel):
    """
    話者の対応機能の情報
    """

    # Support for the morphing feature. The default is whatever the enum
    # resolves None to (its _missing_ hook).
    permitted_synthesis_morphing: SpeakerSupportPermittedSynthesisMorphing = Field(
        title="モーフィング機能への対応", default=SpeakerSupportPermittedSynthesisMorphing(None)
    )
34 | 
35 | 
# Pydantic model for the speaker information contained in the core.
# NOTE(review): the docstring and Field titles are left in Japanese because
# pydantic presumably publishes them in the generated schema — confirm before
# translating.
class CoreSpeaker(BaseModel):
    """
    コアに含まれるスピーカー情報
    """

    # Speaker name
    name: str = Field(title="名前")
    # Speaker UUID
    speaker_uuid: str = Field(title="スピーカーのUUID")
    # List of the speaker's styles
    styles: List[SpeakerStyle] = Field(title="スピーカースタイルの一覧")
    # Speaker version. The label must be passed as `title=`: Field's first
    # positional argument is the default value, so the previous form made the
    # label text the default version and left the field untitled.
    version: str = Field(title="スピーカーのバージョン")
45 | 
46 | 
# Pydantic model for the speaker information contained in the engine.
# NOTE(review): the docstring and Field titles are left in Japanese because
# pydantic presumably publishes them in the generated schema — confirm before
# translating.
class EngineSpeaker(BaseModel):
    """
    エンジンに含まれるスピーカー情報
    """

    # Features supported by the speaker; a fresh SpeakerSupportedFeatures is
    # created by the default factory when none is given.
    supported_features: SpeakerSupportedFeatures = Field(
        title="スピーカーの対応機能", default_factory=SpeakerSupportedFeatures
    )
55 | 
56 | 
# Full speaker model: inherits the fields of both CoreSpeaker and
# EngineSpeaker and adds nothing of its own.
# NOTE(review): the docstring is left in Japanese because pydantic presumably
# publishes it in the generated schema — confirm before translating.
class Speaker(CoreSpeaker, EngineSpeaker):
    """
    スピーカー情報
    """

    pass
63 | 
64 | 
# Pydantic model with additional information about one style.
# NOTE(review): the docstring and Field titles are left in Japanese because
# pydantic presumably publishes them in the generated schema — confirm before
# translating.
class StyleInfo(BaseModel):
    """
    スタイルの追加情報
    """

    # Style ID
    id: int = Field(title="スタイルID")
    # Icon of this style, base64-encoded
    icon: str = Field(title="当該スタイルのアイコンをbase64エンコードしたもの")
    # portrait.png of this style, base64-encoded
    portrait: Optional[str] = Field(title="当該スタイルのportrait.pngをbase64エンコードしたもの")
    # voice_sample wav files, base64-encoded
    voice_samples: List[str] = Field(title="voice_sampleのwavファイルをbase64エンコードしたもの")
74 | 
75 | 
# Pydantic model with additional information about a speaker.
# NOTE(review): the docstring and Field titles are left in Japanese because
# pydantic presumably publishes them in the generated schema — confirm before
# translating.
class SpeakerInfo(BaseModel):
    """
    話者の追加情報
    """

    # Titled "policy.md"; presumably the content of that file — confirm
    policy: str = Field(title="policy.md")
    # portrait.png, base64-encoded
    portrait: str = Field(title="portrait.pngをbase64エンコードしたもの")
    # Additional information for each style
    style_infos: List[StyleInfo] = Field(title="スタイルの追加情報")
84 | 


--------------------------------------------------------------------------------
/voicevox_engine/metas/MetasStore.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from pathlib import Path
 3 | from typing import TYPE_CHECKING, Dict, List, Tuple
 4 | 
 5 | from voicevox_engine.metas.Metas import CoreSpeaker, EngineSpeaker, Speaker, StyleInfo
 6 | 
 7 | if TYPE_CHECKING:
 8 |     from voicevox_engine.synthesis_engine.synthesis_engine_base import (
 9 |         SynthesisEngineBase,
10 |     )
11 | 
12 | 
class MetasStore:
    """
    Manages the meta information of speakers and styles
    """

    def __init__(self, engine_speakers_path: Path) -> None:
        """
        Load `metas.json` from every entry directly under `engine_speakers_path`.

        The entry name is used as the lookup key; callers look entries up by
        speaker UUID.
        """
        self._engine_speakers_path = engine_speakers_path

        metas_by_name: Dict[str, EngineSpeaker] = {}
        for speaker_dir in engine_speakers_path.iterdir():
            raw_metas = json.loads(
                (speaker_dir / "metas.json").read_text(encoding="utf-8")
            )
            metas_by_name[speaker_dir.name] = EngineSpeaker(**raw_metas)
        self._loaded_metas = metas_by_name

    def speaker_engine_metas(self, speaker_uuid: str) -> EngineSpeaker:
        """
        Return the engine-side metas loaded for `speaker_uuid`.
        """
        return self.loaded_metas[speaker_uuid]

    def combine_metas(self, core_metas: List[CoreSpeaker]) -> List[Speaker]:
        """
        Return the given core metas with the engine-side information attached.
        core_metas: models of the JSON returned by the core's metas()
        """
        combined: List[Speaker] = []
        for core_speaker in core_metas:
            engine_fields = self.speaker_engine_metas(
                core_speaker.speaker_uuid
            ).dict()
            combined.append(Speaker(**engine_fields, **core_speaker.dict()))
        return combined

    # FIXME: fix the circular import caused by SynthesisEngineBase by taking
    # a List[CoreSpeaker] instead of the engine
    def load_combined_metas(self, engine: "SynthesisEngineBase") -> List[Speaker]:
        """
        Return metas containing both core and engine information for the given engine.
        """
        core_speakers = [
            CoreSpeaker(**speaker_json) for speaker_json in json.loads(engine.speakers)
        ]
        return self.combine_metas(core_speakers)

    @property
    def engine_speakers_path(self) -> Path:
        # Directory passed to the constructor.
        return self._engine_speakers_path

    @property
    def loaded_metas(self) -> Dict[str, EngineSpeaker]:
        # Mapping built once in the constructor.
        return self._loaded_metas
61 | 
62 | 
def construct_lookup(speakers: List[Speaker]) -> Dict[int, Tuple[Speaker, StyleInfo]]:
    """
    Build a lookup table from style id to a `(speaker, style)` tuple.

    Each value pairs the owning speaker with the matching element of
    `speaker.styles`. If a style id occurs more than once, the last
    occurrence wins.
    """

    return {
        style.id: (speaker, style)
        for speaker in speakers
        for style in speaker.styles
    }
73 | 


--------------------------------------------------------------------------------
/voicevox_engine/metas/__init__.py:
--------------------------------------------------------------------------------
1 | from . import Metas, MetasStore
2 | 
# Submodules exported by `from <this package> import *`.
__all__ = [
    "Metas",
    "MetasStore",
]
7 | 


--------------------------------------------------------------------------------
/voicevox_engine/mora_list.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 以下のモーラ対応表はOpenJTalkのソースコードから取得し、
  3 | カタカナ表記とモーラが一対一対応するように改造した。
  4 | ライセンス表記：
  5 | -----------------------------------------------------------------
  6 |           The Japanese TTS System "Open JTalk"
  7 |           developed by HTS Working Group
  8 |           http://open-jtalk.sourceforge.net/
  9 | -----------------------------------------------------------------
 10 | 
 11 |  Copyright (c) 2008-2014  Nagoya Institute of Technology
 12 |                           Department of Computer Science
 13 | 
 14 | All rights reserved.
 15 | 
 16 | Redistribution and use in source and binary forms, with or
 17 | without modification, are permitted provided that the following
 18 | conditions are met:
 19 | 
 20 | - Redistributions of source code must retain the above copyright
 21 |   notice, this list of conditions and the following disclaimer.
 22 | - Redistributions in binary form must reproduce the above
 23 |   copyright notice, this list of conditions and the following
 24 |   disclaimer in the documentation and/or other materials provided
 25 |   with the distribution.
 26 | - Neither the name of the HTS working group nor the names of its
 27 |   contributors may be used to endorse or promote products derived
 28 |   from this software without specific prior written permission.
 29 | 
 30 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 31 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 32 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 33 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 34 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
 35 | BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 36 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 37 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 38 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 39 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 40 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 41 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 42 | POSSIBILITY OF SUCH DAMAGE.
 43 | """
 44 | _mora_list_minimum = [
 45 |     ["ヴォ", "v", "o"],
 46 |     ["ヴェ", "v", "e"],
 47 |     ["ヴィ", "v", "i"],
 48 |     ["ヴァ", "v", "a"],
 49 |     ["ヴ", "v", "u"],
 50 |     ["ン", "", "N"],
 51 |     ["ワ", "w", "a"],
 52 |     ["ロ", "r", "o"],
 53 |     ["レ", "r", "e"],
 54 |     ["ル", "r", "u"],
 55 |     ["リョ", "ry", "o"],
 56 |     ["リュ", "ry", "u"],
 57 |     ["リャ", "ry", "a"],
 58 |     ["リェ", "ry", "e"],
 59 |     ["リ", "r", "i"],
 60 |     ["ラ", "r", "a"],
 61 |     ["ヨ", "y", "o"],
 62 |     ["ユ", "y", "u"],
 63 |     ["ヤ", "y", "a"],
 64 |     ["モ", "m", "o"],
 65 |     ["メ", "m", "e"],
 66 |     ["ム", "m", "u"],
 67 |     ["ミョ", "my", "o"],
 68 |     ["ミュ", "my", "u"],
 69 |     ["ミャ", "my", "a"],
 70 |     ["ミェ", "my", "e"],
 71 |     ["ミ", "m", "i"],
 72 |     ["マ", "m", "a"],
 73 |     ["ポ", "p", "o"],
 74 |     ["ボ", "b", "o"],
 75 |     ["ホ", "h", "o"],
 76 |     ["ペ", "p", "e"],
 77 |     ["ベ", "b", "e"],
 78 |     ["ヘ", "h", "e"],
 79 |     ["プ", "p", "u"],
 80 |     ["ブ", "b", "u"],
 81 |     ["フォ", "f", "o"],
 82 |     ["フェ", "f", "e"],
 83 |     ["フィ", "f", "i"],
 84 |     ["ファ", "f", "a"],
 85 |     ["フ", "f", "u"],
 86 |     ["ピョ", "py", "o"],
 87 |     ["ピュ", "py", "u"],
 88 |     ["ピャ", "py", "a"],
 89 |     ["ピェ", "py", "e"],
 90 |     ["ピ", "p", "i"],
 91 |     ["ビョ", "by", "o"],
 92 |     ["ビュ", "by", "u"],
 93 |     ["ビャ", "by", "a"],
 94 |     ["ビェ", "by", "e"],
 95 |     ["ビ", "b", "i"],
 96 |     ["ヒョ", "hy", "o"],
 97 |     ["ヒュ", "hy", "u"],
 98 |     ["ヒャ", "hy", "a"],
 99 |     ["ヒェ", "hy", "e"],
100 |     ["ヒ", "h", "i"],
101 |     ["パ", "p", "a"],
102 |     ["バ", "b", "a"],
103 |     ["ハ", "h", "a"],
104 |     ["ノ", "n", "o"],
105 |     ["ネ", "n", "e"],
106 |     ["ヌ", "n", "u"],
107 |     ["ニョ", "ny", "o"],
108 |     ["ニュ", "ny", "u"],
109 |     ["ニャ", "ny", "a"],
110 |     ["ニェ", "ny", "e"],
111 |     ["ニ", "n", "i"],
112 |     ["ナ", "n", "a"],
113 |     ["ドゥ", "d", "u"],
114 |     ["ド", "d", "o"],
115 |     ["トゥ", "t", "u"],
116 |     ["ト", "t", "o"],
117 |     ["デョ", "dy", "o"],
118 |     ["デュ", "dy", "u"],
119 |     ["デャ", "dy", "a"],
120 |     ["デェ", "dy", "e"],
121 |     ["ディ", "d", "i"],
122 |     ["デ", "d", "e"],
123 |     ["テョ", "ty", "o"],
124 |     ["テュ", "ty", "u"],
125 |     ["テャ", "ty", "a"],
126 |     ["ティ", "t", "i"],
127 |     ["テ", "t", "e"],
128 |     ["ツォ", "ts", "o"],
129 |     ["ツェ", "ts", "e"],
130 |     ["ツィ", "ts", "i"],
131 |     ["ツァ", "ts", "a"],
132 |     ["ツ", "ts", "u"],
133 |     ["ッ", "", "cl"],
134 |     ["チョ", "ch", "o"],
135 |     ["チュ", "ch", "u"],
136 |     ["チャ", "ch", "a"],
137 |     ["チェ", "ch", "e"],
138 |     ["チ", "ch", "i"],
139 |     ["ダ", "d", "a"],
140 |     ["タ", "t", "a"],
141 |     ["ゾ", "z", "o"],
142 |     ["ソ", "s", "o"],
143 |     ["ゼ", "z", "e"],
144 |     ["セ", "s", "e"],
145 |     ["ズィ", "z", "i"],
146 |     ["ズ", "z", "u"],
147 |     ["スィ", "s", "i"],
148 |     ["ス", "s", "u"],
149 |     ["ジョ", "j", "o"],
150 |     ["ジュ", "j", "u"],
151 |     ["ジャ", "j", "a"],
152 |     ["ジェ", "j", "e"],
153 |     ["ジ", "j", "i"],
154 |     ["ショ", "sh", "o"],
155 |     ["シュ", "sh", "u"],
156 |     ["シャ", "sh", "a"],
157 |     ["シェ", "sh", "e"],
158 |     ["シ", "sh", "i"],
159 |     ["ザ", "z", "a"],
160 |     ["サ", "s", "a"],
161 |     ["ゴ", "g", "o"],
162 |     ["コ", "k", "o"],
163 |     ["ゲ", "g", "e"],
164 |     ["ケ", "k", "e"],
165 |     ["グヮ", "gw", "a"],
166 |     ["グ", "g", "u"],
167 |     ["クヮ", "kw", "a"],
168 |     ["ク", "k", "u"],
169 |     ["ギョ", "gy", "o"],
170 |     ["ギュ", "gy", "u"],
171 |     ["ギャ", "gy", "a"],
172 |     ["ギェ", "gy", "e"],
173 |     ["ギ", "g", "i"],
174 |     ["キョ", "ky", "o"],
175 |     ["キュ", "ky", "u"],
176 |     ["キャ", "ky", "a"],
177 |     ["キェ", "ky", "e"],
178 |     ["キ", "k", "i"],
179 |     ["ガ", "g", "a"],
180 |     ["カ", "k", "a"],
181 |     ["オ", "", "o"],
182 |     ["エ", "", "e"],
183 |     ["ウォ", "w", "o"],
184 |     ["ウェ", "w", "e"],
185 |     ["ウィ", "w", "i"],
186 |     ["ウ", "", "u"],
187 |     ["イェ", "y", "e"],
188 |     ["イ", "", "i"],
189 |     ["ア", "", "a"],
190 | ]
191 | _mora_list_additional = [
192 |     ["ヴョ", "by", "o"],
193 |     ["ヴュ", "by", "u"],
194 |     ["ヴャ", "by", "a"],
195 |     ["ヲ", "", "o"],
196 |     ["ヱ", "", "e"],
197 |     ["ヰ", "", "i"],
198 |     ["ヮ", "w", "a"],
199 |     ["ョ", "y", "o"],
200 |     ["ュ", "y", "u"],
201 |     ["ヅ", "z", "u"],
202 |     ["ヂ", "j", "i"],
203 |     ["ヶ", "k", "e"],
204 |     ["ャ", "y", "a"],
205 |     ["ォ", "", "o"],
206 |     ["ェ", "", "e"],
207 |     ["ゥ", "", "u"],
208 |     ["ィ", "", "i"],
209 |     ["ァ", "", "a"],
210 | ]
211 | 
# Phoneme string (consonant + vowel) -> kana, built from the minimum list only.
openjtalk_mora2text = {
    phoneme_consonant + phoneme_vowel: kana
    for kana, phoneme_consonant, phoneme_vowel in _mora_list_minimum
}
# Kana -> (consonant, vowel), built from the minimum list followed by the
# additional list.
openjtalk_text2mora = {
    kana: (phoneme_consonant, phoneme_vowel)
    for kana, phoneme_consonant, phoneme_vowel in (
        _mora_list_minimum + _mora_list_additional
    )
}
219 | 


--------------------------------------------------------------------------------
/voicevox_engine/morphing.py:
--------------------------------------------------------------------------------
  1 | from copy import deepcopy
  2 | from dataclasses import dataclass
  3 | from itertools import chain
  4 | from typing import Dict, List, Tuple
  5 | 
  6 | import numpy as np
  7 | import pyworld as pw
  8 | from scipy.signal import resample
  9 | 
 10 | from .metas.Metas import Speaker, SpeakerSupportPermittedSynthesisMorphing, StyleInfo
 11 | from .metas.MetasStore import construct_lookup
 12 | from .model import AudioQuery, MorphableTargetInfo, SpeakerNotFoundError
 13 | from .synthesis_engine import SynthesisEngine
 14 | 
 15 | 
# FIXME: ndarray type hint, https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder/blob/2b64f86197573497c685c785c6e0e743f407b63e/pyworld/pyworld.pyx#L398  # noqa
@dataclass(frozen=True)
class MorphingParameter:
    """
    Immutable bundle of analysis results consumed by `synthesis_morphing`.

    `create_morphing_parameter` fills it from pyworld's harvest / cheaptrick /
    d4c outputs for a base waveform and a target waveform.
    """

    # Sampling rate handed to the pyworld analysis and synthesis calls
    fs: int
    # Frame period handed to pyworld (presumably milliseconds — confirm)
    frame_period: float
    # F0 contour of the base waveform (pw.harvest)
    base_f0: np.ndarray
    # Aperiodicity of the base waveform (pw.d4c)
    base_aperiodicity: np.ndarray
    # Spectrogram of the base waveform (pw.cheaptrick)
    base_spectrogram: np.ndarray
    # Spectrogram of the target waveform, resized to the base spectrogram's shape
    target_spectrogram: np.ndarray
 25 | 
 26 | 
 27 | def create_morphing_parameter(
 28 |     base_wave: np.ndarray,
 29 |     target_wave: np.ndarray,
 30 |     fs: int,
 31 | ) -> MorphingParameter:
 32 |     frame_period = 1.0
 33 |     base_f0, base_time_axis = pw.harvest(base_wave, fs, frame_period=frame_period)
 34 |     base_spectrogram = pw.cheaptrick(base_wave, base_f0, base_time_axis, fs)
 35 |     base_aperiodicity = pw.d4c(base_wave, base_f0, base_time_axis, fs)
 36 | 
 37 |     target_f0, morph_time_axis = pw.harvest(target_wave, fs, frame_period=frame_period)
 38 |     target_spectrogram = pw.cheaptrick(target_wave, target_f0, morph_time_axis, fs)
 39 |     target_spectrogram.resize(base_spectrogram.shape)
 40 | 
 41 |     return MorphingParameter(
 42 |         fs=fs,
 43 |         frame_period=frame_period,
 44 |         base_f0=base_f0,
 45 |         base_aperiodicity=base_aperiodicity,
 46 |         base_spectrogram=base_spectrogram,
 47 |         target_spectrogram=target_spectrogram,
 48 |     )
 49 | 
 50 | 
 51 | def get_morphable_targets(
 52 |     speakers: List[Speaker],
 53 |     base_speakers: List[int],
 54 | ) -> List[Dict[int, MorphableTargetInfo]]:
 55 |     """
 56 |     speakers: 全話者の情報
 57 |     base_speakers: モーフィング可能か判定したいベースの話者リスト（スタイルID）
 58 |     """
 59 |     speaker_lookup = construct_lookup(speakers)
 60 | 
 61 |     morphable_targets_arr = []
 62 |     for base_speaker in base_speakers:
 63 |         morphable_targets = dict()
 64 |         for style in chain.from_iterable(speaker.styles for speaker in speakers):
 65 |             morphable_targets[style.id] = MorphableTargetInfo(
 66 |                 is_morphable=is_synthesis_morphing_permitted(
 67 |                     speaker_lookup=speaker_lookup,
 68 |                     base_speaker=base_speaker,
 69 |                     target_speaker=style.id,
 70 |                 )
 71 |             )
 72 |         morphable_targets_arr.append(morphable_targets)
 73 | 
 74 |     return morphable_targets_arr
 75 | 
 76 | 
 77 | def is_synthesis_morphing_permitted(
 78 |     speaker_lookup: Dict[int, Tuple[Speaker, StyleInfo]],
 79 |     base_speaker: int,
 80 |     target_speaker: int,
 81 | ) -> bool:
 82 |     """
 83 |     指定されたspeakerがモーフィング可能かどうか返す
 84 |     speakerが見つからない場合はSpeakerNotFoundErrorを送出する
 85 |     """
 86 | 
 87 |     base_speaker_data = speaker_lookup[base_speaker]
 88 |     target_speaker_data = speaker_lookup[target_speaker]
 89 | 
 90 |     if base_speaker_data is None or target_speaker_data is None:
 91 |         raise SpeakerNotFoundError(
 92 |             base_speaker if base_speaker_data is None else target_speaker
 93 |         )
 94 | 
 95 |     base_speaker_info, _ = base_speaker_data
 96 |     target_speaker_info, _ = target_speaker_data
 97 | 
 98 |     base_speaker_uuid = base_speaker_info.speaker_uuid
 99 |     target_speaker_uuid = target_speaker_info.speaker_uuid
100 | 
101 |     base_speaker_morphing_info: SpeakerSupportPermittedSynthesisMorphing = (
102 |         base_speaker_info.supported_features.permitted_synthesis_morphing
103 |     )
104 | 
105 |     target_speaker_morphing_info: SpeakerSupportPermittedSynthesisMorphing = (
106 |         target_speaker_info.supported_features.permitted_synthesis_morphing
107 |     )
108 | 
109 |     # 禁止されている場合はFalse
110 |     if (
111 |         base_speaker_morphing_info == SpeakerSupportPermittedSynthesisMorphing.NOTHING
112 |         or target_speaker_morphing_info
113 |         == SpeakerSupportPermittedSynthesisMorphing.NOTHING
114 |     ):
115 |         return False
116 |     # 同一話者のみの場合は同一話者判定
117 |     if (
118 |         base_speaker_morphing_info == SpeakerSupportPermittedSynthesisMorphing.SELF_ONLY
119 |         or target_speaker_morphing_info
120 |         == SpeakerSupportPermittedSynthesisMorphing.SELF_ONLY
121 |     ):
122 |         return base_speaker_uuid == target_speaker_uuid
123 |     # 念のため許可されているかチェック
124 |     return (
125 |         base_speaker_morphing_info == SpeakerSupportPermittedSynthesisMorphing.ALL
126 |         and target_speaker_morphing_info == SpeakerSupportPermittedSynthesisMorphing.ALL
127 |     )
128 | 
129 | 
def synthesis_morphing_parameter(
    engine: SynthesisEngine,
    query: AudioQuery,
    base_speaker: int,
    target_speaker: int,
) -> MorphingParameter:
    """
    Synthesize the query with the base and the target speaker and analyze both
    waveforms into a MorphingParameter.

    The caller's query is not modified; a deep copy is adjusted instead.
    """
    analysis_query = deepcopy(query)

    # To avoid a defect, run WORLD at the engine's default sampling rate;
    # conversion to the requested rate happens afterwards.
    analysis_query.outputSamplingRate = engine.default_sampling_rate

    # Synthesize in mono because the result is fed to WORLD.
    analysis_query.outputStereo = False

    base_wave = engine.synthesis(query=analysis_query, speaker_id=base_speaker)
    base_wave = base_wave.astype("float")
    target_wave = engine.synthesis(query=analysis_query, speaker_id=target_speaker)
    target_wave = target_wave.astype("float")

    return create_morphing_parameter(
        base_wave=base_wave,
        target_wave=target_wave,
        fs=analysis_query.outputSamplingRate,
    )
154 | 
155 | 
def synthesis_morphing(
    morph_param: MorphingParameter,
    morph_rate: float,
    output_fs: int,
    output_stereo: bool = False,
) -> np.ndarray:
    """
    Generate morphed audio from the given parameters at the given ratio.

    Parameters
    ----------
    morph_param : MorphingParameter
        Parameters created by `synthesis_morphing_parameter` or
        `create_morphing_parameter`

    morph_rate : float
        Morphing ratio.
        0.0 is closest to the base speaker, 1.0 closest to the target speaker.

    output_fs : int
        Sampling rate of the returned audio; the synthesized wave is resampled
        when it differs from `morph_param.fs`

    output_stereo : bool
        When True, the mono wave is duplicated into two identical channels and
        returned with shape (samples, 2)

    Returns
    -------
    generated : np.ndarray
        The morphed audio

    Raises
    -------
    ValueError
        If morph_rate is not within [0, 1] (NaN included)
    """

    # Written as a chained comparison so that NaN, which fails every
    # comparison, is rejected as well instead of slipping through.
    if not (0.0 <= morph_rate <= 1.0):
        raise ValueError("morph_rateは0.0から1.0の範囲で指定してください")

    # Linear interpolation between the base and the target spectrogram.
    morph_spectrogram = (
        morph_param.base_spectrogram * (1.0 - morph_rate)
        + morph_param.target_spectrogram * morph_rate
    )

    # F0 and aperiodicity always come from the base speaker.
    y_h = pw.synthesize(
        morph_param.base_f0,
        morph_spectrogram,
        morph_param.base_aperiodicity,
        morph_param.fs,
        morph_param.frame_period,
    )

    # TODO: share this with the resampling done in synthesis_engine.py
    if output_fs != morph_param.fs:
        y_h = resample(y_h, output_fs * len(y_h) // morph_param.fs)

    if output_stereo:
        y_h = np.array([y_h, y_h]).T

    return y_h
209 | 


--------------------------------------------------------------------------------
/voicevox_engine/part_of_speech_data.py:
--------------------------------------------------------------------------------
  1 | from typing import Dict
  2 | 
  3 | from .model import (
  4 |     USER_DICT_MAX_PRIORITY,
  5 |     USER_DICT_MIN_PRIORITY,
  6 |     PartOfSpeechDetail,
  7 |     WordTypes,
  8 | )
  9 | 
# Aliases of the user-dictionary priority bounds imported from .model
MIN_PRIORITY = USER_DICT_MIN_PRIORITY
MAX_PRIORITY = USER_DICT_MAX_PRIORITY

# Part-of-speech details for every supported word type.
# Each entry carries 11 cost candidates listed in ascending order.
# NOTE(review): presumably one candidate per priority level between
# MIN_PRIORITY and MAX_PRIORITY - confirm against the code that indexes them.
part_of_speech_data: Dict[WordTypes, PartOfSpeechDetail] = {
    # Proper noun ("名詞" = noun, "固有名詞" = proper noun, "一般" = general)
    WordTypes.PROPER_NOUN: PartOfSpeechDetail(
        part_of_speech="名詞",
        part_of_speech_detail_1="固有名詞",
        part_of_speech_detail_2="一般",
        part_of_speech_detail_3="*",
        context_id=1348,
        cost_candidates=[
            -988,
            3488,
            4768,
            6048,
            7328,
            8609,
            8734,
            8859,
            8984,
            9110,
            14176,
        ],
        accent_associative_rules=[
            "*",
            "C1",
            "C2",
            "C3",
            "C4",
            "C5",
        ],
    ),
    # Common noun ("名詞" = noun, "一般" = general)
    WordTypes.COMMON_NOUN: PartOfSpeechDetail(
        part_of_speech="名詞",
        part_of_speech_detail_1="一般",
        part_of_speech_detail_2="*",
        part_of_speech_detail_3="*",
        context_id=1345,
        cost_candidates=[
            -4445,
            49,
            1473,
            2897,
            4321,
            5746,
            6554,
            7362,
            8170,
            8979,
            15001,
        ],
        accent_associative_rules=[
            "*",
            "C1",
            "C2",
            "C3",
            "C4",
            "C5",
        ],
    ),
    # Verb ("動詞" = verb, "自立" = independent)
    WordTypes.VERB: PartOfSpeechDetail(
        part_of_speech="動詞",
        part_of_speech_detail_1="自立",
        part_of_speech_detail_2="*",
        part_of_speech_detail_3="*",
        context_id=642,
        cost_candidates=[
            3100,
            6160,
            6360,
            6561,
            6761,
            6962,
            7414,
            7866,
            8318,
            8771,
            13433,
        ],
        accent_associative_rules=[
            "*",
        ],
    ),
    # Adjective ("形容詞" = adjective, "自立" = independent)
    WordTypes.ADJECTIVE: PartOfSpeechDetail(
        part_of_speech="形容詞",
        part_of_speech_detail_1="自立",
        part_of_speech_detail_2="*",
        part_of_speech_detail_3="*",
        context_id=20,
        cost_candidates=[
            1527,
            3266,
            3561,
            3857,
            4153,
            4449,
            5149,
            5849,
            6549,
            7250,
            10001,
        ],
        accent_associative_rules=[
            "*",
        ],
    ),
    # Suffix ("名詞" = noun, "接尾" = suffix, "一般" = general)
    WordTypes.SUFFIX: PartOfSpeechDetail(
        part_of_speech="名詞",
        part_of_speech_detail_1="接尾",
        part_of_speech_detail_2="一般",
        part_of_speech_detail_3="*",
        context_id=1358,
        cost_candidates=[
            4399,
            5373,
            6041,
            6710,
            7378,
            8047,
            9440,
            10834,
            12228,
            13622,
            15847,
        ],
        accent_associative_rules=[
            "*",
            "C1",
            "C2",
            "C3",
            "C4",
            "C5",
        ],
    ),
}
145 | 


--------------------------------------------------------------------------------
/voicevox_engine/preset/Preset.py:
--------------------------------------------------------------------------------
 1 | from pydantic import BaseModel, Field
 2 | 
 3 | 
class Preset(BaseModel):
    """
    プリセット情報
    """

    # Preset information: one named set of synthesis parameters.
    # NOTE(review): the docstring and the Field titles are left untouched
    # because pydantic may expose them in the generated schema - confirm.
    id: int = Field(title="プリセットID")  # preset ID
    name: str = Field(title="プリセット名")  # preset name
    speaker_uuid: str = Field(title="スピーカーのUUID")  # UUID of the speaker
    style_id: int = Field(title="スタイルID")  # style ID
    speedScale: float = Field(title="全体の話速")  # overall speech speed
    pitchScale: float = Field(title="全体の音高")  # overall pitch
    intonationScale: float = Field(title="全体の抑揚")  # overall intonation
    volumeScale: float = Field(title="全体の音量")  # overall volume
    prePhonemeLength: float = Field(title="音声の前の無音時間")  # silence before the audio
    postPhonemeLength: float = Field(title="音声の後の無音時間")  # silence after the audio
19 | 


--------------------------------------------------------------------------------
/voicevox_engine/preset/PresetError.py:
--------------------------------------------------------------------------------
class PresetError(Exception):
    """Error raised for preset problems such as a missing, invalid or unwritable preset file."""
3 | 


--------------------------------------------------------------------------------
/voicevox_engine/preset/PresetManager.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | from typing import List
  3 | 
  4 | import yaml
  5 | from pydantic import ValidationError, parse_obj_as
  6 | 
  7 | from .Preset import Preset
  8 | from .PresetError import PresetError
  9 | 
 10 | 
 11 | class PresetManager:
 12 |     def __init__(
 13 |         self,
 14 |         preset_path: Path,
 15 |     ):
 16 |         self.presets = []
 17 |         self.last_modified_time = 0
 18 |         self.preset_path = preset_path
 19 | 
 20 |     def load_presets(self):
 21 |         """
 22 |         プリセットのYAMLファイルを読み込む
 23 | 
 24 |         Returns
 25 |         -------
 26 |         ret: List[Preset]
 27 |             プリセットのリスト
 28 |         """
 29 | 
 30 |         # 設定ファイルのタイムスタンプを確認
 31 |         try:
 32 |             _last_modified_time = self.preset_path.stat().st_mtime
 33 |             if _last_modified_time == self.last_modified_time:
 34 |                 return self.presets
 35 |         except OSError:
 36 |             raise PresetError("プリセットの設定ファイルが見つかりません")
 37 | 
 38 |         with open(self.preset_path, mode="r", encoding="utf-8") as f:
 39 |             obj = yaml.safe_load(f)
 40 |             if obj is None:
 41 |                 raise PresetError("プリセットの設定ファイルが空の内容です")
 42 | 
 43 |         try:
 44 |             _presets = parse_obj_as(List[Preset], obj)
 45 |         except ValidationError:
 46 |             raise PresetError("プリセットの設定ファイルにミスがあります")
 47 | 
 48 |         # idが一意か確認
 49 |         if len([preset.id for preset in _presets]) != len(
 50 |             {preset.id for preset in _presets}
 51 |         ):
 52 |             raise PresetError("プリセットのidに重複があります")
 53 | 
 54 |         self.presets = _presets
 55 |         self.last_modified_time = _last_modified_time
 56 |         return self.presets
 57 | 
 58 |     def add_preset(self, preset: Preset):
 59 |         """
 60 |         YAMLファイルに新規のプリセットを追加する
 61 | 
 62 |         Parameters
 63 |         ----------
 64 |         preset : Preset
 65 |             追加するプリセットを渡す
 66 | 
 67 |         Returns
 68 |         -------
 69 |         ret: int
 70 |             追加したプリセットのプリセットID
 71 |         """
 72 | 
 73 |         # 手動でファイルが更新されているかも知れないので、最新のYAMLファイルを読み直す
 74 |         self.load_presets()
 75 | 
 76 |         # IDが0未満、または存在するIDなら新しいIDを決定し、配列に追加
 77 |         if preset.id < 0 or preset.id in {preset.id for preset in self.presets}:
 78 |             preset.id = max([preset.id for preset in self.presets]) + 1
 79 |         self.presets.append(preset)
 80 | 
 81 |         # ファイルに書き込み
 82 |         try:
 83 |             with open(self.preset_path, mode="w", encoding="utf-8") as f:
 84 |                 yaml.safe_dump(
 85 |                     [preset.dict() for preset in self.presets],
 86 |                     f,
 87 |                     allow_unicode=True,
 88 |                     sort_keys=False,
 89 |                 )
 90 |         except Exception as err:
 91 |             self.presets.pop()
 92 |             if isinstance(err, FileNotFoundError):
 93 |                 raise PresetError("プリセットの設定ファイルに書き込み失敗しました")
 94 |             else:
 95 |                 raise err
 96 | 
 97 |         return preset.id
 98 | 
 99 |     def update_preset(self, preset: Preset):
100 |         """
101 |         YAMLファイルのプリセットを更新する
102 | 
103 |         Parameters
104 |         ----------
105 |         preset : Preset
106 |             更新するプリセットを渡す
107 | 
108 |         Returns
109 |         -------
110 |         ret: int
111 |             更新したプリセットのプリセットID
112 |         """
113 | 
114 |         # 手動でファイルが更新されているかも知れないので、最新のYAMLファイルを読み直す
115 |         self.load_presets()
116 | 
117 |         # IDが存在するか探索
118 |         prev_preset = (-1, None)
119 |         for i in range(len(self.presets)):
120 |             if self.presets[i].id == preset.id:
121 |                 prev_preset = (i, self.presets[i])
122 |                 self.presets[i] = preset
123 |                 break
124 |         else:
125 |             raise PresetError("更新先のプリセットが存在しません")
126 | 
127 |         # ファイルに書き込み
128 |         try:
129 |             with open(self.preset_path, mode="w", encoding="utf-8") as f:
130 |                 yaml.safe_dump(
131 |                     [preset.dict() for preset in self.presets],
132 |                     f,
133 |                     allow_unicode=True,
134 |                     sort_keys=False,
135 |                 )
136 |         except Exception as err:
137 |             if prev_preset != (-1, None):
138 |                 self.presets[prev_preset[0]] = prev_preset[1]
139 |             if isinstance(err, FileNotFoundError):
140 |                 raise PresetError("プリセットの設定ファイルに書き込み失敗しました")
141 |             else:
142 |                 raise err
143 | 
144 |         return preset.id
145 | 
146 |     def delete_preset(self, id: int):
147 |         """
148 |         YAMLファイルのプリセットを削除する
149 | 
150 |         Parameters
151 |         ----------
152 |         id: int
153 |             削除するプリセットのプリセットIDを渡す
154 | 
155 |         Returns
156 |         -------
157 |         ret: int
158 |             削除したプリセットのプリセットID
159 |         """
160 | 
161 |         # 手動でファイルが更新されているかも知れないので、最新のYAMLファイルを読み直す
162 |         self.load_presets()
163 | 
164 |         # IDが存在するか探索
165 |         buf = None
166 |         buf_index = -1
167 |         for i in range(len(self.presets)):
168 |             if self.presets[i].id == id:
169 |                 buf = self.presets.pop(i)
170 |                 buf_index = i
171 |                 break
172 |         else:
173 |             raise PresetError("削除対象のプリセットが存在しません")
174 | 
175 |         # ファイルに書き込み
176 |         try:
177 |             with open(self.preset_path, mode="w", encoding="utf-8") as f:
178 |                 yaml.safe_dump(
179 |                     [preset.dict() for preset in self.presets],
180 |                     f,
181 |                     allow_unicode=True,
182 |                     sort_keys=False,
183 |                 )
184 |         except FileNotFoundError:
185 |             self.presets.insert(buf_index, buf)
186 |             raise PresetError("プリセットの設定ファイルに書き込み失敗しました")
187 | 
188 |         return id
189 | 


--------------------------------------------------------------------------------
/voicevox_engine/preset/__init__.py:
--------------------------------------------------------------------------------
 1 | from .Preset import Preset
 2 | from .PresetError import PresetError
 3 | from .PresetManager import PresetManager
 4 | 
 5 | __all__ = [
 6 |     "Preset",
 7 |     "PresetManager",
 8 |     "PresetError",
 9 | ]
10 | 


--------------------------------------------------------------------------------
/voicevox_engine/setting/Setting.py:
--------------------------------------------------------------------------------
 1 | from enum import Enum
 2 | from typing import Optional
 3 | 
 4 | from pydantic import BaseModel, Field
 5 | 
 6 | 
class CorsPolicyMode(str, Enum):
    """
    CORSの許可モード
    """

    # CORS permission mode. Members are also `str` instances, so they compare
    # equal to their plain string values.
    # NOTE(review): the docstring is left untouched because it may be exposed
    # in the generated schema - confirm.
    all = "all"  # allow requests from every origin
    localapps = "localapps"  # allow requests from local applications
14 | 
15 | 
class Setting(BaseModel):
    """
    エンジンの設定情報
    """

    # Engine settings.
    # NOTE(review): the docstring and the Field titles are left untouched
    # because pydantic may expose them in the generated schema - confirm.
    cors_policy_mode: CorsPolicyMode = Field(title="リソース共有ポリシー")  # resource sharing (CORS) policy
    allow_origin: Optional[str] = Field(title="許可するオリジン")  # origin to allow; may be None

    class Config:
        # pydantic option: keep the enum's value rather than the enum member
        # in the model, which presumably keeps `.dict()` output YAML-friendly
        use_enum_values = True
26 | 


--------------------------------------------------------------------------------
/voicevox_engine/setting/SettingLoader.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import yaml
 4 | 
 5 | from ..utility import engine_root, get_save_dir
 6 | from .Setting import Setting
 7 | 
# Settings file under the engine root; read when the loader's own file is missing
DEFAULT_SETTING_PATH: Path = engine_root() / "default_setting.yml"
# Settings file inside the save directory returned by get_save_dir()
USER_SETTING_PATH: Path = get_save_dir() / "setting.yml"
10 | 
11 | 
class SettingLoader:
    """Read and write the engine settings stored in a YAML file."""

    def __init__(self, setting_file_path: Path) -> None:
        self.setting_file_path = setting_file_path

    def load_setting_file(self) -> Setting:
        """
        Load the settings.

        The file given to the constructor is read when it exists; otherwise
        the default settings file is read instead.

        Returns
        -------
        setting : Setting
            The loaded settings.

        Raises
        ------
        KeyError
            If the file does not define `cors_policy_mode`.
        """
        source_path = (
            self.setting_file_path
            if self.setting_file_path.is_file()
            else DEFAULT_SETTING_PATH
        )
        raw_setting = yaml.safe_load(source_path.read_text(encoding="utf-8"))

        return Setting(
            cors_policy_mode=raw_setting["cors_policy_mode"],
            # allow_origin is Optional on the model, so a file that omits the
            # key is treated as "not set" instead of failing with KeyError
            allow_origin=raw_setting.get("allow_origin"),
        )

    def dump_setting_file(self, settings: Setting) -> None:
        """
        Write the given settings to the file passed to the constructor.

        Parameters
        ----------
        settings : Setting
            The settings to save.
        """
        settings_dict = settings.dict()

        with open(self.setting_file_path, mode="w", encoding="utf-8") as f:
            yaml.safe_dump(settings_dict, f)
34 | 


--------------------------------------------------------------------------------
/voicevox_engine/setting/__init__.py:
--------------------------------------------------------------------------------
 1 | from .Setting import CorsPolicyMode, Setting
 2 | from .SettingLoader import USER_SETTING_PATH, SettingLoader
 3 | 
 4 | __all__ = [
 5 |     "USER_SETTING_PATH",
 6 |     "CorsPolicyMode",
 7 |     "Setting",
 8 |     "SettingLoader",
 9 | ]
10 | 


--------------------------------------------------------------------------------
/voicevox_engine/synthesis_engine/__init__.py:
--------------------------------------------------------------------------------
 1 | from .core_wrapper import CoreWrapper, load_runtime_lib
 2 | from .make_synthesis_engines import make_synthesis_engines
 3 | from .synthesis_engine import SynthesisEngine
 4 | from .synthesis_engine_base import SynthesisEngineBase
 5 | 
 6 | __all__ = [
 7 |     "CoreWrapper",
 8 |     "load_runtime_lib",
 9 |     "make_synthesis_engines",
10 |     "SynthesisEngine",
11 |     "SynthesisEngineBase",
12 | ]
13 | 


--------------------------------------------------------------------------------
/voicevox_engine/synthesis_engine/make_synthesis_engines.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import sys
  3 | from pathlib import Path
  4 | from typing import Dict, List, Optional
  5 | 
  6 | from ..utility import engine_root, get_save_dir
  7 | from .core_wrapper import CoreWrapper, load_runtime_lib
  8 | from .synthesis_engine import SynthesisEngine, SynthesisEngineBase
  9 | 
 10 | 
 11 | def make_synthesis_engines(
 12 |     use_gpu: bool,
 13 |     voicelib_dirs: Optional[List[Path]] = None,
 14 |     voicevox_dir: Optional[Path] = None,
 15 |     runtime_dirs: Optional[List[Path]] = None,
 16 |     cpu_num_threads: Optional[int] = None,
 17 |     enable_mock: bool = True,
 18 |     load_all_models: bool = False,
 19 | ) -> Dict[str, SynthesisEngineBase]:
 20 |     """
 21 |     音声ライブラリをロードして、音声合成エンジンを生成
 22 | 
 23 |     Parameters
 24 |     ----------
 25 |     use_gpu: bool
 26 |         音声ライブラリに GPU を使わせるか否か
 27 |     voicelib_dirs: List[Path], optional, default=None
 28 |         音声ライブラリ自体があるディレクトリのリスト
 29 |     voicevox_dir: Path, optional, default=None
 30 |         コンパイル済みのvoicevox、またはvoicevox_engineがあるディレクトリ
 31 |     runtime_dirs: List[Path], optional, default=None
 32 |         コアで使用するライブラリのあるディレクトリのリスト
 33 |         None のとき、voicevox_dir、カレントディレクトリになる
 34 |     cpu_num_threads: int, optional, default=None
 35 |         音声ライブラリが、推論に用いるCPUスレッド数を設定する
 36 |         Noneのとき、ライブラリ側の挙動により論理コア数の半分か、物理コア数が指定される
 37 |     enable_mock: bool, optional, default=True
 38 |         コア読み込みに失敗したとき、代わりにmockを使用するかどうか
 39 |     load_all_models: bool, optional, default=False
 40 |         起動時に全てのモデルを読み込むかどうか
 41 |     """
 42 |     if cpu_num_threads == 0 or cpu_num_threads is None:
 43 |         print(
 44 |             "Warning: cpu_num_threads is set to 0. "
 45 |             + "( The library leaves the decision to the synthesis runtime )",
 46 |             file=sys.stderr,
 47 |         )
 48 |         cpu_num_threads = 0
 49 | 
 50 |     if voicevox_dir is not None:
 51 |         if voicelib_dirs is not None:
 52 |             voicelib_dirs.append(voicevox_dir)
 53 |         else:
 54 |             voicelib_dirs = [voicevox_dir]
 55 |         if runtime_dirs is not None:
 56 |             runtime_dirs.append(voicevox_dir)
 57 |         else:
 58 |             runtime_dirs = [voicevox_dir]
 59 |     else:
 60 |         root_dir = engine_root()
 61 |         if voicelib_dirs is None:
 62 |             voicelib_dirs = [root_dir]
 63 |         if runtime_dirs is None:
 64 |             runtime_dirs = [root_dir]
 65 | 
 66 |     voicelib_dirs = [p.expanduser() for p in voicelib_dirs]
 67 |     runtime_dirs = [p.expanduser() for p in runtime_dirs]
 68 | 
 69 |     load_runtime_lib(runtime_dirs)
 70 | 
 71 |     synthesis_engines = {}
 72 | 
 73 |     if not enable_mock:
 74 | 
 75 |         def load_core_library(core_dir: Path, suppress_error: bool = False):
 76 |             """
 77 |             指定されたディレクトリにあるコアを読み込む。
 78 |             ユーザーディレクトリの場合は存在しないこともあるので、エラーを抑制すると良い。
 79 |             """
 80 |             try:
 81 |                 core = CoreWrapper(use_gpu, core_dir, cpu_num_threads, load_all_models)
 82 |                 metas = json.loads(core.metas())
 83 |                 core_version = metas[0]["version"]
 84 |                 if core_version in synthesis_engines:
 85 |                     print(
 86 |                         "Warning: Core loading is skipped because of version duplication.",
 87 |                         file=sys.stderr,
 88 |                     )
 89 |                 else:
 90 |                     synthesis_engines[core_version] = SynthesisEngine(core=core)
 91 |             except Exception:
 92 |                 if not suppress_error:
 93 |                     raise
 94 | 
 95 |         for core_dir in voicelib_dirs:
 96 |             load_core_library(core_dir)
 97 | 
 98 |         # ユーザーディレクトリにあるコアを読み込む
 99 |         user_voicelib_dirs = []
100 |         core_libraries_dir = get_save_dir() / "core_libraries"
101 |         core_libraries_dir.mkdir(exist_ok=True)
102 |         user_voicelib_dirs.append(core_libraries_dir)
103 |         for path in core_libraries_dir.glob("*"):
104 |             if not path.is_dir():
105 |                 continue
106 |             user_voicelib_dirs.append(path)
107 | 
108 |         for core_dir in user_voicelib_dirs:
109 |             load_core_library(core_dir, suppress_error=True)
110 | 
111 |     else:
112 |         # モック追加
113 |         from ..dev.core import metas as mock_metas
114 |         from ..dev.core import supported_devices as mock_supported_devices
115 |         from ..dev.synthesis_engine import MockSynthesisEngine
116 | 
117 |         if "0.0.0" not in synthesis_engines:
118 |             synthesis_engines["0.0.0"] = MockSynthesisEngine(
119 |                 speakers=mock_metas(), supported_devices=mock_supported_devices()
120 |             )
121 | 
122 |     return synthesis_engines
123 | 


--------------------------------------------------------------------------------
/voicevox_engine/utility/__init__.py:
--------------------------------------------------------------------------------
 1 | from .connect_base64_waves import (
 2 |     ConnectBase64WavesException,
 3 |     connect_base64_waves,
 4 |     decode_base64_waves,
 5 | )
 6 | from .mutex_utility import mutex_wrapper
 7 | from .path_utility import delete_file, engine_root, get_save_dir
 8 | 
 9 | __all__ = [
10 |     "ConnectBase64WavesException",
11 |     "connect_base64_waves",
12 |     "decode_base64_waves",
13 |     "delete_file",
14 |     "engine_root",
15 |     "get_save_dir",
16 |     "mutex_wrapper",
17 | ]
18 | 


--------------------------------------------------------------------------------
/voicevox_engine/utility/connect_base64_waves.py:
--------------------------------------------------------------------------------
 1 | import base64
 2 | import io
 3 | from typing import List, Tuple
 4 | 
 5 | import numpy as np
 6 | import soundfile
 7 | from scipy.signal import resample
 8 | 
 9 | 
class ConnectBase64WavesException(Exception):
    """Error raised when base64-encoded wav data cannot be decoded or read."""

    def __init__(self, message: str):
        # Forward the message to Exception so that str() and .args carry it
        # even when it is passed by keyword
        super().__init__(message)
        self.message = message
13 | 
14 | 
def decode_base64_waves(waves: List[str]) -> List[Tuple[np.ndarray, int]]:
    """
    Decode several base64-encoded wav files.

    Parameters
    ----------
    waves: list[str]
        Base64-encoded wav data.

    Returns
    -------
    waves_nparray_sr: List[Tuple[np.ndarray, int]]
        One (waveform as a NumPy array, sampling rate) tuple per input.

    Raises
    ------
    ConnectBase64WavesException
        If the list is empty, an item is not valid base64, or the decoded
        bytes cannot be read as an audio file.
    """
    if len(waves) == 0:
        raise ConnectBase64WavesException("wavファイルが含まれていません")

    decoded_waves = []
    for encoded in waves:
        try:
            raw_bytes = base64.standard_b64decode(encoded)
        except ValueError:
            raise ConnectBase64WavesException("base64デコードに失敗しました")

        try:
            data_and_rate = soundfile.read(io.BytesIO(raw_bytes))
        except Exception:
            raise ConnectBase64WavesException("wavファイルを読み込めませんでした")
        else:
            decoded_waves.append(data_and_rate)

    return decoded_waves
43 | 
44 | 
def connect_base64_waves(waves: List[str]) -> Tuple[np.ndarray, int]:
    """
    Decode base64-encoded wav files and join them into one waveform.

    Every waveform is brought to the highest sampling rate found among the
    inputs, and arrays with fewer dimensions than the widest one are
    duplicated into two columns before concatenation.

    Parameters
    ----------
    waves: list[str]
        Base64-encoded wav data.

    Returns
    -------
    (np.ndarray, int)
        The concatenated waveform and its sampling rate.
    """
    decoded = decode_base64_waves(waves)

    target_rate = max(rate for _, rate in decoded)
    widest_ndim = max(samples.ndim for samples, _ in decoded)
    assert 0 < widest_ndim <= 2

    aligned = []
    for samples, rate in decoded:
        # Bring every waveform up to the common sampling rate
        if rate != target_rate:
            samples = resample(samples, target_rate * len(samples) // rate)
        # Duplicate the signal when another input has more dimensions
        if samples.ndim < widest_ndim:
            samples = np.array([samples, samples]).T
        aligned.append(samples)

    return np.concatenate(aligned), target_rate
61 | 


--------------------------------------------------------------------------------
/voicevox_engine/utility/mutex_utility.py:
--------------------------------------------------------------------------------
 1 | import threading
 2 | 
 3 | 
 4 | def mutex_wrapper(lock: threading.Lock):
 5 |     def wrap(f):
 6 |         def func(*args, **kw):
 7 |             lock.acquire()
 8 |             try:
 9 |                 return f(*args, **kw)
10 |             finally:
11 |                 lock.release()
12 | 
13 |         return func
14 | 
15 |     return wrap
16 | 


--------------------------------------------------------------------------------
/voicevox_engine/utility/path_utility.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import traceback
 4 | from pathlib import Path
 5 | 
 6 | from appdirs import user_data_dir
 7 | 
 8 | 
 9 | def engine_root() -> Path:
10 |     if is_development():
11 |         root_dir = Path(__file__).parents[2]
12 | 
13 |     # Nuitka/Pyinstallerでビルドされている場合
14 |     else:
15 |         root_dir = Path(sys.argv[0]).parent
16 | 
17 |     return root_dir.resolve(strict=True)
18 | 
19 | 
def is_development() -> bool:
    """
    Tell whether this is a development (non-built) environment.

    The environment counts as development unless the code was compiled with
    Nuitka or PyInstaller.
    """
    # A Nuitka build puts __compiled__ into the module globals
    built_with_nuitka = "__compiled__" in globals()
    # A PyInstaller build sets sys.frozen
    built_with_pyinstaller = bool(getattr(sys, "frozen", False))

    return not (built_with_nuitka or built_with_pyinstaller)
34 | 
35 | 
def get_save_dir():
    """Return the per-user data directory, which differs between development and built runs."""
    # FIXME: include an engine-specific ID in the save location
    # FIXME: on Windows the data is saved under `voicevox-engine/voicevox-engine`;
    # change it to `VOICEVOX/voicevox-engine`
    app_name = "voicevox-engine-dev" if is_development() else "voicevox-engine"
    return Path(user_data_dir(app_name))
45 | 
46 | 
def delete_file(file_path: str) -> None:
    """
    Remove the file at `file_path` on a best-effort basis.

    An OSError raised by the removal is not propagated; its traceback is
    printed instead.
    """
    try:
        os.remove(file_path)
    except OSError:
        # Deliberately non-fatal: report the failure and carry on
        traceback.print_exc()
52 | 


--------------------------------------------------------------------------------