├── CreateProject.bat
├── Download-DemoModel.bat
├── Download-LearningSample-JVNV.bat
├── HiyoriUi.bat
├── LICENSE.txt
├── README.md
├── doc
    ├── img
    │   ├── HiyoriUi.png
    │   ├── HiyoriUiGen.png
    │   ├── HiyoriUiModel.png
    │   ├── HiyoriUiModelCpu.png
    │   ├── HiyoriUiModelEnable.png
    │   ├── HiyoriUiModelJa.png
    │   └── HiyoriUiText.png
    └── sample
    │   ├── v21-gomen.wav
    │   ├── v21-massa.wav
    │   ├── v21-suki.wav
    │   └── v21-zetuyuru.wav
└── src
    ├── G_Compress.bat
    ├── G_WebUi.bat
    ├── HiyoriUi.bat
    ├── Install-EasyBertVits2.bat
    ├── Learn.bat
    ├── Setup-Python.bat
    ├── Setup.bat
    ├── Tensorboard.bat
    ├── config.json
    └── config.yml


/CreateProject.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | @REM Creates a training project skeleton under Bert-VITS2\Data\SPEAKER_NAME.
 3 | @REM Usage: CreateProject.bat [speaker-name]  -- prompts for the name when it is omitted.
 4 | @REM Every step is guarded by "if not exist", so existing project files are never overwritten.
 5 | chcp 65001 > NUL
 6 | pushd %~dp0
 7 | set PS_CMD=PowerShell -Version 5.1 -ExecutionPolicy Bypass
 8 | 
 9 | @REM Clear any stale value first: "set /p" keeps the previous value when the user just presses Enter.
10 | set "SPEAKER_NAME="
11 | if "%~1" neq "" (
12 | 	set "SPEAKER_NAME=%~1"
13 | ) else (
14 | 	set /p SPEAKER_NAME=話者の名前を英数字とハイフンで入力してください: 
15 | )
16 | @REM Without a name BASE_PATH would collapse to the Data root and files would be copied there.
17 | if not defined SPEAKER_NAME (
18 | 	echo [ERROR] 話者の名前が入力されていません。
19 | 	pause & popd & exit /b 1
20 | )
21 | set "BASE_PATH=Bert-VITS2\Data\%SPEAKER_NAME%"
22 | 
23 | @REM Open the new project folder in Explorer only when it is created for the first time.
24 | if not exist "%BASE_PATH%\audios\raw" (
25 | 	mkdir "%BASE_PATH%\audios\raw"
26 | 	explorer "%BASE_PATH%\"
27 | )
28 | 
29 | @REM Copy the templates from src and replace the {SPEAKER_NAME} placeholder with the project name.
30 | if not exist "%BASE_PATH%\config.yml" (
31 | 	copy src\config.yml "%BASE_PATH%\config.yml" > NUL
32 | 	%PS_CMD% "&{(Get-Content '%BASE_PATH%\config.yml' -Encoding UTF8) -creplace '{SPEAKER_NAME}', '%SPEAKER_NAME%' | Set-Content '%BASE_PATH%\config.yml' -Encoding UTF8 }"
33 | )
34 | if not exist "%BASE_PATH%\config.json" ( copy src\config.json "%BASE_PATH%\config.json" > NUL )
35 | if not exist "%BASE_PATH%\HiyoriUi.bat" ( copy src\HiyoriUi.bat "%BASE_PATH%\HiyoriUi.bat" > NUL )
36 | if not exist "%BASE_PATH%\Learn.bat" ( copy src\Learn.bat "%BASE_PATH%\Learn.bat" > NUL )
37 | if not exist "%BASE_PATH%\Tensorboard.bat" (
38 | 	copy src\Tensorboard.bat "%BASE_PATH%\Tensorboard.bat" > NUL
39 | 	%PS_CMD% "&{(Get-Content '%BASE_PATH%\Tensorboard.bat') -creplace '{SPEAKER_NAME}', '%SPEAKER_NAME%' | Set-Content '%BASE_PATH%\Tensorboard.bat' }"
40 | )
41 | 
42 | @REM Seed filelists\esd.list with one example line: wav path, speaker, language, transcript.
43 | @REM The carets stay literal inside the quoted "set" and escape the pipes when echo expands them.
44 | if not exist "%BASE_PATH%\filelists" ( mkdir "%BASE_PATH%\filelists" )
45 | set "TEXT_TEMPLATE=Data/%SPEAKER_NAME%/audios/wavs/file_name.wav^|%SPEAKER_NAME%^|JP^|こんにちは。"
46 | if not exist "%BASE_PATH%\filelists\esd.list" (
47 | 	echo %TEXT_TEMPLATE% > "%BASE_PATH%\filelists\esd.list"
48 | )
49 | 
50 | @REM Copy the base checkpoints from lib\Bert-VITS2-2.3 into the project's models folder.
51 | if not exist "%BASE_PATH%\models" ( mkdir "%BASE_PATH%\models" )
52 | if not exist "%BASE_PATH%\models\DUR_0.pth" (
53 | 	copy lib\Bert-VITS2-2.3\DUR_0.pth "%BASE_PATH%\models\DUR_0.pth" > NUL
54 | )
55 | if not exist "%BASE_PATH%\models\D_0.pth" (
56 | 	copy lib\Bert-VITS2-2.3\D_0.pth "%BASE_PATH%\models\D_0.pth" > NUL
57 | )
58 | if not exist "%BASE_PATH%\models\G_0.pth" (
59 | 	copy lib\Bert-VITS2-2.3\G_0.pth "%BASE_PATH%\models\G_0.pth" > NUL
60 | )
61 | if not exist "%BASE_PATH%\models\WD_0.pth" (
62 | 	copy lib\Bert-VITS2-2.3\WD_0.pth "%BASE_PATH%\models\WD_0.pth" > NUL
63 | )
64 | 
65 | @REM Drag-and-drop helper scripts that live next to the trained models.
66 | if not exist "%BASE_PATH%\models\G_WebUi.bat" ( copy src\G_WebUi.bat "%BASE_PATH%\models\G_WebUi.bat" > NUL )
67 | if not exist "%BASE_PATH%\models\G_Compress.bat" ( copy src\G_Compress.bat "%BASE_PATH%\models\G_Compress.bat" > NUL )
68 | 
69 | popd
70 | 


--------------------------------------------------------------------------------
/Download-DemoModel.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | @REM Downloads the demo config.json and the JVNV-F2 demo checkpoints into Bert-VITS2\Data\Demo-JVNV.
 3 | @REM Files that already exist are skipped, so the script can be re-run to fetch only what is missing.
 4 | chcp 65001 > NUL
 5 | pushd %~dp0
 6 | 
 7 | set CURL_CMD=C:\Windows\System32\curl.exe
 8 | set DEMO_DIR=Bert-VITS2\Data\Demo-JVNV
 9 | set DEMO_URL=https://huggingface.co/Zuntan/JVNV-F2_Bert-VITS2_v2.1/resolve/main
10 | 
11 | if not exist %DEMO_DIR%\ ( mkdir %DEMO_DIR% )
12 | 
13 | call :Download config.json config.json
14 | if %errorlevel% neq 0 ( popd & exit /b %errorlevel% )
15 | 
16 | @REM One checkpoint per training step count. Inside the loop body the result is tested with
17 | @REM "if errorlevel 1", because a percent-expanded errorlevel is frozen when the block is parsed.
18 | for %%S in (2000 4000 6000 8000 10000 12000 16000 20000 26000) do (
19 | 	call :Download G_JVNV-F2-%%S.pth G_%%S.pth
20 | 	if errorlevel 1 ( popd & exit /b 1 )
21 | )
22 | 
23 | popd
24 | exit /b 0
25 | 
26 | :Download
27 | @REM Subroutine. First argument: local file name inside DEMO_DIR. Second argument: remote file name.
28 | @REM Returns 0 when the file already exists or was downloaded, otherwise curl's exit code after a pause.
29 | if exist "%DEMO_DIR%\%~1" exit /b 0
30 | %CURL_CMD% -Lo "%DEMO_DIR%\%~1" "%DEMO_URL%/%~2"
31 | if %errorlevel% neq 0 ( pause & exit /b %errorlevel% )
32 | exit /b 0
33 | 


--------------------------------------------------------------------------------
/Download-LearningSample-JVNV.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | @REM Creates the JVNV-F2 sample project and fills it with the JVNV-F2 learning data.
 3 | chcp 65001 > NUL
 4 | pushd %~dp0
 5 | 
 6 | @REM Defined here as well so the script does not depend on CreateProject.bat leaking PS_CMD.
 7 | set PS_CMD=PowerShell -Version 5.1 -ExecutionPolicy Bypass
 8 | set CURL_CMD=C:\Windows\System32\curl.exe
 9 | 
10 | call .\CreateProject.bat JVNV-F2
11 | 
12 | @REM Inside the blocks below failures are detected with "if errorlevel 1", because a
13 | @REM percent-expanded errorlevel would be frozen at the moment the whole block is parsed.
14 | if not exist lib\JVNV-F2_Bert-VITS2-main\ (
15 | 	%CURL_CMD% -Lo lib\JVNV-F2_Bert-VITS2.zip^
16 | 		https://codeload.github.com/Zuntan03/JVNV-F2_Bert-VITS2/zip/refs/heads/main
17 | 	if errorlevel 1 ( pause & popd & exit /b 1 )
18 | 
19 | 	%PS_CMD% Expand-Archive -Path lib\JVNV-F2_Bert-VITS2.zip -DestinationPath lib -Force
20 | 	if errorlevel 1 ( pause & popd & exit /b 1 )
21 | 
22 | 	del lib\JVNV-F2_Bert-VITS2.zip
23 | 	if errorlevel 1 ( pause & popd & exit /b 1 )
24 | )
25 | @REM The first sample wav doubles as the marker that the data has already been copied.
26 | if not exist Bert-VITS2\Data\JVNV-F2\audios\raw\F2_anger_regular_01.wav (
27 | 	xcopy /QSY lib\JVNV-F2_Bert-VITS2-main\JVNV-F2\*.* Bert-VITS2\Data\JVNV-F2\
28 | 	if errorlevel 1 ( pause & popd & exit /b 1 )
29 | )
30 | 
31 | popd
32 | 


--------------------------------------------------------------------------------
/HiyoriUi.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | @REM Launches the Hiyori UI server from the Bert-VITS2 folder next to this script.
 3 | chcp 65001 > NUL
 4 | pushd %~dp0Bert-VITS2
 5 | 
 6 | call venv\Scripts\activate.bat
 7 | if %errorlevel% neq 0 goto :Abort
 8 | 
 9 | python server_fastapi.py
10 | if %errorlevel% neq 0 goto :Abort
11 | 
12 | popd
13 | exit /b 0
14 | 
15 | :Abort
16 | @REM Keep the window open so the error can be read, then return the failing exit code.
17 | pause & popd & exit /b %errorlevel%
18 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | ﻿MIT License
 2 | 
 3 | Copyright (c) 2023 Zuntan
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ﻿# EasyBertVits2
  2 | 
  3 | # litagin さんの [Style-Bert-VITS2](https://github.com/litagin02/Style-Bert-VITS2) を使ったほうが幸せになれそうです。
  4 | 
  5 | I haven't tested it in an English environment, but if you have an English model it should work the same way. Maybe.<br>我没有在中文环境中测试过，但是如果你准备一个中文模型，它应该以同样的方式工作。也许吧。
  6 | 
  7 | 文章から感情豊かな音声を生成する [Bert-VITS2](https://github.com/fishaudio/Bert-VITS2) を簡単に使えます。<br>主に litagin さんの  [Bert-VITS2 (ver 2.1, 2.2) の学習方法（2023-12-01）](https://zenn.dev/litagin/articles/b1ddc1da5ea2b3)を自動化したものです。
  8 | 
  9 | サンプル音声は[こちら](https://twitter.com/Zuntan03/status/1735947687466557733)。
 10 | 
 11 | <!-- ### 2023/12/17
 12 | 
 13 | - litagin さんの [bert_vits2_okiba](https://huggingface.co/litagin/bert_vits2_okiba) から 27種のモデル (18.1GB) をダウンロードする `Download-litagin-bert_vits2_okiba.bat` を追加しました。
 14 | 	- [Install-EasyBertVits2.bat](https://github.com/Zuntan03/EasyBertVits2/releases/download/v2.1.2/Install-EasyBertVits2.bat) を再ダウンロードして、インストール先の `Install-EasyBertVits2.bat` に上書きして再実行で更新できます。 -->
 15 | 
 16 | ## インストールと音声の生成
 17 | 
 18 | 最近のNVIDIA製ビデオカード（VRAM 8GB以上）を搭載した、管理者権限のある Windows PC で動作します。 
 19 | Git は不要で、Python がなければ自動でインストールします。 
 20 | アバストなどのウィルスチェックソフトが有効だと、インストールに失敗する場合があるようです。
 21 | 
 22 | 1. [Releases](https://github.com/Zuntan03/EasyBertVits2/releases) から [Install-EasyBertVits2.bat](https://github.com/Zuntan03/EasyBertVits2/releases/download/v2.1.2/Install-EasyBertVits2.bat)（←を右クリックから保存も可） をダウンロードして、インストール先のフォルダ **（スペースを含まない英数字のみの浅いパス）** で実行します。
 23 | 	- **「WindowsによってPCが保護されました」と表示されたら、「詳細表示」から「実行」します。**
 24 | 	- ファイルの配布元を `Ctrl + Click` で確認して、問題がなければ `y` と `Enter` を入力してください。
 25 | 	- Bert-VITS2 v2.3に仮対応したものは [こちらの Install-EasyBertVits2.bat](https://github.com/Zuntan03/EasyBertVits2/raw/main/src/Install-EasyBertVits2.bat) を右クリックから `名前をつけてリンク先を保存` してください。
 26 | 2. インストールが終わると `Hiyori UI` が Web ブラウザに表示されますので、**ブラウザの翻訳機能で日本語化します**（例はChrome で右クリックから `日本語に翻訳`）。<br>![HiyoriUi](./doc/img/HiyoriUi.png)
 27 | 3. 右上の `モデルのロード` の下の欄をクリックして、[JVNV](https://sites.google.com/site/shinnosuketakamichi/research-topics/jvnv_corpus) の学習済みモデル `G_JVNV-F2-10000.pth` を選択します。<br>![HiyoriUiModel](./doc/img/HiyoriUiModel.png)
 28 | 	- `Download-DemoModel.bat` で学習ステップ数の異なるモデルを追加でダウンロードできます。<br>複数のモデルを選択して、生成された音声を聴き比べることもできます。
 29 | 4. モデル選択欄の右側の `ZH` を **2 番めの `日本`** （選択後に `JP` に変わります）にして `モデルをロードする` をします。<br>![HiyoriUiModelJa](./doc/img/HiyoriUiModelJa.png)
 30 | 5. ロードされたモデルが画面下に表示されますので、**右上のトグルを有効にします** 。<br>![HiyoriUiModelEnable](./doc/img/HiyoriUiModelEnable.png)
 31 | 6. `テキストコンテンツを入力してください` に文章を入力して、`音声を生成する` で生成します。<br>![HiyoriUiText](./doc/img/HiyoriUiText.png)
 32 | 7. 音声は下部のモデル別に生成され、再生やダウンロードができます。複数のモデルで同時に生成して、聴き比べることもできます。<br>![HiyoriUiGen](./doc/img/HiyoriUiGen.png)
 33 | 	- 文章の内容で感情表現が大きく変化します。以下のサンプル文章で生成を試してみてください。
 34 | 		- ずっとずっと好きでした! 私とお付き合いしてください。お願いします！
 35 | 		- マッサージ、上手いねー。 あっ そうっ!  そこっ!  いい! いい～!!  あああぁぁ～～～～! 
 36 | 		- ごめんなさい…本当に許してください…ごめんなさい…もうしません
 37 | 	- `感情` で `0-9` のスタイルを選べますが、動作がいまいち＆旧仕様になるので放置が良さそうです。
 38 | 8. どこかから学習済みモデルを入手したのなら、`Bert-VITS2\Data` 以下にフォルダを作成して、`G_*.pth` のモデルと `config.json` の設定ファイルを一緒に置けば同様に読み込めます。
 39 | 	<!-- - `Download-litagin-bert_vits2_okiba.bat` で [litagin さんの bert_vits2_okiba](https://huggingface.co/litagin/bert_vits2_okiba) からモデルをダウンロードできます。 -->
 40 | 
 41 | `Hiyori UI` を再度立ち上げたい場合は、`HiyoriUi.bat` を実行します。
 42 | 
 43 | ## 音声を学習してみる
 44 | 
 45 | 音声の学習には感情豊かな読み上げ音声と、その文章が必要です。<br>ここではサンプルとして感情豊かな [JVNV](https://sites.google.com/site/shinnosuketakamichi/research-topics/jvnv_corpus) の音声と文章を学習してみます。
 46 | 
 47 | 1. `Download-LearningSample-JVNV.bat` を実行すると、`Bert-VITS2\Data` に `JVNV-F2` プロジェクトを作成し、学習用のデータをダウンロードします。
 48 | 	- `JVNV-F2` プロジェクトの `audios\raw` に、感情豊かな音声ファイルがありますので、学習用音声の参考にしてください。
 49 | 	- `JVNV-F2` プロジェクトの `filelists\esd.list` で音声の読みとファイルパスを指定していますので、テキストエディタで開いて参考にしてください。
 50 | 2. プロジェクトの `Learn.bat` で学習を開始します。<br>しばらく待つと学習結果が `models\G_*.pth` に書き出されますので、`HiyoriUi.bat` で読み込んで学習状況を確認できます。<br>ビデオカードの VRAM が 8GB の場合は、学習中の VRAM 不足を防ぐために `CPU` で読み込んでください。<br>![HiyoriUiModelCPU](./doc/img/HiyoriUiModelCpu.png)
 51 | 
 52 | ## 学習用の音声を長尺の音声ファイルから変換する
 53 | 
 54 | [litaginさんがツールを用意しています。](https://github.com/litagin02/slice-and-transcribe)
 55 | 
 56 | 1. `CreateProject.bat` で `Bert-VITS2\Data` に学習用プロジェクトを作成します。
 57 | 2. プロジェクトの `audios\raw` に音声を配置します。
 58 | 3. プロジェクトの `filelists\esd.list` に音声ファイルパスと音声の読みを入力します。
 59 | 4. プロジェクトの `Learn.bat` で学習します。
 60 | 
 61 | ## 学習用の音声を収録する
 62 | 
 63 | `Download-LearningSample-JVNV.bat` を実行していれば、読み上げ収録ソフトの[OREMO](http://nwp8861.web.fc2.com/soft/oremo/index.html)（[ダウンロード](https://twitter.com/nwp8861/status/1694845767738167719)は[こちら](https://onedrive.live.com/?id=4E56C6D911E0FAA3%21326&cid=4E56C6D911E0FAA3)）で収録するためのファイルが `lib\JVNV-F2_Bert-VITS2-main\OREMO\` にあります。
 64 | 
 65 | - `JVNV_OREMO.txt` は OREMO で収録するための音名リストです。
 66 | 	- JVNV のフレーズフリーセッションは、学習に必要なアドリブ部の読みテキストが無いため取り除いています。
 67 | - `JVNV_OREMO_rename.bat` は OREMO で収録した wav と同じフォルダに置いて実行すると、ファイル名を学習用に変更します。
 68 | - `filelists\esd.list` は `lib\JVNV-F2_Bert-VITS2-main\JVNV.txt` を置換で編集して用意します。
 69 | - 数が多い場合は、`lib\JVNV-F2_Bert-VITS2-main\transcription.pdf` の各感情が均等になるように収録するとよいかもしれません。
 70 | - 収録時の感情表現を大きくしたほうが、Bert-VITS2 の特性にあっているようです。
 71 | 
 72 | 音声ファイルを用意したあとは『[学習用の音声を長尺の音声ファイルから変換する](#学習用の音声を長尺の音声ファイルから変換する)』と同じです。
 73 | 
 74 | ## その他
 75 | 
 76 | - プロジェクトの `models\G_WebUi.bat` に、学習したモデルファイル `G_*.pth` をドラッグ＆ドロップすると、[Bert-VITS2](https://github.com/fishaudio/Bert-VITS2) の WebUI でモデルを試せます。
 77 | - プロジェクトの `models\G_Compress.bat` に、学習したモデルファイル `G_*.pth` をドラッグ＆ドロップすると、配布用の省サイズモデルに圧縮します。
 78 | 	- 音声生成時に `ERROR:utils:enc_q … is not in the checkpoint` と表示されますが、音声の生成はできるようです。
 79 | 
 80 | ## そのうち？
 81 | 
 82 | - Bert-VITS2 v2.2 対応
 83 | 	- 手元では動いてはいるが、生成の品質が高められていない。
 84 | 	- [litaginさんのStyle Textが便利](https://github.com/litagin02/Bert-VITS2-litagin)で、[待っていれば入りそう](https://github.com/fishaudio/Bert-VITS2/pull/240) で様子見。
 85 | 	- [Bert-VITS2-UI](https://github.com/jiangyuxiaoxiao/Bert-VITS2-UI) の v2.2 対応版のリリースも待ちたい。
 86 | 	- そうこうしてるうちに [v2.3](https://github.com/fishaudio/Bert-VITS2/tree/dev-2.3) が来たりするかも。
 87 | 
 88 | ## 参照
 89 | 
 90 | - [Bert-VITS2 (ver 2.1, 2.2) の学習方法（2023-12-01）](https://zenn.dev/litagin/articles/b1ddc1da5ea2b3)
 91 | - [Bert-VITS2](https://github.com/fishaudio/Bert-VITS2)
 92 | - [Bert-VITS2-UI](https://github.com/jiangyuxiaoxiao/Bert-VITS2-UI)
 93 | - [bert-vits2_base_model-2.1](https://huggingface.co/Garydesu/bert-vits2_base_model-2.1)
 94 | - [JVNV](https://sites.google.com/site/shinnosuketakamichi/research-topics/jvnv_corpus)
 95 | 
 96 | ## ライセンス
 97 | 
 98 | このリポジトリのスクリプトやドキュメントは、[MIT License](./LICENSE.txt)です。
 99 | 
100 | This software is released under the MIT License, see [LICENSE.txt](./LICENSE.txt).
101 | 


--------------------------------------------------------------------------------
/doc/img/HiyoriUi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zuntan03/EasyBertVits2/0cf1fb084d7380b945c752b5072b5ada0b9e284d/doc/img/HiyoriUi.png


--------------------------------------------------------------------------------
/doc/img/HiyoriUiGen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zuntan03/EasyBertVits2/0cf1fb084d7380b945c752b5072b5ada0b9e284d/doc/img/HiyoriUiGen.png


--------------------------------------------------------------------------------
/doc/img/HiyoriUiModel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zuntan03/EasyBertVits2/0cf1fb084d7380b945c752b5072b5ada0b9e284d/doc/img/HiyoriUiModel.png


--------------------------------------------------------------------------------
/doc/img/HiyoriUiModelCpu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zuntan03/EasyBertVits2/0cf1fb084d7380b945c752b5072b5ada0b9e284d/doc/img/HiyoriUiModelCpu.png


--------------------------------------------------------------------------------
/doc/img/HiyoriUiModelEnable.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zuntan03/EasyBertVits2/0cf1fb084d7380b945c752b5072b5ada0b9e284d/doc/img/HiyoriUiModelEnable.png


--------------------------------------------------------------------------------
/doc/img/HiyoriUiModelJa.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zuntan03/EasyBertVits2/0cf1fb084d7380b945c752b5072b5ada0b9e284d/doc/img/HiyoriUiModelJa.png


--------------------------------------------------------------------------------
/doc/img/HiyoriUiText.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zuntan03/EasyBertVits2/0cf1fb084d7380b945c752b5072b5ada0b9e284d/doc/img/HiyoriUiText.png


--------------------------------------------------------------------------------
/doc/sample/v21-gomen.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zuntan03/EasyBertVits2/0cf1fb084d7380b945c752b5072b5ada0b9e284d/doc/sample/v21-gomen.wav


--------------------------------------------------------------------------------
/doc/sample/v21-massa.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zuntan03/EasyBertVits2/0cf1fb084d7380b945c752b5072b5ada0b9e284d/doc/sample/v21-massa.wav


--------------------------------------------------------------------------------
/doc/sample/v21-suki.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zuntan03/EasyBertVits2/0cf1fb084d7380b945c752b5072b5ada0b9e284d/doc/sample/v21-suki.wav


--------------------------------------------------------------------------------
/doc/sample/v21-zetuyuru.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zuntan03/EasyBertVits2/0cf1fb084d7380b945c752b5072b5ada0b9e284d/doc/sample/v21-zetuyuru.wav


--------------------------------------------------------------------------------
/src/G_Compress.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | @REM Drag-and-drop target: compresses the dropped G_*.pth checkpoint with compress_model.py.
 3 | @REM The result is written next to the input with "_c" appended to the file name.
 4 | chcp 65001 > NUL
 5 | @REM venv and compress_model.py are expected three levels above this script.
 6 | pushd %~dp0..\..\..\
 7 | 
 8 | if "%~1" == "" (
 9 | 	echo [ERROR] G_*000.pth のモデルファイルをドラッグ＆ドロップしてください。
10 | 	pause & popd & exit /b 1
11 | )
12 | 
13 | call venv\Scripts\activate.bat
14 | if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
15 | 
16 | @REM All paths are quoted so that dropped files whose path contains spaces still work.
17 | set "OUTPUT=%~dpn1_c%~x1"
18 | echo python compress_model.py -c "%~dp0..\config.json" -i "%~f1" -o "%OUTPUT%"
19 | python compress_model.py -c "%~dp0..\config.json" -i "%~f1" -o "%OUTPUT%"
20 | if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
21 | 
22 | popd
23 | 
24 | 


--------------------------------------------------------------------------------
/src/G_WebUi.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | @REM Drag-and-drop target: points the project's config.yml at the dropped G_*.pth model
 3 | @REM and starts webui.py with that configuration.
 4 | chcp 65001 > NUL
 5 | @REM venv and webui.py are expected three levels above this script.
 6 | pushd %~dp0..\..\..\
 7 | 
 8 | if "%~1" == "" (
 9 | 	echo [ERROR] G_*000.pth のモデルファイルをドラッグ＆ドロップしてください。
10 | 	pause & popd & exit /b 1
11 | )
12 | 
13 | call venv\Scripts\activate.bat
14 | if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
15 | 
16 | @REM Rewrite the models/G_*.pth entry in the project's config.yml to the dropped file's base name.
17 | set PS_CMD=PowerShell -Version 5.1 -ExecutionPolicy Bypass
18 | %PS_CMD% "&{(Get-Content '%~dp0..\config.yml' -Encoding UTF8) -replace 'models/G_.*\.pth', 'models/%~n1.pth' | Set-Content '%~dp0..\config.yml' -Encoding UTF8 }"
19 | if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
20 | 
21 | @REM The source path is quoted so that project paths containing spaces still work.
22 | copy /Y "%~dp0..\config.yml" config.yml > NUL
23 | if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
24 | 
25 | python webui.py
26 | if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
27 | 
28 | popd
29 | 


--------------------------------------------------------------------------------
/src/HiyoriUi.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | @REM Per-project launcher: copies the config.yml next to this script over the one two levels up
 3 | @REM and then starts server_fastapi.py from there.
 4 | chcp 65001 > NUL
 5 | pushd %~dp0..\..\
 6 | 
 7 | call venv\Scripts\activate.bat
 8 | if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
 9 | 
10 | @REM The source path is quoted so that project paths containing spaces still work.
11 | copy /Y "%~dp0config.yml" config.yml > NUL
12 | if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
13 | 
14 | python server_fastapi.py
15 | if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
16 | 
17 | popd
18 | 


--------------------------------------------------------------------------------
/src/Install-EasyBertVits2.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | @REM Bootstrap installer: downloads the EasyBertVits2 main branch archive, unpacks it into the
 3 | @REM folder this script is in, runs src\Setup.bat and finally starts HiyoriUi.bat.
 4 | chcp 65001 > NUL
 5 | pushd %~dp0
 6 | set PS_CMD=PowerShell -Version 5.1 -ExecutionPolicy Bypass
 7 | 
 8 | set CURL_CMD=C:\Windows\System32\curl.exe
 9 | if not exist %CURL_CMD% (
10 | 	echo [ERROR] %CURL_CMD% が見つかりません。
11 | 	pause & popd & exit /b 1
12 | )
13 | 
14 | set EASY_BERT_VITS2_DIR=EasyBertVits2-main
15 | 
16 | if not exist lib\ ( mkdir lib )
17 | 
18 | %CURL_CMD% -Lo lib\EasyBertVits2.zip^
19 | 	https://github.com/Zuntan03/EasyBertVits2/archive/refs/heads/main.zip
20 | if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
21 | 
22 | %PS_CMD% Expand-Archive -Path lib\EasyBertVits2.zip -DestinationPath lib -Force
23 | if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
24 | 
25 | del lib\EasyBertVits2.zip
26 | if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
27 | 
28 | xcopy /QSY .\lib\%EASY_BERT_VITS2_DIR%\ .
29 | if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
30 | 
31 | @REM Do not start the UI when setup failed or the user declined the download prompt.
32 | @REM No pause here: Setup.bat pauses by itself on the errors it reports.
33 | call src\Setup.bat
34 | if %errorlevel% neq 0 ( popd & exit /b %errorlevel% )
35 | 
36 | start HiyoriUi.bat
37 | 
38 | popd
39 | 


--------------------------------------------------------------------------------
/src/Learn.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | @REM Per-project training launcher: installs the project's config.yml two levels up, runs the
 3 | @REM preprocessing steps when audios\wavs does not exist yet, and then starts train_ms.py.
 4 | chcp 65001 > NUL
 5 | pushd %~dp0..\..\
 6 | 
 7 | call venv\Scripts\activate.bat
 8 | if %errorlevel% neq 0 goto :Fail
 9 | 
10 | copy /Y "%~dp0config.yml" config.yml > NUL
11 | if %errorlevel% neq 0 goto :Fail
12 | 
13 | @REM Preprocessing is skipped once audios\wavs exists. The steps are kept out of a parenthesised
14 | @REM block on purpose: inside a block a percent-expanded errorlevel is frozen at parse time and
15 | @REM the checks below would never see a failure.
16 | if exist "%~dp0audios\wavs" goto :Train
17 | 
18 | python resample.py
19 | if %errorlevel% neq 0 goto :Fail
20 | 
21 | python preprocess_text.py
22 | if %errorlevel% neq 0 goto :Fail
23 | 
24 | python bert_gen.py
25 | if %errorlevel% neq 0 goto :Fail
26 | 
27 | :Train
28 | python train_ms.py
29 | if %errorlevel% neq 0 goto :Fail
30 | 
31 | popd
32 | exit /b 0
33 | 
34 | :Fail
35 | @REM errorlevel still holds the failing command's code; it is expanded before pause runs.
36 | pause & popd & exit /b %errorlevel%
37 | 


--------------------------------------------------------------------------------
/src/Setup-Python.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | @REM Installs an embeddable Python 3.10.11 with pip and virtualenv, creates a venv and activates it.
 3 | @REM Usage: Setup-Python.bat [python-dir] [venv-dir]
 4 | @REM   Both paths are relative to this script's folder; the defaults are "python" and "venv".
 5 | @REM The venv stays activated for the caller, so this script must not use setlocal.
 6 | chcp 65001 > NUL
 7 | set PS_CMD=PowerShell -Version 5.1 -ExecutionPolicy Bypass
 8 | set CURL_CMD=C:\Windows\System32\curl.exe
 9 | 
10 | if not exist %CURL_CMD% (
11 | 	echo [ERROR] %CURL_CMD% が見つかりません。
12 | 	pause & exit /b 1
13 | )
14 | 
15 | if "%~1" neq "" (
16 | 	set PYTHON_DIR=%~dp0%~1
17 | ) else (
18 | 	set PYTHON_DIR=%~dp0python
19 | )
20 | set PYTHON_CMD=%PYTHON_DIR%\python.exe
21 | 
22 | if "%~2" neq "" (
23 | 	set VENV_DIR=%~dp0%~2
24 | ) else (
25 | 	set VENV_DIR=%~dp0venv
26 | )
27 | 
28 | echo PS_CMD: %PS_CMD%
29 | echo CURL_CMD: %CURL_CMD%
30 | echo PYTHON_CMD: %PYTHON_CMD%
31 | echo VENV_DIR: %VENV_DIR%
32 | 
33 | @REM The install steps are kept out of a parenthesised block on purpose: inside a block a
34 | @REM percent-expanded errorlevel is frozen at parse time and failures would go unnoticed.
35 | if exist %PYTHON_DIR%\ goto :CreateVenv
36 | 
37 | echo https://www.python.org/
38 | echo %CURL_CMD% -o python.zip https://www.python.org/ftp/python/3.10.11/python-3.10.11-embed-amd64.zip
39 | %CURL_CMD% -o python.zip https://www.python.org/ftp/python/3.10.11/python-3.10.11-embed-amd64.zip
40 | if %errorlevel% neq 0 goto :Fail
41 | 
42 | echo %PS_CMD% Expand-Archive -Path python.zip -DestinationPath %PYTHON_DIR%
43 | %PS_CMD% Expand-Archive -Path python.zip -DestinationPath %PYTHON_DIR%
44 | if %errorlevel% neq 0 goto :Fail
45 | 
46 | echo del python.zip
47 | del python.zip
48 | if %errorlevel% neq 0 goto :Fail
49 | 
50 | @REM Uncomment "import site" in python310._pth so the embeddable build loads site-packages.
51 | echo %PS_CMD% "&{(Get-Content '%PYTHON_DIR%/python310._pth') -creplace '#import site', 'import site' | Set-Content '%PYTHON_DIR%/python310._pth' }"
52 | %PS_CMD% "&{(Get-Content '%PYTHON_DIR%/python310._pth') -creplace '#import site', 'import site' | Set-Content '%PYTHON_DIR%/python310._pth' }"
53 | if %errorlevel% neq 0 goto :Fail
54 | 
55 | echo https://github.com/pypa/get-pip
56 | echo %CURL_CMD% -o %PYTHON_DIR%\get-pip.py https://bootstrap.pypa.io/get-pip.py
57 | %CURL_CMD% -o %PYTHON_DIR%\get-pip.py https://bootstrap.pypa.io/get-pip.py
58 | if %errorlevel% neq 0 goto :Fail
59 | 
60 | echo %PYTHON_CMD% %PYTHON_DIR%\get-pip.py --no-warn-script-location
61 | %PYTHON_CMD% %PYTHON_DIR%\get-pip.py --no-warn-script-location
62 | if %errorlevel% neq 0 goto :Fail
63 | 
64 | echo %PYTHON_CMD% -m pip install virtualenv --no-warn-script-location
65 | %PYTHON_CMD% -m pip install virtualenv --no-warn-script-location
66 | if %errorlevel% neq 0 goto :Fail
67 | 
68 | :CreateVenv
69 | if exist %VENV_DIR%\ goto :Activate
70 | 
71 | echo %PYTHON_CMD% -m virtualenv --copies %VENV_DIR%
72 | %PYTHON_CMD% -m virtualenv --copies %VENV_DIR%
73 | if %errorlevel% neq 0 goto :Fail
74 | 
75 | :Activate
76 | echo call %VENV_DIR%\Scripts\activate.bat
77 | call %VENV_DIR%\Scripts\activate.bat
78 | if %errorlevel% neq 0 goto :Fail
79 | 
80 | echo python -m pip install --upgrade pip
81 | python -m pip install --upgrade pip
82 | if %errorlevel% neq 0 goto :Fail
83 | 
84 | exit /b 0
85 | 
86 | :Fail
87 | @REM errorlevel still holds the failing command's code; it is expanded before pause runs.
88 | pause & exit /b %errorlevel%
89 | 


--------------------------------------------------------------------------------
/src/Setup.bat:
--------------------------------------------------------------------------------
  1 | @echo off
  2 | chcp 65001 > NUL
  3 | pushd %~dp0..
  4 | set PS_CMD=PowerShell -Version 5.1 -ExecutionPolicy Bypass
  5 | 
  6 | @REM 2023-12-22
  7 | set BERT_VITS2_REV=3090f5837d943b758dc07ff900c35734fc0b793a
  8 | 
  9 | set CURL_CMD=C:\Windows\System32\curl.exe
 10 | if not exist %CURL_CMD% (
 11 | 	echo [ERROR] %CURL_CMD% が見つかりません。
 12 | 	pause & popd & exit /b 1
 13 | )
 14 | 
 15 | echo 以下の配布元から関連ファイルをダウンロードして使用します（URL を Ctrl + クリックで開けます）。
 16 | echo https://www.python.org/
 17 | echo https://github.com/pypa/get-pip
 18 | echo https://github.com/fishaudio/Bert-VITS2/
 19 | echo https://github.com/jiangyuxiaoxiao/Bert-VITS2-UI
 20 | echo https://huggingface.co/OedoSoldier/Bert-VITS2-2.3
 21 | echo https://huggingface.co/audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim/
 22 | echo https://huggingface.co/laion/clap-htsat-fused/
 23 | echo https://huggingface.co/microsoft/wavlm-base-plus/
 24 | echo https://sites.google.com/site/shinnosuketakamichi/research-topics/jvnv_corpus
 25 | echo https://github.com/Zuntan03/JVNV-F2_Bert-VITS2
 26 | echo https://huggingface.co/Zuntan/JVNV-F2_Bert-VITS2_v2.1
 27 | echo よろしいですか？ [y/n]
 28 | set /p YES_OR_NO=
 29 | if /i not "%YES_OR_NO%" == "y" ( popd & exit /b 1 )
 30 | 
 31 | if not exist lib\ ( mkdir lib )
 32 | 
 33 | if not exist lib\Bert-VITS2-%BERT_VITS2_REV%\ (
 34 | 	%CURL_CMD% -Lo Bert-VITS2.zip https://github.com/fishaudio/Bert-VITS2/archive/%BERT_VITS2_REV%.zip
 35 | 	if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
 36 | 
 37 | 	%PS_CMD% Expand-Archive -Path Bert-VITS2.zip -DestinationPath lib -Force
 38 | 	if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
 39 | 
 40 | 	del Bert-VITS2.zip
 41 | 	if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
 42 | 
 43 | 	xcopy /QSY lib\Bert-VITS2-%BERT_VITS2_REV%\*.* Bert-VITS2\
 44 | 	if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
 45 | )
 46 | 
 47 | call %~dp0Setup-Python.bat ..\lib\python ..\Bert-VITS2\venv
 48 | if %errorlevel% neq 0 ( popd & exit /b %errorlevel% )
 49 | 
 50 | pip install torch==2.1.1+cu121 torchvision==0.16.1+cu121 torchaudio==2.1.1+cu121^
 51 | 	--index-url https://download.pytorch.org/whl/cu121
 52 | if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
 53 | 
 54 | @REM pip install psutil==5.9.7
 55 | @REM if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
 56 | 
 57 | pip install -r Bert-VITS2\requirements.txt
 58 | if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
 59 | 
 60 | @REM pip install tensorflow
 61 | @REM if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
 62 | 
 63 | @REM 2023-12-17
 64 | if not exist lib\HiyoriUI-0.7.0\ (
 65 | 	%CURL_CMD% -Lo lib\HiyoriUI.zip^
 66 | 		https://github.com/jiangyuxiaoxiao/Bert-VITS2-UI/releases/download/0.7.0/HiyoriUI-0.7.0.zip
 67 | 	if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
 68 | 
 69 | 	%PS_CMD% Expand-Archive -Path lib\HiyoriUI.zip -DestinationPath lib -Force
 70 | 	if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
 71 | 
 72 | 	del lib\HiyoriUI.zip
 73 | 	if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
 74 | 
 75 | 	xcopy /QSY lib\HiyoriUI-0.7.0\Web\*.* Bert-VITS2\Web\
 76 | 	if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
 77 | )
 78 | @REM Download DUR_0, D_0, G_0 and WD_0 .pth from the OedoSoldier/Bert-VITS2-2.3 Hugging Face repository into lib\Bert-VITS2-2.3, skipping files that already exist. "if errorlevel 1" is evaluated at run time, unlike a percent-style errorlevel reference, which is expanded when the enclosing parenthesised block is parsed.
 79 | if not exist lib\Bert-VITS2-2.3\ ( mkdir lib\Bert-VITS2-2.3 )
 80 | if not exist lib\Bert-VITS2-2.3\DUR_0.pth (
 81 | 	%CURL_CMD% -Lo lib\Bert-VITS2-2.3\DUR_0.pth^
 82 | 		https://huggingface.co/OedoSoldier/Bert-VITS2-2.3/resolve/main/DUR_0.pth
 83 | 	if errorlevel 1 ( pause & popd & exit /b 1 )
 84 | )
 85 | if not exist lib\Bert-VITS2-2.3\D_0.pth (
 86 | 	%CURL_CMD% -Lo lib\Bert-VITS2-2.3\D_0.pth^
 87 | 		https://huggingface.co/OedoSoldier/Bert-VITS2-2.3/resolve/main/D_0.pth
 88 | 	if errorlevel 1 ( pause & popd & exit /b 1 )
 89 | )
 90 | if not exist lib\Bert-VITS2-2.3\G_0.pth (
 91 | 	%CURL_CMD% -Lo lib\Bert-VITS2-2.3\G_0.pth^
 92 | 		https://huggingface.co/OedoSoldier/Bert-VITS2-2.3/resolve/main/G_0.pth
 93 | 	if errorlevel 1 ( pause & popd & exit /b 1 )
 94 | )
 95 | if not exist lib\Bert-VITS2-2.3\WD_0.pth (
 96 | 	%CURL_CMD% -Lo lib\Bert-VITS2-2.3\WD_0.pth^
 97 | 		https://huggingface.co/OedoSoldier/Bert-VITS2-2.3/resolve/main/WD_0.pth
 98 | 	if errorlevel 1 ( pause & popd & exit /b 1 )
 99 | )
100 | @REM Download pytorch_model.bin for the wav2vec2 emotion model, the CLAP model and WavLM base-plus into the Bert-VITS2 tree when missing. All downloads go through CURL_CMD like the rest of this script, and failures are tested with "if errorlevel 1" because a percent-style errorlevel reference inside a parenthesised block is expanded at parse time.
101 | if not exist Bert-VITS2\emotional\wav2vec2-large-robust-12-ft-emotion-msp-dim\pytorch_model.bin (
102 | 	%CURL_CMD% -Lo Bert-VITS2\emotional\wav2vec2-large-robust-12-ft-emotion-msp-dim\pytorch_model.bin^
103 | 		https://huggingface.co/audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim/resolve/main/pytorch_model.bin
104 | 	if errorlevel 1 ( pause & popd & exit /b 1 )
105 | )
106 | 
107 | if not exist Bert-VITS2\emotional\clap-htsat-fused\pytorch_model.bin (
108 | 	%CURL_CMD% -Lo Bert-VITS2\emotional\clap-htsat-fused\pytorch_model.bin^
109 | 		https://huggingface.co/laion/clap-htsat-fused/resolve/main/pytorch_model.bin
110 | 	if errorlevel 1 ( pause & popd & exit /b 1 )
111 | )
112 | 
113 | if not exist Bert-VITS2\slm\wavlm-base-plus\pytorch_model.bin (
114 | 	%CURL_CMD% -Lo Bert-VITS2\slm\wavlm-base-plus\pytorch_model.bin^
115 | 		https://huggingface.co/microsoft/wavlm-base-plus/resolve/main/pytorch_model.bin
116 | 	if errorlevel 1 ( pause & popd & exit /b 1 )
117 | )
118 | @REM Create Bert-VITS2\config.yml from default_config.yml on first run. Failures are tested with "if errorlevel 1" because a percent-style errorlevel reference inside a parenthesised block is expanded when the block is parsed, not after each command.
119 | if not exist Bert-VITS2\config.yml (
120 | 	copy Bert-VITS2\default_config.yml Bert-VITS2\config.yml
121 | 	if errorlevel 1 ( pause & popd & exit /b 1 )
122 | )
123 | @REM Download config.json and G_10000.pth from Zuntan/JVNV-F2_Bert-VITS2_v2.1 into Data\Demo-JVNV_v2.1 when missing. The checkpoint is saved locally as G_JVNV-F2-10000.pth.
124 | if not exist Bert-VITS2\Data\Demo-JVNV_v2.1\ ( mkdir Bert-VITS2\Data\Demo-JVNV_v2.1 )
125 | if not exist Bert-VITS2\Data\Demo-JVNV_v2.1\config.json (
126 | 	%CURL_CMD% -Lo Bert-VITS2\Data\Demo-JVNV_v2.1\config.json^
127 | 		https://huggingface.co/Zuntan/JVNV-F2_Bert-VITS2_v2.1/resolve/main/config.json
128 | 	if errorlevel 1 ( pause & popd & exit /b 1 )
129 | )
130 | if not exist Bert-VITS2\Data\Demo-JVNV_v2.1\G_JVNV-F2-10000.pth (
131 | 	%CURL_CMD% -Lo Bert-VITS2\Data\Demo-JVNV_v2.1\G_JVNV-F2-10000.pth^
132 | 		https://huggingface.co/Zuntan/JVNV-F2_Bert-VITS2_v2.1/resolve/main/G_10000.pth
133 | 	if errorlevel 1 ( pause & popd & exit /b 1 )
134 | )
135 | 
136 | popd rem %~dp0..
137 | 


--------------------------------------------------------------------------------
/src/Tensorboard.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | chcp 65001 > NUL
 3 | pushd %~dp0..\..\
 4 | @REM Template script: CreateProject.bat copies it into Bert-VITS2\Data\SPEAKER\ and substitutes the brace-wrapped SPEAKER_NAME placeholder below, so the pushd above moves two levels up from the copied script.
 5 | call venv\Scripts\activate.bat
 6 | if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
 7 | @REM Run TensorBoard on this speaker's models directory; the path is relative to the directory entered by pushd.
 8 | tensorboard --logdir Data/{SPEAKER_NAME}/models
 9 | if %errorlevel% neq 0 ( pause & popd & exit /b %errorlevel% )
10 | 
11 | popd rem %~dp0..\..\
12 | 


--------------------------------------------------------------------------------
/src/config.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "train": {
  3 |     "log_interval": 200,
  4 |     "eval_interval": 2000,
  5 |     "seed": 42,
  6 |     "epochs": 200,
  7 |     "learning_rate": 0.0002,
  8 |     "betas": [
  9 |       0.8,
 10 |       0.99
 11 |     ],
 12 |     "eps": 1e-09,
 13 |     "batch_size": 3,
 14 |     "bf16_run": true,
 15 |     "lr_decay": 0.99995,
 16 |     "segment_size": 16384,
 17 |     "init_lr_ratio": 1,
 18 |     "warmup_epochs": 0,
 19 |     "c_mel": 45,
 20 |     "c_kl": 1.0,
 21 |     "c_commit": 100,
 22 |     "skip_optimizer": true,
 23 |     "freeze_ZH_bert": false,
 24 |     "freeze_JP_bert": false,
 25 |     "freeze_EN_bert": false,
 26 |     "freeze_emo": false
 27 |   },
 28 |   "data": {
 29 |     "training_files": "filelists/train.list",
 30 |     "validation_files": "filelists/val.list",
 31 |     "max_wav_value": 32768.0,
 32 |     "sampling_rate": 44100,
 33 |     "filter_length": 2048,
 34 |     "hop_length": 512,
 35 |     "win_length": 2048,
 36 |     "n_mel_channels": 128,
 37 |     "mel_fmin": 0.0,
 38 |     "mel_fmax": null,
 39 |     "add_blank": true,
 40 |     "n_speakers": 850,
 41 |     "cleaned_text": true,
 42 |     "spk2id": {}
 43 |   },
 44 |   "model": {
 45 |     "use_spk_conditioned_encoder": true,
 46 |     "use_noise_scaled_mas": true,
 47 |     "use_mel_posterior_encoder": false,
 48 |     "use_duration_discriminator": true,
 49 |     "inter_channels": 192,
 50 |     "hidden_channels": 192,
 51 |     "filter_channels": 768,
 52 |     "n_heads": 2,
 53 |     "n_layers": 6,
 54 |     "kernel_size": 3,
 55 |     "p_dropout": 0.1,
 56 |     "resblock": "1",
 57 |     "resblock_kernel_sizes": [
 58 |       3,
 59 |       7,
 60 |       11
 61 |     ],
 62 |     "resblock_dilation_sizes": [
 63 |       [
 64 |         1,
 65 |         3,
 66 |         5
 67 |       ],
 68 |       [
 69 |         1,
 70 |         3,
 71 |         5
 72 |       ],
 73 |       [
 74 |         1,
 75 |         3,
 76 |         5
 77 |       ]
 78 |     ],
 79 |     "upsample_rates": [
 80 |       8,
 81 |       8,
 82 |       2,
 83 |       2,
 84 |       2
 85 |     ],
 86 |     "upsample_initial_channel": 512,
 87 |     "upsample_kernel_sizes": [
 88 |       16,
 89 |       16,
 90 |       8,
 91 |       2,
 92 |       2
 93 |     ],
 94 |     "n_layers_q": 3,
 95 |     "use_spectral_norm": false,
 96 |     "gin_channels": 512,
 97 |     "slm": {
 98 |       "model": "./slm/wavlm-base-plus",
 99 |       "sr": 16000,
100 |       "hidden": 768,
101 |       "nlayers": 13,
102 |       "initial_channel": 64
103 |     }
104 |   },
105 |   "version": "2.3"
106 | }


--------------------------------------------------------------------------------
/src/config.yml:
--------------------------------------------------------------------------------
  1 | ﻿# グローバル設定
  2 | # 2 つの GPU で 2 つのトレーニング セットを同時に実行するなど、複数の設定ファイルを同時に使用する場合: 環境変数を使用して設定ファイルを指定します。指定しない場合、デフォルトは ./config.yml です。
  3 | 
  4 | # 共通のパス設定を提供し、データが無秩序に配置されるのを避けるためにデータを均一に保存します。
  5 | # 各データセットとそれに対応するモデルは統一されたパスの下に保存され、後続のすべてのパス設定は datasetPath に対する相対パスになります。
  6 | # 空白のままにした場合、パスはプロジェクトのルート ディレクトリを基準にした相対パスになります。
  7 | dataset_path: "Data/{SPEAKER_NAME}/"
  8 | 
  9 | # モデルミラーソース、デフォルトはHuggingFace、openi ミラーソースを使用するには、openi_token を指定する必要があります
 10 | mirror: ""
 11 | openi_token: ""  # openi トークン
 12 | 
 13 | # resample オーディオのリサンプリング設定
 14 | # 注意「:」の後にはスペースが必要であることに注意してください。
 15 | resample:
 16 |   # 目標リサンプリングレート
 17 |   sampling_rate: 44100
 18 |   # オーディオ ファイル入力パス。リサンプリングすると、このパスにあるすべての .wav オーディオ ファイルがリサンプリングされます。
 19 |   # datasetPath からの相対パスを入力してください
 20 |   in_dir: "audios/raw" # ルートディレクトリからの相対パスは /datasetPath/in_dir
 21 |   # リサンプリング後の音声ファイルの出力パス
 22 |   out_dir: "audios/wavs"
 23 | 
 24 | 
 25 | # preprocess_text データセットの前処理関連の設定
 26 | # 注意「:」の後にはスペースが必要であることに注意してください。
 27 | preprocess_text:
 28 |   # 元のテキスト ファイルのパス。テキスト形式は次のとおりです。{wav_path}|{speaker_name}|{language}|{text}。
 29 |   transcription_path: "filelists/esd.list"
 30 |   # データクリーニング後のテキストパスは入力する必要はありません。 空白のままにすると、元のテキスト ディレクトリに生成されます。
 31 |   cleaned_path: ""
 32 |   # トレーニングセットのパス
 33 |   train_path: "filelists/train.list"
 34 |   # 検証セットのパス
 35 |   val_path: "filelists/val.list"
 36 |   # 設定ファイルのパス
 37 |   config_path: "config.json"
 38 |   # 各言語の検証セットエントリの数
 39 |   val_per_lang: 4
 40 |   # 検証セット内のエントリの最大数。それ以上のエントリがあるものは切り捨てられ、トレーニング セットに配置されます。
 41 |   max_val_total: 12
 42 |   # データクリーニングを行うかどうか
 43 |   clean: true
 44 | 
 45 | 
 46 | # bert_gen 関連の設定
 47 | # 注意「:」の後にはスペースが必要であることに注意してください。
 48 | bert_gen:
 49 |   # トレーニング データセット設定ファイルのパス
 50 |   config_path: "config.json"
 51 |   # 並列数
 52 |   num_processes: 4
 53 |   # 使用デバイス: オプションの "cuda" グラフィックス カード推論、"cpu" CPU 推論
 54 |   # このオプションは、get_bert_feature のデフォルトのデバイスも決定します
 55 |   device: "cuda"
 56 |   # 複数デバイス推論を使用する
 57 |   use_multi_device: false
 58 | 
 59 | # emo_gen 関連の設定
 60 | # 注意「:」の後にはスペースが必要であることに注意してください。
 61 | emo_gen:
 62 |   # トレーニング データセット設定ファイルのパス
 63 |   config_path: "config.json"
 64 |   # 並列数
 65 |   num_processes: 4
 66 |   # 使用デバイス: オプションの "cuda" グラフィックス カード推論、"cpu" CPU 推論
 67 |   device: "cuda"
 68 |   # 複数デバイス推論を使用する
 69 |   use_multi_device: false
 70 | 
 71 | # train 関連の設定
 72 | # 注意「:」の後にはスペースが必要であることに注意してください。
 73 | train_ms:
 74 |   env:
 75 |     MASTER_ADDR: "127.0.0.1"
 76 |     MASTER_PORT: 10086
 77 |     WORLD_SIZE: 1
 78 |     LOCAL_RANK: 0
 79 |     RANK: 0
 80 |     # 環境変数には任意の名前を入力できます
 81 |     # THE_ENV_VAR_YOU_NEED_TO_USE: "1234567"
 82 |   # ベース設定
 83 |   base:
 84 |     use_base_model: false
 85 |     repo_id: "Stardust_minus/Bert-VITS2"
 86 |     model_image: "Bert-VITS2_2.3底模" # openi Webページのモデル名
 87 |   # トレーニング モデルの保存ディレクトリ: 旧バージョンとの違いは、元のデータ セットは logs/model_name に保存されていましたが、現在は Data/your data set/models に統一されて保存されています。
 88 |   model: "models"
 89 |   # 設定ファイルのパス
 90 |   config_path: "config.json"
 91 |   # トレーニングに使用する worker の数、CPU コアの数を超えることは推奨されません
 92 |   num_workers: 16
 93 |   # これをオフにすると、ディスク容量を 50% 近く節約できますが、実際のトレーニング速度が遅くなり、CPU 使用率が高くなる可能性があります。
 94 |   spec_cache: True
 95 |   # 保存されたチェックポイントの数。スペースを節約するために、この数を超えるウェイトは削除されます。
 96 |   keep_ckpts: 0
 97 | 
 98 | 
 99 | # webui webui 設定
100 | # 注意「:」の後にはスペースが必要であることに注意してください。
101 | webui:
102 |   # 推論デバイス
103 |   device: "cuda"
104 |   # モデルパス
105 |   model: "models/G_2000.pth"
106 |   # 設定ファイルのパス
107 |   config_path: "config.json"
108 |   # ポート番号
109 |   port: 7860
110 |   # 外部ネットへの公開の有無
111 |   share: false
112 |   # デバッグモードを有効にするかどうか
113 |   debug: false
114 |   # 言語認識ライブラリ、langid か fastlid を選択
115 |   language_identification_library: "langid"
116 | 
117 | 
118 | # server-fastapi 設定
119 | # 注意「:」の後にはスペースが必要であることに注意してください。
120 | # この構成の下のすべての構成は、ルート ディレクトリに対する相対パスです。
121 | server:
122 |   # ポート番号
123 |   port: 5000
124 |   # モデルがデフォルトで使用するデバイス。ただし、この設定は現在実装されていません。
125 |   device: "cuda"
126 |   # ロードする必要があるすべてのモデルの構成については、複数のモデルを入力することも、モデルを入力しないこともできます。Web ページが成功した後にモデルを手動でロードできます。
127 |   # モデルを読み込まない設定フォーマット、models に [ ] （空のリスト）を割り当てる。speakers が [] の２番めのモデルを参照してください。
128 |   # すべてのモデルにはモデルへのパスと構成が正しく設定されている必要があることに注意してください。空のパスは読み込みエラーを引き起こします。
129 |   # モデルを空白のままにし、Web ページが正常に読み込まれた後でモデルを手動で入力することもできます。
130 |   models:
131 |     - # モデルパス
132 |       model: ""
133 |       # モデル config.json へのパス
134 |       config: ""
135 |       # モデルが使用するデバイス。入力するとデフォルトの設定が上書きされます。
136 |       device: "cuda"
137 |       # モデルで使用されるデフォルトの言語
138 |       language: "JP"
139 |       # モデルキャラクターのデフォルトパラメータ
140 |       # すべてのキャラクターを入力する必要はありません。入力されていないキャラクターにはデフォルト値が使用されます。
141 |       # 現時点では記入不要です。キャラクターごとの個別設定はまだ実装されていません。
142 |       speakers:
143 |         - speaker: "科比"
144 |           sdp_ratio: 0.2
145 |           noise_scale: 0.6
146 |           noise_scale_w: 0.8
147 |           length_scale: 1
148 |     - # モデルパス
149 |       model: ""
150 |       # モデル config.json へのパス
151 |       config: ""
152 |       # モデルが使用するデバイス。入力するとデフォルトの設定が上書きされます。
153 |       device: "cuda"
154 |       # モデルで使用されるデフォルトの言語
155 |       language: "JP"
156 |       # モデルキャラクターのデフォルトパラメータ
157 |       # すべてのキャラクターを入力する必要はありません。入力されていないキャラクターにはデフォルト値が使用されます。
158 |       speakers: [ ] # 空白のままにすることもできます
159 | 
160 | 
161 | # Baidu 翻訳オープン プラットフォーム API の構成
162 | # APIアクセスドキュメント https://api.fanyi.baidu.com/doc/21
163 | # アプリの ID やキーを Github や他の Web サイトで公開しないでください。
164 | # 注意「:」の後にはスペースが必要であることに注意してください。
165 | translate:
166 |   # あなたのAPPID
167 |   "app_key": ""
168 |   # あなたのキー
169 |   "secret_key": ""
170 | 


--------------------------------------------------------------------------------