├── .gitignore ├── LICENSE ├── README.md ├── configs ├── speechlm │ └── hubert.yaml └── unit2speech │ └── mhubert-expresso-2000.yaml ├── data └── .gitignore ├── docs ├── README.md ├── audio │ ├── 121_121726_000004_000003.wav │ ├── 121_121726_000004_000003_1.wav │ ├── 121_121726_000004_000003_2.wav │ ├── 237_126133_000002_000003.wav │ ├── 237_126133_000002_000003_1.wav │ ├── 237_126133_000002_000003_2.wav │ ├── 260_123288_000003_000001.wav │ ├── 260_123288_000003_000001_1.wav │ ├── 260_123288_000003_000001_2.wav │ ├── 260_123288_000003_000001_dac.wav │ ├── 672_122797_000002_000002.wav │ ├── 672_122797_000002_000002_1.wav │ ├── 672_122797_000002_000002_2.wav │ └── 672_122797_000002_000002_dac.wav └── index.html ├── main_resynth.py ├── main_speechlm.py ├── models └── .gitignore ├── requirements.txt ├── scripts ├── download_hi-fi-captain.sh ├── download_librilight.sh ├── download_librispeech.sh ├── download_libritts.sh ├── download_slm21.sh ├── run_speechlm.bash └── setup.sh └── src ├── bigvgan ├── LICENSE ├── __init__.py ├── activations.py ├── alias_free_activation │ ├── cuda │ │ ├── __init__.py │ │ ├── activation1d.py │ │ ├── anti_alias_activation.cpp │ │ ├── anti_alias_activation_cuda.cu │ │ ├── compat.h │ │ ├── load.py │ │ └── type_shim.h │ └── torch │ │ ├── __init__.py │ │ ├── act.py │ │ ├── filter.py │ │ └── resample.py ├── bigvgan.py ├── data.py ├── discriminators.py ├── incl_licenses │ ├── LICENSE_1 │ ├── LICENSE_2 │ ├── LICENSE_3 │ ├── LICENSE_4 │ ├── LICENSE_5 │ ├── LICENSE_6 │ ├── LICENSE_7 │ └── LICENSE_8 ├── loss.py ├── train.py └── utils.py ├── flow_matching ├── __init__.py ├── configs.py ├── data.py ├── models.py ├── modules │ ├── LICENSE │ ├── __init__.py │ ├── alibi.py │ ├── fastspeech.py │ ├── norm.py │ ├── time_embed.py │ └── transformer.py ├── preprocess.py ├── synthesize.py ├── train.py └── utils │ ├── __init__.py │ ├── misc.py │ └── textless.py ├── patch └── utmos_lightning_module.patch └── speechlm ├── __init__.py ├── data.py ├── eval.py ├── tokenize.py ├── train.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/README.md -------------------------------------------------------------------------------- /configs/speechlm/hubert.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/configs/speechlm/hubert.yaml -------------------------------------------------------------------------------- /configs/unit2speech/mhubert-expresso-2000.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/configs/unit2speech/mhubert-expresso-2000.yaml -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/audio/121_121726_000004_000003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/docs/audio/121_121726_000004_000003.wav -------------------------------------------------------------------------------- /docs/audio/121_121726_000004_000003_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/docs/audio/121_121726_000004_000003_1.wav -------------------------------------------------------------------------------- /docs/audio/121_121726_000004_000003_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/docs/audio/121_121726_000004_000003_2.wav -------------------------------------------------------------------------------- /docs/audio/237_126133_000002_000003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/docs/audio/237_126133_000002_000003.wav -------------------------------------------------------------------------------- /docs/audio/237_126133_000002_000003_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/docs/audio/237_126133_000002_000003_1.wav -------------------------------------------------------------------------------- /docs/audio/237_126133_000002_000003_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/docs/audio/237_126133_000002_000003_2.wav -------------------------------------------------------------------------------- /docs/audio/260_123288_000003_000001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/docs/audio/260_123288_000003_000001.wav -------------------------------------------------------------------------------- /docs/audio/260_123288_000003_000001_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/docs/audio/260_123288_000003_000001_1.wav -------------------------------------------------------------------------------- /docs/audio/260_123288_000003_000001_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/docs/audio/260_123288_000003_000001_2.wav -------------------------------------------------------------------------------- /docs/audio/260_123288_000003_000001_dac.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/docs/audio/260_123288_000003_000001_dac.wav -------------------------------------------------------------------------------- /docs/audio/672_122797_000002_000002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/docs/audio/672_122797_000002_000002.wav -------------------------------------------------------------------------------- /docs/audio/672_122797_000002_000002_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/docs/audio/672_122797_000002_000002_1.wav -------------------------------------------------------------------------------- /docs/audio/672_122797_000002_000002_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/docs/audio/672_122797_000002_000002_2.wav -------------------------------------------------------------------------------- /docs/audio/672_122797_000002_000002_dac.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/docs/audio/672_122797_000002_000002_dac.wav -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/docs/index.html -------------------------------------------------------------------------------- /main_resynth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/main_resynth.py -------------------------------------------------------------------------------- /main_speechlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/main_speechlm.py -------------------------------------------------------------------------------- /models/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/download_hi-fi-captain.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/scripts/download_hi-fi-captain.sh -------------------------------------------------------------------------------- /scripts/download_librilight.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/scripts/download_librilight.sh -------------------------------------------------------------------------------- /scripts/download_librispeech.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/scripts/download_librispeech.sh -------------------------------------------------------------------------------- /scripts/download_libritts.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/scripts/download_libritts.sh -------------------------------------------------------------------------------- /scripts/download_slm21.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/scripts/download_slm21.sh -------------------------------------------------------------------------------- /scripts/run_speechlm.bash: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/scripts/run_speechlm.bash -------------------------------------------------------------------------------- /scripts/setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/scripts/setup.sh -------------------------------------------------------------------------------- /src/bigvgan/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/LICENSE -------------------------------------------------------------------------------- /src/bigvgan/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/bigvgan/activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/activations.py -------------------------------------------------------------------------------- /src/bigvgan/alias_free_activation/cuda/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/bigvgan/alias_free_activation/cuda/activation1d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/alias_free_activation/cuda/activation1d.py -------------------------------------------------------------------------------- /src/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp -------------------------------------------------------------------------------- /src/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/alias_free_activation/cuda/anti_alias_activation_cuda.cu -------------------------------------------------------------------------------- /src/bigvgan/alias_free_activation/cuda/compat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/alias_free_activation/cuda/compat.h -------------------------------------------------------------------------------- /src/bigvgan/alias_free_activation/cuda/load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/alias_free_activation/cuda/load.py -------------------------------------------------------------------------------- /src/bigvgan/alias_free_activation/cuda/type_shim.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/alias_free_activation/cuda/type_shim.h -------------------------------------------------------------------------------- /src/bigvgan/alias_free_activation/torch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/bigvgan/alias_free_activation/torch/act.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/alias_free_activation/torch/act.py -------------------------------------------------------------------------------- /src/bigvgan/alias_free_activation/torch/filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/alias_free_activation/torch/filter.py -------------------------------------------------------------------------------- /src/bigvgan/alias_free_activation/torch/resample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/alias_free_activation/torch/resample.py -------------------------------------------------------------------------------- /src/bigvgan/bigvgan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/bigvgan.py -------------------------------------------------------------------------------- /src/bigvgan/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/data.py -------------------------------------------------------------------------------- /src/bigvgan/discriminators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/discriminators.py -------------------------------------------------------------------------------- /src/bigvgan/incl_licenses/LICENSE_1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/incl_licenses/LICENSE_1 -------------------------------------------------------------------------------- /src/bigvgan/incl_licenses/LICENSE_2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/incl_licenses/LICENSE_2 -------------------------------------------------------------------------------- /src/bigvgan/incl_licenses/LICENSE_3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/incl_licenses/LICENSE_3 -------------------------------------------------------------------------------- /src/bigvgan/incl_licenses/LICENSE_4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/incl_licenses/LICENSE_4 -------------------------------------------------------------------------------- /src/bigvgan/incl_licenses/LICENSE_5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/incl_licenses/LICENSE_5 -------------------------------------------------------------------------------- /src/bigvgan/incl_licenses/LICENSE_6: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/incl_licenses/LICENSE_6 -------------------------------------------------------------------------------- /src/bigvgan/incl_licenses/LICENSE_7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/incl_licenses/LICENSE_7 -------------------------------------------------------------------------------- /src/bigvgan/incl_licenses/LICENSE_8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/incl_licenses/LICENSE_8 -------------------------------------------------------------------------------- /src/bigvgan/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/loss.py -------------------------------------------------------------------------------- /src/bigvgan/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/train.py -------------------------------------------------------------------------------- /src/bigvgan/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/bigvgan/utils.py -------------------------------------------------------------------------------- /src/flow_matching/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/flow_matching/configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/flow_matching/configs.py -------------------------------------------------------------------------------- /src/flow_matching/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/flow_matching/data.py -------------------------------------------------------------------------------- /src/flow_matching/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/flow_matching/models.py -------------------------------------------------------------------------------- /src/flow_matching/modules/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/flow_matching/modules/LICENSE -------------------------------------------------------------------------------- /src/flow_matching/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/flow_matching/modules/alibi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/flow_matching/modules/alibi.py -------------------------------------------------------------------------------- /src/flow_matching/modules/fastspeech.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/flow_matching/modules/fastspeech.py -------------------------------------------------------------------------------- /src/flow_matching/modules/norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/flow_matching/modules/norm.py -------------------------------------------------------------------------------- /src/flow_matching/modules/time_embed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/flow_matching/modules/time_embed.py -------------------------------------------------------------------------------- /src/flow_matching/modules/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/flow_matching/modules/transformer.py -------------------------------------------------------------------------------- /src/flow_matching/preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/flow_matching/preprocess.py -------------------------------------------------------------------------------- /src/flow_matching/synthesize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/flow_matching/synthesize.py -------------------------------------------------------------------------------- /src/flow_matching/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/flow_matching/train.py -------------------------------------------------------------------------------- /src/flow_matching/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/flow_matching/utils/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/flow_matching/utils/misc.py -------------------------------------------------------------------------------- /src/flow_matching/utils/textless.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/flow_matching/utils/textless.py -------------------------------------------------------------------------------- /src/patch/utmos_lightning_module.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/patch/utmos_lightning_module.patch -------------------------------------------------------------------------------- /src/speechlm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/speechlm/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/speechlm/data.py -------------------------------------------------------------------------------- /src/speechlm/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/speechlm/eval.py -------------------------------------------------------------------------------- /src/speechlm/tokenize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/speechlm/tokenize.py -------------------------------------------------------------------------------- /src/speechlm/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/speechlm/train.py -------------------------------------------------------------------------------- /src/speechlm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ryota-komatsu/speech_resynth/HEAD/src/speechlm/utils.py --------------------------------------------------------------------------------