The response has been limited to 50k tokens of the smallest files in the repo. You can remove this limitation by removing the max tokens filter.
├── .gitattributes
├── .gitignore
├── .gitmodules
├── BasicDict.json
├── Bert
    ├── bert-base-japanese-v3
    │   └── Tokenizer.json
    ├── chinese-roberta-wwm-ext-large
    │   └── Tokenizer.json
    └── deberta-v2-large-japanese
    │   └── Tokenizer.json
├── BertVits.md
├── CMakeLists.txt
├── CMakePresets.json
├── CSharpDemo
    ├── CSharpDemo.csproj
    ├── Program.cs
    ├── Properties
    │   ├── Resources.Designer.cs
    │   ├── Resources.resx
    │   └── launchSettings.json
    ├── README.md
    └── README_en.md
├── DotNetApi
    ├── DotNetApi.csproj
    ├── LibSvcApi.cs
    └── Properties
    │   └── launchSettings.json
├── DragonianSpeech.sln
├── LICENSE
├── Lib
    ├── MJson
    │   ├── MJson.h
    │   ├── yyjson.c
    │   └── yyjson.h
    ├── OnnxRuntimeDmlProvider
    │   ├── .signature.p7s
    │   ├── LICENSE.txt
    │   ├── Microsoft.ML.OnnxRuntime.DirectML.nuspec
    │   ├── ORT_icon_for_light_bg.png
    │   ├── Privacy.md
    │   ├── ThirdPartyNotices.txt
    │   ├── [Content_Types].xml
    │   ├── _rels
    │   │   └── .rels
    │   ├── build
    │   │   ├── native
    │   │   │   ├── Microsoft.ML.OnnxRuntime.DirectML.props
    │   │   │   ├── Microsoft.ML.OnnxRuntime.DirectML.targets
    │   │   │   └── include
    │   │   │   │   ├── cpu_provider_factory.h
    │   │   │   │   ├── dml_provider_factory.h
    │   │   │   │   ├── onnxruntime_c_api.h
    │   │   │   │   ├── onnxruntime_cxx_api.h
    │   │   │   │   ├── onnxruntime_cxx_inline.h
    │   │   │   │   ├── onnxruntime_run_options_config_keys.h
    │   │   │   │   ├── onnxruntime_session_options_config_keys.h
    │   │   │   │   └── provider_options.h
    │   │   ├── netstandard1.1
    │   │   │   ├── Microsoft.ML.OnnxRuntime.DirectML.props
    │   │   │   └── Microsoft.ML.OnnxRuntime.DirectML.targets
    │   │   └── netstandard2.0
    │   │   │   ├── Microsoft.ML.OnnxRuntime.DirectML.props
    │   │   │   └── Microsoft.ML.OnnxRuntime.DirectML.targets
    │   ├── package
    │   │   └── services
    │   │   │   └── metadata
    │   │   │       └── core-properties
    │   │   │           └── c7795757db2346b9bcfb932f99cdb33f.psmdcp
    │   └── runtimes
    │   │   └── win-x64
    │   │       └── native
    │   │           └── onnxruntime.lib
    ├── World
    │   ├── LICENSE.txt
    │   ├── src
    │   │   ├── cheaptrick.cpp
    │   │   ├── codec.cpp
    │   │   ├── common.cpp
    │   │   ├── d4c.cpp
    │   │   ├── dio.cpp
    │   │   ├── fft.cpp
    │   │   ├── harvest.cpp
    │   │   ├── matlabfunctions.cpp
    │   │   ├── stonemask.cpp
    │   │   ├── synthesis.cpp
    │   │   ├── synthesisrealtime.cpp
    │   │   └── world
    │   │   │   ├── cheaptrick.h
    │   │   │   ├── codec.h
    │   │   │   ├── common.h
    │   │   │   ├── constantnumbers.h
    │   │   │   ├── d4c.h
    │   │   │   ├── dio.h
    │   │   │   ├── fft.h
    │   │   │   ├── harvest.h
    │   │   │   ├── macrodefinitions.h
    │   │   │   ├── matlabfunctions.h
    │   │   │   ├── stonemask.h
    │   │   │   ├── synthesis.h
    │   │   │   └── synthesisrealtime.h
    │   └── tools
    │   │   ├── audioio.cpp
    │   │   ├── audioio.h
    │   │   ├── parameterio.cpp
    │   │   └── parameterio.h
    └── ffmpeg-4.2.1
    │   ├── COPYING.GPLv3
    │   ├── COPYING.LGPLv3
    │   ├── Lib
    │       ├── avcodec.lib
    │       ├── avformat.lib
    │       ├── avutil.lib
    │       ├── swresample.lib
    │       └── swscale.lib
    │   └── include
    │       ├── libavcodec
    │           ├── ac3_parser.h
    │           ├── adts_parser.h
    │           ├── avcodec.h
    │           ├── avdct.h
    │           ├── avfft.h
    │           ├── d3d11va.h
    │           ├── dirac.h
    │           ├── dv_profile.h
    │           ├── dxva2.h
    │           ├── jni.h
    │           ├── mediacodec.h
    │           ├── qsv.h
    │           ├── vaapi.h
    │           ├── vdpau.h
    │           ├── version.h
    │           ├── videotoolbox.h
    │           ├── vorbis_parser.h
    │           └── xvmc.h
    │       ├── libavdevice
    │           ├── avdevice.h
    │           └── version.h
    │       ├── libavfilter
    │           ├── avfilter.h
    │           ├── buffersink.h
    │           ├── buffersrc.h
    │           └── version.h
    │       ├── libavformat
    │           ├── avformat.h
    │           ├── avio.h
    │           └── version.h
    │       ├── libavutil
    │           ├── adler32.h
    │           ├── aes.h
    │           ├── aes_ctr.h
    │           ├── attributes.h
    │           ├── audio_fifo.h
    │           ├── avassert.h
    │           ├── avconfig.h
    │           ├── avstring.h
    │           ├── avutil.h
    │           ├── base64.h
    │           ├── blowfish.h
    │           ├── bprint.h
    │           ├── bswap.h
    │           ├── buffer.h
    │           ├── camellia.h
    │           ├── cast5.h
    │           ├── channel_layout.h
    │           ├── common.h
    │           ├── cpu.h
    │           ├── crc.h
    │           ├── des.h
    │           ├── dict.h
    │           ├── display.h
    │           ├── downmix_info.h
    │           ├── encryption_info.h
    │           ├── error.h
    │           ├── eval.h
    │           ├── ffversion.h
    │           ├── fifo.h
    │           ├── file.h
    │           ├── frame.h
    │           ├── hash.h
    │           ├── hdr_dynamic_metadata.h
    │           ├── hmac.h
    │           ├── hwcontext.h
    │           ├── hwcontext_cuda.h
    │           ├── hwcontext_d3d11va.h
    │           ├── hwcontext_drm.h
    │           ├── hwcontext_dxva2.h
    │           ├── hwcontext_mediacodec.h
    │           ├── hwcontext_qsv.h
    │           ├── hwcontext_vaapi.h
    │           ├── hwcontext_vdpau.h
    │           ├── hwcontext_videotoolbox.h
    │           ├── imgutils.h
    │           ├── intfloat.h
    │           ├── intreadwrite.h
    │           ├── lfg.h
    │           ├── log.h
    │           ├── lzo.h
    │           ├── macros.h
    │           ├── mastering_display_metadata.h
    │           ├── mathematics.h
    │           ├── md5.h
    │           ├── mem.h
    │           ├── motion_vector.h
    │           ├── murmur3.h
    │           ├── opt.h
    │           ├── parseutils.h
    │           ├── pixdesc.h
    │           ├── pixelutils.h
    │           ├── pixfmt.h
    │           ├── random_seed.h
    │           ├── rational.h
    │           ├── rc4.h
    │           ├── replaygain.h
    │           ├── ripemd.h
    │           ├── samplefmt.h
    │           ├── sha.h
    │           ├── sha512.h
    │           ├── spherical.h
    │           ├── stereo3d.h
    │           ├── tea.h
    │           ├── threadmessage.h
    │           ├── time.h
    │           ├── timecode.h
    │           ├── timestamp.h
    │           ├── tree.h
    │           ├── twofish.h
    │           ├── tx.h
    │           ├── version.h
    │           └── xtea.h
    │       ├── libswresample
    │           ├── swresample.h
    │           └── version.h
    │       └── libswscale
    │           ├── swscale.h
    │           └── version.h
├── README.md
├── README_en.md
├── TTSProjectTemplate.ttsproj
├── VitsInputTemplate.json
├── fish-speech.cpp
    ├── CMakeLists.txt
    ├── Demo
    │   ├── CMakeLists.txt
    │   └── main.cpp
    ├── include
    │   ├── Base.h
    │   ├── Module.h
    │   └── llama.h
    ├── src
    │   ├── Base.cpp
    │   ├── Module.cpp
    │   └── llama.cpp
    ├── test.py
    └── test.txt
├── libdlvoicecodec
    ├── LibDLVoiceCodec
    │   ├── base.cpp
    │   ├── base.h
    │   ├── operator.cpp
    │   ├── operator.h
    │   ├── value.cpp
    │   └── value.h
    ├── Modules
    │   ├── AvCodec
    │   │   ├── AvCodeResample.h
    │   │   ├── Recorder.cpp
    │   │   └── Recorder.h
    │   ├── DataStruct
    │   │   ├── KDTree.cpp
    │   │   ├── KDTree.hpp
    │   │   └── README.md
    │   ├── InferTools
    │   │   ├── Cluster
    │   │   │   ├── MoeVSBaseCluster.cpp
    │   │   │   ├── MoeVSBaseCluster.hpp
    │   │   │   ├── MoeVSClusterManager.cpp
    │   │   │   ├── MoeVSClusterManager.hpp
    │   │   │   ├── MoeVSIndexCluster.cpp
    │   │   │   ├── MoeVSIndexCluster.hpp
    │   │   │   ├── MoeVSKmeansCluster.cpp
    │   │   │   └── MoeVSKmeansCluster.hpp
    │   │   ├── F0Extractor
    │   │   │   ├── BaseF0Extractor
    │   │   │   │   ├── BaseF0Extractor.cpp
    │   │   │   │   └── BaseF0Extractor.hpp
    │   │   │   ├── DioF0Extractor
    │   │   │   │   ├── DioF0Extractor.cpp
    │   │   │   │   └── DioF0Extractor.hpp
    │   │   │   ├── F0ExtractorManager.cpp
    │   │   │   ├── F0ExtractorManager.hpp
    │   │   │   ├── HarvestF0Extractor
    │   │   │   │   ├── HarvestF0Extractor.cpp
    │   │   │   │   └── HarvestF0Extractor.hpp
    │   │   │   └── NetF0Predictors
    │   │   │   │   ├── NetF0Predictors.cpp
    │   │   │   │   └── NetF0Predictors.hpp
    │   │   ├── G2P
    │   │   │   ├── MoeVSG2P.cpp
    │   │   │   └── MoeVSG2P.hpp
    │   │   ├── Sampler
    │   │   │   ├── MoeVSBaseSampler.cpp
    │   │   │   ├── MoeVSBaseSampler.hpp
    │   │   │   ├── MoeVSSamplerManager.cpp
    │   │   │   ├── MoeVSSamplerManager.hpp
    │   │   │   ├── MoeVSSamplers.cpp
    │   │   │   └── MoeVSSamplers.hpp
    │   │   ├── Stft
    │   │   │   ├── stft.cpp
    │   │   │   └── stft.hpp
    │   │   ├── TensorExtractor
    │   │   │   ├── MoeVSCoreTensorExtractor.cpp
    │   │   │   ├── MoeVSCoreTensorExtractor.hpp
    │   │   │   ├── MoeVoiceStudioTensorExtractor.cpp
    │   │   │   ├── MoeVoiceStudioTensorExtractor.hpp
    │   │   │   ├── TensorExtractorManager.cpp
    │   │   │   └── TensorExtractorManager.hpp
    │   │   ├── inferTools.cpp
    │   │   └── inferTools.hpp
    │   ├── Logger
    │   │   ├── MoeSSLogger.cpp
    │   │   └── MoeSSLogger.hpp
    │   ├── Models
    │   │   ├── EnvManager.cpp
    │   │   ├── EnvManager.hpp
    │   │   ├── header
    │   │   │   ├── DiffSvc.hpp
    │   │   │   ├── GPT-SoVits.hpp
    │   │   │   ├── ModelBase.hpp
    │   │   │   ├── MoeVSProject.hpp
    │   │   │   ├── SVC.hpp
    │   │   │   ├── TTS.hpp
    │   │   │   ├── Tacotron.hpp
    │   │   │   ├── Vits.hpp
    │   │   │   └── VitsSvc.hpp
    │   │   └── src
    │   │   │   ├── DiffSvc.cpp
    │   │   │   ├── GPT-SoVits.cpp
    │   │   │   ├── ModelBase.cpp
    │   │   │   ├── MoeVSProject.cpp
    │   │   │   ├── SVC.cpp
    │   │   │   ├── TTS.cpp
    │   │   │   ├── Vits.cpp
    │   │   │   └── VitsSvc.cpp
    │   ├── Modules.cpp
    │   ├── Modules.hpp
    │   ├── README.md
    │   └── StringPreprocess.hpp
    ├── MoeVoiceStudioSvc - Core - Cmd.cpp
    ├── MoeVoiceStudioSvc - Core - Cmd.vcxproj
    ├── MoeVoiceStudioSvc - Core - Cmd.vcxproj.filters
    ├── analyse
    │   └── GptSoVits.md
    ├── input.wav
    ├── output.wav
    └── packages.config
├── libsvc
    ├── Api
    │   ├── header
    │   │   ├── NativeApi.h
    │   │   └── libsvc.h
    │   ├── readme.md
    │   └── src
    │   │   ├── NativeApi.cpp
    │   │   └── libsvc.cpp
    ├── Modules
    │   ├── Lib
    │   │   ├── MJson
    │   │   │   ├── MJson.cpp
    │   │   │   ├── MJson.h
    │   │   │   ├── yyjson.c
    │   │   │   └── yyjson.h
    │   │   └── World
    │   │   │   ├── LICENSE.txt
    │   │   │   ├── src
    │   │   │       ├── cheaptrick.cpp
    │   │   │       ├── codec.cpp
    │   │   │       ├── common.cpp
    │   │   │       ├── d4c.cpp
    │   │   │       ├── dio.cpp
    │   │   │       ├── fft.cpp
    │   │   │       ├── harvest.cpp
    │   │   │       ├── matlabfunctions.cpp
    │   │   │       ├── stonemask.cpp
    │   │   │       ├── synthesis.cpp
    │   │   │       ├── synthesisrealtime.cpp
    │   │   │       └── world
    │   │   │       │   ├── cheaptrick.h
    │   │   │       │   ├── codec.h
    │   │   │       │   ├── common.h
    │   │   │       │   ├── constantnumbers.h
    │   │   │       │   ├── d4c.h
    │   │   │       │   ├── dio.h
    │   │   │       │   ├── fft.h
    │   │   │       │   ├── harvest.h
    │   │   │       │   ├── macrodefinitions.h
    │   │   │       │   ├── matlabfunctions.h
    │   │   │       │   ├── stonemask.h
    │   │   │       │   ├── synthesis.h
    │   │   │       │   └── synthesisrealtime.h
    │   │   │   └── tools
    │   │   │       ├── audioio.cpp
    │   │   │       ├── audioio.h
    │   │   │       ├── parameterio.cpp
    │   │   │       └── parameterio.h
    │   ├── README.md
    │   ├── framework.h
    │   ├── header
    │   │   ├── InferTools
    │   │   │   ├── AvCodec
    │   │   │   │   └── AvCodeResample.h
    │   │   │   ├── Cluster
    │   │   │   │   ├── MoeVSBaseCluster.hpp
    │   │   │   │   ├── MoeVSClusterManager.hpp
    │   │   │   │   ├── MoeVSIndexCluster.hpp
    │   │   │   │   └── MoeVSKmeansCluster.hpp
    │   │   │   ├── DataStruct
    │   │   │   │   ├── KDTree.hpp
    │   │   │   │   └── README.md
    │   │   │   ├── F0Extractor
    │   │   │   │   ├── BaseF0Extractor.hpp
    │   │   │   │   ├── DioF0Extractor.hpp
    │   │   │   │   ├── F0ExtractorManager.hpp
    │   │   │   │   ├── HarvestF0Extractor.hpp
    │   │   │   │   └── NetF0Predictors.hpp
    │   │   │   ├── Sampler
    │   │   │   │   ├── MoeVSBaseSampler.hpp
    │   │   │   │   ├── MoeVSSamplerManager.hpp
    │   │   │   │   └── MoeVSSamplers.hpp
    │   │   │   ├── Stft
    │   │   │   │   └── stft.hpp
    │   │   │   ├── TensorExtractor
    │   │   │   │   ├── MoeVSCoreTensorExtractor.hpp
    │   │   │   │   ├── MoeVoiceStudioTensorExtractor.hpp
    │   │   │   │   └── TensorExtractorManager.hpp
    │   │   │   └── inferTools.hpp
    │   │   ├── Logger
    │   │   │   └── MoeSSLogger.hpp
    │   │   ├── Models
    │   │   │   ├── DiffSvc.hpp
    │   │   │   ├── EnvManager.hpp
    │   │   │   ├── ModelBase.hpp
    │   │   │   ├── MoeVSProject.hpp
    │   │   │   ├── ReflowSvc.hpp
    │   │   │   ├── SVC.hpp
    │   │   │   └── VitsSvc.hpp
    │   │   ├── Modules.hpp
    │   │   └── StringPreprocess.hpp
    │   └── src
    │   │   ├── InferTools
    │   │       ├── AvCodec
    │   │       │   └── AvCodeResample.cpp
    │   │       ├── Cluster
    │   │       │   ├── MoeVSBaseCluster.cpp
    │   │       │   ├── MoeVSClusterManager.cpp
    │   │       │   ├── MoeVSIndexCluster.cpp
    │   │       │   └── MoeVSKmeansCluster.cpp
    │   │       ├── DataStruct
    │   │       │   ├── KDTree.cpp
    │   │       │   └── README.md
    │   │       ├── F0Extractor
    │   │       │   ├── BaseF0Extractor.cpp
    │   │       │   ├── DioF0Extractor.cpp
    │   │       │   ├── F0ExtractorManager.cpp
    │   │       │   ├── HarvestF0Extractor.cpp
    │   │       │   └── NetF0Predictors.cpp
    │   │       ├── Sampler
    │   │       │   ├── MoeVSBaseSampler.cpp
    │   │       │   ├── MoeVSSamplerManager.cpp
    │   │       │   └── MoeVSSamplers.cpp
    │   │       ├── Stft
    │   │       │   └── stft.cpp
    │   │       ├── TensorExtractor
    │   │       │   ├── MoeVSCoreTensorExtractor.cpp
    │   │       │   ├── MoeVoiceStudioTensorExtractor.cpp
    │   │       │   └── TensorExtractorManager.cpp
    │   │       └── inferTools.cpp
    │   │   ├── Logger
    │   │       └── MoeSSLogger.cpp
    │   │   ├── Models
    │   │       ├── DiffSvc.cpp
    │   │       ├── EnvManager.cpp
    │   │       ├── ModelBase.cpp
    │   │       ├── MoeVSProject.cpp
    │   │       ├── ReflowSvc.cpp
    │   │       ├── SVC.cpp
    │   │       └── VitsSvc.cpp
    │   │   ├── Modules.cpp
    │   │   └── StringPreprocess.cpp
    ├── README.md
    ├── dllmain.cpp
    ├── libsvc.vcxproj
    ├── libsvc.vcxproj.filters
    ├── libsvc.vcxproj.user
    └── packages.config
├── libtts
    ├── Api
    │   ├── NativeApi.cpp
    │   └── NativeApi.h
    ├── Modules
    │   ├── AvCodec
    │   │   ├── AvCodeResample.h
    │   │   ├── Recorder.cpp
    │   │   └── Recorder.h
    │   ├── InferTools
    │   │   ├── G2P
    │   │   │   ├── MoeVSG2P.cpp
    │   │   │   └── MoeVSG2P.hpp
    │   │   ├── inferTools.cpp
    │   │   └── inferTools.hpp
    │   ├── Lib
    │   │   ├── MJson
    │   │   │   ├── MJson.cpp
    │   │   │   ├── MJson.h
    │   │   │   ├── yyjson.c
    │   │   │   └── yyjson.h
    │   │   └── World
    │   │   │   ├── LICENSE.txt
    │   │   │   ├── src
    │   │   │       ├── cheaptrick.cpp
    │   │   │       ├── codec.cpp
    │   │   │       ├── common.cpp
    │   │   │       ├── d4c.cpp
    │   │   │       ├── dio.cpp
    │   │   │       ├── fft.cpp
    │   │   │       ├── harvest.cpp
    │   │   │       ├── matlabfunctions.cpp
    │   │   │       ├── stonemask.cpp
    │   │   │       ├── synthesis.cpp
    │   │   │       ├── synthesisrealtime.cpp
    │   │   │       └── world
    │   │   │       │   ├── cheaptrick.h
    │   │   │       │   ├── codec.h
    │   │   │       │   ├── common.h
    │   │   │       │   ├── constantnumbers.h
    │   │   │       │   ├── d4c.h
    │   │   │       │   ├── dio.h
    │   │   │       │   ├── fft.h
    │   │   │       │   ├── harvest.h
    │   │   │       │   ├── macrodefinitions.h
    │   │   │       │   ├── matlabfunctions.h
    │   │   │       │   ├── stonemask.h
    │   │   │       │   ├── synthesis.h
    │   │   │       │   └── synthesisrealtime.h
    │   │   │   └── tools
    │   │   │       ├── audioio.cpp
    │   │   │       ├── audioio.h
    │   │   │       ├── parameterio.cpp
    │   │   │       └── parameterio.h
    │   ├── Logger
    │   │   ├── MoeSSLogger.cpp
    │   │   └── MoeSSLogger.hpp
    │   ├── Models
    │   │   ├── EnvManager.cpp
    │   │   ├── EnvManager.hpp
    │   │   ├── header
    │   │   │   ├── GPT-SoVits.hpp
    │   │   │   ├── ModelBase.hpp
    │   │   │   ├── MoeVSProject.hpp
    │   │   │   ├── TTS.hpp
    │   │   │   ├── Tacotron.hpp
    │   │   │   └── Vits.hpp
    │   │   └── src
    │   │   │   ├── GPT-SoVits.cpp
    │   │   │   ├── ModelBase.cpp
    │   │   │   ├── MoeVSProject.cpp
    │   │   │   ├── TTS.cpp
    │   │   │   └── Vits.cpp
    │   ├── Modules.cpp
    │   ├── Modules.hpp
    │   ├── README.md
    │   └── StringPreprocess.hpp
    ├── dllmain.cpp
    ├── framework.h
    ├── libtts.vcxproj
    └── libtts.vcxproj.filters
├── logo
    ├── logo(AIGen).png
    ├── logo256(AIGen).png
    └── logo512(AIGen).png
└── test.json


/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "libsvc-tensorlib"]
2 |     path = libsvc-tensorlib
3 | 	url = https://github.com/NaruseMioShirakana/libsvc
4 | 


--------------------------------------------------------------------------------
/BasicDict.json:
--------------------------------------------------------------------------------
 1 | {
 2 | "_" : ["_"],
 3 | "," : [","],
 4 | "." : ["."],
 5 | "!" : ["!"],
 6 | "?" : ["?"],
 7 | "-" : ["-"],
 8 | "~" : ["~"],
 9 | "…" : ["…"],
10 | "A" : ["A"],
11 | "E" : ["E"],
12 | "I" : ["I"],
13 | "N" : ["N"],
14 | "O" : ["O"],
15 | "Q" : ["Q"],
16 | "U" : ["U"],
17 | "a" : ["a"],
18 | "b" : ["b"],
19 | "d" : ["d"],
20 | "e" : ["e"],
21 | "f" : ["f"],
22 | "g" : ["g"],
23 | "h" : ["h"],
24 | "i" : ["i"],
25 | "j" : ["j"],
26 | "k" : ["k"],
27 | "m" : ["m"],
28 | "n" : ["n"],
29 | "o" : ["o"],
30 | "p" : ["p"],
31 | "r" : ["r"],
32 | "s" : ["s"],
33 | "t" : ["t"],
34 | "u" : ["u"],
35 | "v" : ["v"],
36 | "w" : ["w"],
37 | "y" : ["y"],
38 | "z" : ["z"],
39 | "ʃ" : ["ʃ"],
40 | "ʧ" : ["ʧ"],
41 | "ʦ" : ["ʦ"],
42 | "↓" : ["↓"],
43 | "↑" : ["↑"],
44 | " " : [" "]
45 | }


--------------------------------------------------------------------------------
/BertVits.md:
--------------------------------------------------------------------------------
 1 | # BertVits及Vits使用指南
 2 | - 1、按照要求安装模型
 3 | - 2、安装Cleaner([下载地址](https://github.com/NaruseMioShirakana/TextCleaner/releases),将文件夹“G2P”解压到Exe路径)
 4 | - 3、将Bert文件夹复制到Exe路径,其中的子文件夹可以放置我发布的Bert模型,也可以啥都不放(如果不放模型就不能用Bert模型,但是不影响正常推理,就是效果可能会大打折扣)
 5 | - 4、按照自己的需要配置字典(Dict)文件
 6 | - 5、编写输入,载入程序推理
 7 | 
 8 | ## BasicDict.json
 9 | 字典的作用就是将软件自动处理出来的文本替换为你使用的模型的Symbol,而字典文件的作用就是规定这个替换规则,字典文件是如同BasicDict.json的文件,其中由非常多的键值对组成,其中的Key就是待替换文本,而Value就是替换后的文本。
10 | 
11 | ## VitsInputTemplate.json
12 | ```jsonc
13 | //Json需要是数组类型
14 | [
15 |     {
16 |             "Tokens": "私は誰?",//必填,进入Bert的文本
17 |             "Seq": ["w","a","t","a","s","h","i","w","a","d","a","r","e","?"],//选填,音素组成的序列,如果不填会根据Tokens自动生成
18 |             "Tones": [0,0,0,0,0,0,0,0,0,0,0,0,0,0],//选填,音调序列,必须与音素序列等长
19 |             "Durations": [2,5,2,5,2,2,5,2,5,2,5,2,5,2],//选填,音素时长序列,必须与音素序列等长
20 |             "Language": [0,0,0,0,0,0,0,0,0,0,0,0,0,0],//选填,语言序列,必须与音素序列等长
21 |             "SpeakerMix": [1,0,0],//选填,角色混合比例,决定对应下标角色音色的混合比例
22 |             "EmotionPrompt": ["sad", "happy"],//选填,情感参数,有情感模型的情况下可用
23 |             "NoiseScale": 0.666,//选填,噪声修正因子
24 |             "LengthScale": 1.1,//选填,时长修正因子
25 |             "DurationPredictorNoiseScale": 0.333,//选填,随机时长预测器噪声修正因子
26 |             "FactorDpSdp": 0.6,//选填,时长预测器和随机时长预测器的混合比例
27 |             "GateThreshold": 0.777,//选填,Tacotron2 EOS阈值
28 |             "MaxDecodeStep": 114514,//选填,Tacotron2 最大解码步数
29 |             "Seed": 1919810,//选填,种子
30 |             "SpeakerId": 2,//选填,角色ID(若SpeakerMix为空则使用)
31 |             "RestTime": 1.0,//选填,决定与上一个片段的时间间隔(单位为秒),若为负数则表示切断音频并输出一个新的
32 |             "PlaceHolderSymbol": "|",//选填,当Seq为String时,隔开两个音素的记号
33 |             "LanguageID": "JP",//选填,语言(ZH,JP或EN)
34 |             "G2PAdditionalInfo": "/[Japanese2]"//选填,Cleaner额外参数
35 |         }
36 | ]
37 | ```


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # CMakeList.txt: 顶层 CMake 项目文件,在此处执行全局配置
 2 | # 并包含子项目。
 3 | #
 4 | cmake_minimum_required (VERSION 3.8)
 5 | # 如果支持,请为 MSVC 编译器启用热重载。
 6 | if (POLICY CMP0141)
 7 |   cmake_policy(SET CMP0141 NEW)
 8 |   set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT "$<IF:$<AND:$<C_COMPILER_ID:MSVC>,$<CXX_COMPILER_ID:MSVC>>,$<$<CONFIG:Debug,RelWithDebInfo>:EditAndContinue>,$<$<CONFIG:Debug,RelWithDebInfo>:ProgramDatabase>>")
 9 | endif()
10 | 
11 | project ("DragonianSpeech")
12 | 
13 | set(FISHSPEECHCPP_BUILD_DEMO ON)
14 | 
15 | add_subdirectory ("fish-speech.cpp")
16 | 


--------------------------------------------------------------------------------
/CMakePresets.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "version": 3,
 3 |     "configurePresets": [
 4 |         {
 5 |             "name": "windows-base",
 6 |             "hidden": true,
 7 |             "generator": "Ninja",
 8 |             "binaryDir": "${sourceDir}/out/build/${presetName}",
 9 |             "installDir": "${sourceDir}/out/install/${presetName}",
10 |             "cacheVariables": {
11 |                 "CMAKE_C_COMPILER": "cl.exe",
12 |                 "CMAKE_CXX_COMPILER": "cl.exe"
13 |             },
14 |             "condition": {
15 |                 "type": "equals",
16 |                 "lhs": "${hostSystemName}",
17 |                 "rhs": "Windows"
18 |             }
19 |         },
20 |         {
21 |             "name": "x64-debug",
22 |             "displayName": "x64 Debug",
23 |             "inherits": "windows-base",
24 |             "architecture": {
25 |                 "value": "x64",
26 |                 "strategy": "external"
27 |             },
28 |             "cacheVariables": {
29 |                 "CMAKE_BUILD_TYPE": "Debug"
30 |             }
31 |         },
32 |         {
33 |             "name": "x64-release",
34 |             "displayName": "x64 Release",
35 |             "inherits": "x64-debug",
36 |             "cacheVariables": {
37 |                 "CMAKE_BUILD_TYPE": "Release"
38 |             }
39 |         },
40 |         {
41 |             "name": "x86-debug",
42 |             "displayName": "x86 Debug",
43 |             "inherits": "windows-base",
44 |             "architecture": {
45 |                 "value": "x86",
46 |                 "strategy": "external"
47 |             },
48 |             "cacheVariables": {
49 |                 "CMAKE_BUILD_TYPE": "Debug"
50 |             }
51 |         },
52 |         {
53 |             "name": "x86-release",
54 |             "displayName": "x86 Release",
55 |             "inherits": "x86-debug",
56 |             "cacheVariables": {
57 |                 "CMAKE_BUILD_TYPE": "Release"
58 |             }
59 |         }
60 |     ]
61 | }
62 | 


--------------------------------------------------------------------------------
/CSharpDemo/CSharpDemo.csproj:
--------------------------------------------------------------------------------
 1 | <Project Sdk="Microsoft.NET.Sdk">
 2 | 
 3 |   <PropertyGroup>
 4 |     <OutputType>Exe</OutputType>
 5 |     <TargetFramework>net8.0</TargetFramework>
 6 |     <ImplicitUsings>enable</ImplicitUsings>
 7 |     <Nullable>enable</Nullable>
 8 |     <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
 9 |     <Configurations>Debug;Release</Configurations>
10 |   </PropertyGroup>
11 | 
12 |   <ItemGroup>
13 |     <ProjectReference Include="..\DotNetApi\DotNetApi.csproj" />
14 |   </ItemGroup>
15 | 
16 |   <ItemGroup>
17 |     <Compile Update="Properties\Resources.Designer.cs">
18 |       <DesignTime>True</DesignTime>
19 |       <AutoGen>True</AutoGen>
20 |       <DependentUpon>Resources.resx</DependentUpon>
21 |     </Compile>
22 |   </ItemGroup>
23 | 
24 |   <ItemGroup>
25 |     <EmbeddedResource Update="Properties\Resources.resx">
26 |       <Generator>ResXFileCodeGenerator</Generator>
27 |       <LastGenOutput>Resources.Designer.cs</LastGenOutput>
28 |     </EmbeddedResource>
29 |   </ItemGroup>
30 | 
31 | </Project>
32 | 


--------------------------------------------------------------------------------
/CSharpDemo/Properties/launchSettings.json:
--------------------------------------------------------------------------------
1 | {
2 |   "profiles": {
3 |     "CSharpDemo": {
4 |       "commandName": "Project",
5 |       "nativeDebugging": true
6 |     }
7 |   }
8 | }


--------------------------------------------------------------------------------
/DotNetApi/DotNetApi.csproj:
--------------------------------------------------------------------------------
 1 | <Project Sdk="Microsoft.NET.Sdk">
 2 | 
 3 |   <PropertyGroup>
 4 |     <TargetFramework>net8.0</TargetFramework>
 5 |     <ImplicitUsings>enable</ImplicitUsings>
 6 |     <Nullable>enable</Nullable>
 7 |     <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
 8 |     <Configurations>Debug;Release</Configurations>
 9 |     <BaseOutputPath></BaseOutputPath>
10 |   </PropertyGroup>
11 | 
12 | </Project>
13 | 


--------------------------------------------------------------------------------
/DotNetApi/Properties/launchSettings.json:
--------------------------------------------------------------------------------
1 | {
2 |   "profiles": {
3 |     "DotNetApi": {
4 |       "commandName": "Project",
5 |       "nativeDebugging": true
6 |     }
7 |   }
8 | }


--------------------------------------------------------------------------------
/Lib/OnnxRuntimeDmlProvider/.signature.p7s:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/Lib/OnnxRuntimeDmlProvider/.signature.p7s


--------------------------------------------------------------------------------
/Lib/OnnxRuntimeDmlProvider/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) Microsoft Corporation
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Lib/OnnxRuntimeDmlProvider/Microsoft.ML.OnnxRuntime.DirectML.nuspec:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <package xmlns="http://schemas.microsoft.com/packaging/2013/05/nuspec.xsd">
 3 |   <metadata>
 4 |     <id>Microsoft.ML.OnnxRuntime.DirectML</id>
 5 |     <version>1.15.0</version>
 6 |     <authors>Microsoft</authors>
 7 |     <owners>Microsoft</owners>
 8 |     <requireLicenseAcceptance>false</requireLicenseAcceptance>
 9 |     <license type="file">LICENSE.txt</license>
10 |     <licenseUrl>https://aka.ms/deprecateLicenseUrl</licenseUrl>
11 |     <icon>ORT_icon_for_light_bg.png</icon>
12 |     <projectUrl>https://github.com/Microsoft/onnxruntime</projectUrl>
13 |     <description>This package contains native shared library artifacts for all supported platforms of ONNX Runtime.</description>
14 |     <releaseNotes>Release Def:
15 | 	Branch: refs/heads/rel-1.15.0
16 | 	Commit: ddaaeeab42432cf9b924b5aa0459d644f615a01f
17 | 	Build: https://aiinfra.visualstudio.com/Lotus/_build/results?buildId=312266</releaseNotes>
18 |     <copyright>© Microsoft Corporation. All rights reserved.</copyright>
19 |     <tags>native ONNX ONNXRuntime-Training Learning-on-The-Edge On-Device-Training MachineLearning</tags>
20 |     <repository type="git" url="https://github.com/Microsoft/onnxruntime.git" commit="ddaaeeab42432cf9b924b5aa0459d644f615a01f" />
21 |     <dependencies>
22 |       <group targetFramework=".NETCoreApp0.0">
23 |         <dependency id="Microsoft.ML.OnnxRuntime.Managed" version="1.15.0" />
24 |         <dependency id="Microsoft.AI.DirectML" version="1.12.0" />
25 |       </group>
26 |       <group targetFramework=".NETStandard0.0">
27 |         <dependency id="Microsoft.ML.OnnxRuntime.Managed" version="1.15.0" />
28 |         <dependency id="Microsoft.AI.DirectML" version="1.12.0" />
29 |       </group>
30 |       <group targetFramework=".NETFramework0.0">
31 |         <dependency id="Microsoft.ML.OnnxRuntime.Managed" version="1.15.0" />
32 |         <dependency id="Microsoft.AI.DirectML" version="1.12.0" />
33 |       </group>
34 |       <group targetFramework="native0.0">
35 |         <dependency id="Microsoft.AI.DirectML" version="1.12.0" />
36 |       </group>
37 |     </dependencies>
38 |   </metadata>
39 | </package>


--------------------------------------------------------------------------------
/Lib/OnnxRuntimeDmlProvider/ORT_icon_for_light_bg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/Lib/OnnxRuntimeDmlProvider/ORT_icon_for_light_bg.png


--------------------------------------------------------------------------------
/Lib/OnnxRuntimeDmlProvider/Privacy.md:
--------------------------------------------------------------------------------
 1 | # Privacy
 2 | 
 3 | ## Data Collection
 4 | The software may collect information about you and your use of the software and send it to Microsoft. Microsoft may use this information to provide services and improve our products and services. You may turn off the telemetry as described in the repository. There are also some features in the software that may enable you and Microsoft to collect data from users of your applications. If you use these features, you must comply with applicable law, including providing appropriate notices to users of your applications together with a copy of Microsoft's privacy statement. Our privacy statement is located at https://go.microsoft.com/fwlink/?LinkID=824704. You can learn more about data collection and use in the help documentation and our privacy statement. Your use of the software operates as your consent to these practices.
 5 | 
 6 | ***
 7 | 
 8 | ### Private Builds
 9 | No data collection is performed when using your private builds built from source code.
10 | 
11 | ### Official Builds
12 | ONNX Runtime does not maintain any independent telemetry collection mechanisms outside of what is provided by the platforms it supports. However, where applicable, ONNX Runtime will take advantage of platform-supported telemetry systems to collect trace events with the goal of improving product quality.
13 | 
14 | Currently telemetry is only implemented for Windows builds and is turned **ON** by default in the official builds distributed in their respective package management repositories ([see here](../README.md#binaries)). This may be expanded to cover other platforms in the future. Data collection is implemented via 'Platform Telemetry' per vendor platform providers (see [telemetry.h](../onnxruntime/core/platform/telemetry.h)).
15 | 
16 | #### Technical Details
17 | The Windows provider uses the [TraceLogging](https://docs.microsoft.com/en-us/windows/win32/tracelogging/trace-logging-about) API for its implementation. This enables ONNX Runtime trace events to be collected by the operating system, and based on user consent, this data may be periodically sent to Microsoft servers following GDPR and privacy regulations for anonymity and data access controls. 
18 | 
19 | Windows ML and onnxruntime C APIs allow Trace Logging to be turned on/off (see [API pages](../README.md#api-documentation) for details).
20 | For information on how to enable and disable telemetry, see [C API: Telemetry](./C_API.md#telemetry). 
21 | There are equivalent APIs in the C#, Python, and Java language bindings as well.
22 | 


--------------------------------------------------------------------------------
/Lib/OnnxRuntimeDmlProvider/[Content_Types].xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
 3 |   <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml" />
 4 |   <Default Extension="psmdcp" ContentType="application/vnd.openxmlformats-package.core-properties+xml" />
 5 |   <Default Extension="h" ContentType="application/octet" />
 6 |   <Default Extension="lib" ContentType="application/octet" />
 7 |   <Default Extension="dll" ContentType="application/octet" />
 8 |   <Default Extension="props" ContentType="application/octet" />
 9 |   <Default Extension="targets" ContentType="application/octet" />
10 |   <Default Extension="txt" ContentType="application/octet" />
11 |   <Default Extension="md" ContentType="application/octet" />
12 |   <Default Extension="png" ContentType="application/octet" />
13 |   <Default Extension="nuspec" ContentType="application/octet" />
14 | </Types>


--------------------------------------------------------------------------------
/Lib/OnnxRuntimeDmlProvider/_rels/.rels:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
3 |   <Relationship Type="http://schemas.microsoft.com/packaging/2010/07/manifest" Target="/Microsoft.ML.OnnxRuntime.DirectML.nuspec" Id="R605BB99C1A1EA48B" />
4 |   <Relationship Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" Target="/package/services/metadata/core-properties/c7795757db2346b9bcfb932f99cdb33f.psmdcp" Id="R51F19D04ECB56433" />
5 | </Relationships>


--------------------------------------------------------------------------------
/Lib/OnnxRuntimeDmlProvider/build/native/Microsoft.ML.OnnxRuntime.DirectML.targets:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <Target Name="Microsoft_ML_OnnxRuntime_CheckPrerequisites" BeforeTargets="BeforeBuild">
 4 |     <!--
 5 |     Special case .NET Core portable applications.  When building a portable .NET Core app,
 6 |     the PlatformTarget is empty, and you don't know until runtime (i.e. which dotnet.exe)
 7 |     what processor architecture will be used.
 8 |   -->
 9 |     <Error Condition="('$(PlatformTarget)' != 'x64' AND '$(PlatformTarget)' != 'arm32' AND '$(PlatformTarget)' != 'arm64' AND '$(PlatformTarget)' != 'x86' AND '$(PlatformTarget)' != 'AnyCPU') AND
10 |                       ('$(OutputType)' == 'Exe' OR '$(OutputType)'=='WinExe') AND
11 |                       !('$(TargetFrameworkIdentifier)' == '.NETCoreApp' AND '$(PlatformTarget)' == '') AND
12 |                       ('$(TargetFrameworkIdentifier)' != 'Xamarin.iOS' AND
13 |                        $([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)')) != 'ios') AND
14 |                       '$(SuppressOnnxRuntimePlatformCompatibilityError)' != 'true'"
15 |            Text="Microsoft.ML.OnnxRuntime only supports the AnyCPU, x64, arm32, arm64 and x86 platforms at this time."/>
16 |   </Target>
17 | </Project>
18 | 


--------------------------------------------------------------------------------
/Lib/OnnxRuntimeDmlProvider/build/native/include/cpu_provider_factory.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation. All rights reserved.
 2 | // Licensed under the MIT License.
 3 | 
 4 | #include "onnxruntime_c_api.h"
 5 | 
 6 | #ifdef __cplusplus
 7 | extern "C" {
 8 | #endif
 9 | 
10 | /**
11 |  * \param use_arena zero: false. non-zero: true.
12 |  */
13 | ORT_EXPORT
14 | ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_CPU, _In_ OrtSessionOptions* options, int use_arena)
15 | ORT_ALL_ARGS_NONNULL;
16 | 
17 | #ifdef __cplusplus
18 | }
19 | #endif
20 | 


--------------------------------------------------------------------------------
/Lib/OnnxRuntimeDmlProvider/build/native/include/onnxruntime_run_options_config_keys.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation. All rights reserved.
 2 | // Licensed under the MIT License.
 3 | 
 4 | #pragma once
 5 | 
 6 | /*
 7 |  * This file defines RunOptions Config Keys and format of the Config Values.
 8 |  *
 9 |  * The Naming Convention for a RunOptions Config Key,
10 |  * "[Area][.[SubArea1].[SubArea2]...].[Keyname]"
11 |  * Such as "ep.cuda.use_arena"
12 |  * The Config Key cannot be empty
13 |  * The maximum length of the Config Key is 128
14 |  *
15 |  * The string format of a RunOptions Config Value is defined individually for each Config.
16 |  * The maximum length of the Config Value is 1024
17 |  */
18 | 
19 | // Key for enabling shrinkages of user listed device memory arenas.
20 | // Expects a list of semi-colon separated key value pairs separated by colon in the following format:
21 | // "device_0:device_id_0;device_1:device_id_1"
22 | // No white-spaces allowed in the provided list string.
23 | // Currently, the only supported devices are : "cpu", "gpu" (case sensitive).
24 | // If "cpu" is included in the list, DisableCpuMemArena() API must not be called (i.e.) arena for cpu should be enabled.
25 | // Example usage: "cpu:0;gpu:0" (or) "gpu:0"
26 | // By default, the value for this key is empty (i.e.) no memory arenas are shrunk
27 | static const char* const kOrtRunOptionsConfigEnableMemoryArenaShrinkage = "memory.enable_memory_arena_shrinkage";
28 | 
29 | // Set to '1' to not synchronize execution providers with CPU at the end of session run.
30 | // By default it is set to '0'.
31 | // Taking the CUDA EP as an example, setting this to '1' omits triggering cudaStreamSynchronize on the compute stream.
32 | static const char* const kOrtRunOptionsConfigDisableSynchronizeExecutionProviders = "disable_synchronize_execution_providers";
33 | 


--------------------------------------------------------------------------------
/Lib/OnnxRuntimeDmlProvider/build/native/include/provider_options.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation. All rights reserved.
 2 | // Licensed under the MIT License.
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <string>
 7 | #include <unordered_map>
 8 | #include <vector>
 9 | 
10 | namespace onnxruntime {
11 | 
12 | // data types for execution provider options
13 | 
14 | using ProviderOptions = std::unordered_map<std::string, std::string>;
15 | using ProviderOptionsVector = std::vector<ProviderOptions>;
16 | using ProviderOptionsMap = std::unordered_map<std::string, ProviderOptions>;
17 | 
18 | }  // namespace onnxruntime
19 | 


--------------------------------------------------------------------------------
/Lib/OnnxRuntimeDmlProvider/build/netstandard1.1/Microsoft.ML.OnnxRuntime.DirectML.targets:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <Target Name="Microsoft_ML_OnnxRuntime_CheckPrerequisites" BeforeTargets="BeforeBuild">
 4 |     <!--
 5 |     Special case .NET Core portable applications.  When building a portable .NET Core app,
 6 |     the PlatformTarget is empty, and you don't know until runtime (i.e. which dotnet.exe)
 7 |     what processor architecture will be used.
 8 |   -->
 9 |     <Error Condition="('$(PlatformTarget)' != 'x64' AND '$(PlatformTarget)' != 'arm32' AND '$(PlatformTarget)' != 'arm64' AND '$(PlatformTarget)' != 'x86' AND '$(PlatformTarget)' != 'AnyCPU') AND
10 |                       ('$(OutputType)' == 'Exe' OR '$(OutputType)'=='WinExe') AND
11 |                       !('$(TargetFrameworkIdentifier)' == '.NETCoreApp' AND '$(PlatformTarget)' == '') AND
12 |                       ('$(TargetFrameworkIdentifier)' != 'Xamarin.iOS' AND
13 |                        $([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)')) != 'ios') AND
14 |                       '$(SuppressOnnxRuntimePlatformCompatibilityError)' != 'true'"
15 |            Text="Microsoft.ML.OnnxRuntime only supports the AnyCPU, x64, arm32, arm64 and x86 platforms at this time."/>
16 |   </Target>
17 | </Project>
18 | 


--------------------------------------------------------------------------------
/Lib/OnnxRuntimeDmlProvider/build/netstandard2.0/Microsoft.ML.OnnxRuntime.DirectML.targets:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <Target Name="Microsoft_ML_OnnxRuntime_CheckPrerequisites" BeforeTargets="BeforeBuild">
 4 |     <!--
 5 |     Special case .NET Core portable applications.  When building a portable .NET Core app,
 6 |     the PlatformTarget is empty, and you don't know until runtime (i.e. which dotnet.exe)
 7 |     what processor architecture will be used.
 8 |   -->
 9 |     <Error Condition="('$(PlatformTarget)' != 'x64' AND '$(PlatformTarget)' != 'arm32' AND '$(PlatformTarget)' != 'arm64' AND '$(PlatformTarget)' != 'x86' AND '$(PlatformTarget)' != 'AnyCPU') AND
10 |                       ('$(OutputType)' == 'Exe' OR '$(OutputType)'=='WinExe') AND
11 |                       !('$(TargetFrameworkIdentifier)' == '.NETCoreApp' AND '$(PlatformTarget)' == '') AND
12 |                       ('$(TargetFrameworkIdentifier)' != 'Xamarin.iOS' AND
13 |                        $([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)')) != 'ios') AND
14 |                       '$(SuppressOnnxRuntimePlatformCompatibilityError)' != 'true'"
15 |            Text="Microsoft.ML.OnnxRuntime only supports the AnyCPU, x64, arm32, arm64 and x86 platforms at this time."/>
16 |   </Target>
17 | </Project>
18 | 


--------------------------------------------------------------------------------
/Lib/OnnxRuntimeDmlProvider/package/services/metadata/core-properties/c7795757db2346b9bcfb932f99cdb33f.psmdcp:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <coreProperties xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://schemas.openxmlformats.org/package/2006/metadata/core-properties">
3 |   <dc:creator>Microsoft</dc:creator>
4 |   <dc:description>This package contains native shared library artifacts for all supported platforms of ONNX Runtime.</dc:description>
5 |   <dc:identifier>Microsoft.ML.OnnxRuntime.DirectML</dc:identifier>
6 |   <version>1.15.0</version>
7 |   <keywords>native ONNX ONNXRuntime-Training Learning-on-The-Edge On-Device-Training MachineLearning</keywords>
8 |   <lastModifiedBy>NuGet, Version=5.7.0.7, Culture=neutral, PublicKeyToken=31bf3856ad364e35;Microsoft Windows NT 10.0.20348.0;.NET Framework 4.7.2</lastModifiedBy>
9 | </coreProperties>


--------------------------------------------------------------------------------
/Lib/OnnxRuntimeDmlProvider/runtimes/win-x64/native/onnxruntime.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/Lib/OnnxRuntimeDmlProvider/runtimes/win-x64/native/onnxruntime.lib


--------------------------------------------------------------------------------
/Lib/World/src/world/constantnumbers.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //
 6 | // This header file only defines constant numbers used by several functions.
 7 | //-----------------------------------------------------------------------------
 8 | #ifndef WORLD_CONSTANT_NUMBERS_H_
 9 | #define WORLD_CONSTANT_NUMBERS_H_
10 | 
11 | namespace world {
12 |   // for Dio()
13 |   const double kCutOff = 50.0;
14 | 
15 |   // for StoneMask()
16 |   const double kFloorF0StoneMask = 40.0;
17 | 
18 |   const double kPi = 3.1415926535897932384;
19 |   const double kMySafeGuardMinimum = 0.000000000001;
20 |   const double kEps = 0.00000000000000022204460492503131;
21 |   const double kFloorF0 = 71.0;
22 |   const double kCeilF0 = 800.0;
23 |   const double kDefaultF0 = 500.0;
24 |   const double kLog2 = 0.69314718055994529;
25 |   // Maximum standard deviation not to be selected as a best f0.
26 |   const double kMaximumValue = 100000.0;
27 | 
28 |   // Note to me (fs: 48000)
29 |   // 71 Hz is the limit to maintain the FFT size at 2048.
30 |   // If we use 70 Hz as FLOOR_F0, the FFT size of 4096 is required.
31 | 
32 |   // for D4C()
33 |   const int kHanning = 1;
34 |   const int kBlackman = 2;
35 |   const double kFrequencyInterval = 3000.0;
36 |   const double kUpperLimit = 15000.0;
37 |   const double kThreshold = 0.85;
38 |   const double kFloorF0D4C = 47.0;
39 | 
40 |   // for Codec (Mel scale)
41 |   // S. Stevens & J. Volkmann,
42 |   // The Relation of Pitch to Frequency: A Revised Scale,
43 |   // American Journal of Psychology, vol. 53, no. 3, pp. 329-353, 1940.
44 |   const double kM0 = 1127.01048;
45 |   const double kF0 = 700.0;
46 |   const double kFloorFrequency = 40.0;
47 |   const double kCeilFrequency = 20000.0;
48 | 
49 | }  // namespace world
50 | 
51 | #endif  // WORLD_CONSTANT_NUMBERS_H_
52 | 


--------------------------------------------------------------------------------
/Lib/World/src/world/d4c.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //-----------------------------------------------------------------------------
 6 | #ifndef WORLD_D4C_H_
 7 | #define WORLD_D4C_H_
 8 | 
 9 | #include "world/macrodefinitions.h"
10 | 
11 | WORLD_BEGIN_C_DECLS
12 | 
13 | //-----------------------------------------------------------------------------
14 | // Struct for D4C
15 | //-----------------------------------------------------------------------------
16 | typedef struct {  // Optional parameters handed to D4C() through its `option` argument.
17 |   double threshold;  // NOTE(review): meaning/default not shown in this header; presumably set by InitializeD4COption() -- confirm in d4c.cpp.
18 | } D4COption;
19 | 
20 | //-----------------------------------------------------------------------------
21 | // D4C() calculates the aperiodicity estimated by D4C.
22 | //
23 | // Input:
24 | //   x                  : Input signal
25 | //   x_length           : Length of x
26 | //   fs                 : Sampling frequency
27 | //   temporal_positions : Time axis
28 | //   f0                 : F0 contour
29 | //   f0_length          : Length of F0 contour
30 | //   fft_size           : FFT size. The number of samples of the aperiodicity
31 | //                      : in one frame is given by fft_size / 2 + 1.
32 | // Output:
33 | //   aperiodicity  : Aperiodicity estimated by D4C.
34 | //-----------------------------------------------------------------------------
35 | void D4C(const double *x, int x_length, int fs,
36 |     const double *temporal_positions, const double *f0, int f0_length,
37 |     int fft_size, const D4COption *option, double **aperiodicity);
38 | 
39 | //-----------------------------------------------------------------------------
40 | // InitializeD4COption allocates the memory to the struct and sets the
41 | // default parameters.
42 | //
43 | // Output:
44 | //   option   : Struct for the optional parameter.
45 | //-----------------------------------------------------------------------------
46 | void InitializeD4COption(D4COption *option);
47 | 
48 | WORLD_END_C_DECLS
49 | 
50 | #endif  // WORLD_D4C_H_
51 | 


--------------------------------------------------------------------------------
/Lib/World/src/world/dio.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //-----------------------------------------------------------------------------
 6 | #ifndef WORLD_DIO_H_
 7 | #define WORLD_DIO_H_
 8 | 
 9 | #include "macrodefinitions.h"
10 | 
11 | WORLD_BEGIN_C_DECLS
12 | 
13 | //-----------------------------------------------------------------------------
14 | // Struct for DIO
15 | //-----------------------------------------------------------------------------
16 | typedef struct {  // Parameters handed to Dio() through its `option` argument.
17 |   double f0_floor;  // NOTE(review): presumably the lower F0 bound in [Hz] -- confirm in dio.cpp.
18 |   double f0_ceil;  // NOTE(review): presumably the upper F0 bound in [Hz] -- confirm in dio.cpp.
19 |   double channels_in_octave;  // NOTE(review): semantics not shown in this header -- confirm in dio.cpp.
20 |   double frame_period;  // Frame shift [msec]
21 |   int speed;  // (1, 2, ..., 12)
22 |   double allowed_range;  // Threshold used for fixing the F0 contour.
23 | } DioOption;
24 | 
25 | //-----------------------------------------------------------------------------
26 | // DIO
27 | //
28 | // Input:
29 | //   x                    : Input signal
30 | //   x_length             : Length of x
31 | //   fs                   : Sampling frequency
32 | //   option               : Struct to order the parameter for DIO
33 | //
34 | // Output:
35 | //   temporal_positions   : Temporal positions.
36 | //   f0                   : F0 contour.
37 | //-----------------------------------------------------------------------------
38 | void Dio(const double *x, int x_length, int fs, const DioOption *option,
39 |   double *temporal_positions, double *f0);
40 | 
41 | //-----------------------------------------------------------------------------
42 | // InitializeDioOption allocates the memory to the struct and sets the
43 | // default parameters.
44 | //
45 | // Output:
46 | //   option   : Struct for the optional parameter.
47 | //-----------------------------------------------------------------------------
48 | void InitializeDioOption(DioOption *option);
49 | 
50 | //-----------------------------------------------------------------------------
51 | // GetSamplesForDIO() calculates the number of samples required for Dio().
52 | //
53 | // Input:
54 | //   fs             : Sampling frequency [Hz]
55 | //   x_length       : Length of the input signal [Sample].
56 | //   frame_period   : Frame shift [msec]
57 | //
58 | // Output:
59 | //   The number of samples required to store the results of Dio()
60 | //-----------------------------------------------------------------------------
61 | int GetSamplesForDIO(int fs, int x_length, double frame_period);
62 | 
63 | WORLD_END_C_DECLS
64 | 
65 | #endif  // WORLD_DIO_H_
66 | 


--------------------------------------------------------------------------------
/Lib/World/src/world/fft.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //
 6 | // These functions and variables are defined to use FFT as well as FFTW
 7 | // Please see fft.cpp to show the detailed information
 8 | //-----------------------------------------------------------------------------
 9 | #ifndef WORLD_FFT_H_
10 | #define WORLD_FFT_H_
11 | 
12 | #include "macrodefinitions.h"
13 | 
14 | WORLD_BEGIN_C_DECLS
15 | 
16 | // Commands for FFT (This is the same as FFTW)
17 | #define FFT_FORWARD 1
18 | #define FFT_BACKWARD 2
19 | #define FFT_ESTIMATE 3
20 | 
21 | // Complex number for FFT
22 | typedef double fft_complex[2];
23 | // Struct used for FFT
24 | typedef struct {  // Plan object returned by the fft_plan_dft_*_1d() functions and consumed by fft_execute() / fft_destroy_plan().
25 |   int n;  // NOTE(review): presumably the `n` passed to the plan constructor -- confirm in fft.cpp.
26 |   int sign;  // NOTE(review): presumably FFT_FORWARD or FFT_BACKWARD -- confirm in fft.cpp.
27 |   unsigned int flags;  // NOTE(review): presumably the `flags` passed to the plan constructor (e.g. FFT_ESTIMATE) -- confirm.
28 |   fft_complex *c_in;  // NOTE(review): presumably the complex input buffer (dft_1d / c2r plans) -- confirm.
29 |   double *in;  // NOTE(review): presumably the real input buffer (r2c plans) -- confirm.
30 |   fft_complex *c_out;  // NOTE(review): presumably the complex output buffer (dft_1d / r2c plans) -- confirm.
31 |   double *out;  // NOTE(review): presumably the real output buffer (c2r plans) -- confirm.
32 |   double *input;  // NOTE(review): purpose not shown in this header -- see fft.cpp.
33 |   int *ip;  // NOTE(review): purpose not shown in this header -- see fft.cpp.
34 |   double *w;  // NOTE(review): purpose not shown in this header -- see fft.cpp.
35 | } fft_plan;
36 | 
37 | fft_plan fft_plan_dft_1d(int n, fft_complex *in, fft_complex *out, int sign,
38 |   unsigned int flags);
39 | fft_plan fft_plan_dft_c2r_1d(int n, fft_complex *in, double *out,
40 |   unsigned int flags);
41 | fft_plan fft_plan_dft_r2c_1d(int n, double *in, fft_complex *out,
42 |   unsigned int flags);
43 | void fft_execute(fft_plan p);
44 | void fft_destroy_plan(fft_plan p);
45 | 
46 | WORLD_END_C_DECLS
47 | 
48 | #endif  // WORLD_FFT_H_
49 | 


--------------------------------------------------------------------------------
/Lib/World/src/world/harvest.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //-----------------------------------------------------------------------------
 6 | #ifndef WORLD_HARVEST_H_
 7 | #define WORLD_HARVEST_H_
 8 | 
 9 | #include "macrodefinitions.h"
10 | 
11 | WORLD_BEGIN_C_DECLS
12 | 
13 | //-----------------------------------------------------------------------------
14 | // Struct for Harvest
15 | //-----------------------------------------------------------------------------
16 | typedef struct {  // Parameters handed to Harvest() through its `option` argument.
17 |   double f0_floor;  // NOTE(review): presumably the lower F0 bound in [Hz] -- confirm in harvest.cpp.
18 |   double f0_ceil;  // NOTE(review): presumably the upper F0 bound in [Hz] -- confirm in harvest.cpp.
19 |   double frame_period;  // NOTE(review): presumably the frame shift in [msec], as documented for GetSamplesForHarvest() -- confirm.
20 | } HarvestOption;
21 | 
22 | //-----------------------------------------------------------------------------
23 | // Harvest
24 | //
25 | // Input:
26 | //   x                    : Input signal
27 | //   x_length             : Length of x
28 | //   fs                   : Sampling frequency
29 | //   option               : Struct to order the parameter for Harvest
30 | //
31 | // Output:
32 | //   temporal_positions   : Temporal positions.
33 | //   f0                   : F0 contour.
34 | //-----------------------------------------------------------------------------
35 | void Harvest(const double *x, int x_length, int fs,
36 |   const HarvestOption *option, double *temporal_positions, double *f0);
37 | 
38 | //-----------------------------------------------------------------------------
39 | // InitializeHarvestOption allocates the memory to the struct and sets the
40 | // default parameters.
41 | //
42 | // Output:
43 | //   option   : Struct for the optional parameter.
44 | //-----------------------------------------------------------------------------
45 | void InitializeHarvestOption(HarvestOption *option);
46 | 
47 | //-----------------------------------------------------------------------------
48 | // GetSamplesForHarvest() calculates the number of samples required for
49 | // Harvest().
50 | //
51 | // Input:
52 | //   fs             : Sampling frequency [Hz]
53 | //   x_length       : Length of the input signal [Sample]
54 | //   frame_period   : Frame shift [msec]
55 | //
56 | // Output:
57 | //   The number of samples required to store the results of Harvest().
58 | //-----------------------------------------------------------------------------
59 | int GetSamplesForHarvest(int fs, int x_length, double frame_period);
60 | 
61 | WORLD_END_C_DECLS
62 | 
63 | #endif  // WORLD_HARVEST_H_
64 | 


--------------------------------------------------------------------------------
/Lib/World/src/world/stonemask.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //-----------------------------------------------------------------------------
 6 | #ifndef WORLD_STONEMASK_H_
 7 | #define WORLD_STONEMASK_H_
 8 | 
 9 | #include "macrodefinitions.h"
10 | 
11 | WORLD_BEGIN_C_DECLS
12 | 
13 | //-----------------------------------------------------------------------------
14 | // StoneMask() refines the F0 estimated by Dio()
15 | //
16 | // Input:
17 | //   x                      : Input signal
18 | //   x_length               : Length of the input signal
19 | //   fs                     : Sampling frequency
20 | //   temporal_positions     : Temporal positions (time axis)
21 | //   f0                     : f0 contour
22 | //   f0_length              : Length of f0
23 | //
24 | // Output:
25 | //   refined_f0             : Refined F0
26 | //-----------------------------------------------------------------------------
27 | void StoneMask(const double *x, int x_length, int fs,
28 |     const double *temporal_positions, const double *f0, int f0_length,
29 |     double *refined_f0);
30 | 
31 | WORLD_END_C_DECLS
32 | 
33 | #endif  // WORLD_STONEMASK_H_
34 | 


--------------------------------------------------------------------------------
/Lib/World/src/world/synthesis.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //-----------------------------------------------------------------------------
 6 | #ifndef WORLD_SYNTHESIS_H_
 7 | #define WORLD_SYNTHESIS_H_
 8 | 
 9 | #include "world/macrodefinitions.h"
10 | 
11 | WORLD_BEGIN_C_DECLS
12 | 
13 | //-----------------------------------------------------------------------------
14 | // Synthesis() synthesizes the voice based on f0, spectrogram and
15 | // aperiodicity (not excitation signal).
16 | //
17 | // Input:
18 | //   f0                   : f0 contour
19 | //   f0_length            : Length of f0
20 | //   spectrogram          : Spectrogram estimated by CheapTrick
21 | //   fft_size             : FFT size
22 | //   aperiodicity         : Aperiodicity spectrogram based on D4C
23 | //   frame_period         : Temporal period used for the analysis
24 | //   fs                   : Sampling frequency
25 | //   y_length             : Length of the output signal (Memory of y has been
26 | //                          allocated in advance)
27 | // Output:
28 | //   y                    : Calculated speech
29 | //-----------------------------------------------------------------------------
30 | void Synthesis(const double *f0, int f0_length, 
31 |     const double * const *spectrogram, const double * const *aperiodicity, 
32 |     int fft_size, double frame_period, int fs, int y_length, double *y);
33 | 
34 | WORLD_END_C_DECLS
35 | 
36 | #endif  // WORLD_SYNTHESIS_H_
37 | 


--------------------------------------------------------------------------------
/Lib/World/tools/audioio.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //-----------------------------------------------------------------------------
 6 | #ifndef WORLD_AUDIOIO_H_
 7 | #define WORLD_AUDIOIO_H_
 8 | 
 9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 | 
13 | //-----------------------------------------------------------------------------
14 | // wavwrite() writes a .wav file.
15 | // Input:
16 | //   x          : Input signal
17 | //   x_length   : Signal length of x [sample]
18 | //   fs         : Sampling frequency [Hz]
19 | //   nbit       : Quantization bit [bit]
20 | //   filename   : Name of the output signal.
21 | // Caution:
22 | //   The variable nbit is not used in this function.
23 | //   This function only supports the 16 bit.
24 | //-----------------------------------------------------------------------------
25 | void wavwrite(const double *x, int x_length, int fs, int nbit,
26 |   const char *filename);
27 | 
28 | //-----------------------------------------------------------------------------
29 | // GetAudioLength() returns the length of .wav file.
30 | // Input:
31 | //   filename     : Filename of a .wav file.
32 | // Output:
33 | //   The number of samples in the .wav file
34 | //-----------------------------------------------------------------------------
35 | int GetAudioLength(const char *filename);
36 | 
37 | //-----------------------------------------------------------------------------
38 | // wavread() reads a .wav file.
39 | // The memory of output x must be allocated in advance.
40 | // Input:
41 | //   filename     : Filename of the input file.
42 | // Output:
43 | //   fs           : Sampling frequency [Hz]
44 | //   nbit         : Quantization bit [bit]
45 | //   x            : The output waveform.
46 | //-----------------------------------------------------------------------------
47 | void wavread(const char* filename, int *fs, int *nbit, double *x);
48 | 
49 | #ifdef __cplusplus
50 | }
51 | #endif
52 | 
53 | #endif  // WORLD_AUDIOIO_H_
54 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/Lib/avcodec.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/Lib/ffmpeg-4.2.1/Lib/avcodec.lib


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/Lib/avformat.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/Lib/ffmpeg-4.2.1/Lib/avformat.lib


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/Lib/avutil.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/Lib/ffmpeg-4.2.1/Lib/avutil.lib


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/Lib/swresample.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/Lib/ffmpeg-4.2.1/Lib/swresample.lib


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/Lib/swscale.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/Lib/ffmpeg-4.2.1/Lib/swscale.lib


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavcodec/ac3_parser.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * AC-3 parser prototypes
 3 |  * Copyright (c) 2003 Fabrice Bellard
 4 |  * Copyright (c) 2003 Michael Niedermayer
 5 |  *
 6 |  * This file is part of FFmpeg.
 7 |  *
 8 |  * FFmpeg is free software; you can redistribute it and/or
 9 |  * modify it under the terms of the GNU Lesser General Public
10 |  * License as published by the Free Software Foundation; either
11 |  * version 2.1 of the License, or (at your option) any later version.
12 |  *
13 |  * FFmpeg is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 |  * Lesser General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU Lesser General Public
19 |  * License along with FFmpeg; if not, write to the Free Software
20 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 |  */
22 | 
23 | #ifndef AVCODEC_AC3_PARSER_H
24 | #define AVCODEC_AC3_PARSER_H
25 | 
26 | #include <stddef.h>
27 | #include <stdint.h>
28 | 
29 | /**
30 |  * Extract the bitstream ID and the frame size from AC-3 data.
31 |  */
32 | int av_ac3_parse_header(const uint8_t *buf, size_t size,
33 |                         uint8_t *bitstream_id, uint16_t *frame_size);
34 | 
35 | 
36 | #endif /* AVCODEC_AC3_PARSER_H */
37 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavcodec/adts_parser.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of FFmpeg.
 3 |  *
 4 |  * FFmpeg is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU Lesser General Public
 6 |  * License as published by the Free Software Foundation; either
 7 |  * version 2.1 of the License, or (at your option) any later version.
 8 |  *
 9 |  * FFmpeg is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 |  * Lesser General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU Lesser General Public
15 |  * License along with FFmpeg; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 |  */
18 | 
19 | #ifndef AVCODEC_ADTS_PARSER_H
20 | #define AVCODEC_ADTS_PARSER_H
21 | 
22 | #include <stddef.h>
23 | #include <stdint.h>
24 | 
25 | #define AV_AAC_ADTS_HEADER_SIZE 7
26 | 
27 | /**
28 |  * Extract the number of samples and frames from AAC data.
29 |  * @param[in]  buf     pointer to AAC data buffer
30 |  * @param[out] samples Pointer to where number of samples is written
31 |  * @param[out] frames  Pointer to where number of frames is written
32 |  * @return Returns 0 on success, error code on failure.
33 |  */
34 | int av_adts_header_parse(const uint8_t *buf, uint32_t *samples,
35 |                          uint8_t *frames);
36 | 
37 | #endif /* AVCODEC_ADTS_PARSER_H */
38 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavcodec/jni.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * JNI public API functions
 3 |  *
 4 |  * Copyright (c) 2015-2016 Matthieu Bouron <matthieu.bouron stupeflix.com>
 5 |  *
 6 |  * This file is part of FFmpeg.
 7 |  *
 8 |  * FFmpeg is free software; you can redistribute it and/or
 9 |  * modify it under the terms of the GNU Lesser General Public
10 |  * License as published by the Free Software Foundation; either
11 |  * version 2.1 of the License, or (at your option) any later version.
12 |  *
13 |  * FFmpeg is distributed in the hope that it will be useful,
14 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 |  * Lesser General Public License for more details.
17 |  *
18 |  * You should have received a copy of the GNU Lesser General Public
19 |  * License along with FFmpeg; if not, write to the Free Software
20 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 |  */
22 | 
23 | #ifndef AVCODEC_JNI_H
24 | #define AVCODEC_JNI_H
25 | 
26 | /*
27 |  * Manually set a Java virtual machine which will be used to retrieve the JNI
28 |  * environment. Once a Java VM is set it cannot be changed afterwards, meaning
29 |  * you can call multiple times av_jni_set_java_vm with the same Java VM pointer
30 |  * however it will error out if you try to set a different Java VM.
31 |  *
32 |  * @param vm Java virtual machine
33 |  * @param log_ctx context used for logging, can be NULL
34 |  * @return 0 on success, < 0 otherwise
35 |  */
36 | int av_jni_set_java_vm(void *vm, void *log_ctx);
37 | 
38 | /*
39 |  * Get the Java virtual machine which has been set with av_jni_set_java_vm.
40 |  *
41 |  * @param log_ctx context used for logging, can be NULL
42 |  * @return a pointer to the Java virtual machine
43 |  */
44 | void *av_jni_get_java_vm(void *log_ctx);
45 | 
46 | #endif /* AVCODEC_JNI_H */
47 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavcodec/vorbis_parser.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of FFmpeg.
 3 |  *
 4 |  * FFmpeg is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU Lesser General Public
 6 |  * License as published by the Free Software Foundation; either
 7 |  * version 2.1 of the License, or (at your option) any later version.
 8 |  *
 9 |  * FFmpeg is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 |  * Lesser General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU Lesser General Public
15 |  * License along with FFmpeg; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 |  */
18 | 
19 | /**
20 |  * @file
21 |  * A public API for Vorbis parsing
22 |  *
23 |  * Determines the duration for each packet.
24 |  */
25 | 
26 | #ifndef AVCODEC_VORBIS_PARSER_H
27 | #define AVCODEC_VORBIS_PARSER_H
28 | 
29 | #include <stdint.h>
30 | 
31 | typedef struct AVVorbisParseContext AVVorbisParseContext;
32 | 
33 | /**
34 |  * Allocate and initialize the Vorbis parser using headers in the extradata.
35 |  */
36 | AVVorbisParseContext *av_vorbis_parse_init(const uint8_t *extradata,
37 |                                            int extradata_size);
38 | 
39 | /**
40 |  * Free the parser and everything associated with it.
41 |  */
42 | void av_vorbis_parse_free(AVVorbisParseContext **s);
43 | 
44 | #define VORBIS_FLAG_HEADER  0x00000001
45 | #define VORBIS_FLAG_COMMENT 0x00000002
46 | #define VORBIS_FLAG_SETUP   0x00000004
47 | 
48 | /**
49 |  * Get the duration for a Vorbis packet.
50 |  *
51 |  * If @p flags is @c NULL,
52 |  * special frames are considered invalid.
53 |  *
54 |  * @param s        Vorbis parser context
55 |  * @param buf      buffer containing a Vorbis frame
56 |  * @param buf_size size of the buffer
57 |  * @param flags    flags for special frames
58 |  */
59 | int av_vorbis_parse_frame_flags(AVVorbisParseContext *s, const uint8_t *buf,
60 |                                 int buf_size, int *flags);
61 | 
62 | /**
63 |  * Get the duration for a Vorbis packet.
64 |  *
65 |  * @param s        Vorbis parser context
66 |  * @param buf      buffer containing a Vorbis frame
67 |  * @param buf_size size of the buffer
68 |  */
69 | int av_vorbis_parse_frame(AVVorbisParseContext *s, const uint8_t *buf,
70 |                           int buf_size);
71 | 
72 | void av_vorbis_parse_reset(AVVorbisParseContext *s);
73 | 
74 | #endif /* AVCODEC_VORBIS_PARSER_H */
75 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavdevice/version.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of FFmpeg.
 3 |  *
 4 |  * FFmpeg is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU Lesser General Public
 6 |  * License as published by the Free Software Foundation; either
 7 |  * version 2.1 of the License, or (at your option) any later version.
 8 |  *
 9 |  * FFmpeg is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 |  * Lesser General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU Lesser General Public
15 |  * License along with FFmpeg; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 |  */
18 | 
19 | #ifndef AVDEVICE_VERSION_H
20 | #define AVDEVICE_VERSION_H
21 | 
22 | /**
23 |  * @file
24 |  * @ingroup lavd
25 |  * Libavdevice version macros
26 |  */
27 | 
28 | #include "libavutil/version.h"
29 | 
30 | #define LIBAVDEVICE_VERSION_MAJOR  58
31 | #define LIBAVDEVICE_VERSION_MINOR   8
32 | #define LIBAVDEVICE_VERSION_MICRO 100
33 | 
34 | #define LIBAVDEVICE_VERSION_INT AV_VERSION_INT(LIBAVDEVICE_VERSION_MAJOR, \
35 |                                                LIBAVDEVICE_VERSION_MINOR, \
36 |                                                LIBAVDEVICE_VERSION_MICRO)
37 | #define LIBAVDEVICE_VERSION     AV_VERSION(LIBAVDEVICE_VERSION_MAJOR, \
38 |                                            LIBAVDEVICE_VERSION_MINOR, \
39 |                                            LIBAVDEVICE_VERSION_MICRO)
40 | #define LIBAVDEVICE_BUILD       LIBAVDEVICE_VERSION_INT
41 | 
42 | #define LIBAVDEVICE_IDENT       "Lavd" AV_STRINGIFY(LIBAVDEVICE_VERSION)
43 | 
44 | /**
45 |  * FF_API_* defines may be placed below to indicate public API that will be
46 |  * dropped at a future version bump. The defines themselves are not part of
47 |  * the public API and may change, break or disappear at any time.
48 |  */
49 | 
50 | #endif /* AVDEVICE_VERSION_H */
51 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/adler32.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * copyright (c) 2006 Mans Rullgard
 3 |  *
 4 |  * This file is part of FFmpeg.
 5 |  *
 6 |  * FFmpeg is free software; you can redistribute it and/or
 7 |  * modify it under the terms of the GNU Lesser General Public
 8 |  * License as published by the Free Software Foundation; either
 9 |  * version 2.1 of the License, or (at your option) any later version.
10 |  *
11 |  * FFmpeg is distributed in the hope that it will be useful,
12 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 |  * Lesser General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Lesser General Public
17 |  * License along with FFmpeg; if not, write to the Free Software
18 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 |  */
20 | 
21 | /**
22 |  * @file
23 |  * @ingroup lavu_adler32
24 |  * Public header for Adler-32 hash function implementation.
25 |  */
26 | 
27 | #ifndef AVUTIL_ADLER32_H
28 | #define AVUTIL_ADLER32_H
29 | 
30 | #include <stdint.h>
31 | #include "attributes.h"
32 | 
33 | /**
34 |  * @defgroup lavu_adler32 Adler-32
35 |  * @ingroup lavu_hash
36 |  * Adler-32 hash function implementation.
37 |  *
38 |  * @{
39 |  */
40 | 
41 | /**
42 |  * Calculate the Adler32 checksum of a buffer.
43 |  *
44 |  * Passing the return value to a subsequent av_adler32_update() call
45 |  * allows the checksum of multiple buffers to be calculated as though
46 |  * they were concatenated.
47 |  *
48 |  * @param adler initial checksum value
49 |  * @param buf   pointer to input buffer
50 |  * @param len   size of input buffer
51 |  * @return      updated checksum
52 |  */
53 | unsigned long av_adler32_update(unsigned long adler, const uint8_t *buf,
54 |                                 unsigned int len) av_pure;
55 | 
56 | /**
57 |  * @}
58 |  */
59 | 
60 | #endif /* AVUTIL_ADLER32_H */
61 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/aes.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * copyright (c) 2007 Michael Niedermayer <michaelni@gmx.at>
 3 |  *
 4 |  * This file is part of FFmpeg.
 5 |  *
 6 |  * FFmpeg is free software; you can redistribute it and/or
 7 |  * modify it under the terms of the GNU Lesser General Public
 8 |  * License as published by the Free Software Foundation; either
 9 |  * version 2.1 of the License, or (at your option) any later version.
10 |  *
11 |  * FFmpeg is distributed in the hope that it will be useful,
12 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 |  * Lesser General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Lesser General Public
17 |  * License along with FFmpeg; if not, write to the Free Software
18 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 |  */
20 | 
21 | #ifndef AVUTIL_AES_H
22 | #define AVUTIL_AES_H
23 | 
24 | #include <stdint.h>
25 | 
26 | #include "attributes.h"
27 | #include "version.h"
28 | 
29 | /**
30 |  * @defgroup lavu_aes AES
31 |  * @ingroup lavu_crypto
32 |  * @{
33 |  */
34 | 
35 | extern const int av_aes_size;
36 | 
37 | struct AVAES;
38 | 
39 | /**
40 |  * Allocate an AVAES context.
41 |  */
42 | struct AVAES *av_aes_alloc(void);
43 | 
44 | /**
45 |  * Initialize an AVAES context.
46 |  * @param key_bits 128, 192 or 256
47 |  * @param decrypt 0 for encryption, 1 for decryption
48 |  */
49 | int av_aes_init(struct AVAES *a, const uint8_t *key, int key_bits, int decrypt);
50 | 
51 | /**
52 |  * Encrypt or decrypt a buffer using a previously initialized context.
53 |  * @param count number of 16 byte blocks
54 |  * @param dst destination array, can be equal to src
55 |  * @param src source array, can be equal to dst
56 |  * @param iv initialization vector for CBC mode, if NULL then ECB will be used
57 |  * @param decrypt 0 for encryption, 1 for decryption
58 |  */
59 | void av_aes_crypt(struct AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv, int decrypt);
60 | 
61 | /**
62 |  * @}
63 |  */
64 | 
65 | #endif /* AVUTIL_AES_H */
66 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/avconfig.h:
--------------------------------------------------------------------------------
1 | /* Generated by ffmpeg configure */
2 | #ifndef AVUTIL_AVCONFIG_H
3 | #define AVUTIL_AVCONFIG_H
4 | #define AV_HAVE_BIGENDIAN 0
5 | #define AV_HAVE_FAST_UNALIGNED 0
6 | #endif /* AVUTIL_AVCONFIG_H */
7 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/base64.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2006 Ryan Martell. (rdm4@martellventures.com)
 3 |  *
 4 |  * This file is part of FFmpeg.
 5 |  *
 6 |  * FFmpeg is free software; you can redistribute it and/or
 7 |  * modify it under the terms of the GNU Lesser General Public
 8 |  * License as published by the Free Software Foundation; either
 9 |  * version 2.1 of the License, or (at your option) any later version.
10 |  *
11 |  * FFmpeg is distributed in the hope that it will be useful,
12 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 |  * Lesser General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Lesser General Public
17 |  * License along with FFmpeg; if not, write to the Free Software
18 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 |  */
20 | 
21 | #ifndef AVUTIL_BASE64_H
22 | #define AVUTIL_BASE64_H
23 | 
24 | #include <stdint.h>
25 | 
26 | /**
27 |  * @defgroup lavu_base64 Base64
28 |  * @ingroup lavu_crypto
29 |  * @{
30 |  */
31 | 
32 | /**
33 |  * Decode a base64-encoded string.
34 |  *
35 |  * @param out      buffer for decoded data
36 |  * @param in       null-terminated input string
37 |  * @param out_size size in bytes of the out buffer, must be at
38 |  *                 least 3/4 of the length of in, that is AV_BASE64_DECODE_SIZE(strlen(in))
39 |  * @return         number of bytes written, or a negative value in case of
40 |  *                 invalid input
41 |  */
42 | int av_base64_decode(uint8_t *out, const char *in, int out_size);
43 | 
44 | /**
45 |  * Calculate the output size in bytes needed to decode a base64 string
46 |  * with length x to a data buffer.
47 |  */
48 | #define AV_BASE64_DECODE_SIZE(x) ((x) * 3LL / 4)
49 | 
50 | /**
51 |  * Encode data to base64 and null-terminate.
52 |  *
53 |  * @param out      buffer for encoded data
54 |  * @param out_size size in bytes of the out buffer (including the
55 |  *                 null terminator), must be at least AV_BASE64_SIZE(in_size)
56 |  * @param in       input buffer containing the data to encode
57 |  * @param in_size  size in bytes of the in buffer
58 |  * @return         out or NULL in case of error
59 |  */
60 | char *av_base64_encode(char *out, int out_size, const uint8_t *in, int in_size);
61 | 
62 | /**
63 |  * Calculate the output size needed to base64-encode x bytes to a
64 |  * null-terminated string.
65 |  */
66 | #define AV_BASE64_SIZE(x)  (((x)+2) / 3 * 4 + 1)
67 | 
68 |  /**
69 |   * @}
70 |   */
71 | 
72 | #endif /* AVUTIL_BASE64_H */
73 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/camellia.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * An implementation of the CAMELLIA algorithm as mentioned in RFC3713
 3 |  * Copyright (c) 2014 Supraja Meedinti
 4 |  *
 5 |  * This file is part of FFmpeg.
 6 |  *
 7 |  * FFmpeg is free software; you can redistribute it and/or
 8 |  * modify it under the terms of the GNU Lesser General Public
 9 |  * License as published by the Free Software Foundation; either
10 |  * version 2.1 of the License, or (at your option) any later version.
11 |  *
12 |  * FFmpeg is distributed in the hope that it will be useful,
13 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 |  * Lesser General Public License for more details.
16 |  *
17 |  * You should have received a copy of the GNU Lesser General Public
18 |  * License along with FFmpeg; if not, write to the Free Software
19 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 |  */
21 | 
22 | #ifndef AVUTIL_CAMELLIA_H
23 | #define AVUTIL_CAMELLIA_H
24 | 
25 | #include <stdint.h>
26 | 
27 | 
28 | /**
29 |   * @file
30 |   * @brief Public header for libavutil CAMELLIA algorithm
31 |   * @defgroup lavu_camellia CAMELLIA
32 |   * @ingroup lavu_crypto
33 |   * @{
34 |   */
35 | 
36 | extern const int av_camellia_size;
37 | 
38 | struct AVCAMELLIA;
39 | 
40 | /**
41 |   * Allocate an AVCAMELLIA context
42 |   * To free the struct: av_free(ptr)
43 |   */
44 | struct AVCAMELLIA *av_camellia_alloc(void);
45 | 
46 | /**
47 |   * Initialize an AVCAMELLIA context.
48 |   *
49 |   * @param ctx an AVCAMELLIA context
50 |   * @param key a key of 16, 24, 32 bytes used for encryption/decryption
51 |   * @param key_bits number of keybits: possible are 128, 192, 256
52 |  */
53 | int av_camellia_init(struct AVCAMELLIA *ctx, const uint8_t *key, int key_bits);
54 | 
55 | /**
56 |   * Encrypt or decrypt a buffer using a previously initialized context
57 |   *
58 |   * @param ctx an AVCAMELLIA context
59 |   * @param dst destination array, can be equal to src
60 |   * @param src source array, can be equal to dst
61 |   * @param count number of 16 byte blocks
62 |   * @param iv initialization vector for CBC mode, NULL for ECB mode
63 |   * @param decrypt 0 for encryption, 1 for decryption
64 |  */
65 | void av_camellia_crypt(struct AVCAMELLIA *ctx, uint8_t *dst, const uint8_t *src, int count, uint8_t* iv, int decrypt);
66 | 
67 | /**
68 |  * @}
69 |  */
70 | #endif /* AVUTIL_CAMELLIA_H */
71 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/ffversion.h:
--------------------------------------------------------------------------------
1 | /* Automatically generated by version.sh, do not manually edit! */
2 | #ifndef AVUTIL_FFVERSION_H
3 | #define AVUTIL_FFVERSION_H
4 | #define FFMPEG_VERSION "4.2.1"
5 | #endif /* AVUTIL_FFVERSION_H */
6 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/hwcontext_cuda.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of FFmpeg.
 3 |  *
 4 |  * FFmpeg is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU Lesser General Public
 6 |  * License as published by the Free Software Foundation; either
 7 |  * version 2.1 of the License, or (at your option) any later version.
 8 |  *
 9 |  * FFmpeg is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 |  * Lesser General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU Lesser General Public
15 |  * License along with FFmpeg; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 |  */
18 | 
19 | 
20 | #ifndef AVUTIL_HWCONTEXT_CUDA_H
21 | #define AVUTIL_HWCONTEXT_CUDA_H
22 | 
23 | #ifndef CUDA_VERSION
24 | #include <cuda.h>
25 | #endif
26 | 
27 | #include "pixfmt.h"
28 | 
29 | /**
30 |  * @file
31 |  * An API-specific header for AV_HWDEVICE_TYPE_CUDA.
32 |  *
33 |  * This API supports dynamic frame pools. AVHWFramesContext.pool must return
34 |  * AVBufferRefs whose data pointer is a CUdeviceptr.
35 |  */
36 | 
37 | typedef struct AVCUDADeviceContextInternal AVCUDADeviceContextInternal;
38 | 
39 | /**
40 |  * This struct is allocated as AVHWDeviceContext.hwctx
41 |  */
42 | typedef struct AVCUDADeviceContext {
43 |     CUcontext cuda_ctx;
44 |     CUstream stream;
45 |     AVCUDADeviceContextInternal *internal;
46 | } AVCUDADeviceContext;
47 | 
48 | /**
49 |  * AVHWFramesContext.hwctx is currently not used
50 |  */
51 | 
52 | #endif /* AVUTIL_HWCONTEXT_CUDA_H */
53 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/hwcontext_mediacodec.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of FFmpeg.
 3 |  *
 4 |  * FFmpeg is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU Lesser General Public
 6 |  * License as published by the Free Software Foundation; either
 7 |  * version 2.1 of the License, or (at your option) any later version.
 8 |  *
 9 |  * FFmpeg is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 |  * Lesser General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU Lesser General Public
15 |  * License along with FFmpeg; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 |  */
18 | 
19 | #ifndef AVUTIL_HWCONTEXT_MEDIACODEC_H
20 | #define AVUTIL_HWCONTEXT_MEDIACODEC_H
21 | 
22 | /**
23 |  * MediaCodec details.
24 |  *
25 |  * Allocated as AVHWDeviceContext.hwctx
26 |  */
27 | typedef struct AVMediaCodecDeviceContext {
28 |     /**
29 |      * android/view/Surface handle, to be filled by the user.
30 |      *
31 |      * This is the default surface used by decoders on this device.
32 |      */
33 |     void *surface;
34 | } AVMediaCodecDeviceContext;
35 | 
36 | #endif /* AVUTIL_HWCONTEXT_MEDIACODEC_H */
37 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/hwcontext_qsv.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of FFmpeg.
 3 |  *
 4 |  * FFmpeg is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU Lesser General Public
 6 |  * License as published by the Free Software Foundation; either
 7 |  * version 2.1 of the License, or (at your option) any later version.
 8 |  *
 9 |  * FFmpeg is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 |  * Lesser General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU Lesser General Public
15 |  * License along with FFmpeg; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 |  */
18 | 
19 | #ifndef AVUTIL_HWCONTEXT_QSV_H
20 | #define AVUTIL_HWCONTEXT_QSV_H
21 | 
22 | #include <mfx/mfxvideo.h>
23 | 
24 | /**
25 |  * @file
26 |  * An API-specific header for AV_HWDEVICE_TYPE_QSV.
27 |  *
28 |  * This API does not support dynamic frame pools. AVHWFramesContext.pool must
29 |  * contain AVBufferRefs whose data pointer points to an mfxFrameSurface1 struct.
30 |  */
31 | 
32 | /**
33 |  * This struct is allocated as AVHWDeviceContext.hwctx
34 |  */
35 | typedef struct AVQSVDeviceContext {
36 |     mfxSession session;
37 | } AVQSVDeviceContext;
38 | 
39 | /**
40 |  * This struct is allocated as AVHWFramesContext.hwctx
41 |  */
42 | typedef struct AVQSVFramesContext {
43 |     mfxFrameSurface1 *surfaces;
44 |     int            nb_surfaces;
45 | 
46 |     /**
47 |      * A combination of MFX_MEMTYPE_* describing the frame pool.
48 |      */
49 |     int frame_type;
50 | } AVQSVFramesContext;
51 | 
52 | #endif /* AVUTIL_HWCONTEXT_QSV_H */
53 | 
54 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/hwcontext_vdpau.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of FFmpeg.
 3 |  *
 4 |  * FFmpeg is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU Lesser General Public
 6 |  * License as published by the Free Software Foundation; either
 7 |  * version 2.1 of the License, or (at your option) any later version.
 8 |  *
 9 |  * FFmpeg is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 |  * Lesser General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU Lesser General Public
15 |  * License along with FFmpeg; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 |  */
18 | 
19 | #ifndef AVUTIL_HWCONTEXT_VDPAU_H
20 | #define AVUTIL_HWCONTEXT_VDPAU_H
21 | 
22 | #include <vdpau/vdpau.h>
23 | 
24 | /**
25 |  * @file
26 |  * An API-specific header for AV_HWDEVICE_TYPE_VDPAU.
27 |  *
28 |  * This API supports dynamic frame pools. AVHWFramesContext.pool must return
29 |  * AVBufferRefs whose data pointer is a VdpVideoSurface.
30 |  */
31 | 
32 | /**
33 |  * This struct is allocated as AVHWDeviceContext.hwctx
34 |  */
35 | typedef struct AVVDPAUDeviceContext {
36 |     VdpDevice          device;
37 |     VdpGetProcAddress *get_proc_address;
38 | } AVVDPAUDeviceContext;
39 | 
40 | /**
41 |  * AVHWFramesContext.hwctx is currently not used
42 |  */
43 | 
44 | #endif /* AVUTIL_HWCONTEXT_VDPAU_H */
45 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/hwcontext_videotoolbox.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of FFmpeg.
 3 |  *
 4 |  * FFmpeg is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU Lesser General Public
 6 |  * License as published by the Free Software Foundation; either
 7 |  * version 2.1 of the License, or (at your option) any later version.
 8 |  *
 9 |  * FFmpeg is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 |  * Lesser General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU Lesser General Public
15 |  * License along with FFmpeg; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 |  */
18 | 
19 | #ifndef AVUTIL_HWCONTEXT_VIDEOTOOLBOX_H
20 | #define AVUTIL_HWCONTEXT_VIDEOTOOLBOX_H
21 | 
22 | #include <stdint.h>
23 | 
24 | #include <VideoToolbox/VideoToolbox.h>
25 | 
26 | #include "pixfmt.h"
27 | 
28 | /**
29 |  * @file
30 |  * An API-specific header for AV_HWDEVICE_TYPE_VIDEOTOOLBOX.
31 |  *
32 |  * This API currently does not support frame allocation, as the raw VideoToolbox
33 |  * API does allocation, and FFmpeg itself never has the need to allocate frames.
34 |  *
35 |  * If the API user sets a custom pool, AVHWFramesContext.pool must return
36 |  * AVBufferRefs whose data pointer is a CVImageBufferRef or CVPixelBufferRef.
37 |  *
38 |  * Currently AVHWDeviceContext.hwctx and AVHWFramesContext.hwctx are always
39 |  * NULL.
40 |  */
41 | 
42 | /**
43 |  * Convert a VideoToolbox (actually CoreVideo) format to AVPixelFormat.
44 |  * Returns AV_PIX_FMT_NONE if no known equivalent was found.
45 |  */
46 | enum AVPixelFormat av_map_videotoolbox_format_to_pixfmt(uint32_t cv_fmt);
47 | 
48 | /**
49 |  * Convert an AVPixelFormat to a VideoToolbox (actually CoreVideo) format.
50 |  * Returns 0 if no known equivalent was found.
51 |  */
52 | uint32_t av_map_videotoolbox_format_from_pixfmt(enum AVPixelFormat pix_fmt);
53 | 
54 | #endif /* AVUTIL_HWCONTEXT_VIDEOTOOLBOX_H */
55 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/intfloat.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2011 Mans Rullgard
 3 |  *
 4 |  * This file is part of FFmpeg.
 5 |  *
 6 |  * FFmpeg is free software; you can redistribute it and/or
 7 |  * modify it under the terms of the GNU Lesser General Public
 8 |  * License as published by the Free Software Foundation; either
 9 |  * version 2.1 of the License, or (at your option) any later version.
10 |  *
11 |  * FFmpeg is distributed in the hope that it will be useful,
12 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 |  * Lesser General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Lesser General Public
17 |  * License along with FFmpeg; if not, write to the Free Software
18 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 |  */
20 | 
21 | #ifndef AVUTIL_INTFLOAT_H
22 | #define AVUTIL_INTFLOAT_H
23 | 
24 | #include <stdint.h>
25 | #include "attributes.h"
26 | 
27 | union av_intfloat32 {
28 |     uint32_t i;
29 |     float    f;
30 | };
31 | 
32 | union av_intfloat64 {
33 |     uint64_t i;
34 |     double   f;
35 | };
36 | 
37 | /**
38 |  * Reinterpret a 32-bit integer as a float.
39 |  */
40 | static av_always_inline float av_int2float(uint32_t i)
41 | {
42 |     union av_intfloat32 v;
43 |     v.i = i;
44 |     return v.f;
45 | }
46 | 
47 | /**
48 |  * Reinterpret a float as a 32-bit integer.
49 |  */
50 | static av_always_inline uint32_t av_float2int(float f)
51 | {
52 |     union av_intfloat32 v;
53 |     v.f = f;
54 |     return v.i;
55 | }
56 | 
57 | /**
58 |  * Reinterpret a 64-bit integer as a double.
59 |  */
60 | static av_always_inline double av_int2double(uint64_t i)
61 | {
62 |     union av_intfloat64 v;
63 |     v.i = i;
64 |     return v.f;
65 | }
66 | 
67 | /**
68 |  * Reinterpret a double as a 64-bit integer.
69 |  */
70 | static av_always_inline uint64_t av_double2int(double f)
71 | {
72 |     union av_intfloat64 v;
73 |     v.f = f;
74 |     return v.i;
75 | }
76 | 
77 | #endif /* AVUTIL_INTFLOAT_H */
78 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/lfg.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Lagged Fibonacci PRNG
 3 |  * Copyright (c) 2008 Michael Niedermayer
 4 |  *
 5 |  * This file is part of FFmpeg.
 6 |  *
 7 |  * FFmpeg is free software; you can redistribute it and/or
 8 |  * modify it under the terms of the GNU Lesser General Public
 9 |  * License as published by the Free Software Foundation; either
10 |  * version 2.1 of the License, or (at your option) any later version.
11 |  *
12 |  * FFmpeg is distributed in the hope that it will be useful,
13 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 |  * Lesser General Public License for more details.
16 |  *
17 |  * You should have received a copy of the GNU Lesser General Public
18 |  * License along with FFmpeg; if not, write to the Free Software
19 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 |  */
21 | 
22 | #ifndef AVUTIL_LFG_H
23 | #define AVUTIL_LFG_H
24 | 
25 | #include <stdint.h>
26 | 
27 | typedef struct AVLFG {
28 |     unsigned int state[64];
29 |     int index;
30 | } AVLFG;
31 | 
32 | void av_lfg_init(AVLFG *c, unsigned int seed);
33 | 
34 | /**
35 |  * Seed the state of the ALFG using binary data.
36 |  *
37 |  * Return value: 0 on success, negative value (AVERROR) on failure.
38 |  */
39 | int av_lfg_init_from_data(AVLFG *c, const uint8_t *data, unsigned int length);
40 | 
41 | /**
42 |  * Get the next random unsigned 32-bit number using an ALFG.
43 |  *
44 |  * Please also consider a simple LCG like state= state*1664525+1013904223,
45 |  * it may be good enough and faster for your specific use case.
46 |  */
47 | static inline unsigned int av_lfg_get(AVLFG *c){
48 |     c->state[c->index & 63] = c->state[(c->index-24) & 63] + c->state[(c->index-55) & 63];
49 |     return c->state[c->index++ & 63];
50 | }
51 | 
52 | /**
53 |  * Get the next random unsigned 32-bit number using a MLFG.
54 |  *
55 |  * Please also consider av_lfg_get() above, it is faster.
56 |  */
57 | static inline unsigned int av_mlfg_get(AVLFG *c){
58 |     unsigned int a= c->state[(c->index-55) & 63];
59 |     unsigned int b= c->state[(c->index-24) & 63];
60 |     return c->state[c->index++ & 63] = 2*a*b+a+b;
61 | }
62 | 
63 | /**
64 |  * Get the next two numbers generated by a Box-Muller Gaussian
65 |  * generator using the random numbers issued by lfg.
66 |  *
67 |  * @param out array where the two generated numbers are placed
68 |  */
69 | void av_bmg_get(AVLFG *lfg, double out[2]);
70 | 
71 | #endif /* AVUTIL_LFG_H */
72 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/lzo.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * LZO 1x decompression
 3 |  * copyright (c) 2006 Reimar Doeffinger
 4 |  *
 5 |  * This file is part of FFmpeg.
 6 |  *
 7 |  * FFmpeg is free software; you can redistribute it and/or
 8 |  * modify it under the terms of the GNU Lesser General Public
 9 |  * License as published by the Free Software Foundation; either
10 |  * version 2.1 of the License, or (at your option) any later version.
11 |  *
12 |  * FFmpeg is distributed in the hope that it will be useful,
13 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 |  * Lesser General Public License for more details.
16 |  *
17 |  * You should have received a copy of the GNU Lesser General Public
18 |  * License along with FFmpeg; if not, write to the Free Software
19 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 |  */
21 | 
22 | #ifndef AVUTIL_LZO_H
23 | #define AVUTIL_LZO_H
24 | 
25 | /**
26 |  * @defgroup lavu_lzo LZO
27 |  * @ingroup lavu_crypto
28 |  *
29 |  * @{
30 |  */
31 | 
32 | #include <stdint.h>
33 | 
34 | /** @name Error flags returned by av_lzo1x_decode
35 |  * @{ */
36 | /// end of the input buffer reached before decoding finished
37 | #define AV_LZO_INPUT_DEPLETED  1
38 | /// decoded data did not fit into output buffer
39 | #define AV_LZO_OUTPUT_FULL     2
40 | /// a reference to previously decoded data was wrong
41 | #define AV_LZO_INVALID_BACKPTR 4
42 | /// a non-specific error in the compressed bitstream
43 | #define AV_LZO_ERROR           8
44 | /** @} */
45 | 
46 | #define AV_LZO_INPUT_PADDING   8
47 | #define AV_LZO_OUTPUT_PADDING 12
48 | 
49 | /**
50 |  * @brief Decodes LZO 1x compressed data.
51 |  * @param out output buffer
52 |  * @param outlen size of output buffer, number of bytes left is returned here
53 |  * @param in input buffer
54 |  * @param inlen size of input buffer, number of bytes left is returned here
55 |  * @return 0 on success, otherwise a combination of the error flags above
56 |  *
57 |  * Make sure all buffers are appropriately padded, in must provide
58 |  * AV_LZO_INPUT_PADDING, out must provide AV_LZO_OUTPUT_PADDING additional bytes.
59 |  */
60 | int av_lzo1x_decode(void *out, int *outlen, const void *in, int *inlen);
61 | 
62 | /**
63 |  * @}
64 |  */
65 | 
66 | #endif /* AVUTIL_LZO_H */
67 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/macros.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of FFmpeg.
 3 |  *
 4 |  * FFmpeg is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU Lesser General Public
 6 |  * License as published by the Free Software Foundation; either
 7 |  * version 2.1 of the License, or (at your option) any later version.
 8 |  *
 9 |  * FFmpeg is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 |  * Lesser General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU Lesser General Public
15 |  * License along with FFmpeg; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 |  */
18 | 
19 | /**
20 |  * @file
21 |  * @ingroup lavu
22 |  * Utility Preprocessor macros
23 |  */
24 | 
25 | #ifndef AVUTIL_MACROS_H
26 | #define AVUTIL_MACROS_H
27 | 
28 | /**
29 |  * @addtogroup preproc_misc Preprocessor String Macros
30 |  *
31 |  * String manipulation macros
32 |  *
33 |  * @{
34 |  */
35 | 
36 | #define AV_STRINGIFY(s)         AV_TOSTRING(s)
37 | #define AV_TOSTRING(s) #s
38 | 
39 | #define AV_GLUE(a, b) a ## b
40 | #define AV_JOIN(a, b) AV_GLUE(a, b)
41 | 
42 | /**
43 |  * @}
44 |  */
45 | 
46 | #define AV_PRAGMA(s) _Pragma(#s)
47 | 
48 | #define FFALIGN(x, a) (((x)+(a)-1)&~((a)-1))
49 | 
50 | #endif /* AVUTIL_MACROS_H */
51 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/motion_vector.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of FFmpeg.
 3 |  *
 4 |  * FFmpeg is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU Lesser General Public
 6 |  * License as published by the Free Software Foundation; either
 7 |  * version 2.1 of the License, or (at your option) any later version.
 8 |  *
 9 |  * FFmpeg is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 |  * Lesser General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU Lesser General Public
15 |  * License along with FFmpeg; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 |  */
18 | 
19 | #ifndef AVUTIL_MOTION_VECTOR_H
20 | #define AVUTIL_MOTION_VECTOR_H
21 | 
22 | #include <stdint.h>
23 | 
24 | typedef struct AVMotionVector {
25 |     /**
26 |      * Where the current macroblock comes from; negative value when it comes
27 |      * from the past, positive value when it comes from the future.
28 |      * XXX: set exact relative ref frame reference instead of a +/- 1 "direction".
29 |      */
30 |     int32_t source;
31 |     /**
32 |      * Width and height of the block.
33 |      */
34 |     uint8_t w, h;
35 |     /**
36 |      * Absolute source position. Can be outside the frame area.
37 |      */
38 |     int16_t src_x, src_y;
39 |     /**
40 |      * Absolute destination position. Can be outside the frame area.
41 |      */
42 |     int16_t dst_x, dst_y;
43 |     /**
44 |      * Extra flag information.
45 |      * Currently unused.
46 |      */
47 |     uint64_t flags;
48 |     /**
49 |      * Motion vector
50 |      * src_x = dst_x + motion_x / motion_scale
51 |      * src_y = dst_y + motion_y / motion_scale
52 |      */
53 |     int32_t motion_x, motion_y;
54 |     uint16_t motion_scale;
55 | } AVMotionVector;
56 | 
57 | #endif /* AVUTIL_MOTION_VECTOR_H */
58 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/pixelutils.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of FFmpeg.
 3 |  *
 4 |  * FFmpeg is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU Lesser General Public
 6 |  * License as published by the Free Software Foundation; either
 7 |  * version 2.1 of the License, or (at your option) any later version.
 8 |  *
 9 |  * FFmpeg is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 |  * Lesser General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU Lesser General Public
15 |  * License along with FFmpeg; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 |  */
18 | 
19 | #ifndef AVUTIL_PIXELUTILS_H
20 | #define AVUTIL_PIXELUTILS_H
21 | 
22 | #include <stddef.h>
23 | #include <stdint.h>
24 | #include "common.h"
25 | 
26 | /**
27 |  * Sum of abs(src1[x] - src2[x])
28 |  */
29 | typedef int (*av_pixelutils_sad_fn)(const uint8_t *src1, ptrdiff_t stride1,
30 |                                     const uint8_t *src2, ptrdiff_t stride2);
31 | 
32 | /**
33 |  * Get a potentially optimized pointer to a Sum-of-absolute-differences
34 |  * function (see the av_pixelutils_sad_fn prototype).
35 |  *
36 |  * @param w_bits  1<<w_bits is the requested width of the block size
37 |  * @param h_bits  1<<h_bits is the requested height of the block size
38 |  * @param aligned If set to 2, the returned sad function will assume src1 and
39 |  *                src2 addresses are aligned on the block size.
40 |  *                If set to 1, the returned sad function will assume src1 is
41 |  *                aligned on the block size.
42 |  *                If set to 0, the returned sad function assumes no particular
43 |  *                alignment.
44 |  * @param log_ctx context used for logging, can be NULL
45 |  *
46 |  * @return a pointer to the SAD function or NULL in case of error (because of
47 |  *         invalid parameters)
48 |  */
49 | av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits,
50 |                                               int aligned, void *log_ctx);
51 | 
52 | #endif /* AVUTIL_PIXELUTILS_H */
53 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/random_seed.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2009 Baptiste Coudurier <baptiste.coudurier@gmail.com>
 3 |  *
 4 |  * This file is part of FFmpeg.
 5 |  *
 6 |  * FFmpeg is free software; you can redistribute it and/or
 7 |  * modify it under the terms of the GNU Lesser General Public
 8 |  * License as published by the Free Software Foundation; either
 9 |  * version 2.1 of the License, or (at your option) any later version.
10 |  *
11 |  * FFmpeg is distributed in the hope that it will be useful,
12 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 |  * Lesser General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Lesser General Public
17 |  * License along with FFmpeg; if not, write to the Free Software
18 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 |  */
20 | 
21 | #ifndef AVUTIL_RANDOM_SEED_H
22 | #define AVUTIL_RANDOM_SEED_H
23 | 
24 | #include <stdint.h>
25 | /**
26 |  * @addtogroup lavu_crypto
27 |  * @{
28 |  */
29 | 
30 | /**
31 |  * Get a seed to use in conjunction with random functions.
32 |  * This function tries to provide a good seed on a best-effort basis.
33 |  * It is possible to call this function multiple times if more bits are needed.
34 |  * It can be quite slow, which is why it should only be used as seed for a faster
35 |  * PRNG. The quality of the seed depends on the platform.
36 |  */
37 | uint32_t av_get_random_seed(void);
38 | 
39 | /**
40 |  * @}
41 |  */
42 | 
43 | #endif /* AVUTIL_RANDOM_SEED_H */
44 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/rc4.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * RC4 encryption/decryption/pseudo-random number generator
 3 |  *
 4 |  * This file is part of FFmpeg.
 5 |  *
 6 |  * FFmpeg is free software; you can redistribute it and/or
 7 |  * modify it under the terms of the GNU Lesser General Public
 8 |  * License as published by the Free Software Foundation; either
 9 |  * version 2.1 of the License, or (at your option) any later version.
10 |  *
11 |  * FFmpeg is distributed in the hope that it will be useful,
12 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 |  * Lesser General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Lesser General Public
17 |  * License along with FFmpeg; if not, write to the Free Software
18 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 |  */
20 | 
21 | #ifndef AVUTIL_RC4_H
22 | #define AVUTIL_RC4_H
23 | 
24 | #include <stdint.h>
25 | 
26 | /**
27 |  * @defgroup lavu_rc4 RC4
28 |  * @ingroup lavu_crypto
29 |  * @{
30 |  */
31 | 
32 | typedef struct AVRC4 {
33 |     uint8_t state[256];
34 |     int x, y;
35 | } AVRC4;
36 | 
37 | /**
38 |  * Allocate an AVRC4 context.
39 |  */
40 | AVRC4 *av_rc4_alloc(void);
41 | 
42 | /**
43 |  * @brief Initializes an AVRC4 context.
44 |  *
45 |  * @param key_bits must be a multiple of 8
46 |  * @param decrypt 0 for encryption, 1 for decryption, currently has no effect
47 |  * @return zero on success, negative value otherwise
48 |  */
49 | int av_rc4_init(struct AVRC4 *d, const uint8_t *key, int key_bits, int decrypt);
50 | 
51 | /**
52 |  * @brief Encrypts / decrypts using the RC4 algorithm.
53 |  *
54 |  * @param count number of bytes
55 |  * @param dst destination array, can be equal to src
56 |  * @param src source array, can be equal to dst, may be NULL
57 |  * @param iv not (yet) used for RC4, should be NULL
58 |  * @param decrypt 0 for encryption, 1 for decryption, not (yet) used
59 |  */
60 | void av_rc4_crypt(struct AVRC4 *d, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv, int decrypt);
61 | 
62 | /**
63 |  * @}
64 |  */
65 | 
66 | #endif /* AVUTIL_RC4_H */
67 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/replaygain.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of FFmpeg.
 3 |  *
 4 |  * FFmpeg is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU Lesser General Public
 6 |  * License as published by the Free Software Foundation; either
 7 |  * version 2.1 of the License, or (at your option) any later version.
 8 |  *
 9 |  * FFmpeg is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 |  * Lesser General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU Lesser General Public
15 |  * License along with FFmpeg; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 |  */
18 | 
19 | #ifndef AVUTIL_REPLAYGAIN_H
20 | #define AVUTIL_REPLAYGAIN_H
21 | 
22 | #include <stdint.h>
23 | 
24 | /**
25 |  * ReplayGain information (see
26 |  * http://wiki.hydrogenaudio.org/index.php?title=ReplayGain_1.0_specification).
27 |  * The size of this struct is a part of the public ABI.
28 |  */
29 | typedef struct AVReplayGain {
30 |     /**
31 |      * Track replay gain in microbels (divide by 100000 to get the value in dB).
32 |      * Should be set to INT32_MIN when unknown.
33 |      */
34 |     int32_t track_gain;
35 |     /**
36 |      * Peak track amplitude, with 100000 representing full scale (but values
37 |      * may overflow). 0 when unknown.
38 |      */
39 |     uint32_t track_peak;
40 |     /**
41 |      * Same as track_gain, but for the whole album.
42 |      */
43 |     int32_t album_gain;
44 |     /**
45 |      * Same as track_peak, but for the whole album,
46 |      */
47 |     uint32_t album_peak;
48 | } AVReplayGain;
49 | 
50 | #endif /* AVUTIL_REPLAYGAIN_H */
51 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/tea.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * A 32-bit implementation of the TEA algorithm
 3 |  * Copyright (c) 2015 Vesselin Bontchev
 4 |  *
 5 |  * This file is part of FFmpeg.
 6 |  *
 7 |  * FFmpeg is free software; you can redistribute it and/or
 8 |  * modify it under the terms of the GNU Lesser General Public
 9 |  * License as published by the Free Software Foundation; either
10 |  * version 2.1 of the License, or (at your option) any later version.
11 |  *
12 |  * FFmpeg is distributed in the hope that it will be useful,
13 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 |  * Lesser General Public License for more details.
16 |  *
17 |  * You should have received a copy of the GNU Lesser General Public
18 |  * License along with FFmpeg; if not, write to the Free Software
19 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 |  */
21 | 
22 | #ifndef AVUTIL_TEA_H
23 | #define AVUTIL_TEA_H
24 | 
25 | #include <stdint.h>
26 | 
27 | /**
28 |  * @file
29 |  * @brief Public header for libavutil TEA algorithm
30 |  * @defgroup lavu_tea TEA
31 |  * @ingroup lavu_crypto
32 |  * @{
33 |  */
34 | 
35 | extern const int av_tea_size;
36 | 
37 | struct AVTEA;
38 | 
39 | /**
40 |   * Allocate an AVTEA context
41 |   * To free the struct: av_free(ptr)
42 |   */
43 | struct AVTEA *av_tea_alloc(void);
44 | 
45 | /**
46 |  * Initialize an AVTEA context.
47 |  *
48 |  * @param ctx an AVTEA context
49 |  * @param key a key of 16 bytes used for encryption/decryption
50 |  * @param rounds the number of rounds in TEA (64 is the "standard")
51 |  */
52 | void av_tea_init(struct AVTEA *ctx, const uint8_t key[16], int rounds);
53 | 
54 | /**
55 |  * Encrypt or decrypt a buffer using a previously initialized context.
56 |  *
57 |  * @param ctx an AVTEA context
58 |  * @param dst destination array, can be equal to src
59 |  * @param src source array, can be equal to dst
60 |  * @param count number of 8 byte blocks
61 |  * @param iv initialization vector for CBC mode, if NULL then ECB will be used
62 |  * @param decrypt 0 for encryption, 1 for decryption
63 |  */
64 | void av_tea_crypt(struct AVTEA *ctx, uint8_t *dst, const uint8_t *src,
65 |                   int count, uint8_t *iv, int decrypt);
66 | 
67 | /**
68 |  * @}
69 |  */
70 | 
71 | #endif /* AVUTIL_TEA_H */
72 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/time.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2000-2003 Fabrice Bellard
 3 |  *
 4 |  * This file is part of FFmpeg.
 5 |  *
 6 |  * FFmpeg is free software; you can redistribute it and/or
 7 |  * modify it under the terms of the GNU Lesser General Public
 8 |  * License as published by the Free Software Foundation; either
 9 |  * version 2.1 of the License, or (at your option) any later version.
10 |  *
11 |  * FFmpeg is distributed in the hope that it will be useful,
12 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 |  * Lesser General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Lesser General Public
17 |  * License along with FFmpeg; if not, write to the Free Software
18 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 |  */
20 | 
21 | #ifndef AVUTIL_TIME_H
22 | #define AVUTIL_TIME_H
23 | 
24 | #include <stdint.h>
25 | 
26 | /**
27 |  * Get the current time in microseconds.
28 |  */
29 | int64_t av_gettime(void);
30 | 
31 | /**
32 |  * Get the current time in microseconds since some unspecified starting point.
33 |  * On platforms that support it, the time comes from a monotonic clock
34 |  * This property makes this time source ideal for measuring relative time.
35 |  * The returned values may not be monotonic on platforms where a monotonic
36 |  * clock is not available.
37 |  */
38 | int64_t av_gettime_relative(void);
39 | 
40 | /**
41 |  * Indicates with a boolean result if the av_gettime_relative() time source
42 |  * is monotonic.
43 |  */
44 | int av_gettime_relative_is_monotonic(void);
45 | 
46 | /**
47 |  * Sleep for a period of time.  Although the duration is expressed in
48 |  * microseconds, the actual delay may be rounded to the precision of the
49 |  * system timer.
50 |  *
51 |  * @param  usec Number of microseconds to sleep.
52 |  * @return zero on success or (negative) error code.
53 |  */
54 | int av_usleep(unsigned usec);
55 | 
56 | #endif /* AVUTIL_TIME_H */
57 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libavutil/twofish.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * An implementation of the TwoFish algorithm
 3 |  * Copyright (c) 2015 Supraja Meedinti
 4 |  *
 5 |  * This file is part of FFmpeg.
 6 |  *
 7 |  * FFmpeg is free software; you can redistribute it and/or
 8 |  * modify it under the terms of the GNU Lesser General Public
 9 |  * License as published by the Free Software Foundation; either
10 |  * version 2.1 of the License, or (at your option) any later version.
11 |  *
12 |  * FFmpeg is distributed in the hope that it will be useful,
13 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 |  * Lesser General Public License for more details.
16 |  *
17 |  * You should have received a copy of the GNU Lesser General Public
18 |  * License along with FFmpeg; if not, write to the Free Software
19 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 |  */
21 | 
22 | #ifndef AVUTIL_TWOFISH_H
23 | #define AVUTIL_TWOFISH_H
24 | 
25 | #include <stdint.h>
26 | 
27 | 
28 | /**
29 |   * @file
30 |   * @brief Public header for libavutil TWOFISH algorithm
31 |   * @defgroup lavu_twofish TWOFISH
32 |   * @ingroup lavu_crypto
33 |   * @{
34 |   */
35 | 
36 | extern const int av_twofish_size;
37 | 
38 | struct AVTWOFISH;
39 | 
40 | /**
41 |   * Allocate an AVTWOFISH context
42 |   * To free the struct: av_free(ptr)
43 |   */
44 | struct AVTWOFISH *av_twofish_alloc(void);
45 | 
46 | /**
47 |   * Initialize an AVTWOFISH context.
48 |   *
49 |   * @param ctx an AVTWOFISH context
50 |   * @param key a key of size ranging from 1 to 32 bytes used for encryption/decryption
51 |   * @param key_bits number of key bits: 128, 192 or 256. If less than the required, padded with zeroes to nearest valid value. Return value is 0 if key_bits is 128/192/256, -1 if less than 0, 1 otherwise.
52 |  */
53 | int av_twofish_init(struct AVTWOFISH *ctx, const uint8_t *key, int key_bits);
54 | 
55 | /**
56 |   * Encrypt or decrypt a buffer using a previously initialized context
57 |   *
58 |   * @param ctx an AVTWOFISH context
59 |   * @param dst destination array, can be equal to src
60 |   * @param src source array, can be equal to dst
61 |   * @param count number of 16 byte blocks
62 |   * @param iv initialization vector for CBC mode, NULL for ECB mode
63 |   * @param decrypt 0 for encryption, 1 for decryption
64 |  */
65 | void av_twofish_crypt(struct AVTWOFISH *ctx, uint8_t *dst, const uint8_t *src, int count, uint8_t* iv, int decrypt);
66 | 
67 | /**
68 |  * @}
69 |  */
70 | #endif /* AVUTIL_TWOFISH_H */
71 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libswresample/version.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Version macros.
 3 |  *
 4 |  * This file is part of libswresample
 5 |  *
 6 |  * libswresample is free software; you can redistribute it and/or
 7 |  * modify it under the terms of the GNU Lesser General Public
 8 |  * License as published by the Free Software Foundation; either
 9 |  * version 2.1 of the License, or (at your option) any later version.
10 |  *
11 |  * libswresample is distributed in the hope that it will be useful,
12 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 |  * Lesser General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Lesser General Public
17 |  * License along with libswresample; if not, write to the Free Software
18 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 |  */
20 | 
21 | #ifndef SWRESAMPLE_VERSION_H
22 | #define SWRESAMPLE_VERSION_H
23 | 
24 | /**
25 |  * @file
26 |  * Libswresample version macros
27 |  */
28 | 
29 | #include "libavutil/avutil.h"
30 | 
31 | #define LIBSWRESAMPLE_VERSION_MAJOR   3
32 | #define LIBSWRESAMPLE_VERSION_MINOR   5
33 | #define LIBSWRESAMPLE_VERSION_MICRO 100
34 | 
35 | #define LIBSWRESAMPLE_VERSION_INT  AV_VERSION_INT(LIBSWRESAMPLE_VERSION_MAJOR, \
36 |                                                   LIBSWRESAMPLE_VERSION_MINOR, \
37 |                                                   LIBSWRESAMPLE_VERSION_MICRO)
38 | #define LIBSWRESAMPLE_VERSION      AV_VERSION(LIBSWRESAMPLE_VERSION_MAJOR, \
39 |                                               LIBSWRESAMPLE_VERSION_MINOR, \
40 |                                               LIBSWRESAMPLE_VERSION_MICRO)
41 | #define LIBSWRESAMPLE_BUILD        LIBSWRESAMPLE_VERSION_INT
42 | 
43 | #define LIBSWRESAMPLE_IDENT        "SwR" AV_STRINGIFY(LIBSWRESAMPLE_VERSION)
44 | 
45 | #endif /* SWRESAMPLE_VERSION_H */
46 | 


--------------------------------------------------------------------------------
/Lib/ffmpeg-4.2.1/include/libswscale/version.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of FFmpeg.
 3 |  *
 4 |  * FFmpeg is free software; you can redistribute it and/or
 5 |  * modify it under the terms of the GNU Lesser General Public
 6 |  * License as published by the Free Software Foundation; either
 7 |  * version 2.1 of the License, or (at your option) any later version.
 8 |  *
 9 |  * FFmpeg is distributed in the hope that it will be useful,
10 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 |  * Lesser General Public License for more details.
13 |  *
14 |  * You should have received a copy of the GNU Lesser General Public
15 |  * License along with FFmpeg; if not, write to the Free Software
16 |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 |  */
18 | 
19 | #ifndef SWSCALE_VERSION_H
20 | #define SWSCALE_VERSION_H
21 | 
22 | /**
23 |  * @file
24 |  * swscale version macros
25 |  */
26 | 
27 | #include "libavutil/version.h"
28 | 
29 | #define LIBSWSCALE_VERSION_MAJOR   5
30 | #define LIBSWSCALE_VERSION_MINOR   5
31 | #define LIBSWSCALE_VERSION_MICRO 100
32 | 
33 | #define LIBSWSCALE_VERSION_INT  AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \
34 |                                                LIBSWSCALE_VERSION_MINOR, \
35 |                                                LIBSWSCALE_VERSION_MICRO)
36 | #define LIBSWSCALE_VERSION      AV_VERSION(LIBSWSCALE_VERSION_MAJOR, \
37 |                                            LIBSWSCALE_VERSION_MINOR, \
38 |                                            LIBSWSCALE_VERSION_MICRO)
39 | #define LIBSWSCALE_BUILD        LIBSWSCALE_VERSION_INT
40 | 
41 | #define LIBSWSCALE_IDENT        "SwS" AV_STRINGIFY(LIBSWSCALE_VERSION)
42 | 
43 | /**
44 |  * FF_API_* defines may be placed below to indicate public API that will be
45 |  * dropped at a future version bump. The defines themselves are not part of
46 |  * the public API and may change, break or disappear at any time.
47 |  */
48 | 
49 | #ifndef FF_API_SWS_VECTOR
50 | #define FF_API_SWS_VECTOR            (LIBSWSCALE_VERSION_MAJOR < 6)
51 | #endif
52 | 
53 | #endif /* SWSCALE_VERSION_H */
54 | 


--------------------------------------------------------------------------------
/TTSProjectTemplate.ttsproj:
--------------------------------------------------------------------------------
 1 | [
 2 | 	{
 3 | 		"Tokens": "私は誰?",
 4 | 		"Seq": ["w", "a", "t", "a", "s", "h", "i", "w", "a", "d", "a", "r", "e", "?"],
 5 | 		"Tones": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 6 | 		"Durations": [2, 5, 2, 5, 2, 2, 5, 2, 5, 2, 5, 2, 5],
 7 | 		"Language": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 8 | 		"SpeakerMix": [1.000000, 0.000000, 0.000000],
 9 | 		"EmotionPrompt": ["sad", "happy"],
10 | 		"NoiseScale": 0.666000,
11 | 		"LengthScale": 1.100000,
12 | 		"DurationPredictorNoiseScale": 0.333000,
13 | 		"FactorDpSdp": 0.600000,
14 | 		"GateThreshold": 0.777000,
15 | 		"MaxDecodeStep": 114514,
16 | 		"Seed": 1919810,
17 | 		"SpeakerId": 2,
18 | 		"RestTime": 1.000000,
19 | 		"PlaceHolderSymbol": "|",
20 | 		"LanguageID": "JP",
21 | 		"G2PAdditionalInfo": "/[Japanese2]"
22 | 	},
23 | 	{
24 | 		"Tokens": "私は鳴瀬しろは",
25 | 		"Seq": [],
26 | 		"Tones": [],
27 | 		"Durations": [],
28 | 		"Language": [],
29 | 		"SpeakerMix": [],
30 | 		"EmotionPrompt": [],
31 | 		"NoiseScale": 0.666000,
32 | 		"LengthScale": 1.000000,
33 | 		"DurationPredictorNoiseScale": 0.333000,
34 | 		"FactorDpSdp": 0.600000,
35 | 		"GateThreshold": 0.777000,
36 | 		"MaxDecodeStep": 1145147,
37 | 		"Seed": 1919810,
38 | 		"SpeakerId": 0,
39 | 		"RestTime": 1.000000,
40 | 		"PlaceHolderSymbol": "|",
41 | 		"LanguageID": "JP",
42 | 		"G2PAdditionalInfo": "/[Japanese2]"
43 | 	},
44 | 	{
45 | 		"Tokens": "私は誰?",
46 | 		"Seq": [],
47 | 		"Tones": [],
48 | 		"Durations": [],
49 | 		"Language": [],
50 | 		"SpeakerMix": [0.666000, 0.233000, 0.444000],
51 | 		"EmotionPrompt": ["sad", "happy"],
52 | 		"NoiseScale": 0.666000,
53 | 		"LengthScale": 1.100000,
54 | 		"DurationPredictorNoiseScale": 0.333000,
55 | 		"FactorDpSdp": 0.600000,
56 | 		"GateThreshold": 0.777000,
57 | 		"MaxDecodeStep": 1145145,
58 | 		"Seed": 19198101,
59 | 		"SpeakerId": 1,
60 | 		"RestTime": -1.000000,
61 | 		"PlaceHolderSymbol": "|",
62 | 		"LanguageID": "JP",
63 | 		"G2PAdditionalInfo": "/[Japanese2]"
64 | 	},
65 | 	{
66 | 		"Tokens": "私は鷗です、くみの名前は?",
67 | 		"Seq": [],
68 | 		"Tones": [],
69 | 		"Durations": [],
70 | 		"Language": [],
71 | 		"SpeakerMix": [],
72 | 		"EmotionPrompt": [],
73 | 		"NoiseScale": 0.666000,
74 | 		"LengthScale": 1.100000,
75 | 		"DurationPredictorNoiseScale": 0.222000,
76 | 		"FactorDpSdp": 0.600000,
77 | 		"GateThreshold": 0.777000,
78 | 		"MaxDecodeStep": 114514,
79 | 		"Seed": 1919810,
80 | 		"SpeakerId": 3,
81 | 		"RestTime": 1.000000,
82 | 		"PlaceHolderSymbol": "|",
83 | 		"LanguageID": "JP",
84 | 		"G2PAdditionalInfo": "/[Japanese2]"
85 | 	}
86 | ]


--------------------------------------------------------------------------------
/fish-speech.cpp/Demo/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Builds the stand-alone Demo executable and links it against fish-speech-cpp.
 2 | include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 3 | 
 4 | set(EXE_DEMO Demo)
 5 | 
 6 | # /ZI (debug information with Edit and Continue support) is an MSVC switch.
 7 | # Other compiler drivers would read it as an input path, so pass it to MSVC only.
 8 | if (MSVC)
 9 |   add_compile_options(/ZI)
10 | endif()
11 | 
12 | add_executable(${EXE_DEMO} main.cpp)
13 | target_link_libraries(${EXE_DEMO} PRIVATE fish-speech-cpp)
14 | 
15 | if (CMAKE_VERSION VERSION_GREATER 3.12)
16 |   set_property(TARGET ${EXE_DEMO} PROPERTY CXX_STANDARD 20)
17 | endif()


--------------------------------------------------------------------------------
/fish-speech.cpp/Demo/main.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <windows.h>
 3 | #include "llama.h"
 4 | 
 5 | using namespace libtts;
 6 | 
 7 | int main() // Demo entry point: builds a transformer from placeholder arguments and prints its layer-name dump.
 8 | {
 9 | 	auto a = BaseTransformer(nullptr, L"", BaseModelArgs()); // null first argument, empty wide string, default-constructed BaseModelArgs
10 | 	std::cout << UnicodeToByte(a.DumpLayerNameInfo()); // the dump is passed through UnicodeToByte before it is written to stdout
11 | 	system("pause"); // runs the shell command "pause" so the console stays open
12 | 	return 0;
13 | }


--------------------------------------------------------------------------------
/fish-speech.cpp/src/Base.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/fish-speech.cpp/src/Base.cpp


--------------------------------------------------------------------------------
/fish-speech.cpp/test.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import time
3 | for i in range(20):
4 |     a = torch.ones(size=(1, 768, 100000))
5 |     beg = time.time()
6 |     a.fill_(i)
7 |     print(time.time() - beg)


--------------------------------------------------------------------------------
/libdlvoicecodec/LibDLVoiceCodec/base.cpp:
--------------------------------------------------------------------------------
1 | #include "base.h"


--------------------------------------------------------------------------------
/libdlvoicecodec/LibDLVoiceCodec/base.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <cstdint>
 3 | #include <cstdio>
 4 | #include <iostream>
 5 | #include <stdexcept>
 6 | #include <string>
 7 | #include <unordered_map>
 8 | #include <vector>
 7 | #define LibDLVoiceCodecBegin namespace libdlvcodec {
 8 | #define LibDLVoiceCodecEnd }
 9 | #define LIBDVCND [[nodiscard]]
10 | // Throws std::runtime_error carrying the source file, line number and message. std::exception(const char*) is an MSVC-only extension, so the standard type is used; catch (std::exception&) handlers still match.
11 | #define LibDLVoiceCodecThrow(message) throw std::runtime_error(std::string("[At \"") + __FILE__ + "\" Line " + std::to_string(__LINE__) + "]\n" + (message))
12 | 
13 | LibDLVoiceCodecBegin
14 | 
15 | using int8 = int8_t; // short spellings for the fixed-width integer and floating-point types used by this library
16 | using int16 = int16_t;
17 | using int32 = int32_t;
18 | using int64 = int64_t;
19 | using float32 = float;
20 | using float64 = double;
21 | using byte = unsigned char;
22 | using lpvoid = void*;
23 | using uint8 = uint8_t;
24 | using uint16 = uint16_t;
25 | using uint32 = uint32_t;
26 | using uint64 = uint64_t;
27 | 
28 | class TensorView; // forward declarations only; neither class is defined in this header
29 | class Tensor;
30 | 
31 | const std::unordered_map<std::string, size_t> __Dtype {{"int8", 1}, { "int16", 2 }, { "int32", 4 }, { "int64", 8 }, // dtype name -> size (presumably bytes per element -- TODO confirm)
32 | 	{ "float8", 1 }, { "float16", 2 }, { "bfloat16", 2 }, { "float32", 4 }, { "float64", 8 }, { "bool", 1 } }; // NOTE(review): names containing a double underscore are reserved identifiers in C++
33 | 
34 | template <class _Ty, class _Alloc = std::allocator<_Ty>>
35 | using MResource = std::vector<_Ty, _Alloc>; // alias for std::vector with an optional allocator argument
36 | 
37 | template<typename T>
38 | std::ostream& operator<<(std::ostream& _Stream, const std::vector<T>& _Data)
39 | {
40 | 	// Writes '[', then every element followed by ", " (the last element keeps its separator), then "]\n".
41 | 	_Stream << '[';
42 | 	for (auto it = _Data.begin(); it != _Data.end(); ++it)
43 | 		_Stream << *it << ", ";
44 | 	return _Stream << "]\n";
45 | }
46 | 
47 | /**
48 |  * Move-only owner of a C FILE* stream. The stream is closed on destruction,
49 |  * when another wrapper is move-assigned over it, and before open() replaces it.
50 |  */
51 | class FileWrapper
52 | {
53 | public:
54 | 	FileWrapper() = default;
55 | 	~FileWrapper()
56 | 	{
57 | 		close_();
58 | 	}
59 | 	FileWrapper(const FileWrapper& _Left) = delete;
60 | 	FileWrapper& operator=(const FileWrapper& _Left) = delete;
61 | 	FileWrapper(FileWrapper&& _Right) noexcept
62 | 	{
63 | 		file_ = _Right.file_;
64 | 		_Right.file_ = nullptr;
65 | 	}
66 | 	FileWrapper& operator=(FileWrapper&& _Right) noexcept
67 | 	{
68 | 		// Close our own stream first (it used to be overwritten and leaked);
69 | 		// the self-check keeps `w = std::move(w)` from dropping the handle.
70 | 		if (this != &_Right)
71 | 		{
72 | 			close_();
73 | 			file_ = _Right.file_;
74 | 			_Right.file_ = nullptr;
75 | 		}
76 | 		return *this;
77 | 	}
78 | 	// Opens _Path with the fopen-style mode _Mode; a stream that is already held is closed first.
79 | 	void open(const std::wstring& _Path, const std::wstring& _Mode)
80 | 	{
81 | 		close_();
82 | #ifdef _WIN32
83 | 		_wfopen_s(&file_, _Path.c_str(), _Mode.c_str());
84 | #else
85 | 		file_ = _wfopen(_Path.c_str(), _Mode.c_str()); // NOTE(review): _wfopen is a Windows CRT function; this branch needs a narrow-path fopen to build elsewhere
86 | #endif
87 | 	}
88 | 	operator FILE* () const
89 | 	{
90 | 		return file_;
91 | 	}
92 | 	// True while a stream is held.
93 | 	LIBDVCND bool enabled() const
94 | 	{
95 | 		return file_ != nullptr;
96 | 	}
97 | private:
98 | 	// Closes the held stream, if any, and leaves the wrapper empty.
99 | 	void close_() noexcept { if (file_) fclose(file_); file_ = nullptr; }
100 | 	FILE* file_ = nullptr;
101 | };
89 | 
90 | LibDLVoiceCodecEnd


--------------------------------------------------------------------------------
/libdlvoicecodec/LibDLVoiceCodec/operator.cpp:
--------------------------------------------------------------------------------
1 | #include "operator.h"
2 | #include "value.h"
3 | #include <cblas.h>
4 | 
5 | LibDLVoiceCodecBegin
6 | 
7 | LibDLVoiceCodecEnd


--------------------------------------------------------------------------------
/libdlvoicecodec/LibDLVoiceCodec/operator.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "base.h"
 3 | 
 4 | LibDLVoiceCodecBegin
 5 | Tensor equal(const Tensor& _A, const Tensor& _B); // binary operations returning a new Tensor; declarations only -- operator.cpp currently contains no definitions
 6 | Tensor add(const Tensor& _A, const Tensor& _B);
 7 | Tensor sub(const Tensor& _A, const Tensor& _B);
 8 | Tensor mul(const Tensor& _A, const Tensor& _B);
 9 | Tensor div(const Tensor& _A, const Tensor& _B);
10 | void selfAdd(Tensor& _Self, const Tensor& _O); // self* variants take _Self by non-const reference and return void (presumably in-place updates -- TODO confirm)
11 | void selfSub(Tensor& _Self, const Tensor& _O);
12 | void selfMul(Tensor& _Self, const Tensor& _O);
13 | void selfDiv(Tensor& _Self, const Tensor& _O);
14 | Tensor matmul(const Tensor& _A, const Tensor& _B);
15 | Tensor conv1d(const Tensor& _Input, const Tensor& _Weight, const Tensor& _Bias, // convolution family: _Bias has no default; stride/padding/dilation/groups default to 1/0/1/1
16 | 	int64 _Stride = 1, int64 _Padding = 0, int64 _Dilation = 1, int64 _Groups = 1);
17 | Tensor conv2d(const Tensor& _Input, const Tensor& _Weight, const Tensor& _Bias,
18 | 	int64 _Stride = 1, int64 _Padding = 0, int64 _Dilation = 1, int64 _Groups = 1);
19 | Tensor conv3d(const Tensor& _Input, const Tensor& _Weight, const Tensor& _Bias,
20 | 	int64 _Stride = 1, int64 _Padding = 0, int64 _Dilation = 1, int64 _Groups = 1);
21 | Tensor conv_transpose1d(const Tensor& _Input, const Tensor& _Weight, const Tensor& _Bias, // transposed variants add _OutputPadding (default 0)
22 | 	int64 _Stride = 1, int64 _Padding = 0, int64 _OutputPadding = 0, int64 _Dilation = 1, int64 _Groups = 1);
23 | Tensor conv_transpose2d(const Tensor& _Input, const Tensor& _Weight, const Tensor& _Bias,
24 | 	int64 _Stride = 1, int64 _Padding = 0, int64 _OutputPadding = 0, int64 _Dilation = 1, int64 _Groups = 1);
25 | Tensor conv_transpose3d(const Tensor& _Input, const Tensor& _Weight, const Tensor& _Bias,
26 | 	int64 _Stride = 1, int64 _Padding = 0, int64 _OutputPadding = 0, int64 _Dilation = 1, int64 _Groups = 1);
27 | LibDLVoiceCodecEnd


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/DataStruct/README.md:
--------------------------------------------------------------------------------
1 | ## KdTree From J. Frederico Carvalho
2 | 


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/InferTools/Cluster/MoeVSBaseCluster.cpp:
--------------------------------------------------------------------------------
1 | #include "MoeVSBaseCluster.hpp"
2 | #include "../inferTools.hpp"
3 | 
4 | std::vector<float> MoeVoiceStudioCluster::MoeVoiceStudioBaseCluster::find(float* point, long sid, int64_t n_points) // base-class version: always throws, arguments are unused
5 | {
6 | 	LibDLVoiceCodecThrow("NotImplementedError"); // the macro expands to a throw statement, so no value is ever returned
7 | }


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/InferTools/Cluster/MoeVSBaseCluster.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * FileName: MoeVSBaseCluster.hpp
 3 |  * Note: MoeVoiceStudioCore 聚类基类
 4 |  *
 5 |  * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com)
 6 |  *
 7 |  * This file is part of MoeVoiceStudioCore library.
 8 |  * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the
 9 |  * GNU Affero General Public License as published by the Free Software Foundation, either version 3
10 |  * of the License, or any later version.
11 |  *
12 |  * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
13 |  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 |  * See the GNU Affero General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Affero General Public License along with MoeVoiceStudioCore library.
17 |  * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>.
18 |  *
19 |  * date: 2022-10-17 Create
20 | */
21 | 
22 | #pragma once
23 | #include <vector>
24 | 
25 | #define MoeVoiceStudioClusterHeader namespace MoeVoiceStudioCluster {
26 | #define MoeVoiceStudioClusterEnd }
27 | 
28 | MoeVoiceStudioClusterHeader
29 | 
30 | class MoeVoiceStudioBaseCluster
31 | {
32 | public:
33 | 	MoeVoiceStudioBaseCluster() = default;
34 | 	virtual ~MoeVoiceStudioBaseCluster() = default;
35 | 
36 | 	/**
37 | 	 * \brief Find the nearest cluster point(s)
38 | 	 * \param point Point(s) to look up
39 | 	 * \param sid Speaker (character) ID
40 | 	 * \param n_points Number of points
41 | 	 * \return The nearest point(s) found
42 | 	 */
43 | 	virtual std::vector<float> find(float* point, long sid, int64_t n_points = 1);
44 | };
45 | 
46 | MoeVoiceStudioClusterEnd


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/InferTools/Cluster/MoeVSClusterManager.cpp:
--------------------------------------------------------------------------------
 1 | #include "MoeVSClusterManager.hpp"
 2 | #include <map>
 3 | #include <stdexcept>
 4 | #include "../../Logger/MoeSSLogger.hpp"
 5 | 
 6 | MoeVoiceStudioClusterHeader
 7 | 
 8 | std::map<std::wstring, GetMoeVSClusterFn> RegisteredMoeVSCluster;
 9 | 
10 | MoeVSCluster GetMoeVSCluster(const std::wstring& _name, const std::wstring& _path, size_t hidden_size, size_t KmeansLen)
11 | {
12 | 	const auto entry = RegisteredMoeVSCluster.find(_name); // factory registered under _name, if any
13 | 	if (entry == RegisteredMoeVSCluster.end())
14 | 		throw std::runtime_error("Unable To Find An Available MoeVSCluster");
15 | 	return entry->second(_path, hidden_size, KmeansLen); // build the cluster through the registered factory
16 | }
17 | 
18 | void RegisterMoeVSCluster(const std::wstring& _name, const GetMoeVSClusterFn& _constructor_fn)
19 | {
20 | 	// try_emplace inserts only when _name is not yet a key, so a factory that
21 | 	// is already registered is never replaced.
22 | 	const bool inserted = RegisteredMoeVSCluster.try_emplace(_name, _constructor_fn).second;
23 | 	// A duplicate name is reported as a warning and otherwise ignored.
24 | 	if (!inserted)
25 | 		logger.log(L"[Warn] MoeVSClusterNameConflict");
26 | }
27 | 
28 | MoeVoiceStudioClusterEnd


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/InferTools/Cluster/MoeVSKmeansCluster.cpp:
--------------------------------------------------------------------------------
 1 | #include "MoeVSKmeansCluster.hpp"
 2 | #include "../inferTools.hpp"
 3 | 
 4 | std::vector<float> MoeVoiceStudioCluster::KMeansCluster::find(float* point, long sid, int64_t n_points)
 5 | {
 6 | 	// No tree for this speaker id: hand the input points back unchanged.
 7 | 	if (size_t(sid) >= _tree.size())
 8 | 		return { point, point + dims * n_points };
 9 | 	std::vector<float> nearest;
10 | 	nearest.reserve(dims * n_points * 2);
11 | 	for (int64_t idx = 0; idx < n_points; ++idx)
12 | 	{
13 | 		float* const first = point + idx * dims; // start of the idx-th point (dims floats each)
14 | 		const auto hit = _tree[sid].nearest_point({ first, first + dims });
15 | 		nearest.insert(nearest.end(), hit.begin(), hit.end());
16 | 	}
17 | 	return nearest;
18 | }
19 | 
20 | MoeVoiceStudioCluster::KMeansCluster::KMeansCluster(const std::wstring& _path, size_t hidden_size, size_t KmeansLen)
21 | {
22 | 	// Reads "<_path>/KMeans.npy" as rows of hidden_size floats; each run of KmeansLen rows becomes one KDTree.
23 | 	dims = hidden_size;
24 | 	const size_t ec = size_t(hidden_size) * sizeof(float);
25 | 	// fread() of zero bytes returns 0, which equals ec and would keep the read loop running forever.
26 | 	if (ec == 0)
27 | 		LibDLVoiceCodecThrow("KMeansInvalidHiddenSize");
28 | 	FILE* file = nullptr;
29 | 	_wfopen_s(&file, (_path + L"/KMeans.npy").c_str(), L"rb");
30 | 	if (!file)
31 | 		LibDLVoiceCodecThrow("KMeansFileNotExist");
32 | 	try
33 | 	{
34 | 		constexpr long idx = 128; // fixed offset skipped before the data (presumably the .npy header -- TODO confirm)
35 | 		fseek(file, idx, SEEK_SET);
36 | 		std::vector<float> tmpData(hidden_size);
37 | 		std::vector<std::vector<float>> _tmp;
38 | 		_tmp.reserve(KmeansLen);
39 | 		while (fread(tmpData.data(), 1, ec, file) == ec)
40 | 		{
41 | 			_tmp.emplace_back(tmpData);
42 | 			if (_tmp.size() == KmeansLen)
43 | 			{
44 | 				_tree.emplace_back(_tmp);
45 | 				_tmp.clear();
46 | 			}
47 | 		}
48 | 		// Rows left in _tmp (fewer than KmeansLen) are discarded, as before.
49 | 	}
50 | 	catch (...)
51 | 	{
52 | 		fclose(file); // do not leak the handle when allocation or tree construction throws
53 | 		throw;
54 | 	}
55 | 	fclose(file); // the stream was previously never closed
56 | }
43 | 


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/InferTools/Cluster/MoeVSKmeansCluster.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * FileName: MoeVSKmeansCluster.hpp
 3 |  * Note: MoeVoiceStudioCore 官方聚类(Kmeans)
 4 |  *
 5 |  * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com)
 6 |  *
 7 |  * This file is part of MoeVoiceStudioCore library.
 8 |  * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the
 9 |  * GNU Affero General Public License as published by the Free Software Foundation, either version 3
10 |  * of the License, or any later version.
11 |  *
12 |  * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
13 |  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 |  * See the GNU Affero General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Affero General Public License along with MoeVoiceStudioCore library.
17 |  * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>.
18 |  *
19 |  * date: 2022-10-17 Create
20 | */
21 | 
22 | #pragma once
23 | #include "MoeVSBaseCluster.hpp"
24 | #include "../../DataStruct/KDTree.hpp"
25 | #include <string>
26 | 
27 | MoeVoiceStudioClusterHeader
28 | 
29 | class KMeansCluster : public MoeVoiceStudioBaseCluster // cluster lookup that keeps one KDTree per speaker id (find() indexes _tree by sid)
30 | {
31 | public:
32 | 	KMeansCluster() = delete; // a data path and sizes are always required
33 | 	~KMeansCluster() override = default;
34 | 	KMeansCluster(const std::wstring& _path, size_t hidden_size, size_t KmeansLen); // reads "<_path>/KMeans.npy"; hidden_size becomes dims, every KmeansLen rows form one tree
35 | 	std::vector<float> find(float* point, long sid, int64_t n_points = 1) override; // nearest stored point for each input point, or the input copied back when sid has no tree
36 | private:
37 | 	std::vector<KDTree> _tree; // indexed by sid
38 | 	size_t dims = 0; // number of floats per point
39 | };
40 | 
41 | MoeVoiceStudioClusterEnd


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/InferTools/F0Extractor/BaseF0Extractor/BaseF0Extractor.cpp:
--------------------------------------------------------------------------------
 1 | #include "BaseF0Extractor.hpp"
 2 | #include <map>
 3 | #include "../../../Logger/MoeSSLogger.hpp"
 4 | #include "../../inferTools.hpp"
 5 | 
 6 | MoeVSF0Extractor::BaseF0Extractor::BaseF0Extractor(int sampling_rate, int hop_size, int n_f0_bins, double max_f0, double min_f0) :
 7 | 	fs(sampling_rate), hop(hop_size), f0_bin(n_f0_bins),
 8 | 	f0_max(max_f0), f0_min(min_f0)
 9 | {
10 | 	// The plain settings are stored by the initializer list above.
11 | 	// Mel-scale images of both F0 bounds: mel = 1127 * ln(1 + hz / 700).
12 | 	const auto hz_to_mel = [](double hz) { return 1127.0 * log(1.0 + hz / 700.0); };
13 | 	f0_mel_max = hz_to_mel(f0_max);
14 | 	f0_mel_min = hz_to_mel(f0_min);
15 | }
16 | 
17 | std::vector<double> MoeVSF0Extractor::BaseF0Extractor::arange(double start, double end, double step, double div)
18 | {
19 | 	// Returns start/div, (start+step)/div, ... for as long as the running value stays below end.
20 | 	std::vector<double> output;
21 | 	if (step <= 0.0) // a zero or negative step never reaches end: return empty instead of looping forever
22 | 		return output;
23 | 	for (; start < end; start += step)
24 | 		output.push_back(start / div);
25 | 	return output;
26 | }
27 | 
28 | std::vector<float> MoeVSF0Extractor::BaseF0Extractor::ExtractF0(const std::vector<double>& PCMData, size_t TargetLength) // base-class version: always throws, arguments are unused
29 | {
30 | 	LibDLVoiceCodecThrow("NotImplementedError"); // the macro expands to a throw statement, so no value is ever returned
31 | }
32 | 
33 | std::vector<float> MoeVSF0Extractor::BaseF0Extractor::ExtractF0(const std::vector<float>& PCMData, size_t TargetLength)
34 | {
35 | 	// Widen every float sample to double, then reuse the double-precision overload
36 | 	// (derived extractors declare that overload with `override`).
37 | 	const std::vector<double> PCMVector(PCMData.begin(), PCMData.end());
38 | 	return ExtractF0(PCMVector, TargetLength);
39 | }
40 | 
41 | std::vector<float> MoeVSF0Extractor::BaseF0Extractor::ExtractF0(const std::vector<int16_t>& PCMData, size_t TargetLength)
42 | {
43 | 	// Convert every 16-bit sample to double (values are not rescaled), then reuse
44 | 	// the double-precision overload.
45 | 	const std::vector<double> PCMVector(PCMData.begin(), PCMData.end());
46 | 	return ExtractF0(PCMVector, TargetLength);
47 | }


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/InferTools/F0Extractor/DioF0Extractor/DioF0Extractor.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * FileName: DioF0Extractor.hpp
 3 |  * Note: MoeVoiceStudioCore 官方F0提取算法 Dio
 4 |  *
 5 |  * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com)
 6 |  *
 7 |  * This file is part of MoeVoiceStudioCore library.
 8 |  * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the
 9 |  * GNU Affero General Public License as published by the Free Software Foundation, either version 3
10 |  * of the License, or any later version.
11 |  *
12 |  * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
13 |  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 |  * See the GNU Affero General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Affero General Public License along with MoeVoiceStudioCore library.
17 |  * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>.
18 |  *
19 |  * date: 2022-10-17 Create
20 | */
21 | 
22 | #pragma once
23 | #include "../BaseF0Extractor/BaseF0Extractor.hpp"
24 | 
25 | MoeVoiceStudioF0ExtractorHeader
26 | class DioF0Extractor : public BaseF0Extractor // F0 extractor named after the Dio algorithm; only declarations are visible in this header
27 | {
28 | public:
29 | 	DioF0Extractor(int sampling_rate, int hop_size, int n_f0_bins = 256, double max_f0 = 1100.0, double min_f0 = 50.0); // defaults: 256 F0 bins, F0 range 50..1100 (presumably Hz -- TODO confirm)
30 | 
31 | 	~DioF0Extractor() override = default;
32 | 
33 | 	void compute_f0(const double* PCMData, size_t PCMLen); // pointer-plus-length input; presumably fills refined_f0 -- implementation not shown
34 | 
35 | 	void InterPf0(size_t TargetLength); // presumably resamples the F0 curve to TargetLength entries -- TODO confirm
36 | 
37 | 	std::vector<float> ExtractF0(const std::vector<double>& PCMData, size_t TargetLength) override; // overrides the double-precision overload of the base class
38 | private:
39 | 	std::vector<double> refined_f0; // working buffer; its contents are defined by the implementation file
40 | };
41 | MoeVoiceStudioF0ExtractorEnd


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/InferTools/F0Extractor/F0ExtractorManager.cpp:
--------------------------------------------------------------------------------
 1 | #include "F0ExtractorManager.hpp"
 2 | #include <map>
 3 | #include <stdexcept>
 4 | #include "../../Logger/MoeSSLogger.hpp"
 5 | 
 6 | MoeVoiceStudioF0ExtractorHeader
 7 | std::map<std::wstring, GetF0ExtractorFn> RegisteredF0Extractors;
 8 | 
 9 | F0Extractor GetF0Extractor(const std::wstring& _name,
10 | 	const uint32_t fs,
11 | 	const uint32_t hop,
12 | 	const uint32_t f0_bin,
13 | 	const double f0_max,
14 | 	const double f0_min)
15 | {
16 | 	const auto entry = RegisteredF0Extractors.find(_name); // factory registered under _name, if any
17 | 	if (entry == RegisteredF0Extractors.end())
18 | 		throw std::runtime_error("Unable To Find An Available F0Extractor");
19 | 	return entry->second(fs, hop, f0_bin, f0_max, f0_min);
20 | }
21 | 
22 | void RegisterF0Extractor(const std::wstring& _name, const GetF0ExtractorFn& _constructor_fn)
23 | {
24 | 	// try_emplace inserts only when _name is not yet a key, so an extractor
25 | 	// that is already registered is never replaced.
26 | 	const bool inserted = RegisteredF0Extractors.try_emplace(_name, _constructor_fn).second;
27 | 	// A duplicate name is reported as a warning and otherwise ignored.
28 | 	if (!inserted)
29 | 		logger.log(L"[Warn] F0ExtractorNameConflict");
30 | }
31 | 
32 | std::vector<std::wstring> GetF0ExtractorList()
33 | {
34 | 	std::vector<std::wstring> names; // registered names, in the map's key order
35 | 	names.reserve(RegisteredF0Extractors.size());
36 | 	for (auto it = RegisteredF0Extractors.begin(); it != RegisteredF0Extractors.end(); ++it)
37 | 		names.push_back(it->first);
38 | 	return names;
39 | }
40 | 
41 | MoeVoiceStudioF0ExtractorEnd


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/InferTools/F0Extractor/HarvestF0Extractor/HarvestF0Extractor.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * FileName: HarvestF0Extractor.hpp
 3 |  * Note: MoeVoiceStudioCore 官方F0提取算法 Harvest
 4 |  *
 5 |  * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com)
 6 |  *
 7 |  * This file is part of MoeVoiceStudioCore library.
 8 |  * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the
 9 |  * GNU Affero General Public License as published by the Free Software Foundation, either version 3
10 |  * of the License, or any later version.
11 |  *
12 |  * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
13 |  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 |  * See the GNU Affero General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Affero General Public License along with MoeVoiceStudioCore library.
17 |  * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>.
18 |  *
19 |  * date: 2022-10-17 Create
20 | */
21 | 
22 | #pragma once
23 | #include "../BaseF0Extractor/BaseF0Extractor.hpp"
24 | 
25 | MoeVoiceStudioF0ExtractorHeader
26 | class HarvestF0Extractor : public BaseF0Extractor // F0 extractor named after the Harvest algorithm; only declarations are visible in this header
27 | {
28 | public:
29 | 	HarvestF0Extractor(int sampling_rate, int hop_size, int n_f0_bins = 256, double max_f0 = 1100.0, double min_f0 = 50.0); // defaults: 256 F0 bins, F0 range 50..1100 (presumably Hz -- TODO confirm)
30 | 
31 | 	~HarvestF0Extractor() override = default;
32 | 
33 | 	void compute_f0(const double* PCMData, size_t PCMLen); // pointer-plus-length input; presumably fills refined_f0 -- implementation not shown
34 | 
35 | 	void InterPf0(size_t TargetLength); // presumably resamples the F0 curve to TargetLength entries -- TODO confirm
36 | 
37 | 	std::vector<float> ExtractF0(const std::vector<double>& PCMData, size_t TargetLength) override; // overrides the double-precision overload of the base class
38 | 
39 | private:
40 | 	std::vector<double> refined_f0; // working buffer; its contents are defined by the implementation file
41 | };
42 | MoeVoiceStudioF0ExtractorEnd


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/InferTools/Sampler/MoeVSBaseSampler.cpp:
--------------------------------------------------------------------------------
 1 | #include "MoeVSBaseSampler.hpp"
 2 | #include "../inferTools.hpp"
 3 | MoeVoiceStudioSamplerHeader
 4 | 
 5 | MoeVSBaseSampler::MoeVSBaseSampler(Ort::Session* alpha, Ort::Session* dfn, Ort::Session* pred, int64_t Mel_Bins, const ProgressCallback& _ProgressCallback, Ort::MemoryInfo* memory) :
 6 | 	MelBins(Mel_Bins), Alpha(alpha), DenoiseFn(dfn), NoisePredictor(pred), // the session pointers are stored as given
 7 | 	_callback(_ProgressCallback), Memory(memory)
 8 | {
 9 | 	// Nothing else to do: every member is set by the initializer list above.
10 | }
11 | 
12 | std::vector<Ort::Value> MoeVSBaseSampler::Sample(std::vector<Ort::Value>& Tensors, int64_t Steps, int64_t SpeedUp, float NoiseScale, int64_t Seed, size_t& Process) // base-class version: always throws, arguments are unused
13 | {
14 | 	LibDLVoiceCodecThrow("NotImplementedError"); // the macro expands to a throw statement, so no value is ever returned
15 | }
16 | 
17 | MoeVoiceStudioSamplerEnd


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/InferTools/Sampler/MoeVSBaseSampler.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * FileName: MoeVSBaseSampler.hpp
 3 |  * Note: MoeVoiceStudioCore Diffusion 采样器基类
 4 |  *
 5 |  * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com)
 6 |  *
 7 |  * This file is part of MoeVoiceStudioCore library.
 8 |  * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the
 9 |  * GNU Affero General Public License as published by the Free Software Foundation, either version 3
10 |  * of the License, or any later version.
11 |  *
12 |  * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
13 |  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 |  * See the GNU Affero General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Affero General Public License along with MoeVoiceStudioCore library.
17 |  * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>.
18 |  *
19 |  * date: 2022-10-17 Create
20 | */
21 | 
22 | #pragma once
23 | #define MoeVoiceStudioSamplerHeader namespace MoeVSSampler {
24 | #define MoeVoiceStudioSamplerEnd }
25 | #include <functional>
26 | #include <onnxruntime_cxx_api.h>
27 | MoeVoiceStudioSamplerHeader
28 | 
29 | class MoeVSBaseSampler
30 | {
31 | public:
32 | 	using ProgressCallback = std::function<void(size_t, size_t)>;
33 | 
34 | 	/**
35 | 	 * \brief Construct a sampler
36 | 	 * \param alpha ONNX model session for Alphas
37 | 	 * \param dfn ONNX model session for DenoiseFn
38 | 	 * \param pred ONNX model session for Predictor
39 | 	 * \param Mel_Bins MelBins
40 | 	 * \param _ProgressCallback Progress-bar callback (simply pass the model's own callback)
41 | 	 * \param memory The model's OrtMemoryInfo
42 | 	 */
43 | 	MoeVSBaseSampler(Ort::Session* alpha, Ort::Session* dfn, Ort::Session* pred, int64_t Mel_Bins, const ProgressCallback& _ProgressCallback, Ort::MemoryInfo* memory);
44 | 
45 | 	virtual ~MoeVSBaseSampler() = default;
46 | 
47 | 	/**
48 | 	 * \brief Sample
49 | 	 * \param Tensors Input tensors (Tensors[0] is the condition, Tensors[1] is the initial noise)
50 | 	 * \param Steps Number of sampling steps
51 | 	 * \param SpeedUp Speed-up factor
52 | 	 * \param NoiseScale Noise scale
53 | 	 * \param Seed Random seed
54 | 	 * \param Process Current progress
55 | 	 * \return Mel tensor
56 | 	 */
57 | 	virtual std::vector<Ort::Value> Sample(std::vector<Ort::Value>& Tensors, int64_t Steps, int64_t SpeedUp, float NoiseScale, int64_t Seed, size_t& Process);
58 | protected:
59 | 	int64_t MelBins = 128;
60 | 	Ort::Session* Alpha = nullptr;
61 | 	Ort::Session* DenoiseFn = nullptr;
62 | 	Ort::Session* NoisePredictor = nullptr;
63 | 	ProgressCallback _callback;
64 | 	Ort::MemoryInfo* Memory = nullptr;
65 | };
66 | 
67 | MoeVoiceStudioSamplerEnd


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/InferTools/Sampler/MoeVSSamplerManager.cpp:
--------------------------------------------------------------------------------
 1 | #include "MoeVSSamplerManager.hpp"
 2 | #include <map>
 3 | #include "../../Logger/MoeSSLogger.hpp"
 4 | MoeVoiceStudioSamplerHeader
 5 | std::map<std::wstring, GetMoeVSSamplerFn> RegisteredMoeVSSamplers;
 6 | 
 7 | MoeVSSampler GetMoeVSSampler(const std::wstring& _name,
 8 | 	Ort::Session* alpha,
 9 | 	Ort::Session* dfn,
10 | 	Ort::Session* pred,
11 | 	int64_t Mel_Bins,
12 | 	const MoeVSBaseSampler::ProgressCallback& _ProgressCallback,
13 | 	Ort::MemoryInfo* memory)
14 | {
15 | 	const auto entry = RegisteredMoeVSSamplers.find(_name); // factory registered under _name, if any
16 | 	if (entry == RegisteredMoeVSSamplers.end())
17 | 		throw std::runtime_error("Unable To Find An Available Sampler");
18 | 	return entry->second(alpha, dfn, pred, Mel_Bins, _ProgressCallback, memory);
19 | }
20 | 
21 | void RegisterMoeVSSampler(const std::wstring& _name, const GetMoeVSSamplerFn& _constructor_fn)
22 | {
23 | 	if (RegisteredMoeVSSamplers.find(_name) != RegisteredMoeVSSamplers.end()) // the first factory registered for a name is kept
24 | 	{
25 | 		logger.log(L"[Warn] MoeVSSamplerNameConflict"); // previously said "F0ExtractorNameConflict", copied from the F0 extractor manager
26 | 		return;
27 | 	}
28 | 	RegisteredMoeVSSamplers[_name] = _constructor_fn;
29 | }
30 | 
31 | std::vector<std::wstring> GetMoeVSSamplerList()
32 | {
33 | 	std::vector<std::wstring> names; // registered names, in the map's key order
34 | 	names.reserve(RegisteredMoeVSSamplers.size());
35 | 	for (auto it = RegisteredMoeVSSamplers.begin(); it != RegisteredMoeVSSamplers.end(); ++it)
36 | 		names.push_back(it->first);
37 | 	return names;
38 | }
39 | MoeVoiceStudioSamplerEnd


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/InferTools/Stft/stft.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <vector>
 3 | #include "fftw3.h"
 4 | 
 5 | namespace DlCodecStft
 6 | {
 7 |     class STFT // short-time Fourier transform front end; member definitions live outside this header
 8 |     {
 9 |     public:
10 |         STFT() = default; // keeps the in-class defaults below (2048 / 512 / 1025)
11 |         ~STFT();
12 |         STFT(int WindowSize, int HopSize, int FFTSize = 0); // NOTE(review): how FFTSize == 0 is interpreted is not visible here
13 |         inline static double PI = 3.14159265358979323846; // NOTE(review): non-const, so any code can modify it; constexpr would be safer
14 |         std::pair<std::vector<float>, int64_t> operator()(const std::vector<double>& audioData) const; // presumably (flattened spectrogram, frame count) -- TODO confirm
15 |     private:
16 |     	int WINDOW_SIZE = 2048;
17 |     	int HOP_SIZE = WINDOW_SIZE / 4; // 512 with the default window
18 |     	int FFT_SIZE = WINDOW_SIZE / 2 + 1; // 1025 with the default window
19 |     };
20 | 
21 |     class Mel // mel front end that owns an STFT instance
22 |     {
23 |     public:
24 |         Mel() = delete; // sizes and sampling rate must be supplied
25 |         ~Mel() = default;
26 |         Mel(int WindowSize, int HopSize, int SamplingRate, int MelSize = 0); // NOTE(review): how MelSize == 0 is interpreted is not visible here
27 |         std::pair<std::vector<float>, int64_t> GetMel(const std::vector<double>& audioData) const;
28 |         std::pair<std::vector<float>, int64_t> operator()(const std::vector<double>& audioData) const; // same signature as GetMel; presumably forwards to it -- TODO confirm
29 |     private:
30 |         STFT stft;
31 |         int MEL_SIZE = 128;
32 |         int FFT_SIZE = 0;
33 |         int sr = 22050; // default sampling rate (presumably Hz)
34 |         std::vector<float> MelBasis; // presumably the mel filter-bank weights -- TODO confirm
35 |     };
36 | }
37 | 


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/InferTools/TensorExtractor/TensorExtractorManager.cpp:
--------------------------------------------------------------------------------
 1 | #include "TensorExtractorManager.hpp"
 2 | #include <map>
 3 | #include "../../Logger/MoeSSLogger.hpp"
 4 | 
 5 | MoeVoiceStudioTensorExtractorHeader
 6 | 	inline std::map<std::wstring, GetTensorExtractorFn> RegisteredTensorExtractors;
 7 | 
 8 | void RegisterTensorExtractor(const std::wstring& _name, const GetTensorExtractorFn& _constructor_fn)
 9 | {
10 | 	if (RegisteredTensorExtractors.find(_name) != RegisteredTensorExtractors.end())
11 | 	{
12 | 		logger.log(L"[Warn] TensorExtractorNameConflict");
13 | 		return;
14 | 	}
15 | 	RegisteredTensorExtractors[_name] = _constructor_fn;
16 | }
17 | 
18 | TensorExtractor GetTensorExtractor(const std::wstring& _name, uint64_t _srcsr, uint64_t _sr, uint64_t _hop, bool _smix, bool _volume, uint64_t _hidden_size, uint64_t _nspeaker, const MoeVoiceStudioTensorExtractor::Others& _other)
19 | {
20 | 	const auto f_TensorExtractor = RegisteredTensorExtractors.find(_name);
21 | 	if (f_TensorExtractor != RegisteredTensorExtractors.end())
22 | 		return f_TensorExtractor->second(_srcsr, _sr, _hop, _smix, _volume, _hidden_size, _nspeaker, _other);
23 | 	throw std::runtime_error("Unable To Find An Available TensorExtractor");
24 | }
25 | 
26 | MoeVoiceStudioTensorExtractorEnd


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/Logger/MoeSSLogger.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <string>
 3 | #include "../StringPreprocess.hpp"
 4 | #include <filesystem>
 5 | #include <mutex>
 6 | #define __MOEVS_DEBUG_MESSAGE(msg) __MOEVS_DEBUG_INFO(__FILE__, __LINE__, msg)
 7 | #define logger MoeSSLogger::GetLogger()
 8 | inline std::string __MOEVS_DEBUG_INFO(const char* filename, int line, const char* msg)
 9 | {
10 | 	return std::string("[In \"") + std::filesystem::path(filename).filename().string() + "\" Line " + std::to_string(line) + "] " + msg;
11 | }
12 | 
// Wide-character variant: formats a debug message as
// [In "<file name>" Line <line>] <msg>
// Only the last component of `filename` is kept.
inline std::wstring __MOEVS_DEBUG_INFO(const char* filename, int line, const wchar_t* msg)
{
	const std::wstring source_name = std::filesystem::path(filename).filename().wstring();

	std::wstring text(L"[In \"");
	text += source_name;
	text += L"\" Line ";
	text += std::to_wstring(line);
	text += L"] ";
	text += msg;
	return text;
}
17 | 
namespace MoeSSLogger
{
	/**
	 * Logger with separate "log" and "error" channels, each accepting either
	 * a wide string or a narrow C string.
	 *
	 * Only the interface is declared here; the implementation lives in the
	 * matching source file.
	 * NOTE(review): the std::mutex member presumably serialises writes from
	 * multiple threads - confirm against the implementation.
	 */
	class Logger
	{
	public:
		Logger();
		~Logger();

		// Ordinary messages.
		void log(const std::wstring&);
		void log(const char*);

		// Error messages.
		void error(const std::wstring&);
		void error(const char*);

	private:
		std::filesystem::path cur_log_dir;
		std::filesystem::path logpath;
		std::filesystem::path errorpath;
		FILE* log_file = nullptr;
		FILE* error_file = nullptr;
		std::mutex mx;
	};

	// Accessor behind the `logger` macro.
	Logger& GetLogger();
}


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/Models/EnvManager.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * FileName: EnvManager.hpp
 3 |  * Note: MoeVoiceStudioCore 环境管理
 4 |  *
 5 |  * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com)
 6 |  *
 7 |  * This file is part of MoeVoiceStudioCore library.
 8 |  * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the
 9 |  * GNU Affero General Public License as published by the Free Software Foundation, either version 3
10 |  * of the License, or any later version.
11 |  *
12 |  * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
13 |  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 |  * See the GNU Affero General Public License for more details.
15 |  *
 16 |  * You should have received a copy of the GNU Affero General Public License along with MoeVoiceStudioCore library.
17 |  * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>.
18 |  *
19 |  * date: 2022-10-17 Create
20 | */
21 | 
22 | #pragma once
23 | #include <onnxruntime_cxx_api.h>
24 | 
25 | #define MoeVoiceStudioCoreEnvManagerHeader namespace moevsenv{
26 | #define MoeVoiceStudioCoreEnvManagerEnd }
27 | 
28 | MoeVoiceStudioCoreEnvManagerHeader
29 | class MoeVoiceStudioEnv
30 | {
31 | public:
32 | 	MoeVoiceStudioEnv() = default;
33 | 	~MoeVoiceStudioEnv() { Destory(); }
34 | 	void Load(unsigned ThreadCount, unsigned DeviceID, unsigned Provider);
35 | 	void Destory();
36 | 	[[nodiscard]] bool IsEnabled() const;
37 | 	[[nodiscard]] Ort::Env* GetEnv() const { return GlobalOrtEnv; }
38 | 	[[nodiscard]] Ort::SessionOptions* GetSessionOptions() const { return GlobalOrtSessionOptions; }
39 | 	[[nodiscard]] Ort::MemoryInfo* GetMemoryInfo() const { return GlobalOrtMemoryInfo; }
40 | 	[[nodiscard]] int GetCurThreadCount() const { return (int)CurThreadCount; }
41 | 	[[nodiscard]] int GetCurDeviceID() const { return (int)CurDeviceID; }
42 | 	[[nodiscard]] int GetCurProvider() const { return (int)CurProvider; }
43 | private:
44 | 	void Create(unsigned ThreadCount_, unsigned DeviceID_, unsigned ExecutionProvider_);
45 | 	Ort::Env* GlobalOrtEnv = nullptr;
46 | 	Ort::SessionOptions* GlobalOrtSessionOptions = nullptr;
47 | 	Ort::MemoryInfo* GlobalOrtMemoryInfo = nullptr;
48 | 	unsigned CurThreadCount = unsigned(-1);
49 | 	unsigned CurDeviceID = unsigned(-1);
50 | 	unsigned CurProvider = unsigned(-1);
51 | 	OrtCUDAProviderOptionsV2* cuda_option_v2 = nullptr;
52 | };
53 | 
54 | MoeVoiceStudioEnv& GetGlobalMoeVSEnv();
55 | 
56 | MoeVoiceStudioCoreEnvManagerEnd


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/Models/header/Tacotron.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "ModelBase.hpp"
 3 | 
 4 | INFERCLASSHEADER
 5 | 
 6 | class Tacotron2 : public TTS
 7 | {
 8 | public:
 9 |     Tacotron2(const MJson&, const callback&, const callback_params&, const DurationCallback&, Device _dev = Device::CPU);
10 | 
11 | 	~Tacotron2() override;
12 | 
13 |     std::vector<int16_t> Inference(std::wstring& _inputLens) const override;
14 | 
15 |     [[nodiscard]] std::vector<int16_t> Inference(const MoeVSProject::TTSParams& _input) const override;
16 | 
17 |     static void cat(std::vector<float>& tensorA, std::vector<int64>& Shape, const MTensor& tensorB) {
18 |         const int64 n = Shape[1];
19 |         for (int64 i = n; i > 0; --i)
20 |             tensorA.insert(tensorA.begin() + (i * Shape[2]), tensorB.GetTensorData<float>()[i - 1]);
21 |         ++Shape[2];
22 |     }
23 | private:
24 |     Ort::Session* sessionEncoder = nullptr;
25 |     Ort::Session* sessionDecoderIter = nullptr;
26 |     Ort::Session* sessionPostNet = nullptr;
27 |     Ort::Session* sessionGan = nullptr;
28 | 
29 |     const std::vector<const char*> ganIn = { "x" };
30 |     const std::vector<const char*> ganOut = { "audio" };
31 |     const std::vector<const char*> inputNodeNamesSessionEncoder = { "sequences","sequence_lengths" };
32 |     const std::vector<const char*> outputNodeNamesSessionEncoder = { "memory","processed_memory","lens" };
33 |     const std::vector<const char*> inputNodeNamesSessionDecoderIter = { "decoder_input","attention_hidden","attention_cell","decoder_hidden","decoder_cell","attention_weights","attention_weights_cum","attention_context","memory","processed_memory","mask" };
34 |     const std::vector<const char*> outputNodeNamesSessionDecoderIter = { "decoder_output","gate_prediction","out_attention_hidden","out_attention_cell","out_decoder_hidden","out_decoder_cell","out_attention_weights","out_attention_weights_cum","out_attention_context" };
35 |     const std::vector<const char*> inputNodeNamesSessionPostNet = { "mel_outputs" };
36 |     const std::vector<const char*> outputNodeNamesSessionPostNet = { "mel_outputs_postnet" };
37 | };
38 | 
39 | INFERCLASSEND


--------------------------------------------------------------------------------
/libdlvoicecodec/Modules/README.md:
--------------------------------------------------------------------------------
 1 | # Example
 2 | ```c++
 3 | #include "Modules/Models/header/Vits.hpp"
 4 | 
 5 | int main(){
 6 |   rapidjson::Document Config;
 7 |   Config.Parse("Your Config");
 8 |   
 9 |   //Progress bar
10 |   InferClass::BaseModelType::callback a_callback = [](size_t a, size_t b) {std::cout << std::to_string((float)a * 100.f / (float)b) << "%\n"; };
11 |   
12 |   //return params for inference
13 |   InferClass::BaseModelType::callback_params b_callback = []()  
14 | 	{
15 | 		auto cbaaa = InferClass::InferConfigs();
16 | 		cbaaa.kmeans_rate = 0.5;
17 | 		cbaaa.keys = 0;
18 | 		return cbaaa;
19 | 	};
20 |   
21 |   //modify duration per phoneme
22 |   InferClass::TTS::DurationCallback c_callback = [](std::vector<float>&) {};
23 |   
24 |   std::vector<int16_t> output;
25 |   try
26 |   {
 27 |   	std::wstring inp(L"watashinoonaniomitekudasai");
 28 |   	auto model = dynamic_cast<InferClass::BaseModelType*>(new InferClass::VitsSvc(Config, a_callback, b_callback));
29 |     
30 |   	output = model->Inference(inp);
31 |     
32 |   	Wav outWav(model->GetSamplingRate(), output.size() * 2, output.data());
33 |   	outWav.Writef(L"test.wav");
34 |     
35 |   	delete model;
36 |   }
37 |   catch(std::exception& e)
38 |   {
39 |   	std::cout << e.what();
40 |   }
41 | }
42 | 
43 | ```
44 | 


--------------------------------------------------------------------------------
/libdlvoicecodec/MoeVoiceStudioSvc - Core - Cmd.vcxproj.filters:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <ItemGroup>
 4 |     <Filter Include="源文件">
 5 |       <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
 6 |       <Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
 7 |     </Filter>
 8 |     <Filter Include="头文件">
 9 |       <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
10 |       <Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
11 |     </Filter>
12 |     <Filter Include="资源文件">
13 |       <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
14 |       <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
15 |     </Filter>
16 |   </ItemGroup>
17 |   <ItemGroup>
18 |     <ClCompile Include="MoeVoiceStudioSvc - Core - Cmd.cpp">
19 |       <Filter>源文件</Filter>
20 |     </ClCompile>
21 |   </ItemGroup>
22 |   <ItemGroup>
23 |     <None Include="..\README.md">
24 |       <Filter>资源文件</Filter>
25 |     </None>
26 |     <None Include="..\README_en.md">
27 |       <Filter>资源文件</Filter>
28 |     </None>
29 |     <None Include="..\.gitignore" />
30 |     <None Include="packages.config" />
31 |     <None Include="$(MSBuildThisFileDirectory)..\..\runtimes\win-x64\native\onnxruntime.dll" />
32 |     <None Include="analyse\GptSoVits.md">
33 |       <Filter>资源文件</Filter>
34 |     </None>
35 |   </ItemGroup>
36 | </Project>


--------------------------------------------------------------------------------
/libdlvoicecodec/analyse/GptSoVits.md:
--------------------------------------------------------------------------------
 1 | # GptSoVits主要分为三个部分
 2 | 
 3 | ### VQ(内核为KMeans聚类)
 4 | - Train:对训练集音频的ssl[^1]进行KMeans聚类,获取到的聚类中心构造一个Embedding(CodeBook.embed)
 5 | - Infer:使用Indices获取聚类中心(CodeBook.embed)中的元素,构造一个ssl[^1]矩阵
 6 | 
 7 | 与SoVits的KMeans/Index聚类类似,只不过SoVits的聚类在使用时是使用输入的HuBert在CodeBook中查找与其距离排名前K的点后加权平均,而GptSoVits则是使用一个AR循环预测所需的HuBert在CodeBook中的下标,之后使用该下标获取CodeBook中对应元素
 8 | 
 9 | ---
10 | 
11 | ### AR(GPT)
12 | - Inputs:
13 | 	- text_seq:输入文本音素序列的数字ID(在Symbols数组中的下标)
14 | 	- text_bert:输入文本的Bert
15 | 	- ref_seq:参考文本音素序列的数字ID(在Symbols数组中的下标)
16 | 	- ref_bert:参考文本的Bert
17 | 	- ref_ssl:参考音频的ssl[^1]
18 | - OutPuts:
19 | 	- codes:输入到VQ的Indices,用于获取ssl[^1]的聚类中心
20 | 
21 | 与Gpt类似,使用一个AR循环,通过输入文本编码后的信息预测一个响应序列(序列终止为EOS),该响应序列为训练集音频聚类后的聚类中心在CodeBook中的下标,之后会从CodeBook中获取相应的元素,相当于SoVits中的Hubert。
22 | 
23 | ---
24 | 
25 | ### SoVits
26 | - Inputs:
27 | 	- codes:输入到VQ的Indices,用于获取ssl[^1]的聚类中心
28 | 	- text_seq:输入文本音素序列的数字ID(在Symbols数组中的下标)
29 | 	- ref_audio:参考音频(训练集内音频)
30 | 
31 | 与SoVits比较,其中的codes实际上相当于SoVits的Hubert,只不过这个Hubert是使用AR预测所得序列生成的。
 32 | GptSoVits使用输入音素的Embedding、AR预测所得的Hubert以及参考音频的Mel共同指导音频生成,可以有效地控制音频的语气和感情。
33 | 然而在一些时候,会出现漏字和错字的情况,可能和AR有较大的关系
34 | 
35 | ---
36 | 
37 | ### 实验方案
 38 | 将GptSoVits中的AR部分去除,并将VQ的输入从Indices(codes)替换为ssl(即使用最近邻搜索),即可获得一个svc模型。
39 | 
40 | 两个音频,一个训练集参考音频,一个输入音频。需完成以下步骤。
41 | 
42 | 1、训练集参考音频直接编码为mel记作ref_audio。
43 | 
44 | 2、输入音频经过一个asr处理为音素序列记作text_seq。
45 | 
 46 | 3、输入音频经过Hubert后使用最近邻搜索,从VQ的Embedding(CodeBook.embed)中取元素,记作ssl。
47 | 
48 | 4、将ssl,text_seq和ref_audio作为vits的输入进行推理。
49 | 
50 | 
51 | ---
52 | 
53 | [^1]: ssl其实就是音频的Hubert,与SoVits的Hubert一致
54 | 


--------------------------------------------------------------------------------
/libdlvoicecodec/input.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/libdlvoicecodec/input.wav


--------------------------------------------------------------------------------
/libdlvoicecodec/output.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/libdlvoicecodec/output.wav


--------------------------------------------------------------------------------
/libdlvoicecodec/packages.config:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <packages>
3 |   <package id="Microsoft.AI.DirectML" version="1.13.1" targetFramework="native" />
4 |   <package id="Microsoft.ML.OnnxRuntime.DirectML" version="1.17.1" targetFramework="native" />
5 |   <package id="VC-LTL" version="5.0.9" targetFramework="native" />
6 |   <package id="YY.NuGet.Import.Helper" version="1.0.0.4" targetFramework="native" />
7 | </packages>


--------------------------------------------------------------------------------
/libsvc/Api/header/NativeApi.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/libsvc/Api/header/NativeApi.h


--------------------------------------------------------------------------------
/libsvc/Api/header/libsvc.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/libsvc/Api/header/libsvc.h


--------------------------------------------------------------------------------
/libsvc/Api/src/NativeApi.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/libsvc/Api/src/NativeApi.cpp


--------------------------------------------------------------------------------
/libsvc/Modules/Lib/MJson/MJson.cpp:
--------------------------------------------------------------------------------
 1 | #include "MJson.h"
 2 | 
 3 | class FileGuard
 4 | {
 5 | public:
 6 | 	FileGuard() = delete;
 7 | 	~FileGuard()
 8 | 	{
 9 | 		if (_fp) fclose(_fp);
10 | 		_fp = nullptr;
11 | 	}
12 | 	FileGuard(const char* _path)
13 | 	{
14 | 		if (_fp) fclose(_fp);
15 | 		_wfopen_s(&_fp, to_wide_string(_path).c_str(), L"rb");
16 | 	}
17 | 	operator FILE* () const
18 | 	{
19 | 		return _fp;
20 | 	}
21 | private:
22 | 	FILE* _fp = nullptr;
23 | 	static std::wstring to_wide_string(const std::string& input)
24 | 	{
25 | 		std::vector<wchar_t> WideString(input.length() * 2);
26 | 		MultiByteToWideChar(
27 | 			CP_UTF8,
28 | 			0,
29 | 			input.c_str(),
30 | 			int(input.length()),
31 | 			WideString.data(),
32 | 			int(WideString.size())
33 | 		);
34 | 		return WideString.data();
35 | 	}
36 | };
37 | 
38 | MJson::MJson(const char* _path)
39 | {
40 | 	const auto file = FileGuard(_path);
41 | 	_document = yyjson_read_file(_path, YYJSON_READ_NOFLAG, nullptr, nullptr);
42 | 	if (!_document)
43 | 		throw std::exception("Json Parse Error !");
44 | 	root = yyjson_doc_get_root(_document);
45 | }
46 | 
47 | MJson::MJson(const std::string& _data, bool _read_from_string)
48 | {
49 | 	if (_read_from_string)
50 | 		_document = yyjson_read(_data.c_str(), _data.length(), YYJSON_READ_NOFLAG);
51 | 	else
52 | 	{
53 | 		const auto file = FileGuard(_data.c_str());
54 | 		_document = yyjson_read_fp(file, YYJSON_READ_NOFLAG, nullptr, nullptr);
55 | 	}
56 | 	if (!_document)
57 | 		throw std::exception("Json Parse Error !");
58 | 	root = yyjson_doc_get_root(_document);
59 | }
60 | 


--------------------------------------------------------------------------------
/libsvc/Modules/Lib/World/src/world/constantnumbers.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //
 6 | // This header file only defines constant numbers used for several function.
 7 | //-----------------------------------------------------------------------------
 8 | #ifndef WORLD_CONSTANT_NUMBERS_H_
 9 | #define WORLD_CONSTANT_NUMBERS_H_
10 | 
11 | namespace world {
12 |   // for Dio()
13 |   const double kCutOff = 50.0;
14 | 
15 |   // for StoneMask()
16 |   const double kFloorF0StoneMask = 40.0;
17 | 
18 |   const double kPi = 3.1415926535897932384;
19 |   const double kMySafeGuardMinimum = 0.000000000001;
20 |   const double kEps = 0.00000000000000022204460492503131;
21 |   const double kFloorF0 = 71.0;
22 |   const double kCeilF0 = 800.0;
23 |   const double kDefaultF0 = 500.0;
24 |   const double kLog2 = 0.69314718055994529;
25 |   // Maximum standard deviation not to be selected as a best f0.
26 |   const double kMaximumValue = 100000.0;
27 | 
28 |   // Note to me (fs: 48000)
29 |   // 71 Hz is the limit to maintain the FFT size at 2048.
30 |   // If we use 70 Hz as FLOOR_F0, the FFT size of 4096 is required.
31 | 
32 |   // for D4C()
33 |   const int kHanning = 1;
34 |   const int kBlackman = 2;
35 |   const double kFrequencyInterval = 3000.0;
36 |   const double kUpperLimit = 15000.0;
37 |   const double kThreshold = 0.85;
38 |   const double kFloorF0D4C = 47.0;
39 | 
40 |   // for Codec (Mel scale)
41 |   // S. Stevens & J. Volkmann,
42 |   // The Relation of Pitch to Frequency: A Revised Scale,
43 |   // American Journal of Psychology, vol. 53, no. 3, pp. 329-353, 1940.
44 |   const double kM0 = 1127.01048;
45 |   const double kF0 = 700.0;
46 |   const double kFloorFrequency = 40.0;
47 |   const double kCeilFrequency = 20000.0;
48 | 
49 | }  // namespace world
50 | 
51 | #endif  // WORLD_CONSTANT_NUMBERS_H_
52 | 


--------------------------------------------------------------------------------
/libsvc/Modules/Lib/World/src/world/d4c.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //-----------------------------------------------------------------------------
 6 | #ifndef WORLD_D4C_H_
 7 | #define WORLD_D4C_H_
 8 | 
 9 | #include "world/macrodefinitions.h"
10 | 
11 | WORLD_BEGIN_C_DECLS
12 | 
13 | //-----------------------------------------------------------------------------
14 | // Struct for D4C
15 | //-----------------------------------------------------------------------------
16 | typedef struct {
17 |   double threshold;
18 | } D4COption;
19 | 
20 | //-----------------------------------------------------------------------------
21 | // D4C() calculates the aperiodicity estimated by D4C.
22 | //
23 | // Input:
24 | //   x                  : Input signal
25 | //   x_length           : Length of x
26 | //   fs                 : Sampling frequency
27 | //   temporal_positions : Time axis
28 | //   f0                 : F0 contour
29 | //   f0_length          : Length of F0 contour
 30 | //   fft_size           : FFT size. The number of samples of the aperiodicity
 31 | //                      : in one frame is given by fft_size / 2 + 1.
32 | // Output:
33 | //   aperiodicity  : Aperiodicity estimated by D4C.
34 | //-----------------------------------------------------------------------------
35 | void D4C(const double *x, int x_length, int fs,
36 |     const double *temporal_positions, const double *f0, int f0_length,
37 |     int fft_size, const D4COption *option, double **aperiodicity);
38 | 
39 | //-----------------------------------------------------------------------------
40 | // InitializeD4COption allocates the memory to the struct and sets the
41 | // default parameters.
42 | //
43 | // Output:
44 | //   option   : Struct for the optional parameter.
45 | //-----------------------------------------------------------------------------
46 | void InitializeD4COption(D4COption *option);
47 | 
48 | WORLD_END_C_DECLS
49 | 
50 | #endif  // WORLD_D4C_H_
51 | 


--------------------------------------------------------------------------------
/libsvc/Modules/Lib/World/src/world/dio.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //-----------------------------------------------------------------------------
 6 | #ifndef WORLD_DIO_H_
 7 | #define WORLD_DIO_H_
 8 | 
 9 | #include "macrodefinitions.h"
10 | 
11 | WORLD_BEGIN_C_DECLS
12 | 
13 | //-----------------------------------------------------------------------------
14 | // Struct for DIO
15 | //-----------------------------------------------------------------------------
16 | typedef struct {
17 |   double f0_floor;
18 |   double f0_ceil;
19 |   double channels_in_octave;
20 |   double frame_period;  // msec
21 |   int speed;  // (1, 2, ..., 12)
22 |   double allowed_range;  // Threshold used for fixing the F0 contour.
23 | } DioOption;
24 | 
25 | //-----------------------------------------------------------------------------
26 | // DIO
27 | //
28 | // Input:
29 | //   x                    : Input signal
30 | //   x_length             : Length of x
31 | //   fs                   : Sampling frequency
32 | //   option               : Struct to order the parameter for DIO
33 | //
34 | // Output:
35 | //   temporal_positions   : Temporal positions.
36 | //   f0                   : F0 contour.
37 | //-----------------------------------------------------------------------------
38 | void Dio(const double *x, int x_length, int fs, const DioOption *option,
39 |   double *temporal_positions, double *f0);
40 | 
41 | //-----------------------------------------------------------------------------
42 | // InitializeDioOption allocates the memory to the struct and sets the
43 | // default parameters.
44 | //
45 | // Output:
46 | //   option   : Struct for the optional parameter.
47 | //-----------------------------------------------------------------------------
48 | void InitializeDioOption(DioOption *option);
49 | 
50 | //-----------------------------------------------------------------------------
51 | // GetSamplesForDIO() calculates the number of samples required for Dio().
52 | //
53 | // Input:
54 | //   fs             : Sampling frequency [Hz]
55 | //   x_length       : Length of the input signal [Sample].
56 | //   frame_period   : Frame shift [msec]
57 | //
58 | // Output:
59 | //   The number of samples required to store the results of Dio()
60 | //-----------------------------------------------------------------------------
61 | int GetSamplesForDIO(int fs, int x_length, double frame_period);
62 | 
63 | WORLD_END_C_DECLS
64 | 
65 | #endif  // WORLD_DIO_H_
66 | 


--------------------------------------------------------------------------------
/libsvc/Modules/Lib/World/src/world/fft.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //
 6 | // These functions and variables are defined to use FFT as well as FFTW
 7 | // Please see fft.cpp to show the detailed information
 8 | //-----------------------------------------------------------------------------
 9 | #ifndef WORLD_FFT_H_
10 | #define WORLD_FFT_H_
11 | 
12 | #include "macrodefinitions.h"
13 | 
14 | WORLD_BEGIN_C_DECLS
15 | 
16 | // Commands for FFT (This is the same as FFTW)
17 | #define FFT_FORWARD 1
18 | #define FFT_BACKWARD 2
19 | #define FFT_ESTIMATE 3
20 | 
21 | // Complex number for FFT
22 | typedef double fft_complex[2];
23 | // Struct used for FFT
24 | typedef struct {
25 |   int n;
26 |   int sign;
27 |   unsigned int flags;
28 |   fft_complex *c_in;
29 |   double *in;
30 |   fft_complex *c_out;
31 |   double *out;
32 |   double *input;
33 |   int *ip;
34 |   double *w;
35 | } fft_plan;
36 | 
37 | fft_plan fft_plan_dft_1d(int n, fft_complex *in, fft_complex *out, int sign,
38 |   unsigned int flags);
39 | fft_plan fft_plan_dft_c2r_1d(int n, fft_complex *in, double *out,
40 |   unsigned int flags);
41 | fft_plan fft_plan_dft_r2c_1d(int n, double *in, fft_complex *out,
42 |   unsigned int flags);
43 | void fft_execute(fft_plan p);
44 | void fft_destroy_plan(fft_plan p);
45 | 
46 | WORLD_END_C_DECLS
47 | 
48 | #endif  // WORLD_FFT_H_
49 | 


--------------------------------------------------------------------------------
/libsvc/Modules/Lib/World/src/world/harvest.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //-----------------------------------------------------------------------------
 6 | #ifndef WORLD_HARVEST_H_
 7 | #define WORLD_HARVEST_H_
 8 | 
 9 | #include "macrodefinitions.h"
10 | 
11 | WORLD_BEGIN_C_DECLS
12 | 
13 | //-----------------------------------------------------------------------------
14 | // Struct for Harvest
15 | //-----------------------------------------------------------------------------
16 | typedef struct {
17 |   double f0_floor;
18 |   double f0_ceil;
19 |   double frame_period;
20 | } HarvestOption;
21 | 
22 | //-----------------------------------------------------------------------------
23 | // Harvest
24 | //
25 | // Input:
26 | //   x                    : Input signal
27 | //   x_length             : Length of x
28 | //   fs                   : Sampling frequency
29 | //   option               : Struct to order the parameter for Harvest
30 | //
31 | // Output:
32 | //   temporal_positions   : Temporal positions.
33 | //   f0                   : F0 contour.
34 | //-----------------------------------------------------------------------------
35 | void Harvest(const double *x, int x_length, int fs,
36 |   const HarvestOption *option, double *temporal_positions, double *f0);
37 | 
38 | //-----------------------------------------------------------------------------
39 | // InitializeHarvestOption allocates the memory to the struct and sets the
40 | // default parameters.
41 | //
42 | // Output:
43 | //   option   : Struct for the optional parameter.
44 | //-----------------------------------------------------------------------------
45 | void InitializeHarvestOption(HarvestOption *option);
46 | 
47 | //-----------------------------------------------------------------------------
48 | // GetSamplesForHarvest() calculates the number of samples required for
49 | // Harvest().
50 | //
51 | // Input:
52 | //   fs             : Sampling frequency [Hz]
53 | //   x_length       : Length of the input signal [Sample]
54 | //   frame_period   : Frame shift [msec]
55 | //
56 | // Output:
57 | //   The number of samples required to store the results of Harvest().
58 | //-----------------------------------------------------------------------------
59 | int GetSamplesForHarvest(int fs, int x_length, double frame_period);
60 | 
61 | WORLD_END_C_DECLS
62 | 
63 | #endif  // WORLD_HARVEST_H_
64 | 


--------------------------------------------------------------------------------
/libsvc/Modules/Lib/World/src/world/stonemask.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //-----------------------------------------------------------------------------
 6 | #ifndef WORLD_STONEMASK_H_
 7 | #define WORLD_STONEMASK_H_
 8 | 
 9 | #include "macrodefinitions.h"
10 | 
11 | WORLD_BEGIN_C_DECLS
12 | 
13 | //-----------------------------------------------------------------------------
14 | // StoneMask() refines the estimated F0 by Dio()
15 | //
16 | // Input:
17 | //   x                      : Input signal
18 | //   x_length               : Length of the input signal
19 | //   fs                     : Sampling frequency
20 | //   time_axis              : Temporal information
21 | //   f0                     : f0 contour
22 | //   f0_length              : Length of f0
23 | //
24 | // Output:
25 | //   refined_f0             : Refined F0
26 | //-----------------------------------------------------------------------------
27 | void StoneMask(const double *x, int x_length, int fs,
28 |     const double *temporal_positions, const double *f0, int f0_length,
29 |     double *refined_f0);
30 | 
31 | WORLD_END_C_DECLS
32 | 
33 | #endif  // WORLD_STONEMASK_H_
34 | 


--------------------------------------------------------------------------------
/libsvc/Modules/Lib/World/src/world/synthesis.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //-----------------------------------------------------------------------------
 6 | #ifndef WORLD_SYNTHESIS_H_
 7 | #define WORLD_SYNTHESIS_H_
 8 | 
 9 | #include "world/macrodefinitions.h"
10 | 
11 | WORLD_BEGIN_C_DECLS
12 | 
13 | //-----------------------------------------------------------------------------
 14 | // Synthesis() synthesizes the voice based on f0, spectrogram and
15 | // aperiodicity (not excitation signal).
16 | //
17 | // Input:
18 | //   f0                   : f0 contour
19 | //   f0_length            : Length of f0
20 | //   spectrogram          : Spectrogram estimated by CheapTrick
21 | //   fft_size             : FFT size
22 | //   aperiodicity         : Aperiodicity spectrogram based on D4C
23 | //   frame_period         : Temporal period used for the analysis
24 | //   fs                   : Sampling frequency
25 | //   y_length             : Length of the output signal (Memory of y has been
26 | //                          allocated in advance)
27 | // Output:
28 | //   y                    : Calculated speech
29 | //-----------------------------------------------------------------------------
30 | void Synthesis(const double *f0, int f0_length, 
31 |     const double * const *spectrogram, const double * const *aperiodicity, 
32 |     int fft_size, double frame_period, int fs, int y_length, double *y);
33 | 
34 | WORLD_END_C_DECLS
35 | 
36 | #endif  // WORLD_SYNTHESIS_H_
37 | 


--------------------------------------------------------------------------------
/libsvc/Modules/Lib/World/tools/audioio.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //-----------------------------------------------------------------------------
 6 | #ifndef WORLD_AUDIOIO_H_
 7 | #define WORLD_AUDIOIO_H_
 8 | 
 9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 | 
13 | //-----------------------------------------------------------------------------
14 | // wavwrite() writes a .wav file.
15 | // Input:
16 | //   x          : Input signal
17 | //   x_length   : Signal length of x [sample]
18 | //   fs         : Sampling frequency [Hz]
19 | //   nbit       : Quantization bit [bit]
20 | //   filename   : Name of the output file.
21 | // Caution:
22 | //   The variable nbit is not used in this function.
23 | //   This function only supports 16 bit.
24 | //-----------------------------------------------------------------------------
25 | void wavwrite(const double *x, int x_length, int fs, int nbit,
26 |   const char *filename);
27 | 
28 | //-----------------------------------------------------------------------------
29 | // GetAudioLength() returns the length of a .wav file.
30 | // Input:
31 | //   filename     : Filename of a .wav file.
32 | // Output:
33 | //   The number of samples in the .wav file
34 | //-----------------------------------------------------------------------------
35 | int GetAudioLength(const char *filename);
36 | 
37 | //-----------------------------------------------------------------------------
38 | // wavread() reads a .wav file.
39 | // The memory of output x must be allocated by the caller in advance.
40 | // Input:
41 | //   filename     : Filename of the input file.
42 | // Output:
43 | //   fs           : Sampling frequency [Hz]
44 | //   nbit         : Quantization bit [bit]
45 | //   x            : The output waveform.
46 | //-----------------------------------------------------------------------------
47 | void wavread(const char* filename, int *fs, int *nbit, double *x);
48 | 
49 | #ifdef __cplusplus
50 | }
51 | #endif
52 | 
53 | #endif  // WORLD_AUDIOIO_H_
54 | 


--------------------------------------------------------------------------------
/libsvc/Modules/README.md:
--------------------------------------------------------------------------------
 1 | # Example
 2 | ```c++
 3 | #include "Modules/Models/header/Vits.hpp"
 4 | 
 5 | int main(){
 6 |   rapidjson::Document Config;
 7 |   Config.Parse("Your Config");
 8 |   
 9 |   //Progress bar
10 |   InferClass::BaseModelType::callback a_callback = [](size_t a, size_t b) {std::cout << std::to_string((float)a * 100.f / (float)b) << "%\n"; };
11 |   
12 |   //return params for inference
13 |   InferClass::BaseModelType::callback_params b_callback = []()  
14 | 	{
15 | 		auto cbaaa = InferClass::InferConfigs();
16 | 		cbaaa.kmeans_rate = 0.5;
17 | 		cbaaa.keys = 0;
18 | 		return cbaaa;
19 | 	};
20 |   
21 |   //modify duration per phoneme
22 |   InferClass::TTS::DurationCallback c_callback = [](std::vector<float>&) {};
23 |   
24 |   std::vector<int16_t> output;
25 |   try
26 |   {
27 |   	std::wstring inp(L"watashinoonaniomitekudasai");
28 |   	auto model = dynamic_cast<InferClass::BaseModelType*>(new InferClass::VitsSvc(Config, a_callback, b_callback));
29 |     
30 |   	output = model->Inference(inp);
31 |     
32 |   	Wav outWav(model->GetSamplingRate(), output.size() * 2, output.data());
33 |   	outWav.Writef(L"test.wav");
34 |     
35 |   	delete model;
36 |   }
37 |   catch(std::exception& e)
38 |   {
39 |   	std::cout << e.what();
40 |   }
41 | }
42 | 
43 | ```
44 | 


--------------------------------------------------------------------------------
/libsvc/Modules/framework.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #ifndef LibSvcApi  // only define the linkage macro if nothing defined it before this header
 4 | #ifdef MoeVSDll  // DLL configuration
 5 | #ifdef LibSvcDll  // presumably set while compiling the DLL itself - TODO confirm in the build files
 6 | #define LibSvcApi __declspec(dllexport)  // MoeVSDll + LibSvcDll: export symbols
 7 | #else
 8 | #ifndef MoeVS
 9 | #define LibSvcApi __declspec(dllimport)  // MoeVSDll without LibSvcDll and without MoeVS: import symbols
10 | #else
11 | #define LibSvcApi  // MoeVSDll + MoeVS (no LibSvcDll): no decoration
12 | #endif  // MoeVS
13 | #endif  // LibSvcDll
14 | #else
15 | #define LibSvcApi  // MoeVSDll not defined: no decoration
16 | #endif  // MoeVSDll
17 | #endif  // LibSvcApi


--------------------------------------------------------------------------------
/libsvc/Modules/header/InferTools/AvCodec/AvCodeResample.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <vector>
 3 | #include <string>
 4 | #include "../../StringPreprocess.hpp"
 5 | #include "matlabfunctions.h"
 6 | #include "../inferTools.hpp"
 7 | extern "C" {
 8 | #include "libavcodec/avcodec.h"
 9 | #include "libavformat/avformat.h"
10 | #include "libswscale/swscale.h"
11 | #include "libswresample/swresample.h"
12 | #include "libavutil/samplefmt.h"
13 | }
14 | 
15 | class AudioPreprocess  // audio loading helper built on the FFmpeg headers included above
16 | {
17 | public:
18 |     struct WAV_HEADER {
19 |         char             RIFF[4] = { 'R','I','F','F' };              // "RIFF" identifier
20 |         unsigned long    ChunkSize;                                  // file size minus 8
21 |         char             WAVE[4] = { 'W','A','V','E' };              // "WAVE" chunk tag
22 |         char             fmt[4] = { 'f','m','t',' ' };               // "fmt " chunk tag
23 |         unsigned long    Subchunk1Size;                              // size of the fmt chunk
24 |         unsigned short   AudioFormat;                                // encoding format
25 |         unsigned short   NumOfChan;                                  // number of channels
26 |         WAV_HEADER(unsigned long cs = 36, unsigned long sc1s = 16, unsigned short af = 1, unsigned short nc = 1) :ChunkSize(cs), Subchunk1Size(sc1s), AudioFormat(af), NumOfChan(nc) {}
27 |     };
28 |     LibSvcApi static WAV_HEADER GetHeader(const std::wstring& path);  // presumably reads the header of the file at `path` - defined elsewhere
29 |     LibSvcApi static std::vector<double> arange(double start, double end, double step = 1.0, double div = 1.0);
30 |     LibSvcApi std::vector<short> codec(const std::wstring& path, int sr);  // presumably decodes `path` to 16-bit PCM at rate `sr` - TODO confirm
31 |     LibSvcApi void release();
32 |     LibSvcApi void init();
33 |     LibSvcApi AudioPreprocess();
34 | 	~AudioPreprocess()
35 | 	{
36 |         release();  // destruction delegates all cleanup to release()
37 | 	}
38 | private:
39 |     AVFrame* inFrame;  // raw FFmpeg pointers; no in-class initialisers, so the constructor/init() must set them
40 |     uint8_t* out_buffer;
41 |     SwrContext* swrContext;
42 |     AVCodecContext* avCodecContext;
43 |     AVFormatContext* avFormatContext;
44 |     AVPacket* packet;
45 | };
46 | 


--------------------------------------------------------------------------------
/libsvc/Modules/header/InferTools/Cluster/MoeVSBaseCluster.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * FileName: MoeVSBaseCluster.hpp
 3 |  * Note: MoeVoiceStudioCore 聚类基类
 4 |  *
 5 |  * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com)
 6 |  *
 7 |  * This file is part of MoeVoiceStudioCore library.
 8 |  * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the
 9 |  * GNU Affero General Public License as published by the Free Software Foundation, either version 3
10 |  * of the License, or any later version.
11 |  *
12 |  * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
13 |  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 |  * See the GNU Affero General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Affero General Public License along with MoeVoiceStudioCore library.
17 |  * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>.
18 |  *
19 |  * date: 2022-10-17 Create
20 | */
21 | 
22 | #pragma once
23 | #include <vector>
24 | 
25 | #define MoeVoiceStudioClusterHeader namespace MoeVoiceStudioCluster {
26 | #define MoeVoiceStudioClusterEnd }
27 | 
28 | MoeVoiceStudioClusterHeader
29 | 
30 | class MoeVoiceStudioBaseCluster  // polymorphic base for the cluster look-up implementations
31 | {
32 | public:
33 | 	MoeVoiceStudioBaseCluster() = default;
34 | 	virtual ~MoeVoiceStudioBaseCluster() = default;
35 | 
36 | 	/**
37 | 	 * \brief Find the nearest cluster point(s)
38 | 	 * \param point The point(s) to look up
39 | 	 * \param sid Speaker (character) ID
40 | 	 * \param n_points Number of points
41 | 	 * \return The nearest point(s) found
42 | 	 */
43 | 	virtual std::vector<float> find(float* point, long sid, int64_t n_points = 1);
44 | };
45 | 
46 | MoeVoiceStudioClusterEnd


--------------------------------------------------------------------------------
/libsvc/Modules/header/InferTools/Cluster/MoeVSIndexCluster.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * FileName: MoeVSIndexCluster.hpp
 3 |  * Note: MoeVoiceStudioCore 官方聚类(Index)
 4 |  *
 5 |  * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com)
 6 |  *
 7 |  * This file is part of MoeVoiceStudioCore library.
 8 |  * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the
 9 |  * GNU Affero General Public License as published by the Free Software Foundation, either version 3
10 |  * of the License, or any later version.
11 |  *
12 |  * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
13 |  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 |  * See the GNU Affero General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Affero General Public License along with MoeVoiceStudioCore library.
17 |  * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>.
18 |  *
19 |  * date: 2022-10-17 Create
20 | */
21 | 
22 | #pragma once
23 | #ifdef MoeVoiceStudioIndexCluster
24 | #include <string>
25 | #include "MoeVSBaseCluster.hpp"
26 | #include <faiss/IndexIVFFlat.h>
27 | #include <faiss/index_io.h>
28 | 
29 | MoeVoiceStudioClusterHeader
30 | class IndexClusterCore  // owns one faiss::Index pointer; move-only
31 | {
32 | public:
33 | 	IndexClusterCore() = delete;  // must be constructed from a path
34 | 	~IndexClusterCore();
35 | 	IndexClusterCore(const char* _path);  // presumably loads the index stored at _path - definition not shown
36 | 	IndexClusterCore(const IndexClusterCore&) = delete;  // copying is disabled
37 | 	IndexClusterCore(IndexClusterCore&& move) noexcept;
38 | 	IndexClusterCore& operator=(const IndexClusterCore&) = delete;
39 | 	IndexClusterCore& operator=(IndexClusterCore&& move) noexcept;
40 | 	std::vector<float> find(const float* points, faiss::idx_t n_points, faiss::idx_t n_searched_points = 8);  // n_searched_points defaults to 8
41 | 	float* GetVec(faiss::idx_t index);
42 | private:
43 | 	faiss::Index* IndexPtr = nullptr;  // raw pointer, null until a constructor sets it
44 | 	faiss::idx_t Dim = 0;
45 | 	std::vector<float> IndexsVector;
46 | };
47 | 
48 | class IndexCluster : public MoeVoiceStudioBaseCluster  // cluster implementation backed by IndexClusterCore objects
49 | {
50 | public:
51 | 	IndexCluster() = delete;  // must be constructed from a path and sizes
52 | 	~IndexCluster() override = default;
53 | 	IndexCluster(const std::wstring& _path, size_t hidden_size, size_t KmeansLen);
54 | 	std::vector<float> find(float* point, long sid, int64_t n_points = 1) override;  // overrides the base-class look-up
55 | private:
56 | 	std::vector<IndexClusterCore> Indexs;  // presumably one core per speaker id - TODO confirm in the .cpp
57 | 	size_t n_hidden_size = 256;  // default 256 unless a constructor overrides it
58 | };
59 | 
60 | MoeVoiceStudioClusterEnd
61 | 
62 | #endif


--------------------------------------------------------------------------------
/libsvc/Modules/header/InferTools/Cluster/MoeVSKmeansCluster.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * FileName: MoeVSKmeansCluster.hpp
 3 |  * Note: MoeVoiceStudioCore 官方聚类(Kmeans)
 4 |  *
 5 |  * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com)
 6 |  *
 7 |  * This file is part of MoeVoiceStudioCore library.
 8 |  * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the
 9 |  * GNU Affero General Public License as published by the Free Software Foundation, either version 3
10 |  * of the License, or any later version.
11 |  *
12 |  * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
13 |  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 |  * See the GNU Affero General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Affero General Public License along with MoeVoiceStudioCore library.
17 |  * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>.
18 |  *
19 |  * date: 2022-10-17 Create
20 | */
21 | 
22 | #pragma once
23 | #include "MoeVSBaseCluster.hpp"
24 | #include "../DataStruct/KDTree.hpp"
25 | #include <string>
26 | 
27 | MoeVoiceStudioClusterHeader
28 | 
29 | class KMeansCluster : public MoeVoiceStudioBaseCluster  // cluster implementation backed by KDTree objects
30 | {
31 | public:
32 | 	KMeansCluster() = delete;  // must be constructed from a path and sizes
33 | 	~KMeansCluster() override = default;
34 | 	KMeansCluster(const std::wstring& _path, size_t hidden_size, size_t KmeansLen);
35 | 	std::vector<float> find(float* point, long sid, int64_t n_points = 1) override;  // overrides the base-class look-up
36 | private:
37 | 	std::vector<KDTree> _tree;  // indexed by speaker id in find()
38 | 	size_t dims = 0;  // vector width; set from hidden_size by the constructor
39 | };
40 | 
41 | MoeVoiceStudioClusterEnd


--------------------------------------------------------------------------------
/libsvc/Modules/header/InferTools/DataStruct/README.md:
--------------------------------------------------------------------------------
1 | ## KdTree From J. Frederico Carvalho
2 | 


--------------------------------------------------------------------------------
/libsvc/Modules/header/InferTools/F0Extractor/DioF0Extractor.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * FileName: DioF0Extractor.hpp
 3 |  * Note: MoeVoiceStudioCore 官方F0提取算法 Dio
 4 |  *
 5 |  * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com)
 6 |  *
 7 |  * This file is part of MoeVoiceStudioCore library.
 8 |  * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the
 9 |  * GNU Affero General Public License as published by the Free Software Foundation, either version 3
10 |  * of the License, or any later version.
11 |  *
12 |  * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
13 |  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 |  * See the GNU Affero General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Affero General Public License along with MoeVoiceStudioCore library.
17 |  * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>.
18 |  *
19 |  * date: 2022-10-17 Create
20 | */
21 | 
22 | #pragma once
23 | #include "BaseF0Extractor.hpp"
24 | 
25 | MoeVoiceStudioF0ExtractorHeader
26 | class DioF0Extractor : public BaseF0Extractor  // F0 extractor named after the Dio algorithm
27 | {
28 | public:
29 | 	DioF0Extractor(int sampling_rate, int hop_size, int n_f0_bins = 256, double max_f0 = 1100.0, double min_f0 = 50.0);  // defaults: 256 bins, 50-1100
30 | 
31 | 	~DioF0Extractor() override = default;
32 | 
33 | 	void compute_f0(const double* PCMData, size_t PCMLen);  // presumably fills refined_f0 - definition not shown
34 | 
35 | 	void InterPf0(size_t TargetLength);  // presumably resamples the contour to TargetLength - TODO confirm
36 | 
37 | 	std::vector<float> ExtractF0(const std::vector<double>& PCMData, size_t TargetLength) override;  // overrides the base-class entry point
38 | private:
39 | 	std::vector<double> refined_f0;  // working buffer owned by this extractor
40 | };
41 | MoeVoiceStudioF0ExtractorEnd


--------------------------------------------------------------------------------
/libsvc/Modules/header/InferTools/F0Extractor/HarvestF0Extractor.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * FileName: HarvestF0Extractor.hpp
 3 |  * Note: MoeVoiceStudioCore 官方F0提取算法 Harvest
 4 |  *
 5 |  * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com)
 6 |  *
 7 |  * This file is part of MoeVoiceStudioCore library.
 8 |  * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the
 9 |  * GNU Affero General Public License as published by the Free Software Foundation, either version 3
10 |  * of the License, or any later version.
11 |  *
12 |  * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
13 |  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 |  * See the GNU Affero General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Affero General Public License along with MoeVoiceStudioCore library.
17 |  * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>.
18 |  *
19 |  * date: 2022-10-17 Create
20 | */
21 | 
22 | #pragma once
23 | #include "BaseF0Extractor.hpp"
24 | 
25 | MoeVoiceStudioF0ExtractorHeader
26 | class HarvestF0Extractor : public BaseF0Extractor  // F0 extractor named after the Harvest algorithm
27 | {
28 | public:
29 | 	HarvestF0Extractor(int sampling_rate, int hop_size, int n_f0_bins = 256, double max_f0 = 1100.0, double min_f0 = 50.0);  // defaults: 256 bins, 50-1100
30 | 
31 | 	~HarvestF0Extractor() override = default;
32 | 
33 | 	void compute_f0(const double* PCMData, size_t PCMLen);  // presumably fills refined_f0 - definition not shown
34 | 
35 | 	void InterPf0(size_t TargetLength);  // presumably resamples the contour to TargetLength - TODO confirm
36 | 
37 | 	std::vector<float> ExtractF0(const std::vector<double>& PCMData, size_t TargetLength) override;  // overrides the base-class entry point
38 | 
39 | private:
40 | 	std::vector<double> refined_f0;  // working buffer owned by this extractor
41 | };
42 | MoeVoiceStudioF0ExtractorEnd


--------------------------------------------------------------------------------
/libsvc/Modules/header/InferTools/Stft/stft.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <vector>
 3 | #include "fftw3.h"
 4 | 
 5 | namespace DlCodecStft
 6 | {
 7 |     class STFT  // callable transform object; implementation lives elsewhere (fftw3.h is included above)
 8 |     {
 9 |     public:
10 |         STFT() = default;  // uses the in-class defaults below
11 |         ~STFT();
12 |         STFT(int WindowSize, int HopSize, int FFTSize = 0);  // NOTE(review): meaning of FFTSize == 0 is decided in the .cpp - confirm
13 |         inline static double PI = 3.14159265358979323846;  // mutable static, not const
14 |         std::pair<std::vector<float>, int64_t> operator()(const std::vector<double>& audioData) const;  // presumably (flattened spectrum, frame count) - TODO confirm
15 |     private:
16 |     	int WINDOW_SIZE = 2048;
17 |     	int HOP_SIZE = WINDOW_SIZE / 4;  // 512 with the default window
18 |     	int FFT_SIZE = WINDOW_SIZE / 2 + 1;  // 1025 with the default window
19 |     };
20 | 
21 |     class Mel  // holds an STFT member plus a MelBasis vector
22 |     {
23 |     public:
24 |         Mel() = delete;  // must be constructed with explicit sizes
25 |         ~Mel() = default;
26 |         Mel(int WindowSize, int HopSize, int SamplingRate, int MelSize = 0);  // NOTE(review): meaning of MelSize == 0 is decided in the .cpp - confirm
27 |         std::pair<std::vector<float>, int64_t> GetMel(const std::vector<double>& audioData) const;
28 |         std::pair<std::vector<float>, int64_t> operator()(const std::vector<double>& audioData) const;  // same signature as GetMel
29 |     private:
30 |         STFT stft;
31 |         int MEL_SIZE = 128;  // default 128
32 |         int FFT_SIZE = 0;
33 |         int sr = 22050;  // default 22050
34 |         std::vector<float> MelBasis;  // presumably the mel filter-bank weights - TODO confirm
35 |     };
36 | }
37 | 


--------------------------------------------------------------------------------
/libsvc/Modules/header/Logger/MoeSSLogger.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <string>
 3 | #include "../StringPreprocess.hpp"
 4 | #include "../../framework.h"
 5 | #include <filesystem>
 6 | #include <mutex>
 7 | #define __MOEVS_DEBUG_MESSAGE(msg) __MOEVS_DEBUG_INFO(__FILE__, __LINE__, msg)
 8 | #define logger MoeSSLogger::GetLogger()
 9 | 
10 | namespace MoeSSLogger
11 | {
12 | 	class Logger  // log/error sink with optional callback functions and FILE* handles
13 | 	{
14 | 	public:
15 | 		using logger_fn = void(*)(const wchar_t*, const char*);  // callback signature for custom sinks
16 | 		Logger();
17 | 		~Logger();
18 | 		Logger(logger_fn error_fn, logger_fn log_fn);  // note the order: error callback first, log callback second
19 | 		void log(const std::wstring&);
20 | 		void log(const char*);
21 | 		void error(const std::wstring&);
22 | 		void error(const char*);
23 | 		void enable(bool _filelogger)  // sets the filelogger flag
24 | 		{
25 | 			filelogger = _filelogger;  // NOTE(review): written without taking mx
26 | 		}
27 | 	private:
28 | 		bool custom_logger_fn = false;
29 | 		std::filesystem::path cur_log_dir, logpath, errorpath;
30 | 		logger_fn cerror_fn = nullptr, cloggerfn = nullptr;
31 | 		FILE* log_file = nullptr, * error_file = nullptr;  // NOTE(review): FILE is used but <cstdio> is not included directly here
32 | 		bool filelogger = true;  // on by default
33 | 		std::mutex mx;  // presumably serialises log()/error() - confirm in the .cpp
34 | 	};
35 | 
36 | 	LibSvcApi Logger& GetLogger();
37 | }


--------------------------------------------------------------------------------
/libsvc/Modules/header/StringPreprocess.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <vector>
 4 | #include <string>
 5 | #include "../framework.h"
 6 | 
 7 | LibSvcApi std::string to_byte_string(const std::wstring& input);
 8 | 
 9 | LibSvcApi std::string to_ansi_string(const std::wstring& input);
10 | 
11 | LibSvcApi std::wstring to_wide_string(const std::string& input);
12 | 
13 | LibSvcApi std::wstring string_vector_to_string(const std::vector<std::string>& vector);
14 | 
15 | LibSvcApi std::wstring wstring_vector_to_string(const std::vector<std::wstring>& vector);
16 | 
17 | template <typename T>  // T must be a type accepted by std::to_wstring
18 | std::wstring vector_to_string(const std::vector<T>& vector)  // formats as L"[a, b, c]"; an empty vector gives L"[]"
19 | {
20 | 	std::wstring vecstr = L"[";
21 | 	for (const auto& it : vector)
22 | 	{
23 | 		std::wstring TmpStr = std::to_wstring(it);
24 | 		if ((std::is_same_v<T, float> || std::is_same_v<T, double>) && TmpStr.find(L'.') != std::string::npos)  // NOTE(review): is_same_v needs <type_traits>, not included directly
25 | 		{
26 | 			while (TmpStr.back() == L'0')  // drop trailing zeros; the loop stops at the '.' at the latest
27 | 				TmpStr.pop_back();
28 | 			if (TmpStr.back() == L'.')  // keep one fractional digit, e.g. "1." becomes "1.0"
29 | 				TmpStr += L"0";
30 | 		}
31 | 		vecstr += TmpStr + L", ";
32 | 	}
33 | 	if (vecstr.length() > 2)  // at least one element was appended: strip the final ", "
34 | 		vecstr = vecstr.substr(0, vecstr.length() - 2);
35 | 	vecstr += L']';
36 | 	return vecstr;
37 | }


--------------------------------------------------------------------------------
/libsvc/Modules/src/InferTools/Cluster/MoeVSBaseCluster.cpp:
--------------------------------------------------------------------------------
1 | #include "../../../header/InferTools/Cluster/MoeVSBaseCluster.hpp"
2 | #include "../../../header/InferTools/inferTools.hpp"
3 | 
4 | std::vector<float> MoeVoiceStudioCluster::MoeVoiceStudioBaseCluster::find(float* point, long sid, int64_t n_points)  // base-class stub; arguments are unused
5 | {
6 | 	LibDLVoiceCodecThrow("NotImplementedError");  // presumably throws (macro comes from inferTools.hpp, not shown); nothing is returned
7 | }


--------------------------------------------------------------------------------
/libsvc/Modules/src/InferTools/Cluster/MoeVSClusterManager.cpp:
--------------------------------------------------------------------------------
 1 | #include "../../../header/InferTools/Cluster/MoeVSClusterManager.hpp"
 2 | #include <map>
 3 | #include <stdexcept>
 4 | #include "../../../header/Logger/MoeSSLogger.hpp"
 5 | 
 6 | MoeVoiceStudioClusterHeader
 7 | 
 8 | std::map<std::wstring, GetMoeVSClusterFn> RegisteredMoeVSCluster;
 9 | 
10 | MoeVSCluster GetMoeVSCluster(const std::wstring& _name, const std::wstring& _path, size_t hidden_size, size_t KmeansLen)  // factory look-up by registered name
11 | {
12 | 	const auto f_ClusterFn = RegisteredMoeVSCluster.find(_name);
13 | 	if (f_ClusterFn != RegisteredMoeVSCluster.end())
14 | 		return f_ClusterFn->second(_path, hidden_size, KmeansLen);  // call the registered constructor function
15 | 	throw std::runtime_error("Unable To Find An Available MoeVSCluster");  // unknown name
16 | }
17 | 
18 | void RegisterMoeVSCluster(const std::wstring& _name, const GetMoeVSClusterFn& _constructor_fn)  // first registration of a name wins
19 | {
20 | 	if (RegisteredMoeVSCluster.find(_name) != RegisteredMoeVSCluster.end())
21 | 	{
22 | 		logger.log(L"[Warn] MoeVSClusterNameConflict");  // duplicate name: warn and keep the existing entry
23 | 		return;
24 | 	}
25 | 	RegisteredMoeVSCluster[_name] = _constructor_fn;
26 | }
27 | 
28 | MoeVoiceStudioClusterEnd


--------------------------------------------------------------------------------
/libsvc/Modules/src/InferTools/Cluster/MoeVSKmeansCluster.cpp:
--------------------------------------------------------------------------------
 1 | #include "../../../header/InferTools/Cluster/MoeVSKmeansCluster.hpp"
 2 | #include "../../../header/InferTools/inferTools.hpp"
 3 | 
 4 | std::vector<float> MoeVoiceStudioCluster::KMeansCluster::find(float* point, long sid, int64_t n_points)  // reads n_points * dims floats from `point`
 5 | {
 6 | 	if (size_t(sid) < _tree.size())  // a negative sid wraps to a huge size_t and therefore takes the fallback below
 7 | 	{
 8 | 		std::vector<float> res;
 9 | 		res.reserve(dims * n_points * 2);  // NOTE(review): reserves twice the dims * n_points floats; confirm the factor 2 is intended
10 | 		for (int64_t pt = 0; pt < n_points; ++pt)
11 | 		{
12 | 			auto tmp = _tree[sid].nearest_point({ point + pt * dims,point + (pt + 1) * dims });  // query with the pt-th dims-wide slice
13 | 			res.insert(res.end(), tmp.begin(), tmp.end());
14 | 		}
15 | 		return res;
16 | 	}
17 | 	return { point, point + dims * n_points };  // no tree for this sid: return a copy of the input unchanged
18 | }
19 | 
20 | // Reads "<_path>/KMeans.npy" as rows of hidden_size floats and builds one KDTree per KmeansLen rows; raises "KMeansFileNotExist" if it cannot be opened.
21 | MoeVoiceStudioCluster::KMeansCluster::KMeansCluster(const std::wstring& _path, size_t hidden_size, size_t KmeansLen)
22 | {
23 | 	dims = hidden_size;
24 | 	FILE* file = nullptr;
25 | 	_wfopen_s(&file, (_path + L"/KMeans.npy").c_str(), L"rb");
26 | 	if (!file) LibDLVoiceCodecThrow("KMeansFileNotExist");
27 | 	fseek(file, 128L, SEEK_SET);  // skip the .npy header, assumed here to be exactly 128 bytes
28 | 	std::vector<float> tmpData(hidden_size);
29 | 	const size_t ec = hidden_size * sizeof(float);  // bytes per row
30 | 	std::vector<std::vector<float>> _tmp;
31 | 	_tmp.reserve(KmeansLen);
32 | 	while (fread(tmpData.data(), 1, ec, file) == ec)  // a short read (EOF or truncated row) ends the loop
33 | 	{
34 | 		_tmp.emplace_back(tmpData);
35 | 		if (_tmp.size() == KmeansLen)  // one complete group of rows: build its tree
36 | 		{
37 | 			_tree.emplace_back(_tmp);
38 | 			_tmp.clear();
39 | 		}
40 | 	}
41 | 	fclose(file);  // fix: the handle was never closed, leaking one FILE per constructed cluster
42 | }
43 | 


--------------------------------------------------------------------------------
/libsvc/Modules/src/InferTools/DataStruct/README.md:
--------------------------------------------------------------------------------
1 | ## KdTree From J. Frederico Carvalho
2 | 


--------------------------------------------------------------------------------
/libsvc/Modules/src/InferTools/F0Extractor/BaseF0Extractor.cpp:
--------------------------------------------------------------------------------
 1 | #include "../../../header/InferTools/F0Extractor/BaseF0Extractor.hpp"
 2 | #include <map>
 3 | #include "../../../header/Logger/MoeSSLogger.hpp"
 4 | #include "../../../header/InferTools/inferTools.hpp"
 5 | 
 6 | MoeVSF0Extractor::BaseF0Extractor::BaseF0Extractor(int sampling_rate, int hop_size, int n_f0_bins, double max_f0, double min_f0) :  // stores the settings and derives mel bounds
 7 | 	fs(sampling_rate),
 8 | 	hop(hop_size),
 9 | 	f0_bin(n_f0_bins),
10 | 	f0_max(max_f0),
11 | 	f0_min(min_f0)
12 | {
13 | 	f0_mel_min = (1127.0 * log(1.0 + f0_min / 700.0));  // mel value of f0_min: 1127 * ln(1 + f / 700)
14 | 	f0_mel_max = (1127.0 * log(1.0 + f0_max / 700.0));  // same mapping for f0_max; NOTE(review): <cmath> is not included directly here
15 | }
16 | 
17 | // Returns start/div, (start+step)/div, ... for every value below `end`; empty when start >= end or when step <= 0 (the loop could never finish).
18 | std::vector<double> MoeVSF0Extractor::BaseF0Extractor::arange(double start, double end, double step, double div)
19 | {
20 | 	std::vector<double> output;
21 | 	if (step <= 0.0)  // fix: a non-positive step never reaches `end` and grew the vector until allocation failed
22 | 		return output;
23 | 	for (; start < end; start += step)
24 | 		output.push_back(start / div);
25 | 	return output;
26 | }
27 | 
28 | std::vector<float> MoeVSF0Extractor::BaseF0Extractor::ExtractF0(const std::vector<double>& PCMData, size_t TargetLength)  // base-class stub for the double overload
29 | {
30 | 	LibDLVoiceCodecThrow("NotImplementedError");  // presumably throws (macro comes from inferTools.hpp, not shown); nothing is returned
31 | }
32 | 
33 | std::vector<float> MoeVSF0Extractor::BaseF0Extractor::ExtractF0(const std::vector<float>& PCMData, size_t TargetLength)  // float convenience overload
34 | {
35 | 	std::vector<double> PCMVector(PCMData.size());
36 | 	for (size_t i = 0; i < PCMData.size(); ++i)
37 | 		PCMVector[i] = double(PCMData[i]);  // widen each sample to double
38 | 	return ExtractF0(PCMVector, TargetLength);  // forward to the std::vector<double> overload
39 | }
40 | 
41 | std::vector<float> MoeVSF0Extractor::BaseF0Extractor::ExtractF0(const std::vector<int16_t>& PCMData, size_t TargetLength)  // int16 convenience overload
42 | {
43 | 	std::vector<double> PCMVector(PCMData.size());
44 | 	for (size_t i = 0; i < PCMData.size(); ++i)
45 | 		PCMVector[i] = double(PCMData[i]);  // plain cast: samples keep their int16 magnitude, no scaling is applied
46 | 	return ExtractF0(PCMVector, TargetLength);  // forward to the std::vector<double> overload
47 | }


--------------------------------------------------------------------------------
/libsvc/Modules/src/InferTools/F0Extractor/F0ExtractorManager.cpp:
--------------------------------------------------------------------------------
 1 | #include "../../../header/InferTools/F0Extractor/F0ExtractorManager.hpp"
 2 | #include <map>
 3 | #include <stdexcept>
 4 | #include "../../../header/Logger/MoeSSLogger.hpp"
 5 | 
 6 | MoeVoiceStudioF0ExtractorHeader
 7 | std::map<std::wstring, GetF0ExtractorFn> RegisteredF0Extractors;
 8 | 
 9 | F0Extractor GetF0Extractor(const std::wstring& _name,  // factory look-up by registered name
10 | 	const uint32_t fs,
11 | 	const uint32_t hop,
12 | 	const uint32_t f0_bin,
13 | 	const double f0_max,
14 | 	const double f0_min)
15 | {
16 | 	const auto f_F0Extractor = RegisteredF0Extractors.find(_name);
17 | 	if (f_F0Extractor != RegisteredF0Extractors.end())
18 | 		return f_F0Extractor->second(fs, hop, f0_bin, f0_max, f0_min);  // call the registered constructor function
19 | 	throw std::runtime_error("Unable To Find An Available F0Extractor");  // unknown name
20 | }
21 | 
22 | void RegisterF0Extractor(const std::wstring& _name, const GetF0ExtractorFn& _constructor_fn)  // first registration of a name wins
23 | {
24 | 	if (RegisteredF0Extractors.find(_name) != RegisteredF0Extractors.end())
25 | 	{
26 | 		logger.log(L"[Warn] F0ExtractorNameConflict");  // duplicate name: warn and keep the existing entry
27 | 		return;
28 | 	}
29 | 	RegisteredF0Extractors[_name] = _constructor_fn;
30 | }
31 | 
32 | std::vector<std::wstring> GetF0ExtractorList()  // names of all registered extractors, in std::map key order
33 | {
34 | 	std::vector<std::wstring> F0ExtractorsVec;
35 | 	F0ExtractorsVec.reserve(RegisteredF0Extractors.size());
36 | 	for (const auto& i : RegisteredF0Extractors)
37 | 		F0ExtractorsVec.emplace_back(i.first);  // copy the key only
38 | 	return F0ExtractorsVec;
39 | }
40 | 
41 | MoeVoiceStudioF0ExtractorEnd


--------------------------------------------------------------------------------
/libsvc/Modules/src/InferTools/Sampler/MoeVSBaseSampler.cpp:
--------------------------------------------------------------------------------
 1 | #include "../../../header/InferTools/Sampler/MoeVSBaseSampler.hpp"
 2 | #include "../../../header/InferTools/inferTools.hpp"
 3 | MoeVoiceStudioSamplerHeader
 4 | 
 5 | MoeVSBaseSampler::MoeVSBaseSampler(Ort::Session* alpha, Ort::Session* dfn, Ort::Session* pred, int64_t Mel_Bins, const ProgressCallback& _ProgressCallback, Ort::MemoryInfo* memory) :  // stores the pointers as given
 6 | 	MelBins(Mel_Bins), Alpha(alpha), DenoiseFn(dfn), NoisePredictor(pred)
 7 | {
 8 | 	_callback = _ProgressCallback;  // copied by assignment
 9 | 	Memory = memory;
10 | };  // NOTE(review): the ';' after the body is a redundant empty declaration
11 | 
12 | std::vector<Ort::Value> MoeVSBaseSampler::Sample(std::vector<Ort::Value>& Tensors, int64_t Steps, int64_t SpeedUp, float NoiseScale, int64_t Seed, size_t& Process)  // base-class stub
13 | {
14 | 	LibDLVoiceCodecThrow("NotImplementedError");  // presumably throws (macro comes from inferTools.hpp, not shown); nothing is returned
15 | }
16 | 
17 | MoeVSReflowBaseSampler::MoeVSReflowBaseSampler(Ort::Session* Velocity, int64_t MelBins, const ProgressCallback& _ProgressCallback, Ort::MemoryInfo* memory) :  // stores the pointers as given
18 | 	MelBins_(MelBins), Velocity_(Velocity)
19 | {
20 | 	Callback_ = _ProgressCallback;  // copied by assignment
21 | 	Memory_ = memory;
22 | }
23 | 
24 | std::vector<Ort::Value> MoeVSReflowBaseSampler::Sample(std::vector<Ort::Value>& Tensors, int64_t Steps, float dt, float Scale, size_t& Process)  // base-class stub
25 | {
26 | 	LibDLVoiceCodecThrow("NotImplementedError");  // presumably throws (macro comes from inferTools.hpp, not shown); nothing is returned
27 | }
28 | 
29 | MoeVoiceStudioSamplerEnd


--------------------------------------------------------------------------------
/libsvc/Modules/src/InferTools/TensorExtractor/TensorExtractorManager.cpp:
--------------------------------------------------------------------------------
 1 | #include "../../../header/InferTools/TensorExtractor/TensorExtractorManager.hpp"
 2 | #include <map>
 3 | #include "../../../header/Logger/MoeSSLogger.hpp"
 4 | 
 5 | MoeVoiceStudioTensorExtractorHeader
 6 | 	inline std::map<std::wstring, GetTensorExtractorFn> RegisteredTensorExtractors;
 7 | 
 8 | void RegisterTensorExtractor(const std::wstring& _name, const GetTensorExtractorFn& _constructor_fn)  // first registration of a name wins
 9 | {
10 | 	if (RegisteredTensorExtractors.find(_name) != RegisteredTensorExtractors.end())
11 | 	{
12 | 		logger.log(L"[Warn] TensorExtractorNameConflict");  // duplicate name: warn and keep the existing entry
13 | 		return;
14 | 	}
15 | 	RegisteredTensorExtractors[_name] = _constructor_fn;
16 | }
17 | 
18 | TensorExtractor GetTensorExtractor(const std::wstring& _name, uint64_t _srcsr, uint64_t _sr, uint64_t _hop, bool _smix, bool _volume, uint64_t _hidden_size, uint64_t _nspeaker, const MoeVoiceStudioTensorExtractor::Others& _other)
19 | {
20 | 	const auto f_TensorExtractor = RegisteredTensorExtractors.find(_name);
21 | 	if (f_TensorExtractor != RegisteredTensorExtractors.end())
22 | 		return f_TensorExtractor->second(_srcsr, _sr, _hop, _smix, _volume, _hidden_size, _nspeaker, _other);
23 | 	throw std::runtime_error("Unable To Find An Available TensorExtractor");
24 | }
25 | 
26 | MoeVoiceStudioTensorExtractorEnd


--------------------------------------------------------------------------------
/libsvc/Modules/src/StringPreprocess.cpp:
--------------------------------------------------------------------------------
 1 | #include "../header/StringPreprocess.hpp"
 2 | #ifdef _WIN32
 3 | #include <Windows.h>
 4 | #else
 5 | #error
 6 | #endif
 7 | 
 8 | std::string to_byte_string(const std::wstring& input)
 9 | {
10 | 	std::vector<char> ByteString(input.length() * 6);
11 | 	WideCharToMultiByte(
12 | 		CP_UTF8,
13 | 		0,
14 | 		input.c_str(),
15 | 		int(input.length()),
16 | 		ByteString.data(),
17 | 		int(ByteString.size()),
18 | 		nullptr,
19 | 		nullptr
20 | 	);
21 | 	return ByteString.data();
22 | }
23 | 
24 | std::string to_ansi_string(const std::wstring& input)
25 | {
26 | 	std::vector<char> ByteString(input.length() * 6);
27 | 	WideCharToMultiByte(
28 | 		CP_ACP,
29 | 		0,
30 | 		input.c_str(),
31 | 		int(input.length()),
32 | 		ByteString.data(),
33 | 		int(ByteString.size()),
34 | 		nullptr,
35 | 		nullptr
36 | 	);
37 | 	return ByteString.data();
38 | }
39 | 
40 | std::wstring to_wide_string(const std::string& input)
41 | {
42 | 	std::vector<wchar_t> WideString(input.length() * 2);
43 | 	MultiByteToWideChar(
44 | 		CP_UTF8,
45 | 		0,
46 | 		input.c_str(),
47 | 		int(input.length()),
48 | 		WideString.data(),
49 | 		int(WideString.size())
50 | 	);
51 | 	return WideString.data();
52 | }
53 | 
54 | std::wstring string_vector_to_string(const std::vector<std::string>& vector)
55 | {
56 | 	std::wstring vecstr = L"[";
57 | 	for (const auto& it : vector)
58 | 		if (!it.empty())
59 | 			vecstr += L'\"' + to_wide_string(it) + L"\", ";
60 | 	if (vecstr.length() > 2)
61 | 		vecstr = vecstr.substr(0, vecstr.length() - 2);
62 | 	vecstr += L']';
63 | 	return vecstr;
64 | }
65 | 
// Renders a list of wide strings as `["a", "b"]`.
// Each non-empty element is wrapped in double quotes; empty elements are skipped.
// No escaping is applied to the elements. With nothing to print the result is `[]`.
std::wstring wstring_vector_to_string(const std::vector<std::wstring>& vector)
{
	std::wstring Joined(1, L'[');
	for (size_t Index = 0; Index < vector.size(); ++Index)
	{
		const std::wstring& Item = vector[Index];
		if (Item.empty())
			continue;
		Joined += L'\"';
		Joined += Item;
		Joined += L"\", ";
	}
	// Anything longer than the opening bracket ends in ", "; drop that separator.
	if (Joined.length() > 2)
		Joined.erase(Joined.length() - 2);
	Joined += L']';
	return Joined;
}


--------------------------------------------------------------------------------
/libsvc/README.md:
--------------------------------------------------------------------------------
 1 | # 使用方法
 2 | ### 构建
 3 | - 1、配置以下依赖:
 4 |   - [ffmpeg](https://ffmpeg.org/)
 5 |   - [onnxruntime](https://github.com/microsoft/onnxruntime)
 6 |   - [fftw](http://fftw.org/)
 7 |   - [openblas](https://github.com/OpenMathLib/OpenBLAS)
 8 |   - [faiss](https://github.com/facebookresearch/faiss)
 9 |   - [liblapack](https://netlib.org/lapack/)
10 | - 2、编译
11 | ---
12 | ### 使用动态库
13 | - 1、链接libsvc
14 | - 2、#include "libsvc/Api/header/libsvc.h"
15 | - 3、调用libsvc::Init()
16 | - 4、调用libsvc名称空间中的函数


--------------------------------------------------------------------------------
/libsvc/dllmain.cpp:
--------------------------------------------------------------------------------
 1 | // dllmain.cpp : 定义 DLL 应用程序的入口点。
 2 | #include "Windows.h"
 3 | 
 4 | BOOL APIENTRY DllMain( HMODULE hModule,
 5 |                        DWORD  ul_reason_for_call,
 6 |                        LPVOID lpReserved
 7 |                      )
 8 | {
 9 |     switch (ul_reason_for_call)
10 |     {
11 |     case DLL_PROCESS_ATTACH:
12 |     case DLL_THREAD_ATTACH:
13 |     case DLL_THREAD_DETACH:
14 |     case DLL_PROCESS_DETACH:
15 |         break;
16 |     }
17 |     return TRUE;
18 | }
19 | 
20 | 


--------------------------------------------------------------------------------
/libsvc/libsvc.vcxproj.user:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <PropertyGroup>
 4 |     <ShowAllFiles>false</ShowAllFiles>
 5 |   </PropertyGroup>
 6 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 7 |     <LocalDebuggerEnvironment>$(SolutionDir)Lib\Dll</LocalDebuggerEnvironment>
 8 |     <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
 9 |   </PropertyGroup>
10 | </Project>


--------------------------------------------------------------------------------
/libsvc/packages.config:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <packages>
3 |   <package id="VC-LTL" version="5.0.9" targetFramework="native" />
4 |   <package id="YY.NuGet.Import.Helper" version="1.0.0.4" targetFramework="native" />
5 | </packages>


--------------------------------------------------------------------------------
/libtts/Api/NativeApi.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/libtts/Api/NativeApi.cpp


--------------------------------------------------------------------------------
/libtts/Modules/Lib/MJson/MJson.cpp:
--------------------------------------------------------------------------------
 1 | #include "MJson.h"
 2 | 
 3 | class FileGuard
 4 | {
 5 | public:
 6 | 	FileGuard() = delete;
 7 | 	~FileGuard()
 8 | 	{
 9 | 		if (_fp) fclose(_fp);
10 | 		_fp = nullptr;
11 | 	}
12 | 	FileGuard(const char* _path)
13 | 	{
14 | 		if (_fp) fclose(_fp);
15 | 		_wfopen_s(&_fp, to_wide_string(_path).c_str(), L"rb");
16 | 	}
17 | 	FileGuard(const std::wstring& _path)
18 | 	{
19 | 		if (_fp) fclose(_fp);
20 | 		_wfopen_s(&_fp, _path.c_str(), L"rb");
21 | 	}
22 | 	operator FILE* () const
23 | 	{
24 | 		return _fp;
25 | 	}
26 | private:
27 | 	FILE* _fp = nullptr;
28 | 	static std::wstring to_wide_string(const std::string& input)
29 | 	{
30 | 		std::vector<wchar_t> WideString(input.length() * 2);
31 | 		MultiByteToWideChar(
32 | 			CP_UTF8,
33 | 			0,
34 | 			input.c_str(),
35 | 			int(input.length()),
36 | 			WideString.data(),
37 | 			int(WideString.size())
38 | 		);
39 | 		return WideString.data();
40 | 	}
41 | };
42 | 
43 | MJson::MJson(const char* _path)
44 | {
45 | 	const auto file = FileGuard(_path);
46 | 	_document = yyjson_read_file(_path, YYJSON_READ_NOFLAG, nullptr, nullptr);
47 | 	if (!_document)
48 | 		throw std::exception("Json Parse Error !");
49 | 	root = yyjson_doc_get_root(_document);
50 | }
51 | 
52 | MJson::MJson(const std::wstring& _path)
53 | {
54 | 	const FileGuard fp(_path);
55 | 	_document = yyjson_read_fp(fp, YYJSON_READ_NOFLAG, nullptr, nullptr);
56 | 	if (!_document)
57 | 		throw std::exception("File Not Exists!");
58 | 	root = yyjson_doc_get_root(_document);
59 | }
60 | 
61 | MJson::MJson(const std::string& _data, bool _read_from_string)
62 | {
63 | 	if (_read_from_string)
64 | 		_document = yyjson_read(_data.c_str(), _data.length(), YYJSON_READ_NOFLAG);
65 | 	else
66 | 	{
67 | 		const auto file = FileGuard(_data.c_str());
68 | 		_document = yyjson_read_fp(file, YYJSON_READ_NOFLAG, nullptr, nullptr);
69 | 	}
70 | 	if (!_document)
71 | 		throw std::exception("Json Parse Error !");
72 | 	root = yyjson_doc_get_root(_document);
73 | }
74 | 


--------------------------------------------------------------------------------
/libtts/Modules/Lib/World/src/world/constantnumbers.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //
 6 | // This header file only defines constant numbers used for several function.
 7 | //-----------------------------------------------------------------------------
 8 | #ifndef WORLD_CONSTANT_NUMBERS_H_
 9 | #define WORLD_CONSTANT_NUMBERS_H_
10 | 
11 | namespace world {
12 |   // for Dio()
13 |   const double kCutOff = 50.0;
14 | 
15 |   // for StoneMask()
16 |   const double kFloorF0StoneMask = 40.0;
17 | 
18 |   const double kPi = 3.1415926535897932384;
19 |   const double kMySafeGuardMinimum = 0.000000000001;
20 |   const double kEps = 0.00000000000000022204460492503131;
21 |   const double kFloorF0 = 71.0;
22 |   const double kCeilF0 = 800.0;
23 |   const double kDefaultF0 = 500.0;
24 |   const double kLog2 = 0.69314718055994529;
25 |   // Maximum standard deviation not to be selected as a best f0.
26 |   const double kMaximumValue = 100000.0;
27 | 
28 |   // Note to me (fs: 48000)
29 |   // 71 Hz is the limit to maintain the FFT size at 2048.
30 |   // If we use 70 Hz as FLOOR_F0, the FFT size of 4096 is required.
31 | 
32 |   // for D4C()
33 |   const int kHanning = 1;
34 |   const int kBlackman = 2;
35 |   const double kFrequencyInterval = 3000.0;
36 |   const double kUpperLimit = 15000.0;
37 |   const double kThreshold = 0.85;
38 |   const double kFloorF0D4C = 47.0;
39 | 
40 |   // for Codec (Mel scale)
41 |   // S. Stevens & J. Volkmann,
42 |   // The Relation of Pitch to Frequency: A Revised Scale,
43 |   // American Journal of Psychology, vol. 53, no. 3, pp. 329-353, 1940.
44 |   const double kM0 = 1127.01048;
45 |   const double kF0 = 700.0;
46 |   const double kFloorFrequency = 40.0;
47 |   const double kCeilFrequency = 20000.0;
48 | 
49 | }  // namespace world
50 | 
51 | #endif  // WORLD_CONSTANT_NUMBERS_H_
52 | 


--------------------------------------------------------------------------------
/libtts/Modules/Lib/World/src/world/d4c.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //-----------------------------------------------------------------------------
 6 | #ifndef WORLD_D4C_H_
 7 | #define WORLD_D4C_H_
 8 | 
 9 | #include "world/macrodefinitions.h"
10 | 
11 | WORLD_BEGIN_C_DECLS
12 | 
13 | //-----------------------------------------------------------------------------
14 | // Struct for D4C
15 | //-----------------------------------------------------------------------------
16 | typedef struct {
17 |   double threshold;
18 | } D4COption;
19 | 
20 | //-----------------------------------------------------------------------------
21 | // D4C() calculates the aperiodicity estimated by D4C.
22 | //
23 | // Input:
24 | //   x                  : Input signal
25 | //   x_length           : Length of x
26 | //   fs                 : Sampling frequency
27 | //   temporal_positions : Time axis
28 | //   f0                 : F0 contour
29 | //   f0_length          : Length of F0 contour
30 | //   fft_size           : Number of samples of the aperiodicity in one frame.
31 | //                      : It is given by the equation fft_size / 2 + 1.
32 | // Output:
33 | //   aperiodicity  : Aperiodicity estimated by D4C.
34 | //-----------------------------------------------------------------------------
35 | void D4C(const double *x, int x_length, int fs,
36 |     const double *temporal_positions, const double *f0, int f0_length,
37 |     int fft_size, const D4COption *option, double **aperiodicity);
38 | 
39 | //-----------------------------------------------------------------------------
40 | // InitializeD4COption allocates the memory to the struct and sets the
41 | // default parameters.
42 | //
43 | // Output:
44 | //   option   : Struct for the optional parameter.
45 | //-----------------------------------------------------------------------------
46 | void InitializeD4COption(D4COption *option);
47 | 
48 | WORLD_END_C_DECLS
49 | 
50 | #endif  // WORLD_D4C_H_
51 | 


--------------------------------------------------------------------------------
/libtts/Modules/Lib/World/src/world/dio.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //-----------------------------------------------------------------------------
 6 | #ifndef WORLD_DIO_H_
 7 | #define WORLD_DIO_H_
 8 | 
 9 | #include "macrodefinitions.h"
10 | 
11 | WORLD_BEGIN_C_DECLS
12 | 
13 | //-----------------------------------------------------------------------------
14 | // Struct for DIO
15 | //-----------------------------------------------------------------------------
16 | typedef struct {
17 |   double f0_floor;
18 |   double f0_ceil;
19 |   double channels_in_octave;
20 |   double frame_period;  // msec
21 |   int speed;  // (1, 2, ..., 12)
22 |   double allowed_range;  // Threshold used for fixing the F0 contour.
23 | } DioOption;
24 | 
25 | //-----------------------------------------------------------------------------
26 | // DIO
27 | //
28 | // Input:
29 | //   x                    : Input signal
30 | //   x_length             : Length of x
31 | //   fs                   : Sampling frequency
32 | //   option               : Struct to order the parameter for DIO
33 | //
34 | // Output:
35 | //   temporal_positions   : Temporal positions.
36 | //   f0                   : F0 contour.
37 | //-----------------------------------------------------------------------------
38 | void Dio(const double *x, int x_length, int fs, const DioOption *option,
39 |   double *temporal_positions, double *f0);
40 | 
41 | //-----------------------------------------------------------------------------
42 | // InitializeDioOption allocates the memory to the struct and sets the
43 | // default parameters.
44 | //
45 | // Output:
46 | //   option   : Struct for the optional parameter.
47 | //-----------------------------------------------------------------------------
48 | void InitializeDioOption(DioOption *option);
49 | 
50 | //-----------------------------------------------------------------------------
51 | // GetSamplesForDIO() calculates the number of samples required for Dio().
52 | //
53 | // Input:
54 | //   fs             : Sampling frequency [Hz]
55 | //   x_length       : Length of the input signal [Sample].
56 | //   frame_period   : Frame shift [msec]
57 | //
58 | // Output:
59 | //   The number of samples required to store the results of Dio()
60 | //-----------------------------------------------------------------------------
61 | int GetSamplesForDIO(int fs, int x_length, double frame_period);
62 | 
63 | WORLD_END_C_DECLS
64 | 
65 | #endif  // WORLD_DIO_H_
66 | 


--------------------------------------------------------------------------------
/libtts/Modules/Lib/World/src/world/fft.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //
 6 | // These functions and variables are defined to use FFT as well as FFTW
 7 | // Please see fft.cpp to show the detailed information
 8 | //-----------------------------------------------------------------------------
 9 | #ifndef WORLD_FFT_H_
10 | #define WORLD_FFT_H_
11 | 
12 | #include "macrodefinitions.h"
13 | 
14 | WORLD_BEGIN_C_DECLS
15 | 
16 | // Commands for FFT (This is the same as FFTW)
17 | #define FFT_FORWARD 1
18 | #define FFT_BACKWARD 2
19 | #define FFT_ESTIMATE 3
20 | 
21 | // Complex number for FFT
22 | typedef double fft_complex[2];
23 | // Struct used for FFT
24 | typedef struct {
25 |   int n;
26 |   int sign;
27 |   unsigned int flags;
28 |   fft_complex *c_in;
29 |   double *in;
30 |   fft_complex *c_out;
31 |   double *out;
32 |   double *input;
33 |   int *ip;
34 |   double *w;
35 | } fft_plan;
36 | 
37 | fft_plan fft_plan_dft_1d(int n, fft_complex *in, fft_complex *out, int sign,
38 |   unsigned int flags);
39 | fft_plan fft_plan_dft_c2r_1d(int n, fft_complex *in, double *out,
40 |   unsigned int flags);
41 | fft_plan fft_plan_dft_r2c_1d(int n, double *in, fft_complex *out,
42 |   unsigned int flags);
43 | void fft_execute(fft_plan p);
44 | void fft_destroy_plan(fft_plan p);
45 | 
46 | WORLD_END_C_DECLS
47 | 
48 | #endif  // WORLD_FFT_H_
49 | 


--------------------------------------------------------------------------------
/libtts/Modules/Lib/World/src/world/harvest.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //-----------------------------------------------------------------------------
 6 | #ifndef WORLD_HARVEST_H_
 7 | #define WORLD_HARVEST_H_
 8 | 
 9 | #include "macrodefinitions.h"
10 | 
11 | WORLD_BEGIN_C_DECLS
12 | 
13 | //-----------------------------------------------------------------------------
14 | // Struct for Harvest
15 | //-----------------------------------------------------------------------------
16 | typedef struct {
17 |   double f0_floor;
18 |   double f0_ceil;
19 |   double frame_period;
20 | } HarvestOption;
21 | 
22 | //-----------------------------------------------------------------------------
23 | // Harvest
24 | //
25 | // Input:
26 | //   x                    : Input signal
27 | //   x_length             : Length of x
28 | //   fs                   : Sampling frequency
29 | //   option               : Struct to order the parameter for Harvest
30 | //
31 | // Output:
32 | //   temporal_positions   : Temporal positions.
33 | //   f0                   : F0 contour.
34 | //-----------------------------------------------------------------------------
35 | void Harvest(const double *x, int x_length, int fs,
36 |   const HarvestOption *option, double *temporal_positions, double *f0);
37 | 
38 | //-----------------------------------------------------------------------------
39 | // InitializeHarvestOption allocates the memory to the struct and sets the
40 | // default parameters.
41 | //
42 | // Output:
43 | //   option   : Struct for the optional parameter.
44 | //-----------------------------------------------------------------------------
45 | void InitializeHarvestOption(HarvestOption *option);
46 | 
47 | //-----------------------------------------------------------------------------
48 | // GetSamplesForHarvest() calculates the number of samples required for
49 | // Harvest().
50 | //
51 | // Input:
52 | //   fs             : Sampling frequency [Hz]
53 | //   x_length       : Length of the input signal [Sample]
54 | //   frame_period   : Frame shift [msec]
55 | //
56 | // Output:
57 | //   The number of samples required to store the results of Harvest().
58 | //-----------------------------------------------------------------------------
59 | int GetSamplesForHarvest(int fs, int x_length, double frame_period);
60 | 
61 | WORLD_END_C_DECLS
62 | 
63 | #endif  // WORLD_HARVEST_H_
64 | 


--------------------------------------------------------------------------------
/libtts/Modules/Lib/World/src/world/stonemask.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //-----------------------------------------------------------------------------
 6 | #ifndef WORLD_STONEMASK_H_
 7 | #define WORLD_STONEMASK_H_
 8 | 
 9 | #include "macrodefinitions.h"
10 | 
11 | WORLD_BEGIN_C_DECLS
12 | 
13 | //-----------------------------------------------------------------------------
14 | // StoneMask() refines the estimated F0 by Dio()
15 | //
16 | // Input:
17 | //   x                      : Input signal
18 | //   x_length               : Length of the input signal
19 | //   fs                     : Sampling frequency
20 | //   temporal_positions     : Temporal information
21 | //   f0                     : f0 contour
22 | //   f0_length              : Length of f0
23 | //
24 | // Output:
25 | //   refined_f0             : Refined F0
26 | //-----------------------------------------------------------------------------
27 | void StoneMask(const double *x, int x_length, int fs,
28 |     const double *temporal_positions, const double *f0, int f0_length,
29 |     double *refined_f0);
30 | 
31 | WORLD_END_C_DECLS
32 | 
33 | #endif  // WORLD_STONEMASK_H_
34 | 


--------------------------------------------------------------------------------
/libtts/Modules/Lib/World/src/world/synthesis.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //-----------------------------------------------------------------------------
 6 | #ifndef WORLD_SYNTHESIS_H_
 7 | #define WORLD_SYNTHESIS_H_
 8 | 
 9 | #include "world/macrodefinitions.h"
10 | 
11 | WORLD_BEGIN_C_DECLS
12 | 
13 | //-----------------------------------------------------------------------------
14 | // Synthesis() synthesizes the voice based on f0, spectrogram and
15 | // aperiodicity (not excitation signal).
16 | //
17 | // Input:
18 | //   f0                   : f0 contour
19 | //   f0_length            : Length of f0
20 | //   spectrogram          : Spectrogram estimated by CheapTrick
21 | //   fft_size             : FFT size
22 | //   aperiodicity         : Aperiodicity spectrogram based on D4C
23 | //   frame_period         : Temporal period used for the analysis
24 | //   fs                   : Sampling frequency
25 | //   y_length             : Length of the output signal (Memory of y has been
26 | //                          allocated in advance)
27 | // Output:
28 | //   y                    : Calculated speech
29 | //-----------------------------------------------------------------------------
30 | void Synthesis(const double *f0, int f0_length, 
31 |     const double * const *spectrogram, const double * const *aperiodicity, 
32 |     int fft_size, double frame_period, int fs, int y_length, double *y);
33 | 
34 | WORLD_END_C_DECLS
35 | 
36 | #endif  // WORLD_SYNTHESIS_H_
37 | 


--------------------------------------------------------------------------------
/libtts/Modules/Lib/World/tools/audioio.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // Copyright 2012 Masanori Morise
 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise)
 4 | // Last update: 2021/02/15
 5 | //-----------------------------------------------------------------------------
 6 | #ifndef WORLD_AUDIOIO_H_
 7 | #define WORLD_AUDIOIO_H_
 8 | 
 9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 | 
13 | //-----------------------------------------------------------------------------
14 | // wavwrite() writes a .wav file.
15 | // Input:
16 | //   x          : Input signal
17 | //   x_length   : Signal length of x [sample]
18 | //   fs         : Sampling frequency [Hz]
19 | //   nbit       : Quantization bit [bit]
20 | //   filename   : Name of the output signal.
21 | // Caution:
22 | //   The variable nbit is not used in this function.
23 | //   This function only supports the 16 bit.
24 | //-----------------------------------------------------------------------------
25 | void wavwrite(const double *x, int x_length, int fs, int nbit,
26 |   const char *filename);
27 | 
28 | //-----------------------------------------------------------------------------
29 | // GetAudioLength() returns the length of .wav file.
30 | // Input:
31 | //   filename     : Filename of a .wav file.
32 | // Output:
33 | //   The number of samples of the file .wav
34 | //-----------------------------------------------------------------------------
35 | int GetAudioLength(const char *filename);
36 | 
37 | //-----------------------------------------------------------------------------
38 | // wavread() reads a .wav file.
39 | // The memory of output x must be allocated in advance.
40 | // Input:
41 | //   filename     : Filename of the input file.
42 | // Output:
43 | //   fs           : Sampling frequency [Hz]
44 | //   nbit         : Quantization bit [bit]
45 | //   x            : The output waveform.
46 | //-----------------------------------------------------------------------------
47 | void wavread(const char* filename, int *fs, int *nbit, double *x);
48 | 
49 | #ifdef __cplusplus
50 | }
51 | #endif
52 | 
53 | #endif  // WORLD_AUDIOIO_H_
54 | 


--------------------------------------------------------------------------------
/libtts/Modules/Logger/MoeSSLogger.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <string>
 3 | #include "../StringPreprocess.hpp"
 4 | #include <filesystem>
 5 | #include <mutex>
 6 | #define __MOEVS_DEBUG_MESSAGE(msg) __MOEVS_DEBUG_INFO(__FILE__, __LINE__, msg)
 7 | #define logger MoeSSLogger::GetLogger()
 8 | inline std::string __MOEVS_DEBUG_INFO(const char* filename, int line, const char* msg)
 9 | {
10 | 	return std::string("[In \"") + std::filesystem::path(filename).filename().string() + "\" Line " + std::to_string(line) + "] " + msg;
11 | }
12 | 
// Formats a wide debug message as `[In "<file>" Line <line>] <msg>`, where
// <file> is only the final component of `filename`.
inline std::wstring __MOEVS_DEBUG_INFO(const char* filename, int line, const wchar_t* msg)
{
	const std::wstring SourceName = std::filesystem::path(filename).filename().wstring();

	std::wstring Text(L"[In \"");
	Text += SourceName;
	Text += L"\" Line ";
	Text += std::to_wstring(line);
	Text += L"] ";
	Text += msg;
	return Text;
}
17 | 
namespace MoeSSLogger
{
	// Logging front end with separate "log" and "error" channels.
	// Member functions other than enable() are defined outside this header.
	class Logger
	{
	public:
		// Signature of a user-supplied sink.
		// NOTE(review): the meaning of the two string arguments is not visible here.
		using logger_fn = void(*)(const wchar_t*, const char*);

		Logger();
		~Logger();
		Logger(logger_fn error_fn, logger_fn log_fn);

		void log(const std::wstring&);
		void log(const char*);
		void error(const std::wstring&);
		void error(const char*);

		// Stores the flag in `filelogger`.
		void enable(bool _filelogger) { filelogger = _filelogger; }

	private:
		bool custom_logger_fn = false;
		std::filesystem::path cur_log_dir;
		std::filesystem::path logpath;
		std::filesystem::path errorpath;
		logger_fn cerror_fn = nullptr;
		logger_fn cloggerfn = nullptr;
		FILE* log_file = nullptr;
		FILE* error_file = nullptr;
		bool filelogger = true;
		std::mutex mx;
	};

	// Accessor behind the `logger` macro.
	Logger& GetLogger();
}


--------------------------------------------------------------------------------
/libtts/Modules/Models/EnvManager.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * FileName: EnvManager.hpp
 3 |  * Note: MoeVoiceStudioCore 环境管理
 4 |  *
 5 |  * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com)
 6 |  *
 7 |  * This file is part of MoeVoiceStudioCore library.
 8 |  * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the
 9 |  * GNU Affero General Public License as published by the Free Software Foundation, either version 3
10 |  * of the License, or any later version.
11 |  *
12 |  * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
13 |  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 |  * See the GNU Affero General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Affero General Public License along with Foobar.
17 |  * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>.
18 |  *
19 |  * date: 2022-10-17 Create
20 | */
21 | 
22 | #pragma once
23 | #include <onnxruntime_cxx_api.h>
24 | 
25 | #define MoeVoiceStudioCoreEnvManagerHeader namespace moevsenv{
26 | #define MoeVoiceStudioCoreEnvManagerEnd }
27 | 
28 | MoeVoiceStudioCoreEnvManagerHeader
29 | class MoeVoiceStudioEnv
30 | {
31 | public:
32 | 	MoeVoiceStudioEnv() = default;
33 | 	~MoeVoiceStudioEnv() { Destory(); }
34 | 	void Load(unsigned ThreadCount, unsigned DeviceID, unsigned Provider);
35 | 	void Destory();
36 | 	[[nodiscard]] bool IsEnabled() const;
37 | 	[[nodiscard]] Ort::Env* GetEnv() const { return GlobalOrtEnv; }
38 | 	[[nodiscard]] Ort::SessionOptions* GetSessionOptions() const { return GlobalOrtSessionOptions; }
39 | 	[[nodiscard]] Ort::MemoryInfo* GetMemoryInfo() const { return GlobalOrtMemoryInfo; }
40 | 	[[nodiscard]] int GetCurThreadCount() const { return (int)CurThreadCount; }
41 | 	[[nodiscard]] int GetCurDeviceID() const { return (int)CurDeviceID; }
42 | 	[[nodiscard]] int GetCurProvider() const { return (int)CurProvider; }
43 | private:
44 | 	void Create(unsigned ThreadCount_, unsigned DeviceID_, unsigned ExecutionProvider_);
45 | 	Ort::Env* GlobalOrtEnv = nullptr;
46 | 	Ort::SessionOptions* GlobalOrtSessionOptions = nullptr;
47 | 	Ort::MemoryInfo* GlobalOrtMemoryInfo = nullptr;
48 | 	unsigned CurThreadCount = unsigned(-1);
49 | 	unsigned CurDeviceID = unsigned(-1);
50 | 	unsigned CurProvider = unsigned(-1);
51 | 	OrtCUDAProviderOptionsV2* cuda_option_v2 = nullptr;
52 | };
53 | 
54 | MoeVoiceStudioEnv& GetGlobalMoeVSEnv();
55 | 
56 | MoeVoiceStudioCoreEnvManagerEnd


--------------------------------------------------------------------------------
/libtts/Modules/Models/header/Tacotron.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "ModelBase.hpp"
 3 | 
 4 | INFERCLASSHEADER
 5 | 
 6 | class Tacotron2 : public TTS
 7 | {
 8 | public:
 9 |     Tacotron2(const MJson&, const callback&, const callback_params&, const DurationCallback&, Device _dev = Device::CPU);
10 |     ~Tacotron2() override;
11 |     // Both Inference overloads return 16-bit samples (presumably PCM audio -- confirm in the .cpp).
12 |     std::vector<int16_t> Inference(std::wstring& _inputLens) const override;
13 |     [[nodiscard]] std::vector<int16_t> Inference(const MoeVSProject::TTSParams& _input) const override;
14 |     // Appends one value to each of the Shape[1] rows (Shape[2] floats each) stored in tensorA, taking
15 |     // row r's value from tensorB[r], then grows Shape[2] by one. Back-to-front keeps insert offsets valid.
16 |     static void cat(std::vector<float>& tensorA, std::vector<int64>& Shape, const MTensor& tensorB) {
17 |         const auto column = tensorB.GetTensorData<float>();
18 |         const int64 rowLen = Shape[2];
19 |         for (int64 row = Shape[1]; row > 0; --row)
20 |             tensorA.insert(tensorA.begin() + (row * rowLen), column[row - 1]);
21 |         Shape[2] = rowLen + 1;
22 |     }
23 | private:
24 |     Ort::Session* sessionEncoder = nullptr;
25 |     Ort::Session* sessionDecoderIter = nullptr;
26 |     Ort::Session* sessionPostNet = nullptr;
27 |     Ort::Session* sessionGan = nullptr;
28 | 
29 |     const std::vector<const char*> ganIn = { "x" };
30 |     const std::vector<const char*> ganOut = { "audio" };
31 |     const std::vector<const char*> inputNodeNamesSessionEncoder = { "sequences","sequence_lengths" };
32 |     const std::vector<const char*> outputNodeNamesSessionEncoder = { "memory","processed_memory","lens" };
33 |     const std::vector<const char*> inputNodeNamesSessionDecoderIter = { "decoder_input","attention_hidden","attention_cell","decoder_hidden","decoder_cell","attention_weights","attention_weights_cum","attention_context","memory","processed_memory","mask" };
34 |     const std::vector<const char*> outputNodeNamesSessionDecoderIter = { "decoder_output","gate_prediction","out_attention_hidden","out_attention_cell","out_decoder_hidden","out_decoder_cell","out_attention_weights","out_attention_weights_cum","out_attention_context" };
35 |     const std::vector<const char*> inputNodeNamesSessionPostNet = { "mel_outputs" };
36 |     const std::vector<const char*> outputNodeNamesSessionPostNet = { "mel_outputs_postnet" };
37 | };
38 | 
39 | INFERCLASSEND


--------------------------------------------------------------------------------
/libtts/Modules/Modules.cpp:
--------------------------------------------------------------------------------
 1 | #include "Modules.hpp"
 2 | #include <stdexcept>
 3 | namespace MoeVSModuleManager
 4 | {
 5 | 	// Set once MoeVoiceStudioCoreInitSetup() has run, so later calls return immediately.
 6 | 	bool MoeVoiceStudioCoreInitStat = false;
 7 | 
 8 | 	// Currently loaded TTS model (allocated in LoadTTSModel, freed in UnloadTTSModel); nullptr when none.
 9 | 	MoeVoiceStudioCore::TextToSpeech* CurTextToSpeechModel = nullptr;
10 | 
11 | 	/**
12 | 	 * \brief One-time setup of the default cleaner.
13 | 	 *
14 | 	 * When <current folder>/G2P/BasicCleaner.dll is accessible, it is handed to the default
15 | 	 * cleaner via loadG2p, followed by the dictionaries in <current folder>/G2P and
16 | 	 * <current folder>/Dict/BasicDict.json. The init flag is set even when the DLL is
17 | 	 * missing, so the probe is performed only once.
18 | 	 */
19 | 	void MoeVoiceStudioCoreInitSetup()
20 | 	{
21 | 		if (MoeVoiceStudioCoreInitStat)
22 | 			return;
23 | 		// Resolve the base folder once instead of once per path.
24 | 		const auto BaseDir = GetCurrentFolder();
25 | 		const auto BasicCleanerDir = BaseDir + L"/G2P/BasicCleaner.dll";
26 | 		if (_waccess(BasicCleanerDir.c_str(), 0) != -1)
27 | 		{
28 | 			const auto Cleaner = MoeVSG2P::GetDefCleaner();
29 | 			Cleaner->loadG2p(BasicCleanerDir);
30 | 			Cleaner->GetCleaner().LoadDict(BaseDir + L"/G2P");
31 | 			Cleaner->loadDict(BaseDir + L"/Dict/BasicDict.json");
32 | 		}
33 | 		MoeVoiceStudioCoreInitStat = true;
34 | 	}
35 | 
36 | 	/**
37 | 	 * \brief Get the current model.
38 | 	 * \return Pointer to the loaded model, or nullptr when no model is loaded.
39 | 	 */
40 | 	MoeVoiceStudioCore::TextToSpeech* GetCurTTSModel()
41 | 	{
42 | 		return CurTextToSpeechModel;
43 | 	}
44 | 
45 | 	/**
46 | 	 * \brief Destroy the current model (if any) and reset SamplingRate / SpeakerCount to 32000 / 0.
47 | 	 */
48 | 	void UnloadTTSModel()
49 | 	{
50 | 		delete CurTextToSpeechModel;
51 | 		CurTextToSpeechModel = nullptr;
52 | 		SamplingRate = 32000;
53 | 		SpeakerCount = 0;
54 | 	}
55 | 
56 | 	/**
57 | 	 * \brief Unload the current model, then load the one described by Config.
58 | 	 *
59 | 	 * Config["Type"] selects the implementation: "GPT-SoVits" builds a GptSoVits model (after
60 | 	 * calling DestoryAllBerts()); "Tacotron" / "Tacotron2" are rejected; every other value
61 | 	 * builds a Vits model. On success SamplingRate is refreshed from the new model.
62 | 	 *
63 | 	 * \param Config JSON configuration of the model
64 | 	 * \param Callback Progress callback forwarded to the model constructor
65 | 	 * \param ProviderID Integer value converted to MoeVoiceStudioModule::ExecutionProviders
66 | 	 * \param NumThread Thread count forwarded to the model constructor
67 | 	 * \param DeviceID Device ID forwarded to the model constructor
68 | 	 * \param DurationCallback Duration callback forwarded to the model constructor
69 | 	 * \throws std::runtime_error (a std::exception) when the type is "Tacotron" or "Tacotron2"
70 | 	 */
71 | 	void LoadTTSModel(const MJson& Config,
72 | 		const MoeVoiceStudioCore::MoeVoiceStudioModule::ProgressCallback& Callback,
73 | 		int ProviderID, int NumThread, int DeviceID,
74 | 		const MoeVoiceStudioCore::TextToSpeech::DurationCallback& DurationCallback)
75 | 	{
76 | 		// The previous model is released before anything else, so a failed load leaves no model loaded.
77 | 		UnloadTTSModel();
78 | 		const auto ModelType = Config["Type"].GetString();
79 | 		if (ModelType == "Tacotron" || ModelType == "Tacotron2")
80 | 			throw std::runtime_error("Tacotron Not Support Yet"); // std::exception(const char*) is MSVC-only
81 | 		const auto Provider = MoeVoiceStudioCore::MoeVoiceStudioModule::ExecutionProviders(ProviderID);
82 | 		if (ModelType == "GPT-SoVits")
83 | 		{
84 | 			MoeVoiceStudioCore::DestoryAllBerts();
85 | 			CurTextToSpeechModel = new MoeVoiceStudioCore::GptSoVits(
86 | 				Config, Callback, DurationCallback, Provider, DeviceID, NumThread
87 | 			);
88 | 		}
89 | 		else
90 | 		{
91 | 			CurTextToSpeechModel = new MoeVoiceStudioCore::Vits(
92 | 				Config, Callback, DurationCallback, Provider, DeviceID, NumThread
93 | 			);
94 | 		}
95 | 		SamplingRate = CurTextToSpeechModel->GetSamplingRate();
96 | 	}
97 | 
98 | }


--------------------------------------------------------------------------------
/libtts/Modules/Modules.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * FileName: Modules.hpp
 3 |  * Note: MoeVoiceStudioCore component management
 4 |  *
 5 |  * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com)
 6 |  *
 7 |  * This file is part of MoeVoiceStudioCore library.
 8 |  * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the
 9 |  * GNU Affero General Public License as published by the Free Software Foundation, either version 3
10 |  * of the License, or any later version.
11 |  *
12 |  * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
13 |  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 |  * See the GNU Affero General Public License for more details.
15 |  *
16 |  * You should have received a copy of the GNU Affero General Public License along with MoeVoiceStudioCore library.
17 |  * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>.
18 |  *
19 |  * date: 2022-10-17 Create
20 | */
21 | 
22 | #pragma once
23 | #include "Models/header/Vits.hpp"
24 | #include "Models/header/GPT-SoVits.hpp"
25 | 
26 | namespace MoeVSModuleManager
27 | {
28 | 	inline int64_t SamplingRate = 32000; // set from the model in LoadTTSModel(); reset to 32000 by UnloadTTSModel()
29 | 	inline int64_t SpeakerCount = 0; // reset to 0 by UnloadTTSModel()
30 | 
31 | 	/**
32 | 	 * \brief Initialize all components
33 | 	 */
34 | 	void MoeVoiceStudioCoreInitSetup();
35 | 
36 | 	/**
37 | 	 * \brief Get the current model
38 | 	 * \return Pointer to the current model
39 | 	 */
40 | 	MoeVoiceStudioCore::TextToSpeech* GetCurTTSModel();
41 | 
42 | 	/**
43 | 	 * \brief Unload the model
44 | 	 */
45 | 	void UnloadTTSModel();
46 | 
47 | 	/**
48 | 	 * \brief Load a model
49 | 	 * \param Config An MJson instance (the JSON of the configuration file)
50 | 	 * \param Callback Progress-bar callback function
51 | 	 * \param ProviderID ID of the provider among all providers (follows the enum class definition)
52 | 	 * \param NumThread Number of threads for CPU inference (best set fairly high; operators unsupported on GPU may also fall back to CPU)
53 | 	 * \param DeviceID GPU device ID
54 | 	 * \param DurationCallback Duration callback (defaults to a no-op; capture-less because a non-local lambda may not have a capture-default)
55 | 	 */
56 | 	void LoadTTSModel(const MJson& Config,
57 | 		const MoeVoiceStudioCore::MoeVoiceStudioModule::ProgressCallback& Callback,
58 | 		int ProviderID, int NumThread, int DeviceID,
59 | 		const MoeVoiceStudioCore::TextToSpeech::DurationCallback& DurationCallback = [](std::vector<float>&) {});
60 | }
61 | 
62 | namespace MoeVSRename
63 | {
64 | 	using Vits = MoeVoiceStudioCore::Vits; // alias: MoeVSRename::Vits names MoeVoiceStudioCore::Vits
65 | }
66 | 
67 | 


--------------------------------------------------------------------------------
/libtts/Modules/README.md:
--------------------------------------------------------------------------------
 1 | # Example
 2 | ```c++
 3 | #include "Modules/Models/header/Vits.hpp"
 4 | 
 5 | int main(){
 6 |   rapidjson::Document Config;
 7 |   Config.Parse("Your Config");
 8 |   
 9 |   //Progress bar
10 |   InferClass::BaseModelType::callback a_callback = [](size_t a, size_t b) {std::cout << std::to_string((float)a * 100.f / (float)b) << "%\n"; };
11 |   
12 |   //return params for inference
13 |   InferClass::BaseModelType::callback_params b_callback = []()  
14 | 	{
15 | 		auto cbaaa = InferClass::InferConfigs();
16 | 		cbaaa.kmeans_rate = 0.5;
17 | 		cbaaa.keys = 0;
18 | 		return cbaaa;
19 | 	};
20 |   
21 |   //modify duration per phoneme
22 |   InferClass::TTS::DurationCallback c_callback = [](std::vector<float>&) {};
23 |   
24 |   std::vector<int16_t> output;
25 |   try
26 |   {
27 |   	std::wstring inp(L"watashinoonaniomitekudasai");
28 |   	auto model = dynamic_cast<InferClass::BaseModelType*>(new InferClass::VitsSvc(Config, a_callback, b_callback));
29 |     
30 |   	output = model->Inference(inp);
31 |     
32 |   	Wav outWav(model->GetSamplingRate(), output.size() * 2, output.data());
33 |   	outWav.Writef(L"test.wav");
34 |     
35 |   	delete model;
36 |   }
37 |   catch(std::exception& e)
38 |   {
39 |   	std::cout << e.what();
40 |   }
41 | }
42 | 
43 | ```
44 | 


--------------------------------------------------------------------------------
/libtts/dllmain.cpp:
--------------------------------------------------------------------------------
 1 | // dllmain.cpp : Defines the entry point for the DLL application.
 2 | #include "windows.h"
 3 | 
 4 | // DLL entry point. Every notification reason (process/thread attach and
 5 | // detach) falls through to the same empty handler, so no work is done here;
 6 | // the function always returns TRUE.
 7 | BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
 8 | {
 9 |     switch (ul_reason_for_call)
10 |     {
11 |     case DLL_PROCESS_ATTACH:
12 |     case DLL_THREAD_ATTACH:
13 |     case DLL_THREAD_DETACH:
14 |     case DLL_PROCESS_DETACH:
15 |         break;
16 |     }
17 |     return TRUE;
18 | }
19 | 
20 | 


--------------------------------------------------------------------------------
/libtts/framework.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #ifdef LIBTTS_EXPORTS // defined: LibTTSApi marks symbols as exported
3 | #define LibTTSApi __declspec(dllexport)
4 | #else // not defined: LibTTSApi marks symbols as imported
5 | #define LibTTSApi __declspec(dllimport)
6 | #endif


--------------------------------------------------------------------------------
/logo/logo(AIGen).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/logo/logo(AIGen).png


--------------------------------------------------------------------------------
/logo/logo256(AIGen).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/logo/logo256(AIGen).png


--------------------------------------------------------------------------------
/logo/logo512(AIGen).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/logo/logo512(AIGen).png


--------------------------------------------------------------------------------