├── .gitattributes ├── .gitignore ├── .gitmodules ├── BasicDict.json ├── Bert ├── bert-base-japanese-v3 │ └── Tokenizer.json ├── chinese-roberta-wwm-ext-large │ └── Tokenizer.json └── deberta-v2-large-japanese │ └── Tokenizer.json ├── BertVits.md ├── CMakeLists.txt ├── CMakePresets.json ├── CSharpDemo ├── CSharpDemo.csproj ├── Program.cs ├── Properties │ ├── Resources.Designer.cs │ ├── Resources.resx │ └── launchSettings.json ├── README.md └── README_en.md ├── DotNetApi ├── DotNetApi.csproj ├── LibSvcApi.cs └── Properties │ └── launchSettings.json ├── DragonianSpeech.sln ├── LICENSE ├── Lib ├── MJson │ ├── MJson.h │ ├── yyjson.c │ └── yyjson.h ├── OnnxRuntimeDmlProvider │ ├── .signature.p7s │ ├── LICENSE.txt │ ├── Microsoft.ML.OnnxRuntime.DirectML.nuspec │ ├── ORT_icon_for_light_bg.png │ ├── Privacy.md │ ├── ThirdPartyNotices.txt │ ├── [Content_Types].xml │ ├── _rels │ │ └── .rels │ ├── build │ │ ├── native │ │ │ ├── Microsoft.ML.OnnxRuntime.DirectML.props │ │ │ ├── Microsoft.ML.OnnxRuntime.DirectML.targets │ │ │ └── include │ │ │ │ ├── cpu_provider_factory.h │ │ │ │ ├── dml_provider_factory.h │ │ │ │ ├── onnxruntime_c_api.h │ │ │ │ ├── onnxruntime_cxx_api.h │ │ │ │ ├── onnxruntime_cxx_inline.h │ │ │ │ ├── onnxruntime_run_options_config_keys.h │ │ │ │ ├── onnxruntime_session_options_config_keys.h │ │ │ │ └── provider_options.h │ │ ├── netstandard1.1 │ │ │ ├── Microsoft.ML.OnnxRuntime.DirectML.props │ │ │ └── Microsoft.ML.OnnxRuntime.DirectML.targets │ │ └── netstandard2.0 │ │ │ ├── Microsoft.ML.OnnxRuntime.DirectML.props │ │ │ └── Microsoft.ML.OnnxRuntime.DirectML.targets │ ├── package │ │ └── services │ │ │ └── metadata │ │ │ └── core-properties │ │ │ └── c7795757db2346b9bcfb932f99cdb33f.psmdcp │ └── runtimes │ │ └── win-x64 │ │ └── native │ │ └── onnxruntime.lib ├── World │ ├── LICENSE.txt │ ├── src │ │ ├── cheaptrick.cpp │ │ ├── codec.cpp │ │ ├── common.cpp │ │ ├── d4c.cpp │ │ ├── dio.cpp │ │ ├── fft.cpp │ │ ├── harvest.cpp │ │ ├── matlabfunctions.cpp │ │ ├── 
stonemask.cpp │ │ ├── synthesis.cpp │ │ ├── synthesisrealtime.cpp │ │ └── world │ │ │ ├── cheaptrick.h │ │ │ ├── codec.h │ │ │ ├── common.h │ │ │ ├── constantnumbers.h │ │ │ ├── d4c.h │ │ │ ├── dio.h │ │ │ ├── fft.h │ │ │ ├── harvest.h │ │ │ ├── macrodefinitions.h │ │ │ ├── matlabfunctions.h │ │ │ ├── stonemask.h │ │ │ ├── synthesis.h │ │ │ └── synthesisrealtime.h │ └── tools │ │ ├── audioio.cpp │ │ ├── audioio.h │ │ ├── parameterio.cpp │ │ └── parameterio.h └── ffmpeg-4.2.1 │ ├── COPYING.GPLv3 │ ├── COPYING.LGPLv3 │ ├── Lib │ ├── avcodec.lib │ ├── avformat.lib │ ├── avutil.lib │ ├── swresample.lib │ └── swscale.lib │ └── include │ ├── libavcodec │ ├── ac3_parser.h │ ├── adts_parser.h │ ├── avcodec.h │ ├── avdct.h │ ├── avfft.h │ ├── d3d11va.h │ ├── dirac.h │ ├── dv_profile.h │ ├── dxva2.h │ ├── jni.h │ ├── mediacodec.h │ ├── qsv.h │ ├── vaapi.h │ ├── vdpau.h │ ├── version.h │ ├── videotoolbox.h │ ├── vorbis_parser.h │ └── xvmc.h │ ├── libavdevice │ ├── avdevice.h │ └── version.h │ ├── libavfilter │ ├── avfilter.h │ ├── buffersink.h │ ├── buffersrc.h │ └── version.h │ ├── libavformat │ ├── avformat.h │ ├── avio.h │ └── version.h │ ├── libavutil │ ├── adler32.h │ ├── aes.h │ ├── aes_ctr.h │ ├── attributes.h │ ├── audio_fifo.h │ ├── avassert.h │ ├── avconfig.h │ ├── avstring.h │ ├── avutil.h │ ├── base64.h │ ├── blowfish.h │ ├── bprint.h │ ├── bswap.h │ ├── buffer.h │ ├── camellia.h │ ├── cast5.h │ ├── channel_layout.h │ ├── common.h │ ├── cpu.h │ ├── crc.h │ ├── des.h │ ├── dict.h │ ├── display.h │ ├── downmix_info.h │ ├── encryption_info.h │ ├── error.h │ ├── eval.h │ ├── ffversion.h │ ├── fifo.h │ ├── file.h │ ├── frame.h │ ├── hash.h │ ├── hdr_dynamic_metadata.h │ ├── hmac.h │ ├── hwcontext.h │ ├── hwcontext_cuda.h │ ├── hwcontext_d3d11va.h │ ├── hwcontext_drm.h │ ├── hwcontext_dxva2.h │ ├── hwcontext_mediacodec.h │ ├── hwcontext_qsv.h │ ├── hwcontext_vaapi.h │ ├── hwcontext_vdpau.h │ ├── hwcontext_videotoolbox.h │ ├── imgutils.h │ ├── intfloat.h │ ├── 
intreadwrite.h │ ├── lfg.h │ ├── log.h │ ├── lzo.h │ ├── macros.h │ ├── mastering_display_metadata.h │ ├── mathematics.h │ ├── md5.h │ ├── mem.h │ ├── motion_vector.h │ ├── murmur3.h │ ├── opt.h │ ├── parseutils.h │ ├── pixdesc.h │ ├── pixelutils.h │ ├── pixfmt.h │ ├── random_seed.h │ ├── rational.h │ ├── rc4.h │ ├── replaygain.h │ ├── ripemd.h │ ├── samplefmt.h │ ├── sha.h │ ├── sha512.h │ ├── spherical.h │ ├── stereo3d.h │ ├── tea.h │ ├── threadmessage.h │ ├── time.h │ ├── timecode.h │ ├── timestamp.h │ ├── tree.h │ ├── twofish.h │ ├── tx.h │ ├── version.h │ └── xtea.h │ ├── libswresample │ ├── swresample.h │ └── version.h │ └── libswscale │ ├── swscale.h │ └── version.h ├── README.md ├── README_en.md ├── TTSProjectTemplate.ttsproj ├── VitsInputTemplate.json ├── fish-speech.cpp ├── CMakeLists.txt ├── Demo │ ├── CMakeLists.txt │ └── main.cpp ├── include │ ├── Base.h │ ├── Module.h │ └── llama.h ├── src │ ├── Base.cpp │ ├── Module.cpp │ └── llama.cpp ├── test.py └── test.txt ├── libdlvoicecodec ├── LibDLVoiceCodec │ ├── base.cpp │ ├── base.h │ ├── operator.cpp │ ├── operator.h │ ├── value.cpp │ └── value.h ├── Modules │ ├── AvCodec │ │ ├── AvCodeResample.h │ │ ├── Recorder.cpp │ │ └── Recorder.h │ ├── DataStruct │ │ ├── KDTree.cpp │ │ ├── KDTree.hpp │ │ └── README.md │ ├── InferTools │ │ ├── Cluster │ │ │ ├── MoeVSBaseCluster.cpp │ │ │ ├── MoeVSBaseCluster.hpp │ │ │ ├── MoeVSClusterManager.cpp │ │ │ ├── MoeVSClusterManager.hpp │ │ │ ├── MoeVSIndexCluster.cpp │ │ │ ├── MoeVSIndexCluster.hpp │ │ │ ├── MoeVSKmeansCluster.cpp │ │ │ └── MoeVSKmeansCluster.hpp │ │ ├── F0Extractor │ │ │ ├── BaseF0Extractor │ │ │ │ ├── BaseF0Extractor.cpp │ │ │ │ └── BaseF0Extractor.hpp │ │ │ ├── DioF0Extractor │ │ │ │ ├── DioF0Extractor.cpp │ │ │ │ └── DioF0Extractor.hpp │ │ │ ├── F0ExtractorManager.cpp │ │ │ ├── F0ExtractorManager.hpp │ │ │ ├── HarvestF0Extractor │ │ │ │ ├── HarvestF0Extractor.cpp │ │ │ │ └── HarvestF0Extractor.hpp │ │ │ └── NetF0Predictors │ │ │ │ ├── 
NetF0Predictors.cpp │ │ │ │ └── NetF0Predictors.hpp │ │ ├── G2P │ │ │ ├── MoeVSG2P.cpp │ │ │ └── MoeVSG2P.hpp │ │ ├── Sampler │ │ │ ├── MoeVSBaseSampler.cpp │ │ │ ├── MoeVSBaseSampler.hpp │ │ │ ├── MoeVSSamplerManager.cpp │ │ │ ├── MoeVSSamplerManager.hpp │ │ │ ├── MoeVSSamplers.cpp │ │ │ └── MoeVSSamplers.hpp │ │ ├── Stft │ │ │ ├── stft.cpp │ │ │ └── stft.hpp │ │ ├── TensorExtractor │ │ │ ├── MoeVSCoreTensorExtractor.cpp │ │ │ ├── MoeVSCoreTensorExtractor.hpp │ │ │ ├── MoeVoiceStudioTensorExtractor.cpp │ │ │ ├── MoeVoiceStudioTensorExtractor.hpp │ │ │ ├── TensorExtractorManager.cpp │ │ │ └── TensorExtractorManager.hpp │ │ ├── inferTools.cpp │ │ └── inferTools.hpp │ ├── Logger │ │ ├── MoeSSLogger.cpp │ │ └── MoeSSLogger.hpp │ ├── Models │ │ ├── EnvManager.cpp │ │ ├── EnvManager.hpp │ │ ├── header │ │ │ ├── DiffSvc.hpp │ │ │ ├── GPT-SoVits.hpp │ │ │ ├── ModelBase.hpp │ │ │ ├── MoeVSProject.hpp │ │ │ ├── SVC.hpp │ │ │ ├── TTS.hpp │ │ │ ├── Tacotron.hpp │ │ │ ├── Vits.hpp │ │ │ └── VitsSvc.hpp │ │ └── src │ │ │ ├── DiffSvc.cpp │ │ │ ├── GPT-SoVits.cpp │ │ │ ├── ModelBase.cpp │ │ │ ├── MoeVSProject.cpp │ │ │ ├── SVC.cpp │ │ │ ├── TTS.cpp │ │ │ ├── Vits.cpp │ │ │ └── VitsSvc.cpp │ ├── Modules.cpp │ ├── Modules.hpp │ ├── README.md │ └── StringPreprocess.hpp ├── MoeVoiceStudioSvc - Core - Cmd.cpp ├── MoeVoiceStudioSvc - Core - Cmd.vcxproj ├── MoeVoiceStudioSvc - Core - Cmd.vcxproj.filters ├── analyse │ └── GptSoVits.md ├── input.wav ├── output.wav └── packages.config ├── libsvc ├── Api │ ├── header │ │ ├── NativeApi.h │ │ └── libsvc.h │ ├── readme.md │ └── src │ │ ├── NativeApi.cpp │ │ └── libsvc.cpp ├── Modules │ ├── Lib │ │ ├── MJson │ │ │ ├── MJson.cpp │ │ │ ├── MJson.h │ │ │ ├── yyjson.c │ │ │ └── yyjson.h │ │ └── World │ │ │ ├── LICENSE.txt │ │ │ ├── src │ │ │ ├── cheaptrick.cpp │ │ │ ├── codec.cpp │ │ │ ├── common.cpp │ │ │ ├── d4c.cpp │ │ │ ├── dio.cpp │ │ │ ├── fft.cpp │ │ │ ├── harvest.cpp │ │ │ ├── matlabfunctions.cpp │ │ │ ├── stonemask.cpp │ │ │ ├── 
synthesis.cpp │ │ │ ├── synthesisrealtime.cpp │ │ │ └── world │ │ │ │ ├── cheaptrick.h │ │ │ │ ├── codec.h │ │ │ │ ├── common.h │ │ │ │ ├── constantnumbers.h │ │ │ │ ├── d4c.h │ │ │ │ ├── dio.h │ │ │ │ ├── fft.h │ │ │ │ ├── harvest.h │ │ │ │ ├── macrodefinitions.h │ │ │ │ ├── matlabfunctions.h │ │ │ │ ├── stonemask.h │ │ │ │ ├── synthesis.h │ │ │ │ └── synthesisrealtime.h │ │ │ └── tools │ │ │ ├── audioio.cpp │ │ │ ├── audioio.h │ │ │ ├── parameterio.cpp │ │ │ └── parameterio.h │ ├── README.md │ ├── framework.h │ ├── header │ │ ├── InferTools │ │ │ ├── AvCodec │ │ │ │ └── AvCodeResample.h │ │ │ ├── Cluster │ │ │ │ ├── MoeVSBaseCluster.hpp │ │ │ │ ├── MoeVSClusterManager.hpp │ │ │ │ ├── MoeVSIndexCluster.hpp │ │ │ │ └── MoeVSKmeansCluster.hpp │ │ │ ├── DataStruct │ │ │ │ ├── KDTree.hpp │ │ │ │ └── README.md │ │ │ ├── F0Extractor │ │ │ │ ├── BaseF0Extractor.hpp │ │ │ │ ├── DioF0Extractor.hpp │ │ │ │ ├── F0ExtractorManager.hpp │ │ │ │ ├── HarvestF0Extractor.hpp │ │ │ │ └── NetF0Predictors.hpp │ │ │ ├── Sampler │ │ │ │ ├── MoeVSBaseSampler.hpp │ │ │ │ ├── MoeVSSamplerManager.hpp │ │ │ │ └── MoeVSSamplers.hpp │ │ │ ├── Stft │ │ │ │ └── stft.hpp │ │ │ ├── TensorExtractor │ │ │ │ ├── MoeVSCoreTensorExtractor.hpp │ │ │ │ ├── MoeVoiceStudioTensorExtractor.hpp │ │ │ │ └── TensorExtractorManager.hpp │ │ │ └── inferTools.hpp │ │ ├── Logger │ │ │ └── MoeSSLogger.hpp │ │ ├── Models │ │ │ ├── DiffSvc.hpp │ │ │ ├── EnvManager.hpp │ │ │ ├── ModelBase.hpp │ │ │ ├── MoeVSProject.hpp │ │ │ ├── ReflowSvc.hpp │ │ │ ├── SVC.hpp │ │ │ └── VitsSvc.hpp │ │ ├── Modules.hpp │ │ └── StringPreprocess.hpp │ └── src │ │ ├── InferTools │ │ ├── AvCodec │ │ │ └── AvCodeResample.cpp │ │ ├── Cluster │ │ │ ├── MoeVSBaseCluster.cpp │ │ │ ├── MoeVSClusterManager.cpp │ │ │ ├── MoeVSIndexCluster.cpp │ │ │ └── MoeVSKmeansCluster.cpp │ │ ├── DataStruct │ │ │ ├── KDTree.cpp │ │ │ └── README.md │ │ ├── F0Extractor │ │ │ ├── BaseF0Extractor.cpp │ │ │ ├── DioF0Extractor.cpp │ │ │ ├── F0ExtractorManager.cpp │ │ │ 
├── HarvestF0Extractor.cpp │ │ │ └── NetF0Predictors.cpp │ │ ├── Sampler │ │ │ ├── MoeVSBaseSampler.cpp │ │ │ ├── MoeVSSamplerManager.cpp │ │ │ └── MoeVSSamplers.cpp │ │ ├── Stft │ │ │ └── stft.cpp │ │ ├── TensorExtractor │ │ │ ├── MoeVSCoreTensorExtractor.cpp │ │ │ ├── MoeVoiceStudioTensorExtractor.cpp │ │ │ └── TensorExtractorManager.cpp │ │ └── inferTools.cpp │ │ ├── Logger │ │ └── MoeSSLogger.cpp │ │ ├── Models │ │ ├── DiffSvc.cpp │ │ ├── EnvManager.cpp │ │ ├── ModelBase.cpp │ │ ├── MoeVSProject.cpp │ │ ├── ReflowSvc.cpp │ │ ├── SVC.cpp │ │ └── VitsSvc.cpp │ │ ├── Modules.cpp │ │ └── StringPreprocess.cpp ├── README.md ├── dllmain.cpp ├── libsvc.vcxproj ├── libsvc.vcxproj.filters ├── libsvc.vcxproj.user └── packages.config ├── libtts ├── Api │ ├── NativeApi.cpp │ └── NativeApi.h ├── Modules │ ├── AvCodec │ │ ├── AvCodeResample.h │ │ ├── Recorder.cpp │ │ └── Recorder.h │ ├── InferTools │ │ ├── G2P │ │ │ ├── MoeVSG2P.cpp │ │ │ └── MoeVSG2P.hpp │ │ ├── inferTools.cpp │ │ └── inferTools.hpp │ ├── Lib │ │ ├── MJson │ │ │ ├── MJson.cpp │ │ │ ├── MJson.h │ │ │ ├── yyjson.c │ │ │ └── yyjson.h │ │ └── World │ │ │ ├── LICENSE.txt │ │ │ ├── src │ │ │ ├── cheaptrick.cpp │ │ │ ├── codec.cpp │ │ │ ├── common.cpp │ │ │ ├── d4c.cpp │ │ │ ├── dio.cpp │ │ │ ├── fft.cpp │ │ │ ├── harvest.cpp │ │ │ ├── matlabfunctions.cpp │ │ │ ├── stonemask.cpp │ │ │ ├── synthesis.cpp │ │ │ ├── synthesisrealtime.cpp │ │ │ └── world │ │ │ │ ├── cheaptrick.h │ │ │ │ ├── codec.h │ │ │ │ ├── common.h │ │ │ │ ├── constantnumbers.h │ │ │ │ ├── d4c.h │ │ │ │ ├── dio.h │ │ │ │ ├── fft.h │ │ │ │ ├── harvest.h │ │ │ │ ├── macrodefinitions.h │ │ │ │ ├── matlabfunctions.h │ │ │ │ ├── stonemask.h │ │ │ │ ├── synthesis.h │ │ │ │ └── synthesisrealtime.h │ │ │ └── tools │ │ │ ├── audioio.cpp │ │ │ ├── audioio.h │ │ │ ├── parameterio.cpp │ │ │ └── parameterio.h │ ├── Logger │ │ ├── MoeSSLogger.cpp │ │ └── MoeSSLogger.hpp │ ├── Models │ │ ├── EnvManager.cpp │ │ ├── EnvManager.hpp │ │ ├── header │ │ │ ├── 
GPT-SoVits.hpp │ │ │ ├── ModelBase.hpp │ │ │ ├── MoeVSProject.hpp │ │ │ ├── TTS.hpp │ │ │ ├── Tacotron.hpp │ │ │ └── Vits.hpp │ │ └── src │ │ │ ├── GPT-SoVits.cpp │ │ │ ├── ModelBase.cpp │ │ │ ├── MoeVSProject.cpp │ │ │ ├── TTS.cpp │ │ │ └── Vits.cpp │ ├── Modules.cpp │ ├── Modules.hpp │ ├── README.md │ └── StringPreprocess.hpp ├── dllmain.cpp ├── framework.h ├── libtts.vcxproj └── libtts.vcxproj.filters ├── logo ├── logo(AIGen).png ├── logo256(AIGen).png └── logo512(AIGen).png └── test.json /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "libsvc-tensorlib"] 2 | path = libsvc-tensorlib 3 | url = https://github.com/NaruseMioShirakana/libsvc 4 | -------------------------------------------------------------------------------- /BasicDict.json: -------------------------------------------------------------------------------- 1 | { 2 | "_" : ["_"], 3 | "," : [","], 4 | "." : ["."], 5 | "!" : ["!"], 6 | "?" : ["?"], 7 | "-" : ["-"], 8 | "~" : ["~"], 9 | "…" : ["…"], 10 | "A" : ["A"], 11 | "E" : ["E"], 12 | "I" : ["I"], 13 | "N" : ["N"], 14 | "O" : ["O"], 15 | "Q" : ["Q"], 16 | "U" : ["U"], 17 | "a" : ["a"], 18 | "b" : ["b"], 19 | "d" : ["d"], 20 | "e" : ["e"], 21 | "f" : ["f"], 22 | "g" : ["g"], 23 | "h" : ["h"], 24 | "i" : ["i"], 25 | "j" : ["j"], 26 | "k" : ["k"], 27 | "m" : ["m"], 28 | "n" : ["n"], 29 | "o" : ["o"], 30 | "p" : ["p"], 31 | "r" : ["r"], 32 | "s" : ["s"], 33 | "t" : ["t"], 34 | "u" : ["u"], 35 | "v" : ["v"], 36 | "w" : ["w"], 37 | "y" : ["y"], 38 | "z" : ["z"], 39 | "ʃ" : ["ʃ"], 40 | "ʧ" : ["ʧ"], 41 | "ʦ" : ["ʦ"], 42 | "↓" : ["↓"], 43 | "↑" : ["↑"], 44 | " " : [" "] 45 | } -------------------------------------------------------------------------------- /BertVits.md: -------------------------------------------------------------------------------- 1 | # BertVits及Vits使用指南 2 | - 1、按照要求安装模型 3 | - 2、安装Cleaner([下载地址](https://github.com/NaruseMioShirakana/TextCleaner/releases),将文件夹“G2P”解压到Exe路径) 4 | - 
3、将Bert文件夹复制到Exe路径,其中的子文件夹可以放置我发布的Bert模型,也可以啥都不放(如果不放模型就不能用Bert模型,但是不影响正常推理,就是效果可能会大打折扣) 5 | - 4、按照自己的需要配置字典(Dict)文件 6 | - 5、编写输入,载入程序推理 7 | 8 | ## BasicDict.json 9 | 字典的作用就是将软件自动处理出来的文本替换为你使用的模型的Symbol,而字典文件的作用就是规定这个替换规则,字典文件是如同BasicDict.json的文件,其中由非常多的键值对组成,其中的Key就是待替换文本,而Value就是替换后的文本。 10 | 11 | ## VitsInputTemplate.json 12 | ```jsonc 13 | //Json需要是数组类型 14 | [ 15 | { 16 | "Tokens": "私は誰?",//必填,进入Bert的文本 17 | "Seq": ["w","a","t","a","s","h","i","w","a","d","a","r","e","?"],//选填,音素组成的序列,如果不填会根据Tokens自动生成 18 | "Tones": [0,0,0,0,0,0,0,0,0,0,0,0,0,0],//选填,音调序列,必须与音素序列等长 19 | "Durations": [2,5,2,5,2,2,5,2,5,2,5,2,5,2],//选填,音素时长序列,必须与音素序列等长 20 | "Language": [0,0,0,0,0,0,0,0,0,0,0,0,0,0],//选填,语言序列,必须与音素序列等长 21 | "SpeakerMix": [1,0,0],//选填,角色混合比例,决定对应下标角色音色的混合比例 22 | "EmotionPrompt": ["sad", "happy"],//选填,情感参数,有情感模型的情况下可用 23 | "NoiseScale": 0.666,//选填,噪声修正因子 24 | "LengthScale": 1.1,//选填,时长修正因子 25 | "DurationPredictorNoiseScale": 0.333,//选填,随机时长预测器噪声修正因子 26 | "FactorDpSdp": 0.6,//选填,时长预测器和随机时长预测器的混合比例 27 | "GateThreshold": 0.777,//选填,Tacotron2 EOS阈值 28 | "MaxDecodeStep": 114514,//选填,Tacotron2 最大解码步数 29 | "Seed": 1919810,//选填,种子 30 | "SpeakerId": 2,//选填,角色ID(若SpeakerMix为空则使用) 31 | "RestTime": 1.0,//选填,决定与上一个片段的时间间隔(单位为秒),若为负数则表示切断音频并输出一个新的 32 | "PlaceHolderSymbol": "|",//选填,当Seq为String时,隔开两个音素的记号 33 | "LanguageID": "JP",//选填,语言(ZH,JP或EN) 34 | "G2PAdditionalInfo": "/[Japanese2]"//选填,Cleaner额外参数 35 | }, 36 | ] 37 | ``` -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # CMakeList.txt: 顶层 CMake 项目文件,在此处执行全局配置 2 | # 并包含子项目。 3 | # 4 | cmake_minimum_required (VERSION 3.8) 5 | # 如果支持,请为 MSVC 编译器启用热重载。 6 | if (POLICY CMP0141) 7 | cmake_policy(SET CMP0141 NEW) 8 | set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT 
"$<IF:$<AND:$<C_COMPILER_ID:MSVC>,$<CXX_COMPILER_ID:MSVC>>,$<$<CONFIG:Debug,RelWithDebInfo>:EditAndContinue>,$<$<CONFIG:Debug,RelWithDebInfo>:ProgramDatabase>>") 9 | endif() 10 | 11 | project ("DragonianSpeech") 12 | 13 | set(FISHSPEECHCPP_BUILD_DEMO ON) 14 | 15 | add_subdirectory ("fish-speech.cpp") 16 | -------------------------------------------------------------------------------- /CMakePresets.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 3, 3 | "configurePresets": [ 4 | { 5 | "name": "windows-base", 6 | "hidden": true, 7 | "generator": "Ninja", 8 | "binaryDir": "${sourceDir}/out/build/${presetName}", 9 | "installDir": "${sourceDir}/out/install/${presetName}", 10 | "cacheVariables": { 11 | "CMAKE_C_COMPILER": "cl.exe", 12 | "CMAKE_CXX_COMPILER": "cl.exe" 13 | }, 14 | "condition": { 15 | "type": "equals", 16 | "lhs": "${hostSystemName}", 17 | "rhs": "Windows" 18 | } 19 | }, 20 | { 21 | "name": "x64-debug", 22 | "displayName": "x64 Debug", 23 | "inherits": "windows-base", 24 | "architecture": { 25 | "value": "x64", 26 | "strategy": "external" 27 | }, 28 | "cacheVariables": { 29 | "CMAKE_BUILD_TYPE": "Debug" 30 | } 31 | }, 32 | { 33 | "name": "x64-release", 34 | "displayName": "x64 Release", 35 | "inherits": "x64-debug", 36 | "cacheVariables": { 37 | "CMAKE_BUILD_TYPE": "Release" 38 | } 39 | }, 40 | { 41 | "name": "x86-debug", 42 | "displayName": "x86 Debug", 43 | "inherits": "windows-base", 44 | "architecture": { 45 | "value": "x86", 46 | "strategy": "external" 47 | }, 48 | "cacheVariables": { 49 | "CMAKE_BUILD_TYPE": "Debug" 50 | } 51 | }, 52 | { 53 | "name": "x86-release", 54 | "displayName": "x86 Release", 55 | "inherits": "x86-debug", 56 | "cacheVariables": { 57 | "CMAKE_BUILD_TYPE": "Release" 58 | } 59 | } 60 | ] 61 | } 62 | -------------------------------------------------------------------------------- /CSharpDemo/CSharpDemo.csproj: 
-------------------------------------------------------------------------------- 1 | <Project Sdk="Microsoft.NET.Sdk"> 2 | 3 | <PropertyGroup> 4 | <OutputType>Exe</OutputType> 5 | <TargetFramework>net8.0</TargetFramework> 6 | <ImplicitUsings>enable</ImplicitUsings> 7 | <Nullable>enable</Nullable> 8 | <AllowUnsafeBlocks>True</AllowUnsafeBlocks> 9 | <Configurations>Debug;Release</Configurations> 10 | </PropertyGroup> 11 | 12 | <ItemGroup> 13 | <ProjectReference Include="..\DotNetApi\DotNetApi.csproj" /> 14 | </ItemGroup> 15 | 16 | <ItemGroup> 17 | <Compile Update="Properties\Resources.Designer.cs"> 18 | <DesignTime>True</DesignTime> 19 | <AutoGen>True</AutoGen> 20 | <DependentUpon>Resources.resx</DependentUpon> 21 | </Compile> 22 | </ItemGroup> 23 | 24 | <ItemGroup> 25 | <EmbeddedResource Update="Properties\Resources.resx"> 26 | <Generator>ResXFileCodeGenerator</Generator> 27 | <LastGenOutput>Resources.Designer.cs</LastGenOutput> 28 | </EmbeddedResource> 29 | </ItemGroup> 30 | 31 | </Project> 32 | -------------------------------------------------------------------------------- /CSharpDemo/Properties/launchSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "profiles": { 3 | "CSharpDemo": { 4 | "commandName": "Project", 5 | "nativeDebugging": true 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /DotNetApi/DotNetApi.csproj: -------------------------------------------------------------------------------- 1 | <Project Sdk="Microsoft.NET.Sdk"> 2 | 3 | <PropertyGroup> 4 | <TargetFramework>net8.0</TargetFramework> 5 | <ImplicitUsings>enable</ImplicitUsings> 6 | <Nullable>enable</Nullable> 7 | <AllowUnsafeBlocks>True</AllowUnsafeBlocks> 8 | <Configurations>Debug;Release</Configurations> 9 | <BaseOutputPath></BaseOutputPath> 10 | </PropertyGroup> 11 | 12 | </Project> 13 | -------------------------------------------------------------------------------- 
/DotNetApi/Properties/launchSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "profiles": { 3 | "DotNetApi": { 4 | "commandName": "Project", 5 | "nativeDebugging": true 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /Lib/OnnxRuntimeDmlProvider/.signature.p7s: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/Lib/OnnxRuntimeDmlProvider/.signature.p7s -------------------------------------------------------------------------------- /Lib/OnnxRuntimeDmlProvider/LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Lib/OnnxRuntimeDmlProvider/Microsoft.ML.OnnxRuntime.DirectML.nuspec: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="utf-8"?> 2 | <package xmlns="http://schemas.microsoft.com/packaging/2013/05/nuspec.xsd"> 3 | <metadata> 4 | <id>Microsoft.ML.OnnxRuntime.DirectML</id> 5 | <version>1.15.0</version> 6 | <authors>Microsoft</authors> 7 | <owners>Microsoft</owners> 8 | <requireLicenseAcceptance>false</requireLicenseAcceptance> 9 | <license type="file">LICENSE.txt</license> 10 | <licenseUrl>https://aka.ms/deprecateLicenseUrl</licenseUrl> 11 | <icon>ORT_icon_for_light_bg.png</icon> 12 | <projectUrl>https://github.com/Microsoft/onnxruntime</projectUrl> 13 | <description>This package contains native shared library artifacts for all supported platforms of ONNX Runtime.</description> 14 | <releaseNotes>Release Def: 15 | Branch: refs/heads/rel-1.15.0 16 | Commit: ddaaeeab42432cf9b924b5aa0459d644f615a01f 17 | Build: https://aiinfra.visualstudio.com/Lotus/_build/results?buildId=312266</releaseNotes> 18 | <copyright>© Microsoft Corporation. 
All rights reserved.</copyright> 19 | <tags>native ONNX ONNXRuntime-Training Learning-on-The-Edge On-Device-Training MachineLearning</tags> 20 | <repository type="git" url="https://github.com/Microsoft/onnxruntime.git" commit="ddaaeeab42432cf9b924b5aa0459d644f615a01f" /> 21 | <dependencies> 22 | <group targetFramework=".NETCoreApp0.0"> 23 | <dependency id="Microsoft.ML.OnnxRuntime.Managed" version="1.15.0" /> 24 | <dependency id="Microsoft.AI.DirectML" version="1.12.0" /> 25 | </group> 26 | <group targetFramework=".NETStandard0.0"> 27 | <dependency id="Microsoft.ML.OnnxRuntime.Managed" version="1.15.0" /> 28 | <dependency id="Microsoft.AI.DirectML" version="1.12.0" /> 29 | </group> 30 | <group targetFramework=".NETFramework0.0"> 31 | <dependency id="Microsoft.ML.OnnxRuntime.Managed" version="1.15.0" /> 32 | <dependency id="Microsoft.AI.DirectML" version="1.12.0" /> 33 | </group> 34 | <group targetFramework="native0.0"> 35 | <dependency id="Microsoft.AI.DirectML" version="1.12.0" /> 36 | </group> 37 | </dependencies> 38 | </metadata> 39 | </package> -------------------------------------------------------------------------------- /Lib/OnnxRuntimeDmlProvider/ORT_icon_for_light_bg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/Lib/OnnxRuntimeDmlProvider/ORT_icon_for_light_bg.png -------------------------------------------------------------------------------- /Lib/OnnxRuntimeDmlProvider/Privacy.md: -------------------------------------------------------------------------------- 1 | # Privacy 2 | 3 | ## Data Collection 4 | The software may collect information about you and your use of the software and send it to Microsoft. Microsoft may use this information to provide services and improve our products and services. You may turn off the telemetry as described in the repository. 
There are also some features in the software that may enable you and Microsoft to collect data from users of your applications. If you use these features, you must comply with applicable law, including providing appropriate notices to users of your applications together with a copy of Microsoft's privacy statement. Our privacy statement is located at https://go.microsoft.com/fwlink/?LinkID=824704. You can learn more about data collection and use in the help documentation and our privacy statement. Your use of the software operates as your consent to these practices. 5 | 6 | *** 7 | 8 | ### Private Builds 9 | No data collection is performed when using your private builds built from source code. 10 | 11 | ### Official Builds 12 | ONNX Runtime does not maintain any independent telemetry collection mechanisms outside of what is provided by the platforms it supports. However, where applicable, ONNX Runtime will take advantage of platform-supported telemetry systems to collect trace events with the goal of improving product quality. 13 | 14 | Currently telemetry is only implemented for Windows builds and is turned **ON** by default in the official builds distributed in their respective package management repositories ([see here](../README.md#binaries)). This may be expanded to cover other platforms in the future. Data collection is implemented via 'Platform Telemetry' per vendor platform providers (see [telemetry.h](../onnxruntime/core/platform/telemetry.h)). 15 | 16 | #### Technical Details 17 | The Windows provider uses the [TraceLogging](https://docs.microsoft.com/en-us/windows/win32/tracelogging/trace-logging-about) API for its implementation. This enables ONNX Runtime trace events to be collected by the operating system, and based on user consent, this data may be periodically sent to Microsoft servers following GDPR and privacy regulations for anonymity and data access controls. 
18 | 19 | Windows ML and onnxruntime C APIs allow Trace Logging to be turned on/off (see [API pages](../README.md#api-documentation) for details). 20 | For information on how to enable and disable telemetry, see [C API: Telemetry](./C_API.md#telemetry). 21 | There are equivalent APIs in the C#, Python, and Java language bindings as well. 22 | -------------------------------------------------------------------------------- /Lib/OnnxRuntimeDmlProvider/[Content_Types].xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="utf-8"?> 2 | <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types"> 3 | <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml" /> 4 | <Default Extension="psmdcp" ContentType="application/vnd.openxmlformats-package.core-properties+xml" /> 5 | <Default Extension="h" ContentType="application/octet" /> 6 | <Default Extension="lib" ContentType="application/octet" /> 7 | <Default Extension="dll" ContentType="application/octet" /> 8 | <Default Extension="props" ContentType="application/octet" /> 9 | <Default Extension="targets" ContentType="application/octet" /> 10 | <Default Extension="txt" ContentType="application/octet" /> 11 | <Default Extension="md" ContentType="application/octet" /> 12 | <Default Extension="png" ContentType="application/octet" /> 13 | <Default Extension="nuspec" ContentType="application/octet" /> 14 | </Types> -------------------------------------------------------------------------------- /Lib/OnnxRuntimeDmlProvider/_rels/.rels: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="utf-8"?> 2 | <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"> 3 | <Relationship Type="http://schemas.microsoft.com/packaging/2010/07/manifest" Target="/Microsoft.ML.OnnxRuntime.DirectML.nuspec" Id="R605BB99C1A1EA48B" /> 4 | 
<Relationship Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" Target="/package/services/metadata/core-properties/c7795757db2346b9bcfb932f99cdb33f.psmdcp" Id="R51F19D04ECB56433" /> 5 | </Relationships> -------------------------------------------------------------------------------- /Lib/OnnxRuntimeDmlProvider/build/native/Microsoft.ML.OnnxRuntime.DirectML.targets: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="utf-8"?> 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> 3 | <Target Name="Microsoft_ML_OnnxRuntime_CheckPrerequisites" BeforeTargets="BeforeBuild"> 4 | <!-- 5 | Special case .NET Core portable applications. When building a portable .NET Core app, 6 | the PlatformTarget is empty, and you don't know until runtime (i.e. which dotnet.exe) 7 | what processor architecture will be used. 8 | --> 9 | <Error Condition="('$(PlatformTarget)' != 'x64' AND '$(PlatformTarget)' != 'arm32' AND '$(PlatformTarget)' != 'arm64' AND '$(PlatformTarget)' != 'x86' AND '$(PlatformTarget)' != 'AnyCPU') AND 10 | ('$(OutputType)' == 'Exe' OR '$(OutputType)'=='WinExe') AND 11 | !('$(TargetFrameworkIdentifier)' == '.NETCoreApp' AND '$(PlatformTarget)' == '') AND 12 | ('$(TargetFrameworkIdentifier)' != 'Xamarin.iOS' AND 13 | $([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)')) != 'ios') AND 14 | '$(SuppressOnnxRuntimePlatformCompatibilityError)' != 'true'" 15 | Text="Microsoft.ML.OnnxRuntime only supports the AnyCPU, x64, arm32, arm64 and x86 platforms at this time."/> 16 | </Target> 17 | </Project> 18 | -------------------------------------------------------------------------------- /Lib/OnnxRuntimeDmlProvider/build/native/include/cpu_provider_factory.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 
2 | // Licensed under the MIT License. 3 | 4 | #include "onnxruntime_c_api.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | /** 11 | * \param use_arena zero: false. non-zero: true. 12 | */ 13 | ORT_EXPORT 14 | ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_CPU, _In_ OrtSessionOptions* options, int use_arena) 15 | ORT_ALL_ARGS_NONNULL; 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | -------------------------------------------------------------------------------- /Lib/OnnxRuntimeDmlProvider/build/native/include/onnxruntime_run_options_config_keys.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #pragma once 5 | 6 | /* 7 | * This file defines RunOptions Config Keys and format of the Config Values. 8 | * 9 | * The Naming Convention for a RunOptions Config Key, 10 | * "[Area][.[SubArea1].[SubArea2]...].[Keyname]" 11 | * Such as "ep.cuda.use_arena" 12 | * The Config Key cannot be empty 13 | * The maximum length of the Config Key is 128 14 | * 15 | * The string format of a RunOptions Config Value is defined individually for each Config. 16 | * The maximum length of the Config Value is 1024 17 | */ 18 | 19 | // Key for enabling shrinkages of user listed device memory arenas. 20 | // Expects a list of semi-colon separated key value pairs separated by colon in the following format: 21 | // "device_0:device_id_0;device_1:device_id_1" 22 | // No white-spaces allowed in the provided list string. 23 | // Currently, the only supported devices are : "cpu", "gpu" (case sensitive). 24 | // If "cpu" is included in the list, DisableCpuMemArena() API must not be called (i.e.) arena for cpu should be enabled. 25 | // Example usage: "cpu:0;gpu:0" (or) "gpu:0" 26 | // By default, the value for this key is empty (i.e.) 
no memory arenas are shrunk 27 | static const char* const kOrtRunOptionsConfigEnableMemoryArenaShrinkage = "memory.enable_memory_arena_shrinkage"; 28 | 29 | // Set to '1' to not synchronize execution providers with CPU at the end of session run. 30 | // Per default it will be set to '0' 31 | // Taking CUDA EP as an example, it omit triggering cudaStreamSynchronize on the compute stream. 32 | static const char* const kOrtRunOptionsConfigDisableSynchronizeExecutionProviders = "disable_synchronize_execution_providers"; 33 | -------------------------------------------------------------------------------- /Lib/OnnxRuntimeDmlProvider/build/native/include/provider_options.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #pragma once 5 | 6 | #include <string> 7 | #include <unordered_map> 8 | #include <vector> 9 | 10 | namespace onnxruntime { 11 | 12 | // data types for execution provider options 13 | 14 | using ProviderOptions = std::unordered_map<std::string, std::string>; 15 | using ProviderOptionsVector = std::vector<ProviderOptions>; 16 | using ProviderOptionsMap = std::unordered_map<std::string, ProviderOptions>; 17 | 18 | } // namespace onnxruntime 19 | -------------------------------------------------------------------------------- /Lib/OnnxRuntimeDmlProvider/build/netstandard1.1/Microsoft.ML.OnnxRuntime.DirectML.targets: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="utf-8"?> 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> 3 | <Target Name="Microsoft_ML_OnnxRuntime_CheckPrerequisites" BeforeTargets="BeforeBuild"> 4 | <!-- 5 | Special case .NET Core portable applications. When building a portable .NET Core app, 6 | the PlatformTarget is empty, and you don't know until runtime (i.e. 
which dotnet.exe) 7 | what processor architecture will be used. 8 | --> 9 | <Error Condition="('$(PlatformTarget)' != 'x64' AND '$(PlatformTarget)' != 'arm32' AND '$(PlatformTarget)' != 'arm64' AND '$(PlatformTarget)' != 'x86' AND '$(PlatformTarget)' != 'AnyCPU') AND 10 | ('$(OutputType)' == 'Exe' OR '$(OutputType)'=='WinExe') AND 11 | !('$(TargetFrameworkIdentifier)' == '.NETCoreApp' AND '$(PlatformTarget)' == '') AND 12 | ('$(TargetFrameworkIdentifier)' != 'Xamarin.iOS' AND 13 | $([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)')) != 'ios') AND 14 | '$(SuppressOnnxRuntimePlatformCompatibilityError)' != 'true'" 15 | Text="Microsoft.ML.OnnxRuntime only supports the AnyCPU, x64, arm32, arm64 and x86 platforms at this time."/> 16 | </Target> 17 | </Project> 18 | -------------------------------------------------------------------------------- /Lib/OnnxRuntimeDmlProvider/build/netstandard2.0/Microsoft.ML.OnnxRuntime.DirectML.targets: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="utf-8"?> 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> 3 | <Target Name="Microsoft_ML_OnnxRuntime_CheckPrerequisites" BeforeTargets="BeforeBuild"> 4 | <!-- 5 | Special case .NET Core portable applications. When building a portable .NET Core app, 6 | the PlatformTarget is empty, and you don't know until runtime (i.e. which dotnet.exe) 7 | what processor architecture will be used. 
8 | --> 9 | <Error Condition="('$(PlatformTarget)' != 'x64' AND '$(PlatformTarget)' != 'arm32' AND '$(PlatformTarget)' != 'arm64' AND '$(PlatformTarget)' != 'x86' AND '$(PlatformTarget)' != 'AnyCPU') AND 10 | ('$(OutputType)' == 'Exe' OR '$(OutputType)'=='WinExe') AND 11 | !('$(TargetFrameworkIdentifier)' == '.NETCoreApp' AND '$(PlatformTarget)' == '') AND 12 | ('$(TargetFrameworkIdentifier)' != 'Xamarin.iOS' AND 13 | $([MSBuild]::GetTargetPlatformIdentifier('$(TargetFramework)')) != 'ios') AND 14 | '$(SuppressOnnxRuntimePlatformCompatibilityError)' != 'true'" 15 | Text="Microsoft.ML.OnnxRuntime only supports the AnyCPU, x64, arm32, arm64 and x86 platforms at this time."/> 16 | </Target> 17 | </Project> 18 | -------------------------------------------------------------------------------- /Lib/OnnxRuntimeDmlProvider/package/services/metadata/core-properties/c7795757db2346b9bcfb932f99cdb33f.psmdcp: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="utf-8"?> 2 | <coreProperties xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"> 3 | <dc:creator>Microsoft</dc:creator> 4 | <dc:description>This package contains native shared library artifacts for all supported platforms of ONNX Runtime.</dc:description> 5 | <dc:identifier>Microsoft.ML.OnnxRuntime.DirectML</dc:identifier> 6 | <version>1.15.0</version> 7 | <keywords>native ONNX ONNXRuntime-Training Learning-on-The-Edge On-Device-Training MachineLearning</keywords> 8 | <lastModifiedBy>NuGet, Version=5.7.0.7, Culture=neutral, PublicKeyToken=31bf3856ad364e35;Microsoft Windows NT 10.0.20348.0;.NET Framework 4.7.2</lastModifiedBy> 9 | </coreProperties> -------------------------------------------------------------------------------- /Lib/OnnxRuntimeDmlProvider/runtimes/win-x64/native/onnxruntime.lib: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/Lib/OnnxRuntimeDmlProvider/runtimes/win-x64/native/onnxruntime.lib -------------------------------------------------------------------------------- /Lib/World/src/world/constantnumbers.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise) 4 | // Last update: 2021/02/15 5 | // 6 | // This header file only defines constant numbers used for several function. 7 | //----------------------------------------------------------------------------- 8 | #ifndef WORLD_CONSTANT_NUMBERS_H_ 9 | #define WORLD_CONSTANT_NUMBERS_H_ 10 | 11 | namespace world { 12 | // for Dio() 13 | const double kCutOff = 50.0; 14 | 15 | // for StoneMask() 16 | const double kFloorF0StoneMask = 40.0; 17 | 18 | const double kPi = 3.1415926535897932384; 19 | const double kMySafeGuardMinimum = 0.000000000001; 20 | const double kEps = 0.00000000000000022204460492503131; 21 | const double kFloorF0 = 71.0; 22 | const double kCeilF0 = 800.0; 23 | const double kDefaultF0 = 500.0; 24 | const double kLog2 = 0.69314718055994529; 25 | // Maximum standard deviation not to be selected as a best f0. 26 | const double kMaximumValue = 100000.0; 27 | 28 | // Note to me (fs: 48000) 29 | // 71 Hz is the limit to maintain the FFT size at 2048. 30 | // If we use 70 Hz as FLOOR_F0, the FFT size of 4096 is required. 31 | 32 | // for D4C() 33 | const int kHanning = 1; 34 | const int kBlackman = 2; 35 | const double kFrequencyInterval = 3000.0; 36 | const double kUpperLimit = 15000.0; 37 | const double kThreshold = 0.85; 38 | const double kFloorF0D4C = 47.0; 39 | 40 | // for Codec (Mel scale) 41 | // S. Stevens & J. 
Volkmann, 42 | // The Relation of Pitch to Frequency: A Revised Scale, 43 | // American Journal of Psychology, vol. 53, no. 3, pp. 329-353, 1940. 44 | const double kM0 = 1127.01048; 45 | const double kF0 = 700.0; 46 | const double kFloorFrequency = 40.0; 47 | const double kCeilFrequency = 20000.0; 48 | 49 | } // namespace world 50 | 51 | #endif // WORLD_CONSTANT_NUMBERS_H_ 52 | -------------------------------------------------------------------------------- /Lib/World/src/world/d4c.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise) 4 | // Last update: 2021/02/15 5 | //----------------------------------------------------------------------------- 6 | #ifndef WORLD_D4C_H_ 7 | #define WORLD_D4C_H_ 8 | 9 | #include "world/macrodefinitions.h" 10 | 11 | WORLD_BEGIN_C_DECLS 12 | 13 | //----------------------------------------------------------------------------- 14 | // Struct for D4C 15 | //----------------------------------------------------------------------------- 16 | typedef struct { 17 | double threshold; 18 | } D4COption; 19 | 20 | //----------------------------------------------------------------------------- 21 | // D4C() calculates the aperiodicity estimated by D4C. 22 | // 23 | // Input: 24 | // x : Input signal 25 | // x_length : Length of x 26 | // fs : Sampling frequency 27 | // temporal_positions : Time axis 28 | // f0 : F0 contour 29 | // f0_length : Length of F0 contour 30 | // fft_size : Number of samples of the aperiodicity in one frame. 31 | // : It is given by the equation fft_size / 2 + 1. 32 | // Output: 33 | // aperiodicity : Aperiodicity estimated by D4C. 
34 | //----------------------------------------------------------------------------- 35 | void D4C(const double *x, int x_length, int fs, 36 | const double *temporal_positions, const double *f0, int f0_length, 37 | int fft_size, const D4COption *option, double **aperiodicity); 38 | 39 | //----------------------------------------------------------------------------- 40 | // InitializeD4COption allocates the memory to the struct and sets the 41 | // default parameters. 42 | // 43 | // Output: 44 | // option : Struct for the optional parameter. 45 | //----------------------------------------------------------------------------- 46 | void InitializeD4COption(D4COption *option); 47 | 48 | WORLD_END_C_DECLS 49 | 50 | #endif // WORLD_D4C_H_ 51 | -------------------------------------------------------------------------------- /Lib/World/src/world/dio.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise) 4 | // Last update: 2021/02/15 5 | //----------------------------------------------------------------------------- 6 | #ifndef WORLD_DIO_H_ 7 | #define WORLD_DIO_H_ 8 | 9 | #include "macrodefinitions.h" 10 | 11 | WORLD_BEGIN_C_DECLS 12 | 13 | //----------------------------------------------------------------------------- 14 | // Struct for DIO 15 | //----------------------------------------------------------------------------- 16 | typedef struct { 17 | double f0_floor; 18 | double f0_ceil; 19 | double channels_in_octave; 20 | double frame_period; // msec 21 | int speed; // (1, 2, ..., 12) 22 | double allowed_range; // Threshold used for fixing the F0 contour. 
23 | } DioOption; 24 | 25 | //----------------------------------------------------------------------------- 26 | // DIO 27 | // 28 | // Input: 29 | // x : Input signal 30 | // x_length : Length of x 31 | // fs : Sampling frequency 32 | // option : Struct to order the parameter for DIO 33 | // 34 | // Output: 35 | // temporal_positions : Temporal positions. 36 | // f0 : F0 contour. 37 | //----------------------------------------------------------------------------- 38 | void Dio(const double *x, int x_length, int fs, const DioOption *option, 39 | double *temporal_positions, double *f0); 40 | 41 | //----------------------------------------------------------------------------- 42 | // InitializeDioOption allocates the memory to the struct and sets the 43 | // default parameters. 44 | // 45 | // Output: 46 | // option : Struct for the optional parameter. 47 | //----------------------------------------------------------------------------- 48 | void InitializeDioOption(DioOption *option); 49 | 50 | //----------------------------------------------------------------------------- 51 | // GetSamplesForDIO() calculates the number of samples required for Dio(). 52 | // 53 | // Input: 54 | // fs : Sampling frequency [Hz] 55 | // x_length : Length of the input signal [Sample]. 
56 | // frame_period : Frame shift [msec] 57 | // 58 | // Output: 59 | // The number of samples required to store the results of Dio() 60 | //----------------------------------------------------------------------------- 61 | int GetSamplesForDIO(int fs, int x_length, double frame_period); 62 | 63 | WORLD_END_C_DECLS 64 | 65 | #endif // WORLD_DIO_H_ 66 | -------------------------------------------------------------------------------- /Lib/World/src/world/fft.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise) 4 | // Last update: 2021/02/15 5 | // 6 | // These functions and variables are defined to use FFT as well as FFTW 7 | // Please see fft.cpp to show the detailed information 8 | //----------------------------------------------------------------------------- 9 | #ifndef WORLD_FFT_H_ 10 | #define WORLD_FFT_H_ 11 | 12 | #include "macrodefinitions.h" 13 | 14 | WORLD_BEGIN_C_DECLS 15 | 16 | // Commands for FFT (This is the same as FFTW) 17 | #define FFT_FORWARD 1 18 | #define FFT_BACKWARD 2 19 | #define FFT_ESTIMATE 3 20 | 21 | // Complex number for FFT 22 | typedef double fft_complex[2]; 23 | // Struct used for FFT 24 | typedef struct { 25 | int n; 26 | int sign; 27 | unsigned int flags; 28 | fft_complex *c_in; 29 | double *in; 30 | fft_complex *c_out; 31 | double *out; 32 | double *input; 33 | int *ip; 34 | double *w; 35 | } fft_plan; 36 | 37 | fft_plan fft_plan_dft_1d(int n, fft_complex *in, fft_complex *out, int sign, 38 | unsigned int flags); 39 | fft_plan fft_plan_dft_c2r_1d(int n, fft_complex *in, double *out, 40 | unsigned int flags); 41 | fft_plan fft_plan_dft_r2c_1d(int n, double *in, fft_complex *out, 42 | unsigned int flags); 43 | void fft_execute(fft_plan p); 44 | void fft_destroy_plan(fft_plan p); 45 | 46 | WORLD_END_C_DECLS 47 | 48 | #endif // 
WORLD_FFT_H_ 49 | -------------------------------------------------------------------------------- /Lib/World/src/world/harvest.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise) 4 | // Last update: 2021/02/15 5 | //----------------------------------------------------------------------------- 6 | #ifndef WORLD_HARVEST_H_ 7 | #define WORLD_HARVEST_H_ 8 | 9 | #include "macrodefinitions.h" 10 | 11 | WORLD_BEGIN_C_DECLS 12 | 13 | //----------------------------------------------------------------------------- 14 | // Struct for Harvest 15 | //----------------------------------------------------------------------------- 16 | typedef struct { 17 | double f0_floor; 18 | double f0_ceil; 19 | double frame_period; 20 | } HarvestOption; 21 | 22 | //----------------------------------------------------------------------------- 23 | // Harvest 24 | // 25 | // Input: 26 | // x : Input signal 27 | // x_length : Length of x 28 | // fs : Sampling frequency 29 | // option : Struct to order the parameter for Harvest 30 | // 31 | // Output: 32 | // temporal_positions : Temporal positions. 33 | // f0 : F0 contour. 34 | //----------------------------------------------------------------------------- 35 | void Harvest(const double *x, int x_length, int fs, 36 | const HarvestOption *option, double *temporal_positions, double *f0); 37 | 38 | //----------------------------------------------------------------------------- 39 | // InitializeHarvestOption allocates the memory to the struct and sets the 40 | // default parameters. 41 | // 42 | // Output: 43 | // option : Struct for the optional parameter. 
44 | //----------------------------------------------------------------------------- 45 | void InitializeHarvestOption(HarvestOption *option); 46 | 47 | //----------------------------------------------------------------------------- 48 | // GetSamplesForHarvest() calculates the number of samples required for 49 | // Harvest(). 50 | // 51 | // Input: 52 | // fs : Sampling frequency [Hz] 53 | // x_length : Length of the input signal [Sample] 54 | // frame_period : Frame shift [msec] 55 | // 56 | // Output: 57 | // The number of samples required to store the results of Harvest(). 58 | //----------------------------------------------------------------------------- 59 | int GetSamplesForHarvest(int fs, int x_length, double frame_period); 60 | 61 | WORLD_END_C_DECLS 62 | 63 | #endif // WORLD_HARVEST_H_ 64 | -------------------------------------------------------------------------------- /Lib/World/src/world/stonemask.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise) 4 | // Last update: 2021/02/15 5 | //----------------------------------------------------------------------------- 6 | #ifndef WORLD_STONEMASK_H_ 7 | #define WORLD_STONEMASK_H_ 8 | 9 | #include "macrodefinitions.h" 10 | 11 | WORLD_BEGIN_C_DECLS 12 | 13 | //----------------------------------------------------------------------------- 14 | // StoneMask() refines the estimated F0 by Dio() 15 | // 16 | // Input: 17 | // x : Input signal 18 | // x_length : Length of the input signal 19 | // fs : Sampling frequency 20 | // time_axis : Temporal information 21 | // f0 : f0 contour 22 | // f0_length : Length of f0 23 | // 24 | // Output: 25 | // refined_f0 : Refined F0 26 | //----------------------------------------------------------------------------- 27 | void StoneMask(const double *x, int x_length, int 
fs, 28 | const double *temporal_positions, const double *f0, int f0_length, 29 | double *refined_f0); 30 | 31 | WORLD_END_C_DECLS 32 | 33 | #endif // WORLD_STONEMASK_H_ 34 | -------------------------------------------------------------------------------- /Lib/World/src/world/synthesis.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise) 4 | // Last update: 2021/02/15 5 | //----------------------------------------------------------------------------- 6 | #ifndef WORLD_SYNTHESIS_H_ 7 | #define WORLD_SYNTHESIS_H_ 8 | 9 | #include "world/macrodefinitions.h" 10 | 11 | WORLD_BEGIN_C_DECLS 12 | 13 | //----------------------------------------------------------------------------- 14 | // Synthesis() synthesizes the voice based on f0, spectrogram and 15 | // aperiodicity (not excitation signal).
16 | // 17 | // Input: 18 | // f0 : f0 contour 19 | // f0_length : Length of f0 20 | // spectrogram : Spectrogram estimated by CheapTrick 21 | // fft_size : FFT size 22 | // aperiodicity : Aperiodicity spectrogram based on D4C 23 | // frame_period : Temporal period used for the analysis 24 | // fs : Sampling frequency 25 | // y_length : Length of the output signal (Memory of y has been 26 | // allocated in advance) 27 | // Output: 28 | // y : Calculated speech 29 | //----------------------------------------------------------------------------- 30 | void Synthesis(const double *f0, int f0_length, 31 | const double * const *spectrogram, const double * const *aperiodicity, 32 | int fft_size, double frame_period, int fs, int y_length, double *y); 33 | 34 | WORLD_END_C_DECLS 35 | 36 | #endif // WORLD_SYNTHESIS_H_ 37 | -------------------------------------------------------------------------------- /Lib/World/tools/audioio.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise) 4 | // Last update: 2021/02/15 5 | //----------------------------------------------------------------------------- 6 | #ifndef WORLD_AUDIOIO_H_ 7 | #define WORLD_AUDIOIO_H_ 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | //----------------------------------------------------------------------------- 14 | // wavwrite() writes a .wav file. 15 | // Input: 16 | // x : Input signal 17 | // x_length : Signal length of x [sample] 18 | // fs : Sampling frequency [Hz] 19 | // nbit : Quantization bit [bit] 20 | // filename : Name of the output signal. 21 | // Caution: 22 | // The variable nbit is not used in this function. 23 | // This function only supports the 16 bit.
24 | //----------------------------------------------------------------------------- 25 | void wavwrite(const double *x, int x_length, int fs, int nbit, 26 | const char *filename); 27 | 28 | //----------------------------------------------------------------------------- 29 | // GetAudioLength() returns the length of .wav file. 30 | // Input: 31 | // filename : Filename of a .wav file. 32 | // Output: 33 | // The number of samples of the file .wav 34 | //----------------------------------------------------------------------------- 35 | int GetAudioLength(const char *filename); 36 | 37 | //----------------------------------------------------------------------------- 38 | // wavread() read a .wav file. 39 | // The memory of output x must be allocated in advance. 40 | // Input: 41 | // filename : Filename of the input file. 42 | // Output: 43 | // fs : Sampling frequency [Hz] 44 | // nbit : Quantization bit [bit] 45 | // x : The output waveform. 46 | //----------------------------------------------------------------------------- 47 | void wavread(const char* filename, int *fs, int *nbit, double *x); 48 | 49 | #ifdef __cplusplus 50 | } 51 | #endif 52 | 53 | #endif // WORLD_AUDIOIO_H_ 54 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/Lib/avcodec.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/Lib/ffmpeg-4.2.1/Lib/avcodec.lib -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/Lib/avformat.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/Lib/ffmpeg-4.2.1/Lib/avformat.lib -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/Lib/avutil.lib: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/Lib/ffmpeg-4.2.1/Lib/avutil.lib -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/Lib/swresample.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/Lib/ffmpeg-4.2.1/Lib/swresample.lib -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/Lib/swscale.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/Lib/ffmpeg-4.2.1/Lib/swscale.lib -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavcodec/ac3_parser.h: -------------------------------------------------------------------------------- 1 | /* 2 | * AC-3 parser prototypes 3 | * Copyright (c) 2003 Fabrice Bellard 4 | * Copyright (c) 2003 Michael Niedermayer 5 | * 6 | * This file is part of FFmpeg. 7 | * 8 | * FFmpeg is free software; you can redistribute it and/or 9 | * modify it under the terms of the GNU Lesser General Public 10 | * License as published by the Free Software Foundation; either 11 | * version 2.1 of the License, or (at your option) any later version. 12 | * 13 | * FFmpeg is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 | * Lesser General Public License for more details. 
17 | * 18 | * You should have received a copy of the GNU Lesser General Public 19 | * License along with FFmpeg; if not, write to the Free Software 20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 | */ 22 | 23 | #ifndef AVCODEC_AC3_PARSER_H 24 | #define AVCODEC_AC3_PARSER_H 25 | 26 | #include <stddef.h> 27 | #include <stdint.h> 28 | 29 | /** 30 | * Extract the bitstream ID and the frame size from AC-3 data. 31 | */ 32 | int av_ac3_parse_header(const uint8_t *buf, size_t size, 33 | uint8_t *bitstream_id, uint16_t *frame_size); 34 | 35 | 36 | #endif /* AVCODEC_AC3_PARSER_H */ 37 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavcodec/adts_parser.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of FFmpeg. 3 | * 4 | * FFmpeg is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU Lesser General Public 6 | * License as published by the Free Software Foundation; either 7 | * version 2.1 of the License, or (at your option) any later version. 8 | * 9 | * FFmpeg is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | * Lesser General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Lesser General Public 15 | * License along with FFmpeg; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | 19 | #ifndef AVCODEC_ADTS_PARSER_H 20 | #define AVCODEC_ADTS_PARSER_H 21 | 22 | #include <stddef.h> 23 | #include <stdint.h> 24 | 25 | #define AV_AAC_ADTS_HEADER_SIZE 7 26 | 27 | /** 28 | * Extract the number of samples and frames from AAC data. 
29 | * @param[in] buf pointer to AAC data buffer 30 | * @param[out] samples Pointer to where number of samples is written 31 | * @param[out] frames Pointer to where number of frames is written 32 | * @return Returns 0 on success, error code on failure. 33 | */ 34 | int av_adts_header_parse(const uint8_t *buf, uint32_t *samples, 35 | uint8_t *frames); 36 | 37 | #endif /* AVCODEC_ADTS_PARSER_H */ 38 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavcodec/jni.h: -------------------------------------------------------------------------------- 1 | /* 2 | * JNI public API functions 3 | * 4 | * Copyright (c) 2015-2016 Matthieu Bouron <matthieu.bouron stupeflix.com> 5 | * 6 | * This file is part of FFmpeg. 7 | * 8 | * FFmpeg is free software; you can redistribute it and/or 9 | * modify it under the terms of the GNU Lesser General Public 10 | * License as published by the Free Software Foundation; either 11 | * version 2.1 of the License, or (at your option) any later version. 12 | * 13 | * FFmpeg is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 | * Lesser General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU Lesser General Public 19 | * License along with FFmpeg; if not, write to the Free Software 20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 | */ 22 | 23 | #ifndef AVCODEC_JNI_H 24 | #define AVCODEC_JNI_H 25 | 26 | /* 27 | * Manually set a Java virtual machine which will be used to retrieve the JNI 28 | * environment. Once a Java VM is set it cannot be changed afterwards, meaning 29 | * you can call multiple times av_jni_set_java_vm with the same Java VM pointer 30 | * however it will error out if you try to set a different Java VM. 
31 | * 32 | * @param vm Java virtual machine 33 | * @param log_ctx context used for logging, can be NULL 34 | * @return 0 on success, < 0 otherwise 35 | */ 36 | int av_jni_set_java_vm(void *vm, void *log_ctx); 37 | 38 | /* 39 | * Get the Java virtual machine which has been set with av_jni_set_java_vm. 40 | * 41 | * @param log_ctx context used for logging, can be NULL 42 | * @return a pointer to the Java virtual machine 43 | */ 44 | void *av_jni_get_java_vm(void *log_ctx); 45 | 46 | #endif /* AVCODEC_JNI_H */ 47 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavcodec/vorbis_parser.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of FFmpeg. 3 | * 4 | * FFmpeg is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU Lesser General Public 6 | * License as published by the Free Software Foundation; either 7 | * version 2.1 of the License, or (at your option) any later version. 8 | * 9 | * FFmpeg is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | * Lesser General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Lesser General Public 15 | * License along with FFmpeg; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | 19 | /** 20 | * @file 21 | * A public API for Vorbis parsing 22 | * 23 | * Determines the duration for each packet. 24 | */ 25 | 26 | #ifndef AVCODEC_VORBIS_PARSER_H 27 | #define AVCODEC_VORBIS_PARSER_H 28 | 29 | #include <stdint.h> 30 | 31 | typedef struct AVVorbisParseContext AVVorbisParseContext; 32 | 33 | /** 34 | * Allocate and initialize the Vorbis parser using headers in the extradata.
35 | */ 36 | AVVorbisParseContext *av_vorbis_parse_init(const uint8_t *extradata, 37 | int extradata_size); 38 | 39 | /** 40 | * Free the parser and everything associated with it. 41 | */ 42 | void av_vorbis_parse_free(AVVorbisParseContext **s); 43 | 44 | #define VORBIS_FLAG_HEADER 0x00000001 45 | #define VORBIS_FLAG_COMMENT 0x00000002 46 | #define VORBIS_FLAG_SETUP 0x00000004 47 | 48 | /** 49 | * Get the duration for a Vorbis packet. 50 | * 51 | * If @p flags is @c NULL, 52 | * special frames are considered invalid. 53 | * 54 | * @param s Vorbis parser context 55 | * @param buf buffer containing a Vorbis frame 56 | * @param buf_size size of the buffer 57 | * @param flags flags for special frames 58 | */ 59 | int av_vorbis_parse_frame_flags(AVVorbisParseContext *s, const uint8_t *buf, 60 | int buf_size, int *flags); 61 | 62 | /** 63 | * Get the duration for a Vorbis packet. 64 | * 65 | * @param s Vorbis parser context 66 | * @param buf buffer containing a Vorbis frame 67 | * @param buf_size size of the buffer 68 | */ 69 | int av_vorbis_parse_frame(AVVorbisParseContext *s, const uint8_t *buf, 70 | int buf_size); 71 | 72 | void av_vorbis_parse_reset(AVVorbisParseContext *s); 73 | 74 | #endif /* AVCODEC_VORBIS_PARSER_H */ 75 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavdevice/version.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of FFmpeg. 3 | * 4 | * FFmpeg is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU Lesser General Public 6 | * License as published by the Free Software Foundation; either 7 | * version 2.1 of the License, or (at your option) any later version. 8 | * 9 | * FFmpeg is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU 12 | * Lesser General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Lesser General Public 15 | * License along with FFmpeg; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | 19 | #ifndef AVDEVICE_VERSION_H 20 | #define AVDEVICE_VERSION_H 21 | 22 | /** 23 | * @file 24 | * @ingroup lavd 25 | * Libavdevice version macros 26 | */ 27 | 28 | #include "libavutil/version.h" 29 | 30 | #define LIBAVDEVICE_VERSION_MAJOR 58 31 | #define LIBAVDEVICE_VERSION_MINOR 8 32 | #define LIBAVDEVICE_VERSION_MICRO 100 33 | 34 | #define LIBAVDEVICE_VERSION_INT AV_VERSION_INT(LIBAVDEVICE_VERSION_MAJOR, \ 35 | LIBAVDEVICE_VERSION_MINOR, \ 36 | LIBAVDEVICE_VERSION_MICRO) 37 | #define LIBAVDEVICE_VERSION AV_VERSION(LIBAVDEVICE_VERSION_MAJOR, \ 38 | LIBAVDEVICE_VERSION_MINOR, \ 39 | LIBAVDEVICE_VERSION_MICRO) 40 | #define LIBAVDEVICE_BUILD LIBAVDEVICE_VERSION_INT 41 | 42 | #define LIBAVDEVICE_IDENT "Lavd" AV_STRINGIFY(LIBAVDEVICE_VERSION) 43 | 44 | /** 45 | * FF_API_* defines may be placed below to indicate public API that will be 46 | * dropped at a future version bump. The defines themselves are not part of 47 | * the public API and may change, break or disappear at any time. 48 | */ 49 | 50 | #endif /* AVDEVICE_VERSION_H */ 51 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/adler32.h: -------------------------------------------------------------------------------- 1 | /* 2 | * copyright (c) 2006 Mans Rullgard 3 | * 4 | * This file is part of FFmpeg. 5 | * 6 | * FFmpeg is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU Lesser General Public 8 | * License as published by the Free Software Foundation; either 9 | * version 2.1 of the License, or (at your option) any later version. 
10 | * 11 | * FFmpeg is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | * Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public 17 | * License along with FFmpeg; if not, write to the Free Software 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 | */ 20 | 21 | /** 22 | * @file 23 | * @ingroup lavu_adler32 24 | * Public header for Adler-32 hash function implementation. 25 | */ 26 | 27 | #ifndef AVUTIL_ADLER32_H 28 | #define AVUTIL_ADLER32_H 29 | 30 | #include <stdint.h> 31 | #include "attributes.h" 32 | 33 | /** 34 | * @defgroup lavu_adler32 Adler-32 35 | * @ingroup lavu_hash 36 | * Adler-32 hash function implementation. 37 | * 38 | * @{ 39 | */ 40 | 41 | /** 42 | * Calculate the Adler32 checksum of a buffer. 43 | * 44 | * Passing the return value to a subsequent av_adler32_update() call 45 | * allows the checksum of multiple buffers to be calculated as though 46 | * they were concatenated. 47 | * 48 | * @param adler initial checksum value 49 | * @param buf pointer to input buffer 50 | * @param len size of input buffer 51 | * @return updated checksum 52 | */ 53 | unsigned long av_adler32_update(unsigned long adler, const uint8_t *buf, 54 | unsigned int len) av_pure; 55 | 56 | /** 57 | * @} 58 | */ 59 | 60 | #endif /* AVUTIL_ADLER32_H */ 61 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/aes.h: -------------------------------------------------------------------------------- 1 | /* 2 | * copyright (c) 2007 Michael Niedermayer <michaelni@gmx.at> 3 | * 4 | * This file is part of FFmpeg. 
5 | * 6 | * FFmpeg is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU Lesser General Public 8 | * License as published by the Free Software Foundation; either 9 | * version 2.1 of the License, or (at your option) any later version. 10 | * 11 | * FFmpeg is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | * Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public 17 | * License along with FFmpeg; if not, write to the Free Software 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 | */ 20 | 21 | #ifndef AVUTIL_AES_H 22 | #define AVUTIL_AES_H 23 | 24 | #include <stdint.h> 25 | 26 | #include "attributes.h" 27 | #include "version.h" 28 | 29 | /** 30 | * @defgroup lavu_aes AES 31 | * @ingroup lavu_crypto 32 | * @{ 33 | */ 34 | 35 | extern const int av_aes_size; 36 | 37 | struct AVAES; 38 | 39 | /** 40 | * Allocate an AVAES context. 41 | */ 42 | struct AVAES *av_aes_alloc(void); 43 | 44 | /** 45 | * Initialize an AVAES context. 46 | * @param key_bits 128, 192 or 256 47 | * @param decrypt 0 for encryption, 1 for decryption 48 | */ 49 | int av_aes_init(struct AVAES *a, const uint8_t *key, int key_bits, int decrypt); 50 | 51 | /** 52 | * Encrypt or decrypt a buffer using a previously initialized context. 
53 | * @param count number of 16 byte blocks 54 | * @param dst destination array, can be equal to src 55 | * @param src source array, can be equal to dst 56 | * @param iv initialization vector for CBC mode, if NULL then ECB will be used 57 | * @param decrypt 0 for encryption, 1 for decryption 58 | */ 59 | void av_aes_crypt(struct AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv, int decrypt); 60 | 61 | /** 62 | * @} 63 | */ 64 | 65 | #endif /* AVUTIL_AES_H */ 66 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/avconfig.h: -------------------------------------------------------------------------------- 1 | /* Generated by ffmpeg configure */ 2 | #ifndef AVUTIL_AVCONFIG_H 3 | #define AVUTIL_AVCONFIG_H 4 | #define AV_HAVE_BIGENDIAN 0 5 | #define AV_HAVE_FAST_UNALIGNED 0 6 | #endif /* AVUTIL_AVCONFIG_H */ 7 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/base64.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2006 Ryan Martell. (rdm4@martellventures.com) 3 | * 4 | * This file is part of FFmpeg. 5 | * 6 | * FFmpeg is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU Lesser General Public 8 | * License as published by the Free Software Foundation; either 9 | * version 2.1 of the License, or (at your option) any later version. 10 | * 11 | * FFmpeg is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | * Lesser General Public License for more details. 
15 | * 16 | * You should have received a copy of the GNU Lesser General Public 17 | * License along with FFmpeg; if not, write to the Free Software 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 | */ 20 | 21 | #ifndef AVUTIL_BASE64_H 22 | #define AVUTIL_BASE64_H 23 | 24 | #include <stdint.h> 25 | 26 | /** 27 | * @defgroup lavu_base64 Base64 28 | * @ingroup lavu_crypto 29 | * @{ 30 | */ 31 | 32 | /** 33 | * Decode a base64-encoded string. 34 | * 35 | * @param out buffer for decoded data 36 | * @param in null-terminated input string 37 | * @param out_size size in bytes of the out buffer, must be at 38 | * least 3/4 of the length of in, that is AV_BASE64_DECODE_SIZE(strlen(in)) 39 | * @return number of bytes written, or a negative value in case of 40 | * invalid input 41 | */ 42 | int av_base64_decode(uint8_t *out, const char *in, int out_size); 43 | 44 | /** 45 | * Calculate the output size in bytes needed to decode a base64 string 46 | * with length x to a data buffer. 47 | */ 48 | #define AV_BASE64_DECODE_SIZE(x) ((x) * 3LL / 4) 49 | 50 | /** 51 | * Encode data to base64 and null-terminate. 52 | * 53 | * @param out buffer for encoded data 54 | * @param out_size size in bytes of the out buffer (including the 55 | * null terminator), must be at least AV_BASE64_SIZE(in_size) 56 | * @param in input buffer containing the data to encode 57 | * @param in_size size in bytes of the in buffer 58 | * @return out or NULL in case of error 59 | */ 60 | char *av_base64_encode(char *out, int out_size, const uint8_t *in, int in_size); 61 | 62 | /** 63 | * Calculate the output size needed to base64-encode x bytes to a 64 | * null-terminated string. 
65 | */ 66 | #define AV_BASE64_SIZE(x) (((x)+2) / 3 * 4 + 1) 67 | 68 | /** 69 | * @} 70 | */ 71 | 72 | #endif /* AVUTIL_BASE64_H */ 73 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/camellia.h: -------------------------------------------------------------------------------- 1 | /* 2 | * An implementation of the CAMELLIA algorithm as mentioned in RFC3713 3 | * Copyright (c) 2014 Supraja Meedinti 4 | * 5 | * This file is part of FFmpeg. 6 | * 7 | * FFmpeg is free software; you can redistribute it and/or 8 | * modify it under the terms of the GNU Lesser General Public 9 | * License as published by the Free Software Foundation; either 10 | * version 2.1 of the License, or (at your option) any later version. 11 | * 12 | * FFmpeg is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | * Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public 18 | * License along with FFmpeg; if not, write to the Free Software 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 | */ 21 | 22 | #ifndef AVUTIL_CAMELLIA_H 23 | #define AVUTIL_CAMELLIA_H 24 | 25 | #include <stdint.h> 26 | 27 | 28 | /** 29 | * @file 30 | * @brief Public header for libavutil CAMELLIA algorithm 31 | * @defgroup lavu_camellia CAMELLIA 32 | * @ingroup lavu_crypto 33 | * @{ 34 | */ 35 | 36 | extern const int av_camellia_size; 37 | 38 | struct AVCAMELLIA; 39 | 40 | /** 41 | * Allocate an AVCAMELLIA context 42 | * To free the struct: av_free(ptr) 43 | */ 44 | struct AVCAMELLIA *av_camellia_alloc(void); 45 | 46 | /** 47 | * Initialize an AVCAMELLIA context. 
48 | * 49 | * @param ctx an AVCAMELLIA context 50 | * @param key a key of 16, 24, 32 bytes used for encryption/decryption 51 | * @param key_bits number of keybits: possible are 128, 192, 256 52 | */ 53 | int av_camellia_init(struct AVCAMELLIA *ctx, const uint8_t *key, int key_bits); 54 | 55 | /** 56 | * Encrypt or decrypt a buffer using a previously initialized context 57 | * 58 | * @param ctx an AVCAMELLIA context 59 | * @param dst destination array, can be equal to src 60 | * @param src source array, can be equal to dst 61 | * @param count number of 16 byte blocks 62 | * @param iv initialization vector for CBC mode, NULL for ECB mode 63 | * @param decrypt 0 for encryption, 1 for decryption 64 | */ 65 | void av_camellia_crypt(struct AVCAMELLIA *ctx, uint8_t *dst, const uint8_t *src, int count, uint8_t* iv, int decrypt); 66 | 67 | /** 68 | * @} 69 | */ 70 | #endif /* AVUTIL_CAMELLIA_H */ 71 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/ffversion.h: -------------------------------------------------------------------------------- 1 | /* Automatically generated by version.sh, do not manually edit! */ 2 | #ifndef AVUTIL_FFVERSION_H 3 | #define AVUTIL_FFVERSION_H 4 | #define FFMPEG_VERSION "4.2.1" 5 | #endif /* AVUTIL_FFVERSION_H */ 6 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/hwcontext_cuda.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of FFmpeg. 3 | * 4 | * FFmpeg is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU Lesser General Public 6 | * License as published by the Free Software Foundation; either 7 | * version 2.1 of the License, or (at your option) any later version.
8 | * 9 | * FFmpeg is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | * Lesser General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Lesser General Public 15 | * License along with FFmpeg; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | 19 | 20 | #ifndef AVUTIL_HWCONTEXT_CUDA_H 21 | #define AVUTIL_HWCONTEXT_CUDA_H 22 | 23 | #ifndef CUDA_VERSION 24 | #include <cuda.h> 25 | #endif 26 | 27 | #include "pixfmt.h" 28 | 29 | /** 30 | * @file 31 | * An API-specific header for AV_HWDEVICE_TYPE_CUDA. 32 | * 33 | * This API supports dynamic frame pools. AVHWFramesContext.pool must return 34 | * AVBufferRefs whose data pointer is a CUdeviceptr. 35 | */ 36 | 37 | typedef struct AVCUDADeviceContextInternal AVCUDADeviceContextInternal; 38 | 39 | /** 40 | * This struct is allocated as AVHWDeviceContext.hwctx 41 | */ 42 | typedef struct AVCUDADeviceContext { 43 | CUcontext cuda_ctx; 44 | CUstream stream; 45 | AVCUDADeviceContextInternal *internal; 46 | } AVCUDADeviceContext; 47 | 48 | /** 49 | * AVHWFramesContext.hwctx is currently not used 50 | */ 51 | 52 | #endif /* AVUTIL_HWCONTEXT_CUDA_H */ 53 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/hwcontext_mediacodec.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of FFmpeg. 3 | * 4 | * FFmpeg is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU Lesser General Public 6 | * License as published by the Free Software Foundation; either 7 | * version 2.1 of the License, or (at your option) any later version. 
8 | * 9 | * FFmpeg is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | * Lesser General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Lesser General Public 15 | * License along with FFmpeg; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | 19 | #ifndef AVUTIL_HWCONTEXT_MEDIACODEC_H 20 | #define AVUTIL_HWCONTEXT_MEDIACODEC_H 21 | 22 | /** 23 | * MediaCodec details. 24 | * 25 | * Allocated as AVHWDeviceContext.hwctx 26 | */ 27 | typedef struct AVMediaCodecDeviceContext { 28 | /** 29 | * android/view/Surface handle, to be filled by the user. 30 | * 31 | * This is the default surface used by decoders on this device. 32 | */ 33 | void *surface; 34 | } AVMediaCodecDeviceContext; 35 | 36 | #endif /* AVUTIL_HWCONTEXT_MEDIACODEC_H */ 37 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/hwcontext_qsv.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of FFmpeg. 3 | * 4 | * FFmpeg is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU Lesser General Public 6 | * License as published by the Free Software Foundation; either 7 | * version 2.1 of the License, or (at your option) any later version. 8 | * 9 | * FFmpeg is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | * Lesser General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU Lesser General Public 15 | * License along with FFmpeg; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | 19 | #ifndef AVUTIL_HWCONTEXT_QSV_H 20 | #define AVUTIL_HWCONTEXT_QSV_H 21 | 22 | #include <mfx/mfxvideo.h> 23 | 24 | /** 25 | * @file 26 | * An API-specific header for AV_HWDEVICE_TYPE_QSV. 27 | * 28 | * This API does not support dynamic frame pools. AVHWFramesContext.pool must 29 | * contain AVBufferRefs whose data pointer points to an mfxFrameSurface1 struct. 30 | */ 31 | 32 | /** 33 | * This struct is allocated as AVHWDeviceContext.hwctx 34 | */ 35 | typedef struct AVQSVDeviceContext { 36 | mfxSession session; 37 | } AVQSVDeviceContext; 38 | 39 | /** 40 | * This struct is allocated as AVHWFramesContext.hwctx 41 | */ 42 | typedef struct AVQSVFramesContext { 43 | mfxFrameSurface1 *surfaces; 44 | int nb_surfaces; 45 | 46 | /** 47 | * A combination of MFX_MEMTYPE_* describing the frame pool. 48 | */ 49 | int frame_type; 50 | } AVQSVFramesContext; 51 | 52 | #endif /* AVUTIL_HWCONTEXT_QSV_H */ 53 | 54 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/hwcontext_vdpau.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of FFmpeg. 3 | * 4 | * FFmpeg is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU Lesser General Public 6 | * License as published by the Free Software Foundation; either 7 | * version 2.1 of the License, or (at your option) any later version. 8 | * 9 | * FFmpeg is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | * Lesser General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU Lesser General Public 15 | * License along with FFmpeg; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | 19 | #ifndef AVUTIL_HWCONTEXT_VDPAU_H 20 | #define AVUTIL_HWCONTEXT_VDPAU_H 21 | 22 | #include <vdpau/vdpau.h> 23 | 24 | /** 25 | * @file 26 | * An API-specific header for AV_HWDEVICE_TYPE_VDPAU. 27 | * 28 | * This API supports dynamic frame pools. AVHWFramesContext.pool must return 29 | * AVBufferRefs whose data pointer is a VdpVideoSurface. 30 | */ 31 | 32 | /** 33 | * This struct is allocated as AVHWDeviceContext.hwctx 34 | */ 35 | typedef struct AVVDPAUDeviceContext { 36 | VdpDevice device; 37 | VdpGetProcAddress *get_proc_address; 38 | } AVVDPAUDeviceContext; 39 | 40 | /** 41 | * AVHWFramesContext.hwctx is currently not used 42 | */ 43 | 44 | #endif /* AVUTIL_HWCONTEXT_VDPAU_H */ 45 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/hwcontext_videotoolbox.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of FFmpeg. 3 | * 4 | * FFmpeg is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU Lesser General Public 6 | * License as published by the Free Software Foundation; either 7 | * version 2.1 of the License, or (at your option) any later version. 8 | * 9 | * FFmpeg is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | * Lesser General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU Lesser General Public 15 | * License along with FFmpeg; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | 19 | #ifndef AVUTIL_HWCONTEXT_VIDEOTOOLBOX_H 20 | #define AVUTIL_HWCONTEXT_VIDEOTOOLBOX_H 21 | 22 | #include <stdint.h> 23 | 24 | #include <VideoToolbox/VideoToolbox.h> 25 | 26 | #include "pixfmt.h" 27 | 28 | /** 29 | * @file 30 | * An API-specific header for AV_HWDEVICE_TYPE_VIDEOTOOLBOX. 31 | * 32 | * This API currently does not support frame allocation, as the raw VideoToolbox 33 | * API does allocation, and FFmpeg itself never has the need to allocate frames. 34 | * 35 | * If the API user sets a custom pool, AVHWFramesContext.pool must return 36 | * AVBufferRefs whose data pointer is a CVImageBufferRef or CVPixelBufferRef. 37 | * 38 | * Currently AVHWDeviceContext.hwctx and AVHWFramesContext.hwctx are always 39 | * NULL. 40 | */ 41 | 42 | /** 43 | * Convert a VideoToolbox (actually CoreVideo) format to AVPixelFormat. 44 | * Returns AV_PIX_FMT_NONE if no known equivalent was found. 45 | */ 46 | enum AVPixelFormat av_map_videotoolbox_format_to_pixfmt(uint32_t cv_fmt); 47 | 48 | /** 49 | * Convert an AVPixelFormat to a VideoToolbox (actually CoreVideo) format. 50 | * Returns 0 if no known equivalent was found. 51 | */ 52 | uint32_t av_map_videotoolbox_format_from_pixfmt(enum AVPixelFormat pix_fmt); 53 | 54 | #endif /* AVUTIL_HWCONTEXT_VIDEOTOOLBOX_H */ 55 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/intfloat.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 Mans Rullgard 3 | * 4 | * This file is part of FFmpeg. 
5 | * 6 | * FFmpeg is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU Lesser General Public 8 | * License as published by the Free Software Foundation; either 9 | * version 2.1 of the License, or (at your option) any later version. 10 | * 11 | * FFmpeg is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | * Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public 17 | * License along with FFmpeg; if not, write to the Free Software 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 | */ 20 | 21 | #ifndef AVUTIL_INTFLOAT_H 22 | #define AVUTIL_INTFLOAT_H 23 | 24 | #include <stdint.h> 25 | #include "attributes.h" 26 | 27 | union av_intfloat32 { 28 | uint32_t i; 29 | float f; 30 | }; 31 | 32 | union av_intfloat64 { 33 | uint64_t i; 34 | double f; 35 | }; 36 | 37 | /** 38 | * Reinterpret a 32-bit integer as a float. 39 | */ 40 | static av_always_inline float av_int2float(uint32_t i) 41 | { 42 | union av_intfloat32 v; 43 | v.i = i; 44 | return v.f; 45 | } 46 | 47 | /** 48 | * Reinterpret a float as a 32-bit integer. 49 | */ 50 | static av_always_inline uint32_t av_float2int(float f) 51 | { 52 | union av_intfloat32 v; 53 | v.f = f; 54 | return v.i; 55 | } 56 | 57 | /** 58 | * Reinterpret a 64-bit integer as a double. 59 | */ 60 | static av_always_inline double av_int2double(uint64_t i) 61 | { 62 | union av_intfloat64 v; 63 | v.i = i; 64 | return v.f; 65 | } 66 | 67 | /** 68 | * Reinterpret a double as a 64-bit integer. 
69 | */ 70 | static av_always_inline uint64_t av_double2int(double f) 71 | { 72 | union av_intfloat64 v; 73 | v.f = f; 74 | return v.i; 75 | } 76 | 77 | #endif /* AVUTIL_INTFLOAT_H */ 78 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/lfg.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Lagged Fibonacci PRNG 3 | * Copyright (c) 2008 Michael Niedermayer 4 | * 5 | * This file is part of FFmpeg. 6 | * 7 | * FFmpeg is free software; you can redistribute it and/or 8 | * modify it under the terms of the GNU Lesser General Public 9 | * License as published by the Free Software Foundation; either 10 | * version 2.1 of the License, or (at your option) any later version. 11 | * 12 | * FFmpeg is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | * Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public 18 | * License along with FFmpeg; if not, write to the Free Software 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 | */ 21 | 22 | #ifndef AVUTIL_LFG_H 23 | #define AVUTIL_LFG_H 24 | 25 | #include <stdint.h> 26 | 27 | typedef struct AVLFG { 28 | unsigned int state[64]; 29 | int index; 30 | } AVLFG; 31 | 32 | void av_lfg_init(AVLFG *c, unsigned int seed); 33 | 34 | /** 35 | * Seed the state of the ALFG using binary data. 36 | * 37 | * Return value: 0 on success, negative value (AVERROR) on failure. 38 | */ 39 | int av_lfg_init_from_data(AVLFG *c, const uint8_t *data, unsigned int length); 40 | 41 | /** 42 | * Get the next random unsigned 32-bit number using an ALFG. 
43 | * 44 | * Please also consider a simple LCG like state= state*1664525+1013904223, 45 | * it may be good enough and faster for your specific use case. 46 | */ 47 | static inline unsigned int av_lfg_get(AVLFG *c){ 48 | c->state[c->index & 63] = c->state[(c->index-24) & 63] + c->state[(c->index-55) & 63]; 49 | return c->state[c->index++ & 63]; 50 | } 51 | 52 | /** 53 | * Get the next random unsigned 32-bit number using a MLFG. 54 | * 55 | * Please also consider av_lfg_get() above, it is faster. 56 | */ 57 | static inline unsigned int av_mlfg_get(AVLFG *c){ 58 | unsigned int a= c->state[(c->index-55) & 63]; 59 | unsigned int b= c->state[(c->index-24) & 63]; 60 | return c->state[c->index++ & 63] = 2*a*b+a+b; 61 | } 62 | 63 | /** 64 | * Get the next two numbers generated by a Box-Muller Gaussian 65 | * generator using the random numbers issued by lfg. 66 | * 67 | * @param out array where the two generated numbers are placed 68 | */ 69 | void av_bmg_get(AVLFG *lfg, double out[2]); 70 | 71 | #endif /* AVUTIL_LFG_H */ 72 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/lzo.h: -------------------------------------------------------------------------------- 1 | /* 2 | * LZO 1x decompression 3 | * copyright (c) 2006 Reimar Doeffinger 4 | * 5 | * This file is part of FFmpeg. 6 | * 7 | * FFmpeg is free software; you can redistribute it and/or 8 | * modify it under the terms of the GNU Lesser General Public 9 | * License as published by the Free Software Foundation; either 10 | * version 2.1 of the License, or (at your option) any later version. 11 | * 12 | * FFmpeg is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | * Lesser General Public License for more details. 
16 | * 17 | * You should have received a copy of the GNU Lesser General Public 18 | * License along with FFmpeg; if not, write to the Free Software 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 | */ 21 | 22 | #ifndef AVUTIL_LZO_H 23 | #define AVUTIL_LZO_H 24 | 25 | /** 26 | * @defgroup lavu_lzo LZO 27 | * @ingroup lavu_crypto 28 | * 29 | * @{ 30 | */ 31 | 32 | #include <stdint.h> 33 | 34 | /** @name Error flags returned by av_lzo1x_decode 35 | * @{ */ 36 | /// end of the input buffer reached before decoding finished 37 | #define AV_LZO_INPUT_DEPLETED 1 38 | /// decoded data did not fit into output buffer 39 | #define AV_LZO_OUTPUT_FULL 2 40 | /// a reference to previously decoded data was wrong 41 | #define AV_LZO_INVALID_BACKPTR 4 42 | /// a non-specific error in the compressed bitstream 43 | #define AV_LZO_ERROR 8 44 | /** @} */ 45 | 46 | #define AV_LZO_INPUT_PADDING 8 47 | #define AV_LZO_OUTPUT_PADDING 12 48 | 49 | /** 50 | * @brief Decodes LZO 1x compressed data. 51 | * @param out output buffer 52 | * @param outlen size of output buffer, number of bytes left are returned here 53 | * @param in input buffer 54 | * @param inlen size of input buffer, number of bytes left are returned here 55 | * @return 0 on success, otherwise a combination of the error flags above 56 | * 57 | * Make sure all buffers are appropriately padded, in must provide 58 | * AV_LZO_INPUT_PADDING, out must provide AV_LZO_OUTPUT_PADDING additional bytes. 59 | */ 60 | int av_lzo1x_decode(void *out, int *outlen, const void *in, int *inlen); 61 | 62 | /** 63 | * @} 64 | */ 65 | 66 | #endif /* AVUTIL_LZO_H */ 67 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/macros.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of FFmpeg. 
3 | * 4 | * FFmpeg is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU Lesser General Public 6 | * License as published by the Free Software Foundation; either 7 | * version 2.1 of the License, or (at your option) any later version. 8 | * 9 | * FFmpeg is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | * Lesser General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Lesser General Public 15 | * License along with FFmpeg; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | 19 | /** 20 | * @file 21 | * @ingroup lavu 22 | * Utility Preprocessor macros 23 | */ 24 | 25 | #ifndef AVUTIL_MACROS_H 26 | #define AVUTIL_MACROS_H 27 | 28 | /** 29 | * @addtogroup preproc_misc Preprocessor String Macros 30 | * 31 | * String manipulation macros 32 | * 33 | * @{ 34 | */ 35 | 36 | #define AV_STRINGIFY(s) AV_TOSTRING(s) 37 | #define AV_TOSTRING(s) #s 38 | 39 | #define AV_GLUE(a, b) a ## b 40 | #define AV_JOIN(a, b) AV_GLUE(a, b) 41 | 42 | /** 43 | * @} 44 | */ 45 | 46 | #define AV_PRAGMA(s) _Pragma(#s) 47 | 48 | #define FFALIGN(x, a) (((x)+(a)-1)&~((a)-1)) 49 | 50 | #endif /* AVUTIL_MACROS_H */ 51 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/motion_vector.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of FFmpeg. 3 | * 4 | * FFmpeg is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU Lesser General Public 6 | * License as published by the Free Software Foundation; either 7 | * version 2.1 of the License, or (at your option) any later version. 
8 | * 9 | * FFmpeg is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | * Lesser General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Lesser General Public 15 | * License along with FFmpeg; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | 19 | #ifndef AVUTIL_MOTION_VECTOR_H 20 | #define AVUTIL_MOTION_VECTOR_H 21 | 22 | #include <stdint.h> 23 | 24 | typedef struct AVMotionVector { 25 | /** 26 | * Where the current macroblock comes from; negative value when it comes 27 | * from the past, positive value when it comes from the future. 28 | * XXX: set exact relative ref frame reference instead of a +/- 1 "direction". 29 | */ 30 | int32_t source; 31 | /** 32 | * Width and height of the block. 33 | */ 34 | uint8_t w, h; 35 | /** 36 | * Absolute source position. Can be outside the frame area. 37 | */ 38 | int16_t src_x, src_y; 39 | /** 40 | * Absolute destination position. Can be outside the frame area. 41 | */ 42 | int16_t dst_x, dst_y; 43 | /** 44 | * Extra flag information. 45 | * Currently unused. 46 | */ 47 | uint64_t flags; 48 | /** 49 | * Motion vector 50 | * src_x = dst_x + motion_x / motion_scale 51 | * src_y = dst_y + motion_y / motion_scale 52 | */ 53 | int32_t motion_x, motion_y; 54 | uint16_t motion_scale; 55 | } AVMotionVector; 56 | 57 | #endif /* AVUTIL_MOTION_VECTOR_H */ 58 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/pixelutils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of FFmpeg. 
3 | * 4 | * FFmpeg is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU Lesser General Public 6 | * License as published by the Free Software Foundation; either 7 | * version 2.1 of the License, or (at your option) any later version. 8 | * 9 | * FFmpeg is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | * Lesser General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU Lesser General Public 15 | * License along with FFmpeg; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | 19 | #ifndef AVUTIL_PIXELUTILS_H 20 | #define AVUTIL_PIXELUTILS_H 21 | 22 | #include <stddef.h> 23 | #include <stdint.h> 24 | #include "common.h" 25 | 26 | /** 27 | * Sum of abs(src1[x] - src2[x]) 28 | */ 29 | typedef int (*av_pixelutils_sad_fn)(const uint8_t *src1, ptrdiff_t stride1, 30 | const uint8_t *src2, ptrdiff_t stride2); 31 | 32 | /** 33 | * Get a potentially optimized pointer to a Sum-of-absolute-differences 34 | * function (see the av_pixelutils_sad_fn prototype). 35 | * 36 | * @param w_bits 1<<w_bits is the requested width of the block size 37 | * @param h_bits 1<<h_bits is the requested height of the block size 38 | * @param aligned If set to 2, the returned sad function will assume src1 and 39 | * src2 addresses are aligned on the block size. 40 | * If set to 1, the returned sad function will assume src1 is 41 | * aligned on the block size. 42 | * If set to 0, the returned sad function assume no particular 43 | * alignment. 
44 | * @param log_ctx context used for logging, can be NULL 45 | * 46 | * @return a pointer to the SAD function or NULL in case of error (because of 47 | * invalid parameters) 48 | */ 49 | av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, 50 | int aligned, void *log_ctx); 51 | 52 | #endif /* AVUTIL_PIXELUTILS_H */ 53 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/random_seed.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2009 Baptiste Coudurier <baptiste.coudurier@gmail.com> 3 | * 4 | * This file is part of FFmpeg. 5 | * 6 | * FFmpeg is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU Lesser General Public 8 | * License as published by the Free Software Foundation; either 9 | * version 2.1 of the License, or (at your option) any later version. 10 | * 11 | * FFmpeg is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | * Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public 17 | * License along with FFmpeg; if not, write to the Free Software 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 | */ 20 | 21 | #ifndef AVUTIL_RANDOM_SEED_H 22 | #define AVUTIL_RANDOM_SEED_H 23 | 24 | #include <stdint.h> 25 | /** 26 | * @addtogroup lavu_crypto 27 | * @{ 28 | */ 29 | 30 | /** 31 | * Get a seed to use in conjunction with random functions. 32 | * This function tries to provide a good seed on a best-effort basis. 33 | * It is possible to call this function multiple times if more bits are needed. 34 | * It can be quite slow, which is why it should only be used as seed for a faster 35 | * PRNG. 
The quality of the seed depends on the platform. 36 | */ 37 | uint32_t av_get_random_seed(void); 38 | 39 | /** 40 | * @} 41 | */ 42 | 43 | #endif /* AVUTIL_RANDOM_SEED_H */ 44 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/rc4.h: -------------------------------------------------------------------------------- 1 | /* 2 | * RC4 encryption/decryption/pseudo-random number generator 3 | * 4 | * This file is part of FFmpeg. 5 | * 6 | * FFmpeg is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU Lesser General Public 8 | * License as published by the Free Software Foundation; either 9 | * version 2.1 of the License, or (at your option) any later version. 10 | * 11 | * FFmpeg is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | * Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public 17 | * License along with FFmpeg; if not, write to the Free Software 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 | */ 20 | 21 | #ifndef AVUTIL_RC4_H 22 | #define AVUTIL_RC4_H 23 | 24 | #include <stdint.h> 25 | 26 | /** 27 | * @defgroup lavu_rc4 RC4 28 | * @ingroup lavu_crypto 29 | * @{ 30 | */ 31 | 32 | typedef struct AVRC4 { 33 | uint8_t state[256]; 34 | int x, y; 35 | } AVRC4; 36 | 37 | /** 38 | * Allocate an AVRC4 context. 39 | */ 40 | AVRC4 *av_rc4_alloc(void); 41 | 42 | /** 43 | * @brief Initializes an AVRC4 context. 
44 | * 45 | * @param key_bits must be a multiple of 8 46 | * @param decrypt 0 for encryption, 1 for decryption, currently has no effect 47 | * @return zero on success, negative value otherwise 48 | */ 49 | int av_rc4_init(struct AVRC4 *d, const uint8_t *key, int key_bits, int decrypt); 50 | 51 | /** 52 | * @brief Encrypts / decrypts using the RC4 algorithm. 53 | * 54 | * @param count number of bytes 55 | * @param dst destination array, can be equal to src 56 | * @param src source array, can be equal to dst, may be NULL 57 | * @param iv not (yet) used for RC4, should be NULL 58 | * @param decrypt 0 for encryption, 1 for decryption, not (yet) used 59 | */ 60 | void av_rc4_crypt(struct AVRC4 *d, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv, int decrypt); 61 | 62 | /** 63 | * @} 64 | */ 65 | 66 | #endif /* AVUTIL_RC4_H */ 67 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/replaygain.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of FFmpeg. 3 | * 4 | * FFmpeg is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU Lesser General Public 6 | * License as published by the Free Software Foundation; either 7 | * version 2.1 of the License, or (at your option) any later version. 8 | * 9 | * FFmpeg is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | * Lesser General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU Lesser General Public 15 | * License along with FFmpeg; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | 19 | #ifndef AVUTIL_REPLAYGAIN_H 20 | #define AVUTIL_REPLAYGAIN_H 21 | 22 | #include <stdint.h> 23 | 24 | /** 25 | * ReplayGain information (see 26 | * http://wiki.hydrogenaudio.org/index.php?title=ReplayGain_1.0_specification). 27 | * The size of this struct is a part of the public ABI. 28 | */ 29 | typedef struct AVReplayGain { 30 | /** 31 | * Track replay gain in microbels (divide by 100000 to get the value in dB). 32 | * Should be set to INT32_MIN when unknown. 33 | */ 34 | int32_t track_gain; 35 | /** 36 | * Peak track amplitude, with 100000 representing full scale (but values 37 | * may overflow). 0 when unknown. 38 | */ 39 | uint32_t track_peak; 40 | /** 41 | * Same as track_gain, but for the whole album. 42 | */ 43 | int32_t album_gain; 44 | /** 45 | * Same as track_peak, but for the whole album, 46 | */ 47 | uint32_t album_peak; 48 | } AVReplayGain; 49 | 50 | #endif /* AVUTIL_REPLAYGAIN_H */ 51 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/tea.h: -------------------------------------------------------------------------------- 1 | /* 2 | * A 32-bit implementation of the TEA algorithm 3 | * Copyright (c) 2015 Vesselin Bontchev 4 | * 5 | * This file is part of FFmpeg. 6 | * 7 | * FFmpeg is free software; you can redistribute it and/or 8 | * modify it under the terms of the GNU Lesser General Public 9 | * License as published by the Free Software Foundation; either 10 | * version 2.1 of the License, or (at your option) any later version. 11 | * 12 | * FFmpeg is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU 15 | * Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public 18 | * License along with FFmpeg; if not, write to the Free Software 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 | */ 21 | 22 | #ifndef AVUTIL_TEA_H 23 | #define AVUTIL_TEA_H 24 | 25 | #include <stdint.h> 26 | 27 | /** 28 | * @file 29 | * @brief Public header for libavutil TEA algorithm 30 | * @defgroup lavu_tea TEA 31 | * @ingroup lavu_crypto 32 | * @{ 33 | */ 34 | 35 | extern const int av_tea_size; 36 | 37 | struct AVTEA; 38 | 39 | /** 40 | * Allocate an AVTEA context 41 | * To free the struct: av_free(ptr) 42 | */ 43 | struct AVTEA *av_tea_alloc(void); 44 | 45 | /** 46 | * Initialize an AVTEA context. 47 | * 48 | * @param ctx an AVTEA context 49 | * @param key a key of 16 bytes used for encryption/decryption 50 | * @param rounds the number of rounds in TEA (64 is the "standard") 51 | */ 52 | void av_tea_init(struct AVTEA *ctx, const uint8_t key[16], int rounds); 53 | 54 | /** 55 | * Encrypt or decrypt a buffer using a previously initialized context. 56 | * 57 | * @param ctx an AVTEA context 58 | * @param dst destination array, can be equal to src 59 | * @param src source array, can be equal to dst 60 | * @param count number of 8 byte blocks 61 | * @param iv initialization vector for CBC mode, if NULL then ECB will be used 62 | * @param decrypt 0 for encryption, 1 for decryption 63 | */ 64 | void av_tea_crypt(struct AVTEA *ctx, uint8_t *dst, const uint8_t *src, 65 | int count, uint8_t *iv, int decrypt); 66 | 67 | /** 68 | * @} 69 | */ 70 | 71 | #endif /* AVUTIL_TEA_H */ 72 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/time.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2000-2003 Fabrice Bellard 3 | * 4 | * This file is part of FFmpeg. 
5 | * 6 | * FFmpeg is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU Lesser General Public 8 | * License as published by the Free Software Foundation; either 9 | * version 2.1 of the License, or (at your option) any later version. 10 | * 11 | * FFmpeg is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | * Lesser General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public 17 | * License along with FFmpeg; if not, write to the Free Software 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 | */ 20 | 21 | #ifndef AVUTIL_TIME_H 22 | #define AVUTIL_TIME_H 23 | 24 | #include <stdint.h> 25 | 26 | /** 27 | * Get the current time in microseconds. 28 | */ 29 | int64_t av_gettime(void); 30 | 31 | /** 32 | * Get the current time in microseconds since some unspecified starting point. 33 | * On platforms that support it, the time comes from a monotonic clock 34 | * This property makes this time source ideal for measuring relative time. 35 | * The returned values may not be monotonic on platforms where a monotonic 36 | * clock is not available. 37 | */ 38 | int64_t av_gettime_relative(void); 39 | 40 | /** 41 | * Indicates with a boolean result if the av_gettime_relative() time source 42 | * is monotonic. 43 | */ 44 | int av_gettime_relative_is_monotonic(void); 45 | 46 | /** 47 | * Sleep for a period of time. Although the duration is expressed in 48 | * microseconds, the actual delay may be rounded to the precision of the 49 | * system timer. 50 | * 51 | * @param usec Number of microseconds to sleep. 52 | * @return zero on success or (negative) error code. 
53 | */ 54 | int av_usleep(unsigned usec); 55 | 56 | #endif /* AVUTIL_TIME_H */ 57 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libavutil/twofish.h: -------------------------------------------------------------------------------- 1 | /* 2 | * An implementation of the TwoFish algorithm 3 | * Copyright (c) 2015 Supraja Meedinti 4 | * 5 | * This file is part of FFmpeg. 6 | * 7 | * FFmpeg is free software; you can redistribute it and/or 8 | * modify it under the terms of the GNU Lesser General Public 9 | * License as published by the Free Software Foundation; either 10 | * version 2.1 of the License, or (at your option) any later version. 11 | * 12 | * FFmpeg is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | * Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public 18 | * License along with FFmpeg; if not, write to the Free Software 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 | */ 21 | 22 | #ifndef AVUTIL_TWOFISH_H 23 | #define AVUTIL_TWOFISH_H 24 | 25 | #include <stdint.h> 26 | 27 | 28 | /** 29 | * @file 30 | * @brief Public header for libavutil TWOFISH algorithm 31 | * @defgroup lavu_twofish TWOFISH 32 | * @ingroup lavu_crypto 33 | * @{ 34 | */ 35 | 36 | extern const int av_twofish_size; 37 | 38 | struct AVTWOFISH; 39 | 40 | /** 41 | * Allocate an AVTWOFISH context 42 | * To free the struct: av_free(ptr) 43 | */ 44 | struct AVTWOFISH *av_twofish_alloc(void); 45 | 46 | /** 47 | * Initialize an AVTWOFISH context. 
48 | * 49 | * @param ctx an AVTWOFISH context 50 | * @param key a key of size ranging from 1 to 32 bytes used for encryption/decryption 51 | * @param key_bits number of keybits: 128, 192, 256 If less than the required, padded with zeroes to nearest valid value; return value is 0 if key_bits is 128/192/256, -1 if less than 0, 1 otherwise 52 | */ 53 | int av_twofish_init(struct AVTWOFISH *ctx, const uint8_t *key, int key_bits); 54 | 55 | /** 56 | * Encrypt or decrypt a buffer using a previously initialized context 57 | * 58 | * @param ctx an AVTWOFISH context 59 | * @param dst destination array, can be equal to src 60 | * @param src source array, can be equal to dst 61 | * @param count number of 16 byte blocks 62 | * @param iv initialization vector for CBC mode, NULL for ECB mode 63 | * @param decrypt 0 for encryption, 1 for decryption 64 | */ 65 | void av_twofish_crypt(struct AVTWOFISH *ctx, uint8_t *dst, const uint8_t *src, int count, uint8_t* iv, int decrypt); 66 | 67 | /** 68 | * @} 69 | */ 70 | #endif /* AVUTIL_TWOFISH_H */ 71 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libswresample/version.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Version macros. 3 | * 4 | * This file is part of libswresample 5 | * 6 | * libswresample is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU Lesser General Public 8 | * License as published by the Free Software Foundation; either 9 | * version 2.1 of the License, or (at your option) any later version. 10 | * 11 | * libswresample is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | * Lesser General Public License for more details. 
15 | * 16 | * You should have received a copy of the GNU Lesser General Public 17 | * License along with libswresample; if not, write to the Free Software 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 | */ 20 | 21 | #ifndef SWRESAMPLE_VERSION_H 22 | #define SWRESAMPLE_VERSION_H 23 | 24 | /** 25 | * @file 26 | * Libswresample version macros 27 | */ 28 | 29 | #include "libavutil/avutil.h" 30 | 31 | #define LIBSWRESAMPLE_VERSION_MAJOR 3 32 | #define LIBSWRESAMPLE_VERSION_MINOR 5 33 | #define LIBSWRESAMPLE_VERSION_MICRO 100 34 | 35 | #define LIBSWRESAMPLE_VERSION_INT AV_VERSION_INT(LIBSWRESAMPLE_VERSION_MAJOR, \ 36 | LIBSWRESAMPLE_VERSION_MINOR, \ 37 | LIBSWRESAMPLE_VERSION_MICRO) 38 | #define LIBSWRESAMPLE_VERSION AV_VERSION(LIBSWRESAMPLE_VERSION_MAJOR, \ 39 | LIBSWRESAMPLE_VERSION_MINOR, \ 40 | LIBSWRESAMPLE_VERSION_MICRO) 41 | #define LIBSWRESAMPLE_BUILD LIBSWRESAMPLE_VERSION_INT 42 | 43 | #define LIBSWRESAMPLE_IDENT "SwR" AV_STRINGIFY(LIBSWRESAMPLE_VERSION) 44 | 45 | #endif /* SWRESAMPLE_VERSION_H */ 46 | -------------------------------------------------------------------------------- /Lib/ffmpeg-4.2.1/include/libswscale/version.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of FFmpeg. 3 | * 4 | * FFmpeg is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU Lesser General Public 6 | * License as published by the Free Software Foundation; either 7 | * version 2.1 of the License, or (at your option) any later version. 8 | * 9 | * FFmpeg is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | * Lesser General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU Lesser General Public 15 | * License along with FFmpeg; if not, write to the Free Software 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | 19 | #ifndef SWSCALE_VERSION_H 20 | #define SWSCALE_VERSION_H 21 | 22 | /** 23 | * @file 24 | * swscale version macros 25 | */ 26 | 27 | #include "libavutil/version.h" 28 | 29 | #define LIBSWSCALE_VERSION_MAJOR 5 30 | #define LIBSWSCALE_VERSION_MINOR 5 31 | #define LIBSWSCALE_VERSION_MICRO 100 32 | 33 | #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ 34 | LIBSWSCALE_VERSION_MINOR, \ 35 | LIBSWSCALE_VERSION_MICRO) 36 | #define LIBSWSCALE_VERSION AV_VERSION(LIBSWSCALE_VERSION_MAJOR, \ 37 | LIBSWSCALE_VERSION_MINOR, \ 38 | LIBSWSCALE_VERSION_MICRO) 39 | #define LIBSWSCALE_BUILD LIBSWSCALE_VERSION_INT 40 | 41 | #define LIBSWSCALE_IDENT "SwS" AV_STRINGIFY(LIBSWSCALE_VERSION) 42 | 43 | /** 44 | * FF_API_* defines may be placed below to indicate public API that will be 45 | * dropped at a future version bump. The defines themselves are not part of 46 | * the public API and may change, break or disappear at any time. 
47 | */ 48 | 49 | #ifndef FF_API_SWS_VECTOR 50 | #define FF_API_SWS_VECTOR (LIBSWSCALE_VERSION_MAJOR < 6) 51 | #endif 52 | 53 | #endif /* SWSCALE_VERSION_H */ 54 | -------------------------------------------------------------------------------- /TTSProjectTemplate.ttsproj: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "Tokens": "私は誰?", 4 | "Seq": ["w", "a", "t", "a", "s", "h", "i", "w", "a", "d", "a", "r", "e", "?"], 5 | "Tones": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 6 | "Durations": [2, 5, 2, 5, 2, 2, 5, 2, 5, 2, 5, 2, 5], 7 | "Language": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 8 | "SpeakerMix": [1.000000, 0.000000, 0.000000], 9 | "EmotionPrompt": ["sad", "happy"], 10 | "NoiseScale": 0.666000, 11 | "LengthScale": 1.100000, 12 | "DurationPredictorNoiseScale": 0.333000, 13 | "FactorDpSdp": 0.600000, 14 | "GateThreshold": 0.777000, 15 | "MaxDecodeStep": 114514, 16 | "Seed": 1919810, 17 | "SpeakerId": 2, 18 | "RestTime": 1.000000, 19 | "PlaceHolderSymbol": "|", 20 | "LanguageID": "JP", 21 | "G2PAdditionalInfo": "/[Japanese2]" 22 | }, 23 | { 24 | "Tokens": "私は鳴瀬しろは", 25 | "Seq": [], 26 | "Tones": [], 27 | "Durations": [], 28 | "Language": [], 29 | "SpeakerMix": [], 30 | "EmotionPrompt": [], 31 | "NoiseScale": 0.666000, 32 | "LengthScale": 1.000000, 33 | "DurationPredictorNoiseScale": 0.333000, 34 | "FactorDpSdp": 0.600000, 35 | "GateThreshold": 0.777000, 36 | "MaxDecodeStep": 1145147, 37 | "Seed": 1919810, 38 | "SpeakerId": 0, 39 | "RestTime": 1.000000, 40 | "PlaceHolderSymbol": "|", 41 | "LanguageID": "JP", 42 | "G2PAdditionalInfo": "/[Japanese2]" 43 | }, 44 | { 45 | "Tokens": "私は誰?", 46 | "Seq": [], 47 | "Tones": [], 48 | "Durations": [], 49 | "Language": [], 50 | "SpeakerMix": [0.666000, 0.233000, 0.444000], 51 | "EmotionPrompt": ["sad", "happy"], 52 | "NoiseScale": 0.666000, 53 | "LengthScale": 1.100000, 54 | "DurationPredictorNoiseScale": 0.333000, 55 | "FactorDpSdp": 0.600000, 56 | "GateThreshold": 0.777000, 
57 | "MaxDecodeStep": 1145145, 58 | "Seed": 19198101, 59 | "SpeakerId": 1, 60 | "RestTime": -1.000000, 61 | "PlaceHolderSymbol": "|", 62 | "LanguageID": "JP", 63 | "G2PAdditionalInfo": "/[Japanese2]" 64 | }, 65 | { 66 | "Tokens": "私は鷗です、くみの名前は?", 67 | "Seq": [], 68 | "Tones": [], 69 | "Durations": [], 70 | "Language": [], 71 | "SpeakerMix": [], 72 | "EmotionPrompt": [], 73 | "NoiseScale": 0.666000, 74 | "LengthScale": 1.100000, 75 | "DurationPredictorNoiseScale": 0.222000, 76 | "FactorDpSdp": 0.600000, 77 | "GateThreshold": 0.777000, 78 | "MaxDecodeStep": 114514, 79 | "Seed": 1919810, 80 | "SpeakerId": 3, 81 | "RestTime": 1.000000, 82 | "PlaceHolderSymbol": "|", 83 | "LanguageID": "JP", 84 | "G2PAdditionalInfo": "/[Japanese2]" 85 | } 86 | ] -------------------------------------------------------------------------------- /fish-speech.cpp/Demo/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}) 2 | 3 | set(EXE_DEMO Demo) 4 | add_compile_options(/ZI) 5 | add_executable(${EXE_DEMO} main.cpp) 6 | target_link_libraries(${EXE_DEMO} PRIVATE fish-speech-cpp) 7 | 8 | if (CMAKE_VERSION VERSION_GREATER 3.12) 9 | set_property(TARGET ${EXE_DEMO} PROPERTY CXX_STANDARD 20) 10 | endif() -------------------------------------------------------------------------------- /fish-speech.cpp/Demo/main.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include <windows.h> 3 | #include "llama.h" 4 | 5 | using namespace libtts; 6 | 7 | int main() 8 | { 9 | auto a = BaseTransformer(nullptr, L"", BaseModelArgs()); 10 | std::cout << UnicodeToByte(a.DumpLayerNameInfo()); 11 | system("pause"); 12 | return 0; 13 | } -------------------------------------------------------------------------------- /fish-speech.cpp/src/Base.cpp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/fish-speech.cpp/src/Base.cpp -------------------------------------------------------------------------------- /fish-speech.cpp/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | for i in range(20): 4 | a = torch.ones(size=(1, 768, 100000)) 5 | beg = time.time() 6 | a.fill_(i) 7 | print(time.time() - beg) -------------------------------------------------------------------------------- /libdlvoicecodec/LibDLVoiceCodec/base.cpp: -------------------------------------------------------------------------------- 1 | #include "base.h" -------------------------------------------------------------------------------- /libdlvoicecodec/LibDLVoiceCodec/base.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <cstdint> 3 | #include <string> 4 | #include <vector> 5 | #include <iostream> 6 | #include <unordered_map> 7 | #define LibDLVoiceCodecBegin namespace libdlvcodec { 8 | #define LibDLVoiceCodecEnd } 9 | #define LIBDVCND [[nodiscard]] 10 | 11 | #define LibDLVoiceCodecThrow(message) throw std::exception((std::string("[At \"") + __FILE__ + "\" Line " + std::to_string(__LINE__) + "]\n" + (message)).c_str()) 12 | 13 | LibDLVoiceCodecBegin 14 | 15 | using int8 = int8_t; 16 | using int16 = int16_t; 17 | using int32 = int32_t; 18 | using int64 = int64_t; 19 | using float32 = float; 20 | using float64 = double; 21 | using byte = unsigned char; 22 | using lpvoid = void*; 23 | using uint8 = uint8_t; 24 | using uint16 = uint16_t; 25 | using uint32 = uint32_t; 26 | using uint64 = uint64_t; 27 | 28 | class TensorView; 29 | class Tensor; 30 | 31 | const std::unordered_map<std::string, size_t> __Dtype {{"int8", 1}, { "int16", 2 }, { "int32", 4 }, { "int64", 8 }, 32 | { "float8", 1 }, { "float16", 2 }, { "bfloat16", 2 }, { "float32", 4 }, { 
"float64", 8 }, { "bool", 1 } }; 33 | 34 | template <class _Ty, class _Alloc = std::allocator<_Ty>> 35 | using MResource = std::vector<_Ty, _Alloc>; 36 | 37 | template<typename T> 38 | std::ostream& operator<<(std::ostream& _Stream, const std::vector<T>& _Data) 39 | { 40 | _Stream << '['; 41 | for (const auto& i : _Data) 42 | _Stream << i << ", "; 43 | _Stream << "]\n"; 44 | return _Stream; 45 | } 46 | 47 | class FileWrapper 48 | { 49 | public: 50 | FileWrapper() = default; 51 | ~FileWrapper() 52 | { 53 | if (file_) 54 | fclose(file_); 55 | file_ = nullptr; 56 | } 57 | FileWrapper(const FileWrapper& _Left) = delete; 58 | FileWrapper& operator=(const FileWrapper& _Left) = delete; 59 | FileWrapper(FileWrapper&& _Right) noexcept 60 | { 61 | file_ = _Right.file_; 62 | _Right.file_ = nullptr; 63 | } 64 | FileWrapper& operator=(FileWrapper&& _Right) noexcept 65 | { 66 | file_ = _Right.file_; 67 | _Right.file_ = nullptr; 68 | return *this; 69 | } 70 | void open(const std::wstring& _Path, const std::wstring& _Mode) 71 | { 72 | #ifdef _WIN32 73 | _wfopen_s(&file_, _Path.c_str(), _Mode.c_str()); 74 | #else 75 | file_ = _wfopen(_Path.c_str(), _Mode.c_str()); 76 | #endif 77 | } 78 | operator FILE* () const 79 | { 80 | return file_; 81 | } 82 | LIBDVCND bool enabled() const 83 | { 84 | return file_; 85 | } 86 | private: 87 | FILE* file_ = nullptr; 88 | }; 89 | 90 | LibDLVoiceCodecEnd -------------------------------------------------------------------------------- /libdlvoicecodec/LibDLVoiceCodec/operator.cpp: -------------------------------------------------------------------------------- 1 | #include "operator.h" 2 | #include "value.h" 3 | #include <cblas.h> 4 | 5 | LibDLVoiceCodecBegin 6 | 7 | LibDLVoiceCodecEnd -------------------------------------------------------------------------------- /libdlvoicecodec/LibDLVoiceCodec/operator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "base.h" 3 | 4 | 
LibDLVoiceCodecBegin 5 | Tensor equal(const Tensor& _A, const Tensor& _B); 6 | Tensor add(const Tensor& _A, const Tensor& _B); 7 | Tensor sub(const Tensor& _A, const Tensor& _B); 8 | Tensor mul(const Tensor& _A, const Tensor& _B); 9 | Tensor div(const Tensor& _A, const Tensor& _B); 10 | void selfAdd(Tensor& _Self, const Tensor& _O); 11 | void selfSub(Tensor& _Self, const Tensor& _O); 12 | void selfMul(Tensor& _Self, const Tensor& _O); 13 | void selfDiv(Tensor& _Self, const Tensor& _O); 14 | Tensor matmul(const Tensor& _A, const Tensor& _B); 15 | Tensor conv1d(const Tensor& _Input, const Tensor& _Weight, const Tensor& _Bias, 16 | int64 _Stride = 1, int64 _Padding = 0, int64 _Dilation = 1, int64 _Groups = 1); 17 | Tensor conv2d(const Tensor& _Input, const Tensor& _Weight, const Tensor& _Bias, 18 | int64 _Stride = 1, int64 _Padding = 0, int64 _Dilation = 1, int64 _Groups = 1); 19 | Tensor conv3d(const Tensor& _Input, const Tensor& _Weight, const Tensor& _Bias, 20 | int64 _Stride = 1, int64 _Padding = 0, int64 _Dilation = 1, int64 _Groups = 1); 21 | Tensor conv_transpose1d(const Tensor& _Input, const Tensor& _Weight, const Tensor& _Bias, 22 | int64 _Stride = 1, int64 _Padding = 0, int64 _OutputPadding = 0, int64 _Dilation = 1, int64 _Groups = 1); 23 | Tensor conv_transpose2d(const Tensor& _Input, const Tensor& _Weight, const Tensor& _Bias, 24 | int64 _Stride = 1, int64 _Padding = 0, int64 _OutputPadding = 0, int64 _Dilation = 1, int64 _Groups = 1); 25 | Tensor conv_transpose3d(const Tensor& _Input, const Tensor& _Weight, const Tensor& _Bias, 26 | int64 _Stride = 1, int64 _Padding = 0, int64 _OutputPadding = 0, int64 _Dilation = 1, int64 _Groups = 1); 27 | LibDLVoiceCodecEnd -------------------------------------------------------------------------------- /libdlvoicecodec/Modules/DataStruct/README.md: -------------------------------------------------------------------------------- 1 | ## KdTree From J. 
Frederico Carvalho 2 | -------------------------------------------------------------------------------- /libdlvoicecodec/Modules/InferTools/Cluster/MoeVSBaseCluster.cpp: -------------------------------------------------------------------------------- 1 | #include "MoeVSBaseCluster.hpp" 2 | #include "../inferTools.hpp" 3 | 4 | std::vector<float> MoeVoiceStudioCluster::MoeVoiceStudioBaseCluster::find(float* point, long sid, int64_t n_points) 5 | { 6 | LibDLVoiceCodecThrow("NotImplementedError"); 7 | } -------------------------------------------------------------------------------- /libdlvoicecodec/Modules/InferTools/Cluster/MoeVSBaseCluster.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * FileName: MoeVSBaseCluster.hpp 3 | * Note: MoeVoiceStudioCore 聚类基类 4 | * 5 | * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com) 6 | * 7 | * This file is part of MoeVoiceStudioCore library. 8 | * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the 9 | * GNU Affero General Public License as published by the Free Software Foundation, either version 3 10 | * of the License, or any later version. 11 | * 12 | * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 13 | * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 14 | * See the GNU Affero General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Affero General Public License along with Foobar. 17 | * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>. 
18 | * 19 | * date: 2022-10-17 Create 20 | */ 21 | 22 | #pragma once 23 | #include <vector> 24 | 25 | #define MoeVoiceStudioClusterHeader namespace MoeVoiceStudioCluster { 26 | #define MoeVoiceStudioClusterEnd } 27 | 28 | MoeVoiceStudioClusterHeader 29 | 30 | class MoeVoiceStudioBaseCluster 31 | { 32 | public: 33 | MoeVoiceStudioBaseCluster() = default; 34 | virtual ~MoeVoiceStudioBaseCluster() = default; 35 | 36 | /** 37 | * \brief Find the nearest neighbor among the cluster points 38 | * \param point Point(s) to look up 39 | * \param sid Character (speaker) ID 40 | * \param n_points Number of points 41 | * \return The nearest neighbor point(s) found 42 | */ 43 | virtual std::vector<float> find(float* point, long sid, int64_t n_points = 1); 44 | }; 45 | 46 | MoeVoiceStudioClusterEnd -------------------------------------------------------------------------------- /libdlvoicecodec/Modules/InferTools/Cluster/MoeVSClusterManager.cpp: -------------------------------------------------------------------------------- 1 | #include "MoeVSClusterManager.hpp" 2 | #include <map> 3 | #include <stdexcept> 4 | #include "../../Logger/MoeSSLogger.hpp" 5 | 6 | MoeVoiceStudioClusterHeader 7 | 8 | std::map<std::wstring, GetMoeVSClusterFn> RegisteredMoeVSCluster; 9 | 10 | MoeVSCluster GetMoeVSCluster(const std::wstring& _name, const std::wstring& _path, size_t hidden_size, size_t KmeansLen) 11 | { 12 | const auto f_ClusterFn = RegisteredMoeVSCluster.find(_name); 13 | if (f_ClusterFn != RegisteredMoeVSCluster.end()) 14 | return f_ClusterFn->second(_path, hidden_size, KmeansLen); 15 | throw std::runtime_error("Unable To Find An Available MoeVSCluster"); 16 | } 17 | 18 | void RegisterMoeVSCluster(const std::wstring& _name, const GetMoeVSClusterFn& _constructor_fn) 19 | { 20 | if (RegisteredMoeVSCluster.find(_name) != RegisteredMoeVSCluster.end()) 21 | { 22 | logger.log(L"[Warn] MoeVSClusterNameConflict"); 23 | return; 24 | } 25 | RegisteredMoeVSCluster[_name] = _constructor_fn; 26 | } 27 | 28 | 
MoeVoiceStudioClusterEnd -------------------------------------------------------------------------------- 
/libdlvoicecodec/Modules/InferTools/Cluster/MoeVSKmeansCluster.cpp: -------------------------------------------------------------------------------- 1 | #include "MoeVSKmeansCluster.hpp" 2 | #include "../inferTools.hpp" 3 | /* Replaces each of the n_points dims-sized vectors stored at point with the result of _tree[sid].nearest_point() for it; when sid does not index an existing tree the input floats are returned unchanged. */ 4 | std::vector<float> MoeVoiceStudioCluster::KMeansCluster::find(float* point, long sid, int64_t n_points) 5 | { 6 | if (size_t(sid) < _tree.size()) 7 | { 8 | std::vector<float> res; 9 | res.reserve(dims * n_points * 2); 10 | for (int64_t pt = 0; pt < n_points; ++pt) 11 | { 12 | auto tmp = _tree[sid].nearest_point({ point + pt * dims,point + (pt + 1) * dims }); 13 | res.insert(res.end(), tmp.begin(), tmp.end()); 14 | } 15 | return res; 16 | } 17 | return { point, point + dims * n_points }; 18 | } 19 | /* Reads <_path>/KMeans.npy as rows of hidden_size floats and appends one _tree entry per KmeansLen consecutive rows; a trailing group with fewer rows is dropped. Reports KMeansFileNotExist through LibDLVoiceCodecThrow when the file cannot be opened. */ 20 | MoeVoiceStudioCluster::KMeansCluster::KMeansCluster(const std::wstring& _path, size_t hidden_size, size_t KmeansLen) 21 | { 22 | dims = hidden_size; 23 | FILE* file = nullptr; 24 | _wfopen_s(&file, (_path + L"/KMeans.npy").c_str(), L"rb"); 25 | if (!file) 26 | LibDLVoiceCodecThrow("KMeansFileNotExist"); 27 | constexpr long idx = 128; /* reading starts 128 bytes into the file (presumably past the .npy header - TODO confirm) */ 28 | fseek(file, idx, SEEK_SET); 29 | std::vector<float> tmpData(hidden_size); 30 | const size_t ec = size_t(hidden_size) * sizeof(float); 31 | std::vector<std::vector<float>> _tmp; 32 | _tmp.reserve(KmeansLen); 33 | while (ec != 0 && fread(tmpData.data(), 1, ec, file) == ec) /* fread() returns 0 for a zero-byte request, so ec == 0 must not enter the loop or it never terminates */ 34 | { 35 | _tmp.emplace_back(tmpData); 36 | if (_tmp.size() == KmeansLen) 37 | { 38 | _tree.emplace_back(_tmp); 39 | _tmp.clear(); 40 | } 41 | } 42 | fclose(file); /* release the handle; nothing else owns it */ 43 | } 44 | -------------------------------------------------------------------------------- /libdlvoicecodec/Modules/InferTools/Cluster/MoeVSKmeansCluster.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * FileName: MoeVSKmeansCluster.hpp 3 | * Note: MoeVoiceStudioCore official clustering (Kmeans) 4 | * 5 | * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com) 6 | * 7 | * This file is part of MoeVoiceStudioCore library.
8 | * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the 9 | * GNU Affero General Public License as published by the Free Software Foundation, either version 3 10 | * of the License, or any later version. 11 | * 12 | * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 13 | * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 14 | * See the GNU Affero General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Affero General Public License along with Foobar. 17 | * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>. 18 | * 19 | * date: 2022-10-17 Create 20 | */ 21 | 22 | #pragma once 23 | #include "MoeVSBaseCluster.hpp" 24 | #include "../../DataStruct/KDTree.hpp" 25 | #include <string> 26 | 27 | MoeVoiceStudioClusterHeader 28 | 29 | class KMeansCluster : public MoeVoiceStudioBaseCluster 30 | { 31 | public: 32 | KMeansCluster() = delete; 33 | ~KMeansCluster() override = default; 34 | KMeansCluster(const std::wstring& _path, size_t hidden_size, size_t KmeansLen); 35 | std::vector<float> find(float* point, long sid, int64_t n_points = 1) override; 36 | private: 37 | std::vector<KDTree> _tree; 38 | size_t dims = 0; 39 | }; 40 | 41 | MoeVoiceStudioClusterEnd -------------------------------------------------------------------------------- /libdlvoicecodec/Modules/InferTools/F0Extractor/BaseF0Extractor/BaseF0Extractor.cpp: -------------------------------------------------------------------------------- 1 | #include "BaseF0Extractor.hpp" 2 | #include <map> 3 | #include "../../../Logger/MoeSSLogger.hpp" 4 | #include "../../inferTools.hpp" 5 | 6 | MoeVSF0Extractor::BaseF0Extractor::BaseF0Extractor(int sampling_rate, int hop_size, int n_f0_bins, double max_f0, double min_f0) : 7 | fs(sampling_rate), 8 | hop(hop_size), 9 | f0_bin(n_f0_bins), 10 | f0_max(max_f0), 11 | f0_min(min_f0) 12 | 
{ 13 | f0_mel_min = (1127.0 * log(1.0 + f0_min / 700.0)); 14 | f0_mel_max = (1127.0 * log(1.0 + f0_max / 700.0)); 15 | } 16 | 17 | std::vector<double> MoeVSF0Extractor::BaseF0Extractor::arange(double start, double end, double step, double div) 18 | { 19 | std::vector<double> output; 20 | while (start < end) 21 | { 22 | output.push_back(start / div); 23 | start += step; 24 | } 25 | return output; 26 | } 27 | 28 | std::vector<float> MoeVSF0Extractor::BaseF0Extractor::ExtractF0(const std::vector<double>& PCMData, size_t TargetLength) 29 | { 30 | LibDLVoiceCodecThrow("NotImplementedError"); 31 | } 32 | 33 | std::vector<float> MoeVSF0Extractor::BaseF0Extractor::ExtractF0(const std::vector<float>& PCMData, size_t TargetLength) 34 | { 35 | std::vector<double> PCMVector(PCMData.size()); 36 | for (size_t i = 0; i < PCMData.size(); ++i) 37 | PCMVector[i] = double(PCMData[i]); 38 | return ExtractF0(PCMVector, TargetLength); 39 | } 40 | 41 | std::vector<float> MoeVSF0Extractor::BaseF0Extractor::ExtractF0(const std::vector<int16_t>& PCMData, size_t TargetLength) 42 | { 43 | std::vector<double> PCMVector(PCMData.size()); 44 | for (size_t i = 0; i < PCMData.size(); ++i) 45 | PCMVector[i] = double(PCMData[i]); 46 | return ExtractF0(PCMVector, TargetLength); 47 | } -------------------------------------------------------------------------------- /libdlvoicecodec/Modules/InferTools/F0Extractor/DioF0Extractor/DioF0Extractor.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * FileName: DioF0Extractor.hpp 3 | * Note: MoeVoiceStudioCore 官方F0提取算法 Dio 4 | * 5 | * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com) 6 | * 7 | * This file is part of MoeVoiceStudioCore library. 
8 | * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the 9 | * GNU Affero General Public License as published by the Free Software Foundation, either version 3 10 | * of the License, or any later version. 11 | * 12 | * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 13 | * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 14 | * See the GNU Affero General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Affero General Public License along with Foobar. 17 | * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>. 18 | * 19 | * date: 2022-10-17 Create 20 | */ 21 | 22 | #pragma once 23 | #include "../BaseF0Extractor/BaseF0Extractor.hpp" 24 | 25 | MoeVoiceStudioF0ExtractorHeader 26 | class DioF0Extractor : public BaseF0Extractor 27 | { 28 | public: 29 | DioF0Extractor(int sampling_rate, int hop_size, int n_f0_bins = 256, double max_f0 = 1100.0, double min_f0 = 50.0); 30 | 31 | ~DioF0Extractor() override = default; 32 | 33 | void compute_f0(const double* PCMData, size_t PCMLen); 34 | 35 | void InterPf0(size_t TargetLength); 36 | 37 | std::vector<float> ExtractF0(const std::vector<double>& PCMData, size_t TargetLength) override; 38 | private: 39 | std::vector<double> refined_f0; 40 | }; 41 | MoeVoiceStudioF0ExtractorEnd -------------------------------------------------------------------------------- /libdlvoicecodec/Modules/InferTools/F0Extractor/F0ExtractorManager.cpp: -------------------------------------------------------------------------------- 1 | #include "F0ExtractorManager.hpp" 2 | #include <map> 3 | #include <stdexcept> 4 | #include "../../Logger/MoeSSLogger.hpp" 5 | 6 | MoeVoiceStudioF0ExtractorHeader 7 | std::map<std::wstring, GetF0ExtractorFn> RegisteredF0Extractors; 8 | 9 | F0Extractor GetF0Extractor(const std::wstring& _name, 10 | const uint32_t fs, 11 | const 
uint32_t hop, 12 | const uint32_t f0_bin, 13 | const double f0_max, 14 | const double f0_min) 15 | { 16 | const auto f_F0Extractor = RegisteredF0Extractors.find(_name); 17 | if (f_F0Extractor != RegisteredF0Extractors.end()) 18 | return f_F0Extractor->second(fs, hop, f0_bin, f0_max, f0_min); 19 | throw std::runtime_error("Unable To Find An Available F0Extractor"); 20 | } 21 | 22 | void RegisterF0Extractor(const std::wstring& _name, const GetF0ExtractorFn& _constructor_fn) 23 | { 24 | if (RegisteredF0Extractors.find(_name) != RegisteredF0Extractors.end()) 25 | { 26 | logger.log(L"[Warn] F0ExtractorNameConflict"); 27 | return; 28 | } 29 | RegisteredF0Extractors[_name] = _constructor_fn; 30 | } 31 | 32 | std::vector<std::wstring> GetF0ExtractorList() 33 | { 34 | std::vector<std::wstring> F0ExtractorsVec; 35 | F0ExtractorsVec.reserve(RegisteredF0Extractors.size()); 36 | for (const auto& i : RegisteredF0Extractors) 37 | F0ExtractorsVec.emplace_back(i.first); 38 | return F0ExtractorsVec; 39 | } 40 | 41 | MoeVoiceStudioF0ExtractorEnd -------------------------------------------------------------------------------- /libdlvoicecodec/Modules/InferTools/F0Extractor/HarvestF0Extractor/HarvestF0Extractor.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * FileName: HarvestF0Extractor.hpp 3 | * Note: MoeVoiceStudioCore 官方F0提取算法 Harvest 4 | * 5 | * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com) 6 | * 7 | * This file is part of MoeVoiceStudioCore library. 8 | * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the 9 | * GNU Affero General Public License as published by the Free Software Foundation, either version 3 10 | * of the License, or any later version. 
11 | * 12 | * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 13 | * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 14 | * See the GNU Affero General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Affero General Public License along with Foobar. 17 | * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>. 18 | * 19 | * date: 2022-10-17 Create 20 | */ 21 | 22 | #pragma once 23 | #include "../BaseF0Extractor/BaseF0Extractor.hpp" 24 | 25 | MoeVoiceStudioF0ExtractorHeader 26 | class HarvestF0Extractor : public BaseF0Extractor 27 | { 28 | public: 29 | HarvestF0Extractor(int sampling_rate, int hop_size, int n_f0_bins = 256, double max_f0 = 1100.0, double min_f0 = 50.0); 30 | 31 | ~HarvestF0Extractor() override = default; 32 | 33 | void compute_f0(const double* PCMData, size_t PCMLen); 34 | 35 | void InterPf0(size_t TargetLength); 36 | 37 | std::vector<float> ExtractF0(const std::vector<double>& PCMData, size_t TargetLength) override; 38 | 39 | private: 40 | std::vector<double> refined_f0; 41 | }; 42 | MoeVoiceStudioF0ExtractorEnd -------------------------------------------------------------------------------- /libdlvoicecodec/Modules/InferTools/Sampler/MoeVSBaseSampler.cpp: -------------------------------------------------------------------------------- 1 | #include "MoeVSBaseSampler.hpp" 2 | #include "../inferTools.hpp" 3 | MoeVoiceStudioSamplerHeader 4 | 5 | MoeVSBaseSampler::MoeVSBaseSampler(Ort::Session* alpha, Ort::Session* dfn, Ort::Session* pred, int64_t Mel_Bins, const ProgressCallback& _ProgressCallback, Ort::MemoryInfo* memory) : 6 | MelBins(Mel_Bins), Alpha(alpha), DenoiseFn(dfn), NoisePredictor(pred) 7 | { 8 | _callback = _ProgressCallback; 9 | Memory = memory; 10 | }; 11 | 12 | std::vector<Ort::Value> MoeVSBaseSampler::Sample(std::vector<Ort::Value>& Tensors, int64_t Steps, int64_t SpeedUp, float NoiseScale, 
int64_t Seed, size_t& Process) 13 | { 14 | LibDLVoiceCodecThrow("NotImplementedError"); 15 | } 16 | 17 | MoeVoiceStudioSamplerEnd -------------------------------------------------------------------------------- /libdlvoicecodec/Modules/InferTools/Sampler/MoeVSBaseSampler.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * FileName: MoeVSBaseSampler.hpp 3 | * Note: MoeVoiceStudioCore Diffusion 采样器基类 4 | * 5 | * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com) 6 | * 7 | * This file is part of MoeVoiceStudioCore library. 8 | * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the 9 | * GNU Affero General Public License as published by the Free Software Foundation, either version 3 10 | * of the License, or any later version. 11 | * 12 | * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 13 | * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 14 | * See the GNU Affero General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Affero General Public License along with Foobar. 17 | * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>. 
18 | * 19 | * date: 2022-10-17 Create 20 | */ 21 | 22 | #pragma once 23 | #define MoeVoiceStudioSamplerHeader namespace MoeVSSampler { 24 | #define MoeVoiceStudioSamplerEnd } 25 | #include <functional> 26 | #include <onnxruntime_cxx_api.h> 27 | MoeVoiceStudioSamplerHeader 28 | /* Base class for diffusion samplers; it stores the non-owning session/memory pointers and the progress callback that subclasses use. */ 29 | class MoeVSBaseSampler 30 | { 31 | public: 32 | using ProgressCallback = std::function<void(size_t, size_t)>; 33 | 34 | /** 35 | * \brief Construct a sampler 36 | * \param alpha ONNX model session for Alphas 37 | * \param dfn ONNX model session for DenoiseFn 38 | * \param pred ONNX model session for the Predictor 39 | * \param Mel_Bins MelBins 40 | * \param _ProgressCallback progress-bar callback (simply pass the model's own callback) 41 | * \param memory the model's OrtMemoryInfo 42 | */ 43 | MoeVSBaseSampler(Ort::Session* alpha, Ort::Session* dfn, Ort::Session* pred, int64_t Mel_Bins, const ProgressCallback& _ProgressCallback, Ort::MemoryInfo* memory); 44 | 45 | virtual ~MoeVSBaseSampler() = default; 46 | 47 | /** 48 | * \brief Run sampling 49 | * \param Tensors input tensors (Tensors[0] is the condition, Tensors[1] is the initial noise) 50 | * \param Steps number of sampling steps 51 | * \param SpeedUp speed-up factor 52 | * \param NoiseScale noise scale 53 | * \param Seed random seed 54 | * \param Process current progress 55 | * \return the mel tensor 56 | */ 57 | virtual std::vector<Ort::Value> Sample(std::vector<Ort::Value>& Tensors, int64_t Steps, int64_t SpeedUp, float NoiseScale, int64_t Seed, size_t& Process); 58 | protected: 59 | int64_t MelBins = 128; 60 | Ort::Session* Alpha = nullptr; 61 | Ort::Session* DenoiseFn = nullptr; 62 | Ort::Session* NoisePredictor = nullptr; 63 | ProgressCallback _callback; 64 | Ort::MemoryInfo* Memory = nullptr; 65 | }; 66 | 67 | MoeVoiceStudioSamplerEnd -------------------------------------------------------------------------------- /libdlvoicecodec/Modules/InferTools/Sampler/MoeVSSamplerManager.cpp: -------------------------------------------------------------------------------- 1 | #include "MoeVSSamplerManager.hpp" 2 | #include <map> 3 | #include "../../Logger/MoeSSLogger.hpp" 4 | MoeVoiceStudioSamplerHeader 5 | std::map<std::wstring, 
GetMoeVSSamplerFn> RegisteredMoeVSSamplers; 6 | /* Returns the sampler built by the constructor registered under _name, forwarding the remaining arguments; throws std::runtime_error when no sampler has that name. */ 7 | MoeVSSampler GetMoeVSSampler(const std::wstring& _name, 8 | Ort::Session* alpha, 9 | Ort::Session* dfn, 10 | Ort::Session* pred, 11 | int64_t Mel_Bins, 12 | const MoeVSBaseSampler::ProgressCallback& _ProgressCallback, 13 | Ort::MemoryInfo* memory) 14 | { 15 | const auto f_Sampler = RegisteredMoeVSSamplers.find(_name); 16 | if (f_Sampler != RegisteredMoeVSSamplers.end()) 17 | return f_Sampler->second(alpha, dfn, pred, Mel_Bins, _ProgressCallback, memory); 18 | throw std::runtime_error("Unable To Find An Available Sampler"); 19 | } 20 | /* Registers _constructor_fn under _name; when the name is already registered, logs a sampler name-conflict warning and keeps the existing entry. */ 21 | void RegisterMoeVSSampler(const std::wstring& _name, const GetMoeVSSamplerFn& _constructor_fn) 22 | { 23 | if (RegisteredMoeVSSamplers.find(_name) != RegisteredMoeVSSamplers.end()) 24 | { 25 | logger.log(L"[Warn] MoeVSSamplerNameConflict"); 26 | return; 27 | } 28 | RegisteredMoeVSSamplers[_name] = _constructor_fn; 29 | } 30 | /* Returns the names of all registered samplers, in the key order of the std::map registry. */ 31 | std::vector<std::wstring> GetMoeVSSamplerList() 32 | { 33 | std::vector<std::wstring> SamplersVec; 34 | SamplersVec.reserve(RegisteredMoeVSSamplers.size()); 35 | for (const auto& i : RegisteredMoeVSSamplers) 36 | SamplersVec.emplace_back(i.first); 37 | return SamplersVec; 38 | } 39 | MoeVoiceStudioSamplerEnd -------------------------------------------------------------------------------- /libdlvoicecodec/Modules/InferTools/Stft/stft.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <vector> 3 | #include "fftw3.h" 4 | 5 | namespace DlCodecStft 6 | { 7 | class STFT 8 | { 9 | public: 10 | STFT() = default; 11 | ~STFT(); 12 | STFT(int WindowSize, int HopSize, int FFTSize = 0); 13 | inline static double PI = 3.14159265358979323846; 14 | std::pair<std::vector<float>, int64_t> operator()(const std::vector<double>& audioData) const; 15 | private: 16 | int WINDOW_SIZE = 2048; 17 | int HOP_SIZE = WINDOW_SIZE / 4; 18 | int FFT_SIZE = WINDOW_SIZE / 2 + 1; 19 | }; 20 | 21 | class Mel 22 | { 23 | 
public: 24 | Mel() = delete; 25 | ~Mel() = default; 26 | Mel(int WindowSize, int HopSize, int SamplingRate, int MelSize = 0); 27 | std::pair<std::vector<float>, int64_t> GetMel(const std::vector<double>& audioData) const; 28 | std::pair<std::vector<float>, int64_t> operator()(const std::vector<double>& audioData) const; 29 | private: 30 | STFT stft; 31 | int MEL_SIZE = 128; 32 | int FFT_SIZE = 0; 33 | int sr = 22050; 34 | std::vector<float> MelBasis; 35 | }; 36 | } 37 | -------------------------------------------------------------------------------- /libdlvoicecodec/Modules/InferTools/TensorExtractor/TensorExtractorManager.cpp: -------------------------------------------------------------------------------- 1 | #include "TensorExtractorManager.hpp" 2 | #include <map> 3 | #include "../../Logger/MoeSSLogger.hpp" 4 | 5 | MoeVoiceStudioTensorExtractorHeader 6 | inline std::map<std::wstring, GetTensorExtractorFn> RegisteredTensorExtractors; 7 | 8 | void RegisterTensorExtractor(const std::wstring& _name, const GetTensorExtractorFn& _constructor_fn) 9 | { 10 | if (RegisteredTensorExtractors.find(_name) != RegisteredTensorExtractors.end()) 11 | { 12 | logger.log(L"[Warn] TensorExtractorNameConflict"); 13 | return; 14 | } 15 | RegisteredTensorExtractors[_name] = _constructor_fn; 16 | } 17 | 18 | TensorExtractor GetTensorExtractor(const std::wstring& _name, uint64_t _srcsr, uint64_t _sr, uint64_t _hop, bool _smix, bool _volume, uint64_t _hidden_size, uint64_t _nspeaker, const MoeVoiceStudioTensorExtractor::Others& _other) 19 | { 20 | const auto f_TensorExtractor = RegisteredTensorExtractors.find(_name); 21 | if (f_TensorExtractor != RegisteredTensorExtractors.end()) 22 | return f_TensorExtractor->second(_srcsr, _sr, _hop, _smix, _volume, _hidden_size, _nspeaker, _other); 23 | throw std::runtime_error("Unable To Find An Available TensorExtractor"); 24 | } 25 | 26 | MoeVoiceStudioTensorExtractorEnd 
-------------------------------------------------------------------------------- /libdlvoicecodec/Modules/Logger/MoeSSLogger.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <string> 3 | #include "../StringPreprocess.hpp" 4 | #include <filesystem> 5 | #include <mutex> 6 | #define __MOEVS_DEBUG_MESSAGE(msg) __MOEVS_DEBUG_INFO(__FILE__, __LINE__, msg) 7 | #define logger MoeSSLogger::GetLogger() 8 | inline std::string __MOEVS_DEBUG_INFO(const char* filename, int line, const char* msg) 9 | { 10 | return std::string("[In \"") + std::filesystem::path(filename).filename().string() + "\" Line " + std::to_string(line) + "] " + msg; 11 | } 12 | 13 | inline std::wstring __MOEVS_DEBUG_INFO(const char* filename, int line, const wchar_t* msg) 14 | { 15 | return std::wstring(L"[In \"") + std::filesystem::path(filename).filename().wstring() + L"\" Line " + std::to_wstring(line) + L"] " + msg; 16 | } 17 | 18 | namespace MoeSSLogger 19 | { 20 | class Logger 21 | { 22 | public: 23 | Logger(); 24 | ~Logger(); 25 | void log(const std::wstring&); 26 | void log(const char*); 27 | void error(const std::wstring&); 28 | void error(const char*); 29 | private: 30 | std::filesystem::path cur_log_dir, logpath, errorpath; 31 | FILE* log_file = nullptr,* error_file = nullptr; 32 | std::mutex mx; 33 | }; 34 | 35 | Logger& GetLogger(); 36 | } -------------------------------------------------------------------------------- /libdlvoicecodec/Modules/Models/EnvManager.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * FileName: EnvManager.hpp 3 | * Note: MoeVoiceStudioCore 环境管理 4 | * 5 | * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com) 6 | * 7 | * This file is part of MoeVoiceStudioCore library. 
8 | * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the 9 | * GNU Affero General Public License as published by the Free Software Foundation, either version 3 10 | * of the License, or any later version. 11 | * 12 | * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 13 | * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 14 | * See the GNU Affero General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Affero General Public License along with Foobar. 17 | * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>. 18 | * 19 | * date: 2022-10-17 Create 20 | */ 21 | 22 | #pragma once 23 | #include <onnxruntime_cxx_api.h> 24 | 25 | #define MoeVoiceStudioCoreEnvManagerHeader namespace moevsenv{ 26 | #define MoeVoiceStudioCoreEnvManagerEnd } 27 | 28 | MoeVoiceStudioCoreEnvManagerHeader 29 | class MoeVoiceStudioEnv 30 | { 31 | public: 32 | MoeVoiceStudioEnv() = default; 33 | ~MoeVoiceStudioEnv() { Destory(); } 34 | void Load(unsigned ThreadCount, unsigned DeviceID, unsigned Provider); 35 | void Destory(); 36 | [[nodiscard]] bool IsEnabled() const; 37 | [[nodiscard]] Ort::Env* GetEnv() const { return GlobalOrtEnv; } 38 | [[nodiscard]] Ort::SessionOptions* GetSessionOptions() const { return GlobalOrtSessionOptions; } 39 | [[nodiscard]] Ort::MemoryInfo* GetMemoryInfo() const { return GlobalOrtMemoryInfo; } 40 | [[nodiscard]] int GetCurThreadCount() const { return (int)CurThreadCount; } 41 | [[nodiscard]] int GetCurDeviceID() const { return (int)CurDeviceID; } 42 | [[nodiscard]] int GetCurProvider() const { return (int)CurProvider; } 43 | private: 44 | void Create(unsigned ThreadCount_, unsigned DeviceID_, unsigned ExecutionProvider_); 45 | Ort::Env* GlobalOrtEnv = nullptr; 46 | Ort::SessionOptions* GlobalOrtSessionOptions = nullptr; 47 | Ort::MemoryInfo* GlobalOrtMemoryInfo = 
nullptr; 48 | unsigned CurThreadCount = unsigned(-1); 49 | unsigned CurDeviceID = unsigned(-1); 50 | unsigned CurProvider = unsigned(-1); 51 | OrtCUDAProviderOptionsV2* cuda_option_v2 = nullptr; 52 | }; 53 | 54 | MoeVoiceStudioEnv& GetGlobalMoeVSEnv(); 55 | 56 | MoeVoiceStudioCoreEnvManagerEnd -------------------------------------------------------------------------------- /libdlvoicecodec/Modules/Models/header/Tacotron.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "ModelBase.hpp" 3 | 4 | INFERCLASSHEADER 5 | 6 | class Tacotron2 : public TTS 7 | { 8 | public: 9 | Tacotron2(const MJson&, const callback&, const callback_params&, const DurationCallback&, Device _dev = Device::CPU); 10 | 11 | ~Tacotron2() override; 12 | 13 | std::vector<int16_t> Inference(std::wstring& _inputLens) const override; 14 | 15 | [[nodiscard]] std::vector<int16_t> Inference(const MoeVSProject::TTSParams& _input) const override; 16 | 17 | static void cat(std::vector<float>& tensorA, std::vector<int64>& Shape, const MTensor& tensorB) { 18 | const int64 n = Shape[1]; 19 | for (int64 i = n; i > 0; --i) 20 | tensorA.insert(tensorA.begin() + (i * Shape[2]), tensorB.GetTensorData<float>()[i - 1]); 21 | ++Shape[2]; 22 | } 23 | private: 24 | Ort::Session* sessionEncoder = nullptr; 25 | Ort::Session* sessionDecoderIter = nullptr; 26 | Ort::Session* sessionPostNet = nullptr; 27 | Ort::Session* sessionGan = nullptr; 28 | 29 | const std::vector<const char*> ganIn = { "x" }; 30 | const std::vector<const char*> ganOut = { "audio" }; 31 | const std::vector<const char*> inputNodeNamesSessionEncoder = { "sequences","sequence_lengths" }; 32 | const std::vector<const char*> outputNodeNamesSessionEncoder = { "memory","processed_memory","lens" }; 33 | const std::vector<const char*> inputNodeNamesSessionDecoderIter = { 
"decoder_input","attention_hidden","attention_cell","decoder_hidden","decoder_cell","attention_weights","attention_weights_cum","attention_context","memory","processed_memory","mask" }; 34 | const std::vector<const char*> outputNodeNamesSessionDecoderIter = { "decoder_output","gate_prediction","out_attention_hidden","out_attention_cell","out_decoder_hidden","out_decoder_cell","out_attention_weights","out_attention_weights_cum","out_attention_context" }; 35 | const std::vector<const char*> inputNodeNamesSessionPostNet = { "mel_outputs" }; 36 | const std::vector<const char*> outputNodeNamesSessionPostNet = { "mel_outputs_postnet" }; 37 | }; 38 | 39 | INFERCLASSEND -------------------------------------------------------------------------------- /libdlvoicecodec/Modules/README.md: -------------------------------------------------------------------------------- 1 | # Example 2 | ```c++ 3 | #include "Modules/Models/header/Vits.hpp" 4 | 5 | int main(){ 6 | rapidjson::Document Config; 7 | Config.Parse("Your Config"); 8 | 9 | //Progress bar 10 | InferClass::BaseModelType::callback a_callback = [](size_t a, size_t b) {std::cout << std::to_string((float)a * 100.f / (float)b) << "%\n"; }; 11 | 12 | //return params for inference 13 | InferClass::BaseModelType::callback_params b_callback = []() 14 | { 15 | auto cbaaa = InferClass::InferConfigs(); 16 | cbaaa.kmeans_rate = 0.5; 17 | cbaaa.keys = 0; 18 | return cbaaa; 19 | }; 20 | 21 | //modify duration per phoneme 22 | InferClass::TTS::DurationCallback c_callback = [](std::vector<float>&) {}; 23 | 24 | std::vector<int16_t> output; 25 | try 26 | { 27 | std::wstring inp(L"watashinoonaniomitekudasai"); 28 | auto model = dynamic_cast<InferClass::BaseModelType*>(new InferClass::VitsSvc(Config, a_callback, b_callback)); 29 | 30 | output = model->Inference(inp); 31 | 32 | Wav outWav(model->GetSamplingRate(), output.size() * 2, output.data()); 33 | outWav.Writef(L"test.wav"); 34 | 35 | delete model; 36 | } 37 | catch(std::exception& 
e) 38 | { 39 | std::cout << e.what(); 40 | } 41 | } 42 | 43 | ``` 44 | -------------------------------------------------------------------------------- /libdlvoicecodec/MoeVoiceStudioSvc - Core - Cmd.vcxproj.filters: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="utf-8"?> 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> 3 | <ItemGroup> 4 | <Filter Include="源文件"> 5 | <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier> 6 | <Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions> 7 | </Filter> 8 | <Filter Include="头文件"> 9 | <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier> 10 | <Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions> 11 | </Filter> 12 | <Filter Include="资源文件"> 13 | <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier> 14 | <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions> 15 | </Filter> 16 | </ItemGroup> 17 | <ItemGroup> 18 | <ClCompile Include="MoeVoiceStudioSvc - Core - Cmd.cpp"> 19 | <Filter>源文件</Filter> 20 | </ClCompile> 21 | </ItemGroup> 22 | <ItemGroup> 23 | <None Include="..\README.md"> 24 | <Filter>资源文件</Filter> 25 | </None> 26 | <None Include="..\README_en.md"> 27 | <Filter>资源文件</Filter> 28 | </None> 29 | <None Include="..\.gitignore" /> 30 | <None Include="packages.config" /> 31 | <None Include="$(MSBuildThisFileDirectory)..\..\runtimes\win-x64\native\onnxruntime.dll" /> 32 | <None Include="analyse\GptSoVits.md"> 33 | <Filter>资源文件</Filter> 34 | </None> 35 | </ItemGroup> 36 | </Project> -------------------------------------------------------------------------------- /libdlvoicecodec/analyse/GptSoVits.md: -------------------------------------------------------------------------------- 1 | # GptSoVits主要分为三个部分 2 | 3 | ### VQ(内核为KMeans聚类) 4 | - 
Train:对训练集音频的ssl[^1]进行KMeans聚类,获取到的聚类中心构造一个Embedding(CodeBook.embed) 5 | - Infer:使用Indices获取聚类中心(CodeBook.embed)中的元素,构造一个ssl[^1]矩阵 6 | 7 | 与SoVits的KMeans/Index聚类类似,只不过SoVits的聚类在使用时是使用输入的HuBert在CodeBook中查找与其距离排名前K的点后加权平均,而GptSoVits则是使用一个AR循环预测所需的HuBert在CodeBook中的下标,之后使用该下标获取CodeBook中对应元素。 8 | 9 | --- 10 | 11 | ### AR(GPT) 12 | - Inputs: 13 | - text_seq:输入文本音素序列的数字ID(在Symbols数组中的下标) 14 | - text_bert:输入文本的Bert 15 | - ref_seq:参考文本音素序列的数字ID(在Symbols数组中的下标) 16 | - ref_bert:参考文本的Bert 17 | - ref_ssl:参考音频的ssl[^1] 18 | - Outputs: 19 | - codes:输入到VQ的Indices,用于获取ssl[^1]的聚类中心 20 | 21 | 与Gpt类似,使用一个AR循环,通过输入文本编码后的信息预测一个响应序列(序列终止为EOS),该响应序列为训练集音频聚类后的聚类中心在CodeBook中的下标,之后会从CodeBook中获取相应的元素,相当于SoVits中的Hubert。 22 | 23 | --- 24 | 25 | ### SoVits 26 | - Inputs: 27 | - codes:输入到VQ的Indices,用于获取ssl[^1]的聚类中心 28 | - text_seq:输入文本音素序列的数字ID(在Symbols数组中的下标) 29 | - ref_audio:参考音频(训练集内音频) 30 | 31 | 与SoVits比较,其中的codes实际上相当于SoVits的Hubert,只不过这个Hubert是使用AR预测所得序列生成的。 32 | GptSoVits使用输入音素的Embedding,AR预测所得的Hubert以及参考音频的Mel共同指导音频生成,可以有效地控制音频的语气、感情。 33 | 然而在一些时候,会出现漏字和错字的情况,可能和AR有较大的关系。 34 | 35 | --- 36 | 37 | ### 实验方案 38 | 将GptSoVits中的AR部分去除,将VQ的输入从Indices(code)替换为ssl(即使用最邻近点搜索)。即可获得一个svc模型。 39 | 40 | 两个音频,一个训练集参考音频,一个输入音频。需完成以下步骤。 41 | 42 | 1、训练集参考音频直接编码为mel记作ref_audio。 43 | 44 | 2、输入音频经过一个asr处理为音素序列记作text_seq。 45 | 46 | 3、输入音频经过hubert后使用最邻近点搜索,从vq的embedding中取元素,记作ssl。 47 | 48 | 4、将ssl,text_seq和ref_audio作为vits的输入进行推理。 49 | 50 | 51 | --- 52 | 53 | [^1]: ssl其实就是音频的Hubert,与SoVits的Hubert一致 54 | -------------------------------------------------------------------------------- /libdlvoicecodec/input.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/libdlvoicecodec/input.wav -------------------------------------------------------------------------------- /libdlvoicecodec/output.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/libdlvoicecodec/output.wav -------------------------------------------------------------------------------- /libdlvoicecodec/packages.config: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="utf-8"?> 2 | <packages> 3 | <package id="Microsoft.AI.DirectML" version="1.13.1" targetFramework="native" /> 4 | <package id="Microsoft.ML.OnnxRuntime.DirectML" version="1.17.1" targetFramework="native" /> 5 | <package id="VC-LTL" version="5.0.9" targetFramework="native" /> 6 | <package id="YY.NuGet.Import.Helper" version="1.0.0.4" targetFramework="native" /> 7 | </packages> -------------------------------------------------------------------------------- /libsvc/Api/header/NativeApi.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/libsvc/Api/header/NativeApi.h -------------------------------------------------------------------------------- /libsvc/Api/header/libsvc.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/libsvc/Api/header/libsvc.h -------------------------------------------------------------------------------- /libsvc/Api/src/NativeApi.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/libsvc/Api/src/NativeApi.cpp -------------------------------------------------------------------------------- /libsvc/Modules/Lib/MJson/MJson.cpp: -------------------------------------------------------------------------------- 1 | #include "MJson.h" 2 | 3 | class FileGuard 4 | { 5 | 
public: 6 | FileGuard() = delete; 7 | ~FileGuard() 8 | { 9 | if (_fp) fclose(_fp); 10 | _fp = nullptr; 11 | } 12 | FileGuard(const char* _path) 13 | { 14 | if (_fp) fclose(_fp); 15 | _wfopen_s(&_fp, to_wide_string(_path).c_str(), L"rb"); 16 | } 17 | operator FILE* () const 18 | { 19 | return _fp; 20 | } 21 | private: 22 | FILE* _fp = nullptr; 23 | static std::wstring to_wide_string(const std::string& input) 24 | { 25 | std::vector<wchar_t> WideString(input.length() * 2); 26 | MultiByteToWideChar( 27 | CP_UTF8, 28 | 0, 29 | input.c_str(), 30 | int(input.length()), 31 | WideString.data(), 32 | int(WideString.size()) 33 | ); 34 | return WideString.data(); 35 | } 36 | }; 37 | 38 | MJson::MJson(const char* _path) 39 | { 40 | const auto file = FileGuard(_path); 41 | _document = yyjson_read_file(_path, YYJSON_READ_NOFLAG, nullptr, nullptr); 42 | if (!_document) 43 | throw std::exception("Json Parse Error !"); 44 | root = yyjson_doc_get_root(_document); 45 | } 46 | 47 | MJson::MJson(const std::string& _data, bool _read_from_string) 48 | { 49 | if (_read_from_string) 50 | _document = yyjson_read(_data.c_str(), _data.length(), YYJSON_READ_NOFLAG); 51 | else 52 | { 53 | const auto file = FileGuard(_data.c_str()); 54 | _document = yyjson_read_fp(file, YYJSON_READ_NOFLAG, nullptr, nullptr); 55 | } 56 | if (!_document) 57 | throw std::exception("Json Parse Error !"); 58 | root = yyjson_doc_get_root(_document); 59 | } 60 | -------------------------------------------------------------------------------- /libsvc/Modules/Lib/World/src/world/constantnumbers.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise) 4 | // Last update: 2021/02/15 5 | // 6 | // This header file only defines constant numbers used for several function. 
7 | //----------------------------------------------------------------------------- 8 | #ifndef WORLD_CONSTANT_NUMBERS_H_ 9 | #define WORLD_CONSTANT_NUMBERS_H_ 10 | 11 | namespace world { 12 | // for Dio() 13 | const double kCutOff = 50.0; 14 | 15 | // for StoneMask() 16 | const double kFloorF0StoneMask = 40.0; 17 | 18 | const double kPi = 3.1415926535897932384; 19 | const double kMySafeGuardMinimum = 0.000000000001; 20 | const double kEps = 0.00000000000000022204460492503131; 21 | const double kFloorF0 = 71.0; 22 | const double kCeilF0 = 800.0; 23 | const double kDefaultF0 = 500.0; 24 | const double kLog2 = 0.69314718055994529; 25 | // Maximum standard deviation not to be selected as a best f0. 26 | const double kMaximumValue = 100000.0; 27 | 28 | // Note to me (fs: 48000) 29 | // 71 Hz is the limit to maintain the FFT size at 2048. 30 | // If we use 70 Hz as FLOOR_F0, the FFT size of 4096 is required. 31 | 32 | // for D4C() 33 | const int kHanning = 1; 34 | const int kBlackman = 2; 35 | const double kFrequencyInterval = 3000.0; 36 | const double kUpperLimit = 15000.0; 37 | const double kThreshold = 0.85; 38 | const double kFloorF0D4C = 47.0; 39 | 40 | // for Codec (Mel scale) 41 | // S. Stevens & J. Volkmann, 42 | // The Relation of Pitch to Frequency: A Revised Scale, 43 | // American Journal of Psychology, vol. 53, no. 3, pp. 329-353, 1940. 
44 | const double kM0 = 1127.01048; 45 | const double kF0 = 700.0; 46 | const double kFloorFrequency = 40.0; 47 | const double kCeilFrequency = 20000.0; 48 | 49 | } // namespace world 50 | 51 | #endif // WORLD_CONSTANT_NUMBERS_H_ 52 | -------------------------------------------------------------------------------- /libsvc/Modules/Lib/World/src/world/d4c.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise) 4 | // Last update: 2021/02/15 5 | //----------------------------------------------------------------------------- 6 | #ifndef WORLD_D4C_H_ 7 | #define WORLD_D4C_H_ 8 | 9 | #include "world/macrodefinitions.h" 10 | 11 | WORLD_BEGIN_C_DECLS 12 | 13 | //----------------------------------------------------------------------------- 14 | // Struct for D4C 15 | //----------------------------------------------------------------------------- 16 | typedef struct { 17 | double threshold; 18 | } D4COption; 19 | 20 | //----------------------------------------------------------------------------- 21 | // D4C() calculates the aperiodicity estimated by D4C. 22 | // 23 | // Input: 24 | // x : Input signal 25 | // x_length : Length of x 26 | // fs : Sampling frequency 27 | // temporal_positions : Time axis 28 | // f0 : F0 contour 29 | // f0_length : Length of F0 contour 30 | // fft_size : Number of samples of the aperiodicity in one frame. 31 | // : It is given by the equation fft_size / 2 + 1. 32 | // Output: 33 | // aperiodicity : Aperiodicity estimated by D4C. 
34 | //----------------------------------------------------------------------------- 35 | void D4C(const double *x, int x_length, int fs, 36 | const double *temporal_positions, const double *f0, int f0_length, 37 | int fft_size, const D4COption *option, double **aperiodicity); 38 | 39 | //----------------------------------------------------------------------------- 40 | // InitializeD4COption allocates the memory to the struct and sets the 41 | // default parameters. 42 | // 43 | // Output: 44 | // option : Struct for the optional parameter. 45 | //----------------------------------------------------------------------------- 46 | void InitializeD4COption(D4COption *option); 47 | 48 | WORLD_END_C_DECLS 49 | 50 | #endif // WORLD_D4C_H_ 51 | -------------------------------------------------------------------------------- /libsvc/Modules/Lib/World/src/world/dio.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise) 4 | // Last update: 2021/02/15 5 | //----------------------------------------------------------------------------- 6 | #ifndef WORLD_DIO_H_ 7 | #define WORLD_DIO_H_ 8 | 9 | #include "macrodefinitions.h" 10 | 11 | WORLD_BEGIN_C_DECLS 12 | 13 | //----------------------------------------------------------------------------- 14 | // Struct for DIO 15 | //----------------------------------------------------------------------------- 16 | typedef struct { 17 | double f0_floor; 18 | double f0_ceil; 19 | double channels_in_octave; 20 | double frame_period; // msec 21 | int speed; // (1, 2, ..., 12) 22 | double allowed_range; // Threshold used for fixing the F0 contour. 
23 | } DioOption; 24 | 25 | //----------------------------------------------------------------------------- 26 | // DIO 27 | // 28 | // Input: 29 | // x : Input signal 30 | // x_length : Length of x 31 | // fs : Sampling frequency 32 | // option : Struct to order the parameter for DIO 33 | // 34 | // Output: 35 | // temporal_positions : Temporal positions. 36 | // f0 : F0 contour. 37 | //----------------------------------------------------------------------------- 38 | void Dio(const double *x, int x_length, int fs, const DioOption *option, 39 | double *temporal_positions, double *f0); 40 | 41 | //----------------------------------------------------------------------------- 42 | // InitializeDioOption allocates the memory to the struct and sets the 43 | // default parameters. 44 | // 45 | // Output: 46 | // option : Struct for the optional parameter. 47 | //----------------------------------------------------------------------------- 48 | void InitializeDioOption(DioOption *option); 49 | 50 | //----------------------------------------------------------------------------- 51 | // GetSamplesForDIO() calculates the number of samples required for Dio(). 52 | // 53 | // Input: 54 | // fs : Sampling frequency [Hz] 55 | // x_length : Length of the input signal [Sample]. 
56 | // frame_period : Frame shift [msec] 57 | // 58 | // Output: 59 | // The number of samples required to store the results of Dio() 60 | //----------------------------------------------------------------------------- 61 | int GetSamplesForDIO(int fs, int x_length, double frame_period); 62 | 63 | WORLD_END_C_DECLS 64 | 65 | #endif // WORLD_DIO_H_ 66 | -------------------------------------------------------------------------------- /libsvc/Modules/Lib/World/src/world/fft.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise) 4 | // Last update: 2021/02/15 5 | // 6 | // These functions and variables are defined to use FFT as well as FFTW 7 | // Please see fft.cpp to show the detailed information 8 | //----------------------------------------------------------------------------- 9 | #ifndef WORLD_FFT_H_ 10 | #define WORLD_FFT_H_ 11 | 12 | #include "macrodefinitions.h" 13 | 14 | WORLD_BEGIN_C_DECLS 15 | 16 | // Commands for FFT (This is the same as FFTW) 17 | #define FFT_FORWARD 1 18 | #define FFT_BACKWARD 2 19 | #define FFT_ESTIMATE 3 20 | 21 | // Complex number for FFT 22 | typedef double fft_complex[2]; 23 | // Struct used for FFT 24 | typedef struct { 25 | int n; 26 | int sign; 27 | unsigned int flags; 28 | fft_complex *c_in; 29 | double *in; 30 | fft_complex *c_out; 31 | double *out; 32 | double *input; 33 | int *ip; 34 | double *w; 35 | } fft_plan; 36 | 37 | fft_plan fft_plan_dft_1d(int n, fft_complex *in, fft_complex *out, int sign, 38 | unsigned int flags); 39 | fft_plan fft_plan_dft_c2r_1d(int n, fft_complex *in, double *out, 40 | unsigned int flags); 41 | fft_plan fft_plan_dft_r2c_1d(int n, double *in, fft_complex *out, 42 | unsigned int flags); 43 | void fft_execute(fft_plan p); 44 | void fft_destroy_plan(fft_plan p); 45 | 46 | WORLD_END_C_DECLS 47 | 
48 | #endif // WORLD_FFT_H_ 49 | -------------------------------------------------------------------------------- /libsvc/Modules/Lib/World/src/world/harvest.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise) 4 | // Last update: 2021/02/15 5 | //----------------------------------------------------------------------------- 6 | #ifndef WORLD_HARVEST_H_ 7 | #define WORLD_HARVEST_H_ 8 | 9 | #include "macrodefinitions.h" 10 | 11 | WORLD_BEGIN_C_DECLS 12 | 13 | //----------------------------------------------------------------------------- 14 | // Struct for Harvest 15 | //----------------------------------------------------------------------------- 16 | typedef struct { 17 | double f0_floor; 18 | double f0_ceil; 19 | double frame_period; 20 | } HarvestOption; 21 | 22 | //----------------------------------------------------------------------------- 23 | // Harvest 24 | // 25 | // Input: 26 | // x : Input signal 27 | // x_length : Length of x 28 | // fs : Sampling frequency 29 | // option : Struct to order the parameter for Harvest 30 | // 31 | // Output: 32 | // temporal_positions : Temporal positions. 33 | // f0 : F0 contour. 34 | //----------------------------------------------------------------------------- 35 | void Harvest(const double *x, int x_length, int fs, 36 | const HarvestOption *option, double *temporal_positions, double *f0); 37 | 38 | //----------------------------------------------------------------------------- 39 | // InitializeHarvestOption allocates the memory to the struct and sets the 40 | // default parameters. 41 | // 42 | // Output: 43 | // option : Struct for the optional parameter. 
44 | //----------------------------------------------------------------------------- 45 | void InitializeHarvestOption(HarvestOption *option); 46 | 47 | //----------------------------------------------------------------------------- 48 | // GetSamplesForHarvest() calculates the number of samples required for 49 | // Harvest(). 50 | // 51 | // Input: 52 | // fs : Sampling frequency [Hz] 53 | // x_length : Length of the input signal [Sample] 54 | // frame_period : Frame shift [msec] 55 | // 56 | // Output: 57 | // The number of samples required to store the results of Harvest(). 58 | //----------------------------------------------------------------------------- 59 | int GetSamplesForHarvest(int fs, int x_length, double frame_period); 60 | 61 | WORLD_END_C_DECLS 62 | 63 | #endif // WORLD_HARVEST_H_ 64 | -------------------------------------------------------------------------------- /libsvc/Modules/Lib/World/src/world/stonemask.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise) 4 | // Last update: 2021/02/15 5 | //----------------------------------------------------------------------------- 6 | #ifndef WORLD_STONEMASK_H_ 7 | #define WORLD_STONEMASK_H_ 8 | 9 | #include "macrodefinitions.h" 10 | 11 | WORLD_BEGIN_C_DECLS 12 | 13 | //----------------------------------------------------------------------------- 14 | // StoneMask() refines the estimated F0 by Dio() 15 | // 16 | // Input: 17 | // x : Input signal 18 | // x_length : Length of the input signal 19 | // fs : Sampling frequency 20 | // time_axis : Temporal information 21 | // f0 : f0 contour 22 | // f0_length : Length of f0 23 | // 24 | // Output: 25 | // refined_f0 : Refined F0 26 | //----------------------------------------------------------------------------- 27 | void StoneMask(const double *x, int 
x_length, int fs, 28 | const double *temporal_positions, const double *f0, int f0_length, 29 | double *refined_f0); 30 | 31 | WORLD_END_C_DECLS 32 | 33 | #endif // WORLD_STONEMASK_H_ 34 | -------------------------------------------------------------------------------- /libsvc/Modules/Lib/World/src/world/synthesis.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise) 4 | // Last update: 2021/02/15 5 | //----------------------------------------------------------------------------- 6 | #ifndef WORLD_SYNTHESIS_H_ 7 | #define WORLD_SYNTHESIS_H_ 8 | 9 | #include "world/macrodefinitions.h" 10 | 11 | WORLD_BEGIN_C_DECLS 12 | 13 | //----------------------------------------------------------------------------- 14 | // Synthesis() synthesize the voice based on f0, spectrogram and 15 | // aperiodicity (not excitation signal). 
16 | // 17 | // Input: 18 | // f0 : f0 contour 19 | // f0_length : Length of f0 20 | // spectrogram : Spectrogram estimated by CheapTrick 21 | // fft_size : FFT size 22 | // aperiodicity : Aperiodicity spectrogram based on D4C 23 | // frame_period : Temporal period used for the analysis 24 | // fs : Sampling frequency 25 | // y_length : Length of the output signal (Memory of y has been 26 | // allocated in advance) 27 | // Output: 28 | // y : Calculated speech 29 | //----------------------------------------------------------------------------- 30 | void Synthesis(const double *f0, int f0_length, 31 | const double * const *spectrogram, const double * const *aperiodicity, 32 | int fft_size, double frame_period, int fs, int y_length, double *y); 33 | 34 | WORLD_END_C_DECLS 35 | 36 | #endif // WORLD_SYNTHESIS_H_ 37 | -------------------------------------------------------------------------------- /libsvc/Modules/Lib/World/tools/audioio.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise) 4 | // Last update: 2021/02/15 5 | //----------------------------------------------------------------------------- 6 | #ifndef WORLD_AUDIOIO_H_ 7 | #define WORLD_AUDIOIO_H_ 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | //----------------------------------------------------------------------------- 14 | // wavwrite() write a .wav file. 15 | // Input: 16 | // x : Input signal 17 | // x_ength : Signal length of x [sample] 18 | // fs : Sampling frequency [Hz] 19 | // nbit : Quantization bit [bit] 20 | // filename : Name of the output signal. 21 | // Caution: 22 | // The variable nbit is not used in this function. 23 | // This function only supports the 16 bit. 
24 | //----------------------------------------------------------------------------- 25 | void wavwrite(const double *x, int x_length, int fs, int nbit, 26 | const char *filename); 27 | 28 | //----------------------------------------------------------------------------- 29 | // GetAudioLength() returns the length of .wav file. 30 | // Input: 31 | // filename : Filename of a .wav file. 32 | // Output: 33 | // The number of samples of the file .wav 34 | //----------------------------------------------------------------------------- 35 | int GetAudioLength(const char *filename); 36 | 37 | //----------------------------------------------------------------------------- 38 | // wavread() read a .wav file. 39 | // The memory of output x must be allocated in advance. 40 | // Input: 41 | // filename : Filename of the input file. 42 | // Output: 43 | // fs : Sampling frequency [Hz] 44 | // nbit : Quantization bit [bit] 45 | // x : The output waveform. 46 | //----------------------------------------------------------------------------- 47 | void wavread(const char* filename, int *fs, int *nbit, double *x); 48 | 49 | #ifdef __cplusplus 50 | } 51 | #endif 52 | 53 | #endif // WORLD_AUDIOIO_H_ 54 | -------------------------------------------------------------------------------- /libsvc/Modules/README.md: -------------------------------------------------------------------------------- 1 | # Example 2 | ```c++ 3 | #include "Modules/Models/header/Vits.hpp" 4 | 5 | int main(){ 6 | rapidjson::Document Config; 7 | Config.Parse("Your Config"); 8 | 9 | //Progress bar 10 | InferClass::BaseModelType::callback a_callback = [](size_t a, size_t b) {std::cout << std::to_string((float)a * 100.f / (float)b) << "%\n"; }; 11 | 12 | //return params for inference 13 | InferClass::BaseModelType::callback_params b_callback = []() 14 | { 15 | auto cbaaa = InferClass::InferConfigs(); 16 | cbaaa.kmeans_rate = 0.5; 17 | cbaaa.keys = 0; 18 | return cbaaa; 19 | }; 20 | 21 | //modify duration per phoneme 22 
| InferClass::TTS::DurationCallback c_callback = [](std::vector<float>&) {}; 23 | 24 | std::vector<int16_t> output; 25 | try 26 | { 27 | std::wstring inp(L"watashinoonaniomitekudasai"); 28 | auto model = dynamic_cast<InferClass::BaseModelType*>(new InferClass::VitsSvc(Config, a_callback, b_callback)); 29 | 30 | output = model->Inference(inp); 31 | 32 | Wav outWav(model->GetSamplingRate(), output.size() * 2, output.data()); 33 | outWav.Writef(L"test.wav"); 34 | 35 | delete model; 36 | } 37 | catch(std::exception& e) 38 | { 39 | std::cout << e.what(); 40 | } 41 | } 42 | 43 | ``` 44 | -------------------------------------------------------------------------------- /libsvc/Modules/framework.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef LibSvcApi 4 | #ifdef MoeVSDll 5 | #ifdef LibSvcDll 6 | #define LibSvcApi __declspec(dllexport) 7 | #else 8 | #ifndef MoeVS 9 | #define LibSvcApi __declspec(dllimport) 10 | #else 11 | #define LibSvcApi 12 | #endif 13 | #endif 14 | #else 15 | #define LibSvcApi 16 | #endif 17 | #endif -------------------------------------------------------------------------------- /libsvc/Modules/header/InferTools/AvCodec/AvCodeResample.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <vector> 3 | #include <string> 4 | #include "../../StringPreprocess.hpp" 5 | #include "matlabfunctions.h" 6 | #include "../inferTools.hpp" 7 | extern "C" { 8 | #include "libavcodec/avcodec.h" 9 | #include "libavformat/avformat.h" 10 | #include "libswscale/swscale.h" 11 | #include "libswresample/swresample.h" 12 | #include "libavutil/samplefmt.h" 13 | } 14 | 15 | class AudioPreprocess 16 | { 17 | public: 18 | struct WAV_HEADER { 19 | char RIFF[4] = { 'R','I','F','F' }; //RIFF标识 20 | unsigned long ChunkSize; //文件大小-8 21 | char WAVE[4] = { 'W','A','V','E' }; //WAVE块 22 | char fmt[4] = { 'f','m','t',' ' }; //fmt块 23 | unsigned long 
Subchunk1Size; //fmt块大小 24 | unsigned short AudioFormat; //编码格式 25 | unsigned short NumOfChan; //声道数 26 | WAV_HEADER(unsigned long cs = 36, unsigned long sc1s = 16, unsigned short af = 1, unsigned short nc = 1) :ChunkSize(cs), Subchunk1Size(sc1s), AudioFormat(af), NumOfChan(nc) {} 27 | }; 28 | LibSvcApi static WAV_HEADER GetHeader(const std::wstring& path); 29 | LibSvcApi static std::vector<double> arange(double start, double end, double step = 1.0, double div = 1.0); 30 | LibSvcApi std::vector<short> codec(const std::wstring& path, int sr); 31 | LibSvcApi void release(); 32 | LibSvcApi void init(); 33 | LibSvcApi AudioPreprocess(); 34 | ~AudioPreprocess() 35 | { 36 | release(); 37 | } 38 | private: 39 | AVFrame* inFrame; 40 | uint8_t* out_buffer; 41 | SwrContext* swrContext; 42 | AVCodecContext* avCodecContext; 43 | AVFormatContext* avFormatContext; 44 | AVPacket* packet; 45 | }; 46 | -------------------------------------------------------------------------------- /libsvc/Modules/header/InferTools/Cluster/MoeVSBaseCluster.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * FileName: MoeVSBaseCluster.hpp 3 | * Note: MoeVoiceStudioCore 聚类基类 4 | * 5 | * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com) 6 | * 7 | * This file is part of MoeVoiceStudioCore library. 8 | * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the 9 | * GNU Affero General Public License as published by the Free Software Foundation, either version 3 10 | * of the License, or any later version. 11 | * 12 | * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 13 | * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 14 | * See the GNU Affero General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Affero General Public License along with Foobar. 
17 | * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>. 18 | * 19 | * date: 2022-10-17 Create 20 | */ 21 | 22 | #pragma once 23 | #include <vector> 24 | 25 | #define MoeVoiceStudioClusterHeader namespace MoeVoiceStudioCluster { 26 | #define MoeVoiceStudioClusterEnd } 27 | 28 | MoeVoiceStudioClusterHeader 29 | 30 | class MoeVoiceStudioBaseCluster 31 | { 32 | public: 33 | MoeVoiceStudioBaseCluster() = default; 34 | virtual ~MoeVoiceStudioBaseCluster() = default; 35 | 36 | /** 37 | * \brief Find the nearest cluster point(s) 38 | * \param point Point(s) to look up 39 | * \param sid Speaker (character) ID 40 | * \param n_points Number of points 41 | * \return The nearest point(s) found 42 | */ 43 | virtual std::vector<float> find(float* point, long sid, int64_t n_points = 1); 44 | }; 45 | 46 | MoeVoiceStudioClusterEnd -------------------------------------------------------------------------------- /libsvc/Modules/header/InferTools/Cluster/MoeVSIndexCluster.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * FileName: MoeVSIndexCluster.hpp 3 | * Note: MoeVoiceStudioCore 官方聚类(Index) 4 | * 5 | * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com) 6 | * 7 | * This file is part of MoeVoiceStudioCore library. 8 | * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the 9 | * GNU Affero General Public License as published by the Free Software Foundation, either version 3 10 | * of the License, or any later version. 11 | * 12 | * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 13 | * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 14 | * See the GNU Affero General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Affero General Public License along with Foobar. 
18 | * 19 | * date: 2022-10-17 Create 20 | */ 21 | 22 | #pragma once 23 | #ifdef MoeVoiceStudioIndexCluster 24 | #include <string> 25 | #include "MoeVSBaseCluster.hpp" 26 | #include <faiss/IndexIVFFlat.h> 27 | #include <faiss/index_io.h> 28 | 29 | MoeVoiceStudioClusterHeader 30 | class IndexClusterCore 31 | { 32 | public: 33 | IndexClusterCore() = delete; 34 | ~IndexClusterCore(); 35 | IndexClusterCore(const char* _path); 36 | IndexClusterCore(const IndexClusterCore&) = delete; 37 | IndexClusterCore(IndexClusterCore&& move) noexcept; 38 | IndexClusterCore& operator=(const IndexClusterCore&) = delete; 39 | IndexClusterCore& operator=(IndexClusterCore&& move) noexcept; 40 | std::vector<float> find(const float* points, faiss::idx_t n_points, faiss::idx_t n_searched_points = 8); 41 | float* GetVec(faiss::idx_t index); 42 | private: 43 | faiss::Index* IndexPtr = nullptr; 44 | faiss::idx_t Dim = 0; 45 | std::vector<float> IndexsVector; 46 | }; 47 | 48 | class IndexCluster : public MoeVoiceStudioBaseCluster 49 | { 50 | public: 51 | IndexCluster() = delete; 52 | ~IndexCluster() override = default; 53 | IndexCluster(const std::wstring& _path, size_t hidden_size, size_t KmeansLen); 54 | std::vector<float> find(float* point, long sid, int64_t n_points = 1) override; 55 | private: 56 | std::vector<IndexClusterCore> Indexs; 57 | size_t n_hidden_size = 256; 58 | }; 59 | 60 | MoeVoiceStudioClusterEnd 61 | 62 | #endif -------------------------------------------------------------------------------- /libsvc/Modules/header/InferTools/Cluster/MoeVSKmeansCluster.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * FileName: MoeVSKmeansCluster.hpp 3 | * Note: MoeVoiceStudioCore 官方聚类(Kmeans) 4 | * 5 | * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com) 6 | * 7 | * This file is part of MoeVoiceStudioCore library. 
8 | * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the 9 | * GNU Affero General Public License as published by the Free Software Foundation, either version 3 10 | * of the License, or any later version. 11 | * 12 | * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 13 | * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 14 | * See the GNU Affero General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Affero General Public License along with Foobar. 17 | * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>. 18 | * 19 | * date: 2022-10-17 Create 20 | */ 21 | 22 | #pragma once 23 | #include "MoeVSBaseCluster.hpp" 24 | #include "../DataStruct/KDTree.hpp" 25 | #include <string> 26 | 27 | MoeVoiceStudioClusterHeader 28 | 29 | class KMeansCluster : public MoeVoiceStudioBaseCluster 30 | { 31 | public: 32 | KMeansCluster() = delete; 33 | ~KMeansCluster() override = default; 34 | KMeansCluster(const std::wstring& _path, size_t hidden_size, size_t KmeansLen); 35 | std::vector<float> find(float* point, long sid, int64_t n_points = 1) override; 36 | private: 37 | std::vector<KDTree> _tree; 38 | size_t dims = 0; 39 | }; 40 | 41 | MoeVoiceStudioClusterEnd -------------------------------------------------------------------------------- /libsvc/Modules/header/InferTools/DataStruct/README.md: -------------------------------------------------------------------------------- 1 | ## KdTree From J. 
Frederico Carvalho 2 | -------------------------------------------------------------------------------- /libsvc/Modules/header/InferTools/F0Extractor/DioF0Extractor.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * FileName: DioF0Extractor.hpp 3 | * Note: MoeVoiceStudioCore 官方F0提取算法 Dio 4 | * 5 | * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com) 6 | * 7 | * This file is part of MoeVoiceStudioCore library. 8 | * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the 9 | * GNU Affero General Public License as published by the Free Software Foundation, either version 3 10 | * of the License, or any later version. 11 | * 12 | * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 13 | * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 14 | * See the GNU Affero General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Affero General Public License along with Foobar. 17 | * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>. 
18 | * 19 | * date: 2022-10-17 Create 20 | */ 21 | 22 | #pragma once 23 | #include "BaseF0Extractor.hpp" 24 | 25 | MoeVoiceStudioF0ExtractorHeader 26 | class DioF0Extractor : public BaseF0Extractor 27 | { 28 | public: 29 | DioF0Extractor(int sampling_rate, int hop_size, int n_f0_bins = 256, double max_f0 = 1100.0, double min_f0 = 50.0); 30 | 31 | ~DioF0Extractor() override = default; 32 | 33 | void compute_f0(const double* PCMData, size_t PCMLen); 34 | 35 | void InterPf0(size_t TargetLength); 36 | 37 | std::vector<float> ExtractF0(const std::vector<double>& PCMData, size_t TargetLength) override; 38 | private: 39 | std::vector<double> refined_f0; 40 | }; 41 | MoeVoiceStudioF0ExtractorEnd -------------------------------------------------------------------------------- /libsvc/Modules/header/InferTools/F0Extractor/HarvestF0Extractor.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * FileName: HarvestF0Extractor.hpp 3 | * Note: MoeVoiceStudioCore 官方F0提取算法 Harvest 4 | * 5 | * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com) 6 | * 7 | * This file is part of MoeVoiceStudioCore library. 8 | * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the 9 | * GNU Affero General Public License as published by the Free Software Foundation, either version 3 10 | * of the License, or any later version. 11 | * 12 | * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 13 | * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 14 | * See the GNU Affero General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Affero General Public License along with Foobar. 17 | * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>. 
18 | * 19 | * date: 2022-10-17 Create 20 | */ 21 | 22 | #pragma once 23 | #include "BaseF0Extractor.hpp" 24 | 25 | MoeVoiceStudioF0ExtractorHeader 26 | class HarvestF0Extractor : public BaseF0Extractor 27 | { 28 | public: 29 | HarvestF0Extractor(int sampling_rate, int hop_size, int n_f0_bins = 256, double max_f0 = 1100.0, double min_f0 = 50.0); 30 | 31 | ~HarvestF0Extractor() override = default; 32 | 33 | void compute_f0(const double* PCMData, size_t PCMLen); 34 | 35 | void InterPf0(size_t TargetLength); 36 | 37 | std::vector<float> ExtractF0(const std::vector<double>& PCMData, size_t TargetLength) override; 38 | 39 | private: 40 | std::vector<double> refined_f0; 41 | }; 42 | MoeVoiceStudioF0ExtractorEnd -------------------------------------------------------------------------------- /libsvc/Modules/header/InferTools/Stft/stft.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <vector> 3 | #include "fftw3.h" 4 | 5 | namespace DlCodecStft 6 | { 7 | class STFT 8 | { 9 | public: 10 | STFT() = default; 11 | ~STFT(); 12 | STFT(int WindowSize, int HopSize, int FFTSize = 0); 13 | inline static double PI = 3.14159265358979323846; 14 | std::pair<std::vector<float>, int64_t> operator()(const std::vector<double>& audioData) const; 15 | private: 16 | int WINDOW_SIZE = 2048; 17 | int HOP_SIZE = WINDOW_SIZE / 4; 18 | int FFT_SIZE = WINDOW_SIZE / 2 + 1; 19 | }; 20 | 21 | class Mel 22 | { 23 | public: 24 | Mel() = delete; 25 | ~Mel() = default; 26 | Mel(int WindowSize, int HopSize, int SamplingRate, int MelSize = 0); 27 | std::pair<std::vector<float>, int64_t> GetMel(const std::vector<double>& audioData) const; 28 | std::pair<std::vector<float>, int64_t> operator()(const std::vector<double>& audioData) const; 29 | private: 30 | STFT stft; 31 | int MEL_SIZE = 128; 32 | int FFT_SIZE = 0; 33 | int sr = 22050; 34 | std::vector<float> MelBasis; 35 | }; 36 | } 37 | 
// Renders a vector as a bracketed, comma-separated wide string, e.g. "[1, 2, 3]".
// Elements are formatted with std::to_wstring. For float/double elements the
// trailing zeros are dropped, but one digit is always kept after the decimal
// point ("2.000000" becomes "2.0"). An empty vector yields "[]".
template <typename T>
std::wstring vector_to_string(const std::vector<T>& vector)
{
	constexpr bool IsFloating = std::is_same_v<T, float> || std::is_same_v<T, double>;

	std::wstring Joined = L"[";
	for (const auto& Element : vector)
	{
		std::wstring Text = std::to_wstring(Element);
		if (IsFloating && Text.find(L'.') != std::wstring::npos)
		{
			// A '.' is present, so find_last_not_of always finds a character.
			Text.erase(Text.find_last_not_of(L'0') + 1);
			if (Text.back() == L'.')
				Text.push_back(L'0');
		}
		Joined += Text;
		Joined += L", ";
	}

	// Drop the trailing ", " left behind by the last element.
	if (Joined.length() > 2)
		Joined.erase(Joined.length() - 2);
	Joined += L']';
	return Joined;
}
std::runtime_error("Unable To Find An Available MoeVSCluster"); 16 | } 17 | 18 | void RegisterMoeVSCluster(const std::wstring& _name, const GetMoeVSClusterFn& _constructor_fn) 19 | { 20 | if (RegisteredMoeVSCluster.find(_name) != RegisteredMoeVSCluster.end()) 21 | { 22 | logger.log(L"[Warn] MoeVSClusterNameConflict"); 23 | return; 24 | } 25 | RegisteredMoeVSCluster[_name] = _constructor_fn; 26 | } 27 | 28 | MoeVoiceStudioClusterEnd -------------------------------------------------------------------------------- /libsvc/Modules/src/InferTools/Cluster/MoeVSKmeansCluster.cpp: -------------------------------------------------------------------------------- 1 | #include "../../../header/InferTools/Cluster/MoeVSKmeansCluster.hpp" 2 | #include "../../../header/InferTools/inferTools.hpp" 3 | 4 | std::vector<float> MoeVoiceStudioCluster::KMeansCluster::find(float* point, long sid, int64_t n_points) 5 | { 6 | if (size_t(sid) < _tree.size()) 7 | { 8 | std::vector<float> res; 9 | res.reserve(dims * n_points * 2); 10 | for (int64_t pt = 0; pt < n_points; ++pt) 11 | { 12 | auto tmp = _tree[sid].nearest_point({ point + pt * dims,point + (pt + 1) * dims }); 13 | res.insert(res.end(), tmp.begin(), tmp.end()); 14 | } 15 | return res; 16 | } 17 | return { point, point + dims * n_points }; 18 | } 19 | 20 | MoeVoiceStudioCluster::KMeansCluster::KMeansCluster(const std::wstring& _path, size_t hidden_size, size_t KmeansLen) 21 | { 22 | dims = hidden_size; 23 | FILE* file = nullptr; 24 | _wfopen_s(&file, (_path + L"/KMeans.npy").c_str(), L"rb"); 25 | if (!file) 26 | LibDLVoiceCodecThrow("KMeansFileNotExist"); 27 | constexpr long idx = 128; 28 | fseek(file, idx, SEEK_SET); 29 | std::vector<float> tmpData(hidden_size); 30 | const size_t ec = size_t(hidden_size) * sizeof(float); 31 | std::vector<std::vector<float>> _tmp; 32 | _tmp.reserve(KmeansLen); 33 | while (fread(tmpData.data(), 1, ec, file) == ec) 34 | { 35 | _tmp.emplace_back(tmpData); 36 | if (_tmp.size() == KmeansLen) 37 | { 38 | 
_tree.emplace_back(_tmp); 39 | _tmp.clear(); 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /libsvc/Modules/src/InferTools/DataStruct/README.md: -------------------------------------------------------------------------------- 1 | ## KdTree From J. Frederico Carvalho 2 | -------------------------------------------------------------------------------- /libsvc/Modules/src/InferTools/F0Extractor/BaseF0Extractor.cpp: -------------------------------------------------------------------------------- 1 | #include "../../../header/InferTools/F0Extractor/BaseF0Extractor.hpp" 2 | #include <map> 3 | #include "../../../header/Logger/MoeSSLogger.hpp" 4 | #include "../../../header/InferTools/inferTools.hpp" 5 | 6 | MoeVSF0Extractor::BaseF0Extractor::BaseF0Extractor(int sampling_rate, int hop_size, int n_f0_bins, double max_f0, double min_f0) : 7 | fs(sampling_rate), 8 | hop(hop_size), 9 | f0_bin(n_f0_bins), 10 | f0_max(max_f0), 11 | f0_min(min_f0) 12 | { 13 | f0_mel_min = (1127.0 * log(1.0 + f0_min / 700.0)); 14 | f0_mel_max = (1127.0 * log(1.0 + f0_max / 700.0)); 15 | } 16 | 17 | std::vector<double> MoeVSF0Extractor::BaseF0Extractor::arange(double start, double end, double step, double div) 18 | { 19 | std::vector<double> output; 20 | while (start < end) 21 | { 22 | output.push_back(start / div); 23 | start += step; 24 | } 25 | return output; 26 | } 27 | 28 | std::vector<float> MoeVSF0Extractor::BaseF0Extractor::ExtractF0(const std::vector<double>& PCMData, size_t TargetLength) 29 | { 30 | LibDLVoiceCodecThrow("NotImplementedError"); 31 | } 32 | 33 | std::vector<float> MoeVSF0Extractor::BaseF0Extractor::ExtractF0(const std::vector<float>& PCMData, size_t TargetLength) 34 | { 35 | std::vector<double> PCMVector(PCMData.size()); 36 | for (size_t i = 0; i < PCMData.size(); ++i) 37 | PCMVector[i] = double(PCMData[i]); 38 | return ExtractF0(PCMVector, TargetLength); 39 | } 40 | 41 | std::vector<float> 
MoeVSF0Extractor::BaseF0Extractor::ExtractF0(const std::vector<int16_t>& PCMData, size_t TargetLength) 42 | { 43 | std::vector<double> PCMVector(PCMData.size()); 44 | for (size_t i = 0; i < PCMData.size(); ++i) 45 | PCMVector[i] = double(PCMData[i]); 46 | return ExtractF0(PCMVector, TargetLength); 47 | } -------------------------------------------------------------------------------- /libsvc/Modules/src/InferTools/F0Extractor/F0ExtractorManager.cpp: -------------------------------------------------------------------------------- 1 | #include "../../../header/InferTools/F0Extractor/F0ExtractorManager.hpp" 2 | #include <map> 3 | #include <stdexcept> 4 | #include "../../../header/Logger/MoeSSLogger.hpp" 5 | 6 | MoeVoiceStudioF0ExtractorHeader 7 | std::map<std::wstring, GetF0ExtractorFn> RegisteredF0Extractors; 8 | 9 | F0Extractor GetF0Extractor(const std::wstring& _name, 10 | const uint32_t fs, 11 | const uint32_t hop, 12 | const uint32_t f0_bin, 13 | const double f0_max, 14 | const double f0_min) 15 | { 16 | const auto f_F0Extractor = RegisteredF0Extractors.find(_name); 17 | if (f_F0Extractor != RegisteredF0Extractors.end()) 18 | return f_F0Extractor->second(fs, hop, f0_bin, f0_max, f0_min); 19 | throw std::runtime_error("Unable To Find An Available F0Extractor"); 20 | } 21 | 22 | void RegisterF0Extractor(const std::wstring& _name, const GetF0ExtractorFn& _constructor_fn) 23 | { 24 | if (RegisteredF0Extractors.find(_name) != RegisteredF0Extractors.end()) 25 | { 26 | logger.log(L"[Warn] F0ExtractorNameConflict"); 27 | return; 28 | } 29 | RegisteredF0Extractors[_name] = _constructor_fn; 30 | } 31 | 32 | std::vector<std::wstring> GetF0ExtractorList() 33 | { 34 | std::vector<std::wstring> F0ExtractorsVec; 35 | F0ExtractorsVec.reserve(RegisteredF0Extractors.size()); 36 | for (const auto& i : RegisteredF0Extractors) 37 | F0ExtractorsVec.emplace_back(i.first); 38 | return F0ExtractorsVec; 39 | } 40 | 41 | MoeVoiceStudioF0ExtractorEnd 
MoeVoiceStudioSamplerHeader

// Stores the mel-bin count and the three session pointers in the initializer
// list, then assigns the progress callback and the memory-info pointer.
// The pointers are stored as given; ownership is not visible in this file.
MoeVSBaseSampler::MoeVSBaseSampler(Ort::Session* alpha, Ort::Session* dfn, Ort::Session* pred, int64_t Mel_Bins, const ProgressCallback& _ProgressCallback, Ort::MemoryInfo* memory) :
	MelBins(Mel_Bins), Alpha(alpha), DenoiseFn(dfn), NoisePredictor(pred)
{
	_callback = _ProgressCallback;
	Memory = memory;
};

// Base implementation: always raises "NotImplementedError" through LibDLVoiceCodecThrow.
// NOTE(review): presumably meant to be overridden by concrete samplers — confirm in the header.
std::vector<Ort::Value> MoeVSBaseSampler::Sample(std::vector<Ort::Value>& Tensors, int64_t Steps, int64_t SpeedUp, float NoiseScale, int64_t Seed, size_t& Process)
{
	LibDLVoiceCodecThrow("NotImplementedError");
}

// Stores the mel-bin count and the velocity session pointer in the initializer
// list, then assigns the progress callback and the memory-info pointer.
MoeVSReflowBaseSampler::MoeVSReflowBaseSampler(Ort::Session* Velocity, int64_t MelBins, const ProgressCallback& _ProgressCallback, Ort::MemoryInfo* memory) :
	MelBins_(MelBins), Velocity_(Velocity)
{
	Callback_ = _ProgressCallback;
	Memory_ = memory;
}

// Base implementation: always raises "NotImplementedError" through LibDLVoiceCodecThrow.
std::vector<Ort::Value> MoeVSReflowBaseSampler::Sample(std::vector<Ort::Value>& Tensors, int64_t Steps, float dt, float Scale, size_t& Process)
{
	LibDLVoiceCodecThrow("NotImplementedError");
}

MoeVoiceStudioSamplerEnd
RegisteredTensorExtractors; 7 | 8 | void RegisterTensorExtractor(const std::wstring& _name, const GetTensorExtractorFn& _constructor_fn) 9 | { 10 | if (RegisteredTensorExtractors.find(_name) != RegisteredTensorExtractors.end()) 11 | { 12 | logger.log(L"[Warn] TensorExtractorNameConflict"); 13 | return; 14 | } 15 | RegisteredTensorExtractors[_name] = _constructor_fn; 16 | } 17 | 18 | TensorExtractor GetTensorExtractor(const std::wstring& _name, uint64_t _srcsr, uint64_t _sr, uint64_t _hop, bool _smix, bool _volume, uint64_t _hidden_size, uint64_t _nspeaker, const MoeVoiceStudioTensorExtractor::Others& _other) 19 | { 20 | const auto f_TensorExtractor = RegisteredTensorExtractors.find(_name); 21 | if (f_TensorExtractor != RegisteredTensorExtractors.end()) 22 | return f_TensorExtractor->second(_srcsr, _sr, _hop, _smix, _volume, _hidden_size, _nspeaker, _other); 23 | throw std::runtime_error("Unable To Find An Available TensorExtractor"); 24 | } 25 | 26 | MoeVoiceStudioTensorExtractorEnd -------------------------------------------------------------------------------- /libsvc/Modules/src/StringPreprocess.cpp: -------------------------------------------------------------------------------- 1 | #include "../header/StringPreprocess.hpp" 2 | #ifdef _WIN32 3 | #include <Windows.h> 4 | #else 5 | #error 6 | #endif 7 | 8 | std::string to_byte_string(const std::wstring& input) 9 | { 10 | std::vector<char> ByteString(input.length() * 6); 11 | WideCharToMultiByte( 12 | CP_UTF8, 13 | 0, 14 | input.c_str(), 15 | int(input.length()), 16 | ByteString.data(), 17 | int(ByteString.size()), 18 | nullptr, 19 | nullptr 20 | ); 21 | return ByteString.data(); 22 | } 23 | 24 | std::string to_ansi_string(const std::wstring& input) 25 | { 26 | std::vector<char> ByteString(input.length() * 6); 27 | WideCharToMultiByte( 28 | CP_ACP, 29 | 0, 30 | input.c_str(), 31 | int(input.length()), 32 | ByteString.data(), 33 | int(ByteString.size()), 34 | nullptr, 35 | nullptr 36 | ); 37 | return 
ByteString.data(); 38 | } 39 | 40 | std::wstring to_wide_string(const std::string& input) 41 | { 42 | std::vector<wchar_t> WideString(input.length() * 2); 43 | MultiByteToWideChar( 44 | CP_UTF8, 45 | 0, 46 | input.c_str(), 47 | int(input.length()), 48 | WideString.data(), 49 | int(WideString.size()) 50 | ); 51 | return WideString.data(); 52 | } 53 | 54 | std::wstring string_vector_to_string(const std::vector<std::string>& vector) 55 | { 56 | std::wstring vecstr = L"["; 57 | for (const auto& it : vector) 58 | if (!it.empty()) 59 | vecstr += L'\"' + to_wide_string(it) + L"\", "; 60 | if (vecstr.length() > 2) 61 | vecstr = vecstr.substr(0, vecstr.length() - 2); 62 | vecstr += L']'; 63 | return vecstr; 64 | } 65 | 66 | std::wstring wstring_vector_to_string(const std::vector<std::wstring>& vector) 67 | { 68 | std::wstring vecstr = L"["; 69 | for (const auto& it : vector) 70 | if (!it.empty()) 71 | vecstr += L'\"' + it + L"\", "; 72 | if (vecstr.length() > 2) 73 | vecstr = vecstr.substr(0, vecstr.length() - 2); 74 | vecstr += L']'; 75 | return vecstr; 76 | } -------------------------------------------------------------------------------- /libsvc/README.md: -------------------------------------------------------------------------------- 1 | # 使用方法 2 | ### 构建 3 | - 1、配置以下依赖: 4 | - [ffmpeg](https://ffmpeg.org/) 5 | - [onnxruntime](https://github.com/microsoft/onnxruntime) 6 | - [fftw](http://fftw.org/) 7 | - [openblas](https://github.com/OpenMathLib/OpenBLAS) 8 | - [faiss](https://github.com/facebookresearch/faiss) 9 | - [liblapack](https://netlib.org/lapack/) 10 | - 2、编译 11 | --- 12 | ### 使用动态库 13 | - 1、链接libsvc 14 | - 2、#include "libsvc/Api/header/libsvc.h" 15 | - 3、调用libsvc::Init() 16 | - 4、调用libsvc名称空间中的函数 -------------------------------------------------------------------------------- /libsvc/dllmain.cpp: -------------------------------------------------------------------------------- 1 | // dllmain.cpp : 定义 DLL 应用程序的入口点。 2 | #include "Windows.h" 3 | 4 | BOOL APIENTRY 
DllMain( HMODULE hModule, 5 | DWORD ul_reason_for_call, 6 | LPVOID lpReserved 7 | ) 8 | { 9 | switch (ul_reason_for_call) 10 | { 11 | case DLL_PROCESS_ATTACH: 12 | case DLL_THREAD_ATTACH: 13 | case DLL_THREAD_DETACH: 14 | case DLL_PROCESS_DETACH: 15 | break; 16 | } 17 | return TRUE; 18 | } 19 | 20 | -------------------------------------------------------------------------------- /libsvc/libsvc.vcxproj.user: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="utf-8"?> 2 | <Project ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> 3 | <PropertyGroup> 4 | <ShowAllFiles>false</ShowAllFiles> 5 | </PropertyGroup> 6 | <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> 7 | <LocalDebuggerEnvironment>$(SolutionDir)Lib\Dll</LocalDebuggerEnvironment> 8 | <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor> 9 | </PropertyGroup> 10 | </Project> -------------------------------------------------------------------------------- /libsvc/packages.config: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="utf-8"?> 2 | <packages> 3 | <package id="VC-LTL" version="5.0.9" targetFramework="native" /> 4 | <package id="YY.NuGet.Import.Helper" version="1.0.0.4" targetFramework="native" /> 5 | </packages> -------------------------------------------------------------------------------- /libtts/Api/NativeApi.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/libtts/Api/NativeApi.cpp -------------------------------------------------------------------------------- /libtts/Modules/Lib/MJson/MJson.cpp: -------------------------------------------------------------------------------- 1 | #include "MJson.h" 2 | 3 | class FileGuard 4 | { 5 | public: 6 | FileGuard() = delete; 7 | 
~FileGuard() 8 | { 9 | if (_fp) fclose(_fp); 10 | _fp = nullptr; 11 | } 12 | FileGuard(const char* _path) 13 | { 14 | if (_fp) fclose(_fp); 15 | _wfopen_s(&_fp, to_wide_string(_path).c_str(), L"rb"); 16 | } 17 | FileGuard(const std::wstring& _path) 18 | { 19 | if (_fp) fclose(_fp); 20 | _wfopen_s(&_fp, _path.c_str(), L"rb"); 21 | } 22 | operator FILE* () const 23 | { 24 | return _fp; 25 | } 26 | private: 27 | FILE* _fp = nullptr; 28 | static std::wstring to_wide_string(const std::string& input) 29 | { 30 | std::vector<wchar_t> WideString(input.length() * 2); 31 | MultiByteToWideChar( 32 | CP_UTF8, 33 | 0, 34 | input.c_str(), 35 | int(input.length()), 36 | WideString.data(), 37 | int(WideString.size()) 38 | ); 39 | return WideString.data(); 40 | } 41 | }; 42 | 43 | MJson::MJson(const char* _path) 44 | { 45 | const auto file = FileGuard(_path); 46 | _document = yyjson_read_file(_path, YYJSON_READ_NOFLAG, nullptr, nullptr); 47 | if (!_document) 48 | throw std::exception("Json Parse Error !"); 49 | root = yyjson_doc_get_root(_document); 50 | } 51 | 52 | MJson::MJson(const std::wstring& _path) 53 | { 54 | const FileGuard fp(_path); 55 | _document = yyjson_read_fp(fp, YYJSON_READ_NOFLAG, nullptr, nullptr); 56 | if (!_document) 57 | throw std::exception("File Not Exists!"); 58 | root = yyjson_doc_get_root(_document); 59 | } 60 | 61 | MJson::MJson(const std::string& _data, bool _read_from_string) 62 | { 63 | if (_read_from_string) 64 | _document = yyjson_read(_data.c_str(), _data.length(), YYJSON_READ_NOFLAG); 65 | else 66 | { 67 | const auto file = FileGuard(_data.c_str()); 68 | _document = yyjson_read_fp(file, YYJSON_READ_NOFLAG, nullptr, nullptr); 69 | } 70 | if (!_document) 71 | throw std::exception("Json Parse Error !"); 72 | root = yyjson_doc_get_root(_document); 73 | } 74 | -------------------------------------------------------------------------------- /libtts/Modules/Lib/World/src/world/constantnumbers.h: 
// Numeric constants shared by the WORLD analysis and synthesis functions.
namespace world {
// for Dio()
const double kCutOff = 50.0;

// for StoneMask()
const double kFloorF0StoneMask = 40.0;

// Pi.
const double kPi = 3.1415926535897932384;
// NOTE(review): presumably a small additive guard against division by zero or log(0) — confirm at the use sites.
const double kMySafeGuardMinimum = 0.000000000001;
// 2.2204e-16, i.e. the double-precision machine epsilon.
const double kEps = 0.00000000000000022204460492503131;
const double kFloorF0 = 71.0;
const double kCeilF0 = 800.0;
const double kDefaultF0 = 500.0;
// Natural logarithm of 2.
const double kLog2 = 0.69314718055994529;
// Maximum standard deviation not to be selected as a best f0.
const double kMaximumValue = 100000.0;

// Note to me (fs: 48000)
// 71 Hz is the limit to maintain the FFT size at 2048.
// If we use 70 Hz as FLOOR_F0, the FFT size of 4096 is required.

// for D4C()
// Window type identifiers.
const int kHanning = 1;
const int kBlackman = 2;
const double kFrequencyInterval = 3000.0;
const double kUpperLimit = 15000.0;
const double kThreshold = 0.85;
const double kFloorF0D4C = 47.0;

// for Codec (Mel scale)
// S. Stevens & J. Volkmann,
// The Relation of Pitch to Frequency: A Revised Scale,
// American Journal of Psychology, vol. 53, no. 3, pp. 329-353, 1940.
const double kM0 = 1127.01048;
const double kF0 = 700.0;
const double kFloorFrequency = 40.0;
const double kCeilFrequency = 20000.0;

}  // namespace world
//-----------------------------------------------------------------------------
// Struct for DIO
// Optional parameters passed to Dio(). InitializeDioOption() sets the
// default parameters.
//-----------------------------------------------------------------------------
typedef struct {
  double f0_floor;  // NOTE(review): presumably the lower F0 search bound — confirm in dio.cpp
  double f0_ceil;  // NOTE(review): presumably the upper F0 search bound — confirm in dio.cpp
  double channels_in_octave;
  double frame_period;  // msec
  int speed;  // (1, 2, ..., 12)
  double allowed_range;  // Threshold used for fixing the F0 contour.
} DioOption;
WORLD_BEGIN_C_DECLS

// Commands for FFT (This is the same as FFTW)
#define FFT_FORWARD 1
#define FFT_BACKWARD 2
#define FFT_ESTIMATE 3

// Complex number for FFT
// NOTE(review): presumably stored as [real, imaginary], as in FFTW — confirm in fft.cpp.
typedef double fft_complex[2];
// Struct used for FFT
// Holds the transform size, direction, flags, and the buffers given to the
// fft_plan_dft_* constructors below.
typedef struct {
  int n;
  int sign;
  unsigned int flags;
  fft_complex *c_in;
  double *in;
  fft_complex *c_out;
  double *out;
  double *input;
  int *ip;
  double *w;
} fft_plan;

// Plan constructors: complex-to-complex, complex-to-real and real-to-complex.
fft_plan fft_plan_dft_1d(int n, fft_complex *in, fft_complex *out, int sign,
  unsigned int flags);
fft_plan fft_plan_dft_c2r_1d(int n, fft_complex *in, double *out,
  unsigned int flags);
fft_plan fft_plan_dft_r2c_1d(int n, double *in, fft_complex *out,
  unsigned int flags);
// Runs the transform described by the plan.
void fft_execute(fft_plan p);
// Releases the plan.
// NOTE(review): presumably frees the plan's internal work buffers — confirm in fft.cpp.
void fft_destroy_plan(fft_plan p);

WORLD_END_C_DECLS
//-----------------------------------------------------------------------------
// Struct for Harvest
// Optional parameters passed to Harvest().
//-----------------------------------------------------------------------------
typedef struct {
  double f0_floor;  // NOTE(review): presumably the lower F0 search bound — confirm in harvest.cpp
  double f0_ceil;  // NOTE(review): presumably the upper F0 search bound — confirm in harvest.cpp
  double frame_period;  // NOTE(review): presumably the frame shift in msec — confirm in harvest.cpp
} HarvestOption;
//-----------------------------------------------------------------------------
// StoneMask() refines the F0 contour estimated by Dio().
//
// Input:
//   x                   : Input signal
//   x_length            : Length of the input signal
//   fs                  : Sampling frequency
//   temporal_positions  : Temporal information (time axis)
//   f0                  : f0 contour
//   f0_length           : Length of f0
//
// Output:
//   refined_f0          : Refined F0
//-----------------------------------------------------------------------------
void StoneMask(const double *x, int x_length, int fs,
    const double *temporal_positions, const double *f0, int f0_length,
    double *refined_f0);
16 | // 17 | // Input: 18 | // f0 : f0 contour 19 | // f0_length : Length of f0 20 | // spectrogram : Spectrogram estimated by CheapTrick 21 | // fft_size : FFT size 22 | // aperiodicity : Aperiodicity spectrogram based on D4C 23 | // frame_period : Temporal period used for the analysis 24 | // fs : Sampling frequency 25 | // y_length : Length of the output signal (Memory of y has been 26 | // allocated in advance) 27 | // Output: 28 | // y : Calculated speech 29 | //----------------------------------------------------------------------------- 30 | void Synthesis(const double *f0, int f0_length, 31 | const double * const *spectrogram, const double * const *aperiodicity, 32 | int fft_size, double frame_period, int fs, int y_length, double *y); 33 | 34 | WORLD_END_C_DECLS 35 | 36 | #endif // WORLD_SYNTHESIS_H_ 37 | -------------------------------------------------------------------------------- /libtts/Modules/Lib/World/tools/audioio.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // Copyright 2012 Masanori Morise 3 | // Author: mmorise [at] meiji.ac.jp (Masanori Morise) 4 | // Last update: 2021/02/15 5 | //----------------------------------------------------------------------------- 6 | #ifndef WORLD_AUDIOIO_H_ 7 | #define WORLD_AUDIOIO_H_ 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | //----------------------------------------------------------------------------- 14 | // wavwrite() writes a .wav file. 15 | // Input: 16 | // x : Input signal 17 | // x_length : Signal length of x [sample] 18 | // fs : Sampling frequency [Hz] 19 | // nbit : Quantization bit [bit] 20 | // filename : Name of the output signal. 21 | // Caution: 22 | // The variable nbit is not used in this function. 23 | // This function only supports the 16 bit. 
24 | //----------------------------------------------------------------------------- 25 | void wavwrite(const double *x, int x_length, int fs, int nbit, 26 | const char *filename); 27 | 28 | //----------------------------------------------------------------------------- 29 | // GetAudioLength() returns the length of .wav file. 30 | // Input: 31 | // filename : Filename of a .wav file. 32 | // Output: 33 | // The number of samples of the file .wav 34 | //----------------------------------------------------------------------------- 35 | int GetAudioLength(const char *filename); 36 | 37 | //----------------------------------------------------------------------------- 38 | // wavread() read a .wav file. 39 | // The memory of output x must be allocated in advance. 40 | // Input: 41 | // filename : Filename of the input file. 42 | // Output: 43 | // fs : Sampling frequency [Hz] 44 | // nbit : Quantization bit [bit] 45 | // x : The output waveform. 46 | //----------------------------------------------------------------------------- 47 | void wavread(const char* filename, int *fs, int *nbit, double *x); 48 | 49 | #ifdef __cplusplus 50 | } 51 | #endif 52 | 53 | #endif // WORLD_AUDIOIO_H_ 54 | -------------------------------------------------------------------------------- /libtts/Modules/Logger/MoeSSLogger.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <string> 3 | #include "../StringPreprocess.hpp" 4 | #include <filesystem> 5 | #include <mutex> 6 | #define __MOEVS_DEBUG_MESSAGE(msg) __MOEVS_DEBUG_INFO(__FILE__, __LINE__, msg) 7 | #define logger MoeSSLogger::GetLogger() 8 | inline std::string __MOEVS_DEBUG_INFO(const char* filename, int line, const char* msg) 9 | { 10 | return std::string("[In \"") + std::filesystem::path(filename).filename().string() + "\" Line " + std::to_string(line) + "] " + msg; 11 | } 12 | 13 | inline std::wstring __MOEVS_DEBUG_INFO(const char* filename, int line, const wchar_t* msg) 
14 | { 15 | return std::wstring(L"[In \"") + std::filesystem::path(filename).filename().wstring() + L"\" Line " + std::to_wstring(line) + L"] " + msg; 16 | } 17 | 18 | namespace MoeSSLogger 19 | { 20 | class Logger 21 | { 22 | public: 23 | using logger_fn = void(*)(const wchar_t*, const char*); 24 | Logger(); 25 | ~Logger(); 26 | Logger(logger_fn error_fn, logger_fn log_fn); 27 | void log(const std::wstring&); 28 | void log(const char*); 29 | void error(const std::wstring&); 30 | void error(const char*); 31 | void enable(bool _filelogger) 32 | { 33 | filelogger = _filelogger; 34 | } 35 | private: 36 | bool custom_logger_fn = false; 37 | std::filesystem::path cur_log_dir, logpath, errorpath; 38 | logger_fn cerror_fn = nullptr, cloggerfn = nullptr; 39 | FILE* log_file = nullptr, * error_file = nullptr; 40 | bool filelogger = true; 41 | std::mutex mx; 42 | }; 43 | 44 | Logger& GetLogger(); 45 | } -------------------------------------------------------------------------------- /libtts/Modules/Models/EnvManager.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * FileName: EnvManager.hpp 3 | * Note: MoeVoiceStudioCore 环境管理 4 | * 5 | * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com) 6 | * 7 | * This file is part of MoeVoiceStudioCore library. 8 | * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the 9 | * GNU Affero General Public License as published by the Free Software Foundation, either version 3 10 | * of the License, or any later version. 11 | * 12 | * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 13 | * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 14 | * See the GNU Affero General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Affero General Public License along with Foobar. 
17 | * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>. 18 | * 19 | * date: 2022-10-17 Create 20 | */ 21 | 22 | #pragma once 23 | #include <onnxruntime_cxx_api.h> 24 | 25 | #define MoeVoiceStudioCoreEnvManagerHeader namespace moevsenv{ 26 | #define MoeVoiceStudioCoreEnvManagerEnd } 27 | /* MoeVoiceStudioEnv holds raw pointers to an Ort::Env, Ort::SessionOptions and Ort::MemoryInfo together with the thread count, device ID and provider most recently recorded (all unsigned(-1) until set). The destructor calls Destory(). NOTE(review): Load(), Create() and Destory() are defined elsewhere; it is assumed that Destory() releases the pointed-to objects - confirm in EnvManager.cpp. */ 28 | MoeVoiceStudioCoreEnvManagerHeader 29 | class MoeVoiceStudioEnv 30 | { 31 | public: 32 | MoeVoiceStudioEnv() = default; 33 | ~MoeVoiceStudioEnv() { Destory(); } /* Copying is disabled: a member-wise copy would alias the raw pointers below, and both objects would then run Destory() on them from their destructors. */ MoeVoiceStudioEnv(const MoeVoiceStudioEnv&) = delete; MoeVoiceStudioEnv& operator=(const MoeVoiceStudioEnv&) = delete; 34 | void Load(unsigned ThreadCount, unsigned DeviceID, unsigned Provider); 35 | void Destory(); 36 | [[nodiscard]] bool IsEnabled() const; 37 | [[nodiscard]] Ort::Env* GetEnv() const { return GlobalOrtEnv; } 38 | [[nodiscard]] Ort::SessionOptions* GetSessionOptions() const { return GlobalOrtSessionOptions; } 39 | [[nodiscard]] Ort::MemoryInfo* GetMemoryInfo() const { return GlobalOrtMemoryInfo; } 40 | [[nodiscard]] int GetCurThreadCount() const { return (int)CurThreadCount; } 41 | [[nodiscard]] int GetCurDeviceID() const { return (int)CurDeviceID; } 42 | [[nodiscard]] int GetCurProvider() const { return (int)CurProvider; } 43 | private: 44 | void Create(unsigned ThreadCount_, unsigned DeviceID_, unsigned ExecutionProvider_); 45 | Ort::Env* GlobalOrtEnv = nullptr; 46 | Ort::SessionOptions* GlobalOrtSessionOptions = nullptr; 47 | Ort::MemoryInfo* GlobalOrtMemoryInfo = nullptr; 48 | unsigned CurThreadCount = unsigned(-1); 49 | unsigned CurDeviceID = unsigned(-1); 50 | unsigned CurProvider = unsigned(-1); 51 | OrtCUDAProviderOptionsV2* cuda_option_v2 = nullptr; 52 | }; 53 | 54 | MoeVoiceStudioEnv& GetGlobalMoeVSEnv(); 55 | 56 | MoeVoiceStudioCoreEnvManagerEnd -------------------------------------------------------------------------------- /libtts/Modules/Models/header/Tacotron.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "ModelBase.hpp" 3 | 4 | INFERCLASSHEADER 5 | 6 | class Tacotron2 : public TTS 7 | { 8 | public: 9 | Tacotron2(const MJson&, 
const callback&, const callback_params&, const DurationCallback&, Device _dev = Device::CPU); 10 | 11 | ~Tacotron2() override; 12 | 13 | std::vector<int16_t> Inference(std::wstring& _inputLens) const override; 14 | 15 | [[nodiscard]] std::vector<int16_t> Inference(const MoeVSProject::TTSParams& _input) const override; 16 | 17 | static void cat(std::vector<float>& tensorA, std::vector<int64>& Shape, const MTensor& tensorB) { 18 | const int64 n = Shape[1]; 19 | for (int64 i = n; i > 0; --i) 20 | tensorA.insert(tensorA.begin() + (i * Shape[2]), tensorB.GetTensorData<float>()[i - 1]); 21 | ++Shape[2]; 22 | } 23 | private: 24 | Ort::Session* sessionEncoder = nullptr; 25 | Ort::Session* sessionDecoderIter = nullptr; 26 | Ort::Session* sessionPostNet = nullptr; 27 | Ort::Session* sessionGan = nullptr; 28 | 29 | const std::vector<const char*> ganIn = { "x" }; 30 | const std::vector<const char*> ganOut = { "audio" }; 31 | const std::vector<const char*> inputNodeNamesSessionEncoder = { "sequences","sequence_lengths" }; 32 | const std::vector<const char*> outputNodeNamesSessionEncoder = { "memory","processed_memory","lens" }; 33 | const std::vector<const char*> inputNodeNamesSessionDecoderIter = { "decoder_input","attention_hidden","attention_cell","decoder_hidden","decoder_cell","attention_weights","attention_weights_cum","attention_context","memory","processed_memory","mask" }; 34 | const std::vector<const char*> outputNodeNamesSessionDecoderIter = { "decoder_output","gate_prediction","out_attention_hidden","out_attention_cell","out_decoder_hidden","out_decoder_cell","out_attention_weights","out_attention_weights_cum","out_attention_context" }; 35 | const std::vector<const char*> inputNodeNamesSessionPostNet = { "mel_outputs" }; 36 | const std::vector<const char*> outputNodeNamesSessionPostNet = { "mel_outputs_postnet" }; 37 | }; 38 | 39 | INFERCLASSEND -------------------------------------------------------------------------------- /libtts/Modules/Modules.cpp: 
-------------------------------------------------------------------------------- 1 | #include "Modules.hpp" 2 | #include <stdexcept> 3 | namespace MoeVSModuleManager 4 | { 5 | bool MoeVoiceStudioCoreInitStat = false; 6 | 7 | MoeVoiceStudioCore::TextToSpeech* CurTextToSpeechModel = nullptr; 8 | 9 | void MoeVoiceStudioCoreInitSetup() 10 | { 11 | if (MoeVoiceStudioCoreInitStat) 12 | return; 13 | const auto BasicCleanerDir = GetCurrentFolder() + L"/G2P/BasicCleaner.dll"; 14 | if (_waccess(BasicCleanerDir.c_str(), 0) != -1) 15 | { 16 | const auto Cleaner = MoeVSG2P::GetDefCleaner(); 17 | Cleaner->loadG2p(BasicCleanerDir); 18 | Cleaner->GetCleaner().LoadDict(GetCurrentFolder() + L"/G2P"); 19 | Cleaner->loadDict(GetCurrentFolder() + L"/Dict/BasicDict.json"); 20 | } 21 | MoeVoiceStudioCoreInitStat = true; 22 | } 23 | 24 | MoeVoiceStudioCore::TextToSpeech* GetCurTTSModel() 25 | { 26 | return CurTextToSpeechModel; 27 | } 28 | 29 | void UnloadTTSModel() 30 | { 31 | delete CurTextToSpeechModel; 32 | CurTextToSpeechModel = nullptr; 33 | SamplingRate = 32000; 34 | SpeakerCount = 0; 35 | } 36 | /* LoadTTSModel: unloads the current model first, then builds a GptSoVits when Config["Type"] is "GPT-SoVits" and a Vits for any other type, and finally refreshes SamplingRate from the new model. "Tacotron"/"Tacotron2" configs are rejected with std::runtime_error (derived from std::exception, so existing catch (std::exception&) handlers keep working); at that point the previous model has already been unloaded. SpeakerCount is not updated here and keeps the 0 written by UnloadTTSModel(). */ 37 | void LoadTTSModel(const MJson& Config, 38 | const MoeVoiceStudioCore::MoeVoiceStudioModule::ProgressCallback& Callback, 39 | int ProviderID, int NumThread, int DeviceID, 40 | const MoeVoiceStudioCore::TextToSpeech::DurationCallback& DurationCallback) 41 | { 42 | UnloadTTSModel(); 43 | if (Config["Type"].GetString() == "Tacotron" || Config["Type"].GetString() == "Tacotron2") 44 | throw std::runtime_error("Tacotron Not Support Yet"); /* std::exception(const char*) is an MSVC-only extension */ 45 | if (Config["Type"].GetString() == "GPT-SoVits") 46 | { 47 | MoeVoiceStudioCore::DestoryAllBerts(); 48 | CurTextToSpeechModel = dynamic_cast<MoeVoiceStudioCore::TextToSpeech*>( 49 | new MoeVoiceStudioCore::GptSoVits( 50 | Config, Callback, DurationCallback, 51 | MoeVoiceStudioCore::MoeVoiceStudioModule::ExecutionProviders(ProviderID), 52 | DeviceID, NumThread 53 | ) 54 | ); 55 | } 56 | else 57 | { 58 | CurTextToSpeechModel = dynamic_cast<MoeVoiceStudioCore::TextToSpeech*>( 59 | 
new MoeVoiceStudioCore::Vits( 60 | Config, Callback, DurationCallback, 61 | MoeVoiceStudioCore::MoeVoiceStudioModule::ExecutionProviders(ProviderID), 62 | DeviceID, NumThread 63 | ) 64 | ); 65 | } 66 | SamplingRate = CurTextToSpeechModel->GetSamplingRate(); 67 | } 68 | 69 | } -------------------------------------------------------------------------------- /libtts/Modules/Modules.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * FileName: Modules.hpp 3 | * Note: MoeVoiceStudioCore组件管理 4 | * 5 | * Copyright (C) 2022-2023 NaruseMioShirakana (shirakanamio@foxmail.com) 6 | * 7 | * This file is part of MoeVoiceStudioCore library. 8 | * MoeVoiceStudioCore library is free software: you can redistribute it and/or modify it under the terms of the 9 | * GNU Affero General Public License as published by the Free Software Foundation, either version 3 10 | * of the License, or any later version. 11 | * 12 | * MoeVoiceStudioCore library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 13 | * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 14 | * See the GNU Affero General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU Affero General Public License along with Foobar. 17 | * If not, see <https://www.gnu.org/licenses/agpl-3.0.html>. 
18 | * 19 | * date: 2022-10-17 Create 20 | */ 21 | 22 | #pragma once 23 | #include "Models/header/Vits.hpp" 24 | #include "Models/header/GPT-SoVits.hpp" 25 | 26 | namespace MoeVSModuleManager 27 | { 28 | inline int64_t SamplingRate = 32000; 29 | inline int64_t SpeakerCount = 0; 30 | 31 | /** 32 | * \brief Initialize all components 33 | */ 34 | void MoeVoiceStudioCoreInitSetup(); 35 | 36 | /** 37 | * \brief Get the current model 38 | * \return Pointer to the current model 39 | */ 40 | MoeVoiceStudioCore::TextToSpeech* GetCurTTSModel(); 41 | 42 | /** 43 | * \brief Unload the model 44 | */ 45 | void UnloadTTSModel(); 46 | 47 | /** 48 | * \brief Load a model 49 | * \param Config An instance of the MJson class (the JSON of the configuration file) 50 | * \param Callback Progress-bar callback 51 | * \param ProviderID ID of the provider among all providers (follows the enum class definition) 52 | * \param NumThread Number of threads for CPU inference (preferably set fairly high; operators the GPU does not support may also fall back to the CPU) 53 | * \param DeviceID GPU device ID 54 | * \param DurationCallback Duration callback; defaults to a no-op (capture-less lambda, since a capture-default is not allowed in a default argument) 55 | */ 56 | void LoadTTSModel(const MJson& Config, 57 | const MoeVoiceStudioCore::MoeVoiceStudioModule::ProgressCallback& Callback, 58 | int ProviderID, int NumThread, int DeviceID, 59 | const MoeVoiceStudioCore::TextToSpeech::DurationCallback& DurationCallback = [](std::vector<float>&) {}); 60 | } 61 | 62 | namespace MoeVSRename 63 | { 64 | using Vits = MoeVoiceStudioCore::Vits; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /libtts/Modules/README.md: -------------------------------------------------------------------------------- 1 | # Example 2 | ```c++ 3 | #include "Modules/Models/header/Vits.hpp" 4 | 5 | int main(){ 6 | rapidjson::Document Config; 7 | Config.Parse("Your Config"); 8 | 9 | //Progress bar 10 | InferClass::BaseModelType::callback a_callback = [](size_t a, size_t b) {std::cout << std::to_string((float)a * 100.f / (float)b) << "%\n"; }; 11 | 12 | //return params for inference 13 | InferClass::BaseModelType::callback_params b_callback = []() 14 | { 15 | auto cbaaa = InferClass::InferConfigs(); 16 | cbaaa.kmeans_rate = 0.5; 17 | cbaaa.keys = 0; 18 | 
return cbaaa; 19 | }; 20 | 21 | //modify duration per phoneme 22 | InferClass::TTS::DurationCallback c_callback = [](std::vector<float>&) {}; 23 | 24 | std::vector<int16_t> output; 25 | try 26 | { 27 | std::wstring inp("watashinoonaniomitekudasai"); 28 | auto model = dynamic_cast<InferClass::BaseModelType*>(new InferClass::VitsSvc(modConfigJson, a_callback, b_callback)); 29 | 30 | output = model->Inference(inp); 31 | 32 | Wav outWav(model->GetSamplingRate(), output.size() * 2, output.data()); 33 | outWav.Writef(L"test.wav"); 34 | 35 | delete model; 36 | } 37 | catch(std::exception& e) 38 | { 39 | std::cout << e.what(); 40 | } 41 | } 42 | 43 | ``` 44 | -------------------------------------------------------------------------------- /libtts/dllmain.cpp: -------------------------------------------------------------------------------- 1 | // dllmain.cpp : 定义 DLL 应用程序的入口点。 2 | #include "windows.h" 3 | 4 | BOOL APIENTRY DllMain( HMODULE hModule, 5 | DWORD ul_reason_for_call, 6 | LPVOID lpReserved 7 | ) 8 | { 9 | switch (ul_reason_for_call) 10 | { 11 | case DLL_PROCESS_ATTACH: 12 | case DLL_THREAD_ATTACH: 13 | case DLL_THREAD_DETACH: 14 | case DLL_PROCESS_DETACH: 15 | break; 16 | } 17 | return TRUE; 18 | } 19 | 20 | -------------------------------------------------------------------------------- /libtts/framework.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifdef LIBTTS_EXPORTS 3 | #define LibTTSApi __declspec(dllexport) 4 | #else 5 | #define LibTTSApi __declspec(dllimport) 6 | #endif -------------------------------------------------------------------------------- /logo/logo(AIGen).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/logo/logo(AIGen).png -------------------------------------------------------------------------------- /logo/logo256(AIGen).png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/logo/logo256(AIGen).png -------------------------------------------------------------------------------- /logo/logo512(AIGen).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PriesiaMioShirakana/DragonianVoice/d3efc14f69f91c99b2ea6d96997a129e4e771d45/logo/logo512(AIGen).png --------------------------------------------------------------------------------