├── .gitignore ├── EnglishPhoneticProcessor.cpp ├── EnglishPhoneticProcessor.h ├── FastSpeech2.cpp ├── FastSpeech2.h ├── LICENSE.md ├── MultiBandMelGAN.cpp ├── MultiBandMelGAN.h ├── README.md ├── TensorVox.pro ├── TextTokenizer.cpp ├── TextTokenizer.h ├── Voice.cpp ├── Voice.h ├── VoxCommon.cpp ├── VoxCommon.hpp ├── attention.cpp ├── attention.h ├── batchdenoisedlg.cpp ├── batchdenoisedlg.h ├── batchdenoisedlg.ui ├── espeakphonemizer.cpp ├── espeakphonemizer.h ├── ext ├── AudioFile.hpp ├── ByteArr.cpp ├── ByteArr.h ├── CppFlow │ ├── context.h │ ├── cppflow.h │ ├── datatype.h │ ├── defer.h │ ├── model.h │ ├── ops.h │ ├── raw_ops.h │ └── tensor.h ├── Qt-Frameless-Window-DarkStyle-master │ ├── .gitignore │ ├── DarkStyle.cpp │ ├── DarkStyle.h │ ├── README.md │ ├── darkstyle.qrc │ ├── darkstyle │ │ ├── darkstyle.qss │ │ ├── icon_branch_closed.png │ │ ├── icon_branch_end.png │ │ ├── icon_branch_more.png │ │ ├── icon_branch_open.png │ │ ├── icon_checkbox_checked.png │ │ ├── icon_checkbox_checked_disabled.png │ │ ├── icon_checkbox_checked_pressed.png │ │ ├── icon_checkbox_indeterminate.png │ │ ├── icon_checkbox_indeterminate_disabled.png │ │ ├── icon_checkbox_indeterminate_pressed.png │ │ ├── icon_checkbox_unchecked.png │ │ ├── icon_checkbox_unchecked_disabled.png │ │ ├── icon_checkbox_unchecked_pressed.png │ │ ├── icon_close.png │ │ ├── icon_radiobutton_checked.png │ │ ├── icon_radiobutton_checked_disabled.png │ │ ├── icon_radiobutton_checked_pressed.png │ │ ├── icon_radiobutton_unchecked.png │ │ ├── icon_radiobutton_unchecked_disabled.png │ │ ├── icon_radiobutton_unchecked_pressed.png │ │ ├── icon_restore.png │ │ ├── icon_sepvline.png │ │ ├── icon_tbclose.png │ │ ├── icon_tbclose_hover.png │ │ ├── icon_undock.png │ │ └── icon_vline.png │ ├── frameless_window_dark.pro │ ├── framelesswindow.qrc │ ├── framelesswindow │ │ ├── framelesswindow.cpp │ │ ├── framelesswindow.h │ │ ├── framelesswindow.ui │ │ ├── windowdragger.cpp │ │ └── windowdragger.h │ ├── images │ │ ├── 
icon_window_close.png │ │ ├── icon_window_maximize.png │ │ ├── icon_window_minimize.png │ │ └── icon_window_restore.png │ ├── screenshot_mac_frameless_window_qt_dark_style_disabled.png │ ├── screenshot_mac_frameless_window_qt_dark_style_enabled.png │ └── screenshot_win7_frameless_window_qt_dark_style_enabled.png ├── ZCharScanner.cpp ├── ZCharScanner.h ├── ZFile.cpp ├── ZFile.h ├── json.hpp ├── qcustomplot.cpp └── qcustomplot.h ├── g2p_train ├── README.md ├── config │ ├── default.yaml │ └── longer.yaml ├── models │ └── English.zip └── train_and_export.py ├── istftnettorch.cpp ├── istftnettorch.h ├── main.cpp ├── mainwindow.cpp ├── mainwindow.h ├── mainwindow.ui ├── melgen.cpp ├── melgen.h ├── modelinfodlg.cpp ├── modelinfodlg.h ├── modelinfodlg.ui ├── phddialog.cpp ├── phddialog.h ├── phddialog.ui ├── phonemizer.cpp ├── phonemizer.h ├── phoneticdict.cpp ├── phoneticdict.h ├── phonetichighlighter.cpp ├── phonetichighlighter.h ├── res ├── clear64.png ├── infico.png ├── multiwav.png ├── noim.png ├── phoneticdico.png ├── random64.png ├── refresh.png ├── speak64.png ├── stdico.png └── wav.png ├── spectrogram.cpp ├── spectrogram.h ├── stdres.qrc ├── tacotron2.cpp ├── tacotron2.h ├── tacotron2torch.cpp ├── tacotron2torch.h ├── tfg2p.cpp ├── tfg2p.h ├── torchmoji.cpp ├── torchmoji.h ├── track.cpp ├── track.h ├── vits.cpp ├── vits.h ├── voicemanager.cpp ├── voicemanager.h ├── voxer.cpp ├── voxer.h └── winicon.ico /.gitignore: -------------------------------------------------------------------------------- 1 | # This file is used to ignore files which are generated 2 | # ---------------------------------------------------------------------------- 3 | deps/* 4 | rdeployed/* 5 | brelease/* 6 | bdebug/* 7 | *.wav 8 | *~ 9 | *.autosave 10 | *.a 11 | *.core 12 | *.moc 13 | *.o 14 | *.obj 15 | *.orig 16 | *.rej 17 | *.so 18 | *.so.* 19 | *_pch.h.cpp 20 | *_resource.rc 21 | *.qm 22 | .#* 23 | *.*# 24 | core 25 | !core/ 26 | tags 27 | .DS_Store 28 | .directory 29 | *.debug 30 | 
Makefile* 31 | *.prl 32 | *.app 33 | moc_*.cpp 34 | ui_*.h 35 | qrc_*.cpp 36 | Thumbs.db 37 | *.res 38 | *.rc 39 | *.qmake.cache 40 | *.qmake.stash 41 | release/* 42 | 43 | # qtcreator generated files 44 | *.pro.user* 45 | 46 | # xemacs temporary files 47 | *.flc 48 | 49 | # Vim temporary files 50 | .*.swp 51 | 52 | # Visual Studio generated files 53 | *.ib_pdb_index 54 | *.idb 55 | *.ilk 56 | *.pdb 57 | *.sln 58 | *.suo 59 | *.vcproj 60 | *vcproj.*.*.user 61 | *.ncb 62 | *.sdf 63 | *.opensdf 64 | *.vcxproj 65 | *vcxproj.* 66 | 67 | # MinGW generated files 68 | *.Debug 69 | *.Release 70 | 71 | # Python byte code 72 | *.pyc 73 | 74 | # Binaries 75 | # -------- 76 | *.dll 77 | *.exe 78 | 79 | deps.zip 80 | README.md.backup 81 | -------------------------------------------------------------------------------- /EnglishPhoneticProcessor.cpp: -------------------------------------------------------------------------------- 1 | #include "EnglishPhoneticProcessor.h" 2 | #include "VoxCommon.hpp" 3 | 4 | using namespace std; 5 | 6 | bool EnglishPhoneticProcessor::Initialize(Phonemizer* InPhn, ESpeakPhonemizer *InENGPh) 7 | { 8 | 9 | 10 | Phoner = InPhn; 11 | Tokenizer.SetAllowedChars(Phoner->GetGraphemeChars()); 12 | ENG_Phonemizer = InENGPh; 13 | 14 | 15 | 16 | 17 | return true; 18 | } 19 | 20 | 21 | std::string EnglishPhoneticProcessor::ProcessTextPhonetic(const std::string& InText, const std::vector &InPhonemes, const std::vector& InDict, ETTSLanguageType::Enum InLanguageType, bool IsTac) 22 | { 23 | if (!Phoner) 24 | return "ERROR"; 25 | 26 | 27 | 28 | vector Words = Tokenizer.Tokenize(InText,IsTac); 29 | 30 | string Assemble = ""; 31 | 32 | 33 | if (InLanguageType == ETTSLanguageType::Char) 34 | { 35 | for (size_t w = 0; w < Words.size();w++) 36 | { 37 | Assemble.append(Words[w]); 38 | 39 | if (w > 0) 40 | Assemble.append(" "); 41 | 42 | } 43 | 44 | if (Assemble[Assemble.size() - 1] == ' ') 45 | Assemble.pop_back(); 46 | 47 | return Assemble; 48 | 49 | 50 | 51 | } 52 | 53 
| // Make a copy of the dict passed. 54 | std::vector CurrentDict = InDict; 55 | 56 | 57 | for (size_t w = 0; w < Words.size();w++) 58 | { 59 | const string& Word = Words[w]; 60 | 61 | 62 | if (Word.size() > 22) 63 | continue; 64 | 65 | 66 | // Double email symbol indicates Tacotron punctuation handling 67 | if (Word.find("@@") != std::string::npos) 68 | { 69 | std::string AddPonct = Word.substr(2); // Remove the @@ 70 | Assemble.append(" "); 71 | Assemble.append(AddPonct); 72 | Assemble.append(" "); 73 | 74 | continue; 75 | 76 | 77 | } 78 | 79 | if (Word.find("@") != std::string::npos){ 80 | std::u32string AddPh = VoxUtil::StrToU32(Word.substr(1)); // Remove the @ 81 | size_t OutId = 0; 82 | if (VoxUtil::FindInVec(AddPh,InPhonemes,OutId)) 83 | { 84 | Assemble.append(VoxUtil::U32ToStr(InPhonemes[OutId])); 85 | Assemble.append(" "); 86 | 87 | 88 | } 89 | 90 | continue; 91 | 92 | } 93 | 94 | 95 | 96 | 97 | size_t OverrideIdx = 0; 98 | if (!ENG_Phonemizer && VoxUtil::FindInVec2(Word,InDict,OverrideIdx)) 99 | { 100 | Assemble.append(InDict[OverrideIdx].PhSpelling); 101 | Assemble.append(" "); 102 | continue; 103 | } 104 | 105 | 106 | 107 | std::string Res = Word; 108 | if (!ENG_Phonemizer){ 109 | Res = Phoner->ProcessWord(Word,0.001f); 110 | CurrentDict.push_back({Word,Res,""}); 111 | } 112 | 113 | 114 | // Cache the word in the override dict so next time we don't have to research it 115 | 116 | 117 | Assemble.append(Res); 118 | Assemble.append(" "); 119 | 120 | 121 | 122 | 123 | 124 | } 125 | 126 | 127 | // eSpeak phonemizer takes in whole thing 128 | if (ENG_Phonemizer){ 129 | 130 | Assemble = ENG_Phonemizer->Phonemize(Assemble); 131 | } 132 | 133 | 134 | // Delete last space if there is 135 | if (Assemble[Assemble.size() - 1] == ' ') 136 | Assemble.pop_back(); 137 | 138 | 139 | 140 | 141 | return Assemble; 142 | } 143 | 144 | EnglishPhoneticProcessor::EnglishPhoneticProcessor() 145 | { 146 | Phoner = nullptr; 147 | ENG_Phonemizer = nullptr; 148 | } 149 | 150 | 
EnglishPhoneticProcessor::EnglishPhoneticProcessor(Phonemizer *InPhn, ESpeakPhonemizer *InENGPh) 151 | { 152 | Initialize(InPhn,InENGPh); 153 | 154 | } 155 | 156 | 157 | 158 | EnglishPhoneticProcessor::~EnglishPhoneticProcessor() 159 | { 160 | // Causes annoying crash on exit. It's also irrelevant because the OS frees what little memory this had. 161 | /* 162 | if (Phoner) 163 | delete Phoner; 164 | 165 | */ 166 | } 167 | -------------------------------------------------------------------------------- /EnglishPhoneticProcessor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "TextTokenizer.h" 3 | 4 | 5 | #include "phoneticdict.h" 6 | #include "phonemizer.h" 7 | #include "espeakphonemizer.h" 8 | 9 | class EnglishPhoneticProcessor 10 | { 11 | private: 12 | TextTokenizer Tokenizer; 13 | Phonemizer* Phoner; 14 | 15 | ESpeakPhonemizer* ENG_Phonemizer; 16 | 17 | inline bool FileExists(const std::string& name) { 18 | std::ifstream f(name.c_str()); 19 | return f.good(); 20 | } 21 | 22 | public: 23 | bool Initialize(Phonemizer *InPhn,ESpeakPhonemizer* InENGPh = nullptr); 24 | std::string ProcessTextPhonetic(const std::string& InText, const std::vector &InPhonemes, const std::vector& InDict, ETTSLanguageType::Enum InLanguageType, bool IsTac); 25 | EnglishPhoneticProcessor(); 26 | EnglishPhoneticProcessor(Phonemizer *InPhn,ESpeakPhonemizer* InENGPh = nullptr); 27 | ~EnglishPhoneticProcessor(); 28 | 29 | inline TextTokenizer& GetTokenizer() {return Tokenizer;} 30 | }; 31 | 32 | -------------------------------------------------------------------------------- /FastSpeech2.cpp: -------------------------------------------------------------------------------- 1 | #include "FastSpeech2.h" 2 | 3 | 4 | 5 | FastSpeech2::FastSpeech2() 6 | { 7 | } 8 | 9 | 10 | TFTensor FastSpeech2::DoInference(const std::vector& InputIDs,const std::vector& ArgsFloat,const std::vector ArgsInt, int32_t SpeakerID , int32_t EmotionID) 11 | { 12 
| if (!CurrentMdl) 13 | throw std::exception("Tried to do inference on unloaded or invalid model!"); 14 | 15 | // Convenience reference so that we don't have to constantly derefer pointers. 16 | cppflow::model& Mdl = *CurrentMdl; 17 | 18 | // This is the shape of the input IDs, our equivalent to tf.expand_dims. 19 | 20 | std::vector InputIDShape = { 1, (int64_t)InputIDs.size() }; 21 | 22 | // Define the tensors 23 | cppflow::tensor input_ids{InputIDs, InputIDShape }; 24 | cppflow::tensor energy_ratios{ ArgsFloat[1] }; 25 | cppflow::tensor f0_ratios{ArgsFloat[2]}; 26 | cppflow::tensor speaker_ids{ SpeakerID }; 27 | cppflow::tensor speed_ratios{ ArgsFloat[0] }; 28 | cppflow::tensor* emotion_ids = nullptr; 29 | 30 | 31 | 32 | 33 | 34 | 35 | // Vector of input tensors 36 | TensorVec Inputs = {{"serving_default_input_ids:0",input_ids}, 37 | {"serving_default_speaker_ids:0",speaker_ids}, 38 | {"serving_default_energy_ratios:0",energy_ratios}, 39 | {"serving_default_f0_ratios:0",f0_ratios}, 40 | {"serving_default_speed_ratios:0",speed_ratios}}; 41 | 42 | // This is a multi-emotion model 43 | if (EmotionID != -1) 44 | { 45 | emotion_ids = new cppflow::tensor{EmotionID}; 46 | Inputs.push_back({"serving_default_emotion_ids:0",*emotion_ids}); 47 | 48 | 49 | } 50 | 51 | 52 | 53 | 54 | 55 | // Do inference 56 | // If we don't extract every single output it crashes. 
57 | auto Outputs = Mdl(Inputs,{"StatefulPartitionedCall:0","StatefulPartitionedCall:1","StatefulPartitionedCall:2","StatefulPartitionedCall:3","StatefulPartitionedCall:4"}); 58 | 59 | // Define output and return it 60 | TFTensor Output = VoxUtil::CopyTensor(Outputs[1]); 61 | 62 | // We allocated the emotion_ids cppflow::tensor dynamically, delete it 63 | if (emotion_ids) 64 | delete emotion_ids; 65 | 66 | // We could just straight out define it in the return statement, but I like it more this way 67 | 68 | return Output; 69 | } 70 | 71 | FastSpeech2::~FastSpeech2() 72 | { 73 | 74 | } 75 | -------------------------------------------------------------------------------- /FastSpeech2.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "melgen.h" 4 | 5 | 6 | class FastSpeech2 : public MelGen 7 | { 8 | 9 | public: 10 | FastSpeech2(); 11 | 12 | 13 | 14 | /* 15 | Do inference on a FastSpeech2 model. 16 | 17 | -> InputIDs: Input IDs of tokens for inference 18 | -> SpeakerID: ID of the speaker in the model to do inference on. If single speaker, always leave at 0. If multispeaker, refer to your model. 19 | -> (In ArgsFloat)Speed, Energy, F0: Parameters for FS2 inference. Leave at 1.f for defaults 20 | 21 | <- Returns: TFTensor with shape {1,,80} containing contents of mel spectrogram. 
22 | */ 23 | TFTensor DoInference(const std::vector& InputIDs,const std::vector& ArgsFloat,const std::vector ArgsInt, int32_t SpeakerID = 0, int32_t EmotionID = -1); 24 | 25 | 26 | 27 | ~FastSpeech2(); 28 | }; 29 | 30 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 ZDisket 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MultiBandMelGAN.cpp: -------------------------------------------------------------------------------- 1 | #include "MultiBandMelGAN.h" 2 | #define IF_EXCEPT(cond,ex) if (cond){throw std::exception(ex);} 3 | 4 | 5 | 6 | bool MultiBandMelGAN::Initialize(const std::string & VocoderPath) 7 | { 8 | try { 9 | MelGAN = std::make_unique(VocoderPath); 10 | } 11 | catch (...) { 12 | return false; 13 | 14 | } 15 | return true; 16 | 17 | 18 | } 19 | 20 | TFTensor MultiBandMelGAN::DoInference(const TFTensor& InMel) 21 | { 22 | IF_EXCEPT(!MelGAN, "Tried to infer MB-MelGAN on uninitialized model!!!!") 23 | 24 | // Convenience reference so that we don't have to constantly derefer pointers. 25 | cppflow::model& Mdl = *MelGAN; 26 | 27 | 28 | cppflow::tensor input_mels{ InMel.Data, InMel.Shape}; 29 | 30 | 31 | auto out_audio = Mdl({{"serving_default_mels:0",input_mels}}, {"StatefulPartitionedCall:0"})[0]; 32 | TFTensor RetTensor = VoxUtil::CopyTensor(out_audio); 33 | 34 | return RetTensor; 35 | 36 | 37 | 38 | 39 | 40 | } 41 | 42 | MultiBandMelGAN::MultiBandMelGAN() 43 | { 44 | MelGAN = nullptr; 45 | } 46 | 47 | 48 | MultiBandMelGAN::~MultiBandMelGAN() 49 | { 50 | 51 | 52 | } 53 | -------------------------------------------------------------------------------- /MultiBandMelGAN.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "VoxCommon.hpp" 4 | #include 5 | class MultiBandMelGAN 6 | { 7 | private: 8 | std::unique_ptr MelGAN; 9 | 10 | 11 | public: 12 | virtual bool Initialize(const std::string& VocoderPath); 13 | 14 | 15 | // Do MultiBand MelGAN inference including PQMF 16 | // -> InMel: Mel spectrogram (shape [1, xx, 80]) 17 | // <- Returns: Tensor data [4, xx, 1] 18 | virtual TFTensor DoInference(const TFTensor& InMel); 19 | 20 | MultiBandMelGAN(); 21 | ~MultiBandMelGAN(); 22 | }; 23 | 24 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TensorVox 2 | 3 | [![](https://dcbadge.vercel.app/api/server/yqFDAWH)](https://discord.gg/yqFDAWH) 4 | 5 | TensorVox is an application designed to enable user-friendly and lightweight neural speech synthesis in the desktop, aimed at increasing accessibility to such technology. 6 | 7 | Powered mainly by [TensorFlowTTS](https://github.com/TensorSpeech/TensorFlowTTS) and also by [Coqui-TTS](https://github.com/coqui-ai/TTS) and [VITS](https://github.com/jaywalnut310/vits), it is written in pure C++/Qt, using the Tensorflow C API for interacting with Tensorflow models (first two), and LibTorch for PyTorch ones. This way, we can perform inference without having to install gigabytes worth of Python libraries, just a few DLLs. 8 | 9 | ![Interface with Tac2 model loaded](https://i.imgur.com/wtPzzNh.png) 10 | 11 | 12 | ### Try it out 13 | 14 | [Detailed guide in Google Docs](https://docs.google.com/document/d/1OS1kfb19bvpPPkF71Vbak_b735mi7epjUanIfPG671M/edit?usp=sharing) 15 | 16 | Grab a copy from the releases, extract the .zip and check [the Google Drive folder](https://drive.google.com/drive/folders/1atUyxBbstKZpMqQEZMdNmRF2AKrlahKy?usp=sharing) for models and installation instructions 17 | 18 | If you're interested in using your own model, first you need to train then export it. 19 | 20 | 21 | ## Supported architectures 22 | 23 | TensorVox supports models from three repos: 24 | 25 | - **TensorFlowTTS**: FastSpeech2, Tacotron2, both char and phoneme based and Multi-Band MelGAN. Here's a Colab notebook demonstrating how to export the LJSpeech pretrained, char-based Tacotron2 model: [](https://colab.research.google.com/drive/1KLqZ1rkD4Enw7zpTgXGL6if7e5s0UeWa?usp=sharing) 26 | - **Coqui-TTS:** Tacotron2 (phoneme-based IPA) and Multi-Band MelGAN, after converting from PyTorch to Tensorflow. 
Here's a notebook showing how to export the LJSpeech DDC model: [](https://colab.research.google.com/drive/15CdGEAu_-KezV1XxwzVfQiFSm0tveBkC?usp=sharing) 27 | - **jaywalnut310/VITS:** VITS, which is a fully E2E model. (Stressed IPA as phonemes) Export notebook: [](https://colab.research.google.com/drive/1BSGE5DQYweXBWrwPOmb6CRPUU8H5mBvb?usp=sharing) 28 | 29 | These examples should provide you with enough guidance to understand what is needed. If you're looking to train a model specifically for this purpose then I recommend TensorFlowTTS, as it is the one with the best support, and VITS, as it's the closest thing to perfect. 30 | As for languages, out-of-the-box support is provided for English (Coqui and TFTTS, VITS), German and Spanish (only TensorFlowTTS); that is, you won't have to do anything. You can add languages without modifying code, as long as the phoneme set is IPA (stressed or nonstressed), ARPA, or GlobalPhone (open an issue and I'll explain it to you). 31 | 32 | 33 | ## Build instructions 34 | Currently, only Windows 10 x64 (although I've heard reports of it running on 8.1) is supported. 35 | 36 | **Requirements:** 37 | 1. Qt Creator 38 | 2. MSVC 2017 (v141) compiler 39 | 40 | **Primed build (with all provided libraries):** 41 | 42 | 1. Download [precompiled binary dependencies and includes](https://drive.google.com/file/d/1N6IxSpsgemS94z_v82toXhiNs2tLXkz6/view?usp=sharing) 43 | 2. Unzip it so that the `deps` folder is in the same place as the .pro and main source files. 44 | 3. Open the project with Qt Creator, add your compiler and compile 45 | 46 | Note that to try your shiny new executable you'll need to download a release of the program as described above and replace the executable in that release with your new one, so you have all the DLLs in place. 47 | 48 | TODO: Add instructions for compiling from scratch. 
49 | 50 | ## Externals (and thanks) 51 | 52 | - **LibTorch**: https://pytorch.org/cppdocs/installing.html 53 | 54 | - **Tensorflow C API**: [https://www.tensorflow.org/install/lang_c](https://www.tensorflow.org/install/lang_c) 55 | - **CppFlow** (TF C API -> C++ wrapper): [https://github.com/serizba/cppflow](https://github.com/serizba/cppflow) 56 | - **AudioFile** (for WAV export): [https://github.com/adamstark/AudioFile](https://github.com/adamstark/AudioFile) 57 | - **Frameless Dark Style Window**: https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle 58 | - **JSON for modern C++**: https://github.com/nlohmann/json 59 | - **r8brain-free-src** (Resampling): https://github.com/avaneev/r8brain-free-src 60 | - **rnnoise** (CMake version, denoising output): https://github.com/almogh52/rnnoise-cmake 61 | - **Logitech LED Illumination SDK** (Mouse RGB integration): https://www.logitechg.com/en-us/innovation/developer-lab.html 62 | - **QCustomPlot** : https://www.qcustomplot.com/index.php/introduction 63 | - **libnumbertext** : https://github.com/Numbertext/libnumbertext 64 | 65 | 66 | ## Contact 67 | You can open an issue here or join the [Discord server](https://discord.gg/yqFDAWH) and discuss/ask anything there 68 | 69 | For media/licensing/any other formal stuff inquiries, send to this email: 9yba9c1y@anonaddy.me 70 | 71 | ## Note about licensing 72 | 73 | This program itself is MIT licensed, but for the models you use, their license terms apply. 
For example, if you're in Vietnam and using TensorFlowTTS models, you'll have to check [here](https://github.com/TensorSpeech/TensorFlowTTS#license) for some details 74 | -------------------------------------------------------------------------------- /TensorVox.pro: -------------------------------------------------------------------------------- 1 | QT += core gui 2 | QT += multimedia 3 | QT += winextras 4 | 5 | greaterThan(QT_MAJOR_VERSION, 4): QT += widgets printsupport 6 | 7 | CONFIG += c++17 8 | 9 | # The following define makes your compiler emit warnings if you use 10 | # any Qt feature that has been marked deprecated (the exact warnings 11 | # depend on your compiler). Please consult the documentation of the 12 | # deprecated API in order to know how to port your code away from it. 13 | DEFINES += QT_DEPRECATED_WARNINGS _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING 14 | # You can also make your code fail to compile if it uses deprecated APIs. 15 | # In order to do so, uncomment the following line. 16 | # You can also select to disable deprecated APIs only up to a certain version of Qt. 
17 | #DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0 18 | 19 | SOURCES += \ 20 | EnglishPhoneticProcessor.cpp \ 21 | FastSpeech2.cpp \ 22 | MultiBandMelGAN.cpp \ 23 | TextTokenizer.cpp \ 24 | Voice.cpp \ 25 | VoxCommon.cpp \ 26 | attention.cpp \ 27 | batchdenoisedlg.cpp \ 28 | espeakphonemizer.cpp \ 29 | ext/ByteArr.cpp \ 30 | ext/Qt-Frameless-Window-DarkStyle-master/DarkStyle.cpp \ 31 | ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow/framelesswindow.cpp \ 32 | ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow/windowdragger.cpp \ 33 | ext/ZCharScanner.cpp \ 34 | ext/ZFile.cpp \ 35 | ext/qcustomplot.cpp \ 36 | istftnettorch.cpp \ 37 | main.cpp \ 38 | mainwindow.cpp \ 39 | melgen.cpp \ 40 | modelinfodlg.cpp \ 41 | phddialog.cpp \ 42 | phonemizer.cpp \ 43 | phoneticdict.cpp \ 44 | phonetichighlighter.cpp \ 45 | spectrogram.cpp \ 46 | tacotron2.cpp \ 47 | tacotron2torch.cpp \ 48 | tfg2p.cpp \ 49 | torchmoji.cpp \ 50 | track.cpp \ 51 | vits.cpp \ 52 | voicemanager.cpp \ 53 | voxer.cpp 54 | 55 | HEADERS += \ 56 | EnglishPhoneticProcessor.h \ 57 | FastSpeech2.h \ 58 | MultiBandMelGAN.h \ 59 | TextTokenizer.h \ 60 | Voice.h \ 61 | VoxCommon.hpp \ 62 | attention.h \ 63 | batchdenoisedlg.h \ 64 | espeakphonemizer.h \ 65 | ext/AudioFile.hpp \ 66 | ext/ByteArr.h \ 67 | ext/CppFlow/context.h \ 68 | ext/CppFlow/cppflow.h \ 69 | ext/CppFlow/datatype.h \ 70 | ext/CppFlow/defer.h \ 71 | ext/CppFlow/model.h \ 72 | ext/CppFlow/ops.h \ 73 | ext/CppFlow/raw_ops.h \ 74 | ext/CppFlow/tensor.h \ 75 | ext/Qt-Frameless-Window-DarkStyle-master/DarkStyle.h \ 76 | ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow/framelesswindow.h \ 77 | ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow/windowdragger.h \ 78 | ext/ZCharScanner.h \ 79 | ext/ZFile.h \ 80 | ext/json.hpp \ 81 | ext/qcustomplot.h \ 82 | istftnettorch.h \ 83 | mainwindow.h \ 84 | melgen.h \ 85 | modelinfodlg.h \ 86 | phddialog.h \ 87 | phonemizer.h \ 
88 | phoneticdict.h \ 89 | phonetichighlighter.h \ 90 | spectrogram.h \ 91 | tacotron2.h \ 92 | tacotron2torch.h \ 93 | tfg2p.h \ 94 | torchmoji.h \ 95 | track.h \ 96 | vits.h \ 97 | voicemanager.h \ 98 | voxer.h 99 | 100 | FORMS += \ 101 | batchdenoisedlg.ui \ 102 | ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow/framelesswindow.ui \ 103 | mainwindow.ui \ 104 | modelinfodlg.ui \ 105 | phddialog.ui 106 | 107 | # Default rules for deployment. 108 | qnx: target.path = /tmp/$${TARGET}/bin 109 | else: unix:!android: target.path = /opt/$${TARGET}/bin 110 | !isEmpty(target.path): INSTALLS += target 111 | 112 | 113 | DEFINES += _CRT_SECURE_NO_WARNINGS 114 | 115 | INCLUDEPATH += $$PWD/deps/include 116 | INCLUDEPATH += $$PWD/deps/include/libtorch 117 | INCLUDEPATH += $$PWD/ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow 118 | win32: LIBS += -L$$PWD/deps/lib/ tensorflow.lib r8bsrc64.lib rnnoise64.lib LogitechLEDLib.lib LibNumberText64.lib c10.lib torch.lib torch_cpu.lib libespeak-ng.lib 119 | win32: LIBS += Advapi32.lib User32.lib Psapi.lib 120 | 121 | 122 | RESOURCES += \ 123 | ext/Qt-Frameless-Window-DarkStyle-master/darkstyle.qrc \ 124 | ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow.qrc \ 125 | stdres.qrc 126 | 127 | win32:RC_ICONS += winicon.ico 128 | 129 | VERSION = 1.2.0.0 130 | CONFIG += force_debug_info 131 | 132 | QMAKE_CXXFLAGS += /std:c++17 /utf-8 -DPSAPI_VERSION=1 133 | 134 | DISTFILES += \ 135 | res/defaultim.png 136 | -------------------------------------------------------------------------------- /TextTokenizer.cpp: -------------------------------------------------------------------------------- 1 | #include "TextTokenizer.h" 2 | #include "ext/ZCharScanner.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | 11 | 12 | // Punctuation, this gets auto-converted to SIL 13 | const std::u32string punctuation_f = U",.-;"; 14 | 15 | // For Tacotron2, including question and other marks 16 | const std::u32string 
punctuation_tac = U",.;¡!¿?:-"; 17 | 18 | 19 | const std::u32string digits = U"1234567890"; 20 | 21 | using namespace std; 22 | 23 | void TextTokenizer::SetAllowedChars(const std::string &value) 24 | { 25 | AllowedChars = VoxUtil::StrToU32(value); 26 | } 27 | 28 | vector TextTokenizer::ExpandNumbers(const std::vector& SpaceTokens) 29 | { 30 | vector RetVec; 31 | RetVec.reserve(SpaceTokens.size()); 32 | 33 | for (auto& Token : SpaceTokens) { 34 | char* p; 35 | strtol(Token.c_str(), &p, 10); 36 | if (*p) { 37 | RetVec.push_back(Token); 38 | } 39 | else { 40 | std::string ModTk = Token; 41 | CuNumber->numbertext(ModTk,NumLang); 42 | 43 | std::replace(ModTk.begin(),ModTk.end(),'-',' '); 44 | 45 | // If the number has spaces we must sep again and add one by one otherwise all the words are merged together due to the 46 | // nature of it 47 | ZStringDelimiter DelSp(ModTk); 48 | DelSp.AddDelimiter(" "); 49 | 50 | if (DelSp.szTokens()) 51 | { 52 | for (const auto& Ttk : DelSp.GetTokens()) 53 | RetVec.push_back(Ttk); 54 | 55 | }else{ 56 | RetVec.push_back(ModTk); 57 | } 58 | 59 | 60 | 61 | 62 | 63 | } 64 | } 65 | 66 | return RetVec; 67 | 68 | } 69 | 70 | string TextTokenizer::SpaceChars(const string &InStr) 71 | { 72 | std::u32string AsmStr = U""; 73 | std::u32string Stry = VoxUtil::StrToU32(InStr); 74 | 75 | bool InNumChain = false; 76 | bool InPhn = false; 77 | 78 | for (size_t i = 0; i < Stry.size();i++) 79 | { 80 | auto uChar = Stry[i]; 81 | 82 | if (uChar == U'@') 83 | InPhn = true; 84 | 85 | if (uChar == U' ') 86 | InPhn = false; 87 | 88 | 89 | if (InPhn) 90 | { 91 | AsmStr += uChar; 92 | continue; 93 | 94 | } 95 | 96 | 97 | if (digits.find(uChar) != std::u32string::npos && !InNumChain) 98 | { 99 | AsmStr += U" "; 100 | AsmStr += uChar; 101 | InNumChain = true; 102 | continue; 103 | } 104 | 105 | if (digits.find(uChar) == std::u32string::npos && InNumChain ) 106 | { 107 | AsmStr += U" "; 108 | AsmStr += uChar; 109 | 110 | InNumChain = false; 111 | continue; 112 | 113 | 
} 114 | 115 | AsmStr += uChar; 116 | 117 | 118 | 119 | } 120 | 121 | return VoxUtil::U32ToStr(AsmStr); 122 | 123 | 124 | } 125 | 126 | TextTokenizer::TextTokenizer() 127 | { 128 | } 129 | 130 | TextTokenizer::~TextTokenizer() 131 | { 132 | } 133 | 134 | void TextTokenizer::SetNumberText(Numbertext &INum, const string &Lang) 135 | { 136 | CuNumber = &INum; 137 | NumLang = Lang; 138 | 139 | } 140 | 141 | 142 | 143 | vector TextTokenizer::Tokenize(const std::string & InTxt,bool IsTacotron, bool IsTorchMoji) 144 | { 145 | vector ProcessedTokens; 146 | 147 | 148 | 149 | std::string TxtPreProc = SpaceChars(InTxt); 150 | 151 | ZStringDelimiter Delim(TxtPreProc); 152 | Delim.AddDelimiter(" "); 153 | 154 | vector DelimitedTokens = Delim.GetTokens(); 155 | 156 | 157 | 158 | // Single word handler 159 | if (!Delim.szTokens()) 160 | DelimitedTokens.push_back(TxtPreProc); 161 | 162 | DelimitedTokens = ExpandNumbers(DelimitedTokens); 163 | 164 | std::u32string punctuation = punctuation_f; 165 | 166 | if (IsTacotron) 167 | punctuation = punctuation_tac; 168 | 169 | 170 | 171 | 172 | // We know that the new vector is going to be at least this size so we reserve 173 | ProcessedTokens.reserve(DelimitedTokens.size()); 174 | 175 | /* 176 | In this step we go through the string and only allow qualified character to pass through. 
177 | */ 178 | for (size_t TokCtr = 0; TokCtr < DelimitedTokens.size();TokCtr++) 179 | { 180 | // We are now using U32string because it's guaranteed to be 1 character = 1 element 181 | const auto& tok = VoxUtil::StrToU32(DelimitedTokens[TokCtr]); 182 | std::u32string AppTok = U""; 183 | 184 | 185 | if (tok.find(U"@") != string::npos) 186 | { 187 | 188 | ProcessedTokens.push_back(VoxUtil::U32ToStr(tok)); 189 | continue; 190 | 191 | } 192 | 193 | for (size_t s = 0;s < tok.size();s++) 194 | { 195 | 196 | 197 | if (AllowedChars.find(tok[s]) != std::u32string::npos) 198 | AppTok += tok[s]; 199 | 200 | 201 | // Punctuation handler 202 | // This time we explicitly add a token to the vector 203 | if (punctuation.find(tok[s]) != std::u32string::npos) { 204 | 205 | 206 | // First, if the assembled string isn't empty, we add it in its current state 207 | // Otherwise, the SIL could end up appearing before the word. 208 | 209 | if (!AppTok.empty()) { 210 | ProcessedTokens.push_back(VoxUtil::U32ToStr(AppTok)); 211 | 212 | AppTok = U""; 213 | } 214 | 215 | if (IsTacotron){ 216 | 217 | // Double at-symbol is handled later 218 | if (!IsTorchMoji) 219 | AppTok += U"@@"; 220 | 221 | AppTok += tok[s]; 222 | 223 | } 224 | else{ 225 | AppTok = U"@SIL"; 226 | } 227 | 228 | ProcessedTokens.push_back(VoxUtil::U32ToStr(AppTok)); 229 | AppTok = U""; 230 | continue; 231 | 232 | } 233 | 234 | 235 | 236 | 237 | 238 | 239 | } 240 | if (!AppTok.empty()) 241 | { 242 | ProcessedTokens.push_back(VoxUtil::U32ToStr(AppTok)); 243 | AppTok = U""; 244 | 245 | 246 | } 247 | 248 | } 249 | // Prevent out of range error if the user inputs one word 250 | if (ProcessedTokens.size() > 1) 251 | { 252 | if (ProcessedTokens[ProcessedTokens.size() - 1] == "SIL") 253 | ProcessedTokens.pop_back(); 254 | } 255 | 256 | 257 | return ProcessedTokens; 258 | } 259 | -------------------------------------------------------------------------------- /TextTokenizer.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include "VoxCommon.hpp" 5 | #include "Numbertext.hxx" 6 | 7 | class TextTokenizer 8 | { 9 | private: 10 | std::u32string AllowedChars; 11 | 12 | std::vector ExpandNumbers(const std::vector& SpaceTokens); 13 | 14 | Numbertext* CuNumber; 15 | 16 | std::string NumLang; 17 | 18 | 19 | // Go through the string and add spaces before and after punctuation. 20 | // This is because ExpandNumbers won't recognize numbers if they've got punctuation like 500, or .9000 21 | std::string SpaceChars(const std::string& InStr); 22 | 23 | 24 | 25 | public: 26 | TextTokenizer(); 27 | ~TextTokenizer(); 28 | 29 | void SetNumberText(Numbertext& INum,const std::string& Lang); 30 | 31 | std::vector Tokenize(const std::string& InTxt, bool IsTacotron = false, bool IsTorchMoji = false); 32 | void SetAllowedChars(const std::string &value); 33 | }; 34 | 35 | -------------------------------------------------------------------------------- /Voice.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "FastSpeech2.h" 4 | #include "tacotron2.h" 5 | #include "MultiBandMelGAN.h" 6 | #include "EnglishPhoneticProcessor.h" 7 | #include "vits.h" 8 | #include "Numbertext.hxx" 9 | #include "torchmoji.h" 10 | #include "phoneticdict.h" 11 | #include "tacotron2torch.h" 12 | #include "istftnettorch.h" 13 | struct VoxResults{ 14 | std::vector Audio; 15 | TFTensor Alignment; 16 | TFTensor Mel; 17 | }; 18 | 19 | class Voice 20 | { 21 | private: 22 | std::unique_ptr MelPredictor; 23 | std::unique_ptr Vocoder; 24 | EnglishPhoneticProcessor Processor; 25 | VoiceInfo VoxInfo; 26 | TorchMoji Moji; 27 | 28 | 29 | 30 | std::vector Phonemes; 31 | std::vector PhonemeIDs; 32 | 33 | 34 | 35 | std::vector PhonemesToID(const std::string& RawInTxt); 36 | 37 | std::vector Speakers; 38 | std::vector Emotions; 39 | 40 | void 
ReadPhonemes(const std::string& PhonemePath); 41 | 42 | void ReadSpeakers(const std::string& SpeakerPath); 43 | 44 | void ReadEmotions(const std::string& EmotionPath); 45 | 46 | 47 | 48 | void ReadModelInfo(const std::string& ModelInfoPath); 49 | 50 | 51 | 52 | std::vector CurrentDict; 53 | 54 | std::string ModelInfo; 55 | 56 | std::vector CharsToID(const std::string &RawInTxt); 57 | 58 | Numbertext NumTxt; 59 | public: 60 | /* Voice constructor, arguments obligatory. 61 | -> VoxPath: Path of folder where models are contained. 62 | -- Must be a folder without an ending slash with UNIX slashes, can be relative or absolute (eg: MyVoices/Karen) 63 | -- The folder must contain the following elements: 64 | --- melgen: Folder generated where a FastSpeech2 model was saved as SavedModel, with .pb, variables, etc 65 | --- vocoder: Folder where a Multi-Band MelGAN model was saved as SavedModel. 66 | --- info.json: Model information 67 | --- phonemes.txt: Tab delimited file containing PHONEME \t ID, for inputting to the FS2 model. 
68 | 69 | --- If multispeaker, a lined .txt file called speakers.txt 70 | --- If multi-emotion, a lined .txt file called emotions.txt 71 | 72 | */ 73 | 74 | 75 | Voice(const std::string& VoxPath, const std::string& inName,Phonemizer* InPhn); 76 | 77 | void AddPhonemizer(Phonemizer* InPhn, ESpeakPhonemizer *InENGPhn); 78 | void LoadNumberText(const std::string& NumTxtPath); 79 | 80 | 81 | std::string PhonemizeStr(const std::string& Prompt); 82 | VoxResults Vocalize(const std::string& Prompt, float Speed = 1.f, int32_t SpeakerID = 0, float Energy = 1.f, float F0 = 1.f, int32_t EmotionID = -1, const std::string &EmotionOvr = ""); 83 | 84 | std::string Name; 85 | inline const VoiceInfo& GetInfo(){return VoxInfo;} 86 | 87 | inline const std::vector& GetSpeakers(){return Speakers;} 88 | inline const std::vector& GetEmotions(){return Emotions;} 89 | 90 | void SetDictEntries(const std::vector& InEntries); 91 | inline const std::string& GetModelInfo(){return ModelInfo;} 92 | 93 | ~Voice(); 94 | }; 95 | 96 | -------------------------------------------------------------------------------- /VoxCommon.cpp: -------------------------------------------------------------------------------- 1 | #include "VoxCommon.hpp" 2 | #include "ext/json.hpp" 3 | using namespace nlohmann; 4 | #include 5 | #include // std::wstring_convert 6 | 7 | const std::vector Text2MelNames = {"FastSpeech2","Tacotron2 (TF)","VITS","VITS + TorchMoji","Tacotron2 (Torch)"}; 8 | const std::vector VocoderNames = {"Multi-Band MelGAN","MelGAN-STFT","","iSTFTNet"}; 9 | const std::vector RepoNames = {"TensorflowTTS","Coqui-TTS","jaywalnut310","keonlee9420"}; 10 | 11 | const std::vector LanguageNames = {"English","Spanish", "German", "EnglishIPA"}; 12 | const std::vector LangaugeNamesNumToWords = {"en", "es","de","en"}; 13 | 14 | 15 | 16 | 17 | #include "ext/ZCharScanner.h" 18 | 19 | const std::map LegacyToV1Lang = { 20 | {-3,"German-Char"}, 21 | {0,"English-ARPA"}, 22 | {-1,"English-Char"}, 23 | {3,"English-IPA"}, 24 
// Process language value for vector indexes. Language value must adhere to standard.
// Non-negative values are returned unchanged. A negative value -k maps to k - 1,
// so -1 -> 0, -2 -> 1, -3 -> 2.
uint32_t ProcessLanguageValue(int32_t LangVal)
{
    if (LangVal >= 0)
        return static_cast<uint32_t>(LangVal);

    // -(LangVal + 1) equals (-LangVal) - 1 but, unlike LangVal * -1, cannot overflow for INT32_MIN
    return static_cast<uint32_t>(-(LangVal + 1));
}
Language value for the info 106 | 107 | auto LangVal = JS["language"]; 108 | 109 | 110 | std::string LanguageFullName; 111 | 112 | if (LangVal.is_string()){ // V1 Language type standard model; see ETTSLanguageType enum desc on header 113 | LanguageFullName = LangVal.get(); 114 | 115 | }else{ 116 | // Convert legacy language to V1 117 | int32_t LegacyLang = JS["language"].get(); 118 | LanguageFullName = LegacyToV1Lang.find(LegacyLang)->second; 119 | 120 | 121 | } 122 | 123 | ZStringDelimiter LangDel(LanguageFullName); 124 | LangDel.AddDelimiter("-"); 125 | 126 | std::string LangName = LangDel[0]; 127 | std::string LangTypeStr = LangDel[1]; 128 | std::string eSpeakLangStr = ""; 129 | if (LangDel.szTokens() > 2) 130 | { 131 | eSpeakLangStr = LangDel[2]; 132 | LanguageFullName = LangDel[0] + "-" + LangDel[1]; 133 | 134 | } 135 | 136 | int32_t LangType = V1LangTypes.find(LangTypeStr)->second; 137 | 138 | 139 | 140 | // If the voice is char then the pad value must be a string of the EOS token ID (like "148"). 
141 | std::string EndToken = JS["pad"].get(); 142 | 143 | // If it's phonetic then it's the token str, like "@EOS" 144 | if (LangType != ETTSLanguageType::Char && EndToken.size() && CuArch.Text2Mel != EText2MelModel::Tacotron2Torch) 145 | EndToken = " " + EndToken; // In this case we add a space for separation since we directly append the value to the prompt 146 | 147 | 148 | 149 | VoiceInfo Inf{JS["name"].get(), 150 | JS["author"].get(), 151 | JS["version"].get(), 152 | JS["description"].get(), 153 | CuArch, 154 | JS["note"].get(), 155 | JS["sarate"].get(), 156 | LangName, 157 | LanguageFullName, 158 | eSpeakLangStr, 159 | EndToken, 160 | LangType 161 | }; 162 | 163 | if (Inf.Note.size() > MaxNoteSize) 164 | Inf.Note = Inf.Note.substr(0,MaxNoteSize); 165 | 166 | return Inf; 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | } 175 | 176 | std::vector VoxUtil::GetLinedFile(const std::string &Path) 177 | { 178 | std::vector RetLines; 179 | std::ifstream Fi(Path); 180 | 181 | if (!Fi.good()) // File not exists, ret empty vec 182 | return RetLines; 183 | 184 | std::string Line; 185 | while (std::getline(Fi, Line)) 186 | { 187 | if (Line.size() > 1) 188 | RetLines.push_back(Line); 189 | 190 | 191 | } 192 | 193 | return RetLines; 194 | } 195 | 196 | std::string VoxUtil::U32ToStr(const std::u32string &InU32) 197 | { 198 | std::wstring_convert,char32_t> Converter; 199 | return Converter.to_bytes(InU32); 200 | 201 | 202 | 203 | } 204 | 205 | std::u32string VoxUtil::StrToU32(const std::string &InStr) 206 | { 207 | std::wstring_convert, char32_t> Converter; 208 | return Converter.from_bytes(InStr); 209 | 210 | } 211 | -------------------------------------------------------------------------------- /VoxCommon.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | /* 3 | VoxCommon.hpp : Defines common data structures and constants to be used with TensorVox 4 | */ 5 | #include 6 | 7 | #undef slots // 
// Plain container for a tensor copied out of a backend:
// flat element data, the dimension sizes, and the product of those sizes.
template <typename T>
struct TFTensor {
    std::vector<T> Data;
    std::vector<int64_t> Shape;
    // Zero for a default-constructed tensor instead of an indeterminate value
    size_t TotalSize = 0;

};
84 | // For example, German phonetic would be 3, and character based would be -3 85 | // IPA-phn-based are mainly for Coqui 86 | // ===========DEPRECATED=============== 87 | namespace ETTSLanguage{ 88 | enum Enum{ 89 | GermanChar = -3, 90 | SpanishChar, 91 | EnglishChar, 92 | EnglishPhn, 93 | SpanishPhn, 94 | GermanPhn, 95 | EnglishIPA, 96 | }; 97 | 98 | } 99 | 100 | /* Language Spec Standard V1: 101 | - Language is specified with a string from the JSON and the type is saved instead of relying 102 | on ETTSLanguage enum. 103 | -- The string is LanguageName-Method; for example English-StressedIPA, English-ARPA, German-Char 104 | - Both pre-V1 standard and current are supported 105 | - V1 Standard does not require changes in code to add new languages 106 | -- For eSpeak phonemizers, an additional entry is added with the language name: English-StressedIPA-English (America) 107 | 108 | 109 | 110 | */ 111 | 112 | namespace ETTSLanguageType{ 113 | enum Enum{ 114 | ARPA = 0, 115 | Char, 116 | IPA, 117 | GlobalPhone 118 | }; 119 | } 120 | 121 | 122 | struct ArchitectureInfo{ 123 | int Repo; 124 | int Text2Mel; 125 | int Vocoder; 126 | 127 | // String versions of the info, for displaying. 128 | // We want boilerplate int index to str conversion code to be low. 
// Linear search for the first element of Vec equal to In, beginning at index `start`.
// On a match, writes the element's index to OutIdx and returns true.
// Returns false and leaves OutIdx untouched when nothing matches or `start` is past the end.
template <typename VXVec1>
bool FindInVec(VXVec1 In, const std::vector<VXVec1>& Vec, size_t& OutIdx, size_t start = 0) {
    for (size_t Idx = start; Idx < Vec.size(); ++Idx)
    {
        if (Vec[Idx] == In) {
            OutIdx = Idx;
            return true;
        }
    }

    return false;
}
Map->data()->setCell(x,y,(double)Alignment.Data[i]); 53 | 54 | } 55 | 56 | 57 | } 58 | Map->setDataRange(QCPRange(0.0,1.0)); 59 | xAxis->setRange(QCPRange(0.0,(double)Shp[2])); 60 | 61 | yAxis->setRange(QCPRange(0.0,(double)Shp[1])); 62 | 63 | rescaleAxes(); 64 | 65 | replot(); 66 | 67 | 68 | } 69 | 70 | -------------------------------------------------------------------------------- /attention.h: -------------------------------------------------------------------------------- 1 | #ifndef ATTENTION_H 2 | #define ATTENTION_H 3 | 4 | #include "ext/qcustomplot.h" 5 | #include "VoxCommon.hpp" 6 | 7 | class Attention : public QCustomPlot 8 | { 9 | public: 10 | Attention(QWidget *parent = nullptr); 11 | 12 | void DoPlot(const TFTensor& Alignment); 13 | 14 | QCPColorMap* Map; 15 | 16 | }; 17 | 18 | #endif // ATTENTION_H 19 | -------------------------------------------------------------------------------- /batchdenoisedlg.cpp: -------------------------------------------------------------------------------- 1 | #include "batchdenoisedlg.h" 2 | #include "ui_batchdenoisedlg.h" 3 | 4 | #include 5 | #include 6 | #include 7 | #include "mainwindow.h" 8 | 9 | #define ManWi ((MainWindow*)pMainWindow) 10 | 11 | BatchDenoiseDlg::BatchDenoiseDlg(QWidget *parent) : 12 | QDialog(parent), 13 | ui(new Ui::BatchDenoiseDlg) 14 | { 15 | ui->setupUi(this); 16 | ProcessedFiles = 0; 17 | CurrentIndex = 0; 18 | Failures = 0; 19 | 20 | } 21 | 22 | 23 | // can't define in header because InferDetails belongs to mainwindow.h and including it in this dlg's .h would case circular dependency error 24 | InferDetails MakeInferDetails(const std::vector& InAudat,const QString& FilePath,unsigned InSampleRate,int32_t OutSampleRate) 25 | { 26 | InferDetails Dets; 27 | Dets.F0 = 0.0f; 28 | Dets.Speed = 0.0f; 29 | Dets.Energy = 0.0f; 30 | Dets.pItem = nullptr; // the mainwindow's function will make an item for us. 
31 | Dets.Prompt = ""; 32 | Dets.SpeakerID = OutSampleRate; // SpeakerID will double as resample when a denoise only job is requested. 33 | Dets.EmotionID = -1; 34 | Dets.Denoise = true; 35 | Dets.Amplification = 1.f; 36 | Dets.ExportFileName = FilePath; 37 | 38 | 39 | Dets.VoiceName = ""; 40 | Dets.ForcedAudio = InAudat; 41 | Dets.SampleRate = InSampleRate; 42 | 43 | return Dets; 44 | 45 | } 46 | 47 | 48 | BatchDenoiseDlg::~BatchDenoiseDlg() 49 | { 50 | delete ui; 51 | } 52 | 53 | void BatchDenoiseDlg::IterateDo() 54 | { 55 | 56 | if (ProcessedFiles == Files.size() && ManWi->GetCountItems() == 0) 57 | { 58 | // It's done! 59 | delete timIter; 60 | SetControls(true); 61 | 62 | return; 63 | 64 | } 65 | 66 | if (ManWi->GetCountItems() != 0) 67 | return; 68 | 69 | 70 | ManWi->DenDone = 0; 71 | if (CurrentIndex + ui->spbBatchSz->value() > Files.size()) 72 | ManWi->DenBatchSize = Files.size() - CurrentIndex; 73 | 74 | for (int32_t i = 0;i < ui->spbBatchSz->value();i++) 75 | { 76 | 77 | 78 | 79 | QString CurrentFn = Files[CurrentIndex]; 80 | 81 | AudioFile AudFile; 82 | InferDetails CurrentDets; 83 | try { 84 | AudFile.load(CurrentFn.toStdString()); 85 | 86 | CurrentDets = MakeInferDetails(AudFile.samples[0],CurrentFn,AudFile.getSampleRate(),ui->spbOutSR->value()); 87 | 88 | } catch (...) { 89 | 90 | CurrentIndex += 1; // NOT i !!!!!!! 91 | ProcessedFiles += 1; 92 | ++Failures; 93 | 94 | if (CurrentIndex > Files.size() - 1) 95 | break; 96 | 97 | continue; 98 | } 99 | 100 | ManWi->PushToInfers(CurrentDets); 101 | 102 | 103 | CurrentIndex += 1; // NOT i !!!!!!! 
104 | ProcessedFiles += 1; 105 | 106 | if (CurrentIndex > Files.size() - 1) 107 | break; 108 | 109 | 110 | } 111 | SetLabel(); 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | } 121 | 122 | void BatchDenoiseDlg::on_btnFindFolder_clicked() 123 | { 124 | 125 | QString Dir = QFileDialog::getExistingDirectory(this, tr("Find base folder of your WAVs"), 126 | "", 127 | QFileDialog::ShowDirsOnly 128 | | QFileDialog::DontResolveSymlinks); 129 | 130 | ui->edtFolPath->setText(Dir); 131 | 132 | UpdateDirectory(); 133 | 134 | } 135 | 136 | void BatchDenoiseDlg::on_edtFolPath_editingFinished() 137 | { 138 | UpdateDirectory(); 139 | 140 | } 141 | 142 | void BatchDenoiseDlg::SetLabel() 143 | { 144 | ui->lblFiles->setText(QString(QString::number(ProcessedFiles) + " / " + QString::number(Files.size()) + " files, " + QString::number(Failures) + " failures.") ); 145 | 146 | ui->pgFiles->setValue(ProcessedFiles); 147 | ui->pgFiles->update(); 148 | } 149 | 150 | void BatchDenoiseDlg::UpdateDirectory() 151 | { 152 | if (ui->edtFolPath->text().isEmpty()) 153 | return; 154 | 155 | if (Files.size()) 156 | Files.clear(); 157 | 158 | QDirIterator DirIt(ui->edtFolPath->text(),QDirIterator::Subdirectories); 159 | while (DirIt.hasNext()) 160 | { 161 | DirIt.next(); 162 | if (QFileInfo(DirIt.filePath()).isFile() && QFileInfo(DirIt.filePath()).suffix() == "wav") 163 | Files.push_back(DirIt.filePath()); 164 | } 165 | CurrentIndex = 0; 166 | ProcessedFiles = 0; 167 | Failures = 0; 168 | 169 | ui->pgFiles->setRange(0,Files.size()); 170 | 171 | 172 | SetLabel(); 173 | 174 | 175 | } 176 | 177 | void BatchDenoiseDlg::on_btnStart_clicked() 178 | { 179 | 180 | 181 | 182 | CurrentIndex = 0; 183 | ProcessedFiles = 0; 184 | Failures = 0; 185 | ManWi->DenBatchSize = ui->spbBatchSz->value(); 186 | 187 | timIter = new QTimer(this); 188 | timIter->setSingleShot(false); 189 | timIter->setInterval(1000); 190 | 191 | connect(timIter,&QTimer::timeout,this,&BatchDenoiseDlg::IterateDo); 192 | 193 | 
timIter->start(); 194 | 195 | SetControls(false); 196 | } 197 | 198 | void BatchDenoiseDlg::SetControls(bool En) 199 | { 200 | ui->edtFolPath->setEnabled(En); 201 | ui->spbBatchSz->setEnabled(En); 202 | ui->btnStart->setEnabled(En); 203 | ui->btnFindFolder->setEnabled(En); 204 | 205 | } 206 | -------------------------------------------------------------------------------- /batchdenoisedlg.h: -------------------------------------------------------------------------------- 1 | #ifndef BATCHDENOISEDLG_H 2 | #define BATCHDENOISEDLG_H 3 | 4 | #include 5 | #include 6 | namespace Ui { 7 | class BatchDenoiseDlg; 8 | } 9 | 10 | class BatchDenoiseDlg : public QDialog 11 | { 12 | Q_OBJECT 13 | 14 | public: 15 | explicit BatchDenoiseDlg(QWidget *parent = nullptr); 16 | ~BatchDenoiseDlg(); 17 | 18 | 19 | 20 | // if we included mainwindow.h in here it would result in circular dependency problem so we include it in the .cpp 21 | // and make it a void* here 22 | void* pMainWindow; 23 | 24 | private slots: 25 | 26 | 27 | void IterateDo(); 28 | void on_btnFindFolder_clicked(); 29 | 30 | void on_edtFolPath_editingFinished(); 31 | 32 | void on_btnStart_clicked(); 33 | 34 | private: 35 | 36 | void SetControls(bool En); 37 | 38 | QStringList Files; 39 | QTimer* timIter; 40 | int32_t ProcessedFiles; 41 | int32_t CurrentIndex; 42 | int32_t Failures; 43 | 44 | 45 | 46 | void SetLabel(); 47 | void UpdateDirectory(); 48 | Ui::BatchDenoiseDlg *ui; 49 | }; 50 | 51 | #endif // BATCHDENOISEDLG_H 52 | -------------------------------------------------------------------------------- /batchdenoisedlg.ui: -------------------------------------------------------------------------------- 1 | 2 | 3 | BatchDenoiseDlg 4 | 5 | 6 | 7 | 0 8 | 0 9 | 510 10 | 299 11 | 12 | 13 | 14 | Dialog 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | Folder Path 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | Browse 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | Batch size: 44 | 45 | 46 | 47 | 48 | 49 | 50 | 16384 51 | 52 
| 53 | 32 54 | 55 | 56 | 4096 57 | 58 | 59 | 60 | 61 | 62 | 63 | Qt::Horizontal 64 | 65 | 66 | 67 | 40 68 | 20 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | Output sampling rate (Hz): 77 | 78 | 79 | 80 | 81 | 82 | 83 | 96000 84 | 85 | 86 | 8000 87 | 88 | 89 | 48000 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | <html><head/><body><p>Note: Will find all WAVs, recursive (folders and subfolders), and REPLACE FILES. If you don't want it to do that, make a copy first. Treats all files as mono.</p><p>Note 2: Files are resampled on input and output accordingly</p></body></html> 99 | 100 | 101 | true 102 | 103 | 104 | 105 | 106 | 107 | 108 | Files: 0 / 0 109 | 110 | 111 | 112 | 113 | 114 | 115 | 0 116 | 117 | 118 | 119 | 120 | 121 | 122 | Start 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /espeakphonemizer.cpp: -------------------------------------------------------------------------------- 1 | #include "espeakphonemizer.h" 2 | #include 3 | 4 | 5 | static const std::u32string Punctuation_t = U",.;¡!¿?:-~"; 6 | static const std::u32string Punctuation_ns = U"¿-~"; 7 | 8 | using namespace ESP; 9 | 10 | std::string ESpeakPhonemizer::ToPhon(const std::string &InTxt) 11 | { 12 | const char* TextPtr = InTxt.c_str(); 13 | const void** OurPtr = (const void**)&TextPtr; 14 | const char* Phon = espeak_TextToPhonemes(OurPtr, espeakCHARS_AUTO, (int)PhonemePars.to_ulong()); 15 | 16 | 17 | return std::string(Phon); 18 | } 19 | 20 | 21 | void ESpeakPhonemizer::Initialize(const std::string &DataPath, const std::string &VoiceName) 22 | { 23 | // these are irrelevant because we don't play any audio, we just use the phonemizer 24 | espeak_AUDIO_OUTPUT output = AUDIO_OUTPUT_SYNCH_PLAYBACK; 25 | int buflength = 500, options = 0; 26 | 27 | 28 | auto Err1 = espeak_Initialize(output, buflength, DataPath.c_str(), options); 29 | auto Err = espeak_SetVoiceByName(VoiceName.c_str()); 30 | EVoiceName = VoiceName; 31 | 32 | 
33 | PhonemePars[1] = 1; // set IPA 34 | 35 | 36 | } 37 | 38 | std::string ESpeakPhonemizer::Phonemize(const std::string &Input) 39 | { 40 | std::u32string In = VoxUtil::StrToU32(Input); 41 | 42 | // ESpeak's phonemize function stops at punctuation, so we split it up into chunks, phonemize, then put them back together 43 | PunctSplitVec SplitVec = IterativePunctuationSplit(In, Punctuation_t); 44 | 45 | std::string Assembled = ""; 46 | bool Space = false; 47 | for (const auto& Spli : SplitVec) 48 | { 49 | 50 | 51 | std::string Pibber = VoxUtil::U32ToStr(Spli.second); 52 | if (!Spli.first) 53 | { 54 | Pibber = ToPhon(Pibber); 55 | if (Space) 56 | Assembled += " "; 57 | 58 | 59 | }else 60 | { 61 | Space = true; 62 | for (const auto& PCh : Punctuation_ns){ 63 | if (Spli.second.find(PCh) != std::u32string::npos) 64 | Space = false; 65 | 66 | } 67 | 68 | 69 | 70 | 71 | 72 | } 73 | Assembled += Pibber; 74 | 75 | 76 | } 77 | 78 | return Assembled; 79 | 80 | } 81 | 82 | ESpeakPhonemizer::ESpeakPhonemizer() 83 | { 84 | 85 | } 86 | 87 | ESP::PunctSplitVec ESP::IterativePunctuationSplit(const std::u32string &Input, const std::u32string &Punct) 88 | { 89 | PunctSplitVec Ret; 90 | 91 | std::u32string CuStr = U""; 92 | for (const auto& Ch : Input) { 93 | 94 | if (Punct.find(Ch) != std::u32string::npos) { 95 | if (CuStr.size()) 96 | Ret.push_back({ false,CuStr }); 97 | 98 | std::u32string PunctOnly(1,Ch); 99 | Ret.push_back({ true, PunctOnly }); 100 | CuStr = U""; 101 | 102 | } 103 | else { 104 | CuStr += Ch; 105 | } 106 | 107 | 108 | } 109 | Ret.push_back({ false,CuStr }); 110 | return Ret; 111 | 112 | } 113 | 114 | -------------------------------------------------------------------------------- /espeakphonemizer.h: -------------------------------------------------------------------------------- 1 | #ifndef ESPEAKPHONEMIZER_H 2 | #define ESPEAKPHONEMIZER_H 3 | 4 | /* 5 | 6 | ESpeakPhonemizer: Tool for IPA Text2Phon using ESpeak NG as backend. 
7 | 8 | */ 9 | #include 10 | #include 11 | #include 12 | #include "VoxCommon.hpp" 13 | #include 14 | 15 | namespace ESP{ 16 | typedef std::pair PunctSplit; 17 | typedef std::vector PunctSplitVec; 18 | 19 | 20 | // Returns vector> 21 | PunctSplitVec IterativePunctuationSplit(const std::u32string& Input, const std::u32string& Punct); 22 | 23 | } 24 | 25 | class ESpeakPhonemizer 26 | { 27 | private: 28 | std::bitset PhonemePars; 29 | std::string ToPhon(const std::string& InTxt); 30 | 31 | std::string EVoiceName; 32 | public: 33 | 34 | // DataPath: Path to ESpeak NG data dir 35 | // VoiceName: Name of voice to use for phonemizing (like "Spanish (Latin America)") 36 | void Initialize(const std::string& DataPath,const std::string& VoiceName); 37 | 38 | 39 | // Phonemize text using ESpeak phonemizer 40 | // Unlike regular phonemizer, feed complete texts at once instead of just words. 41 | std::string Phonemize(const std::string& Input); 42 | 43 | ESpeakPhonemizer(); 44 | const std::string& GetVoiceName() const {return EVoiceName;}; 45 | }; 46 | 47 | #endif // ESPEAKPHONEMIZER_H 48 | -------------------------------------------------------------------------------- /ext/CppFlow/context.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by serizba on 27/6/20. 
3 | // 4 | 5 | #ifndef CPPFLOW2_CONTEXT_H 6 | #define CPPFLOW2_CONTEXT_H 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | namespace cppflow { 16 | 17 | inline bool status_check(TF_Status* status) { 18 | if (TF_GetCode(status) != TF_OK) { 19 | throw std::runtime_error(TF_Message(status)); 20 | } 21 | return true; 22 | } 23 | 24 | class context { 25 | public: 26 | static TFE_Context* get_context(); 27 | static TF_Status* get_status(); 28 | 29 | private: 30 | TFE_Context* tfe_context{nullptr}; 31 | 32 | public: 33 | explicit context(TFE_ContextOptions* opts = nullptr); 34 | 35 | context(context const&) = delete; 36 | context& operator=(context const&) = delete; 37 | context(context&&) noexcept; 38 | context& operator=(context&&) noexcept; 39 | 40 | ~context(); 41 | }; 42 | 43 | // TODO: create ContextManager class if needed 44 | // Set new context, thread unsafe, must be called at the beginning. 45 | // TFE_ContextOptions* tfe_opts = ... 46 | // cppflow::get_global_context() = cppflow::context(tfe_opts); 47 | inline context& get_global_context() { 48 | static context global_context; 49 | return global_context; 50 | } 51 | 52 | } 53 | 54 | namespace cppflow { 55 | 56 | inline TFE_Context* context::get_context() { 57 | return get_global_context().tfe_context; 58 | } 59 | 60 | inline TF_Status* context::get_status() { 61 | thread_local std::unique_ptr local_tf_status(TF_NewStatus(), &TF_DeleteStatus); 62 | return local_tf_status.get(); 63 | } 64 | 65 | inline context::context(TFE_ContextOptions* opts) { 66 | auto tf_status = context::get_status(); 67 | if(opts == nullptr) { 68 | std::unique_ptr new_opts(TFE_NewContextOptions(), &TFE_DeleteContextOptions); 69 | this->tfe_context = TFE_NewContext(new_opts.get(), tf_status); 70 | } else { 71 | this->tfe_context = TFE_NewContext(opts, tf_status); 72 | } 73 | status_check(tf_status); 74 | } 75 | 76 | inline context::context(context&& ctx) noexcept : 77 | 
tfe_context(std::exchange(ctx.tfe_context, nullptr)) 78 | { 79 | } 80 | 81 | inline context& context::operator=(context&& ctx) noexcept { 82 | tfe_context = std::exchange(ctx.tfe_context, tfe_context); 83 | return *this; 84 | } 85 | 86 | inline context::~context() { 87 | TFE_DeleteContext(this->tfe_context); 88 | } 89 | 90 | } 91 | 92 | #endif //CPPFLOW2_CONTEXT_H 93 | -------------------------------------------------------------------------------- /ext/CppFlow/cppflow.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by serizba on 17/9/20. 3 | // 4 | 5 | #ifndef EXAMPLE_CPPFLOW_H 6 | #define EXAMPLE_CPPFLOW_H 7 | 8 | #include "tensor.h" 9 | #include "model.h" 10 | #include "raw_ops.h" 11 | #include "ops.h" 12 | #include "datatype.h" 13 | 14 | #include 15 | 16 | namespace cppflow { 17 | 18 | /** 19 | * Version of TensorFlow and CppFlow 20 | * @return A string containing the version of TensorFow and CppFlow 21 | */ 22 | std::string version(); 23 | 24 | } 25 | 26 | /****************************** 27 | * IMPLEMENTATION DETAILS * 28 | ******************************/ 29 | 30 | namespace cppflow { 31 | inline std::string version() { 32 | return "TensorFlow: " + std::string(TF_Version()) + " CppFlow: 2.0.0"; 33 | } 34 | } 35 | 36 | #endif //EXAMPLE_CPPFLOW_H 37 | -------------------------------------------------------------------------------- /ext/CppFlow/datatype.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by serizba on 12/7/20. 
3 | // 4 | 5 | #ifndef CPPFLOW2_DATATYPE_H 6 | #define CPPFLOW2_DATATYPE_H 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace cppflow { 15 | 16 | using datatype = TF_DataType; 17 | 18 | /** 19 | * @return A string representing dt 20 | * 21 | */ 22 | inline std::string to_string(datatype dt) { 23 | switch (dt) { 24 | case TF_FLOAT: 25 | return "TF_FLOAT"; 26 | case TF_DOUBLE: 27 | return "TF_DOUBLE"; 28 | case TF_INT32: 29 | return "TF_INT32"; 30 | case TF_UINT8: 31 | return "TF_UINT8"; 32 | case TF_INT16: 33 | return "TF_INT16"; 34 | case TF_INT8: 35 | return "TF_INT8"; 36 | case TF_STRING: 37 | return "TF_STRING"; 38 | case TF_COMPLEX64: 39 | return "TF_COMPLEX64"; 40 | case TF_INT64: 41 | return "TF_INT64"; 42 | case TF_BOOL: 43 | return "TF_BOOL"; 44 | case TF_QINT8: 45 | return "TF_QINT8"; 46 | case TF_QUINT8: 47 | return "TF_QUINT8"; 48 | case TF_QINT32: 49 | return "TF_QINT32"; 50 | case TF_BFLOAT16: 51 | return "TF_BFLOAT16"; 52 | case TF_QINT16: 53 | return "TF_QINT16"; 54 | case TF_QUINT16: 55 | return "TF_QUINT16"; 56 | case TF_UINT16: 57 | return "TF_UINT16"; 58 | case TF_COMPLEX128: 59 | return "TF_COMPLEX128"; 60 | case TF_HALF: 61 | return "TF_HALF"; 62 | case TF_RESOURCE: 63 | return "TF_RESOURCE"; 64 | case TF_VARIANT: 65 | return "TF_VARIANT"; 66 | case TF_UINT32: 67 | return "TF_UINT32"; 68 | case TF_UINT64: 69 | return "TF_UINT64"; 70 | default: 71 | return "DATATYPE_NOT_KNOWN"; 72 | } 73 | } 74 | 75 | /** 76 | * 77 | * @tparam T 78 | * @return The TensorFlow type of T 79 | */ 80 | template 81 | TF_DataType deduce_tf_type() { 82 | if (std::is_same::value) 83 | return TF_FLOAT; 84 | if (std::is_same::value) 85 | return TF_DOUBLE; 86 | if (std::is_same::value) 87 | return TF_INT32; 88 | if (std::is_same::value) 89 | return TF_UINT8; 90 | if (std::is_same::value) 91 | return TF_INT16; 92 | if (std::is_same::value) 93 | return TF_INT8; 94 | if (std::is_same::value) 95 | return TF_INT64; 96 | if 
(std::is_same::value) 97 | return TF_BOOL; 98 | if (std::is_same::value) 99 | return TF_UINT16; 100 | if (std::is_same::value) 101 | return TF_UINT32; 102 | if (std::is_same::value) 103 | return TF_UINT64; 104 | 105 | // decode with `c++filt --type $output` for gcc 106 | throw std::runtime_error{"Could not deduce type! type_name: " + std::string(typeid(T).name())}; 107 | } 108 | 109 | /** 110 | * @return The stream os after inserting the string representation of dt 111 | * 112 | */ 113 | inline std::ostream& operator<<(std::ostream& os, datatype dt) { 114 | os << to_string(dt); 115 | return os; 116 | } 117 | 118 | } 119 | #endif //CPPFLOW2_DATATYPE_H 120 | -------------------------------------------------------------------------------- /ext/CppFlow/defer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace cppflow { 5 | 6 | class defer { 7 | public: 8 | typedef std::function Func; 9 | 10 | explicit defer(const Func& func) : _func(func) {} 11 | ~defer() { 12 | _func(); 13 | } 14 | 15 | defer(const defer&) = delete; 16 | defer(defer&&) = delete; 17 | defer& operator=(const defer&) = delete; 18 | void* operator new (size_t) = delete; 19 | void operator delete (void*) = delete; 20 | 21 | private: 22 | Func _func; 23 | }; 24 | 25 | } // namespace cppflow 26 | -------------------------------------------------------------------------------- /ext/CppFlow/model.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by serizba on 29/6/20. 
3 | // 4 | 5 | #ifndef CPPFLOW2_MODEL_H 6 | #define CPPFLOW2_MODEL_H 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "context.h" 15 | #include "defer.h" 16 | #include "tensor.h" 17 | 18 | namespace cppflow { 19 | 20 | class model { 21 | public: 22 | explicit model(const std::string& filename); 23 | 24 | std::vector get_operations() const; 25 | std::vector get_operation_shape(const std::string& operation) const; 26 | 27 | std::vector operator()(std::vector> inputs, std::vector outputs); 28 | tensor operator()(const tensor& input); 29 | 30 | ~model() = default; 31 | model(const model &model) = default; 32 | model(model &&model) = default; 33 | model &operator=(const model &other) = default; 34 | model &operator=(model &&other) = default; 35 | 36 | private: 37 | 38 | std::shared_ptr graph; 39 | std::shared_ptr session; 40 | }; 41 | } 42 | 43 | 44 | namespace cppflow { 45 | 46 | inline model::model(const std::string &filename) { 47 | this->graph = {TF_NewGraph(), TF_DeleteGraph}; 48 | 49 | // Create the session. 
50 | std::unique_ptr session_options = {TF_NewSessionOptions(), TF_DeleteSessionOptions}; 51 | std::unique_ptr run_options = {TF_NewBufferFromString("", 0), TF_DeleteBuffer}; 52 | std::unique_ptr meta_graph = {TF_NewBuffer(), TF_DeleteBuffer}; 53 | 54 | auto session_deleter = [](TF_Session* sess) { 55 | TF_DeleteSession(sess, context::get_status()); 56 | status_check(context::get_status()); 57 | }; 58 | 59 | int tag_len = 1; 60 | const char* tag = "serve"; 61 | this->session = {TF_LoadSessionFromSavedModel(session_options.get(), run_options.get(), filename.c_str(), 62 | &tag, tag_len, this->graph.get(), meta_graph.get(), context::get_status()), 63 | session_deleter}; 64 | 65 | status_check(context::get_status()); 66 | } 67 | 68 | inline std::vector model::get_operations() const { 69 | std::vector result; 70 | size_t pos = 0; 71 | TF_Operation* oper; 72 | 73 | // Iterate through the operations of a graph 74 | while ((oper = TF_GraphNextOperation(this->graph.get(), &pos)) != nullptr) { 75 | result.emplace_back(TF_OperationName(oper)); 76 | } 77 | return result; 78 | } 79 | 80 | inline std::vector model::get_operation_shape(const std::string& operation) const { 81 | // Get operation by the name 82 | TF_Output out_op; 83 | out_op.oper = TF_GraphOperationByName(this->graph.get(), operation.c_str()); 84 | out_op.index = 0; 85 | 86 | std::vector shape; 87 | 88 | // Operation does not exist 89 | if (!out_op.oper) 90 | throw std::runtime_error("No operation named \"" + operation + "\" exists"); 91 | 92 | // DIMENSIONS 93 | 94 | // Get number of dimensions 95 | int n_dims = TF_GraphGetTensorNumDims(this->graph.get(), out_op, context::get_status()); 96 | 97 | // If is not a scalar 98 | if (n_dims > 0) { 99 | // Get dimensions 100 | auto* dims = new int64_t[n_dims]; 101 | TF_GraphGetTensorShape(this->graph.get(), out_op, dims, n_dims, context::get_status()); 102 | 103 | // Check error on Model Status 104 | status_check(context::get_status()); 105 | 106 | shape = 
std::vector(dims, dims + n_dims); 107 | 108 | delete[] dims; 109 | } 110 | 111 | return shape; 112 | } 113 | 114 | inline std::tuple parse_name(const std::string& name) { 115 | auto idx = name.find(':'); 116 | return (idx == -1 ? std::make_tuple(name, 0) : std::make_tuple(name.substr(0, idx), std::stoi(name.substr(idx + 1)))); 117 | } 118 | 119 | inline std::vector model::operator()(std::vector> inputs, std::vector outputs) { 120 | 121 | std::vector inp_ops(inputs.size()); 122 | std::vector inp_val(inputs.size(), nullptr); 123 | 124 | for (int i=0; i(inputs[i])); 128 | inp_ops[i].oper = TF_GraphOperationByName(this->graph.get(), op_name.c_str()); 129 | inp_ops[i].index = op_idx; 130 | 131 | if (!inp_ops[i].oper) 132 | throw std::runtime_error("No operation named \"" + op_name + "\" exists"); 133 | 134 | // Values 135 | inp_val[i] = std::get<1>(inputs[i]).get_tensor().get(); 136 | } 137 | 138 | std::vector out_ops(outputs.size()); 139 | auto out_val = std::make_unique(outputs.size()); 140 | for (int i=0; igraph.get(), op_name.c_str()); 144 | out_ops[i].index = op_idx; 145 | 146 | if (!out_ops[i].oper) 147 | throw std::runtime_error("No operation named \"" + op_name + "\" exists"); 148 | 149 | } 150 | 151 | TF_SessionRun(this->session.get(), NULL, 152 | inp_ops.data(), inp_val.data(), inputs.size(), 153 | out_ops.data(), out_val.get(), outputs.size(), 154 | NULL, 0,NULL , context::get_status()); 155 | status_check(context::get_status()); 156 | 157 | std::vector result; 158 | result.reserve(outputs.size()); 159 | for (int i=0; i(TF_TensorData(res_tensor_h.get())); 88 | auto *t_str = (TF_TString *)(TF_TensorData(res_tensor_h.get())); 89 | auto result = std::string(TF_TString_GetDataPointer(t_str), TF_TString_GetSize(t_str)); 90 | #else 91 | const char* dst[1] = {nullptr}; 92 | size_t dst_len[1] = {3}; 93 | TF_StringDecode(static_cast(TF_TensorData(res_tensor_h.get())) + 8, TF_TensorByteSize(res_tensor_h.get()), dst, dst_len, context::get_status()); 94 | 
status_check(context::get_status()); 95 | auto result = std::string(dst[0], *dst_len); 96 | #endif // TENSORFLOW_C_TF_TSTRING_H_ 97 | 98 | return result; 99 | } 100 | 101 | } 102 | 103 | #endif //CPPFLOW2_OPS_H 104 | -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/.gitignore: -------------------------------------------------------------------------------- 1 | # C++ objects and libs 2 | 3 | *.slo 4 | *.lo 5 | *.o 6 | *.a 7 | *.la 8 | *.lai 9 | *.so 10 | *.dll 11 | *.dylib 12 | 13 | # Qt-es 14 | 15 | /.qmake.cache 16 | /.qmake.stash 17 | *.pro.user 18 | *.pro.user.* 19 | *.qbs.user 20 | *.qbs.user.* 21 | *.moc 22 | moc_*.cpp 23 | moc_*.h 24 | qrc_*.cpp 25 | ui_*.h 26 | Makefile* 27 | *build-* 28 | 29 | # QtCreator 30 | 31 | *.autosave 32 | 33 | # QtCtreator Qml 34 | *.qmlproject.user 35 | *.qmlproject.user.* 36 | 37 | # QtCtreator CMake 38 | CMakeLists.txt.user* 39 | 40 | -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/DarkStyle.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ############################################################################### 3 | # # 4 | # The MIT License # 5 | # # 6 | # Copyright (C) 2017 by Juergen Skrotzky (JorgenVikingGod@gmail.com) # 7 | # >> https://github.com/Jorgen-VikingGod # 8 | # # 9 | # Sources: https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle # 10 | # # 11 | ############################################################################### 12 | */ 13 | 14 | #include "DarkStyle.h" 15 | 16 | DarkStyle::DarkStyle(): 17 | DarkStyle(styleBase()) 18 | { } 19 | 20 | DarkStyle::DarkStyle(QStyle *style): 21 | QProxyStyle(style) 22 | { } 23 | 24 | QStyle *DarkStyle::styleBase(QStyle *style) const { 25 | static QStyle *base = !style ? 
QStyleFactory::create(QStringLiteral("Fusion")) : style; 26 | return base; 27 | } 28 | 29 | QStyle *DarkStyle::baseStyle() const 30 | { 31 | return styleBase(); 32 | } 33 | 34 | void DarkStyle::polish(QPalette &palette) 35 | { 36 | // modify palette to dark 37 | palette.setColor(QPalette::Window,QColor(53,53,53)); 38 | palette.setColor(QPalette::WindowText,Qt::white); 39 | palette.setColor(QPalette::Disabled,QPalette::WindowText,QColor(127,127,127)); 40 | palette.setColor(QPalette::Base,QColor(42,42,42)); 41 | palette.setColor(QPalette::AlternateBase,QColor(66,66,66)); 42 | palette.setColor(QPalette::ToolTipBase,Qt::white); 43 | palette.setColor(QPalette::ToolTipText,QColor(53,53,53)); 44 | palette.setColor(QPalette::Text,Qt::white); 45 | palette.setColor(QPalette::Disabled,QPalette::Text,QColor(127,127,127)); 46 | palette.setColor(QPalette::Dark,QColor(35,35,35)); 47 | palette.setColor(QPalette::Shadow,QColor(20,20,20)); 48 | palette.setColor(QPalette::Button,QColor(53,53,53)); 49 | palette.setColor(QPalette::ButtonText,Qt::white); 50 | palette.setColor(QPalette::Disabled,QPalette::ButtonText,QColor(127,127,127)); 51 | palette.setColor(QPalette::BrightText,Qt::red); 52 | palette.setColor(QPalette::Link,QColor(42,130,218)); 53 | palette.setColor(QPalette::Highlight,QColor(42,130,218)); 54 | palette.setColor(QPalette::Disabled,QPalette::Highlight,QColor(80,80,80)); 55 | palette.setColor(QPalette::HighlightedText,Qt::white); 56 | palette.setColor(QPalette::Disabled,QPalette::HighlightedText,QColor(127,127,127)); 57 | } 58 | 59 | void DarkStyle::polish(QApplication *app) 60 | { 61 | if (!app) return; 62 | 63 | // increase font size for better reading, 64 | // setPointSize was reduced from +2 because when applied this way in Qt5, the font is larger than intended for some reason 65 | QFont defaultFont = QApplication::font(); 66 | defaultFont.setPointSize(defaultFont.pointSize()+1); 67 | app->setFont(defaultFont); 68 | 69 | // loadstylesheet 70 | QFile 
qfDarkstyle(QStringLiteral(":/darkstyle/darkstyle.qss")); 71 | if (qfDarkstyle.open(QIODevice::ReadOnly | QIODevice::Text)) 72 | { 73 | // set stylesheet 74 | QString qsStylesheet = QString::fromLatin1(qfDarkstyle.readAll()); 75 | app->setStyleSheet(qsStylesheet); 76 | qfDarkstyle.close(); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/DarkStyle.h: -------------------------------------------------------------------------------- 1 | /* 2 | ############################################################################### 3 | # # 4 | # The MIT License # 5 | # # 6 | # Copyright (C) 2017 by Juergen Skrotzky (JorgenVikingGod@gmail.com) # 7 | # >> https://github.com/Jorgen-VikingGod # 8 | # # 9 | # Sources: https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle # 10 | # # 11 | ############################################################################### 12 | */ 13 | 14 | #ifndef _DarkStyle_HPP 15 | #define _DarkStyle_HPP 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | class DarkStyle : public QProxyStyle 24 | { 25 | Q_OBJECT 26 | 27 | public: 28 | DarkStyle(); 29 | explicit DarkStyle(QStyle *style); 30 | 31 | QStyle *baseStyle() const; 32 | 33 | void polish(QPalette &palette) override; 34 | void polish(QApplication *app) override; 35 | 36 | private: 37 | QStyle *styleBase(QStyle *style = Q_NULLPTR) const; 38 | }; 39 | 40 | #endif // _DarkStyle_HPP 41 | -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/README.md: -------------------------------------------------------------------------------- 1 | # Qt Frameless Window with DarkStyle 2 | simple MainWindow class implementation with frameless window and custom dark style. 
3 | 4 | It also adds support for a title bar and buttons (minimize, maximize, close) 5 | 6 | The look is based on the VS2013 application window (flat and frameless window) 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 |
Screenshots
mac enabledmac disabled
16 | 17 | 18 | ## Qt and OS 19 | * tested with Qt5.5.0, Qt5.9.0 and Qt5.10.0 20 | * tested on Windows 7, Windows 10, MacOSX 10.12.5 and MacOS 10.13.2 21 | 22 | ## PyQt5 23 | Here is an [unofficial Python port](https://github.com/gmarull/qtmodern) of my implementation. 24 | 25 | ## How to use 26 | * add additional include path to **framelesswindow** 27 | * add resources **framelesswindow.qrc** and **darkstyle.qrc** 28 | * add ``#include "framelesswindow.h"`` into **main.cpp**, create window ``FramelessWindow framelessWindow;`` and assign your mainwindow object as content ``framelessWindow.setContent(mainWindow);`` and show it ``framelessWindow.show();`` 29 | * add ``#include "DarkStyle.h"`` into **main.cpp** and call ``a.setStyle(new DarkStyle);`` 30 | 31 | 32 | ```qt 33 | #include <QApplication> 34 | #include "DarkStyle.h" 35 | #include "framelesswindow.h" 36 | #include "mainwindow.h" 37 | 38 | int main(int argc, char *argv[]) 39 | { 40 | QApplication a(argc, argv); 41 | 42 | // style our application with custom dark style 43 | a.setStyle(new DarkStyle); 44 | 45 | // create frameless window (and set windowState or title) 46 | FramelessWindow framelessWindow; 47 | //framelessWindow.setWindowState(Qt::WindowMaximized); 48 | //framelessWindow.setWindowTitle("test title"); 49 | //framelessWindow.setWindowIcon(a.style()->standardIcon(QStyle::SP_DesktopIcon)); 50 | 51 | // create our mainwindow instance 52 | MainWindow *mainWindow = new MainWindow; 53 | 54 | // add the mainwindow to our custom frameless window 55 | framelessWindow.setContent(mainWindow); 56 | framelessWindow.show(); 57 | 58 | return a.exec(); 59 | } 60 | ``` 61 | 62 | 63 | ## features 64 | * frameless window 65 | * custom dark style (based on **Fusion style** with dark palette and custom stylesheets) 66 | * title bar 67 | * buttons (minimize | restore | maximize | close) 68 | * move window by dragging the title bar 69 | * double click title bar to toggle between window state (maximize and normal) 70 | * use of native 
events, like minimizing or system menu 71 | 72 | 73 | ## todo 74 | * [resize window on each corner [#1]](https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle/issues/1) 75 | * [snap on screen edges [#3]](https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle/issues/3) 76 | 77 | 78 | ## thanks 79 | Many thanks go to the [Qt Forum](https://forum.qt.io/topic/80654/how-to-create-vs2013-like-frameless-window-with-dark-style) and especially to [Chris Kawa](https://forum.qt.io/user/chris-kawa) for pointing out some common issues and suggesting great must-have features. 80 | 81 | 82 | ## Licence 83 | > The MIT License 84 | > 85 | > Copyright (c) 2018, Juergen Skrotzky (https://github.com/Jorgen-VikingGod, JorgenVikingGod@gmail.com) 86 | > 87 | > Permission is hereby granted, free of charge, to any person obtaining a copy 88 | > of this software and associated documentation files (the "Software"), to deal 89 | > in the Software without restriction, including without limitation the rights 90 | > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 91 | > copies of the Software, and to permit persons to whom the Software is 92 | > furnished to do so, subject to the following conditions: 93 | > 94 | > The above copyright notice and this permission notice shall be included in 95 | > all copies or substantial portions of the Software. 96 | > 97 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 98 | > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 99 | > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 100 | > AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 101 | > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 102 | > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 103 | > THE SOFTWARE. 
104 | -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle.qrc: -------------------------------------------------------------------------------- 1 | 2 | 3 | darkstyle/darkstyle.qss 4 | darkstyle/icon_close.png 5 | darkstyle/icon_restore.png 6 | darkstyle/icon_undock.png 7 | darkstyle/icon_branch_closed.png 8 | darkstyle/icon_branch_end.png 9 | darkstyle/icon_branch_more.png 10 | darkstyle/icon_branch_open.png 11 | darkstyle/icon_vline.png 12 | darkstyle/icon_checkbox_checked.png 13 | darkstyle/icon_checkbox_indeterminate.png 14 | darkstyle/icon_checkbox_unchecked.png 15 | darkstyle/icon_checkbox_checked_pressed.png 16 | darkstyle/icon_checkbox_indeterminate_pressed.png 17 | darkstyle/icon_checkbox_unchecked_pressed.png 18 | darkstyle/icon_checkbox_checked_disabled.png 19 | darkstyle/icon_checkbox_indeterminate_disabled.png 20 | darkstyle/icon_checkbox_unchecked_disabled.png 21 | darkstyle/icon_radiobutton_checked.png 22 | darkstyle/icon_radiobutton_unchecked.png 23 | darkstyle/icon_radiobutton_checked_pressed.png 24 | darkstyle/icon_radiobutton_unchecked_pressed.png 25 | darkstyle/icon_radiobutton_checked_disabled.png 26 | darkstyle/icon_radiobutton_unchecked_disabled.png 27 | darkstyle/icon_tbclose.png 28 | darkstyle/icon_tbclose_hover.png 29 | darkstyle/icon_sepvline.png 30 | 31 | 32 | -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_branch_closed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_branch_closed.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_branch_end.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_branch_end.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_branch_more.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_branch_more.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_branch_open.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_branch_open.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_checked.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_checked.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_checked_disabled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_checked_disabled.png -------------------------------------------------------------------------------- 
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_checked_pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_checked_pressed.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_indeterminate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_indeterminate.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_indeterminate_disabled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_indeterminate_disabled.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_indeterminate_pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_indeterminate_pressed.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_unchecked.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_unchecked.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_unchecked_disabled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_unchecked_disabled.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_unchecked_pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_unchecked_pressed.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_close.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_close.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_checked.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_checked.png -------------------------------------------------------------------------------- 
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_checked_disabled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_checked_disabled.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_checked_pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_checked_pressed.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_unchecked.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_unchecked.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_unchecked_disabled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_unchecked_disabled.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_unchecked_pressed.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_unchecked_pressed.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_restore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_restore.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_sepvline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_sepvline.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_tbclose.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_tbclose.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_tbclose_hover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_tbclose_hover.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_undock.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_undock.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_vline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_vline.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/frameless_window_dark.pro: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # # 3 | # The MIT License # 4 | # # 5 | # Copyright (C) 2017 by Juergen Skrotzky (JorgenVikingGod@gmail.com) # 6 | # >> https://github.com/Jorgen-VikingGod # 7 | # # 8 | # Sources: https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle # 9 | # # 10 | ############################################################################### 11 | 12 | QT += core gui 13 | 14 | greaterThan(QT_MAJOR_VERSION, 4): QT += widgets 15 | 16 | INCLUDEPATH +="framelesswindow" 17 | 18 | TARGET = QtFramelessWindowDarkStyle 19 | TEMPLATE = app 20 | 21 | SOURCES += main.cpp\ 22 | mainwindow.cpp \ 23 | framelesswindow/framelesswindow.cpp \ 24 | framelesswindow/windowdragger.cpp \ 25 | DarkStyle.cpp 26 | 27 | 28 | HEADERS += mainwindow.h \ 29 | framelesswindow/framelesswindow.h \ 30 | framelesswindow/windowdragger.h \ 31 | DarkStyle.h 32 | 33 | 34 | FORMS += mainwindow.ui \ 35 | framelesswindow/framelesswindow.ui 36 | 37 | RESOURCES += darkstyle.qrc \ 38 | framelesswindow.qrc 39 | -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow.qrc: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | images/icon_window_minimize.png 4 | images/icon_window_restore.png 5 | images/icon_window_maximize.png 6 | images/icon_window_close.png 7 | 8 | 9 | -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow/framelesswindow.h: -------------------------------------------------------------------------------- 1 | /* 2 | ############################################################################### 3 | # # 4 | # The MIT License # 5 | # # 6 | # Copyright (C) 2017 by Juergen Skrotzky (JorgenVikingGod@gmail.com) # 7 | # >> https://github.com/Jorgen-VikingGod # 8 | # # 9 | # Sources: https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle # 10 | # # 11 | ############################################################################### 12 | */ 13 | 14 | #ifndef FRAMELESSWINDOW_H 15 | #define FRAMELESSWINDOW_H 16 | 17 | #include 18 | #include 19 | 20 | namespace Ui { 21 | class FramelessWindow; 22 | } 23 | 24 | class MouseButtonSignaler: public QObject 25 | { 26 | Q_OBJECT 27 | 28 | public: 29 | MouseButtonSignaler(QObject * parent = 0) : QObject(parent) {} 30 | void installOn(QWidget * widget) { widget->installEventFilter(this); } 31 | 32 | protected: 33 | virtual bool eventFilter(QObject * obj, QEvent * ev) Q_DECL_OVERRIDE { 34 | if (( ev->type() == QEvent::MouseButtonPress 35 | || ev->type() == QEvent::MouseButtonRelease 36 | || ev->type() == QEvent::MouseButtonDblClick) 37 | && obj->isWidgetType()) { 38 | emit mouseButtonEvent(static_cast(obj), 39 | static_cast(ev)); 40 | } 41 | return false; 42 | } 43 | signals: 44 | void mouseButtonEvent(QWidget *, QMouseEvent *); 45 | }; 46 | 47 | class FramelessWindow: public QWidget 48 | { 49 | Q_OBJECT 50 | 51 | public: 52 | explicit FramelessWindow(QWidget *parent = 0); 53 | void setContent(QWidget *w); 54 | 55 | // Set a content dialog which if the 
close button is done, it sends a cancel signal. 56 | void ContentDlg(QDialog* indlg); 57 | void SetTitleBarBtns(bool Maximize,bool Minimize,bool Close); 58 | private: 59 | void styleWindow(bool bActive, bool bNoState); 60 | 61 | bool ContDlg; 62 | QDialog* dlgCont; 63 | 64 | signals: 65 | void windowIconLeftClicked(); 66 | void windowIconRightClicked(); 67 | void windowIconDblClick(); 68 | 69 | public slots: 70 | void setWindowTitle(const QString &text); 71 | void setWindowIcon(const QIcon &ico); 72 | 73 | private slots: 74 | void on_applicationStateChanged(Qt::ApplicationState state); 75 | void on_minimizeButton_clicked(); 76 | void on_restoreButton_clicked(); 77 | void on_maximizeButton_clicked(); 78 | void on_closeButton_clicked(); 79 | void on_windowTitlebar_doubleClicked(); 80 | 81 | protected: 82 | virtual void changeEvent(QEvent *event); 83 | 84 | private: 85 | Ui::FramelessWindow *ui; 86 | 87 | protected: 88 | QHBoxLayout contentLayout; 89 | }; 90 | 91 | #endif // FRAMELESSWINDOW_H 92 | -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow/windowdragger.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ############################################################################### 3 | # # 4 | # The MIT License # 5 | # # 6 | # Copyright (C) 2017 by Juergen Skrotzky (JorgenVikingGod@gmail.com) # 7 | # >> https://github.com/Jorgen-VikingGod # 8 | # # 9 | # Sources: https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle # 10 | # # 11 | ############################################################################### 12 | */ 13 | 14 | #include 15 | #include 16 | #include "windowdragger.h" 17 | 18 | WindowDragger::WindowDragger(QWidget *parent): QWidget(parent) 19 | { 20 | mousePressed = false; 21 | } 22 | 23 | void WindowDragger::mousePressEvent(QMouseEvent *event) 24 | { 25 | mousePressed = true; 26 | mousePos = 
event->globalPos(); 27 | 28 | QWidget *parent = parentWidget(); 29 | if (parent) 30 | parent = parent->parentWidget(); 31 | 32 | if (parent) 33 | wndPos = parent->pos(); 34 | } 35 | 36 | void WindowDragger::mouseMoveEvent(QMouseEvent *event) 37 | { 38 | QWidget *parent = parentWidget(); 39 | if (parent) 40 | parent = parent->parentWidget(); 41 | 42 | if (parent && mousePressed) 43 | parent->move(wndPos + (event->globalPos() - mousePos)); 44 | } 45 | 46 | void WindowDragger::mouseReleaseEvent(QMouseEvent *event) 47 | { 48 | Q_UNUSED(event); 49 | mousePressed = false; 50 | } 51 | 52 | void WindowDragger::paintEvent(QPaintEvent *event) 53 | { 54 | Q_UNUSED(event); 55 | QStyleOption styleOption; 56 | styleOption.init(this); 57 | QPainter painter(this); 58 | style()->drawPrimitive(QStyle::PE_Widget, &styleOption, &painter, this); 59 | } 60 | 61 | void WindowDragger::mouseDoubleClickEvent(QMouseEvent *event) 62 | { 63 | Q_UNUSED(event); 64 | emit doubleClicked(); 65 | } 66 | 67 | -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow/windowdragger.h: -------------------------------------------------------------------------------- 1 | /* 2 | ############################################################################### 3 | # # 4 | # The MIT License # 5 | # # 6 | # Copyright (C) 2017 by Juergen Skrotzky (JorgenVikingGod@gmail.com) # 7 | # >> https://github.com/Jorgen-VikingGod # 8 | # # 9 | # Sources: https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle # 10 | # # 11 | ############################################################################### 12 | */ 13 | 14 | #ifndef WINDOWDRAGGER_H 15 | #define WINDOWDRAGGER_H 16 | 17 | #include 18 | #include 19 | 20 | class WindowDragger : public QWidget 21 | { 22 | Q_OBJECT 23 | 24 | public: 25 | explicit WindowDragger(QWidget *parent = 0); 26 | 27 | signals: 28 | void doubleClicked(); 29 | 30 | protected: 31 | void 
mousePressEvent(QMouseEvent *event); 32 | void mouseMoveEvent(QMouseEvent *event); 33 | void mouseReleaseEvent(QMouseEvent *event); 34 | void mouseDoubleClickEvent(QMouseEvent *event); 35 | void paintEvent(QPaintEvent *event); 36 | 37 | protected: 38 | bool mousePressed; 39 | QPoint mousePos; 40 | QPoint wndPos; 41 | }; 42 | 43 | #endif // WINDOWDRAGGER_H 44 | -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/images/icon_window_close.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/images/icon_window_close.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/images/icon_window_maximize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/images/icon_window_maximize.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/images/icon_window_minimize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/images/icon_window_minimize.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/images/icon_window_restore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/images/icon_window_restore.png 
-------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/screenshot_mac_frameless_window_qt_dark_style_disabled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/screenshot_mac_frameless_window_qt_dark_style_disabled.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/screenshot_mac_frameless_window_qt_dark_style_enabled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/screenshot_mac_frameless_window_qt_dark_style_enabled.png -------------------------------------------------------------------------------- /ext/Qt-Frameless-Window-DarkStyle-master/screenshot_win7_frameless_window_qt_dark_style_enabled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/screenshot_win7_frameless_window_qt_dark_style_enabled.png -------------------------------------------------------------------------------- /ext/ZCharScanner.cpp: -------------------------------------------------------------------------------- 1 | #include "ZCharScanner.h" 2 | using namespace std; 3 | #include 4 | 5 | int ZStringDelimiter::key_search(const GString& s, const GString& key) 6 | { 7 | int count = 0; 8 | size_t pos = 0; 9 | while ((pos = s.find(key, pos)) != GString::npos) { 10 | ++count; 11 | ++pos; 12 | } 13 | return count; 14 | } 15 | void ZStringDelimiter::UpdateTokens() 16 | { 17 | if (!m_vDelimiters.size() || m_sString == "") 18 | return; 19 | 20 | 
m_vTokens.clear(); 21 | 22 | 23 | vector::iterator dIt = m_vDelimiters.begin(); 24 | while (dIt != m_vDelimiters.end()) 25 | { 26 | GString delimiter = *dIt; 27 | 28 | 29 | DelimStr(m_sString, delimiter, true); 30 | 31 | 32 | ++dIt; 33 | } 34 | 35 | 36 | 37 | } 38 | 39 | 40 | void ZStringDelimiter::DelimStr(const GString & s, const GString & delimiter, const bool & removeEmptyEntries) 41 | { 42 | BarRange(0, s.length()); 43 | for (size_t start = 0, end; start < s.length(); start = end + delimiter.length()) 44 | { 45 | size_t position = s.find(delimiter, start); 46 | end = position != GString::npos ? position : s.length(); 47 | 48 | GString token = s.substr(start, end - start); 49 | if (!removeEmptyEntries || !token.empty()) 50 | { 51 | if (token != s) 52 | m_vTokens.push_back(token); 53 | 54 | } 55 | Bar(position); 56 | } 57 | 58 | // dadwwdawdaawdwadwd 59 | } 60 | 61 | void ZStringDelimiter::BarRange(const int & min, const int & max) 62 | { 63 | #ifdef _AFX_ALL_WARNINGS 64 | if (PgBar) 65 | m_pBar->SetRange32(min, max); 66 | 67 | 68 | #endif 69 | } 70 | 71 | void ZStringDelimiter::Bar(const int & pos) 72 | { 73 | #ifdef _AFX_ALL_WARNINGS 74 | if (PgBar) 75 | m_pBar->SetPos(pos); 76 | 77 | 78 | #endif 79 | } 80 | 81 | ZStringDelimiter::ZStringDelimiter() 82 | { 83 | m_sString = ""; 84 | tokenIndex = 0; 85 | PgBar = false; 86 | } 87 | 88 | 89 | bool ZStringDelimiter::GetFirstToken(GString & in_out) 90 | { 91 | if (m_vTokens.size() >= 1) { 92 | in_out = m_vTokens[0]; 93 | return true; 94 | } 95 | else { 96 | return false; 97 | } 98 | } 99 | 100 | bool ZStringDelimiter::GetNextToken(GString & in_sOut) 101 | { 102 | if (tokenIndex > m_vTokens.size() - 1) 103 | return false; 104 | 105 | in_sOut = m_vTokens[tokenIndex]; 106 | ++tokenIndex; 107 | 108 | return true; 109 | } 110 | 111 | GString ZStringDelimiter::operator[](const size_t & in_index) 112 | { 113 | if (in_index > m_vTokens.size()) 114 | throw std::out_of_range("ZStringDelimiter tried to access token higher than 
size"); 115 | 116 | return m_vTokens[in_index]; 117 | 118 | } 119 | GString ZStringDelimiter::Reassemble(const GString& delim, const int& nelem) 120 | { 121 | GString Result = ""; 122 | TokenIterator RasIt = m_vTokens.begin(); 123 | int r = 0; 124 | if (nelem == -1) { 125 | while (RasIt != m_vTokens.end()) 126 | { 127 | 128 | if (r != 0) 129 | Result.append(delim); 130 | 131 | Result.append(*RasIt); 132 | 133 | ++r; 134 | 135 | 136 | ++RasIt; 137 | } 138 | } 139 | else { 140 | while (RasIt != m_vTokens.end() && r < nelem) 141 | { 142 | 143 | if (r != 0) 144 | Result.append(delim); 145 | 146 | Result.append(*RasIt); 147 | 148 | ++r; 149 | ++RasIt; 150 | } 151 | } 152 | 153 | return Result; 154 | 155 | } 156 | 157 | GString ZStringDelimiter::Reassemble(const GString & delim, const std::vector& Strs,int nelem) 158 | { 159 | GString Result = ""; 160 | TokenIterator RasIt = Strs.begin(); 161 | int r = 0; 162 | if (nelem == -1) { 163 | while (RasIt != Strs.end()) 164 | { 165 | 166 | if (r != 0) 167 | Result.append(delim); 168 | 169 | Result.append(*RasIt); 170 | 171 | ++r; 172 | 173 | 174 | ++RasIt; 175 | } 176 | } 177 | else { 178 | while (RasIt != Strs.end() && r < nelem) 179 | { 180 | 181 | if (r != 0) 182 | Result.append(delim); 183 | 184 | Result.append(*RasIt); 185 | 186 | ++r; 187 | ++RasIt; 188 | } 189 | } 190 | 191 | return Result; 192 | } 193 | 194 | void ZStringDelimiter::AddDelimiter(const GString & in_Delim) 195 | { 196 | m_vDelimiters.push_back(in_Delim); 197 | UpdateTokens(); 198 | 199 | } 200 | 201 | ZStringDelimiter::~ZStringDelimiter() 202 | { 203 | } 204 | -------------------------------------------------------------------------------- /ext/ZCharScanner.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define GBasicCharScanner ZStringDelimiter 4 | 5 | #include 6 | #include 7 | 8 | #define ZSDEL_USE_STD_STRING 9 | #ifndef ZSDEL_USE_STD_STRING 10 | #include "golem_string.h" 11 | #else 12 | 
#define GString std::string 13 | #endif 14 | 15 | typedef std::vector::const_iterator TokenIterator; 16 | 17 | // ZStringDelimiter 18 | // ============== 19 | // Simple class to delimit and split strings. 20 | // You can use operator[] to access them 21 | // Or you can use the itBegin() and itEnd() to get some iterators 22 | // ================= 23 | class ZStringDelimiter 24 | { 25 | private: 26 | int key_search(const GString & s, const GString & key); 27 | void UpdateTokens(); 28 | std::vector m_vTokens; 29 | std::vector m_vDelimiters; 30 | 31 | GString m_sString; 32 | 33 | void DelimStr(const GString& s, const GString& delimiter, const bool& removeEmptyEntries = false); 34 | void BarRange(const int& min, const int& max); 35 | void Bar(const int& pos); 36 | size_t tokenIndex; 37 | public: 38 | ZStringDelimiter(); 39 | bool PgBar; 40 | 41 | #ifdef _AFX_ALL_WARNINGS 42 | CProgressCtrl* m_pBar; 43 | #endif 44 | 45 | ZStringDelimiter(const GString& in_iStr) { 46 | m_sString = in_iStr; 47 | PgBar = false; 48 | 49 | } 50 | 51 | bool GetFirstToken(GString& in_out); 52 | bool GetNextToken(GString& in_sOut); 53 | 54 | // std::String alts 55 | 56 | size_t szTokens() { return m_vTokens.size(); } 57 | GString operator[](const size_t& in_index); 58 | 59 | GString Reassemble(const GString & delim, const int & nelem = -1); 60 | 61 | // Override to reassemble provided tokens. 
62 | GString Reassemble(const GString & delim, const std::vector& Strs,int nelem = -1); 63 | 64 | // Get a const reference to the tokens 65 | const std::vector& GetTokens() { return m_vTokens; } 66 | 67 | TokenIterator itBegin() { return m_vTokens.begin(); } 68 | TokenIterator itEnd() { return m_vTokens.end(); } 69 | 70 | void SetText(const GString& in_Txt) { 71 | m_sString = in_Txt; 72 | if (m_vDelimiters.size()) 73 | UpdateTokens(); 74 | } 75 | void AddDelimiter(const GString& in_Delim); 76 | 77 | ~ZStringDelimiter(); 78 | }; 79 | 80 | -------------------------------------------------------------------------------- /ext/ZFile.cpp: -------------------------------------------------------------------------------- 1 | #include "ZFile.h" 2 | 3 | using namespace std; 4 | int ZFile::EZFOpenModeToIos(const EZFOpenMode::Enum & input) 5 | { 6 | /* 7 | hehe wall of ifs 8 | yanderedev amirite??? 9 | */ 10 | if (input == EZFOpenMode::BinaryRead) 11 | return ios::in | ios::binary; 12 | else if (input == EZFOpenMode::BinaryWrite) 13 | return ios::out | ios::binary; 14 | else if (input == EZFOpenMode::TextRead) 15 | return ios::in; 16 | else if (input == EZFOpenMode::TextWrite) 17 | return ios::out; 18 | 19 | SysEndian = ZFUtil::GetSysEndianness(); 20 | 21 | return ios::in | ios::binary; 22 | 23 | } 24 | 25 | ZFile::ZFile(const std::string & coFName, const EZFOpenMode::Enum & coMode) 26 | { 27 | Open(coFName, coMode); 28 | } 29 | 30 | bool ZFile::Open(const std::string & in_sFileName, const EZFOpenMode::Enum & in_Mode) 31 | { 32 | OpenMode = in_Mode; 33 | 34 | Stream.open(in_sFileName,(ios_base::openmode)EZFOpenModeToIos(in_Mode)); 35 | return Stream.good(); 36 | 37 | } 38 | 39 | 40 | 41 | void ZFile::Seek(const INT64 & in_Pos) 42 | { 43 | if (OpenMode == EZFOpenMode::BinaryRead || OpenMode == EZFOpenMode::TextRead) 44 | Stream.seekg(in_Pos, ios::beg); 45 | else if (OpenMode == EZFOpenMode::BinaryWrite || OpenMode == EZFOpenMode::TextWrite) 46 | Stream.seekp(in_Pos, ios::beg); 
47 | } 48 | 49 | INT64 ZFile::GetPos() 50 | { 51 | if (OpenMode == EZFOpenMode::BinaryRead || OpenMode == EZFOpenMode::TextRead) 52 | return Stream.tellg(); 53 | else if (OpenMode == EZFOpenMode::BinaryWrite || OpenMode == EZFOpenMode::TextWrite) 54 | return Stream.tellp(); 55 | 56 | // NO TYPE????????????? 57 | return -1; 58 | } 59 | 60 | void ZFile::SeekToEnd() 61 | { 62 | if (OpenMode == EZFOpenMode::BinaryRead || OpenMode == EZFOpenMode::TextRead) 63 | Stream.seekg(0, Stream.end); 64 | else if (OpenMode == EZFOpenMode::BinaryWrite || OpenMode == EZFOpenMode::TextWrite) 65 | Stream.seekp(0, Stream.end); 66 | } 67 | 68 | INT64 ZFile::GetFileLength() 69 | { 70 | std::streampos lpos = GetPos(); 71 | 72 | if (OpenMode == EZFOpenMode::BinaryRead || OpenMode == EZFOpenMode::TextRead) 73 | Stream.seekg(0, Stream.end); 74 | else if (OpenMode == EZFOpenMode::BinaryWrite || OpenMode == EZFOpenMode::TextWrite) 75 | Stream.seekp(0, Stream.end); 76 | 77 | const INT64 Len = GetPos(); 78 | Seek(lpos); 79 | 80 | return Len; 81 | 82 | } 83 | 84 | void ZFile::Read(void * out, const INT64 & count) 85 | { 86 | Stream.read((BYTE*)out, count); 87 | 88 | } 89 | 90 | void ZFile::Write(void * in, const INT64 & incount) 91 | { 92 | Stream.write((BYTE*)in, incount); 93 | 94 | } 95 | 96 | ByteArr ZFile::ReadEntireFile() 97 | { 98 | 99 | ByteArr ArrRet; 100 | 101 | Stream.seekg(0, Stream.end); 102 | INT64 length = Stream.tellg(); 103 | Stream.seekg(0, Stream.beg); 104 | ArrRet.CAlloc(length); 105 | 106 | Stream.read(ArrRet.GetData(), length); 107 | 108 | return ArrRet; 109 | 110 | } 111 | 112 | void ZFile::WriteLine(const string &inLi) 113 | { 114 | std::string Line = inLi + "\n"; 115 | 116 | Write((void*)Line.data(),Line.size() * sizeof(char)); 117 | 118 | } 119 | 120 | void ZFile::Write(const ByteArr & BrDat) 121 | { 122 | Stream.write(BrDat.CoData(), BrDat.Size()); 123 | } 124 | 125 | void ZFile::Close() 126 | { 127 | Stream.close(); 128 | } 129 | 130 | void ZFile::operator>>(ByteArr& 
BarDat) { 131 | size_t BaSz = 0; 132 | Read(BaSz); 133 | BarDat.CAlloc(BaSz); 134 | Stream.read(BarDat.GetData(), BaSz); 135 | 136 | } 137 | 138 | 139 | ZFile::ZFile() 140 | { 141 | } 142 | 143 | 144 | ZFile::~ZFile() 145 | { 146 | } 147 | -------------------------------------------------------------------------------- /ext/ZFile.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* 4 | ###################################### 5 | # 6 | # 7 | ____________ _ _ 8 | |___ / ____(_) | 9 | / /| |__ _| | ___ 10 | / / | __| | | |/ _ \ 11 | / /__| | | | | __/ 12 | /_____|_| |_|_|\___| 13 | 14 | 15 | ######################################## 16 | # Description: Defines ZFile class, one meant for easy serialization and writing of binary types, 17 | # including commonly used std containers without much problem 18 | # 19 | # Author: ZDisket 20 | # Copyright (C) 2018 YOUR MOM GAY LOLOLOL 21 | ####################################### 22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | #include "ByteArr.h" 29 | 30 | #define ZFILE_IOVR(cla,n) ZFile& operator<<(ZFile& right,const cla& n) 31 | #define ZFILE_OOVR(cla,n) ZFile& operator>>(ZFile& right,cla& n) 32 | // FStream that works with bytes 33 | typedef std::basic_fstream> ufstream; 34 | 35 | 36 | 37 | namespace EZFOpenMode { 38 | enum Enum { 39 | BinaryRead = 0, 40 | TextRead, 41 | BinaryWrite, 42 | TextWrite 43 | }; 44 | } 45 | 46 | namespace EZFEndian { 47 | enum Enum { 48 | Big = 0, 49 | Little 50 | }; 51 | } 52 | 53 | 54 | 55 | namespace ZFUtil { 56 | inline EZFEndian::Enum GetSysEndianness() 57 | { 58 | const int value{ 0x01 }; 59 | const void * address = static_cast(&value); 60 | const unsigned char * least_significant_address = static_cast(address); 61 | return (*least_significant_address == 0x01) ? 
EZFEndian::Little : EZFEndian::Big; 62 | } 63 | 64 | template 65 | void SwapEndian(T& var) 66 | { 67 | static_assert(std::is_pod::value, "Type must be POD type for safety"); 68 | std::array varArray; 69 | std::memcpy(varArray.data(), &var, sizeof(T)); 70 | for (int i = 0; i < static_cast(sizeof(var) / 2); i++) 71 | std::swap(varArray[sizeof(var) - 1 - i], varArray[i]); 72 | std::memcpy(&var, varArray.data(), sizeof(T)); 73 | } 74 | } 75 | 76 | // ZFile: Class for (mostly binary) file handling. 77 | // Cannot be copied 78 | class ZFile 79 | { 80 | private: 81 | 82 | ZFile(const ZFile&); 83 | 84 | BYTE * m_pData; 85 | bool FileOpened; 86 | ufstream Stream; 87 | 88 | EZFOpenMode::Enum OpenMode; 89 | EZFEndian::Enum SysEndian; 90 | 91 | int EZFOpenModeToIos(const EZFOpenMode::Enum& input); 92 | 93 | public: 94 | ZFile(); 95 | 96 | ZFile(const std::string& coFName, const EZFOpenMode::Enum& coMode); 97 | 98 | bool Open(const std::string& in_sFileName,const EZFOpenMode::Enum& in_Mode); 99 | 100 | void Seek(const INT64& in_Pos); 101 | INT64 GetPos(); 102 | 103 | void SeekToEnd(); 104 | 105 | INT64 GetFileLength(); 106 | // Reads from the file 107 | // Please pass a pointer to this 108 | void Read(void* out, const INT64& count); 109 | // Writes to the file 110 | // Please pass a pointer 111 | void Write(void* in, const INT64& incount); 112 | 113 | 114 | // Read the entire file into a byte array 115 | ByteArr ReadEntireFile(); 116 | 117 | // Write with template argument to not pass size. 118 | // Only works with regular datatypes 119 | template 120 | void Write(const Dat& dta) 121 | { 122 | Stream.write((BYTE*)&dta, sizeof(dta)); 123 | 124 | 125 | } 126 | 127 | void WriteLine(const std::string& inLi); 128 | 129 | // Read with template argument to not pass size. 
130 | // Only works with regular datatypes 131 | template 132 | void Read(Dat& dta) 133 | { 134 | Stream.read((BYTE*)&dta, sizeof(dta)); 135 | 136 | 137 | } 138 | 139 | 140 | 141 | // Write a string 142 | template 143 | void Write(const std::basic_string& Str) { 144 | // Get total len in bytes. 145 | const size_t LenInBytes = Str.length() * sizeof(chardat); 146 | 147 | // Write the string length (NOT in bytes) 148 | Write(Str.length()); 149 | Stream.write((BYTE*)Str.data(),LenInBytes); 150 | 151 | 152 | 153 | } 154 | 155 | // Read a string 156 | template 157 | void Read(std::basic_string& Str) { 158 | 159 | size_t StrLen = 0; 160 | Read(StrLen); 161 | chardat* dpBuffer = new chardat[StrLen]; 162 | 163 | 164 | Stream.read((BYTE*)dpBuffer, sizeof(chardat) * StrLen); 165 | 166 | // For some reason (witchcraft?) our buffer has more chars in it than we actually allocated, which should be impossible. 167 | // Thankfully, std::string's assign function allows for cutting. 168 | Str.assign(dpBuffer,0,StrLen); 169 | 170 | 171 | delete[] dpBuffer; 172 | 173 | } 174 | 175 | // Write a vector 176 | template 177 | void Write(const std::vector& Vec) { 178 | // Write size in bytes then vector size. 179 | Write(Vec.size()); 180 | 181 | // Write vector size. 182 | 183 | auto It = Vec.begin(); 184 | 185 | while (It != Vec.end()) { 186 | (*this) << *It; 187 | ++It; 188 | } 189 | 190 | 191 | 192 | } 193 | 194 | // Read a vector 195 | template 196 | void Read(std::vector& Vec) { 197 | size_t vSz = 0; 198 | Read(vSz); 199 | 200 | Vec.resize(vSz); 201 | 202 | size_t i = 0; 203 | 204 | while (i != vSz) { 205 | (*this) >> Vec[i]; 206 | 207 | ++i; 208 | } 209 | 210 | 211 | } 212 | 213 | template 214 | void Write(const N& Num, EZFEndian::Enum TargetEndian); 215 | 216 | // Write some stuff 217 | template 218 | void operator<<(const Ty& In) { 219 | Write(In); 220 | 221 | } 222 | 223 | // Write a Byte Array RAW into the file, without the size. 
Useful for exporting 224 | void Write(const ByteArr& BrDat); 225 | 226 | void operator<<(const ByteArr& BarDat) { 227 | if (BarDat.CoData() == NULL) { 228 | throw new std::invalid_argument("ZFile tried to write invalid byte array!!"); 229 | } 230 | 231 | Write(BarDat.Size()); 232 | Stream.write(BarDat.CoData(), BarDat.Size()); 233 | 234 | } 235 | // Read to a byte array. Note: DELETES AND REPLACES THE ALREADY EXISTING CONTENTS THERE!! 236 | void operator>>(ByteArr& BarDat); 237 | 238 | 239 | template 240 | void operator>>(MTy& mIn) { 241 | Read(mIn); 242 | } 243 | 244 | void Close(); 245 | 246 | 247 | ~ZFile(); 248 | }; 249 | 250 | template 251 | // Function to write a value with target endianness. 252 | inline void ZFile::Write(const N & Num, EZFEndian::Enum TargetEndian) 253 | { 254 | if (SysEndian == TargetEndian) 255 | Write(Num); 256 | else 257 | Write(ZFUtil::SwapEndian(Num)); 258 | 259 | 260 | } 261 | -------------------------------------------------------------------------------- /g2p_train/README.md: -------------------------------------------------------------------------------- 1 | # G2P for TensorVox 2 | TensorVox utilizes an RNN-based G2P model implemented in Tensorflow to convert text to phonemes before feeding the text2speech models. 3 | 4 | ## Training 5 | In order to train a model, you need to prepare two things: 6 | 1. A dictionary in format `WORD \t PHONETIC SPELLING` as the dataset 7 | 2. A config file (optional, there is already one in `config/default.yaml`) 8 | 9 | Tensorflow 2.0 or greater, is of course, required. 10 | 11 | Since the training is very quick on GPU (Tesla T4), it's just one script that does preprocessing, training, and exporting. If you don't have one, just use Google Colab. 12 | 13 | You can download my English dictionary (converted to tab-based from the LibriSpeech lexicon) [here](https://drive.google.com/file/d/19cnHM3-Zsc7uRJ2scUPNMNoSlyXuaGNe/view?usp=sharing). 
14 | Rename it from `dict.d` to `dict.txt`. 15 | 16 | The command to run it is as follows: 17 | 18 | python3 train_and_export.py --dict-path dict.txt --config-path config/default.yaml --out-path English 19 | 20 | Arguments should be self-explanatory. 21 | ### Important note 22 | If your phoneme format does not separate phonemes by spaces (like IPA), pass `--char-tok-phn` as an argument, because the script assumes that all phoneme texts are like ARPA (example: G R IY1 N) and tokenizes them by splitting on spaces. One sign that space tokenization is being applied to a format without spaces is very slow training on a decent GPU. 23 | 24 | ## Structure 25 | 26 | Once finished, the script will output all files required to use the model to the folder determined by the `--out-path` argument (it will be created if it doesn't exist). 27 | 28 | No further action is necessary; just move the output so that all the files in the folder are in the (executable file path)/g2p/`language name` folder, and the program will use it to do phoneme conversion for all models it loads in that language. Make sure the language-name folder is capitalized. 29 | 30 | The output consists of three things: 31 | 32 | 1. **char2id.txt, phn2id.txt**: Two text files in format `TOKEN \t ID` that indicate the IDs that first go into the model (char) and are returned (phn). Automatically generated by the script. 33 | 2. **dict.txt**: Dictionary in format `WORD \t PHONETIC-SPELLING` in which phonetic spellings are looked up. Automatically re-exported (words forced to lowercase) by the script. 34 | 3. **model**: The actual G2P model, saved in Tensorflow SavedModel format. 35 | 36 | Due to the unreliability of the network, we only want to use it to guess novel words, so the program first does a dictionary lookup (semi-optimized with bucketed string search) and uses the model only if the word is not found. 37 | 38 | An example English model is zipped in the `models/` directory.
39 | -------------------------------------------------------------------------------- /g2p_train/config/default.yaml: -------------------------------------------------------------------------------- 1 | # Config file for G2P-English model. This one does 15 epochs and trains very quickly on GPU. 2 | 3 | gru_dims: 128 # Size of GRU (and embedding layer) 4 | batch_size: 4096 # Batch size 5 | val_per: 0.1 # % of dataset to be used for validation. Floating point, 1.0 = 100%; 0.5 = 50%, etc... 6 | epochs: 15 # Amount of epochs 7 | learning_rate: 0.006 # Learning rate 8 | -------------------------------------------------------------------------------- /g2p_train/config/longer.yaml: -------------------------------------------------------------------------------- 1 | # Config file for G2P model that trains for longer with softer learning, recommended 2 | 3 | gru_dims: 128 # Size of GRU (and embedding layer) 4 | batch_size: 4096 # Batch size 5 | val_per: 0.02 # % of dataset to be used for validation. Floating point, 1.0 = 100%; 0.5 = 50%, etc... 
def safemkdir(dirn):
    """Create directory ``dirn`` unless it already exists.

    Missing parent directories are created as well, so a nested output path
    such as ``out/English`` works, and an already-existing directory is not
    an error (no separate exists-check that could race with the creation).
    """
    os.makedirs(dirn, exist_ok=True)
def getmodel(input_shape, in_vocab_size, out_vocab_size,gru_size,in_lr):
    """Build and compile the sequential G2P network.

    Layer stack: Embedding -> bidirectional GRU (returning full sequences) ->
    time-distributed Dense(1024, relu) -> Dropout(0.5) -> time-distributed
    softmax over ``out_vocab_size`` classes.

    input_shape: shape of the padded input array; index 1 is used as the
        sequence length and ``input_shape[1:]`` as the per-sample shape.
    in_vocab_size: number of rows of the embedding table.
    out_vocab_size: number of output classes per time step.
    gru_size: embedding width and GRU units.
    in_lr: learning rate handed to the Adam optimizer.

    Returns the compiled model (sparse categorical cross-entropy, accuracy).
    """
    sample_shape = input_shape[1:]
    kl = tf.keras.layers

    embedding = kl.Embedding(in_vocab_size, gru_size, input_length=input_shape[1], input_shape=sample_shape)
    encoder = kl.Bidirectional(kl.GRU(gru_size, input_shape=sample_shape, return_sequences=True))
    hidden = kl.TimeDistributed(kl.Dense(1024, activation="relu"))
    dropout = kl.Dropout(0.5)
    classifier = kl.TimeDistributed(kl.Dense(out_vocab_size, activation="softmax"))

    model = tf.keras.models.Sequential([embedding, encoder, hidden, dropout, classifier])
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=tf.keras.optimizers.Adam(in_lr),
        metrics=['accuracy'],
    )
    return model
def exportdict(indict,outf):
    """Write the mapping ``indict`` to ``outf`` as one ``KEY<TAB>VALUE`` line per entry.

    The file is always written as UTF-8, so non-ASCII tokens (e.g. IPA
    symbols) do not depend on the platform's default encoding, and the
    ``with`` block closes the file even if a write fails.
    """
    with open(outf, "w", encoding="utf-8") as f:
        for token, idx in indict.items():
            f.write(token + "\t" + str(idx) + "\n")
Getting model") 152 | global cumodel 153 | cumodel = getmodel(txtpadded.shape,txtsize + 1,phnsize + 1,config["gru_dims"],config["learning_rate"]) 154 | 155 | x_train = txtpadded 156 | y_train = phnpadded 157 | 158 | print("Starting training...") 159 | cumodel.fit(x_train, y_train, batch_size=config["batch_size"], epochs=config["epochs"],validation_split=config["val_per"]) 160 | 161 | print("Starting export...") 162 | export_model(args.out_path,cumodel,phn_wi,txt_wi) 163 | 164 | print("Re-exporting dict...") 165 | outdict = open(os.path.join(args.out_path,"dict.txt"),"w",encoding="utf-8") 166 | 167 | for idx, w in enumerate(words): 168 | outdict.write(w + "\t" + phns[idx] + "\n") 169 | 170 | outdict.close() 171 | 172 | 173 | 174 | 175 | 176 | 177 | print("Done!") 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | if __name__ == "__main__": 187 | main() 188 | 189 | -------------------------------------------------------------------------------- /istftnettorch.cpp: -------------------------------------------------------------------------------- 1 | #include "istftnettorch.h" 2 | #include 3 | bool iSTFTNetTorch::Initialize(const std::string &VocoderPath) 4 | { 5 | torch::Device device(torch::kCPU); 6 | 7 | try { 8 | // Deserialize the ScriptModule from a file using torch::jit::load(). 
9 | 10 | std::string VCP = VocoderPath + ".pt"; 11 | 12 | Model = torch::jit::load(VCP,device); 13 | 14 | } 15 | catch (const c10::Error& e) { 16 | QMessageBox::critical(nullptr,"r",e.what_without_backtrace()); 17 | return false; 18 | 19 | } 20 | try{ 21 | std::string PostPath = VocoderPath + "-post.pt"; 22 | Post = torch::jit::load(PostPath,device); 23 | PostLoaded = true; 24 | } 25 | catch (const c10::Error& e){ 26 | PostLoaded = false; 27 | 28 | } 29 | 30 | 31 | 32 | return true; 33 | 34 | } 35 | 36 | TFTensor iSTFTNetTorch::DoInference(const TFTensor &InMel) 37 | { 38 | // without this memory consumption is 4x 39 | torch::NoGradGuard no_grad; 40 | torch::Device device(torch::kCPU); 41 | auto TorchMel = torch::tensor(InMel.Data,device).reshape(InMel.Shape).transpose(1,2); // [1, frames, n_mels] -> [1, n_mels, frames] 42 | 43 | 44 | 45 | try{ 46 | at::Tensor Output = Model({TorchMel}).toTensor().squeeze(); // (audio frames) 47 | if (PostLoaded) 48 | Output = Post({Output.unsqueeze(0).toType(at::ScalarType::Float)}).toTensor(); 49 | 50 | 51 | TFTensor Tens = VoxUtil::CopyTensor(Output); 52 | 53 | 54 | return Tens; 55 | } 56 | catch (const std::exception& e) { 57 | int msgboxID = MessageBox( 58 | NULL, 59 | (LPCWSTR)QString::fromStdString(e.what()).toStdWString().c_str(), 60 | (LPCWSTR)L"Account Details", 61 | MB_ICONWARNING | MB_CANCELTRYCONTINUE | MB_DEFBUTTON2 62 | ); 63 | 64 | 65 | return TFTensor(); 66 | 67 | } 68 | 69 | 70 | 71 | 72 | 73 | } 74 | 75 | iSTFTNetTorch::iSTFTNetTorch() 76 | { 77 | 78 | } 79 | -------------------------------------------------------------------------------- /istftnettorch.h: -------------------------------------------------------------------------------- 1 | #ifndef ISTFTNETTORCH_H 2 | #define ISTFTNETTORCH_H 3 | #include "MultiBandMelGAN.h" 4 | 5 | class iSTFTNetTorch : public MultiBandMelGAN 6 | { 7 | private: 8 | torch::jit::script::Module Model; 9 | torch::jit::script::Module Post; 10 | 11 | bool PostLoaded; 12 | 13 | public: 
// Application entry point: creates the Qt application, applies the dark style,
// wraps the main window in the frameless window frame and runs the event loop.
// <- Returns: the exit code produced by QApplication::exec().
int main(int argc, char *argv[])
{
    // Registered before the QApplication object is constructed
    QCoreApplication::addLibraryPath("./");
    QApplication a(argc, argv);
    a.setStyle(new DarkStyle);

    FramelessWindow framelessWindow;
    framelessWindow.setWindowTitle("TensorVox");
    framelessWindow.setWindowIcon(QIcon("://res/stdico.png"));

    // The main window keeps a pointer back to the frame that hosts it
    MainWindow *mainWindow = new MainWindow;
    mainWindow->pDarkFw = &framelessWindow;

    // NOTE(review): presumably setContent() takes ownership of mainWindow - confirm in FramelessWindow
    framelessWindow.setContent(mainWindow);
    framelessWindow.show();


    return a.exec();
}
{ 20 | return false; 21 | 22 | } 23 | CurrentRepo = InTTSRepo; 24 | return true; 25 | 26 | } 27 | -------------------------------------------------------------------------------- /melgen.h: -------------------------------------------------------------------------------- 1 | #ifndef MELGEN_H 2 | #define MELGEN_H 3 | 4 | 5 | 6 | #include "ext/CppFlow/ops.h" 7 | #include "ext/CppFlow/model.h" 8 | #include "VoxCommon.hpp" 9 | 10 | #include 11 | 12 | // MelGen: base virtual class for mel generators 13 | class MelGen 14 | { 15 | private: 16 | 17 | public: 18 | ETTSRepo::Enum CurrentRepo; 19 | MelGen(); 20 | MelGen(const std::string& SavedModelFolder,ETTSRepo::Enum InTTSRepo); 21 | 22 | 23 | // Generic inference function 24 | // Utilize ArgsFloat and ArgsInt for additional arguments for certain models 25 | virtual TFTensor DoInference(const std::vector& InputIDs,const std::vector& ArgsFloat,const std::vector ArgsInt, int32_t SpeakerID = 0, int32_t EmotionID = -1) = 0; 26 | 27 | /* 28 | Initialize and load the model 29 | 30 | -> SavedModelFolder: Folder where the .pb, variables, and other characteristics of the exported SavedModel 31 | <- Returns: (bool)Success 32 | */ 33 | virtual bool Initialize(const std::string& SavedModelFolder, ETTSRepo::Enum InTTSRepo); 34 | 35 | 36 | std::unique_ptr CurrentMdl; 37 | 38 | inline ETTSRepo::Enum GetCurrentRepo() {return CurrentRepo;} 39 | 40 | }; 41 | 42 | #endif // MELGEN_H 43 | -------------------------------------------------------------------------------- /modelinfodlg.cpp: -------------------------------------------------------------------------------- 1 | #include "modelinfodlg.h" 2 | #include "ui_modelinfodlg.h" 3 | #include 4 | 5 | ModelInfoDlg::ModelInfoDlg(QWidget *parent) : 6 | QDialog(parent), 7 | ui(new Ui::ModelInfoDlg) 8 | { 9 | ui->setupUi(this); 10 | } 11 | 12 | ModelInfoDlg::~ModelInfoDlg() 13 | { 14 | delete ui; 15 | } 16 | 17 | void ModelInfoDlg::SetInfo(const QString &ModelName, const QString &Info, int32_t 
InVersion, const QString &Author, const QString &Repo, const QString &MelGen, const QString &Vocoder, uint32_t SampleRate) 18 | { 19 | ui->lblAuthor->setText("Author: " + Author); 20 | ui->lblVersion->setText("Version: " + QString::number(InVersion) + " "); 21 | ui->redtModelInfo->setText(QString(Info).replace("(/NL)","\n")); 22 | 23 | 24 | ui->lblModelTitle->setText(ModelName); 25 | 26 | QString ArchShow = "Architecture: " + Repo + " " + MelGen; 27 | 28 | if (Vocoder.size()) 29 | ArchShow += " & " + Vocoder; 30 | 31 | ui->lblModelArchitecture->setText(ArchShow); 32 | ui->lblSampleRate->setText("Sampling rate: " + QString::number(SampleRate / 1000) + "KHz"); 33 | 34 | QString ImgPath = QApplication::applicationDirPath() + "/models/" + ModelName + "/image.png"; 35 | if (QFile::exists(ImgPath)) 36 | { 37 | ui->lblImg->setPixmap(QPixmap::fromImage(QImage(ImgPath))); 38 | 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /modelinfodlg.h: -------------------------------------------------------------------------------- 1 | #ifndef MODELINFODLG_H 2 | #define MODELINFODLG_H 3 | 4 | #include 5 | 6 | namespace Ui { 7 | class ModelInfoDlg; 8 | } 9 | 10 | class ModelInfoDlg : public QDialog 11 | { 12 | Q_OBJECT 13 | 14 | public: 15 | explicit ModelInfoDlg(QWidget *parent = nullptr); 16 | ~ModelInfoDlg(); 17 | 18 | void SetInfo(const QString& ModelName,const QString& Info,int32_t InVersion,const QString& Author,const QString& Repo,const QString& MelGen,const QString& Vocoder,uint32_t SampleRate); 19 | 20 | private: 21 | Ui::ModelInfoDlg *ui; 22 | }; 23 | 24 | #endif // MODELINFODLG_H 25 | -------------------------------------------------------------------------------- /modelinfodlg.ui: -------------------------------------------------------------------------------- 1 | 2 | 3 | ModelInfoDlg 4 | 5 | 6 | 7 | 0 8 | 0 9 | 576 10 | 531 11 | 12 | 13 | 14 | Dialog 15 | 16 | 17 | 18 | 19 | 20 | 21 | Verdana 22 | 16 23 | 24 | 25 | 26 | 
Model Name 27 | 28 | 29 | Qt::AlignCenter 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 600 40 | 256 41 | 42 | 43 | 44 | 45 | 46 | 47 | :/res/noim.png 48 | 49 | 50 | true 51 | 52 | 53 | Qt::AlignCenter 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | Verdana 64 | 10 65 | 66 | 67 | 68 | true 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | Author: Anonymous 78 | 79 | 80 | 81 | 82 | 83 | 84 | Qt::Horizontal 85 | 86 | 87 | 88 | 40 89 | 20 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | Sampling rate: 22KHz 98 | 99 | 100 | 101 | 102 | 103 | 104 | Qt::Horizontal 105 | 106 | 107 | 108 | 40 109 | 20 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | Version: 1 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | Architecture: TensorflowTTS FastSpeech2 & Multi-Band MelGAN 129 | 130 | 131 | 132 | 133 | 134 | 135 | Qt::Horizontal 136 | 137 | 138 | 139 | 40 140 | 20 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | Qt::Horizontal 151 | 152 | 153 | QDialogButtonBox::Ok 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | buttonBox 165 | accepted() 166 | ModelInfoDlg 167 | accept() 168 | 169 | 170 | 248 171 | 254 172 | 173 | 174 | 157 175 | 274 176 | 177 | 178 | 179 | 180 | buttonBox 181 | rejected() 182 | ModelInfoDlg 183 | reject() 184 | 185 | 186 | 316 187 | 260 188 | 189 | 190 | 286 191 | 274 192 | 193 | 194 | 195 | 196 | 197 | -------------------------------------------------------------------------------- /phddialog.cpp: -------------------------------------------------------------------------------- 1 | #include "phddialog.h" 2 | #include "ui_phddialog.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | PhdDialog::PhdDialog(QWidget *parent) : 8 | QDialog(parent), 9 | ui(new Ui::PhdDialog) 10 | { 11 | ui->setupUi(this); 12 | ui->tbDict->horizontalHeader()->setStretchLastSection(true); 13 | 14 | } 15 | 16 | PhdDialog::~PhdDialog() 17 | { 18 | delete ui; 19 | } 20 | 21 | int PhdDialog::exec() 22 | { 23 | // Populate the list 24 | 25 | 
PopulateWithEntries(); 26 | 27 | 28 | // ui->tbDict->setColumnWidth(0,ui->tbDict->width() / 2); 29 | // ui->tbDict->setColumnWidth(1,ui->tbDict->width() / 2); 30 | return QDialog::exec(); 31 | } 32 | 33 | void PhdDialog::accept() 34 | { 35 | // Validate input 36 | for (int i = 0; i < ui->tbDict->rowCount();++i) 37 | { 38 | if (ui->tbDict->item(i,0)->text().isEmpty() 39 | || ui->tbDict->item(i,0)->text() == " " || 40 | ui->tbDict->item(i,1)->text().isEmpty()) 41 | { 42 | QMessageBox::critical(this,"Invalid input","None of the cells can be empty, and words cannot be spaces. Check your input and try again."); 43 | return; 44 | 45 | } 46 | 47 | 48 | } 49 | 50 | // Now clear and run second loop 51 | 52 | Entrs.clear(); 53 | Entrs.reserve((size_t)ui->tbDict->rowCount()); 54 | // Second loop 55 | for (int i = 0; i < ui->tbDict->rowCount();++i) 56 | { 57 | DictEntry de; 58 | de.Word = ui->tbDict->item(i,0)->text().toStdString(); 59 | de.PhSpelling = ui->tbDict->item(i,1)->text().toStdString(); 60 | de.Language = ui->tbDict->item(i,2)->text().toStdString(); 61 | Entrs.push_back(de); 62 | 63 | 64 | 65 | } 66 | 67 | QDialog::accept(); 68 | } 69 | 70 | void PhdDialog::on_btnAdd_clicked() 71 | { 72 | ui->tbDict->insertRow(ui->tbDict->rowCount()); 73 | ui->tbDict->scrollToItem(ui->tbDict->item(ui->tbDict->rowCount() - 1,0)); 74 | 75 | QTableWidgetItem* LangItem = new QTableWidgetItem(QString::fromStdString(CurrentLang)); 76 | LangItem->setFlags(LangItem->flags() ^ Qt::ItemIsEditable); 77 | 78 | ui->tbDict->setItem(ui->tbDict->rowCount() - 1,2,LangItem); 79 | 80 | } 81 | 82 | void PhdDialog::PopulateWithEntries() 83 | { 84 | ui->tbDict->clearContents(); 85 | ui->tbDict->setRowCount((int)Entrs.size()); 86 | for (size_t i = 0;i < Entrs.size();++i) 87 | { 88 | ui->tbDict->setItem((int)i,0,new QTableWidgetItem(QString::fromStdString(Entrs[i].Word))); 89 | ui->tbDict->setItem((int)i,1,new QTableWidgetItem(QString::fromStdString(Entrs[i].PhSpelling))); 90 | 91 | QTableWidgetItem* 
LangItem = new QTableWidgetItem(QString::fromStdString(Entrs[i].Language)); 92 | LangItem->setFlags(LangItem->flags() ^ Qt::ItemIsEditable); 93 | 94 | ui->tbDict->setItem((int)i,2,LangItem); 95 | 96 | 97 | } 98 | 99 | } 100 | 101 | void PhdDialog::on_btnRemove_clicked() 102 | { 103 | QList seli = ui->tbDict->selectedItems(); 104 | QList::iterator It = seli.begin(); 105 | while (It != seli.end()) 106 | { 107 | QTableWidgetItem* item = *It; 108 | ui->tbDict->removeRow(item->row()); 109 | 110 | ++It; 111 | } 112 | } 113 | 114 | void PhdDialog::on_btnImport_clicked() 115 | { 116 | QString fnamei = QFileDialog::getOpenFileName(this, tr("Open dictionary to import"), QString(), tr("DeltaVox Phonetic Dictionary Files (*.phd)")); 117 | 118 | if (fnamei == "") 119 | return; 120 | 121 | PhoneticDict Pd; 122 | if (!Pd.Import(fnamei)){ 123 | QMessageBox::critical(this,"Error","Failed to import this file."); 124 | return; 125 | } 126 | 127 | Entrs.reserve(Entrs.size() + Pd.Entries.size()); 128 | for (DictEntry& De : Pd.Entries ) 129 | { 130 | Entrs.push_back(De); 131 | 132 | 133 | } 134 | PopulateWithEntries(); 135 | 136 | 137 | 138 | } 139 | 140 | void PhdDialog::on_tbDict_cellChanged(int row, int column) 141 | { 142 | if (row != 0) 143 | return; 144 | 145 | } 146 | -------------------------------------------------------------------------------- /phddialog.h: -------------------------------------------------------------------------------- 1 | #ifndef PHDDIALOG_H 2 | #define PHDDIALOG_H 3 | 4 | #include 5 | #include "phoneticdict.h" 6 | namespace Ui { 7 | class PhdDialog; 8 | } 9 | 10 | class PhdDialog : public QDialog 11 | { 12 | Q_OBJECT 13 | 14 | public: 15 | explicit PhdDialog(QWidget *parent = nullptr); 16 | ~PhdDialog(); 17 | 18 | int exec() override; 19 | void accept() override; 20 | 21 | std::vector Entrs; 22 | 23 | 24 | std::string CurrentLang; 25 | private slots: 26 | void on_btnAdd_clicked(); 27 | 28 | void on_btnRemove_clicked(); 29 | 30 | void 
on_btnImport_clicked(); 31 | 32 | void on_tbDict_cellChanged(int row, int column); 33 | 34 | private: 35 | void PopulateWithEntries(); 36 | Ui::PhdDialog *ui; 37 | }; 38 | 39 | #endif // PHDDIALOG_H 40 | -------------------------------------------------------------------------------- /phddialog.ui: -------------------------------------------------------------------------------- 1 | 2 | 3 | PhdDialog 4 | 5 | 6 | 7 | 0 8 | 0 9 | 640 10 | 480 11 | 12 | 13 | 14 | 15 | Verdana 16 | 9 17 | 18 | 19 | 20 | Phonetic Dictionary 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | Verdana 30 | 8 31 | 32 | 33 | 34 | 35 | Word 36 | 37 | 38 | 39 | 40 | Phonetic spelling 41 | 42 | 43 | 44 | 45 | Language 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | Add 56 | 57 | 58 | 59 | 60 | 61 | 62 | Import 63 | 64 | 65 | 66 | 67 | 68 | 69 | Remove 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | Qt::Horizontal 81 | 82 | 83 | QDialogButtonBox::Cancel|QDialogButtonBox::Ok 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | buttonBox 93 | accepted() 94 | PhdDialog 95 | accept() 96 | 97 | 98 | 248 99 | 254 100 | 101 | 102 | 157 103 | 274 104 | 105 | 106 | 107 | 108 | buttonBox 109 | rejected() 110 | PhdDialog 111 | reject() 112 | 113 | 114 | 316 115 | 260 116 | 117 | 118 | 286 119 | 274 120 | 121 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /phonemizer.cpp: -------------------------------------------------------------------------------- 1 | #include "phonemizer.h" 2 | #include 3 | #include "ext/ZCharScanner.h" 4 | 5 | #include 6 | int32_t GetID(const std::vector& In, const std::string &InStr) 7 | { 8 | for (const IdStr& It : In) 9 | if (It.STR == InStr) 10 | return It.ID; 11 | 12 | return -1; 13 | } 14 | 15 | std::string GetSTR(const std::vector& In, int32_t InID) 16 | { 17 | for (const IdStr& It : In) 18 | if (It.ID == InID) 19 | return It.STR; 20 | 21 | return ""; 22 | 23 | } 24 | 25 | 26 | 27 | std::vector 
Phonemizer::GetDelimitedFile(const std::string &InFname) 28 | { 29 | 30 | 31 | std::ifstream InFile (InFname); 32 | 33 | int32_t CuID; 34 | std::string Tok; 35 | std::vector RetVec; 36 | 37 | 38 | std::string Line; 39 | while (std::getline(InFile, Line)) { 40 | 41 | if (Line.find("\t") == std::string::npos) 42 | continue; 43 | 44 | 45 | ZStringDelimiter Deline(Line); 46 | Deline.AddDelimiter("\t"); 47 | 48 | CuID = stoi(Deline[1]); 49 | Tok = Deline[0]; 50 | 51 | 52 | RetVec.push_back(IdStr{CuID,Tok}); 53 | 54 | } 55 | 56 | return RetVec; 57 | 58 | 59 | } 60 | 61 | void Phonemizer::LoadDictionary(const std::string &InDictFn) 62 | { 63 | 64 | std::ifstream InFile (InDictFn); 65 | 66 | std::string Word; 67 | std::string Phn; 68 | 69 | 70 | if (MapDict.size()) 71 | MapDict.clear(); 72 | 73 | 74 | std::string Line; 75 | while (std::getline(InFile, Line)) { 76 | 77 | if (Line.find("\t") == std::string::npos) 78 | continue; 79 | 80 | 81 | ZStringDelimiter Deline(Line); 82 | Deline.AddDelimiter("\t"); 83 | 84 | Word = Deline[0]; 85 | Phn = Deline[1]; 86 | 87 | MapDict.insert({Word,Phn}); 88 | 89 | 90 | } 91 | 92 | 93 | } 94 | 95 | std::string Phonemizer::DictLookup(const std::string &InWord) 96 | { 97 | auto It = MapDict.find(InWord); 98 | 99 | if (It == MapDict.end()) 100 | return ""; 101 | 102 | return It->second; 103 | 104 | 105 | } 106 | // To remove from the string before dicting 107 | const std::u32string StripPonct = U",.;!?"; 108 | 109 | 110 | std::string Phonemizer::CleanWord(const std::string &InW) 111 | { 112 | // U32string = guaranteed 1 char = 1 value 113 | std::u32string Word = VoxUtil::StrToU32(InW); 114 | 115 | 116 | std::u32string RetWord; 117 | RetWord.reserve(Word.size()); 118 | 119 | for (auto Ch : Word){ 120 | if (StripPonct.find(Ch) == std::u32string::npos) 121 | RetWord.push_back(Ch); 122 | } 123 | 124 | return VoxUtil::U32ToStr(RetWord); 125 | } 126 | 127 | 128 | Phonemizer::Phonemizer() 129 | { 130 | IsMinimal = true; 131 | 132 | } 133 | 134 | 
bool Phonemizer::Initialize(const std::string InPath, bool Minimal) 135 | { 136 | IsMinimal = Minimal; 137 | 138 | 139 | 140 | // Load char indices 141 | CharId = GetDelimitedFile(InPath + "/char2id.txt"); 142 | 143 | // If we're doing minimal loading then stop here 144 | if (IsMinimal) 145 | return true; 146 | 147 | 148 | PhnId = GetDelimitedFile(InPath + "/phn2id.txt"); 149 | 150 | // Load model 151 | G2pModel.Initialize(InPath + "/model"); 152 | 153 | LoadDictionary(InPath + "/dict.txt"); 154 | 155 | 156 | 157 | 158 | IsMinimal = false; 159 | return true; 160 | 161 | 162 | } 163 | 164 | 165 | 166 | 167 | std::string Phonemizer::ProcessWord(const std::string &InWord,float Temperature) 168 | { 169 | if (IsMinimal) 170 | return InWord; 171 | 172 | 173 | // First we try dictionary lookup 174 | // This is because the g2p model can be unreliable, we only want to use it for novel sentences 175 | 176 | std::string PhnDict = DictLookup(CleanWord(InWord)); 177 | if (!PhnDict.empty()) 178 | return PhnDict; 179 | 180 | std::vector InIndexes; 181 | std::u32string IterStr = VoxUtil::StrToU32(InWord); 182 | 183 | InIndexes.reserve(IterStr.size()); 184 | 185 | 186 | // Turn word into indices 187 | for (const char32_t ch : IterStr) 188 | { 189 | std::u32string Single(1,ch); 190 | int32_t Idx = GetID(CharId,VoxUtil::U32ToStr(Single)); 191 | 192 | if (Idx != -1) 193 | InIndexes.push_back(Idx); 194 | 195 | 196 | } 197 | 198 | TFTensor PhnPrediction = G2pModel.DoInference(InIndexes,Temperature); 199 | 200 | 201 | std::string RetStr = ""; 202 | bool FirstIter = true; 203 | 204 | for (int32_t PhnIdx : PhnPrediction.Data) 205 | { 206 | std::string PhnTxt = GetSTR(PhnId,PhnIdx); 207 | if (!PhnTxt.empty()) 208 | { 209 | if (!FirstIter) 210 | RetStr.append(" "); 211 | 212 | RetStr.append(PhnTxt); 213 | 214 | } 215 | 216 | FirstIter = false; 217 | } 218 | 219 | 220 | 221 | return RetStr; 222 | 223 | } 224 | 225 | std::string Phonemizer::GetPhnLanguage() const 226 | { 227 | return 
PhnLanguage; 228 | } 229 | 230 | void Phonemizer::SetPhnLanguage(const std::string &value) 231 | { 232 | 233 | PhnLanguage = value; 234 | } 235 | 236 | std::string Phonemizer::GetGraphemeChars() 237 | { 238 | 239 | std::string RetAllowed = ""; 240 | for (const IdStr& Idx : CharId) 241 | RetAllowed.append(Idx.STR); 242 | 243 | return RetAllowed; 244 | 245 | } 246 | 247 | Phonemizer::~Phonemizer() 248 | { 249 | 250 | } 251 | 252 | 253 | 254 | 255 | bool operator<(const StrStr &right, const StrStr &left) 256 | { 257 | return right.Word.length() < left.Word.length(); 258 | } 259 | -------------------------------------------------------------------------------- /phonemizer.h: -------------------------------------------------------------------------------- 1 | #ifndef PHONEMIZER_H 2 | #define PHONEMIZER_H 3 | #include "tfg2p.h" 4 | #include 5 | #include 6 | #include 7 | 8 | struct IdStr{ 9 | int32_t ID; 10 | std::string STR; 11 | }; 12 | 13 | 14 | struct StrStr{ 15 | std::string Word; 16 | std::string Phn; 17 | }; 18 | 19 | // Length, start index in vec 20 | typedef std::pair VBucket; 21 | 22 | class Phonemizer 23 | { 24 | private: 25 | TFG2P G2pModel; 26 | 27 | std::vector CharId; 28 | std::vector PhnId; 29 | 30 | std::unordered_map MapDict; 31 | 32 | 33 | std::string NumTxtLang; 34 | 35 | bool IsMinimal; 36 | 37 | 38 | 39 | 40 | std::vector GetDelimitedFile(const std::string& InFname); 41 | 42 | void LoadDictionary(const std::string& InDictFn); 43 | 44 | std::string DictLookup(const std::string& InWord); 45 | 46 | std::string CleanWord(const std::string& InW); 47 | 48 | 49 | 50 | std::string PhnLanguage; 51 | public: 52 | std::string PhnLangID; 53 | public: 54 | Phonemizer(); 55 | /* 56 | * Initialize a phonemizer 57 | * Expects: (if Minimal == false) 58 | * - Two files consisting in TOKEN \t ID: 59 | * -- char2id.txt: Translation from input character to ID the model can accept 60 | * -- phn2id.txt: Translation from output ID from the model to phoneme 61 | * - A model/ 
// Legacy language name -> current language name
const std::map<std::string, std::string> LegToV1{
    {"English","English-ARPA"},
    {"Spanish","Spanish-GlobalPhone"}
};

// Replace LangStr in place with its current name when it is one of the legacy names
// listed in LegToV1; any other value is left unchanged.
void AutoConvertToV1(std::string& LangStr){
    const auto Match = LegToV1.find(LangStr);

    // Not a legacy name: nothing to do
    if (Match == LegToV1.end())
        return;

    LangStr = Match->second;
}
ofi.Open(exfn.toStdString(),EZFOpenMode::BinaryWrite); 43 | 44 | ofi << Entries; 45 | ofi.Close(); 46 | 47 | 48 | } 49 | 50 | bool PhoneticDict::Import(const QString &infn) 51 | { 52 | ZFile fi; 53 | if (!fi.Open(infn.toStdString(),EZFOpenMode::BinaryRead)) 54 | return false; 55 | 56 | 57 | if (fi.GetFileLength() == 0){ 58 | fi.Close(); 59 | return true; 60 | 61 | } 62 | 63 | fi >> Entries; 64 | 65 | fi.Close(); 66 | 67 | 68 | 69 | return true; 70 | 71 | 72 | 73 | } 74 | 75 | 76 | bool operator==(const DictEntry &left, const std::string &right) 77 | { 78 | return left.Word == right; 79 | 80 | 81 | } 82 | -------------------------------------------------------------------------------- /phoneticdict.h: -------------------------------------------------------------------------------- 1 | #ifndef PHONETICDICT_H 2 | #define PHONETICDICT_H 3 | #include "ext/ZFile.h" 4 | #include 5 | #include 6 | struct DictEntry{ 7 | std::string Word; 8 | std::string PhSpelling; 9 | std::string Language; 10 | }; 11 | 12 | 13 | // Check if the base word is equal to this string 14 | bool operator==(const DictEntry& left,const std::string& right); 15 | 16 | ZFILE_OOVR(DictEntry,entr); 17 | 18 | ZFILE_IOVR(DictEntry,inentr); 19 | class PhoneticDict 20 | { 21 | public: 22 | PhoneticDict(); 23 | 24 | void Export(const QString& exfn); 25 | bool Import(const QString &infn); 26 | 27 | std::vector Entries; 28 | 29 | private: 30 | 31 | }; 32 | 33 | #endif // PHONETICDICT_H 34 | -------------------------------------------------------------------------------- /phonetichighlighter.cpp: -------------------------------------------------------------------------------- 1 | #include "phonetichighlighter.h" 2 | 3 | 4 | PhoneticHighlighter::PhoneticHighlighter(QTextDocument *parent) : QSyntaxHighlighter(parent) 5 | { 6 | 7 | QString MatchExp = "\\{(\\s*?.*?)*?\\}"; 8 | PhonemeFormat.setForeground(Qt::magenta); 9 | PhonemeFormat.setFontWeight(QFont::Bold); 10 | PhonemeExp = QRegularExpression(MatchExp); 11 | 
12 | QString SingleExp = "@.\\S*"; 13 | SinglePhonemeExp = QRegularExpression(SingleExp); 14 | 15 | QString LongExp = "\\b\\w{23,}"; 16 | TooLongExp = QRegularExpression(LongExp); 17 | 18 | ErrorFormat = PhonemeFormat; 19 | ErrorFormat.setForeground(Qt::red); 20 | ErrorFormat.setBackground(Qt::black); 21 | 22 | 23 | 24 | 25 | 26 | 27 | } 28 | 29 | void PhoneticHighlighter::highlightBlock(const QString &text) 30 | { 31 | 32 | // Phoneme 33 | HighlightRegex(text,PhonemeExp,PhonemeFormat); 34 | HighlightRegex(text,SinglePhonemeExp,PhonemeFormat); 35 | 36 | // Error 37 | HighlightRegex(text,TooLongExp,ErrorFormat); 38 | 39 | } 40 | 41 | void PhoneticHighlighter::HighlightRegex(const QString& Text,const QRegularExpression &Reg, const QTextCharFormat &Fmt) 42 | { 43 | QRegularExpressionMatchIterator MatchIter = Reg.globalMatch(Text); 44 | while (MatchIter.hasNext()) { 45 | QRegularExpressionMatch match = MatchIter.next(); 46 | setFormat(match.capturedStart(), match.capturedLength(), Fmt); 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /phonetichighlighter.h: -------------------------------------------------------------------------------- 1 | #ifndef PHONETICHIGHLIGHTER_H 2 | #define PHONETICHIGHLIGHTER_H 3 | #include 4 | #include 5 | class PhoneticHighlighter : public QSyntaxHighlighter 6 | { 7 | public: 8 | PhoneticHighlighter(QTextDocument *parent = 0); 9 | 10 | // This is public because the main window uses it 11 | QRegularExpression PhonemeExp; 12 | 13 | 14 | protected: 15 | void highlightBlock(const QString &text) override; 16 | private: 17 | 18 | void HighlightRegex(const QString &Text, const QRegularExpression& Reg, const QTextCharFormat& Fmt); 19 | QRegularExpression SinglePhonemeExp; 20 | QRegularExpression TooLongExp; 21 | QTextCharFormat PhonemeFormat; 22 | QTextCharFormat ErrorFormat; 23 | 24 | }; 25 | 26 | #endif // PHONETICHIGHLIGHTER_H 27 | 
-------------------------------------------------------------------------------- /res/clear64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/clear64.png -------------------------------------------------------------------------------- /res/infico.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/infico.png -------------------------------------------------------------------------------- /res/multiwav.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/multiwav.png -------------------------------------------------------------------------------- /res/noim.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/noim.png -------------------------------------------------------------------------------- /res/phoneticdico.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/phoneticdico.png -------------------------------------------------------------------------------- /res/random64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/random64.png -------------------------------------------------------------------------------- /res/refresh.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/refresh.png -------------------------------------------------------------------------------- /res/speak64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/speak64.png -------------------------------------------------------------------------------- /res/stdico.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/stdico.png -------------------------------------------------------------------------------- /res/wav.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/wav.png -------------------------------------------------------------------------------- /spectrogram.cpp: -------------------------------------------------------------------------------- 1 | #include "spectrogram.h" 2 | 3 | 4 | 5 | void Spectrogram::TimerTick() 6 | { 7 | if (!DoSlide) 8 | return; 9 | 10 | float RemSecs = ((float)timEndTick->remainingTime()) / 1000.f; 11 | float CurrentPos = TotSecs - RemSecs; 12 | float TickSet = CurrentPos/TotSecs; 13 | 14 | PlayRect->topLeft->setCoords(TickSet,0); 15 | 16 | layer("Lay2")->replot(); 17 | 18 | 19 | 20 | } 21 | 22 | void Spectrogram::EndSlide() 23 | { 24 | timGenericTick->stop(); 25 | timEndTick->stop(); 26 | PlayRect->topLeft->setCoords(1,0); 27 | 28 | layer("Lay2")->replot(); 29 | 30 | } 31 | 32 | size_t Spectrogram::Get2DIndex(size_t x, size_t y, size_t xSize) 33 | { 34 | return x + xSize*y; 35 | } 36 | 37 | 38 | Spectrogram::Spectrogram(QWidget *parent) : QCustomPlot(parent) 39 | { 40 | 41 | QBrush FillBrush(QColor(100,100,100)); 42 | this->setBackground(FillBrush); 43 | 
QColor White(255,255,255); 44 | QPen AxisPen(QColor(150,150,150)); 45 | xAxis->setTickLabelColor(White); 46 | yAxis->setTickLabelColor(White); 47 | 48 | xAxis->setBasePen(AxisPen); 49 | yAxis->setBasePen(AxisPen); 50 | 51 | yAxis->setLabel("Frequency"); 52 | xAxis->setLabel("Time"); 53 | 54 | 55 | // They show the wrong info 56 | 57 | xAxis->setTickLabels(false); 58 | yAxis->setTickLabels(false); 59 | 60 | 61 | xAxis->setTicks(false); 62 | yAxis->setTicks(false); 63 | xAxis->setLabelColor(White); 64 | yAxis->setLabelColor(White); 65 | QFont Fnt = QFont(font().family(), 10); 66 | 67 | xAxis->setLabelFont(Fnt); 68 | yAxis->setLabelFont(Fnt); 69 | 70 | 71 | 72 | PlayRect = new QCPItemRect(this); 73 | PlayRect->topLeft->setType(QCPItemPosition::ptViewportRatio); 74 | PlayRect->bottomRight->setType(QCPItemPosition::ptViewportRatio); 75 | 76 | 77 | 78 | // The rect is not visible without adding a layer, probably because we are using a more unusual type of plot 79 | addLayer("Lay2"); 80 | 81 | QPen RectPen(QColor(255,255,255,150)); 82 | QBrush RectBrush(QColor(200,200,200,75)); 83 | 84 | RectPen.setWidth(3); 85 | PlayRect->topLeft->setCoords(0,0); 86 | PlayRect->bottomRight->setCoords(1,1); 87 | PlayRect->setPen(RectPen); 88 | PlayRect->setBrush(RectBrush); 89 | PlayRect->setLayer("Lay2"); 90 | 91 | 92 | 93 | timGenericTick = new QTimer(this); 94 | timGenericTick->setInterval(10); 95 | timGenericTick->setSingleShot(false); 96 | 97 | timEndTick = new QTimer(this); 98 | timEndTick->setInterval(1000); 99 | timEndTick->setSingleShot(false); 100 | 101 | connect(timGenericTick,&QTimer::timeout,this,&Spectrogram::TimerTick); 102 | connect(timEndTick,&QTimer::timeout,this,&Spectrogram::EndSlide); 103 | 104 | DoSlide = false; 105 | 106 | 107 | } 108 | 109 | void Spectrogram::DoPlot(const TFTensor &InMel, float TimeInSeconds) 110 | { 111 | 112 | const TFTensor& Mel = InMel; 113 | 114 | 115 | const auto& Shp = Mel.Shape; 116 | 117 | 118 | 
Map->data()->setSize((int32_t)Shp[2],(int32_t)Shp[1]); 119 | 120 | Map->data()->setRange(QCPRange(0.0,(double)Shp[1]),QCPRange(0.0,(double)Shp[2])); 121 | for (int64_t x = 0; x < Shp[2];x++) 122 | { 123 | for (int64_t y = 0;y < Shp[1];y++) 124 | { 125 | size_t i = Get2DIndex(x,y,Shp[2]); 126 | Map->data()->setCell(x,y,(double)Mel.Data[i]); 127 | 128 | } 129 | 130 | 131 | } 132 | Map->rescaleDataRange(true); 133 | 134 | 135 | 136 | 137 | 138 | rescaleAxes(); 139 | 140 | replot(); 141 | 142 | TotSecs = TimeInSeconds; 143 | 144 | 145 | PlayRect->setVisible(true); 146 | 147 | PlayRect->topLeft->setCoords(1,0); 148 | 149 | timGenericTick->start(); 150 | 151 | timEndTick->start((int)(TimeInSeconds * 1000)); 152 | 153 | 154 | 155 | 156 | 157 | } 158 | -------------------------------------------------------------------------------- /spectrogram.h: -------------------------------------------------------------------------------- 1 | #ifndef SPECTROGRAM_H 2 | #define SPECTROGRAM_H 3 | 4 | #include "ext/qcustomplot.h" 5 | #include "VoxCommon.hpp" 6 | 7 | class Spectrogram : public QCustomPlot 8 | { 9 | public slots: 10 | void TimerTick(); 11 | void EndSlide(); 12 | private: 13 | inline size_t Get2DIndex(size_t x,size_t y,size_t xSize); 14 | 15 | QCPItemRect* PlayRect; 16 | 17 | QTimer* timGenericTick; 18 | QTimer* timEndTick; 19 | 20 | float TotSecs; 21 | 22 | 23 | public: 24 | bool DoSlide; 25 | Spectrogram(QWidget *parent = nullptr); 26 | 27 | void DoPlot(const TFTensor& InMel,float TimeInSeconds); 28 | 29 | QCPColorMap* Map; 30 | }; 31 | 32 | #endif // SPECTROGRAM_H 33 | -------------------------------------------------------------------------------- /stdres.qrc: -------------------------------------------------------------------------------- 1 | 2 | 3 | res/stdico.png 4 | res/phoneticdico.png 5 | res/infico.png 6 | res/refresh.png 7 | res/noim.png 8 | res/clear64.png 9 | res/multiwav.png 10 | res/random64.png 11 | res/wav.png 12 | res/speak64.png 13 | 14 | 15 | 
-------------------------------------------------------------------------------- /tacotron2.cpp: -------------------------------------------------------------------------------- 1 | #include "tacotron2.h" 2 | 3 | 4 | 5 | TFTensor Tacotron2::DoInferenceTFTTS(const std::vector &InputIDs, int32_t SpeakerID, int32_t EmotionID) 6 | { 7 | if (!CurrentMdl) 8 | throw std::exception("Tried to do inference on unloaded or invalid model!"); 9 | 10 | 11 | 12 | // Convenience reference so that we don't have to constantly derefer pointers. 13 | cppflow::model& Mdl = *CurrentMdl; 14 | 15 | 16 | // Define the tensors 17 | 18 | // This is the shape of the input IDs, our equivalent to tf.expand_dims. 19 | std::vector InputIDShape = { 1, (int64_t)InputIDs.size() }; 20 | 21 | 22 | 23 | cppflow::tensor input_ids{ InputIDs, InputIDShape }; 24 | cppflow::tensor speaker_ids{SpeakerID }; 25 | cppflow::tensor input_lengths{(int32_t)InputIDs.size() }; 26 | cppflow::tensor* emotion_ids = nullptr; 27 | 28 | 29 | // This is a multi-emotion model 30 | if (EmotionID != -1) 31 | { 32 | emotion_ids = new cppflow::tensor{std::vector{EmotionID}}; 33 | 34 | } 35 | 36 | TensorVec Inputs = {{"serving_default_input_ids:0",input_ids}, 37 | {"serving_default_input_lengths:0",input_lengths}, 38 | {"serving_default_speaker_ids:0",speaker_ids}}; 39 | 40 | 41 | 42 | // Define output tensor 43 | if (EmotionID != -1) 44 | Inputs.push_back({"serving_default_emotion_ids:0",*emotion_ids}); 45 | 46 | 47 | // Do inference 48 | 49 | // We only care about the after mel-after [1] and alignment history [3] 50 | auto Outputs = Mdl(Inputs,{"StatefulPartitionedCall:0","StatefulPartitionedCall:1","StatefulPartitionedCall:2","StatefulPartitionedCall:3"}); 51 | 52 | // Define output and return it 53 | TFTensor MelOut = VoxUtil::CopyTensor(Outputs[1]); 54 | Attention = VoxUtil::CopyTensor(Outputs[3]); 55 | 56 | 57 | // We allocated the emotion_ids cppflow::tensor dynamically, delete it 58 | if (emotion_ids) 59 | delete 
emotion_ids; 60 | 61 | // We could just straight out define it in the return statement, but I like it more this way 62 | 63 | return MelOut; 64 | } 65 | 66 | TFTensor Tacotron2::DoInferenceCoqui(const std::vector &InputIDs) 67 | { 68 | // Convenience reference so that we don't have to constantly derefer pointers. 69 | cppflow::model& Mdl = *CurrentMdl; 70 | 71 | 72 | // Define the tensors 73 | 74 | // This is the shape of the input IDs, our equivalent to tf.expand_dims. 75 | 76 | std::vector InputIDShape = { 1, (int64_t)InputIDs.size() }; 77 | cppflow::tensor input_ids{ InputIDs, InputIDShape }; 78 | 79 | 80 | TensorVec Inputs = {{"serving_default_characters:0",input_ids}}; 81 | 82 | 83 | // We only care about the after mel-after [1] and alignment history [2] 84 | auto Outputs = Mdl(Inputs,{"StatefulPartitionedCall:0","StatefulPartitionedCall:1","StatefulPartitionedCall:2","StatefulPartitionedCall:3"}); 85 | 86 | // Define output and return it 87 | TFTensor MelOut = VoxUtil::CopyTensor(Outputs[1]); 88 | 89 | 90 | // Coqui TT2 attention output is inverse of what our attention plotter expects, so we transpose it. 
91 | cppflow::tensor AttTransposed = cppflow::transpose(Outputs[2],cppflow::tensor{0,2,1}); 92 | Attention = VoxUtil::CopyTensor(AttTransposed); 93 | 94 | 95 | return MelOut; 96 | } 97 | 98 | Tacotron2::Tacotron2() 99 | { 100 | 101 | } 102 | 103 | TFTensor Tacotron2::DoInference(const std::vector &InputIDs, const std::vector &ArgsFloat, const std::vector ArgsInt, int32_t SpeakerID, int32_t EmotionID) 104 | { 105 | 106 | 107 | if (!CurrentMdl) 108 | throw std::runtime_error("Tried to do inference on unloaded or invalid model!"); 109 | 110 | if (GetCurrentRepo() == ETTSRepo::TensorflowTTS) 111 | return DoInferenceTFTTS(InputIDs,SpeakerID,EmotionID); 112 | else if (GetCurrentRepo() == ETTSRepo::CoquiTTS) 113 | return DoInferenceCoqui(InputIDs); 114 | else 115 | throw std::runtime_error("Unknown/unset/unimplemented TTS repo!!!"); 116 | 117 | } 118 | -------------------------------------------------------------------------------- /tacotron2.h: -------------------------------------------------------------------------------- 1 | #ifndef TACOTRON2_H 2 | #define TACOTRON2_H 3 | 4 | #include "melgen.h" 5 | 6 | class Tacotron2 : public MelGen 7 | { 8 | private: 9 | 10 | TFTensor DoInferenceTFTTS(const std::vector& InputIDs,int32_t SpeakerID = 0, int32_t EmotionID = -1); 11 | TFTensor DoInferenceCoqui(const std::vector& InputIDs); 12 | 13 | 14 | 15 | public: 16 | Tacotron2(); 17 | TFTensor Attention; 18 | 19 | /* 20 | Do inference on a Tacotron2 model. 21 | 22 | -> InputIDs: Input IDs of tokens for inference 23 | -> SpeakerID: ID of the speaker in the model to do inference on. If single speaker, always leave at 0. If multispeaker, refer to your model. 24 | 25 | <- Returns: TFTensor with shape {1,,80} containing contents of mel spectrogram. 
26 | */ 27 | TFTensor DoInference(const std::vector& InputIDs,const std::vector& ArgsFloat,const std::vector ArgsInt, int32_t SpeakerID = 0, int32_t EmotionID = -1); 28 | 29 | }; 30 | 31 | #endif // TACOTRON2_H 32 | -------------------------------------------------------------------------------- /tacotron2torch.cpp: -------------------------------------------------------------------------------- 1 | #include "tacotron2torch.h" 2 | 3 | Tacotron2Torch::Tacotron2Torch() 4 | { 5 | 6 | } 7 | 8 | bool Tacotron2Torch::Initialize(const std::string &SavedModelFolder, ETTSRepo::Enum InTTSRepo) 9 | { 10 | try { 11 | // Deserialize the ScriptModule from a file using torch::jit::load(). 12 | 13 | Model = torch::jit::load(SavedModelFolder); 14 | 15 | } 16 | catch (const c10::Error& e) { 17 | return false; 18 | 19 | } 20 | 21 | CurrentRepo = InTTSRepo; 22 | return true; 23 | 24 | } 25 | 26 | TFTensor Tacotron2Torch::DoInference(const std::vector &InputIDs, const std::vector &ArgsFloat, const std::vector ArgsInt, int32_t SpeakerID, int32_t EmotionID) 27 | { 28 | // without this memory consumption is 4x 29 | torch::NoGradGuard no_grad; 30 | 31 | 32 | std::vector IInputIDs; 33 | IInputIDs.reserve(InputIDs.size()); 34 | for (const int32_t& Id : InputIDs){ 35 | int64_t casted = (int64_t)Id; 36 | IInputIDs.push_back(casted); 37 | 38 | } 39 | 40 | 41 | 42 | torch::TensorOptions Opts = torch::TensorOptions().requires_grad(false); 43 | 44 | // This Tacotron2 always takes in speaker IDs 45 | if (SpeakerID == -1) 46 | SpeakerID = 0; 47 | 48 | auto InSpkid = torch::tensor({SpeakerID},Opts); 49 | auto InIDS = torch::tensor(IInputIDs, Opts).unsqueeze(0); 50 | 51 | 52 | 53 | std::vector inputs{ InSpkid,InIDS}; 54 | 55 | 56 | 57 | // Infer 58 | c10::IValue Output = Model(inputs); 59 | 60 | 61 | // Output = list (mel_outputs, mel_outputs_postnet, gate_outputs, alignments) 62 | 63 | auto OutputL = Output.toList(); 64 | 65 | auto MelTens = OutputL[1].get().toTensor(); 66 | auto AttTens = 
OutputL[3].get().toTensor();//.transpose(1,2); // [1, dec_t, enc_t ] -> [1, enc_t, dec_t] 67 | 68 | 69 | Attention = VoxUtil::CopyTensor(AttTens); 70 | 71 | 72 | return VoxUtil::CopyTensor(MelTens); 73 | 74 | 75 | 76 | } 77 | -------------------------------------------------------------------------------- /tacotron2torch.h: -------------------------------------------------------------------------------- 1 | #ifndef TACOTRON2TORCH_H 2 | #define TACOTRON2TORCH_H 3 | #include "melgen.h" 4 | 5 | class Tacotron2Torch : public MelGen 6 | { 7 | private: 8 | torch::jit::script::Module Model; 9 | 10 | public: 11 | 12 | TFTensor Attention; 13 | 14 | 15 | Tacotron2Torch(); 16 | /* 17 | Initialize and load the model 18 | 19 | -> SavedModelFolder: Folder where the TorchScript models are exported 20 | <- Returns: (bool)Success 21 | */ 22 | bool Initialize(const std::string& SavedModelFolder, ETTSRepo::Enum InTTSRepo); 23 | 24 | 25 | /* 26 | Do inference on a Tacotron2 model. 27 | 28 | -> InputIDs: Input IDs of tokens for inference 29 | -> SpeakerID: ID of the speaker in the model to do inference on. If single speaker, always leave at 0. If multispeaker, refer to your model. 30 | 31 | <- Returns: TFTensor with shape {1,,80} containing contents of mel spectrogram. 
32 | */ 33 | TFTensor DoInference(const std::vector& InputIDs,const std::vector& ArgsFloat,const std::vector ArgsInt, int32_t SpeakerID = 0, int32_t EmotionID = -1); 34 | 35 | }; 36 | 37 | #endif // TACOTRON2TORCH_H 38 | -------------------------------------------------------------------------------- /tfg2p.cpp: -------------------------------------------------------------------------------- 1 | #include "tfg2p.h" 2 | TFG2P::TFG2P() 3 | { 4 | G2P = nullptr; 5 | 6 | } 7 | 8 | TFG2P::TFG2P(const std::string &SavedModelFolder) 9 | { 10 | G2P = nullptr; 11 | 12 | Initialize(SavedModelFolder); 13 | } 14 | 15 | bool TFG2P::Initialize(const std::string &SavedModelFolder) 16 | { 17 | try { 18 | 19 | G2P = new cppflow::model(SavedModelFolder); 20 | 21 | } 22 | catch (...) { 23 | G2P = nullptr; 24 | return false; 25 | 26 | } 27 | return true; 28 | } 29 | 30 | TFTensor TFG2P::DoInference(const std::vector &InputIDs, float Temperature) 31 | { 32 | if (!G2P) 33 | throw std::exception("Tried to do inference on unloaded or invalid model!"); 34 | 35 | // Convenience reference so that we don't have to constantly derefer pointers. 36 | cppflow::model& Mdl = *G2P; 37 | 38 | 39 | // Convenience reference so that we don't have to constantly derefer pointers. 
40 | 41 | cppflow::tensor input_ids{ InputIDs, std::vector{(int64_t)InputIDs.size()}}; 42 | cppflow::tensor input_len{(int32_t)InputIDs.size()}; 43 | cppflow::tensor input_temp{Temperature}; 44 | 45 | 46 | 47 | 48 | 49 | auto Outs = Mdl({{"serving_default_input_ids:0",input_ids}, 50 | {"serving_default_input_len:0",input_len}, 51 | {"serving_default_input_temperature:0",input_temp}},{"StatefulPartitionedCall:0"}); 52 | 53 | TFTensor RetTensor = VoxUtil::CopyTensor(Outs[0]); 54 | 55 | return RetTensor; 56 | 57 | 58 | } 59 | 60 | TFG2P::~TFG2P() 61 | { 62 | if (G2P) 63 | delete G2P; 64 | 65 | } 66 | -------------------------------------------------------------------------------- /tfg2p.h: -------------------------------------------------------------------------------- 1 | #ifndef TFG2P_H 2 | #define TFG2P_H 3 | 4 | #include "VoxCommon.hpp" 5 | 6 | 7 | class TFG2P 8 | { 9 | private: 10 | cppflow::model* G2P; 11 | 12 | public: 13 | TFG2P(); 14 | TFG2P(const std::string& SavedModelFolder); 15 | 16 | /* 17 | Initialize and load the model 18 | 19 | -> SavedModelFolder: Folder where the .pb, variables, and other characteristics of the exported SavedModel 20 | <- Returns: (bool)Success 21 | */ 22 | bool Initialize(const std::string& SavedModelFolder); 23 | 24 | /* 25 | Do inference on a G2P-TF-RNN model. 26 | 27 | -> InputIDs: Input IDs of tokens for inference 28 | -> Temperature: Temperature of the RNN, values higher than 0.1 cause instability. 
29 | 30 | <- Returns: TFTensor containing phoneme IDs 31 | */ 32 | TFTensor DoInference(const std::vector& InputIDs, float Temperature = 0.1f); 33 | 34 | ~TFG2P(); 35 | 36 | }; 37 | 38 | #endif // TFG2P_H 39 | -------------------------------------------------------------------------------- /torchmoji.cpp: -------------------------------------------------------------------------------- 1 | #include "torchmoji.h" 2 | #include "ext/ZCharScanner.h" 3 | 4 | void TorchMoji::LoadDict(const std::string& Path) 5 | { 6 | if (Dictionary.size()) 7 | Dictionary.clear(); 8 | 9 | std::vector Lined = VoxUtil::GetLinedFile(Path); 10 | 11 | ZStringDelimiter Delim; 12 | Delim.AddDelimiter("\t"); 13 | 14 | for (const auto& Li : Lined){ 15 | Delim.SetText(Li); 16 | 17 | if (Delim.szTokens() < 2) 18 | continue; 19 | 20 | Dictionary.insert({Delim[0], std::stoi(Delim[1])}); 21 | } 22 | } 23 | 24 | std::vector TorchMoji::WordsToIDs(const std::vector& Words) 25 | { 26 | std::vector IDs(VoxCommon::TorchMojiLen,0); 27 | 28 | for (size_t i = 0; i < Words.size();i++) 29 | { 30 | if (i + 1 > VoxCommon::TorchMojiLen) 31 | break; 32 | 33 | auto Iter = Dictionary.find(Words[i]); 34 | 35 | if (Iter == Dictionary.end()) 36 | IDs[i] = 1; // unknown 37 | else 38 | IDs[i] = Iter->second; 39 | 40 | 41 | 42 | } 43 | 44 | return IDs; 45 | 46 | 47 | 48 | } 49 | 50 | TorchMoji::TorchMoji() 51 | { 52 | 53 | } 54 | 55 | TorchMoji::TorchMoji(const std::string &InitPath, const std::string &DPath) 56 | { 57 | Initialize(InitPath,DPath); 58 | 59 | } 60 | 61 | void TorchMoji::Initialize(const std::string &Path, const std::string &DictPath) 62 | { 63 | 64 | Model = torch::jit::load(Path); 65 | LoadDict(DictPath); 66 | } 67 | 68 | std::vector TorchMoji::Infer(const std::vector &Seq) 69 | { 70 | std::vector Input = WordsToIDs(Seq); 71 | 72 | auto InIDS = torch::tensor(Input).unsqueeze(0); // (1, TMLen) 73 | 74 | at::Tensor Output = Model({InIDS}).toTensor(); // (1, VoxCommon::TorchMojiEmbSize) 75 | 76 | Output = 
Output.squeeze(); // (TorchMojiEmbSize) 77 | 78 | TFTensor Tens = VoxUtil::CopyTensor(Output); 79 | 80 | 81 | return Tens.Data; 82 | 83 | 84 | 85 | } 86 | -------------------------------------------------------------------------------- /torchmoji.h: -------------------------------------------------------------------------------- 1 | #ifndef TORCHMOJI_H 2 | #define TORCHMOJI_H 3 | #include "VoxCommon.hpp" 4 | 5 | 6 | // TorchMoji: Emotion contextualizer model (Cookie design: skipping last layer and using hidden states to feed TTS model) 7 | // Allows for manipulation of emotion at inference time 8 | class TorchMoji 9 | { 10 | private: 11 | // Word, ID 12 | std::map Dictionary; 13 | 14 | torch::jit::script::Module Model; 15 | 16 | void LoadDict(const std::string& Path); 17 | 18 | std::vector WordsToIDs(const std::vector &Words); 19 | public: 20 | TorchMoji(); 21 | 22 | TorchMoji(const std::string& InitPath,const std::string& DPath); 23 | 24 | void Initialize(const std::string& Path,const std::string& DictPath); 25 | 26 | // Return hidden states of emotion state. 27 | // -> Seq: Vector of words 28 | // <- Returns float vec of size VoxCommon::TorchMojiEmbSize containing hidden states, ready to feed into TTS model. 
29 | std::vector Infer(const std::vector& Seq); 30 | }; 31 | 32 | #endif // TORCHMOJI_H 33 | -------------------------------------------------------------------------------- /track.cpp: -------------------------------------------------------------------------------- 1 | #include "track.h" 2 | 3 | #include 4 | 5 | Track::Track(QWidget *parent) 6 | : QCustomPlot(parent) 7 | , decoder(new QAudioDecoder(this)) 8 | { 9 | 10 | wavePlot = addGraph(); 11 | 12 | QBrush FillBrush(QColor(100,100,100)); 13 | this->setBackground(FillBrush); 14 | QPen ThePen(QColor(127,255,0)); 15 | wavePlot->setPen(ThePen); 16 | wavePlot->setBrush(FillBrush); 17 | 18 | yAxis->setVisible(false); 19 | xAxis->setVisible(false); 20 | 21 | // add independent layer for playrect and labels so we don't replot the entire thing every time 22 | 23 | addLayer("Playing"); 24 | setCurrentLayer("Playing"); 25 | layer("Playing")->setMode(QCPLayer::LayerMode::lmBuffered); 26 | 27 | 28 | PlayRect = new QCPItemRect(this); 29 | PlayRect->topLeft->setType(QCPItemPosition::ptViewportRatio); 30 | PlayRect->bottomRight->setType(QCPItemPosition::ptViewportRatio); 31 | 32 | 33 | QPen RectPen(QColor(255,255,255,150)); 34 | QBrush RectBrush(QColor(200,200,200,75)); 35 | 36 | RectPen.setWidth(3); 37 | PlayRect->topLeft->setCoords(0,0); 38 | PlayRect->bottomRight->setCoords(1,1); 39 | PlayRect->setPen(RectPen); 40 | PlayRect->setBrush(RectBrush); 41 | 42 | 43 | 44 | 45 | timGenericTick = new QTimer(this); 46 | timGenericTick->setInterval(10); 47 | timGenericTick->setSingleShot(false); 48 | 49 | timEndTick = new QTimer(this); 50 | timEndTick->setInterval(1000); 51 | timEndTick->setSingleShot(false); 52 | 53 | connect(timGenericTick,&QTimer::timeout,this,&Track::TimerTick); 54 | connect(timEndTick,&QTimer::timeout,this,&Track::EndSlide); 55 | 56 | SecsTxt = new QCPItemText(this); 57 | SecsTxt->setPositionAlignment(Qt::AlignTop|Qt::AlignLeft); 58 | SecsTxt->position->setType(QCPItemPosition::ptViewportRatio); 59 | 
SecsTxt->position->setCoords(0.02, 0.05); 60 | SecsTxt->setText("Ready"); 61 | SecsTxt->setFont(QFont(font().family(), 10)); 62 | SecsTxt->setColor(QColor(255,255,255)); 63 | SecsTxt->setClipToAxisRect(false); 64 | DoSlide = false; 65 | 66 | //wavePlot->setPen(ThePen); 67 | 68 | } 69 | 70 | Track::~Track() 71 | { 72 | delete decoder; 73 | // wavePlot delete auto ? 74 | } 75 | 76 | void Track::setSource(const QAudioBuffer &inbuffer) 77 | { 78 | buffer = inbuffer; 79 | 80 | 81 | setBuffer(); 82 | 83 | 84 | startPlaying(((float)buffer.duration()) / 1e+6); 85 | 86 | } 87 | 88 | void Track::setBuffer() 89 | { 90 | samples.clear(); 91 | qreal peak = getPeakValue(buffer.format()); 92 | const float *data = buffer.constData(); 93 | int count = buffer.sampleCount(); 94 | 95 | for (int i=0; i x(samples.size()); 105 | for (int i=0; iaddData(x, samples); 108 | yAxis->setRange(QCPRange(-1.0, 1.0)); 109 | 110 | xAxis->setRange(QCPRange(0, samples.size())); 111 | replot(); 112 | } 113 | 114 | void Track::startPlaying(float TimeInSecs) 115 | { 116 | //TickAdd = 1.f/( TimeInSecs / 0.025f ); 117 | TotSecs = TimeInSecs; 118 | 119 | 120 | timGenericTick->start(); 121 | 122 | timEndTick->start((int)(TimeInSecs * 1000)); 123 | 124 | 125 | } 126 | 127 | void Track::TimerTick() 128 | { 129 | if (!DoSlide) 130 | return; 131 | 132 | float RemSecs = ((float)timEndTick->remainingTime()) / 1000.f; 133 | float CurrentPos = TotSecs - RemSecs; 134 | TickSet = CurrentPos/TotSecs; 135 | 136 | PlayRect->topLeft->setCoords(TickSet,0); 137 | SetTimeLabel(CurrentPos,TotSecs); 138 | 139 | 140 | layer("Playing")->replot(); 141 | 142 | 143 | } 144 | 145 | void Track::EndSlide() 146 | { 147 | 148 | timGenericTick->stop(); 149 | timEndTick->stop(); 150 | PlayRect->topLeft->setCoords(1,0); 151 | SetTimeLabel(TotSecs,TotSecs); 152 | 153 | layer("Playing")->replot(); 154 | 155 | } 156 | 157 | void Track::SetTimeLabel(float Cur, float Remaining) 158 | { 159 | SecsTxt->setText(QString::number(Cur,'f',1) + " / " + 
QString::number(Remaining,'f',1) + " (sec)"); 160 | 161 | 162 | } 163 | 164 | /** 165 | * https://stackoverflow.com/questions/46947668/draw-waveform-from-raw-data-using-qaudioprobe 166 | * @brief Track::getPeakValue 167 | * @param format 168 | * @return The peak value 169 | */ 170 | qreal Track::getPeakValue(const QAudioFormat &format) 171 | { 172 | qreal ret(0); 173 | if (format.isValid()){ 174 | switch (format.sampleType()) { 175 | case QAudioFormat::Unknown: 176 | break; 177 | case QAudioFormat::Float: 178 | if (format.sampleSize() != 32) // other sample formats are not supported 179 | ret = 0; 180 | else 181 | ret = 1.00003; 182 | break; 183 | case QAudioFormat::SignedInt: 184 | if (format.sampleSize() == 32) 185 | #ifdef Q_OS_WIN 186 | ret = INT_MAX; 187 | #endif 188 | #ifdef Q_OS_UNIX 189 | ret = SHRT_MAX; 190 | #endif 191 | else if (format.sampleSize() == 16) 192 | ret = SHRT_MAX; 193 | else if (format.sampleSize() == 8) 194 | ret = CHAR_MAX; 195 | break; 196 | case QAudioFormat::UnSignedInt: 197 | if (format.sampleSize() == 32) 198 | ret = UINT_MAX; 199 | else if (format.sampleSize() == 16) 200 | ret = USHRT_MAX; 201 | else if (format.sampleSize() == 8) 202 | ret = UCHAR_MAX; 203 | break; 204 | default: 205 | break; 206 | } 207 | } 208 | return ret; 209 | } 210 | -------------------------------------------------------------------------------- /track.h: -------------------------------------------------------------------------------- 1 | #ifndef TRACK_H 2 | #define TRACK_H 3 | #include "ext/qcustomplot.h" 4 | #include 5 | 6 | 7 | // Copied from https://stackoverflow.com/questions/50277132/qt-audio-file-to-wave-like-audacity 8 | 9 | class QAudioDecoder; 10 | 11 | class Track : public QCustomPlot 12 | { 13 | Q_OBJECT 14 | 15 | public: 16 | Track(QWidget *parent = Q_NULLPTR); 17 | ~Track(); 18 | void setSource(const QAudioBuffer &inbuffer); 19 | 20 | public: 21 | bool DoSlide; 22 | 23 | 24 | void setBuffer(); 25 | void plot(); 26 | void startPlaying(float 
TimeInSecs); 27 | 28 | 29 | public slots: 30 | void TimerTick(); 31 | void EndSlide(); 32 | private: 33 | void SetTimeLabel(float Cur, float Remaining); 34 | QTimer* timGenericTick; 35 | QTimer* timEndTick; 36 | 37 | float TickSet; 38 | float TotSecs; 39 | 40 | QCPItemRect* PlayRect; 41 | QCPItemText* SecsTxt; 42 | 43 | qreal getPeakValue(const QAudioFormat& format); 44 | 45 | QAudioDecoder *decoder; 46 | QAudioBuffer buffer; 47 | QVector samples; 48 | QCPGraph *wavePlot; 49 | }; 50 | #endif // TRACK_H 51 | -------------------------------------------------------------------------------- /vits.cpp: -------------------------------------------------------------------------------- 1 | #include "vits.h" 2 | 3 | std::vector VITS::ZeroPadVec(const std::vector &InIDs) 4 | { 5 | std::vector NewIDs; 6 | NewIDs.reserve(InIDs.size() * 2); 7 | 8 | NewIDs.push_back(0); 9 | 10 | for (auto CharID : InIDs) 11 | { 12 | 13 | NewIDs.push_back((int64_t)CharID); 14 | NewIDs.push_back(0); 15 | 16 | 17 | } 18 | // Add final 0 19 | // NewIDs.push_back(0); 20 | 21 | 22 | return NewIDs; 23 | 24 | } 25 | 26 | VITS::VITS() 27 | { 28 | 29 | } 30 | 31 | bool VITS::Initialize(const std::string &SavedModelFolder, ETTSRepo::Enum InTTSRepo) 32 | { 33 | try { 34 | // Deserialize the ScriptModule from a file using torch::jit::load(). 
35 | 36 | Model = torch::jit::load(SavedModelFolder); 37 | 38 | } 39 | catch (const c10::Error& e) { 40 | return false; 41 | 42 | } 43 | 44 | CurrentRepo = InTTSRepo; 45 | return true; 46 | } 47 | 48 | TFTensor VITS::DoInference(const std::vector &InputIDs, const std::vector &ArgsFloat, const std::vector ArgsInt, int32_t SpeakerID, int32_t EmotionID) 49 | { 50 | // without this memory consumption is 4x 51 | torch::NoGradGuard no_grad; 52 | 53 | // TorchMoji hidden states are added to ArgsFloat 54 | const bool UsesTorchMoji = ArgsFloat.size() > 1; 55 | 56 | std::vector PaddedIDs; 57 | 58 | 59 | // Our current TM-enabled models don't use zero interspersion 60 | if (UsesTorchMoji) 61 | PaddedIDs.assign(InputIDs.begin(),InputIDs.end()); 62 | else 63 | PaddedIDs = ZeroPadVec(InputIDs); 64 | 65 | 66 | std::vector inLen = { (int64_t)PaddedIDs.size() }; 67 | 68 | 69 | // ZDisket: Is this really necessary? 70 | torch::TensorOptions Opts = torch::TensorOptions().requires_grad(false); 71 | 72 | auto InIDS = torch::tensor(PaddedIDs, Opts).unsqueeze(0); 73 | auto InLens = torch::tensor(inLen, Opts); 74 | auto InLenScale = torch::tensor({ ArgsFloat[0]}, Opts); 75 | 76 | 77 | 78 | std::vector inputs{ InIDS,InLens,InLenScale }; 79 | 80 | if (SpeakerID != -1){ 81 | auto InSpkid = torch::tensor({SpeakerID},Opts); 82 | inputs.push_back(InSpkid); 83 | } 84 | 85 | if (EmotionID != -1){ 86 | auto InEmid = torch::tensor({EmotionID},Opts); 87 | inputs.push_back(InEmid); 88 | } 89 | 90 | // Handle TorchMoji Emb 91 | if (UsesTorchMoji){ 92 | // Make a copy stripping first elem 93 | std::vector TMHidden(ArgsFloat.begin() + 1, ArgsFloat.end()); 94 | 95 | auto InMoji = torch::tensor(TMHidden,Opts).unsqueeze(0); 96 | inputs.push_back(InMoji); 97 | 98 | } 99 | 100 | // Infer 101 | 102 | c10::IValue Output = Model.get_method("infer_ts")(inputs); 103 | 104 | // Output = tuple (audio,att) 105 | 106 | auto OutputT = Output.toTuple(); 107 | 108 | // Grab audio 109 | // [1, frames] -> [frames] 110 | 
auto AuTens = OutputT.get()->elements()[0].toTensor().squeeze(); 111 | 112 | // Grab Attention 113 | // [1, 1, x, y] -> [x, y] -> [y,x] -> [1, y, x] 114 | auto AttTens = OutputT.get()->elements()[1].toTensor().squeeze().transpose(0,1).unsqueeze(0); 115 | 116 | Attention = VoxUtil::CopyTensor(AttTens); 117 | 118 | return VoxUtil::CopyTensor(AuTens); 119 | 120 | } 121 | -------------------------------------------------------------------------------- /vits.h: -------------------------------------------------------------------------------- 1 | #ifndef VITS_H 2 | #define VITS_H 3 | 4 | 5 | #include "melgen.h" 6 | 7 | 8 | 9 | 10 | 11 | // VITS is a fully E2E model; no separate vocoder needed 12 | class VITS : public MelGen 13 | { 14 | private: 15 | torch::jit::script::Module Model; 16 | 17 | // Most VITS model require zero-interspersed input IDs 18 | std::vector ZeroPadVec(const std::vector& InIDs); 19 | 20 | public: 21 | TFTensor Attention; 22 | 23 | VITS(); 24 | 25 | // Since VITS runs on PyTorch, we override the loader 26 | /* 27 | Initialize and load the model 28 | 29 | -> SavedModelFolder: Not a folder, but path to TorchScripted .pt file 30 | <- Returns: (bool)Success 31 | */ 32 | virtual bool Initialize(const std::string& SavedModelFolder, ETTSRepo::Enum InTTSRepo); 33 | 34 | 35 | /* 36 | Do inference on a VITS model. 37 | 38 | -> InputIDs: Input IDs of tokens for inference 39 | -> SpeakerID: ID of the speaker in the model to do inference on. If single speaker, always leave at 0. If multispeaker, refer to your model. 40 | -> ArgsFloat[0]: Length scale. 
41 | 42 | <- Returns: TFTensor with shape {frames} of audio data 43 | */ 44 | TFTensor DoInference(const std::vector& InputIDs,const std::vector& ArgsFloat,const std::vector ArgsInt, int32_t SpeakerID = 0, int32_t EmotionID = -1); 45 | }; 46 | 47 | #endif // VITS_H 48 | -------------------------------------------------------------------------------- /voicemanager.cpp: -------------------------------------------------------------------------------- 1 | #include "voicemanager.h" 2 | #define SAFE_DELETE(pdel)if (pdel){delete pdel;} 3 | #include 4 | 5 | Phonemizer* VoiceManager::LoadPhonemizer(const QString& InPhnLang,int32_t InLangNum) 6 | { 7 | 8 | for (Phonemizer*& Phn : Phonemizers) 9 | { 10 | if (Phn->GetPhnLanguage() == InPhnLang.toStdString()) 11 | return Phn; 12 | 13 | 14 | } 15 | 16 | 17 | Phonemizer* CreatePhn = new Phonemizer; 18 | 19 | // Initialize regularly or minimally 20 | CreatePhn->Initialize(QString(QCoreApplication::applicationDirPath() + "/g2p/" + InPhnLang).toStdString(), 21 | InLangNum == ETTSLanguageType::Char); 22 | 23 | CreatePhn->SetPhnLanguage(InPhnLang.toStdString()); 24 | 25 | 26 | Phonemizers.push_back(CreatePhn); 27 | 28 | return Phonemizers[Phonemizers.size() - 1]; 29 | 30 | 31 | } 32 | 33 | ESpeakPhonemizer *VoiceManager::LoadESpeakPhonemizer(const QString &InVoiceName) 34 | { 35 | for (ESpeakPhonemizer*& Phn : ENGPhonemizers) 36 | { 37 | if (Phn->GetVoiceName() == InVoiceName.toStdString()) 38 | return Phn; 39 | 40 | 41 | } 42 | 43 | ESpeakPhonemizer* CreatePhn = new ESpeakPhonemizer; 44 | CreatePhn->Initialize(QString(QCoreApplication::applicationDirPath() + "/g2p/eSpeak-NG").toStdString() 45 | ,InVoiceName.toStdString()); 46 | 47 | ENGPhonemizers.push_back(CreatePhn); 48 | 49 | return CreatePhn; 50 | 51 | } 52 | 53 | size_t VoiceManager::LoadVoice(const QString &Voname) 54 | { 55 | Voice* NuVoice = new Voice(QString(QCoreApplication::applicationDirPath() + "/models/" + Voname).toStdString(),Voname.toStdString(),nullptr); 56 | 57 | 
QString PLang = QString::fromStdString(NuVoice->GetInfo().s_Language_Fullname); 58 | 59 | Phonemizer* Phon = LoadPhonemizer(PLang,NuVoice->GetInfo().LangType); 60 | ESpeakPhonemizer* ENG_Phon = nullptr; 61 | 62 | if (NuVoice->GetInfo().s_eSpeakLang.size()){ 63 | ENG_Phon = LoadESpeakPhonemizer(QString::fromStdString(NuVoice->GetInfo().s_eSpeakLang)); 64 | 65 | 66 | } 67 | 68 | 69 | NuVoice->AddPhonemizer(Phon,ENG_Phon); 70 | 71 | std::string NumTxtPath = QString(QCoreApplication::applicationDirPath() + "/num2txt/" + 72 | QString::fromStdString(NuVoice->GetInfo().s_Language) + ".sor").toStdString(); 73 | 74 | NuVoice->LoadNumberText(NumTxtPath); 75 | 76 | Voices.push_back(NuVoice); 77 | Voices[Voices.size() - 1]->SetDictEntries(ManDict); 78 | return Voices.size() - 1; 79 | } 80 | 81 | int VoiceManager::FindVoice(const QString &inName, bool autoload) 82 | { 83 | for (size_t i = 0; i < Voices.size();i++) 84 | { 85 | if (Voices[i]->Name == inName.toStdString()) 86 | return (int)i; 87 | 88 | 89 | 90 | 91 | } 92 | 93 | if (autoload) 94 | return (int)LoadVoice(inName); 95 | else 96 | return -1; 97 | 98 | 99 | } 100 | 101 | Voice *VoiceManager::operator[](size_t in) 102 | { 103 | 104 | return Voices[in]; 105 | 106 | } 107 | 108 | void VoiceManager::SetDict(const std::vector &InDict) 109 | { 110 | ManDict = InDict; 111 | 112 | } 113 | 114 | VoiceManager::VoiceManager() 115 | { 116 | 117 | } 118 | 119 | VoiceManager::~VoiceManager() 120 | { 121 | 122 | for (Phonemizer* Phni : Phonemizers) 123 | { 124 | SAFE_DELETE(Phni) 125 | 126 | 127 | } 128 | for (Voice* Vo : Voices) 129 | { 130 | 131 | SAFE_DELETE(Vo) 132 | 133 | } 134 | 135 | Voices.clear(); 136 | Phonemizers.clear(); 137 | 138 | 139 | 140 | } 141 | -------------------------------------------------------------------------------- /voicemanager.h: -------------------------------------------------------------------------------- 1 | #ifndef VOICEMANAGER_H 2 | #define VOICEMANAGER_H 3 | #include "Voice.h" 4 | #include 5 | 
#include "phoneticdict.h"
#include "phonemizer.h"

// Keeps every loaded Voice together with the phonemizers the voices share.
// The destructor deletes the stored Voice and Phonemizer pointers, so a
// VoiceManager must not be copied (a copy would delete them a second time).
// NOTE(review): the vector element types were lost in this listing. The pointer
// types are reconstructed from the loops in voicemanager.cpp; DictEntry is assumed
// to be the entry type from phoneticdict.h - confirm.
class VoiceManager
{
private:
    std::vector<Voice*> Voices;
    std::vector<DictEntry> ManDict;

    std::vector<Phonemizer*> Phonemizers;
    std::vector<ESpeakPhonemizer*> ENGPhonemizers;

    Phonemizer* LoadPhonemizer(const QString& InPhnLang, int32_t InLangNum);
    ESpeakPhonemizer* LoadESpeakPhonemizer(const QString& InVoiceName);

public:

    // Load a voice and return index in vector
    size_t LoadVoice(const QString& Voname);
    // Find a voice in Voices
    // Returns index in Voices vector, if not found returns -1
    int FindVoice(const QString& inName, bool autoload = true);

    // Unchecked access by index
    Voice* operator[](size_t in);

    inline std::vector<Voice*>& GetVoices(){return Voices;}

    void SetDict(const std::vector<DictEntry>& InDict);

    VoiceManager();
    ~VoiceManager();
};

#endif // VOICEMANAGER_H

--------------------------------------------------------------------------------
/voxer.cpp:
--------------------------------------------------------------------------------
#include "voxer.h"

#include <algorithm>

#include "r8b/r8bsrc.h"

using namespace std::chrono;

// Linearly maps OldValue from the range [OldMin, OldMax] onto [NewMin, NewMax].
// The value is not clamped. OldMin must differ from OldMax, because the
// expression divides by (OldMax - OldMin).
float remap(float OldValue, float OldMin, float OldMax, float NewMin, float NewMax ){
    return (((OldValue - OldMin) * (NewMax - NewMin)) / (OldMax - OldMin)) + NewMin;
}

// Converts InAudata from SrcSampleRate to OutSampleRate with the r8brain resampler.
//  <- Returns: the resampled audio; a copy of the input when both rates are equal or
//     the input is empty; an empty vector when the resampler produces no samples.
std::vector<float> Resample(const std::vector<float>& InAudata,uint32_t SrcSampleRate,uint32_t OutSampleRate)
{
    // Nothing to convert
    if (SrcSampleRate == OutSampleRate || InAudata.empty())
        return InAudata;

    // The r8b API works on doubles
    std::vector<double> DBuff(InAudata.begin(), InAudata.end());
    const int32_t SampleCount = (int32_t)DBuff.size();

    // Deletes the resampler on every exit path, including a failed allocation below
    struct ResamplerHandle
    {
        CR8BResampler Handle;
        ~ResamplerHandle() { r8b_delete(Handle); }
    };

    // 2.5 is the required transition band (ReqTransBand, in percent);
    // it is a good middle-ground value
    ResamplerHandle Resampler{ r8b_create((double)SrcSampleRate,(double)OutSampleRate,SampleCount,2.5,ER8BResamplerRes::r8brr24) };

    // r8b_process points OutBuff at a buffer it manages; it has to be copied out
    // before the resampler is deleted
    double* OutBuff = nullptr;
    const int32_t NumSamples = r8b_process(Resampler.Handle,DBuff.data(),SampleCount,OutBuff);

    // A non-positive count must not be cast to size_t (it would request a huge buffer)
    if (NumSamples <= 0 || OutBuff == nullptr)
        return {};

    // Re-cast to float
    std::vector<float> OutAud((size_t)NumSamples);
    std::transform(OutBuff, OutBuff + NumSamples, OutAud.begin(),
                   [](double Sample) { return (float)Sample; });

    return OutAud;
}

// Runs RNNoise over InAudata in frames of RNNoiseFrameSize samples.
//  -> InAudata: samples, treated as being in the range [-1, 1]
//  -> DenState: RNNoise state; when null the input is returned unchanged
//  <- Returns: denoised audio with the same number of samples as the input
std::vector<float> DoDenoise(const std::vector<float>& InAudata,DenoiseState* DenState)
{
    if (!DenState)
        return InAudata;

    const size_t TotalSamples = InAudata.size();

    std::vector<float> NewAudata(TotalSamples);
    float buf[RNNoiseFrameSize];

    // Range the input is assumed to be in
    const float MinVal = -1.f;
    const float MaxVal = 1.f;

    for (size_t f = 0; f < TotalSamples; f += RNNoiseFrameSize)
    {
        // Number of real samples in this frame; only the last frame can be shorter.
        // Bounding the loops by this count keeps both the read from InAudata and the
        // write to NewAudata inside the vectors (the previous `>` checks let index ==
        // size() through).
        const size_t ValidSamples = std::min<size_t>(RNNoiseFrameSize, TotalSamples - f);

        //RNNoise expects a float in range [-32768.f,32768.f]
        for (size_t y = 0; y < ValidSamples; y++)
            buf[y] = remap(InAudata[f + y],MinVal,MaxVal,-32768.f,32768.f);

        // Pad a short last frame with silence instead of feeding RNNoise
        // uninitialized samples or leftovers from the previous frame
        std::fill(buf + ValidSamples, buf + RNNoiseFrameSize, 0.f);

        rnnoise_process_frame(DenState,buf,buf);

        for (size_t x = 0; x < ValidSamples; x++)
            NewAudata[f + x] = remap(buf[x],-32768.f,32768.f,-1.f,1.f);
    }

    // Due to post-normalization, the audio is about 2.1x louder. Apply makeup deamplification
    // for (float& f : NewAudata)
    //     f *= 0.4f;

    return NewAudata;
}

// Thread body: produces the audio (inference, or ForcedAudio when it is set),
// resamples it to CommonSampleRate, optionally denoises and amplifies it, then
// either exports it to ExportFileName or hands it over through Done.
void Voxer::run()
{
    // setBackground replaces the deprecated setBackgroundColor
    pAttItem->setBackground(InProcessColor);

    const bool IsForced = !ForcedAudio.empty();

    const high_resolution_clock::time_point Start = high_resolution_clock::now();

    VoxResults Res;
    if (!IsForced)
        Res = pAttVoice->Vocalize(Prompt.toStdString(),Speed,SpeakerID,Energy,F0,EmotionID,EmotionOverride.toStdString());

    // Only the inference itself is timed
    const high_resolution_clock::time_point End = high_resolution_clock::now();

    // Refer to whichever buffer holds the audio instead of copying it
    const std::vector<float>& Audat = IsForced ? ForcedAudio : Res.Audio;

    // Resample the audio to 48KHz
    std::vector<float> AudRes = Resample(Audat,SampleRate,CommonSampleRate);

    DenoiseState* Denoiser = nullptr;
    if (Denoise)
    {
        // Every thread creates its own denoiser.
        // A single denoiser created by the main window and shared between threads worked
        // for the first generation but then broke down (heavy artifacts, later just silence).
        Denoiser = rnnoise_create(nullptr);

        // Denoise. Function will return same vec if there is no denoiser
        AudRes = DoDenoise(AudRes,Denoiser);
    }

    // Apply Amplification
    for (float& f : AudRes)
        f *= Amplify;

    pAttItem->setBackground(DoneColor);

    if (IsForced)
    {
        Res.Mel.Shape.push_back(-1);
        // For forced audio, SpeakerID carries the sample rate to convert back to;
        // see MakeInferDetails at batchdenoisedlg.cpp
        AudRes = Resample(AudRes,CommonSampleRate,(uint32_t)SpeakerID);
    }

    if (ExportFileName.size())
    {
        VoxUtil::ExportWAV(ExportFileName.toStdString(),AudRes,SpeakerID);

        // The audio went to disk: report completion with no samples and the sentinel ID
        AudRes.clear();
        CurrentID = UINT32_MAX;
    }

    // NOTE(review): the duration type was lost in this listing; duration<double> is
    // assumed - it has to match the Done signal in voxer.h and the connected slots.
    emit Done(AudRes,Res.Mel,duration_cast<duration<double>>(End - Start),CurrentID);

    if (Res.Alignment.Data.size() > 0)
        emit AttentionReady(Res.Alignment,CurrentID);

    // Destroy the denoiser only if one was actually created.
    // rnnoise_destroy throws some exception we can't do anything about
    if (Denoiser)
    {
        try {
            rnnoise_destroy(Denoiser);
        } catch (...) {
            // Deliberately ignored, see above
        }
    }
}

Voxer::Voxer()
{

}

--------------------------------------------------------------------------------
/voxer.h:
--------------------------------------------------------------------------------
#ifndef VOXER_H
#define VOXER_H

#include "Voice.h"
// NOTE(review): the three angle-bracket header names were lost in this listing;
// these are the ones the declarations below require - confirm.
#include <QThread>

#include <QListWidgetItem>
#include <chrono>
#include "rnnoise.h"

const QColor DoneColor = QColor(0,128,0);
const QColor PlayingColor = QColor(168, 40, 94);
const QColor InProcessColor = QColor(0,0,255);

// A Voxer is a thread spawned for the sole purpose of doing inference
// The constructor sets none of the public fields; fill them in before starting the thread.
class Voxer : public QThread
{
    Q_OBJECT

    void run() override;
public:

    Voice* pAttVoice;
    // List item whose background color shows the progress of this job
    QListWidgetItem* pAttItem;
    QString Prompt;
    float Speed;
    float Energy;
    float F0;
    // When ForcedAudio or ExportFileName is set, run() also uses this value as a sample rate
    int32_t SpeakerID;
    // Sample rate of the audio before it is resampled to CommonSampleRate
    uint32_t SampleRate;
    int32_t EmotionID;
    bool Denoise;
    QString EmotionOverride;

    // DANGER: If this is set, the audio is written to this file instead of being delivered:
    // Done is still emitted, but with empty audio data and UINT32_MAX as the ID
    QString ExportFileName;

    // Gain every output sample is multiplied by
    float Amplify;
    Voxer();

    uint32_t CurrentID;

    // When not empty, this audio is processed instead of running inference
    std::vector<float> ForcedAudio;

signals:
    // NOTE(review): the template arguments were lost in this listing; <float> and
    // duration<double> are assumed - confirm against the connected slots.
    void Done(std::vector<float> AudioData,TFTensor<float> Mel,std::chrono::duration<double> infer_span,uint32_t ID);
    void AttentionReady(TFTensor<float> Att,uint32_t ID);

};

#endif // VOXER_H

--------------------------------------------------------------------------------
/winicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/winicon.ico
--------------------------------------------------------------------------------