├── .gitignore
├── EnglishPhoneticProcessor.cpp
├── EnglishPhoneticProcessor.h
├── FastSpeech2.cpp
├── FastSpeech2.h
├── LICENSE.md
├── MultiBandMelGAN.cpp
├── MultiBandMelGAN.h
├── README.md
├── TensorVox.pro
├── TextTokenizer.cpp
├── TextTokenizer.h
├── Voice.cpp
├── Voice.h
├── VoxCommon.cpp
├── VoxCommon.hpp
├── attention.cpp
├── attention.h
├── batchdenoisedlg.cpp
├── batchdenoisedlg.h
├── batchdenoisedlg.ui
├── espeakphonemizer.cpp
├── espeakphonemizer.h
├── ext
    ├── AudioFile.hpp
    ├── ByteArr.cpp
    ├── ByteArr.h
    ├── CppFlow
    │   ├── context.h
    │   ├── cppflow.h
    │   ├── datatype.h
    │   ├── defer.h
    │   ├── model.h
    │   ├── ops.h
    │   ├── raw_ops.h
    │   └── tensor.h
    ├── Qt-Frameless-Window-DarkStyle-master
    │   ├── .gitignore
    │   ├── DarkStyle.cpp
    │   ├── DarkStyle.h
    │   ├── README.md
    │   ├── darkstyle.qrc
    │   ├── darkstyle
    │   │   ├── darkstyle.qss
    │   │   ├── icon_branch_closed.png
    │   │   ├── icon_branch_end.png
    │   │   ├── icon_branch_more.png
    │   │   ├── icon_branch_open.png
    │   │   ├── icon_checkbox_checked.png
    │   │   ├── icon_checkbox_checked_disabled.png
    │   │   ├── icon_checkbox_checked_pressed.png
    │   │   ├── icon_checkbox_indeterminate.png
    │   │   ├── icon_checkbox_indeterminate_disabled.png
    │   │   ├── icon_checkbox_indeterminate_pressed.png
    │   │   ├── icon_checkbox_unchecked.png
    │   │   ├── icon_checkbox_unchecked_disabled.png
    │   │   ├── icon_checkbox_unchecked_pressed.png
    │   │   ├── icon_close.png
    │   │   ├── icon_radiobutton_checked.png
    │   │   ├── icon_radiobutton_checked_disabled.png
    │   │   ├── icon_radiobutton_checked_pressed.png
    │   │   ├── icon_radiobutton_unchecked.png
    │   │   ├── icon_radiobutton_unchecked_disabled.png
    │   │   ├── icon_radiobutton_unchecked_pressed.png
    │   │   ├── icon_restore.png
    │   │   ├── icon_sepvline.png
    │   │   ├── icon_tbclose.png
    │   │   ├── icon_tbclose_hover.png
    │   │   ├── icon_undock.png
    │   │   └── icon_vline.png
    │   ├── frameless_window_dark.pro
    │   ├── framelesswindow.qrc
    │   ├── framelesswindow
    │   │   ├── framelesswindow.cpp
    │   │   ├── framelesswindow.h
    │   │   ├── framelesswindow.ui
    │   │   ├── windowdragger.cpp
    │   │   └── windowdragger.h
    │   ├── images
    │   │   ├── icon_window_close.png
    │   │   ├── icon_window_maximize.png
    │   │   ├── icon_window_minimize.png
    │   │   └── icon_window_restore.png
    │   ├── screenshot_mac_frameless_window_qt_dark_style_disabled.png
    │   ├── screenshot_mac_frameless_window_qt_dark_style_enabled.png
    │   └── screenshot_win7_frameless_window_qt_dark_style_enabled.png
    ├── ZCharScanner.cpp
    ├── ZCharScanner.h
    ├── ZFile.cpp
    ├── ZFile.h
    ├── json.hpp
    ├── qcustomplot.cpp
    └── qcustomplot.h
├── g2p_train
    ├── README.md
    ├── config
    │   ├── default.yaml
    │   └── longer.yaml
    ├── models
    │   └── English.zip
    └── train_and_export.py
├── istftnettorch.cpp
├── istftnettorch.h
├── main.cpp
├── mainwindow.cpp
├── mainwindow.h
├── mainwindow.ui
├── melgen.cpp
├── melgen.h
├── modelinfodlg.cpp
├── modelinfodlg.h
├── modelinfodlg.ui
├── phddialog.cpp
├── phddialog.h
├── phddialog.ui
├── phonemizer.cpp
├── phonemizer.h
├── phoneticdict.cpp
├── phoneticdict.h
├── phonetichighlighter.cpp
├── phonetichighlighter.h
├── res
    ├── clear64.png
    ├── infico.png
    ├── multiwav.png
    ├── noim.png
    ├── phoneticdico.png
    ├── random64.png
    ├── refresh.png
    ├── speak64.png
    ├── stdico.png
    └── wav.png
├── spectrogram.cpp
├── spectrogram.h
├── stdres.qrc
├── tacotron2.cpp
├── tacotron2.h
├── tacotron2torch.cpp
├── tacotron2torch.h
├── tfg2p.cpp
├── tfg2p.h
├── torchmoji.cpp
├── torchmoji.h
├── track.cpp
├── track.h
├── vits.cpp
├── vits.h
├── voicemanager.cpp
├── voicemanager.h
├── voxer.cpp
├── voxer.h
└── winicon.ico


/.gitignore:
--------------------------------------------------------------------------------
 1 | # This file is used to ignore files which are generated
 2 | # ----------------------------------------------------------------------------
 3 | deps/*
 4 | rdeployed/*
 5 | brelease/*
 6 | bdebug/*
 7 | *.wav
 8 | *~
 9 | *.autosave
10 | *.a
11 | *.core
12 | *.moc
13 | *.o
14 | *.obj
15 | *.orig
16 | *.rej
17 | *.so
18 | *.so.*
19 | *_pch.h.cpp
20 | *_resource.rc
21 | *.qm
22 | .#*
23 | *.*#
24 | core
25 | !core/
26 | tags
27 | .DS_Store
28 | .directory
29 | *.debug
30 | Makefile*
31 | *.prl
32 | *.app
33 | moc_*.cpp
34 | ui_*.h
35 | qrc_*.cpp
36 | Thumbs.db
37 | *.res
38 | *.rc
39 | *.qmake.cache
40 | *.qmake.stash
41 | release/*
42 | 
43 | # qtcreator generated files
44 | *.pro.user*
45 | 
46 | # xemacs temporary files
47 | *.flc
48 | 
49 | # Vim temporary files
50 | .*.swp
51 | 
52 | # Visual Studio generated files
53 | *.ib_pdb_index
54 | *.idb
55 | *.ilk
56 | *.pdb
57 | *.sln
58 | *.suo
59 | *.vcproj
60 | *vcproj.*.*.user
61 | *.ncb
62 | *.sdf
63 | *.opensdf
64 | *.vcxproj
65 | *vcxproj.*
66 | 
67 | # MinGW generated files
68 | *.Debug
69 | *.Release
70 | 
71 | # Python byte code
72 | *.pyc
73 | 
74 | # Binaries
75 | # --------
76 | *.dll
77 | *.exe
78 | 
79 | deps.zip
80 | README.md.backup
81 | 


--------------------------------------------------------------------------------
/EnglishPhoneticProcessor.cpp:
--------------------------------------------------------------------------------
  1 | #include "EnglishPhoneticProcessor.h"
  2 | #include "VoxCommon.hpp"
  3 | 
  4 | using namespace std;
  5 | 
  6 | bool EnglishPhoneticProcessor::Initialize(Phonemizer* InPhn, ESpeakPhonemizer *InENGPh)
  7 | {
  8 | 
  9 | 
 10 |     Phoner = InPhn;
 11 |     Tokenizer.SetAllowedChars(Phoner->GetGraphemeChars());
 12 |     ENG_Phonemizer = InENGPh;
 13 | 
 14 | 
 15 | 
 16 | 
 17 | 	return true;
 18 | }
 19 | 
 20 | 
 21 | std::string EnglishPhoneticProcessor::ProcessTextPhonetic(const std::string& InText, const std::vector<u32string> &InPhonemes, const std::vector<DictEntry>& InDict, ETTSLanguageType::Enum InLanguageType, bool IsTac)
 22 | {
 23 |     if (!Phoner)
 24 | 		return "ERROR";
 25 | 
 26 | 
 27 | 
 28 |     vector<string> Words = Tokenizer.Tokenize(InText,IsTac);
 29 | 
 30 | 	string Assemble = "";
 31 | 
 32 | 
 33 |     if (InLanguageType == ETTSLanguageType::Char)
 34 |     {
 35 |         for (size_t w = 0; w < Words.size();w++)
 36 |         {
 37 |             Assemble.append(Words[w]);
 38 | 
 39 |             if (w > 0)
 40 |                 Assemble.append(" ");
 41 | 
 42 |         }
 43 | 
 44 |         if (Assemble[Assemble.size() - 1] == ' ')
 45 |             Assemble.pop_back();
 46 | 
 47 |         return Assemble;
 48 | 
 49 | 
 50 | 
 51 |     }
 52 | 
 53 |     // Make a copy of the dict passed.
 54 |     std::vector<DictEntry> CurrentDict = InDict;
 55 | 
 56 | 
 57 | 	for (size_t w = 0; w < Words.size();w++) 
 58 | 	{
 59 | 		const string& Word = Words[w];
 60 | 
 61 | 
 62 |         if (Word.size() > 22)
 63 |             continue;
 64 | 
 65 | 
 66 |         // Double email symbol indicates Tacotron punctuation handling
 67 |         if (Word.find("@@") != std::string::npos)
 68 |         {
 69 |             std::string AddPonct = Word.substr(2); // Remove the @@
 70 |             Assemble.append(" ");
 71 |             Assemble.append(AddPonct);
 72 |             Assemble.append(" ");
 73 | 
 74 |             continue;
 75 | 
 76 | 
 77 |         }
 78 | 
 79 |         if (Word.find("@") != std::string::npos){
 80 |             std::u32string AddPh = VoxUtil::StrToU32(Word.substr(1)); // Remove the @
 81 |             size_t OutId = 0;
 82 |             if (VoxUtil::FindInVec(AddPh,InPhonemes,OutId))
 83 |             {
 84 |                 Assemble.append(VoxUtil::U32ToStr(InPhonemes[OutId]));
 85 |                 Assemble.append(" ");
 86 | 
 87 | 
 88 |             }
 89 | 
 90 |             continue;
 91 | 
 92 |         }
 93 | 
 94 | 
 95 | 
 96 | 
 97 |         size_t OverrideIdx = 0;
 98 |         if (!ENG_Phonemizer && VoxUtil::FindInVec2<std::string,DictEntry>(Word,InDict,OverrideIdx))
 99 |         {
100 |              Assemble.append(InDict[OverrideIdx].PhSpelling);
101 |              Assemble.append(" ");
102 |              continue;
103 |         }
104 | 
105 | 
106 | 
107 |         std::string Res = Word;
108 |         if (!ENG_Phonemizer){
109 |             Res = Phoner->ProcessWord(Word,0.001f);
110 |             CurrentDict.push_back({Word,Res,""});
111 |         }
112 | 
113 | 
114 |         // Cache the word in the override dict so next time we don't have to research it
115 | 
116 | 
117 |         Assemble.append(Res);
118 |         Assemble.append(" ");
119 | 
120 | 
121 | 
122 | 
123 | 
124 | 	}
125 | 	
126 | 
127 |     // eSpeak phonemizer takes in whole thing
128 |     if (ENG_Phonemizer){
129 | 
130 |         Assemble = ENG_Phonemizer->Phonemize(Assemble);
131 |     }
132 | 
133 | 
134 |     // Delete last space if there is
135 | 	if (Assemble[Assemble.size() - 1] == ' ')
136 | 		Assemble.pop_back();
137 | 
138 | 
139 | 
140 | 
141 | 	return Assemble;
142 | }
143 | 
144 | EnglishPhoneticProcessor::EnglishPhoneticProcessor()
145 | {
146 |     Phoner = nullptr;
147 |     ENG_Phonemizer = nullptr;
148 | }
149 | 
150 | EnglishPhoneticProcessor::EnglishPhoneticProcessor(Phonemizer *InPhn, ESpeakPhonemizer *InENGPh)
151 | {
152 |     Initialize(InPhn,InENGPh);
153 | 
154 | }
155 | 
156 | 
157 | 
158 | EnglishPhoneticProcessor::~EnglishPhoneticProcessor()
159 | {
160 |     // Causes annoying crash on exit. It's also irrelevant because the OS frees what little memory this had.
161 |     /*
162 |     if (Phoner)
163 |        delete Phoner;
164 | 
165 |    */
166 | }
167 | 


--------------------------------------------------------------------------------
/EnglishPhoneticProcessor.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "TextTokenizer.h"
 3 | 
 4 | 
 5 | #include "phoneticdict.h"
 6 | #include "phonemizer.h"
 7 | #include "espeakphonemizer.h"
 8 | 
 9 | class EnglishPhoneticProcessor
10 | {
11 | private:
12 | 	TextTokenizer Tokenizer;
13 |     Phonemizer* Phoner;
14 | 
15 |     ESpeakPhonemizer* ENG_Phonemizer;
16 | 
17 | 	inline bool FileExists(const std::string& name) {
18 |         std::ifstream f(name.c_str());
19 | 		return f.good();
20 | 	}
21 | 
22 | public:
23 |     bool Initialize(Phonemizer *InPhn,ESpeakPhonemizer* InENGPh = nullptr);
24 |     std::string ProcessTextPhonetic(const std::string& InText, const std::vector<std::u32string> &InPhonemes, const std::vector<DictEntry>& InDict, ETTSLanguageType::Enum InLanguageType, bool IsTac);
25 | 	EnglishPhoneticProcessor();
26 |     EnglishPhoneticProcessor(Phonemizer *InPhn,ESpeakPhonemizer* InENGPh = nullptr);
27 | 	~EnglishPhoneticProcessor();
28 | 
29 |     inline TextTokenizer& GetTokenizer() {return Tokenizer;}
30 | };
31 | 
32 | 


--------------------------------------------------------------------------------
/FastSpeech2.cpp:
--------------------------------------------------------------------------------
 1 | #include "FastSpeech2.h"
 2 | 
 3 | 
 4 | 
 5 | FastSpeech2::FastSpeech2()
 6 | {
 7 | }
 8 | 
 9 | 
10 | TFTensor<float> FastSpeech2::DoInference(const std::vector<int32_t>& InputIDs,const std::vector<float>& ArgsFloat,const std::vector<int32_t> ArgsInt, int32_t SpeakerID , int32_t EmotionID)
11 | {
12 |     if (!CurrentMdl)
13 |         throw std::exception("Tried to do inference on unloaded or invalid model!");
14 | 
15 |     // Convenience reference so that we don't have to constantly derefer pointers.
16 |     cppflow::model& Mdl = *CurrentMdl;
17 | 
18 |     // This is the shape of the input IDs, our equivalent to tf.expand_dims.
19 | 
20 |     std::vector<int64_t> InputIDShape = { 1, (int64_t)InputIDs.size() };
21 | 
22 |     // Define the tensors
23 |     cppflow::tensor input_ids{InputIDs, InputIDShape };
24 |     cppflow::tensor energy_ratios{ ArgsFloat[1] };
25 |     cppflow::tensor f0_ratios{ArgsFloat[2]};
26 |     cppflow::tensor speaker_ids{ SpeakerID };
27 |     cppflow::tensor speed_ratios{ ArgsFloat[0] };
28 |     cppflow::tensor* emotion_ids = nullptr;
29 | 
30 | 
31 | 
32 | 
33 | 
34 | 
35 |     // Vector of input tensors
36 |     TensorVec Inputs = {{"serving_default_input_ids:0",input_ids},
37 |                         {"serving_default_speaker_ids:0",speaker_ids},
38 |                         {"serving_default_energy_ratios:0",energy_ratios},
39 |                         {"serving_default_f0_ratios:0",f0_ratios},
40 |                         {"serving_default_speed_ratios:0",speed_ratios}};
41 | 
42 |     // This is a multi-emotion model
43 |     if (EmotionID != -1)
44 |     {
45 |         emotion_ids = new cppflow::tensor{EmotionID};
46 |         Inputs.push_back({"serving_default_emotion_ids:0",*emotion_ids});
47 | 
48 | 
49 |     }
50 | 
51 | 
52 | 
53 | 
54 | 
55 |     // Do inference
56 |     // If we don't extract every single output it crashes.
57 |     auto Outputs = Mdl(Inputs,{"StatefulPartitionedCall:0","StatefulPartitionedCall:1","StatefulPartitionedCall:2","StatefulPartitionedCall:3","StatefulPartitionedCall:4"});
58 | 
59 |     // Define output and return it
60 |     TFTensor<float> Output = VoxUtil::CopyTensor<float>(Outputs[1]);
61 | 
62 |     // We allocated the emotion_ids cppflow::tensor dynamically, delete it
63 |     if (emotion_ids)
64 |         delete emotion_ids;
65 | 
66 |     // We could just straight out define it in the return statement, but I like it more this way
67 | 
68 |     return Output;
69 | }
70 | 
71 | FastSpeech2::~FastSpeech2()
72 | {
73 | 
74 | }
75 | 


--------------------------------------------------------------------------------
/FastSpeech2.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "melgen.h"
 4 | 
 5 | 
 6 | class FastSpeech2 : public MelGen
 7 | {
 8 | 
 9 | public:
10 | 	FastSpeech2();
11 | 
12 | 
13 | 
14 | 	/*
15 | 	Do inference on a FastSpeech2 model.
16 | 
17 | 	-> InputIDs: Input IDs of tokens for inference
18 | 	-> SpeakerID: ID of the speaker in the model to do inference on. If single speaker, always leave at 0. If multispeaker, refer to your model.
19 |     -> (In ArgsFloat)Speed, Energy, F0: Parameters for FS2 inference. Leave at 1.f for defaults
20 | 
21 | 	<- Returns: TFTensor<float> with shape {1,<len of mel in frames>,80} containing contents of mel spectrogram. 
22 | 	*/
23 |     TFTensor<float> DoInference(const std::vector<int32_t>& InputIDs,const std::vector<float>& ArgsFloat,const std::vector<int32_t> ArgsInt, int32_t SpeakerID = 0, int32_t EmotionID = -1);
24 | 
25 | 
26 | 
27 | 	~FastSpeech2();
28 | };
29 | 
30 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 ZDisket
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MultiBandMelGAN.cpp:
--------------------------------------------------------------------------------
 1 | #include "MultiBandMelGAN.h"
 2 | #define IF_EXCEPT(cond,ex) if (cond){throw std::exception(ex);}
 3 | 
 4 | 
 5 | 
 6 | bool MultiBandMelGAN::Initialize(const std::string & VocoderPath)
 7 | {
 8 | 	try {
 9 |         MelGAN = std::make_unique<cppflow::model>(VocoderPath);
10 | 	}
11 | 	catch (...) {
12 | 		return false;
13 | 
14 | 	}
15 | 	return true;
16 | 
17 | 
18 | }
19 | 
20 | TFTensor<float> MultiBandMelGAN::DoInference(const TFTensor<float>& InMel)
21 | {
22 |     IF_EXCEPT(!MelGAN, "Tried to infer MB-MelGAN on uninitialized model!!!!")
23 | 
24 |     // Convenience reference so that we don't have to constantly derefer pointers.
25 |     cppflow::model& Mdl = *MelGAN;
26 | 
27 | 
28 |     cppflow::tensor input_mels{ InMel.Data, InMel.Shape};
29 | 
30 | 
31 |     auto out_audio = Mdl({{"serving_default_mels:0",input_mels}}, {"StatefulPartitionedCall:0"})[0];
32 |     TFTensor<float> RetTensor = VoxUtil::CopyTensor<float>(out_audio);
33 | 
34 |     return RetTensor;
35 | 
36 | 
37 | 
38 | 
39 | 
40 | }
41 | 
42 | MultiBandMelGAN::MultiBandMelGAN()
43 | {
44 | 	MelGAN = nullptr;
45 | }
46 | 
47 | 
48 | MultiBandMelGAN::~MultiBandMelGAN()
49 | {
50 | 
51 | 
52 | }
53 | 


--------------------------------------------------------------------------------
/MultiBandMelGAN.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "VoxCommon.hpp"
 4 | #include <memory>
 5 | class MultiBandMelGAN
 6 | {
 7 | private:
 8 |     std::unique_ptr<cppflow::model> MelGAN;
 9 | 
10 | 
11 | public:
12 |     virtual bool Initialize(const std::string& VocoderPath);
13 | 
14 | 
15 | 	// Do MultiBand MelGAN inference including PQMF
16 | 	// -> InMel:  Mel spectrogram (shape [1, xx, 80])
17 | 	// <- Returns: Tensor data [4, xx, 1]
18 |     virtual TFTensor<float> DoInference(const TFTensor<float>& InMel);
19 | 
20 | 	MultiBandMelGAN();
21 | 	~MultiBandMelGAN();
22 | };
23 | 
24 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # TensorVox
 2 | 
 3 | [![](https://dcbadge.vercel.app/api/server/yqFDAWH)](https://discord.gg/yqFDAWH)
 4 | 
 5 | TensorVox is an application designed to enable user-friendly and lightweight neural speech synthesis on the desktop, aimed at increasing accessibility to such technology.
 6 | 
 7 | Powered mainly by [TensorFlowTTS](https://github.com/TensorSpeech/TensorFlowTTS) and also by [Coqui-TTS](https://github.com/coqui-ai/TTS) and [VITS](https://github.com/jaywalnut310/vits), it is written in pure C++/Qt, using the Tensorflow C API for interacting with Tensorflow models (first two), and LibTorch for PyTorch ones. This way, we can perform inference without having to install gigabytes worth of Python libraries, just a few DLLs.
 8 | 
 9 | ![Interface with Tac2 model loaded](https://i.imgur.com/wtPzzNh.png)
10 | 
11 | 
12 | ### Try it out
13 | 
14 | [Detailed guide in Google Docs](https://docs.google.com/document/d/1OS1kfb19bvpPPkF71Vbak_b735mi7epjUanIfPG671M/edit?usp=sharing)
15 | 
16 | Grab a copy from the releases, extract the .zip and check [the Google Drive folder](https://drive.google.com/drive/folders/1atUyxBbstKZpMqQEZMdNmRF2AKrlahKy?usp=sharing) for models and installation instructions
17 | 
18 | If you're interested in using your own model, first you need to train then export it. 
19 | 
20 | 
21 | ## Supported architectures
22 | 
23 | TensorVox supports models from three repos:
24 | 
25 |  - **TensorFlowTTS**: FastSpeech2, Tacotron2, both char and phoneme based and Multi-Band MelGAN. Here's a Colab notebook demonstrating how to export the LJSpeech pretrained, char-based Tacotron2 model: [<img src="https://colab.research.google.com/assets/colab-badge.svg">](https://colab.research.google.com/drive/1KLqZ1rkD4Enw7zpTgXGL6if7e5s0UeWa?usp=sharing) 
26 |  - **Coqui-TTS:** Tacotron2 (phoneme-based IPA) and Multi-Band MelGAN, after converting from PyTorch to Tensorflow. Here's a notebook showing how to export the LJSpeech DDC model: [<img src="https://colab.research.google.com/assets/colab-badge.svg">](https://colab.research.google.com/drive/15CdGEAu_-KezV1XxwzVfQiFSm0tveBkC?usp=sharing)
27 |  - **jaywalnut310/VITS:** VITS, which is a fully E2E model. (Stressed IPA as phonemes) Export notebook: [<img src="https://colab.research.google.com/assets/colab-badge.svg">](https://colab.research.google.com/drive/1BSGE5DQYweXBWrwPOmb6CRPUU8H5mBvb?usp=sharing)
28 | 
29 | These examples should provide you with enough guidance to understand what is needed. If you're looking to train a model specifically for this purpose, then I recommend TensorFlowTTS, as it is the one with the best support, and VITS, as it's the closest thing to perfect.
30 | As for languages, out-of-the-box support is provided for English (Coqui and TFTTS, VITS), German and Spanish (only TensorFlowTTS); that is, you won't have to do anything. You can add languages without modifying code, as long as the phoneme set is IPA (stressed or nonstressed), ARPA, or GlobalPhone (open an issue and I'll explain it to you).
31 | 
32 | 
33 | ## Build instructions
34 | Currently, only Windows 10 x64 (although I've heard reports of it running on 8.1) is supported.
35 | 
36 | **Requirements:**
37 |  1. Qt Creator
38 |  2. MSVC 2017 (v141) compiler
39 | 
40 | **Primed build (with all provided libraries):**
41 | 
42 |  1. Download [precompiled binary dependencies and includes](https://drive.google.com/file/d/1N6IxSpsgemS94z_v82toXhiNs2tLXkz6/view?usp=sharing)
43 |  2. Unzip it so that the `deps` folder is in the same place as the .pro and main source files.
44 |  3. Open the project with Qt Creator, add your compiler and compile
45 | 
46 | Note that to try your shiny new executable you'll need to download a release of the program as described above and replace the executable in that release with your new one, so you have all the DLLs in place.
47 | 
48 | TODO: Add instructions for compiling from scratch.
49 | 
50 | ## Externals (and thanks)
51 | 
52 |  - **LibTorch**: https://pytorch.org/cppdocs/installing.html
53 | 
54 |  - **Tensorflow C API**: [https://www.tensorflow.org/install/lang_c](https://www.tensorflow.org/install/lang_c)
55 |  - **CppFlow** (TF C API -> C++ wrapper): [https://github.com/serizba/cppflow](https://github.com/serizba/cppflow) 
56 |  - **AudioFile** (for WAV export): [https://github.com/adamstark/AudioFile](https://github.com/adamstark/AudioFile)
57 |  - **Frameless Dark Style Window**: https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle
58 |  - **JSON for modern C++**: https://github.com/nlohmann/json
59 |  - **r8brain-free-src** (Resampling): https://github.com/avaneev/r8brain-free-src
60 |  - **rnnoise** (CMake version, denoising output): https://github.com/almogh52/rnnoise-cmake
61 |  - **Logitech LED Illumination SDK** (Mouse RGB integration): https://www.logitechg.com/en-us/innovation/developer-lab.html
62 |  - **QCustomPlot** : https://www.qcustomplot.com/index.php/introduction
63 |  - **libnumbertext** : https://github.com/Numbertext/libnumbertext
64 | 
65 | 
66 | ## Contact
67 | You can open an issue here or join the [Discord server](https://discord.gg/yqFDAWH) and discuss/ask anything there
68 | 
69 | For media/licensing/any other formal stuff inquiries, send to this email: 9yba9c1y@anonaddy.me
70 | 
71 | ## Note about licensing
72 | 
73 | This program itself is MIT licensed, but for the models you use, their license terms apply. For example, if you're in Vietnam and using TensorFlowTTS models, you'll have to check [here](https://github.com/TensorSpeech/TensorFlowTTS#license) for some details
74 | 


--------------------------------------------------------------------------------
/TensorVox.pro:
--------------------------------------------------------------------------------
  1 | QT += core gui
  2 | QT += multimedia
  3 | QT += winextras
  4 | 
  5 | greaterThan(QT_MAJOR_VERSION, 4): QT += widgets printsupport
  6 | 
  7 | CONFIG += c++17
  8 | 
  9 | # The following define makes your compiler emit warnings if you use
 10 | # any Qt feature that has been marked deprecated (the exact warnings
 11 | # depend on your compiler). Please consult the documentation of the
 12 | # deprecated API in order to know how to port your code away from it.
 13 | DEFINES += QT_DEPRECATED_WARNINGS _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
 14 | # You can also make your code fail to compile if it uses deprecated APIs.
 15 | # In order to do so, uncomment the following line.
 16 | # You can also select to disable deprecated APIs only up to a certain version of Qt.
 17 | #DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0
 18 | 
 19 | SOURCES += \
 20 |     EnglishPhoneticProcessor.cpp \
 21 |     FastSpeech2.cpp \
 22 |     MultiBandMelGAN.cpp \
 23 |     TextTokenizer.cpp \
 24 |     Voice.cpp \
 25 |     VoxCommon.cpp \
 26 |     attention.cpp \
 27 |     batchdenoisedlg.cpp \
 28 |     espeakphonemizer.cpp \
 29 |     ext/ByteArr.cpp \
 30 |     ext/Qt-Frameless-Window-DarkStyle-master/DarkStyle.cpp \
 31 |     ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow/framelesswindow.cpp \
 32 |     ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow/windowdragger.cpp \
 33 |     ext/ZCharScanner.cpp \
 34 |     ext/ZFile.cpp \
 35 |     ext/qcustomplot.cpp \
 36 |     istftnettorch.cpp \
 37 |     main.cpp \
 38 |     mainwindow.cpp \
 39 |     melgen.cpp \
 40 |     modelinfodlg.cpp \
 41 |     phddialog.cpp \
 42 |     phonemizer.cpp \
 43 |     phoneticdict.cpp \
 44 |     phonetichighlighter.cpp \
 45 |     spectrogram.cpp \
 46 |     tacotron2.cpp \
 47 |     tacotron2torch.cpp \
 48 |     tfg2p.cpp \
 49 |     torchmoji.cpp \
 50 |     track.cpp \
 51 |     vits.cpp \
 52 |     voicemanager.cpp \
 53 |     voxer.cpp
 54 | 
 55 | HEADERS += \
 56 |     EnglishPhoneticProcessor.h \
 57 |     FastSpeech2.h \
 58 |     MultiBandMelGAN.h \
 59 |     TextTokenizer.h \
 60 |     Voice.h \
 61 |     VoxCommon.hpp \
 62 |     attention.h \
 63 |     batchdenoisedlg.h \
 64 |     espeakphonemizer.h \
 65 |     ext/AudioFile.hpp \
 66 |     ext/ByteArr.h \
 67 |     ext/CppFlow/context.h \
 68 |     ext/CppFlow/cppflow.h \
 69 |     ext/CppFlow/datatype.h \
 70 |     ext/CppFlow/defer.h \
 71 |     ext/CppFlow/model.h \
 72 |     ext/CppFlow/ops.h \
 73 |     ext/CppFlow/raw_ops.h \
 74 |     ext/CppFlow/tensor.h \
 75 |     ext/Qt-Frameless-Window-DarkStyle-master/DarkStyle.h \
 76 |     ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow/framelesswindow.h \
 77 |     ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow/windowdragger.h \
 78 |     ext/ZCharScanner.h \
 79 |     ext/ZFile.h \
 80 |     ext/json.hpp \
 81 |     ext/qcustomplot.h \
 82 |     istftnettorch.h \
 83 |     mainwindow.h \
 84 |     melgen.h \
 85 |     modelinfodlg.h \
 86 |     phddialog.h \
 87 |     phonemizer.h \
 88 |     phoneticdict.h \
 89 |     phonetichighlighter.h \
 90 |     spectrogram.h \
 91 |     tacotron2.h \
 92 |     tacotron2torch.h \
 93 |     tfg2p.h \
 94 |     torchmoji.h \
 95 |     track.h \
 96 |     vits.h \
 97 |     voicemanager.h \
 98 |     voxer.h
 99 | 
100 | FORMS += \
101 |     batchdenoisedlg.ui \
102 |     ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow/framelesswindow.ui \
103 |     mainwindow.ui \
104 |     modelinfodlg.ui \
105 |     phddialog.ui
106 | 
107 | # Default rules for deployment.
108 | qnx: target.path = /tmp/$${TARGET}/bin
109 | else: unix:!android: target.path = /opt/$${TARGET}/bin
110 | !isEmpty(target.path): INSTALLS += target
111 | 
112 | 
113 | DEFINES += _CRT_SECURE_NO_WARNINGS
114 | 
115 | INCLUDEPATH += $$PWD/deps/include
116 | INCLUDEPATH += $$PWD/deps/include/libtorch
117 | INCLUDEPATH += $$PWD/ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow
118 | win32: LIBS += -L$$PWD/deps/lib/ tensorflow.lib r8bsrc64.lib rnnoise64.lib LogitechLEDLib.lib LibNumberText64.lib c10.lib torch.lib torch_cpu.lib libespeak-ng.lib
119 | win32: LIBS += Advapi32.lib User32.lib Psapi.lib
120 | 
121 | 
122 | RESOURCES += \
123 |     ext/Qt-Frameless-Window-DarkStyle-master/darkstyle.qrc \
124 |     ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow.qrc \
125 |     stdres.qrc
126 | 
127 | win32:RC_ICONS += winicon.ico
128 | 
129 | VERSION = 1.2.0.0
130 | CONFIG += force_debug_info
131 | 
132 | QMAKE_CXXFLAGS += /std:c++17 /utf-8 -DPSAPI_VERSION=1
133 | 
134 | DISTFILES += \
135 |     res/defaultim.png
136 | 


--------------------------------------------------------------------------------
/TextTokenizer.cpp:
--------------------------------------------------------------------------------
  1 | #include "TextTokenizer.h"
  2 | #include "ext/ZCharScanner.h"
  3 | #include <algorithm>
  4 | #include <cassert>
  5 | #include <cctype>
  6 | #include <iostream>
  7 | #include <algorithm>
  8 | 
  9 | 
 10 | 
 11 | 
 12 | // Punctuation, this gets auto-converted to SIL
 13 | const std::u32string punctuation_f = U",.-;";
 14 | 
 15 | // For Tacotron2, including question and other marks
 16 | const std::u32string punctuation_tac = U",.;¡!¿?:-";
 17 | 
 18 | 
 19 | const std::u32string digits = U"1234567890";
 20 | 
 21 | using namespace std;
 22 | 
 23 | void TextTokenizer::SetAllowedChars(const std::string &value)
 24 | {
 25 |     AllowedChars = VoxUtil::StrToU32(value);
 26 | }
 27 | 
 28 | vector<string> TextTokenizer::ExpandNumbers(const std::vector<std::string>& SpaceTokens)
 29 | {
 30 | 	vector<string> RetVec;
 31 | 	RetVec.reserve(SpaceTokens.size());
 32 | 
 33 | 	for (auto& Token : SpaceTokens) {
 34 | 		char* p;
 35 |         strtol(Token.c_str(), &p, 10);
 36 | 		if (*p) {
 37 | 			RetVec.push_back(Token);
 38 | 		}
 39 | 		else {
 40 |             std::string ModTk = Token;
 41 |             CuNumber->numbertext(ModTk,NumLang);
 42 | 
 43 |             std::replace(ModTk.begin(),ModTk.end(),'-',' ');
 44 | 
 45 |             // If the number has spaces we must sep again and add one by one otherwise all the words are merged together due to the
 46 |             // nature of it
 47 |             ZStringDelimiter DelSp(ModTk);
 48 |             DelSp.AddDelimiter(" ");
 49 | 
 50 |             if (DelSp.szTokens())
 51 |             {
 52 |                 for (const auto& Ttk : DelSp.GetTokens())
 53 |                     RetVec.push_back(Ttk);
 54 | 
 55 |             }else{
 56 |                 RetVec.push_back(ModTk);
 57 |             }
 58 | 
 59 | 
 60 | 
 61 | 
 62 | 
 63 | 		}
 64 | 	}
 65 | 
 66 | 	return RetVec;
 67 | 
 68 | }
 69 | 
 70 | string TextTokenizer::SpaceChars(const string &InStr)
 71 | {
 72 |     std::u32string AsmStr = U"";
 73 |     std::u32string Stry = VoxUtil::StrToU32(InStr);
 74 | 
 75 |     bool InNumChain = false;
 76 |     bool InPhn = false;
 77 | 
 78 |     for (size_t i = 0; i < Stry.size();i++)
 79 |     {
 80 |         auto uChar = Stry[i];
 81 | 
 82 |         if (uChar == U'@')
 83 |             InPhn = true;
 84 | 
 85 |         if (uChar == U' ')
 86 |             InPhn = false;
 87 | 
 88 | 
 89 |         if (InPhn)
 90 |         {
 91 |             AsmStr += uChar;
 92 |             continue;
 93 | 
 94 |         }
 95 | 
 96 | 
 97 |         if (digits.find(uChar) != std::u32string::npos && !InNumChain)
 98 |         {
 99 |             AsmStr += U" ";
100 |             AsmStr += uChar;
101 |             InNumChain = true;
102 |             continue;
103 |         }
104 | 
105 |         if (digits.find(uChar) == std::u32string::npos && InNumChain )
106 |         {
107 |             AsmStr += U" ";
108 |             AsmStr += uChar;
109 | 
110 |             InNumChain = false;
111 |             continue;
112 | 
113 |         }
114 | 
115 |         AsmStr += uChar;
116 | 
117 | 
118 | 
119 |     }
120 | 
121 |     return VoxUtil::U32ToStr(AsmStr);
122 | 
123 | 
124 | }
125 | 
126 | TextTokenizer::TextTokenizer()
127 | {
128 | }
129 | 
130 | TextTokenizer::~TextTokenizer()
131 | {
132 | }
133 | 
134 | void TextTokenizer::SetNumberText(Numbertext &INum, const string &Lang)
135 | {
136 |     CuNumber = &INum;
137 |     NumLang = Lang;
138 | 
139 | }
140 | 
141 | 
142 | 
143 | vector<string> TextTokenizer::Tokenize(const std::string & InTxt,bool IsTacotron, bool IsTorchMoji)
144 | {
145 | 	vector<string> ProcessedTokens;
146 | 
147 | 
148 | 
149 |     std::string TxtPreProc = SpaceChars(InTxt);
150 | 
151 |     ZStringDelimiter Delim(TxtPreProc);
152 | 	Delim.AddDelimiter(" ");
153 | 
154 |     vector<string> DelimitedTokens = Delim.GetTokens();
155 | 
156 | 
157 | 
158 | 	// Single word handler
159 |     if (!Delim.szTokens())
160 |         DelimitedTokens.push_back(TxtPreProc);
161 | 
162 |     DelimitedTokens = ExpandNumbers(DelimitedTokens);
163 | 
164 |     std::u32string punctuation = punctuation_f;
165 | 
166 |     if (IsTacotron)
167 |         punctuation = punctuation_tac;
168 | 
169 | 
170 | 
171 | 
172 | 	// We know that the new vector is going to be at least this size so we reserve
173 | 	ProcessedTokens.reserve(DelimitedTokens.size());
174 | 
175 | 	/*
176 | 	In this step we go through the string and only allow qualified character to pass through.
177 | 	*/
178 |     for (size_t TokCtr = 0; TokCtr < DelimitedTokens.size();TokCtr++)
179 |     {
180 |         // We are now using U32string because it's guaranteed to be 1 character = 1 element
181 |         const auto& tok = VoxUtil::StrToU32(DelimitedTokens[TokCtr]);
182 |         std::u32string AppTok = U"";
183 | 
184 | 
185 |         if (tok.find(U"@") != string::npos)
186 |         {
187 | 
188 |             ProcessedTokens.push_back(VoxUtil::U32ToStr(tok));
189 |             continue;
190 | 
191 |         }
192 | 
193 | 		for (size_t s = 0;s < tok.size();s++)
194 | 		{
195 | 
196 | 
197 |             if (AllowedChars.find(tok[s]) != std::u32string::npos)
198 |                 AppTok += tok[s];
199 | 
200 | 
201 | 			// Punctuation handler
202 |             // This time we explicitly add a token to the vector
203 |             if (punctuation.find(tok[s]) != std::u32string::npos) {
204 | 
205 | 
206 | 				// First, if the assembled string isn't empty, we add it in its current state
207 | 				// Otherwise, the SIL could end up appearing before the word.
208 | 
209 |                 if (!AppTok.empty()) {
210 |                     ProcessedTokens.push_back(VoxUtil::U32ToStr(AppTok));
211 | 
212 |                     AppTok = U"";
213 | 				}
214 | 
215 |                 if (IsTacotron){
216 | 
217 |                     // Double at-symbol is handled later
218 |                     if (!IsTorchMoji)
219 |                         AppTok += U"@@";
220 | 
221 |                     AppTok += tok[s];
222 | 
223 |                 }
224 |                 else{
225 |                     AppTok = U"@SIL";
226 |                 }
227 | 
228 |                 ProcessedTokens.push_back(VoxUtil::U32ToStr(AppTok));
229 |                 AppTok = U"";
230 |                 continue;
231 | 
232 | 			}
233 | 
234 | 
235 | 
236 | 
237 | 
238 | 
239 | 		}
240 |         if (!AppTok.empty())
241 |         {
242 |             ProcessedTokens.push_back(VoxUtil::U32ToStr(AppTok));
243 |             AppTok = U"";
244 | 
245 | 
246 |         }
247 | 
248 | 	}
249 | 	// Prevent out of range error if the user inputs one word
250 | 	if (ProcessedTokens.size() > 1) 
251 | 	{
252 | 		if (ProcessedTokens[ProcessedTokens.size() - 1] == "SIL")
253 | 			ProcessedTokens.pop_back();
254 | 	}
255 | 
256 | 
257 | 	return ProcessedTokens;
258 | }
259 | 


--------------------------------------------------------------------------------
/TextTokenizer.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <vector>
 3 | #include <string>
 4 | #include "VoxCommon.hpp"
 5 | #include "Numbertext.hxx"
 6 | 
// Turns prompt text into a list of string tokens: digit runs are separated from the
// surrounding text, numbers are spelled out through Numbertext, characters are filtered
// against an allow-list and punctuation is emitted as separate tokens.
class TextTokenizer
{
private:
    // Characters Tokenize() keeps when assembling a word; anything else is dropped.
    std::u32string AllowedChars;

	// Replaces every token that strtol() consumes completely with its spelled-out words
	// (hyphens become spaces and each word is added as its own token).
	std::vector<std::string> ExpandNumbers(const std::vector<std::string>& SpaceTokens);

    // Non-owning pointer to the converter registered through SetNumberText().
    Numbertext* CuNumber;

    // Language code passed to CuNumber->numbertext().
    std::string NumLang;


    // Go through the string and add a space at every boundary between a run of digits and other characters.
    // This is because ExpandNumbers won't recognize numbers if they've got punctuation like 500, or .9000
    std::string SpaceChars(const std::string& InStr);



public:
	TextTokenizer();
	~TextTokenizer();

    // Stores the address of INum and the language code used for number expansion.
    // INum must outlive any later Tokenize() call.
    void SetNumberText(Numbertext& INum,const std::string& Lang);

    // Tokenizes InTxt. IsTacotron selects the Tacotron punctuation set and makes punctuation
    // tokens carry the punctuation character itself instead of "@SIL"; IsTorchMoji
    // additionally suppresses the "@@" prefix on those tokens.
    std::vector<std::string> Tokenize(const std::string& InTxt, bool IsTacotron = false, bool IsTorchMoji = false);
    // Presumably fills AllowedChars from a UTF-8 string - definition not shown here, confirm.
    void SetAllowedChars(const std::string &value);
};
34 | 
35 | 


--------------------------------------------------------------------------------
/Voice.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "FastSpeech2.h"
 4 | #include "tacotron2.h"
 5 | #include "MultiBandMelGAN.h"
 6 | #include "EnglishPhoneticProcessor.h"
 7 | #include "vits.h"
 8 | #include "Numbertext.hxx"
 9 | #include "torchmoji.h"
10 | #include "phoneticdict.h"
11 | #include "tacotron2torch.h"
12 | #include "istftnettorch.h"
// Bundle returned by Voice::Vocalize(). Member order matters to any code that
// aggregate-initializes this struct positionally.
struct VoxResults{
  std::vector<float> Audio;      // Audio samples
  TFTensor<float> Alignment;     // Alignment tensor (plotted by Attention::DoPlot)
  TFTensor<float> Mel;           // presumably the predicted mel spectrogram - confirm
};
18 | 
// A loaded voice: owns the text-to-mel predictor, the vocoder and the text front end,
// and exposes Vocalize() to turn a prompt into audio.
class Voice
{
private:
    std::unique_ptr<MelGen> MelPredictor;
    std::unique_ptr<MultiBandMelGAN> Vocoder;
	EnglishPhoneticProcessor Processor;
    VoiceInfo VoxInfo;
    TorchMoji Moji;



    // Phoneme symbols and their IDs; presumably parallel arrays filled by ReadPhonemes() - confirm.
    std::vector<std::u32string> Phonemes;
    std::vector<int32_t> PhonemeIDs;



    std::vector<int32_t> PhonemesToID(const std::string& RawInTxt);

    // Names exposed through GetSpeakers() / GetEmotions().
    std::vector<std::string> Speakers;
    std::vector<std::string> Emotions;

    void ReadPhonemes(const std::string& PhonemePath);

    void ReadSpeakers(const std::string& SpeakerPath);

    void ReadEmotions(const std::string& EmotionPath);



    void ReadModelInfo(const std::string& ModelInfoPath);



    std::vector<DictEntry> CurrentDict;

    // Text returned by GetModelInfo().
    std::string ModelInfo;

    std::vector<int32_t> CharsToID(const std::string &RawInTxt);

    Numbertext NumTxt;
public:
	/* Voice constructor; all arguments are required.
	 -> VoxPath: Path of the folder that contains the models.
	 --  Must be a folder path without a trailing slash, using UNIX slashes; can be relative or absolute (eg: MyVoices/Karen)
	 --  The folder must contain the following elements:
	 ---  melgen: Folder where a FastSpeech2 model was saved as SavedModel, with .pb, variables, etc
	 ---  vocoder: Folder where a Multi-Band MelGAN model was saved as SavedModel.
     ---  info.json: Model information
     ---  phonemes.txt: Tab delimited file containing PHONEME \t ID, for inputting to the FS2 model.

     --- If multispeaker, a lined .txt file called speakers.txt
     --- If multi-emotion, a lined .txt file called emotions.txt

	*/


    Voice(const std::string& VoxPath, const std::string& inName,Phonemizer* InPhn);

    void AddPhonemizer(Phonemizer* InPhn, ESpeakPhonemizer *InENGPhn);
    void LoadNumberText(const std::string& NumTxtPath);


    std::string PhonemizeStr(const std::string& Prompt);
    // Synthesizes Prompt. Defaults: Speed, Energy and F0 are 1.0, SpeakerID is 0,
    // EmotionID is -1 and EmotionOvr is empty.
    VoxResults Vocalize(const std::string& Prompt, float Speed = 1.f, int32_t SpeakerID = 0, float Energy = 1.f, float F0 = 1.f, int32_t EmotionID = -1, const std::string &EmotionOvr = "");

    std::string Name;
    // Read-only accessors; the returned references stay valid only as long as this Voice does.
    inline const VoiceInfo& GetInfo(){return VoxInfo;}

    inline const std::vector<std::string>& GetSpeakers(){return Speakers;}
    inline const std::vector<std::string>& GetEmotions(){return Emotions;}

    void SetDictEntries(const std::vector<DictEntry>& InEntries);
    inline const std::string& GetModelInfo(){return ModelInfo;}

	~Voice();
};
95 | 
96 | 


--------------------------------------------------------------------------------
/VoxCommon.cpp:
--------------------------------------------------------------------------------
  1 | #include "VoxCommon.hpp"
  2 | #include "ext/json.hpp"
  3 | using namespace nlohmann;
  4 | #include <codecvt>
  5 | #include <locale>         // std::wstring_convert
  6 | 
// Display names, indexed by the integer architecture fields read from a model's JSON
// (see ReadModelJSON). The order must stay in sync with the values stored in those files.
const std::vector<std::string> Text2MelNames = {"FastSpeech2","Tacotron2 (TF)","VITS","VITS + TorchMoji","Tacotron2 (Torch)"};
// Index 2 is intentionally an empty name.
const std::vector<std::string> VocoderNames = {"Multi-Band MelGAN","MelGAN-STFT","","iSTFTNet"};
const std::vector<std::string> RepoNames = {"TensorflowTTS","Coqui-TTS","jaywalnut310","keonlee9420"};

// Language display names and, at the same positions, what appear to be the matching
// language codes for number-to-words conversion (usage not shown here - confirm).
const std::vector<std::string> LanguageNames = {"English","Spanish", "German", "EnglishIPA"};
const std::vector<std::string> LangaugeNamesNumToWords = {"en", "es","de","en"};
 13 | 
 14 | 
 15 | 
 16 | 
 17 | #include "ext/ZCharScanner.h"
 18 | 
// Maps a legacy integer "language" value from a model JSON to its V1 "Language-Method" string.
// Only the legacy values listed here can be converted.
const std::map<int32_t,std::string> LegacyToV1Lang = {
    {-3,"German-Char"},
    {0,"English-ARPA"},
    {-1,"English-Char"},
    {3,"English-IPA"},
    {1,"Spanish-GlobalPhone"}
                                                           };

// Maps the method part of a V1 language string to its ETTSLanguageType value.
// "IPA" and "IPAStressed" both resolve to ETTSLanguageType::IPA.
const std::map<std::string,int32_t> V1LangTypes ={
  {"IPA",ETTSLanguageType::IPA},
  {"IPAStressed",ETTSLanguageType::IPA},
  {"ARPA",ETTSLanguageType::ARPA},
  {"Char",ETTSLanguageType::Char},
  {"GlobalPhone",ETTSLanguageType::GlobalPhone}
};
 34 | 
 35 | void VoxUtil::ExportWAV(const std::string & Filename, const std::vector<float>& Data, unsigned SampleRate) {
 36 | 	AudioFile<float>::AudioBuffer Buffer;
 37 | 	Buffer.resize(1);
 38 | 
 39 | 
 40 | 	Buffer[0] = Data;
 41 | 	size_t BufSz = Data.size();
 42 | 
 43 | 
 44 | 	AudioFile<float> File;
 45 | 
 46 |     File.setAudioBuffer(Buffer);
 47 | 	File.setAudioBufferSize(1, (int)BufSz);
 48 | 	File.setNumSamplesPerChannel((int)BufSz);
 49 | 	File.setNumChannels(1);
 50 | 	File.setBitDepth(32);
 51 | 	File.setSampleRate(SampleRate);
 52 | 
 53 | 	File.save(Filename, AudioFileFormat::Wave);
 54 | 
 55 | 
 56 | 
 57 | }
 58 | 
 59 | // Process language value for vector indexes. Language value must adhere to standard.
 60 | uint32_t ProcessLanguageValue(int32_t LangVal)
 61 | {
 62 |     if (LangVal > -1)
 63 |         return LangVal;
 64 | 
 65 |     if (LangVal == -1)
 66 |         return 0;
 67 | 
 68 |     if (LangVal < 0)
 69 |         return (LangVal * -1) - 1;
 70 | 
 71 |     return LangVal;
 72 | 
 73 | }
 74 | 
 75 | VoiceInfo VoxUtil::ReadModelJSON(const std::string &InfoFilename)
 76 | {
 77 |     const size_t MaxNoteSize = 80;
 78 | 
 79 | 
 80 |     std::ifstream JFile(InfoFilename);
 81 |     json JS;
 82 | 
 83 | 
 84 |     try {
 85 |         JFile >> JS;
 86 |     } catch(json::parse_error Err) {
 87 |         QMessageBox::critical(nullptr,"JSON parse error",QString::fromUtf8(Err.what()));
 88 |     }
 89 | 
 90 | 
 91 |     JFile.close();
 92 | 
 93 |     auto Arch = JS["architecture"];
 94 | 
 95 |     ArchitectureInfo CuArch;
 96 |     CuArch.Repo = Arch["repo"].get<int>();
 97 |     CuArch.Text2Mel = Arch["text2mel"].get<int>();
 98 |     CuArch.Vocoder = Arch["vocoder"].get<int>();
 99 | 
100 |     // Now fill the strings
101 |     CuArch.s_Repo = RepoNames[CuArch.Repo];
102 |     CuArch.s_Text2Mel = Text2MelNames[CuArch.Text2Mel];
103 |     CuArch.s_Vocoder = VocoderNames[CuArch.Vocoder];
104 | 
105 |     // Language value for the info
106 | 
107 |     auto LangVal = JS["language"];
108 | 
109 |     
110 |     std::string LanguageFullName;
111 | 
112 |     if (LangVal.is_string()){  // V1 Language type standard model; see ETTSLanguageType enum desc on header
113 |         LanguageFullName = LangVal.get<std::string>();
114 | 
115 |     }else{
116 |         // Convert legacy language to V1
117 |        int32_t LegacyLang = JS["language"].get<int32_t>();
118 |        LanguageFullName = LegacyToV1Lang.find(LegacyLang)->second;
119 | 
120 | 
121 |     }
122 | 
123 |      ZStringDelimiter LangDel(LanguageFullName);
124 |      LangDel.AddDelimiter("-");
125 | 
126 |      std::string LangName = LangDel[0];
127 |      std::string LangTypeStr = LangDel[1];
128 |      std::string eSpeakLangStr = "";
129 |      if (LangDel.szTokens() > 2)
130 |      {
131 |          eSpeakLangStr = LangDel[2];
132 |          LanguageFullName = LangDel[0] + "-" + LangDel[1];
133 | 
134 |      }
135 | 
136 |      int32_t LangType = V1LangTypes.find(LangTypeStr)->second;
137 | 
138 | 
139 | 
140 |     // If the voice is char then the pad value must be a string of the EOS token ID (like "148").
141 |     std::string EndToken = JS["pad"].get<std::string>();
142 | 
143 |     // If it's phonetic then it's the token str, like "@EOS"
144 |     if (LangType != ETTSLanguageType::Char && EndToken.size() && CuArch.Text2Mel != EText2MelModel::Tacotron2Torch)
145 |         EndToken =  " " + EndToken; // In this case we add a space for separation since we directly append the value to the prompt
146 | 
147 | 
148 | 
149 |     VoiceInfo Inf{JS["name"].get<std::string>(),
150 |                  JS["author"].get<std::string>(),
151 |                  JS["version"].get<int>(),
152 |                  JS["description"].get<std::string>(),
153 |                  CuArch,
154 |                  JS["note"].get<std::string>(),
155 |                  JS["sarate"].get<uint32_t>(),
156 |                 LangName,
157 |                 LanguageFullName,
158 |                 eSpeakLangStr,
159 |                  EndToken,
160 |                 LangType
161 |                  };
162 | 
163 |     if (Inf.Note.size() > MaxNoteSize)
164 |         Inf.Note = Inf.Note.substr(0,MaxNoteSize);
165 | 
166 |     return Inf;
167 | 
168 | 
169 | 
170 | 
171 | 
172 | 
173 | 
174 | }
175 | 
176 | std::vector<std::string> VoxUtil::GetLinedFile(const std::string &Path)
177 | {
178 |     std::vector<std::string> RetLines;
179 |     std::ifstream Fi(Path);
180 | 
181 |     if (!Fi.good()) // File not exists, ret empty vec
182 |         return RetLines;
183 | 
184 |     std::string Line;
185 |     while (std::getline(Fi, Line))
186 |     {
187 |         if (Line.size() > 1)
188 |             RetLines.push_back(Line);
189 | 
190 | 
191 |     }
192 | 
193 |     return RetLines;
194 | }
195 | 
196 | std::string VoxUtil::U32ToStr(const std::u32string &InU32)
197 | {
198 |     std::wstring_convert<std::codecvt_utf8<char32_t>,char32_t> Converter;
199 |     return Converter.to_bytes(InU32);
200 | 
201 | 
202 | 
203 | }
204 | 
205 | std::u32string VoxUtil::StrToU32(const std::string &InStr)
206 | {
207 |     std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> Converter;
208 |     return Converter.from_bytes(InStr);
209 | 
210 | }
211 | 


--------------------------------------------------------------------------------
/VoxCommon.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | /*
  3 |  VoxCommon.hpp : Defines common data structures and constants to be used with TensorVox 
  4 | */
  5 | #include <iostream>
  6 | 
  7 | #undef slots // https://github.com/pytorch/pytorch/issues/19405
  8 | 
  9 | 
 10 | #pragma warning(push, 0) // LibTorch spams us with warnings
 11 | #include <torch/script.h> // One-stop header.
 12 | #pragma warning(pop)
 13 | 
 14 | #define slots Q_SLOTS
 15 | 
 16 | #include <vector>
 17 | #include "ext/AudioFile.hpp"
 18 | #include "ext/CppFlow/ops.h"
 19 | #include "ext/CppFlow/model.h"
 20 | 
 21 | 
 22 | 
 23 | #include <QMessageBox>
 24 | 
 25 | 
 26 | 
// Early-return helper: expands to `if (cond){return ret;}`.
#define IF_RETURN(cond,ret) if (cond){return ret;}

// Sample rate in Hz shared across the application (exact usage not visible here).
const uint32_t CommonSampleRate = 48000;

namespace VoxCommon{
const std::string CommonLangConst = "_std";
// TorchMoji dimensions; presumably input length and embedding width - confirm against torchmoji.h.
const int32_t TorchMojiLen = 120;
const int32_t TorchMojiEmbSize = 2304;


}

// Frame size taken from the RNNoise demo:
// https://github.com/almogh52/rnnoise-cmake/blob/d981adb2e797216f456cfcf158f73761a29981f8/examples/rnnoise_demo.c#L31
const uint32_t RNNoiseFrameSize = 480;
// List of (name, tensor) pairs for CppFlow.
typedef std::vector<std::tuple<std::string,cppflow::tensor>> TensorVec;
 42 | 
// Plain host-side copy of a tensor. Filled by VoxUtil::CopyTensor using positional
// aggregate initialization, so the member order (Data, Shape, TotalSize) must not change.
template<typename T>
struct TFTensor {
	std::vector<T> Data;         // Flat element storage
	std::vector<int64_t> Shape;  // Size of each dimension
	size_t TotalSize;            // Product of all entries of Shape

};
 50 | 
 51 | 
// Source repository of a model. Values line up with the entries of RepoNames.
namespace ETTSRepo {
enum Enum{
    TensorflowTTS = 0,
    CoquiTTS,
    jaywalnut310, // OG VITS repo
    keonlee9420
};

}
// Text-to-mel architecture of a model. Values line up with the entries of Text2MelNames.
namespace EText2MelModel {
enum Enum{
    FastSpeech2 = 0,
    Tacotron2,
    VITS,
    VITSTM,
    Tacotron2Torch
};

}
 71 | 
// Vocoder architecture of a model. Values line up with the entries of VocoderNames
// (NullVocoder corresponds to the empty name).
namespace EVocoderModel{
enum Enum{
    MultiBandMelGAN = 0,
    MelGANSTFT, // there are no architectural changes so we can use the mb-melgan class for melgan-stft
    NullVocoder, // For fully E2E models
    iSTFTNet
};
}
 80 | 
// ===========DEPRECATED===============
// Negative numbers denote character-based languages, non-negative ones phoneme-based languages.
// The character-based value of a language is -(phoneme-based value + 1), because -0 doesn't exist:
// English is 0 / -1, Spanish is 1 / -2 and German is 2 / -3.
// IPA-phn-based are mainly for Coqui
// ===========DEPRECATED===============
namespace ETTSLanguage{
enum Enum{
  GermanChar = -3,
  SpanishChar,   // -2
  EnglishChar,   // -1
  EnglishPhn,    //  0
  SpanishPhn,    //  1
  GermanPhn,     //  2
  EnglishIPA,    //  3
};

}
 99 | 
/* Language Spec Standard V1:
- The language is specified with a string in the JSON, and its type is stored instead of relying
on the ETTSLanguage enum.
-- The string is LanguageName-Method; for example English-IPAStressed, English-ARPA, German-Char
- Both the pre-V1 standard and the current one are supported
- The V1 standard does not require code changes to add new languages
-- For eSpeak phonemizers, an additional entry with the eSpeak language name is appended: English-IPAStressed-English (America)



*/
111 | 
// Input representation ("method") of a V1 language string, e.g. the "ARPA" in "English-ARPA".
namespace ETTSLanguageType{
enum Enum{
    ARPA = 0,
    Char,
    IPA,
    GlobalPhone
};
}
120 | 
121 | 
// Architecture of a voice as read from the "architecture" object of its info JSON.
struct ArchitectureInfo{
    int Repo;      // see ETTSRepo
    int Text2Mel;  // see EText2MelModel
    int Vocoder;   // see EVocoderModel

    // String versions of the info, for displaying.
    // We want boilerplate int index to str conversion code to be low.
    std::string s_Repo;
    std::string s_Text2Mel;
    std::string s_Vocoder;

};
// Metadata of a voice. VoxUtil::ReadModelJSON fills this with positional aggregate
// initialization, so members must keep their current order.
struct VoiceInfo{
  std::string Name;
  std::string Author;
  int32_t Version;
  std::string Description;
  ArchitectureInfo Architecture;
  std::string Note; // Truncated to 80 characters by ReadModelJSON

  uint32_t SampleRate; // Read from the "sarate" JSON key

  std::string s_Language; // Language name = English-ARPA -> "English"
  std::string s_Language_Fullname; // Full language name = "English-ARPA"
  std::string s_eSpeakLang; // eSpeak voice name: "English (America)"

  std::string EndPadding; // The "pad" JSON value; may carry a leading space (see ReadModelJSON)
  int32_t LangType; // An ETTSLanguageType value



};
154 | 
155 | namespace VoxUtil {
156 | 
157 | 
158 |     std::string U32ToStr(const std::u32string& InU32);
159 |     std::u32string StrToU32(const std::string& InStr);
160 | 
161 |     std::vector<std::string> GetLinedFile(const std::string& Path);
162 | 
163 |     VoiceInfo ReadModelJSON(const std::string& InfoFilename);
164 | 
165 | 
166 | 
167 |     // Copy PyTorch tensor
168 | 
169 |     template<typename D>
170 |     TFTensor<D> CopyTensor(at::Tensor& InTens){
171 |         D* Data = InTens.data<D>();
172 |         std::vector<int64_t> Shape = InTens.sizes().vec();
173 | 
174 |         size_t TotalSize = 1;
175 | 
176 |         for (const int64_t& Dim : Shape)
177 |             TotalSize *= Dim;
178 | 
179 |         std::vector<D> DataVec = std::vector<D>(Data,Data + TotalSize);
180 | 
181 |         return TFTensor<D>{DataVec,Shape,TotalSize};
182 | 
183 | 
184 |     }
185 | 
186 | 
187 |     // Copy CppFlow (TF) tensor
188 | 	template<typename F>
189 |     TFTensor<F> CopyTensor(cppflow::tensor& InTens)
190 | 	{
191 | 		std::vector<F> Data = InTens.get_data<F>();
192 |         std::vector<int64_t> Shape = InTens.shape().get_data<int64_t>();
193 | 		size_t TotalSize = 1;
194 | 		for (const int64_t& Dim : Shape)
195 | 			TotalSize *= Dim;
196 | 
197 | 		return TFTensor<F>{Data, Shape, TotalSize};
198 | 
199 | 
200 | 	}
201 | 
202 |     template<typename VXVec1>
203 |     bool FindInVec(VXVec1 In, const std::vector<VXVec1>& Vec, size_t& OutIdx, size_t start = 0) {
204 | 		for (size_t xx = start;xx < Vec.size();xx++)
205 | 		{
206 | 			if (Vec[xx] == In) {
207 | 				OutIdx = xx;
208 | 				return true;
209 | 
210 | 			}
211 | 
212 | 		}
213 | 
214 | 
215 | 		return false;
216 | 
217 | 	}
218 |     template<typename VXVec1, typename X>
219 |     bool FindInVec2(VXVec1 In, const std::vector<X>& Vec, size_t& OutIdx, size_t start = 0) {
220 |         for (size_t xx = start;xx < Vec.size();xx++)
221 |         {
222 |             if (Vec[xx] == In) {
223 |                 OutIdx = xx;
224 |                 return true;
225 | 
226 |             }
227 | 
228 |         }
229 | 
230 | 
231 |         return false;
232 | 
233 |     }
234 | 
235 | 	void ExportWAV(const std::string& Filename, const std::vector<float>& Data, unsigned SampleRate);
236 | }
237 | 


--------------------------------------------------------------------------------
/attention.cpp:
--------------------------------------------------------------------------------
 1 | #include "attention.h"
 2 | 
 3 | 
 4 | Attention::Attention(QWidget *parent) : QCustomPlot(parent)
 5 | {
 6 | 
 7 |     QBrush FillBrush(QColor(100,100,100));
 8 |     this->setBackground(FillBrush);
 9 |     QColor White(255,255,255);
10 |     QPen AxisPen(QColor(150,150,150));
11 |     xAxis->setTickLabelColor(White);
12 |     yAxis->setTickLabelColor(White);
13 | 
14 |     xAxis->setBasePen(AxisPen);
15 |     yAxis->setBasePen(AxisPen);
16 | 
17 |     xAxis->setLabel("Decoder timestep");
18 |     yAxis->setLabel("Encoder timestep");
19 | 
20 |     xAxis->setLabelColor(White);
21 |     yAxis->setLabelColor(White);
22 |     QFont Fnt = QFont(font().family(), 10);
23 | 
24 |     xAxis->setLabelFont(QFont(font().family(), 9));
25 |     yAxis->setLabelFont(QFont(font().family(), 9));
26 | 
27 |     yAxis->setTickPen(AxisPen);
28 |     xAxis->setTickPen(AxisPen);
29 | 
30 |     yAxis->setSubTickPen(AxisPen);
31 |     xAxis->setSubTickPen(AxisPen);
32 | 
33 | 
34 | 
35 | }
36 | 
37 | void Attention::DoPlot(const TFTensor<float> &Alignment)
38 | {
39 |     const auto& Shp = Alignment.Shape;
40 | 
41 | 
42 | 
43 | 
44 |     Map->data()->setSize((int32_t)Shp[2],(int32_t)Shp[1]);
45 | 
46 |     Map->data()->setRange(QCPRange(0.0,(double)Shp[2]),QCPRange(0.0,(double)Shp[1]));
47 |     for (int64_t x = 0; x < Shp[2];x++)
48 |     {
49 |         for (int64_t y = 0;y < Shp[1];y++)
50 |         {
51 |             size_t i = x + Shp[2]*y;
52 |             Map->data()->setCell(x,y,(double)Alignment.Data[i]);
53 | 
54 |         }
55 | 
56 | 
57 |     }
58 |     Map->setDataRange(QCPRange(0.0,1.0));
59 |     xAxis->setRange(QCPRange(0.0,(double)Shp[2]));
60 | 
61 |     yAxis->setRange(QCPRange(0.0,(double)Shp[1]));
62 | 
63 |     rescaleAxes();
64 | 
65 |     replot();
66 | 
67 | 
68 | }
69 | 
70 | 


--------------------------------------------------------------------------------
/attention.h:
--------------------------------------------------------------------------------
 1 | #ifndef ATTENTION_H
 2 | #define ATTENTION_H
 3 | 
 4 | #include "ext/qcustomplot.h"
 5 | #include "VoxCommon.hpp"
 6 | 
// QCustomPlot widget that displays an alignment tensor as a colour map
// (x axis: decoder timestep, y axis: encoder timestep).
class Attention : public QCustomPlot
{
public:
    Attention(QWidget *parent = nullptr);

    // Replaces the displayed data with Alignment and redraws the plot.
    void DoPlot(const TFTensor<float>& Alignment);

    // Colour map DoPlot() draws into. The constructor shown in attention.cpp does not
    // create it, so it has to be assigned before DoPlot() is called.
    QCPColorMap* Map;

};
17 | 
18 | #endif // ATTENTION_H
19 | 


--------------------------------------------------------------------------------
/batchdenoisedlg.cpp:
--------------------------------------------------------------------------------
  1 | #include "batchdenoisedlg.h"
  2 | #include "ui_batchdenoisedlg.h"
  3 | 
  4 | #include <QFileDialog>
  5 | #include <QDir>
  6 | #include <QDirIterator>
  7 | #include "mainwindow.h"
  8 | 
  9 | #define ManWi ((MainWindow*)pMainWindow)
 10 | 
 11 | BatchDenoiseDlg::BatchDenoiseDlg(QWidget *parent) :
 12 |     QDialog(parent),
 13 |     ui(new Ui::BatchDenoiseDlg)
 14 | {
 15 |     ui->setupUi(this);
 16 |     ProcessedFiles = 0;
 17 |     CurrentIndex = 0;
 18 |     Failures = 0;
 19 | 
 20 | }
 21 | 
 22 | 
 23 | // can't define in header because InferDetails belongs to mainwindow.h and including it in this dlg's .h would case circular dependency error
 24 | InferDetails MakeInferDetails(const std::vector<float>& InAudat,const QString& FilePath,unsigned InSampleRate,int32_t OutSampleRate)
 25 | {
 26 |     InferDetails Dets;
 27 |     Dets.F0 = 0.0f;
 28 |     Dets.Speed = 0.0f;
 29 |     Dets.Energy = 0.0f;
 30 |     Dets.pItem = nullptr; // the mainwindow's function will make an item for us.
 31 |     Dets.Prompt = "";
 32 |     Dets.SpeakerID = OutSampleRate; // SpeakerID will double as resample when a denoise only job is requested.
 33 |     Dets.EmotionID = -1;
 34 |     Dets.Denoise = true;
 35 |     Dets.Amplification = 1.f;
 36 |     Dets.ExportFileName = FilePath;
 37 | 
 38 | 
 39 |     Dets.VoiceName = "";
 40 |     Dets.ForcedAudio = InAudat;
 41 |     Dets.SampleRate = InSampleRate;
 42 | 
 43 |     return Dets;
 44 | 
 45 | }
 46 | 
 47 | 
// Frees the generated UI object allocated in the constructor.
BatchDenoiseDlg::~BatchDenoiseDlg()
{
    delete ui;
}
 52 | 
 53 | void BatchDenoiseDlg::IterateDo()
 54 | {
 55 | 
 56 |     if (ProcessedFiles == Files.size() && ManWi->GetCountItems() == 0)
 57 |     {
 58 |         // It's done!
 59 |         delete timIter;
 60 |         SetControls(true);
 61 | 
 62 |         return;
 63 | 
 64 |     }
 65 | 
 66 |     if (ManWi->GetCountItems() != 0)
 67 |         return;
 68 | 
 69 | 
 70 |     ManWi->DenDone = 0;
 71 |     if (CurrentIndex + ui->spbBatchSz->value() > Files.size())
 72 |         ManWi->DenBatchSize = Files.size() - CurrentIndex;
 73 | 
 74 |     for (int32_t i = 0;i < ui->spbBatchSz->value();i++)
 75 |     {
 76 | 
 77 | 
 78 | 
 79 |         QString CurrentFn = Files[CurrentIndex];
 80 | 
 81 |         AudioFile<float> AudFile;
 82 |         InferDetails CurrentDets;
 83 |         try {
 84 |             AudFile.load(CurrentFn.toStdString());
 85 | 
 86 |             CurrentDets = MakeInferDetails(AudFile.samples[0],CurrentFn,AudFile.getSampleRate(),ui->spbOutSR->value());
 87 | 
 88 |         }  catch (...) {
 89 | 
 90 |            CurrentIndex += 1; // NOT i !!!!!!!
 91 |            ProcessedFiles += 1;
 92 |            ++Failures;
 93 | 
 94 |            if (CurrentIndex > Files.size() - 1)
 95 |                break;
 96 | 
 97 |            continue;
 98 |         }
 99 | 
100 |         ManWi->PushToInfers(CurrentDets);
101 | 
102 | 
103 |         CurrentIndex += 1; // NOT i !!!!!!!
104 |         ProcessedFiles += 1;
105 | 
106 |         if (CurrentIndex > Files.size() - 1)
107 |             break;
108 | 
109 | 
110 |     }
111 |     SetLabel();
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 
119 | 
120 | }
121 | 
122 | void BatchDenoiseDlg::on_btnFindFolder_clicked()
123 | {
124 | 
125 |     QString Dir = QFileDialog::getExistingDirectory(this, tr("Find base folder of your WAVs"),
126 |                                                 "",
127 |                                                 QFileDialog::ShowDirsOnly
128 |                                                 | QFileDialog::DontResolveSymlinks);
129 | 
130 |     ui->edtFolPath->setText(Dir);
131 | 
132 |     UpdateDirectory();
133 | 
134 | }
135 | 
// Rescans the folder once the user has finished editing the path field.
void BatchDenoiseDlg::on_edtFolPath_editingFinished()
{
    UpdateDirectory();

}
141 | 
142 | void BatchDenoiseDlg::SetLabel()
143 | {
144 |     ui->lblFiles->setText(QString(QString::number(ProcessedFiles) + " / " + QString::number(Files.size()) + " files, " + QString::number(Failures) + " failures.") );
145 | 
146 |     ui->pgFiles->setValue(ProcessedFiles);
147 |     ui->pgFiles->update();
148 | }
149 | 
150 | void BatchDenoiseDlg::UpdateDirectory()
151 | {
152 |     if (ui->edtFolPath->text().isEmpty())
153 |         return;
154 | 
155 |     if (Files.size())
156 |         Files.clear();
157 | 
158 |     QDirIterator DirIt(ui->edtFolPath->text(),QDirIterator::Subdirectories);
159 |     while (DirIt.hasNext())
160 |     {
161 |         DirIt.next();
162 |         if (QFileInfo(DirIt.filePath()).isFile() && QFileInfo(DirIt.filePath()).suffix() == "wav")
163 |             Files.push_back(DirIt.filePath());
164 |     }
165 |     CurrentIndex = 0;
166 |     ProcessedFiles = 0;
167 |     Failures = 0;
168 | 
169 |     ui->pgFiles->setRange(0,Files.size());
170 | 
171 | 
172 |     SetLabel();
173 | 
174 | 
175 | }
176 | 
177 | void BatchDenoiseDlg::on_btnStart_clicked()
178 | {
179 | 
180 | 
181 | 
182 |     CurrentIndex = 0;
183 |     ProcessedFiles = 0;
184 |     Failures = 0;
185 |     ManWi->DenBatchSize = ui->spbBatchSz->value();
186 | 
187 |     timIter = new QTimer(this);
188 |     timIter->setSingleShot(false);
189 |     timIter->setInterval(1000);
190 | 
191 |     connect(timIter,&QTimer::timeout,this,&BatchDenoiseDlg::IterateDo);
192 | 
193 |     timIter->start();
194 | 
195 |     SetControls(false);
196 | }
197 | 
198 | void BatchDenoiseDlg::SetControls(bool En)
199 | {
200 |     ui->edtFolPath->setEnabled(En);
201 |     ui->spbBatchSz->setEnabled(En);
202 |     ui->btnStart->setEnabled(En);
203 |     ui->btnFindFolder->setEnabled(En);
204 | 
205 | }
206 | 


--------------------------------------------------------------------------------
/batchdenoisedlg.h:
--------------------------------------------------------------------------------
 1 | #ifndef BATCHDENOISEDLG_H
 2 | #define BATCHDENOISEDLG_H
 3 | 
 4 | #include <QDialog>
 5 | #include <QTimer>
 6 | namespace Ui {
 7 | class BatchDenoiseDlg;
 8 | }
 9 | 
// Dialog that walks a folder of .wav files and feeds them to the main window in
// batches for denoising, driven by a repeating timer.
class BatchDenoiseDlg : public QDialog
{
    Q_OBJECT

public:
    explicit BatchDenoiseDlg(QWidget *parent = nullptr);
    ~BatchDenoiseDlg();



    // If we included mainwindow.h in here it would result in a circular dependency problem, so we include it in the .cpp
    // and make it a void* here. Must point to the MainWindow before a run is started.
    void* pMainWindow;

private slots:


    // Timer tick: pushes the next batch or finishes the run.
    void IterateDo();
    void on_btnFindFolder_clicked();

    void on_edtFolPath_editingFinished();

    void on_btnStart_clicked();

private:

    // Enables/disables the path, batch size, start and browse controls.
    void SetControls(bool En);

    QStringList Files;       // .wav files found by UpdateDirectory()
    QTimer* timIter;         // Created in on_btnStart_clicked(), released when the run ends
    int32_t ProcessedFiles;  // Files handled so far (including failures)
    int32_t CurrentIndex;    // Index into Files of the next file to handle
    int32_t Failures;        // Files that could not be loaded



    void SetLabel();
    void UpdateDirectory();
    Ui::BatchDenoiseDlg *ui;
};
50 | 
51 | #endif // BATCHDENOISEDLG_H
52 | 


--------------------------------------------------------------------------------
/batchdenoisedlg.ui:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <ui version="4.0">
  3 |  <class>BatchDenoiseDlg</class>
  4 |  <widget class="QDialog" name="BatchDenoiseDlg">
  5 |   <property name="geometry">
  6 |    <rect>
  7 |     <x>0</x>
  8 |     <y>0</y>
  9 |     <width>510</width>
 10 |     <height>299</height>
 11 |    </rect>
 12 |   </property>
 13 |   <property name="windowTitle">
 14 |    <string>Dialog</string>
 15 |   </property>
 16 |   <layout class="QVBoxLayout" name="verticalLayout">
 17 |    <item>
 18 |     <layout class="QHBoxLayout" name="horizontalLayout">
 19 |      <item>
 20 |       <widget class="QLabel" name="label">
 21 |        <property name="text">
 22 |         <string>Folder Path</string>
 23 |        </property>
 24 |       </widget>
 25 |      </item>
 26 |      <item>
 27 |       <widget class="QLineEdit" name="edtFolPath"/>
 28 |      </item>
 29 |      <item>
 30 |       <widget class="QPushButton" name="btnFindFolder">
 31 |        <property name="text">
 32 |         <string>Browse</string>
 33 |        </property>
 34 |       </widget>
 35 |      </item>
 36 |     </layout>
 37 |    </item>
 38 |    <item>
 39 |     <layout class="QHBoxLayout" name="horizontalLayout_2">
 40 |      <item>
 41 |       <widget class="QLabel" name="label_3">
 42 |        <property name="text">
 43 |         <string>Batch size:</string>
 44 |        </property>
 45 |       </widget>
 46 |      </item>
 47 |      <item>
 48 |       <widget class="QSpinBox" name="spbBatchSz">
 49 |        <property name="maximum">
 50 |         <number>16384</number>
 51 |        </property>
 52 |        <property name="singleStep">
 53 |         <number>32</number>
 54 |        </property>
 55 |        <property name="value">
 56 |         <number>4096</number>
 57 |        </property>
 58 |       </widget>
 59 |      </item>
 60 |      <item>
 61 |       <spacer name="horizontalSpacer">
 62 |        <property name="orientation">
 63 |         <enum>Qt::Horizontal</enum>
 64 |        </property>
 65 |        <property name="sizeHint" stdset="0">
 66 |         <size>
 67 |          <width>40</width>
 68 |          <height>20</height>
 69 |         </size>
 70 |        </property>
 71 |       </spacer>
 72 |      </item>
 73 |      <item>
 74 |       <widget class="QLabel" name="label_4">
 75 |        <property name="text">
 76 |         <string>Output sampling rate (Hz): </string>
 77 |        </property>
 78 |       </widget>
 79 |      </item>
 80 |      <item>
 81 |       <widget class="QSpinBox" name="spbOutSR">
 82 |        <property name="maximum">
 83 |         <number>96000</number>
 84 |        </property>
 85 |        <property name="singleStep">
 86 |         <number>8000</number>
 87 |        </property>
 88 |        <property name="value">
 89 |         <number>48000</number>
 90 |        </property>
 91 |       </widget>
 92 |      </item>
 93 |     </layout>
 94 |    </item>
 95 |    <item>
 96 |     <widget class="QLabel" name="label_2">
 97 |      <property name="text">
 98 |       <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Note: Will find all WAVs, recursive (folders and subfolders), and REPLACE FILES. If you don't want it to do that, make a copy first. Treats all files as mono.&lt;/p&gt;&lt;p&gt;Note 2: Files are resampled on input and output accordingly&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
 99 |      </property>
100 |      <property name="wordWrap">
101 |       <bool>true</bool>
102 |      </property>
103 |     </widget>
104 |    </item>
105 |    <item>
106 |     <widget class="QLabel" name="lblFiles">
107 |      <property name="text">
108 |       <string>Files: 0 / 0</string>
109 |      </property>
110 |     </widget>
111 |    </item>
112 |    <item>
113 |     <widget class="QProgressBar" name="pgFiles">
114 |      <property name="value">
115 |       <number>0</number>
116 |      </property>
117 |     </widget>
118 |    </item>
119 |    <item>
120 |     <widget class="QPushButton" name="btnStart">
121 |      <property name="text">
122 |       <string>Start</string>
123 |      </property>
124 |     </widget>
125 |    </item>
126 |   </layout>
127 |  </widget>
128 |  <resources/>
129 |  <connections/>
130 | </ui>
131 | 


--------------------------------------------------------------------------------
/espeakphonemizer.cpp:
--------------------------------------------------------------------------------
  1 | #include "espeakphonemizer.h"
  2 | #include <espeak/speak_lib.h>
  3 | 
  4 | 
  5 | static const std::u32string Punctuation_t = U",.;¡!¿?:-~";
  6 | static const std::u32string Punctuation_ns = U"¿-~";
  7 | 
  8 | using namespace ESP;
  9 | 
 10 | std::string ESpeakPhonemizer::ToPhon(const std::string &InTxt)
 11 | {
 12 |     const char* TextPtr = InTxt.c_str();
 13 |     const void** OurPtr = (const void**)&TextPtr;
 14 |     const char* Phon = espeak_TextToPhonemes(OurPtr, espeakCHARS_AUTO, (int)PhonemePars.to_ulong());
 15 | 
 16 | 
 17 |     return std::string(Phon);
 18 | }
 19 | 
 20 | 
 21 | void ESpeakPhonemizer::Initialize(const std::string &DataPath, const std::string &VoiceName)
 22 | {
 23 |     // these are irrelevant because we don't play any audio, we just use the phonemizer
 24 |     espeak_AUDIO_OUTPUT output = AUDIO_OUTPUT_SYNCH_PLAYBACK;
 25 |     int buflength = 500, options = 0;
 26 | 
 27 | 
 28 |     auto Err1 = espeak_Initialize(output, buflength, DataPath.c_str(), options);
 29 |     auto Err = espeak_SetVoiceByName(VoiceName.c_str());
 30 |     EVoiceName = VoiceName;
 31 | 
 32 | 
 33 |     PhonemePars[1] = 1; // set IPA
 34 | 
 35 | 
 36 | }
 37 | 
 38 | std::string ESpeakPhonemizer::Phonemize(const std::string &Input)
 39 | {
 40 |     std::u32string In = VoxUtil::StrToU32(Input);
 41 | 
 42 |     // ESpeak's phonemize function stops at punctuation, so we split it up into chunks, phonemize, then put them back together
 43 |     PunctSplitVec SplitVec = IterativePunctuationSplit(In, Punctuation_t);
 44 | 
 45 |     std::string Assembled = "";
 46 |     bool Space = false;
 47 |     for (const auto& Spli : SplitVec)
 48 |     {
 49 | 
 50 | 
 51 |         std::string Pibber = VoxUtil::U32ToStr(Spli.second);
 52 |         if (!Spli.first)
 53 |         {
 54 |             Pibber = ToPhon(Pibber);
 55 |             if (Space)
 56 |                 Assembled += " ";
 57 | 
 58 | 
 59 |         }else
 60 |         {
 61 |             Space = true;
 62 |             for (const auto& PCh : Punctuation_ns){
 63 |                 if (Spli.second.find(PCh) != std::u32string::npos)
 64 |                     Space = false;
 65 | 
 66 |             }
 67 | 
 68 | 
 69 | 
 70 | 
 71 | 
 72 |         }
 73 |         Assembled += Pibber;
 74 | 
 75 | 
 76 |     }
 77 | 
 78 |     return Assembled;
 79 | 
 80 | }
 81 | 
 82 | ESpeakPhonemizer::ESpeakPhonemizer()
 83 | {
 84 | 
 85 | }
 86 | 
 87 | ESP::PunctSplitVec ESP::IterativePunctuationSplit(const std::u32string &Input, const std::u32string &Punct)
 88 | {
 89 |     PunctSplitVec Ret;
 90 | 
 91 |     std::u32string CuStr = U"";
 92 |     for (const auto& Ch : Input) {
 93 | 
 94 |         if (Punct.find(Ch) != std::u32string::npos) {
 95 |             if (CuStr.size())
 96 |                 Ret.push_back({ false,CuStr });
 97 | 
 98 |             std::u32string PunctOnly(1,Ch);
 99 |             Ret.push_back({ true, PunctOnly });
100 |             CuStr = U"";
101 | 
102 |         }
103 |         else {
104 |             CuStr += Ch;
105 |         }
106 | 
107 | 
108 |     }
109 |     Ret.push_back({ false,CuStr });
110 |     return Ret;
111 | 
112 | }
113 | 
114 | 


--------------------------------------------------------------------------------
/espeakphonemizer.h:
--------------------------------------------------------------------------------
 1 | #ifndef ESPEAKPHONEMIZER_H
 2 | #define ESPEAKPHONEMIZER_H
 3 | 
 4 | /*
 5 | 
 6 |   ESpeakPhonemizer: Tool for IPA Text2Phon using ESpeak NG as backend.
 7 | 
 8 | */
 9 | #include <iostream>
10 | #include <string>
11 | #include <bitset>
12 | #include "VoxCommon.hpp"
13 | #include <vector>
14 | 
namespace ESP{
// One chunk of a split text: first tells whether the chunk is punctuation,
// second holds the characters of the chunk.
using PunctSplit = std::pair<bool, std::u32string>;
// Chunks of a text, in input order.
using PunctSplitVec = std::vector<PunctSplit>;


// Splits Input at the characters listed in Punct.
// Returns vector<pair<IS_PUNCTUATION,String>>
PunctSplitVec IterativePunctuationSplit(const std::u32string& Input, const std::u32string& Punct);

}
24 | 
// Text-to-phoneme front end built on the eSpeak library.
class ESpeakPhonemizer
{
public:
    ESpeakPhonemizer();

    // Sets up eSpeak and selects the voice.
    // DataPath: Path to ESpeak NG data dir
    // VoiceName: Name of voice to use for phonemizing (like "Spanish (Latin America)")
    void Initialize(const std::string& DataPath,const std::string& VoiceName);

    // Phonemize text using ESpeak phonemizer
    // Unlike regular phonemizer, feed complete texts at once instead of just words.
    std::string Phonemize(const std::string& Input);

    // Name of the voice last passed to Initialize().
    const std::string& GetVoiceName() const
    {
        return EVoiceName;
    }

private:
    // Phonemizes a single chunk of text with eSpeak.
    std::string ToPhon(const std::string& InTxt);

    // Bit flags handed to eSpeak as its phoneme mode, one bit per bit of an int.
    std::bitset<sizeof(int) * 8> PhonemePars;

    std::string EVoiceName;
};
46 | 
47 | #endif // ESPEAKPHONEMIZER_H
48 | 


--------------------------------------------------------------------------------
/ext/CppFlow/context.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Created by serizba on 27/6/20.
 3 | //
 4 | 
 5 | #ifndef CPPFLOW2_CONTEXT_H
 6 | #define CPPFLOW2_CONTEXT_H
 7 | 
 8 | #include <memory>
 9 | #include <stdexcept>
10 | #include <utility>
11 | 
12 | #include <tensorflow/c/c_api.h>
13 | #include <tensorflow/c/eager/c_api.h>
14 | 
15 | namespace cppflow {
16 | 
17 |     inline bool status_check(TF_Status* status) {
18 |         if (TF_GetCode(status) != TF_OK) {
19 |             throw std::runtime_error(TF_Message(status));
20 |         }
21 |         return true;
22 |     }
23 | 
24 |     class context {
25 |         public:
26 |             static TFE_Context* get_context();
27 |             static TF_Status* get_status();
28 | 
29 |         private:
30 |             TFE_Context* tfe_context{nullptr};
31 | 
32 |         public:
33 |             explicit context(TFE_ContextOptions* opts = nullptr);
34 | 
35 |             context(context const&) = delete;
36 |             context& operator=(context const&) = delete;
37 |             context(context&&) noexcept;
38 |             context& operator=(context&&) noexcept;
39 | 
40 |             ~context();
41 |     };
42 | 
43 |     // TODO: create ContextManager class if needed
44 |     // Set new context, thread unsafe, must be called at the beginning.
45 |     //  TFE_ContextOptions* tfe_opts = ...
46 |     //  cppflow::get_global_context() = cppflow::context(tfe_opts);
47 |     inline context& get_global_context() {
48 |         static context global_context;
49 |         return global_context;
50 |     }
51 | 
52 | }
53 | 
54 | namespace cppflow {
55 | 
56 |     inline TFE_Context* context::get_context() {
57 |         return get_global_context().tfe_context;
58 |     }
59 | 
60 |     inline TF_Status* context::get_status() {
61 |         thread_local std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> local_tf_status(TF_NewStatus(), &TF_DeleteStatus);
62 |         return local_tf_status.get();
63 |     }
64 | 
65 |     inline context::context(TFE_ContextOptions* opts) {
66 |         auto tf_status = context::get_status();
67 |         if(opts == nullptr) {
68 |             std::unique_ptr<TFE_ContextOptions, decltype(&TFE_DeleteContextOptions)> new_opts(TFE_NewContextOptions(), &TFE_DeleteContextOptions);
69 |             this->tfe_context = TFE_NewContext(new_opts.get(), tf_status);
70 |         } else {
71 |             this->tfe_context = TFE_NewContext(opts, tf_status);
72 |         }
73 |         status_check(tf_status);
74 |     }
75 | 
76 |     inline context::context(context&& ctx) noexcept :
77 |         tfe_context(std::exchange(ctx.tfe_context, nullptr))
78 |     {
79 |     }
80 | 
81 |     inline context& context::operator=(context&& ctx) noexcept {
82 |         tfe_context = std::exchange(ctx.tfe_context, tfe_context);
83 |         return *this;
84 |     }
85 | 
86 |     inline context::~context() {
87 |         TFE_DeleteContext(this->tfe_context);
88 |     }
89 | 
90 | }
91 | 
92 | #endif //CPPFLOW2_CONTEXT_H
93 | 


--------------------------------------------------------------------------------
/ext/CppFlow/cppflow.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // Created by serizba on 17/9/20.
 3 | //
 4 | 
 5 | #ifndef EXAMPLE_CPPFLOW_H
 6 | #define EXAMPLE_CPPFLOW_H
 7 | 
 8 | #include "tensor.h"
 9 | #include "model.h"
10 | #include "raw_ops.h"
11 | #include "ops.h"
12 | #include "datatype.h"
13 | 
14 | #include <tensorflow/c/c_api.h>
15 | 
16 | namespace cppflow {
17 | 
18 |     /**
19 |      * Version of TensorFlow and CppFlow
20 |      * @return A string containing the version of TensorFow and CppFlow
21 |      */
22 |     std::string version();
23 | 
24 | }
25 | 
26 | /******************************
27 |  *   IMPLEMENTATION DETAILS   *
28 |  ******************************/
29 | 
30 | namespace cppflow {
31 |     inline std::string version() {
32 |         return "TensorFlow: " + std::string(TF_Version()) + " CppFlow: 2.0.0";
33 |     }
34 | }
35 | 
36 | #endif //EXAMPLE_CPPFLOW_H
37 | 


--------------------------------------------------------------------------------
/ext/CppFlow/datatype.h:
--------------------------------------------------------------------------------
  1 | //
  2 | // Created by serizba on 12/7/20.
  3 | //
  4 | 
  5 | #ifndef CPPFLOW2_DATATYPE_H
  6 | #define CPPFLOW2_DATATYPE_H
  7 | 
  8 | #include <type_traits>
  9 | #include <string>
 10 | #include <typeinfo>
 11 | #include <ostream>
 12 | #include <stdexcept>
 13 | 
 14 | namespace cppflow {
 15 | 
 16 |     using datatype = TF_DataType;
 17 | 
 18 |     /**
 19 |      * @return A string representing dt
 20 |      *
 21 |      */
 22 |     inline std::string to_string(datatype dt) {
 23 |         switch (dt) {
 24 |             case TF_FLOAT:
 25 |                 return "TF_FLOAT";
 26 |             case TF_DOUBLE:
 27 |                 return "TF_DOUBLE";
 28 |             case TF_INT32:
 29 |                 return "TF_INT32";
 30 |             case TF_UINT8:
 31 |                 return "TF_UINT8";
 32 |             case TF_INT16:
 33 |                 return "TF_INT16";
 34 |             case TF_INT8:
 35 |                 return "TF_INT8";
 36 |             case TF_STRING:
 37 |                 return "TF_STRING";
 38 |             case TF_COMPLEX64:
 39 |                 return "TF_COMPLEX64";
 40 |             case TF_INT64:
 41 |                 return "TF_INT64";
 42 |             case TF_BOOL:
 43 |                 return "TF_BOOL";
 44 |             case TF_QINT8:
 45 |                 return "TF_QINT8";
 46 |             case TF_QUINT8:
 47 |                 return "TF_QUINT8";
 48 |             case TF_QINT32:
 49 |                 return "TF_QINT32";
 50 |             case TF_BFLOAT16:
 51 |                 return "TF_BFLOAT16";
 52 |             case TF_QINT16:
 53 |                 return "TF_QINT16";
 54 |             case TF_QUINT16:
 55 |                 return "TF_QUINT16";
 56 |             case TF_UINT16:
 57 |                 return "TF_UINT16";
 58 |             case TF_COMPLEX128:
 59 |                 return "TF_COMPLEX128";
 60 |             case TF_HALF:
 61 |                 return "TF_HALF";
 62 |             case TF_RESOURCE:
 63 |                 return "TF_RESOURCE";
 64 |             case TF_VARIANT:
 65 |                 return "TF_VARIANT";
 66 |             case TF_UINT32:
 67 |                 return "TF_UINT32";
 68 |             case TF_UINT64:
 69 |                 return "TF_UINT64";
 70 |             default:
 71 |                 return "DATATYPE_NOT_KNOWN";
 72 |         }
 73 |     }
 74 | 
 75 |     /**
 76 |      *
 77 |      * @tparam T
 78 |      * @return The TensorFlow type of T
 79 |      */
 80 |     template<typename T>
 81 |     TF_DataType deduce_tf_type() {
 82 |         if (std::is_same<T, float>::value)
 83 |             return TF_FLOAT;
 84 |         if (std::is_same<T, double>::value)
 85 |             return TF_DOUBLE;
 86 |         if (std::is_same<T, int32_t >::value)
 87 |             return TF_INT32;
 88 |         if (std::is_same<T, uint8_t>::value)
 89 |             return TF_UINT8;
 90 |         if (std::is_same<T, int16_t>::value)
 91 |             return TF_INT16;
 92 |         if (std::is_same<T, int8_t>::value)
 93 |             return TF_INT8;
 94 |         if (std::is_same<T, int64_t>::value)
 95 |             return TF_INT64;
 96 |         if (std::is_same<T, unsigned char>::value)
 97 |             return TF_BOOL;
 98 |         if (std::is_same<T, uint16_t>::value)
 99 |             return TF_UINT16;
100 |         if (std::is_same<T, uint32_t>::value)
101 |             return TF_UINT32;
102 |         if (std::is_same<T, uint64_t>::value)
103 |             return TF_UINT64;
104 | 
105 |         // decode with `c++filt --type $output` for gcc
106 |         throw std::runtime_error{"Could not deduce type! type_name: " + std::string(typeid(T).name())};
107 |     }
108 | 
109 |     /**
110 |      * @return  The stream os after inserting the string representation of dt
111 |      *
112 |      */
113 |     inline std::ostream& operator<<(std::ostream& os, datatype dt) {
114 |         os << to_string(dt);
115 |         return os;
116 |     }
117 | 
118 | }
119 | #endif //CPPFLOW2_DATATYPE_H
120 | 


--------------------------------------------------------------------------------
/ext/CppFlow/defer.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <functional>
 3 | 
 4 | namespace cppflow {
 5 | 
 6 | class defer {
 7 | public:
 8 |     typedef std::function<void ()> Func;
 9 | 
10 |     explicit defer(const Func& func) : _func(func) {}
11 |     ~defer() {
12 |         _func();
13 |     }
14 | 
15 |     defer(const defer&) = delete;
16 |     defer(defer&&) = delete;
17 |     defer& operator=(const defer&) = delete;
18 |     void* operator new (size_t) = delete;
19 |     void operator delete (void*) = delete;
20 | 
21 | private:
22 |     Func _func;
23 | };
24 | 
25 | } // namespace cppflow
26 | 


--------------------------------------------------------------------------------
/ext/CppFlow/model.h:
--------------------------------------------------------------------------------
  1 | //
  2 | // Created by serizba on 29/6/20.
  3 | //
  4 | 
  5 | #ifndef CPPFLOW2_MODEL_H
  6 | #define CPPFLOW2_MODEL_H
  7 | 
  8 | #include <tensorflow/c/c_api.h>
  9 | #include <string>
 10 | #include <fstream>
 11 | #include <iostream>
 12 | #include <vector>
 13 | 
 14 | #include "context.h"
 15 | #include "defer.h"
 16 | #include "tensor.h"
 17 | 
 18 | namespace cppflow {
 19 | 
 20 |     class model {
 21 |     public:
 22 |         explicit model(const std::string& filename);
 23 | 
 24 |         std::vector<std::string> get_operations() const;
 25 |         std::vector<int64_t> get_operation_shape(const std::string& operation) const;
 26 | 
 27 |         std::vector<tensor> operator()(std::vector<std::tuple<std::string, tensor>> inputs, std::vector<std::string> outputs);
 28 |         tensor operator()(const tensor& input);
 29 | 
 30 |         ~model() = default;
 31 |         model(const model &model) = default;
 32 |         model(model &&model) = default;
 33 |         model &operator=(const model &other) = default;
 34 |         model &operator=(model &&other) = default;
 35 | 
 36 |     private:
 37 | 
 38 |         std::shared_ptr<TF_Graph> graph;
 39 |         std::shared_ptr<TF_Session> session;
 40 |     };
 41 | }
 42 | 
 43 | 
 44 | namespace cppflow {
 45 | 
 46 |     inline model::model(const std::string &filename) {
 47 |         this->graph = {TF_NewGraph(), TF_DeleteGraph};
 48 | 
 49 |         // Create the session.
 50 |         std::unique_ptr<TF_SessionOptions, decltype(&TF_DeleteSessionOptions)> session_options = {TF_NewSessionOptions(), TF_DeleteSessionOptions};
 51 |         std::unique_ptr<TF_Buffer, decltype(&TF_DeleteBuffer)> run_options = {TF_NewBufferFromString("", 0), TF_DeleteBuffer};
 52 |         std::unique_ptr<TF_Buffer, decltype(&TF_DeleteBuffer)> meta_graph = {TF_NewBuffer(), TF_DeleteBuffer};
 53 | 
 54 |         auto session_deleter = [](TF_Session* sess) {
 55 |             TF_DeleteSession(sess, context::get_status());
 56 |             status_check(context::get_status());
 57 |         };
 58 | 
 59 |         int tag_len = 1;
 60 |         const char* tag = "serve";
 61 |         this->session = {TF_LoadSessionFromSavedModel(session_options.get(), run_options.get(), filename.c_str(),
 62 |                                 &tag, tag_len, this->graph.get(), meta_graph.get(), context::get_status()),
 63 |                          session_deleter};
 64 | 
 65 |         status_check(context::get_status());
 66 |     }
 67 | 
 68 |     inline std::vector<std::string> model::get_operations() const {
 69 |         std::vector<std::string> result;
 70 |         size_t pos = 0;
 71 |         TF_Operation* oper;
 72 | 
 73 |         // Iterate through the operations of a graph
 74 |         while ((oper = TF_GraphNextOperation(this->graph.get(), &pos)) != nullptr) {
 75 |             result.emplace_back(TF_OperationName(oper));
 76 |         }
 77 |         return result;
 78 |     }
 79 | 
 80 |     inline std::vector<int64_t> model::get_operation_shape(const std::string& operation) const {
 81 |         // Get operation by the name
 82 |         TF_Output out_op;
 83 |         out_op.oper = TF_GraphOperationByName(this->graph.get(), operation.c_str());
 84 |         out_op.index = 0;
 85 | 
 86 |         std::vector<int64_t> shape;
 87 | 
 88 |         // Operation does not exist
 89 |         if (!out_op.oper)
 90 |             throw std::runtime_error("No operation named \"" + operation + "\" exists");
 91 | 
 92 |         // DIMENSIONS
 93 | 
 94 |         // Get number of dimensions
 95 |         int n_dims = TF_GraphGetTensorNumDims(this->graph.get(), out_op, context::get_status());
 96 | 
 97 |         // If is not a scalar
 98 |         if (n_dims > 0) {
 99 |             // Get dimensions
100 |             auto* dims = new int64_t[n_dims];
101 |             TF_GraphGetTensorShape(this->graph.get(), out_op, dims, n_dims, context::get_status());
102 | 
103 |             // Check error on Model Status
104 |             status_check(context::get_status());
105 | 
106 |             shape = std::vector<int64_t>(dims, dims + n_dims);
107 | 
108 |             delete[] dims;
109 |         }
110 | 
111 |         return shape;
112 |     }
113 | 
114 |     inline std::tuple<std::string, int> parse_name(const std::string& name) {
115 |         auto idx = name.find(':');
116 |         return (idx == -1 ? std::make_tuple(name, 0) : std::make_tuple(name.substr(0, idx), std::stoi(name.substr(idx + 1))));
117 |     }
118 | 
119 |     inline std::vector<tensor> model::operator()(std::vector<std::tuple<std::string, tensor>> inputs, std::vector<std::string> outputs) {
120 | 
121 |         std::vector<TF_Output> inp_ops(inputs.size());
122 |         std::vector<TF_Tensor*> inp_val(inputs.size(), nullptr);
123 | 
124 |         for (int i=0; i<inputs.size(); i++) {
125 | 
126 |             // Operations
127 |             const auto[op_name, op_idx] = parse_name(std::get<0>(inputs[i]));
128 |             inp_ops[i].oper = TF_GraphOperationByName(this->graph.get(), op_name.c_str());
129 |             inp_ops[i].index = op_idx;
130 | 
131 |             if (!inp_ops[i].oper)
132 |                 throw std::runtime_error("No operation named \"" + op_name + "\" exists");
133 | 
134 |             // Values
135 |             inp_val[i] = std::get<1>(inputs[i]).get_tensor().get();
136 |         }
137 | 
138 |         std::vector<TF_Output> out_ops(outputs.size());
139 |         auto out_val = std::make_unique<TF_Tensor*[]>(outputs.size());
140 |         for (int i=0; i<outputs.size(); i++) {
141 | 
142 |             const auto[op_name, op_idx] = parse_name(outputs[i]);
143 |             out_ops[i].oper = TF_GraphOperationByName(this->graph.get(), op_name.c_str());
144 |             out_ops[i].index = op_idx;
145 | 
146 |             if (!out_ops[i].oper)
147 |                 throw std::runtime_error("No operation named \"" + op_name + "\" exists");
148 | 
149 |         }
150 | 
151 |         TF_SessionRun(this->session.get(), NULL,
152 |                 inp_ops.data(), inp_val.data(), inputs.size(),
153 |                 out_ops.data(), out_val.get(), outputs.size(),
154 |                 NULL, 0,NULL , context::get_status());
155 |         status_check(context::get_status());
156 | 
157 |         std::vector<tensor> result;
158 |         result.reserve(outputs.size());
159 |         for (int i=0; i<outputs.size(); i++) {
160 |             result.emplace_back(tensor(out_val[i]));
161 |         }
162 | 
163 |         return result;
164 |     }
165 | 
166 |     inline tensor model::operator()(const tensor& input) {
167 |         return (*this)({{"serving_default_input_1", input}}, {"StatefulPartitionedCall"})[0];
168 |     }
169 | }
170 | 
171 | #endif //CPPFLOW2_MODEL_H
172 | 


--------------------------------------------------------------------------------
/ext/CppFlow/ops.h:
--------------------------------------------------------------------------------
  1 | //
  2 | // Created by serizba on 31/7/20.
  3 | //
  4 | 
  5 | #ifndef CPPFLOW2_OPS_H
  6 | #define CPPFLOW2_OPS_H
  7 | 
  8 | 
  9 | #include "tensor.h"
 10 | #include "raw_ops.h"
 11 | 
 12 | namespace cppflow {
 13 | 
 14 |     /**
 15 |      * @name Operators
 16 |      */
 17 |     //@{
 18 | 
 19 |     /**
 20 |      * @returns x + y elementwise
 21 |      */
 22 |     tensor operator+(const tensor& x, const tensor& y);
 23 | 
 24 |     /**
 25 |      * @returns x - y elementwise
 26 |      */
 27 |     tensor operator-(const tensor& x, const tensor& y);
 28 | 
 29 |     /**
 30 |      * @returns x * y elementwise
 31 |      */
 32 |     tensor operator*(const tensor& x, const tensor& y);
 33 | 
 34 |     /**
 35 |      * @return x / y elementwise
 36 |      */
 37 |     tensor operator/(const tensor& x, const tensor& y);
 38 | 
 39 |     std::ostream& operator<<(std::ostream& os, const cppflow::tensor& t);
 40 | 
 41 |     //@}
 42 | 
 43 |     /**
 44 |      * @return A string representing t in the form:
 45 |      * (tensor: shape=?, data=
 46 |      * ?)
 47 |      */
 48 |     std::string to_string(const tensor& t);
 49 | }
 50 | 
 51 | /******************************
 52 |  *   IMPLEMENTATION DETAILS   *
 53 |  ******************************/
 54 | 
 55 | namespace cppflow {
 56 | 
 57 |     // Operators
 58 | 
 59 |     inline tensor operator+(const tensor& x, const tensor& y) {
 60 |         return add(x, y);
 61 |     }
 62 | 
 63 |     inline tensor operator-(const tensor& x, const tensor& y) {
 64 |         return sub(x, y);
 65 |     }
 66 | 
 67 |     inline tensor operator*(const tensor& x, const tensor& y) {
 68 |         return mul(x, y);
 69 |     }
 70 | 
 71 |     inline tensor operator/(const tensor& x, const tensor& y) {
 72 |         return div(x, y);
 73 |     }
 74 | 
 75 |     inline std::ostream& operator<<(std::ostream& os, const cppflow::tensor& t) {
 76 |         std::string res =  to_string(t);
 77 |         return os << res;
 78 |     }
 79 | 
 80 | 
 81 |     inline std::string to_string(const tensor &t) {
 82 |         auto res_tensor = string_format({t.shape(), t}, "(tensor: shape=%s, data=\n%s)");
 83 |         auto res_tensor_h = res_tensor.get_tensor();
 84 | 
 85 | #ifdef TENSORFLOW_C_TF_TSTRING_H_
 86 |         // For future version TensorFlow 2.4
 87 |         //auto *t_str = reinterpret_cast<TF_TString *>(TF_TensorData(res_tensor_h.get()));
 88 |         auto *t_str = (TF_TString *)(TF_TensorData(res_tensor_h.get()));
 89 |         auto result = std::string(TF_TString_GetDataPointer(t_str), TF_TString_GetSize(t_str));
 90 | #else
 91 |         const char* dst[1] = {nullptr};
 92 |         size_t dst_len[1] = {3};
 93 |         TF_StringDecode(static_cast<char*>(TF_TensorData(res_tensor_h.get())) + 8, TF_TensorByteSize(res_tensor_h.get()), dst, dst_len, context::get_status());
 94 |         status_check(context::get_status());
 95 |         auto result = std::string(dst[0], *dst_len);
 96 | #endif // TENSORFLOW_C_TF_TSTRING_H_
 97 | 
 98 |         return result;
 99 |     }
100 | 
101 | }
102 | 
103 | #endif //CPPFLOW2_OPS_H
104 | 


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/.gitignore:
--------------------------------------------------------------------------------
 1 | # C++ objects and libs
 2 | 
 3 | *.slo
 4 | *.lo
 5 | *.o
 6 | *.a
 7 | *.la
 8 | *.lai
 9 | *.so
10 | *.dll
11 | *.dylib
12 | 
13 | # Qt-es
14 | 
15 | /.qmake.cache
16 | /.qmake.stash
17 | *.pro.user
18 | *.pro.user.*
19 | *.qbs.user
20 | *.qbs.user.*
21 | *.moc
22 | moc_*.cpp
23 | moc_*.h
24 | qrc_*.cpp
25 | ui_*.h
26 | Makefile*
27 | *build-*
28 | 
29 | # QtCreator
30 | 
31 | *.autosave
32 | 
33 | # QtCreator Qml
34 | *.qmlproject.user
35 | *.qmlproject.user.*
36 | 
37 | # QtCreator CMake
38 | CMakeLists.txt.user*
39 | 
40 | 


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/DarkStyle.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | ###############################################################################
 3 | #                                                                             #
 4 | # The MIT License                                                             #
 5 | #                                                                             #
 6 | # Copyright (C) 2017 by Juergen Skrotzky (JorgenVikingGod@gmail.com)          #
 7 | #               >> https://github.com/Jorgen-VikingGod                        #
 8 | #                                                                             #
 9 | # Sources: https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle  #
10 | #                                                                             #
11 | ###############################################################################
12 | */
13 | 
14 | #include "DarkStyle.h"
15 | 
16 | DarkStyle::DarkStyle():
17 |   DarkStyle(styleBase())
18 | { }
19 | 
20 | DarkStyle::DarkStyle(QStyle *style):
21 |   QProxyStyle(style)
22 | { }
23 | 
24 | QStyle *DarkStyle::styleBase(QStyle *style) const {
25 |   static QStyle *base = !style ? QStyleFactory::create(QStringLiteral("Fusion")) : style;
26 |   return base;
27 | }
28 | 
29 | QStyle *DarkStyle::baseStyle() const
30 | {
31 |   return styleBase();
32 | }
33 | 
34 | void DarkStyle::polish(QPalette &palette)
35 | {
36 |   // modify palette to dark
37 |   palette.setColor(QPalette::Window,QColor(53,53,53));
38 |   palette.setColor(QPalette::WindowText,Qt::white);
39 |   palette.setColor(QPalette::Disabled,QPalette::WindowText,QColor(127,127,127));
40 |   palette.setColor(QPalette::Base,QColor(42,42,42));
41 |   palette.setColor(QPalette::AlternateBase,QColor(66,66,66));
42 |   palette.setColor(QPalette::ToolTipBase,Qt::white);
43 |   palette.setColor(QPalette::ToolTipText,QColor(53,53,53));
44 |   palette.setColor(QPalette::Text,Qt::white);
45 |   palette.setColor(QPalette::Disabled,QPalette::Text,QColor(127,127,127));
46 |   palette.setColor(QPalette::Dark,QColor(35,35,35));
47 |   palette.setColor(QPalette::Shadow,QColor(20,20,20));
48 |   palette.setColor(QPalette::Button,QColor(53,53,53));
49 |   palette.setColor(QPalette::ButtonText,Qt::white);
50 |   palette.setColor(QPalette::Disabled,QPalette::ButtonText,QColor(127,127,127));
51 |   palette.setColor(QPalette::BrightText,Qt::red);
52 |   palette.setColor(QPalette::Link,QColor(42,130,218));
53 |   palette.setColor(QPalette::Highlight,QColor(42,130,218));
54 |   palette.setColor(QPalette::Disabled,QPalette::Highlight,QColor(80,80,80));
55 |   palette.setColor(QPalette::HighlightedText,Qt::white);
56 |   palette.setColor(QPalette::Disabled,QPalette::HighlightedText,QColor(127,127,127));
57 | }
58 | 
59 | void DarkStyle::polish(QApplication *app)
60 | {
61 |   if (!app) return;
62 | 
63 |   // increase font size for better reading,
64 |   // setPointSize was reduced from +2 because when applied this way in Qt5, the font is larger than intended for some reason
65 |   QFont defaultFont = QApplication::font();
66 |   defaultFont.setPointSize(defaultFont.pointSize()+1);
67 |   app->setFont(defaultFont);
68 | 
69 |   // loadstylesheet
70 |   QFile qfDarkstyle(QStringLiteral(":/darkstyle/darkstyle.qss"));
71 |   if (qfDarkstyle.open(QIODevice::ReadOnly | QIODevice::Text))
72 |   {
73 |     // set stylesheet
74 |     QString qsStylesheet = QString::fromLatin1(qfDarkstyle.readAll());
75 |     app->setStyleSheet(qsStylesheet);
76 |     qfDarkstyle.close();
77 |   }
78 | }
79 | 


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/DarkStyle.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | ###############################################################################
 3 | #                                                                             #
 4 | # The MIT License                                                             #
 5 | #                                                                             #
 6 | # Copyright (C) 2017 by Juergen Skrotzky (JorgenVikingGod@gmail.com)          #
 7 | #               >> https://github.com/Jorgen-VikingGod                        #
 8 | #                                                                             #
 9 | # Sources: https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle  #
10 | #                                                                             #
11 | ###############################################################################
12 | */
13 | 
14 | #ifndef _DarkStyle_HPP
15 | #define _DarkStyle_HPP
16 | 
17 | #include <QApplication>
18 | #include <QProxyStyle>
19 | #include <QStyleFactory>
20 | #include <QFont>
21 | #include <QFile>
22 | 
 23 | class DarkStyle : public QProxyStyle  // proxy style that applies a dark palette, a font bump and a stylesheet
 24 | {
 25 |   Q_OBJECT
 26 | 
 27 | public:
 28 |   DarkStyle();  // delegates to DarkStyle(QStyle*) with styleBase()
 29 |   explicit DarkStyle(QStyle *style);  // forwards style to the QProxyStyle constructor
 30 | 
 31 |   QStyle *baseStyle() const;  // returns styleBase()
 32 | 
 33 |   void polish(QPalette &palette) override;  // sets the dark colour roles on palette
 34 |   void polish(QApplication *app) override;  // returns early for a null app; otherwise sets font (+1pt) and stylesheet
 35 | 
 36 | private:
 37 |   QStyle *styleBase(QStyle *style = Q_NULLPTR) const;  // function-local static style, decided by the first call only
 38 | };
39 | 
40 | #endif  // _DarkStyle_HPP
41 | 


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/README.md:
--------------------------------------------------------------------------------
  1 | # Qt Frameless Window with DarkStyle
  2 | A simple MainWindow class implementation with a frameless window and a custom dark style.
  3 | 
  4 | It also adds support for a title bar and buttons (minimize, maximize, close).
  5 | 
  6 | The look is based on the VS2013 application window (flat and frameless window).
  7 | 
  8 | <table>
  9 |   <tr><th colspan="2">Screenshots</th></tr>
 10 |   <tr><th>mac enabled</th><th>mac disabled</th></tr>
 11 |   <tr>
 12 |     <td><img src="https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle/blob/master/screenshot_mac_frameless_window_qt_dark_style_enabled.png" title="screenshot mac frameless window qt dark style enabled" /></td>
 13 |     <td><img src="https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle/blob/master/screenshot_mac_frameless_window_qt_dark_style_disabled.png" title="screenshot mac frameless window qt dark style disabled" /></td>
 14 |   </tr>
 15 | </table>
 16 | 
 17 | 
 18 | ## Qt and OS
 19 | * tested with Qt5.5.0, Qt5.9.0 and Qt5.10.0
 20 | * tested on Windows 7, Windows 10, MacOSX 10.12.5 and MacOS 10.13.2
 21 | 
 22 | ## PyQt5
 23 | Here is an [unofficial Python port](https://github.com/gmarull/qtmodern) of my implementation.
 24 | 
 25 | ## How to use
 26 | * add an additional include path to **framelesswindow**
 27 | * add resources **framelesswindow.qrc** and **darkstyle.qrc**
 28 | * add ``#include "framelesswindow.h"`` into **main.cpp**, create window ``FramelessWindow framelessWindow;`` and assign your mainwindow object as content ``framelessWindow.setContent(mainWindow);`` and show it ``framelessWindow.show();``
 29 | * add ``#include "DarkStyle.h"`` into **main.cpp** and call ``a.setStyle(new DarkStyle);``
 30 | 
 31 | 
 32 | ```qt
 33 | #include <QApplication>
 34 | #include "DarkStyle.h"
 35 | #include "framelesswindow.h"
 36 | #include "mainwindow.h"
 37 | 
 38 | int main(int argc, char *argv[])
 39 | {
 40 |   QApplication a(argc, argv);
 41 | 
 42 |   // style our application with custom dark style
 43 |   a.setStyle(new DarkStyle);
 44 | 
 45 |   // create frameless window (and set windowState or title)
 46 |   FramelessWindow framelessWindow;
 47 |   //framelessWindow.setWindowState(Qt::WindowMaximized);
 48 |   //framelessWindow.setWindowTitle("test title");
 49 |   //framelessWindow.setWindowIcon(a.style()->standardIcon(QStyle::SP_DesktopIcon));
 50 |   
 51 |   // create our mainwindow instance
 52 |   MainWindow *mainWindow = new MainWindow;
 53 | 
 54 |   // add the mainwindow to our custom frameless window
 55 |   framelessWindow.setContent(mainWindow);
 56 |   framelessWindow.show();
 57 | 
 58 |   return a.exec();
 59 | }
 60 | ```
 61 | 
 62 | 
 63 | ## features
 64 | * frameless window
 65 | * custom dark style (based on **Fusion style** with dark palette and custom stylesheets)
 66 | * title bar
 67 | * buttons (minimize | restore | maximize | close)
 68 | * move the window by dragging the title bar
 69 | * double click the title bar to toggle between window states (maximized and normal)
 70 | * use of native events, like minimizing or system menu
 71 | 
 72 | 
 73 | ## todo
 74 | * [resize window on each corner [#1]](https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle/issues/1)
 75 | * [snap on screen edges [#3]](https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle/issues/3)
 76 | 
 77 | 
 78 | ## thanks
 79 | Many thanks go to the [Qt Forum](https://forum.qt.io/topic/80654/how-to-create-vs2013-like-frameless-window-with-dark-style) and especially to [Chris Kawa](https://forum.qt.io/user/chris-kawa) for pointing out some common issues and giving hints about great must-have features.
 80 | 
 81 | 
 82 | ## Licence
 83 | > The MIT License
 84 | >
 85 | > Copyright (c) 2018, Juergen Skrotzky (https://github.com/Jorgen-VikingGod, JorgenVikingGod@gmail.com)
 86 | >
 87 | > Permission is hereby granted, free of charge, to any person obtaining a copy
 88 | > of this software and associated documentation files (the "Software"), to deal
 89 | > in the Software without restriction, including without limitation the rights
 90 | > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 91 | > copies of the Software, and to permit persons to whom the Software is
 92 | > furnished to do so, subject to the following conditions:
 93 | >
 94 | > The above copyright notice and this permission notice shall be included in
 95 | > all copies or substantial portions of the Software.
 96 | >
 97 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 98 | > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 99 | > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
100 | > AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
101 | > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
102 | > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
103 | > THE SOFTWARE.
104 | 


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle.qrc:
--------------------------------------------------------------------------------
 1 | <RCC>
 2 |     <qresource prefix="/">
 3 |         <file>darkstyle/darkstyle.qss</file>
 4 |         <file>darkstyle/icon_close.png</file>
 5 |         <file>darkstyle/icon_restore.png</file>
 6 |         <file>darkstyle/icon_undock.png</file>
 7 |         <file>darkstyle/icon_branch_closed.png</file>
 8 |         <file>darkstyle/icon_branch_end.png</file>
 9 |         <file>darkstyle/icon_branch_more.png</file>
10 |         <file>darkstyle/icon_branch_open.png</file>
11 |         <file>darkstyle/icon_vline.png</file>
12 |         <file>darkstyle/icon_checkbox_checked.png</file>
13 |         <file>darkstyle/icon_checkbox_indeterminate.png</file>
14 |         <file>darkstyle/icon_checkbox_unchecked.png</file>
15 |         <file>darkstyle/icon_checkbox_checked_pressed.png</file>
16 |         <file>darkstyle/icon_checkbox_indeterminate_pressed.png</file>
17 |         <file>darkstyle/icon_checkbox_unchecked_pressed.png</file>
18 |         <file>darkstyle/icon_checkbox_checked_disabled.png</file>
19 |         <file>darkstyle/icon_checkbox_indeterminate_disabled.png</file>
20 |         <file>darkstyle/icon_checkbox_unchecked_disabled.png</file>
21 |         <file>darkstyle/icon_radiobutton_checked.png</file>
22 |         <file>darkstyle/icon_radiobutton_unchecked.png</file>
23 |         <file>darkstyle/icon_radiobutton_checked_pressed.png</file>
24 |         <file>darkstyle/icon_radiobutton_unchecked_pressed.png</file>
25 |         <file>darkstyle/icon_radiobutton_checked_disabled.png</file>
26 |         <file>darkstyle/icon_radiobutton_unchecked_disabled.png</file>
27 |         <file>darkstyle/icon_tbclose.png</file>
28 |         <file>darkstyle/icon_tbclose_hover.png</file>
29 |         <file>darkstyle/icon_sepvline.png</file>
30 |     </qresource>
31 | </RCC>
32 | 


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_branch_closed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_branch_closed.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_branch_end.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_branch_end.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_branch_more.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_branch_more.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_branch_open.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_branch_open.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_checked.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_checked.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_checked_disabled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_checked_disabled.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_checked_pressed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_checked_pressed.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_indeterminate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_indeterminate.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_indeterminate_disabled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_indeterminate_disabled.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_indeterminate_pressed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_indeterminate_pressed.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_unchecked.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_unchecked.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_unchecked_disabled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_unchecked_disabled.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_unchecked_pressed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_checkbox_unchecked_pressed.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_close.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_close.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_checked.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_checked.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_checked_disabled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_checked_disabled.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_checked_pressed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_checked_pressed.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_unchecked.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_unchecked.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_unchecked_disabled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_unchecked_disabled.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_unchecked_pressed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_radiobutton_unchecked_pressed.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_restore.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_restore.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_sepvline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_sepvline.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_tbclose.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_tbclose.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_tbclose_hover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_tbclose_hover.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_undock.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_undock.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_vline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/darkstyle/icon_vline.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/frameless_window_dark.pro:
--------------------------------------------------------------------------------
 1 | ###############################################################################
 2 | #                                                                             #
 3 | # The MIT License                                                             #
 4 | #                                                                             #
 5 | # Copyright (C) 2017 by Juergen Skrotzky (JorgenVikingGod@gmail.com)          #
 6 | #               >> https://github.com/Jorgen-VikingGod                        #
 7 | #                                                                             #
 8 | # Sources: https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle  #
 9 | #                                                                             #
10 | ###############################################################################
11 | 
12 | QT       += core gui
13 | 
14 | greaterThan(QT_MAJOR_VERSION, 4): QT += widgets
15 | 
16 | INCLUDEPATH +="framelesswindow"
17 | 
18 | TARGET      =  QtFramelessWindowDarkStyle
19 | TEMPLATE    =  app
20 | 
21 | SOURCES     += main.cpp\
22 |                mainwindow.cpp \
23 |                framelesswindow/framelesswindow.cpp \
24 |                framelesswindow/windowdragger.cpp \
25 |                DarkStyle.cpp
26 | 
27 | 
28 | HEADERS     += mainwindow.h \
29 |                framelesswindow/framelesswindow.h \
30 |                framelesswindow/windowdragger.h \
31 |                DarkStyle.h
32 | 
33 | 
34 | FORMS       += mainwindow.ui \
35 |                framelesswindow/framelesswindow.ui
36 | 
37 | RESOURCES   += darkstyle.qrc \
38 |                framelesswindow.qrc
39 | 


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow.qrc:
--------------------------------------------------------------------------------
1 | <RCC>
2 |     <qresource prefix="/">
3 |         <file>images/icon_window_minimize.png</file>
4 |         <file>images/icon_window_restore.png</file>
5 |         <file>images/icon_window_maximize.png</file>
6 |         <file>images/icon_window_close.png</file>
7 |     </qresource>
8 | </RCC>
9 | 


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow/framelesswindow.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | ###############################################################################
 3 | #                                                                             #
 4 | # The MIT License                                                             #
 5 | #                                                                             #
 6 | # Copyright (C) 2017 by Juergen Skrotzky (JorgenVikingGod@gmail.com)          #
 7 | #               >> https://github.com/Jorgen-VikingGod                        #
 8 | #                                                                             #
 9 | # Sources: https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle  #
10 | #                                                                             #
11 | ###############################################################################
12 | */
13 | 
14 | #ifndef FRAMELESSWINDOW_H
15 | #define FRAMELESSWINDOW_H
16 | 
17 | #include <QEvent>
18 | #include <QtWidgets>
19 | 
20 | namespace Ui {
21 |   class FramelessWindow;
22 | }
23 | 
24 | class MouseButtonSignaler: public QObject
25 | {
26 |   Q_OBJECT
27 | 
28 | public:
29 |   MouseButtonSignaler(QObject * parent = 0) : QObject(parent) {}
30 |   void installOn(QWidget * widget) { widget->installEventFilter(this); }
31 | 
32 | protected:
33 |   virtual bool eventFilter(QObject * obj, QEvent * ev) Q_DECL_OVERRIDE {
34 |     if ((   ev->type() == QEvent::MouseButtonPress
35 |          || ev->type() == QEvent::MouseButtonRelease
36 |          || ev->type() == QEvent::MouseButtonDblClick)
37 |         && obj->isWidgetType()) {
38 |       emit mouseButtonEvent(static_cast<QWidget*>(obj),
39 |                             static_cast<QMouseEvent*>(ev));
40 |     }
41 |     return false;
42 |   }
43 | signals:
44 |   void mouseButtonEvent(QWidget *, QMouseEvent *);
45 | };
46 | 
47 | class FramelessWindow: public QWidget
48 | {
49 |   Q_OBJECT
50 | 
51 | public:
52 |   explicit FramelessWindow(QWidget *parent = 0);
53 |   void setContent(QWidget *w);
54 | 
55 |   // Set a content dialog which if the close button is done, it sends a cancel signal.
56 |   void ContentDlg(QDialog* indlg);
57 |   void SetTitleBarBtns(bool Maximize,bool Minimize,bool Close);
58 | private:
59 |   void styleWindow(bool bActive, bool bNoState);
60 | 
61 |   bool ContDlg;
62 |   QDialog* dlgCont;
63 | 
64 | signals:
65 |   void windowIconLeftClicked();
66 |   void windowIconRightClicked();
67 |   void windowIconDblClick();
68 | 
69 | public slots:
70 |   void setWindowTitle(const QString &text);
71 |   void setWindowIcon(const QIcon &ico);
72 | 
73 | private slots:
74 |   void on_applicationStateChanged(Qt::ApplicationState state);
75 |   void on_minimizeButton_clicked();
76 |   void on_restoreButton_clicked();
77 |   void on_maximizeButton_clicked();
78 |   void on_closeButton_clicked();
79 |   void on_windowTitlebar_doubleClicked();
80 | 
81 | protected:
82 |   virtual void changeEvent(QEvent *event);
83 | 
84 | private:
85 |   Ui::FramelessWindow *ui;
86 | 
87 | protected:
88 |   QHBoxLayout contentLayout;
89 | };
90 | 
91 | #endif // FRAMELESSWINDOW_H
92 | 


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow/windowdragger.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | ###############################################################################
 3 | #                                                                             #
 4 | # The MIT License                                                             #
 5 | #                                                                             #
 6 | # Copyright (C) 2017 by Juergen Skrotzky (JorgenVikingGod@gmail.com)          #
 7 | #               >> https://github.com/Jorgen-VikingGod                        #
 8 | #                                                                             #
 9 | # Sources: https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle  #
10 | #                                                                             #
11 | ###############################################################################
12 | */
13 | 
14 | #include <QStyleOption>
15 | #include <QPainter>
16 | #include "windowdragger.h"
17 | 
18 | WindowDragger::WindowDragger(QWidget *parent): QWidget(parent)
19 | {
20 |   mousePressed = false;
21 | }
22 | 
23 | void WindowDragger::mousePressEvent(QMouseEvent *event)
24 | {
25 |   mousePressed = true;
26 |   mousePos = event->globalPos();
27 | 
28 |   QWidget *parent = parentWidget();
29 |   if (parent)
30 |     parent = parent->parentWidget();
31 | 
32 |   if (parent)
33 |     wndPos = parent->pos();
34 | }
35 | 
36 | void WindowDragger::mouseMoveEvent(QMouseEvent *event)
37 | {
38 |   QWidget *parent = parentWidget();
39 |   if (parent)
40 |     parent = parent->parentWidget();
41 | 
42 |   if (parent && mousePressed)
43 |     parent->move(wndPos + (event->globalPos() - mousePos));
44 | }
45 | 
46 | void WindowDragger::mouseReleaseEvent(QMouseEvent *event)
47 | {
48 |   Q_UNUSED(event);
49 |   mousePressed = false;
50 | }
51 | 
52 | void WindowDragger::paintEvent(QPaintEvent *event)
53 | {
54 |   Q_UNUSED(event);
55 |   QStyleOption styleOption;
56 |   styleOption.init(this);
57 |   QPainter painter(this);
58 |   style()->drawPrimitive(QStyle::PE_Widget, &styleOption, &painter, this);
59 | }
60 | 
61 | void WindowDragger::mouseDoubleClickEvent(QMouseEvent *event)
62 | {
63 |   Q_UNUSED(event);
64 |   emit doubleClicked();
65 | }
66 | 
67 | 


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/framelesswindow/windowdragger.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | ###############################################################################
 3 | #                                                                             #
 4 | # The MIT License                                                             #
 5 | #                                                                             #
 6 | # Copyright (C) 2017 by Juergen Skrotzky (JorgenVikingGod@gmail.com)          #
 7 | #               >> https://github.com/Jorgen-VikingGod                        #
 8 | #                                                                             #
 9 | # Sources: https://github.com/Jorgen-VikingGod/Qt-Frameless-Window-DarkStyle  #
10 | #                                                                             #
11 | ###############################################################################
12 | */
13 | 
14 | #ifndef WINDOWDRAGGER_H
15 | #define WINDOWDRAGGER_H
16 | 
17 | #include <QWidget>
18 | #include <QMouseEvent>
19 | 
20 | class WindowDragger : public QWidget
21 | {
22 |   Q_OBJECT
23 | 
24 | public:
25 |   explicit WindowDragger(QWidget *parent = 0);
26 | 
27 | signals:
28 |   void doubleClicked();
29 | 
30 | protected:
31 |   void mousePressEvent(QMouseEvent *event);
32 |   void mouseMoveEvent(QMouseEvent *event);
33 |   void mouseReleaseEvent(QMouseEvent *event);
34 |   void mouseDoubleClickEvent(QMouseEvent *event);
35 |   void paintEvent(QPaintEvent *event);
36 | 
37 | protected:
38 |   bool   mousePressed;
39 |   QPoint mousePos;
40 |   QPoint wndPos;
41 | };
42 | 
43 | #endif // WINDOWDRAGGER_H
44 | 


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/images/icon_window_close.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/images/icon_window_close.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/images/icon_window_maximize.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/images/icon_window_maximize.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/images/icon_window_minimize.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/images/icon_window_minimize.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/images/icon_window_restore.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/images/icon_window_restore.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/screenshot_mac_frameless_window_qt_dark_style_disabled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/screenshot_mac_frameless_window_qt_dark_style_disabled.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/screenshot_mac_frameless_window_qt_dark_style_enabled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/screenshot_mac_frameless_window_qt_dark_style_enabled.png


--------------------------------------------------------------------------------
/ext/Qt-Frameless-Window-DarkStyle-master/screenshot_win7_frameless_window_qt_dark_style_enabled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/ext/Qt-Frameless-Window-DarkStyle-master/screenshot_win7_frameless_window_qt_dark_style_enabled.png


--------------------------------------------------------------------------------
/ext/ZCharScanner.cpp:
--------------------------------------------------------------------------------
  1 | #include "ZCharScanner.h"
  2 | using namespace std;
  3 | #include <stdexcept>
  4 | 
  5 | int ZStringDelimiter::key_search(const GString& s, const GString& key)
  6 | {
  7 | 	int count = 0;
  8 | 	size_t pos = 0;
  9 | 	while ((pos = s.find(key, pos)) != GString::npos) {
 10 | 		++count;
 11 | 		++pos;
 12 | 	}
 13 | 	return count;
 14 | }
 15 | void ZStringDelimiter::UpdateTokens()
 16 | {
 17 |     if (!m_vDelimiters.size() || m_sString == "")
 18 | 		return;
 19 | 
 20 | 	m_vTokens.clear();
 21 | 
 22 | 
 23 | 	vector<GString>::iterator dIt = m_vDelimiters.begin();
 24 | 	while (dIt != m_vDelimiters.end())
 25 | 	{
 26 | 		GString delimiter = *dIt;
 27 | 	
 28 | 
 29 | 		DelimStr(m_sString, delimiter, true);
 30 | 		
 31 | 	
 32 | 		++dIt;
 33 | 	}
 34 | 	
 35 | 	
 36 | 
 37 | }
 38 | 
 39 | 
 40 | void ZStringDelimiter::DelimStr(const GString & s, const GString & delimiter, const bool & removeEmptyEntries)
 41 | {
 42 | 	BarRange(0, s.length());
 43 | 	for (size_t start = 0, end; start < s.length(); start = end + delimiter.length())
 44 | 	{
 45 | 		size_t position = s.find(delimiter, start);
 46 | 		end = position != GString::npos ? position : s.length();
 47 | 
 48 | 		GString token = s.substr(start, end - start);
 49 | 		if (!removeEmptyEntries || !token.empty())
 50 | 		{
 51 | 			if (token != s)
 52 | 				m_vTokens.push_back(token);
 53 | 
 54 | 		}
 55 | 		Bar(position);
 56 | 	}
 57 | 
 58 | 	// dadwwdawdaawdwadwd
 59 | }
 60 | 
 61 | void ZStringDelimiter::BarRange(const int & min, const int & max)
 62 | {
 63 | #ifdef _AFX_ALL_WARNINGS
 64 | 	if (PgBar)
 65 | 		m_pBar->SetRange32(min, max);
 66 | 
 67 | 
 68 | #endif
 69 | }
 70 | 
 71 | void ZStringDelimiter::Bar(const int & pos)
 72 | {
 73 | #ifdef _AFX_ALL_WARNINGS
 74 | 	if (PgBar)
 75 | 		m_pBar->SetPos(pos);
 76 | 
 77 | 
 78 | #endif
 79 | }
 80 | 
 81 | ZStringDelimiter::ZStringDelimiter()
 82 | {
 83 |     m_sString = "";
 84 | 	tokenIndex = 0;
 85 | 	PgBar = false;
 86 | }
 87 | 
 88 | 
 89 | bool ZStringDelimiter::GetFirstToken(GString & in_out)
 90 | {
 91 | 	if (m_vTokens.size() >= 1) {
 92 | 		in_out = m_vTokens[0];
 93 | 		return true;
 94 | 	}
 95 | 	else {
 96 |         return false;
 97 | 	}
 98 | }
 99 | 
100 | bool ZStringDelimiter::GetNextToken(GString & in_sOut)
101 | {
102 | 	if (tokenIndex > m_vTokens.size() - 1)
103 | 		return false;
104 | 
105 | 	in_sOut = m_vTokens[tokenIndex];
106 | 	++tokenIndex;
107 | 
108 | 	return true;
109 | }
110 | 
111 | GString ZStringDelimiter::operator[](const size_t & in_index)
112 | {
113 | 	if (in_index > m_vTokens.size())
114 | 		throw std::out_of_range("ZStringDelimiter tried to access token higher than size");
115 | 
116 | 	return m_vTokens[in_index];
117 | 
118 | }
119 | GString ZStringDelimiter::Reassemble(const GString& delim, const int& nelem)
120 | {
121 |     GString Result = "";
122 | 	TokenIterator RasIt = m_vTokens.begin();
123 | 	int r = 0;
124 | 	if (nelem == -1) {
125 | 		while (RasIt != m_vTokens.end())
126 | 		{
127 | 
128 | 			if (r != 0)
129 | 				Result.append(delim);
130 | 
131 | 			Result.append(*RasIt);
132 | 
133 | 			++r;
134 | 
135 | 
136 | 			++RasIt;
137 | 		}
138 | 	}
139 | 	else {
140 | 		while (RasIt != m_vTokens.end() && r < nelem)
141 | 		{
142 | 		
143 | 			if (r != 0)
144 | 				Result.append(delim);
145 | 
146 | 			Result.append(*RasIt);
147 | 
148 | 			++r;
149 | 			++RasIt;
150 | 		}
151 | 	}
152 | 	
153 | 	return Result;
154 | 
155 | }
156 | 
157 | GString ZStringDelimiter::Reassemble(const GString & delim, const std::vector<GString>& Strs,int nelem)
158 | {
159 |     GString Result = "";
160 | 	TokenIterator RasIt = Strs.begin();
161 | 	int r = 0;
162 | 	if (nelem == -1) {
163 | 		while (RasIt != Strs.end())
164 | 		{
165 | 
166 | 			if (r != 0)
167 | 				Result.append(delim);
168 | 
169 | 			Result.append(*RasIt);
170 | 
171 | 			++r;
172 | 
173 | 
174 | 			++RasIt;
175 | 		}
176 | 	}
177 | 	else {
178 | 		while (RasIt != Strs.end() && r < nelem)
179 | 		{
180 | 
181 | 			if (r != 0)
182 | 				Result.append(delim);
183 | 
184 | 			Result.append(*RasIt);
185 | 
186 | 			++r;
187 | 			++RasIt;
188 | 		}
189 | 	}
190 | 
191 | 	return Result;
192 | }
193 | 
194 | void ZStringDelimiter::AddDelimiter(const GString & in_Delim)
195 | {
196 | 	m_vDelimiters.push_back(in_Delim);
197 | 	UpdateTokens();
198 | 
199 | }
200 | 
201 | ZStringDelimiter::~ZStringDelimiter()
202 | {
203 | }
204 | 


--------------------------------------------------------------------------------
/ext/ZCharScanner.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #define GBasicCharScanner ZStringDelimiter
 4 | 
 5 | #include <vector>
 6 | #include <string>
 7 | 
 8 | #define ZSDEL_USE_STD_STRING
 9 | #ifndef ZSDEL_USE_STD_STRING
10 | #include "golem_string.h"
11 | #else
12 | #define GString std::string
13 | #endif
14 | 
15 | typedef std::vector<GString>::const_iterator TokenIterator;
16 | 
17 | // ZStringDelimiter
18 | // ==============
19 | // Simple class to delimit and split strings.
20 | // You can use operator[] to access them
21 | // Or you can use the itBegin() and itEnd() to get some iterators
22 | // =================
23 | class ZStringDelimiter
24 | {
25 | private:
26 | 	int key_search(const GString & s, const GString & key);
27 | 	void UpdateTokens();
28 | 	std::vector<GString> m_vTokens;
29 | 	std::vector<GString> m_vDelimiters;
30 | 
31 | 	GString m_sString;
32 | 
33 | 	void DelimStr(const GString& s, const GString& delimiter, const bool& removeEmptyEntries = false);
34 | 	void BarRange(const int& min, const int& max);
35 | 	void Bar(const int& pos);
36 | 	size_t tokenIndex;
37 | public:
38 | 	ZStringDelimiter();
39 | 	bool PgBar;
40 | 
41 | #ifdef _AFX_ALL_WARNINGS
42 | 	CProgressCtrl* m_pBar;
43 | #endif
44 | 
45 | 	ZStringDelimiter(const GString& in_iStr) {
46 | 		m_sString = in_iStr;
47 | 		PgBar = false;
48 | 
49 | 	}
50 | 
51 | 	bool GetFirstToken(GString& in_out);
52 | 	bool GetNextToken(GString& in_sOut);
53 | 
54 | 	// std::String alts
55 | 
56 | 	size_t szTokens() { return m_vTokens.size(); }
57 | 	GString operator[](const size_t& in_index);
58 | 
59 | 	GString Reassemble(const GString & delim, const int & nelem = -1);
60 | 
61 | 	// Override to reassemble provided tokens.
62 | 	GString Reassemble(const GString & delim, const std::vector<GString>& Strs,int nelem = -1);
63 | 
64 | 	// Get a const reference to the tokens
65 | 	const std::vector<GString>& GetTokens() { return m_vTokens; }
66 | 
67 | 	TokenIterator itBegin() { return m_vTokens.begin(); }
68 | 	TokenIterator itEnd() { return m_vTokens.end(); }
69 | 
70 | 	void SetText(const GString& in_Txt) { 
71 | 		m_sString = in_Txt; 
72 | 		if (m_vDelimiters.size())
73 | 			UpdateTokens();
74 | 	}
75 | 	void AddDelimiter(const GString& in_Delim);
76 | 
77 | 	~ZStringDelimiter();
78 | };
79 | 
80 | 


--------------------------------------------------------------------------------
/ext/ZFile.cpp:
--------------------------------------------------------------------------------
  1 | #include "ZFile.h"
  2 | 
  3 | using namespace std;
  4 | int ZFile::EZFOpenModeToIos(const EZFOpenMode::Enum & input)
  5 | {
  6 | 	/*
  7 | 	hehe wall of ifs
  8 | 	yanderedev amirite???
  9 | 	*/
 10 | 	if (input == EZFOpenMode::BinaryRead)
 11 | 		return ios::in | ios::binary;
 12 | 	else if (input == EZFOpenMode::BinaryWrite)
 13 | 		return ios::out | ios::binary;
 14 | 	else if (input == EZFOpenMode::TextRead)
 15 | 		return ios::in;
 16 | 	else if (input == EZFOpenMode::TextWrite)
 17 | 		return ios::out;
 18 | 
 19 | 	SysEndian = ZFUtil::GetSysEndianness();
 20 | 
 21 | 	return ios::in | ios::binary;
 22 | 
 23 | }
 24 | 
 25 | ZFile::ZFile(const std::string & coFName, const EZFOpenMode::Enum & coMode)
 26 | {
 27 | 	Open(coFName, coMode);
 28 | }
 29 | 
 30 | bool ZFile::Open(const std::string & in_sFileName, const EZFOpenMode::Enum & in_Mode)
 31 | {
 32 | 	OpenMode = in_Mode;
 33 | 
 34 |     Stream.open(in_sFileName,(ios_base::openmode)EZFOpenModeToIos(in_Mode));
 35 | 	return Stream.good();
 36 | 
 37 | }
 38 | 
 39 | 
 40 | 
 41 | void ZFile::Seek(const INT64 & in_Pos)
 42 | {
 43 | 	if (OpenMode == EZFOpenMode::BinaryRead || OpenMode == EZFOpenMode::TextRead)
 44 | 		Stream.seekg(in_Pos, ios::beg);
 45 | 	else if (OpenMode == EZFOpenMode::BinaryWrite || OpenMode == EZFOpenMode::TextWrite)
 46 | 		Stream.seekp(in_Pos, ios::beg);
 47 | }
 48 | 
 49 | INT64 ZFile::GetPos()
 50 | {
 51 | 	if (OpenMode == EZFOpenMode::BinaryRead || OpenMode == EZFOpenMode::TextRead)
 52 | 		return Stream.tellg();
 53 | 	else if (OpenMode == EZFOpenMode::BinaryWrite || OpenMode == EZFOpenMode::TextWrite)
 54 | 		return Stream.tellp();
 55 | 
 56 | 	// NO TYPE?????????????
 57 |     return -1;
 58 | }
 59 | 
 60 | void ZFile::SeekToEnd()
 61 | {
 62 |     if (OpenMode == EZFOpenMode::BinaryRead || OpenMode == EZFOpenMode::TextRead)
 63 |         Stream.seekg(0, Stream.end);
 64 |     else if (OpenMode == EZFOpenMode::BinaryWrite || OpenMode == EZFOpenMode::TextWrite)
 65 |         Stream.seekp(0, Stream.end);
 66 | }
 67 | 
 68 | INT64 ZFile::GetFileLength()
 69 | {
 70 | 	std::streampos lpos = GetPos();
 71 | 
 72 | 	if (OpenMode == EZFOpenMode::BinaryRead || OpenMode == EZFOpenMode::TextRead)
 73 | 		Stream.seekg(0, Stream.end);
 74 | 	else if (OpenMode == EZFOpenMode::BinaryWrite || OpenMode == EZFOpenMode::TextWrite)
 75 | 		Stream.seekp(0, Stream.end);
 76 | 
 77 | 	const INT64 Len = GetPos();
 78 | 	Seek(lpos);
 79 | 
 80 | 	return Len;
 81 | 
 82 | }
 83 | 
 84 | void ZFile::Read(void * out, const INT64 & count)
 85 | {
 86 | 	Stream.read((BYTE*)out, count);
 87 | 
 88 | }
 89 | 
 90 | void ZFile::Write(void * in, const INT64 & incount)
 91 | {
 92 | 	Stream.write((BYTE*)in, incount);
 93 | 	
 94 | }
 95 | 
 96 | ByteArr ZFile::ReadEntireFile()
 97 | {
 98 | 
 99 | 	ByteArr ArrRet;
100 | 
101 | 	Stream.seekg(0, Stream.end);
102 | 	INT64 length = Stream.tellg();
103 | 	Stream.seekg(0, Stream.beg);
104 | 	ArrRet.CAlloc(length);
105 | 
106 | 	Stream.read(ArrRet.GetData(), length);
107 | 	
108 | 	return ArrRet;
109 | 
110 | }
111 | 
112 | void ZFile::WriteLine(const string &inLi)
113 | {
114 |     std::string Line = inLi + "\n";
115 | 
116 |     Write((void*)Line.data(),Line.size() * sizeof(char));
117 | 
118 | }
119 | 
120 | void ZFile::Write(const ByteArr & BrDat)
121 | {
122 | 	Stream.write(BrDat.CoData(), BrDat.Size());
123 | }
124 | 
125 | void ZFile::Close()
126 | {
127 | 	Stream.close();
128 | }
129 | 
130 | void ZFile::operator>>(ByteArr& BarDat) {
131 | 	size_t BaSz = 0;
132 | 	Read(BaSz);
133 | 	BarDat.CAlloc(BaSz);
134 | 	Stream.read(BarDat.GetData(), BaSz);
135 | 
136 | }
137 | 
138 | 
139 | ZFile::ZFile()
140 | {
141 | }
142 | 
143 | 
144 | ZFile::~ZFile()
145 | {
146 | }
147 | 


--------------------------------------------------------------------------------
/ext/ZFile.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | /*
  4 | ######################################
  5 | #
  6 | #
  7 |   ____________ _ _      
  8 |  |___  /  ____(_) |     
  9 |     / /| |__   _| | ___ 
 10 |    / / |  __| | | |/ _ \
 11 |   / /__| |    | | |  __/
 12 |  /_____|_|    |_|_|\___|
 13 |                         
 14 |                         
 15 | ########################################
 16 | # Description: Defines ZFile class, one meant for easy serialization and writing of binary types,
 17 | # including commonly used std containers without much problem
 18 | #
 19 | # Author: ZDisket
 20 | # Copyright (C) 2018 YOUR MOM GAY LOLOLOL
 21 | #######################################
 22 | */
 23 | 
#include <algorithm>
#include <fstream>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>

#include "ByteArr.h"
 29 | 
 30 | #define ZFILE_IOVR(cla,n) ZFile& operator<<(ZFile& right,const cla& n)
 31 | #define ZFILE_OOVR(cla,n) ZFile& operator>>(ZFile& right,cla& n)
 32 | // FStream that works with bytes
 33 | typedef std::basic_fstream<BYTE,std::char_traits<BYTE>> ufstream;
 34 | 
 35 | 
 36 | 
// Open modes accepted by ZFile. The enum lives in its own namespace so the
// values are spelled EZFOpenMode::BinaryRead etc.
namespace EZFOpenMode {
	enum Enum {
		BinaryRead = 0, // input, binary
		TextRead,       // input, text
		BinaryWrite,    // output, binary
		TextWrite       // output, text
	};
}
 45 | 
 46 | namespace EZFEndian {
 47 | 	enum Enum {
 48 | 		Big = 0,
 49 | 		Little
 50 | 	};
 51 | }
 52 | 
 53 | 
 54 | 
 55 | namespace ZFUtil {
 56 | 	inline EZFEndian::Enum GetSysEndianness()
 57 | 	{
 58 | 		const int value{ 0x01 };
 59 | 		const void * address = static_cast<const void *>(&value);
 60 | 		const unsigned char * least_significant_address = static_cast<const unsigned char *>(address);
 61 | 		return (*least_significant_address == 0x01) ? EZFEndian::Little : EZFEndian::Big;
 62 | 	}
 63 | 
 64 | 	template <typename T>
 65 | 	void SwapEndian(T& var)
 66 | 	{
 67 | 		static_assert(std::is_pod<T>::value, "Type must be POD type for safety");
 68 | 		std::array<char, sizeof(T)> varArray;
 69 | 		std::memcpy(varArray.data(), &var, sizeof(T));
 70 | 		for (int i = 0; i < static_cast<int>(sizeof(var) / 2); i++)
 71 | 			std::swap(varArray[sizeof(var) - 1 - i], varArray[i]);
 72 | 		std::memcpy(&var, varArray.data(), sizeof(T));
 73 | 	}
 74 | }
 75 | 
 76 | // ZFile: Class for (mostly binary) file handling.
 77 | // Cannot be copied
 78 | class ZFile
 79 | {
 80 | private:
 81 | 
 82 | 	ZFile(const ZFile&);
 83 | 
 84 | 	BYTE * m_pData;
 85 | 	bool FileOpened;
 86 | 	ufstream Stream;
 87 | 
 88 | 	EZFOpenMode::Enum OpenMode;
 89 | 	EZFEndian::Enum SysEndian;
 90 | 
 91 | 	int EZFOpenModeToIos(const EZFOpenMode::Enum& input);
 92 | 
 93 | public:
 94 | 	ZFile();
 95 | 
 96 | 	ZFile(const std::string& coFName, const EZFOpenMode::Enum& coMode);
 97 | 
 98 | 	bool Open(const std::string& in_sFileName,const EZFOpenMode::Enum& in_Mode);
 99 | 
100 | 	void Seek(const INT64& in_Pos);
101 | 	INT64 GetPos();
102 | 
103 |     void SeekToEnd();
104 | 	
105 | 	 INT64 GetFileLength();
106 | 	// Reads from the file
107 | 	// Please pass a pointer to this
108 | 	void Read(void* out, const INT64& count);
109 | 	// Writes to the file
110 | 	// Please pass a pointer
111 | 	void Write(void* in, const INT64& incount);
112 | 
113 | 	
114 | 	// Read the entire file into a byte array
115 | 	ByteArr ReadEntireFile();
116 | 	
117 | 	// Write with template argument to not pass size.
118 | 	// Only works with regular datatypes
119 |     template <typename Dat>
120 | 	void Write(const Dat& dta)
121 | 	{
122 | 		Stream.write((BYTE*)&dta, sizeof(dta));
123 | 		
124 | 	
125 | 	}
126 | 
127 |     void WriteLine(const std::string& inLi);
128 | 
129 | 	// Read with template argument to not pass size.
130 |    // Only works with regular datatypes
131 | 	template <typename Dat>
132 | 	void Read(Dat& dta)
133 | 	{
134 | 		Stream.read((BYTE*)&dta, sizeof(dta));
135 | 
136 | 
137 | 	}
138 | 
139 | 	
140 | 
141 | 	// Write a string
142 | 	template<typename chardat>
143 | 	void Write(const std::basic_string<chardat>& Str) {
144 | 		// Get total len in bytes.
145 | 		const size_t LenInBytes = Str.length() * sizeof(chardat);
146 | 
147 | 		// Write the string length (NOT in bytes)
148 | 		Write(Str.length());
149 | 		Stream.write((BYTE*)Str.data(),LenInBytes);
150 | 		
151 | 		
152 | 	
153 | 	}
154 | 
155 | 	// Read a string
156 | 	template<typename chardat>
157 | 	void Read(std::basic_string<chardat>& Str) {
158 | 
159 | 		size_t StrLen = 0;
160 | 		Read(StrLen);
161 | 		chardat* dpBuffer = new chardat[StrLen];
162 | 
163 | 
164 | 		Stream.read((BYTE*)dpBuffer, sizeof(chardat) * StrLen);
165 | 
166 | 		// For some reason (witchcraft?) our buffer has more chars in it than we actually allocated, which should be impossible.
167 | 		// Thankfully, std::string's assign function allows for cutting.
168 | 		Str.assign(dpBuffer,0,StrLen);
169 | 
170 | 
171 | 		delete[] dpBuffer;
172 | 
173 | 	}
174 | 
175 | 	// Write a vector
176 | 	template<typename vdat>
177 | 	void Write(const std::vector<vdat>& Vec) {
178 | 		// Write size in bytes then vector size.
179 | 		Write(Vec.size());
180 | 
181 | 		// Write vector size.
182 | 
183 | 		auto It = Vec.begin();
184 | 		
185 | 		while (It != Vec.end()) {
186 | 			(*this) << *It;
187 | 			++It;
188 | 		}
189 | 		
190 | 
191 | 	
192 | 	}
193 | 
194 | 	// Read a vector
195 | 	template<typename vdat>
196 | 	void Read(std::vector<vdat>& Vec) {
197 | 		size_t vSz = 0;
198 | 		Read(vSz);
199 | 		
200 | 		Vec.resize(vSz);
201 | 
202 | 		size_t i = 0;
203 | 
204 | 		while (i != vSz) {
205 | 			(*this) >> Vec[i];
206 | 		
207 | 			++i;
208 | 		}
209 | 
210 | 
211 | 	}
212 | 
213 | 	template <typename N>
214 | 	void Write(const N& Num, EZFEndian::Enum TargetEndian);
215 | 
216 | 	// Write some stuff
217 | 	template<typename Ty>
218 | 	void operator<<(const Ty& In) {
219 | 		Write(In);
220 | 	
221 | 	}
222 | 
223 | 	// Write a Byte Array RAW into the file, without the size. Useful for exporting
224 | 	void Write(const ByteArr& BrDat);
225 | 
226 | 	void operator<<(const ByteArr& BarDat) {
227 | 		if (BarDat.CoData() == NULL) {
228 | 			throw new std::invalid_argument("ZFile tried to write invalid byte array!!");
229 | 		}
230 | 
231 | 		Write(BarDat.Size());
232 | 		Stream.write(BarDat.CoData(), BarDat.Size());
233 | 	
234 | 	}
235 | 	// Read to a byte array. Note: DELETES AND REPLACES THE ALREADY EXISTING CONTENTS THERE!!
236 | 	void operator>>(ByteArr& BarDat);
237 | 
238 | 
239 | 	template<typename MTy>
240 | 	void operator>>(MTy& mIn) {
241 | 		Read(mIn);
242 | 	}
243 | 
244 | 	void Close();
245 | 
246 | 
247 | 	~ZFile();
248 | };
249 | 
250 | template<typename N>
251 | // Function to write a value with target endianness.
252 | inline void ZFile::Write(const N & Num, EZFEndian::Enum TargetEndian)
253 | {
254 | 	if (SysEndian == TargetEndian)
255 | 		Write(Num);
256 | 	else
257 | 		Write(ZFUtil::SwapEndian(Num));
258 | 
259 | 
260 | }
261 | 


--------------------------------------------------------------------------------
/g2p_train/README.md:
--------------------------------------------------------------------------------
# G2P for TensorVox
 2 | TensorVox utilizes an RNN-based G2P model implemented in Tensorflow to convert text to phonemes before feeding the text2speech models.
 3 | 
 4 | ## Training
 5 | In order to train a model, you need to prepare two things:
 6 | 1. A dictionary in format `WORD \t PHONETIC SPELLING` as the dataset
 7 | 2. A config file (optional, there is already one in `config/default.yaml`)
 8 | 
TensorFlow 2.0 or greater is, of course, required.
10 | 
11 | Since the training is very quick on GPU (Tesla T4), it's just one script that does preprocessing, training, and exporting. If you don't have one, just use Google Colab.
12 | 
13 | You can download my English dictionary (converted to tab-based from the LibriSpeech lexicon) [here](https://drive.google.com/file/d/19cnHM3-Zsc7uRJ2scUPNMNoSlyXuaGNe/view?usp=sharing).
14 | Rename it from dict.d to dict.txt
15 | 
16 | The command to run it is as follows: 
17 | 
18 |     python3 train_and_export.py --dict-path dict.txt --config-path config/default.yaml --out-path English
19 | 
20 | Arguments should be self-explanatory. 
21 | ### Important note
22 | If your phoneme format does not separate phonemes by space (like IPA), pass `--char-tok-phn` as an argument, because the script assumes that all phoneme texts are like ARPA (example: G R IY1 N) and tokenizes separated by spaces. One sign that it may be doing this could be very slow training on a decent GPU.
23 | 
24 | ## Structure
25 | 
26 | Once finished, the script will output all files required to use the model to the folder determined by the `--out-path` argument  (will be created if it doesn't exist). 
27 | 
28 | No further action is necessary, just drag it so that all the files in the folder are in the (executable file path)/g2p/`language name` folder and it will be used by the program to do phoneme conversion for all models it loads in that language. Make sure language name folder is capitalized.
29 | 
30 | The output consists of three things:
31 | 
1. **char2id.txt, phn2id.txt**: Two text files in format `TOKEN \t ID` that indicate the IDs that first go into the model (char) and are returned (phn). Automatically generated by the script.
2. **dict.txt**: Dictionary in format `WORD \t PHONETIC-SPELLING` that is used to find phonetic spellings in. Automatically re-exported (words forced to lowercase) by the script.
3. **model**: The actual G2P model, saved in Tensorflow SavedModel format.
35 | 
Because the network's predictions are not fully reliable, it is only used to guess novel words: the program first does a dictionary lookup (semi-optimized with bucketed string search) and falls back to the model only if the word is not found.
37 | 
38 | An example English model is zipped in the `models/` directory.
39 | 


--------------------------------------------------------------------------------
/g2p_train/config/default.yaml:
--------------------------------------------------------------------------------
1 | # Config file for G2P-English model. This one does 15 epochs and trains very quickly on GPU.
2 | 
3 | gru_dims: 128 # Size of GRU (and embedding layer)
4 | batch_size: 4096 # Batch size
5 | val_per: 0.1 # % of dataset to be used for validation. Floating point, 1.0 = 100%; 0.5 = 50%, etc...
6 | epochs: 15 # Amount of epochs
7 | learning_rate: 0.006 # Learning rate
8 | 


--------------------------------------------------------------------------------
/g2p_train/config/longer.yaml:
--------------------------------------------------------------------------------
1 | # Config file for G2P model that trains for longer with softer learning, recommended
2 | 
3 | gru_dims: 128 # Size of GRU (and embedding layer)
4 | batch_size: 4096 # Batch size
5 | val_per: 0.02 # % of dataset to be used for validation. Floating point, 1.0 = 100%; 0.5 = 50%, etc...
6 | epochs: 35 # Amount of epochs
7 | learning_rate: 0.001 # Learning rate
8 | 


--------------------------------------------------------------------------------
/g2p_train/models/English.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/g2p_train/models/English.zip


--------------------------------------------------------------------------------
/g2p_train/train_and_export.py:
--------------------------------------------------------------------------------
  1 | from tqdm import tqdm
  2 | import os
  3 | import argparse
  4 | import tensorflow as tf
  5 | import yaml
  6 | import shutil
# Common padded sequence length; assigned by preprocess() and read by callg2p().
global_max = 0
# The Keras model; assigned in main() and called by callg2p().
cumodel = None
  9 | 
 10 | def safemkdir(dirn):
 11 |   if not os.path.isdir(dirn):
 12 |     os.mkdir(dirn)
 13 | 
 14 | 
 15 | def preprocess(in_fname,char_phn_tok):
 16 |   words = list()
 17 |   phn = list()
 18 |   print("Opening file...")
 19 |   with open(in_fname,"r",encoding="utf-8") as f:
 20 |     for li in tqdm(f.readlines()):
 21 |       spl = li.strip().split("\t")
 22 |       if len(spl) > 1:
 23 |         words.append(spl[0].lower()) #convert to lowercase for re-exporting later
 24 |         phn.append(spl[1])
 25 | 
 26 |   if char_phn_tok:
 27 |     print("Tokenizing phoneme strings in char level too")
 28 |     
 29 |   phntok = tf.keras.preprocessing.text.Tokenizer(lower=False,filters='"\t\n',char_level=char_phn_tok)
 30 |   txttok = tf.keras.preprocessing.text.Tokenizer(char_level=True)
 31 |   
 32 |   print("Fitting on texts...")
 33 |   phntok.fit_on_texts(phn)
 34 |   txttok.fit_on_texts(words)
 35 | 
 36 |   print("Converting to sequences")
 37 |   txtseqs = txttok.texts_to_sequences(words)
 38 |   phnseqs = phntok.texts_to_sequences(phn)
 39 | 
 40 |   txt_max = len(max(txtseqs, key=len))
 41 |   phn_max = len(max(phnseqs,key=len))
 42 | 
 43 |   global global_max
 44 |   global_max = max(txt_max,phn_max)
 45 |   print("Common padding index is " + str(global_max))
 46 | 
 47 |   txtpadded = tf.keras.preprocessing.sequence.pad_sequences(txtseqs,padding="post",maxlen=global_max)
 48 |   phnpadded =  tf.keras.preprocessing.sequence.pad_sequences(phnseqs,padding="post",maxlen=global_max)
 49 |   
 50 |   txtsize = len(txttok.word_index)
 51 |   phnsize = len(phntok.word_index)
 52 | 
 53 |   return txtpadded, phnpadded, txtsize, phnsize, phntok.word_index, txttok.word_index, words, phn
 54 | 
 55 | 
 56 | def getmodel(input_shape, in_vocab_size, out_vocab_size,gru_size,in_lr):
 57 |     model = tf.keras.models.Sequential([tf.keras.layers.Embedding(in_vocab_size, gru_size, input_length=input_shape[1], input_shape=input_shape[1:]),
 58 |                                         tf.keras.layers.Bidirectional(tf.keras.layers.GRU(gru_size,input_shape=input_shape[1:],return_sequences=True)),
 59 |                                         tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1024,activation="relu")),
 60 |                                         tf.keras.layers.Dropout(0.5),
 61 |                                         tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(out_vocab_size,activation="softmax"))])
 62 |     
 63 |     model.compile(loss='sparse_categorical_crossentropy',
 64 |                   optimizer=tf.keras.optimizers.Adam(in_lr),
 65 |                   metrics=['accuracy'])
 66 |     return model
 67 | 
# Serving function passed as `signatures` when the model is saved in
# export_model(). Inputs: a 1-D int32 id vector, its length as a 1-element
# int32 tensor, and a 1-element float32 sampling temperature.
@tf.function(
    experimental_relax_shapes=True,
    input_signature=[
        tf.TensorSpec([None], dtype=tf.int32, name="input_ids"),
        tf.TensorSpec([1,], dtype=tf.int32, name="input_len"),
        tf.TensorSpec([1,], dtype=tf.float32, name="input_temperature"),
    ],
)
def callg2p(input_ids,input_len,input_temperature):
  """Pad the ids to `global_max`, run `cumodel`, sample one id per position
  and return the sampled ids that are not 0, as int32."""
  # Zero padding that brings the input up to global_max entries
  pad = tf.zeros([global_max - input_len[0]],dtype=tf.int32)
  # Append the padding and reshape to rows of length global_max
  input_ids = tf.concat([input_ids,pad],0)
  input_ids = tf.reshape(input_ids,[-1,global_max])
  # Run the model
  pred = cumodel(input_ids)
  # Drop the leading axis, then divide by the temperature
  predx = tf.squeeze(pred, 0)
  # NOTE(review): getmodel() ends in a softmax layer, so predx holds
  # probabilities here, while tf.random.categorical documents its input as
  # logits (unnormalized log-probabilities) - confirm this is intended.
  predx /= input_temperature

  # Sample one id per row
  retids = tf.random.categorical(predx, 1)

  # Keep only ids different from 0 (0 is the padding value used above)
  bool_mask = tf.not_equal(retids, 0)
  phn_ids = tf.boolean_mask(retids, bool_mask)

  return tf.cast(phn_ids,tf.int32)
 96 | 
 97 | def exportdict(indict,outf):
 98 |   f = open(outf,"w")
 99 |   for de in indict:
100 |     f.write(de + "\t" + str(indict[de]) + "\n")
101 |   
102 |   f.close()
103 | 
104 | 
def export_model(folname,in_model,in_phnwi,in_charwi):
  """Write the vocabulary files and the SavedModel into folder `folname`.

  Produces `phn2id.txt`, `char2id.txt` and a `model` directory saved in "tf"
  format with `callg2p` as its signature.
  """
  safemkdir(folname)

  vocab_files = (
    (in_phnwi, "phn2id.txt"),
    (in_charwi, "char2id.txt"),
  )
  for vocab, fname in vocab_files:
    exportdict(vocab, os.path.join(folname, fname))

  print("Exporting model...")
  model_dir = os.path.join(folname,"model")
  in_model.save(model_dir,save_format="tf",signatures=callg2p)
114 | 
115 | 
def main():
    """Command-line entry point: preprocess the dictionary, train the model,
    export it, and re-export the (lowercased) dictionary next to it."""
    parser = argparse.ArgumentParser(description="Train and export a G2P model")
    parser.add_argument(
        "--config-path",
        default="config/default.yaml",
        type=str,
        help="Path of config",
    )
    parser.add_argument(
        "--dict-path",
        default="dict.txt",
        type=str,
        help="Path of dictionary",
    )
    parser.add_argument(
        "--out-path",
        default="model1",
        type=str,
        help="Output path of model",
    )
    parser.add_argument(
        "--char-tok-phn",
        action="store_true",
        help="Whether to tokenize phoneme strings by char. Turn this on if using IPA or some other phoneme with no spaces inbetween",
    )

    args = parser.parse_args()

    txtpadded, phnpadded, txtsize, phnsize, phn_wi, txt_wi, words, phns = preprocess(args.dict_path,args.char_tok_phn)

    # NOTE(review): FullLoader can build more than plain data types; this is
    # fine for a local, trusted config, but prefer yaml.safe_load if configs
    # can ever come from an untrusted source.
    with open(args.config_path,"r",encoding="utf-8") as yf:
        config = yaml.load(yf,Loader=yaml.FullLoader)

    print("Finished preprocessing. Getting model")
    global cumodel
    # +1 on both vocabulary sizes: id 0 is the padding value and is not part
    # of either word_index.
    cumodel = getmodel(txtpadded.shape,txtsize + 1,phnsize + 1,config["gru_dims"],config["learning_rate"])

    x_train = txtpadded
    y_train = phnpadded

    print("Starting training...")
    cumodel.fit(x_train, y_train, batch_size=config["batch_size"], epochs=config["epochs"],validation_split=config["val_per"])

    print("Starting export...")
    export_model(args.out_path,cumodel,phn_wi,txt_wi)

    print("Re-exporting dict...")
    # export_model() has created args.out_path by now.
    with open(os.path.join(args.out_path,"dict.txt"),"w",encoding="utf-8") as outdict:
        for word, phonemes in zip(words, phns):
            outdict.write(word + "\t" + phonemes + "\n")

    print("Done!")
178 | 
179 | 
180 | 
181 | 
182 | 
183 | 
184 |         
185 | 
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
188 | 
189 | 


--------------------------------------------------------------------------------
/istftnettorch.cpp:
--------------------------------------------------------------------------------
 1 | #include "istftnettorch.h"
 2 | #include <windows.h>
 3 | bool iSTFTNetTorch::Initialize(const std::string &VocoderPath)
 4 | {
 5 |     torch::Device device(torch::kCPU);
 6 | 
 7 |     try {
 8 |         // Deserialize the ScriptModule from a file using torch::jit::load().
 9 | 
10 |         std::string VCP = VocoderPath + ".pt";
11 | 
12 |         Model = torch::jit::load(VCP,device);
13 | 
14 |     }
15 |     catch (const c10::Error& e) {
16 |         QMessageBox::critical(nullptr,"r",e.what_without_backtrace());
17 |         return false;
18 | 
19 |     }
20 |     try{
21 |         std::string PostPath = VocoderPath + "-post.pt";
22 |         Post = torch::jit::load(PostPath,device);
23 |         PostLoaded = true;
24 |     }
25 |     catch (const c10::Error& e){
26 |         PostLoaded = false;
27 | 
28 |     }
29 | 
30 | 
31 | 
32 |     return true;
33 | 
34 | }
35 | 
36 | TFTensor<float> iSTFTNetTorch::DoInference(const TFTensor<float> &InMel)
37 | {
38 |     // without this memory consumption is 4x
39 |     torch::NoGradGuard no_grad;
40 |     torch::Device device(torch::kCPU);
41 |     auto TorchMel = torch::tensor(InMel.Data,device).reshape(InMel.Shape).transpose(1,2); // [1, frames, n_mels] -> [1, n_mels, frames]
42 | 
43 | 
44 | 
45 |     try{
46 |         at::Tensor Output = Model({TorchMel}).toTensor().squeeze(); // (audio frames)
47 |         if (PostLoaded)
48 |             Output = Post({Output.unsqueeze(0).toType(at::ScalarType::Float)}).toTensor();
49 | 
50 | 
51 |         TFTensor<float> Tens = VoxUtil::CopyTensor<float>(Output);
52 | 
53 | 
54 |         return Tens;
55 |     }
56 |     catch (const std::exception& e) {
57 |         int msgboxID = MessageBox(
58 |             NULL,
59 |             (LPCWSTR)QString::fromStdString(e.what()).toStdWString().c_str(),
60 |             (LPCWSTR)L"Account Details",
61 |             MB_ICONWARNING | MB_CANCELTRYCONTINUE | MB_DEFBUTTON2
62 |         );
63 | 
64 | 
65 |         return TFTensor<float>();
66 | 
67 |     }
68 | 
69 | 
70 | 
71 | 
72 | 
73 | }
74 | 
75 | iSTFTNetTorch::iSTFTNetTorch()
76 | {
77 | 
78 | }
79 | 


--------------------------------------------------------------------------------
/istftnettorch.h:
--------------------------------------------------------------------------------
 1 | #ifndef ISTFTNETTORCH_H
 2 | #define ISTFTNETTORCH_H
 3 | #include "MultiBandMelGAN.h"
 4 | 
 5 | class iSTFTNetTorch : public MultiBandMelGAN
 6 | {
 7 | private:
 8 |    torch::jit::script::Module Model;
 9 |    torch::jit::script::Module Post;
10 | 
11 |    bool PostLoaded;
12 | 
13 | public:
14 |     bool Initialize(const std::string& VocoderPath);
15 | 
16 | 
17 | 
18 |     // Do MultiBand MelGAN inference including PQMF
19 |     // -> InMel:  Mel spectrogram (shape [1, xx, 80])
20 |     // <- Returns: Tensor data  [frames]
21 |     virtual TFTensor<float> DoInference(const TFTensor<float>& InMel);
22 |     iSTFTNetTorch();
23 | };
24 | 
25 | #endif // ISTFTNETTORCH_H
26 | 


--------------------------------------------------------------------------------
/main.cpp:
--------------------------------------------------------------------------------
 1 | #include "mainwindow.h"
 2 | 
 3 | #include "ext/Qt-Frameless-Window-DarkStyle-master/DarkStyle.h"
 4 | #include "framelesswindow.h"
 5 | 
 6 | #include <QApplication>
 7 | 
 8 | int main(int argc, char *argv[])
 9 | {
10 |     QCoreApplication::addLibraryPath("./");
11 |     QApplication a(argc, argv);
12 |     a.setStyle(new DarkStyle);
13 | 
14 |     FramelessWindow framelessWindow;
15 |     framelessWindow.setWindowTitle("TensorVox");
16 |     framelessWindow.setWindowIcon(QIcon("://res/stdico.png"));
17 | 
18 |     MainWindow *mainWindow = new MainWindow;
19 |     mainWindow->pDarkFw = &framelessWindow;
20 | 
21 |     framelessWindow.setContent(mainWindow);
22 |     framelessWindow.show();
23 | 
24 | 
25 |     return a.exec();
26 | }
27 | 


--------------------------------------------------------------------------------
/melgen.cpp:
--------------------------------------------------------------------------------
 1 | #include "melgen.h"
 2 | 
 3 | MelGen::MelGen()
 4 | {
 5 | 
 6 | }
 7 | 
// Construct and immediately try to load the model from SavedModelFolder.
// The boolean result of Initialize() is discarded here, so a failed load is
// not reported to the caller of this constructor.
// Because this runs inside the constructor, the call resolves to
// MelGen::Initialize even though Initialize is declared virtual.
MelGen::MelGen(const std::string &SavedModelFolder, ETTSRepo::Enum InTTSRepo)
{
   Initialize(SavedModelFolder,InTTSRepo);

}
13 | 
14 | bool MelGen::Initialize(const std::string &SavedModelFolder,  ETTSRepo::Enum InTTSRepo)
15 | {
16 |     try {
17 |         CurrentMdl = std::make_unique<cppflow::model>(SavedModelFolder);
18 |     }
19 |     catch (...) {
20 |         return false;
21 | 
22 |     }
23 |     CurrentRepo = InTTSRepo;
24 |     return true;
25 | 
26 | }
27 | 


--------------------------------------------------------------------------------
/melgen.h:
--------------------------------------------------------------------------------
 1 | #ifndef MELGEN_H
 2 | #define MELGEN_H
 3 | 
 4 | 
 5 | 
 6 | #include "ext/CppFlow/ops.h"
 7 | #include "ext/CppFlow/model.h"
 8 | #include "VoxCommon.hpp"
 9 | 
10 | #include <memory>
11 | 
12 | // MelGen: base virtual class for mel generators
13 | class MelGen
14 | {
15 | private:
16 | 
17 | public:
18 |     ETTSRepo::Enum CurrentRepo;
19 |     MelGen();
20 |     MelGen(const std::string& SavedModelFolder,ETTSRepo::Enum InTTSRepo);
21 | 
22 | 
23 |     // Generic inference function
24 |     // Utilize ArgsFloat and ArgsInt for additional arguments for certain models
25 |     virtual TFTensor<float> DoInference(const std::vector<int32_t>& InputIDs,const std::vector<float>& ArgsFloat,const std::vector<int32_t> ArgsInt, int32_t SpeakerID = 0, int32_t EmotionID = -1) = 0;
26 | 
27 |     /*
28 |     Initialize and load the model
29 | 
30 |     -> SavedModelFolder: Folder where the .pb, variables, and other characteristics of the exported SavedModel
31 |     <- Returns: (bool)Success
32 |     */
33 |     virtual bool Initialize(const std::string& SavedModelFolder, ETTSRepo::Enum InTTSRepo);
34 | 
35 | 
36 |     std::unique_ptr<cppflow::model> CurrentMdl;
37 | 
38 |     inline ETTSRepo::Enum GetCurrentRepo() {return CurrentRepo;}
39 | 
40 | };
41 | 
42 | #endif // MELGEN_H
43 | 


--------------------------------------------------------------------------------
/modelinfodlg.cpp:
--------------------------------------------------------------------------------
 1 | #include "modelinfodlg.h"
 2 | #include "ui_modelinfodlg.h"
 3 | #include <QFile>
 4 | 
// Create the dialog and build its widgets from the generated UI class.
// The Ui::ModelInfoDlg object allocated here is released in the destructor.
ModelInfoDlg::ModelInfoDlg(QWidget *parent) :
    QDialog(parent),
    ui(new Ui::ModelInfoDlg)
{
    ui->setupUi(this);
}
11 | 
// Release the generated UI object allocated in the constructor.
ModelInfoDlg::~ModelInfoDlg()
{
    delete ui;
}
16 | 
17 | void ModelInfoDlg::SetInfo(const QString &ModelName, const QString &Info, int32_t InVersion, const QString &Author, const QString &Repo, const QString &MelGen, const QString &Vocoder, uint32_t SampleRate)
18 | {
19 |     ui->lblAuthor->setText("Author: " + Author);
20 |     ui->lblVersion->setText("Version: " + QString::number(InVersion) + "  ");
21 |     ui->redtModelInfo->setText(QString(Info).replace("(/NL)","\n"));
22 | 
23 | 
24 |     ui->lblModelTitle->setText(ModelName);
25 | 
26 |     QString ArchShow = "Architecture: " + Repo + " " + MelGen;
27 | 
28 |     if (Vocoder.size())
29 |         ArchShow += " & " + Vocoder;
30 | 
31 |     ui->lblModelArchitecture->setText(ArchShow);
32 |     ui->lblSampleRate->setText("Sampling rate: " + QString::number(SampleRate / 1000) + "KHz");
33 | 
34 |     QString ImgPath = QApplication::applicationDirPath() + "/models/" + ModelName + "/image.png";
35 |     if (QFile::exists(ImgPath))
36 |     {
37 |         ui->lblImg->setPixmap(QPixmap::fromImage(QImage(ImgPath)));
38 | 
39 |     }
40 | }
41 | 


--------------------------------------------------------------------------------
/modelinfodlg.h:
--------------------------------------------------------------------------------
 1 | #ifndef MODELINFODLG_H
 2 | #define MODELINFODLG_H
 3 | 
 4 | #include <QDialog>
 5 | 
 6 | namespace Ui {
 7 | class ModelInfoDlg;
 8 | }
 9 | 
// ModelInfoDlg: dialog that displays the description of a model
// (title, author, version, architecture, sampling rate and an optional image).
class ModelInfoDlg : public QDialog
{
    Q_OBJECT

public:
    explicit ModelInfoDlg(QWidget *parent = nullptr);
    ~ModelInfoDlg();

    // Populate all labels of the dialog from the given model description.
    void SetInfo(const QString& ModelName,const QString& Info,int32_t InVersion,const QString& Author,const QString& Repo,const QString& MelGen,const QString& Vocoder,uint32_t SampleRate);

private:
    // Generated UI object; created in the constructor, deleted in the destructor
    Ui::ModelInfoDlg *ui;
};
23 | 
24 | #endif // MODELINFODLG_H
25 | 


--------------------------------------------------------------------------------
/modelinfodlg.ui:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <ui version="4.0">
  3 |  <class>ModelInfoDlg</class>
  4 |  <widget class="QDialog" name="ModelInfoDlg">
  5 |   <property name="geometry">
  6 |    <rect>
  7 |     <x>0</x>
  8 |     <y>0</y>
  9 |     <width>576</width>
 10 |     <height>531</height>
 11 |    </rect>
 12 |   </property>
 13 |   <property name="windowTitle">
 14 |    <string>Dialog</string>
 15 |   </property>
 16 |   <layout class="QVBoxLayout" name="verticalLayout">
 17 |    <item>
 18 |     <widget class="QLabel" name="lblModelTitle">
 19 |      <property name="font">
 20 |       <font>
 21 |        <family>Verdana</family>
 22 |        <pointsize>16</pointsize>
 23 |       </font>
 24 |      </property>
 25 |      <property name="text">
 26 |       <string>Model Name</string>
 27 |      </property>
 28 |      <property name="alignment">
 29 |       <set>Qt::AlignCenter</set>
 30 |      </property>
 31 |     </widget>
 32 |    </item>
 33 |    <item>
 34 |     <layout class="QHBoxLayout" name="horizontalLayout_3">
 35 |      <item>
 36 |       <widget class="QLabel" name="lblImg">
 37 |        <property name="maximumSize">
 38 |         <size>
 39 |          <width>600</width>
 40 |          <height>256</height>
 41 |         </size>
 42 |        </property>
 43 |        <property name="text">
 44 |         <string/>
 45 |        </property>
 46 |        <property name="pixmap">
 47 |         <pixmap resource="stdres.qrc">:/res/noim.png</pixmap>
 48 |        </property>
 49 |        <property name="scaledContents">
 50 |         <bool>true</bool>
 51 |        </property>
 52 |        <property name="alignment">
 53 |         <set>Qt::AlignCenter</set>
 54 |        </property>
 55 |       </widget>
 56 |      </item>
 57 |     </layout>
 58 |    </item>
 59 |    <item>
 60 |     <widget class="QTextEdit" name="redtModelInfo">
 61 |      <property name="font">
 62 |       <font>
 63 |        <family>Verdana</family>
 64 |        <pointsize>10</pointsize>
 65 |       </font>
 66 |      </property>
 67 |      <property name="readOnly">
 68 |       <bool>true</bool>
 69 |      </property>
 70 |     </widget>
 71 |    </item>
 72 |    <item>
 73 |     <layout class="QHBoxLayout" name="horizontalLayout">
 74 |      <item>
 75 |       <widget class="QLabel" name="lblAuthor">
 76 |        <property name="text">
 77 |         <string>Author: Anonymous</string>
 78 |        </property>
 79 |       </widget>
 80 |      </item>
 81 |      <item>
 82 |       <spacer name="horizontalSpacer">
 83 |        <property name="orientation">
 84 |         <enum>Qt::Horizontal</enum>
 85 |        </property>
 86 |        <property name="sizeHint" stdset="0">
 87 |         <size>
 88 |          <width>40</width>
 89 |          <height>20</height>
 90 |         </size>
 91 |        </property>
 92 |       </spacer>
 93 |      </item>
 94 |      <item>
 95 |       <widget class="QLabel" name="lblSampleRate">
 96 |        <property name="text">
 97 |         <string>Sampling rate: 22KHz</string>
 98 |        </property>
 99 |       </widget>
100 |      </item>
101 |      <item>
102 |       <spacer name="horizontalSpacer_3">
103 |        <property name="orientation">
104 |         <enum>Qt::Horizontal</enum>
105 |        </property>
106 |        <property name="sizeHint" stdset="0">
107 |         <size>
108 |          <width>40</width>
109 |          <height>20</height>
110 |         </size>
111 |        </property>
112 |       </spacer>
113 |      </item>
114 |      <item>
115 |       <widget class="QLabel" name="lblVersion">
116 |        <property name="text">
117 |         <string>Version: 1 </string>
118 |        </property>
119 |       </widget>
120 |      </item>
121 |     </layout>
122 |    </item>
123 |    <item>
124 |     <layout class="QHBoxLayout" name="horizontalLayout_2">
125 |      <item>
126 |       <widget class="QLabel" name="lblModelArchitecture">
127 |        <property name="text">
128 |         <string>Architecture: TensorflowTTS FastSpeech2 &amp; Multi-Band MelGAN</string>
129 |        </property>
130 |       </widget>
131 |      </item>
132 |      <item>
133 |       <spacer name="horizontalSpacer_2">
134 |        <property name="orientation">
135 |         <enum>Qt::Horizontal</enum>
136 |        </property>
137 |        <property name="sizeHint" stdset="0">
138 |         <size>
139 |          <width>40</width>
140 |          <height>20</height>
141 |         </size>
142 |        </property>
143 |       </spacer>
144 |      </item>
145 |     </layout>
146 |    </item>
147 |    <item>
148 |     <widget class="QDialogButtonBox" name="buttonBox">
149 |      <property name="orientation">
150 |       <enum>Qt::Horizontal</enum>
151 |      </property>
152 |      <property name="standardButtons">
153 |       <set>QDialogButtonBox::Ok</set>
154 |      </property>
155 |     </widget>
156 |    </item>
157 |   </layout>
158 |  </widget>
159 |  <resources>
160 |   <include location="stdres.qrc"/>
161 |  </resources>
162 |  <connections>
163 |   <connection>
164 |    <sender>buttonBox</sender>
165 |    <signal>accepted()</signal>
166 |    <receiver>ModelInfoDlg</receiver>
167 |    <slot>accept()</slot>
168 |    <hints>
169 |     <hint type="sourcelabel">
170 |      <x>248</x>
171 |      <y>254</y>
172 |     </hint>
173 |     <hint type="destinationlabel">
174 |      <x>157</x>
175 |      <y>274</y>
176 |     </hint>
177 |    </hints>
178 |   </connection>
179 |   <connection>
180 |    <sender>buttonBox</sender>
181 |    <signal>rejected()</signal>
182 |    <receiver>ModelInfoDlg</receiver>
183 |    <slot>reject()</slot>
184 |    <hints>
185 |     <hint type="sourcelabel">
186 |      <x>316</x>
187 |      <y>260</y>
188 |     </hint>
189 |     <hint type="destinationlabel">
190 |      <x>286</x>
191 |      <y>274</y>
192 |     </hint>
193 |    </hints>
194 |   </connection>
195 |  </connections>
196 | </ui>
197 | 


--------------------------------------------------------------------------------
/phddialog.cpp:
--------------------------------------------------------------------------------
  1 | #include "phddialog.h"
  2 | #include "ui_phddialog.h"
  3 | #include <QTableWidget>
  4 | #include <QTableWidgetItem>
  5 | #include <QFileDialog>
  6 | #include <QMessageBox>
// Create the dialog, build its widgets from the generated UI class and let the
// last table column stretch to fill the remaining width.
PhdDialog::PhdDialog(QWidget *parent) :
    QDialog(parent),
    ui(new Ui::PhdDialog)
{
    ui->setupUi(this);
    ui->tbDict->horizontalHeader()->setStretchLastSection(true);

}
 15 | 
// Release the generated UI object allocated in the constructor.
PhdDialog::~PhdDialog()
{
    delete ui;
}
 20 | 
 21 | int PhdDialog::exec()
 22 | {
 23 |     // Populate the list
 24 | 
 25 |     PopulateWithEntries();
 26 | 
 27 | 
 28 |   //  ui->tbDict->setColumnWidth(0,ui->tbDict->width() / 2);
 29 |    // ui->tbDict->setColumnWidth(1,ui->tbDict->width() / 2);
 30 |     return QDialog::exec();
 31 | }
 32 | 
 33 | void PhdDialog::accept()
 34 | {
 35 |     // Validate input
 36 |     for (int i = 0; i < ui->tbDict->rowCount();++i)
 37 |     {
 38 |         if (ui->tbDict->item(i,0)->text().isEmpty()
 39 |             || ui->tbDict->item(i,0)->text() == " " ||
 40 |             ui->tbDict->item(i,1)->text().isEmpty())
 41 |         {
 42 |             QMessageBox::critical(this,"Invalid input","None of the cells can be empty, and words cannot be spaces. Check your input and try again.");
 43 |             return;
 44 | 
 45 |         }
 46 | 
 47 | 
 48 |     }
 49 | 
 50 |     // Now clear and run second loop
 51 | 
 52 |     Entrs.clear();
 53 |     Entrs.reserve((size_t)ui->tbDict->rowCount());
 54 |     // Second loop
 55 |     for (int i = 0; i < ui->tbDict->rowCount();++i)
 56 |     {
 57 |         DictEntry de;
 58 |         de.Word = ui->tbDict->item(i,0)->text().toStdString();
 59 |         de.PhSpelling = ui->tbDict->item(i,1)->text().toStdString();
 60 |         de.Language = ui->tbDict->item(i,2)->text().toStdString();
 61 |         Entrs.push_back(de);
 62 | 
 63 | 
 64 | 
 65 |     }
 66 | 
 67 |     QDialog::accept();
 68 | }
 69 | 
 70 | void PhdDialog::on_btnAdd_clicked()
 71 | {
 72 |     ui->tbDict->insertRow(ui->tbDict->rowCount());
 73 |     ui->tbDict->scrollToItem(ui->tbDict->item(ui->tbDict->rowCount() - 1,0));
 74 | 
 75 |     QTableWidgetItem* LangItem = new QTableWidgetItem(QString::fromStdString(CurrentLang));
 76 |     LangItem->setFlags(LangItem->flags() ^ Qt::ItemIsEditable);
 77 | 
 78 |     ui->tbDict->setItem(ui->tbDict->rowCount() - 1,2,LangItem);
 79 | 
 80 | }
 81 | 
 82 | void PhdDialog::PopulateWithEntries()
 83 | {
 84 |     ui->tbDict->clearContents();
 85 |     ui->tbDict->setRowCount((int)Entrs.size());
 86 |     for (size_t i = 0;i < Entrs.size();++i)
 87 |     {
 88 |         ui->tbDict->setItem((int)i,0,new QTableWidgetItem(QString::fromStdString(Entrs[i].Word)));
 89 |         ui->tbDict->setItem((int)i,1,new QTableWidgetItem(QString::fromStdString(Entrs[i].PhSpelling)));
 90 | 
 91 |         QTableWidgetItem* LangItem = new QTableWidgetItem(QString::fromStdString(Entrs[i].Language));
 92 |         LangItem->setFlags(LangItem->flags() ^ Qt::ItemIsEditable);
 93 | 
 94 |         ui->tbDict->setItem((int)i,2,LangItem);
 95 | 
 96 | 
 97 |     }
 98 | 
 99 | }
100 | 
101 | void PhdDialog::on_btnRemove_clicked()
102 | {
103 |     QList<QTableWidgetItem*> seli = ui->tbDict->selectedItems();
104 |     QList<QTableWidgetItem*>::iterator It = seli.begin();
105 |     while (It != seli.end())
106 |     {
107 |         QTableWidgetItem* item = *It;
108 |         ui->tbDict->removeRow(item->row());
109 | 
110 |         ++It;
111 |     }
112 | }
113 | 
114 | void PhdDialog::on_btnImport_clicked()
115 | {
116 |     QString fnamei = QFileDialog::getOpenFileName(this, tr("Open dictionary to import"), QString(), tr("DeltaVox Phonetic Dictionary Files (*.phd)"));
117 | 
118 |     if (fnamei == "")
119 |         return;
120 | 
121 |     PhoneticDict Pd;
122 |     if (!Pd.Import(fnamei)){
123 |         QMessageBox::critical(this,"Error","Failed to import this file.");
124 |         return;
125 |     }
126 | 
127 |     Entrs.reserve(Entrs.size() + Pd.Entries.size());
128 |     for (DictEntry& De : Pd.Entries )
129 |     {
130 |         Entrs.push_back(De);
131 | 
132 | 
133 |     }
134 |     PopulateWithEntries();
135 | 
136 | 
137 | 
138 | }
139 | 
140 | void PhdDialog::on_tbDict_cellChanged(int row, int column)
141 | {
142 |     if (row != 0)
143 |         return;
144 | 
145 | }
146 | 


--------------------------------------------------------------------------------
/phddialog.h:
--------------------------------------------------------------------------------
 1 | #ifndef PHDDIALOG_H
 2 | #define PHDDIALOG_H
 3 | 
 4 | #include <QDialog>
 5 | #include "phoneticdict.h"
 6 | namespace Ui {
 7 | class PhdDialog;
 8 | }
 9 | 
// PhdDialog: editor dialog for a phonetic dictionary.
// The caller fills Entrs (and CurrentLang) before exec(); accept() writes the
// edited table back into Entrs.
class PhdDialog : public QDialog
{
    Q_OBJECT

public:
    explicit PhdDialog(QWidget *parent = nullptr);
    ~PhdDialog();

    // Rebuilds the table from Entrs, then runs the dialog modally
    int exec() override;
    // Validates the table and stores its rows in Entrs before closing
    void accept() override;

    // Dictionary entries shown in / read back from the table
    std::vector<DictEntry> Entrs;


    // Language written into the (read-only) language cell of newly added rows
    std::string CurrentLang;
private slots:
    void on_btnAdd_clicked();

    void on_btnRemove_clicked();

    void on_btnImport_clicked();

    void on_tbDict_cellChanged(int row, int column);

private:
    // Replace the table contents with the current Entrs
    void PopulateWithEntries();
    // Generated UI object; created in the constructor, deleted in the destructor
    Ui::PhdDialog *ui;
};
38 | 
39 | #endif // PHDDIALOG_H
40 | 


--------------------------------------------------------------------------------
/phddialog.ui:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <ui version="4.0">
  3 |  <class>PhdDialog</class>
  4 |  <widget class="QDialog" name="PhdDialog">
  5 |   <property name="geometry">
  6 |    <rect>
  7 |     <x>0</x>
  8 |     <y>0</y>
  9 |     <width>640</width>
 10 |     <height>480</height>
 11 |    </rect>
 12 |   </property>
 13 |   <property name="font">
 14 |    <font>
 15 |     <family>Verdana</family>
 16 |     <pointsize>9</pointsize>
 17 |    </font>
 18 |   </property>
 19 |   <property name="windowTitle">
 20 |    <string>Phonetic Dictionary</string>
 21 |   </property>
 22 |   <layout class="QVBoxLayout" name="verticalLayout">
 23 |    <item>
 24 |     <layout class="QHBoxLayout" name="horizontalLayout">
 25 |      <item>
 26 |       <widget class="QTableWidget" name="tbDict">
 27 |        <property name="font">
 28 |         <font>
 29 |          <family>Verdana</family>
 30 |          <pointsize>8</pointsize>
 31 |         </font>
 32 |        </property>
 33 |        <column>
 34 |         <property name="text">
 35 |          <string>Word</string>
 36 |         </property>
 37 |        </column>
 38 |        <column>
 39 |         <property name="text">
 40 |          <string>Phonetic spelling</string>
 41 |         </property>
 42 |        </column>
 43 |        <column>
 44 |         <property name="text">
 45 |          <string>Language</string>
 46 |         </property>
 47 |        </column>
 48 |       </widget>
 49 |      </item>
 50 |      <item>
 51 |       <layout class="QVBoxLayout" name="verticalLayout_2">
 52 |        <item>
 53 |         <widget class="QPushButton" name="btnAdd">
 54 |          <property name="text">
 55 |           <string>Add</string>
 56 |          </property>
 57 |         </widget>
 58 |        </item>
 59 |        <item>
 60 |         <widget class="QPushButton" name="btnImport">
 61 |          <property name="text">
 62 |           <string>Import</string>
 63 |          </property>
 64 |         </widget>
 65 |        </item>
 66 |        <item>
 67 |         <widget class="QPushButton" name="btnRemove">
 68 |          <property name="text">
 69 |           <string>Remove</string>
 70 |          </property>
 71 |         </widget>
 72 |        </item>
 73 |       </layout>
 74 |      </item>
 75 |     </layout>
 76 |    </item>
 77 |    <item>
 78 |     <widget class="QDialogButtonBox" name="buttonBox">
 79 |      <property name="orientation">
 80 |       <enum>Qt::Horizontal</enum>
 81 |      </property>
 82 |      <property name="standardButtons">
 83 |       <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
 84 |      </property>
 85 |     </widget>
 86 |    </item>
 87 |   </layout>
 88 |  </widget>
 89 |  <resources/>
 90 |  <connections>
 91 |   <connection>
 92 |    <sender>buttonBox</sender>
 93 |    <signal>accepted()</signal>
 94 |    <receiver>PhdDialog</receiver>
 95 |    <slot>accept()</slot>
 96 |    <hints>
 97 |     <hint type="sourcelabel">
 98 |      <x>248</x>
 99 |      <y>254</y>
100 |     </hint>
101 |     <hint type="destinationlabel">
102 |      <x>157</x>
103 |      <y>274</y>
104 |     </hint>
105 |    </hints>
106 |   </connection>
107 |   <connection>
108 |    <sender>buttonBox</sender>
109 |    <signal>rejected()</signal>
110 |    <receiver>PhdDialog</receiver>
111 |    <slot>reject()</slot>
112 |    <hints>
113 |     <hint type="sourcelabel">
114 |      <x>316</x>
115 |      <y>260</y>
116 |     </hint>
117 |     <hint type="destinationlabel">
118 |      <x>286</x>
119 |      <y>274</y>
120 |     </hint>
121 |    </hints>
122 |   </connection>
123 |  </connections>
124 | </ui>
125 | 


--------------------------------------------------------------------------------
/phonemizer.cpp:
--------------------------------------------------------------------------------
  1 | #include "phonemizer.h"
  2 | #include <fstream>
  3 | #include "ext/ZCharScanner.h"
  4 | 
  5 | #include <QString>
  6 | int32_t GetID(const std::vector<IdStr>& In, const std::string &InStr)
  7 | {
  8 |     for (const IdStr& It : In)
  9 |         if (It.STR == InStr)
 10 |             return It.ID;
 11 | 
 12 |     return -1;
 13 | }
 14 | 
 15 | std::string GetSTR(const std::vector<IdStr>& In, int32_t InID)
 16 | {
 17 |     for (const IdStr& It : In)
 18 |         if (It.ID == InID)
 19 |             return It.STR;
 20 | 
 21 |     return "";
 22 | 
 23 | }
 24 | 
 25 | 
 26 | 
 27 | std::vector<IdStr> Phonemizer::GetDelimitedFile(const std::string &InFname)
 28 | {
 29 | 
 30 | 
 31 |     std::ifstream InFile (InFname);
 32 | 
 33 |     int32_t CuID;
 34 |     std::string Tok;
 35 |     std::vector<IdStr> RetVec;
 36 | 
 37 | 
 38 |     std::string Line;
 39 |     while (std::getline(InFile, Line)) {
 40 | 
 41 |         if (Line.find("\t") == std::string::npos)
 42 |             continue;
 43 | 
 44 | 
 45 |         ZStringDelimiter Deline(Line);
 46 |         Deline.AddDelimiter("\t");
 47 | 
 48 |         CuID = stoi(Deline[1]);
 49 |         Tok = Deline[0];
 50 | 
 51 | 
 52 |         RetVec.push_back(IdStr{CuID,Tok});
 53 | 
 54 |     }
 55 | 
 56 |     return RetVec;
 57 | 
 58 | 
 59 | }
 60 | 
 61 | void Phonemizer::LoadDictionary(const std::string &InDictFn)
 62 | {
 63 | 
 64 |     std::ifstream InFile (InDictFn);
 65 | 
 66 |     std::string Word;
 67 |     std::string Phn;
 68 | 
 69 | 
 70 |     if (MapDict.size())
 71 |         MapDict.clear();
 72 | 
 73 | 
 74 |     std::string Line;
 75 |     while (std::getline(InFile, Line)) {
 76 | 
 77 |         if (Line.find("\t") == std::string::npos)
 78 |             continue;
 79 | 
 80 | 
 81 |         ZStringDelimiter Deline(Line);
 82 |         Deline.AddDelimiter("\t");
 83 | 
 84 |         Word = Deline[0];
 85 |         Phn = Deline[1];
 86 | 
 87 |         MapDict.insert({Word,Phn});
 88 | 
 89 | 
 90 |     }
 91 | 
 92 | 
 93 | }
 94 | 
 95 | std::string Phonemizer::DictLookup(const std::string &InWord)
 96 | {
 97 |     auto It = MapDict.find(InWord);
 98 | 
 99 |     if (It == MapDict.end())
100 |         return "";
101 | 
102 |     return It->second;
103 | 
104 | 
105 | }
106 | // To remove from the string before dicting
107 | const std::u32string StripPonct = U",.;!?";
108 | 
109 | 
110 | std::string Phonemizer::CleanWord(const std::string &InW)
111 | {
112 |     // U32string = guaranteed 1 char = 1 value
113 |     std::u32string Word = VoxUtil::StrToU32(InW);
114 | 
115 | 
116 |     std::u32string RetWord;
117 |     RetWord.reserve(Word.size());
118 | 
119 |     for (auto Ch : Word){
120 |         if (StripPonct.find(Ch) == std::u32string::npos)
121 |             RetWord.push_back(Ch);
122 |     }
123 | 
124 |     return VoxUtil::U32ToStr(RetWord);
125 | }
126 | 
127 | 
128 | Phonemizer::Phonemizer()
129 | {
130 |     IsMinimal = true;
131 | 
132 | }
133 | 
// Load the phonemizer data found under InPath.
// -> InPath: folder containing char2id.txt and, for a full load, phn2id.txt,
//            the model/ folder and dict.txt
// -> Minimal: when true only char2id.txt is loaded
// <- Returns: always true; neither the file reads nor the result of
//             G2pModel.Initialize are checked here
bool Phonemizer::Initialize(const std::string InPath, bool Minimal)
{
    IsMinimal = Minimal;



    // Load char indices
    CharId = GetDelimitedFile(InPath + "/char2id.txt");

    // If we're doing minimal loading then stop here
    if (IsMinimal)
        return true;


    PhnId = GetDelimitedFile(InPath + "/phn2id.txt");

    // Load model
    G2pModel.Initialize(InPath + "/model");

    LoadDictionary(InPath + "/dict.txt");




    // Already false at this point (Minimal was false); kept as an explicit reset
    IsMinimal = false;
    return true;


}
163 | 
164 | 
165 | 
166 | 
167 | std::string Phonemizer::ProcessWord(const std::string &InWord,float Temperature)
168 | {
169 |     if (IsMinimal)
170 |         return InWord;
171 | 
172 | 
173 |     // First we try dictionary lookup
174 |     // This is because the g2p model can be unreliable, we only want to use it for novel sentences
175 | 
176 |     std::string PhnDict = DictLookup(CleanWord(InWord));
177 |     if (!PhnDict.empty())
178 |         return PhnDict;
179 | 
180 |     std::vector<int32_t> InIndexes;
181 |     std::u32string IterStr = VoxUtil::StrToU32(InWord);
182 | 
183 |     InIndexes.reserve(IterStr.size());
184 | 
185 | 
186 |     // Turn word into indices
187 |     for (const char32_t ch : IterStr)
188 |     {
189 |         std::u32string Single(1,ch);
190 |         int32_t Idx = GetID(CharId,VoxUtil::U32ToStr(Single));
191 | 
192 |         if (Idx != -1)
193 |             InIndexes.push_back(Idx);
194 | 
195 | 
196 |     }
197 | 
198 |     TFTensor<int32_t> PhnPrediction = G2pModel.DoInference(InIndexes,Temperature);
199 | 
200 | 
201 |     std::string RetStr = "";
202 |     bool FirstIter = true;
203 | 
204 |     for (int32_t PhnIdx : PhnPrediction.Data)
205 |     {
206 |         std::string PhnTxt = GetSTR(PhnId,PhnIdx);
207 |         if (!PhnTxt.empty())
208 |         {
209 |             if (!FirstIter)
210 |                 RetStr.append(" ");
211 | 
212 |             RetStr.append(PhnTxt);
213 | 
214 |         }
215 | 
216 |         FirstIter = false;
217 |     }
218 | 
219 | 
220 | 
221 |     return  RetStr;
222 | 
223 | }
224 | 
// Return (a copy of) the language string stored by SetPhnLanguage().
std::string Phonemizer::GetPhnLanguage() const
{
    return PhnLanguage;
}
229 | 
// Store the language string; it is only recorded here, nothing is reloaded.
void Phonemizer::SetPhnLanguage(const std::string &value)
{

    PhnLanguage = value;
}
235 | 
236 | std::string Phonemizer::GetGraphemeChars()
237 | {
238 | 
239 |     std::string RetAllowed = "";
240 |     for (const IdStr& Idx : CharId)
241 |         RetAllowed.append(Idx.STR);
242 | 
243 |     return RetAllowed;
244 | 
245 | }
246 | 
247 | Phonemizer::~Phonemizer()
248 | {
249 | 
250 | }
251 | 
252 | 
253 | 
254 | 
255 | bool operator<(const StrStr &right, const StrStr &left)
256 | {
257 |   return right.Word.length() < left.Word.length();
258 | }
259 | 


--------------------------------------------------------------------------------
/phonemizer.h:
--------------------------------------------------------------------------------
 1 | #ifndef PHONEMIZER_H
 2 | #define PHONEMIZER_H
 3 | #include "tfg2p.h"
 4 | #include <tuple>
 5 | #include <set>
 6 | #include <algorithm>
 7 | 
// One TOKEN <-> ID pair, as read from char2id.txt / phn2id.txt
struct IdStr{
    int32_t ID;      // numeric ID
    std::string STR; // token text
};
12 | 
13 | 
// A word together with its phoneme string; ordered by Word length (see operator<)
struct StrStr{
    std::string Word;
    std::string Phn;
};
18 | 
19 | // Length, start index in vec
20 | typedef std::pair<size_t,size_t> VBucket;
21 | 
// Phonemizer: turns words into phoneme strings using a dictionary first and a
// G2P model as fallback. In minimal mode it only holds the char2id table.
class Phonemizer
{
private:
    // G2P model used when a word is not in the dictionary
    TFG2P G2pModel;

    // Input character <-> ID table (char2id.txt)
    std::vector<IdStr> CharId;
    // Phoneme <-> ID table (phn2id.txt)
    std::vector<IdStr> PhnId;

    // Word -> phoneme string, loaded from dict.txt
    std::unordered_map<std::string,std::string> MapDict;


    std::string NumTxtLang;

    // True when only char2id was loaded; ProcessWord then returns its input
    bool IsMinimal;




    // Parse a "TOKEN<tab>ID" file
    std::vector<IdStr> GetDelimitedFile(const std::string& InFname);

    // Replace MapDict with the contents of a "WORD<tab>PHONEMES" file
    void LoadDictionary(const std::string& InDictFn);

    // Returns the phoneme string of InWord, or "" when unknown
    std::string DictLookup(const std::string& InWord);

    // Strips punctuation from a word before the dictionary lookup
    std::string CleanWord(const std::string& InW);



    std::string PhnLanguage;
public:
    std::string PhnLangID;
public:
    Phonemizer();
    /*
     * Initialize a phonemizer
     * Expects: (if Minimal == false)
     * - Two files consisting in TOKEN \t ID:
     * -- char2id.txt: Translation from input character to ID the model can accept
     * -- phn2id.txt: Translation from output ID from the model to phoneme
     * - A model/ folder where a G2P-Tensorflow model was saved as SavedModel
     * - dict.txt: Phonetic dictionary. First it searches the word there and if it can't be found then it uses the model.
     *
     *
     * If Minimal == true, it only requires the .sor and char2id (for determining allowed graphemes only,
     * the IDs can be arbitrary in this case)
     * A Minimal phonemizer only serves to hold values useful to the processor and tokenizer, for char-based models.
     *
     * NOTE(review): the .sor file mentioned above does not appear to be read by
     * Initialize itself - presumably it is consumed elsewhere; confirm.

    */
    bool Initialize(const std::string InPath, bool Minimal);


    // Phonemize a single word; Temperature is passed on to the G2P model
    std::string ProcessWord(const std::string& InWord, float Temperature = 0.1f);
    std::string GetPhnLanguage() const;
    void SetPhnLanguage(const std::string &value);

    // All char2id token strings concatenated
    std::string GetGraphemeChars();

    ~Phonemizer();

    inline const std::string& GetNumTxtLang() {return NumTxtLang;}
};
83 | 
84 | 
85 | bool operator<(const StrStr& right,const StrStr& left); // NOTE(review): the FIRST parameter is named 'right' and the second 'left' -- confirm against the definition
86 | #endif // PHONEMIZER_H
87 | 


--------------------------------------------------------------------------------
/phoneticdict.cpp:
--------------------------------------------------------------------------------
 1 | #include "phoneticdict.h"
 2 | #include "ext/ZFile.h"
 3 | #include <map>
 4 | 
 5 | const std::map<std::string,std::string> LegToV1{ // Language-name replacements applied by AutoConvertToV1 (key -> replacement)
 6 |   {"English","English-ARPA"},
 7 |   {"Spanish","Spanish-GlobalPhone"}
 8 | };
 9 | 
10 | void AutoConvertToV1(std::string& LangStr){
11 |     // Replace LangStr in place with its LegToV1 entry; names not in the table stay untouched.
12 |     const auto Match = LegToV1.find(LangStr);
13 |     if (Match == LegToV1.end()) return;
14 |     LangStr = Match->second;
15 | }
16 | 
17 | ZFILE_IOVR(DictEntry,inentr){ // Writes one DictEntry to 'right' (not declared here; presumably introduced by the macro)
18 |     right << inentr.Word;
19 |     right << inentr.PhSpelling;
20 |     right << inentr.Language; // Same field order as the ZFILE_OOVR reader in this file
21 |     return right;
22 | }
23 | 
24 | ZFILE_OOVR(DictEntry,entr){ // Reads one DictEntry from 'right' (not declared here; presumably introduced by the macro)
25 |     right >> entr.Word;
26 |     right >> entr.PhSpelling;
27 |     right >> entr.Language;
28 |     // Replace language names listed in LegToV1 with their mapped values
29 |     AutoConvertToV1(entr.Language);
30 | 
31 |     return right;
32 | 
33 | }
34 | PhoneticDict::PhoneticDict()
35 | {
36 |     // Nothing to set up: Entries is default-constructed (empty).
37 | }
38 | 
39 | void PhoneticDict::Export(const QString &exfn)
40 | {
41 |     // Writes Entries to the binary file exfn.
42 |     ZFile ofi;
43 |     // Stop if the file cannot be opened (the same check Import performs) instead of writing to an unopened ZFile.
44 |     if (!ofi.Open(exfn.toStdString(),EZFOpenMode::BinaryWrite))
45 |         return;
46 |     ofi << Entries;
47 |     ofi.Close();
48 | }
49 | 
50 | bool PhoneticDict::Import(const QString &infn)
51 | {
52 |     // Reads Entries from the binary file infn.
53 |     // Returns false only when the file cannot be opened; a zero-length
54 |     // file is accepted and leaves Entries as it was.
55 |     ZFile Reader;
56 | 
57 |     if (!Reader.Open(infn.toStdString(),EZFOpenMode::BinaryRead))
58 |         return false;
59 | 
60 | 
61 |     // Only deserialize when there is something to read.
62 |     if (Reader.GetFileLength() != 0)
63 |         Reader >> Entries;
64 | 
65 | 
66 |     Reader.Close();
67 | 
68 | 
69 |     return true;
70 | 
71 | 
72 | 
73 | }
74 | 
75 | 
76 | bool operator==(const DictEntry &left, const std::string &right)
77 | {
78 |     return left.Word == right;
79 |     // Only Word takes part in the comparison; PhSpelling and Language are ignored.
80 | 
81 | }
82 | 


--------------------------------------------------------------------------------
/phoneticdict.h:
--------------------------------------------------------------------------------
 1 | #ifndef PHONETICDICT_H
 2 | #define PHONETICDICT_H
 3 | #include "ext/ZFile.h"
 4 | #include <string>
 5 | #include <QString>
 6 | struct DictEntry{ // One record of the phonetic dictionary
 7 |     std::string Word; // The field compared by operator==(DictEntry, std::string)
 8 |     std::string PhSpelling; // presumably the phonetic spelling of Word -- TODO confirm
 9 |     std::string Language; // Language name of the entry
10 | };
11 | 
12 | 
13 | // True when the entry's Word equals the given string (the other fields are not compared)
14 | bool operator==(const DictEntry& left,const std::string& right);
15 | // ZFile read/write overloads for DictEntry, declared through the ZFILE_* macros (presumably stream operators)
16 | ZFILE_OOVR(DictEntry,entr);
17 | 
18 | ZFILE_IOVR(DictEntry,inentr);
19 | class PhoneticDict
20 | { // Holds a list of DictEntry records and moves it to and from a ZFile
21 | public:
22 |     PhoneticDict();
23 |     // Export writes Entries to exfn; Import reads Entries from infn and returns false if the file cannot be opened
24 |     void Export(const QString& exfn);
25 |     bool Import(const QString &infn);
26 |     // The dictionary records; public, so callers read and modify them directly
27 |     std::vector<DictEntry> Entries;
28 | 
29 | private:
30 | 
31 | };
32 | 
33 | #endif // PHONETICDICT_H
34 | 


--------------------------------------------------------------------------------
/phonetichighlighter.cpp:
--------------------------------------------------------------------------------
 1 | #include "phonetichighlighter.h"
 2 | 
 3 | 
 4 | PhoneticHighlighter::PhoneticHighlighter(QTextDocument *parent) : QSyntaxHighlighter(parent)
 5 | {
 6 |     // Patterns: brace-delimited spans, tokens starting with '@',
 7 |     // and runs of 23 or more word characters (shown with the error format).
 8 |     PhonemeExp = QRegularExpression(QString("\\{(\\s*?.*?)*?\\}"));
 9 |     SinglePhonemeExp = QRegularExpression(QString("@.\\S*"));
10 |     TooLongExp = QRegularExpression(QString("\\b\\w{23,}"));
11 | 
12 |     // Phoneme text: bold magenta.
13 |     PhonemeFormat.setFontWeight(QFont::Bold);
14 |     PhonemeFormat.setForeground(Qt::magenta);
15 | 
16 |     // Error text: the phoneme format recoloured red on black.
17 |     ErrorFormat = PhonemeFormat;
18 |     ErrorFormat.setBackground(Qt::black);
19 |     ErrorFormat.setForeground(Qt::red);
20 | 
21 | 
22 | 
23 | 
24 | 
25 | 
26 | 
27 | }
28 | 
29 | void PhoneticHighlighter::highlightBlock(const QString &text)
30 | {
31 |     // Formats one block of text: both phoneme patterns first, then the error pattern.
32 |     // Phoneme
33 |     HighlightRegex(text,PhonemeExp,PhonemeFormat);
34 |     HighlightRegex(text,SinglePhonemeExp,PhonemeFormat);
35 | 
36 |     // Error
37 |     HighlightRegex(text,TooLongExp,ErrorFormat);
38 | 
39 | }
40 | 
41 | void PhoneticHighlighter::HighlightRegex(const QString& Text,const QRegularExpression &Reg, const QTextCharFormat &Fmt)
42 | {
43 |     // Apply Fmt to every match that Reg.globalMatch(Text) reports.
44 |     for (QRegularExpressionMatchIterator Hits = Reg.globalMatch(Text); Hits.hasNext(); ) {
45 |         const QRegularExpressionMatch Hit = Hits.next();
46 |         setFormat(Hit.capturedStart(), Hit.capturedLength(), Fmt);
47 |     }
48 | 
49 | }
50 | 


--------------------------------------------------------------------------------
/phonetichighlighter.h:
--------------------------------------------------------------------------------
 1 | #ifndef PHONETICHIGHLIGHTER_H
 2 | #define PHONETICHIGHLIGHTER_H
 3 | #include <QSyntaxHighlighter>
 4 | #include <QRegularExpression>
 5 | class PhoneticHighlighter : public QSyntaxHighlighter
 6 | { // Syntax highlighter that formats text matching three regular expressions
 7 | public:
 8 |     PhoneticHighlighter(QTextDocument *parent = 0);
 9 | 
10 |     // This is public because the main window uses it
11 |     QRegularExpression PhonemeExp;
12 | 
13 | 
14 | protected:
15 |     void highlightBlock(const QString &text) override;
16 | private:
17 |     // Applies Fmt to each match of Reg found in Text
18 |     void HighlightRegex(const QString &Text, const QRegularExpression& Reg, const QTextCharFormat& Fmt);
19 |     QRegularExpression SinglePhonemeExp;
20 |     QRegularExpression TooLongExp;
21 |     QTextCharFormat PhonemeFormat;
22 |     QTextCharFormat ErrorFormat;
23 | 
24 | };
25 | 
26 | #endif // PHONETICHIGHLIGHTER_H
27 | 


--------------------------------------------------------------------------------
/res/clear64.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/clear64.png


--------------------------------------------------------------------------------
/res/infico.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/infico.png


--------------------------------------------------------------------------------
/res/multiwav.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/multiwav.png


--------------------------------------------------------------------------------
/res/noim.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/noim.png


--------------------------------------------------------------------------------
/res/phoneticdico.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/phoneticdico.png


--------------------------------------------------------------------------------
/res/random64.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/random64.png


--------------------------------------------------------------------------------
/res/refresh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/refresh.png


--------------------------------------------------------------------------------
/res/speak64.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/speak64.png


--------------------------------------------------------------------------------
/res/stdico.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/stdico.png


--------------------------------------------------------------------------------
/res/wav.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/res/wav.png


--------------------------------------------------------------------------------
/spectrogram.cpp:
--------------------------------------------------------------------------------
  1 | #include "spectrogram.h"
  2 | 
  3 | 
  4 | 
  5 | void Spectrogram::TimerTick()
  6 | {
  7 |     // Moves the left edge of PlayRect to the elapsed fraction of the playback time.
  8 |     if (!DoSlide)
  9 |         return;
 10 | 
 11 |     // A zero, negative or NaN total would turn the ratio below into NaN or infinity.
 12 |     if (!(TotSecs > 0.f))
 13 |         return;
 14 | 
 15 |     const float RemSecs = static_cast<float>(timEndTick->remainingTime()) / 1000.f;
 16 |     const float TickSet = (TotSecs - RemSecs) / TotSecs;
 17 | 
 18 |     PlayRect->topLeft->setCoords(TickSet,0);
 19 |     layer("Lay2")->replot();
 20 | }
 21 | 
 22 | void Spectrogram::EndSlide()
 23 | {
 24 |     timGenericTick->stop();
 25 |     timEndTick->stop();
 26 |     PlayRect->topLeft->setCoords(1,0);
 27 |     // Both timers are now stopped and the rect's left edge sits at ratio 1; redraw only its layer
 28 |     layer("Lay2")->replot();
 29 | 
 30 | }
 31 | 
 32 | size_t Spectrogram::Get2DIndex(size_t x, size_t y, size_t xSize)
 33 | {
 34 |       return y*xSize + x; // flat offset of (x, y) when every row holds xSize items
 35 | }
 36 | 
 37 | 
 38 | Spectrogram::Spectrogram(QWidget *parent) : QCustomPlot(parent)
 39 | {
 40 |     // Sets up the plot appearance, the playback rectangle on layer "Lay2" and the two timers.
 41 |     // Map and TotSecs used to be left without an initial value; give them defined ones.
 42 |     // Map is not created here; presumably the owner assigns it before DoPlot() -- TODO confirm.
 43 |     Map = nullptr;
 44 |     TotSecs = 0.f;
 45 |     DoSlide = false;
 46 | 
 47 |     QBrush FillBrush(QColor(100,100,100));
 48 |     this->setBackground(FillBrush);
 49 |     QColor White(255,255,255);
 50 |     QPen AxisPen(QColor(150,150,150));
 51 |     xAxis->setTickLabelColor(White);
 52 |     yAxis->setTickLabelColor(White);
 53 | 
 54 |     xAxis->setBasePen(AxisPen);
 55 |     yAxis->setBasePen(AxisPen);
 56 | 
 57 |     yAxis->setLabel("Frequency");
 58 |     xAxis->setLabel("Time");
 59 | 
 60 |     // They show the wrong info
 61 |     xAxis->setTickLabels(false);
 62 |     yAxis->setTickLabels(false);
 63 | 
 64 |     xAxis->setTicks(false);
 65 |     yAxis->setTicks(false);
 66 |     xAxis->setLabelColor(White);
 67 |     yAxis->setLabelColor(White);
 68 |     QFont Fnt = QFont(font().family(), 10);
 69 | 
 70 |     xAxis->setLabelFont(Fnt);
 71 |     yAxis->setLabelFont(Fnt);
 72 | 
 73 |     // Playback rectangle, positioned in viewport ratios; it starts out covering (0,0)-(1,1)
 74 |     PlayRect = new QCPItemRect(this);
 75 |     PlayRect->topLeft->setType(QCPItemPosition::ptViewportRatio);
 76 |     PlayRect->bottomRight->setType(QCPItemPosition::ptViewportRatio);
 77 | 
 78 |     // The rect is not visible without adding a layer, probably because we are using a more unusual type of plot
 79 |     addLayer("Lay2");
 80 | 
 81 |     QPen RectPen(QColor(255,255,255,150));
 82 |     QBrush RectBrush(QColor(200,200,200,75));
 83 | 
 84 |     RectPen.setWidth(3);
 85 |     PlayRect->topLeft->setCoords(0,0);
 86 |     PlayRect->bottomRight->setCoords(1,1);
 87 |     PlayRect->setPen(RectPen);
 88 |     PlayRect->setBrush(RectBrush);
 89 |     PlayRect->setLayer("Lay2");
 90 | 
 91 |     // Repeating timer (interval 10) whose timeout runs TimerTick()
 92 | 
 93 |     timGenericTick = new QTimer(this);
 94 |     timGenericTick->setInterval(10);
 95 |     timGenericTick->setSingleShot(false);
 96 |     // Timer whose timeout runs EndSlide(); DoPlot() restarts it with the clip length
 97 |     timEndTick = new QTimer(this);
 98 |     timEndTick->setInterval(1000);
 99 |     timEndTick->setSingleShot(false);
100 | 
101 |     connect(timGenericTick,&QTimer::timeout,this,&Spectrogram::TimerTick);
102 |     connect(timEndTick,&QTimer::timeout,this,&Spectrogram::EndSlide);
103 | 
104 | 
105 | 
106 | 
107 | }
108 | 
109 | void Spectrogram::DoPlot(const TFTensor<float> &InMel, float TimeInSeconds)
110 | {
111 |     // Copies the mel values into Map, replots, and restarts the playback-position timers.
112 |     // -> InMel: tensor whose Shape[1] and Shape[2] size the map; Data is read at x + Shape[2]*y
113 |     // -> TimeInSeconds: stored in TotSecs and, in milliseconds, used as the timEndTick interval
114 |     const auto& Dims = InMel.Shape;
115 |     const auto Width = Dims[2];
116 |     const auto Height = Dims[1];
117 | 
118 |     auto* Grid = Map->data();
119 |     Grid->setSize((int32_t)Width,(int32_t)Height);
120 |     Grid->setRange(QCPRange(0.0,(double)Height),QCPRange(0.0,(double)Width));
121 | 
122 |     for (int64_t Col = 0; Col < Width; ++Col)
123 |     {
124 |         for (int64_t Row = 0; Row < Height; ++Row)
125 |         {
126 |             const size_t Flat = Get2DIndex(Col,Row,Width);
127 |             Grid->setCell(Col,Row,(double)InMel.Data[Flat]);
128 |         }
129 |     }
130 | 
131 |     // Fit the colour scale and the axes to the new data, then redraw everything
132 |     Map->rescaleDataRange(true);
133 | 
134 | 
135 | 
136 | 
137 | 
138 |     rescaleAxes();
139 | 
140 |     replot();
141 | 
142 |     TotSecs = TimeInSeconds;
143 | 
144 |     // Show the playback rectangle with its left edge at ratio 1
145 |     PlayRect->setVisible(true);
146 | 
147 |     PlayRect->topLeft->setCoords(1,0);
148 | 
149 |     timGenericTick->start();
150 | 
151 |     timEndTick->start((int)(TimeInSeconds * 1000));
152 | 
153 | 
154 | 
155 | 
156 | 
157 | }
158 | 


--------------------------------------------------------------------------------
/spectrogram.h:
--------------------------------------------------------------------------------
 1 | #ifndef SPECTROGRAM_H
 2 | #define SPECTROGRAM_H
 3 | 
 4 | #include "ext/qcustomplot.h"
 5 | #include "VoxCommon.hpp"
 6 | 
 7 | class Spectrogram : public QCustomPlot
 8 | {
 9 | public slots:
10 |     void TimerTick();
11 |     void EndSlide();
12 | private:
13 |     inline size_t Get2DIndex(size_t x,size_t y,size_t xSize);
14 |     // Rectangle item whose left edge TimerTick moves while DoSlide is set
15 |     QCPItemRect* PlayRect;
16 |     // timGenericTick's timeout runs TimerTick; timEndTick's timeout runs EndSlide
17 |     QTimer* timGenericTick;
18 |     QTimer* timEndTick;
19 |     // Duration passed to the last DoPlot call, in seconds
20 |     float TotSecs;
21 | 
22 | 
23 | public:
24 |     bool DoSlide; // When false, TimerTick returns without moving the rectangle
25 |     Spectrogram(QWidget *parent = nullptr);
26 |     // Fills Map from InMel, replots, and starts both timers
27 |     void DoPlot(const TFTensor<float>& InMel,float TimeInSeconds);
28 |     // Not assigned anywhere in spectrogram.cpp; presumably set by the owner before DoPlot -- TODO confirm
29 |     QCPColorMap* Map;
30 | };
31 | 
32 | #endif // SPECTROGRAM_H
33 | 


--------------------------------------------------------------------------------
/stdres.qrc:
--------------------------------------------------------------------------------
 1 | <RCC>
 2 |     <qresource prefix="/">
 3 |         <file>res/stdico.png</file>
 4 |         <file>res/phoneticdico.png</file>
 5 |         <file>res/infico.png</file>
 6 |         <file>res/refresh.png</file>
 7 |         <file>res/noim.png</file>
 8 |         <file>res/clear64.png</file>
 9 |         <file>res/multiwav.png</file>
10 |         <file>res/random64.png</file>
11 |         <file>res/wav.png</file>
12 |         <file>res/speak64.png</file>
13 |     </qresource>
14 | </RCC>
15 | 


--------------------------------------------------------------------------------
/tacotron2.cpp:
--------------------------------------------------------------------------------
  1 | #include "tacotron2.h"
  2 | 
  3 | 
  4 | 
  5 | TFTensor<float> Tacotron2::DoInferenceTFTTS(const std::vector<int32_t> &InputIDs, int32_t SpeakerID, int32_t EmotionID)
  6 | {
  7 |     // Runs a TensorflowTTS-exported Tacotron2 model.
  8 |     // -> InputIDs: token IDs, sent as a {1, InputIDs.size()} tensor
  9 |     // -> SpeakerID: value fed to the speaker_ids input
 10 |     // -> EmotionID: value fed to the emotion_ids input; -1 leaves that input out
 11 |     // <- Returns: copy of model output [1]; output [3] is stored in Attention
 12 |     // Throws std::runtime_error (same type as DoInference) when no model is loaded.
 13 |     if (!CurrentMdl)
 14 |           throw std::runtime_error("Tried to do inference on unloaded or invalid model!");
 15 | 
 16 | 
 17 | 
 18 |       // Convenience reference so that we don't have to constantly derefer pointers.
 19 |       cppflow::model& Mdl = *CurrentMdl;
 20 | 
 21 | 
 22 |       // Define the tensors
 23 | 
 24 |       // This is the shape of the input IDs, our equivalent to tf.expand_dims.
 25 |       std::vector<int64_t> InputIDShape = { 1, (int64_t)InputIDs.size() };
 26 | 
 27 | 
 28 | 
 29 |       cppflow::tensor input_ids{ InputIDs, InputIDShape };
 30 |       cppflow::tensor speaker_ids{SpeakerID };
 31 |       cppflow::tensor input_lengths{(int32_t)InputIDs.size() };
 32 | 
 33 |       // Held as a local instead of new/delete so nothing leaks if inference throws.
 34 |       // It lives until the end of the function and is only added to the inputs
 35 |       // for multi-emotion models (EmotionID != -1).
 36 |       cppflow::tensor emotion_ids{std::vector<int32_t>{EmotionID}};
 37 | 
 38 | 
 39 |       TensorVec Inputs = {{"serving_default_input_ids:0",input_ids},
 40 |                           {"serving_default_input_lengths:0",input_lengths},
 41 |                           {"serving_default_speaker_ids:0",speaker_ids}};
 42 | 
 43 | 
 44 | 
 45 |       // This is a multi-emotion model: feed the extra input
 46 |       if (EmotionID != -1)
 47 |           Inputs.push_back({"serving_default_emotion_ids:0",emotion_ids});
 48 | 
 49 | 
 50 |       // Do inference
 51 | 
 52 |       // We only care about the after mel-after [1] and alignment history [3]
 53 |       auto Outputs = Mdl(Inputs,{"StatefulPartitionedCall:0","StatefulPartitionedCall:1","StatefulPartitionedCall:2","StatefulPartitionedCall:3"});
 54 | 
 55 |       // Define output and return it
 56 |       TFTensor<float> MelOut = VoxUtil::CopyTensor<float>(Outputs[1]);
 57 |       Attention = VoxUtil::CopyTensor<float>(Outputs[3]);
 58 | 
 59 | 
 60 | 
 61 |       // We could just straight out define it in the return statement, but I like it more this way
 62 | 
 63 |       return MelOut;
 64 | }
 65 | 
 66 | TFTensor<float> Tacotron2::DoInferenceCoqui(const std::vector<int32_t> &InputIDs)
 67 | {
 68 |     // Runs a Coqui-TTS-exported Tacotron2 model.
 69 |     // -> InputIDs: token IDs, sent as a {1, InputIDs.size()} tensor
 70 |     // <- Returns: copy of model output [1]; output [2], transposed, is stored in Attention
 71 |     // CurrentMdl is dereferenced without a check here; DoInference() tests it before calling.
 72 | 
 73 |     // Shape {1, N}: our equivalent to tf.expand_dims.
 74 |     std::vector<int64_t> BatchShape = { 1, (int64_t)InputIDs.size() };
 75 |     cppflow::tensor Characters{ InputIDs, BatchShape };
 76 | 
 77 |     TensorVec Feed = {{"serving_default_characters:0",Characters}};
 78 | 
 79 | 
 80 |     // We only care about the after mel-after [1] and alignment history [2]
 81 |     cppflow::model& Net = *CurrentMdl;
 82 |     auto Results = Net(Feed,{"StatefulPartitionedCall:0","StatefulPartitionedCall:1","StatefulPartitionedCall:2","StatefulPartitionedCall:3"});
 83 | 
 84 |     // Mel spectrogram to hand back to the caller
 85 |     TFTensor<float> MelOut = VoxUtil::CopyTensor<float>(Results[1]);
 86 | 
 87 | 
 88 |     // Coqui TT2 attention output is inverse of what our attention plotter expects, so we transpose it.
 89 |     cppflow::tensor Flipped = cppflow::transpose(Results[2],cppflow::tensor{0,2,1});
 90 |     Attention = VoxUtil::CopyTensor<float>(Flipped);
 91 | 
 92 | 
 93 | 
 94 | 
 95 |     return MelOut;
 96 | }
 97 | 
 98 | Tacotron2::Tacotron2()
 99 | {
100 |     // Nothing to initialize here.
101 | }
102 | 
103 | TFTensor<float> Tacotron2::DoInference(const std::vector<int32_t> &InputIDs, const std::vector<float> &ArgsFloat, const std::vector<int32_t> ArgsInt, int32_t SpeakerID, int32_t EmotionID)
104 | {
105 |     // Dispatches to the backend matching GetCurrentRepo(); ArgsFloat and ArgsInt are not used here.
106 |     // Throws std::runtime_error when no model is loaded or the repo is not handled.
107 |     if (!CurrentMdl)
108 |         throw std::runtime_error("Tried to do inference on unloaded or invalid model!");
109 | 
110 |     const auto Repo = GetCurrentRepo();
111 |     if (Repo == ETTSRepo::CoquiTTS)
112 |         return DoInferenceCoqui(InputIDs);
113 |     if (Repo != ETTSRepo::TensorflowTTS)
114 |         throw std::runtime_error("Unknown/unset/unimplemented TTS repo!!!");
115 | 
116 |     return DoInferenceTFTTS(InputIDs,SpeakerID,EmotionID);
117 | }
118 | 


--------------------------------------------------------------------------------
/tacotron2.h:
--------------------------------------------------------------------------------
 1 | #ifndef TACOTRON2_H
 2 | #define TACOTRON2_H
 3 | 
 4 | #include "melgen.h"
 5 | 
 6 | class Tacotron2 : public MelGen
 7 | {
 8 | private:
 9 |     // Backend-specific inference paths; DoInference picks one from GetCurrentRepo()
10 |     TFTensor<float> DoInferenceTFTTS(const std::vector<int32_t>& InputIDs,int32_t SpeakerID = 0, int32_t EmotionID = -1);
11 |     TFTensor<float> DoInferenceCoqui(const std::vector<int32_t>& InputIDs);
12 | 
13 | 
14 | 
15 | public:
16 |     Tacotron2();
17 |     TFTensor<float> Attention;
18 |     // (Attention is overwritten with the model's alignment output on every inference call)
19 |     /*
20 |     Do inference on a Tacotron2 model.
21 |     Throws std::runtime_error if no model is loaded or the current repo is not supported.
22 |     -> InputIDs: Input IDs of tokens for inference
23 |     -> SpeakerID: ID of the speaker in the model to do inference on. If single speaker, always leave at 0. If multispeaker, refer to your model.
24 |     -> EmotionID: -1 (the default) means no emotion input is fed. ArgsFloat and ArgsInt are not used by this class.
25 |     <- Returns: TFTensor<float> with shape {1,<len of mel in frames>,80} containing contents of mel spectrogram.
26 |     */
27 |     TFTensor<float> DoInference(const std::vector<int32_t>& InputIDs,const std::vector<float>& ArgsFloat,const std::vector<int32_t> ArgsInt, int32_t SpeakerID = 0, int32_t EmotionID = -1);
28 | 
29 | };
30 | 
31 | #endif // TACOTRON2_H
32 | 


--------------------------------------------------------------------------------
/tacotron2torch.cpp:
--------------------------------------------------------------------------------
 1 | #include "tacotron2torch.h"
 2 | 
 3 | Tacotron2Torch::Tacotron2Torch()
 4 | {
 5 |     // Nothing to initialize here; the module is loaded by Initialize().
 6 | }
 7 | 
 8 | bool Tacotron2Torch::Initialize(const std::string &SavedModelFolder, ETTSRepo::Enum InTTSRepo)
 9 | {
10 |     // Loads the TorchScript module found at SavedModelFolder and records the repo type.
11 |     // A c10::Error raised while loading is reported as a false return; CurrentRepo is then left unchanged.
12 |     try {
13 |         // Deserialize the ScriptModule from a file using torch::jit::load().
14 |         Model = torch::jit::load(SavedModelFolder);
15 |     }
16 |     catch (const c10::Error&) {
17 |         return false;
18 |     }
19 | 
20 | 
21 |     CurrentRepo = InTTSRepo;
22 |     return true;
23 | 
24 | }
25 | 
26 | TFTensor<float> Tacotron2Torch::DoInference(const std::vector<int32_t> &InputIDs, const std::vector<float> &ArgsFloat, const std::vector<int32_t> ArgsInt, int32_t SpeakerID, int32_t EmotionID)
27 | {
28 |     // Runs the TorchScript Tacotron2 module on InputIDs.
29 |     // ArgsFloat, ArgsInt and EmotionID are not used by this backend.
30 |     // Returns a copy of output [1]; output [3] is stored in Attention.
31 | 
32 |     // without this memory consumption is 4x
33 |     torch::NoGradGuard no_grad;
34 | 
35 |     // Widen the int32 IDs to int64 before building the tensor
36 |     std::vector<int64_t> WideIDs(InputIDs.begin(), InputIDs.end());
37 | 
38 | 
39 |     // This Tacotron2 always takes in speaker IDs
40 |     if (SpeakerID == -1)
41 |         SpeakerID = 0;
42 | 
43 |     torch::TensorOptions Opts = torch::TensorOptions().requires_grad(false);
44 | 
45 |     auto InSpkid = torch::tensor({SpeakerID},Opts);
46 |     auto InIDS = torch::tensor(WideIDs, Opts).unsqueeze(0);
47 | 
48 | 
49 | 
50 |     std::vector<torch::jit::IValue> inputs{ InSpkid,InIDS};
51 | 
52 | 
53 | 
54 |     // Infer
55 |     c10::IValue Result = Model(inputs);
56 | 
57 | 
58 |     // Result = list (mel_outputs, mel_outputs_postnet, gate_outputs, alignments)
59 | 
60 |     auto Parts = Result.toList();
61 | 
62 |     auto MelTens = Parts[1].get().toTensor();
63 |     auto AttTens = Parts[3].get().toTensor();//.transpose(1,2); // [1, dec_t, enc_t ] -> [1, enc_t, dec_t]
64 | 
65 | 
66 |     Attention = VoxUtil::CopyTensor<float>(AttTens);
67 | 
68 | 
69 |     return VoxUtil::CopyTensor<float>(MelTens);
70 | 
71 | 
72 | 
73 | 
74 | 
75 | 
76 | }
77 | 


--------------------------------------------------------------------------------
/tacotron2torch.h:
--------------------------------------------------------------------------------
 1 | #ifndef TACOTRON2TORCH_H
 2 | #define TACOTRON2TORCH_H
 3 | #include "melgen.h"
 4 | 
 5 | class Tacotron2Torch : public MelGen
 6 | {
 7 | private:
 8 |    torch::jit::script::Module Model;
 9 | 
10 | public:
11 |     // Overwritten with the model's alignment output on every DoInference call
12 |     TFTensor<float> Attention;
13 | 
14 | 
15 |     Tacotron2Torch();
16 |     /*
17 |     Initialize and load the model
18 |     Returns false if loading raises a c10::Error.
19 |     -> SavedModelFolder: Folder where the TorchScript models are exported (passed as-is to torch::jit::load)
20 |     <- Returns: (bool)Success
21 |     */
22 |     bool Initialize(const std::string& SavedModelFolder, ETTSRepo::Enum InTTSRepo);
23 | 
24 | 
25 |     /*
26 |     Do inference on a Tacotron2 model.
27 |     ArgsFloat, ArgsInt and EmotionID are accepted but not used by this implementation.
28 |     -> InputIDs: Input IDs of tokens for inference
29 |     -> SpeakerID: ID of the speaker in the model to do inference on. If single speaker, always leave at 0. If multispeaker, refer to your model.
30 |        (a SpeakerID of -1 is treated as 0)
31 |     <- Returns: TFTensor<float> with shape {1,<len of mel in frames>,80} containing contents of mel spectrogram.
32 |     */
33 |     TFTensor<float> DoInference(const std::vector<int32_t>& InputIDs,const std::vector<float>& ArgsFloat,const std::vector<int32_t> ArgsInt, int32_t SpeakerID = 0, int32_t EmotionID = -1);
34 | 
35 | };
36 | 
37 | #endif // TACOTRON2TORCH_H
38 | 


--------------------------------------------------------------------------------
/tfg2p.cpp:
--------------------------------------------------------------------------------
 1 | #include "tfg2p.h"
 2 | TFG2P::TFG2P()
 3 |     : G2P(nullptr) // no model until Initialize() succeeds
 4 | {
 5 | 
 6 | }
 7 | 
 8 | TFG2P::TFG2P(const std::string &SavedModelFolder)
 9 |     : G2P(nullptr)
10 | {
11 |     // Load right away; the success flag returned by Initialize is not checked here.
12 |     Initialize(SavedModelFolder);
13 | }
14 | 
15 | bool TFG2P::Initialize(const std::string &SavedModelFolder)
16 | {
17 |     // (Re)loads the SavedModel; returns false and leaves G2P null when loading throws.
18 |     // The previous model is freed first so that repeated calls do not leak it.
19 |     delete G2P;
20 |     G2P = nullptr;
21 |     try {
22 |         G2P = new cppflow::model(SavedModelFolder);
23 |     }
24 |     catch (...) {
25 |         return false;
26 |     }
27 |     return true;
28 | }
29 | 
30 | TFTensor<int32_t> TFG2P::DoInference(const std::vector<int32_t> &InputIDs, float Temperature)
31 | {
32 |     // Feeds InputIDs, their count and Temperature to the model and returns a copy of its first output.
33 |     // NOTE(review): std::exception has no standard constructor taking a string; this throw relies on a compiler extension.
34 |     if (G2P == nullptr)
35 |         throw std::exception("Tried to do inference on unloaded or invalid model!");
36 | 
37 |     // One-dimensional shape holding the number of input IDs
38 |     const int64_t Count = (int64_t)InputIDs.size();
39 | 
40 | 
41 |     cppflow::tensor input_ids{ InputIDs, std::vector<int64_t>{Count}};
42 |     cppflow::tensor input_len{(int32_t)InputIDs.size()};
43 |     cppflow::tensor input_temp{Temperature};
44 | 
45 | 
46 | 
47 | 
48 |     // Convenience reference so that we don't have to constantly derefer pointers.
49 |     cppflow::model& Net = *G2P;
50 |     auto Results = Net({{"serving_default_input_ids:0",input_ids},
51 |          {"serving_default_input_len:0",input_len},
52 |          {"serving_default_input_temperature:0",input_temp}},{"StatefulPartitionedCall:0"});
53 | 
54 | 
55 |     return VoxUtil::CopyTensor<int32_t>(Results[0]);
56 | 
57 | 
58 | }
59 | 
60 | TFG2P::~TFG2P()
61 | {
62 |     // Deleting a null pointer is a no-op, so no check is needed.
63 |     delete G2P;
64 | 
65 | }
66 | 


--------------------------------------------------------------------------------
/tfg2p.h:
--------------------------------------------------------------------------------
 1 | #ifndef TFG2P_H
 2 | #define TFG2P_H
 3 | 
 4 | #include "VoxCommon.hpp"
 5 | 
 6 | 
 7 | class TFG2P
 8 | {
 9 | private:
10 |     cppflow::model* G2P;
11 |     // Owned: allocated in Initialize, deleted in the destructor. NOTE(review): no copy operations are declared, so copying a TFG2P would copy this raw pointer.
12 | public:
13 |     TFG2P();
14 |     TFG2P(const std::string& SavedModelFolder);
15 | 
16 |     /*
17 |     Initialize and load the model
18 | 
19 |     -> SavedModelFolder: Folder where the .pb, variables, and other characteristics of the exported SavedModel are stored
20 |     <- Returns: (bool)Success
21 |     */
22 |     bool Initialize(const std::string& SavedModelFolder);
23 | 
24 |     /*
25 |     Do inference on a G2P-TF-RNN model.
26 |     Throws if no model is loaded.
27 |     -> InputIDs: Input IDs of tokens for inference
28 |     -> Temperature: Temperature of the RNN, values higher than 0.1 cause instability.
29 | 
30 |     <- Returns: TFTensor<int32_t> containing phoneme IDs
31 |     */
32 |     TFTensor<int32_t> DoInference(const std::vector<int32_t>& InputIDs, float Temperature = 0.1f);
33 | 
34 |     ~TFG2P();
35 | 
36 | };
37 | 
38 | #endif // TFG2P_H
39 | 


--------------------------------------------------------------------------------
/torchmoji.cpp:
--------------------------------------------------------------------------------
 1 | #include "torchmoji.h"
 2 | #include "ext/ZCharScanner.h"
 3 | 
 4 | void TorchMoji::LoadDict(const std::string& Path)
 5 | {
 6 |    // Rebuilds Dictionary from the tab-delimited lines of Path (first token = word, second = integer ID).
 7 |    // Lines with fewer than two tokens are skipped.
 8 |    Dictionary.clear();
 9 | 
10 |    const std::vector<std::string> Lines = VoxUtil::GetLinedFile(Path);
11 | 
12 |    ZStringDelimiter Splitter;
13 |    Splitter.AddDelimiter("\t");
14 | 
15 |    for (const auto& Line : Lines){
16 |        Splitter.SetText(Line);
17 | 
18 |        if (Splitter.szTokens() >= 2)
19 |            Dictionary.insert({Splitter[0], std::stoi(Splitter[1])});
20 | 
21 |    }
22 | }
23 | 
24 | std::vector<int32_t> TorchMoji::WordsToIDs(const std::vector<std::string>& Words)
25 | {
26 |     // Maps each word to its Dictionary ID in a vector of VoxCommon::TorchMojiLen entries.
27 |     // Words missing from Dictionary get ID 1, unused trailing slots stay 0,
28 |     // and words beyond TorchMojiLen are ignored.
29 |     std::vector<int32_t> Encoded(VoxCommon::TorchMojiLen,0);
30 | 
31 |     for (size_t Pos = 0; Pos < Words.size() && Pos < VoxCommon::TorchMojiLen; ++Pos)
32 |     {
33 |         const auto Found = Dictionary.find(Words[Pos]);
34 | 
35 |         // 1 = unknown
36 |         Encoded[Pos] = (Found != Dictionary.end()) ? Found->second : 1;
37 | 
38 |     }
39 | 
40 | 
41 | 
42 | 
43 | 
44 |     return Encoded;
45 | 
46 | 
47 | 
48 | }
49 | 
50 | TorchMoji::TorchMoji()
51 | {
52 |     // Nothing loaded yet; call Initialize() before Infer().
53 | }
54 | 
55 | TorchMoji::TorchMoji(const std::string &InitPath, const std::string &DPath)
56 | {
57 |     Initialize(InitPath,DPath);
58 |     // InitPath is the module path and DPath the dictionary path (see Initialize)
59 | }
60 | 
61 | void TorchMoji::Initialize(const std::string &Path, const std::string &DictPath)
62 | {
63 |     // Loads the TorchScript module from Path, then the word dictionary from DictPath. Nothing is caught here.
64 |     Model = torch::jit::load(Path);
65 |     LoadDict(DictPath);
66 | }
67 | 
68 | std::vector<float> TorchMoji::Infer(const std::vector<std::string> &Seq)
69 | {
70 |     // Encodes Seq with WordsToIDs, runs the module on it and returns the squeezed output as floats.
71 |     std::vector<int32_t> Ids = WordsToIDs(Seq);
72 | 
73 |     // (1, TMLen)
74 |     auto Batch = torch::tensor(Ids).unsqueeze(0);
75 | 
76 |     // (1, VoxCommon::TorchMojiEmbSize) squeezed to (TorchMojiEmbSize)
77 |     at::Tensor Hidden = Model({Batch}).toTensor();
78 |     Hidden = Hidden.squeeze();
79 | 
80 |     TFTensor<float> Copied = VoxUtil::CopyTensor<float>(Hidden);
81 | 
82 | 
83 |     return Copied.Data;
84 | 
85 | }
86 | 


--------------------------------------------------------------------------------
/torchmoji.h:
--------------------------------------------------------------------------------
 1 | #ifndef TORCHMOJI_H
 2 | #define TORCHMOJI_H
 3 | #include "VoxCommon.hpp"
 4 | 
 5 | 
 6 | // TorchMoji: Emotion contextualizer model (Cookie design: skipping last layer and using hidden states to feed TTS model)
 7 | // Allows for manipulation of emotion at inference time
 8 | class TorchMoji
 9 | {
10 | private:
11 |         // Word, ID
12 |      std::map<std::string,int32_t> Dictionary;
13 |      // TorchScript module, loaded by Initialize
14 |      torch::jit::script::Module Model;
15 |      // Rebuilds Dictionary from a tab-delimited file
16 |      void LoadDict(const std::string& Path);
17 |      // Converts words to a fixed-length ID vector (1 = unknown word, 0 = unused slot)
18 |      std::vector<int32_t> WordsToIDs(const std::vector<std::string> &Words);
19 | public:
20 |     TorchMoji();
21 |     // Same as default construction followed by Initialize(InitPath, DPath)
22 |     TorchMoji(const std::string& InitPath,const std::string& DPath);
23 |     // Loads the module from Path and the dictionary from DictPath
24 |     void Initialize(const std::string& Path,const std::string& DictPath);
25 | 
26 |     // Return hidden states of emotion state.
27 |     // -> Seq: Vector of words
28 |     // <- Returns float vec of size VoxCommon::TorchMojiEmbSize containing hidden states, ready to feed into TTS model.
29 |     std::vector<float> Infer(const std::vector<std::string>& Seq);
30 | };
31 | 
32 | #endif // TORCHMOJI_H
33 | 


--------------------------------------------------------------------------------
/track.cpp:
--------------------------------------------------------------------------------
  1 | #include "track.h"
  2 | 
  3 | #include <QAudioDecoder>
  4 | 
  5 | Track::Track(QWidget *parent)
  6 |     : QCustomPlot(parent)
  7 |     , decoder(new QAudioDecoder(this))
  8 | {
  9 |     // Waveform graph: green pen over a grey fill, on a grey background, with both axes hidden
 10 |     wavePlot = addGraph();
 11 | 
 12 |     QBrush FillBrush(QColor(100,100,100));
 13 |     this->setBackground(FillBrush);
 14 |     QPen ThePen(QColor(127,255,0));
 15 |     wavePlot->setPen(ThePen);
 16 |     wavePlot->setBrush(FillBrush);
 17 | 
 18 |     yAxis->setVisible(false);
 19 |     xAxis->setVisible(false);
 20 | 
 21 |     // add independent layer for playrect and labels so we don't replot the entire thing every time
 22 | 
 23 |     addLayer("Playing");
 24 |     setCurrentLayer("Playing");
 25 |     layer("Playing")->setMode(QCPLayer::LayerMode::lmBuffered);
 26 | 
 27 |     // Playback rectangle, positioned in viewport ratios; created after "Playing" became the current layer
 28 |     PlayRect = new QCPItemRect(this);
 29 |     PlayRect->topLeft->setType(QCPItemPosition::ptViewportRatio);
 30 |     PlayRect->bottomRight->setType(QCPItemPosition::ptViewportRatio);
 31 | 
 32 | 
 33 |     QPen RectPen(QColor(255,255,255,150));
 34 |     QBrush RectBrush(QColor(200,200,200,75));
 35 | 
 36 |     RectPen.setWidth(3);
 37 |     PlayRect->topLeft->setCoords(0,0);
 38 |     PlayRect->bottomRight->setCoords(1,1);
 39 |     PlayRect->setPen(RectPen);
 40 |     PlayRect->setBrush(RectBrush);
 41 | 
 42 | 
 43 | 
 44 |     // Repeating timers: timGenericTick's timeout runs TimerTick, timEndTick's timeout runs EndSlide
 45 |     timGenericTick = new QTimer(this);
 46 |     timGenericTick->setInterval(10);
 47 |     timGenericTick->setSingleShot(false);
 48 | 
 49 |     timEndTick = new QTimer(this);
 50 |     timEndTick->setInterval(1000);
 51 |     timEndTick->setSingleShot(false);
 52 | 
 53 |     connect(timGenericTick,&QTimer::timeout,this,&Track::TimerTick);
 54 |     connect(timEndTick,&QTimer::timeout,this,&Track::EndSlide);
 55 |     // Status text near the top-left corner, initially "Ready"
 56 |     SecsTxt = new QCPItemText(this);
 57 |     SecsTxt->setPositionAlignment(Qt::AlignTop|Qt::AlignLeft);
 58 |     SecsTxt->position->setType(QCPItemPosition::ptViewportRatio);
 59 |     SecsTxt->position->setCoords(0.02, 0.05);
 60 |     SecsTxt->setText("Ready");
 61 |     SecsTxt->setFont(QFont(font().family(), 10));
 62 |     SecsTxt->setColor(QColor(255,255,255));
 63 |     SecsTxt->setClipToAxisRect(false);
 64 |     DoSlide = false;
 65 | 
 66 |     //wavePlot->setPen(ThePen);
 67 | 
 68 | }
 69 | 
// Destroys the widget, releasing the decoder allocated in the constructor.
Track::~Track()
{
    // decoder was created with new in the constructor and is freed explicitly here.
    delete decoder;
    // wavePlot is not deleted here.
    // NOTE(review): presumably it is owned and freed by the QCustomPlot base - confirm.
}
 75 | 
 76 | void Track::setSource(const QAudioBuffer &inbuffer)
 77 | {
 78 |     buffer = inbuffer;
 79 | 
 80 | 
 81 |     setBuffer();
 82 | 
 83 | 
 84 |     startPlaying(((float)buffer.duration()) / 1e+6);
 85 | 
 86 | }
 87 | 
 88 | void Track::setBuffer()
 89 | {
 90 |     samples.clear();
 91 |     qreal peak = getPeakValue(buffer.format());
 92 |     const float *data = buffer.constData<float>();
 93 |     int count = buffer.sampleCount();
 94 | 
 95 |     for (int i=0; i<count; i += 2){
 96 |         double val = ((double)data[i])/peak;
 97 |         samples.append(val);
 98 |     }
 99 | 
100 | }
101 | 
102 | void Track::plot()
103 | {
104 |     QVector<double> x(samples.size());
105 |     for (int i=0; i<x.size(); i++)
106 |         x[i] = i;
107 |     wavePlot->addData(x, samples);
108 |     yAxis->setRange(QCPRange(-1.0, 1.0));
109 | 
110 |     xAxis->setRange(QCPRange(0, samples.size()));
111 |     replot();
112 | }
113 | 
114 | void Track::startPlaying(float TimeInSecs)
115 | {
116 |     //TickAdd = 1.f/( TimeInSecs / 0.025f );
117 |     TotSecs = TimeInSecs;
118 | 
119 | 
120 |     timGenericTick->start();
121 | 
122 |     timEndTick->start((int)(TimeInSecs * 1000));
123 | 
124 | 
125 | }
126 | 
127 | void Track::TimerTick()
128 | {
129 |     if (!DoSlide)
130 |         return;
131 | 
132 |     float RemSecs = ((float)timEndTick->remainingTime()) / 1000.f;
133 |     float CurrentPos = TotSecs - RemSecs;
134 |     TickSet = CurrentPos/TotSecs;
135 | 
136 |     PlayRect->topLeft->setCoords(TickSet,0);
137 |     SetTimeLabel(CurrentPos,TotSecs);
138 | 
139 | 
140 |     layer("Playing")->replot();
141 | 
142 | 
143 | }
144 | 
145 | void Track::EndSlide()
146 | {
147 | 
148 |     timGenericTick->stop();
149 |     timEndTick->stop();
150 |     PlayRect->topLeft->setCoords(1,0);
151 |     SetTimeLabel(TotSecs,TotSecs);
152 | 
153 |     layer("Playing")->replot();
154 | 
155 | }
156 | 
157 | void Track::SetTimeLabel(float Cur, float Remaining)
158 | {
159 |     SecsTxt->setText(QString::number(Cur,'f',1) + " / " + QString::number(Remaining,'f',1) + " (sec)");
160 | 
161 | 
162 | }
163 | 
164 | /**
165 |  * https://stackoverflow.com/questions/46947668/draw-waveform-from-raw-data-using-qaudioprobe
166 |  * @brief Track::getPeakValue
167 |  * @param format
168 |  * @return The peak value
169 |  */
170 | qreal Track::getPeakValue(const QAudioFormat &format)
171 | {
172 |     qreal ret(0);
173 |     if (format.isValid()){
174 |         switch (format.sampleType()) {
175 |             case QAudioFormat::Unknown:
176 |             break;
177 |             case QAudioFormat::Float:
178 |                 if (format.sampleSize() != 32) // other sample formats are not supported
179 |                     ret = 0;
180 |                 else
181 |                     ret = 1.00003;
182 |             break;
183 |             case QAudioFormat::SignedInt:
184 |                 if (format.sampleSize() == 32)
185 | #ifdef Q_OS_WIN
186 |                     ret = INT_MAX;
187 | #endif
188 | #ifdef Q_OS_UNIX
189 |                     ret = SHRT_MAX;
190 | #endif
191 |                 else if (format.sampleSize() == 16)
192 |                     ret = SHRT_MAX;
193 |                 else if (format.sampleSize() == 8)
194 |                     ret = CHAR_MAX;
195 |                 break;
196 |             case QAudioFormat::UnSignedInt:
197 |                 if (format.sampleSize() == 32)
198 |                     ret = UINT_MAX;
199 |                 else if (format.sampleSize() == 16)
200 |                     ret = USHRT_MAX;
201 |                 else if (format.sampleSize() == 8)
202 |                     ret = UCHAR_MAX;
203 |             break;
204 |         default:
205 |             break;
206 |         }
207 |     }
208 |     return ret;
209 | }
210 | 


--------------------------------------------------------------------------------
/track.h:
--------------------------------------------------------------------------------
 1 | #ifndef TRACK_H
 2 | #define TRACK_H
 3 | #include "ext/qcustomplot.h"
 4 | #include <QAudioBuffer>
 5 | 
 6 | 
 7 | // Copied from https://stackoverflow.com/questions/50277132/qt-audio-file-to-wave-like-audacity
 8 | 
 9 | class QAudioDecoder;
10 | 
// Waveform display widget with a playback-progress overlay.
// Shows the samples of a QAudioBuffer as a graph and animates a rectangle and
// a time label on a separate "Playing" layer while the clip plays.
class Track : public QCustomPlot
{
    Q_OBJECT

public:
    Track(QWidget *parent = Q_NULLPTR);
    ~Track();
    // Copies the buffer, extracts its samples and starts the progress timers.
    void setSource(const QAudioBuffer &inbuffer);

public:
    // While false, TimerTick() returns immediately and the overlay does not move.
    // Initialised to false by the constructor.
    bool DoSlide;


    // Rebuilds `samples` from `buffer`.
    void setBuffer();
    // Draws `samples` and replots the widget.
    void plot();
    // Stores the total time and starts both timers.
    void startPlaying(float TimeInSecs);


public slots:
    // Connected to timGenericTick: advances the progress rectangle and label.
    void TimerTick();
    // Connected to timEndTick: stops the timers and shows the end state.
    void EndSlide();
private:
    // Sets the label to "<Cur> / <Remaining> (sec)".
    void SetTimeLabel(float Cur, float Remaining);
    // Animation timer (10 ms interval).
    QTimer* timGenericTick;
    // Restarted by startPlaying() with the clip length; its timeout ends the slide.
    QTimer* timEndTick;

    // Fraction of the clip played so far; written by TimerTick().
    float TickSet;
    // Total clip length in seconds; written by startPlaying().
    float TotSecs;

    // Progress rectangle on the "Playing" layer.
    QCPItemRect* PlayRect;
    // Time label on the "Playing" layer.
    QCPItemText* SecsTxt;

    // Peak sample value for a format; 0 if the format is unsupported.
    qreal getPeakValue(const QAudioFormat& format);

    // Created in the constructor and deleted in the destructor.
    QAudioDecoder *decoder;
    // Copy of the audio passed to setSource().
    QAudioBuffer buffer;
    // Normalised samples plotted by plot().
    QVector<double> samples;
    // The waveform graph.
    QCPGraph *wavePlot;
};
50 | #endif // TRACK_H
51 | 


--------------------------------------------------------------------------------
/vits.cpp:
--------------------------------------------------------------------------------
  1 | #include "vits.h"
  2 | 
  3 | std::vector<int64_t> VITS::ZeroPadVec(const std::vector<int32_t> &InIDs)
  4 | {
  5 |     std::vector<int64_t> NewIDs;
  6 |     NewIDs.reserve(InIDs.size() * 2);
  7 | 
  8 |     NewIDs.push_back(0);
  9 | 
 10 |     for (auto CharID : InIDs)
 11 |     {
 12 | 
 13 |         NewIDs.push_back((int64_t)CharID);
 14 |         NewIDs.push_back(0);
 15 | 
 16 | 
 17 |     }
 18 |     // Add final 0
 19 |    // NewIDs.push_back(0);
 20 | 
 21 | 
 22 |     return NewIDs;
 23 | 
 24 | }
 25 | 
 26 | VITS::VITS()
 27 | {
 28 | 
 29 | }
 30 | 
 31 | bool VITS::Initialize(const std::string &SavedModelFolder, ETTSRepo::Enum InTTSRepo)
 32 | {
 33 |     try {
 34 |         // Deserialize the ScriptModule from a file using torch::jit::load().
 35 | 
 36 |         Model = torch::jit::load(SavedModelFolder);
 37 | 
 38 |     }
 39 |     catch (const c10::Error& e) {
 40 |         return false;
 41 | 
 42 |     }
 43 | 
 44 |     CurrentRepo = InTTSRepo;
 45 |     return true;
 46 | }
 47 | 
 48 | TFTensor<float> VITS::DoInference(const std::vector<int32_t> &InputIDs, const std::vector<float> &ArgsFloat, const std::vector<int32_t> ArgsInt, int32_t SpeakerID, int32_t EmotionID)
 49 | {
 50 |     // without this memory consumption is 4x
 51 |     torch::NoGradGuard no_grad;
 52 | 
 53 |     // TorchMoji hidden states are added to ArgsFloat
 54 |     const bool UsesTorchMoji = ArgsFloat.size() > 1;
 55 | 
 56 |     std::vector<int64_t> PaddedIDs;
 57 | 
 58 | 
 59 |     // Our current TM-enabled models don't use zero interspersion
 60 |     if (UsesTorchMoji)
 61 |         PaddedIDs.assign(InputIDs.begin(),InputIDs.end());
 62 |     else
 63 |         PaddedIDs = ZeroPadVec(InputIDs);
 64 | 
 65 | 
 66 |     std::vector<int64_t> inLen = { (int64_t)PaddedIDs.size() };
 67 | 
 68 | 
 69 |     // ZDisket: Is this really necessary?
 70 |     torch::TensorOptions Opts = torch::TensorOptions().requires_grad(false);
 71 | 
 72 |     auto InIDS = torch::tensor(PaddedIDs, Opts).unsqueeze(0);
 73 |     auto InLens = torch::tensor(inLen, Opts);
 74 |     auto InLenScale = torch::tensor({ ArgsFloat[0]}, Opts);
 75 | 
 76 | 
 77 | 
 78 |     std::vector<torch::jit::IValue> inputs{ InIDS,InLens,InLenScale };
 79 | 
 80 |     if (SpeakerID != -1){
 81 |         auto InSpkid = torch::tensor({SpeakerID},Opts);
 82 |         inputs.push_back(InSpkid);
 83 |     }
 84 | 
 85 |     if (EmotionID != -1){
 86 |         auto InEmid = torch::tensor({EmotionID},Opts);
 87 |         inputs.push_back(InEmid);
 88 |     }
 89 | 
 90 |     // Handle TorchMoji Emb
 91 |     if (UsesTorchMoji){
 92 |         // Make a copy stripping first elem
 93 |         std::vector<float> TMHidden(ArgsFloat.begin() + 1, ArgsFloat.end());
 94 | 
 95 |         auto InMoji = torch::tensor(TMHidden,Opts).unsqueeze(0);
 96 |         inputs.push_back(InMoji);
 97 | 
 98 |     }
 99 | 
100 |     // Infer
101 | 
102 |     c10::IValue Output = Model.get_method("infer_ts")(inputs);
103 | 
104 |     // Output = tuple (audio,att)
105 | 
106 |     auto OutputT = Output.toTuple();
107 | 
108 |     // Grab audio
109 |     // [1, frames] -> [frames]
110 |     auto AuTens = OutputT.get()->elements()[0].toTensor().squeeze();
111 | 
112 |     // Grab Attention
113 |     // [1, 1, x, y] -> [x, y] -> [y,x] -> [1, y, x]
114 |     auto AttTens = OutputT.get()->elements()[1].toTensor().squeeze().transpose(0,1).unsqueeze(0);
115 | 
116 |     Attention = VoxUtil::CopyTensor<float>(AttTens);
117 | 
118 |     return VoxUtil::CopyTensor<float>(AuTens);
119 | 
120 | }
121 | 


--------------------------------------------------------------------------------
/vits.h:
--------------------------------------------------------------------------------
 1 | #ifndef VITS_H
 2 | #define VITS_H
 3 | 
 4 | 
 5 | #include "melgen.h"
 6 | 
 7 | 
 8 | 
 9 | 
10 | 
// VITS is a fully E2E model; no separate vocoder needed
class VITS : public MelGen
{
private:
    // TorchScript module loaded by Initialize().
    torch::jit::script::Module Model;

    // Most VITS model require zero-interspersed input IDs
    // {a, b} -> {0, a, 0, b, 0}
    std::vector<int64_t> ZeroPadVec(const std::vector<int32_t>& InIDs);

public:
    // Attention output of the most recent DoInference() call.
    TFTensor<float> Attention;

    VITS();

    // Since VITS runs on PyTorch, we override the loader
    /*
    Initialize and load the model

    -> SavedModelFolder: Not a folder, but path to TorchScripted .pt file
    -> InTTSRepo: stored as the current repo on success
    <- Returns: (bool)Success
    */
    virtual bool Initialize(const std::string& SavedModelFolder, ETTSRepo::Enum InTTSRepo);


    /*
    Do inference on a VITS model.

    -> InputIDs: Input IDs of tokens for inference
    -> SpeakerID: ID of the speaker in the model to do inference on. If single speaker, always leave at 0. If multispeaker, refer to your model.
       The implementation omits the speaker input only when this is -1.
    -> EmotionID: ID of the emotion; the emotion input is omitted when this is -1.
    -> ArgsFloat[0]: Length scale.
    -> ArgsFloat[1..]: Optional TorchMoji hidden states; when present they are passed
       as an extra input and InputIDs are not zero-interspersed.
    -> ArgsInt: Not used by the implementation.

    <- Returns: TFTensor<float> with shape {frames} of audio data
    */
    TFTensor<float> DoInference(const std::vector<int32_t>& InputIDs,const std::vector<float>& ArgsFloat,const std::vector<int32_t> ArgsInt, int32_t SpeakerID = 0, int32_t EmotionID = -1);
};
46 | 
47 | #endif // VITS_H
48 | 


--------------------------------------------------------------------------------
/voicemanager.cpp:
--------------------------------------------------------------------------------
  1 | #include "voicemanager.h"
  2 | #define SAFE_DELETE(pdel)if (pdel){delete pdel;}
  3 | #include <QCoreApplication>
  4 | 
  5 | Phonemizer* VoiceManager::LoadPhonemizer(const QString& InPhnLang,int32_t InLangNum)
  6 | {
  7 | 
  8 |     for (Phonemizer*& Phn : Phonemizers)
  9 |     {
 10 |        if (Phn->GetPhnLanguage() == InPhnLang.toStdString())
 11 |            return Phn;
 12 | 
 13 | 
 14 |     }
 15 | 
 16 | 
 17 |     Phonemizer* CreatePhn = new Phonemizer;
 18 | 
 19 |     // Initialize regularly or minimally
 20 |     CreatePhn->Initialize(QString(QCoreApplication::applicationDirPath() + "/g2p/" + InPhnLang).toStdString(),
 21 |                           InLangNum == ETTSLanguageType::Char);
 22 | 
 23 |     CreatePhn->SetPhnLanguage(InPhnLang.toStdString());
 24 | 
 25 | 
 26 |     Phonemizers.push_back(CreatePhn);
 27 | 
 28 |     return Phonemizers[Phonemizers.size() - 1];
 29 | 
 30 | 
 31 | }
 32 | 
 33 | ESpeakPhonemizer *VoiceManager::LoadESpeakPhonemizer(const QString &InVoiceName)
 34 | {
 35 |     for (ESpeakPhonemizer*& Phn : ENGPhonemizers)
 36 |     {
 37 |        if (Phn->GetVoiceName() == InVoiceName.toStdString())
 38 |            return Phn;
 39 | 
 40 | 
 41 |     }
 42 | 
 43 |     ESpeakPhonemizer* CreatePhn = new ESpeakPhonemizer;
 44 |     CreatePhn->Initialize(QString(QCoreApplication::applicationDirPath() + "/g2p/eSpeak-NG").toStdString()
 45 |                           ,InVoiceName.toStdString());
 46 | 
 47 |     ENGPhonemizers.push_back(CreatePhn);
 48 | 
 49 |     return CreatePhn;
 50 | 
 51 | }
 52 | 
 53 | size_t VoiceManager::LoadVoice(const QString &Voname)
 54 | {
 55 |     Voice* NuVoice = new Voice(QString(QCoreApplication::applicationDirPath() + "/models/" + Voname).toStdString(),Voname.toStdString(),nullptr);
 56 | 
 57 |     QString PLang = QString::fromStdString(NuVoice->GetInfo().s_Language_Fullname);
 58 | 
 59 |     Phonemizer* Phon = LoadPhonemizer(PLang,NuVoice->GetInfo().LangType);
 60 |     ESpeakPhonemizer* ENG_Phon = nullptr;
 61 | 
 62 |     if (NuVoice->GetInfo().s_eSpeakLang.size()){
 63 |         ENG_Phon = LoadESpeakPhonemizer(QString::fromStdString(NuVoice->GetInfo().s_eSpeakLang));
 64 | 
 65 | 
 66 |     }
 67 | 
 68 | 
 69 |     NuVoice->AddPhonemizer(Phon,ENG_Phon);
 70 | 
 71 |     std::string NumTxtPath = QString(QCoreApplication::applicationDirPath() + "/num2txt/" +
 72 |                                      QString::fromStdString(NuVoice->GetInfo().s_Language) + ".sor").toStdString();
 73 | 
 74 |     NuVoice->LoadNumberText(NumTxtPath);
 75 | 
 76 |     Voices.push_back(NuVoice);
 77 |     Voices[Voices.size() - 1]->SetDictEntries(ManDict);
 78 |     return Voices.size() - 1;
 79 | }
 80 | 
 81 | int VoiceManager::FindVoice(const QString &inName, bool autoload)
 82 | {
 83 |     for (size_t i = 0; i < Voices.size();i++)
 84 |     {
 85 |         if (Voices[i]->Name == inName.toStdString())
 86 |             return (int)i;
 87 | 
 88 | 
 89 | 
 90 | 
 91 |     }
 92 | 
 93 |     if (autoload)
 94 |         return (int)LoadVoice(inName);
 95 |     else
 96 |         return -1;
 97 | 
 98 | 
 99 | }
100 | 
// Returns the voice at index `in` of Voices.
// No bounds checking is performed; `in` must be a valid index.
Voice *VoiceManager::operator[](size_t in)
{

    return Voices[in];

}
107 | 
// Replaces the manager's dictionary with a copy of InDict.
// Only ManDict is updated here; LoadVoice() hands ManDict to each voice it
// loads afterwards, and voices that are already loaded are not touched.
void VoiceManager::SetDict(const std::vector<DictEntry> &InDict)
{
    ManDict = InDict;

}
113 | 
114 | VoiceManager::VoiceManager()
115 | {
116 | 
117 | }
118 | 
119 | VoiceManager::~VoiceManager()
120 | {
121 | 
122 |     for (Phonemizer* Phni : Phonemizers)
123 |     {
124 |         SAFE_DELETE(Phni)
125 | 
126 | 
127 |     }
128 |     for (Voice* Vo : Voices)
129 |     {
130 | 
131 |         SAFE_DELETE(Vo)
132 | 
133 |     }
134 | 
135 |     Voices.clear();
136 |     Phonemizers.clear();
137 | 
138 | 
139 | 
140 | }
141 | 


--------------------------------------------------------------------------------
/voicemanager.h:
--------------------------------------------------------------------------------
 1 | #ifndef VOICEMANAGER_H
 2 | #define VOICEMANAGER_H
 3 | #include "Voice.h"
 4 | #include <QString>
 5 | #include "phoneticdict.h"
 6 | #include "phonemizer.h"
// Owns the loaded voices and the phonemizers shared between them.
class VoiceManager
{
private:
    // Loaded voices; indices returned by LoadVoice()/FindVoice() refer to this vector.
    std::vector<Voice*> Voices;
    // Dictionary entries handed to every voice loaded after SetDict().
    std::vector<DictEntry> ManDict;

    // Cached phonemizers, one per language name.
    std::vector<Phonemizer*> Phonemizers;
    // Cached eSpeak phonemizers, one per voice name.
    std::vector<ESpeakPhonemizer*> ENGPhonemizers;

    // Return the cached phonemizer for the language, creating it if needed.
    Phonemizer* LoadPhonemizer(const QString& InPhnLang, int32_t InLangNum);
    // Return the cached eSpeak phonemizer for the voice name, creating it if needed.
    ESpeakPhonemizer* LoadESpeakPhonemizer(const QString& InVoiceName);



public:

    // Load a voice and return index in vector
    size_t LoadVoice(const QString& Voname);
    // Find a voice in Voices
    // Returns index in Voices vector, if not found returns -1
    // If autoload is true, a voice that is not found is loaded and its new index returned instead
    int FindVoice(const QString& inName, bool autoload = true);

    // Unchecked access to the voice at index `in`.
    Voice* operator[](size_t in);

    // Direct mutable access to the voice list.
    inline std::vector<Voice*>& GetVoices(){return Voices;}

    // Replace the dictionary given to voices loaded from now on.
    void SetDict(const std::vector<DictEntry>& InDict);


    VoiceManager();
    ~VoiceManager();
};
39 | 
40 | #endif // VOICEMANAGER_H
41 | 


--------------------------------------------------------------------------------
/voxer.cpp:
--------------------------------------------------------------------------------
  1 | #include "voxer.h"
  2 | using namespace std::chrono;
  3 | #include "r8b/r8bsrc.h"
  4 | 
  5 | float remap(float OldValue, float OldMin, float OldMax, float NewMin, float NewMax ){
  6 | 
  7 |     float NewValue = (((OldValue - OldMin) * (NewMax - NewMin)) / (OldMax - OldMin)) + NewMin;
  8 | 
  9 |     return NewValue;
 10 | 
 11 | }
 12 | 
 13 | std::vector<float> Resample(const std::vector<float>& InAudata,uint32_t SrcSampleRate,uint32_t OutSampleRate)
 14 | {
 15 |     if (SrcSampleRate == OutSampleRate)
 16 |         return InAudata;
 17 | 
 18 |     // Define the resampler
 19 | 
 20 |     int32_t SampleCount = (int32_t)InAudata.size();
 21 | 
 22 | 
 23 |     // 2.5 is a good middle-ground number for this parameter whose name I just forgot
 24 |     CR8BResampler Resampler = r8b_create((double)SrcSampleRate,(double)OutSampleRate,SampleCount,2.5,ER8BResamplerRes::r8brr24);
 25 | 
 26 |     double* OutBuff = nullptr;
 27 | 
 28 |     std::vector<double> DBuff;
 29 |     DBuff.resize(InAudata.size());
 30 | 
 31 |     // Cast input buffer to double
 32 |     for (size_t i = 0; i < InAudata.size();i++)
 33 |         DBuff[i] = (double)InAudata[i];
 34 | 
 35 |     int32_t NumSamples = r8b_process(Resampler,DBuff.data(),SampleCount,OutBuff);
 36 | 
 37 |     // Create output buffer
 38 |     std::vector<float> OutAud;
 39 |     OutAud.resize((size_t)NumSamples);
 40 | 
 41 | 
 42 |     // Re-cast to float
 43 |     for (size_t i = 0; i < (size_t)NumSamples;i++)
 44 |         OutAud[i] = (float)OutBuff[i];
 45 | 
 46 | 
 47 |     // Cleanup
 48 |     r8b_clear(Resampler);
 49 |     r8b_delete(Resampler);
 50 | 
 51 | 
 52 |     return OutAud;
 53 | 
 54 | 
 55 | 
 56 | 
 57 | 
 58 | }
 59 | 
 60 | std::vector<float> DoDenoise(const std::vector<float>& InAudata,DenoiseState* DenState)
 61 | {
 62 |  //   if (!DenState)
 63 |    //     return InAudata;
 64 | 
 65 |     std::vector<float> NewAudata(InAudata.size());
 66 |     float buf[RNNoiseFrameSize];
 67 | 
 68 |     // Find the min and max vals in the vector
 69 |     float MinVal = -1.f;
 70 |     float MaxVal = 1.f;
 71 | 
 72 |     for (size_t f = 0; f < InAudata.size();f += RNNoiseFrameSize)
 73 |     {
 74 |         //RNNoise expects a float in range [-32768.f,32768.f]
 75 |         for (size_t y = 0; y < RNNoiseFrameSize;y++)
 76 |         {
 77 |             size_t TotalIndex = f + y;
 78 | 
 79 |             if (TotalIndex > InAudata.size())
 80 |                 break;
 81 | 
 82 |             buf[y] = remap(InAudata[TotalIndex],MinVal,MaxVal,-32768.f,32768.f);
 83 | 
 84 |         }
 85 | 
 86 | 
 87 |         rnnoise_process_frame(DenState,buf,buf);
 88 | 
 89 |         for (size_t x = 0; x < RNNoiseFrameSize;x++)
 90 |         {
 91 |             size_t TotalIndex = f + x;
 92 |             if (TotalIndex > NewAudata.size())
 93 |                 break;
 94 | 
 95 |             NewAudata[TotalIndex] = remap(buf[x],-32768.f,32768.f,-1.f,1.f);
 96 | 
 97 |         }
 98 | 
 99 | 
100 | 
101 |     }
102 | 
103 | 
104 | 
105 | 
106 |     // Due to post-normalization, the audio is about 2.1x louder. Apply makeup deamplification
107 |    // for (float& f : NewAudata)
108 |      //   f *= 0.4f;
109 | 
110 |     return NewAudata;
111 | }
112 | 
113 | void Voxer::run()
114 | {
115 | 
116 | 
117 | 
118 | 
119 | 
120 |     pAttItem->setBackgroundColor(InProcessColor);
121 | 
122 | 
123 |     high_resolution_clock::time_point Start = high_resolution_clock::now();
124 |     std::vector<float> Audat;
125 | 
126 |     VoxResults Res;
127 | 
128 |     if (!ForcedAudio.size())
129 |     {
130 |         Res = pAttVoice->Vocalize(Prompt.toStdString(),Speed,SpeakerID,Energy,F0,EmotionID,EmotionOverride.toStdString());
131 |         Audat = Res.Audio;
132 | 
133 |     }
134 |     else
135 |     {
136 |         Audat = ForcedAudio;
137 | 
138 |     }
139 | 
140 | 
141 |     high_resolution_clock::time_point End = high_resolution_clock::now();
142 | 
143 | 
144 |     // Resample the audio to 48KHz
145 |     std::vector<float> AudRes = Resample(Audat,SampleRate,CommonSampleRate);
146 | 
147 | 
148 | 
149 |     DenoiseState* Denoiser = nullptr;
150 |     if (Denoise)
151 |     {
152 |         // Every thread creates its own denoiser.
153 |         // This is because a generic passed denoiser created from the main window
154 |         // worked well for the first generation but later shat itself (heavy artifacts then just silence)
155 | 
156 |         Denoiser = rnnoise_create(nullptr);
157 |         // Denoise. Function will return same vec if there is no denoiser
158 |         AudRes = DoDenoise(AudRes,Denoiser);
159 | 
160 | 
161 | 
162 | 
163 |     }
164 | 
165 |     // Apply Amplification
166 |     for (float& f : AudRes)
167 |         f *= Amplify;
168 | 
169 | 
170 | 
171 |      pAttItem->setBackgroundColor(DoneColor);
172 | 
173 | 
174 |     if (ForcedAudio.size())
175 |     {
176 |         Res.Mel.Shape.push_back(-1);
177 |         // see MakeInferDetails at batchdenoisedlg.cpp
178 |         AudRes = Resample(AudRes,CommonSampleRate,SpeakerID);
179 | 
180 | 
181 |     }
182 | 
183 | 
184 | 
185 | 
186 | 
187 | 
188 |     if (ExportFileName.size())
189 |     {
190 |         VoxUtil::ExportWAV(ExportFileName.toStdString(),AudRes,SpeakerID);
191 |         AudRes.clear();
192 | 
193 |         CurrentID = UINT32_MAX;
194 |     }
195 |     emit Done(AudRes,Res.Mel,duration_cast<duration<double>>(End - Start),CurrentID);
196 | 
197 | 
198 | 
199 | 
200 |     if (Res.Alignment.Data.size() > 0)
201 |         emit AttentionReady(Res.Alignment,CurrentID);
202 | 
203 |     // rnnoise_destroy throws some exception we can't do anything about
204 |     if (Denoise)
205 |     {
206 |         try {
207 |             rnnoise_destroy(Denoiser);
208 | 
209 |         } catch (...) {
210 | 
211 |         }
212 | 
213 |     }
214 | 
215 | }
216 | 
217 | Voxer::Voxer()
218 | {
219 | 
220 | }
221 | 


--------------------------------------------------------------------------------
/voxer.h:
--------------------------------------------------------------------------------
 1 | #ifndef VOXER_H
 2 | #define VOXER_H
 3 | 
 4 | #include "Voice.h"
 5 | #include <QThread>
 6 | 
 7 | #include <QListWidgetItem>
 8 | #include <chrono>
 9 | #include "rnnoise.h"
10 | 
// Colors that can be applied to a list item's background to indicate its state.
// Voxer::run() sets InProcessColor when it starts and DoneColor when processing has finished.
const QColor DoneColor = QColor(0,128,0);
const QColor PlayingColor = QColor(168, 40, 94);
const QColor InProcessColor = QColor(0,0,255);
14 | 
// A Voxer is a thread spawned for the sole purpose of doing inference
// All public members below are inputs read by run(); none of them is
// initialised by the constructor.
class Voxer : public QThread
{
    Q_OBJECT

    // Thread body: synthesize (or take ForcedAudio), resample, denoise, amplify, export/emit.
    void run() override;
public:

    // Voice used for synthesis when ForcedAudio is empty.
    Voice* pAttVoice;
    // List item whose background color reflects progress.
    QListWidgetItem* pAttItem;
    // Text to synthesize.
    QString Prompt;
    // Speed, Energy and F0 are forwarded to Voice::Vocalize().
    float Speed;
    float Energy;
    float F0;
    // Forwarded to Vocalize(). When ForcedAudio or ExportFileName is set, run()
    // also uses this value as a sample rate (see MakeInferDetails at batchdenoisedlg.cpp).
    int32_t SpeakerID;
    // Sample rate of the produced/forced audio, before resampling to CommonSampleRate.
    uint32_t SampleRate;
    int32_t EmotionID;
    // Whether to run RNNoise on the resampled audio.
    bool Denoise;
    QString EmotionOverride;

    // DANGER: If this is set, the audio is written to this file instead;
    // Done is then emitted with empty audio and ID UINT32_MAX
    QString ExportFileName;

    // Gain multiplied into every output sample.
    float Amplify;
    Voxer();

    // ID passed along with the emitted signals.
    uint32_t CurrentID;

    // If non-empty, used as the audio instead of synthesizing Prompt.
    std::vector<float> ForcedAudio;



signals:
    // Emitted once per run() with the final audio, the mel and the time spent obtaining the audio.
    void Done(std::vector<float> AudioData,TFTensor<float> Mel,std::chrono::duration<double> infer_span,uint32_t ID);
    // Emitted after Done when the result contains alignment data.
    void AttentionReady(TFTensor<float> Att,uint32_t ID);

};
52 | 
53 | #endif // VOXER_H
54 | 


--------------------------------------------------------------------------------
/winicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZDisket/TensorVox/911c2d538d3dbfda26aa82fe5ca1109be33c2140/winicon.ico


--------------------------------------------------------------------------------