├── .gitignore
├── LICENSE
├── README.md
├── backend
│   ├── __init__.py
│   ├── config
│   │   └── default.yaml
│   ├── functional.py
│   ├── hparams.py
│   ├── manager.py
│   ├── models.py
│   ├── mytts.py
│   ├── pretrained
│   │   ├── ljspeech-melgan-epoch3200.pth
│   │   └── ljspeech-parallel-epoch0100.pth
│   ├── synthesizer.py
│   └── transform.py
├── client.py
├── client2.py
├── frontend
│   ├── .editorconfig
│   ├── .gitignore
│   ├── README.md
│   ├── babel.config.js
│   ├── package-lock.json
│   ├── package.json
│   ├── public
│   │   ├── demo.wav
│   │   ├── favicon.ico
│   │   └── index.html
│   ├── src
│   │   ├── App.vue
│   │   ├── assets
│   │   │   ├── logo.png
│   │   │   └── logo.svg
│   │   ├── components
│   │   │   ├── HelloWorld.vue
│   │   │   └── MyParaTTS.vue
│   │   ├── main.js
│   │   ├── plugins
│   │   │   └── vuetify.js
│   │   ├── router
│   │   │   └── index.js
│   │   └── views
│   │       ├── About.vue
│   │       └── Home.vue
│   └── vue.config.js
├── requirements.txt
└── server.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Projects
2 | .vscode/
3 | dist/
4 | outputs/
5 | 000-*
6 | 
7 | # Logs
8 | logs
9 | *.log
10 | npm-debug.log*
11 | yarn-debug.log*
12 | yarn-error.log*
13 | lerna-debug.log*
14 | 
15 | # Diagnostic reports (https://nodejs.org/api/report.html)
16 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
17 | 
18 | # Runtime data
19 | pids
20 | *.pid
21 | *.seed
22 | *.pid.lock
23 | 
24 | # Directory for instrumented libs generated by jscoverage/JSCover
25 | lib-cov
26 | 
27 | # Coverage directory used by tools like istanbul
28 | coverage
29 | *.lcov
30 | 
31 | # nyc test coverage
32 | .nyc_output
33 | 
34 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
35 | .grunt
36 | 
37 | # Bower dependency directory (https://bower.io/)
38 | bower_components
39 | 
40 | # node-waf configuration
41 | .lock-wscript
42 | 
43 | # Compiled binary addons (https://nodejs.org/api/addons.html)
44 | build/Release
45 | 
46 | # Dependency directories
47 | node_modules/
48 | jspm_packages/
49 | 
50 | # TypeScript v1 declaration files
51 | typings/
52 | 
53 | # TypeScript cache
54 | *.tsbuildinfo
55 | 
56 | # Optional npm cache directory
57 | .npm
58 | 
59 | # Optional eslint cache
60 | .eslintcache
61 | 
62 | # Microbundle cache
63 | .rpt2_cache/
64 | .rts2_cache_cjs/
65 | .rts2_cache_es/
66 | .rts2_cache_umd/
67 | 
68 | # Optional REPL history
69 | .node_repl_history
70 | 
71 | # Output of 'npm pack'
72 | *.tgz
73 | 
74 | # Yarn Integrity file
75 | .yarn-integrity
76 | 
77 | # dotenv environment variables file
78 | .env
79 | .env.test
80 | 
81 | # parcel-bundler cache (https://parceljs.org/)
82 | .cache
83 | 
84 | # Next.js build output
85 | .next
86 | 
87 | # Nuxt.js build / generate output
88 | .nuxt
89 | dist
90 | 
91 | # Gatsby files
92 | .cache/
93 | # Comment in the public line if your project uses Gatsby and *not* Next.js
94 | # https://nextjs.org/blog/next-9-1#public-directory-support
95 | # public
96 | 
97 | # vuepress build output
98 | .vuepress/dist
99 | 
100 | # Serverless directories
101 | .serverless/
102 | 
103 | # FuseBox cache
104 | .fusebox/
105 | 
106 | # DynamoDB Local files
107 | .dynamodb/
108 | 
109 | # TernJS port file
110 | .tern-port
111 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2021 Atomicoo
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PTTS Web Demo
2 | 
3 | [TOC]
4 | 
5 | A single-page speech-synthesis demo built with [Flask](https://github.com/pallets/flask) + [Vue](https://github.com/vuejs/vue) (UI framework: [Vuetify](https://github.com/vuetifyjs/vuetify)). The TTS backend is based on my other project, [atomicoo/ParallelTTS](https://github.com/atomicoo/ParallelTTS).
6 | 
7 | ## Directory Structure
8 | 
9 | ```
10 | .
11 | |--- backend/
12 |      |--- pretrained/       # pretrained models
13 |      |--- mytts.py          # TTS wrapper class
14 |      |--- ...
15 | |--- dist/                  # compiled output of the frontend
16 | |--- frontend/
17 |      |--- public/
18 |      |--- src/
19 |           |--- components/
20 |                |--- MyParaTTS.vue   # speech-synthesis page
21 |           |--- ...
22 |      |--- ...
23 | |--- client.py              # API test script
24 | |--- LICENSE
25 | |--- README.md              # this document
26 | |--- requirements.txt       # Python dependencies
27 | |--- server.py              # server startup script
28 | ```
29 | 
30 | ## Quick Start
31 | 
32 | ```shell
33 | $ git clone https://github.com/atomicoo/PTTS-WebAPP.git
34 | $ cd PTTS-WebAPP/frontend/
35 | $ npm install --save
36 | $ npm run build
37 | $ cd ..
38 | $ pip install -r requirements.txt
39 | $ python server.py
40 | $ python client.py
41 | ```
42 | 
43 | After running `npm run build`, the compiled output of the frontend code should have been generated under `./dist/` in the project root.
44 | 
45 | After running `python server.py`, the server is up; try `python client.py` first to check that the speech-synthesis API works correctly.
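46 | 
47 | You can also exercise the HTTP API by hand. The sketch below mirrors what `client.py` sends; it assumes the server is running locally on the default port, and uses the server-side defaults of 4 for `speed`/`volume`/`tone` (see `server.py`):
48 | 
49 | ```python
50 | import requests
51 | 
52 | # POST /api/mytts returns a WAV file; GET with the same query params also works.
53 | resp = requests.post(
54 |     "http://127.0.0.1:5000/api/mytts",
55 |     json={"text": "Hello world.", "speed": 4, "volume": 4, "tone": 4},
56 | )
57 | with open("demo.wav", "wb") as f:
58 |     f.write(resp.content)
59 | ```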
60 | 
61 | If everything has worked so far, just open http://localhost:5000/.
62 | 
63 | ![image-20210412175503742](https://cdn.jsdelivr.net/gh/atomicoo/picture-bed@latest/2021/04/image-20210412175503742.png)
64 | 
65 | ## Known Issues
66 | 
67 | - The TTS backend is based on my own project [atomicoo/ParallelTTS](https://github.com/atomicoo/ParallelTTS), with the code structure refactored for simplicity. To switch to another language, in theory you only need to replace the config file under `./config/` and the model files under `./pretrained/`; this has not been fully tested, however, so problems may still occur.
68 | - ~~Only the speaking speed can be adjusted at the moment; volume and pitch controls will be added later — it would be great if someone could help out [doge].~~ (Done)
69 | - Pitch is adjusted via a time-scale modification (TSM) + resampling scheme (sketched below); volume is currently adjusted in a rather crude way, which will be replaced later.
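70 | 
71 | For the curious: `backend/mytts.py` first synthesizes the utterance `rate` times slower (by scaling the duration predictor's `alpha`), then resamples by `1/rate`, which restores the duration while scaling the pitch. A minimal sketch of the resampling half, assuming `librosa` is installed (the sine tone and the `rate` value are purely illustrative):
72 | 
73 | ```python
74 | import numpy as np
75 | import librosa
76 | 
77 | sr, rate = 22050, 1.25              # raise pitch by ~25%
78 | t = np.arange(sr) / sr
79 | wave = np.sin(2 * np.pi * 220 * t)  # 1 s of 220 Hz; stands in for audio synthesized `rate`x slower
80 | # Pretend the wave was sampled at sr*rate and resample it to sr: the result is
81 | # 1/rate times as long, and its pitch is scaled by `rate` on playback at sr.
82 | shifted = librosa.resample(wave, orig_sr=int(sr * rate), target_sr=sr)
83 | ```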
84 | 
85 | ## References
86 | 
87 | - [Flask: a Python web microframework](https://flask.palletsprojects.com/en/1.1.x/)
88 | - [Vuetify: a Material Design framework](https://vuetifyjs.com/zh-Hans/)
89 | - [A survey of speed-change-without-pitch-change (TSM) methods – Zhihu](https://zhuanlan.zhihu.com/p/337193578)
--------------------------------------------------------------------------------
/backend/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atomicoo/PTTS-WebAPP/ff76ebd7e44542e79dcdb9c3bc23e91199de3a54/backend/__init__.py
--------------------------------------------------------------------------------
/backend/config/default.yaml:
--------------------------------------------------------------------------------
1 | text:
2 |   graphemes: &gs !!python/object/apply:eval ['list("abcdefghijklmnopqrstuvwxyz")']
3 |   phonemes: &ps !!python/object/apply:eval ['["AA0","AA1","AA2","AE0","AE1","AE2","AH0","AH1","AH2","AO0","AO1","AO2","AW0","AW1","AW2","AY0","AY1","AY2","B","CH","D","DH","EH0","EH1","EH2","ER0","ER1","ER2","EY0","EY1","EY2","F","G","HH","IH0","IH1","IH2","IY0","IY1","IY2","JH","K","L","M","N","NG","OW0","OW1","OW2","OY0","OY1","OY2","P","R","S","SH","T","TH","UH0","UH1","UH2","UW","UW0","UW1","UW2","V","W","Y","Z","ZH"]']
4 |   specials: &sp !!python/object/apply:eval ['["", ""]']
5 |   punctuations: &pt !!python/object/apply:eval ['[".", ",", "?", "!", " ", "-"]']
6 |   units_list: &ul !!python/object/apply:eval ['us+sp+pt', {'us': *ps, 'sp': *sp, 'pt': *pt}]
7 |   use_phonemes: &up true
8 | audio:
9 |   n_mel_channels: &nm 80
10 |   filter_length: 1024
11 |   hop_length: 256 # WARNING: this can't be changed.
12 |   win_length: 1024
13 |   sampling_rate: &sr 22050
14 |   segment_length: *sr
15 |   pad_short: 2000
16 |   mel_fmin: 80.0
17 |   mel_fmax: 7600.0
18 |   # Precomputed statistics of log-mel spectrograms for the speech dataset
19 |   spec_mean: -5.522 # for LJSpeech dataset
20 |   spec_std: 2.063 # for LJSpeech dataset
21 |   spec_min: -11.5129 # for LJSpeech dataset
22 |   spec_max: 2.0584 # for LJSpeech dataset
23 |   # Others
24 |   force_frame_rate: true # force match sampling rate
25 |   normalize:
26 |   match_volume: false
27 |   trim_silence: false
28 |   reduction_rate: 4
29 | parallel:
30 |   ground_truth: false
31 |   out_channels: *nm # equal to ${audio.n_mel_channels}
32 |   alphabet_size: !!python/object/apply:eval ['len(ul)', {'ul': *ul}]
33 |   channels: 128
34 |   enc_kernel_size: 4
35 |   dec_kernel_size: 4
36 |   enc_dilations: !!python/object/apply:eval ['4 * [1,2,4] + [1]'] # receptive field is max 15
37 |   dec_dilations: !!python/object/apply:eval ['4 * [1,2,4,8] + [1]'] # receptive field is max 32
38 |   normalize: FreqNorm # 'freq', 'layer', 'batch'
39 |   activation: torch.nn.ReLU # 'relu', 'linear', 'sigmoid'
40 |   final_activation: torch.nn.Identity
41 |   pos_mode: 'duration' # 'standard', 'duration'
42 |   interpolate: false # true
43 |   separate_duration_grad: true
44 |   checkpoint: 'ljspeech-parallel-epoch0100.pth'
45 | vocoder:
46 |   checkpoint: 'ljspeech-melgan-epoch3200.pth'
--------------------------------------------------------------------------------
/backend/functional.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | def mask(shape, lengths, dim=-1):
5 | 
6 |     assert dim != 0, 'Masking not available for batch dimension'
7 |     assert len(lengths) == shape[0], 'Lengths must contain as many elements as there are items in the batch'
8 | 
9 |     lengths = torch.as_tensor(lengths)
10 | 
11 |     to_expand = [1] * (len(shape)-1)+[-1]
12 |     mask = torch.arange(shape[dim]).expand(to_expand).transpose(dim, -1).expand(shape).to(lengths.device)
13 |     mask = mask < lengths.expand(to_expand).transpose(0, -1)
14 |     return mask
15 | 
16 | 
17 | def positional_encoding(channels, length, w=1):
18 |     """The positional encoding from the `Attention is all you need` paper
19 | 
20 |     :param channels: How many channels to use
21 |     :param length: How many positions to encode
22 |     :param w: Scaling factor
23 |     :return: (length, channels) tensor of interleaved sine/cosine encodings
24 |     """
25 |     enc = torch.FloatTensor(length, channels)
26 |     rows = torch.arange(length, out=torch.FloatTensor())[:, None]
27 |     cols = 2 * torch.arange(channels//2, out=torch.FloatTensor())
28 | 
29 |     enc[:, 0::2] = torch.sin(w * rows / (10.0**4 ** (cols / channels)))
30 |     enc[:, 1::2] = torch.cos(w * rows / (10.0**4 ** (cols / channels)))
31 |     return enc
32 | 
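33 | 
34 | if __name__ == '__main__':
35 |     # Illustrative usage sketch (not in the original module): build a boolean
36 |     # mask for a padded batch, and a small positional-encoding table.
37 |     m = mask((2, 5), lengths=[3, 5], dim=-1)
38 |     print(m)           # row 0: True, True, True, False, False
39 |     pe = positional_encoding(channels=8, length=5)
40 |     print(pe.shape)    # torch.Size([5, 8])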
--------------------------------------------------------------------------------
/backend/hparams.py:
--------------------------------------------------------------------------------
1 | # modified from https://github.com/HarryVolek/PyTorch_Speaker_Verification
2 | 
3 | import os
4 | import yaml
5 | 
6 | def load_hparam_str(hp_str):
7 |     path = 'temp-restore.yaml'
8 |     with open(path, 'w') as f:
9 |         f.write(hp_str)
10 |     ret = HParam(path)
11 |     os.remove(path)
12 |     return ret
13 | 
14 | 
15 | def load_hparam(filename):
16 |     stream = open(filename, 'r', encoding='utf-8')
17 |     docs = yaml.load_all(stream, Loader=yaml.Loader)
18 |     hparam_dict = dict()
19 |     for doc in docs:
20 |         for k, v in doc.items():
21 |             hparam_dict[k] = v
22 |     return hparam_dict
23 | 
24 | 
25 | def merge_dict(user, default):
26 |     if isinstance(user, dict) and isinstance(default, dict):
27 |         for k, v in default.items():
28 |             if k not in user:
29 |                 user[k] = v
30 |             else:
31 |                 user[k] = merge_dict(user[k], v)
32 |     return user
33 | 
34 | 
35 | class Dotdict(dict):
36 |     """
37 |     A dictionary that supports dot notation
38 |     as well as dictionary access notation.
39 |     Usage: d = Dotdict() or d = Dotdict({'val1': 'first'})
40 |     Set attributes: d.val2 = 'second' or d['val2'] = 'second'
41 |     Get attributes: d.val2 or d['val2']
42 |     """
43 |     __getattr__ = dict.__getitem__
44 |     __setattr__ = dict.__setitem__
45 |     __delattr__ = dict.__delitem__
46 | 
47 |     def __init__(self, dct=None):
48 |         dct = dict() if not dct else dct
49 |         for key, value in dct.items():
50 |             if hasattr(value, 'keys'):
51 |                 value = Dotdict(value)
52 |             self[key] = value
53 | 
54 | 
55 | class HParam(Dotdict):
56 | 
57 |     def __init__(self, file):
58 |         super(Dotdict, self).__init__()
59 |         hp_dict = load_hparam(file)
60 |         hp_dotdict = Dotdict(hp_dict)
61 |         for k, v in hp_dotdict.items():
62 |             setattr(self, k, v)
63 | 
64 |     __getattr__ = Dotdict.__getitem__
65 |     __setattr__ = Dotdict.__setitem__
66 |     __delattr__ = Dotdict.__delitem__
67 | 
--------------------------------------------------------------------------------
/backend/manager.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Aug 22 19:41:55 2017
4 | @author: Quantum Liu
5 | """
6 | '''
7 | Example:
8 | gm = GPUManager()
9 | with torch.cuda.device(gm.auto_choice()):
10 |     blabla
11 | Or:
12 | gm = GPUManager()
13 | torch.cuda.set_device(gm.auto_choice())
14 | '''
15 | 
16 | import os
17 | import torch
18 | 
19 | 
20 | def check_gpus():
21 |     '''
22 |     GPU availability check
23 |     http://pytorch-cn.readthedocs.io/zh/latest/package_references/torch-cuda/
24 |     '''
25 |     if not torch.cuda.is_available():
26 |         print('This script can only manage NVIDIA GPUs, but no GPU was found on this device!')
27 |         return False
28 |     elif not 'NVIDIA System Management' in os.popen('nvidia-smi -h').read():
29 |         print("The 'nvidia-smi' tool was not found.")
30 |         return False
31 |     return True
32 | 
33 | 
34 | if check_gpus():
35 |     def parse(line, qargs):
36 |         '''
37 |         line:
38 |             a line of text
39 |         qargs:
40 |             query arguments
41 |         return:
42 |             a dict of gpu infos
43 |         Parse one line of the CSV-format text returned by nvidia-smi.
44 |         '''
45 |         numeric_args = ['memory.free', 'memory.total', 'power.draw', 'power.limit']  # numeric fields
46 |         power_manage_enable = lambda v: (not 'Not Support' in v)  # whether the GPU supports power management (laptop GPUs may not)
47 |         to_numeric = lambda v: float(v.upper().strip().replace('MIB', '').replace('W', ''))  # strip the unit from a value string
48 |         process = lambda k, v: ((int(to_numeric(v)) if power_manage_enable(v) else 1) if k in numeric_args else v.strip())
49 |         return {k: process(k, v) for k, v in zip(qargs, line.strip().split(','))}
50 | 
51 |     def query_gpu(qargs=[]):
52 |         '''
53 |         qargs:
54 |             query arguments
55 |         return:
56 |             a list of dicts
57 |         Query GPU info via nvidia-smi.
58 |         '''
59 |         qargs = ['index', 'gpu_name', 'memory.free', 'memory.total', 'power.draw', 'power.limit'] + qargs
60 |         cmd = 'nvidia-smi --query-gpu={} --format=csv,noheader'.format(','.join(qargs))
61 |         results = os.popen(cmd).readlines()
62 |         return [parse(line, qargs) for line in results]
63 | 
64 |     def by_power(d):
65 |         '''
66 |         Helper function for sorting GPUs by power usage.
67 |         '''
68 |         power_infos = (d['power.draw'], d['power.limit'])
69 |         if any(v == 1 for v in power_infos):
70 |             print('Power management unavailable for GPU {}'.format(d['index']))
71 |             return 1
72 |         return float(d['power.draw']) / d['power.limit']
73 | 
74 |     class GPUManager:
75 |         '''
76 |         qargs:
77 |             query arguments
78 |         A manager that lists all available GPU devices, sorts them, and picks
79 |         the most idle one. Within one GPUManager instance, each GPU is marked
80 |         once it has been chosen, and unmarked GPUs are preferred.
81 |         '''
82 |         def __init__(self, qargs=[]):
83 |             self.qargs = qargs
84 |             self.gpus = query_gpu(qargs)
85 |             for gpu in self.gpus:
86 |                 gpu['specified'] = False
87 |             self.gpu_num = len(self.gpus)
88 | 
89 |         def _sort_by_memory(self, gpus, by_size=False):
90 |             if by_size:
91 |                 print('Sorted by free memory size')
92 |                 return sorted(gpus, key=lambda d: d['memory.free'], reverse=True)
93 |             else:
94 |                 print('Sorted by free memory rate')
95 |                 return sorted(gpus, key=lambda d: float(d['memory.free']) / d['memory.total'], reverse=True)
96 | 
97 |         def _sort_by_power(self, gpus):
98 |             return sorted(gpus, key=by_power)
99 | 
100 |         def _sort_by_custom(self, gpus, key, reverse=False, qargs=[]):
101 |             if isinstance(key, str) and (key in qargs):
102 |                 return sorted(gpus, key=lambda d: d[key], reverse=reverse)
103 |             if isinstance(key, type(lambda a: a)):
104 |                 return sorted(gpus, key=key, reverse=reverse)
105 |             raise ValueError("The argument 'key' must be a function or a key in query args; please read the documentation of nvidia-smi")
106 | 
107 |         def auto_choice(self, mode=0):
108 |             '''
109 |             mode:
110 |                 0: (default) sorted by free memory size
111 |             return:
112 |                 the index of the chosen device
113 |             Automatically choose the most idle GPU, preferring devices that
114 |             have not been chosen before.
115 |             '''
116 |             for old_infos, new_infos in zip(self.gpus, query_gpu(self.qargs)):
117 |                 old_infos.update(new_infos)
118 |             unspecified_gpus = [gpu for gpu in self.gpus if not gpu['specified']] or self.gpus
119 | 
120 |             if mode == 0:
121 |                 print('Choosing the GPU device with largest free memory...')
122 |                 chosen_gpu = self._sort_by_memory(unspecified_gpus, True)[0]
123 |             elif mode == 1:
124 |                 print('Choosing the GPU device with highest free memory rate...')
125 |                 chosen_gpu = self._sort_by_memory(unspecified_gpus, False)[0]
126 |             elif mode == 2:
127 |                 print('Choosing the GPU device by power usage...')
128 |                 chosen_gpu = self._sort_by_power(unspecified_gpus)[0]
129 |             else:
130 |                 print('Given an unavailable mode; choosing by free memory instead')
131 |                 chosen_gpu = self._sort_by_memory(unspecified_gpus)[0]
132 |             chosen_gpu['specified'] = True
133 |             index = chosen_gpu['index']
134 |             print('Using GPU {i}, its info:\n\t{info}'.format(i=index, info='\n\t'.join([str(k) + ': ' + str(v) for k, v in chosen_gpu.items()])))
135 |             return int(index)
136 | else:
137 |     raise ImportError('GPU available check failed!')
138 | 
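139 | 
140 | if __name__ == '__main__':
141 |     # Illustrative usage sketch (not in the original file): requires an NVIDIA
142 |     # GPU and nvidia-smi on PATH, otherwise the check above raises ImportError.
143 |     print('Auto-chosen GPU index:', GPUManager().auto_choice(mode=0))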
--------------------------------------------------------------------------------
/backend/models.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | 
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | 
7 | from .functional import mask, positional_encoding
8 | from .transform import Pad
9 | 
10 | 
11 | # ===============================================
12 | # Parallel Text2Mel
13 | # ===============================================
14 | 
15 | def expand_encodings(encodings, durations):
16 |     """Expand phoneme encodings according to corresponding estimated durations
17 | 
18 |     Durations should be 0-masked, to prevent expanding of padded characters
19 |     :param encodings: (batch, time, channels) phoneme encodings
20 |     :param durations: (batch, time)
21 |     :return: list of expanded encodings, one (expanded_time, channels) tensor per utterance
22 |     """
23 |     encodings = [torch.repeat_interleave(e, d, dim=0)
24 |                  for e, d in zip(encodings, durations.long())]
25 | 
26 |     return encodings
27 | 
28 | 
29 | def expand_positional_encodings(durations, channels, repeat=False):
30 |     """Expand positional encoding to align with phoneme durations
31 | 
32 |     Example:
33 |         If repeat:
34 |         phonemes a, b, c have durations 3, 5, 4
35 |         The expanded encoding is
36 |            a   a   a   b   b   b   b   b   c   c   c   c
37 |         [e1, e2, e3, e1, e2, e3, e4, e5, e1, e2, e3, e4]
38 | 
39 |     Use Pad from transforms to get batched tensor.
40 | 
41 |     :param durations: (batch, time), 0-masked tensor
42 |     :return: positional encodings as a list of tensors, (batch, time)
43 |     """
44 | 
45 |     durations = durations.long()
46 |     def rng(l): return list(range(l))
47 | 
48 |     if repeat:
49 |         max_len = torch.max(durations)
50 |         pe = positional_encoding(channels, max_len)
51 |         idx = []
52 |         for d in durations:
53 |             idx.append(list(itertools.chain.from_iterable([rng(dd) for dd in d])))
54 |         return [pe[i] for i in idx]
55 |     else:
56 |         max_len = torch.max(durations.sum(dim=-1))
57 |         pe = positional_encoding(channels, max_len)
58 |         return [pe[:s] for s in durations.sum(dim=-1)]
59 | 
60 | 
61 | def round_and_mask(pred_durations, plen):
62 |     pred_durations[pred_durations < 1] = 1  # we do not care about gradient outside training
63 |     pred_durations = mask_durations(pred_durations, plen)  # the durations now expand only phonemes and not padded values
64 |     pred_durations = torch.round(pred_durations)
65 |     return pred_durations
66 | 
67 | 
68 | def mask_durations(durations, plen):
69 |     m = mask(durations.shape, plen, dim=-1).to(durations.device).float()
70 |     return durations * m
71 | 
72 | 
73 | def expand_enc(encodings, durations, mode=None):
74 |     """Copy each phoneme encoding as many times as the duration predictor predicts"""
75 |     encodings = Pad(0)(expand_encodings(encodings, durations))
76 |     if mode:
77 |         if mode == 'duration':
78 |             encodings += Pad(0)(expand_positional_encodings(durations, encodings.shape[-1])).to(encodings.device)
79 |         elif mode == 'standard':
80 |             encodings += positional_encoding(encodings.shape[-1], encodings.shape[1]).to(encodings.device)
81 |     return encodings
82 | 
83 | 
84 | class ZeroTemporalPad(nn.ZeroPad2d):
85 |     """Pad sequences to equal length in the temporal dimension"""
86 |     def __init__(self, kernel_size, dilation, causal=False):
87 |         total_pad = (dilation * (kernel_size - 1))
88 | 
89 |         if causal:
90 |             super(ZeroTemporalPad, self).__init__((0, 0, total_pad, 0))
91 |         else:
92 |             begin = total_pad // 2
93 |             end = total_pad - begin
94 |             super(ZeroTemporalPad, self).__init__((0, 0, begin, end))
95 | 
96 | 
97 | class Conv1d(nn.Conv1d):
98 |     """A wrapper around nn.Conv1d that works on (batch, time, channels)"""
99 | 
100 |     def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, dilation=1, groups=1, bias=True, padding=0):
101 |         super(Conv1d, self).__init__(in_channels=in_channels, out_channels=out_channels,
102 |                                      kernel_size=kernel_size, stride=stride, dilation=dilation,
103 |                                      groups=groups, bias=bias, padding=padding)
104 | 
105 |     def forward(self, x):
106 |         return super().forward(x.transpose(2, 1)).transpose(2, 1)
107 | 
108 | 
109 | class FreqNorm(nn.BatchNorm1d):
110 |     """Normalize each frequency channel of the spectrogram separately, over time and batch.
111 | 
112 | 
113 |     Examples:
114 |         t = torch.arange(2*10*5).reshape(2, 10, 5).float()
115 |         b1 = nn.BatchNorm1d(10, affine=False, momentum=None)
116 |         b2 = (t - t.mean([0,2], keepdim=True))/torch.sqrt(t.var([0,2], unbiased=False, keepdim=True)+1e-05)
117 |         -> b1 and b2 give the same results
118 |         -> BatchNorm1d by default normalizes over channels and batch - not useful for different-length sequences
119 |         If we transpose the last two dims, we get normalization across batch and time
120 |         -> normalization for each frequency channel over time and batch
121 | 
122 |     # compare to layer norm:
123 |     Layer_norm: (t - t.mean(-1, keepdim=True))/torch.sqrt(t.var(-1, unbiased=False, keepdim=True)+1e-05)
124 |     -> layer norm normalizes across all frequencies for each timestep, independently of batch
125 | 
126 |     => LayerNorm: Normalize each freq. bin wrt other freq bins in the same timestep -> time independent, batch independent, freq dependent
127 |     => FreqNorm: Normalize each freq. bin wrt the same freq bin across time and batch -> time dependent, other freq independent
128 |     """
129 |     def __init__(self, channels, affine=True, track_running_stats=True, momentum=0.1):
130 |         super(FreqNorm, self).__init__(channels, affine=affine, track_running_stats=track_running_stats, momentum=momentum)
131 | 
132 |     def forward(self, x):
133 |         return super().forward(x.transpose(2,1)).transpose(2,1)
134 | 
135 | 
136 | class ResidualBlock(nn.Module):
137 |     """Implements conv -> activation -> norm, n times, with a residual connection"""
138 | 
139 |     def __init__(self, channels, kernel_size, dilation, n=2, causal=False, norm=FreqNorm, activation=nn.ReLU):
140 |         super(ResidualBlock, self).__init__()
141 | 
142 |         self.blocks = [
143 |             nn.Sequential(
144 |                 Conv1d(channels, channels, kernel_size, dilation=dilation),
145 |                 ZeroTemporalPad(kernel_size, dilation, causal=causal),
146 |                 activation(),
147 |                 norm(channels),  # Normalize after activation. if we used ReLU, half of our neurons would be dead!
148 |             )
149 |             for i in range(n)
150 |         ]
151 | 
152 |         self.blocks = nn.Sequential(*self.blocks)
153 | 
154 |     def forward(self, x):
155 |         return x + self.blocks(x)
156 | 
157 | 
158 | class TextEncoder(nn.Module):
159 |     """Encodes input phonemes for the duration predictor and the decoder"""
160 |     def __init__(self, hp):
161 |         super(TextEncoder, self).__init__()
162 |         self.kernel_size = hp.enc_kernel_size
163 |         self.dilations = hp.enc_dilations
164 | 
165 |         self.prenet = nn.Sequential(
166 |             nn.Embedding(hp.alphabet_size, hp.channels, padding_idx=0),
167 |             Conv1d(hp.channels, hp.channels),
168 |             eval(hp.activation)(),
169 |         )
170 | 
171 |         self.res_blocks = nn.Sequential(*[
172 |             ResidualBlock(hp.channels, self.kernel_size, d, n=2, norm=eval(hp.normalize), activation=eval(hp.activation))
173 |             for d in self.dilations
174 |         ])
175 | 
176 |         self.post_net1 = nn.Sequential(
177 |             Conv1d(hp.channels, hp.channels),
178 |         )
179 | 
180 |         self.post_net2 = nn.Sequential(
181 |             eval(hp.activation)(),
182 |             eval(hp.normalize)(hp.channels),
183 |             Conv1d(hp.channels, hp.channels)
184 |         )
185 | 
186 |     def forward(self, x):
187 |         embedding = self.prenet(x)
188 |         x = self.res_blocks(embedding)
189 |         x = self.post_net1(x) + embedding
190 |         return self.post_net2(x)
191 | 
192 | 
193 | class SpecDecoder(nn.Module):
194 |     """Decodes the expanded phoneme encoding into spectrograms"""
195 |     def __init__(self, hp):
196 |         super(SpecDecoder, self).__init__()
197 |         self.kernel_size = hp.dec_kernel_size
198 |         self.dilations = hp.dec_dilations
199 | 
200 |         self.res_blocks = nn.Sequential(
201 |             *[ResidualBlock(hp.channels, self.kernel_size, d, n=2, norm=eval(hp.normalize), activation=eval(hp.activation))
202 |               for d in self.dilations],
203 |         )
204 | 
205 |         self.post_net1 = nn.Sequential(
206 |             Conv1d(hp.channels, hp.channels),
207 |         )
208 | 
209 |         self.post_net2 = nn.Sequential(
210 |             ResidualBlock(hp.channels, self.kernel_size, 1, n=2),
211 |             Conv1d(hp.channels, hp.out_channels),
212 |             eval(hp.final_activation)()
213 |         )
214 | 
215 |     def forward(self, x):
216 |         xx = self.res_blocks(x)
217 |         x = self.post_net1(xx) + x
218 |         return self.post_net2(x)
219 | 
220 | 
221 | class DurationPredictor(nn.Module):
222 |     """Predicts phoneme log durations based on the encoder outputs"""
223 |     def __init__(self, hp):
224 |         super(DurationPredictor, self).__init__()
225 | 
226 |         self.layers = nn.Sequential(
227 |             ResidualBlock(hp.channels, 4, 1, n=1, norm=eval(hp.normalize), activation=nn.ReLU),
228 |             ResidualBlock(hp.channels, 3, 1, n=1, norm=eval(hp.normalize), activation=nn.ReLU),
229 |             ResidualBlock(hp.channels, 1, 1, n=1, norm=eval(hp.normalize), activation=nn.ReLU),
230 |             Conv1d(hp.channels, 1))
231 | 
232 |     def forward(self, x):
233 |         """Outputs are interpreted as log(durations).
234 |         To get actual durations, apply the exp transformation.
235 |         :param x:
236 |         :return:
237 |         """
238 |         return self.layers(x)
239 | 
240 | 
241 | class VoiceEncoder(nn.Module):
242 |     """Reference audio encoder"""
243 |     def __init__(self, hp):
244 |         super(VoiceEncoder, self).__init__()
245 | 
246 |         # Define the network
247 |         self.lstm = nn.LSTM(hp.n_mel_channels, hp.channels, 3, batch_first=True)
248 |         self.linear = nn.Linear(hp.channels, hp.speaker_dim)
249 |         self.relu = nn.ReLU()
250 | 
251 |     def forward(self, mels):
252 |         # Pass the input through the LSTM layers and retrieve the final hidden state of the last
253 |         # layer. Apply a cutoff to 0 for negative values and L2 normalize the embeddings.
254 |         _, (hidden, _) = self.lstm(mels)
255 |         # Take only the hidden state of the last layer
256 |         embeds_raw = self.relu(self.linear(hidden[-1]))
257 |         # L2-normalize it
258 |         embeds = embeds_raw / (torch.norm(embeds_raw, dim=1, keepdim=True) + 1e-5)
259 |         return embeds
260 | 
261 | 
262 | class Interpolate(nn.Module):
263 |     """Use multihead attention to increase variability in expanded phoneme encodings
264 | 
265 |     Not used in the final model, but used in reported experiments.
266 |     """
267 |     def __init__(self, hp):
268 |         super(Interpolate, self).__init__()
269 | 
270 |         ch = hp.channels
271 |         self.att = nn.MultiheadAttention(ch, num_heads=4)
272 |         self.norm = FreqNorm(ch)
273 |         self.conv = Conv1d(ch, ch, kernel_size=1)
274 | 
275 |     def forward(self, x):
276 |         xx = x.permute(1, 0, 2)  # (batch, time, channels) -> (time, batch, channels)
277 |         xx = self.att(xx, xx, xx)[0].permute(1, 0, 2)  # (batch, time, channels)
278 |         xx = self.conv(xx)
279 |         return self.norm(xx) + x
280 | 
281 | 
282 | class ParallelText2Mel(nn.Module):
283 |     def __init__(self, hp):
284 |         """Text to melspectrogram network.
285 |         Args:
286 |             hp: hyper parameters
287 |         Input:
288 |             L: (B, N) text inputs
289 |         Outputs:
290 |             Y: (B, T, f) predicted melspectrograms
291 |         """
292 |         super(ParallelText2Mel, self).__init__()
293 |         self.hparams = hp
294 |         self.encoder = TextEncoder(hp)
295 |         self.decoder = SpecDecoder(hp)
296 |         self.duration_predictor = DurationPredictor(hp)
297 | 
298 |     def forward(self, inputs):
299 |         texts, tlens, durations, alpha = inputs
300 |         alpha = alpha or 1.0
301 | 
302 |         encodings = self.encoder(texts)  # batch, time, channels
303 |         prd_durans = self.duration_predictor(encodings.detach() if self.hparams.separate_duration_grad
304 |                                              else encodings)[..., 0]  # batch, time
305 | 
306 |         # use exp(log(durations)) = durations
307 |         if durations is None:
308 |             prd_durans = (round_and_mask(torch.exp(prd_durans), tlens) * alpha).long()
309 |             encodings = expand_enc(encodings, prd_durans, mode='duration')
310 |         else:
311 |             encodings = expand_enc(encodings, durations, mode='duration')
312 | 
313 |         melspecs = self.decoder(encodings)
314 |         return melspecs, prd_durans
315 | 
316 | 
317 | # ===============================================
318 | # MelGAN Vocoder
319 | # ===============================================
320 | 
321 | MAX_WAV_VALUE = 32768.0
322 | 
323 | 
324 | class ResStack(nn.Module):
325 |     def __init__(self, channel):
326 |         super(ResStack, self).__init__()
327 | 
328 |         self.blocks = nn.ModuleList([
329 |             nn.Sequential(
330 |                 nn.LeakyReLU(0.2),
331 |                 nn.ReflectionPad1d(3**i),
332 |                 nn.utils.weight_norm(nn.Conv1d(channel, channel, kernel_size=3, dilation=3**i)),
333 |                 nn.LeakyReLU(0.2),
334 |                 nn.utils.weight_norm(nn.Conv1d(channel, channel, kernel_size=1)),
335 |             )
336 |             for i in range(3)
337 |         ])
338 | 
339 |         self.shortcuts = nn.ModuleList([
340 |             nn.utils.weight_norm(nn.Conv1d(channel, channel, kernel_size=1))
341 |             for i in range(3)
342 |         ])
343 | 
344 |     def forward(self, x):
345 |         for block, shortcut in zip(self.blocks, self.shortcuts):
346 |             x = shortcut(x) + block(x)
347 |         return x
348 | 
349 |     def remove_weight_norm(self):
350 |         for block, shortcut in zip(self.blocks, self.shortcuts):
351 |             nn.utils.remove_weight_norm(block[2])
352 |             nn.utils.remove_weight_norm(block[4])
353 |             nn.utils.remove_weight_norm(shortcut)
354 | 
355 | 
356 | class MelGenerator(nn.Module):
357 |     def __init__(self, mel_channel):
358 |         super(MelGenerator, self).__init__()
359 |         self.mel_channel = mel_channel
360 | 
361 |         self.generator = nn.Sequential(
362 |             nn.ReflectionPad1d(3),
363 |             nn.utils.weight_norm(nn.Conv1d(mel_channel, 512, kernel_size=7, stride=1)),
364 | 
365 |             nn.LeakyReLU(0.2),
366 |             nn.utils.weight_norm(nn.ConvTranspose1d(512, 256, kernel_size=16, stride=8, padding=4)),
367 | 
368 |             ResStack(256),
369 | 
370 |             nn.LeakyReLU(0.2),
371 |             nn.utils.weight_norm(nn.ConvTranspose1d(256, 128, kernel_size=16, stride=8, padding=4)),
372 | 
373 |             ResStack(128),
374 | 
375 |             nn.LeakyReLU(0.2),
376 |             nn.utils.weight_norm(nn.ConvTranspose1d(128, 64, kernel_size=4, stride=2, padding=1)),
377 | 
378 |             ResStack(64),
379 | 
380 |             nn.LeakyReLU(0.2),
381 |             nn.utils.weight_norm(nn.ConvTranspose1d(64, 32, kernel_size=4, stride=2, padding=1)),
382 | 
383 |             ResStack(32),
384 | 
385 |             nn.LeakyReLU(0.2),
386 |             nn.ReflectionPad1d(3),
387 |             nn.utils.weight_norm(nn.Conv1d(32, 1, kernel_size=7, stride=1)),
388 |             nn.Tanh(),
389 |         )
390 | 
391 |     def forward(self, mel):
392 |         mel = (mel + 5.0) / 5.0  # roughly normalize spectrogram
393 |         return self.generator(mel)
394 | 
395 |     def eval(self, inference=False):
396 |         super(MelGenerator, self).eval()
397 | 
398 |         # don't remove weight norm while validating inside the training loop
399 |         if inference:
400 |             self.remove_weight_norm()
401 | 
402 |     def remove_weight_norm(self):
403 |         for idx, layer in enumerate(self.generator):
404 |             if len(layer.state_dict()) != 0:
405 |                 try:
406 |                     nn.utils.remove_weight_norm(layer)
407 |                 except ValueError:
408 |                     # submodules like ResStack manage their own weight norm
409 |                     layer.remove_weight_norm()
410 | 
411 |     def inference(self, mel):
412 |         hop_length = 256
413 |         # pad input mel with zeros to cut artifact
414 |         # see https://github.com/seungwonpark/melgan/issues/8
415 |         zero = torch.full((1, self.mel_channel, 10), -11.5129).to(mel.device)
416 |         mel = torch.cat((mel, zero), dim=2)
417 | 
418 |         audio = self.forward(mel)
419 |         audio = audio.squeeze()  # collapse all dimensions except the time axis
420 |         audio = audio[:-(hop_length*10)]
421 |         audio = MAX_WAV_VALUE * audio
422 |         audio = audio.clamp(min=-MAX_WAV_VALUE, max=MAX_WAV_VALUE-1)
423 |         audio = audio.short()
424 | 
425 |         return audio
426 | 
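427 | 
428 | if __name__ == '__main__':
429 |     # Illustrative shape check (not in the original file): the generator
430 |     # upsamples by 8 * 8 * 2 * 2 = 256 samples per mel frame (= hop_length).
431 |     model = MelGenerator(mel_channel=80)
432 |     dummy_mel = torch.randn(1, 80, 10)   # (batch, mel_channels, frames)
433 |     audio = model(dummy_mel)
434 |     print(audio.shape)                   # torch.Size([1, 1, 2560])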
--------------------------------------------------------------------------------
/backend/mytts.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os.path as osp
3 | import librosa
4 | 
5 | import torch
6 | from .hparams import HParam
7 | from .transform import StandardNorm, TextProcessor
8 | from .models import MelGenerator, ParallelText2Mel
9 | from .synthesizer import Synthesizer
10 | 
11 | try:
12 |     from .manager import GPUManager
13 | except ImportError as err:
14 |     print(err); gm = None
15 | else:
16 |     gm = GPUManager()
17 | 
18 | 
19 | def select_device(device):
20 |     cpu_request = device.lower() == 'cpu'
21 |     # if device requested other than 'cpu'
22 |     if device and not cpu_request:
23 |         c = 1024 ** 2  # bytes to MB
24 |         x = torch.cuda.get_device_properties(int(device))
25 |         s = f'Using torch {torch.__version__} '
26 |         print("%sCUDA:%s (%s, %dMB)" % (s, device, x.name, x.total_memory / c))
27 |         return torch.device(f'cuda:{device}')
28 |     else:
29 |         print(f'Using torch {torch.__version__} CPU')
30 |         return torch.device('cpu')
31 | 
32 | 
33 | class MyTTS:
34 |     def __init__(self, config=None, device=None):
35 |         if torch.cuda.is_available():
36 |             index = device if device else str(0 if gm is None else gm.auto_choice())
37 |         else:
38 |             index = 'cpu'
39 |         self.device = device = select_device(index)
40 | 
41 |         self.hparams = hparams = HParam(config) \
42 |             if config else HParam(osp.join(osp.dirname(osp.abspath(__file__)), "config", "default.yaml"))
43 | 
44 |         checkpoint = osp.join(osp.dirname(osp.abspath(__file__)), "pretrained", hparams.parallel.checkpoint)
45 |         vocoder_checkpoint = osp.join(osp.dirname(osp.abspath(__file__)), "pretrained", hparams.vocoder.checkpoint)
46 | 
47 |         normalizer = StandardNorm(hparams.audio.spec_mean, hparams.audio.spec_std)
48 |         processor = TextProcessor(hparams.text)
49 |         text2mel = ParallelText2Mel(hparams.parallel)
50 |         text2mel.eval()
51 |         vocoder = MelGenerator(hparams.audio.n_mel_channels).to(device)
52 |         vocoder.eval(inference=True)
53 | 
54 |         self.synthesizer = Synthesizer(
55 |             model=text2mel,
56 |             checkpoint=checkpoint,
57 |             vocoder=vocoder,
58 |             vocoder_checkpoint=vocoder_checkpoint,
59 |             processor=processor,
60 |             normalizer=normalizer,
61 |             device=device
62 |         )
63 | 
64 |     def __call__(self, texts, speed, volume, tone):
65 |         rate = int(tone) / 3
66 |         alpha = (4 / int(speed)) * rate
67 |         beta = int(volume) / 3
68 |         wave = self.synthesizer.inference(texts, alpha=alpha, beta=beta)
69 |         wave = wave.cpu().detach().numpy()
70 |         sr = self.hparams.audio.sampling_rate
71 |         # use TSM + resampling to change the tone (pitch)
72 |         wave = librosa.core.resample(wave, int(sr*rate), sr)
73 |         return wave, sr
74 | 
--------------------------------------------------------------------------------
/backend/pretrained/ljspeech-melgan-epoch3200.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atomicoo/PTTS-WebAPP/ff76ebd7e44542e79dcdb9c3bc23e91199de3a54/backend/pretrained/ljspeech-melgan-epoch3200.pth
--------------------------------------------------------------------------------
/backend/pretrained/ljspeech-parallel-epoch0100.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atomicoo/PTTS-WebAPP/ff76ebd7e44542e79dcdb9c3bc23e91199de3a54/backend/pretrained/ljspeech-parallel-epoch0100.pth
--------------------------------------------------------------------------------
/backend/synthesizer.py:
--------------------------------------------------------------------------------
1 | import time
2 | 
3 | import torch
4 | import torch.nn as nn
5 | 
6 | from .functional import mask
7 | 
8 | 
9 | class Synthesizer:
10 |     def __init__(self,
11 |                  model=None, checkpoint=None,
12 |                  vocoder=None, vocoder_checkpoint=None,
13 |                  processor=None, normalizer=None,
14 |                  device='cuda'):
15 |         # model
16 |         self.model = model
17 |         self.vocoder = vocoder
18 |         self.processor = processor
19 |         self.normalizer = normalizer
20 | 
21 |         # device
22 |         self.device = device
23 |         self.model.to(self.device)
24 |         print(f'Model sent to {self.device}')
25 | 
26 |         # helper vars
27 |         self.checkpoint = None
28 |         self.epoch, self.step = 0, 0
29 |         if checkpoint is not None:
30 |             self.checkpoint = checkpoint
31 |             self.load_checkpoint(checkpoint)
32 | 
33 |         self.vocoder_checkpoint = None
34 |         if vocoder_checkpoint is not None:
35 |             self.vocoder_checkpoint = vocoder_checkpoint
36 |             self.load_voc_checkpoint(vocoder_checkpoint)
37 | 
38 |     def to_device(self, device):
39 |         print(f'Sending network to {device}')
40 |         self.device = device
41 |         self.model.to(device)
42 |         self.vocoder.to(device)
43 |         return self
44 | 
45 |     def load_checkpoint(self, checkpoint):
46 |         checkpoint = torch.load(checkpoint, map_location=self.device)
47 |         self.epoch = checkpoint['epoch']
48 |         self.step = checkpoint['step']
49 |         self.model.load_state_dict(checkpoint['state_dict'])
50 |         print("Finished loading checkpoint (epoch=%d, step=%d)" % (self.epoch, self.step))
51 | 
52 |         self.checkpoint = None  # prevent overriding old checkpoint
53 |         return self
54 | 
55 |     def load_voc_checkpoint(self, checkpoint):
56 |         checkpoint = torch.load(checkpoint, map_location=self.device)
57 |         self.vocoder.load_state_dict(checkpoint)
58 |         print("Finished loading MelGAN checkpoint")
59 | 
60 |     def inference(self, texts, alpha=1.0, beta=1.0):
61 |         print('Synthesizing...')
62 |         since = time.time()
63 |         texts, tlens = self.processor(texts)
64 |         texts = torch.from_numpy(texts).long().to(self.device)
65 |         texts = torch.cat((texts, torch.zeros(len(texts), 7).long().to(self.device)), dim=-1)
66 |         tlens = torch.Tensor(tlens).to(self.device)
67 |         with torch.no_grad():
68 |             melspecs, prd_durans = self.model((texts, tlens, None, alpha))
69 |             melspecs = self.normalizer.inverse(melspecs * beta)
70 |             msk = mask(melspecs.shape, prd_durans.sum(dim=-1).long(), dim=1).to(self.device)
71 |             melspecs = melspecs.masked_fill(~msk, -11.5129).permute(0, 2, 1)
72 |             melspecs = torch.cat((melspecs, -11.5129*torch.ones(len(melspecs), melspecs.size(1), 3).to(self.device)), dim=-1)
73 |             print(f"Inferred {len(texts)} spectrograms, total elapsed {time.time()-since:.3f}s. Done.")
74 |             waves = self.vocoder(melspecs).squeeze(1)
75 |             print(f"Generated {len(texts)} audio clips, total elapsed {time.time()-since:.3f}s. Done.")
76 |         return waves
77 | 
--------------------------------------------------------------------------------
/backend/transform.py:
--------------------------------------------------------------------------------
1 | import re
2 | import numpy as np
3 | from g2p_en import G2p
4 | 
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch import as_tensor, stack
8 | 
9 | 
10 | class Pad:
11 |     """Pad all tensors in the first (length) dimension"""
12 | 
13 |     def __init__(self, pad_value=0, get_lens=False):
14 |         self.pad_value = pad_value
15 |         self.get_lens = get_lens
16 | 
17 |     def __call__(self, x):
18 |         """Pad each tensor in x to the same length
19 | 
20 |         Pad tensors in the first dimension and stack them to form a batch
21 | 
22 |         :param x: list of tensors/lists/arrays
23 |         :returns batch: (len_x, max_len_x, ...)
24 |         """
25 | 
26 |         if self.get_lens:
27 |             return self.pad_batch(x, self.pad_value), [len(xx) for xx in x]
28 | 
29 |         return self.pad_batch(x, self.pad_value)
30 | 
31 |     @staticmethod
32 |     def pad_batch(items, pad_value=0):
33 |         max_len = len(max(items, key=lambda x: len(x)))
34 |         zeros = (2*as_tensor(items[0]).ndim - 1) * [pad_value]
35 |         return stack([F.pad(as_tensor(x), pad=zeros + [max_len - len(x)], value=pad_value)
36 |                       for x in items])
37 | 
38 | 
39 | class StandardNorm(nn.Module):
40 |     def __init__(self, mean, std):
41 |         super(StandardNorm, self).__init__()
42 |         self.mean = mean
43 |         self.std = std
44 | 
45 |     def forward(self, x):
46 |         return (x - self.mean)/self.std
47 | 
48 |     def inverse(self, x):
49 |         return x * self.std + self.mean
50 | 
51 | 
52 | 
53 | class TextProcessor:
54 | 
55 |     g2p = G2p()
56 | 
57 |     def __init__(self, hparams):
58 |         self.units = self.graphemes = hparams.graphemes
59 |         self.phonemes = hparams.phonemes
60 |         self.phonemize = hparams.use_phonemes
61 |         if self.phonemize:
62 |             self.units = self.phonemes
63 |         self.specials = hparams.specials
64 |         self.punctuations = hparams.punctuations
65 |         self.units = self.specials + self.units + self.punctuations
66 |         self.txt2idx = {txt: idx for idx, txt in enumerate(self.units)}
67 |         self.idx2txt = {idx: txt for idx, txt in enumerate(self.units)}
68 | 
69 |     def normalize(self, text):
70 |         text = text.lower()
71 |         text = re.sub("[ ]+", " ", text)
72 |         # keep_re = "[^" + str(self.graphemes+self.punctuations) +"]"
73 |         # text = re.sub(keep_re, " ", text)  # remove
74 |         text = [ch if ch in self.graphemes+self.punctuations else ' ' for ch in text]
75 |         text = list(text)
76 |         if self.phonemize:
77 |             text = self.g2p(''.join(text))
78 |         return text
79 | 
80 |     def __call__(self, texts, max_n=None):
81 |         if not isinstance(texts, (str, list)):
82 |             raise TypeError("Inputs must be str or list(str)")
83 |         if isinstance(texts, str):
84 |             texts = [texts]
85 |         normalized_texts = [self.normalize(line) for line in texts]  # text normalization
86 |         tlens = [len(l) for l in normalized_texts]
87 |         max_n = max_n or max(tlens)
88 |         texts = np.zeros((len(normalized_texts), max_n), np.int64)
89 |         for i, text in enumerate(normalized_texts):
90 |             texts[i, :len(text)] = [self.txt2idx.get(ch, 1) for ch in text]
91 |         return texts, tlens
92 | 
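93 | 
94 | 
95 | if __name__ == '__main__':
96 |     # Illustrative usage sketch (not in the original file): Pad stacks
97 |     # variable-length sequences into one batch tensor.
98 |     from torch import tensor
99 |     batch = Pad(pad_value=0)([tensor([1, 2, 3]), tensor([4, 5])])
100 |     print(batch)  # tensor([[1, 2, 3], [4, 5, 0]])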
--------------------------------------------------------------------------------
/client.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path as osp
3 | import requests
4 | from urllib.parse import urlencode
5 | import json, time, uuid
6 | 
7 | 
8 | url = "http://127.0.0.1:5000"
9 | 
10 | payload = {
11 |     "speed": 4,
12 |     "volume": 4,
13 |     "tone": 4,
14 |     "text": "To install precompiled package of eSpeak NG on Linux, use standard package manager of your distribution.",
15 | }
16 | headers = {
17 |     'content-type': "application/json"
18 | }
19 | 
20 | outputs_dir = "outputs"
21 | os.makedirs(outputs_dir, exist_ok=True)
22 | 
23 | 
24 | print("="*12 + " POST TEST " + "="*12)
25 | data = json.dumps(payload)
26 | response = requests.request("POST", url+"/api/mytts", data=data, headers=headers)
27 | if response.status_code == 200:
28 |     filename = f"{time.strftime('%Y-%m-%d')}_{uuid.uuid4()}.wav"
29 |     with open(osp.join(outputs_dir, filename), "wb") as fw:
30 |         fw.write(response.content)
31 |     # print(f"Audios saved to {outputs_dir}. Done.")
32 |     print("POST TEST SUCCEEDED!")
33 | else:
34 |     print("POST TEST FAILED!")
35 | 
36 | 
37 | print("="*12 + " GET TEST " + "="*12)
38 | data = urlencode(payload)
39 | response = requests.request("GET", url+"/api/mytts?"+data, headers=headers)
40 | if response.status_code == 200:
41 |     filename = f"{time.strftime('%Y-%m-%d')}_{uuid.uuid4()}.wav"
42 |     with open(osp.join(outputs_dir, filename), "wb") as fw:
43 |         fw.write(response.content)
44 |     # print(f"Audios saved to {outputs_dir}. Done.")
45 |     print("GET TEST SUCCEEDED!")
46 | else:
47 |     print("GET TEST FAILED!")
48 | 
--------------------------------------------------------------------------------
/client2.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path as osp
3 | import requests
4 | from urllib.parse import urlencode
5 | import json, time, uuid
6 | import numpy as np
7 | from scipy.io.wavfile import write
8 | 
9 | 
10 | url = "http://127.0.0.1:5000"
11 | 
12 | payload = {
13 |     "speed": 4,
14 |     "volume": 4,
15 |     "tone": 4,
16 |     "text": "To install precompiled package of eSpeak NG on Linux, use standard package manager of your distribution.",
17 | }
18 | headers = {
19 |     'content-type': "application/json"
20 | }
21 | 
22 | outputs_dir = "outputs"
23 | os.makedirs(outputs_dir, exist_ok=True)
24 | 
25 | 
26 | print("="*12 + " POST TEST " + "="*12)
27 | data = json.dumps(payload)
28 | response = requests.request("POST", url+"/api/mytts", data=data, headers=headers)
29 | if response.status_code == 200:
30 |     content = response.content.decode('utf-8')
31 |     content = json.loads(content)
32 |     wave, sr = content['wave'], content['sr']
33 |     print('Saving audio...')
34 |     filename = osp.join(outputs_dir, f"{time.strftime('%Y-%m-%d')}_{uuid.uuid4()}.wav")
35 |     write(filename, sr, np.array(wave, dtype=np.float32))
36 |     print(f"Audios saved to {outputs_dir}. Done.")
37 |     print("POST TEST SUCCEEDED!")
38 | else:
39 |     print("POST TEST FAILED!")
40 | 
41 | 
42 | print("="*12 + " GET TEST " + "="*12)
43 | data = urlencode(payload)
44 | response = requests.request("GET", url+"/api/mytts?"+data, headers=headers)
45 | if response.status_code == 200:
46 |     content = response.content.decode('utf-8')
47 |     content = json.loads(content)
48 |     wave, sr = content['wave'], content['sr']
49 |     print('Saving audio...')
50 |     filename = osp.join(outputs_dir, f"{time.strftime('%Y-%m-%d')}_{uuid.uuid4()}.wav")
51 |     write(filename, sr, np.array(wave, dtype=np.float32))
52 |     print(f"Audios saved to {outputs_dir}. Done.")
53 |     print("GET TEST SUCCEEDED!")
54 | else:
55 |     print("GET TEST FAILED!")
56 | 
--------------------------------------------------------------------------------
/frontend/.editorconfig:
--------------------------------------------------------------------------------
1 | [*.{js,jsx,ts,tsx,vue}]
2 | indent_style = space
3 | indent_size = 2
4 | trim_trailing_whitespace = true
5 | insert_final_newline = true
6 | 
--------------------------------------------------------------------------------
/frontend/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | node_modules
3 | /dist
4 | 
5 | 
6 | # local env files
7 | .env.local
8 | .env.*.local
9 | 
10 | # Log files
11 | npm-debug.log*
12 | yarn-debug.log*
13 | yarn-error.log*
14 | pnpm-debug.log*
15 | 
16 | # Editor directories and files
17 | .idea
18 | .vscode
19 | *.suo
20 | *.ntvs*
21 | *.njsproj
22 | *.sln
23 | *.sw?
24 | 
--------------------------------------------------------------------------------
/frontend/README.md:
--------------------------------------------------------------------------------
1 | # frontend
2 | 
3 | ## Project setup
4 | ```
5 | npm install
6 | ```
7 | 
8 | ### Compiles and hot-reloads for development
9 | ```
10 | npm run serve
11 | ```
12 | 
13 | ### Compiles and minifies for production
14 | ```
15 | npm run build
16 | ```
17 | 
18 | ### Lints and fixes files
19 | ```
20 | npm run lint
21 | ```
22 | 
23 | ### Customize configuration
24 | See [Configuration Reference](https://cli.vuejs.org/config/).
25 | 
--------------------------------------------------------------------------------
/frontend/babel.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   presets: [
3 |     '@vue/cli-plugin-babel/preset'
4 |   ]
5 | }
6 | 
--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "frontend",
3 |   "version": "0.1.0",
4 |   "private": true,
5 |   "scripts": {
6 |     "serve": "vue-cli-service serve",
7 |     "build": "vue-cli-service build",
8 |     "lint": "vue-cli-service lint"
9 |   },
10 |   "dependencies": {
11 |     "axios": "^0.21.1",
12 |     "core-js": "^3.10.1",
13 |     "vue": "^2.6.12",
14 |     "vue-audio-native": "^0.1.41",
15 |     "vue-router": "^3.5.1",
16 |     "vuetify": "^2.4.9"
17 |   },
18 |   "devDependencies": {
19 |     "@vue/cli-plugin-babel": "^4.5.12",
20 |     "@vue/cli-plugin-eslint": "^4.5.12",
21 |     "@vue/cli-plugin-router": "^4.5.12",
22 |     "@vue/cli-service": "^4.5.12",
23 |     "@vue/eslint-config-standard": "^5.1.2",
24 |     "babel-eslint": "^10.1.0",
25 |     "eslint": "^6.8.0",
26 |     "eslint-plugin-import": "^2.22.1",
27 |     "eslint-plugin-node": "^11.1.0",
28 |     "eslint-plugin-promise": "^4.3.1",
29 |     "eslint-plugin-standard": "^4.1.0",
30 |     "eslint-plugin-vue": "^6.2.2",
31 |     "sass": "^1.32.8",
32 |     "sass-loader": "^10.1.1",
33 |     "vue-cli-plugin-vuetify": "^2.3.1",
34 |     "vue-template-compiler": "^2.6.12",
35 |     "vuetify-loader": "^1.7.2"
36 |   },
37 |   "eslintConfig": {
38 |     "root": true,
39 |     "env": {
40 |       "node": true
41 |     },
42 |     "extends": [
43 |       "plugin:vue/essential",
44 |       "@vue/standard"
45 |     ],
46 |     "parserOptions": {
47 |       "parser": "babel-eslint"
48 |     },
49 |     "rules": {}
50 |   },
51 |   "browserslist": [
52 |     "> 1%",
53 |     "last 2 versions",
54 |     "not dead"
55 |   ]
56 | }
57 | 
--------------------------------------------------------------------------------
/frontend/public/demo.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atomicoo/PTTS-WebAPP/ff76ebd7e44542e79dcdb9c3bc23e91199de3a54/frontend/public/demo.wav
--------------------------------------------------------------------------------
/frontend/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atomicoo/PTTS-WebAPP/ff76ebd7e44542e79dcdb9c3bc23e91199de3a54/frontend/public/favicon.ico
--------------------------------------------------------------------------------
/frontend/public/index.html:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
5 | 
6 | 
7 | 
8 |     <%= htmlWebpackPlugin.options.title %>
9 | 
10 | 
11 | 
12 | 
13 | 
16 | 
17 | 
18 | 
19 | 
20 | 
--------------------------------------------------------------------------------
/frontend/src/App.vue:
--------------------------------------------------------------------------------
1 | 
12 | 
13 | 
28 | 
--------------------------------------------------------------------------------
/frontend/src/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atomicoo/PTTS-WebAPP/ff76ebd7e44542e79dcdb9c3bc23e91199de3a54/frontend/src/assets/logo.png
--------------------------------------------------------------------------------
/frontend/src/assets/logo.svg:
--------------------------------------------------------------------------------
1 | Artboard 46
2 | 
--------------------------------------------------------------------------------
/frontend/src/components/HelloWorld.vue:
--------------------------------------------------------------------------------
1 | 
93 | 
94 | 
152 | 
--------------------------------------------------------------------------------
/frontend/src/components/MyParaTTS.vue:
--------------------------------------------------------------------------------
1 | 
70 | 
71 | 
112 | 
--------------------------------------------------------------------------------
/frontend/src/main.js:
--------------------------------------------------------------------------------
1 | import Vue from 'vue'
2 | import App from './App.vue'
3 | import router from './router'
4 | import vuetify from './plugins/vuetify'
5 | import vueAudioNative from 'vue-audio-native'
6 | Vue.use(vueAudioNative)
7 | 
8 | Vue.config.productionTip = false
9 | 
10 | new Vue({
11 |   router,
12 |   vuetify,
13 |   render: h => h(App)
14 | }).$mount('#app')
15 | 
--------------------------------------------------------------------------------
/frontend/src/plugins/vuetify.js:
--------------------------------------------------------------------------------
1 | import Vue from 'vue'
2 | import Vuetify from 'vuetify/lib/framework'
3 | 
4 | Vue.use(Vuetify)
5 | 
6 | export default new Vuetify({
7 | })
8 | 
--------------------------------------------------------------------------------
/frontend/src/router/index.js:
--------------------------------------------------------------------------------
1 | import Vue from 'vue'
2 | import VueRouter from 'vue-router'
3 | import Home from '../views/Home.vue'
4 | 
5 | Vue.use(VueRouter)
6 | 
7 | const routes = [
8 |   {
9 |     path: '/',
10 |     name: 'Home',
11 |     component: Home
12 |   },
13 |   {
14 |     path: '/about',
15 |     name: 'About',
16 |     // route level code-splitting
17 |     // this generates a separate chunk (about.[hash].js) for this route
18 |     // which is lazy-loaded when the route is visited.
19 |     component: () => import(/* webpackChunkName: "about" */ '../views/About.vue')
20 |   }
21 | ]
22 | 
23 | const router = new VueRouter({
24 |   mode: 'history',
25 |   base: process.env.BASE_URL,
26 |   routes
27 | })
28 | 
29 | export default router
30 | 
--------------------------------------------------------------------------------
/frontend/src/views/About.vue:
--------------------------------------------------------------------------------
1 | 
6 | 
--------------------------------------------------------------------------------
/frontend/src/views/Home.vue:
--------------------------------------------------------------------------------
1 | 
7 | 
8 | 
19 | 
--------------------------------------------------------------------------------
/frontend/vue.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   transpileDependencies: [
3 |     'vuetify'
4 |   ],
5 | 
6 |   outputDir: '../dist',
7 |   assetsDir: 'static'
8 | }
9 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests==2.25.0
2 | Flask==1.1.2
3 | Flask_Cors==3.0.9
4 | scipy==1.5.4
5 | numpy==1.19.2
6 | numba==0.48.0
7 | g2p_en==2.1.0
8 | torch==1.5.0
9 | PyYAML==5.4.1
10 | librosa<0.10  # required by backend/mytts.py; <0.10 keeps the positional resample call working
--------------------------------------------------------------------------------
/server.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import requests
3 | 
4 | from flask import Flask, request, render_template, jsonify, send_file
5 | from flask_cors import CORS
6 | 
7 | from random import randint
8 | from backend.mytts import MyTTS
9 | from scipy.io.wavfile import write
10 | 
11 | tts = MyTTS(device='cpu')
12 | 
13 | app = Flask(__name__,
14 |             static_folder = "./dist/static",
15 |             template_folder = "./dist")
16 | cors = CORS(app, resources={r"/api/*": {"origins": "*"}})
17 | 
18 | 
19 | @app.route('/', defaults={'path': ''})
20 | @app.route('/<path:path>')
21 | def catch_all(path):
22 |     if app.debug:
23 |         return requests.get('http://127.0.0.1:8080/{}'.format(path)).text
24 |     return render_template("index.html")
25 | 
26 | 
27 | @app.route('/api/random')
28 | def api_random():
29 |     response = {
30 |         'randomNumber': randint(1, 100)
31 |     }
32 |     return jsonify(response)
33 | 
34 | @app.route('/api/mytts', methods=['GET', 'POST'])
35 | def api_mytts():
36 |     req = request.json if request.method == 'POST' else request.args
37 |     print(req)
38 |     text, speed, volume, tone = \
39 |         req.get('text'), req.get('speed', 4), req.get('volume', 4), req.get('tone', 4)
40 |     waves, sr = tts([text], int(speed), int(volume), int(tone))
41 |     filepath = osp.join('dist', 'demo.wav')
42 |     write(filepath, sr, waves[0])
43 |     return send_file(filepath)
44 |     # return jsonify({'wave': waves[0].tolist(), 'sr': sr})
45 | 
46 | 
47 | if __name__ == '__main__':
48 |     app.run(host='0.0.0.0', port=5000, debug=False)
49 | 
--------------------------------------------------------------------------------