├── .gitignore ├── LICENSE ├── README.md ├── WaveNet.py ├── config_128.json ├── config_64.json ├── dataset.py ├── distributed_train.py ├── distributed_util.py ├── exp ├── ch128_T200_betaT0.02 │ ├── logs │ │ └── checkpoint │ │ │ └── 1000000.pkl │ └── speeches │ │ ├── 128_200_1000k_LJ001-0001.wav │ │ ├── 128_200_1000k_LJ001-0002.wav │ │ ├── 128_200_1000k_LJ001-0003.wav │ │ ├── 128_200_1000k_LJ001-0004.wav │ │ ├── 128_200_1000k_LJ001-0005.wav │ │ ├── 128_200_1000k_LJ001-0006.wav │ │ ├── 128_200_1000k_LJ001-0007.wav │ │ ├── 128_200_1000k_LJ001-0008.wav │ │ ├── 128_200_1000k_LJ001-0009.wav │ │ ├── 128_200_1000k_LJ001-0010.wav │ │ ├── 128_200_1000k_LJ001-0011.wav │ │ ├── 128_200_1000k_LJ001-0012.wav │ │ ├── 128_200_1000k_LJ001-0013.wav │ │ ├── 128_200_1000k_LJ001-0014.wav │ │ ├── 128_200_1000k_LJ001-0015.wav │ │ └── 128_200_1000k_LJ001-0016.wav └── ch64_T50_betaT0.05 │ ├── logs │ └── checkpoint │ │ └── 1000000.pkl │ └── speeches │ ├── 64_50_1000k_LJ001-0001.wav │ ├── 64_50_1000k_LJ001-0002.wav │ ├── 64_50_1000k_LJ001-0003.wav │ ├── 64_50_1000k_LJ001-0004.wav │ ├── 64_50_1000k_LJ001-0005.wav │ ├── 64_50_1000k_LJ001-0006.wav │ ├── 64_50_1000k_LJ001-0007.wav │ ├── 64_50_1000k_LJ001-0008.wav │ ├── 64_50_1000k_LJ001-0009.wav │ ├── 64_50_1000k_LJ001-0010.wav │ ├── 64_50_1000k_LJ001-0011.wav │ ├── 64_50_1000k_LJ001-0012.wav │ ├── 64_50_1000k_LJ001-0013.wav │ ├── 64_50_1000k_LJ001-0014.wav │ ├── 64_50_1000k_LJ001-0015.wav │ └── 64_50_1000k_LJ001-0016.wav ├── inference.py ├── mel2samp.py ├── mel_spectrogram ├── LJ001-0001.wav.pt ├── LJ001-0002.wav.pt ├── LJ001-0003.wav.pt ├── LJ001-0004.wav.pt ├── LJ001-0005.wav.pt ├── LJ001-0006.wav.pt ├── LJ001-0007.wav.pt ├── LJ001-0008.wav.pt ├── LJ001-0009.wav.pt ├── LJ001-0010.wav.pt ├── LJ001-0011.wav.pt ├── LJ001-0012.wav.pt ├── LJ001-0013.wav.pt ├── LJ001-0014.wav.pt ├── LJ001-0015.wav.pt ├── LJ001-0016.wav.pt ├── LJ001-0017.wav.pt ├── LJ001-0018.wav.pt ├── LJ001-0019.wav.pt ├── LJ001-0020.wav.pt ├── LJ001-0021.wav.pt ├── 
LJ001-0022.wav.pt ├── LJ001-0023.wav.pt ├── LJ001-0024.wav.pt ├── LJ001-0025.wav.pt ├── LJ001-0026.wav.pt ├── LJ001-0027.wav.pt ├── LJ001-0028.wav.pt ├── LJ001-0029.wav.pt ├── LJ001-0030.wav.pt ├── LJ001-0031.wav.pt ├── LJ001-0032.wav.pt ├── LJ001-0033.wav.pt ├── LJ001-0034.wav.pt ├── LJ001-0035.wav.pt ├── LJ001-0036.wav.pt ├── LJ001-0037.wav.pt ├── LJ001-0038.wav.pt ├── LJ001-0039.wav.pt ├── LJ001-0040.wav.pt ├── LJ001-0041.wav.pt ├── LJ001-0042.wav.pt ├── LJ001-0043.wav.pt ├── LJ001-0044.wav.pt ├── LJ001-0045.wav.pt ├── LJ001-0046.wav.pt ├── LJ001-0047.wav.pt ├── LJ001-0048.wav.pt ├── LJ001-0049.wav.pt ├── LJ001-0050.wav.pt ├── LJ001-0051.wav.pt ├── LJ001-0052.wav.pt ├── LJ001-0053.wav.pt ├── LJ001-0054.wav.pt ├── LJ001-0055.wav.pt ├── LJ001-0056.wav.pt ├── LJ001-0057.wav.pt ├── LJ001-0058.wav.pt ├── LJ001-0059.wav.pt ├── LJ001-0060.wav.pt ├── LJ001-0061.wav.pt ├── LJ001-0062.wav.pt ├── LJ001-0063.wav.pt ├── LJ001-0064.wav.pt ├── LJ001-0065.wav.pt ├── LJ001-0066.wav.pt ├── LJ001-0067.wav.pt ├── LJ001-0068.wav.pt ├── LJ001-0069.wav.pt ├── LJ001-0070.wav.pt ├── LJ001-0071.wav.pt ├── LJ001-0072.wav.pt ├── LJ001-0073.wav.pt ├── LJ001-0074.wav.pt ├── LJ001-0075.wav.pt ├── LJ001-0076.wav.pt ├── LJ001-0077.wav.pt ├── LJ001-0078.wav.pt ├── LJ001-0079.wav.pt ├── LJ001-0080.wav.pt ├── LJ001-0081.wav.pt ├── LJ001-0082.wav.pt ├── LJ001-0083.wav.pt ├── LJ001-0084.wav.pt ├── LJ001-0085.wav.pt ├── LJ001-0086.wav.pt ├── LJ001-0087.wav.pt ├── LJ001-0088.wav.pt ├── LJ001-0089.wav.pt ├── LJ001-0090.wav.pt ├── LJ001-0091.wav.pt ├── LJ001-0092.wav.pt ├── LJ001-0093.wav.pt ├── LJ001-0094.wav.pt ├── LJ001-0095.wav.pt ├── LJ001-0096.wav.pt ├── LJ001-0097.wav.pt ├── LJ001-0098.wav.pt ├── LJ001-0099.wav.pt ├── LJ001-0100.wav.pt ├── LJ001-0101.wav.pt ├── LJ001-0102.wav.pt ├── LJ001-0103.wav.pt ├── LJ001-0104.wav.pt ├── LJ001-0105.wav.pt ├── LJ001-0106.wav.pt ├── LJ001-0107.wav.pt ├── LJ001-0108.wav.pt ├── LJ001-0109.wav.pt ├── LJ001-0110.wav.pt ├── LJ001-0111.wav.pt ├── LJ001-0112.wav.pt 
├── LJ001-0113.wav.pt ├── LJ001-0114.wav.pt ├── LJ001-0115.wav.pt ├── LJ001-0116.wav.pt ├── LJ001-0117.wav.pt ├── LJ001-0118.wav.pt ├── LJ001-0119.wav.pt ├── LJ001-0120.wav.pt ├── LJ001-0121.wav.pt ├── LJ001-0122.wav.pt ├── LJ001-0123.wav.pt ├── LJ001-0124.wav.pt ├── LJ001-0125.wav.pt ├── LJ001-0126.wav.pt ├── LJ001-0127.wav.pt ├── LJ001-0128.wav.pt ├── LJ001-0129.wav.pt ├── LJ001-0130.wav.pt ├── LJ001-0131.wav.pt ├── LJ001-0132.wav.pt ├── LJ001-0133.wav.pt ├── LJ001-0134.wav.pt ├── LJ001-0135.wav.pt ├── LJ001-0136.wav.pt ├── LJ001-0137.wav.pt ├── LJ001-0138.wav.pt ├── LJ001-0139.wav.pt ├── LJ001-0140.wav.pt ├── LJ001-0141.wav.pt ├── LJ001-0142.wav.pt ├── LJ001-0143.wav.pt ├── LJ001-0144.wav.pt ├── LJ001-0145.wav.pt ├── LJ001-0146.wav.pt ├── LJ001-0147.wav.pt ├── LJ001-0148.wav.pt ├── LJ001-0149.wav.pt ├── LJ001-0150.wav.pt ├── LJ001-0151.wav.pt ├── LJ001-0152.wav.pt ├── LJ001-0153.wav.pt ├── LJ001-0154.wav.pt ├── LJ001-0155.wav.pt ├── LJ001-0156.wav.pt ├── LJ001-0157.wav.pt ├── LJ001-0158.wav.pt ├── LJ001-0159.wav.pt ├── LJ001-0160.wav.pt ├── LJ001-0161.wav.pt ├── LJ001-0162.wav.pt ├── LJ001-0163.wav.pt ├── LJ001-0164.wav.pt ├── LJ001-0165.wav.pt ├── LJ001-0166.wav.pt ├── LJ001-0167.wav.pt ├── LJ001-0168.wav.pt ├── LJ001-0169.wav.pt ├── LJ001-0170.wav.pt ├── LJ001-0171.wav.pt ├── LJ001-0172.wav.pt ├── LJ001-0173.wav.pt ├── LJ001-0174.wav.pt ├── LJ001-0175.wav.pt ├── LJ001-0176.wav.pt ├── LJ001-0177.wav.pt ├── LJ001-0178.wav.pt ├── LJ001-0179.wav.pt ├── LJ001-0180.wav.pt ├── LJ001-0181.wav.pt ├── LJ001-0182.wav.pt ├── LJ001-0183.wav.pt ├── LJ001-0184.wav.pt ├── LJ001-0185.wav.pt └── LJ001-0186.wav.pt ├── train.py └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | tensorboard/ 2 | */tensorboard/ 3 | *.log 4 | */*.log 5 | __pycache__/ 6 | */__pycache__/ 7 | *.zip 8 | */*.zip -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 philsyn 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This is a reimplementation of the neural vocoder in [DIFFWAVE: A VERSATILE DIFFUSION MODEL FOR AUDIO SYNTHESIS](https://arxiv.org/pdf/2009.09761.pdf). 2 | 3 | ## Usage: 4 | 5 | - To continue training the model, run ```python distributed_train.py -c config_${channel}.json```, where ```${channel}``` can be either ```64``` or ```128```. 6 | 7 | - To retrain the model, change the parameter ```ckpt_iter``` in the corresponding ```json``` file to ```-1``` and use the above command. 
8 | 9 | - To generate audio, run ```python inference.py -c config_${channel}.json -cond ${conditioner_name}```. For example, if the name of the mel spectrogram is ```LJ001-0001.wav.pt```, then ```${conditioner_name}``` is ```LJ001-0001```. Provided mel spectrograms include ```LJ001-0001``` through ```LJ001-0186```. 10 | 11 | 12 | - Note, you may need to carefully adjust some parameters in the ```json``` file, such as ```data_path``` and ```batch_size_per_gpu```. 13 | 14 | ## Pretrained models and generated samples: 15 | - [channel=64 model](https://github.com/philsyn/DiffWave-Vocoder/tree/master/exp/ch64_T50_betaT0.05/logs/checkpoint) 16 | - [channel=64 samples](https://github.com/philsyn/DiffWave-Vocoder/tree/master/exp/ch64_T50_betaT0.05/speeches) 17 | - [channel=128 model](https://github.com/philsyn/DiffWave-Vocoder/tree/master/exp/ch128_T200_betaT0.02/logs/checkpoint) 18 | - [channel=128 samples](https://github.com/philsyn/DiffWave-Vocoder/tree/master/exp/ch128_T200_betaT0.02/speeches) 19 | -------------------------------------------------------------------------------- /WaveNet.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from util import calc_diffusion_step_embedding 8 | 9 | 10 | def swish(x): 11 | return x * torch.sigmoid(x) 12 | 13 | 14 | # dilated conv layer with kaiming_normal initialization 15 | # from https://github.com/ksw0306/FloWaveNet/blob/master/modules.py 16 | class Conv(nn.Module): 17 | def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1): 18 | super(Conv, self).__init__() 19 | self.padding = dilation * (kernel_size - 1) // 2 20 | self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, dilation=dilation, padding=self.padding) 21 | self.conv = nn.utils.weight_norm(self.conv) 22 | nn.init.kaiming_normal_(self.conv.weight) 23 | 24 | def forward(self, x): 25 | out = self.conv(x) 
26 | return out 27 | 28 | 29 | # conv1x1 layer with zero initialization 30 | # from https://github.com/ksw0306/FloWaveNet/blob/master/modules.py but the scale parameter is removed 31 | class ZeroConv1d(nn.Module): 32 | def __init__(self, in_channel, out_channel): 33 | super(ZeroConv1d, self).__init__() 34 | self.conv = nn.Conv1d(in_channel, out_channel, kernel_size=1, padding=0) 35 | self.conv.weight.data.zero_() 36 | self.conv.bias.data.zero_() 37 | 38 | def forward(self, x): 39 | out = self.conv(x) 40 | return out 41 | 42 | 43 | # every residual block (named residual layer in paper) 44 | # contains one noncausal dilated conv 45 | class Residual_block(nn.Module): 46 | def __init__(self, res_channels, skip_channels, dilation, 47 | diffusion_step_embed_dim_out): 48 | super(Residual_block, self).__init__() 49 | self.res_channels = res_channels 50 | 51 | # the layer-specific fc for diffusion step embedding 52 | self.fc_t = nn.Linear(diffusion_step_embed_dim_out, self.res_channels) 53 | 54 | # dilated conv layer 55 | self.dilated_conv_layer = Conv(self.res_channels, 2 * self.res_channels, kernel_size=3, dilation=dilation) 56 | 57 | # add mel spectrogram upsampler and conditioner conv1x1 layer 58 | self.upsample_conv2d = torch.nn.ModuleList() 59 | for s in [16, 16]: 60 | conv_trans2d = torch.nn.ConvTranspose2d(1, 1, (3, 2 * s), padding=(1, s // 2), stride=(1, s)) 61 | conv_trans2d = torch.nn.utils.weight_norm(conv_trans2d) 62 | torch.nn.init.kaiming_normal_(conv_trans2d.weight) 63 | self.upsample_conv2d.append(conv_trans2d) 64 | self.mel_conv = Conv(80, 2 * self.res_channels, kernel_size=1) # 80 is mel bands 65 | 66 | # residual conv1x1 layer, connect to next residual layer 67 | self.res_conv = nn.Conv1d(res_channels, res_channels, kernel_size=1) 68 | self.res_conv = nn.utils.weight_norm(self.res_conv) 69 | nn.init.kaiming_normal_(self.res_conv.weight) 70 | 71 | # skip conv1x1 layer, add to all skip outputs through skip connections 72 | self.skip_conv = 
nn.Conv1d(res_channels, skip_channels, kernel_size=1) 73 | self.skip_conv = nn.utils.weight_norm(self.skip_conv) 74 | nn.init.kaiming_normal_(self.skip_conv.weight) 75 | 76 | def forward(self, input_data): 77 | x, mel_spec, diffusion_step_embed = input_data 78 | h = x 79 | B, C, L = x.shape 80 | assert C == self.res_channels 81 | 82 | # add in diffusion step embedding 83 | part_t = self.fc_t(diffusion_step_embed) 84 | part_t = part_t.view([B, self.res_channels, 1]) 85 | h += part_t 86 | 87 | # dilated conv layer 88 | h = self.dilated_conv_layer(h) 89 | 90 | # add mel spectrogram as (local) conditioner 91 | assert mel_spec is not None 92 | 93 | # Upsample spectrogram to size of audio 94 | mel_spec = torch.unsqueeze(mel_spec, dim=1) 95 | mel_spec = F.leaky_relu(self.upsample_conv2d[0](mel_spec), 0.4) 96 | mel_spec = F.leaky_relu(self.upsample_conv2d[1](mel_spec), 0.4) 97 | mel_spec = torch.squeeze(mel_spec, dim=1) 98 | 99 | assert(mel_spec.size(2) >= L) 100 | if mel_spec.size(2) > L: 101 | mel_spec = mel_spec[:, :, :L] 102 | 103 | mel_spec = self.mel_conv(mel_spec) 104 | h += mel_spec 105 | 106 | # gated-tanh nonlinearity 107 | out = torch.tanh(h[:,:self.res_channels,:]) * torch.sigmoid(h[:,self.res_channels:,:]) 108 | 109 | # residual and skip outputs 110 | res = self.res_conv(out) 111 | assert x.shape == res.shape 112 | skip = self.skip_conv(out) 113 | 114 | return (x + res) * math.sqrt(0.5), skip # normalize for training stability 115 | 116 | 117 | class Residual_group(nn.Module): 118 | def __init__(self, res_channels, skip_channels, num_res_layers, dilation_cycle, 119 | diffusion_step_embed_dim_in, 120 | diffusion_step_embed_dim_mid, 121 | diffusion_step_embed_dim_out): 122 | super(Residual_group, self).__init__() 123 | self.num_res_layers = num_res_layers 124 | self.diffusion_step_embed_dim_in = diffusion_step_embed_dim_in 125 | 126 | # the shared two fc layers for diffusion step embedding 127 | self.fc_t1 = nn.Linear(diffusion_step_embed_dim_in, 
diffusion_step_embed_dim_mid) 128 | self.fc_t2 = nn.Linear(diffusion_step_embed_dim_mid, diffusion_step_embed_dim_out) 129 | 130 | # stack all residual blocks with dilations 1, 2, ... , 512, ... , 1, 2, ..., 512 131 | self.residual_blocks = nn.ModuleList() 132 | for n in range(self.num_res_layers): 133 | self.residual_blocks.append(Residual_block(res_channels, skip_channels, 134 | dilation=2 ** (n % dilation_cycle), 135 | diffusion_step_embed_dim_out=diffusion_step_embed_dim_out)) 136 | 137 | def forward(self, input_data): 138 | x, mel_spectrogram, diffusion_steps = input_data 139 | 140 | # embed diffusion step t 141 | diffusion_step_embed = calc_diffusion_step_embedding(diffusion_steps, self.diffusion_step_embed_dim_in) 142 | diffusion_step_embed = swish(self.fc_t1(diffusion_step_embed)) 143 | diffusion_step_embed = swish(self.fc_t2(diffusion_step_embed)) 144 | 145 | # pass all residual layers 146 | h = x 147 | skip = 0 148 | for n in range(self.num_res_layers): 149 | h, skip_n = self.residual_blocks[n]((h, mel_spectrogram, diffusion_step_embed)) # use the output from last residual layer 150 | skip += skip_n # accumulate all skip outputs 151 | 152 | return skip * math.sqrt(1.0 / self.num_res_layers) # normalize for training stability 153 | 154 | 155 | class WaveNet_vocoder(nn.Module): 156 | def __init__(self, in_channels, res_channels, skip_channels, out_channels, 157 | num_res_layers, dilation_cycle, 158 | diffusion_step_embed_dim_in, 159 | diffusion_step_embed_dim_mid, 160 | diffusion_step_embed_dim_out): 161 | super(WaveNet_vocoder, self).__init__() 162 | 163 | # initial conv1x1 with relu 164 | self.init_conv = nn.Sequential(Conv(in_channels, res_channels, kernel_size=1), nn.ReLU()) 165 | 166 | # all residual layers 167 | self.residual_layer = Residual_group(res_channels=res_channels, 168 | skip_channels=skip_channels, 169 | num_res_layers=num_res_layers, 170 | dilation_cycle=dilation_cycle, 171 | diffusion_step_embed_dim_in=diffusion_step_embed_dim_in, 172 | 
diffusion_step_embed_dim_mid=diffusion_step_embed_dim_mid, 173 | diffusion_step_embed_dim_out=diffusion_step_embed_dim_out) 174 | 175 | # final conv1x1 -> relu -> zeroconv1x1 176 | self.final_conv = nn.Sequential(Conv(skip_channels, skip_channels, kernel_size=1), 177 | nn.ReLU(), 178 | ZeroConv1d(skip_channels, out_channels)) 179 | 180 | def forward(self, input_data): 181 | audio, mel_spectrogram, diffusion_steps = input_data 182 | 183 | x = audio 184 | x = self.init_conv(x) 185 | x = self.residual_layer((x, mel_spectrogram, diffusion_steps)) 186 | x = self.final_conv(x) 187 | 188 | return x 189 | -------------------------------------------------------------------------------- /config_128.json: -------------------------------------------------------------------------------- 1 | { 2 | "diffusion_config":{ 3 | "T": 200, 4 | "beta_0": 0.0001, 5 | "beta_T": 0.02 6 | }, 7 | "wavenet_config": { 8 | "in_channels": 1, 9 | "res_channels": 128, 10 | "skip_channels": 128, 11 | "out_channels": 1, 12 | "num_res_layers": 30, 13 | "dilation_cycle": 10, 14 | "diffusion_step_embed_dim_in": 128, 15 | "diffusion_step_embed_dim_mid": 512, 16 | "diffusion_step_embed_dim_out": 512 17 | }, 18 | "train_config": { 19 | "output_directory": "logs/checkpoint", 20 | "tensorboard_directory": "logs/tensorboard", 21 | "ckpt_iter": "max", 22 | "iters_per_ckpt": 10000, 23 | "iters_per_logging": 100, 24 | "n_iters": 1000001, 25 | "learning_rate": 2e-4, 26 | "batch_size_per_gpu": 2 27 | }, 28 | "trainset_config": { 29 | "segment_length": 16000, 30 | "data_path": "/tmp2/LJSpeech-1.1/train", 31 | "valid": false, 32 | "sampling_rate": 22050, 33 | "filter_length": 1024, 34 | "hop_length": 256, 35 | "win_length": 1024, 36 | "mel_fmin": 0.0, 37 | "mel_fmax": 8000.0 38 | }, 39 | "gen_config":{ 40 | "tensorboard_directory": "logs/tensorboard", 41 | "mel_path": "./mel_spectrogram", 42 | "output_directory": "speeches", 43 | "ckpt_path": "logs/checkpoint" 44 | }, 45 | "dist_config": { 46 | "dist_backend": 
"nccl", 47 | "dist_url": "tcp://localhost:54321" 48 | } 49 | } -------------------------------------------------------------------------------- /config_64.json: -------------------------------------------------------------------------------- 1 | { 2 | "diffusion_config":{ 3 | "T": 50, 4 | "beta_0": 0.0001, 5 | "beta_T": 0.05 6 | }, 7 | "wavenet_config": { 8 | "in_channels": 1, 9 | "res_channels": 64, 10 | "skip_channels": 64, 11 | "out_channels": 1, 12 | "num_res_layers": 30, 13 | "dilation_cycle": 10, 14 | "diffusion_step_embed_dim_in": 128, 15 | "diffusion_step_embed_dim_mid": 512, 16 | "diffusion_step_embed_dim_out": 512 17 | }, 18 | "train_config": { 19 | "output_directory": "logs/checkpoint", 20 | "tensorboard_directory": "logs/tensorboard", 21 | "ckpt_iter": "max", 22 | "iters_per_ckpt": 10000, 23 | "iters_per_logging": 100, 24 | "n_iters": 1000001, 25 | "learning_rate": 2e-4, 26 | "batch_size_per_gpu": 2 27 | }, 28 | "trainset_config": { 29 | "segment_length": 16000, 30 | "data_path": "/tmp2/LJSpeech-1.1/train", 31 | "valid": false, 32 | "sampling_rate": 22050, 33 | "filter_length": 1024, 34 | "hop_length": 256, 35 | "win_length": 1024, 36 | "mel_fmin": 0.0, 37 | "mel_fmax": 8000.0 38 | }, 39 | "gen_config":{ 40 | "tensorboard_directory": "logs/tensorboard", 41 | "mel_path": "./mel_spectrogram", 42 | "output_directory": "speeches", 43 | "ckpt_path": "logs/checkpoint" 44 | }, 45 | "dist_config": { 46 | "dist_backend": "nccl", 47 | "dist_url": "tcp://localhost:54321" 48 | } 49 | } -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data.distributed import DistributedSampler 3 | from mel2samp import Mel2Samp 4 | 5 | def load_LJSpeech(trainset_config, batch_size=4, num_gpus=1): 6 | LJSpeech_dataset = Mel2Samp(**trainset_config) 7 | 8 | # distributed sampler 9 | train_sampler = 
DistributedSampler(LJSpeech_dataset) if num_gpus > 1 else None 10 | 11 | trainloader = torch.utils.data.DataLoader(LJSpeech_dataset, 12 | batch_size=batch_size, 13 | sampler=train_sampler, 14 | num_workers=4, 15 | pin_memory=False, 16 | drop_last=True) 17 | return trainloader 18 | -------------------------------------------------------------------------------- /distributed_train.py: -------------------------------------------------------------------------------- 1 | # ***************************************************************************** 2 | # Adapted from https://github.com/NVIDIA/waveglow/blob/master/distributed.py 3 | # ***************************************************************************** 4 | 5 | # ***************************************************************************** 6 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 7 | # 8 | # Redistribution and use in source and binary forms, with or without 9 | # modification, are permitted provided that the following conditions are met: 10 | # * Redistributions of source code must retain the above copyright 11 | # notice, this list of conditions and the following disclaimer. 12 | # * Redistributions in binary form must reproduce the above copyright 13 | # notice, this list of conditions and the following disclaimer in the 14 | # documentation and/or other materials provided with the distribution. 15 | # * Neither the name of the NVIDIA CORPORATION nor the 16 | # names of its contributors may be used to endorse or promote products 17 | # derived from this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 23 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # *****************************************************************************\ 31 | 32 | import os 33 | import sys 34 | import time 35 | import subprocess 36 | import argparse 37 | import warnings 38 | warnings.filterwarnings("ignore") 39 | 40 | import torch 41 | 42 | from distributed_util import * 43 | 44 | 45 | def main(config, stdout_dir, args_str): 46 | args_list = ['train.py'] 47 | args_list += args_str.split(' ') if len(args_str) > 0 else [] 48 | 49 | args_list.append('--config={}'.format(config)) 50 | 51 | num_gpus = torch.cuda.device_count() 52 | args_list.append('--num_gpus={}'.format(num_gpus)) 53 | args_list.append("--group_name=group_{}".format(time.strftime("%Y_%m_%d-%H%M%S"))) 54 | 55 | if not os.path.isdir(stdout_dir): 56 | os.makedirs(stdout_dir) 57 | os.chmod(stdout_dir, 0o775) 58 | 59 | workers = [] 60 | 61 | for i in range(num_gpus): 62 | args_list[-2] = '--rank={}'.format(i) 63 | stdout = None if i == 0 else open( 64 | os.path.join(stdout_dir, "GPU_{}.log".format(i)), "w") 65 | print(args_list) 66 | p = subprocess.Popen([str(sys.executable)]+args_list, stdout=stdout) 67 | workers.append(p) 68 | 69 | for p in workers: 70 | p.wait() 71 | 72 | 73 | if __name__ == '__main__': 74 | parser = argparse.ArgumentParser() 75 | parser.add_argument('-c', '--config', type=str, 76 | help='JSON file for configuration') 77 | parser.add_argument('-s', '--stdout_dir', type=str, default="exp/", 78 | help='directory to save 
stoud logs') 79 | parser.add_argument('-a', '--args_str', type=str, default='', 80 | help='double quoted string with space separated key value pairs') 81 | 82 | args = parser.parse_args() 83 | main(args.config, args.stdout_dir, args.args_str) 84 | -------------------------------------------------------------------------------- /distributed_util.py: -------------------------------------------------------------------------------- 1 | # ***************************************************************************** 2 | # Adapted from https://github.com/NVIDIA/waveglow/blob/master/distributed.py 3 | # ***************************************************************************** 4 | 5 | # ***************************************************************************** 6 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 7 | # 8 | # Redistribution and use in source and binary forms, with or without 9 | # modification, are permitted provided that the following conditions are met: 10 | # * Redistributions of source code must retain the above copyright 11 | # notice, this list of conditions and the following disclaimer. 12 | # * Redistributions in binary form must reproduce the above copyright 13 | # notice, this list of conditions and the following disclaimer in the 14 | # documentation and/or other materials provided with the distribution. 15 | # * Neither the name of the NVIDIA CORPORATION nor the 16 | # names of its contributors may be used to endorse or promote products 17 | # derived from this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 23 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # *****************************************************************************\ 31 | 32 | import os 33 | import sys 34 | import time 35 | import subprocess 36 | import argparse 37 | import warnings 38 | warnings.filterwarnings("ignore") 39 | 40 | import torch 41 | import torch.distributed as dist 42 | from torch.autograd import Variable 43 | 44 | def reduce_tensor(tensor, num_gpus): 45 | rt = tensor.clone() 46 | dist.all_reduce(rt, op=dist.ReduceOp.SUM) 47 | rt /= num_gpus 48 | return rt 49 | 50 | def init_distributed(rank, num_gpus, group_name, dist_backend, dist_url): 51 | assert torch.cuda.is_available(), "Distributed mode requires CUDA." 52 | print("Initializing Distributed") 53 | 54 | # Set cuda device so everything is done on the right GPU. 55 | torch.cuda.set_device(rank % torch.cuda.device_count()) 56 | 57 | # Initialize distributed communication 58 | dist.init_process_group(dist_backend, init_method=dist_url, 59 | world_size=num_gpus, rank=rank, 60 | group_name=group_name) 61 | 62 | def _flatten_dense_tensors(tensors): 63 | """Flatten dense tensors into a contiguous 1D buffer. Assume tensors are of 64 | same dense type. 65 | Since inputs are dense, the resulting tensor will be a concatenated 1D 66 | buffer. Element-wise operation on this buffer will be equivalent to 67 | operating individually. 68 | Arguments: 69 | tensors (Iterable[Tensor]): dense tensors to flatten. 
70 | Returns: 71 | A contiguous 1D buffer containing input tensors. 72 | """ 73 | if len(tensors) == 1: 74 | return tensors[0].contiguous().view(-1) 75 | flat = torch.cat([t.contiguous().view(-1) for t in tensors], dim=0) 76 | return flat 77 | 78 | def _unflatten_dense_tensors(flat, tensors): 79 | """View a flat buffer using the sizes of tensors. Assume that tensors are of 80 | same dense type, and that flat is given by _flatten_dense_tensors. 81 | Arguments: 82 | flat (Tensor): flattened dense tensors to unflatten. 83 | tensors (Iterable[Tensor]): dense tensors whose sizes will be used to 84 | unflatten flat. 85 | Returns: 86 | Unflattened dense tensors with sizes same as tensors and values from 87 | flat. 88 | """ 89 | outputs = [] 90 | offset = 0 91 | for tensor in tensors: 92 | numel = tensor.numel() 93 | outputs.append(flat.narrow(0, offset, numel).view_as(tensor)) 94 | offset += numel 95 | return tuple(outputs) 96 | 97 | def apply_gradient_allreduce(module): 98 | """ 99 | Modifies existing model to do gradient allreduce, but doesn't change class 100 | so you don't need "module" 101 | """ 102 | if not hasattr(dist, '_backend'): 103 | module.warn_on_half = True 104 | else: 105 | module.warn_on_half = True if dist._backend == dist.dist_backend.GLOO else False 106 | 107 | for p in module.state_dict().values(): 108 | if not torch.is_tensor(p): 109 | continue 110 | dist.broadcast(p, 0) 111 | 112 | def allreduce_params(): 113 | if(module.needs_reduction): 114 | module.needs_reduction = False 115 | buckets = {} 116 | for param in module.parameters(): 117 | if param.requires_grad and param.grad is not None: 118 | tp = type(param.data) 119 | if tp not in buckets: 120 | buckets[tp] = [] 121 | buckets[tp].append(param) 122 | if module.warn_on_half: 123 | if torch.cuda.HalfTensor in buckets: 124 | print("WARNING: gloo dist backend for half parameters may be extremely slow." + 125 | " It is recommended to use the NCCL backend in this case. 
This currently requires" + 126 | "PyTorch built from top of tree master.") 127 | module.warn_on_half = False 128 | 129 | for tp in buckets: 130 | bucket = buckets[tp] 131 | grads = [param.grad.data for param in bucket] 132 | coalesced = _flatten_dense_tensors(grads) 133 | dist.all_reduce(coalesced) 134 | coalesced /= dist.get_world_size() 135 | for buf, synced in zip(grads, _unflatten_dense_tensors(coalesced, grads)): 136 | buf.copy_(synced) 137 | 138 | for param in list(module.parameters()): 139 | def allreduce_hook(*unused): 140 | Variable._execution_engine.queue_callback(allreduce_params) 141 | if param.requires_grad: 142 | param.register_hook(allreduce_hook) 143 | dir(param) 144 | 145 | def set_needs_reduction(self, input, output): 146 | self.needs_reduction = True 147 | 148 | module.register_forward_hook(set_needs_reduction) 149 | return module 150 | 151 | -------------------------------------------------------------------------------- /exp/ch128_T200_betaT0.02/logs/checkpoint/1000000.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch128_T200_betaT0.02/logs/checkpoint/1000000.pkl -------------------------------------------------------------------------------- /exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0001.wav -------------------------------------------------------------------------------- /exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0002.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0002.wav -------------------------------------------------------------------------------- /exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0003.wav -------------------------------------------------------------------------------- /exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0004.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0004.wav -------------------------------------------------------------------------------- /exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0005.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0005.wav -------------------------------------------------------------------------------- /exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0006.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0006.wav -------------------------------------------------------------------------------- /exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0007.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0007.wav -------------------------------------------------------------------------------- /exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0008.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0008.wav -------------------------------------------------------------------------------- /exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0009.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0009.wav -------------------------------------------------------------------------------- /exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0010.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0010.wav -------------------------------------------------------------------------------- /exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0011.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0011.wav -------------------------------------------------------------------------------- /exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0012.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0012.wav -------------------------------------------------------------------------------- /exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0013.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0013.wav -------------------------------------------------------------------------------- /exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0014.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0014.wav -------------------------------------------------------------------------------- /exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0015.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0015.wav -------------------------------------------------------------------------------- /exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0016.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch128_T200_betaT0.02/speeches/128_200_1000k_LJ001-0016.wav -------------------------------------------------------------------------------- /exp/ch64_T50_betaT0.05/logs/checkpoint/1000000.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch64_T50_betaT0.05/logs/checkpoint/1000000.pkl -------------------------------------------------------------------------------- /exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0001.wav -------------------------------------------------------------------------------- /exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0002.wav -------------------------------------------------------------------------------- /exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0003.wav -------------------------------------------------------------------------------- /exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0004.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0004.wav -------------------------------------------------------------------------------- /exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0005.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0005.wav -------------------------------------------------------------------------------- /exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0006.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0006.wav -------------------------------------------------------------------------------- /exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0007.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0007.wav -------------------------------------------------------------------------------- /exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0008.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0008.wav -------------------------------------------------------------------------------- /exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0009.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0009.wav -------------------------------------------------------------------------------- /exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0010.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0010.wav -------------------------------------------------------------------------------- /exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0011.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0011.wav -------------------------------------------------------------------------------- /exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0012.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0012.wav -------------------------------------------------------------------------------- /exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0013.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0013.wav -------------------------------------------------------------------------------- /exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0014.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0014.wav -------------------------------------------------------------------------------- /exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0015.wav: -------------------------------------------------------------------------------- 
# https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0015.wav
# --------------------------------------------------------------------------------
# /exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0016.wav:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/exp/ch64_T50_betaT0.05/speeches/64_50_1000k_LJ001-0016.wav
# --------------------------------------------------------------------------------
# /inference.py:
# --------------------------------------------------------------------------------
import os
import argparse
import json

import numpy as np
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

from scipy.io.wavfile import write as wavwrite
from scipy.io.wavfile import read as wavread

from util import rescale, find_max_epoch, print_size, sampling, calc_diffusion_hyperparams
from WaveNet import WaveNet_vocoder as WaveNet


def generate(output_directory, tensorboard_directory,
             mel_path, condition_name,
             ckpt_path, ckpt_iter):
    """
    Generate audio based on ground truth mel spectrogram

    Relies on the module-level names wavenet_config, diffusion_config,
    trainset_config and diffusion_hyperparams, which the __main__ section
    below assigns before calling this function.

    Parameters:
    output_directory (str):         save generated speeches to this path
                                    (joined under exp/<experiment name>/)
    tensorboard_directory (str):    save tensorboard events to this path
                                    (joined under exp/<experiment name>/)
    mel_path (str):                 ground truth mel spectrogram path
    condition_name (str):           name of ground truth mel spectrogram to be conditioned on
                                    e.g. LJ001-0001
    ckpt_path (str):                checkpoint path
                                    (joined under exp/<experiment name>/)
    ckpt_iter (int, str or 'max'):  the pretrained checkpoint to be loaded;
                                    automatically selects the maximum iteration if 'max' is selected.
                                    A numeric string (as delivered by argparse) is accepted.

    Raises:
    Exception:  if the checkpoint or the mel spectrogram cannot be loaded
    ValueError: if ckpt_iter is neither 'max' nor convertible to int
    """

    # generate experiment (local) path
    local_path = "ch{}_T{}_betaT{}".format(wavenet_config["res_channels"],
                                           diffusion_config["T"],
                                           diffusion_config["beta_T"])

    # Get shared output_directory ready
    output_directory = os.path.join('exp', local_path, output_directory)
    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)
        os.chmod(output_directory, 0o775)
    print("output directory", output_directory, flush=True)

    # map diffusion hyperparameters to gpu
    # ("T" is skipped; compare by value -- identity comparison against a
    # string literal only works by accident of interning)
    for key in diffusion_hyperparams:
        if key != "T":
            diffusion_hyperparams[key] = diffusion_hyperparams[key].cuda()

    # predefine model
    net = WaveNet(**wavenet_config).cuda()
    print_size(net)

    # load checkpoint
    ckpt_path = os.path.join('exp', local_path, ckpt_path)
    if ckpt_iter == 'max':
        ckpt_iter = find_max_epoch(ckpt_path)
    # The command line delivers a numeric checkpoint as str, but the output
    # file name below computes ckpt_iter // 1000, which needs an int.
    # NOTE(review): assumes find_max_epoch returns an int-like value -- confirm in util.py
    ckpt_iter = int(ckpt_iter)
    model_path = os.path.join(ckpt_path, '{}.pkl'.format(ckpt_iter))
    try:
        checkpoint = torch.load(model_path, map_location='cpu')
        net.load_state_dict(checkpoint['model_state_dict'])
    except Exception as err:
        # keep the original exception type/message for callers, but chain the real cause
        raise Exception('No valid model found') from err
    print('Successfully loaded model at iteration {}'.format(ckpt_iter))

    # use ground truth mel spec
    ground_truth_mel_name = os.path.join(mel_path, '{}.wav.pt'.format(condition_name))
    try:
        ground_truth_mel_spectrogram = torch.load(ground_truth_mel_name).unsqueeze(0).cuda()
    except Exception as err:
        raise Exception('No ground truth mel spectrogram found') from err
    # number of audio samples = size of the spectrogram's last dimension * hop_length
    audio_length = ground_truth_mel_spectrogram.shape[-1] * trainset_config["hop_length"]
    print('begin generating audio of length %s' % audio_length)

    # inference (timed with CUDA events)
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()

    generated_audio = sampling(net, (1, 1, audio_length),
                               diffusion_hyperparams,
                               condition=ground_truth_mel_spectrogram)

    end.record()
    torch.cuda.synchronize()
    # elapsed_time is in milliseconds
    print('generated {} at iteration {} in {} seconds'.format(condition_name,
                                                              ckpt_iter,
                                                              int(start.elapsed_time(end) / 1000)))

    # save audio to .wav
    outfile = '{}_{}_{}k_{}.wav'.format(wavenet_config["res_channels"],
                                        diffusion_config["T"],
                                        ckpt_iter // 1000,
                                        condition_name)
    wavwrite(os.path.join(output_directory, outfile),
             trainset_config["sampling_rate"],
             generated_audio.squeeze().cpu().numpy())

    # save audio to tensorboard; always close the writer, even if add_audio fails
    tb = SummaryWriter(os.path.join('exp', local_path, tensorboard_directory))
    try:
        tb.add_audio(tag=outfile, snd_tensor=generated_audio.squeeze(0),
                     sample_rate=trainset_config["sampling_rate"])
    finally:
        tb.close()

    print('saved generated samples at iteration %s' % ckpt_iter)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # config and condition_name are used unconditionally below, so require them
    # up front instead of failing later on a None value
    parser.add_argument('-c', '--config', type=str, required=True,
                        help='JSON file for configuration')
    parser.add_argument('-ckpt_iter', '--ckpt_iter', default='max',
                        help='Which checkpoint to use; assign a number or "max"')
    parser.add_argument('-cond', '--condition_name', type=str, required=True,
                        help='Name of the ground truth mel spectrogram to be conditioned on')
    args = parser.parse_args()

    # Parse configs. Globals nicer in this case
    # (assignments at module level are already global, so no `global`
    # statements are needed for generate() to see these names)
    with open(args.config) as f:
        config = json.load(f)
    gen_config = config["gen_config"]
    wavenet_config = config["wavenet_config"]      # to define wavenet
    diffusion_config = config["diffusion_config"]  # basic hyperparameters
    trainset_config = config["trainset_config"]    # to read trainset configurations
    diffusion_hyperparams = calc_diffusion_hyperparams(**diffusion_config)  # dictionary of all diffusion hyperparameters

    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    generate(**gen_config,
             ckpt_iter=args.ckpt_iter,
             condition_name=args.condition_name)

# --------------------------------------------------------------------------------
# /mel2samp.py:
# --------------------------------------------------------------------------------
# *****************************************************************************
#  Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions are met:
#      * Redistributions of source code must retain the above copyright
#        notice, this list of conditions and the following disclaimer.
#      * Redistributions in binary form must reproduce the above copyright
#        notice, this list of conditions and the following disclaimer in the
#        documentation and/or other materials provided with the distribution.
#      * Neither the name of the NVIDIA CORPORATION nor the
#        names of its contributors may be used to endorse or promote products
#        derived from this software without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#  DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
#  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
#  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# *****************************************************************************\
import os
import random
import argparse
import json
import torch
import torch.utils.data
import sys
from scipy.io.wavfile import read

# We're using the audio processing from TacoTron2 to make sure it matches
sys.path.insert(0, 'tacotron2')
from tacotron2.layers import TacotronSTFT

MAX_WAV_VALUE = 32768.0


def files_to_list(data_path):
    """
    Load all .wav files in data_path

    Parameters:
    data_path (str): directory to scan (not recursive)

    Returns:
    list of str: data_path joined with every entry whose name ends in '.wav',
                 in sorted name order
    """
    # os.listdir returns entries in arbitrary order; sort so that the seeded
    # shuffle in Mel2Samp gives the same file order on every machine
    return [os.path.join(data_path, name)
            for name in sorted(os.listdir(data_path))
            if name.endswith('.wav')]


def load_wav_to_torch(full_path):
    """
    Loads wavdata into torch array

    Parameters:
    full_path (str): path of the .wav file

    Returns:
    (torch.Tensor, int): the samples converted to float, and the sampling rate
                         reported by scipy.io.wavfile.read
    """
    sampling_rate, data = read(full_path)
    return torch.from_numpy(data).float(), sampling_rate


class Mel2Samp(torch.utils.data.Dataset):
    """
    This is the main class that calculates the spectrogram and returns the
    spectrogram, audio pair.
    """
    def __init__(self, data_path, valid, segment_length, filter_length,
                 hop_length, win_length, sampling_rate, mel_fmin, mel_fmax):
        """
        Parameters:
        data_path (str):        directory containing the .wav files
        valid (bool):           if true, __getitem__ returns whole utterances;
                                otherwise fixed-length segments
        segment_length (int):   number of samples per training segment
        sampling_rate (int):    required sampling rate of every file
        filter_length, hop_length, win_length, mel_fmin, mel_fmax:
                                forwarded unchanged to TacotronSTFT
        """
        self.audio_files = files_to_list(data_path)
        self.valid = valid
        # NOTE: this seeds the process-wide `random` module, which
        # __getitem__ also draws from when picking segment offsets
        random.seed(1234)
        random.shuffle(self.audio_files)
        self.stft = TacotronSTFT(filter_length=filter_length,
                                 hop_length=hop_length,
                                 win_length=win_length,
                                 sampling_rate=sampling_rate,
                                 mel_fmin=mel_fmin, mel_fmax=mel_fmax)
        self.segment_length = segment_length
        self.sampling_rate = sampling_rate

    def get_mel(self, audio):
        """
        Compute the mel spectrogram of an un-normalised waveform.

        Parameters:
        audio (torch.Tensor): samples on the raw wav scale; they are divided
                              by MAX_WAV_VALUE before the STFT

        Returns:
        torch.Tensor: output of TacotronSTFT.mel_spectrogram with the leading
                      batch dimension squeezed away
        """
        # add a batch dimension; detach() replaces the deprecated
        # torch.autograd.Variable(..., requires_grad=False) wrapper
        audio_norm = (audio / MAX_WAV_VALUE).unsqueeze(0).detach()
        melspec = self.stft.mel_spectrogram(audio_norm)
        return torch.squeeze(melspec, 0)

    def __getitem__(self, index):
        """
        Return the (mel, audio) pair for the file at `index`.

        Raises:
        ValueError: if the file's sampling rate differs from self.sampling_rate
        """
        # Read audio
        filename = self.audio_files[index]
        audio, sampling_rate = load_wav_to_torch(filename)
        if sampling_rate != self.sampling_rate:
            raise ValueError("{} SR doesn't match target {} SR".format(
                sampling_rate, self.sampling_rate))

        # Take segment (training only; the valid set keeps the whole audio)
        if not self.valid:
            shortfall = self.segment_length - audio.size(0)
            if shortfall <= 0:
                # long enough: crop a random window of segment_length samples
                audio_start = random.randint(0, -shortfall)
                audio = audio[audio_start:audio_start + self.segment_length]
            else:
                # too short: zero-pad at the end up to segment_length
                audio = torch.nn.functional.pad(audio, (0, shortfall), 'constant')

        mel = self.get_mel(audio)
        audio = audio / MAX_WAV_VALUE

        return (mel, audio)

    def __len__(self):
        """Number of .wav files found in data_path."""
        return len(self.audio_files)


# ===================================================================
# Takes directory of clean audio and makes directory of spectrograms
# Useful for making test sets
# ===================================================================
if __name__ == "__main__":
    # Get defaults so it can work with no Sacred
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', "--filelist_path", required=True,
                        help='Directory containing the .wav files to convert')
    # config and output_dir are used unconditionally below, so require them
    parser.add_argument('-c', '--config', type=str, required=True,
                        help='JSON file for configuration')
    parser.add_argument('-o', '--output_dir', type=str, required=True,
                        help='Output directory')
    args = parser.parse_args()

    with open(args.config) as f:
        data_config = json.load(f)["validset_config"]
    mel2samp = Mel2Samp(**data_config)

    filepaths = files_to_list(args.filelist_path)

    # Make directory if it doesn't exist
    if not os.path.isdir(args.output_dir):
        os.makedirs(args.output_dir)
        os.chmod(args.output_dir, 0o775)

    for filepath in filepaths:
        audio, sr = load_wav_to_torch(filepath)
        # same guard as Mel2Samp.__getitem__: the STFT was configured with the
        # sampling rate from the config, so refuse files recorded at another rate
        if sr != mel2samp.sampling_rate:
            raise ValueError("{} SR doesn't match target {} SR".format(
                sr, mel2samp.sampling_rate))
        melspectrogram = mel2samp.get_mel(audio)
        filename = os.path.basename(filepath)
        new_filepath = os.path.join(args.output_dir, filename + '.pt')
        print(new_filepath)
        torch.save(melspectrogram, new_filepath)

# --------------------------------------------------------------------------------
# /mel_spectrogram/LJ001-0001.wav.pt:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0001.wav.pt
# --------------------------------------------------------------------------------
# /mel_spectrogram/LJ001-0002.wav.pt:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0002.wav.pt
# --------------------------------------------------------------------------------
# /mel_spectrogram/LJ001-0003.wav.pt:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0003.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0004.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0004.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0005.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0005.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0006.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0006.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0007.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0007.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0008.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0008.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0009.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0009.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0010.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0010.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0011.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0011.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0012.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0012.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0013.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0013.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0014.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0014.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0015.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0015.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0016.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0016.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0017.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0017.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0018.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0018.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0019.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0019.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0020.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0020.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0021.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0021.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0022.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0022.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0023.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0023.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0024.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0024.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0025.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0025.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0026.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0026.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0027.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0027.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0028.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0028.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0029.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0029.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0030.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0030.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0031.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0031.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0032.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0032.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0033.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0033.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0034.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0034.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0035.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0035.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0036.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0036.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0037.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0037.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0038.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0038.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0039.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0039.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0040.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0040.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0041.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0041.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0042.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0042.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0043.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0043.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0044.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0044.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0045.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0045.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0046.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0046.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0047.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0047.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0048.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0048.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0049.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0049.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0050.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0050.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0051.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0051.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0052.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0052.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0053.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0053.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0054.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0054.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0055.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0055.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0056.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0056.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0057.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0057.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0058.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0058.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0059.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0059.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0060.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0060.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0061.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0061.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0062.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0062.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0063.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0063.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0064.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0064.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0065.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0065.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0066.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0066.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0067.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0067.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0068.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0068.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0069.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0069.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0070.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0070.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0071.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0071.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0072.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0072.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0073.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0073.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0074.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0074.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0075.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0075.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0076.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0076.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0077.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0077.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0078.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0078.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0079.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0079.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0080.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0080.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0081.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0081.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0082.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0082.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0083.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0083.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0084.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0084.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0085.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0085.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0086.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0086.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0087.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0087.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0088.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0088.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0089.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0089.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0090.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0090.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0091.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0091.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0092.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0092.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0093.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0093.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0094.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0094.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0095.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0095.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0096.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0096.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0097.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0097.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0098.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0098.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0099.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0099.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0100.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0100.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0101.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0101.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0102.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0102.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0103.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0103.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0104.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0104.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0105.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0105.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0106.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0106.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0107.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0107.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0108.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0108.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0109.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0109.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0110.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0110.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0111.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0111.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0112.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0112.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0113.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0113.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0114.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0114.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0115.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0115.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0116.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0116.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0117.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0117.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0118.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0118.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0119.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0119.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0120.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0120.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0121.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0121.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0122.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0122.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0123.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0123.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0124.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0124.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0125.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0125.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0126.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0126.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0127.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0127.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0128.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0128.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0129.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0129.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0130.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0130.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0131.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0131.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0132.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0132.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0133.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0133.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0134.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0134.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0135.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0135.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0136.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0136.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0137.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0137.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0138.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0138.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0139.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0139.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0140.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0140.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0141.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0141.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0142.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0142.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0143.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0143.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0144.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0144.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0145.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0145.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0146.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0146.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0147.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0147.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0148.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0148.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0149.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0149.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0150.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0150.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0151.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0151.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0152.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0152.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0153.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0153.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0154.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0154.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0155.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0155.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0156.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0156.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0157.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0157.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0158.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0158.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0159.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0159.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0160.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0160.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0161.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0161.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0162.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0162.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0163.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0163.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0164.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0164.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0165.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0165.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0166.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0166.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0167.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0167.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0168.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0168.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0169.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0169.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0170.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0170.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0171.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0171.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0172.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0172.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0173.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0173.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0174.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0174.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0175.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0175.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0176.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0176.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0177.wav.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0177.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0178.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0178.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0179.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0179.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0180.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0180.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0181.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0181.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0182.wav.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philsyn/DiffWave-Vocoder/b1648aae88e87c64bd95c3bcfb56843929c6d219/mel_spectrogram/LJ001-0182.wav.pt -------------------------------------------------------------------------------- /mel_spectrogram/LJ001-0183.wav.pt: 
def train(num_gpus, rank, group_name, output_directory, tensorboard_directory,
          ckpt_iter, n_iters, iters_per_ckpt, iters_per_logging,
          learning_rate, batch_size_per_gpu):
    """
    Train the WaveNet model on the LJSpeech dataset

    Besides its arguments, this function reads the module-level names
    wavenet_config, diffusion_config, dist_config, trainset_config and
    diffusion_hyperparams, which the __main__ block fills in from the JSON
    config before calling it. The tensors inside diffusion_hyperparams are
    moved to the GPU in place.

    Parameters:
    num_gpus, rank, group_name:     parameters for distributed training
    output_directory (str):         save model checkpoints to this path
                                    (created under exp/<experiment name>/)
    tensorboard_directory (str):    save tensorboard events to this path
                                    (created under exp/<experiment name>/)
    ckpt_iter (int or 'max'):       the pretrained checkpoint to be loaded;
                                    automatically selects the maximum iteration if 'max' is selected
    n_iters (int):                  last iteration to train (inclusive)
    iters_per_ckpt (int):           number of iterations between saved checkpoints
    iters_per_logging (int):        number of iterations between training log entries
    learning_rate (float):          learning rate of the Adam optimizer
    batch_size_per_gpu (int):       batchsize per gpu
    """

    # generate experiment (local) path
    local_path = "ch{}_T{}_betaT{}".format(wavenet_config["res_channels"],
                                           diffusion_config["T"],
                                           diffusion_config["beta_T"])

    # Create tensorboard logger (first process only).
    if rank == 0:
        tb = SummaryWriter(os.path.join('exp', local_path, tensorboard_directory))

    # distributed running initialization
    if num_gpus > 1:
        init_distributed(rank, num_gpus, group_name, **dist_config)

    # Get shared output_directory ready
    output_directory = os.path.join('exp', local_path, output_directory)
    if rank == 0:
        if not os.path.isdir(output_directory):
            os.makedirs(output_directory)
            os.chmod(output_directory, 0o775)
        print("output directory", output_directory, flush=True)

    # map diffusion hyperparameters to gpu; "T" is a plain int and stays as is.
    # Compare by value: identity checks against a string literal are unreliable.
    for key in diffusion_hyperparams:
        if key != "T":
            diffusion_hyperparams[key] = diffusion_hyperparams[key].cuda()

    # load training data
    trainloader = load_LJSpeech(trainset_config=trainset_config,
                                batch_size=batch_size_per_gpu,
                                num_gpus=num_gpus)
    print('Data loaded')

    # predefine model
    net = WaveNet(**wavenet_config).cuda()
    print_size(net)

    # apply gradient all reduce
    if num_gpus > 1:
        net = apply_gradient_allreduce(net)

    # define optimizer
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

    # load checkpoint
    if ckpt_iter == 'max':
        ckpt_iter = find_max_epoch(output_directory)
    if ckpt_iter >= 0:
        try:
            # load checkpoint file
            model_path = os.path.join(output_directory, '{}.pkl'.format(ckpt_iter))
            checkpoint = torch.load(model_path, map_location='cpu')

            # feed model dict and optimizer state
            net.load_state_dict(checkpoint['model_state_dict'])
            if 'optimizer_state_dict' in checkpoint:
                optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

            print('Successfully loaded model at iteration {}'.format(ckpt_iter))
        except Exception:
            # Best effort: any loading failure falls back to training from
            # scratch, but Ctrl-C / SystemExit are no longer swallowed.
            ckpt_iter = -1
            print('No valid checkpoint model found, start training from initialization.')
    else:
        ckpt_iter = -1
        print('No valid checkpoint model found, start training from initialization.')

    # training
    loss_fn = nn.MSELoss()  # stateless, so build it once instead of every step
    n_iter = ckpt_iter + 1
    while n_iter < n_iters + 1:
        for mel_spectrogram, audio in trainloader:
            # stop exactly at n_iters instead of finishing the current epoch
            if n_iter > n_iters:
                break

            # load audio and mel spectrogram
            mel_spectrogram = mel_spectrogram.cuda()
            audio = audio.unsqueeze(1).cuda()

            # back-propagation
            optimizer.zero_grad()
            X = (mel_spectrogram, audio)
            loss = training_loss(net, loss_fn, X, diffusion_hyperparams)
            if num_gpus > 1:
                reduced_loss = reduce_tensor(loss.data, num_gpus).item()
            else:
                reduced_loss = loss.item()
            loss.backward()
            optimizer.step()

            # output to log
            # note, only do this on the first gpu
            if n_iter % iters_per_logging == 0 and rank == 0:
                # save training loss to tensorboard
                print("iteration: {} \treduced loss: {} \tloss: {}".format(n_iter, reduced_loss, loss.item()))
                tb.add_scalar("Log-Train-Loss", torch.log(loss).item(), n_iter)
                tb.add_scalar("Log-Train-Reduced-Loss", np.log(reduced_loss), n_iter)

            # save checkpoint
            if n_iter > 0 and n_iter % iters_per_ckpt == 0 and rank == 0:
                checkpoint_name = '{}.pkl'.format(n_iter)
                torch.save({'model_state_dict': net.state_dict(),
                            'optimizer_state_dict': optimizer.state_dict()},
                           os.path.join(output_directory, checkpoint_name))
                print('model at iteration %s is saved' % n_iter)

            n_iter += 1

    # Close TensorBoard.
    if rank == 0:
        tb.close()
if __name__ == "__main__":
    # Command-line entry point: parse arguments, load the JSON config and
    # start training in this process.
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', type=str, default='config.json',
                        help='JSON file for configuration')
    parser.add_argument('-r', '--rank', type=int, default=0,
                        help='rank of process for distributed')
    parser.add_argument('-g', '--group_name', type=str, default='',
                        help='name of group for distributed')
    args = parser.parse_args()

    # Parse configs. The names assigned below live at module level and are
    # read as globals by train(); no `global` statement is needed here because
    # this code already runs in module scope.
    with open(args.config) as f:
        config = json.load(f)
    train_config = config["train_config"]          # training parameters
    dist_config = config["dist_config"]            # to initialize distributed training
    wavenet_config = config["wavenet_config"]      # to define wavenet
    diffusion_config = config["diffusion_config"]  # basic hyperparameters
    trainset_config = config["trainset_config"]    # to load trainset
    # dictionary of all diffusion hyperparameters
    diffusion_hyperparams = calc_diffusion_hyperparams(**diffusion_config)

    num_gpus = torch.cuda.device_count()
    if num_gpus > 1:
        if args.group_name == '':
            # Without a process group this script drives a single GPU only.
            print("WARNING: Multiple GPUs detected but no distributed group set")
            print("Only running 1 GPU. Use distributed_train.py for multiple GPUs")
            num_gpus = 1

    if num_gpus == 1 and args.rank != 0:
        raise Exception("Doing single GPU training on rank > 0")

    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    train(num_gpus, args.rank, args.group_name, **train_config)
def flatten(v):
    """
    Flatten a list of lists/tuples (one level deep)
    """

    return [x for y in v for x in y]


def rescale(x):
    """
    Rescale a tensor to 0-1

    Note: when every element of x is equal, max - min is zero and the
    division is 0/0.
    """

    return (x - x.min()) / (x.max() - x.min())


def find_max_epoch(path):
    """
    Find maximum epoch/iteration in path, formatted ${n_iter}.pkl
    E.g. 100000.pkl

    Parameters:
    path (str): checkpoint path

    Returns:
    maximum iteration, -1 if there is no (valid) checkpoint; a missing
    directory counts as "no checkpoint" rather than an error
    """

    try:
        files = os.listdir(path)
    except FileNotFoundError:
        # nothing has been saved under this path yet
        return -1

    epoch = -1
    for f in files:
        if not f.endswith('.pkl'):
            continue
        try:
            epoch = max(epoch, int(f[:-4]))
        except ValueError:
            # the part before ".pkl" is not an integer; ignore this file
            continue
    return epoch


def print_size(net):
    """
    Print the number of trainable parameters of a network (in millions)

    Anything that is not a torch.nn.Module (including None) is silently ignored.
    """

    if isinstance(net, torch.nn.Module):
        params = sum(p.numel() for p in net.parameters() if p.requires_grad)
        print("{} Parameters: {:.6f}M".format(
            net.__class__.__name__, params / 1e6), flush=True)


# Utilities for diffusion models

def std_normal(size):
    """
    Generate the standard Gaussian variable of a certain size

    The sample is drawn with torch.normal and then moved to the current
    CUDA device.
    """

    return torch.normal(0, 1, size=size).cuda()
def calc_diffusion_step_embedding(diffusion_steps, diffusion_step_embed_dim_in):
    """
    Embed a diffusion step $t$ into a higher dimensional space
    E.g. the embedding vector in the 128-dimensional space is
    [sin(t * 10^(-0*4/63)), ... , sin(t * 10^(-63*4/63)),
     cos(t * 10^(-0*4/63)), ... , cos(t * 10^(-63*4/63))]

    Parameters:
    diffusion_steps (torch tensor, shape=(batchsize, 1)):
                                diffusion steps for batch data
    diffusion_step_embed_dim_in (int, must be even):
                                dimensionality of the embedding space for discrete diffusion steps

    Returns:
    the embedding vectors (torch.tensor, shape=(batchsize, diffusion_step_embed_dim_in)),
    on the same device as diffusion_steps
    """

    assert diffusion_step_embed_dim_in % 2 == 0

    half_dim = diffusion_step_embed_dim_in // 2
    # Frequencies fall geometrically from 1 to 1e-4 over half_dim entries.
    # max(..., 1) keeps the 2-dimensional case (a single frequency of 1)
    # from dividing by zero.
    log_step = float(np.log(10000)) / max(half_dim - 1, 1)
    # Build the frequencies next to the input rather than assuming a GPU.
    frequencies = torch.exp(
        torch.arange(half_dim, device=diffusion_steps.device) * -log_step)
    angles = diffusion_steps * frequencies  # broadcasts to (batchsize, half_dim)

    return torch.cat((torch.sin(angles), torch.cos(angles)), 1)


def calc_diffusion_hyperparams(T, beta_0, beta_T):
    """
    Compute diffusion process hyperparameters

    Parameters:
    T (int):                    number of diffusion steps
    beta_0 and beta_T (float):  beta schedule start/end value,
                                where any beta_t in the middle is linearly interpolated

    Returns:
    a dictionary of diffusion hyperparameters including:
        T (int), Beta/Alpha/Alpha_bar/Sigma (torch.tensor on cpu, shape=(T, ))
    """

    Beta = torch.linspace(beta_0, beta_T, T)
    Alpha = 1 - Beta

    # \bar{\alpha}_t = \prod_{s=1}^t \alpha_s
    Alpha_bar = torch.cumprod(Alpha, dim=0)

    # \tilde{\beta}_t = \beta_t * (1-\bar{\alpha}_{t-1}) / (1-\bar{\alpha}_t)
    # The first entry has no predecessor and keeps the plain beta value.
    Beta_tilde = Beta.clone()
    Beta_tilde[1:] = Beta[1:] * ((1 - Alpha_bar[:-1]) / (1 - Alpha_bar[1:]))

    # \sigma_t^2 = \tilde{\beta}_t
    Sigma = torch.sqrt(Beta_tilde)

    return {"T": T, "Beta": Beta, "Alpha": Alpha,
            "Alpha_bar": Alpha_bar, "Sigma": Sigma}
def sampling(net, size, diffusion_hyperparams, condition=None):
    r"""
    Perform the complete sampling step according to p(x_0|x_T) = \prod_{t=1}^T p_{\theta}(x_{t-1}|x_t)

    Parameters:
    net (torch network):            the wavenet model
    size (tuple):                   size of tensor to be generated,
                                    usually is (number of audios to generate, channels=1, length of audio)
    diffusion_hyperparams (dict):   dictionary of diffusion hyperparameters returned by calc_diffusion_hyperparams
                                    note, the tensors need to be cuda tensors
    condition (torch.tensor):       ground truth mel spectrogram read from disk
                                    None if used for unconditional generation

    Returns:
    the generated audio(s) in torch.tensor, shape=size
    """

    _dh = diffusion_hyperparams
    T, Alpha, Alpha_bar, Sigma = _dh["T"], _dh["Alpha"], _dh["Alpha_bar"], _dh["Sigma"]
    assert len(Alpha) == T
    assert len(Alpha_bar) == T
    assert len(Sigma) == T
    assert len(size) == 3

    print('begin sampling, total number of reverse steps = %s' % T)

    # start from pure noise x_T and walk the reverse chain down to x_0
    x = std_normal(size)
    with torch.no_grad():
        for t in range(T-1, -1, -1):
            # every item in the batch is at the same reverse step t
            diffusion_steps = (t * torch.ones((size[0], 1))).cuda()
            # predict the noise: epsilon_theta(x_t, condition, t)
            epsilon_theta = net((x, condition, diffusion_steps,))
            # mean of x_{t-1}: (x_t - (1-alpha_t)/sqrt(1-alpha_bar_t) * epsilon_theta) / sqrt(alpha_t)
            x = (x - (1-Alpha[t])/torch.sqrt(1-Alpha_bar[t]) * epsilon_theta) / torch.sqrt(Alpha[t])
            if t > 0:
                # add the variance term; the final step (t == 0) stays noise-free
                x = x + Sigma[t] * std_normal(size)
    return x
def training_loss(net, loss_fn, X, diffusion_hyperparams):
    """
    Compute the training loss of epsilon and epsilon_theta

    Parameters:
    net (torch network):            the wavenet model
    loss_fn (torch loss function):  the loss function, e.g. nn.MSELoss()
    X (tuple, shape=(2,)):          training data in tuple form (mel_spectrograms, audios)
                                    mel_spectrograms: torch.tensor, shape is batchsize followed by each mel_spectrogram shape
                                    audios: torch.tensor, shape=(batchsize, 1, length of audio)
    diffusion_hyperparams (dict):   dictionary of diffusion hyperparameters returned by calc_diffusion_hyperparams
                                    note, Alpha_bar must live on the same device as the audio

    Returns:
    training loss, i.e. loss_fn(predicted noise, true noise)
    """
    assert isinstance(X, tuple) and len(X) == 2

    _dh = diffusion_hyperparams
    T, Alpha_bar = _dh["T"], _dh["Alpha_bar"]

    mel_spectrogram, audio = X
    B, C, L = audio.shape  # B is batchsize, C=1, L is audio length
    device = audio.device  # follow the data instead of assuming a GPU

    # one random step index per batch item, drawn from 0..T-1 (indexes Alpha_bar)
    diffusion_steps = torch.randint(T, size=(B, 1, 1)).to(device)
    # the noise epsilon the network has to recover
    z = torch.normal(0, 1, size=audio.shape).to(device)
    # compute x_t from q(x_t|x_0): sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * epsilon
    signal_scale = torch.sqrt(Alpha_bar[diffusion_steps])
    noise_scale = torch.sqrt(1 - Alpha_bar[diffusion_steps])
    transformed_X = signal_scale * audio + noise_scale * z
    # predict epsilon with the network; the step tensor is passed as (B, 1)
    epsilon_theta = net((transformed_X, mel_spectrogram, diffusion_steps.view(B, 1),))

    return loss_fn(epsilon_theta, z)