├── .gitignore ├── .ipynb_checkpoints ├── MAGNet-New-checkpoint.ipynb └── train-for-javascript-checkpoint.ipynb ├── LICENSE ├── PyTorch ├── generate.py ├── model.py └── train.py ├── README.md ├── legacy ├── MAGNet-New.ipynb ├── train-for-javascript.ipynb └── train-for-python.ipynb └── utils ├── audio_dataset_generator.py ├── load_and_convert.py ├── random_search.py ├── sequence_stfts_test.ipynb ├── stft_net_with_hop.ipynb └── stft_test.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.npy 3 | local_assets/ 4 | assets/ -------------------------------------------------------------------------------- /.ipynb_checkpoints/train-for-javascript-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# MAGnet \n", 8 | "### Train your own models to generate audio in python or convert to use in the browser on mimicproject.com" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import sys\n", 18 | "import tensorflow as tf\n", 19 | "import pywt\n", 20 | "from utils.audio_dataset_generator import AudioDatasetGenerator\n", 21 | "import numpy as np\n", 22 | "import tensorflowjs as tfjs" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "### Set up variables\n", 30 | "Including the path to your audio" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 6, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "# Model\n", 42 | "load_model = False\n", 43 | "\n", 44 | "# Dataset\n", 45 | "sequence_length = 40\n", 46 | "audio_data_path = \"assets/grime/\"\n", 47 | "force_new_dataset = True\n", 48 | "\n", 49 | "# Feature Extraction and Audio Genreation\n", 50 | "sample_rate = 44100\n", 51 | "fft_settings = [2048, 1024, 512]\n", 52 | "fft_size = fft_settings[0]\n", 53 | "window_size = fft_settings[1]\n", 54 | "hop_size = fft_settings[2]\n", 55 | "\n", 56 | "# General Network\n", 57 | "learning_rate = 0.001\n", 58 | "amount_epochs = 100\n", 59 | "batch_size = 64\n", 60 | "loss_type = \"mse\"\n", 61 | "weight_decay = 0.0001\n", 62 | "\n", 63 | "# Recurrent Neural Network\n", 64 | "rnn_type = \"lstm\"\n", 65 | "number_rnn_layers = 2\n", 66 | "rnn_number_units = 256" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "### Make the dataset from the audio" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "# Make your dataset\n", 83 | "\n", 84 | "dataset = AudioDatasetGenerator(fft_size, window_size, hop_size,\n", 85 | " sequence_length, sample_rate)\n", 86 | "\n", 87 | "dataset.load(audio_data_path, force_new_dataset)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "collapsed": true 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "# Set up the model\n", 99 | "\n", 100 | "model = tf.keras.Sequential()\n", 101 | "\n", 102 | "model.add(tf.keras.layers.BatchNormalization(input_shape=[dataset.x_frames.shape[1], dataset.x_frames.shape[2]]))\n", 103 | "\n", 104 | "for layer in range(number_rnn_layers):\n", 105 | " return_sequence = False if layer == (number_rnn_layers - 1) else True\n", 106 | " model.add(tf.keras.layers.LSTM(rnn_number_units, 
return_sequences= return_sequence))\n", 107 | " \n", 108 | "model.add(tf.keras.layers.Dense(dataset.y_frames.shape[1]))\n", 109 | "\n", 110 | "model.add(tf.keras.layers.Activation('linear'))\n", 111 | "opt = tf.keras.optimizers.Adam(learning_rate)\n", 112 | "model.compile(optimizer=opt, loss=loss_type)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "### Train your model" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "scrolled": true 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "# Train\n", 131 | "\n", 132 | "model.fit(dataset.x_frames, dataset.y_frames, batch_size=batch_size, epochs=amount_epochs)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "### Save your model as a keras model" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "collapsed": true 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "# Save your model\n", 151 | "\n", 152 | "model.save(\".h5\")" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "### Convert to use online with tensorflow.js. \n", 160 | "Find example code at https://mimicproject.com/code/b530ba9e-dfd9-0440-8358-86b6420b210d\n", 161 | "Upload the \n", 162 | "* .json file\n", 163 | "* the shards\n", 164 | "* your audio\n", 165 | "Update the MODEL_URLS and SAMPLE_URLS in dataset-paths.js" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": { 172 | "collapsed": true 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "tfjs.converters.save_keras_model(model, \".json\")" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "### Or generate samples in python" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 84, 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "name": "stdout", 193 | "output_type": "stream", 194 | "text": [ 195 | "100% audio generation complete. 
\r" 196 | ] 197 | } 198 | ], 199 | "source": [ 200 | "# Or generate samples in python\n", 201 | "\n", 202 | "amount_samples = 1\n", 203 | "sequence_length_max = 500\n", 204 | "impulse_scale = 1.0\n", 205 | "griffin_iterations = 60\n", 206 | "random_chance = 0.05\n", 207 | "random_strength = 0.0\n", 208 | "\n", 209 | "dimension1 = dataset.x_frames.shape[1]\n", 210 | "dimension2 = dataset.x_frames.shape[2]\n", 211 | "shape = (1, dimension1, dimension2, 1) if use_cnn else (1, dimension1, dimension2)\n", 212 | "\n", 213 | "audio = []\n", 214 | "\n", 215 | "if use_wavelets:\n", 216 | " temp_audio = np.array(0)\n", 217 | "for i in range(amount_samples): \n", 218 | " \n", 219 | " random_index = np.random.randint(0, (len(dataset.x_frames) - 1)) \n", 220 | " \n", 221 | " impulse = np.array(dataset.x_frames[random_index]) * impulse_scale\n", 222 | " predicted_magnitudes = impulse\n", 223 | " \n", 224 | " if use_wavelets:\n", 225 | " for seq in range (impulse.shape[0]):\n", 226 | " coeffs = pywt.array_to_coeffs(impulse[seq], dataset.coeff_slices)\n", 227 | " recon = (pywt.waverecn(coeffs, wavelet=wavelet))\n", 228 | " temp_audio = np.append(temp_audio, recon)\n", 229 | " for j in range(sequence_length_max):\n", 230 | " prediction = model.predict(impulse.reshape(shape))\n", 231 | " #Wavelet audio\n", 232 | " if use_wavelets:\n", 233 | " coeffs = pywt.array_to_coeffs(prediction[0], dataset.coeff_slices)\n", 234 | " recon = (pywt.waverecn(coeffs, wavelet=wavelet))\n", 235 | " temp_audio = np.append(temp_audio, recon)\n", 236 | " \n", 237 | " if use_cnn:\n", 238 | " prediction = prediction.reshape(1, dataset.y_frames.shape[1], 1)\n", 239 | " \n", 240 | " predicted_magnitudes = np.vstack((predicted_magnitudes, prediction)) \n", 241 | " impulse = predicted_magnitudes[-sequence_length:]\n", 242 | " \n", 243 | " if (np.random.random_sample() < random_chance) :\n", 244 | " idx = np.random.randint(0, dataset.sequence_length)\n", 245 | " impulse[idx] = impulse[idx] + np.random.random_sample(impulse[idx].shape) * random_strength\n", 246 | " \n", 247 | " done = int(float(i * sequence_length_max + j) / float(amount_samples * sequence_length_max) * 100.0) + 1\n", 248 | " sys.stdout.write('{}% audio generation complete. 
\\r'.format(done))\n", 249 | " sys.stdout.flush()\n", 250 | " \n", 251 | " if use_wavelets: \n", 252 | " audio += [temp_audio]\n", 253 | " else:\n", 254 | " predicted_magnitudes = np.array(predicted_magnitudes).reshape(-1, int(window_size)+1) \n", 255 | " audio += [dataset.griffin_lim(predicted_magnitudes.T, griffin_iterations)]\n", 256 | "audio = np.array(audio)" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 72, 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "name": "stderr", 266 | "output_type": "stream", 267 | "text": [ 268 | "IOPub data rate exceeded.\n", 269 | "The notebook server will temporarily stop sending output\n", 270 | "to the client in order to avoid crashing it.\n", 271 | "To change this limit, set the config variable\n", 272 | "`--NotebookApp.iopub_data_rate_limit`.\n" 273 | ] 274 | } 275 | ], 276 | "source": [ 277 | "# Play them back\n", 278 | "\n", 279 | "from IPython.display import Audio\n", 280 | "i = 0\n", 281 | "Audio(audio[i], rate=sample_rate)" 282 | ] 283 | } 284 | ], 285 | "metadata": { 286 | "kernelspec": { 287 | "display_name": "Python 3", 288 | "language": "python", 289 | "name": "python3" 290 | }, 291 | "language_info": { 292 | "codemirror_mode": { 293 | "name": "ipython", 294 | "version": 3 295 | }, 296 | "file_extension": ".py", 297 | "mimetype": "text/x-python", 298 | "name": "python", 299 | "nbconvert_exporter": "python", 300 | "pygments_lexer": "ipython3", 301 | "version": "3.8.5" 302 | } 303 | }, 304 | "nbformat": 4, 305 | "nbformat_minor": 2 306 | } 307 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Mick Grierson 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /PyTorch/generate.py: -------------------------------------------------------------------------------- 1 | import soundfile as sf 2 | import numpy as np 3 | import librosa 4 | from model import RNNModel, SpectrogramDataset, preprocess_data 5 | import torch 6 | from datetime import datetime 7 | 8 | n_fft=2048 9 | hop_length=512 10 | win_length=2048 11 | sequence_length = 20 12 | file_name = "../assets/Wiley.wav" 13 | x_frames, y_frames = preprocess_data(file_name, n_fft=n_fft, 14 | hop_length=hop_length, win_length=win_length, 15 | sequence_length=sequence_length) 16 | spectrogram_dataset = SpectrogramDataset(x_frames, y_frames) 17 | 18 | 19 | points = [0.0, 0.5, 0.2, 0.7] 20 | lengths = [200, 200, 200, 200] 21 | random_strength = 0.2 22 | 23 | model = RNNModel(input_size=1025, hidden_size=128, num_layers=2, output_size=1025) # Example model initialization 24 | checkpoint = "model_weights_26-Feb-2024-17-10-36.pth" 25 | model.load_state_dict(torch.load(checkpoint)) 26 | model.eval() 27 | 28 | output_sequence_length = np.array(lengths).sum() 29 | dimension1 = x_frames.shape[1] 30 | dimension2 = x_frames.shape[2] 31 | shape = (1, dimension1, dimension2) 32 | ctr = 0 33 | change_at = lengths[ctr] 34 | 35 | audio = [] 36 | index = int(points[ctr] * len(x_frames)) 37 | impulse = x_frames[index] 38 | predicted_magnitudes = impulse 39 | random_chance = 0.05 40 | print(x_frames.shape, impulse.shape) 41 | 42 | for j in range(output_sequence_length): 43 | prediction = model(impulse.unsqueeze(0)) 44 | predicted_magnitudes = torch.cat((predicted_magnitudes, prediction.transpose(0,1)), dim=1) 45 | impulse = predicted_magnitudes[:,-sequence_length:] 46 | if (np.random.random_sample() < random_chance) : 47 | np.random.seed() 48 | random_index = np.random.randint(0, (len(x_frames) - 1)) 49 | impulse = x_frames[random_index] 50 | if j > change_at: 51 | print(ctr, j, change_at, index) 52 | ctr = ctr + 1 53 | index = int(points[ctr] * len(x_frames)) 54 | impulse = x_frames[index] 55 | change_at = change_at + lengths[ctr] 56 | 57 | predicted_magnitudes = predicted_magnitudes.detach().numpy() 58 | audio = librosa.griffinlim(predicted_magnitudes, n_fft=n_fft, hop_length=hop_length, win_length=win_length) 59 | print(predicted_magnitudes.shape, len(audio)) 60 | timestampStr = datetime.now().strftime("%d-%b-%Y-%H-%M-%S") 61 | # # WRITE AUDIO 62 | output_name = "wiley" 63 | sf.write(f"{output_name}_{timestampStr}.wav", audio, 44100) -------------------------------------------------------------------------------- /PyTorch/model.py: -------------------------------------------------------------------------------- 1 | # Recurrent Neural Network 2 | from torch.utils.data import Dataset 3 | import torch 4 | import torch.nn as nn 5 | import numpy as np 6 | import librosa 7 | import sys 8 | from os.path import isdir, exists 9 | from os import listdir 10 | 11 | 12 | def preprocess_data(path, n_fft=2048,hop_length=512, win_length=2048, sequence_length = 40, sr = 44100): 13 | cached_x_path = path + '_x_frames.npy' 14 | cached_y_path = path + '_y_frames.npy' 15 | if exists(cached_x_path) and exists(cached_y_path): 16 | x_frames = np.load(cached_x_path) 17 | y_frames = np.load(cached_y_path) 18 | print("loading cached data") 19 | return torch.tensor(x_frames), torch.tensor(y_frames) 20 | 21 | x = [0] 22 | if not isdir(path): 23 | x, sr = librosa.load(path, sr=sr) 24 | else: 25 | files = listdir(path) 26 | x = np.array([0]) 27 | for file in 
files: 28 | if not ".DS" in file: 29 | audio, sr, = librosa.load(path + file, sr = 44100) 30 | x = np.concatenate((x, audio)) 31 | x = np.array(x, dtype=np.float32) 32 | data_tf = torch.tensor(x) 33 | # Compute STFT 34 | n = torch.stft(data_tf, n_fft=n_fft, hop_length=hop_length, win_length=win_length, 35 | window=torch.hann_window(win_length), center=True, normalized=False, onesided=True, return_complex=True) 36 | 37 | magnitude_spectrograms = torch.abs(n) 38 | print(data_tf.shape, n.shape, magnitude_spectrograms.shape) 39 | 40 | start = 0 41 | end = magnitude_spectrograms.shape[1] - sequence_length - 1 42 | step = 1 43 | x_frames = [] 44 | y_frames = [] 45 | 46 | for i in range(start, end, step): 47 | done = int((float(i) / float(end)) * 100.0) 48 | sys.stdout.write('{}% data generation complete. \r'.format(done)) 49 | sys.stdout.flush() 50 | x = magnitude_spectrograms[:, i:i + sequence_length] 51 | y = magnitude_spectrograms[:, i + sequence_length] 52 | x_frames.append(x) 53 | y_frames.append(y) 54 | 55 | x_frames = torch.stack(x_frames) 56 | y_frames = torch.stack(y_frames) 57 | print(x_frames.shape, y_frames.shape) 58 | np.save(cached_x_path, x_frames) 59 | np.save(cached_y_path, y_frames) 60 | return x_frames, y_frames 61 | 62 | class SpectrogramDataset(Dataset): 63 | def __init__(self, x_frames, y_frames): 64 | self.x_frames = x_frames 65 | self.y_frames = y_frames 66 | 67 | def __len__(self): 68 | return self.x_frames.shape[0] # Number of frames 69 | 70 | def __getitem__(self, idx): 71 | return self.x_frames[idx], self.y_frames[idx] 72 | 73 | class RNNModel(nn.Module): 74 | def __init__(self, input_size, hidden_size, num_layers, output_size): 75 | super(RNNModel, self).__init__() 76 | 77 | self.batch_norm = nn.BatchNorm1d(input_size) 78 | print(input_size, hidden_size, num_layers) 79 | self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True) 80 | self.fc = nn.Linear(hidden_size, output_size) 81 | 82 | def forward(self, x): 83 | x = self.batch_norm(x) # BatchNorm expects [batch, features, seq_len] 84 | x, _ = self.lstm(x.transpose(1, 2)) # lstm expects [batch, seq_len, features] 85 | x = self.fc(x[:, -1, :]) 86 | return x 87 | 88 | -------------------------------------------------------------------------------- /PyTorch/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.optim import Adam 4 | from torch.utils.data import DataLoader 5 | from model import RNNModel, SpectrogramDataset, preprocess_data 6 | from datetime import datetime 7 | 8 | n_fft = 2048 9 | hop_length = 512 10 | win_length = 2048 11 | sequence_length = 40 12 | file_name = "../assets/Wiley.wav" 13 | x_frames, y_frames = preprocess_data(file_name, n_fft=n_fft, 14 | hop_length=hop_length, win_length=win_length, 15 | sequence_length=sequence_length, sr = 44100) 16 | # Create an instance of the dataset 17 | spectrogram_dataset = SpectrogramDataset(x_frames, y_frames) 18 | 19 | # Create a DataLoader 20 | batch_size = 64 # Define your batch size 21 | shuffle = True # Shuffle the data every epoch 22 | 23 | dataloader = DataLoader(spectrogram_dataset, batch_size=batch_size, shuffle=shuffle, drop_last=True) 24 | 25 | # # Model parameters 26 | learning_rate = 0.001 27 | amount_epochs = 200 28 | batch_size = 64 29 | loss_type = nn.MSELoss() 30 | weight_decay = 0.0001 31 | 32 | model = RNNModel(input_size=n_fft//2+1, hidden_size=128, num_layers=2, output_size=n_fft//2+1) 33 | 34 | # checkpoint = 
'model_weights_26-Feb-2024-16-58-29.pth' 35 | # model.load_state_dict(torch.load(checkpoint)) 36 | # model.eval() 37 | 38 | opt = Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay) 39 | 40 | for epoch in range(amount_epochs): 41 | running_loss = 0.0 42 | for inputs, targets in dataloader: 43 | opt.zero_grad() 44 | outputs = model(inputs) 45 | loss = loss_type(outputs, targets) 46 | loss.backward() 47 | opt.step() 48 | running_loss += loss.item() 49 | print(f'Epoch [{epoch+1}/{amount_epochs}], Loss: {running_loss/len(dataloader):.4f}') 50 | running_loss = 0.0 51 | 52 | timestampStr = datetime.now().strftime("%d-%b-%Y-%H-%M-%S") 53 | torch.save(model.state_dict(), f"model_weights_{timestampStr}.pth") 54 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MAGNet 2 | 3 | ## Colab 4 | 5 | A Colab notebook for training and generating audio using Keras 6 | 7 | [![colab_badge](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1CgXT8xsmoPA1MWH4tifY6sTAnutsANRv?usp=sharing) 8 | 9 | ## Train / Generate in [PyTorch](PyTorch/) 10 | 11 | This is the recommended approach, and it is required for realtime use with Dorothy 12 | 13 | ## Realtime implementation 14 | 15 | Implemented as part of the [Dorothy](https://github.com/lmccallum/dorothy) creative coding library 16 | 17 | ## Description 18 | 19 | This repo demonstrates an LSTM audio generation process using MAGNet, a spectral approach to audio analysis and generation with neural networks. The techniques included here were used in the Mezzanine Vs. MAGNet project featured in the Barbican's AI: More than Human exhibition. 20 | 21 | It represents ongoing work from researchers at The Creative Computing Institute, UAL and Goldsmiths, University of London. MAGNet trains on the magnitude spectra of acoustic audio signals, and produces entirely new magnitude spectra that can be turned back into sound using phase reconstruction - it's very high quality in terms of audio fidelity. 22 | 23 | This repo provides a chance for people to train their own models on their own source audio and generate new sounds. Both of the given projects are designed to be simple to understand and easy to run. 24 | 25 | ## Legacy versions 26 | 27 | train-for-python contains a walkthrough of how to use tflearn to do this entirely in Python. Phase reconstruction is done using Griffin-Lim. 28 | 29 | train-for-javascript contains a walkthrough of how to use Keras to train a model, which can then be converted for use in JavaScript projects in the browser with tensorflow.js. Example code for this is on the MIMIC platform: https://mimicproject.com/code/b530ba9e-dfd9-0440-8358-86b6420b210d. Phase reconstruction is done using a port of LWS, developed at Goldsmiths for this purpose. 30 | 31 | Contributions have been made by Mick Grierson, Leon Fedden, Sam Park-Wolfe, Jakub Fiala and Louis McCallum.
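## PyTorch workflow at a glance

The two PyTorch scripts boil down to the loop below: frame an audio file's magnitude spectrogram into (sequence, next-frame) pairs, train the LSTM to predict the next frame, then generate frame by frame and resynthesise with Griffin-Lim. This is only a minimal sketch of how the pieces fit together - the audio path, epoch count and output length are illustrative placeholders, and the full versions live in `PyTorch/train.py` and `PyTorch/generate.py`.

```python
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader
import librosa
import soundfile as sf
from model import RNNModel, SpectrogramDataset, preprocess_data

n_fft, hop_length, win_length, sequence_length = 2048, 512, 2048, 40

# Frame the magnitude spectrogram into (sequence -> next frame) training pairs.
x_frames, y_frames = preprocess_data("../assets/my_audio.wav",  # illustrative path
                                     n_fft=n_fft, hop_length=hop_length,
                                     win_length=win_length,
                                     sequence_length=sequence_length)
loader = DataLoader(SpectrogramDataset(x_frames, y_frames),
                    batch_size=64, shuffle=True, drop_last=True)

model = RNNModel(input_size=n_fft // 2 + 1, hidden_size=128,
                 num_layers=2, output_size=n_fft // 2 + 1)
opt = Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
loss_fn = nn.MSELoss()

for epoch in range(10):  # train.py uses 200 epochs
    for inputs, targets in loader:
        opt.zero_grad()
        loss = loss_fn(model(inputs), targets)
        loss.backward()
        opt.step()

# Seed with a real window of frames, then predict one new frame at a time.
model.eval()
impulse = x_frames[0]                     # shape [n_fft//2 + 1, sequence_length]
predicted = impulse
with torch.no_grad():
    for _ in range(500):                  # number of generated frames
        next_frame = model(impulse.unsqueeze(0))               # [1, n_fft//2 + 1]
        predicted = torch.cat((predicted, next_frame.transpose(0, 1)), dim=1)
        impulse = predicted[:, -sequence_length:]

# Rebuild phase from the predicted magnitudes and write the result to disk.
audio = librosa.griffinlim(predicted.numpy(), n_fft=n_fft,
                           hop_length=hop_length, win_length=win_length)
sf.write("generated.wav", audio, 44100)
```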
32 | -------------------------------------------------------------------------------- /legacy/MAGNet-New.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import sys\n", 10 | "import librosa\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "import librosa.display\n", 13 | "import IPython.display as ipd\n", 14 | "import os\n", 15 | "import tensorflow as tf\n", 16 | "import numpy as np" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stderr", 26 | "output_type": "stream", 27 | "text": [ 28 | "2024-02-26 13:13:13.612659: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1\n", 29 | "2024-02-26 13:13:13.612680: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB\n", 30 | "2024-02-26 13:13:13.612683: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB\n", 31 | "2024-02-26 13:13:13.612732: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.\n", 32 | "2024-02-26 13:13:13.613041: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: )\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "a = tf.convert_to_tensor(([0,1,2,3,4,5]), np.float32)\n" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "text/plain": [ 48 | "" 49 | ] 50 | }, 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "output_type": "execute_result" 54 | } 55 | ], 56 | "source": [ 57 | "a" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 7, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "2.0" 69 | ] 70 | }, 71 | "execution_count": 7, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "a.numpy()[2]" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 3, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "name": "stderr", 87 | "output_type": "stream", 88 | "text": [ 89 | ":228: RuntimeWarning: scipy._lib.messagestream.MessageStream size changed, may indicate binary incompatibility. 
Expected 56 from C header, got 64 from PyObject\n" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "path = \"assets/\"\n", 95 | "\n", 96 | "files = os.listdir(path)\n", 97 | "x = np.array([0])\n", 98 | "for file in files:\n", 99 | " if not \".DS\" in file:\n", 100 | " audio, sr, = librosa.load(path + file)\n", 101 | " x = np.concatenate((x, audio))" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 4, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "text/plain": [ 112 | "(3583747,)" 113 | ] 114 | }, 115 | "execution_count": 4, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": [ 121 | "x.shape" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 5, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stderr", 131 | "output_type": "stream", 132 | "text": [ 133 | "2024-02-26 12:53:05.121663: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1\n", 134 | "2024-02-26 12:53:05.121695: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB\n", 135 | "2024-02-26 12:53:05.121698: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB\n", 136 | "2024-02-26 12:53:05.121932: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.\n", 137 | "2024-02-26 12:53:05.122124: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: )\n" 138 | ] 139 | }, 140 | { 141 | "data": { 142 | "text/plain": [ 143 | "TensorShape([3583747])" 144 | ] 145 | }, 146 | "execution_count": 5, 147 | "metadata": {}, 148 | "output_type": "execute_result" 149 | } 150 | ], 151 | "source": [ 152 | "data_tf = tf.convert_to_tensor(x, np.float32)\n", 153 | "data_tf.shape" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 6, 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "ename": "", 163 | "evalue": "", 164 | "output_type": "error", 165 | "traceback": [ 166 | "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", 167 | "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", 168 | "\u001b[1;31mClick here for more info. \n", 169 | "\u001b[1;31mView Jupyter log for further details." 170 | ] 171 | } 172 | ], 173 | "source": [ 174 | "n = tf.signal.stft(data_tf,2048,512)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "magnitude_spectrograms = tf.abs(n)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "#we need to get all the fft frames and organise them into sequence batches\n", 193 | "start = 0\n", 194 | "sequence_length = 40\n", 195 | "end = magnitude_spectrograms.shape[0] - sequence_length - 1\n", 196 | "step = 1\n", 197 | "x_frames = []\n", 198 | "y_frames = []\n", 199 | "for i in range(start, end, step):\n", 200 | " done = int(float(i) / float(end) * 100.0)\n", 201 | " sys.stdout.write('{}% data generation complete. 
\\r'.format(done))\n", 202 | " sys.stdout.flush()\n", 203 | " x = magnitude_spectrograms[i:i+sequence_length]\n", 204 | " y = magnitude_spectrograms[i+sequence_length]\n", 205 | " x_frames.append(x)\n", 206 | " y_frames.append(y)\n", 207 | "x_frames = np.array(x_frames)\n", 208 | "y_frames = np.array(y_frames)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "learning_rate = 0.001\n", 218 | "amount_epochs = 500\n", 219 | "batch_size = 64\n", 220 | "loss_type = \"mse\"\n", 221 | "weight_decay = 0.0001\n", 222 | "\n", 223 | "\n", 224 | "\n", 225 | "# Recurrent Neural Network\n", 226 | "rnn_type = \"lstm\"\n", 227 | "number_rnn_layers = 3\n", 228 | "rnn_number_units = 128\n", 229 | "model = tf.keras.Sequential()\n", 230 | "\n", 231 | "model.add(tf.keras.layers.BatchNormalization(input_shape=[x_frames.shape[1], x_frames.shape[2]]))\n", 232 | "\n", 233 | "for layer in range(number_rnn_layers):\n", 234 | " return_sequence = False if layer == (number_rnn_layers - 1) else True\n", 235 | " model.add(tf.keras.layers.LSTM(rnn_number_units, return_sequences= return_sequence))\n", 236 | " \n", 237 | "model.add(tf.keras.layers.Dense(y_frames.shape[1]))\n", 238 | "\n", 239 | "model.add(tf.keras.layers.Activation('linear'))\n", 240 | "opt = tf.keras.optimizers.Adam(learning_rate)\n", 241 | "model.compile(optimizer=opt, loss=loss_type)\n", 242 | "\n", 243 | "# this model trains much much faster than the prior models " 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": { 250 | "scrolled": true 251 | }, 252 | "outputs": [], 253 | "source": [ 254 | "model.fit(x_frames, y_frames, batch_size=batch_size, epochs=amount_epochs)" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "# Save your model\n", 264 | "model.save(\"myModel.h5\")" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [ 273 | "# Load your model\n", 274 | "model = tf.keras.models.load_model(\"myModel.h5\")" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "amount_samples = 1\n", 284 | "sequence_length_max = 1000\n", 285 | "impulse_scale = 1.0\n", 286 | "random_chance = 0.1\n", 287 | "random_strength = 1.0\n", 288 | "window_size = 1024\n", 289 | "\n", 290 | "dimension1 = x_frames.shape[1]\n", 291 | "dimension2 = x_frames.shape[2]\n", 292 | "shape = (1, dimension1, dimension2)\n", 293 | "\n", 294 | "audio = []\n", 295 | "\n", 296 | "for i in range(amount_samples): \n", 297 | " \n", 298 | " random_index = np.random.randint(0, (len(x_frames) - 1)) \n", 299 | " impulse = np.array(x_frames[random_index]) * impulse_scale\n", 300 | " predicted_magnitudes = impulse\n", 301 | " \n", 302 | " for j in range(sequence_length_max):\n", 303 | " prediction = model.predict(impulse.reshape(shape))\n", 304 | " predicted_magnitudes = np.vstack((predicted_magnitudes, prediction))\n", 305 | " impulse = predicted_magnitudes[-sequence_length:]\n", 306 | " \n", 307 | " if (np.random.random_sample() < random_chance) :\n", 308 | " random_index = np.random.randint(0, (len(x_frames) - 1)) \n", 309 | " impulse = np.array(x_frames[random_index]) * impulse_scale * random_strength\n", 310 | " #predicted_magnitudes = impulse\n", 311 | " \n", 312 | 
" done = int(float(i * sequence_length_max + j) / float(amount_samples * sequence_length_max) * 100.0) + 1\n", 313 | " sys.stdout.write('{}% audio generation complete. \\r'.format(done))\n", 314 | " sys.stdout.flush()\n", 315 | " \n", 316 | " #predicted_magnitudes = np.array(predicted_magnitudes).reshape(-1, window_size+1) \n", 317 | " predicted_magnitudes = np.array(predicted_magnitudes).reshape(-1, window_size+1) \n", 318 | " #audio += [librosa.griffinlim(predicted_magnitudes.T)]\n", 319 | " #audio+=[predicted_magnitudes.T]\n", 320 | " new_sample = [librosa.griffinlim(predicted_magnitudes.T)]\n", 321 | " audio.append(new_sample)\n", 322 | "audio = np.array(audio)" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": null, 328 | "metadata": {}, 329 | "outputs": [], 330 | "source": [ 331 | "from IPython.display import Audio" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": {}, 338 | "outputs": [], 339 | "source": [ 340 | "i = 0\n", 341 | "Audio(audio[0], rate=sr)" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": {}, 348 | "outputs": [], 349 | "source": [] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [] 371 | } 372 | ], 373 | "metadata": { 374 | "kernelspec": { 375 | "display_name": "Python 3 (ipykernel)", 376 | "language": "python", 377 | "name": "python3" 378 | }, 379 | "language_info": { 380 | "codemirror_mode": { 381 | "name": "ipython", 382 | "version": 3 383 | }, 384 | "file_extension": ".py", 385 | "mimetype": "text/x-python", 386 | "name": "python", 387 | "nbconvert_exporter": "python", 388 | "pygments_lexer": "ipython3", 389 | "version": "3.9.13" 390 | } 391 | }, 392 | "nbformat": 4, 393 | "nbformat_minor": 2 394 | } 395 | -------------------------------------------------------------------------------- /legacy/train-for-javascript.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# MAGnet \n", 8 | "### Train your own models to generate audio in python or convert to use in the browser on mimicproject.com" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 2, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import sys\n", 18 | "import tensorflow as tf\n", 19 | "import pywt\n", 20 | "from utils.audio_dataset_generator import AudioDatasetGenerator\n", 21 | "import numpy as np\n", 22 | "import tensorflowjs as tfjs" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "### Set up variables\n", 30 | "Including the path to your audio" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 6, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "# Model\n", 42 | "load_model = False\n", 43 | "\n", 44 | "# Dataset\n", 45 | "sequence_length = 40\n", 46 | "audio_data_path = \"assets/grime/\"\n", 47 | "force_new_dataset = True\n", 48 | "\n", 49 | "# Feature Extraction and Audio Genreation\n", 50 | "sample_rate = 44100\n", 51 | "fft_settings = [2048, 
1024, 512]\n", 52 | "fft_size = fft_settings[0]\n", 53 | "window_size = fft_settings[1]\n", 54 | "hop_size = fft_settings[2]\n", 55 | "\n", 56 | "# General Network\n", 57 | "learning_rate = 0.001\n", 58 | "amount_epochs = 100\n", 59 | "batch_size = 64\n", 60 | "loss_type = \"mse\"\n", 61 | "weight_decay = 0.0001\n", 62 | "\n", 63 | "# Recurrent Neural Network\n", 64 | "rnn_type = \"lstm\"\n", 65 | "number_rnn_layers = 2\n", 66 | "rnn_number_units = 256" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "### Make the dataset from the audio" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "# Make your dataset\n", 83 | "\n", 84 | "dataset = AudioDatasetGenerator(fft_size, window_size, hop_size,\n", 85 | " sequence_length, sample_rate)\n", 86 | "\n", 87 | "dataset.load(audio_data_path, force_new_dataset)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "collapsed": true 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "# Set up the model\n", 99 | "\n", 100 | "model = tf.keras.Sequential()\n", 101 | "\n", 102 | "model.add(tf.keras.layers.BatchNormalization(input_shape=[dataset.x_frames.shape[1], dataset.x_frames.shape[2]]))\n", 103 | "\n", 104 | "for layer in range(number_rnn_layers):\n", 105 | " return_sequence = False if layer == (number_rnn_layers - 1) else True\n", 106 | " model.add(tf.keras.layers.LSTM(rnn_number_units, return_sequences= return_sequence))\n", 107 | " \n", 108 | "model.add(tf.keras.layers.Dense(dataset.y_frames.shape[1]))\n", 109 | "\n", 110 | "model.add(tf.keras.layers.Activation('linear'))\n", 111 | "opt = tf.keras.optimizers.Adam(learning_rate)\n", 112 | "model.compile(optimizer=opt, loss=loss_type)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "### Train your model" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "scrolled": true 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "# Train\n", 131 | "\n", 132 | "model.fit(dataset.x_frames, dataset.y_frames, batch_size=batch_size, epochs=amount_epochs)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "### Save your model as a keras model" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "collapsed": true 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "# Save your model\n", 151 | "\n", 152 | "model.save(\".h5\")" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "### Convert to use online with tensorflow.js. 
\n", 160 | "Find example code at https://mimicproject.com/code/b530ba9e-dfd9-0440-8358-86b6420b210d\n", 161 | "Upload the \n", 162 | "* .json file\n", 163 | "* the shards\n", 164 | "* your audio\n", 165 | "Update the MODEL_URLS and SAMPLE_URLS in dataset-paths.js" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 3, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "# load your model\n", 175 | "\n", 176 | "model = tf.keras.models.load_model(\"../../models/linn_22050_2048_512_lstm_2_128.h5\")\n", 177 | "\n", 178 | "tfjs.converters.save_keras_model(model, \"linn_22050.json\")" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "### Or generate samples in python" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 84, 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "name": "stdout", 195 | "output_type": "stream", 196 | "text": [ 197 | "100% audio generation complete. \r" 198 | ] 199 | } 200 | ], 201 | "source": [ 202 | "# Or generate samples in python\n", 203 | "\n", 204 | "amount_samples = 1\n", 205 | "sequence_length_max = 500\n", 206 | "impulse_scale = 1.0\n", 207 | "griffin_iterations = 60\n", 208 | "random_chance = 0.05\n", 209 | "random_strength = 0.0\n", 210 | "\n", 211 | "dimension1 = dataset.x_frames.shape[1]\n", 212 | "dimension2 = dataset.x_frames.shape[2]\n", 213 | "shape = (1, dimension1, dimension2, 1) if use_cnn else (1, dimension1, dimension2)\n", 214 | "\n", 215 | "audio = []\n", 216 | "\n", 217 | "if use_wavelets:\n", 218 | " temp_audio = np.array(0)\n", 219 | "for i in range(amount_samples): \n", 220 | " \n", 221 | " random_index = np.random.randint(0, (len(dataset.x_frames) - 1)) \n", 222 | " \n", 223 | " impulse = np.array(dataset.x_frames[random_index]) * impulse_scale\n", 224 | " predicted_magnitudes = impulse\n", 225 | " \n", 226 | " if use_wavelets:\n", 227 | " for seq in range (impulse.shape[0]):\n", 228 | " coeffs = pywt.array_to_coeffs(impulse[seq], dataset.coeff_slices)\n", 229 | " recon = (pywt.waverecn(coeffs, wavelet=wavelet))\n", 230 | " temp_audio = np.append(temp_audio, recon)\n", 231 | " for j in range(sequence_length_max):\n", 232 | " prediction = model.predict(impulse.reshape(shape))\n", 233 | " #Wavelet audio\n", 234 | " if use_wavelets:\n", 235 | " coeffs = pywt.array_to_coeffs(prediction[0], dataset.coeff_slices)\n", 236 | " recon = (pywt.waverecn(coeffs, wavelet=wavelet))\n", 237 | " temp_audio = np.append(temp_audio, recon)\n", 238 | " \n", 239 | " if use_cnn:\n", 240 | " prediction = prediction.reshape(1, dataset.y_frames.shape[1], 1)\n", 241 | " \n", 242 | " predicted_magnitudes = np.vstack((predicted_magnitudes, prediction)) \n", 243 | " impulse = predicted_magnitudes[-sequence_length:]\n", 244 | " \n", 245 | " if (np.random.random_sample() < random_chance) :\n", 246 | " idx = np.random.randint(0, dataset.sequence_length)\n", 247 | " impulse[idx] = impulse[idx] + np.random.random_sample(impulse[idx].shape) * random_strength\n", 248 | " \n", 249 | " done = int(float(i * sequence_length_max + j) / float(amount_samples * sequence_length_max) * 100.0) + 1\n", 250 | " sys.stdout.write('{}% audio generation complete. 
\\r'.format(done))\n", 251 | " sys.stdout.flush()\n", 252 | " \n", 253 | " if use_wavelets: \n", 254 | " audio += [temp_audio]\n", 255 | " else:\n", 256 | " predicted_magnitudes = np.array(predicted_magnitudes).reshape(-1, int(window_size)+1) \n", 257 | " audio += [dataset.griffin_lim(predicted_magnitudes.T, griffin_iterations)]\n", 258 | "audio = np.array(audio)" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 72, 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "name": "stderr", 268 | "output_type": "stream", 269 | "text": [ 270 | "IOPub data rate exceeded.\n", 271 | "The notebook server will temporarily stop sending output\n", 272 | "to the client in order to avoid crashing it.\n", 273 | "To change this limit, set the config variable\n", 274 | "`--NotebookApp.iopub_data_rate_limit`.\n" 275 | ] 276 | } 277 | ], 278 | "source": [ 279 | "# Play them back\n", 280 | "\n", 281 | "from IPython.display import Audio\n", 282 | "i = 0\n", 283 | "Audio(audio[i], rate=sample_rate)" 284 | ] 285 | } 286 | ], 287 | "metadata": { 288 | "kernelspec": { 289 | "display_name": "Python 3", 290 | "language": "python", 291 | "name": "python3" 292 | }, 293 | "language_info": { 294 | "codemirror_mode": { 295 | "name": "ipython", 296 | "version": 3 297 | }, 298 | "file_extension": ".py", 299 | "mimetype": "text/x-python", 300 | "name": "python", 301 | "nbconvert_exporter": "python", 302 | "pygments_lexer": "ipython3", 303 | "version": "3.8.5" 304 | } 305 | }, 306 | "nbformat": 4, 307 | "nbformat_minor": 2 308 | } 309 | -------------------------------------------------------------------------------- /utils/audio_dataset_generator.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import random 4 | import librosa 5 | import pywt 6 | import math 7 | import numpy as np 8 | import tensorflow as tf 9 | 10 | 11 | class AudioDatasetGenerator: 12 | """ 13 | Class to manage the dataset for audio generation. 14 | """ 15 | 16 | def __init__(self, fft_size=2048, window_size=1024, hop_size=512, 17 | sequence_length=16, sample_rate=44100): 18 | """Inits the class. Set the fft values to have a significant effect on 19 | the training of the neural network.""" 20 | self.counter = 0 21 | self.epoch_count = 0 22 | self.previous_epoch = -1 23 | self.x_frames = [] 24 | self.y_frames = [] 25 | self.fft_size = fft_size 26 | self.window_size = window_size 27 | self.hop_size = hop_size 28 | self.sequence_length = sequence_length 29 | self.sample_rate = sample_rate 30 | 31 | def load(self, data_path, force=False): 32 | """Loads the dataset from either the binary numpy file, or generates 33 | from a folder of wav files specified at the data_path.""" 34 | x_frames_name = os.path.join(data_path, "x_frames.npy") 35 | y_frames_name = os.path.join(data_path, "y_frames.npy") 36 | if os.path.isfile(x_frames_name) and os.path.isfile(y_frames_name) and force == False: 37 | self.x_frames = np.load(x_frames_name) 38 | self.y_frames = np.load(y_frames_name) 39 | elif os.path.exists(data_path): 40 | self._generate_data(data_path) 41 | self.x_frames = np.array(self.x_frames) 42 | self.y_frames = np.array(self.y_frames) 43 | self.x_frames, self.y_frames = self.unison_shuffled_copies(self.x_frames, 44 | self.y_frames) 45 | np.save(x_frames_name, self.x_frames) 46 | np.save(y_frames_name, self.y_frames) 47 | else: 48 | raise ValueError("Couldn't load files from the supplied path.") 49 | 50 | def get_next_batch(self, batch_size): 51 | """Gets a new batch. 
Reshuffles the dataset at the end of the epoch.""" 52 | if self.counter + batch_size > len(self.y_frames): 53 | self.counter = 0 54 | self.epoch_count += 1 55 | self.x_frames, self.y_frames = self.unison_shuffled_copies(self.x_frames, 56 | self.y_frames) 57 | return_x = self.x_frames[self.counter:self.counter + batch_size] 58 | return_y = self.y_frames[self.counter:self.counter + batch_size] 59 | self.counter += batch_size 60 | return return_x, return_y 61 | 62 | def is_new_epoch(self): 63 | """Returns true if there has been a new epoch.""" 64 | if self.epoch_count != self.previous_epoch: 65 | self.previous_epoch = self.epoch_count 66 | return True 67 | return False 68 | 69 | def get_epoch(self): 70 | """Returns the current epoch.""" 71 | return self.epoch_count 72 | 73 | def reset_epoch(self): 74 | """Resets the current epoch.""" 75 | self.epoch_count = 0 76 | self.previous_epoch -1 77 | 78 | def get_x_shape(self): 79 | """Gets the shame for the x frames. Useful for placeholders.""" 80 | return [None, self.x_frames.shape[1], self.x_frames.shape[2]] 81 | 82 | def get_y_shape(self): 83 | """Gets the shame for the y frames. Useful for placeholders.""" 84 | return [None, self.y_frames.shape[1]] 85 | 86 | def completed_all_epochs(self, desired_epochs): 87 | """Returns true once the get next batch method has been called enough 88 | to have run through desired_epochs amount of epochs.""" 89 | return self.epoch_count >= desired_epochs 90 | 91 | def unison_shuffled_copies(self, a, b): 92 | """Shuffle NumPy arrays in unison.""" 93 | assert len(a) == len(b) 94 | p = np.random.permutation(len(a)) 95 | return a[p], b[p] 96 | 97 | def griffin_lim(self, stftm_matrix, max_iter=100): 98 | """"Iterative method to 'build' phases for magnitudes.""" 99 | stft_matrix = np.random.random(stftm_matrix.shape) 100 | y = librosa.core.istft(stft_matrix, self.hop_size, self.window_size) 101 | for i in range(max_iter): 102 | stft_matrix = librosa.core.stft(y, self.fft_size, self.hop_size, self.window_size) 103 | stft_matrix = stftm_matrix * stft_matrix / np.abs(stft_matrix) 104 | y = librosa.core.istft(stft_matrix, self.hop_size, self.window_size) 105 | return y 106 | 107 | def generate_samples(self, prediction_tensor, x, training, keep_prob, 108 | amount_samples=5, sequence_max_length=2000, 109 | impulse_scale=666, griffin_iterations=100): 110 | """Generates samples in the supplied folder path.""" 111 | all_audio = [] 112 | with tf.Session() as sess: 113 | sess.run(tf.global_variables_initializer()) 114 | for i in range(amount_samples): 115 | random_index = random.randint(0, (len(self.x_frames) - 1)) 116 | 117 | impulse_shape = np.array(self.x_frames[random_index]).shape 118 | #impulse = np.random.random_sample(size=impulse_shape) * impulse_scale 119 | impulse = self.x_frames[random_index] 120 | predicted_magnitudes = impulse 121 | for j in range(sequence_max_length): 122 | impulse = np.array(impulse).reshape(1,self.x_frames.shape[1], self.x_frames.shape[2]) 123 | 124 | prediction = sess.run(prediction_tensor, 125 | feed_dict={x: impulse, training: False, keep_prob: 1.0}) 126 | prediction = prediction.reshape(1, prediction.shape[1]) 127 | predicted_magnitudes = np.vstack((predicted_magnitudes, prediction)) 128 | impulse = predicted_magnitudes[-self.sequence_length:] 129 | 130 | predicted_magnitudes = np.array(predicted_magnitudes) 131 | all_audio += [self.griffin_lim(predicted_magnitudes.T, griffin_iterations)] 132 | return np.array(all_audio) 133 | 134 | def _generate_data(self, data_path): 135 | """Create some data 
from a folder of wav files. 136 | NOTE: the augmentation process should be parameterised.""" 137 | file_names = os.listdir(data_path) 138 | fft_frames = [] 139 | self.x_frames = [] 140 | self.y_frames = [] 141 | for file in file_names: 142 | if file.endswith('.wav'): 143 | file = os.path.join(data_path, file) 144 | data, sample_rate = librosa.load(file, sr=self.sample_rate, 145 | mono=True) 146 | data = np.append(np.zeros(self.window_size * self.sequence_length), data) 147 | mags_phases = librosa.stft(data, n_fft=self.fft_size, 148 | win_length=self.window_size, 149 | hop_length=self.hop_size) 150 | magnitudes, phases = librosa.magphase(mags_phases) 151 | for magnitude_bins in magnitudes.T: 152 | fft_frames += [magnitude_bins] 153 | 154 | start = 0 155 | end = len(fft_frames) - self.sequence_length - 1 156 | step = 1 157 | for i in range(start, end, step): 158 | done = int(float(i) / float(end) * 100.0) 159 | sys.stdout.write('{}% data generation complete. \r'.format(done)) 160 | sys.stdout.flush() 161 | 162 | x = fft_frames[i:i + self.sequence_length] 163 | y = fft_frames[i + self.sequence_length] 164 | self.x_frames.append(x) 165 | self.y_frames.append(y) 166 | 167 | sys.stdout.write('100% data generation complete.') 168 | sys.stdout.flush() 169 | 170 | 171 | class AudioWaveletDatasetGenerator: 172 | """ 173 | Class for wavelets 174 | """ 175 | 176 | def __init__(self, window_size=1024, sequence_length=16, sample_rate=44100, wavelet='db10'): 177 | """Inits the class. Set the fft values to have a significant effect on 178 | the training of the neural network.""" 179 | self.x_frames = [] 180 | self.y_frames = [] 181 | self.window_size = window_size 182 | print(self.window_size) 183 | self.sample_rate = sample_rate 184 | self.sequence_length = sequence_length 185 | self.coeff_slices = [] 186 | self.wavelet = wavelet 187 | 188 | def load(self, data_path, force=False): 189 | """Loads the dataset from either the binary numpy file, or generates 190 | from a folder of wav files specified at the data_path.""" 191 | x_frames_name = os.path.join(data_path, "x_frames.npy") 192 | y_frames_name = os.path.join(data_path, "y_frames.npy") 193 | if os.path.isfile(x_frames_name) and os.path.isfile(y_frames_name) and force == False: 194 | self.x_frames = np.load(x_frames_name) 195 | self.y_frames = np.load(y_frames_name) 196 | elif os.path.exists(data_path): 197 | self._generate_data(data_path) 198 | self.x_frames = np.array(self.x_frames) 199 | self.y_frames = np.array(self.y_frames) 200 | self.x_frames, self.y_frames = self.unison_shuffled_copies(self.x_frames, 201 | self.y_frames) 202 | np.save(x_frames_name, self.x_frames) 203 | np.save(y_frames_name, self.y_frames) 204 | else: 205 | raise ValueError("Couldn't load files from the supplied path.") 206 | 207 | def unison_shuffled_copies(self, a, b): 208 | """Shuffle NumPy arrays in unison.""" 209 | assert len(a) == len(b) 210 | p = np.random.permutation(len(a)) 211 | return a[p], b[p] 212 | 213 | def _generate_data(self, data_path): 214 | """Create some data from a folder of wav files. 
215 | NOTE: the augmentation process should be parameterised.""" 216 | file_names = os.listdir(data_path) 217 | 218 | self.x_frames = [] 219 | self.y_frames = [] 220 | ws = self.window_size 221 | for file in file_names: 222 | if file.endswith('.wav'): 223 | file = os.path.join(data_path, file) 224 | data, sample_rate = librosa.load(file, sr=self.sample_rate, mono=True) 225 | data = np.append(np.zeros(self.window_size * self.sequence_length), data) 226 | for offset in range(0, 1): 227 | wavelet_frames = [] 228 | sys.stdout.write('{} offset \r'.format(offset)) 229 | sys.stdout.flush() 230 | for i in range (0, math.floor((len(data)-offset)/float(ws))): 231 | coeffs = pywt.wavedec(data[(i*ws)+offset:(i*ws+ws)+offset], self.wavelet) 232 | coeff_arr, self.coeff_slices = pywt.coeffs_to_array(coeffs) #slices to flat array 233 | wavelet_frames.append(coeff_arr) 234 | 235 | start = 0 236 | end = len(wavelet_frames) - self.sequence_length 237 | assert end > 0 238 | step = 1 239 | for i in range(start, end, step): 240 | x = wavelet_frames[i:i + self.sequence_length] 241 | y = wavelet_frames[i + self.sequence_length] 242 | self.x_frames.append(x) 243 | self.y_frames.append(y) 244 | done = int(float(offset) / float(1020) * 100.0) 245 | sys.stdout.write('{}% data generation complete. \r'.format(done)) 246 | sys.stdout.flush() 247 | sys.stdout.write('100% data generation complete.') 248 | sys.stdout.flush() 249 | -------------------------------------------------------------------------------- /utils/load_and_convert.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflowjs as tfjs 3 | 4 | model = tf.keras.models.model_from_json('{"class_name": "Sequential", "config": {"name": "sequential_9", "layers": [{"class_name": "BatchNormalization", "config": {"name": "batch_normalization_4", "trainable": true, "batch_input_shape": [null, 40, 1025], "dtype": "float32", "axis": [2], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}}, {"class_name": "LSTM", "config": {"name": "lstm_13", "trainable": true, "dtype": "float32", "return_sequences": true, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 256, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 1}}, {"class_name": "LSTM", "config": {"name": "lstm_14", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 256, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, 
"kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 1}}, {"class_name": "Dense", "config": {"name": "dense_13", "trainable": true, "dtype": "float32", "units": 1025, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_5", "trainable": true, "dtype": "float32", "activation": "linear"}}]}, "keras_version": "2.2.4-tf", "backend": "tensorflow"}') 5 | model.load_weights('/Users/louismccallum/Documents/programming/MIMIC/Examples/rnn-audio/python/wiley2layer_adam_nodropout_155_44100/wiley2layer_adam_nodropout_155_44100.h5') 6 | tfjs.converters.save_keras_model(model, "/Users/louismccallum/Documents/programming/MIMIC/Examples/rnn-audio/wileyModel") 7 | -------------------------------------------------------------------------------- /utils/random_search.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import tensorflow as tf 3 | import tflearn 4 | from tflearn.layers.recurrent import bidirectional_rnn, BasicLSTMCell, GRUCell 5 | from tflearn.layers.core import dropout 6 | from tflearn.layers.conv import conv_2d, max_pool_2d 7 | from audio_dataset_generator import AudioDatasetGenerator 8 | import random 9 | import numpy as np 10 | import json 11 | 12 | 13 | def conv_net(net, filters, kernels, non_linearity): 14 | """ 15 | A quick function to build a conv net. 16 | At the end it reshapes the network to be 3d to work with recurrent units. 17 | """ 18 | assert len(filters) == len(kernels) 19 | 20 | for i in range(len(filters)): 21 | net = conv_2d(net, filters[i], kernels[i], activation=non_linearity) 22 | net = max_pool_2d(net, 2) 23 | 24 | dim1 = net.get_shape().as_list()[1] 25 | dim2 = net.get_shape().as_list()[2] 26 | dim3 = net.get_shape().as_list()[3] 27 | return tf.reshape(net, [-1, dim1 * dim3, dim2]) 28 | 29 | 30 | def recurrent_net(net, rec_type, rec_size, return_sequence): 31 | """ 32 | A quick if else block to build a recurrent layer, based on the type specified 33 | by the user. 34 | """ 35 | if rec_type == 'lstm': 36 | net = tflearn.layers.recurrent.lstm(net, rec_size, return_seq=return_sequence) 37 | elif rec_type == 'gru': 38 | net = tflearn.layers.recurrent.gru(net, rec_size, return_seq=return_sequence) 39 | elif rec_type == 'bi_lstm': 40 | net = bidirectional_rnn(net, 41 | BasicLSTMCell(rec_size), 42 | BasicLSTMCell(rec_size), 43 | return_seq=return_sequence) 44 | elif rec_type == 'bi_gru': 45 | net = bidirectional_rnn(net, 46 | GRUCell(rec_size), 47 | GRUCell(rec_size), 48 | return_seq=return_sequence) 49 | else: 50 | raise ValueError('Incorrect rnn type passed. 
Try lstm, gru, bi_lstm or bi_gru.') 51 | return net 52 | 53 | 54 | def create_random_parameters(): 55 | hyperparameters = dict() 56 | 57 | # Dataset 58 | hyperparameters['sequence_length'] = random.choice([40, 50, 60, 70, 80]) 59 | 60 | # Feature Extraction and Audio Genreation 61 | hyperparameters['sample_rate'] = 22050 62 | hyperparameters['fft_size'] = 2048 63 | hyperparameters['window_size'] = 1024 64 | hyperparameters['hop_size'] = 512 65 | 66 | # General Network 67 | hyperparameters['learning_rate'] = random.choice([1e-2, 1e-3, 1e-4]) 68 | hyperparameters['amount_epochs'] = 700 69 | hyperparameters['batch_size'] = random.choice([32, 64, 128, 256]) 70 | hyperparameters['keep_prob'] = random.choice([0.1, 0.2, 0.3, 0.5, 0.75, 1.0]) 71 | hyperparameters['activation'] = random.choice(['sigmoid', 'tanh', 'relu', 'leaky_relu', 'selu']) 72 | hyperparameters['optimiser'] = random.choice(['adam', 'rmsprop']) 73 | hyperparameters['fully_connected_dim'] = random.choice([512, 1024, 2048]) 74 | 75 | # Recurrent Neural Network 76 | hyperparameters['rnn_type'] = random.choice(["lstm", "gru", "bi_lstm", "bi_gru"]) 77 | hyperparameters['number_rnn_layers'] = random.choice([1, 2, 3, 4]) 78 | hyperparameters['rnn_number_units'] = random.choice([256, 512, 1024]) 79 | 80 | # Convolutional Neural Network 81 | hyperparameters['use_cnn'] = random.choice([True, False]) 82 | cnn_int = random.randint(0, 3) 83 | hyperparameters['number_filters'] = [[32], [64], [32, 64], [64, 32]][cnn_int] 84 | hyperparameters['filter_sizes'] = [[1], [3], [1, 5], [1, 3]][cnn_int] 85 | 86 | hyperparameters['fitness'] = 0.0 87 | 88 | return hyperparameters 89 | 90 | 91 | epoch = 0 92 | 93 | for model_no in range(100): 94 | try: 95 | hyperparameters = create_random_parameters() 96 | 97 | paths = ["assets/electronic_piano/", "assets/other", "assets/test_samples/"] 98 | 99 | for audio_data_path in paths: 100 | 101 | tf.reset_default_graph() 102 | 103 | dataset = AudioDatasetGenerator(hyperparameters['fft_size'], 104 | hyperparameters['window_size'], 105 | hyperparameters['hop_size'], 106 | hyperparameters['sequence_length'], 107 | hyperparameters['sample_rate']) 108 | 109 | dataset.load(audio_data_path, True) 110 | 111 | if hyperparameters['use_cnn']: 112 | dataset.x_frames = dataset.x_frames.reshape(dataset.x_frames.shape[0], 113 | dataset.x_frames.shape[1], 114 | dataset.x_frames.shape[2], 115 | 1) 116 | if hyperparameters['use_cnn']: 117 | net = tflearn.input_data([None, 118 | dataset.x_frames.shape[1], 119 | dataset.x_frames.shape[2], 120 | dataset.x_frames.shape[3]], 121 | name="input_data0") 122 | net = conv_net(net, 123 | hyperparameters['number_filters'], 124 | hyperparameters['filter_sizes'], 125 | hyperparameters['activation']) 126 | else: 127 | net = tflearn.input_data([None, 128 | dataset.x_frames.shape[1], 129 | dataset.x_frames.shape[2]], 130 | name="input_data0") 131 | 132 | # Batch Norm 133 | net = tflearn.batch_normalization(net, name="batch_norm0") 134 | 135 | # Recurrent 136 | for layer in range(hyperparameters['number_rnn_layers']): 137 | return_sequence = not layer == (hyperparameters['number_rnn_layers'] - 1) 138 | net = recurrent_net(net, 139 | hyperparameters['rnn_type'], 140 | hyperparameters['rnn_number_units'], 141 | return_sequence) 142 | if hyperparameters['keep_prob'] < 1.0: 143 | net = dropout(net, 1.0 - hyperparameters['keep_prob']) 144 | 145 | # Dense + MLP Out 146 | net = tflearn.fully_connected(net, 147 | dataset.y_frames.shape[1], 148 | activation=hyperparameters['activation'], 149 | 
regularizer='L2', 150 | weight_decay=0.001) 151 | 152 | net = tflearn.fully_connected(net, 153 | dataset.y_frames.shape[1], 154 | activation='linear') 155 | 156 | net = tflearn.regression(net, 157 | optimizer=hyperparameters['optimiser'], 158 | learning_rate=hyperparameters['learning_rate'], 159 | loss="mean_square") 160 | 161 | model = tflearn.DNN(net, tensorboard_verbose=1) 162 | 163 | model.fit(dataset.x_frames, 164 | dataset.y_frames, 165 | show_metric=True, 166 | batch_size=hyperparameters['batch_size'], 167 | n_epoch=hyperparameters['amount_epochs']) 168 | 169 | model_name = '{}_{}'.format(epoch, model_no) 170 | with open(model_name + '.json', 'w') as fp: 171 | json.dump(hyperparameters, fp) 172 | 173 | amount_samples = 1 174 | sequence_length_max = 1000 175 | impulse_scale = 1.0 176 | griffin_iterations = 60 177 | random_chance = 0.0 178 | random_strength = 0.0 179 | 180 | dimension1 = dataset.x_frames.shape[1] 181 | dimension2 = dataset.x_frames.shape[2] 182 | shape = (1, dimension1, dimension2, 1) if hyperparameters['use_cnn'] else (1, dimension1, dimension2) 183 | 184 | audio = [] 185 | 186 | for i in range(amount_samples): 187 | 188 | random_index = 5 189 | 190 | impulse = np.array(dataset.x_frames[random_index]) * impulse_scale 191 | predicted_magnitudes = impulse 192 | 193 | for j in range(sequence_length_max): 194 | 195 | prediction = model.predict(impulse.reshape(shape)) 196 | 197 | if hyperparameters['use_cnn']: 198 | prediction = prediction.reshape(1, dataset.y_frames.shape[1], 1) 199 | 200 | predicted_magnitudes = np.vstack((predicted_magnitudes, prediction)) 201 | impulse = predicted_magnitudes[-hyperparameters['sequence_length']:] 202 | 203 | if (np.random.random_sample() < random_chance) : 204 | idx = np.random.randint(0, dataset.sequence_length) 205 | impulse[idx] = impulse[idx] + np.random.random_sample(impulse[idx].shape) * random_strength 206 | 207 | predicted_magnitudes = np.array(predicted_magnitudes).reshape(-1, hyperparameters['window_size'] + 1) 208 | audio = np.array(dataset.griffin_lim(predicted_magnitudes.T, griffin_iterations)) 209 | filepath = model_name + '_{}_{}.wav'.format(i, audio_data_path) 210 | librosa.output.write_wav(filepath, 211 | audio, 212 | hyperparameters['sample_rate']) 213 | 214 | except (KeyboardInterrupt, SystemExit): 215 | raise 216 | except Exception as ex: 217 | print(ex) 218 | pass 219 | --------------------------------------------------------------------------------
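A note on phase reconstruction: every generator in this repo ends the same way - the network only predicts magnitude spectra, and Griffin-Lim is used to estimate a phase that fits them so the frames can be inverted back to a waveform (hand-rolled in `utils/audio_dataset_generator.py`, via `librosa.griffinlim` in the PyTorch scripts). The sketch below shows that step in isolation; the input file name is an illustrative placeholder, and a real STFT magnitude spectrogram stands in for model output.

```python
import numpy as np
import librosa
import soundfile as sf

n_fft, hop_length, win_length = 2048, 512, 1024

# Stand-in for the model's output: the magnitude spectrogram of a real file.
y, sr = librosa.load("example.wav", sr=44100, mono=True)    # illustrative path
magnitudes = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
                                 win_length=win_length))    # [n_fft//2 + 1, frames]

# Griffin-Lim: iteratively refine a phase estimate consistent with the magnitudes,
# then invert back to a time-domain signal.
reconstructed = librosa.griffinlim(magnitudes, n_iter=60, n_fft=n_fft,
                                   hop_length=hop_length, win_length=win_length)
sf.write("reconstructed.wav", reconstructed, sr)
```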