├── .gitignore ├── Collect Metadata.ipynb ├── Collect Samples.ipynb ├── Dockerfile ├── Fingerprints and Labels to Classifier.ipynb ├── Fingerprints to K-Means Colors.ipynb ├── Fingerprints to Spritesheet.ipynb ├── Fingerprints to t-SNE.ipynb ├── Generating Spectrograms.ipynb ├── Gentle to Samples.ipynb ├── HDF5 to LSTM.ipynb ├── Instantaneous Frequency and Phase Derivatives.ipynb ├── Makefile ├── Metadata to Labels.ipynb ├── Multisamples to Samples.ipynb ├── Samples to Audio Spritesheet.ipynb ├── Samples to Fingerprints.ipynb ├── Samples to HDF5.ipynb ├── Sphinx to Samples.ipynb ├── license.md ├── readme.md ├── requirements.txt └── utils ├── __init__.py ├── bh_tsne ├── bhtsne.py ├── ffmpeg_load_audio.py ├── ffmpeg_save_audio.py ├── list_all_files.py ├── make_mosaic.py ├── mkdir_p.py ├── normalize.py └── show_array.py /.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | failed/ 3 | inprogress/ 4 | model/ 5 | 6 | *.pyc 7 | 8 | .* 9 | !.gitignore 10 | !.gitkeep -------------------------------------------------------------------------------- /Collect Metadata.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This notebook starts with samples listed in `filenames.txt` and assumes there is an equivalently named `.json` file for every sample. It loads all the `.json` files and saves them into a big `.json` file. This is a good format for exporting, and also happens to be faster for loading than `.pkl` in this case." 
8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "data_root = 'data/drums'" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "%matplotlib inline\n", 30 | "from matplotlib import pyplot as plt\n", 31 | "from os.path import join\n", 32 | "from utils import list_all_files\n", 33 | "from multiprocessing import Pool\n", 34 | "import json" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "collapsed": false 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "def replace_extension(fn, new_extension):\n", 46 | " parts = fn.split('.')\n", 47 | " parts[-1] = new_extension\n", 48 | " return '.'.join(parts)\n", 49 | "files = open(join(data_root, 'filenames.txt')).read().splitlines()\n", 50 | "files = [replace_extension(fn, 'json') for fn in files]\n", 51 | "len(files)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": false 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "def job(fn):\n", 63 | " with open(fn) as f:\n", 64 | " return json.load(f)\n", 65 | "p = Pool()\n", 66 | "%time results = p.map(job, files)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "with open(join(data_root, 'metadata.json'), 'wb') as f:\n", 78 | " json.dump(results, f)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "collapsed": false 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "# example of getting the most common tokens from the 'description' field\n", 90 | "descriptions = '\\n'.join([x['description'] for x in results])\n", 91 | "from collections import Counter\n", 
92 | "counter = Counter(descriptions.lower().split())\n", 93 | "counter.most_common()[:20]" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "# example of plotting a histogram of a numeric field like num_comments, avg_rating, bitrate, num_downloads\n", 105 | "measure = [x['num_downloads'] for x in results]\n", 106 | "plt.hist(measure, bins=20)\n", 107 | "plt.yscale('log')\n", 108 | "plt.show()" 109 | ] 110 | } 111 | ], 112 | "metadata": { 113 | "kernelspec": { 114 | "display_name": "Python 2", 115 | "language": "python", 116 | "name": "python2" 117 | }, 118 | "language_info": { 119 | "codemirror_mode": { 120 | "name": "ipython", 121 | "version": 2 122 | }, 123 | "file_extension": ".py", 124 | "mimetype": "text/x-python", 125 | "name": "python", 126 | "nbconvert_exporter": "python", 127 | "pygments_lexer": "ipython2", 128 | "version": "2.7.11" 129 | } 130 | }, 131 | "nbformat": 4, 132 | "nbformat_minor": 0 133 | } 134 | -------------------------------------------------------------------------------- /Collect Samples.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This tool converts a folder of samples to a big rectangular matrix with one mono sample per row.\n", 8 | "\n", 9 | "Samples should be placed in `data/mydataset/samples/`. They could be `.mp3`, `.wav`, or anything else that ffmpeg can work with. 
They may be all in one folder, or in nested sub-folders.\n", 10 | "\n", 11 | "Change the path below to point to the root directory, e.g., `data/mydataset/`.\n", 12 | "\n", 13 | "The samplerate `sr` is not necessarily the native samplerate of the samples, it's the samplerate you want to load them at.\n", 14 | "\n", 15 | "The output of this notebook is:\n", 16 | "* `data/mydataset/durations.txt`\n", 17 | "* `data/mydataset/filenames.txt`\n", 18 | "* `data/mydataset/samples.npy`" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "data_root = 'data/drums/'\n", 30 | "sr = 48000\n", 31 | "max_length = sr*4 # ignore samples longer than 4 seconds\n", 32 | "fixed_length = sr/4 # trim all samples to 250 milliseconds\n", 33 | "limit = None # set this to 100 to only load the first 100 samples" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "import numpy as np\n", 45 | "from os.path import join\n", 46 | "from utils import *\n", 47 | "from multiprocessing import Pool" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": { 54 | "collapsed": false 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "files = list(list_all_files(join(data_root, 'samples'), ['.mp3']))\n", 59 | "len(files)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "collapsed": true 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "def load_sample(fn, sr=None,\n", 71 | " max_length=None, fixed_length=None, normalize=True):\n", 72 | " if fn == '': # ignore empty filenames\n", 73 | " return None\n", 74 | " audio, _ = ffmpeg_load_audio(fn, sr, mono=True)\n", 75 | " duration = len(audio)\n", 76 | " if duration == 0: # ignore zero-length samples\n", 77 | " return None\n", 78 | " if 
max_length and duration >= max_length: # ignore long samples\n", 79 | " return None\n", 80 | " if fixed_length:\n", 81 | " audio.resize(fixed_length)\n", 82 | " max_val = np.abs(audio).max()\n", 83 | " if max_val == 0: # ignore completely silent sounds\n", 84 | " return None\n", 85 | " if normalize:\n", 86 | " audio /= max_val\n", 87 | " return (fn, audio, duration)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "def job(fn):\n", 99 | " return load_sample(fn, sr=sr,\n", 100 | " max_length=max_length, fixed_length=fixed_length)\n", 101 | "pool = Pool()\n", 102 | "%time results = pool.map(job, files[:limit])\n", 103 | "print 'Processed', len(results), 'samples'" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "valid = filter(None, results)\n", 115 | "filenames = [x[0] for x in valid]\n", 116 | "samples = [x[1] for x in valid]\n", 117 | "durations = [x[2] for x in valid]\n", 118 | "samples = np.asarray(samples)\n", 119 | "np.savetxt(join(data_root, 'filenames.txt'), filenames, fmt='%s')\n", 120 | "np.savetxt(join(data_root, 'durations.txt'), durations, fmt='%i')\n", 121 | "%time np.save(join(data_root, 'samples.npy'), samples)\n", 122 | "print 'Saved', len(valid), 'samples'" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": { 129 | "collapsed": true 130 | }, 131 | "outputs": [], 132 | "source": [] 133 | } 134 | ], 135 | "metadata": { 136 | "kernelspec": { 137 | "display_name": "Python 2", 138 | "language": "python", 139 | "name": "python2" 140 | }, 141 | "language_info": { 142 | "codemirror_mode": { 143 | "name": "ipython", 144 | "version": 2 145 | }, 146 | "file_extension": ".py", 147 | "mimetype": "text/x-python", 148 | "name": "python", 149 | 
"nbconvert_exporter": "python", 150 | "pygments_lexer": "ipython2", 151 | "version": "2.7.11" 152 | } 153 | }, 154 | "nbformat": 4, 155 | "nbformat_minor": 0 156 | } 157 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:2-jessie 2 | 3 | RUN echo "deb http://deb.debian.org/debian jessie main contrib non-free" > /etc/apt/sources.list 4 | RUN echo "deb-src http://deb.debian.org/debian jessie main contrib non-free" >> /etc/apt/sources.list 5 | RUN echo "deb http://deb.debian.org/debian jessie-updates main contrib non-free" >> /etc/apt/sources.list 6 | RUN echo "deb-src http://deb.debian.org/debian jessie-updates main contrib non-free" >> /etc/apt/sources.list 7 | RUN echo "deb http://security.debian.org/debian-security/ jessie/updates main contrib non-free" >> /etc/apt/sources.list 8 | RUN echo "deb-src http://security.debian.org/debian-security/ jessie/updates main contrib non-free" >> /etc/apt/sources.list 9 | 10 | RUN apt-get update -y 11 | RUN apt-get install -y libsamplerate0 libsamplerate0-dev 12 | 13 | RUN pip install numpy 14 | RUN pip install scikits.samplerate 15 | 16 | WORKDIR /AudioNotebooks 17 | 18 | COPY requirements.txt . 19 | RUN pip install --no-cache-dir -r requirements.txt 20 | 21 | COPY . . 
22 | 23 | # CMD /bin/bash 24 | 25 | CMD jupyter notebook --ip=0.0.0.0 --port=8888 --allow-root -------------------------------------------------------------------------------- /Fingerprints and Labels to Classifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "data_root = 'data/drums/'" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": false 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "%matplotlib inline\n", 23 | "from matplotlib import pyplot as plt\n", 24 | "import numpy as np\n", 25 | "import pickle\n", 26 | "import json\n", 27 | "from os.path import join\n", 28 | "from utils import *\n", 29 | "from keras.datasets import cifar10\n", 30 | "from keras.preprocessing.image import ImageDataGenerator\n", 31 | "from keras.models import Sequential, Model\n", 32 | "from keras.layers import Input\n", 33 | "from keras.layers.core import Dense, Reshape, Dropout, Activation, Flatten\n", 34 | "from keras.layers.convolutional import Convolution2D, MaxPooling2D\n", 35 | "from keras.layers.normalization import BatchNormalization\n", 36 | "from keras.optimizers import SGD\n", 37 | "from keras.utils import np_utils" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "%time fingerprints = np.load(join(data_root, 'fingerprints.npy'))\n", 49 | "img_rows, img_cols = fingerprints.shape[1:]\n", 50 | "print fingerprints.shape" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": { 57 | "collapsed": false 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "with open(join(data_root, 'labels_to_samples.pkl'), 'rb') as f:\n", 62 | " labels_to_samples = 
pickle.load(f)\n", 63 | "with open(join(data_root, 'samples_to_labels.pkl'), 'rb') as f:\n", 64 | " samples_to_labels = pickle.load(f)\n", 65 | "synsets = json.load(open(join(data_root, 'synsets.json')))\n", 66 | "nb_classes = len(synsets)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": false, 74 | "scrolled": false 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "# take a sample of the data with even chunks for each class\n", 79 | "# the total number of samples can be larger than len(data) because some samples may be included under multiple labels\n", 80 | "# and the total can be less than limit_per_class * nb_classes because some classes might have less data\n", 81 | "def get_data(data, labels_to_samples, samples_to_labels, limit_per_class=100):\n", 82 | " X_train = []\n", 83 | " labels_train = []\n", 84 | " for samples in labels_to_samples:\n", 85 | " np.random.shuffle(samples)\n", 86 | " for i in samples[:limit_per_class]:\n", 87 | " X_train.append(data[i])\n", 88 | " labels_train.append(samples_to_labels[i])\n", 89 | " X_train = np.asarray(X_train)\n", 90 | " nb_train = len(X_train)\n", 91 | " nb_classes = len(labels_to_samples)\n", 92 | " y_train = np.zeros((nb_train, nb_classes), dtype=np.float32)\n", 93 | " for i, w in enumerate(labels_train):\n", 94 | " y_train[i, w] = 1.\n", 95 | " return X_train, y_train" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "X_train, y_train = get_data(fingerprints, labels_to_samples, samples_to_labels)\n", 107 | "print(X_train.dtype, X_train.shape, X_train.min(), X_train.max())\n", 108 | "print(y_train.dtype, y_train.shape)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "collapsed": false 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "inputs = 
Input(shape=(img_rows, img_cols))\n", 120 | "x = Reshape((1, img_rows, img_cols))(inputs)\n", 121 | "\n", 122 | "x = Convolution2D(32, 3, 3, border_mode='same', activation='relu')(x)\n", 123 | "x = Convolution2D(32, 3, 3, activation='relu')(x)\n", 124 | "x = MaxPooling2D(pool_size=(2, 2))(x)\n", 125 | "x = BatchNormalization()(x)\n", 126 | "x = Dropout(0.25)(x)\n", 127 | "\n", 128 | "x = Convolution2D(64, 3, 3, border_mode='same', activation='relu')(x)\n", 129 | "x = Convolution2D(64, 3, 3, activation='relu')(x)\n", 130 | "x = MaxPooling2D(pool_size=(2, 2))(x)\n", 131 | "x = BatchNormalization()(x)\n", 132 | "x = Dropout(0.25)(x)\n", 133 | "\n", 134 | "x = Flatten()(x)\n", 135 | "encoded = Dense(512, activation='relu')(x)\n", 136 | "x = BatchNormalization()(encoded)\n", 137 | "x = Dropout(0.5)(x)\n", 138 | "x = Dense(nb_classes, activation='softmax')(x)\n", 139 | "\n", 140 | "classifier = Model(input=inputs, output=x)\n", 141 | "classifier.compile(optimizer='rmsprop', loss='categorical_crossentropy')\n", 142 | "print(classifier.summary())" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": { 149 | "collapsed": false 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "from keras import backend as K\n", 154 | "\n", 155 | "batch_size = 32\n", 156 | "nb_epoch = 10\n", 157 | "nb_slices = 48\n", 158 | "lr = 0.001\n", 159 | "decay = 0.99\n", 160 | "\n", 161 | "for d in range(nb_slices):\n", 162 | " X_train, y_train = get_data(fingerprints, labels_to_samples, samples_to_labels)\n", 163 | " print('Learning rate', lr)\n", 164 | " K.set_value(classifier.optimizer.lr, lr)\n", 165 | " classifier.fit(X_train, y_train,\n", 166 | " batch_size=batch_size,\n", 167 | " nb_epoch=nb_epoch,\n", 168 | " verbose=1,\n", 169 | " shuffle=True)\n", 170 | " lr *= decay" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": { 177 | "collapsed": false 178 | }, 179 | "outputs": [], 180 | "source": 
[ 181 | "# save the network, it can be used later to predict labels for new sounds\n", 182 | "open(join(data_root, 'classifier.json'), 'w').write(classifier.to_json())\n", 183 | "classifier.save_weights(join(data_root, 'classifier.h5'))" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": { 190 | "collapsed": false 191 | }, 192 | "outputs": [], 193 | "source": [ 194 | "# make label predictions\n", 195 | "predicted_labels = classifier.predict(fingerprints)\n", 196 | "np.save(join(data_root, 'predicted_labels.npy'), predicted_labels)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": { 203 | "collapsed": false 204 | }, 205 | "outputs": [], 206 | "source": [ 207 | "# make encoding predictions\n", 208 | "encoder = Model(input=inputs, output=encoded)\n", 209 | "predicted_encoding = encoder.predict(fingerprints)\n", 210 | "np.save(join(data_root, 'predicted_encoding.npy'), predicted_encoding)" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": { 217 | "collapsed": false 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "# show some examples of predicted encodings\n", 222 | "plt.figure(figsize=(30,2))\n", 223 | "plt.plot(predicted_encoding[:3].T)\n", 224 | "plt.show()" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": { 231 | "collapsed": false 232 | }, 233 | "outputs": [], 234 | "source": [ 235 | "# show some examples of predicted labels\n", 236 | "# red lines indicate the real labels\n", 237 | "indices = np.arange(nb_classes)\n", 238 | "np.random.shuffle(indices)\n", 239 | "for i in indices[:10]:\n", 240 | " cur = fingerprints[i].reshape(1,img_rows,img_cols)\n", 241 | " cl = classifier.predict(cur, verbose=0)\n", 242 | " plt.figure(figsize=(30,2))\n", 243 | " for j in samples_to_labels[i]:\n", 244 | " plt.axvline(j+.5,c='red')\n", 245 | " print ', 
'.join(synsets[j])\n", 246 | " plt.bar(np.arange(nb_classes), classifier.predict(cur, verbose=0)[0])\n", 247 | " plt.xlim([0,nb_classes])\n", 248 | " plt.show()" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": { 255 | "collapsed": false, 256 | "scrolled": false 257 | }, 258 | "outputs": [], 259 | "source": [ 260 | "# show a mosaic of fingerprints for given synsets\n", 261 | "for label in [1,5,10]:\n", 262 | " samples = labels_to_samples[label]\n", 263 | " total = len(samples)\n", 264 | " print ', '.join(synsets[label]), total\n", 265 | " if total > 0:\n", 266 | " show_array(255 * make_mosaic(fingerprints[samples]))" 267 | ] 268 | } 269 | ], 270 | "metadata": { 271 | "kernelspec": { 272 | "display_name": "Python 2", 273 | "language": "python", 274 | "name": "python2" 275 | }, 276 | "language_info": { 277 | "codemirror_mode": { 278 | "name": "ipython", 279 | "version": 2 280 | }, 281 | "file_extension": ".py", 282 | "mimetype": "text/x-python", 283 | "name": "python", 284 | "nbconvert_exporter": "python", 285 | "pygments_lexer": "ipython2", 286 | "version": "2.7.11" 287 | } 288 | }, 289 | "nbformat": 4, 290 | "nbformat_minor": 0 291 | } 292 | -------------------------------------------------------------------------------- /Fingerprints to K-Means Colors.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This converts `fingerprints.npy` to a `.tsv`-formatted list of colors based on k-means clustering in the high dimensional space." 
8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "data_root = 'data/drums'\n", 19 | "tsne_type = 'fingerprints.256.64'\n", 20 | "n_clusters = 128" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "%matplotlib inline\n", 32 | "from matplotlib import pyplot as plt\n", 33 | "from utils import *\n", 34 | "from sklearn.cluster import MiniBatchKMeans\n", 35 | "from os.path import join\n", 36 | "import numpy as np" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "collapsed": false 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "def load_tsv(fn):\n", 48 | " return np.genfromtxt(fn)\n", 49 | "def save_tsv(data, fn):\n", 50 | " np.savetxt(fn, data, fmt='%.5f', delimiter='\\t')" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": { 57 | "collapsed": true 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "tsne2d = load_tsv(join(data_root, 'tsne/{}.2d.tsv'.format(tsne_type)))\n", 62 | "tsne3d = load_tsv(join(data_root, 'tsne/{}.3d.tsv'.format(tsne_type)))" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "collapsed": false, 70 | "scrolled": false 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "# before\n", 75 | "plt.figure(figsize=(16,16))\n", 76 | "plt.scatter(tsne2d[:,0], tsne2d[:,1], edgecolor='', s=2, c=tsne3d)\n", 77 | "plt.show()" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": true 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "fingerprints = np.load(join(data_root, 'fingerprints.npy'))\n", 89 | "fingerprints = fingerprints.reshape(len(fingerprints), -1)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | 
"execution_count": null, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "import warnings\n", 101 | "warnings.filterwarnings('ignore', category=DeprecationWarning) \n", 102 | "kmeans = MiniBatchKMeans(n_clusters=n_clusters)\n", 103 | "kmeans.fit(fingerprints)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "clusters = kmeans.predict(fingerprints) # this is the approach from the largevis paper\n", 115 | "# clusters = fingerprints.argmax(axis=1) # this is another approach\n", 116 | "save_tsv(plt.cm.rainbow(clusters)[:,:3], join(data_root, 'colors.tsv'))" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": { 123 | "collapsed": false 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "# after\n", 128 | "plt.figure(figsize=(16,16))\n", 129 | "plt.scatter(tsne2d[:,0], tsne2d[:,1], edgecolor='', s=2, c=clusters, cmap='hsv')\n", 130 | "plt.show()" 131 | ] 132 | } 133 | ], 134 | "metadata": { 135 | "kernelspec": { 136 | "display_name": "Python 2", 137 | "language": "python", 138 | "name": "python2" 139 | }, 140 | "language_info": { 141 | "codemirror_mode": { 142 | "name": "ipython", 143 | "version": 2 144 | }, 145 | "file_extension": ".py", 146 | "mimetype": "text/x-python", 147 | "name": "python", 148 | "nbconvert_exporter": "python", 149 | "pygments_lexer": "ipython2", 150 | "version": "2.7.11" 151 | } 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 0 155 | } 156 | -------------------------------------------------------------------------------- /Fingerprints to Spritesheet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "After running ofxAssignment or CloudToGrid to get a grid, this notebook will convert 
the output .tsv to a spritesheet. It will also output an unsorted spritesheet, `spritesheet.png`." 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "data_root = 'data/drums/'\n", 19 | "perplexity = 100 # perplexity of source embedding\n", 20 | "rows = 32\n", 21 | "cols = 32" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": { 28 | "collapsed": true 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "%matplotlib inline\n", 33 | "from matplotlib import pyplot as plt\n", 34 | "from os.path import join\n", 35 | "import numpy as np\n", 36 | "from utils import *" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "collapsed": false 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "data = np.load(join(data_root, 'fingerprints.npy')).astype(np.float64)\n", 48 | "data = data.reshape(len(data), -1) # flatten" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "First we save an unsorted spritesheet (or really, sorted in the same order as the filenames and points)." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "show_array(255 * make_mosaic(data, nx=cols, ny=rows), filename=join(data_root, 'spritesheet.png'))" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "Then we construct the sorted / grid spritesheet." 
74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": false 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "base = '{}.{}x{}'.format(p, cols, rows)\n", 85 | "fn = join(data_root, 'grid/', base + '.tsv')\n", 86 | "print 'Trying to load grid from', fn\n", 87 | "grid = np.genfromtxt(fn)\n", 88 | "combined = zip(grid, data)\n", 89 | "combined = sorted(combined, key=lambda x: (x[0][1], x[0][0]))\n", 90 | "grid_sorted, data_sorted = zip(*combined)\n", 91 | "data_sorted = np.asarray(data_sorted)\n", 92 | "show_array(255 * make_mosaic(data_sorted, nx=cols, ny=rows), filename=join(data_root, base + '.png'))" 93 | ] 94 | } 95 | ], 96 | "metadata": { 97 | "kernelspec": { 98 | "display_name": "Python 2", 99 | "language": "python", 100 | "name": "python2" 101 | }, 102 | "language_info": { 103 | "codemirror_mode": { 104 | "name": "ipython", 105 | "version": 2 106 | }, 107 | "file_extension": ".py", 108 | "mimetype": "text/x-python", 109 | "name": "python", 110 | "nbconvert_exporter": "python", 111 | "pygments_lexer": "ipython2", 112 | "version": "2.7.11" 113 | } 114 | }, 115 | "nbformat": 4, 116 | "nbformat_minor": 0 117 | } 118 | -------------------------------------------------------------------------------- /Fingerprints to t-SNE.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This converts `fingerprints.npy` to `.tsv` formatted t-SNE embeddings and plots of those embeddings in the `tsne/` and `plot/` folders respectively. If you add multiple values to `perplexities` and `initial_dims` then all combinations will be computed (in parallel). Good perplexities are in the range 1-200 with the best range around 30-100. 
Good `initial_dims` are in the range 30 and higher, with the dimensionality of your input data being the highest possible value (e.g., a 32x32 fingerprint would have a highest possible `initial_dims` value of 32x32=1024).\n", 8 | "\n", 9 | "Change the \"mode\" to try different t-SNE variations.\n", 10 | "* \"fingerprints\" will only use `fingerprints.npy`\n", 11 | "* \"predicted_labels\" will only use `predicted_labels.npy`\n", 12 | "* \"predicted_encoding\" will only use `predicted_encoding.npy`\n", 13 | "* \"combined\" will use all of the above data" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "collapsed": true 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "data_root = 'data/drums/'\n", 25 | "initial_dims = [30]\n", 26 | "perplexities = [30]\n", 27 | "mode = 'fingerprints'\n", 28 | "# mode = 'predicted_labels'\n", 29 | "# mode = 'predicted_encoding'\n", 30 | "# mode = 'combined'" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "%matplotlib inline\n", 42 | "from matplotlib import pyplot as plt\n", 43 | "from time import time\n", 44 | "from utils import *\n", 45 | "from os.path import join\n", 46 | "from multiprocessing import Pool\n", 47 | "import numpy as np\n", 48 | "import itertools" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": { 55 | "collapsed": false 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "def save_tsv(data, fn):\n", 60 | " np.savetxt(fn, data, fmt='%.5f', delimiter='\\t')\n", 61 | "def tsne(data, data_root, prefix, initial_dims=30, perplexity=30):\n", 62 | " mkdir_p(data_root + 'tsne')\n", 63 | " mkdir_p(data_root + 'plot')\n", 64 | " \n", 65 | " figsize = (16,16)\n", 66 | " pointsize = 2\n", 67 | "\n", 68 | " X_2d = list(bh_tsne(data, initial_dims=initial_dims, perplexity=perplexity, no_dims=2))\n", 69 | " X_2d = 
normalize(np.array(X_2d))\n", 70 | " save_tsv(X_2d, join(data_root, 'tsne/{}.{}.{}.2d.tsv'.format(prefix, initial_dims, perplexity)))\n", 71 | " \n", 72 | " plt.figure(figsize=figsize)\n", 73 | " plt.scatter(X_2d[:,0], X_2d[:,1], edgecolor='', s=pointsize)\n", 74 | " plt.tight_layout()\n", 75 | " plt.savefig(join(data_root, 'plot/{}.{}.{}.png'.format(prefix, initial_dims, perplexity)))\n", 76 | " plt.close()\n", 77 | " \n", 78 | " X_3d = list(bh_tsne(data, initial_dims=initial_dims, perplexity=perplexity, no_dims=3))\n", 79 | " X_3d = normalize(np.array(X_3d))\n", 80 | " save_tsv(X_3d, join(data_root, 'tsne/{}.{}.{}.3d.tsv'.format(prefix, initial_dims, perplexity)))\n", 81 | " \n", 82 | " plt.figure(figsize=figsize)\n", 83 | " plt.scatter(X_2d[:,0], X_2d[:,1], edgecolor='', s=pointsize, c=X_3d)\n", 84 | " plt.tight_layout()\n", 85 | " plt.savefig(join(data_root, 'plot/{}.{}.{}.png'.format(prefix, initial_dims, perplexity)))\n", 86 | " plt.close()" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "# load and normalize any dataset we need\n", 98 | "if mode == 'fingerprints' or mode == 'combined':\n", 99 | " fingerprints = np.load(join(data_root, 'fingerprints.npy'))\n", 100 | " fingerprints = fingerprints.reshape(len(fingerprints), -1)\n", 101 | "if mode == 'predicted_labels' or mode == 'combined':\n", 102 | " predicted_labels = np.load(join(data_root, 'predicted_labels.npy'))\n", 103 | " predicted_labels -= predicted_labels.min()\n", 104 | " predicted_labels /= predicted_labels.max()\n", 105 | "if mode == 'predicted_encoding' or mode == 'combined':\n", 106 | " predicted_encoding = np.load(join(data_root, 'predicted_encoding.npy'))\n", 107 | " std = predicted_encoding.std(axis=0)\n", 108 | " predicted_encoding = predicted_encoding[:, std > 0] / std[std > 0]\n", 109 | " \n", 110 | "if mode == 'fingerprints':\n", 111 | " data = fingerprints\n", 112 
| "if mode == 'predicted_labels':\n", 113 | " data = predicted_labels\n", 114 | "if mode == 'predicted_encoding':\n", 115 | " data = predicted_encoding\n", 116 | "if mode == 'combined':\n", 117 | " data = np.hstack((fingerprints, predicted_labels, predicted_encoding))\n", 118 | " \n", 119 | "print data.shape" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "data = data.astype(np.float64)\n", 131 | "def job(params):\n", 132 | " start = time()\n", 133 | " tsne(data, data_root, mode, initial_dims=params[0], perplexity=params[1])\n", 134 | " print 'initial_dims={}, perplexity={}, {} seconds'.format(params[0], params[1], time() - start)\n", 135 | "params = list(itertools.product(initial_dims, perplexities))\n", 136 | "pool = Pool()\n", 137 | "pool.map(job, params)" 138 | ] 139 | } 140 | ], 141 | "metadata": { 142 | "kernelspec": { 143 | "display_name": "Python 2", 144 | "language": "python", 145 | "name": "python2" 146 | }, 147 | "language_info": { 148 | "codemirror_mode": { 149 | "name": "ipython", 150 | "version": 2 151 | }, 152 | "file_extension": ".py", 153 | "mimetype": "text/x-python", 154 | "name": "python", 155 | "nbconvert_exporter": "python", 156 | "pygments_lexer": "ipython2", 157 | "version": "2.7.11" 158 | } 159 | }, 160 | "nbformat": 4, 161 | "nbformat_minor": 0 162 | } 163 | -------------------------------------------------------------------------------- /Gentle to Samples.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This notebook takes a file `data/project/raw/audio.wav` and `data/project/raw/align.json` (output from [Gentle](https://lowerquality.com/gentle/)) and outputs one `data/project/samples/*.wav` per phoneme and/or `data/project/samples.npy` with all phonemes. 
Note that the `samples.npy` output by this script is not rectangular if `width = None`." 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "data_root = 'data/speech/'\n", 19 | "save_wav = False # output data_root/samples/*.wav\n", 20 | "save_samples = True # output data_root/samples.npy\n", 21 | "width = None # 0.080 # window size in seconds, this will create a rectangular sample matrix\n", 22 | "use_center = False # when using a fixed width, center the window on the chunk\n", 23 | "limit = None # only analyze the first `limit` phonemes" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "%matplotlib inline\n", 35 | "from matplotlib import pyplot as plt\n", 36 | "from os.path import join\n", 37 | "from tqdm import tqdm\n", 38 | "from utils import *\n", 39 | "import numpy as np\n", 40 | "import json\n", 41 | "import librosa" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "align = json.load(open(join(data_root, 'raw/align.json')))\n", 53 | "words = align['words']" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "collapsed": false 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "y, sr = ffmpeg_load_audio(data_root + 'raw/audio.wav', mono=True)\n", 65 | "print y.shape" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "collapsed": false 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "# this could benefit from being rewritten for multiple threads\n", 77 | "i = 0\n", 78 | "if width is not None:\n", 79 | " width_sample = librosa.time_to_samples(width, sr=sr)[0]\n", 80 | "filenames = []\n", 81 | "samples 
= []\n", 82 | "mkdir_p(join(data_root, 'samples'))\n", 83 | "for word in tqdm(words[:limit], leave=True):\n", 84 | " if 'start' in word:\n", 85 | " start = word['start']\n", 86 | " for phone in word['phones']:\n", 87 | " end = start + phone['duration']\n", 88 | " start_sample, end_sample = librosa.time_to_samples([start, end], sr=sr)\n", 89 | " if use_center:\n", 90 | " center = (start + end) / 2.\n", 91 | " start_sample = librosa.time_to_samples([center - width/2], sr=sr)[0]\n", 92 | " if width is not None:\n", 93 | " end_sample = start_sample + width_sample\n", 94 | " if start_sample > 0 and end_sample < len(y) and end_sample - start_sample > 0:\n", 95 | " cur = y[start_sample:end_sample]\n", 96 | " if save_wav:\n", 97 | " fn = join(data_root, 'samples/{}_{}.wav'.format(i, phone['phone']))\n", 98 | " ffmpeg_save_audio(fn, cur, sr=sr)\n", 99 | " filenames.append(fn)\n", 100 | " if save_samples:\n", 101 | " samples.append(cur)\n", 102 | " i = i + 1\n", 103 | " start = end\n", 104 | "samples = np.asarray(samples)\n", 105 | "\n", 106 | "if save_samples:\n", 107 | " np.save(join(data_root, 'samples.npy'), samples)\n", 108 | "if save_wav:\n", 109 | " np.savetxt(join(data_root, 'filenames.txt'), filenames, fmt='%s')" 110 | ] 111 | } 112 | ], 113 | "metadata": { 114 | "kernelspec": { 115 | "display_name": "Python 2", 116 | "language": "python", 117 | "name": "python2" 118 | }, 119 | "language_info": { 120 | "codemirror_mode": { 121 | "name": "ipython", 122 | "version": 2 123 | }, 124 | "file_extension": ".py", 125 | "mimetype": "text/x-python", 126 | "name": "python", 127 | "nbconvert_exporter": "python", 128 | "pygments_lexer": "ipython2", 129 | "version": "2.7.11" 130 | } 131 | }, 132 | "nbformat": 4, 133 | "nbformat_minor": 0 134 | } 135 | -------------------------------------------------------------------------------- /HDF5 to LSTM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": 
"code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "data_root = 'data/blizzard'" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "%matplotlib inline\n", 23 | "from matplotlib import pyplot as plt\n", 24 | "from os.path import join\n", 25 | "from tqdm import tqdm\n", 26 | "import numpy as np\n", 27 | "import h5py" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": { 34 | "collapsed": false 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "f = h5py.File(join(data_root, 'samples.hdf5'), 'r')\n", 39 | "dataset = f['samples']" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 4, 45 | "metadata": { 46 | "collapsed": false 47 | }, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAEACAYAAACznAEdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHQJJREFUeJzt3Xu4VNV5x/HvK4gKoiLeKghiVJB4V8CaaKYmgpiKxkaC\nt6ipSY1Nk8eaqFRbsRcxTdBoGsxNiRiJURsTbKMi1fO0WhDjDRQUbKMCImK9xRvXt3/sfTxz5syc\nue09a8/M7/M85zl71uzZ+50FZ95Za6+1trk7IiIiW4UOQEREskEJQUREACUEERGJKSGIiAighCAi\nIjElBBERASpICGZ2k5mtNbPFeWVjzGyRmT0Z/z4y77mpZrbCzJaZ2fi0AhcRkWRV0kKYBUwoKPtn\n4Ap3Pwy4EvgOgJmNBiYDBwATgZlmZsmFKyIiaSmbENz9YeDNguI1wI7x9k7A6nh7EnC7u29y9xeB\nFcDYZEIVEZE09a3xdZcBj5jZDMCAo+PyIcCCvP1Wx2UiIpJxtV5Uvgn4K3cfBlwE3JxcSCIiEkKt\nLYRx7n48gLvfZWY/jctXA3vl7TeUru6kbsxMiyiJiNTA3VO5NltpC8Hin04rzOxTAGb2aaJrBQBz\ngSlm1s/MRgD7AotKHdTd9ePOlVdeGTyGrPyoLlQXqovef9JUtoVgZnOAHDDYzF4mGlX0FaIRRP2A\nD+PHuPtSM7sDWApsBC70tN+BiIgkomxCcPczSjw1rsT+04Hp9QQlIiKNp5nKGZDL5UKHkBmqiy6q\niy6qi8awUD06ZqbeJBGRKpkZHviisoiItDglBBERAZQQRKTF/P3fw/vvh46iOekagoi0FDP4z/+E\nY44JHUk6dA1BRKQXL78Mp5zS9fjqq8PF0szUQhCRpnfrrfDFL4J71EKAaLsVqYUgIlKlSy4JHUHz\nUUIQkZY0Y0boCJqPuoxEpKmtWQN77hlt53cZdT5uNeoyEhEp4fXXu7a3bAkXRytQQhCRlnH55aEj\naG7qMhKRprZkCRx8cPHnWvEjRl1GIiKSOiUEEREBl
BBERCSmhCAiIoASgog0OUvl8mp7UkIQERFA\nCUFERGJKCCIiAighiIhITAlBRJraCSeUfm7q1MbF0Qq0dIWIlPXhh7D11tCnT+hIeuptlNFee8GL\nL8JWLfTVV0tXiEhQAwfC174WOorqrVwJ06eHjqJ5KCGISFmbNsEzz8CqVbBsWehoqrN8eegImkff\n0AGISHNwj7pgAN59FwYMCBuPJK9sC8HMbjKztWa2uKD8r8xsmZktMbNr8sqnmtmK+LnxaQQtImHp\nRjStqZIWwizg+8DszgIzywEnAQe5+yYz2yUuPwCYDBwADAXmm9l+unosIpJ9ZVsI7v4w8GZB8VeB\na9x9U7xP503sTgZud/dN7v4isAIYm1y4ItJI7nDttdH2I490L28Ws2eX30citV5U3h841swWmtlD\nZnZEXD4EWJm33+q4TESa1MUXh45AGqXWi8p9gUHufpSZjQHuBPZJLiwRyYJf/jJ0BNJItSaElcCv\nANz9MTPbbGaDiVoEw/L2GxqXFTVt2rSPtnO5HLlcrsZwRCQN558fOgLp6Oigo6OjIeeqaKayme0N\n3OPuB8WPvwIMcfcrzWx/4AF3H25mo4HbgHFEXUUPAEUvKmumskj2bb89vPdez/K334Yddmh8PMVU\ncj+EXXeF115LP5ZGSHOmctkWgpnNAXLAYDN7GbgSuBmYZWZLgPXAFwHcfamZ3QEsBTYCF+pTX0RC\nW7cudATNQWsZiUhJpVoIb70FO+7Y+HiKqfSOaT/7GZxzTqqhNITWMhKRIEp9Z9u8ubFx5Fu/Hj74\noPrXzZ+ffCytRi0EESmpf//iH77jx8P99zc+HuhqEXR+fFRzT+VW+MgJeg1BRNpXqW/iv/tdY+Mo\nZsMGePjh0FG0FiUEEalaFr5p//EfwxNPhI6itegagohU7c3CxWwaZMGCru0VK8LE0Mp0DUFESuqt\nfz7En29+PAMHwh/+UN3rW+EjR6OMRKTh1q7t/fkLLoCFCxsTSzHVJgMpTy0EESnqoovge98rv18j\n/4yrGVFUTCt85KiFICKSgA8/DB1BtikhiEjbWLMmdATZpoQgIkVlrXslibkPp55a/zFamRKCiDSF\nJGZGP/VU/cdoZUoIIlJUvRdwk3bFFaEjaH1KCCJS1IYNle3nDhs3Vn7cjRur2z9pW7aEXZwvy5QQ\nRKSomTMr2+9zn4N+/So/br9+0U+oG9Z8/vNwyCFhzp11mocgIkVV22W0ZUv512zYANtsE23fcQec\ndlp68ZTTrB8/mocgIpn3wAPl97n88q7tyZPh8cfTi0eqp4QgIol4663y+7zySvfH776bTixSGyUE\nEQkmZLfNpk3hzp1VSggikrrNm+G220JH0V0t1yT+8R/hiCPgscfghz+Ez3wGzjorevzMM8nH2Gi6\nQY6IpG7x4uiD84wzQkdSn7/92+j32LHdy2+7DQYMaP4uMLUQRCQRP/tZ8fK77oK7746258zp/lyz\njvQp5r33YP780FHUR8NORaSoWrpUiv1J93acBx+EP/mT9OLpzaZN0KdPda8pF8PIkfDcc7XHVFkM\nGnYqIi1oxoxw577uunDnziq1EESkqEa0EEq9Jql4erPttvDBB9W9plwMo0bBsmW1x1RZDOm1EHRR\nWUR6uOSS+o8xYQIcc0z9x2kmaXcXpU0tBBHpYffda1trKP9P2gwOOgiWLKn8Nb1phhYCpH+hXNcQ\nRKShklp4rpIPx3nzyu+zZUv9sRTS99GelBBEJDX/8z/l96nkpjVpfHivX1/d/knNbN68ObrOkMWZ\n0mUTgpndZGZrzWxxkecuNrMtZrZzXtlUM1thZsvMbHzSAYtI86ikS+bSS8vvk4Wb9fzTPyVznO9/\nH0aPhu98J5njJamSFsIsYEJhoZkNBY4HXsorOwCYDBwATARmmmXhn1JEGmHVKli4EN54I9njhure\nefzx6Nzvvw/33
FPZa958s2fZmjUwaxbMng033BCV/fu/JxdnUsqOMnL3h81seJGnrgO+BczNKzsZ\nuN3dNwEvmtkKYCzwaBLBiki2jRgRdYVUOtks6448Ep54Au68s/KluqdM6Xn/54MOgv/7v+5ljzyS\nTIxJqukagplNAla6e+H4gSHAyrzHq+MyEcmI3/8+WkKi3OifWnT2i7/wQnWvK/ywLHXcRlq5suvc\n1YxGevvt7o/dy7+/rKh6HoKZbQf8DVF3UV2mTZv20XYulyOXy9V7SBEp44QTYPnyaDutrpiVK8vv\nk+/v/g5+8IPSzy9YUF88tTjqqOi3GfSt4pOysE7rHSHV0dFBR0dHfQepUC0T0z4G7A08HV8fGAo8\nYWZjiVoEw/L2HRqXFZWfEESkfWVxCGhnTGPGVPe6RYu6P77rrvriKPyyfNVVV9V3wF5U2mVk8Q/u\n/oy77+Hu+7j7CGAVcJi7v0Z0PeELZtbPzEYA+wKLSh5VRIR05hnUK6nhMFOmJHOcRqhk2Okc4L+B\n/c3sZTM7r2AXpytZLAXuAJYCvwUu1HRkESnnRz8Kc96LLur++IQT4Nlno+3C2322Ay1dIdJmRo4s\nfw0hxGDx3j4OOjrSG7lUuNzGjBnw139dXx0UHrOS/SqlpStEpGGydqvLEErd7KdaWZyN3BslBBHp\n5r77QkfQU6NbLHPnlt+nEhs3JnOcRlFCEGkznd1FpVQzxLIVXXxx1y0/a/X1r0e/m61XXAlBRLqp\n9raSSfnDH0o/9+GH6Z03jRFODz8c/X7vveSPnSYlBBHpZqtAnwo77FD6uc9+Nr3z/vCHyR+zs4vr\n4IOTP3aa2rxxKNJeChedu/baaERNviwuR7l5c3rHvuEGWLwYDjwwuWM+/zy8/jq8+mpyx2wEDTsV\naSNHHQWPFiw1Wfhn+Bd/AT/+ceNiyrdlS/GElMUkVc5++8GKFb3vo2GnIhJMsUXWVq3q/jhrq3D+\n5jehI6hNuWQAsGFD+nFUQwlBpM1Nntz9cedM3RCKdQ2dckrj42iUSu+x0ChKCCJtpNiy1AsWwJe/\n3PhYiknzWkEWhbqAX0rGwhGREH7609ARRNotIYQa4luKEoKIAHDSSaEjgAk9btbb2tRCEJFM+rd/\nCx1B14SudqGEICKZlYXhnddeG604apaNeNKUtS4jTUwTkUyZPbv51gCqVdZaCEoIIpIpTz8dOoLG\nUULIs3hxyLOLtKbly+G112DPPWHIkOh3sQlpEp4SQp6zzgp5dpHW89JL8M47PcsHD26/IZ3NQAkh\nj1oIIsn65S+L39T9wgvh6KNh4sTGxySl6aKyiKTm05/u/vjqq+Gxx+C44+CQQ8LEJKVlbRSVEoJI\nC9lll/YZodMKstZllLFwRKTd9ekD118fOorGUEIQESnh6qth06bonsTu0QipVpa1LiMlBBH5SOHN\ncxqtsLvr3nvDxNEoWeveU0IQkY+MHRv2/Dvv3P3xsGFh4mgUJQQRCWbw4NLP/epXjYujlML7Muy0\nU5g4GkUJQUSCGTCg9HOf+1zj4igla+Py240SgkibO/10+OCD0FGUluXY6nX44aEj6K5sQjCzm8xs\nrZktziv7ZzNbZmZPmdm/mtkOec9NNbMV8fPj0wpcRKp36qk9y048EbbdtvGxVCrLsdVrm21CR9Bd\nJS2EWUDhfYzmAR9390OBFcBUADMbDUwGDgAmAjPNsjawSqR9XXddz7Ik1xR7663kjpXvqqvSOW5a\n9t8/dAS1KZsQ3P1h4M2CsvnuviV+uBAYGm9PAm53903u/iJRsgg8bkFESkm6z3677ZI9XrPK2sXi\nSiVxDeFLwG/j7SHAyrznVsdlIpJBCxYke7x+/Wp/7fheOpjVz9AYda1lZGaXAxvd/Re1vH7atGkf\nbedyOXK5XD3hiEgCzLL3DbfZEsKf/mnx7rladHR00NHRkczByqg5IZjZucCJwHF
5xauBvfIeD43L\nispPCCKSDX37wsaNoaPortkSwt57J3eswi/LV6V4QaXSLiOLf6IHZicA3wImufv6vP3mAlPMrJ+Z\njQD2BRYlFayIpE9zAep39tnNuUBfJcNO5wD/DexvZi+b2XnA94HtgQfM7Akzmwng7kuBO4ClRNcV\nLnTPWuNTRDoVW21zhx16ljVCb62AtFsIkycne7xBg6IF+ppN2S4jdz+jSPGsXvafDkyvJygRSd/n\nP198YtSDD8KBBzY+npAJYcIE2LABfv3rdM+TdZqpLNKmTjml+Aft0KE9yxph991LP/fCC+mee+ed\nszdrOAQlBJE2dUaxtj/hLuDOnFn6uTlz0jvv9Olw8snJHe8b30juWI2mhCDSpkp98Ie6i1f//qWf\n22ef9M57/PFRXSQ1MihUCysJSggibWb06N6fTyIhjBhR/zHynXdessfL15kYzzoL3n032WO//36y\nx0ubEoJImznpJDjiiNLP19tlNG5c8UX06pFmN1bntQuzaHnwUaPgyCOTOXazLeVR10xlEWk+11zT\n+/P1thAWLox+z5hR33Ea4b77YEjB4jrLlsE998CkSWFiCkktBBHpJtQ1hN4ce2w6xx01Kp3j5tt1\n1/TPkZQM/tOLSEhZnKk8Zkw6xx0+PPljFnZv3X138udIixKCiHQTYtjpIYc0/py9jVwaN6724xZ2\nNTXTOkxKCCLSTa0fYAcfXPs5L7qo9tfWqrc14nbbrfbj7rdf7a8NTQlBRBKRxWsP9fjJT5I5TqkZ\n0EmPxEpCi/0TikhWVNPtksVkcv75ydwXotQ9oc8+u/5jJy2D/wwi0gruvhuWLKls3y98Id1Yigm9\nDnPo8xejeQgikojCxen+6I+in0rUc+vNWoVa5rtTb0t1hKIWgogk4uc/r+11y5cnG0clDjssus1l\nJZ59Nvnzd3T0fg/pUJQQRCQRAwfW9roQo3ImTqx8vkW5tZ8qMWBA1/bHPw6f+lQ2h6MqIYhIqr75\nzdLPffKTlR9n3Tq49db644H0Poy/9rXi5evWwTvvwCuvwO9+l865k6BrCCKSiFLfuI8+uvRrJk6s\n/Pi77JLcBLZDD03mOIVKtZI6F7mrtRXVKBbqlsdmptsti2RULd+ge/tzLnW8Wj4Ckvh2X+15Kz3n\n1Klw9dXVx1NdLIa7p9LGUZeRiNRtp51CR5ANzf4dVwlBROr25pu9P/8v/9KzLNSHZxZnCGeFEoKI\npG7ChO6Pp0wJEwfAX/5luHNnnRKCiKRu3327tmfOhF/8Ilwsxx1X/WtyucTDyCSNMhKRulR6/+Tj\njoNNm+C009KNJ6SvfjV0BPVRQhCRutx3X2X7/cd/pBtHmiodZTRsWLpxpE1dRiLSw6JFle/b7CNr\nKpHFWcVpUEIQkR5GjgwdQTpa9X0lRQlBRHrI8jfielYJTbM1c9BB6R27UcomBDO7yczWmtnivLJB\nZjbPzJ43s/vNbMe856aa2QozW2ZmGVzPT0SS1OhlnMeObez5oLIE2Qqtj0paCLOAglHEXAbMd/eR\nwIPAVAAzGw1MBg4AJgIzzbL8XUNEiqnmr3avvdKLI2ntcL2jHmUTgrs/DBTOQzwZuCXevgU4Jd6e\nBNzu7pvc/UVgBRAgn4tIPVr1a1ytCaGSVVmruWVoVtV6DWE3d18L4O6vArvF5UOAlXn7rY7LRKSJ\nZDkhhIht2jTYuLH3fXpb5rtZJDUPoaa8O23atI+2c7kcuXaZDigiNTvzTHjoodpee955ycbSCB0d\nHXR0dDTkXBUtf21mw4F73P3g+PEyIOfua81sD+Ahdz/AzC4D3N2/He93H3Cluz9a5Jha/lokoz74\noLKLxRdcADfemH48hWptJdTzkbN5M/Qt8RX6wANhyZLaj12NLCx/bfFPp7nAufH2OcBv8sqnmFk/\nMxsB7AtUMcVFRLIgy11GoWy1FYwaVfxeyFd
c0fh40lC2y8jM5gA5YLCZvQxcCVwD3GlmXwJeIhpZ\nhLsvNbM7gKXARuBCNQNEmo8SQk9msGxZtP3nfw433xxtt9InXNmE4O5nlHjqMyX2nw5MrycoEQmr\nX7/QEWTbJZdEF5mruQVoM9DidiLSg1oIvRs5EmbPDh1F8rR0hYiIAEoIIlKHgQNDRyBJUkIQkZrM\nmgVXXRXm3PPmhTlvq1NCEJGa7LcfbLddmHPn35JTkqOEICJNp9Lbdkp1lBBEpCa77VZ+H2kuFS1d\nkcqJtXSFSKaVG3oa+s+3lqGxoWNOQhaWrhARyZRx42Dw4NBRtBa1EESkqKy3EDpV01LISsz1UAtB\nRKRO554bOoLsUwtBRIpqtRbCc8+1xn2P1UIQkYb70Y9CR1CZr3yl9HNz5jQujlaghCAiRZ16augI\nKnPmmcXLhw2D00/vepyVFk2WKSGISFGttuLp/vuHjiD7lBBEpC1spU+7slRFIlK1Z58NHUHlBgwI\nHUHzUEIQkaK23rr0c6NHNy6OWu23X/S7Wa6FZIGGnYpISaWuI2TpT/e//guOPbZn+XvvQf/+sH49\nvP8+DBrU+NjSkOawU91CU0SaWqlF9vr3j35vs030I+WphSAiJTVDCwGKx5m1GJOiiWkiIlUYMyZ0\nBM1JCUFEqrJ0aegIyrvtttARNCclBBEp6YILQkdQmZNPDh1Ba1BCEJGSbrwxdASV+Yd/6P641WZZ\nN4oSgohUZe+9Q0dQXqteUE6bEoKIVGW77UJHIGlRQhCRpjd6NOyxR+goml9dCcHMpprZs2a22Mxu\nM7N+ZjbIzOaZ2fNmdr+Z7ZhUsCIixfTpA2vWhI6i+dWcEMxsOPBl4DB3P5ho1vPpwGXAfHcfCTwI\nTE0iUBERSVc9LYR3gA3AADPrC2wHrAZOBm6J97kFOKWuCEVEpCFqTgju/iYwA3iZKBG87e7zgd3d\nfW28z6tAiZVGRKQZ5E/yWrcuXByV2Guv0BE0t3q6jPYBLgKGA3sStRTOBAoHfGkAmEgT23nnru1d\ndgkXRyUmTYp+DxwYNo5mVc9qp0cCj7j7GwBmdjdwNLDWzHZ397VmtgfwWqkDTJs27aPtXC5HLper\nIxwRSdOf/VnoCMq74Qb47ndh221DR5Kcjo4OOjo6GnKumlc7NbNDgJ8DY4D1wCzgMWAY8Ia7f9vM\nLgUGuftlRV6v1U5FmsC998KJJ8JPfgLnnx86Gsnk/RDc/Wkzmw08DmwGngR+DAwE7jCzLwEvAZOT\nCFREwlIyaH26H4KI9GrVKsjl4IUXQkcikG4LQQlBRKSJ6AY5IiKSOiUEEREBlBBERCSmhCAiIoAS\ngoiIxJQQREQEUEIQEZGYEoKIiABKCCIiElNCEBERQAlBRERiSggiIgIoIYiISEwJQUREACUEERGJ\nKSGIiAighCAiIjElBBERAZQQREQkpoQgIiKAEoKIiMSUEEREBFBCEBGRmBKCiIgASggiIhJTQhAR\nEUAJQUREYnUlBDPb0czuNLNlZvasmY0zs0FmNs/Mnjez+81sx6SCFRGR9NTbQrge+K27HwAcAjwH\nXAbMd/eRwIPA1DrP0fI6OjpCh5AZqosuqosuqovGqDkhmNkOwDHuPgvA3Te5+9vAycAt8W63AKfU\nHWWL03/2LqqLLqqLLqqLxqinhTACeN3MZpnZE2b2YzPrD+zu7msB3P1VYLckAhURkXTVkxD6AocD\nP3D3w4H3iLqLvGC/wsciIpJB5l7b57WZ7Q4scPd94sefJEoIHwNy7r7WzPYAHoqvMRS+XolCRKQG\n7m5pHLdvrS+MP/BXmtn+7r4c+DTwbPxzLvBt4BzgNyVen8obEhGR2tTcQgAws0OAnwJbA/8LnAf0\nAe4A9gJeAia7+1v1hyoiImmqKyGIiEjrCDJT2cxOMLPnzGy5mV0aIoakmdlNZrbWzBbnlZWcpGdm\nU81sRTy
pb3xe+eFmtjium+/llfczs9vj1ywws2GNe3fVMbOhZvZgPFlxiZl9PS5vu/ows23M7FEz\nezKuj6vj8rari05mtlU8MnFu/Lgt68LMXjSzp+P/G4visrB14e4N/SFKQi8Aw4m6mp4CRjU6jhTe\n1yeBQ4HFeWXfBi6Jty8From3RwNPEl3D2Tuuj87W2qPAmHj7t8CEePurwMx4+wvA7aHfcy91sQdw\naLy9PfA8MKqN66N//LsPsBD4RLvWRRzjRcDPgbnx47asC6Ju9kEFZUHrIkQlHAXcm/f4MuDS0P84\nCb234XRPCM8RzcuA6EPyuWLvGbgXGBfvszSvfApwY7x9HzAu3u4DrAv9fquol18Dn2n3+gD6A4vi\nP+62rAtgKPAAkKMrIbRrXfweGFxQFrQuQnQZDQFW5j1eFZe1ot28+CS9wjpYHZcNIaqPTvl189Fr\n3H0z8JaZ7Zxe6Mkws72JWk4LKT1psaXrI+4ieRJ4Fehw96W0aV0A1wHfovv8pHatCwceMLPHzOz8\nuCxoXdQ87FRqkuQV/MwP2zWz7YG7gG+4+7vWc+5JW9SHu28BDrNouZf7zSxHuhM4M1kXZvZZYK27\nPxXXQSktXxexT7j7GjPbFZhnZs8T+P9FiBbCaiD/4sbQuKwVrbVoAh8WTdJ7LS5fTTQst1NnHZQq\n7/YaM+sD7ODub6QXen3MrC9RMrjV3TvnorRtfQC4+ztEfbxH0p518Qlgkpn9L/AL4DgzuxV4tQ3r\nAndfE/9eR9StOpbA/y9CJITHgH3NbLiZ9SPq85obII40GN2z8FyiSXrQfZLeXGBKPApgBLAvsChu\nIr5tZmPNzIAvFrzmnHj7NKKVZLPsZqK+zevzytquPsxsl86RIma2HXA80cXBtqsLd/8bdx/m0eoG\nU4AH3f1s4B7arC7MrH/cgsbMBgDjgSWE/n8R6GLKCUQjT1YAl4W+uJPQe5oDvAKsB14mmqQ3CJgf\nv9d5wE55+08lGimwDBifV35E/B9jBXB9Xvk2RBP+VhD1x+8d+j33UhefADYTjSB7Engi/jffud3q\nAzgofv9PAk8D34zL264uCurlU3RdVG67uiBaHLTz72NJ5+dg6LrQxDQREQF0C00REYkpIYiICKCE\nICIiMSUEEREBlBBERCSmhCAiIoASgoiIxJQQREQEgP8Hgo1lPmtBVN8AAAAASUVORK5CYII=\n", 52 | "text/plain": [ 53 | "" 54 | ] 55 | }, 56 | "metadata": {}, 57 | "output_type": "display_data" 58 | } 59 | ], 60 | "source": [ 61 | "plt.plot(dataset[:3*16000])\n", 62 | "plt.show()" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "metadata": { 69 | "collapsed": false 70 | }, 71 | "outputs": [ 72 | { 73 | "name": "stderr", 74 | "output_type": "stream", 75 | "text": [ 76 | "Using Theano backend.\n", 77 | "Using gpu device 0: GeForce GT 750M (CNMeM is enabled with initial size: 75.0% of memory, cuDNN 4007)\n" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "from keras.layers import merge, Input\n", 83 | "from keras.models import Model\n", 84 | "from keras.layers.core import Dense, Activation\n", 85 | "from 
keras.layers.convolutional import Convolution1D\n", 86 | "from keras.layers.recurrent import LSTM" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 6, 92 | "metadata": { 93 | "collapsed": true 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "def generate_samples(data, batch_size, sample_length, discretize_input=False):\n", 98 | " sample_offset = 0\n", 99 | " step_size = sample_length + 1\n", 100 | " while True:\n", 101 | " if discretize_input:\n", 102 | " history = np.zeros((batch_size, sample_length, nb_classes), dtype='float32')\n", 103 | " else:\n", 104 | " history = np.zeros((batch_size, sample_length, 1), dtype='float32')\n", 105 | " predictions = np.zeros((batch_size, nb_classes), dtype='float32')\n", 106 | " for i in range(batch_size):\n", 107 | " if discretize_input:\n", 108 | " for j, k in enumerate(data[sample_offset:sample_offset+sample_length]):\n", 109 | " history[i, j, k] = 1\n", 110 | " else:\n", 111 | " history[i] = data[sample_offset:sample_offset+sample_length].reshape(-1, 1)\n", 112 | " prediction = data[sample_offset+sample_length]\n", 113 | " predictions[i, prediction] = 1\n", 114 | " sample_offset = (sample_offset + step_size) % (data.shape[0] - step_size)\n", 115 | " if not discretize_input:\n", 116 | " history /= 255\n", 117 | " yield (history, predictions)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 8, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [ 127 | { 128 | "name": "stdout", 129 | "output_type": "stream", 130 | "text": [ 131 | "(3, 2, 256) (3, 256)\n", 132 | "[[[ 0. 0. 0. ..., 0. 0. 0.]\n", 133 | " [ 0. 0. 0. ..., 0. 0. 0.]]\n", 134 | "\n", 135 | " [[ 0. 0. 0. ..., 0. 0. 0.]\n", 136 | " [ 0. 0. 0. ..., 0. 0. 0.]]\n", 137 | "\n", 138 | " [[ 0. 0. 0. ..., 0. 0. 0.]\n", 139 | " [ 0. 0. 0. ..., 0. 0. 0.]]]\n", 140 | "[[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 141 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 142 | " 0. 0. 
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 143 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 144 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 145 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 146 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 147 | " 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 148 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 149 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 150 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 151 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 152 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 153 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 154 | " 0. 0. 0. 0.]\n", 155 | " [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 156 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 157 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 158 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 159 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 160 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 161 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 162 | " 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 163 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 164 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 165 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 166 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 167 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 168 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 169 | " 0. 0. 0. 0.]\n", 170 | " [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 171 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 172 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 173 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 
0. 0. 0. 0. 0. 0.\n", 174 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 175 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 176 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 177 | " 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 178 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 179 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 180 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 181 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 182 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 183 | " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", 184 | " 0. 0. 0. 0.]]\n" 185 | ] 186 | } 187 | ], 188 | "source": [ 189 | "# check out an example batch, make sure the generator is sane\n", 190 | "itr = generate_samples(dataset,\n", 191 | " batch_size=3,\n", 192 | " sample_length=2,\n", 193 | " discretize_input=discretize_input)\n", 194 | "for i in range(10000):\n", 195 | " X, y = next(itr)\n", 196 | "X, y = next(itr)\n", 197 | "print X.shape, y.shape\n", 198 | "print X\n", 199 | "print y" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 7, 205 | "metadata": { 206 | "collapsed": false 207 | }, 208 | "outputs": [ 209 | { 210 | "name": "stdout", 211 | "output_type": "stream", 212 | "text": [ 213 | "____________________________________________________________________________________________________\n", 214 | "Layer (type) Output Shape Param # Connected to \n", 215 | "====================================================================================================\n", 216 | "input_1 (InputLayer) (None, 256, 256) 0 \n", 217 | "____________________________________________________________________________________________________\n", 218 | "lstm_1 (LSTM) (None, 256, 128) 197120 input_1[0][0] \n", 219 | "____________________________________________________________________________________________________\n", 220 | 
"activation_1 (Activation) (None, 256, 128) 0 lstm_1[0][0] \n", 221 | "____________________________________________________________________________________________________\n", 222 | "convolution1d_1 (Convolution1D) (None, 256, 128) 16512 activation_1[0][0] \n", 223 | "____________________________________________________________________________________________________\n", 224 | "activation_2 (Activation) (None, 256, 128) 0 convolution1d_1[0][0] \n", 225 | "____________________________________________________________________________________________________\n", 226 | "lstm_2 (LSTM) (None, 256, 128) 131584 activation_2[0][0] \n", 227 | "____________________________________________________________________________________________________\n", 228 | "activation_3 (Activation) (None, 256, 128) 0 lstm_2[0][0] \n", 229 | "____________________________________________________________________________________________________\n", 230 | "convolution1d_2 (Convolution1D) (None, 256, 128) 16512 activation_3[0][0] \n", 231 | "____________________________________________________________________________________________________\n", 232 | "activation_4 (Activation) (None, 256, 128) 0 convolution1d_2[0][0] \n", 233 | "____________________________________________________________________________________________________\n", 234 | "merge_1 (Merge) (None, 256, 128) 0 activation_2[0][0] \n", 235 | " activation_4[0][0] \n", 236 | "____________________________________________________________________________________________________\n", 237 | "lstm_3 (LSTM) (None, 256, 128) 131584 merge_1[0][0] \n", 238 | "____________________________________________________________________________________________________\n", 239 | "activation_5 (Activation) (None, 256, 128) 0 lstm_3[0][0] \n", 240 | "____________________________________________________________________________________________________\n", 241 | "convolution1d_3 (Convolution1D) (None, 256, 128) 16512 activation_5[0][0] \n", 242 | 
"____________________________________________________________________________________________________\n", 243 | "activation_6 (Activation) (None, 256, 128) 0 convolution1d_3[0][0] \n", 244 | "____________________________________________________________________________________________________\n", 245 | "lstm_4 (LSTM) (None, 256, 128) 131584 activation_6[0][0] \n", 246 | "____________________________________________________________________________________________________\n", 247 | "activation_7 (Activation) (None, 256, 128) 0 lstm_4[0][0] \n", 248 | "____________________________________________________________________________________________________\n", 249 | "convolution1d_4 (Convolution1D) (None, 256, 128) 16512 activation_7[0][0] \n", 250 | "____________________________________________________________________________________________________\n", 251 | "activation_8 (Activation) (None, 256, 128) 0 convolution1d_4[0][0] \n", 252 | "____________________________________________________________________________________________________\n", 253 | "merge_2 (Merge) (None, 256, 128) 0 activation_6[0][0] \n", 254 | " activation_8[0][0] \n", 255 | "____________________________________________________________________________________________________\n", 256 | "lstm_5 (LSTM) (None, 256, 128) 131584 merge_2[0][0] \n", 257 | "____________________________________________________________________________________________________\n", 258 | "activation_9 (Activation) (None, 256, 128) 0 lstm_5[0][0] \n", 259 | "____________________________________________________________________________________________________\n", 260 | "convolution1d_5 (Convolution1D) (None, 256, 128) 16512 activation_9[0][0] \n", 261 | "____________________________________________________________________________________________________\n", 262 | "activation_10 (Activation) (None, 256, 128) 0 convolution1d_5[0][0] \n", 263 | 
"____________________________________________________________________________________________________\n", 264 | "lstm_6 (LSTM) (None, 256, 128) 131584 activation_10[0][0] \n", 265 | "____________________________________________________________________________________________________\n", 266 | "activation_11 (Activation) (None, 256, 128) 0 lstm_6[0][0] \n", 267 | "____________________________________________________________________________________________________\n", 268 | "convolution1d_6 (Convolution1D) (None, 256, 128) 16512 activation_11[0][0] \n", 269 | "____________________________________________________________________________________________________\n", 270 | "activation_12 (Activation) (None, 256, 128) 0 convolution1d_6[0][0] \n", 271 | "____________________________________________________________________________________________________\n", 272 | "merge_3 (Merge) (None, 256, 128) 0 activation_10[0][0] \n", 273 | " activation_12[0][0] \n", 274 | "____________________________________________________________________________________________________\n", 275 | "lstm_7 (LSTM) (None, 256) 394240 merge_3[0][0] \n", 276 | "====================================================================================================\n", 277 | "Total params: 1348352\n", 278 | "____________________________________________________________________________________________________\n" 279 | ] 280 | } 281 | ], 282 | "source": [ 283 | "discretize_input = True\n", 284 | "use_residual = True\n", 285 | "use_convolutions = True\n", 286 | "lstm_activation = 'relu'\n", 287 | "conv_activation = 'linear'\n", 288 | "\n", 289 | "nb_classes = 256\n", 290 | "sample_length = 256\n", 291 | "features = 128\n", 292 | "hidden_units = 128\n", 293 | "layers = 7 # should be odd\n", 294 | "\n", 295 | "if discretize_input:\n", 296 | " inputs = Input(shape=(sample_length, nb_classes))\n", 297 | "else:\n", 298 | " inputs = Input(shape=(sample_length, 1)) \n", 299 | "x = inputs\n", 300 | "\n", 301 | "for i in 
range((layers - 1) / 2):\n", 302 | " x = LSTM(hidden_units, return_sequences=True)(x)\n", 303 | " x = Activation(lstm_activation)(x)\n", 304 | " if use_convolutions:\n", 305 | " x = Convolution1D(features, 1)(x)\n", 306 | " x = Activation(conv_activation)(x)\n", 307 | " \n", 308 | " y = LSTM(hidden_units, return_sequences=True)(x)\n", 309 | " y = Activation(lstm_activation)(y)\n", 310 | " if use_convolutions:\n", 311 | " y = Convolution1D(features, 1)(y)\n", 312 | " y = Activation(conv_activation)(y)\n", 313 | " \n", 314 | " if use_residual:\n", 315 | " x = merge([x, y], mode='sum')\n", 316 | " else:\n", 317 | " x = y\n", 318 | " \n", 319 | "prediction = LSTM(nb_classes, activation='softmax', return_sequences=False)(x)\n", 320 | "\n", 321 | "model = Model(input=inputs, output=prediction)\n", 322 | "model.summary()\n", 323 | "\n", 324 | "model.compile(loss='binary_crossentropy', optimizer='rmsprop')" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": { 331 | "collapsed": false 332 | }, 333 | "outputs": [], 334 | "source": [ 335 | "batch_size = 32\n", 336 | "batches_per_epoch = 100\n", 337 | "\n", 338 | "total_samples = len(dataset) / (sample_length + 1)\n", 339 | "samples_per_epoch = batches_per_epoch * batch_size\n", 340 | "epochs_per_real_epoch = total_samples / samples_per_epoch\n", 341 | "print total_samples, 'samples in total'\n", 342 | "print batch_size, 'batch_size'\n", 343 | "print batches_per_epoch, 'batches_per_epoch'\n", 344 | "print samples_per_epoch, 'samples_per_epoch'\n", 345 | "print epochs_per_real_epoch, 'epochs for a full pass through the data'\n", 346 | "\n", 347 | "model.fit_generator(generate_samples(dataset, batch_size, sample_length, discretize_input),\n", 348 | " samples_per_epoch=samples_per_epoch,\n", 349 | " nb_epoch=epochs_per_real_epoch)" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 25, 355 | "metadata": { 356 | "collapsed": false 357 | }, 358 | "outputs": 
[ 359 | { 360 | "data": { 361 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZQAAAEACAYAAACUMoD1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnXucXWV5739PMvdrbpPJPYQkmHBriIqogKNISKgQtekx\nKYrgpRTBnpaqyFEPoT0H2x7bWkS5tCBNEYJVi/ko5VYcWrQEVEiATG7SBJgkk8ll7pdMJs/549mP\n611rr7X3mr3XzJ7L8/185jMza6+19rtu7+/9Pc/7vouYGYZhGIaRL5MKXQDDMAxjfGCCYhiGYSSC\nCYphGIaRCCYohmEYRiKYoBiGYRiJYIJiGIZhJEIsQSGi1US0k4h2E9HNEevcQUR7iOhlIlqRbVsi\n+msiakqt/0Miqkktn0ZEzxBRJxHdEfiOn6X29RIR/ZqIZuR22IZhGEbSZBUUIpoE4E4AlwE4C8AG\nIloWWGcNgMXMvBTAdQDujrHtkwDOYuYVAPYAuCW1vA/AVwH8WUSRNjDzecy8kpmPxD5SwzAMY1iJ\n41DOB7CHmfcz8wCAzQDWBtZZC2ATADDzVgC1RFSfaVtmfpqZT6W2fx7AvNTyHmb+BYD+PMpsGIZh\njDBxKue5AN50/n8rtSzOOnG2BYBPAfi3GGUBgAdS4a6vxlzfMAzDGAGGq7VPsVck+gqAAWZ+KMbq\nf8DM5wC4CMBFRPTxXAtoGIZhJEtRjHWaASxw/p+XWhZcZ37IOiWZtiWiawBcDuADcQrLzAdTv7uJ\n6CFISO3B4HpEZBOUGYZh5AAzxzYEQeI4lBcBLCGihURUAmA9gC2BdbYAuBoAiOgCAG3M3JJpWyJa\nDeCLAK5k5qh8yW8PjIgmE9H01N/FAD4E4NWoQjPzuP259dZbC14GOzY7Pju+8feTL1kdCjMPEtGN\nkF5ZkwDcx8xNRHSdfMz3MvNjRHQ5Ee0F0A3g2kzbpnb9LYiDeYqIAOB5Zv4cABDRfwOoBlBCRGsB\nrALwBoAniKgIwGQATwP4h7zPgGEYhpEIcUJeYObHAbwtsOyewP83xt02tXxphu9bFPHRO7IW1jAM\nwygI1gV3DNLQ0FDoIgwb4/nYADu+sc54P758oSTiZqMNIuLxeFyGYRjDCRGBhzkpbxiGYRhZMUEx\nDMMwEsEExTAMw0gEExTDMAwjEUxQDMMwjEQwQTEMwzASwQTFMAzDSAQTFMMwDCMRTFAMwzCMRDBB\nMQzDMBLBBMUwDMNIBBMUwzAMIxFMUAzDMIxEMEExDMMwEsEExTAMw0gEExTDMAwjEUxQDCPF/fcD\np04VuhSGMXaxNzYaRoqKCqClBaiuLnRJDKMw2BsbDSMhBgflxzCM3DBBMYwUp06ZoBhGPpigGEaK\nU6csh2IY+WCCYhgAmM2hGEa+mKAYBkRQABMUw8gHExTDgCckFvIyjNwxQTEMeEJiDsUwcscExTDg\nCYkJimHkTixBIaLVRLSTiHYT0c0R69xBRHuI6GUiWpFtWyL6ayJqSq3/QyKqSS2fRkTPEFEnEd0R\n+I6VRLQ9ta9v5nbIhpGOOhQLeRlG7mQVFCKaBOBOAJcBOAvABiJaFlhnDYDFzLwUwHUA7o6x7ZMA\nzmLmFQD2ALgltbwPwFcB/FlIce4C8GlmPgPAGUR02RCO1TAiMYdiGPkTx6GcD2APM+9n5gEAmwGs\nDayzFsAmAGDmrQBqiag+07bM/DQza3vweQDzUst7mPkXAPrdLyCiWQCqmfnF1KJNAD48pKM1jAgs\nh2IY+RNHUOYCeNP5/63UsjjrxNkWAD4F4N9ilOOtGPsyjCFjvbwMI3+GKykfe3IxIvoKgAFmfmiY\nymIYWTGHYhj5UxRjnWYAC5z/56WWBdeZH7JOSaZtiegaAJcD+EDMcoR9RygbN2787d8NDQ1oaGiI\n8RXG
RMUExZiINDY2orGxMbH9ZZ2+nogmA9gF4BIABwG8AGADMzc561wO4AZm/l0iugDAN5n5gkzb\nEtFqAH8D4GJmPhryvZ8E8A5m/ryz7HkAfwzgRQA/BXAHMz8esq1NX28MieZmYN484IUXgHe+s9Cl\nMYzCkO/09VkdCjMPEtGNkF5ZkwDclxKE6+RjvpeZHyOiy4loL4BuANdm2ja1629BHMxTRAQAzzPz\n51IH9d8AqgGUENFaAKuYeSeAGwA8AKAMwGNhYmIYuWAOxTDyx16wZRgA9u0DFi0CnnsOeO97C10a\nwygM9oItw0gAG9hoGPljgmIYsIGNhpEEJiiGAcuhGEYSmKAYBmxgo2EkgQmKYcAcimEkgQmKYcBy\nKIaRBCYohgHr5WUYSWCCYhiwkJdhJIEJimHAQl6GkQQmKIYBC3kZRhKYoBgGzKEYRhKYoBgGLIdi\nGElggmIYsIGNhpEEJiiGAXMohpEEJiiGAcuhGEYSmKAYBqyXl2EkgQmKYcAcimEkgQmKYcByKIaR\nBCYohgELeRlGEpigGAYs5GUYSWCCYhiwkJdhJIEJimHABjYaRhKYoBgGzKEYRhKYoBgGzKEYRhKY\noBgGzKEYRhKYoBgGrJeXYSSBCYphwMahGEYSmKAYBsyhGEYSmKAYBiyHYhhJEEtQiGg1Ee0kot1E\ndHPEOncQ0R4iepmIVmTbloj+moiaUuv/kIhqnM9uSe2riYhWOct/ltrXS0T0ayKakdthG4YfC3kZ\nRv5kFRQimgTgTgCXATgLwAYiWhZYZw2Axcy8FMB1AO6Ose2TAM5i5hUA9gC4JbXNmQD+B4DlANYA\n+A4RkfN1G5j5PGZeycxHcjtsw/BjIS/DyJ84DuV8AHuYeT8zDwDYDGBtYJ21ADYBADNvBVBLRPWZ\ntmXmp5lZ24PPA5iX+vtKAJuZ+SQz74OIzflDLLNhDAkLeRlG/sSpnOcCeNP5/63UsjjrxNkWAD4F\n4LGIfTUHtnkgFe76aoyyG0YsBgcBIgt5GUY+FA3Tfin7KqkVib4CYICZH46x+h8w80EiqgTwIyL6\nODM/GLbixo0bf/t3Q0MDGhoa4hbJmICcOgUUF5tDMSYWjY2NaGxsTGx/cQSlGcAC5/95qWXBdeaH\nrFOSaVsiugbA5QA+EGNfYOaDqd/dRPQQJBSWVVAMIxuDgyYoxsQj2Ni+7bbb8tpfnJDXiwCWENFC\nIioBsB7AlsA6WwBcDQBEdAGANmZuybQtEa0G8EUAVzJzf2Bf64mohIgWAVgC4AUimkxE01PbFgP4\nEIBXczpqwwigDsVCXoaRO1kdCjMPEtGNkF5ZkwDcx8xNRHSdfMz3MvNjRHQ5Ee0F0A3g2kzbpnb9\nLYiDeSrViet5Zv4cM+8gou8D2AFgAMDnmJmJqBTAE0RUBGAygKcB/ENiZ8KY0AwOAiUl5lAMIx+I\nmQtdhsQhIh6Px2UMH3/xF8A99wAf/CDwwAOFLo1hFAYiAjPHzoEHsS64hgELeRlGEpigGAYsKW8Y\nSWCCYhiwbsOGkQQmKIYBz6FYyMswcscExTBgDsUwksAExTBgORTDSAITFMOA9fIyjCQwQTEM2MBG\nw0gCExTDgOVQDCMJTFAMA9bLyzCSwATFMGAOxTCSwATFMGCCYhhJYIJiGLCQl2EkgQmKYcAcimEk\ngQmKYcAGNhpGEpigGAZsYKNhJIEJyihj1Srg9dcLXYqJhw1sNIz8MUEZZezZAxw7VuhSTDwsh2IY\n+WOCMsro7AROnix0KSYe1svLMPLHBGWUYYJSGMyhGEb+mKCMIk6ckB8TlJHHenkZRv6YoIwiOjvl\ntwnKyGO9vAwjf0xQRhEmKIXDQl6GkT8mKKMIE5TCYSEvw8gfE5RRhAlK4bCQl2HkjwnKKMIEpXDY\nwEbDyB8TlFGECUrhsByKYeSPCcoowgSlcAwOAkVFFvIyjHyIJShEtJ
qIdhLRbiK6OWKdO4hoDxG9\nTEQrsm1LRH9NRE2p9X9IRDXOZ7ek9tVERKuc5SuJaHtqX9/M7ZBHLyYohePUKQt5GUa+ZBUUIpoE\n4E4AlwE4C8AGIloWWGcNgMXMvBTAdQDujrHtkwDOYuYVAPYAuCW1zZkA/geA5QDWAPgOEVFqm7sA\nfJqZzwBwBhFdluuBj0ZMUAqH9fIyjPyJ41DOB7CHmfcz8wCAzQDWBtZZC2ATADDzVgC1RFSfaVtm\nfpqZNcDwPIB5qb+vBLCZmU8y8z6I2JxPRLMAVDPzi6n1NgH48JCPeBRjglI4rJeXYeRPHEGZC+BN\n5/+3UsvirBNnWwD4FIDHIvbV7OzrrRj7GrN0dgJEJiiFwByKYeRP0TDtl7KvklqR6CsABpj54SQL\nsHHjxt/+3dDQgIaGhiR3Pyx0dgJTppigFALr5WVMRBobG9HY2JjY/uIISjOABc7/81LLguvMD1mn\nJNO2RHQNgMsBfCDGvqKWh+IKylihsxOYOtUEpRBYyMuYiAQb27fddlte+4sT8noRwBIiWkhEJQDW\nA9gSWGcLgKsBgIguANDGzC2ZtiWi1QC+COBKZu4P7Gs9EZUQ0SIASwC8wMyHALQT0fmpJP3VAH6c\n22GPTkxQCocNbDSM/MnqUJh5kIhuhPTKmgTgPmZuIqLr5GO+l5kfI6LLiWgvgG4A12baNrXrb0Ec\nzFOpTlzPM/PnmHkHEX0fwA4AAwA+x8yc2uYGAA8AKAPwGDM/nsRJGC10dJigFArXoTBLLsswjKFB\nXl09fiAiHovHtXw5cM45wJlnAmMwYjemWbYM+Nd/lXM/OAhMsiG/xgSEiMDMOTen7LEZQf7t34Dj\nx6M/t5BX4Th1SkRk8uTChb0GBizkZoxtTFBGkNtvB375y+jPu7qsl1ehGBwUMSmkoHzpS8CmTYX5\n7vHAm28CX/96oUsxsTFBGUG6uzOLRW8vUF3trfOrXwHPPTcyZdu9W8o3UVGHMmmS/P2FLwDf+97I\nluHwYaC1dWS/czyxY4eELY3CYYIygvT0SFgjjMFB+ayy0hOUH/8Y+NGPwtd/4IFkK58/+RNg8+bk\n9jfWCDqUt96SCmok6ewUl2rkRkcH0NdX6FJMbExQRpBMDqWvDygrk55Gus6xY+Jawti4Efiv/0qu\nbK2twCuvyN/btgF33pncvkea5mbg2WeHtk0wh9LZKSGUkaSrywQlH4ZTUD772ehn0fAY14Ly/e8D\n3/lOoUvhkcmh9PYC5eUyhXo2QenpAfbvB954I7mytbYCr74qf7/yCvD4GO6Q/dOfAn/7t0PbRh2K\nhrwKISjmUPKjo2P4Kv1HHpHn0cjMcE29MirYuXPkY9Kvvio9uS66KP2znp5ohxImKMePAzU16evu\n2iW/k6zwWlu9h7G/f2znUw4cANrahrZN0KF0dUkFNZKYQ8mP4XQofX3yXBiZGdcOpadneG4wZmDv\n3vDPnngCePDB9OWDg1KWoQhK0KHs3y+/d+6U9ZISlJ4eKd+JE5IY7usbeUF5+WXg+uvl3ObLgQOZ\nu2eHceqUP4fS2Sl5lJEczmQOJT/CBCWJqXQ0v2mCkh0TlBzYsQNYtSr8s5Mnw1vHKgwDAxJOCoaU\nsgnKG28AK1dKBdfUBLz3vZkFZc0aEYg4HDkC1NUBZ58tDqsQDmXHDuDuu4H77st/X7kIig5mdENe\nAwMj63CTcChf+tLQ3dlo4pFHcq+4w0Jea9cCD+c57azr3I3MmKDkwH//t+w7jChB0Qr65ElJGD/0\nkP/zbILS0SH/t7SIoFx6aXQOpb9fBCuuKLS2AjNmyCjxpqbCOJTeXmDJkmRyXkmFvE4/feTyKMz5\nO5T9+4H/9//EWRWKZ58Fnn469+2/8AXg9dczr6OdR4J0dMi1O3nSa1C88Yb0YDx6NPcyaR1igpId\nE5Qc2Lcver8nTwLt7eFl0c8HBq
QnlUtQUE6dkodCBUW3b2qSnw9+EDh0KHwQniYP4ziUzZtlP3V1\nMqiys7MwDqWnBzjtNO8lY/nQ3CwVs3aAiFOZuN2GBwbkvC9blmzHh0z09ck1z0dQfvhDb18ux4/n\nV8kPhSeeyO+7urv9jbVXXwUee8y/zoUXhl9TzXl1dQELF8r57OkB3v9+4Hd+B3jhhdzKZA4lPhNS\nUD7/+fBKPy7ZBCWTQxkYkJ+mJn+FHxSUjg5pterNrNv/4hfSEl2xApg2TcQgiApKtgeAGbjmGuCZ\nZ0RQysrk+wrlUOrq8v/egQE5/ilT5BoPDgILFkTvd9s24CMf8Q9s7OgAKipku5FyKCqk+QpKUVH6\ndf+v/xq5EeRdXf7vZwb+6Z/ib9/T4xeUp55KD4MGRUdRQTl6VM5nb6+s941vADfcANx/f/T3Njdn\n7tIPmKDEYUIKyo9+JGGrXNm3T26usIRflKAEHcrAgH/gXFBQjh2Tyi3oUO69F3jPe4DSUmD+/PAK\nL66gHD0q6zz1lIS8ysu93iy9vd7Mu5dcEv6wHTmSXMu3p0fKkE1Q1LkFueYa4Gc/E4GdOROYPl3W\n08pHe8YFuesuqXBdh9LeDlRVAbNnAwcP5n1osejqkh59uQoKM7B1K/DOd6Zf9/5+f3f1hx8evull\nOjv9DaWeHrk2cTo3DA56955y7Jg/hHfypKwX1j3YFRT9bn2u3v/+zA7lk5+U+ycMcyjxmZCCcuKE\nVIa5or2twm6wbCEvFRPAH/YKE5RZs/wOpbpaQjAf/KAsixIUfaBOnJCWaVTM+cAB+f3KK+IOyss9\nh6Jl7uoSBxNWiT/7bHItX9ehZKp8tmwB1q1LX/7661LxHDgAzJkjk2y2tXkCtXNn+jZdXTJW6ehR\nOeeaQ2lrk3NdU5NMCC4OnZ0iYHEFpakJ+PnPvf+7usRh1tam3/N9fX5Buf56aZFHkakXYzaCDkXv\n3zihZ71Wrvs4ftx/jwcbWC4dHTIwWJ9tbUxUVIij37UrOvepriYME5T4jGtB6e4eHkHZt0/elxG2\n74EBWR78zE3KDwwAb3tbdkGZO9f/AL397fL3JZfI7zlzwlvQrkN54gng298OP47mZvlOwB/y0gen\nu9vbV9igrvb25Crc3l6pDImiB38C4iZ2744uy4EDct6mTPEcChAuKI2NwHnnSehQHUppqdwbhRCU\n6dOlMo+T+/rhD/1zjbW1yfkrLU2v+IKC0t8fPUiPGfjc5+T+zKUCDToUt3HS25u5sRAmKMeOievU\n8uvzEOVQZs70nu2uLilLWZn8nHkm8NJL4d/d1hY9KNJCXvEZ14IyHA5Fu3bOnBm+bw0NBV1KMOQ1\nd67/oY4jKMuWATffLK0tQBxMWA5FHUp/v/w88kh4WZubPXFyQ166bne350zCkqDt7ckN/uvpke+v\nqMgc9tq61Uu633qrvywqKLNni0M5ftxr8Tc1pe/r2DER5Vmz5P9Jk2Tb3bsl5FVdPXKDG7u65Puq\nquK5lJYW/zVtbxcRzSYozJkFZe9emUOuujq3PGOUQ+ntla72N9wQPTYkyqEwew2nKEHRXnJ1dd69\nevSoCIm+LO3886PDXpkExRxKfCasoOTajXD/fulBopVvkChBCSblq6oyJ+WPHQPq672YcXe3TBz5\nl3/pvfzJFRStKAB/L6++PgkDhE2lcuCA9H6ZMyc85JXNobS1JVfh6vFXVkYLyuCgzMBcVyfvlvnz\nP/fKqoKiY2qmTPFCXpWV4Q6lvV1ciCsoc+fKutXV8jOSDqWqKndBaWuTYy4rCw956X158qTcK1GC\ncuAAsHixNDByEZSgQ3EbREeOAP/yL8BPfhK+rQpJ0KG4g3j12IKVf0+PiGlVlfdsHzkiDRSloUGm\n5Qly6pQcq9tF3w2zBR3Kf/xHdBh5ojPhBEW75IY5FObsA9k0pJJNUIKJ+aBDqazMLCjHj0sIRCt5
\njQW7zJ7tCcoTTwBXXy1/Bx3K2WeH51qam0VMHnhAkrlhIa9sDiWXCvfw4fRlenyZBKWpSY75vPO8\nLrIHD3oVQmen11JXh9LdLaK5d296IrqjQ8JEs2d7Ij1njsTaRzrkNVSHcuhQuqDECXnpNpkEZc4c\n2VcuAyS127niOorubrkXoxpzet1dsTh+XN5kqon5KIfS0SHnr7zce7aDgnLFFTIjg+ZA3TK7PSof\nfhj46lfTj0GP6+GHJfe2Y4e9vybIhBMUrcSPHJHBhW5LQwcMZuL4cYm5h7UEgcyComMcVFCCD17Q\noUyd6heUykr/Pl2H0tzsVf5BhzJjRnglpeJ46aXyPcGQV09P9hxKV9fQp7dYuTI99xPHobz0kuSR\nTj/da+UeOCBl0JCHVqzqULq6xLHMnJlekbgORQVl7lzgN78Z+ZBXvg4lbsgr6GKDHDwoAltbG+1Q\nenuj3zuieQt3XcDrDjxrVrRIR4W8zj3XaxBFJeU7OuRalpX5HYrmCAH57GMfS+/GrMep+z582H8N\ngoLS0yO9RB9/fGK/8iGMcSsop06FJ8ddQbn3Xv8LrNrb03sz7dzpbw1lCi0AmUNeNTXxHUpnp6yv\ngtLdne5QZs3yKubjx70b/uhRKaM6lOnTwyspdSiKfld/v2wfJykPDL2ra2tr+ja9vdkdSkeHiPnp\np8s6U6eKoGg5ohxKVRWwaJF0pgjur7ZWzuPkybJs7lw5/8PhUPbti35jZ745lGwhr7iCog5Fx/GE\nsX07cO214Q2JbA6lvj76+IKCwizX8Jxz4jmUMEEJPjOrV6e/9kEbf7rPI0f892Aw5NXbK4Kyd6/N\nQBxk3AqKVtDM/jEUrqDs3Om337296Tf79dcD//7v3v/Hj0tlpeGhhga/CJ08KZVimEOprfUEJVsO\nRf/P5FDq6uQ4Bgf9gnLsmLQy+/szO5TmZqlAFXdg4/TpXshr7tzokBcwtFZ8X5/nnILnJ5tD6e+X\nFvjpp8v/q1enC4o6FLfbcGWl5L3iOhTAy6G4x9bf73XZzoWvfz16apnOzviC0tcnZQ8TlDCH4o5D\nGYpDiQp5tbfLTzAvdfKkd33dsgJesj7qXgTSBaWzU+4JdwqcqByKCoob8jp6NF1Qamu9RgIz8JWv\neOvrPltb/fdg0KGooOzZY4ISZNwKisbky8q8wVJ//udysxPJDdHSkt7TKliZHTzoXxZsCf7qV/6c\nwMmT8tAEH8bubrmZ3ZDXUAQlzKEUF0vFeeRIukOZM0f2rw4leFw6ory+3lvmDmycNs1zKEuWRCfl\ngaG14oOtweDxZxKUvj6pMM84QwTi3HM9QSHyOxTtNtzVJfs87bT4DgXwQl7uuJg9e6RxEXfSTZcT\nJ4Af/CB6HERXV/yQV0uLdz6U9vbMORRtVA3Vofz61+n7U5ENtvS13GEO5ehRub6ZXJ+GhfUcHTsm\n9+GMGV6DZigOpbXVH/IC/N+/bRtw++3Aa6/59xnmUIqL/YJy8KD3qgrDY0IISl+fPCh33ik3RV2d\n16p1W95ambqO5uDB9CShOhQd+OeGBk6elAo8zKEMJeQV5lCCggJ4eZShOhSN2WtFCvh7ebkOZcmS\naIcyZcrQHIqeqzCHEgx5XX21v5NEf7+c97PPlopuzhxPUDQ2rw5l2jQps4a8wgRFK2E3KV9XJ+ek\nulp+l5V55dHR9mEVfmdneE8y5amn5DxFCcpQHEpLi5yrXEJe2ZLywRzKZz7jH0AJyPKionRB0Yo6\nLIdy5Ihc20zH190t952eI33WXBGIEhQ9fyooRUXhIS+3596WLfJ72zZpkEQJSm+vF0IGvPK1tkoZ\nk5gif7wwrgWlstJ7wDo7PTuuD15VVbpDAfzWWyuBF1+UB+v4cW973TYoKGFdLtWhuIKSKSkfJ+QF\neD29VFB0ypQpU2QbZvk7SlBc3F5ecRxKezswb57s69QpGdOS
bUT0UBzKs8/6e6dpyAuQ8qmgtLV5\n5VCR01ZtppCXtmrnz5fwGSAiMnu2VDyAP+ylghLWwr7/fhkjFEVjo7x0LZOgqENpapJeRnffHb5u\nS4scT1huL05S3r13gwR7eTU3p9/L7e3Au98d7lCCc4m5lXRFhZzPTIIyY4a3jToU9xpEhbzU4WlE\nQq9/0KG4+9qyRe6b7dul00YmQamt9TuU6dMlFFdePnI9AccC41pQXIfS1eUJSmmp3HAXXJCeQwG8\nG15DC729cvN973vykKlD0daz20IfGIh2KG7Ia6g5lLCQF+B3KH19XiVZWip/q3AGH2JNAru4vbzU\noUQJCrMnKB0dYv+jpmhh9s5RlEMJS8prQ0Dp65PjUVyHooKiDmXGDKkYMoW81KFUVMjgT2XuXE9s\n3dax61D+9V9l+f/9vzITwbPPZu5m29Ii5zFKUFQQqqqAb35TvitqhgMVlKGEvFxBmT07XFC6uuTe\n015yR45IODfoQNvbgXe8I/186mj/fBxKXV24Q9Ey9PaKmwieR204qIDMnCnPZ/CZ0evZ1iYDWD/2\nMbl3Z8/2RvK3tvr339fndyi9vcBZZwFLl4rgWdjLY0IJik5LroLy7neHOxS94bUHVU+P3LDaNVcd\niibzgg6lvj49RBRMygdDXpqUHmrIa/Zsr1wa4iovB0pK5CHUwV5xHIrby0sdSlTIq69PwkR1dbKv\nxkZZHpb/+P73Jd9x4kR6F83g8VdWeg9zf79fUFyHAvgFZf58uZbFxXLsVVVyno8elb/nzZN13XCm\nim+Qv/gLmSIdSHcokydLmb72NRm1v20bcM89Mtgt00DAw4elp1lUfkhDTe95j8yO+53veHOtBTl0\nKF1Q3JBXmKAMDnqDX6MERctAJPeqzi4QFJSODm+eOXcqla4uEZQoh6KC0tnpDUJ1CQqKdp13w1Qa\nfsrkUADZT1dX+jNTWipu+s035RgWL5Z9qaDoa7qzhbze/nYJvU6bZol5l3ErKNqid0NegPwuKQH+\n6I+Aj3403KHozaRjPPRG0/CKjg8JcygnT0rlpe7GLU8uSXkNQ2kLLMjChdJSdENe5eW5OZSSEvnu\nnh6/QzntNFnmzgelLWJtPT77rP/cudx/v6zz4INeK96tDHU2gJISv0Pp7/efW03KK3o+33pLRHzS\nJFkGSKU4Y4aEuSorZd/19TKA88knPdcUJiiXXirHD3gtWmYRlLPOknOn4SBNzvb0ZBcUPY9haGW+\nZg3wZ38m369h2iCtrTK1flQvr7AcCiDnWRsL7oy9P/oRcNNNMlj0rLNk2ZQpXk4ozKFMm+ZPVAPh\nDkUbOOqG7Kr+AAAgAElEQVQWNOR1993S9dilpyfdoUybJttpblPDTUFB0VyZKyhAesiLSK7p/v3y\nHC9YIMtVUI4cEaE5edJrfPT1pYe8vvQlSeiboPgZt4IS5lAAeRhKSoDrrpOHx02qBR2KCkqUQ1FB\nCTqUuXPT59gKS8oPNYcS5lAWLZIea66glJV5glJaKt8V5lCCgkIk2w4OysOm01FoN1zX2msFpnM+\nPfusuAQVg5YWKXNzs4y9+Od/Bv7+78NDXnqsRJ6gMKeHvDT+75Z3xQqZQr+2VsoyZYr3uQqKOrFL\nL5VzsmePfEdZmZzvTKhDaW2V71u0yAuZHDggP+vWAb/7u7kLil7fqVO9ZZMmRc/VpvO89fdLBfhX\nf5U95AV470UvLfWHal56Sa7NN74hHVcA2Zc+D2E5lNpauWbu8XR1yTkP3tfTpqWHvA4dkhdnubMe\naw5F96mCT+S5lL4+OU9hDiUY8gLCn5nqarkvpk0LF5S6On/DJuhQenu9Di3B52KiE0tQiGg1Ee0k\not1EFJp6JKI7iGgPEb1MRCuybUtE64joVSIaJKKVzvJiIrqfiLYT0UtE9D7ns5+l9vUSEf2aiGZE\nlTlKUDo6RFAAaWFVVPjj
s4DfoeiN1t3t5SqCghJ0KHPmyGdu74+goOQyDiVKUF55RSoJdRduyCuT\nQwmGvADPFVVWyohxnQG4vl5a0T09IjiuQ9m2Tcr2trfJfgcHpfL+u7+TiQavuAL4wAekha+tOT3X\n+lY9rQh0ckh1Q8EciutQABkHtGuXJyjqUACpnA4d8pzd/fcD69fLNdTyZ0Mdys6dcnzV1d6AyeZm\nEZR//Efgu9/1XooWhDmzoBw6JOKhkxgqGtJT7rpL9nPsmBxbcbGc+y9/We63TONQAG8mbBUUDTm1\ntclEm88951Wwem70Pjp2zBMWN/fkVuxRORTtceeGvFpbpTL+7ne9dTXk5Y770OutTlj3ly3klUlQ\namrE1U+bJg4f8J5zfR12UFDUoegULXq/mkPxk1VQiGgSgDsBXAbgLAAbiGhZYJ01ABYz81IA1wG4\nO8a2rwD4CIBnA1/5WQDMzOcCWAXgbwKfb2Dm85h5JTNHzhkcFJRgyEuZPj29knNzKKef7jkUZtm2\ntDSzQ6mslIrHvdH6+uRGzmccSlTIq6VFHk5XRMIcilvZhTkUwNu2slLG2HzsY7J8wQJ5F8sf/qHM\nX6SVSnU18J//KU5BH8JNm+SB3bpVegJdeKEcx5QpUvm7XV7/9/+WFrY++LoPrQQzORRABAXwxC3o\nUHSfirYoo8JdQbRlvGuXJyjaqt6xQ855TY1UZiUl4YLR1ibHp3mpINpwCRJ8PcFdd0l3ae395N6D\nRUXeNO1RIS91KGVlcr22bpXl7e0idsucp1rP4xlnyLm6/XbgjjtkmY7fqajwd6F1RVrDRSoAx47J\n+tq4aW2V0N5dd/nnjnNDXtqBBvCEXfcXlZTPFvIC/A6lpsbrOq4OZcYM/6zXblL+xAnvRWyACUqQ\nOA7lfAB7mHk/Mw8A2AxgbWCdtQA2AQAzbwVQS0T1mbZl5l3MvAdAoF2GMwE8k1qnFUAbEb1jiGVO\n6zbsOhS3lautJ8A/qheQB/300/0DHjUsoUn5qqp0QSkqSg9X9PXJunHGoWjnARUUHbhXXJx+nGVl\n3gulSkul8go6lJISeQCC8e5MDkUr59tuk+Xa7bapSSo116G0tMgEjCoGmzcD/+f/eILy7nd7+9i+\n3f/isAMHRLj0wdd96LXIlEMBJIldVOSF34IOBfAfpwrKUBxKR4cnKFVV3jQgv/qVXwii5r86fFha\nzCUl4t6C73vR/EmQ2bP9DqWtTSrioKC85z3iCInih7xWrZIJRXW/wXOh/y9fLsd/6JAnbq5D0Yr9\n935PQmbV1bJ/vbc15wHItdXGxKFDMuvA2WdLPuILX0gPeZ044TX+NPSYzaEMNeQFAH/8x1IOV1Ci\nQl6uOwGsl1eQOJXzXADuXLVvpZbFWSfOtkG2AbiSiCYT0SIAbwcw3/n8gVS466vhmwtxQl5AukPR\n0dGAPECLFnkOZcECr+Wm+507V/apvWhUUOrr/Yn5vj7ZtzoUbUnpdOKavJw8WcJA2goqL/fCBVEs\nWuQJSnu75zLa270KOBj2CkvKA56gvPOdUvlrpbxggTyEv/mNhNj0ve26D1dQ2trk3ROTJ0uFt3y5\nrLNwoWxfX+9Vcm1tMlI5V4dSVSUvhFqyJDyHovtU8nEoy5bJ/2++KRVJR4d/LrRsgkKUHiYCogUl\nGPJqa/NCXioohw+L0OlkmVGCMnmyl5RXQfn3f/deWe2eN8ALBy9fLsd0+LB3P+uUNeqemeVe0R51\nJSX+fINW3JWVkhuqqBAHO3OmjLf56U9lXr3XX/d6iZ06Jb/1WVVh7+sLF5S4SXndl4a8AJlBww15\naQ5Fhc1Nymv3dmXqVHMoLllSkjkTdB1D4X4AywG8CGA/gJ8D0InH/4CZDxJRJYAfEdHHmfnBsJ08\n/vhGFBXJwzJ7dgM6OxsAeEl5xXUovb3eqPKuLqlEbr1VuoR2d4v91xtZb9y5c2WfH/848I
lP+B1K\nUFBch6LdW0+ckIesqMiz0SokgCcoYS0tRRPFrkNxe3kBnqBoJdvZ6cWPXcrKPFs/35HxhQtlnElv\nrwjK3LnAxRf7BeXnP5fzpC3Yd71LvlNHoS9cKJWP61Da2+X43vY2+T+boAQdCiAJZWB4HEp1tVxH\ndSj794tDWb5cjjeOQ2lp8beYNZ+mHDzovZPFZc4cr/ecThja0uKfraG11d8wiAp5aTdqPYezZ0sj\n4cUXvcGgQaZOlbccPvqofLdWsEGH0tws3/t7vyfX2HUoOqYJ8IS9qkocSl2drL93L7B2rYz10sR6\nb68/5KXCnsmhVFZ6oTYVlCiHcviwJyiA18GgpUUaQ0NxKGNZUBobG9Goff4TII6gNANY4Pw/L7Us\nuM78kHVKYmzrg5kHAdyk/xPRzwHsTn12MPW7m4gegoTUQgXl7W/fiHnzpAeUO6gt2DKdPt0vKPpe\n88ZGGbylMd2eHhnI9MYbsq4rKL/8pbTQjh3zOxQNeelNXlbmOZTiYu/Ba272kqGAbD9UQXnzTU9Q\nNMzlhvfCHEpUyCtsrqoFCyRpe845cg4efVQmO9Sk9+LF3kOoFc6VV/pj3Spgs2b5HQqQ7lD082xJ\neZe4DuXYMS8PkI0ZM2QQ4xtvyDFqyOt97xORiOtQdM40t+WrHDwog2yDuCEv3e9vfiP70JxJa6tU\n+kqUQ5kxw5+UB2Rs0O7d4SEvQHrPFRfLuervl+ty4oTsp6LCE5TXXpOQ0X33yXZ/+qfhDkWvsYav\n3Hv6oos8QVEXF+ZQogRFHYrO1aciFpWUB/y96rRB9+abwIc+FC0obgcSYOyHvBoaGtCgiUgAt2mM\nO0fihLxeBLCEiBYSUQmA9QC2BNbZAuBqACCiCwC0MXNLzG0Bx9EQUTkRVaT+vhTAADPvTIXApqeW\nFwP4EIBXowq9bZu0KN2QF1F6yMttYaigdHVJfPmyy7ybu7tbBGbxYlnXFZTjx+VBP3EiPOSlI7y1\n3/6kSfKjDuW557yBdIBfUCorJRSQKeT10Y8CGzakOxTmaEGJSsrrtkF0IN3ixVIRLVokInPGGcDG\njXI8QUG59lp55au7D8AvKJofCuZQtEIKzvYbDHm5nHuuF14DvJ5Qbu5JHUpLiyc4mVi3TuLq8+bJ\nedFeXlOmyLUfSg4FCH/FcZyQlwrvzp1eBR3mUIKCouHUoEMBPBcdFvICJMSnx9TaKuuqEGv4rqdH\nxuHo+BUtg5tDcUNegJRFz4dy8cXeOrrfYA5FHcrUqZ4oP/KIOEV1KNpDUb8rKikP+B2Krrtvnzy7\neh/qDMpuyMvd57JlwKc/nf4dE5WsDoWZB4noRgBPQgToPmZuIqLr5GO+l5kfI6LLiWgvgG4A12ba\nFgCI6MMAvgVgBoCfENHLzLwGwEwATxDRIMTNfCJVlNLU8iIAkwE8DeAfosr9wgtykz7/vCcoGvcO\nDo7Th7a3VyrJri4Jc23e7Fnh7m7gqquAT31K1tWbyh1zotOEa8hLB4apoBQVyb60gnMF5X3v88rk\nCsoVV0ivqUxzZOk75lVQ5s3zHsRgyEuJcijaUyjI7NnSglu8WIRBw1hTpkhCFZCHcP9+OaawlqEK\nSn29ODpAyrt0abiglJZKJbJ7t1T+2RzKTTf5/58xI/0YVVD275cwXTZKS+VFbBoV0MqotlaOx3WW\nmQRFK1w3kQ2ImLzyilyzILNne4nwtjapxPfsEZcIyHVqackc8jpxwnPDKigqpLNmifPq6orOJ9XU\nyPnScSktLZ6b0dDUa695HS+AzDkUQK5JcPzPeecB73+/l1hXQQl2Gw7mUJ54wns/jnZJrqyU7VT0\ngmQSlDfe8ARl+3bpkt3V5Q95ufucPVvC3YYQK4fCzI8DeFtg2T2B/2+Mu21q+aMAHg1Zvh/AspDl\nPQDeEVwexbnnetNZ9/bKjTZjRrpDcaeYUIeyc6dY33
PPFbHo6JAkobudVro6VQVzPIfS2+sXlP5+\nEZT/9b+8fbuCUlMjA8DivN+7rEzWW7rUexBdh+K2jDM5lGA4Qcs0b54IylVXpY+ZAOQhPHjQG4wW\nRIVIk/I6H9gll/hDXvqmTZ3W5ctflskbszmUIPPmSSjGpbpa9r13r4Tk4nDuufIDeAI1ZYo4M1ew\nogRFE71AuqA0NACf/Wy4uLmj5dvavI4RQ3Eoes6Ki72kvJ7DWbMkMV9V5TUQgpSUyPr19VLuPXs8\n8XFDXp/5jL8MYb283JBX8N4rLpYcnbtfN+RVXS3HquEnnV2hq8sbH1VRIXm/r33NE5NMIa8wQdHw\nZGWlNEq1R586lGDIy/AzbkfKX3KJ/HZDXjoLsCsM7pvpNCn/y19KKCfYy8qtJPWhrKnxblCdHiJK\nUIqK/IJSWioVREeHl5QG5HP3pp08Of3mD8PNoaiQuA7l9de9dz9kyqFEuYB3vUtaku7+XSorJR8U\nlZuorZVQZFWVN1izuFiO3c0Z9fV54t7ZKZW/tk4zOZQgdXXiNF2I5Jpv2xbeKSEbWhHqO1fclnaU\noOjoa8AvKO3tcr60AgwyaZI3m3Rbm9yTgF9Qjh3LLCjuvRcMedXXS+MpLNzlUlMjIar6enGLen31\nWA4e9L+oLY5DCYa8XNyQV9jARnd8VleX5EnLy+V8lZV5YVZ35LyLni83hwJ4c8npz/bt4iwnT/Zm\ntgiGvAw/w9XLq+BcdZX8dgWlrk4q1KBDcadU19HVmqeqqBB3EsxhuBV1TY03k/HJk1JJ6mynQHrI\nSyvykhKJ2bohJMDvUIZCaamImI5D0WVazttukxDgI49kHtgY5QLcGXnDqKyU8GGmyuLss+V861sH\na2uld5zmSiZNkvIfPy7X4rXXPIcZ1ctrqEydKiKVj6CEiWZtreTSgujYBsAvKNphJExMFE3MHz8u\njquszC8ogD9cpe7g1Ck5l647DiblZ82S+09DaFGooPT1ybgizfdopa6TOAbLAET38tJlYbi9vILj\nULR7vSsohw6FN46+/W1/SNI9nurq9HFd5eX+zhPHjkkvz6VL5ZkcHBSXb4ISzbgVFB31646UDwt5\nuQ5FwyyAl9zVWGyUoGhXVe0/rw6lrk4ERZOi+lAD/pBXsIUJ5CcomRxKe7vnmjKNQ8m10q6slP0v\nXZp5Pa0MNBkcXF8f5tpaqRg10Z+koOgstkPFDXkFyeRQ3B5nGnrct086N2TCfefL1KlyXwUFxT0O\nIrmv7rpLZsTV8SoqKMGkPHP23m4qKP39ks/btEmWV1R4jQO3DEGHomKj4ae6Or+jCeK+OdTt5aVJ\n+bIyv6Ds2xeeg1q3Lnz/1dXhjl/HYAHe875kiUzYCXjPV6YelxOdcSsoir5xT613MBfiVgLqUABP\nULQHUiaH8qlPSTLPzaGUlHhdd12HAvgFRQeDuWiX0KGigxnDHEptrSTvVVCyjZTPhcpKOb/ZKigV\n+aixICooZWVer6qjR+W8RcX6h8LUqbm5E8Af8goSJijMUnY3j+A6lDiCcvCgJ74zZ2YWFECu+aZN\n8t0XXyz/hwnKtGneLAOZUEE5cULWv+IK71hee03Op+uy1KHorADudD6ADCTMdB017xkchxIMeekb\nU3VuvLjMny+h2yDl5d65dAXFPS7tRWmEM25zKMrs2dKLRrsjAum9vNyQV9ChAJkFpbpa+t0vWOB3\nKIDnUqIEpbRUWq9JOhRNGgYdyp/+qUxT3tIilX5vb3hX5Kj8SBx0f3EFJaq7akWFVMLaTVeTz7kK\nXZB8BKWiwntfSJAwQeno8Au8KyhxHIqGvOI6FF2+Y0d6Y8YdKQ94HSSyCUptrecq1qzxvq+iQsoW\nzEWoQ9Hv1gS53h8lJZlnedb7ww156RQ/AwOeOHV3e24vU7f6IKefLmOLgrghr4oKOT+nneZ9boKS\nnXHvUC68UB5crZ
iA8JCXzq80darcSG4YJqy3iOtQdJ/aXVNbX5pHyRbyCj6Q+QiKli3Yy6u6Wh66\nzk5p8WsSM8i8ef73zA8FPRfZBEXDFdkcik7tMnu29L5JItwFeBNp5gKRHGdYJVxXlz7dvM5eqwQd\nijOmLJQ5c6TL8uCgfOfnP+8NZMzkUFpbvR5iwZCXK8yzZmUXlCuvlBb9smX+LrLl5dKpIBg+Uofi\nJrC/+MXw2QDCcF/0ptdp+XK5N5ubvY4V7e3iUGprh+ZQMn2vm0NZsMB/n5SUiKC4g1kNP+NeUEpL\nZRK6114LFxRdppXs9OnSfdGtvNzWlVJeLg+aW3H39PhbXupQTpzIHPIKJg7zFRS3RexWHvqGxddf\nj84fBF96NBSScigqKPX10jL9nd+RuaqScigf+MDQWrRBbropvFJZulQaLwMDwOOPy4hrN38CyL2k\nLiZOyEvHohQXy7m6/HLvM239B49F7wPXoYQl5QE5x9mul469Avz3ZUWFVPA62FdRh+IKyi23ZP4O\nF9ehuPf0U095r2GeMkWe2a4ub6brfKmq8jocLFjgHxsGSFn0DaZGOONeUAB5CPfv927OYOt0yhRp\nWepLnoI3UljIa/JkedeHUlIi9jsoKIcPez2nJk/2zxqcKYeStENRZs6UcTfZWqW5EFdQ3KR8lENp\nbZWyX3WVhKe++93wxGsu/P7v57f9xo3hy8vKJCz0i19IY+NXvwoXlAMHJL8xlKS8vuQs+H1hY0hU\neLVnXFRSHojnUKLQWSSiHIq6zKESFvIC5Dg1FK3PbHGxnPMkHMrtt3sNrfPOAx54wP+5hbyyMyEE\n5fd/X256HSkeFJTaWmkFRt0oYQ4lSCaHUlfnPeRFRf4cytGjyeZQgGiHAkiL9Jln/PM/JYUKcjZB\n0bESmRzKrl3emIL9+5Pr4TXcLF/uvTTqX/5Fxti4gqIDN1tb5RplO1dz5sjgOn0nh4t2WghSWioN\nh6iQl3sev/KV3CtjDQNH5VB27fLGzgwFN+QVdc2nTpXzUlUlophEZ41sITkd9KzvCDLSGfdJeUBu\nvnXrvMo1eJNqaycqpBLmUIKEOZRgDgXwC4o6lKQERb+jrMybwTh4rPX1EjqIM+3IUNHka7Zp4SdN\n8sbMZMqhuPkfILmQ13By5pkiJFdcAfzgB9E5lB07/B0/opg+XUJnXV3pFV4mQTnvvOxJeUAS1JnG\nDWVC79GgoKhD0Sn/h4rbyyvsHUCAPLOuoCQR8srG9deLWzSHEs2EEBQlLIcCSKWmIa8w4jiUqJBX\nUFDcyQp1Lq+kQ166bWlpuEM5cMCbSiRpKivjzeJbVibliHIox4+nJ53HikPp6ZF3tDDLXFNhgtLU\nFM8lEgEPPij3Z3AwYJSglJdLXqGzU4SooiLaoeRDNoeyc2fugqKvPYjqIOIKymc/K+d7uFm3ThxX\nLuOXJgomKMguKHEcSmlpdA4lk0MBkg95uccZ5lCA4XEowNAE5fnnw8tRWemfKbm4OPMI/tGEuo53\nvQv45CdlrixXUBYskJZ7XIeSibKycDf47W8D69eLQ9GQq5uUT+o8qqBE5VByFRR9S2km4XMFZf78\n9I4Bw8HkyfJ+Gh3oaKRjggJ/Uj6MuA7FnUkYyO5Q3GlRXIbTocycKQKW6ziMbFx4ob/vfhQ6Vsad\n9lzRc+1WKDU1Y8OhnHuuJO2nThVBIfILyooVEuZ88sn881hz5oT3OFq2TL6zszNdUJJ0KFEhLzeH\n4s5RF5eysvQZLYKooIxEqMtl1qyxcR8WChMUSIt6+/b0B0P58IflBUCZCHMobg5Fb8I4DqWyMrfe\nMUGHUlqafvMvXChTciSRxAxj06bM02ooZWXSfTdq1mJdR9GZo0c75eUy/xMgbuT66/1OZNIkmbh0\n9+78BeV97wPuuSf8M52qRHM4IxnyKi31uqbHcatBdNbsTIIydar3nhdj9DAhenkp
mZLyO3YAf/u3\n4dutXZt93+pQ3NaoOhSdfwiIJyjf/W5u8wUFBaW8PH0/F18s7/AuNOXl8v6LMMIcSnX12GwZfvvb\n6ctWrZJrkFQ36DD0dQWHD8t9WFTkvW0x10GdQTLlUP7zP3PP08UNeQEmKKONCSkowQdq6lR5G+Oq\nVbnvu7RUWkyuQykvl/9bW72XYAWT8kD6Q5Fr0q+0VL5Py/DjH6dPvBj10qGR5g//UJxfGGM55BWH\nK66Q9+1kmmU4X3TW5n37vJBXd7fcc0l9b3Gx5BXCHMqBA8Ddd+e23zgOxQRldDKhBCVqYOMnPiGV\nWz4Pmu4zOEdRXZ1UHlHjUIDkeo2UlvpzL7mMARgp/uiPoj8byyGvONTVeWGx4aSmRiYt1ZDX0aPZ\nu3QPBSLp0qzz3ynl5dLZ4kMfym2/6lAyjQtRQRnpHIqRGcuhQOK8+YYf3ByJy8yZfkGJ41DyKcN4\nqHTDJvEcqyGvQlJdLXOAqUNpbU1+hoRHH02/59atk6lncm2gaVI+0/XWGQLMoYwuTFASIpNDOXgw\nOoeiU4snQdChjFWiQl7jQSxHkpoaqXB1pPxwCEoYFRXxJ4IMo6xMhDDTc6oTRJqgjC4mpKAMR0s3\nyqHU1cl08VGCkuQgqbEyViMbYSEvcyhDp7ra6yRSVCTzio2EoOSL2+09E1OnmqCMNiZkDiUpR+CS\nyaEA0eNQknwgZsyI12V3tBPmUNavH76uzuOV6mrv/lOHop1DRjNxIwnmUEYfE0pQJk2SiRGHI+SV\nKYcCjIxDWbJEjm+sEyYoK1cWpixjmZoamb8LkHuuvX1sOJShCIol5UcXE0pQgOixD/kyebI3IaNL\nJodSUmItrDDCQl7G0Kmu9lyd3nNjQVDihry+9rXwmRaMwjHhBGU4CUuwBwVF3zcPSOLSXtaTTphD\nMYZOTY3XY24sCUpU9/4gwfcWGYXHBCVBwt6VHSYo+nCff75MVWL40ZcmjYcea4Vk/XqZZBPw7sux\nICg6OHc4QtPG8GKCkiA6Ut0lmENxQ15GNG+8YUn4fHET8GPJoQDe5KHG2MIe2QQZqkMxojExSZax\nJihlZeZQxiL22CZImEOpqJB5q9xYtgmKMdKMRUExhzL2iCUoRLSaiHYS0W4iujlinTuIaA8RvUxE\nK7JtS0TriOhVIhokopXO8mIiup+IthPRS0T0Puezlanlu4nom7kd8vAR5lAAmWJc3zw3XgYfGmOL\nsZRDASTkZQ5l7JFVUIhoEoA7AVwG4CwAG4hoWWCdNQAWM/NSANcBuDvGtq8A+AiAZwNf+VkAzMzn\nAlgF4G+cz+4C8GlmPgPAGUR02RCOddgJcyhBvv51mevIMEaSsehQTFDGHnEcyvkA9jDzfmYeALAZ\nQPANIWsBbAIAZt4KoJaI6jNty8y7mHkPgOAUcmcCeCa1TiuANiJ6BxHNAlDNzC+m1tsEIGIC9MIQ\n5VBc6utHx/TxxsRiLAqKhbzGHnEEZS6AN53/30oti7NOnG2DbANwJRFNJqJFAN4OYH5qu7eGuK8R\nJY5DMYxCUFws9+ZYacxYyGtsMlzVXz6v8LkfwHIALwLYD+DnAAaTKNRwE8ehGEYhKC4WdzKcL/VK\nEgt5jU3iVH/NABY4/89LLQuuMz9knZIY2/pg5kEAN+n/RPRzALsBtEV8RygbN2787d8NDQ1oaGjI\n9LWJYA7FGK1UVwOzZxe6FPGxcSgjQ2NjIxobGxPbX5zq70UAS4hoIYCDANYD2BBYZwuAGwA8QkQX\nAGhj5hYiOhJjW8BxNERUDoCYuYeILgUwwMw7U5+1E9H5qTJdDeCOqEK7gjJSlJRYl2BjdHLaacDW\nrYUuRXzMoYwMwcb2bbfdltf+sgoKMw8S0Y0AnoTkXO5j5iYiuk4+5nuZ+TEiupyI9gLoBnBtpm0B\ngIg+DOBbAGYA+AkRvczMawDMBPAEEQ1CHMgn
nOLcAOABAGUAHmPmx/M6+oQxh2KMZsbSVDYbNgBL\nlxa6FMZQIdbJfsYRRMSFOK6rrwYWLx6Z94UbhmEkDRGBmXPOtNlI+QQxh2IYxkTGBCVBrJeXYRgT\nGROUBJk2DaitLXQpDMMwCoPlUBJkcFD6+dtMuYZhjEXyzaFYgCZBdAJIwzCMiYi1pQ3DMIxEMEEx\nDMMwEsEExTAMw0gEExTDMAwjEUxQDMMwjEQwQTEMwzASwQTFMAzDSAQTFMMwDCMRTFAMwzCMRDBB\nMQzDMBLBBMUwDMNIBBMUwzAMIxFMUAzDMIxEMEExDMMwEsEExTAMw0gEExTDMAwjEUxQDMMwjEQw\nQTEMwzASwQTFMAzDSAQTFMMwDCMRTFAMwzCMRDBBMQzDMBLBBMUwDMNIhFiCQkSriWgnEe0mopsj\n1rmDiPYQ0ctEtCLbtkS0joheJaJBIlrpLC8looeIaDsRvUZEX3Y++1lqXy8R0a+JaEZuh20YhmEk\nTVZBIaJJAO4EcBmAswBsIKJlgXXWAFjMzEsBXAfg7hjbvgLgIwCeDXzlegBg5nMBvAPAdUS0wPl8\nA7QZTREAAAVISURBVDOfx8wrmfnIUA52vNDY2FjoIgwb4/nYADu+sc54P758ieNQzgewh5n3M/MA\ngM0A1gbWWQtgEwAw81YAtURUn2lbZt7FzHsAUGBfhwBUEtFkABUA+gF0DLHM45rxfFOP52MD7PjG\nOuP9+PIlTuU8F8Cbzv9vpZbFWSfOtj6Y+QmIgBwEsA/AN5i5zVnlgVS466sxym4YhmGMEMPV2g+6\njvgbEl0FoBzALACnA/gCEZ2W+vgPmPkcABcBuIiIPp5nOQ3DMIykYOaMPwAuAPC48/+XAdwcWOdu\nAB9z/t8JoD7mtj8DsNL5/zsArnL+vw/AupByfRLAHRFlZvuxH/uxH/sZ+k82Tcj0U4TsvAhgCREt\nhISh1gPYEFhnC4AbADxCRBcAaGPmFiI6EmNbwO9odgK4BMD3iKgSIkp/l8qpTGHmo0RUDOBDAJ4K\nKzAz5+yQDMMwjNzIKijMPEhENwJ4EhIiu4+Zm4joOvmY72Xmx4jociLaC6AbwLWZtgUAIvowgG8B\nmAHgJ0T0MjOvAXAPgPuI6BWI0NzHzK8SUQWAJ4ioCMBkAE8D+IckT4ZhGIaRO5QKERmGYRhGXoyr\nLrhxBmCONYhoHxFtSw3mfCG1bCoRPUlEu4joCSKqLXQ540JE9xFRCxFtd5ZFHg8R3ZIaMNtERKsK\nU+r4RBzfrUT0Vqp34q+JaLXz2Zg5PiKaR0TPpAYcv0JEf5xaPi6uX8jxfT61fLxcv1Ii2pqqS14j\nottTy5O7fvkkYEbTD0Qc9wJYCKAYwMsAlhW6XAkc1+sApgaW/RWAL6X+vhnAXxa6nEM4ngsBrACw\nPdvxADgTwEuQ0OxpqetLhT6GHI7vVgA3hay7fCwdH6Tn5YrU31UAdgFYNl6uX4bjGxfXL1XmitTv\nyQCeB/DeJK/feHIocQZgjkUI6U5yLYB/Sv39TwA+PKIlygNmfg7A8cDiqOO5EsBmZj7JzPsA7IFc\n51FLxPEB4V3p12IMHR8zH2Lml1N/dwFoAjAP4+T6RRyfjpsb89cPAJi5J/VnKaReOY4Er994EpQh\nD6IcIzCAp4joRSL6TGpZPTO3APIQAJhZsNIlw8yI4wle02aM3Wt6I8k8d//ohBTG7PGlxoatgLRy\no+7H8XB8W1OLxsX1I6JJRPQSZEaSRmbegQSv33gSlPHKe5l5JYDLAdxARBdBRMZlvPWsGG/H8x0A\npzPzCsiD/DcFLk9eEFEVgB8A+J+plvy4uh9Djm/cXD9mPsXM50Gc5UVE1IAEr994EpRmAO4kkvNS\ny8Y0zHww9bsVwKMQy9mSmisNRDQLwOHClTARoo6nGcB8Z70xeU2ZuZVTQWlIV3cNG4y540t12/8B\ngH9m5h+n
Fo+b6xd2fOPp+inM3AHgMcgEvIldv/EkKL8dgElEJZBBlFsKXKa8IKKKVGsJqUGeqyCz\nNG8BcE1qtU8C+HHoDkYvBH9MOup4tgBYT0QlRLQIwBIAL4xUIfPAd3yph1T5KIBXU3+PxeO7H8AO\nZv57Z9l4un5pxzderh8RzdBwHRGVA7gUknRP7voVutdBwj0YVkN6ZuwB8OVClyeB41kE6a32EkRI\nvpxaPg0ysHMXZNDolEKXdQjH9BCAA5BZpN+ADIKdGnU8AG6B9C5pArCq0OXP8fg2AdieupaPQmLW\nY+74ID2CBp178tepZy7yfhwnxzdert85qWN6CcA2AF9ILU/s+tnARsMwDCMRxlPIyzAMwyggJiiG\nYRhGIpigGIZhGIlggmIYhmEkggmKYRiGkQgmKIZhGEYimKAYhmEYiWCCYhiGYSTC/weefPm/CAoI\n1AAAAABJRU5ErkJggg==\n", 362 | "text/plain": [ 363 | "" 364 | ] 365 | }, 366 | "metadata": {}, 367 | "output_type": "display_data" 368 | } 369 | ], 370 | "source": [ 371 | "itr = generate_samples(dataset, 1, sample_length, discretize_input)\n", 372 | "X, y = next(itr)\n", 373 | "prediction = model.predict(X)\n", 374 | "plt.plot(prediction.flat)\n", 375 | "plt.show()" 376 | ] 377 | } 378 | ], 379 | "metadata": { 380 | "kernelspec": { 381 | "display_name": "Python 2", 382 | "language": "python", 383 | "name": "python2" 384 | }, 385 | "language_info": { 386 | "codemirror_mode": { 387 | "name": "ipython", 388 | "version": 2 389 | }, 390 | "file_extension": ".py", 391 | "mimetype": "text/x-python", 392 | "name": "python", 393 | "nbconvert_exporter": "python", 394 | "pygments_lexer": "ipython2", 395 | "version": "2.7.11" 396 | } 397 | }, 398 | "nbformat": 4, 399 | "nbformat_minor": 0 400 | } 401 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | build: 2 | docker build -t audionotebooks . 
3 | 4 | run: build 5 | docker run -it -p 8888:8888 audionotebooks -------------------------------------------------------------------------------- /Metadata to Labels.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "data_root = 'data/drums/'" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "from os.path import join\n", 23 | "from collections import Counter\n", 24 | "from sklearn.feature_extraction.text import CountVectorizer\n", 25 | "import collections\n", 26 | "import pickle\n", 27 | "import sklearn\n", 28 | "import json\n", 29 | "import re" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": { 36 | "collapsed": false 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "metadata = json.load(open(join(data_root, 'metadata.json')))" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "docs = [' '.join([m['name'], m['description']] + m['tags']) for m in metadata]\n", 52 | "docs = [d.replace('-', '_') for d in docs] # replace dash with underscore" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "# print the `limit` most common tokens\n", 64 | "limit = 50\n", 65 | "vectorizer = CountVectorizer(min_df=10, stop_words='english', binary=True)\n", 66 | "vectors = vectorizer.fit_transform(docs)\n", 67 | "freqs = [(vectors.getcol(idx).sum(), word) for word, idx in vectorizer.vocabulary_.items()]\n", 68 | "for freq, token in sorted(freqs, key=lambda x: -x[0])[:limit]:\n", 
69 | " print freq, token" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "`synsets.json` should be a list of synsets, where each synset is a list of synonyms. A synonym should not contain characters like spaces that a tokenizer would split into two tokens. An example:\n", 77 | "\n", 78 | "```json\n", 79 | "[\n", 80 | " [\"bass\"],\n", 81 | " [\"kick\", \"kickdrum\", \"kicks\"],\n", 82 | " [\"hat\", \"hihat\", \"hi_hat\"],\n", 83 | " [\"snare\", \"snares\"]\n", 84 | "]\n", 85 | "```" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": { 92 | "collapsed": true 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "# vectorize only using words in the synsets\n", 97 | "synsets = json.load(open(join(data_root, 'synsets.json')))\n", 98 | "vocabulary = [item for sublist in synsets for item in sublist]\n", 99 | "vectorizer = CountVectorizer(min_df=1, stop_words='english', binary=True, vocabulary=vocabulary)\n", 100 | "vectors = vectorizer.fit_transform(docs)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "collapsed": false 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "# map vocabulary indices to synset indices\n", 112 | "vocabulary_to_synset = [i for i, synset in enumerate(synsets) for syn in synset]\n", 113 | "# fill out mapping from samples to labels and from labels to samples\n", 114 | "samples_to_labels = [set() for m in metadata]\n", 115 | "labels_to_samples = [set() for s in synsets]\n", 116 | "for i, vector in enumerate(vectors):\n", 117 | " nonzero = vector.nonzero()[1]\n", 118 | " labels = [vocabulary_to_synset[j] for j in nonzero]\n", 119 | " samples_to_labels[i].update(labels)\n", 120 | " for label in labels:\n", 121 | " labels_to_samples[label].add(i)\n", 122 | "# convert from list of sets to list of lists\n", 123 | "samples_to_labels = [list(labels) for labels in samples_to_labels]\n", 124 | 
"labels_to_samples = [list(samples) for samples in labels_to_samples]" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": { 131 | "collapsed": false 132 | }, 133 | "outputs": [], 134 | "source": [ 135 | "with open(join(data_root, 'labels_to_samples.pkl'), 'wb') as f:\n", 136 | " pickle.dump(labels_to_samples, f)\n", 137 | "with open(join(data_root, 'samples_to_labels.pkl'), 'wb') as f:\n", 138 | " pickle.dump(samples_to_labels, f)" 139 | ] 140 | } 141 | ], 142 | "metadata": { 143 | "kernelspec": { 144 | "display_name": "Python 2", 145 | "language": "python", 146 | "name": "python2" 147 | }, 148 | "language_info": { 149 | "codemirror_mode": { 150 | "name": "ipython", 151 | "version": 2 152 | }, 153 | "file_extension": ".py", 154 | "mimetype": "text/x-python", 155 | "name": "python", 156 | "nbconvert_exporter": "python", 157 | "pygments_lexer": "ipython2", 158 | "version": "2.7.11" 159 | } 160 | }, 161 | "nbformat": 4, 162 | "nbformat_minor": 0 163 | } 164 | -------------------------------------------------------------------------------- /Samples to Audio Spritesheet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "data_root = 'data/drums/'\n", 12 | "sr = 48000 # this is the samplerate initially used to load the samples\n", 13 | "total_limit = 100 #None # set this to 100 to export 100 samples\n", 14 | "length_limit = sr/8 # set this to sr/4 to only export 250ms of audio per sample" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "from os.path import join\n", 26 | "from utils import ffmpeg_save_audio\n", 27 | "import numpy as np\n", 28 | "%time samples = np.load(join(data_root, 
'samples.npy'))" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "y = samples[:total_limit, :length_limit].reshape(-1)\n", 40 | "%time ffmpeg_save_audio(data_root + 'spritesheet.mp3', y, sr)" 41 | ] 42 | } 43 | ], 44 | "metadata": { 45 | "kernelspec": { 46 | "display_name": "Python 2", 47 | "language": "python", 48 | "name": "python2" 49 | }, 50 | "language_info": { 51 | "codemirror_mode": { 52 | "name": "ipython", 53 | "version": 2 54 | }, 55 | "file_extension": ".py", 56 | "mimetype": "text/x-python", 57 | "name": "python", 58 | "nbconvert_exporter": "python", 59 | "pygments_lexer": "ipython2", 60 | "version": "2.7.11" 61 | } 62 | }, 63 | "nbformat": 4, 64 | "nbformat_minor": 0 65 | } 66 | -------------------------------------------------------------------------------- /Samples to Fingerprints.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This notebook converts `samples.npy` to `fingerprints.npy`. After processing, check the max and mean images to make sure that you are not over- or under-cropping the data. Set `crop_rows` or `crop_cols` to `None` to see all the data." 
8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "data_root = 'data/drums/'\n", 19 | "n_fft = 1024\n", 20 | "hop_length = n_fft/4\n", 21 | "use_logamp = False # boost the brightness of quiet sounds\n", 22 | "reduce_rows = 10 # how many frequency bands to average into one\n", 23 | "reduce_cols = 1 # how many time steps to average into one\n", 24 | "crop_rows = 32 # limit how many frequency bands to use\n", 25 | "crop_cols = 32 # limit how many time steps to use\n", 26 | "limit = None # set this to 100 to only process 100 samples" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "%matplotlib inline\n", 38 | "from utils import *\n", 39 | "from tqdm import *\n", 40 | "from os.path import join\n", 41 | "from matplotlib import pyplot as plt\n", 42 | "from skimage.measure import block_reduce\n", 43 | "from multiprocessing import Pool\n", 44 | "import numpy as np\n", 45 | "import librosa" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": { 52 | "collapsed": false 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "%time samples = np.load(join(data_root, 'samples.npy'))" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": { 63 | "collapsed": false, 64 | "scrolled": false 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "window = np.hanning(n_fft)\n", 69 | "def job(y):\n", 70 | " S = librosa.stft(y, n_fft=n_fft, hop_length=hop_length, window=window)\n", 71 | " amp = np.abs(S)\n", 72 | " if reduce_rows > 1 or reduce_cols > 1:\n", 73 | " amp = block_reduce(amp, (reduce_rows, reduce_cols), func=np.mean)\n", 74 | " if amp.shape[1] < crop_cols:\n", 75 | " amp = np.pad(amp, ((0, 0), (0, crop_cols-amp.shape[1])), 'constant')\n", 76 | " amp = amp[:crop_rows, 
:crop_cols]\n", 77 | " if use_logamp:\n", 78 | " amp = librosa.logamplitude(amp**2)\n", 79 | " amp -= amp.min()\n", 80 | " if amp.max() > 0:\n", 81 | " amp /= amp.max()\n", 82 | " amp = np.flipud(amp) # for visualization, put low frequencies on bottom\n", 83 | " return amp\n", 84 | "pool = Pool()\n", 85 | "%time fingerprints = pool.map(job, samples[:limit])\n", 86 | "fingerprints = np.asarray(fingerprints).astype(np.float32)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "np.save(join(data_root, 'fingerprints.npy'), fingerprints)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "print 'data shape:', np.shape(fingerprints)\n", 109 | "\n", 110 | "mean = np.mean(fingerprints, axis=0)\n", 111 | "mean -= mean.min()\n", 112 | "mean /= mean.max()\n", 113 | "\n", 114 | "print 'mean:'\n", 115 | "show_array(255 * mean)\n", 116 | "print 'max:'\n", 117 | "show_array(255 * np.max(fingerprints, axis=0))\n", 118 | "\n", 119 | "print 'random selection:'\n", 120 | "indices = range(len(fingerprints))\n", 121 | "np.random.shuffle(indices)\n", 122 | "show_array(255 * make_mosaic(np.array(fingerprints)[indices], n=16))" 123 | ] 124 | } 125 | ], 126 | "metadata": { 127 | "kernelspec": { 128 | "display_name": "Python 2", 129 | "language": "python", 130 | "name": "python2" 131 | }, 132 | "language_info": { 133 | "codemirror_mode": { 134 | "name": "ipython", 135 | "version": 2 136 | }, 137 | "file_extension": ".py", 138 | "mimetype": "text/x-python", 139 | "name": "python", 140 | "nbconvert_exporter": "python", 141 | "pygments_lexer": "ipython2", 142 | "version": "2.7.11" 143 | } 144 | }, 145 | "nbformat": 4, 146 | "nbformat_minor": 0 147 | } 148 | -------------------------------------------------------------------------------- /Samples 
to HDF5.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Convert all the samples in `samples/` to a single continuous stream of audio with a given samplerate and `uint8` datatype." 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "data_root = 'data/blizzard'\n", 19 | "sr = 16000\n", 20 | "limit = None" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "from utils import list_all_files, ffmpeg_load_audio\n", 32 | "from os.path import join\n", 33 | "from tqdm import tqdm\n", 34 | "import numpy as np\n", 35 | "import h5py" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "collapsed": false 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "f = h5py.File(join(data_root, 'samples-flat.hdf5'), 'w', libver='latest')\n", 47 | "dataset = f.create_dataset('samples', (0,), maxshape=(None,), dtype='uint8')" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": { 54 | "collapsed": false, 55 | "scrolled": false 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "%matplotlib inline\n", 60 | "from matplotlib import pyplot as plt\n", 61 | "fns = list(list_all_files(join(data_root, 'samples'), ['.wav', '.mp3']))\n", 62 | "for fn in tqdm(fns[:limit], leave=True):\n", 63 | " y, _ = ffmpeg_load_audio(fn, sr=sr, normalize=False, mono=True, out_type='int8') \n", 64 | " y = y.view('uint8')\n", 65 | " y += 128\n", 66 | " cur_len = dataset.shape[0]\n", 67 | " extra_len = y.shape[0]\n", 68 | " dataset.resize(cur_len+extra_len, axis=0)\n", 69 | " dataset[cur_len:cur_len+extra_len] = y\n", 70 | "f.close()" 71 | ] 72 | } 73 | ], 74 | 
"metadata": { 75 | "kernelspec": { 76 | "display_name": "Python 2", 77 | "language": "python", 78 | "name": "python2" 79 | }, 80 | "language_info": { 81 | "codemirror_mode": { 82 | "name": "ipython", 83 | "version": 2 84 | }, 85 | "file_extension": ".py", 86 | "mimetype": "text/x-python", 87 | "name": "python", 88 | "nbconvert_exporter": "python", 89 | "pygments_lexer": "ipython2", 90 | "version": "2.7.11" 91 | } 92 | }, 93 | "nbformat": 4, 94 | "nbformat_minor": 0 95 | } 96 | -------------------------------------------------------------------------------- /Sphinx to Samples.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This notebook uses sphinx via pocketsphinx to convert continuous recording of speech to a set of phonemes. On Mac you can install pocketsphinx with `brew install cmu-pocketsphinx`. This only works on audio that is 16KHz, and works at something like 5% realtime rate. So a 10 minute file might take 4 hours." 
8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "data_root = 'data/speech'\n", 19 | "sphinx_model_root = 'model' # point this to the folder called \"model\"" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "from os import environ, path\n", 31 | "from os.path import join\n", 32 | "from pocketsphinx.pocketsphinx import *\n", 33 | "from sphinxbase.sphinxbase import *\n", 34 | "from tqdm import tqdm\n", 35 | "from utils import *\n", 36 | "import librosa\n", 37 | "import os\n", 38 | "import numpy as np" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "config = Decoder.default_config()\n", 50 | "config.set_string('-hmm', join(sphinx_model_root, 'en-us/en-us/'))\n", 51 | "config.set_string('-allphone', join(sphinx_model_root, 'en-us/en-us-phone.lm.bin'))\n", 52 | "decoder = Decoder(config)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "stream = open(join(data_root, 'raw/audio.wav'))\n", 64 | "total = os.fstat(stream.fileno()).st_size\n", 65 | "pbar = tqdm(total=total)\n", 66 | "buf_size = 1024\n", 67 | "decoder.start_utt()\n", 68 | "while True:\n", 69 | " buf = stream.read(buf_size)\n", 70 | " pbar.update(buf_size)\n", 71 | " if buf:\n", 72 | " decoder.process_raw(buf, False, False)\n", 73 | " else:\n", 74 | " break\n", 75 | "decoder.end_utt()" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "y, sr = librosa.load(fn, sr=16000)\n", 87 | "frame_size = len(y) / 
decoder.n_frames()\n", 88 | "print frame_size" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "for i, seg in enumerate(tqdm(decoder.seg(), leave=True)):\n", 100 | " start_sample = frame_size * seg.start_frame\n", 101 | " stop_sample = frame_size * seg.end_frame\n", 102 | " cur = y[start_sample:stop_sample]\n", 103 | " ffmpeg_save_audio(join(data_root, 'samples/{}.wav'.format(i)), cur, sr=sr)" 104 | ] 105 | } 106 | ], 107 | "metadata": { 108 | "kernelspec": { 109 | "display_name": "Python 2", 110 | "language": "python", 111 | "name": "python2" 112 | }, 113 | "language_info": { 114 | "codemirror_mode": { 115 | "name": "ipython", 116 | "version": 2 117 | }, 118 | "file_extension": ".py", 119 | "mimetype": "text/x-python", 120 | "name": "python", 121 | "nbconvert_exporter": "python", 122 | "pygments_lexer": "ipython2", 123 | "version": "2.7.11" 124 | } 125 | }, 126 | "nbformat": 4, 127 | "nbformat_minor": 0 128 | } 129 | -------------------------------------------------------------------------------- /license.md: -------------------------------------------------------------------------------- 1 | The code in this repository is available under the [MIT License](https://secure.wikimedia.org/wikipedia/en/wiki/Mit_license). 2 | 3 | Copyright (c) 2016- Kyle McDonald 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Audio Notebooks 2 | 3 | A collection of Jupyter Notebooks related to audio processing. 4 | 5 | The notebooks act like interactive utility scripts for converting between different representations, usually stored in `data/project/` where `project` is the dataset you're working with. Generally, if you change `data_root` near the top of the notebook and run the rest of the notebook, it will do something useful. 6 | 7 | ## Setup 8 | 9 | [librosa](https://github.com/bmcfee/librosa) currently needs some extra help on OS X, make sure to follow the instructions [here](https://github.com/bmcfee/librosa#hints-for-os-x) first. 10 | 11 | ``` 12 | $ brew install ffmpeg # for loading and saving audio 13 | $ git clone https://github.com/kylemcdonald/AudioNotebooks.git 14 | $ cd AudioNotebooks 15 | $ pip install -r requirements.txt 16 | $ jupyter notebook 17 | ``` 18 | 19 | ## Terminology 20 | 21 | Here are some words used in the names of the notebooks, and what they mean: 22 | 23 | * _Samples_ refers to one-shot sounds, usually less than 1-2 seconds long. These can be loaded from a directory, like `data/project/samples/` or from a precomputed numpy matrix like `data/project/samples.npy`. When they are stored in a `.npy` file, all the samples are necessarily concatenated or expanded to be the same length. 
24 | * _Multisamples_ refers to audio that needs to be segmented into samples. 25 | * _Fingerprints_ refer to small images, usually 32x32 pixels, representing a small chunk of time like 250ms or 500ms. These are either calculated with CQT, STFT, or another frequency domain analysis technique. They are useful for running t-SNE or training neural nets. 26 | * _Spritesheets_ are single files with multiple sounds, either visually as fingerprints or sonically as a sequence of sounds, organized carefully so they can be chopped up again later. 27 | 28 | Some formats in use: 29 | 30 | * `.npy` are numpy matrices. Numpy can load and save these very quickly, even for large datasets. 31 | * `.tsv` are tab separated files referring to one sample per line, usually with normalized numbers in each column. These are good for loading into openFrameworks apps, or into the browser. 32 | * `.txt` are like `.tsv` but only have one item per line, usually a single string. Also good for loading into openFrameworks apps, or into the browser. 33 | * `.pkl` are Pickle files, which is the native Python serialization format, and is used for saving and loading data structures that have lists of objects with lots of different kinds of values (not just numbers or strings). 34 | * `.h5` is the way Keras saves the weights for a neural net. 35 | * `.json` is good for taking what would usually go into a Pickle file, and saving it in a format that can be loaded onto the web. It's also one of the formats used by Keras, part of a saved model. 36 | 37 | ## Example Workflows 38 | 39 | ### Audio spritesheet 40 | 41 | 1. Collect Samples 42 | 2. Samples to Audio Spritesheet 43 | 44 | ### t-SNE embedding for samples 45 | 46 | 1. Collect Samples 47 | 2. Samples to Fingerprints 48 | 3. Fingerprints to t-SNE (with `mode = "fingerprints"`) 49 | 50 | The standard workflow is to create a t-SNE embedding from fingerprints, but it's also possible to create an embedding after learning a classifier: 51 | 52 | 1. 
Collect Samples 53 | 2. Samples to Fingerprints 54 | 3. Collect Metadata 55 | 4. Metadata to Labels 56 | 5. Fingerprints and Labels to Classifier 57 | 6. Fingerprints to t-SNE (with `mode = "combined"`) 58 | 59 | ### t-SNE embedding for phonemes 60 | 61 | Right now this only really works with extracting phonemes from transcribed speech, using [Gentle](https://lowerquality.com/gentle/). 62 | 63 | 1. Gentle to Samples (with `save_wav = True`) 64 | 2. Samples to Fingerprints 65 | 3. Fingerprints to t-SNE 66 | 67 | It's also possible to use Sphinx for speech that does not have transcriptions, but it can be very significantly slower: 68 | 69 | 1. Sphinx to Samples 70 | 2. Collect Samples 71 | 3. Samples to Fingerprints 72 | 4. Fingerprints to t-SNE 73 | 74 | ### t-SNE grid fingerprints spritesheet 75 | 76 | By virtue of creating a rectangular grid, you may lose some points. This technique will only work on 10-20k points maximum. 77 | 78 | 1. Collect Samples 79 | 2. Samples to Fingerprints 80 | 3. Fingerprints to t-SNE 81 | 4. Run the `example-data` app from [ofxAssignment](https://github.com/kylemcdonald/ofxAssignment/) or use [CloudToGrid](https://github.com/kylemcdonald/CloudToGrid/) to convert a 2d t-SNE embedding to a grid embedding. 82 | 5. Fingerprints to Spritesheet 83 | 84 | If you only want a spritesheet without any sorting, skip step 4 and only run step 5 partially. 85 | 86 | ### Predict tags given tagged audio 87 | 88 | 1. Collect Samples 89 | 2. Samples to Fingerprints 90 | 3. Collect Metadata 91 | 4. Metadata to Labels 92 | 5. 
Fingerprints and Labels to Classifier -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jupyter 2 | librosa 3 | numpy 4 | scikits.samplerate 5 | scikit-image 6 | scikit-learn 7 | matplotlib 8 | scipy 9 | tqdm 10 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from list_all_files import list_all_files 2 | from ffmpeg_load_audio import ffmpeg_load_audio 3 | from ffmpeg_save_audio import ffmpeg_save_audio 4 | from show_array import show_array 5 | from make_mosaic import make_mosaic 6 | from bhtsne import bh_tsne 7 | from normalize import normalize 8 | from mkdir_p import mkdir_p -------------------------------------------------------------------------------- /utils/bh_tsne: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kylemcdonald/AudioNotebooks/64cd7e48006064be7bed8f82cde63c1233044940/utils/bh_tsne -------------------------------------------------------------------------------- /utils/bhtsne.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | A simple Python wrapper for the bh_tsne binary that makes it easier to use it 5 | for TSV files in a pipeline without any shell script trickery. 6 | 7 | Note: The script does some minimal sanity checking of the input, but don't 8 | expect it to cover all cases. After all, it is a just a wrapper. 
9 | 10 | Example: 11 | 12 | > echo -e '1.0\t0.0\n0.0\t1.0' | ./bhtsne.py -d 2 -p 0.1 13 | -2458.83181442 -6525.87718385 14 | 2458.83181442 6525.87718385 15 | 16 | The output will not be normalised, maybe the below one-liner is of interest?: 17 | 18 | python -c 'import numpy; from sys import stdin, stdout; 19 | d = numpy.loadtxt(stdin); d -= d.min(axis=0); d /= d.max(axis=0); 20 | numpy.savetxt(stdout, d, fmt="%.8f", delimiter="\t")' 21 | 22 | Authors: Pontus Stenetorp 23 | Philippe Remy 24 | Version: 2016-03-08 25 | ''' 26 | 27 | # Copyright (c) 2013, Pontus Stenetorp 28 | # 29 | # Permission to use, copy, modify, and/or distribute this software for any 30 | # purpose with or without fee is hereby granted, provided that the above 31 | # copyright notice and this permission notice appear in all copies. 32 | # 33 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 34 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 35 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 36 | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 37 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 38 | # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 39 | # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
def _argparse():
    """Build the command-line parser used when this wrapper runs as a script.

    Returns:
        An ArgumentParser exposing the t-SNE hyper-parameters (-d, -p, -t,
        -r, -n), a verbosity flag (-v), and the input/output streams
        (-i, -o), which default to stdin and stdout.
    """
    parser = ArgumentParser('bh_tsne Python wrapper')

    # (short flag, long flag, value type, default) for each numeric option,
    # listed in the order they should appear in the help output.
    numeric_options = (
        ('-d', '--no_dims', int, DEFAULT_NO_DIMS),
        ('-p', '--perplexity', float, DEFAULT_PERPLEXITY),
        # 0.0 for theta is equivalent to vanilla t-SNE
        ('-t', '--theta', float, DEFAULT_THETA),
        ('-r', '--randseed', int, EMPTY_SEED),
        ('-n', '--initial_dims', int, INITIAL_DIMENSIONS),
    )
    for short_flag, long_flag, value_type, default_value in numeric_options:
        parser.add_argument(short_flag, long_flag, type=value_type,
                            default=default_value)

    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('-i', '--input', type=FileType('r'), default=stdin)
    parser.add_argument('-o', '--output', type=FileType('w'), default=stdout)
    return parser
def bh_tsne(samples, no_dims=DEFAULT_NO_DIMS, initial_dims=INITIAL_DIMENSIONS, perplexity=DEFAULT_PERPLEXITY,
            theta=DEFAULT_THETA, randseed=EMPTY_SEED, verbose=False):
    """Embed `samples` with the external bh_tsne binary.

    The samples are mean-centered and projected onto their leading
    `initial_dims` principal components, written to a temporary `data.dat`,
    processed by the bh_tsne executable, and read back from `result.dat`.

    Args:
        samples: 2-d array-like, one sample per row. It is not modified.
        no_dims: dimensionality of the embedding requested from bh_tsne.
        initial_dims: number of principal components kept before t-SNE
            (capped at the input dimensionality).
        perplexity: t-SNE perplexity written to the bh_tsne header.
        theta: Barnes-Hut trade-off written to the header.
        randseed: appended to the data file unless equal to EMPTY_SEED.
        verbose: accepted for interface compatibility; the subprocess always
            inherits this process's output streams.

    Yields:
        One tuple of floats per sample, in the order the samples were given.

    Raises:
        AssertionError: if bh_tsne exits with a non-zero return code.
    """
    # Work on a private floating-point array: centering in place would
    # silently modify the caller's data (and fails outright on integer input).
    samples = np.asarray(samples)
    if not np.issubdtype(samples.dtype, np.floating):
        samples = samples.astype(np.float64)
    samples = samples - np.mean(samples, axis=0)

    cov_x = np.dot(np.transpose(samples), samples)
    # cov_x is symmetric, so eigh applies; unlike eig it never returns complex
    # values, which pack('d') below could not serialize.
    eig_val, eig_vec = np.linalg.eigh(cov_x)

    # sorting the eigen-vectors by descending eigen-value
    eig_vec = eig_vec[:, eig_val.argsort()[::-1]]

    if initial_dims > len(eig_vec):
        initial_dims = len(eig_vec)

    # truncating the eigen-vectors matrix to keep the most important vectors
    eig_vec = eig_vec[:, :initial_dims]
    samples = np.dot(samples, eig_vec)

    # Assume that the dimensionality of the first sample is representative for
    # the whole batch
    sample_dim = len(samples[0])
    sample_count = len(samples)

    # bh_tsne works with fixed input and output paths, give it a temporary
    # directory to work in so we don't clutter the filesystem
    with TmpDir() as tmp_dir_path:
        # Note: The binary format used by bh_tsne is roughly the same as for
        #   vanilla tsne
        with open(path_join(tmp_dir_path, 'data.dat'), 'wb') as data_file:
            # Write the bh_tsne header
            data_file.write(pack('iiddi', sample_count, sample_dim, theta, perplexity, no_dims))
            # Then write the data
            for sample in samples:
                data_file.write(pack('{}d'.format(len(sample)), *sample))
            # Write random seed if specified
            if randseed != EMPTY_SEED:
                data_file.write(pack('i', randseed))

        # Call bh_tsne and let it do its thing
        bh_tsne_p = Popen((abspath(BH_TSNE_BIN_PATH), ), cwd=tmp_dir_path, close_fds=True)
        bh_tsne_p.wait()
        assert not bh_tsne_p.returncode, ('ERROR: Call to bh_tsne exited '
                'with a non-zero return code exit status, please ' +
                ('enable verbose mode and ' if not verbose else '') +
                'refer to the bh_tsne output for further details')

        # Read the results fully before leaving the temporary directory
        with open(path_join(tmp_dir_path, 'result.dat'), 'rb') as output_file:
            # The first two integers are just the number of samples and the
            #   dimensionality
            result_samples, result_dims = _read_unpack('ii', output_file)
            # Collect the results, but they may be out of order
            results = [_read_unpack('{}d'.format(result_dims), output_file)
                       for _ in range(result_samples)]
            # Now collect the landmark data so that we can return the data in
            #   the order it arrived
            results = [(_read_unpack('i', output_file), e) for e in results]
            # The last piece of data is the cost for each sample, we ignore it

    # Everything is in memory now, so the temporary directory is already gone
    # even if the consumer never exhausts this generator.
    results.sort()
    for _, result in results:
        yield result
174 | ).format(sample_line_num, len(sample_data), dims) 175 | except NameError: 176 | # First line, record the dimensionality 177 | dims = len(sample_data) 178 | data.append([float(e) for e in sample_data]) 179 | 180 | for result in bh_tsne(data, no_dims=argp.no_dims, perplexity=argp.perplexity, theta=argp.theta, randseed=argp.randseed, 181 | verbose=argp.verbose, initial_dims=argp.initial_dims): 182 | fmt = '' 183 | for i in range(1, len(result)): 184 | fmt = fmt + '{}\t' 185 | fmt = fmt + '{}\n' 186 | argp.output.write(fmt.format(*result)) 187 | 188 | if __name__ == '__main__': 189 | from sys import argv 190 | exit(main(argv)) -------------------------------------------------------------------------------- /utils/ffmpeg_load_audio.py: -------------------------------------------------------------------------------- 1 | # can we adda aframes to avoid reading more than necessary? 2 | # what is the correct bufsize? do we need bufsize at all? 3 | # what is the best chunk_size for reading data? 4 | # could we ask ffmpeg to do type conversion for us? 
def convert_bit_depth(y, in_type, out_type, normalize=False):
    """Convert audio samples between floating-point and integer dtypes.

    Conversion rules:
      * float -> integer: samples are multiplied by the largest value of
        `out_type` and truncated.
      * float -> float: a plain cast.
      * integer -> float: a plain cast; values are divided by the peak only
        when `normalize` is true.
      * integer -> integer: samples are scaled by the whole-number ratio of
        the two full-scale values (floor division when narrowing).

    Args:
        y: numpy array of samples stored as `in_type`. It is never modified.
        in_type: dtype of `y` (anything accepted by `np.dtype`).
        out_type: dtype of the result (anything accepted by `np.dtype`).
        normalize: if true, the largest absolute sample is treated as full
            scale. Ignored when every sample is zero.

    Returns:
        A new array of dtype `out_type` when `in_type` is a floating or
        integer type; otherwise `y` itself, unchanged.
    """
    in_type = np.dtype(in_type).type
    out_type = np.dtype(out_type).type

    peak = None
    if normalize:
        if issubclass(in_type, np.integer):
            # Widen before abs(): the most negative value of a narrow integer
            # type has no positive counterpart and would overflow.
            peak = int(np.abs(np.asarray(y, dtype=np.int64)).max())
        else:
            peak = np.abs(y).max()
        if peak == 0:
            normalize = False

    if issubclass(in_type, np.floating):
        # Out-of-place arithmetic keeps the caller's array intact.
        if normalize:
            y = y / peak
        if issubclass(out_type, np.integer):
            y = y * np.iinfo(out_type).max
        y = y.astype(out_type)
    elif issubclass(in_type, np.integer):
        if issubclass(out_type, np.floating):
            y = y.astype(out_type)  # private copy, safe to scale in place
            if normalize:
                y /= peak
        elif issubclass(out_type, np.integer):
            in_max = peak if normalize else int(np.iinfo(in_type).max)
            out_max = int(np.iinfo(out_type).max)
            # Explicit floor division: the scale factors must stay integers
            # regardless of how the `/` operator is defined for ints.
            if out_max > in_max:
                y = y.astype(out_type)
                y *= out_max // in_max
            elif out_max < in_max:
                y = (y // (in_max // out_max)).astype(out_type)
            else:
                y = y.astype(out_type)
    return y
frame_size = bytes_per_sample * channels 69 | chunk_size = frame_size * sr # read in 1-second chunks 70 | raw = b'' 71 | with p.stdout as stdout: 72 | while True: 73 | data = stdout.read(chunk_size) 74 | if data: 75 | raw += data 76 | else: 77 | break 78 | audio = np.fromstring(raw, dtype=in_type) 79 | if channels > 1: 80 | audio = audio.reshape((-1, channels)).transpose() 81 | 82 | if audio.size == 0: 83 | return audio.astype(out_type), sr 84 | 85 | audio = convert_bit_depth(audio, in_type, out_type, normalize) 86 | 87 | return audio, sr -------------------------------------------------------------------------------- /utils/ffmpeg_save_audio.py: -------------------------------------------------------------------------------- 1 | # http://zulko.github.io/blog/2013/10/04/read-and-write-audio-files-in-python-using-ffmpeg/ 2 | # https://github.com/Zulko/moviepy/blob/master/moviepy/audio/io/ffmpeg_audiowriter.py 3 | 4 | import numpy as np 5 | import subprocess as sp 6 | import os 7 | DEVNULL = open(os.devnull, 'w') 8 | 9 | def ffmpeg_save_audio(filename, y, sr=44100): 10 | # should allow bitrate argument 11 | # should allow stereo output 12 | pipe = sp.Popen([ 13 | 'ffmpeg', 14 | '-y', # (optional) means overwrite the output file if it already exists. 
def find_rectangle(n, max_ratio=2):
    """Choose grid dimensions (w, h) that waste the fewest of `n` cells.

    Widths from int(sqrt(n)) up to (but excluding) max_ratio * int(sqrt(n))
    are tried; for each, the height is the largest whole number of rows that
    fits. The candidate leaving the fewest cells unused wins, ties going to
    the narrower grid.

    Args:
        n: number of cells to arrange.
        max_ratio: upper bound on width, as a multiple of int(sqrt(n)).

    Returns:
        A (w, h) tuple of ints with w * h <= n.

    Raises:
        IndexError: if no candidate width exists (e.g. n == 0).
    """
    sides = []
    square = int(math.sqrt(n))
    for w in range(square, max_ratio * square):
        # Floor division: the height must be a whole number of rows.
        h = n // w
        used = w * h
        leftover = n - used
        sides.append((leftover, (w, h)))
    return sorted(sides)[0][1]
def normalize(X):
    """Rescale X along axis 0 to the range [0, 1], in place.

    The minimum over axis 0 is subtracted and the result is divided by the
    remaining maximum. Entries whose range is zero (constant columns) are
    left at 0 instead of becoming NaN.

    Args:
        X: floating-point numpy array; it is modified in place.

    Returns:
        The same array object, X.
    """
    import numpy as np  # local import: this module has no imports of its own

    X -= X.min(axis=0)
    span = X.max(axis=0)
    # Dividing a constant column by 1 keeps it at 0 rather than computing 0/0.
    X /= np.where(span == 0, 1, span)
    return X