├── README.md ├── 100Hz_44100Hz_16bit_05sec.wav ├── 440Hz_44100Hz_16bit_05sec.wav ├── _static └── img │ └── steam-train-whistle-daniel_simon-converted-from-mp3.wav ├── environment.yml ├── batch.sh ├── run.sh ├── convert.py ├── runbatch.sh ├── StridedBuffer.ipynb ├── Levenshtein.ipynb ├── voice_activity_detection_online_with_mic.py ├── Viterbi.ipynb ├── datasets.ipynb ├── PySpeech.ipynb ├── PipelineTrain.py └── Plot_JSON.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # audio-tutorial 2 | Tutorial for torchaudio 3 | -------------------------------------------------------------------------------- /100Hz_44100Hz_16bit_05sec.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vincentqb/audio-tutorial/HEAD/100Hz_44100Hz_16bit_05sec.wav -------------------------------------------------------------------------------- /440Hz_44100Hz_16bit_05sec.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vincentqb/audio-tutorial/HEAD/440Hz_44100Hz_16bit_05sec.wav -------------------------------------------------------------------------------- /_static/img/steam-train-whistle-daniel_simon-converted-from-mp3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vincentqb/audio-tutorial/HEAD/_static/img/steam-train-whistle-daniel_simon-converted-from-mp3.wav -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: torchaudio 2 | channels: 3 | - anaconda 4 | - pytorch 5 | dependencies: 6 | - torchaudio 7 | - notebook 8 | - matplotlib 9 | - dotfiles 10 | - black 11 | - pylint 12 | - flake8 13 | - isort 14 | - neovim 15 | -------------------------------------------------------------------------------- /batch.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | for arch in 'wav2letter' 'lstm'; do 4 | for bs in 256 512 1024; do 5 | for lr in 1. .01 .0001; do 6 | sbatch /private/home/vincentqb/experiment/run.sh $arch $bs $lr 7 | done; 8 | done; 9 | done; 10 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | 3 | #SBATCH --job-name=torchaudiomodel 4 | #SBATCH --output=/checkpoint/%u/jobs/audio-%j.out 5 | #SBATCH --error=/checkpoint/%u/jobs/audio-%j.err 6 | #SBATCH --signal=USR1@600 7 | #SBATCH --open-mode=append 8 | #SBATCH --partition=learnfair 9 | #SBATCH --time=4320 10 | #SBATCH --mem-per-cpu=5120 11 | #SBATCH --nodes=1 12 | #SBATCH --ntasks-per-node=1 13 | #SBATCH --gres=gpu:8 14 | #SBATCH --cpus-per-task=80 15 | # 2x (number of data workers + number of GPUs requested) 16 | 17 | arch=$1 18 | bs=$2 19 | lr=$3 20 | 21 | # PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' 22 | 23 | # The ENV below are only used in distributed training with env:// initialization 24 | export MASTER_ADDR=${SLURM_JOB_NODELIST:0:9}${SLURM_JOB_NODELIST:10:4} 25 | export MASTER_PORT=29500 26 | 27 | srun --label \ 28 | python /private/home/vincentqb/experiment/PipelineTrain.py \ 29 | --arch $arch --batch-size $bs --learning-rate $lr \ 30 | --resume /private/home/vincentqb/experiment/checkpoint-$SLURM_JOB_ID-$arch-$bs-$lr.pth.tar 31 | # --distributed --world-size $SLURM_JOB_NUM_NODES --dist-url 'env://' --dist-backend='nccl' 32 | -------------------------------------------------------------------------------- /convert.py: -------------------------------------------------------------------------------- 1 | """Convert jupyter notebook to sphinx gallery notebook styled examples. 2 | 3 | Usage: python ipynb_to_gallery.py 4 | 5 | Dependencies: 6 | pypandoc: install using `pip install pypandoc` 7 | """ 8 | import pypandoc as pdoc 9 | import json 10 | 11 | def convert_ipynb_to_gallery(file_name): 12 | python_file = "" 13 | 14 | nb_dict = json.load(open(file_name)) 15 | cells = nb_dict['cells'] 16 | 17 | for i, cell in enumerate(cells): 18 | if i == 0: 19 | assert cell['cell_type'] == 'markdown', \ 20 | 'First cell has to be markdown' 21 | 22 | md_source = ''.join(cell['source']) 23 | rst_source = pdoc.convert_text(md_source, 'rst', 'md') 24 | python_file = '"""\n' + rst_source + '\n"""' 25 | else: 26 | if cell['cell_type'] == 'markdown': 27 | md_source = ''.join(cell['source']) 28 | rst_source = pdoc.convert_text(md_source, 'rst', 'md') 29 | commented_source = '\n'.join(['# ' + x for x in 30 | rst_source.split('\n')]) 31 | python_file = python_file + '\n\n\n' + '#' * 70 + '\n' + \ 32 | commented_source 33 | elif cell['cell_type'] == 'code': 34 | source = ''.join(cell['source']) 35 | python_file = python_file + '\n' * 2 + source 36 | 37 | python_file = python_file.replace("\n%", "\n# %") 38 | open(file_name.replace('.ipynb', '.py'), 'w').write(python_file) 39 | 40 | if __name__ == '__main__': 41 | import sys 42 | convert_ipynb_to_gallery(sys.argv[-1]) 43 | -------------------------------------------------------------------------------- /runbatch.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | 3 | #SBATCH --job-name=torchaudiomodel 4 | #SBATCH --output=/checkpoint/%u/jobs/audio-%A-%a.out 5 | #SBATCH --error=/checkpoint/%u/jobs/audio-%A-%a.err 6 | #SBATCH --signal=USR1@600 7 | #SBATCH --open-mode=append 8 | #SBATCH --partition=learnfair 9 | #SBATCH --time=4320 10 | #SBATCH --mem-per-cpu=5120 11 | #SBATCH --nodes=1 12 | #SBATCH --ntasks-per-node=1 13 | #SBATCH --gres=gpu:8 14 | #SBATCH --cpus-per-task=80 15 | #SBATCH --array=1-4 16 | # number of CPUs = 2x (number of data workers + number of GPUs requested) 17 | 18 | COUNT=$((1 * 1 * 2 * 1 * 2)) 19 | 20 | if [[ "$SLURM_ARRAY_TASK_COUNT" -ne $COUNT ]]; then 21 | echo "SLURM_ARRAY_TASK_COUNT = $SLURM_ARRAY_TASK_COUNT is not equal to $COUNT" 22 | exit 23 | fi 24 | 25 | archs=('wav2letter') 26 | bss=(128) 27 | lrs=(.5 .1) 28 | gammas=(.98) 29 | nbinss=(13 128) 30 | 31 | i=$SLURM_ARRAY_TASK_ID 32 | 33 | l=${#archs[@]} 34 | j=$(($i % $l)) 35 | i=$(($i / $l)) 36 | arch=${archs[$j]} 37 | 38 | l=${#bss[@]} 39 | j=$(($i % $l)) 40 | i=$(($i / $l)) 41 | bs=${bss[$j]} 42 | 43 | l=${#lrs[@]} 44 | j=$(($i % $l)) 45 | i=$(($i / $l)) 46 | lr=${lrs[$j]} 47 | 48 | l=${#gammas[@]} 49 | j=$(($i % $l)) 50 | i=$(($i / $l)) 51 | gamma=${gammas[$j]} 52 | 53 | l=${#nbinss[@]} 54 | j=$(($i % $l)) 55 | i=$(($i / $l)) 56 | nbins=${nbinss[$j]} 57 | 58 | echo $SLURM_JOB_ID $arch $bs $lr $gamma 59 | 60 | # The ENV below are only used in distributed training with env:// initialization 61 | export MASTER_ADDR=${SLURM_JOB_NODELIST:0:9}${SLURM_JOB_NODELIST:10:4} 62 | export MASTER_PORT=29500 63 | 64 | # export PYTHONWARNINGS='ignore:semaphore_tracker:UserWarning' 65 | 66 | srun --label \ 67 | python /private/home/vincentqb/experiment/PipelineTrain.py \ 68 | --arch $arch --batch-size $bs --learning-rate $lr --gamma $gamma --n-bins $nbins \ 69 | --resume /private/home/vincentqb/experiment/checkpoint-$SLURM_JOB_ID-$arch-$bs-$lr.pth.tar 70 | # --distributed --world-size $SLURM_JOB_NUM_NODES --dist-url 'env://' --dist-backend='nccl' 71 | -------------------------------------------------------------------------------- /StridedBuffer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "class _StridedBuffer:\n", 10 | " \n", 11 | " def __init__(self, generator, stride, length):\n", 12 | " self._generator = generator\n", 13 | " self._stride = stride\n", 14 | " self._length = length\n", 15 | " self._buffer = [None] * stride\n", 16 | " self._mod = 0\n", 17 | " \n", 18 | " def __iter__(self):\n", 19 | " return self\n", 20 | " \n", 21 | " def __next__(self):\n", 22 | " while (not self._buffer[0]) or len(self._buffer[0]) < self._length:\n", 23 | " item = next(self._generator)\n", 24 | " if self._buffer[self._mod]:\n", 25 | " self._buffer[self._mod].append(item)\n", 26 | " else:\n", 27 | " self._buffer[self._mod] = [item]\n", 28 | " self._mod = (self._mod + 1) % self._stride\n", 29 | " item = self._buffer.pop(0)\n", 30 | " self._buffer.append([])\n", 31 | " return item" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 70, 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "name": "stdout", 41 | "output_type": "stream", 42 | "text": [ 43 | "[0, 2, 4, 6]\n", 44 | "[1, 3, 5, 8]\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "dataset = iter(list(range(10)))\n", 50 | "\n", 51 | "for d in StridedBuffer(dataset, 2, 4):\n", 52 | " print(d)" 53 | ] 54 | } 55 | ], 56 | "metadata": { 
57 | "kernelspec": { 58 | "display_name": "Python 3", 59 | "language": "python", 60 | "name": "python3" 61 | }, 62 | "language_info": { 63 | "codemirror_mode": { 64 | "name": "ipython", 65 | "version": 3 66 | }, 67 | "file_extension": ".py", 68 | "mimetype": "text/x-python", 69 | "name": "python", 70 | "nbconvert_exporter": "python", 71 | "pygments_lexer": "ipython3", 72 | "version": "3.7.6" 73 | } 74 | }, 75 | "nbformat": 4, 76 | "nbformat_minor": 4 77 | } 78 | -------------------------------------------------------------------------------- /Levenshtein.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "[link](https://martin-thoma.com/word-error-rate-calculation/)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 64, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from typing import Optional\n", 17 | "\n", 18 | "import torch" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 65, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "r = \"ab\"\n", 28 | "h = \"cc\"" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 66, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "text/plain": [ 39 | "False" 40 | ] 41 | }, 42 | "execution_count": 66, 43 | "metadata": {}, 44 | "output_type": "execute_result" 45 | } 46 | ], 47 | "source": [ 48 | "def levenshtein_distance_list(r, h):\n", 49 | "\n", 50 | " # initialisation\n", 51 | " d = [[0] * (len(h)+1)] * (len(r)+1) # FIXME\n", 52 | "\n", 53 | " # computation\n", 54 | " for i in range(1, len(r)+1):\n", 55 | " for j in range(1, len(h)+1):\n", 56 | "\n", 57 | " if r[i-1] == h[j-1]:\n", 58 | " d[i].append(d[i-1][j-1])\n", 59 | " else:\n", 60 | " substitution = d[i-1][j-1] + 1\n", 61 | " insertion = d[i][j-1] + 1\n", 62 | " deletion = d[i-1][j] + 1\n", 63 | " d[i].append(min(substitution, insertion, deletion))\n", 64 | "\n", 65 | " return d[len(r)][len(h)]\n", 66 | "\n", 67 | "\n", 68 | "levenshtein_distance_list(r, h) == 2" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 67, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "False" 80 | ] 81 | }, 82 | "execution_count": 67, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "def levenshtein_distance(r: str, h: str, device: Optional[str] = None):\n", 89 | "\n", 90 | " # initialisation\n", 91 | " d = torch.zeros((2, len(h)+1), dtype=torch.long) # , device=device)\n", 92 | " dold = 0\n", 93 | " dnew = 1\n", 94 | "\n", 95 | " # computation\n", 96 | " for i in range(1, len(r)+1):\n", 97 | " d[dnew, 0] = 0\n", 98 | " for j in range(1, len(h)+1):\n", 99 | "\n", 100 | " if r[i-1] == h[j-1]:\n", 101 | " d[dnew, j] = d[dnew-1, j-1]\n", 102 | " else:\n", 103 | " substitution = d[dnew-1, j-1] + 1\n", 104 | " insertion = d[dnew, j-1] + 1\n", 105 | " deletion = d[dnew-1, j] + 1\n", 106 | " d[dnew, j] = min(substitution, insertion, deletion)\n", 107 | "\n", 108 | " dnew, dold = dold, dnew\n", 109 | "\n", 110 | " dist = d[dnew, -1].item()\n", 111 | "\n", 112 | " return dist\n", 113 | "\n", 114 | "\n", 115 | "levenshtein_distance(r, h) == 2" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 68, 121 | "metadata": {}, 122 | "outputs": [ 123 | { 124 | "data": { 125 | "text/plain": [ 126 | "False" 127 | ] 128 | }, 129 | "execution_count": 68, 130 | "metadata": 
{}, 131 | "output_type": "execute_result" 132 | } 133 | ], 134 | "source": [ 135 | "def levenshtein_distance_torch(r: str, h: str, device: Optional[str] = None):\n", 136 | "\n", 137 | " # initialisation\n", 138 | " d = torch.zeros((2, len(h)+1), dtype=torch.long) # , device=device) # FIXME\n", 139 | " dold = 0\n", 140 | " dnew = 1\n", 141 | "\n", 142 | " # computation\n", 143 | " for i in range(1, len(r)+1):\n", 144 | " d[dnew, 0] = 0\n", 145 | " for j in range(1, len(h)+1):\n", 146 | " # print(r[i-1], h[j-1])\n", 147 | " if r[i-1] == h[j-1]:\n", 148 | " d[dnew, j] = d[dnew-1, j-1]\n", 149 | " else:\n", 150 | " substitution = d[dnew-1, j-1] + 1\n", 151 | " insertion = d[dnew, j-1] + 1\n", 152 | " deletion = d[dnew-1, j] + 1\n", 153 | " d[dnew, j] = min(substitution, insertion, deletion)\n", 154 | "\n", 155 | " dnew, dold = dold, dnew\n", 156 | "\n", 157 | " dist = d[dnew, -1].item()\n", 158 | "\n", 159 | " return dist\n", 160 | "\n", 161 | "\n", 162 | "levenshtein_distance_torch(r, h) == 2" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 69, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "data": { 172 | "text/plain": [ 173 | "True" 174 | ] 175 | }, 176 | "execution_count": 69, 177 | "metadata": {}, 178 | "output_type": "execute_result" 179 | } 180 | ], 181 | "source": [ 182 | "def levenshtein_distance_list_2(r: str, h: str):\n", 183 | "\n", 184 | " # initialisation\n", 185 | " dold = list(range(len(h)+1))\n", 186 | " dnew = list(0 for _ in range(len(h)+1))\n", 187 | "\n", 188 | " # computation\n", 189 | " for i in range(1, len(r)+1):\n", 190 | " dnew[0] = i\n", 191 | " for j in range(1, len(h)+1):\n", 192 | " if r[i-1] == h[j-1]:\n", 193 | " dnew[j] = dold[j-1]\n", 194 | " else:\n", 195 | " substitution = dold[j-1] + 1\n", 196 | " insertion = dnew[j-1] + 1\n", 197 | " deletion = dold[j] + 1\n", 198 | " dnew[j] = min(substitution, insertion, deletion)\n", 199 | "\n", 200 | " dnew, dold = dold, dnew\n", 201 | "\n", 202 | " return dold[-1]\n", 203 | "\n", 204 | "\n", 205 | "levenshtein_distance_list_2(r, h) == 2" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 70, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "def levenshtein_distance_list_3(r, h):\n", 215 | "\n", 216 | " # initialisation\n", 217 | " d = [[0] * (len(h)+1)] * 2\n", 218 | "\n", 219 | " # computation\n", 220 | " for i in range(1, len(r)+1):\n", 221 | " for j in range(1, len(h)+1):\n", 222 | "\n", 223 | " if r[i-1 % 2] == h[j-1]:\n", 224 | " d[i].append(d[i-1 % 2][j-1])\n", 225 | " else:\n", 226 | " substitution = d[i-1 % 2][j-1] + 1\n", 227 | " insertion = d[i % 2][j-1] + 1\n", 228 | " deletion = d[i-1 % 2][j] + 1\n", 229 | " d[i].append(min(substitution, insertion, deletion))\n", 230 | "\n", 231 | " # print(d)\n", 232 | " return d[len(r)][len(h)]\n", 233 | "\n", 234 | "\n", 235 | "# levenshtein_distance_list_3(r, h) == 2" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 71, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "text/plain": [ 246 | "True" 247 | ] 248 | }, 249 | "execution_count": 71, 250 | "metadata": {}, 251 | "output_type": "execute_result" 252 | } 253 | ], 254 | "source": [ 255 | "def levenshtein_distance_numpy(r, h):\n", 256 | " # initialisation\n", 257 | " import numpy\n", 258 | "\n", 259 | " d = numpy.zeros((len(r) + 1) * (len(h) + 1), dtype=numpy.uint8)\n", 260 | " d = d.reshape((len(r) + 1, len(h) + 1))\n", 261 | " for i in range(len(r) + 1):\n", 262 | " for j in 
range(len(h) + 1):\n", 263 | " if i == 0:\n", 264 | " d[0][j] = j\n", 265 | " elif j == 0:\n", 266 | " d[i][0] = i\n", 267 | "\n", 268 | " # computation\n", 269 | " for i in range(1, len(r) + 1):\n", 270 | " for j in range(1, len(h) + 1):\n", 271 | " if r[i - 1] == h[j - 1]:\n", 272 | " d[i][j] = d[i - 1][j - 1]\n", 273 | " else:\n", 274 | " substitution = d[i - 1][j - 1] + 1\n", 275 | " insertion = d[i][j - 1] + 1\n", 276 | " deletion = d[i - 1][j] + 1\n", 277 | " d[i][j] = min(substitution, insertion, deletion)\n", 278 | "\n", 279 | " return d[len(r)][len(h)]\n", 280 | "\n", 281 | "\n", 282 | "levenshtein_distance_numpy(r, h) == 2" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 73, 288 | "metadata": {}, 289 | "outputs": [ 290 | { 291 | "name": "stdout", 292 | "output_type": "stream", 293 | "text": [ 294 | "3.95 ms ± 69.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", 295 | "3.99 ms ± 80.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", 296 | "2.16 ms ± 90.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", 297 | "61.9 µs ± 1.91 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", 298 | "55.2 µs ± 13.3 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" 299 | ] 300 | } 301 | ], 302 | "source": [ 303 | "r = \"abcdddee\"\n", 304 | "h = \"abcddde\"\n", 305 | "\n", 306 | "%timeit levenshtein_distance(r, h)\n", 307 | "%timeit levenshtein_distance_torch(r, h)\n", 308 | "jitted = torch.jit.script(levenshtein_distance)\n", 309 | "%timeit jitted(r, h)\n", 310 | "%timeit levenshtein_distance_list(r, h)\n", 311 | "%timeit levenshtein_distance_list_2(r, h)\n", 312 | "# %timeit levenshtein_distance_list_3(r, h)\n", 313 | "\n", 314 | "# jitted = torch.jit.script(levenshtein_distance_list)\n", 315 | "# %timeit jitted(r, h)\n", 316 | "\n", 317 | "# %timeit levenshtein_distance_array(r, h)\n", 318 | "# jitted = torch.jit.script(levenshtein_distance_array)\n", 319 | "# %timeit jitted(r, h)" 320 | ] 321 | } 322 | ], 323 | "metadata": { 324 | "kernelspec": { 325 | "display_name": "Python 3", 326 | "language": "python", 327 | "name": "python3" 328 | }, 329 | "language_info": { 330 | "codemirror_mode": { 331 | "name": "ipython", 332 | "version": 3 333 | }, 334 | "file_extension": ".py", 335 | "mimetype": "text/x-python", 336 | "name": "python", 337 | "nbconvert_exporter": "python", 338 | "pygments_lexer": "ipython3", 339 | "version": "3.7.6" 340 | } 341 | }, 342 | "nbformat": 4, 343 | "nbformat_minor": 4 344 | } 345 | -------------------------------------------------------------------------------- /voice_activity_detection_online_with_mic.py: -------------------------------------------------------------------------------- 1 | """ 2 | Following `a simple but efficient real-time voice activity detection 3 | algorithm `__. 
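The script captures microphone audio in fixed-size chunks, classifies each chunk
as speech or silence using the criteria described below, and accumulates the
chunks of each detected speech segment before handing the segment off.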
4 | 5 | """ 6 | 7 | # %matplotlib notebook 8 | 9 | import math 10 | # import IPython.display as ipd 11 | # import matplotlib.pyplot as plt 12 | 13 | import pyaudio 14 | import numpy as np 15 | import librosa 16 | 17 | import torch 18 | import torchaudio 19 | 20 | from six.moves import queue 21 | from collections import deque 22 | 23 | # Voice sound 24 | dataset = torchaudio.datasets.YESNO("~/yesno") 25 | waveform = dataset[0][0] 26 | sample_rate = 8 * 10**3 27 | 28 | # Train sound 29 | # filename = "_static/img/steam-train-whistle-daniel_simon-converted-from-mp3.wav" 30 | # waveform, sample_rate = torchaudio.load(filename) 31 | 32 | # Convert to mono 33 | waveform = waveform.mean(0) 34 | 35 | 36 | ###################################################################### 37 | # There are three criteria to decide if a frame contains speech: energy, 38 | # most dominant frequency, and spectral flatness. If any two of those are 39 | # higher than a minimum plus a threshold, then the frame contains speech. 40 | # In the offline case, the list of frames is postprocessed to remove too 41 | # short silence and speech sequences. In the online case here, inertia is 42 | # added before switching from speech to silence or vice versa. 43 | # 44 | 45 | def compute_spectral_flatness(frame): 46 | EPSILON = 0.01 47 | n = frame.nonzero().size(0) 48 | geometric_mean = torch.exp((EPSILON + frame).log().mean(-1)) - EPSILON 49 | arithmetic_mean = frame.mean(-1) 50 | return -10 * torch.log10(EPSILON + geometric_mean/arithmetic_mean) 51 | 52 | 53 | def compute_energy(frame): 54 | return frame.pow(2).sum(-1) 55 | 56 | 57 | class VoiceActivityDetection(object): 58 | 59 | def __init__(self): 60 | 61 | self.num_init_frames = 30 62 | self.ignore_silent_count = 10 63 | self.ignore_speech_count = 5 64 | 65 | self.energy_prim_thresh = 40 66 | self.frequency_prim_thresh = 5 67 | self.spectral_flatness_prim_thresh = 3 68 | 69 | self.ignore_silent_count = 4 70 | self.ignore_speech_count = 1 71 | 72 | self.speech_mark = True 73 | self.silence_mark = False 74 | 75 | self.silent_count = 0 76 | self.speech_count = 0 77 | self.n = 0 78 | 79 | self.energy_list = [] 80 | self.frequency_list = [] 81 | self.spectral_flatness_list = [] 82 | 83 | def iter(self, frame): 84 | 85 | EPSILON = 1. 
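        # Move the frame to the frequency domain: torch.rfft returns interleaved
        # real/imaginary parts, and complex_norm reduces them to the amplitude
        # spectrum used below for the dominant-frequency and flatness measures.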
86 | frame_fft = torch.rfft(frame, 1) 87 | amplitudes = torchaudio.functional.complex_norm(frame_fft) 88 | 89 | # Compute frame energy 90 | energy = compute_energy(frame) 91 | 92 | # Most dominant frequency component 93 | frequency = amplitudes.argmax() 94 | 95 | # Spectral flatness measure 96 | spectral_flatness = compute_spectral_flatness(amplitudes) 97 | 98 | self.energy_list.append(energy) 99 | self.frequency_list.append(frequency) 100 | self.spectral_flatness_list.append(spectral_flatness) 101 | 102 | if self.n == 0: 103 | self.min_energy = energy 104 | self.min_frequency = frequency 105 | self.min_spectral_flatness = spectral_flatness 106 | elif self.n < self.num_init_frames: 107 | self.min_energy = min(energy, self.min_energy) 108 | self.min_frequency = min(frequency, self.min_frequency) 109 | self.min_spectral_flatness = min(spectral_flatness, self.min_spectral_flatness) 110 | 111 | self.n +=1 112 | 113 | thresh_energy = self.energy_prim_thresh * torch.log(EPSILON + self.min_energy) 114 | thresh_frequency = self.frequency_prim_thresh 115 | thresh_spectral_flatness = self.spectral_flatness_prim_thresh 116 | 117 | # Check all three conditions 118 | 119 | counter = 0 120 | if energy - self.min_energy >= thresh_energy: 121 | counter += 1 122 | if frequency - self.min_frequency >= thresh_frequency: 123 | counter += 1 124 | if spectral_flatness - self.min_spectral_flatness >= thresh_spectral_flatness: 125 | counter += 1 126 | 127 | # Detection 128 | if counter > 1: 129 | # Speech detected 130 | self.speech_count += 1 131 | # Inertia against switching 132 | if self.n >= self.num_init_frames and self.speech_count <= self.ignore_speech_count: 133 | # Too soon to change 134 | return self.silence_mark 135 | else: 136 | self.silent_count = 0 137 | return self.speech_mark 138 | else: 139 | # Silence detected 140 | self.min_energy = ((self.silent_count * self.min_energy) + energy) / (self.silent_count + 1) 141 | self.silent_count += 1 142 | # Inertia against switching 143 | if self.n >= self.num_init_frames and self.silent_count <= self.ignore_silent_count: 144 | # Too soon to change 145 | return self.speech_mark 146 | else: 147 | self.speech_count = 0 148 | return self.silence_mark 149 | 150 | class MicrophoneStream(object): 151 | """Opens a recording stream as a generator yielding the audio chunks.""" 152 | def __init__(self, device=None, rate=22050, chunk=2205): 153 | """ 154 | The 22050 is the librosa default, which is what our models were 155 | trained on. The ratio of [chunk / rate] is the amount of time between 156 | audio samples - for example, with these defaults, 157 | an audio fragment will be processed every tenth of a second. 158 | """ 159 | self._rate = rate 160 | self._chunk = chunk 161 | self._device = device 162 | 163 | # Create a thread-safe buffer of audio data 164 | self._buff = queue.Queue() 165 | self.closed = True 166 | 167 | def __enter__(self): 168 | self._audio_interface = pyaudio.PyAudio() 169 | self._audio_stream = self._audio_interface.open( 170 | #format=pyaudio.paInt16, 171 | format=pyaudio.paFloat32, 172 | # The API currently only supports 1-channel (mono) audio 173 | # https://goo.gl/z757pE 174 | channels=1, rate=self._rate, 175 | input=True, frames_per_buffer=self._chunk, 176 | input_device_index=self._device, 177 | # Run the audio stream asynchronously to fill the buffer object. 178 | # This is necessary so that the input device's buffer doesn't 179 | # overflow while the calling thread makes network requests, etc. 
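            # _fill_buffer runs on PyAudio's callback thread and only enqueues
            # the raw bytes; generator() drains the queue on the consumer side.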
180 | stream_callback=self._fill_buffer, 181 | ) 182 | 183 | self.closed = False 184 | 185 | return self 186 | 187 | def __exit__(self, type, value, traceback): 188 | self._audio_stream.stop_stream() 189 | self._audio_stream.close() 190 | self.closed = True 191 | # Signal the generator to terminate so that the client's 192 | # streaming_recognize method will not block the process termination. 193 | self._buff.put(None) 194 | self._audio_interface.terminate() 195 | 196 | def _fill_buffer(self, in_data, frame_count, time_info, status_flags): 197 | """Continuously collect data from the audio stream, into the buffer.""" 198 | self._buff.put(in_data) 199 | return None, pyaudio.paContinue 200 | 201 | def generator(self): 202 | while not self.closed: 203 | # Use a blocking get() to ensure there's at least one chunk of 204 | # data, and stop iteration if the chunk is None, indicating the 205 | # end of the audio stream. 206 | chunk = self._buff.get() 207 | if chunk is None: 208 | return 209 | data = [chunk] 210 | 211 | # Now consume whatever other data's still buffered. 212 | while True: 213 | try: 214 | chunk = self._buff.get(block=False) 215 | if chunk is None: 216 | return 217 | data.append(chunk) 218 | except queue.Empty: 219 | break 220 | 221 | ans = np.fromstring(b''.join(data), dtype=np.float32) 222 | # yield uniform-sized chunks 223 | ans = np.split(ans, np.shape(ans)[0] / self._chunk) 224 | # Resample the audio to 22050, librosa default 225 | for chunk in ans: 226 | yield librosa.core.resample(chunk, self._rate, 22050) 227 | 228 | # Iterate VAD 229 | 230 | vad = VoiceActivityDetection() 231 | speech_frames = [] 232 | chunks = [] 233 | 234 | # fig, ax = plt.subplots() 235 | m = .2 236 | # ax.set_ylim(-m,m) 237 | 238 | min_to_cumulate = 20 # 2 seconds, with defaults 239 | max_to_cumulate = 100 # 10 seconds with defaults 240 | precumulate = 5 241 | 242 | max_to_visualize = 100 243 | 244 | cumulated = [] 245 | precumulated = deque(maxlen=precumulate) 246 | # colors = plt.rcParams['axes.prop_cycle'].by_key()['color'] 247 | 248 | with MicrophoneStream() as stream: 249 | audio_generator = stream.generator() 250 | chunk_length = stream._chunk 251 | waveform = torch.zeros(max_to_visualize*chunk_length) 252 | speechform = torch.zeros(max_to_visualize*chunk_length) 253 | try: 254 | for chunk in audio_generator: 255 | 256 | # Is speech? 
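            # vad.iter marks the chunk as speech or silence, with inertia against
            # rapid switching; speech chunks are accumulated below until a long
            # enough segment is ready.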
257 | 258 | chunk = torch.tensor(chunk) 259 | is_speech = vad.iter(chunk) 260 | 261 | # Cumulate speech 262 | 263 | if is_speech or cumulated: 264 | cumulated.append(chunk) 265 | else: 266 | precumulated.append(chunk) 267 | 268 | if (not is_speech and len(cumulated) >= min_to_cumulate) or (len(cumulated) > max_to_cumulate): 269 | z = torch.cat(list(precumulated) + cumulated, -1) 270 | print("RUN PYSPEECH") 271 | print(z) 272 | cumulated = [] 273 | precumulated = deque(maxlen=precumulate) 274 | 275 | continue 276 | # Plot 277 | 278 | waveform[:-chunk_length] = waveform[chunk_length:] 279 | waveform[-chunk_length:] = chunk 280 | speechform[:-chunk_length] = speechform[chunk_length:] 281 | speechform[-chunk_length:] = int(is_speech) 282 | 283 | if ax.lines: 284 | ax.lines[0].set_ydata(waveform) 285 | ax.lines[1].set_ydata(.95*m*speechform) 286 | ax.lines[2].set_ydata(-.95*m*speechform) 287 | else: 288 | ax.plot(waveform) 289 | ax.plot(.95*m*speechform, color=colors[1], linewidth=2) 290 | ax.plot(-.95*m*speechform, color=colors[1], linewidth=2) 291 | fig.canvas.draw() 292 | 293 | except KeyboardInterrupt: 294 | pass 295 | -------------------------------------------------------------------------------- /Viterbi.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "[with pytorch](https://gist.github.com/PetrochukM/afaa3613a99a8e7213d2efdd02ae4762)\n", 8 | "\n", 9 | "[notebook](https://github.com/napsternxg/pytorch-practice/blob/master/Viterbi%20decoding%20and%20CRF.ipynb)\n", 10 | "\n", 11 | "[with different rings](https://www.audiolabs-erlangen.de/resources/MIR/FMP/C5/C5S3_Viterbi.html)\n", 12 | "\n", 13 | "[python only?](https://stackoverflow.com/questions/9729968/python-implementation-of-viterbi-algorithm)\n", 14 | "\n", 15 | "[numpy](http://www.adeveloperdiary.com/data-science/machine-learning/implement-viterbi-algorithm-in-hidden-markov-model-using-python-and-r/)" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 174, 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "data": { 25 | "text/plain": [ 26 | "" 27 | ] 28 | }, 29 | "execution_count": 174, 30 | "metadata": {}, 31 | "output_type": "execute_result" 32 | } 33 | ], 34 | "source": [ 35 | "import numpy as np\n", 36 | "import torch\n", 37 | "\n", 38 | "np.random.seed(2017)\n", 39 | "\n", 40 | "torch.manual_seed(2017)\n", 41 | "\n", 42 | "# from scipy.misc import logsumexp # Use it for reference checking implementation" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 175, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "Emissions:\n", 55 | "[[ 9. 6.]\n", 56 | " [13. 10.]\n", 57 | " [ 8. 18.]\n", 58 | " [ 3. 15.]]\n", 59 | "Transitions:\n", 60 | "[[7. 8.]\n", 61 | " [0. 
8.]]\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "seq_length, num_states = 4, 2\n", 67 | "emissions = np.random.randint(20, size=(seq_length, num_states))*1.\n", 68 | "transitions = np.random.randint(10, size=(num_states, num_states))*1.\n", 69 | "\n", 70 | "print(\"Emissions:\", emissions, sep=\"\\n\")\n", 71 | "print(\"Transitions:\", transitions, sep=\"\\n\")" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 176, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "((4, 2), (2, 2))" 83 | ] 84 | }, 85 | "execution_count": 176, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "emissions.shape, transitions.shape" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 177, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "(78.0, [0, 0, 1, 1])" 103 | ] 104 | }, 105 | "execution_count": 177, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "def viterbi_decoding_numpy(emissions, transitions):\n", 112 | " # Use help from: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/crf/python/ops/crf.py\n", 113 | " scores = np.zeros_like(emissions)\n", 114 | " back_pointers = np.zeros_like(emissions, dtype=\"int\")\n", 115 | " scores = emissions[0]\n", 116 | "\n", 117 | " # Generate most likely scores and paths for each step in sequence\n", 118 | " for i in range(1, emissions.shape[0]):\n", 119 | " score_with_transition = np.expand_dims(scores, 1) + transitions\n", 120 | " scores = emissions[i] + score_with_transition.max(axis=0)\n", 121 | " back_pointers[i] = np.argmax(score_with_transition, 0)\n", 122 | " \n", 123 | "\n", 124 | " # Generate the most likely path\n", 125 | " viterbi = [np.argmax(scores)]\n", 126 | " for bp in reversed(back_pointers[1:]):\n", 127 | " viterbi.append(bp[viterbi[-1]])\n", 128 | " viterbi.reverse()\n", 129 | " viterbi_score = np.max(scores)\n", 130 | "\n", 131 | " return viterbi_score, viterbi\n", 132 | "\n", 133 | "\n", 134 | "viterbi_decoding_numpy(emissions, transitions)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 178, 140 | "metadata": {}, 141 | "outputs": [ 142 | { 143 | "data": { 144 | "text/plain": [ 145 | "(78.0, [0, 0, 1, 1])" 146 | ] 147 | }, 148 | "execution_count": 178, 149 | "metadata": {}, 150 | "output_type": "execute_result" 151 | } 152 | ], 153 | "source": [ 154 | "def zeros_like(m):\n", 155 | " return zeros(len(m), len(m[0]))\n", 156 | "\n", 157 | "\n", 158 | "def zeros(d1, d2):\n", 159 | " return list(list(0 for _ in range(d2)) for _ in range(d1))\n", 160 | "\n", 161 | "\n", 162 | "def apply_transpose(f, m):\n", 163 | " return list(map(f, zip(*m)))\n", 164 | "\n", 165 | "\n", 166 | "def argmax(l):\n", 167 | " return max(range(len(l)), key=lambda i: l[i])\n", 168 | "\n", 169 | "\n", 170 | "def add1d2d(m1, m2):\n", 171 | " return [[v2 + v1 for v2 in m2_row] for m2_row, v1 in zip(m2, m1)]\n", 172 | "\n", 173 | "\n", 174 | "def add1d1d(v1, v2):\n", 175 | " return [e + s for e, s in zip(v1, v2)]\n", 176 | "\n", 177 | "\n", 178 | "def viterbi_decoding_list(emissions, transitions):\n", 179 | " scores = zeros_like(emissions)\n", 180 | " back_pointers = zeros_like(emissions)\n", 181 | " scores = emissions[0]\n", 182 | "\n", 183 | " # Generate most likely scores and paths for each step in sequence\n", 184 | " for i in range(1, len(emissions)):\n", 185 | " score_with_transition = add1d2d(scores, 
transitions)\n", 186 | " max_score_with_transition = apply_transpose(max, score_with_transition)\n", 187 | " scores = add1d1d(emissions[i], max_score_with_transition)\n", 188 | " back_pointers[i] = apply_transpose(argmax, score_with_transition)\n", 189 | "\n", 190 | " # Generate the most likely path\n", 191 | " viterbi = [argmax(scores)]\n", 192 | " for bp in reversed(back_pointers[1:]):\n", 193 | " viterbi.append(bp[viterbi[-1]])\n", 194 | " viterbi.reverse()\n", 195 | " viterbi_score = max(scores)\n", 196 | "\n", 197 | " return viterbi_score, viterbi\n", 198 | "\n", 199 | "\n", 200 | "emissions_list = emissions.tolist()\n", 201 | "transitions_list = transitions.tolist()\n", 202 | "\n", 203 | "viterbi_decoding_list(emissions_list, transitions_list)" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 179, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "name": "stdout", 213 | "output_type": "stream", 214 | "text": [ 215 | "123 µs ± 143 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", 216 | "36 µs ± 138 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" 217 | ] 218 | } 219 | ], 220 | "source": [ 221 | "%timeit viterbi_decoding_numpy(emissions, transitions)\n", 222 | "%timeit viterbi_decoding_list(emissions_list, transitions_list)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 180, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "words = [\"hello\", \"world\", \"how\", \"are\", \"you\"]" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 181, 237 | "metadata": {}, 238 | "outputs": [ 239 | { 240 | "data": { 241 | "text/plain": [ 242 | "[[7, 4, 11, 11, 14],\n", 243 | " [22, 14, 17, 11, 3],\n", 244 | " [7, 14, 22],\n", 245 | " [0, 17, 4],\n", 246 | " [24, 14, 20],\n", 247 | " [4, 4, 4, 4, 4]]" 248 | ] 249 | }, 250 | "execution_count": 181, 251 | "metadata": {}, 252 | "output_type": "execute_result" 253 | } 254 | ], 255 | "source": [ 256 | "words = [\"hello\", \"world\", \"how\", \"are\", \"you\", \"eeeee\"]\n", 257 | "lm = \"abcdefghijklmnopqrstuvwxyz\"\n", 258 | "tokens = [[lm.find(w) for w in word] for word in words]\n", 259 | "tokens" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 182, 265 | "metadata": { 266 | "scrolled": true 267 | }, 268 | "outputs": [], 269 | "source": [ 270 | "from collections import Counter\n", 271 | "\n", 272 | "\n", 273 | "def build_transitions(data_loader, n=2):\n", 274 | "\n", 275 | " # Count n-grams\n", 276 | " count = Counter()\n", 277 | " for label in data_loader:\n", 278 | " count += Counter(a for a in zip(*(label[i:] for i in range(n))))\n", 279 | " \n", 280 | " # Write as matrix \n", 281 | " transitions = zeros(len(lm), len(lm)) \n", 282 | " for (k1, k2), v in count.items():\n", 283 | " transitions[k1][k2] = v\n", 284 | "\n", 285 | " return transitions\n", 286 | "\n", 287 | "\n", 288 | "transitions = build_transitions(tokens)" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 183, 294 | "metadata": {}, 295 | "outputs": [ 296 | { 297 | "data": { 298 | "text/plain": [ 299 | "tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n", 300 | " 0, 0],\n", 301 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 302 | " 0, 0],\n", 303 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 304 | " 0, 0],\n", 305 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 306 | " 
0, 0],\n", 307 | " [0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 308 | " 0, 0],\n", 309 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 310 | " 0, 0],\n", 311 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 312 | " 0, 0],\n", 313 | " [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 314 | " 0, 0],\n", 315 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 316 | " 0, 0],\n", 317 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 318 | " 0, 0],\n", 319 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 320 | " 0, 0],\n", 321 | " [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 322 | " 0, 0],\n", 323 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 324 | " 0, 0],\n", 325 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 326 | " 0, 0],\n", 327 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,\n", 328 | " 0, 0],\n", 329 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 330 | " 0, 0],\n", 331 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 332 | " 0, 0],\n", 333 | " [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 334 | " 0, 0],\n", 335 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 336 | " 0, 0],\n", 337 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 338 | " 0, 0],\n", 339 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 340 | " 0, 0],\n", 341 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 342 | " 0, 0],\n", 343 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 344 | " 0, 0],\n", 345 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 346 | " 0, 0],\n", 347 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 348 | " 0, 0],\n", 349 | " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 350 | " 0, 0]])" 351 | ] 352 | }, 353 | "execution_count": 183, 354 | "metadata": {}, 355 | "output_type": "execute_result" 356 | } 357 | ], 358 | "source": [ 359 | "torch.tensor(transitions)" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": 184, 365 | "metadata": {}, 366 | "outputs": [ 367 | { 368 | "name": "stdout", 369 | "output_type": "stream", 370 | "text": [ 371 | "115 µs ± 338 ns per loop (mean ± std. dev. 
of 7 runs, 10000 loops each)\n" 372 | ] 373 | } 374 | ], 375 | "source": [ 376 | "%timeit build_transitions(tokens)" 377 | ] 378 | } 379 | ], 380 | "metadata": { 381 | "kernelspec": { 382 | "display_name": "Python 3", 383 | "language": "python", 384 | "name": "python3" 385 | }, 386 | "language_info": { 387 | "codemirror_mode": { 388 | "name": "ipython", 389 | "version": 3 390 | }, 391 | "file_extension": ".py", 392 | "mimetype": "text/x-python", 393 | "name": "python", 394 | "nbconvert_exporter": "python", 395 | "pygments_lexer": "ipython3", 396 | "version": "3.7.6" 397 | } 398 | }, 399 | "nbformat": 4, 400 | "nbformat_minor": 4 401 | } 402 | -------------------------------------------------------------------------------- /datasets.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Other examples of dataset:\n", 8 | "* [torchvision](https://github.com/pytorch/vision/blob/master/torchvision/datasets/mnist.py) and [here](https://github.com/pytorch/vision/blob/master/torchvision/datasets/utils.py)\n", 9 | "* generator for [tarballs](https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractfile) and [zip](https://docs.python.org/3/library/zipfile.html#zipfile.ZipFile.open)\n", 10 | "* [AsrDataset](https://github.com/pytorch/fairseq/blob/4812f64b651ab64881510d38d4e35ce4ce22b04f/examples/speech_recognition/data/asr_dataset.py#L14)" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 49, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import torch\n", 20 | "import torchvision \n", 21 | "import torchaudio\n", 22 | "\n", 23 | "import os\n", 24 | "import random\n", 25 | "from functools import reduce, partial\n", 26 | "from warnings import warn\n", 27 | "import pickle\n", 28 | "\n", 29 | "import six\n", 30 | "import csv\n", 31 | "import os\n", 32 | "import tarfile\n", 33 | "import logging\n", 34 | "import re\n", 35 | "import sys\n", 36 | "import zipfile" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 50, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "def get_data(URL):\n", 46 | " r = requests.get(URL)\n", 47 | " file_like_object = io.BytesIO(r.content)\n", 48 | " tar = tarfile.open(fileobj=file_like_object)\n", 49 | " d = {}\n", 50 | " for member in tar.getmembers():\n", 51 | " if member.isfile() and member.name.endswith('csv'):\n", 52 | " k = 'train' if 'train' in member.name else 'test'\n", 53 | " d[k] = tar.extractfile(member)\n", 54 | " return d" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 51, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "def unicode_csv_reader(unicode_csv_data, **kwargs):\n", 64 | " r\"\"\"Since the standard csv library does not handle unicode in Python 2, we need a wrapper.\n", 65 | " Borrowed and slightly modified from the Python docs:\n", 66 | " https://docs.python.org/2/library/csv.html#csv-examples\n", 67 | " Arguments:\n", 68 | " unicode_csv_data: unicode csv data (see example below)\n", 69 | " Examples:\n", 70 | " >>> from torchtext.utils import unicode_csv_reader\n", 71 | " >>> import io\n", 72 | " >>> with io.open(data_path, encoding=\"utf8\") as f:\n", 73 | " >>> reader = unicode_csv_reader(f)\n", 74 | " \"\"\"\n", 75 | "\n", 76 | " # Fix field larger than field limit error\n", 77 | " maxInt = sys.maxsize\n", 78 | " while True:\n", 79 | " # decrease the maxInt value by factor 10\n", 80 | " # as long as 
the OverflowError occurs.\n", 81 | " try:\n", 82 | " csv.field_size_limit(maxInt)\n", 83 | " break\n", 84 | " except OverflowError:\n", 85 | " maxInt = int(maxInt / 10)\n", 86 | " csv.field_size_limit(maxInt)\n", 87 | "\n", 88 | " if six.PY2:\n", 89 | " # csv.py doesn't do Unicode; encode temporarily as UTF-8:\n", 90 | " csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), **kwargs)\n", 91 | " for row in csv_reader:\n", 92 | " # decode UTF-8 back to Unicode, cell by cell:\n", 93 | " yield [cell.decode('utf-8') for cell in row]\n", 94 | " else:\n", 95 | " for line in csv.reader(unicode_csv_data, **kwargs):\n", 96 | " yield line" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "suggestions:\n", 104 | "* small functional\n", 105 | "* get length\n", 106 | "* shuffle\n", 107 | "* meaningful error on function mismatch\n", 108 | "* ~~cache or buffer~~\n", 109 | "* ~~generator~~\n", 110 | "* stream files from disk or web\n", 111 | "* stream archives\n", 112 | "* ~~no compose function~~\n", 113 | "* ~~currie instead of partial?~~" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "# Common tools" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 52, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "class Cache:\n", 130 | " \"\"\"\n", 131 | " Wrap a generator so that, whenever a new item is returned, it is saved to disk in a pickle.\n", 132 | " \"\"\"\n", 133 | "\n", 134 | " def __init__(self, generator, location):\n", 135 | " self.generator = generator\n", 136 | " self.location = location\n", 137 | "\n", 138 | " self._id = id(self)\n", 139 | " self._cache = []\n", 140 | " self._internal_index = 0\n", 141 | "\n", 142 | " def __iter__(self):\n", 143 | " self._internal_index = 0\n", 144 | " return self\n", 145 | "\n", 146 | " def __next__(self):\n", 147 | " if self._internal_index < len(self):\n", 148 | " item = self[self._internal_index]\n", 149 | " else:\n", 150 | " item = next(self.generator)\n", 151 | " \n", 152 | " file = str(self._id) + \"-\" + str(len(self))\n", 153 | " file = os.path.join(self.location, file)\n", 154 | " self._cache.append(file)\n", 155 | " \n", 156 | " os.makedirs(self.location, exist_ok=True)\n", 157 | " with open(file, 'wb') as file:\n", 158 | " pickle.dump(item, file)\n", 159 | "\n", 160 | " self._internal_index += 1\n", 161 | " return item\n", 162 | " \n", 163 | " def __getitem__(self, index):\n", 164 | " file = self._cache[index]\n", 165 | " with open(file, 'rb') as file:\n", 166 | " item = pickle.load(file)\n", 167 | " return item\n", 168 | " \n", 169 | " def __len__(self):\n", 170 | " # Return length of cache\n", 171 | " return len(self._cache)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 53, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "class Buffer:\n", 181 | " \"\"\"\n", 182 | " Wrap a generator so as to keep the last few in memory.\n", 183 | " \"\"\"\n", 184 | " \n", 185 | " def __init__(self, generator, capacity=10):\n", 186 | " self.generator = generator\n", 187 | " self.capacity = capacity\n", 188 | " self._cache = []\n", 189 | " self._fill()\n", 190 | " \n", 191 | " def _fill(self):\n", 192 | " while len(self._cache) <= self.capacity:\n", 193 | " self._cache.append(next(self.generator))\n", 194 | " \n", 195 | " def __getitem__(self, n):\n", 196 | " self._fill()\n", 197 | " return self._cache[n]\n", 198 | " \n", 199 | " def __iter__(self):\n", 200 | " return 
self\n", 201 | " \n", 202 | " def __next__(self):\n", 203 | " item = self._cache.pop(0)\n", 204 | " self._fill()\n", 205 | " return item" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 54, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "def download_to_file(urls, root_path):\n", 215 | " \"\"\"\n", 216 | " Download each url to root_path.\n", 217 | " \n", 218 | " Input: url generator, folder inside archive\n", 219 | " Output: downloaded archive, folder inside archive\n", 220 | " \"\"\"\n", 221 | " for url, folder in urls:\n", 222 | " # torchvision.datasets.utils.download_url(url, root_path)\n", 223 | " file = os.path.join(root_path, os.path.basename(url))\n", 224 | " yield file, folder\n", 225 | " \n", 226 | " \n", 227 | "def extract(files):\n", 228 | " \"\"\"\n", 229 | " Extract each archive to their respective folder.\n", 230 | " \n", 231 | " Input: (url, folder name inside archive) generator\n", 232 | " Output: path to inside archive\n", 233 | " \"\"\"\n", 234 | " for file, folder in files:\n", 235 | " # torchvision.datasets.utils.extract_archive(file)\n", 236 | " path = os.path.dirname(file)\n", 237 | " path = os.path.join(path, folder)\n", 238 | " yield path\n", 239 | " \n", 240 | " \n", 241 | "def walk(paths, extension):\n", 242 | " \"\"\"\n", 243 | " Walk inside a path recursively to find all files with given extension.\n", 244 | " \n", 245 | " Input: path\n", 246 | " Output: path, file name identifying a row of data\n", 247 | " \"\"\"\n", 248 | " for path in paths:\n", 249 | " for dp, dn, fn in os.walk(path):\n", 250 | " for f in fn:\n", 251 | " if extension in f:\n", 252 | " yield path, f\n", 253 | "\n", 254 | " \n", 255 | "def shuffle(generator):\n", 256 | " \"\"\"\n", 257 | " Shuffle the order of a generator.\n", 258 | " \n", 259 | " Input: generator\n", 260 | " Output: generator\n", 261 | " \"\"\"\n", 262 | "\n", 263 | " # Load whole generator in memory\n", 264 | " generator = list(generator)\n", 265 | " # print(len(generator))\n", 266 | " random.shuffle(generator)\n", 267 | " for g in generator:\n", 268 | " yield g\n", 269 | "\n", 270 | " \n", 271 | "def filtering(fileids, reference):\n", 272 | " \"\"\"\n", 273 | " Skip fileids that are not present in given reference file.\n", 274 | " \n", 275 | " Output: (path, file) generator, reference file\n", 276 | " Output: path, file\n", 277 | " \"\"\"\n", 278 | " \n", 279 | " path_old = \"\"\n", 280 | " \n", 281 | " for path, fileid in fileids:\n", 282 | " \n", 283 | " # Check if same path to avoid reloading the file constantly\n", 284 | " if path != path_old:\n", 285 | " ref = os.path.join(path, reference)\n", 286 | " with open(ref) as ref:\n", 287 | " r = \"\".join(ref.readlines())\n", 288 | " path_old = path\n", 289 | "\n", 290 | " # It would be more efficient to loop through the reference file instead\n", 291 | " if fileid in r:\n", 292 | " yield path, fileid" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "# YesNo\n", 300 | "\n", 301 | "[original](https://www.openslr.org/1/), [torchaudio](https://pytorch.org/audio/_modules/torchaudio/datasets/yesno.html)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 55, 307 | "metadata": {}, 308 | "outputs": [ 309 | { 310 | "data": { 311 | "text/plain": [ 312 | "{'label': ['0', '1', '1', '1', '1', '1', '1', '1'],\n", 313 | " 'waveform': tensor([[3.0518e-05, 6.1035e-05, 3.0518e-05, ..., 2.7466e-03, 1.8005e-03,\n", 314 | " 2.2888e-03]]),\n", 315 | " 'sample_rate': 
8000}" 316 | ] 317 | }, 318 | "execution_count": 55, 319 | "metadata": {}, 320 | "output_type": "execute_result" 321 | } 322 | ], 323 | "source": [ 324 | "def load_yesno(fileids):\n", 325 | " \"\"\"\n", 326 | " Load data corresponding to each YESNO fileids.\n", 327 | " \n", 328 | " Input: path, file name identifying a row of data\n", 329 | " Output: label, waveform, sample_rate\n", 330 | " \"\"\"\n", 331 | " \n", 332 | " extension = \".wav\"\n", 333 | " for path, fileid in fileids:\n", 334 | " file = os.path.join(path, fileid)\n", 335 | " waveform, sample_rate = torchaudio.load(file)\n", 336 | " label = os.path.basename(fileid).split(\".\")[0].split(\"_\")\n", 337 | " \n", 338 | " yield {\n", 339 | " \"label\": label,\n", 340 | " \"waveform\": waveform,\n", 341 | " \"sample_rate\": sample_rate,\n", 342 | " }\n", 343 | " \n", 344 | "\n", 345 | "def YESNO(root):\n", 346 | " \"\"\"\n", 347 | " Cache a pipeline loading YESNO.\n", 348 | " \"\"\"\n", 349 | " \n", 350 | " url = [\n", 351 | " (\"http://www.openslr.org/resources/1/waves_yesno.tar.gz\", \"waves_yesno\")\n", 352 | " ]\n", 353 | " \n", 354 | " path = download(url, root_path=root)\n", 355 | " path = extract(path)\n", 356 | " path = walk(path, extension=\".wav\")\n", 357 | " path = shuffle(path)\n", 358 | " data = load_yesno(path)\n", 359 | " \n", 360 | " # return Buffer(data)\n", 361 | " # return Cache(data, \"tmp/\")\n", 362 | " return data\n", 363 | "\n", 364 | "\n", 365 | "data = YESNO(\"/Users/vincentqb/yesnotest\")\n", 366 | "\n", 367 | "next(data)" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 56, 373 | "metadata": {}, 374 | "outputs": [ 375 | { 376 | "data": { 377 | "text/plain": [ 378 | "{'label': ['1', '1', '1', '0', '1', '0', '1', '0'],\n", 379 | " 'waveform': tensor([[ 0.0016, 0.0017, 0.0016, ..., -0.0016, -0.0010, -0.0002]]),\n", 380 | " 'sample_rate': 8000}" 381 | ] 382 | }, 383 | "execution_count": 56, 384 | "metadata": {}, 385 | "output_type": "execute_result" 386 | } 387 | ], 388 | "source": [ 389 | "next(data)" 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": {}, 395 | "source": [ 396 | "# VCTK\n", 397 | "\n", 398 | "[original](https://datashare.is.ed.ac.uk/handle/10283/2651), [torchaudio](https://pytorch.org/audio/datasets.html?highlight=dataset#torchaudio.datasets.VCTK)" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 57, 404 | "metadata": {}, 405 | "outputs": [ 406 | { 407 | "data": { 408 | "text/plain": [ 409 | "{'id': 'p231_181',\n", 410 | " 'content': 'I am not ready to walk away.\\n',\n", 411 | " 'waveform': tensor([[-0.0117, -0.0173, -0.0150, ..., 0.0106, 0.0099, 0.0113]]),\n", 412 | " 'sample_rate': 48000}" 413 | ] 414 | }, 415 | "execution_count": 57, 416 | "metadata": {}, 417 | "output_type": "execute_result" 418 | } 419 | ], 420 | "source": [ 421 | "def load_vctk(fileids):\n", 422 | " \"\"\"\n", 423 | " Load data corresponding to each VCTK fileids.\n", 424 | "\n", 425 | " Input: path, file name identifying a row of data\n", 426 | " Output: id, content, waveform, sample_rate\n", 427 | " \"\"\"\n", 428 | " \n", 429 | " txt_folder = \"txt\"\n", 430 | " txt_extension = \".txt\"\n", 431 | " \n", 432 | " audio_folder = \"wav48\"\n", 433 | " audio_extension = \".wav\"\n", 434 | " \n", 435 | " for path, fileid in fileids:\n", 436 | " \n", 437 | " fileid = os.path.basename(fileid).split(\".\")[0]\n", 438 | " folder = fileid.split(\"_\")[0]\n", 439 | " txt_file = os.path.join(path, txt_folder, folder, fileid + txt_extension) \n", 440 
| " audio_file = os.path.join(path, audio_folder, folder, fileid + audio_extension) \n", 441 | " \n", 442 | " try:\n", 443 | " with open(txt_file) as txt_file:\n", 444 | " content = txt_file.readlines()[0]\n", 445 | " except FileNotFoundError:\n", 446 | " warn(\"Translation not found for {}\".format(audio_file))\n", 447 | " # warn(\"File not found: {}\".format(txt_file))\n", 448 | " continue\n", 449 | "\n", 450 | " waveform, sample_rate = torchaudio.load(audio_file)\n", 451 | " \n", 452 | " yield {\n", 453 | " \"id\": fileid,\n", 454 | " \"content\": content,\n", 455 | " \"waveform\": waveform,\n", 456 | " \"sample_rate\": sample_rate,\n", 457 | " }\n", 458 | " \n", 459 | " \n", 460 | "def VCTK(root):\n", 461 | " \"\"\"\n", 462 | " Cache a pipeline loading VCTK.\n", 463 | " \"\"\"\n", 464 | " \n", 465 | " url = [\n", 466 | " ('http://homepages.inf.ed.ac.uk/jyamagis/release/VCTK-Corpus.tar.gz', \"VCTK-Corpus/\")\n", 467 | " ]\n", 468 | " \n", 469 | " path = download(url, root_path=root)\n", 470 | " path = extract(path)\n", 471 | " path = walk(path, extension=\".wav\")\n", 472 | " path = shuffle(path)\n", 473 | " data = load_vctk(path)\n", 474 | " \n", 475 | " # return Cache(data, \"tmp/\")\n", 476 | " return data\n", 477 | "\n", 478 | "\n", 479 | "data = VCTK(\"/Users/vincentqb/vctktest/\")\n", 480 | "\n", 481 | "next(data)" 482 | ] 483 | }, 484 | { 485 | "cell_type": "markdown", 486 | "metadata": {}, 487 | "source": [ 488 | "# LibriSpeech\n", 489 | "\n", 490 | "[original](http://www.openslr.org/12)" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": 58, 496 | "metadata": {}, 497 | "outputs": [ 498 | { 499 | "data": { 500 | "text/plain": [ 501 | "{'id': '7850-73752-0015',\n", 502 | " 'content': 'WAS IT NOT ALL A DREAM OF HIS OWN CREATION WHILE HIS EYE HAD BEEN FIXED IN ABSTRACTION ON THAT BRIGHT AND FLOWING RIVER',\n", 503 | " 'waveform': tensor([[-0.0017, -0.0019, -0.0016, ..., 0.0017, 0.0018, 0.0015]]),\n", 504 | " 'sample_rate': 16000}" 505 | ] 506 | }, 507 | "execution_count": 58, 508 | "metadata": {}, 509 | "output_type": "execute_result" 510 | } 511 | ], 512 | "source": [ 513 | "def load_librispeech(fileids):\n", 514 | " \"\"\"\n", 515 | " Load data corresponding to each LIBRISPEECH fileids.\n", 516 | " \n", 517 | " Input: path, file name identifying a row of data\n", 518 | " Output: id, waveform, sample_rate, translation\n", 519 | " \"\"\"\n", 520 | " \n", 521 | " text_extension = \".trans.txt\"\n", 522 | " audio_extension = \".flac\"\n", 523 | " for data_path, fileid in fileids:\n", 524 | " fileid = os.path.basename(fileid).split(\".\")[0]\n", 525 | " folder1, folder2, file = fileid.split(\"-\")\n", 526 | " file_text = folder1 + \"-\" + folder2 + text_extension\n", 527 | " file_text = os.path.join(data_path, folder1, folder2, file_text)\n", 528 | " file_audio = folder1 + \"-\"+ folder2 + \"-\" + file + audio_extension\n", 529 | " file_audio = os.path.join(data_path, folder1, folder2, file_audio)\n", 530 | " waveform, sample_rate = torchaudio.load(file_audio)\n", 531 | " \n", 532 | " found = False\n", 533 | " for line in open(file_text):\n", 534 | " fileid_text, content = line.strip().split(\" \", 1)\n", 535 | " if fileid == fileid_text:\n", 536 | " found = True\n", 537 | " break\n", 538 | " if not found:\n", 539 | " from warnings import warn\n", 540 | " warn(\"Translation not found for {}.\".format(fileid))\n", 541 | " continue\n", 542 | "\n", 543 | " yield {\n", 544 | " \"id\": fileid,\n", 545 | " \"content\": content,\n", 546 | " \"waveform\": 
waveform,\n", 547 | " \"sample_rate\": sample_rate,\n", 548 | " }\n", 549 | " \n", 550 | "\n", 551 | "def LIBRISPEECH(root, selection=\"dev-clean\"):\n", 552 | " \"\"\"\n", 553 | " Cache a pipeline loading LIBRISPEECH.\n", 554 | " \"\"\"\n", 555 | " \n", 556 | " # http://www.openslr.org/resources/12/dev-clean.tar.gz\n", 557 | " # http://www.openslr.org/resources/12/test-clean.tar.gz\n", 558 | " # http://www.openslr.org/resources/12/test-other.tar.gz\n", 559 | " # http://www.openslr.org/resources/12/train-clean-100.tar.gz\n", 560 | " # http://www.openslr.org/resources/12/train-clean-360.tar.gz\n", 561 | " # http://www.openslr.org/resources/12/train-other-500.tar.gz\n", 562 | "\n", 563 | " selections = [\n", 564 | " \"dev-clean\",\n", 565 | " \"test-clean\",\n", 566 | " \"test-other\",\n", 567 | " \"train-clean-100\",\n", 568 | " \"train-clean-360\",\n", 569 | " \"train-other-500\"\n", 570 | " ]\n", 571 | " \n", 572 | " base = \"http://www.openslr.org/resources/12/\"\n", 573 | " url = [\n", 574 | " (os.path.join(base, selection + \".tar.gz\"), os.path.join(\"LibriSpeech\", selection))\n", 575 | " ]\n", 576 | " \n", 577 | " path = download(url, root_path=root)\n", 578 | " path = extract(path)\n", 579 | " path = walk(path, extension=\".flac\")\n", 580 | " path = shuffle(path)\n", 581 | " data = load_librispeech(path)\n", 582 | " \n", 583 | " # return Cache(data, \"tmp/\")\n", 584 | " return data\n", 585 | "\n", 586 | "\n", 587 | "data = LIBRISPEECH(\"/Users/vincentqb/librispeechtest/\")\n", 588 | "\n", 589 | "next(data)" 590 | ] 591 | }, 592 | { 593 | "cell_type": "markdown", 594 | "metadata": {}, 595 | "source": [ 596 | "# CommonVoice\n", 597 | "\n", 598 | "[original](https://voice.mozilla.org/en/datasets)" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": 61, 604 | "metadata": {}, 605 | "outputs": [ 606 | { 607 | "data": { 608 | "text/plain": [ 609 | "{'client_id': '11d5e99f7bd5b4f8492a06bb1ec22aa9110bba6ea9918f2a9adec05d686304d568ab7063daf8915d3fccfb4dd44b81646bd13a33ca130ac4014560bba4c2db0b',\n", 610 | " 'path': 'common_voice_tt_17531596.mp3',\n", 611 | " 'sentence': 'Мин анда ялгыз бара алмам бит.',\n", 612 | " 'up_votes': '2',\n", 613 | " 'down_votes': '0',\n", 614 | " 'age': 'thirties',\n", 615 | " 'gender': 'male',\n", 616 | " 'accent': '',\n", 617 | " 'waveform': tensor([[ 0.0000e+00, 0.0000e+00, 0.0000e+00, ..., -2.8685e-07,\n", 618 | " -2.3097e-06, -2.8796e-06]]),\n", 619 | " 'sample_rate': 48000}" 620 | ] 621 | }, 622 | "execution_count": 61, 623 | "metadata": {}, 624 | "output_type": "execute_result" 625 | } 626 | ], 627 | "source": [ 628 | "def load_commonvoice(fileids, tsv_file):\n", 629 | " \"\"\"\n", 630 | " Load data corresponding to each COMMONVOICE fileids.\n", 631 | " \n", 632 | " Input: path, file name identifying a row of data\n", 633 | " Output: client_id, path, sentence, up_votes, down_votes, age, gender, accent, waveform, sample_rate\n", 634 | " \"\"\"\n", 635 | " \n", 636 | " for path, fileid in fileids:\n", 637 | " filename = os.path.join(path, \"clips\", fileid)\n", 638 | " tsv = os.path.join(path, tsv_file)\n", 639 | "\n", 640 | " found = False\n", 641 | " with open(tsv) as tsv:\n", 642 | " first_line = True\n", 643 | " for line in unicode_csv_reader(tsv, delimiter='\\t'):\n", 644 | " if first_line:\n", 645 | " header = line\n", 646 | " first_line = False\n", 647 | " continue\n", 648 | " if fileid in line:\n", 649 | " found = True\n", 650 | " break\n", 651 | " if not found:\n", 652 | " continue\n", 653 | "\n", 654 | " waveform, 
sample_rate = torchaudio.load(filename) \n", 655 | "\n", 656 | " dic = dict(zip(header, line))\n", 657 | " dic[\"waveform\"] = waveform\n", 658 | " dic[\"sample_rate\"] = sample_rate\n", 659 | "\n", 660 | " yield dic\n", 661 | "\n", 662 | "\n", 663 | "def COMMONVOICE(root, language=\"tatar\", tsv=\"train.tsv\"):\n", 664 | " \"\"\"\n", 665 | " Cache a pipeline loading COMMONVOICE.\n", 666 | " \"\"\"\n", 667 | " \n", 668 | " web = \"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-3/\"\n", 669 | "\n", 670 | " languages = {\n", 671 | " \"tatar\": \"tt\",\n", 672 | " \"english\": \"en\",\n", 673 | " \"german\": \"de\",\n", 674 | " \"french\": \"fr\",\n", 675 | " \"welsh\": \"cy\",\n", 676 | " \"breton\": \"br\",\n", 677 | " \"chuvash\": \"cv\",\n", 678 | " \"turkish\": \"tr\",\n", 679 | " \"kyrgyz\": \"ky\",\n", 680 | " \"irish\": \"ga-IE\",\n", 681 | " \"kabyle\": \"kab\",\n", 682 | " \"catalan\": \"ca\",\n", 683 | " \"taiwanese\": \"zh-TW\",\n", 684 | " \"slovenian\": \"sl\",\n", 685 | " \"italian\": \"it\",\n", 686 | " \"dutch\": \"nl\",\n", 687 | " \"hakha chin\": \"cnh\",\n", 688 | " \"esperanto\": \"eo\",\n", 689 | " \"estonian\": \"et\",\n", 690 | " \"persian\": \"fa\",\n", 691 | " \"basque\": \"eu\",\n", 692 | " \"spanish\": \"es\",\n", 693 | " \"chinese\": \"zh-CN\",\n", 694 | " \"mongolian\": \"mn\",\n", 695 | " \"sakha\": \"sah\",\n", 696 | " \"dhivehi\": \"dv\",\n", 697 | " \"kinyarwanda\": \"rw\",\n", 698 | " \"swedish\": \"sv-SE\",\n", 699 | " \"russian\": \"ru\",\n", 700 | " }\n", 701 | "\n", 702 | " url = web + languages[language] + \".tar.gz\"\n", 703 | " url = [(url, \"\")]\n", 704 | " \n", 705 | " path = download(url, root_path=root)\n", 706 | " path = extract(path)\n", 707 | " path = walk(path, extension=\".mp3\")\n", 708 | " # path = shuffle(path)\n", 709 | " # path = filtering(path, reference=tsv)\n", 710 | " data = load_commonvoice(path, tsv)\n", 711 | " \n", 712 | " # return Cache(data, \"tmp/\")\n", 713 | " return data\n", 714 | "\n", 715 | "\n", 716 | "data = COMMONVOICE(\"/Users/vincentqb/commonvoicetest/\")\n", 717 | "\n", 718 | "next(data)" 719 | ] 720 | } 721 | ], 722 | "metadata": { 723 | "kernelspec": { 724 | "display_name": "Python 3", 725 | "language": "python", 726 | "name": "python3" 727 | }, 728 | "language_info": { 729 | "codemirror_mode": { 730 | "name": "ipython", 731 | "version": 3 732 | }, 733 | "file_extension": ".py", 734 | "mimetype": "text/x-python", 735 | "name": "python", 736 | "nbconvert_exporter": "python", 737 | "pygments_lexer": "ipython3", 738 | "version": "3.7.4" 739 | } 740 | }, 741 | "nbformat": 4, 742 | "nbformat_minor": 2 743 | } 744 | -------------------------------------------------------------------------------- /PySpeech.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "[example](https://github.com/LearnedVector/Wav2Letter/blob/master/Google%20Speech%20Command%20Example.ipynb)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 70, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "# python train.py\n", 17 | "# $DIR_FOR_PREPROCESSED_DATA\n", 18 | "# --save-dir $MODEL_PATH\n", 19 | "# --max-epoch 80\n", 20 | "# --task speech_recognition\n", 21 | "# --arch vggtransformer_2\n", 22 | "# --optimizer adadelta\n", 23 | "# --lr 1.0\n", 24 | "# --adadelta-eps 1e-8\n", 25 | "# --adadelta-rho 0.95\n", 26 | "# --clip-norm 10.0\n", 27 | 
"# --max-tokens 5000\n", 28 | "# --log-format json\n", 29 | "# --log-interval 1\n", 30 | "# --criterion cross_entropy_acc\n", 31 | "# --user-dir examples/speech_recognition/" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 71, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "import torch\n", 41 | "import torchaudio\n", 42 | "from torchaudio.datasets import LIBRISPEECH, SPEECHCOMMANDS\n" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 72, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "torchaudio.set_audio_backend(\"soundfile\")" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 73, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "from torchaudio.datasets import LIBRISPEECH\n", 61 | "# waveform, sample_rate, utterance, speaker_id, chapter_id, utterance_id\n", 62 | "\n", 63 | "class SAFE_LIBRISPEECH(LIBRISPEECH):\n", 64 | "\n", 65 | " def __getitem__(self, n):\n", 66 | " try:\n", 67 | " return super().__getitem__(n)\n", 68 | " except (FileNotFoundError, RuntimeError):\n", 69 | " return None\n", 70 | " \n", 71 | " def __next__(self):\n", 72 | " try:\n", 73 | " return super().__next__()\n", 74 | " except (FileNotFoundError, RuntimeError):\n", 75 | " return self.__next__()\n", 76 | "\n", 77 | " \n", 78 | "def datasets():\n", 79 | "\n", 80 | " folder_in_archive = 'LibriSpeech'\n", 81 | " download = True\n", 82 | " root = \"./\"\n", 83 | "\n", 84 | " print(\"train\")\n", 85 | " dataset1 = SAFE_LIBRISPEECH(root, url='train-clean-100', folder_in_archive=folder_in_archive, download=download)\n", 86 | " # print(dataset1[0])\n", 87 | " dataset2 = SAFE_LIBRISPEECH(root, url='train-clean-360', folder_in_archive=folder_in_archive, download=download)\n", 88 | " # dataset3 = SAFE_LIBRISPEECH(root, url='train-other-500', folder_in_archive=folder_in_archive, download=download)\n", 89 | " # train = torch.utils.data.ConcatDataset([dataset1, dataset2, dataset3])\n", 90 | " train = torch.utils.data.ConcatDataset([dataset1, dataset2])\n", 91 | " # print(train[0])\n", 92 | "\n", 93 | " print(\"valid\")\n", 94 | " dataset1 = SAFE_LIBRISPEECH(root, url='dev-clean', folder_in_archive=folder_in_archive, download=download)\n", 95 | " dataset2 = SAFE_LIBRISPEECH(root, url='dev-other', folder_in_archive=folder_in_archive, download=download)\n", 96 | " valid = torch.utils.data.ConcatDataset([dataset1, dataset2])\n", 97 | "\n", 98 | " print(\"test\")\n", 99 | " dataset1 = SAFE_LIBRISPEECH(root, url='test-other', folder_in_archive=folder_in_archive, download=download)\n", 100 | " dataset2 = SAFE_LIBRISPEECH(root, url='test-clean', folder_in_archive=folder_in_archive, download=download)\n", 101 | " test = torch.utils.data.ConcatDataset([dataset1, dataset2])\n", 102 | "\n", 103 | " return train, valid, test" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "class PROCESSED_SPEECHCOMMANDS(SPEECHCOMMANDS):\n", 113 | " def __getitem__(self, n):\n", 114 | " return super().__getitem__(n)\n", 115 | "\n", 116 | " def __next__(self):\n", 117 | " return super().__next__()" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "class MemoryCache(Dataset):\n", 127 | " \"\"\"\n", 128 | " Wrap a dataset so that, whenever a new item is returned, it is saved to disk.\n", 129 | " \"\"\"\n", 130 | " \n", 131 | " def __init__(self, 
dataset):\n", 132 | " self.dataset = dataset \n", 133 | " self.location = location\n", 134 | " \n", 135 | " self._id = id(self) \n", 136 | " self._cache = [None] * len(dataset)\n", 137 | " \n", 138 | " def __getitem__(self, n): \n", 139 | " if self._cache[n]: \n", 140 | " return self._cache[n] \n", 141 | " \n", 142 | " item = self.dataset[n] \n", 143 | " self._cache[n] = item \n", 144 | " \n", 145 | " return item \n", 146 | " \n", 147 | " def __len__(self): \n", 148 | " return len(self.dataset) " 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 74, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "# waveform, sample_rate, label, speaker_id, utterance_number\n", 158 | "\n", 159 | "def datasets():\n", 160 | "\n", 161 | " download = True\n", 162 | " root = \"./\"\n", 163 | "\n", 164 | " print(\"train\")\n", 165 | " dataset1 = SPEECHCOMMANDS(root, download=download)\n", 166 | " dataset1 = MemoryCache(dataset1)\n", 167 | "\n", 168 | " return dataset1, None, None" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 75, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "name": "stdout", 178 | "output_type": "stream", 179 | "text": [ 180 | "train\n" 181 | ] 182 | } 183 | ], 184 | "source": [ 185 | "train, valid, test = datasets()" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 76, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "# spm_train \n", 195 | "# --input=data/lang_char/input.txt\n", 196 | "# --vocab_size=${nbpe}\n", 197 | "# --model_type=${bpemode}\n", 198 | "# --model_prefix=${bpemodel}\n", 199 | "# --input_sentence_size=100000000\n", 200 | "# --unk_id =3\n", 201 | "# --eos_id=2\n", 202 | "# --pad_id=1\n", 203 | "# --bos_id=-1\n", 204 | "# --character_coverage=1" 205 | ] 206 | }, 207 | { 208 | "cell_type": "raw", 209 | "metadata": { 210 | "scrolled": false 211 | }, 212 | "source": [ 213 | "vocab_size = 5000\n", 214 | "nbpe = vocab_size\n", 215 | "bpemode = \"unigram\"\n", 216 | "input_sentence_size = 100000000\n", 217 | "fname = 'data_lang_char_input.txt'\n", 218 | "bpemodel = \"data_lang_char_train_\" + bpemode + \"_\" + str(nbpe)\n", 219 | "\n", 220 | "with open(fname, 'a') as f:\n", 221 | " # FIXME Load audio along with text\n", 222 | " for u in train:\n", 223 | " if u:\n", 224 | " f.write(u[2] + \"\\n\")\n", 225 | " f.write(\" 3\")\n", 226 | " f.write(\" 2\")\n", 227 | " f.write(\" 1\")\n", 228 | "\n", 229 | "import sentencepiece as spm\n", 230 | "spm.SentencePieceTrainer.Train(\n", 231 | " f\"--input={fname} \"\n", 232 | " f\"--vocab_size={nbpe} \"\n", 233 | " f\"--model_type={bpemode} \"\n", 234 | " f\"--model_prefix={bpemodel} \"\n", 235 | " f\"--input_sentence_size={input_sentence_size} \"\n", 236 | " f\"--unk_id=3 \"\n", 237 | " f\"--eos_id=2 \"\n", 238 | " f\"--pad_id=1 \"\n", 239 | " f\"--bos_id=-1 \"\n", 240 | " f\"--character_coverage=1\"\n", 241 | ")" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 77, 247 | "metadata": {}, 248 | "outputs": [ 249 | { 250 | "name": "stdout", 251 | "output_type": "stream", 252 | "text": [ 253 | "▁T his ▁ is ▁ a ▁ test\n", 254 | "[640, 3, 394, 3, 394, 3, 394, 3]\n" 255 | ] 256 | } 257 | ], 258 | "source": [ 259 | "import sentencepiece as spm\n", 260 | "sp = spm.SentencePieceProcessor()\n", 261 | "# sp.Load(bpemodel + \".model\")\n", 262 | "sp.Load(\"/Users/vincentqb/spm.model\")\n", 263 | "\n", 264 | "token = sp.encode_as_pieces(\"This is a test\")\n", 265 | "token = \" \".join(token)\n", 266 
| "\n", 267 | "print(token)\n", 268 | "\n", 269 | "token = sp.encode_as_ids(\"This is a test\")\n", 270 | "# token = \" \".join(str(token))\n", 271 | "\n", 272 | "print(token)" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 79, 278 | "metadata": {}, 279 | "outputs": [ 280 | { 281 | "data": { 282 | "text/plain": [ 283 | "['h', 'o', 'u', 's', 'e', '*', '*', '*']" 284 | ] 285 | }, 286 | "execution_count": 79, 287 | "metadata": {}, 288 | "output_type": "execute_result" 289 | } 290 | ], 291 | "source": [ 292 | "labels = [\n", 293 | " '-', '*', 'right', 'eight', 'cat', 'tree', 'bed', 'happy', 'go', 'dog', 'no', \n", 294 | " 'wow', 'nine', 'left', 'stop', 'three', 'sheila', 'one', 'bird', 'zero',\n", 295 | " 'seven', 'up', 'marvin', 'two', 'house', 'down', 'six', 'yes', 'on', \n", 296 | " 'five', 'off', 'four',\n", 297 | "]\n", 298 | "\n", 299 | "labels = [\n", 300 | " '-', '*',\n", 301 | " \"backward\",\n", 302 | " \"bed\",\n", 303 | " \"bird\",\n", 304 | " \"cat\",\n", 305 | " \"dog\",\n", 306 | " \"down\",\n", 307 | " \"eight\",\n", 308 | " \"five\",\n", 309 | " \"follow\",\n", 310 | " \"forward\",\n", 311 | " \"four\",\n", 312 | " \"go\",\n", 313 | " \"happy\",\n", 314 | " \"house\",\n", 315 | " \"learn\",\n", 316 | " \"left\",\n", 317 | " \"marvin\",\n", 318 | " \"nine\",\n", 319 | " \"no\",\n", 320 | " \"off\",\n", 321 | " \"on\",\n", 322 | " \"one\",\n", 323 | " \"right\",\n", 324 | " \"seven\",\n", 325 | " \"sheila\",\n", 326 | " \"six\",\n", 327 | " \"stop\",\n", 328 | " \"three\",\n", 329 | " \"tree\",\n", 330 | " \"two\",\n", 331 | " \"up\",\n", 332 | " \"visual\",\n", 333 | " \"wow\",\n", 334 | " \"yes\",\n", 335 | " \"zero\",\n", 336 | "]\n", 337 | "\n", 338 | "import collections\n", 339 | "\n", 340 | "\n", 341 | "def build_mapping(labels):\n", 342 | " labels = list(collections.OrderedDict.fromkeys(list(\"\".join(labels))))\n", 343 | " enumerated = list(enumerate(labels))\n", 344 | " flipped = [(sub[1], sub[0]) for sub in enumerated]\n", 345 | "\n", 346 | " d1 = collections.OrderedDict(enumerated)\n", 347 | " d2 = collections.OrderedDict(flipped)\n", 348 | " return {**d1, **d2}\n", 349 | "\n", 350 | "def padding(l, max_length, fillwith):\n", 351 | " return l + [fillwith] * (max_length-len(l))\n", 352 | "\n", 353 | "def map_with_dict(mapping, l):\n", 354 | " return [mapping[t] for t in l]\n", 355 | "\n", 356 | "def apply_with_padding(l, mapping, max_length, fillwith):\n", 357 | " l = map_with_dict(mapping, l)\n", 358 | " l = padding(l, max_length, mapping[\"*\"])\n", 359 | " return l\n", 360 | "\n", 361 | "\n", 362 | "test = \"house\"\n", 363 | "max_length = max(map(len, labels))\n", 364 | "vocab_size = len(labels) + 2\n", 365 | "\n", 366 | "mapping = build_mapping(labels)\n", 367 | "\n", 368 | "# test = apply(mapping, test)\n", 369 | "# test = padding(test, max_length, mapping[\"*\"])\n", 370 | "\n", 371 | "encode = lambda l: apply_with_padding(l, mapping, max_length, mapping[\"*\"])\n", 372 | "decode = lambda l: apply_with_padding(l, mapping, max_length, mapping[1])\n", 373 | "\n", 374 | "decode(encode(test))" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "metadata": {}, 381 | "outputs": [], 382 | "source": [ 383 | "from torchaudio.transforms import MFCC\n", 384 | "\n", 385 | "num_features = 13\n", 386 | "\n", 387 | "melkwargs = {\n", 388 | " 'n_fft': 512,\n", 389 | " 'n_mels': 20,\n", 390 | " 'hop_length': 80,\n", 391 | "}\n", 392 | "\n", 393 | "mfcc = MFCC(sample_rate=16000, n_mfcc=num_features, 
melkwargs=melkwargs)\n", 394 | "\n", 395 | "# audio, self.sr, window_stride=(160, 80),\n", 396 | "# fft_size=512, num_filt=20, num_coeffs=13\n", 397 | "\n", 398 | "def process_waveform(waveform):\n", 399 | " # pick first channel, apply mfcc, tranpose for pad_sequence\n", 400 | " return mfcc(waveform)[0, ...].transpose(0, -1)\n", 401 | "\n", 402 | "def process_target(target):\n", 403 | "\n", 404 | " # targets = []\n", 405 | " # for b in batch:\n", 406 | " # if b:\n", 407 | " # token = sp.encode_as_pieces(b[2])\n", 408 | " # print(len(token))\n", 409 | " # token = \" \".join(token)\n", 410 | " # targets.append(token)\n", 411 | "\n", 412 | " # return \" \".join(sp.encode_as_ids(target))\n", 413 | " \n", 414 | " # return torch.IntTensor(sp.encode_as_ids(target))\n", 415 | " # print(target)\n", 416 | " return torch.IntTensor(encode(target))" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": 80, 422 | "metadata": {}, 423 | "outputs": [], 424 | "source": [ 425 | "from torch.utils.data import DataLoader\n", 426 | "from random import randint\n", 427 | "\n", 428 | "\n", 429 | "\n", 430 | "def collate_fn(batch):\n", 431 | "\n", 432 | " tensors = [process_waveform(b[0]) for b in batch if b]\n", 433 | " targets = [process_target(b[2]) for b in batch if b]\n", 434 | "\n", 435 | " # truncate tensor list\n", 436 | " # length = 2**10\n", 437 | " # a = max(0, min([tensor.shape[-1] for tensor in tensors]) - length)\n", 438 | " # m = randint(0, a)\n", 439 | " # n = m + length\n", 440 | " # tensors = [t[..., m:n] for t in tensors]\n", 441 | " \n", 442 | " input_lengths = [t.shape[0] for t in tensors]\n", 443 | " target_lengths = [len(t) for t in targets]\n", 444 | "\n", 445 | " if tensors: \n", 446 | " targets = torch.nn.utils.rnn.pad_sequence(targets, batch_first=True)\n", 447 | " tensors = torch.nn.utils.rnn.pad_sequence(tensors, batch_first=True)\n", 448 | " tensors = tensors.transpose(1, -1)\n", 449 | " return tensors, targets, input_lengths, target_lengths\n", 450 | " else:\n", 451 | " return None, None, None, None\n", 452 | "\n", 453 | "max_tokens = 5000 # max number of tokens per batch\n", 454 | "# vocab_size = max_tokens\n", 455 | "batch_size = 32 # max number of sentences per batch\n", 456 | "loader_train = DataLoader(train, batch_size=batch_size, collate_fn=collate_fn, shuffle=True)" 457 | ] 458 | }, 459 | { 460 | "cell_type": "raw", 461 | "metadata": {}, 462 | "source": [ 463 | "from torchaudio.datasets.ljspeech import LJSPEECH\n", 464 | "train = LJSPEECH(\"./\", download=True)\n", 465 | "\n", 466 | "class SAFE_LJSPEECH(LIBRISPEECH):\n", 467 | "\n", 468 | " def __getitem__(self, n):\n", 469 | " try:\n", 470 | " waveform, _, _, utterrance = super().__getitem__(n)\n", 471 | " return waveform, utterance\n", 472 | " except (FileNotFoundError, RuntimeError):\n", 473 | " return None\n", 474 | " \n", 475 | " def __next__(self):\n", 476 | " try:\n", 477 | " waveform, _, _, utterrance = super().__next__()\n", 478 | " return waveform, utterance\n", 479 | " except (FileNotFoundError, RuntimeError):\n", 480 | " waveform, _, _, utterrance = self.__next__()\n", 481 | " return waveform, utterance\n" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": 81, 487 | "metadata": { 488 | "scrolled": true 489 | }, 490 | "outputs": [ 491 | { 492 | "name": "stdout", 493 | "output_type": "stream", 494 | "text": [ 495 | "torch.Size([32, 13, 201])\n", 496 | "torch.Size([32, 8])\n", 497 | "[201, 201, 201, 201, 201, 201, 201, 201, 201, 180, 201, 201, 171, 201, 201, 201, 201, 
201, 201, 201, 201, 201, 201, 188, 201, 182, 201, 201, 201, 201, 201, 201]\n", 498 | "[8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8]\n" 499 | ] 500 | } 501 | ], 502 | "source": [ 503 | "for a, b, c, d in loader_train:\n", 504 | " print(a.shape)\n", 505 | " print(b.shape)\n", 506 | " print(c)\n", 507 | " print(d)\n", 508 | " break" 509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": 82, 514 | "metadata": {}, 515 | "outputs": [], 516 | "source": [ 517 | "input_feat_per_channel = 80\n", 518 | "vggblock_enc_config = \"[(64, 3, 2, 2, True), (128, 3, 2, 2, True)]\"\n", 519 | "transformer_enc_config = \"((1024, 16, 4096, True, 0.15, 0.15, 0.15),) * 16\"\n", 520 | "enc_output_dim = 1024\n", 521 | "tgt_embed_dim = 512\n", 522 | "conv_dec_config = \"((256, 3, True),) * 4\"\n", 523 | "transformer_dec_config = \"((1024, 16, 4096, True, 0.15, 0.15, 0.15),) * 6\"" 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": 96, 529 | "metadata": {}, 530 | "outputs": [], 531 | "source": [ 532 | "from torch import nn\n", 533 | "\n", 534 | "\n", 535 | "class PrintLayer(nn.Module):\n", 536 | " def __init__(self):\n", 537 | " super(PrintLayer, self).__init__()\n", 538 | " \n", 539 | " def forward(self, x):\n", 540 | " # Do your print / debug stuff here\n", 541 | " print(x)\n", 542 | " return x\n", 543 | " \n", 544 | " \n", 545 | "\n", 546 | "class Wav2Letter(nn.Module):\n", 547 | " \"\"\"Wav2Letter Speech Recognition model\n", 548 | " Architecture is based off of Facebooks AI Research paper\n", 549 | " https://arxiv.org/pdf/1609.03193.pdf\n", 550 | " This specific architecture accepts mfcc or\n", 551 | " power spectrums speech signals\n", 552 | " TODO: use cuda if available\n", 553 | " Args:\n", 554 | " num_features (int): number of mfcc features\n", 555 | " num_classes (int): number of unique grapheme class labels\n", 556 | " \"\"\"\n", 557 | "\n", 558 | " def __init__(self, num_features, num_classes):\n", 559 | " super(Wav2Letter, self).__init__()\n", 560 | "\n", 561 | " # Conv1d(in_channels, out_channels, kernel_size, stride)\n", 562 | " self.layers = nn.Sequential(\n", 563 | " # PrintLayer(),\n", 564 | " nn.Conv1d(num_features, 250, 48, 2),\n", 565 | " nn.ReLU(),\n", 566 | " nn.Conv1d(250, 250, 7),\n", 567 | " nn.ReLU(),\n", 568 | " nn.Conv1d(250, 250, 7),\n", 569 | " nn.ReLU(),\n", 570 | " nn.Conv1d(250, 250, 7),\n", 571 | " nn.ReLU(),\n", 572 | " nn.Conv1d(250, 250, 7),\n", 573 | " nn.ReLU(),\n", 574 | " # nn.Conv1d(250, 250, 7),\n", 575 | " # nn.ReLU(),\n", 576 | " # nn.Conv1d(250, 250, 7),\n", 577 | " # nn.ReLU(),\n", 578 | " nn.Conv1d(250, 250, 7),\n", 579 | " nn.ReLU(),\n", 580 | " nn.Conv1d(250, 2000, 32),\n", 581 | " nn.ReLU(),\n", 582 | " nn.Conv1d(2000, 2000, 1),\n", 583 | " nn.ReLU(),\n", 584 | " nn.Conv1d(2000, num_classes, 1),\n", 585 | " )\n", 586 | "\n", 587 | " def forward(self, batch):\n", 588 | " \"\"\"Forward pass through Wav2Letter network than \n", 589 | " takes log probability of output\n", 590 | " Args:\n", 591 | " batch (int): mini batch of data\n", 592 | " shape (batch, num_features, frame_len)\n", 593 | " Returns:\n", 594 | " log_probs (torch.Tensor):\n", 595 | " shape (batch_size, num_classes, output_len)\n", 596 | " \"\"\"\n", 597 | " # y_pred shape (batch_size, num_classes, output_len)\n", 598 | " y_pred = self.layers(batch)\n", 599 | "\n", 600 | " # compute log softmax probability on graphemes\n", 601 | " log_probs = nn.functional.log_softmax(y_pred, dim=1)\n", 602 | "\n", 603 | " 
return log_probs\n", 604 | "\n", 605 | "\n", 606 | "model = Wav2Letter(num_features, vocab_size)" 607 | ] 608 | }, 609 | { 610 | "cell_type": "code", 611 | "execution_count": 99, 612 | "metadata": { 613 | "scrolled": false 614 | }, 615 | "outputs": [ 616 | { 617 | "ename": "KeyboardInterrupt", 618 | "evalue": "", 619 | "output_type": "error", 620 | "traceback": [ 621 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 622 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 623 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 79\u001b[0m \u001b[0;31m# print(\"stepping\")\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzero_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 81\u001b[0;31m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 82\u001b[0m \u001b[0;31m# torch.nn.utils.clip_grad_norm_(model.parameters(), clip_norm)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 624 | "\u001b[0;32m~/anaconda3/envs/pyspeech/lib/python3.7/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph)\u001b[0m\n\u001b[1;32m 193\u001b[0m \u001b[0mproducts\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mDefaults\u001b[0m \u001b[0mto\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 194\u001b[0m \"\"\"\n\u001b[0;32m--> 195\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 196\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 197\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 625 | "\u001b[0;32m~/anaconda3/envs/pyspeech/lib/python3.7/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables)\u001b[0m\n\u001b[1;32m 97\u001b[0m Variable._execution_engine.run_backward(\n\u001b[1;32m 98\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 99\u001b[0;31m allow_unreachable=True) # allow_unreachable flag\n\u001b[0m\u001b[1;32m 100\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 626 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 627 | ] 628 | } 629 | ], 630 | "source": [ 631 | "import torchaudio\n", 632 | "from torch.optim import Adadelta\n", 633 | "\n", 634 | "model = Wav2Letter(num_features, vocab_size)\n", 635 | "\n", 636 | "optimizer_params = {\n", 637 | " \"lr\": 1.0,\n", 638 | " \"eps\": 1e-8,\n", 639 | " \"rho\": 0.95,\n", 640 | "}\n", 641 | "optimizer = Adadelta(model.parameters(), **optimizer_params)\n", 642 | "\n", 643 | "max_epoch = 2 # 80\n", 644 | "clip_norm = 10.\n", 645 | "\n", 646 | "criterion = torch.nn.CTCLoss()\n", 647 | "\n", 648 | "# max_files = 10\n", 649 | "for epoch in range(max_epoch):\n", 650 | " # print(epoch)\n", 651 | " \n", 652 | " i_files = 0\n", 653 | " for inputs, targets, _, target_lengths in loader_train:\n", 654 | " # if i_files > max_files:\n", 655 | " # break\n", 656 | "\n", 657 | " # print(i_files, max_files)\n", 658 | "\n", 659 | " if inputs is None or targets is None:\n", 660 | " continue\n", 661 | "\n", 662 | " # print(\"input\", inputs.shape)\n", 663 | " outputs = model(inputs)\n", 664 | " # (input length, batch size, number of classes)\n", 665 | " # input_lengths = [len(o) for o in outputs]\n", 666 | "\n", 667 | " outputs = outputs.transpose(1, 2).transpose(0, 1)\n", 668 | " # print(\"output\", outputs.shape)\n", 669 | " # print(\"target\", targets.shape)\n", 670 | " \n", 671 | " # print(inputs.shape)\n", 672 | " # print(outputs.shape)\n", 673 | " # print(targets.shape)\n", 674 | " # print(len(targets))\n", 675 | " # print(targets.shape)\n", 676 | " # print(input_lengths)\n", 677 | " # input_lengths = [len(o) for o in outputs]\n", 678 | " # print(len(input_lengths))\n", 679 | " # target_lengths = [len(t) for t in targets]\n", 680 | " # print(target_lengths)\n", 681 | " # ctc_loss(input, target, input_lengths, target_lengths)\n", 682 | "\n", 683 | " # input_lengths = [outputs.shape[0]] * outputs.shape[1]\n", 684 | " \n", 685 | " # CTC arguments\n", 686 | " # https://pytorch.org/docs/master/nn.html#torch.nn.CTCLoss\n", 687 | " # better definitions for ctc arguments\n", 688 | " # https://discuss.pytorch.org/t/ctcloss-with-warp-ctc-help/8788/3\n", 689 | " mini_batch_size = len(inputs)\n", 690 | " \n", 691 | " input_lengths = torch.full((mini_batch_size,), outputs.shape[0], dtype=torch.long)\n", 692 | " target_lengths = torch.IntTensor([target.shape[0] for target in targets])\n", 693 | " \n", 694 | " # print(torch.isnan(outputs).any())\n", 695 | " # print(torch.isnan(targets).any())\n", 696 | " # print(torch.isnan(input_lengths).any())\n", 697 | " # print(torch.isnan(target_lengths).any())\n", 698 | " # print(outputs.shape)\n", 699 | " # print(targets.shape)\n", 700 | " # print(input_lengths.shape)\n", 701 | " # print(target_lengths.shape)\n", 702 | "\n", 703 | " # outputs: input length, batch size, number of classes (including blank) \n", 704 | " # targets: batch size, max target length\n", 705 | " # input_lengths: batch size\n", 706 | " # target_lengths: batch size\n", 707 | " loss = criterion(outputs, targets, input_lengths, target_lengths)\n", 708 | "\n", 709 | " # print(\"stepping\")\n", 710 | " optimizer.zero_grad()\n", 711 | " loss.backward()\n", 712 | " # torch.nn.utils.clip_grad_norm_(model.parameters(), clip_norm)\n", 713 | " optimizer.step()\n", 714 | " \n", 715 | " i_files += 1\n", 716 | " \n", 717 | " print(epoch, loss)" 718 | ] 719 | }, 720 | { 721 | "cell_type": "code", 722 | "execution_count": 87, 723 | "metadata": {}, 
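# Editor's note on the CTC setup in the training loop above: nn.CTCLoss expects
# outputs of shape (time, batch, classes) containing log-probabilities, targets
# of shape (batch, max_target_length), plus per-example input_lengths and
# target_lengths, which is exactly what the two transposes and the
# torch.full(...) call arrange. If the loss diverges and the model starts
# emitting NaNs (as in the output cell further down), common mitigations -- not
# applied here -- are enabling the commented-out torch.nn.utils.clip_grad_norm_
# call and constructing the loss as torch.nn.CTCLoss(zero_infinity=True) so that
# infinite losses from inputs shorter than their targets are zeroed rather than
# propagated.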
724 | "outputs": [], 725 | "source": [ 726 | "from torch import topk\n", 727 | "\n", 728 | "def GreedyDecoder(ctc_matrix, blank_label=0):\n", 729 | " \"\"\"Greedy Decoder. Returns highest probability of\n", 730 | " class labels for each timestep\n", 731 | " # TODO: collapse blank labels\n", 732 | " Args:\n", 733 | " ctc_matrix (torch.Tensor): \n", 734 | " shape (1, num_classes, output_len)\n", 735 | " blank_label (int): blank labels to collapse\n", 736 | " \n", 737 | " Returns:\n", 738 | " torch.Tensor: class labels per time step.\n", 739 | " shape (ctc timesteps)\n", 740 | " \"\"\"\n", 741 | " _, indices = topk(ctc_matrix, k=1, dim=1)\n", 742 | " return indices[:, 0, :]" 743 | ] 744 | }, 745 | { 746 | "cell_type": "code", 747 | "execution_count": 88, 748 | "metadata": {}, 749 | "outputs": [ 750 | { 751 | "name": "stdout", 752 | "output_type": "stream", 753 | "text": [ 754 | "tensor([[[nan, nan, nan, nan],\n", 755 | " [nan, nan, nan, nan],\n", 756 | " [nan, nan, nan, nan],\n", 757 | " [nan, nan, nan, nan],\n", 758 | " [nan, nan, nan, nan],\n", 759 | " [nan, nan, nan, nan],\n", 760 | " [nan, nan, nan, nan],\n", 761 | " [nan, nan, nan, nan],\n", 762 | " [nan, nan, nan, nan],\n", 763 | " [nan, nan, nan, nan],\n", 764 | " [nan, nan, nan, nan],\n", 765 | " [nan, nan, nan, nan],\n", 766 | " [nan, nan, nan, nan],\n", 767 | " [nan, nan, nan, nan],\n", 768 | " [nan, nan, nan, nan],\n", 769 | " [nan, nan, nan, nan],\n", 770 | " [nan, nan, nan, nan],\n", 771 | " [nan, nan, nan, nan],\n", 772 | " [nan, nan, nan, nan],\n", 773 | " [nan, nan, nan, nan],\n", 774 | " [nan, nan, nan, nan],\n", 775 | " [nan, nan, nan, nan],\n", 776 | " [nan, nan, nan, nan],\n", 777 | " [nan, nan, nan, nan],\n", 778 | " [nan, nan, nan, nan],\n", 779 | " [nan, nan, nan, nan],\n", 780 | " [nan, nan, nan, nan],\n", 781 | " [nan, nan, nan, nan],\n", 782 | " [nan, nan, nan, nan],\n", 783 | " [nan, nan, nan, nan],\n", 784 | " [nan, nan, nan, nan],\n", 785 | " [nan, nan, nan, nan],\n", 786 | " [nan, nan, nan, nan],\n", 787 | " [nan, nan, nan, nan],\n", 788 | " [nan, nan, nan, nan],\n", 789 | " [nan, nan, nan, nan],\n", 790 | " [nan, nan, nan, nan],\n", 791 | " [nan, nan, nan, nan],\n", 792 | " [nan, nan, nan, nan]]], grad_fn=)\n" 793 | ] 794 | } 795 | ], 796 | "source": [ 797 | "sample = inputs[0].unsqueeze(0)\n", 798 | "target = targets[0]\n", 799 | "\n", 800 | "# decode(targets[0].tolist())\n", 801 | "\n", 802 | "output = model(sample)\n", 803 | "print(output)\n", 804 | "\n", 805 | "# output = GreedyDecoder(output)\n", 806 | "\n", 807 | "# print(output.shape)" 808 | ] 809 | } 810 | ], 811 | "metadata": { 812 | "kernelspec": { 813 | "display_name": "Python 3", 814 | "language": "python", 815 | "name": "python3" 816 | }, 817 | "language_info": { 818 | "codemirror_mode": { 819 | "name": "ipython", 820 | "version": 3 821 | }, 822 | "file_extension": ".py", 823 | "mimetype": "text/x-python", 824 | "name": "python", 825 | "nbconvert_exporter": "python", 826 | "pygments_lexer": "ipython3", 827 | "version": "3.7.6" 828 | } 829 | }, 830 | "nbformat": 4, 831 | "nbformat_minor": 4 832 | } 833 | -------------------------------------------------------------------------------- /PipelineTrain.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[ ]: 5 | 6 | 7 | # https://github.com/pytorch/pytorch/issues/13883 8 | import torch.multiprocessing as mp 9 | 10 | if __name__ == '__main__': 11 | mp.set_start_method('forkserver') 12 | 13 | 14 | # In[ ]: 15 | 16 | 
17 | import argparse 18 | import collections 19 | import cProfile 20 | import hashlib 21 | import itertools 22 | import math 23 | import os 24 | import pprint 25 | import pstats 26 | import random 27 | import re 28 | import shutil 29 | import signal 30 | import statistics 31 | import string 32 | from array import array 33 | from collections import defaultdict 34 | from datetime import datetime 35 | from io import StringIO 36 | from typing import Optional 37 | 38 | import matplotlib 39 | import torch 40 | import torch.distributed as dist 41 | import torchaudio 42 | from matplotlib import pyplot as plt 43 | from tabulate import tabulate 44 | from torch import nn, topk 45 | from torch.optim import SGD, Adadelta, Adam 46 | from torch.optim.lr_scheduler import ExponentialLR, ReduceLROnPlateau 47 | from torch.utils.data import DataLoader 48 | from torchaudio.datasets import LIBRISPEECH, SPEECHCOMMANDS 49 | from torchaudio.datasets.utils import bg_iterator, diskcache_iterator 50 | from torchaudio.transforms import MFCC, Resample 51 | from tqdm.notebook import tqdm as tqdm 52 | 53 | print("start time: {}".format(str(datetime.now())), flush=True) 54 | 55 | try: 56 | get_ipython().run_line_magic('matplotlib', 'inline') 57 | in_notebook = True 58 | except NameError: 59 | matplotlib.use("Agg") 60 | in_notebook = False 61 | 62 | # Empty CUDA cache 63 | torch.cuda.empty_cache() 64 | 65 | # Profiling performance 66 | pr = cProfile.Profile() 67 | pr.enable() 68 | 69 | 70 | # In[ ]: 71 | 72 | 73 | # Create argument parser 74 | parser = argparse.ArgumentParser() 75 | 76 | parser.add_argument('--workers', default=0, type=int, 77 | metavar='N', help='number of data loading workers') 78 | parser.add_argument('--resume', default='', type=str, 79 | metavar='PATH', help='path to latest checkpoint') 80 | parser.add_argument('--figures', default='', type=str, 81 | metavar='PATH', help='folder path to save figures') 82 | 83 | parser.add_argument('--epochs', default=200, type=int, 84 | metavar='N', help='number of total epochs to run') 85 | parser.add_argument('--start-epoch', default=0, type=int, 86 | metavar='N', help='manual epoch number') 87 | parser.add_argument('--print-freq', default=10, type=int, 88 | metavar='N', help='print frequency in epochs') 89 | 90 | parser.add_argument('--arch', metavar='ARCH', default='wav2letter', 91 | choices=["wav2letter", "lstm"], help='model architecture') 92 | parser.add_argument('--batch-size', default=64, type=int, 93 | metavar='N', help='mini-batch size') 94 | 95 | parser.add_argument('--learning-rate', default=1., type=float, 96 | metavar='LR', help='initial learning rate') 97 | parser.add_argument('--gamma', default=.96, type=float, 98 | metavar='GAMMA', help='learning rate exponential decay constant') 99 | # parser.add_argument('--momentum', default=0.9, type=float, metavar='M', help='momentum') 100 | parser.add_argument('--weight-decay', default=1e-5, 101 | type=float, metavar='W', help='weight decay') 102 | parser.add_argument("--eps", metavar='EPS', type=float, default=1e-8) 103 | parser.add_argument("--rho", metavar='RHO', type=float, default=.95) 104 | 105 | parser.add_argument('--n-bins', default=13, type=int, 106 | metavar='N', help='number of bins in transforms') 107 | 108 | parser.add_argument('--world-size', default=1, type=int, 109 | help='number of distributed processes') 110 | parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', 111 | type=str, help='url used to set up distributed training') 112 | parser.add_argument('--dist-backend', 
default='nccl', 113 | type=str, help='distributed backend') 114 | parser.add_argument('--distributed', action="store_true") 115 | 116 | parser.add_argument('--dataset', default='librispeech', type=str) 117 | parser.add_argument('--gradient', action="store_true") 118 | parser.add_argument('--jit', action="store_true") 119 | parser.add_argument('--viterbi-decoder', action="store_true") 120 | 121 | if in_notebook: 122 | args, _ = parser.parse_known_args() 123 | else: 124 | args = parser.parse_args() 125 | 126 | 127 | # In[ ]: 128 | 129 | 130 | if args.learning_rate < 0.: 131 | args.learning_rate = 10 ** random.uniform(-3, 1) 132 | 133 | if args.weight_decay < 0.: 134 | args.weight_decay = 10 ** random.uniform(-6, 0) 135 | 136 | if args.gamma < 0.: 137 | args.gamma = random.uniform(.95, 1.) 138 | 139 | 140 | # # Checkpoint 141 | 142 | # In[ ]: 143 | 144 | 145 | MAIN_PID = os.getpid() 146 | CHECKPOINT_filename = args.resume if args.resume else 'checkpoint.pth.tar' 147 | CHECKPOINT_tempfile = CHECKPOINT_filename + '.temp' 148 | HALT_filename = CHECKPOINT_filename + '.HALT' 149 | SIGNAL_RECEIVED = False 150 | 151 | # HALT file is used as a sign of job completion. 152 | # Make sure no HALT file left from previous runs. 153 | if os.path.isfile(HALT_filename): 154 | os.remove(HALT_filename) 155 | 156 | # Remove CHECKPOINT_tempfile, in case the signal arrives in the 157 | # middle of copying from CHECKPOINT_tempfile to CHECKPOINT_filename 158 | if os.path.isfile(CHECKPOINT_tempfile): 159 | os.remove(CHECKPOINT_tempfile) 160 | 161 | 162 | def SIGTERM_handler(a, b): 163 | print('received sigterm') 164 | pass 165 | 166 | 167 | def signal_handler(a, b): 168 | global SIGNAL_RECEIVED 169 | print('Signal received', a, datetime.now().strftime( 170 | "%y%m%d.%H%M%S"), flush=True) 171 | SIGNAL_RECEIVED = True 172 | 173 | # If HALT file exists, which means the job is done, exit peacefully. 174 | if os.path.isfile(HALT_filename): 175 | print('Job is done, exiting') 176 | exit(0) 177 | 178 | return 179 | 180 | 181 | def trigger_job_requeue(): 182 | # Submit a new job to resume from checkpoint. 183 | if os.path.isfile(CHECKPOINT_filename) and os.environ['SLURM_PROCID'] == '0' and os.getpid() == MAIN_PID: 184 | print('pid: ', os.getpid(), ' ppid: ', os.getppid(), flush=True) 185 | print('time is up, back to slurm queue', flush=True) 186 | command = 'scontrol requeue ' + os.environ['SLURM_JOB_ID'] 187 | print(command) 188 | if os.system(command): 189 | raise RuntimeError('requeue failed') 190 | print('New job submitted to the queue', flush=True) 191 | exit(0) 192 | 193 | 194 | # Install signal handler 195 | signal.signal(signal.SIGUSR1, signal_handler) 196 | signal.signal(signal.SIGTERM, SIGTERM_handler) 197 | print('Signal handler installed', flush=True) 198 | 199 | 200 | def save_checkpoint(state, is_best, filename=CHECKPOINT_filename): 201 | """ 202 | Save the model to a temporary file first, 203 | then copy it to filename, in case the signal interrupts 204 | the torch.save() process. 
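# Editor's note: writing to CHECKPOINT_tempfile first and then os.rename()-ing
# it into place makes the checkpoint swap effectively atomic (rename is atomic
# on a POSIX filesystem when both paths are on the same volume), so a signal
# arriving mid-save cannot leave a truncated checkpoint.pth.tar behind.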
205 | """ 206 | if not args.distributed or os.environ['SLURM_PROCID'] == '0': 207 | torch.save(state, CHECKPOINT_tempfile) 208 | if os.path.isfile(CHECKPOINT_tempfile): 209 | os.rename(CHECKPOINT_tempfile, filename) 210 | if is_best: 211 | shutil.copyfile(filename, 'model_best.pth.tar') 212 | print("Checkpoint: saved") 213 | 214 | 215 | # # Distributed 216 | 217 | # In[ ]: 218 | 219 | 220 | # Use #nodes as world_size 221 | if 'SLURM_NNODES' in os.environ: 222 | args.world_size = int(os.environ['SLURM_NNODES']) 223 | 224 | args.distributed = args.distributed or args.world_size > 1 225 | 226 | if args.distributed: 227 | os.environ['RANK'] = os.environ['SLURM_PROCID'] 228 | os.environ['WORLD_SIZE'] = str(args.world_size) 229 | print('in distributed', os.environ['RANK'], 230 | os.environ['MASTER_ADDR'], os.environ['MASTER_PORT'], flush=True) 231 | dist.init_process_group(backend=args.dist_backend, 232 | init_method=args.dist_url, world_size=args.world_size) 233 | 234 | print('init process', flush=True) 235 | 236 | 237 | # # Parameters 238 | 239 | # In[ ]: 240 | 241 | 242 | if not args.distributed or os.environ['SLURM_PROCID'] == '0': 243 | print(pprint.pformat(vars(args)), flush=True) 244 | 245 | 246 | # In[ ]: 247 | 248 | 249 | audio_backend = "soundfile" 250 | torchaudio.set_audio_backend(audio_backend) 251 | 252 | root = "/datasets01/" 253 | folder_in_archive = "librispeech/062419/" 254 | 255 | device = "cuda" if torch.cuda.is_available() else "cpu" 256 | num_devices = torch.cuda.device_count() 257 | # num_devices = 1 258 | print(num_devices, "GPUs", flush=True) 259 | 260 | # max number of sentences per batch 261 | batch_size = args.batch_size 262 | # batch_size = 2048 263 | # batch_size = 512 264 | # batch_size = 256 265 | # batch_size = 64 266 | # batch_size = 1 267 | 268 | training_percentage = 90. 269 | validation_percentage = 5. 
270 | 271 | data_loader_training_params = { 272 | "num_workers": args.workers, 273 | "pin_memory": True, 274 | "shuffle": True, 275 | "drop_last": True, 276 | } 277 | data_loader_validation_params = data_loader_training_params.copy() 278 | data_loader_validation_params["shuffle"] = False 279 | 280 | non_blocking = True 281 | 282 | 283 | # text preprocessing 284 | 285 | char_blank = "*" 286 | char_space = " " 287 | char_apostrophe = "'" 288 | 289 | labels = char_blank + char_space + char_apostrophe + string.ascii_lowercase 290 | 291 | # excluded_dir = ["_background_noise_"] 292 | # folder_speechcommands = './SpeechCommands/speech_commands_v0.02' 293 | # labels = [char_blank, char_space] + [d for d in next(os.walk(folder_speechcommands))[1] if d not in excluded_dir] 294 | 295 | 296 | # audio 297 | 298 | sample_rate_original = 16000 299 | sample_rate_new = 8000 300 | 301 | n_bins = args.n_bins # 13, 128 302 | melkwargs = { 303 | 'n_fft': 512, 304 | 'n_mels': 20, 305 | 'hop_length': 80, # (160, 80) 306 | } 307 | 308 | transforms = nn.Sequential( 309 | # torchaudio.transforms.Resample(sample_rate_original, sample_rate_new), 310 | # torchaudio.transforms.MFCC(sample_rate=sample_rate_original, n_mfcc=n_bins, melkwargs=melkwargs), 311 | torchaudio.transforms.MelSpectrogram( 312 | sample_rate=sample_rate_original, n_mels=n_bins), 313 | # torchaudio.transforms.FrequencyMasking(freq_mask_param=n_bins), 314 | # torchaudio.transforms.TimeMasking(time_mask_param=35) 315 | ) 316 | 317 | 318 | # Optimizer 319 | 320 | optimizer_params_adadelta = { 321 | "lr": args.learning_rate, 322 | "eps": args.eps, 323 | "rho": args.rho, 324 | "weight_decay": args.weight_decay, 325 | } 326 | 327 | optimizer_params_adam = { 328 | "lr": args.learning_rate, 329 | "eps": args.eps, 330 | "weight_decay": args.weight_decay, 331 | } 332 | 333 | optimizer_params_sgd = { 334 | "lr": args.learning_rate, 335 | "weight_decay": args.weight_decay, 336 | } 337 | 338 | optimizer_params_adadelta = { 339 | "lr": args.learning_rate, 340 | "eps": args.eps, 341 | "rho": args.rho, 342 | "weight_decay": args.weight_decay, 343 | } 344 | 345 | Optimizer = Adadelta 346 | optimizer_params = optimizer_params_sgd 347 | 348 | # Model 349 | 350 | num_features = n_bins if n_bins else 1 351 | 352 | lstm_params = { 353 | "hidden_size": 800, 354 | "num_layers": 5, 355 | "batch_first": False, 356 | "bidirectional": False, 357 | "dropout": 0., 358 | } 359 | 360 | clip_norm = 0. # 10. 
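# Editor's sketch: a quick shape check of the transform stack above, using a
# random one-second waveform instead of a real clip (with the default
# --n-bins 13 and torchaudio's default n_fft=400, hop_length=200):
#
#     wave = torch.randn(1, sample_rate_original)   # (channel, samples)
#     feats = transforms(wave)
#     print(feats.shape)                            # torch.Size([1, 13, 81]) -> (channel, n_mels, frames)
#
# process_datapoint further down drops the channel dimension and transposes the
# result to (frames, n_mels).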
361 | 362 | zero_infinity = False 363 | 364 | 365 | # # Text encoding 366 | 367 | # In[ ]: 368 | 369 | 370 | class Coder: 371 | def __init__(self, labels): 372 | labels = [l for l in labels] 373 | self.length = len(labels) 374 | enumerated = list(enumerate(labels)) 375 | flipped = [(sub[1], sub[0]) for sub in enumerated] 376 | 377 | d1 = collections.OrderedDict(enumerated) 378 | d2 = collections.OrderedDict(flipped) 379 | self.mapping = {**d1, **d2} 380 | 381 | def encode(self, iterable): 382 | if isinstance(iterable, list): 383 | return [self.encode(i) for i in iterable] 384 | else: 385 | return [self.mapping[i] + self.mapping[char_blank] for i in iterable] 386 | 387 | def decode(self, tensor): 388 | if isinstance(tensor[0], list): 389 | return [self.decode(t) for t in tensor] 390 | else: 391 | # not idempotent, since clean string 392 | x = (self.mapping[i] for i in tensor) 393 | x = ''.join(i for i, _ in itertools.groupby(x)) 394 | x = x.replace(char_blank, "") 395 | # x = x.strip() 396 | return x 397 | 398 | 399 | coder = Coder(labels) 400 | encode = coder.encode 401 | decode = coder.decode 402 | vocab_size = coder.length 403 | print("vocab_size", vocab_size, flush=True) 404 | 405 | 406 | # # Model 407 | # 408 | # [Wav2Letter](https://github.com/LearnedVector/Wav2Letter/blob/master/Google%20Speech%20Command%20Example.ipynb) 409 | 410 | # In[ ]: 411 | 412 | 413 | def weight_init(m): 414 | if isinstance(m, nn.Linear): 415 | size = m.weight.size() 416 | fan_out = size[0] # number of rows 417 | fan_in = size[1] # number of columns 418 | variance = math.sqrt(2.0/(fan_in + fan_out)) 419 | m.weight.data.normal_(0.0, variance) 420 | 421 | 422 | class PrintLayer(nn.Module): 423 | def __init__(self): 424 | super().__init__() 425 | 426 | def forward(self, x): 427 | print(x, flush=True) 428 | return x 429 | 430 | 431 | class Wav2Letter(nn.Module): 432 | """Wav2Letter Speech Recognition model 433 | https://arxiv.org/pdf/1609.03193.pdf 434 | This specific architecture accepts mfcc or power spectrums speech signals 435 | 436 | Args: 437 | num_features (int): number of mfcc features 438 | num_classes (int): number of unique grapheme class labels 439 | """ 440 | 441 | def __init__(self, num_features, num_classes): 442 | super().__init__() 443 | 444 | # Conv1d(in_channels, out_channels, kernel_size, stride) 445 | self.layers = nn.Sequential( 446 | nn.Conv1d(in_channels=num_features, out_channels=250, 447 | kernel_size=48, stride=2, padding=23), 448 | nn.ReLU(inplace=True), 449 | nn.Conv1d(in_channels=250, out_channels=250, 450 | kernel_size=7, stride=1, padding=3), 451 | nn.ReLU(inplace=True), 452 | nn.Conv1d(in_channels=250, out_channels=250, 453 | kernel_size=7, stride=1, padding=3), 454 | nn.ReLU(inplace=True), 455 | nn.Conv1d(in_channels=250, out_channels=250, 456 | kernel_size=7, stride=1, padding=3), 457 | nn.ReLU(inplace=True), 458 | nn.Conv1d(in_channels=250, out_channels=250, 459 | kernel_size=7, stride=1, padding=3), 460 | nn.ReLU(inplace=True), 461 | nn.Conv1d(in_channels=250, out_channels=250, 462 | kernel_size=7, stride=1, padding=3), 463 | nn.ReLU(inplace=True), 464 | nn.Conv1d(in_channels=250, out_channels=250, 465 | kernel_size=7, stride=1, padding=3), 466 | nn.ReLU(inplace=True), 467 | nn.Conv1d(in_channels=250, out_channels=250, 468 | kernel_size=7, stride=1, padding=3), 469 | nn.ReLU(inplace=True), 470 | nn.Conv1d(in_channels=250, out_channels=2000, 471 | kernel_size=32, stride=1, padding=16), 472 | nn.ReLU(inplace=True), 473 | nn.Conv1d(in_channels=2000, out_channels=2000, 474 | 
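# Editor's sketch: how the Coder defined above behaves at decode time (greedy
# CTC post-processing). decode() collapses adjacent repeats and strips the
# blank label (index 0), so blanks are what keep genuinely repeated letters
# apart:
#
#     ids = encode("hello")                     # one integer label per character
#     doubled = [i for i in ids for _ in range(2)]
#     print(decode(doubled))                    # "helo"  -- the repeated "l" merges
#     spaced = [x for i in ids for x in (i, 0)]
#     print(decode(spaced))                     # "hello" -- blanks preserve the double "l"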
kernel_size=1, stride=1, padding=0), 475 | nn.ReLU(inplace=True), 476 | nn.Conv1d(in_channels=2000, out_channels=num_classes, 477 | kernel_size=1, stride=1, padding=0), 478 | nn.ReLU(inplace=True), 479 | ) 480 | 481 | def forward(self, batch): 482 | """Forward pass through Wav2Letter network than 483 | takes log probability of output 484 | Args: 485 | batch (int): mini batch of data 486 | shape (batch, num_features, frame_len) 487 | Returns: 488 | Tensor with shape (batch_size, num_classes, output_len) 489 | """ 490 | # batch: (batch_size, num_features, seq_len) 491 | y_pred = self.layers(batch) 492 | # y_pred: (batch_size, num_classes, output_len) 493 | y_pred = y_pred.transpose(-1, -2) 494 | # y_pred: (batch_size, output_len, num_classes) 495 | return nn.functional.log_softmax(y_pred, dim=-1) 496 | 497 | 498 | # In[ ]: 499 | 500 | 501 | class LSTMModel(nn.Module): 502 | 503 | def __init__(self, num_features, num_classes, hidden_size, num_layers, bidirectional, dropout, batch_first): 504 | super().__init__() 505 | 506 | directions = bidirectional + 1 507 | 508 | self.layer = nn.LSTM( 509 | num_features, hidden_size=hidden_size, 510 | num_layers=num_layers, bidirectional=bidirectional, dropout=dropout, batch_first=batch_first 511 | ) 512 | # self.activation = nn.ReLU(inplace=True) 513 | self.hidden2class = nn.Linear(directions*hidden_size, num_classes) 514 | 515 | def forward(self, batch): 516 | self.layer.flatten_parameters() 517 | # print("forward", flush=True) 518 | # batch: batch, num_features, seq_len 519 | # print(batch.shape, flush=True) 520 | batch = batch.transpose(-1, -2).contiguous() 521 | # batch: batch, seq_len, num_features 522 | # print(batch.shape, flush=True) 523 | outputs, _ = self.layer(batch) 524 | # outputs = self.activation(outputs) 525 | # outputs: batch, seq_len, directions*num_features 526 | outputs = self.hidden2class(outputs) 527 | # outputs: batch, seq_len, num_features 528 | # print(outputs.shape, flush=True) 529 | return nn.functional.log_softmax(outputs, dim=-1) 530 | 531 | 532 | # In[ ]: 533 | 534 | 535 | if args.arch == "wav2letter": 536 | model = Wav2Letter(num_features, vocab_size) 537 | 538 | def model_length_function(tensor): 539 | return int(tensor.shape[0])//2 + 1 540 | 541 | elif args.arch == "lstm": 542 | model = LSTMModel(num_features, vocab_size, **lstm_params) 543 | 544 | def model_length_function(tensor): 545 | return int(tensor.shape[0]) 546 | 547 | 548 | # # Dataset 549 | 550 | # In[ ]: 551 | 552 | 553 | class IterableMemoryCache: 554 | 555 | def __init__(self, iterable): 556 | self.iterable = iterable 557 | self._iter = iter(iterable) 558 | self._done = False 559 | self._values = [] 560 | 561 | def __iter__(self): 562 | if self._done: 563 | return iter(self._values) 564 | return itertools.chain(self._values, self._gen_iter()) 565 | 566 | def _gen_iter(self): 567 | for new_value in self._iter: 568 | self._values.append(new_value) 569 | yield new_value 570 | self._done = True 571 | 572 | def __len__(self): 573 | return len(self._iterable) 574 | 575 | 576 | class MapMemoryCache(torch.utils.data.Dataset): 577 | """ 578 | Wrap a dataset so that, whenever a new item is returned, it is saved to memory. 
579 | """ 580 | 581 | def __init__(self, dataset): 582 | self.dataset = dataset 583 | self._cache = [None] * len(dataset) 584 | 585 | def __getitem__(self, n): 586 | if self._cache[n]: 587 | return self._cache[n] 588 | 589 | item = self.dataset[n] 590 | self._cache[n] = item 591 | 592 | return item 593 | 594 | def __len__(self): 595 | return len(self.dataset) 596 | 597 | 598 | class Processed(torch.utils.data.Dataset): 599 | 600 | def __init__(self, process_datapoint, dataset): 601 | self.process_datapoint = process_datapoint 602 | self.dataset = dataset 603 | 604 | def __getitem__(self, n): 605 | try: 606 | item = self.dataset[n] 607 | return self.process_datapoint(item) 608 | except (FileNotFoundError, RuntimeError): 609 | return None 610 | 611 | def __next__(self): 612 | try: 613 | item = next(self.dataset) 614 | return self.process_datapoint(item) 615 | except (FileNotFoundError, RuntimeError): 616 | return self.__next__() 617 | 618 | def __len__(self): 619 | return len(self.dataset) 620 | 621 | 622 | # In[ ]: 623 | 624 | 625 | # @torch.jit.script 626 | 627 | 628 | def process_datapoint(item): 629 | transformed = item[0] # .to(device, non_blocking=non_blocking) 630 | target = item[2].lower() 631 | 632 | transformed = transforms(transformed) 633 | 634 | transformed = transformed[0, ...].transpose(0, -1) 635 | 636 | target = " " + target + " " 637 | target = encode(target) 638 | target = torch.tensor(target, dtype=torch.long, device=transformed.device) 639 | 640 | transformed = transformed # .to("cpu") 641 | target = target # .to("cpu") 642 | return transformed, target 643 | 644 | 645 | # In[ ]: 646 | 647 | 648 | def datasets_librispeech(): 649 | 650 | def create(tag): 651 | 652 | if isinstance(tag, str): 653 | data = LIBRISPEECH( 654 | root, tag, folder_in_archive=folder_in_archive, download=False) 655 | else: 656 | data = torch.utils.data.ConcatDataset([LIBRISPEECH( 657 | root, t, folder_in_archive=folder_in_archive, download=False) for t in tag]) 658 | 659 | data = Processed(process_datapoint, data) 660 | data = diskcache_iterator(data) 661 | # data = MapMemoryCache(data) 662 | return data 663 | 664 | # return create("train-clean-100"), create("dev-clean"), None 665 | return create(["train-clean-100", "train-clean-360", "train-other-500"]), create(["dev-clean", "dev-other"]), None 666 | 667 | 668 | # In[ ]: 669 | 670 | 671 | def which_set(filename, validation_percentage, testing_percentage): 672 | """Determines which data partition the file should belong to. 673 | 674 | We want to keep files in the same training, validation, or testing sets even 675 | if new ones are added over time. This makes it less likely that testing 676 | samples will accidentally be reused in training when long runs are restarted 677 | for example. To keep this stability, a hash of the filename is taken and used 678 | to determine which set it should belong to. This determination only depends on 679 | the name and the set proportions, so it won't change as other files are added. 680 | 681 | It's also useful to associate particular files as related (for example words 682 | spoken by the same person), so anything after '_nohash_' in a filename is 683 | ignored for set determination. This ensures that 'bobby_nohash_0.wav' and 684 | 'bobby_nohash_1.wav' are always in the same set, for example. 685 | 686 | Args: 687 | filename: File path of the data sample. 688 | validation_percentage: How much of the data set to use for validation. 689 | testing_percentage: How much of the data set to use for testing. 
690 | 691 | Returns: 692 | String, one of 'training', 'validation', or 'testing'. 693 | """ 694 | 695 | MAX_NUM_WAVS_PER_CLASS = 2**27 - 1 # ~134M 696 | 697 | base_name = os.path.basename(filename) 698 | 699 | # We want to ignore anything after '_nohash_' in the file name when 700 | # deciding which set to put a wav in, so the data set creator has a way of 701 | # grouping wavs that are close variations of each other. 702 | hash_name = re.sub(r'_nohash_.*$', '', base_name).encode("utf-8") 703 | 704 | # This looks a bit magical, but we need to decide whether this file should 705 | # go into the training, testing, or validation sets, and we want to keep 706 | # existing files in the same set even if more files are subsequently 707 | # added. 708 | # To do that, we need a stable way of deciding based on just the file name 709 | # itself, so we do a hash of that and then use that to generate a 710 | # probability value that we use to assign it. 711 | hash_name_hashed = hashlib.sha1(hash_name).hexdigest() 712 | percentage_hash = ((int(hash_name_hashed, 16) % ( 713 | MAX_NUM_WAVS_PER_CLASS + 1)) * (100.0 / MAX_NUM_WAVS_PER_CLASS)) 714 | 715 | if percentage_hash < validation_percentage: 716 | result = 'validation' 717 | elif percentage_hash < (testing_percentage + validation_percentage): 718 | result = 'testing' 719 | else: 720 | result = 'training' 721 | 722 | return result 723 | 724 | 725 | def filter_speechcommands(tag, training_percentage, data): 726 | if training_percentage < 100.: 727 | testing_percentage = ( 728 | 100. - training_percentage - validation_percentage) 729 | 730 | def which_set_filter(x): return which_set( 731 | x, validation_percentage, testing_percentage) == tag 732 | data._walker = list(filter(which_set_filter, data._walker)) 733 | return data 734 | 735 | 736 | def datasets_speechcommands(): 737 | 738 | root = "./" 739 | 740 | def create(tag): 741 | data = SPEECHCOMMANDS(root, download=True) 742 | data = filter_speechcommands(tag, training_percentage, data) 743 | data = Processed(process_datapoint, data) 744 | data = MapMemoryCache(data) 745 | return data 746 | 747 | return create("training"), create("validation"), create("testing") 748 | 749 | 750 | # In[ ]: 751 | 752 | 753 | if args.dataset == "librispeech": 754 | training, validation, _ = datasets_librispeech() 755 | elif args.dataset == "speechcommand": 756 | training, validation, _ = datasets_speechcommands() 757 | 758 | 759 | # In[ ]: 760 | 761 | 762 | if False: 763 | 764 | from collections import Counter 765 | from collections import OrderedDict 766 | 767 | training_unprocessed = SPEECHCOMMANDS("./", download=True) 768 | training_unprocessed = filter_speechcommands( 769 | training_percentage, training_unprocessed) 770 | 771 | counter = Counter([t[2] for t in training_unprocessed]) 772 | counter = OrderedDict(counter.most_common()) 773 | 774 | plt.bar(counter.keys(), counter.values(), align='center') 775 | 776 | if resample is not None: 777 | waveform, sample_rate = training_unprocessed[0][0], training_unprocessed[0][1] 778 | 779 | fn = "sound.wav" 780 | torchaudio.save(fn, waveform, sample_rate_new) 781 | ipd.Audio(fn) 782 | 783 | 784 | # # Word Decoder 785 | 786 | # In[ ]: 787 | 788 | 789 | def greedy_decode(outputs): 790 | """Greedy Decoder. Returns highest probability of class labels for each timestep 791 | 792 | Args: 793 | outputs (torch.Tensor): shape (input length, batch size, number of classes (including blank)) 794 | 795 | Returns: 796 | torch.Tensor: class labels per time step. 
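# Editor's sketch illustrating the hash-based split above: the bucket depends
# only on the file name (with anything after "_nohash_" ignored), so it is
# stable across runs and keeps recordings from the same source together:
#
#     print(which_set("bobby_nohash_0.wav", validation_percentage=10.0, testing_percentage=10.0))
#     print(which_set("bobby_nohash_1.wav", validation_percentage=10.0, testing_percentage=10.0))
#     # both lines print the same partition, because both hash only "bobby"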
797 | """ 798 | _, indices = topk(outputs, k=1, dim=-1) 799 | return indices[..., 0] 800 | 801 | 802 | # In[ ]: 803 | 804 | 805 | def build_transitions(): 806 | 807 | from collections import Counter 808 | 809 | c = None 810 | 811 | for _, label in training: 812 | # Count bigrams 813 | count = [((a.item(), b.item())) for (a, b) in zip(label, label[1:])] 814 | count = Counter(count) 815 | if c is None: 816 | c = count 817 | else: 818 | c = c + count 819 | 820 | # Encode as transition matrix 821 | 822 | ind = torch.tensor(list(zip(*[a for (a, b) in c.items()]))) 823 | val = torch.tensor([b for (a, b) in c.items()], dtype=torch.float) 824 | 825 | transitions = torch.sparse_coo_tensor(indices=ind, values=val, size=[ 826 | vocab_size, vocab_size]).coalesce().to_dense() 827 | transitions = (transitions/torch.max(torch.tensor(1.), 828 | transitions.max(dim=1)[0]).unsqueeze(1)) 829 | 830 | return transitions 831 | 832 | 833 | if args.viterbi_decoder: 834 | print("transitions: building", flush=True) 835 | transitions = build_transitions() 836 | print("transitions: done", flush=True) 837 | 838 | 839 | # In[ ]: 840 | 841 | 842 | # https://gist.github.com/PetrochukM/afaa3613a99a8e7213d2efdd02ae4762 843 | # https://github.com/napsternxg/pytorch-practice/blob/master/Viterbi%20decoding%20and%20CRF.ipynb 844 | 845 | 846 | def viterbi_decode(tag_sequence: torch.Tensor, transition_matrix: torch.Tensor, top_k: int = 5): 847 | """ 848 | Perform Viterbi decoding in log space over a sequence given a transition matrix 849 | specifying pairwise (transition) potentials between tags and a matrix of shape 850 | (sequence_length, num_tags) specifying unary potentials for possible tags per 851 | timestep. 852 | Parameters 853 | ---------- 854 | tag_sequence : torch.Tensor, required. 855 | A tensor of shape (sequence_length, num_tags) representing scores for 856 | a set of tags over a given sequence. 857 | transition_matrix : torch.Tensor, required. 858 | A tensor of shape (num_tags, num_tags) representing the binary potentials 859 | for transitioning between a given pair of tags. 860 | top_k : int, required. 861 | Integer defining the top number of paths to decode. 862 | Returns 863 | ------- 864 | viterbi_path : List[int] 865 | The tag indices of the maximum likelihood tag sequence. 866 | viterbi_score : float 867 | The score of the viterbi path. 868 | """ 869 | sequence_length, num_tags = tag_sequence.size() 870 | 871 | path_scores = [] 872 | path_indices = [] 873 | # At the beginning, the maximum number of permutations is 1; therefore, we unsqueeze(0) 874 | # to allow for 1 permutation. 875 | path_scores.append(tag_sequence[0, :].unsqueeze(0)) 876 | # assert path_scores[0].size() == (n_permutations, num_tags) 877 | 878 | # Evaluate the scores for all possible paths. 879 | for timestep in range(1, sequence_length): 880 | # Add pairwise potentials to current scores. 881 | # assert path_scores[timestep - 1].size() == (n_permutations, num_tags) 882 | summed_potentials = path_scores[timestep - 883 | 1].unsqueeze(2) + transition_matrix 884 | summed_potentials = summed_potentials.view(-1, num_tags) 885 | 886 | # Best pairwise potential path score from the previous timestep. 
887 | max_k = min(summed_potentials.size()[0], top_k) 888 | scores, paths = torch.topk(summed_potentials, k=max_k, dim=0) 889 | # assert scores.size() == (n_permutations, num_tags) 890 | # assert paths.size() == (n_permutations, num_tags) 891 | 892 | scores = tag_sequence[timestep, :] + scores 893 | # assert scores.size() == (n_permutations, num_tags) 894 | path_scores.append(scores) 895 | path_indices.append(paths.squeeze()) 896 | 897 | # Construct the most likely sequence backwards. 898 | path_scores = path_scores[-1].view(-1) 899 | max_k = min(path_scores.size()[0], top_k) 900 | viterbi_scores, best_paths = torch.topk(path_scores, k=max_k, dim=0) 901 | 902 | viterbi_paths = [] 903 | for i in range(max_k): 904 | 905 | viterbi_path = [best_paths[i].item()] 906 | for backward_timestep in reversed(path_indices): 907 | viterbi_path.append( 908 | int(backward_timestep.view(-1)[viterbi_path[-1]])) 909 | 910 | # Reverse the backward path. 911 | viterbi_path.reverse() 912 | 913 | # Viterbi paths uses (num_tags * n_permutations) nodes; therefore, we need to modulo. 914 | viterbi_path = [j % num_tags for j in viterbi_path] 915 | viterbi_paths.append(viterbi_path) 916 | 917 | return viterbi_paths, viterbi_scores 918 | 919 | 920 | def batch_viterbi_decode(tag_sequence: torch.Tensor, transition_matrix: torch.Tensor, top_k: int = 5): 921 | 922 | outputs = [] 923 | scores = [] 924 | for i in range(tag_sequence.shape[1]): 925 | paths, score = viterbi_decode(tag_sequence[:, i, :], transitions) 926 | outputs.append(paths) 927 | scores.append(score) 928 | 929 | return torch.tensor(outputs).transpose(0, -1), torch.cat(scores) 930 | 931 | 932 | def top_batch_viterbi_decode(tag_sequence: torch.Tensor): 933 | output, _ = batch_viterbi_decode(tag_sequence, transitions, top_k=1) 934 | return output[:, 0, :] 935 | 936 | 937 | # In[ ]: 938 | 939 | 940 | def levenshtein_distance_array(r, h): 941 | 942 | # initialisation 943 | dnew = array('d', [0] * (len(h)+1)) 944 | dold = array('d', [0] * (len(h)+1)) 945 | 946 | # computation 947 | for i in range(1, len(r)+1): 948 | for j in range(1, len(h)+1): 949 | 950 | if r[i-1] == h[j-1]: 951 | dnew[j] = dold[j-1] 952 | else: 953 | substitution = dold[j-1] + 1 954 | insertion = dnew[j-1] + 1 955 | deletion = dold[j] + 1 956 | dnew[j] = min(substitution, insertion, deletion) 957 | 958 | dnew, dold = dold, dnew 959 | 960 | return dnew[-1] 961 | 962 | 963 | # In[ ]: 964 | 965 | 966 | def levenshtein_distance_list(r, h): 967 | 968 | # initialisation 969 | d = [[0] * (len(h)+1)] * (len(r)+1) 970 | 971 | # computation 972 | for i in range(1, len(r)+1): 973 | for j in range(1, len(h)+1): 974 | 975 | if r[i-1] == h[j-1]: 976 | d[i].append(d[i-1][j-1]) 977 | else: 978 | substitution = d[i-1][j-1] + 1 979 | insertion = d[i][j-1] + 1 980 | deletion = d[i-1][j] + 1 981 | d[i].append(min(substitution, insertion, deletion)) 982 | 983 | return d[len(r)][len(h)] 984 | 985 | 986 | # In[ ]: 987 | 988 | 989 | # https://martin-thoma.com/word-error-rate-calculation/ 990 | 991 | 992 | def levenshtein_distance(r: str, h: str, device: Optional[str] = None): 993 | 994 | # initialisation 995 | d = torch.zeros((2, len(h)+1), dtype=torch.long) # , device=device) 996 | dold = 0 997 | dnew = 1 998 | 999 | # computation 1000 | for i in range(1, len(r)+1): 1001 | d[dnew, 0] = 0 1002 | for j in range(1, len(h)+1): 1003 | 1004 | if r[i-1] == h[j-1]: 1005 | d[dnew, j] = d[dnew-1, j-1] 1006 | else: 1007 | substitution = d[dnew-1, j-1] + 1 1008 | insertion = d[dnew, j-1] + 1 1009 | deletion = d[dnew-1, j] + 1 
1010 | d[dnew, j] = min(substitution, insertion, deletion) 1011 | 1012 | dnew, dold = dold, dnew 1013 | 1014 | dist = d[dnew, -1].item() 1015 | 1016 | return dist 1017 | 1018 | 1019 | # In[ ]: 1020 | 1021 | 1022 | if False: 1023 | r = "abcdddee" 1024 | h = "abcddde" 1025 | 1026 | get_ipython().run_line_magic('timeit', 'levenshtein_distance(r, h)') 1027 | 1028 | jitted = torch.jit.script(levenshtein_distance) 1029 | get_ipython().run_line_magic('timeit', 'jitted(r, h)') 1030 | 1031 | get_ipython().run_line_magic('timeit', 'levenshtein_distance_list(r, h)') 1032 | 1033 | jitted = torch.jit.script(levenshtein_distance_list) 1034 | # %timeit jitted(r, h) 1035 | 1036 | get_ipython().run_line_magic('timeit', 'levenshtein_distance_array(r, h)') 1037 | 1038 | jitted = torch.jit.script(levenshtein_distance_array) 1039 | # %timeit jitted(r, h) 1040 | 1041 | 1042 | # # Train 1043 | 1044 | # In[ ]: 1045 | 1046 | 1047 | def collate_fn(batch): 1048 | 1049 | tensors = [b[0] for b in batch if b] 1050 | 1051 | tensors_lengths = torch.tensor( 1052 | [model_length_function(t) for t in tensors], dtype=torch.long, device=tensors[0].device 1053 | ) 1054 | 1055 | tensors = torch.nn.utils.rnn.pad_sequence(tensors, batch_first=True) 1056 | tensors = tensors.transpose(1, -1) 1057 | 1058 | targets = [b[1] for b in batch if b] 1059 | target_lengths = torch.tensor( 1060 | [target.shape[0] for target in targets], dtype=torch.long, device=tensors.device 1061 | ) 1062 | targets = torch.nn.utils.rnn.pad_sequence(targets, batch_first=True) 1063 | 1064 | return tensors, targets, tensors_lengths, target_lengths 1065 | 1066 | 1067 | # In[ ]: 1068 | 1069 | 1070 | if args.jit: 1071 | model = torch.jit.script(model) 1072 | 1073 | if not args.distributed: 1074 | model = torch.nn.DataParallel(model) 1075 | else: 1076 | model.cuda() 1077 | model = torch.nn.parallel.DistributedDataParallel(model) 1078 | # model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=True) 1079 | 1080 | model = model.to(device, non_blocking=non_blocking) 1081 | print('model cuda', flush=True) 1082 | # model.apply(weight_init) 1083 | 1084 | 1085 | # In[ ]: 1086 | 1087 | 1088 | def count_parameters(model): 1089 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 1090 | 1091 | 1092 | if not args.distributed or os.environ['SLURM_PROCID'] == '0': 1093 | n = count_parameters(model) 1094 | print(f"Number of parameters: {n}", flush=True) 1095 | # Each float32 is 4 bytes. 1096 | print(f"Approximate space taken: {n * 4 / (10 ** 6):.1f} MB", flush=True) 1097 | 1098 | if False: 1099 | print("Total memory: ", torch.cuda.get_device_properties( 1100 | 0).total_memory / 10**6) # Convert to MB 1101 | 1102 | t = torch.cuda.get_device_properties(0).total_memory 1103 | c = torch.cuda.memory_cached(0) 1104 | a = torch.cuda.memory_allocated(0) 1105 | f = c-a # free inside cache 1106 | 1107 | print("Free memory inside cache: ", f) 1108 | 1109 | 1110 | # In[ ]: 1111 | 1112 | 1113 | print(torch.cuda.memory_summary(), flush=True) 1114 | 1115 | 1116 | # In[ ]: 1117 | 1118 | 1119 | optimizer = Optimizer(model.parameters(), **optimizer_params) 1120 | scheduler = ExponentialLR(optimizer, gamma=args.gamma) 1121 | # scheduler = ReduceLROnPlateau(optimizer, patience=2, threshold=1e-3) 1122 | 1123 | criterion = torch.nn.CTCLoss( 1124 | blank=coder.mapping[char_blank], zero_infinity=zero_infinity) 1125 | # criterion = nn.MSELoss() 1126 | # criterion = torch.nn.NLLLoss() 1127 | 1128 | best_loss = 1. 
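# Note (not part of the original script): the two-row Levenshtein variant above
# keeps only the previous and current rows of the dynamic-programming table.
# A minimal standalone sketch with the same recurrence, for reference; names
# here are illustrative, not from the source:
#
#   def levenshtein_example(r, h):
#       prev = list(range(len(h) + 1))
#       for i in range(1, len(r) + 1):
#           cur = [i] + [0] * len(h)
#           for j in range(1, len(h) + 1):
#               if r[i - 1] == h[j - 1]:
#                   cur[j] = prev[j - 1]
#               else:
#                   cur[j] = 1 + min(prev[j - 1], cur[j - 1], prev[j])
#           prev = cur
#       return prev[-1]
#
#   assert levenshtein_example("abcdddee", "abcddde") == 1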
1129 | 1130 | 1131 | # In[ ]: 1132 | 1133 | 1134 | loader_training = DataLoader( 1135 | training, batch_size=batch_size, collate_fn=collate_fn, **data_loader_training_params 1136 | ) 1137 | 1138 | loader_validation = DataLoader( 1139 | validation, batch_size=batch_size, collate_fn=collate_fn, **data_loader_validation_params 1140 | ) 1141 | 1142 | print("Length of data loaders: ", len(loader_training), 1143 | len(loader_validation), flush=True) 1144 | 1145 | # num_features = next(iter(loader_training))[0].shape[1] 1146 | # print(num_features, flush=True) 1147 | 1148 | 1149 | # In[ ]: 1150 | 1151 | 1152 | def forward_loss(inputs, targets, tensors_lengths, target_lengths): 1153 | 1154 | inputs = inputs.to(device, non_blocking=non_blocking) 1155 | targets = targets.to(device, non_blocking=non_blocking) 1156 | 1157 | # keep batch first for data parallel 1158 | outputs = model(inputs).transpose(0, 1) 1159 | 1160 | # this_batch_size = outputs.shape[1] 1161 | # seq_len = outputs.shape[0] 1162 | # input_lengths = torch.full((this_batch_size,), seq_len, dtype=torch.long, device=outputs.device) 1163 | # input_lengths = tensors_lengths 1164 | 1165 | # CTC 1166 | # outputs: input length, batch size, number of classes (including blank) 1167 | # targets: batch size, max target length 1168 | # input_lengths: batch size 1169 | # target_lengths: batch size 1170 | 1171 | return criterion(outputs, targets, tensors_lengths, target_lengths) 1172 | 1173 | 1174 | inds = random.sample(range(args.batch_size), k=2) 1175 | 1176 | 1177 | def forward_decode(inputs, targets, decoder): 1178 | 1179 | inputs = inputs.to(device, non_blocking=True) 1180 | output = model(inputs).to("cpu") 1181 | output = decoder(output) 1182 | 1183 | output = decode(output.tolist()) 1184 | target = decode(targets.tolist()) 1185 | 1186 | print_length = 20 1187 | for i in inds: 1188 | output_print = output[i].ljust(print_length)[:print_length] 1189 | target_print = target[i].ljust(print_length)[:print_length] 1190 | print( 1191 | f"Epoch: {epoch:4} Target: {target_print} Output: {output_print}", flush=True) 1192 | 1193 | cers = [levenshtein_distance(a, b) for a, b in zip(target, output)] 1194 | cers_normalized = [d/len(a) for a, d in zip(target, cers)] 1195 | cers = statistics.mean(cers) 1196 | cers_normalized = statistics.mean(cers_normalized) 1197 | 1198 | output = [o.split(char_space) for o in output] 1199 | target = [o.split(char_space) for o in target] 1200 | 1201 | wers = [levenshtein_distance(a, b) for a, b in zip(target, output)] 1202 | wers_normalized = [d/len(a) for a, d in zip(target, wers)] 1203 | wers = statistics.mean(wers) 1204 | wers_normalized = statistics.mean(wers_normalized) 1205 | 1206 | print(f"Epoch: {epoch:4} CER: {cers:1.5f} WER: {wers:1.5f}", flush=True) 1207 | 1208 | return cers, wers, cers_normalized, wers_normalized 1209 | 1210 | 1211 | # In[ ]: 1212 | 1213 | 1214 | history_loader = defaultdict(list) 1215 | history_training = defaultdict(list) 1216 | history_validation = defaultdict(list) 1217 | 1218 | if args.resume and os.path.isfile(CHECKPOINT_filename): 1219 | print("Checkpoint: loading '{}'".format(CHECKPOINT_filename)) 1220 | checkpoint = torch.load(CHECKPOINT_filename) 1221 | 1222 | args.start_epoch = checkpoint['epoch'] 1223 | best_loss = checkpoint['best_loss'] 1224 | history_training = checkpoint['history_training'] 1225 | history_validation = checkpoint['history_validation'] 1226 | 1227 | model.load_state_dict(checkpoint['state_dict']) 1228 | optimizer.load_state_dict(checkpoint['optimizer']) 1229 | 
scheduler.load_state_dict(checkpoint['scheduler']) 1230 | 1231 | print("Checkpoint: loaded '{}' at epoch {}".format( 1232 | CHECKPOINT_filename, checkpoint['epoch'])) 1233 | print(tabulate(history_training, headers="keys"), flush=True) 1234 | print(tabulate(history_validation, headers="keys"), flush=True) 1235 | else: 1236 | print("Checkpoint: not found") 1237 | 1238 | save_checkpoint({ 1239 | 'epoch': args.start_epoch, 1240 | 'state_dict': model.state_dict(), 1241 | 'best_loss': best_loss, 1242 | 'optimizer': optimizer.state_dict(), 1243 | 'scheduler': scheduler.state_dict(), 1244 | 'history_training': history_training, 1245 | 'history_validation': history_validation, 1246 | }, False) 1247 | 1248 | 1249 | # In[ ]: 1250 | 1251 | 1252 | with tqdm(total=args.epochs, unit_scale=1, disable=args.distributed) as pbar: 1253 | for epoch in range(args.start_epoch, args.epochs): 1254 | torch.cuda.reset_max_memory_allocated() 1255 | model.train() 1256 | 1257 | sum_loss = 0. 1258 | total_norm = 0. 1259 | for inputs, targets, tensors_lengths, target_lengths in bg_iterator(loader_training, maxsize=2): 1260 | 1261 | loss = forward_loss( 1262 | inputs, targets, tensors_lengths, target_lengths) 1263 | sum_loss += loss.item() 1264 | 1265 | optimizer.zero_grad() 1266 | loss.backward() 1267 | 1268 | norm = 0. 1269 | if clip_norm > 0: 1270 | norm = torch.nn.utils.clip_grad_norm_( 1271 | model.parameters(), clip_norm) 1272 | total_norm += norm 1273 | elif args.gradient: 1274 | for p in list(filter(lambda p: p.grad is not None, model.parameters())): 1275 | norm += p.grad.data.norm(2).item() ** 2 1276 | norm = norm ** .5 1277 | total_norm += norm 1278 | 1279 | optimizer.step() 1280 | 1281 | memory = torch.cuda.max_memory_allocated() 1282 | 1283 | history_loader["epoch"].append(epoch) 1284 | history_loader["n"].append(pbar.n) 1285 | history_loader["memory"].append(memory) 1286 | 1287 | if SIGNAL_RECEIVED: 1288 | save_checkpoint({ 1289 | 'epoch': epoch, 1290 | 'state_dict': model.state_dict(), 1291 | 'best_loss': best_loss, 1292 | 'optimizer': optimizer.state_dict(), 1293 | 'scheduler': scheduler.state_dict(), 1294 | 'history_training': history_training, 1295 | 'history_validation': history_validation, 1296 | }, False) 1297 | trigger_job_requeue() 1298 | 1299 | pbar.update(1/len(loader_training)) 1300 | 1301 | total_norm = (total_norm ** .5) / len(loader_training) 1302 | if total_norm > 0: 1303 | print( 1304 | f"Epoch: {epoch:4} Gradient: {total_norm:4.5f}", flush=True) 1305 | 1306 | # Average loss 1307 | sum_loss = sum_loss / len(loader_training) 1308 | sum_loss_str = f"Epoch: {epoch:4} Train: {sum_loss:4.5f}" 1309 | 1310 | scheduler.step() 1311 | 1312 | memory = torch.cuda.max_memory_allocated() 1313 | print(f"memory after training: {memory}", flush=True) 1314 | 1315 | history_training["epoch"].append(epoch) 1316 | history_training["gradient_norm"].append(total_norm) 1317 | history_training["sum_loss"].append(sum_loss) 1318 | history_training["max_memory_allocated"].append(memory) 1319 | 1320 | if not epoch % args.print_freq or epoch == args.epochs - 1: 1321 | 1322 | with torch.no_grad(): 1323 | 1324 | # Switch to evaluation mode 1325 | model.eval() 1326 | 1327 | sum_loss = 0. 
1328 | sum_out_greedy = [0, 0, 0, 0] 1329 | sum_out_viterbi = [0, 0, 0, 0] 1330 | 1331 | for inputs, targets, tensors_lengths, target_lengths in bg_iterator(loader_validation, maxsize=2): 1332 | sum_loss += forward_loss(inputs, targets, 1333 | tensors_lengths, target_lengths).item() 1334 | 1335 | if True: 1336 | out_greedy = forward_decode( 1337 | inputs, targets, greedy_decode) 1338 | for i in range(len(out_greedy)): 1339 | sum_out_greedy[i] += out_greedy[i] 1340 | if args.viterbi_decoder: 1341 | out_viterbi = forward_decode( 1342 | inputs, targets, top_batch_viterbi_decode) 1343 | for i in range(len(out_greedy)): 1344 | sum_out_viterbi[i] += out_viterbi[i] 1345 | 1346 | if SIGNAL_RECEIVED: 1347 | break 1348 | 1349 | # Average loss 1350 | sum_loss = sum_loss / len(loader_validation) 1351 | sum_loss_str += f" Validation: {sum_loss:.5f}" 1352 | print(sum_loss_str, flush=True) 1353 | 1354 | if True: 1355 | for i in range(len(out_greedy)): 1356 | sum_out_greedy[i] /= len(loader_validation) 1357 | print(f"greedy decoder: {sum_out_greedy}", flush=True) 1358 | cer1, wer1, cern1, wern1 = sum_out_greedy 1359 | if args.viterbi_decoder: 1360 | for i in range(len(out_viterbi)): 1361 | sum_out_viterbi[i] /= len(loader_validation) 1362 | print(f"viterbi decoder: {sum_out_viterbi}", flush=True) 1363 | cer2, wer2, cern2, wern2 = sum_out_viterbi 1364 | 1365 | memory = torch.cuda.max_memory_allocated() 1366 | print(f"memory after validation: {memory}", flush=True) 1367 | 1368 | history_validation["epoch"].append(epoch) 1369 | history_validation["max_memory_allocated"].append(memory) 1370 | history_validation["sum_loss"].append(sum_loss) 1371 | 1372 | if True: 1373 | history_validation["greedy_cer"].append(cer1) 1374 | history_validation["greedy_cer_normalized"].append(cern1) 1375 | history_validation["greedy_wer"].append(wer1) 1376 | history_validation["greedy_wer_normalized"].append(wern1) 1377 | if args.viterbi_decoder: 1378 | history_validation["viterbi_cer"].append(cer2) 1379 | history_validation["viterbi_cer_normalized"].append(cern2) 1380 | history_validation["viterbi_wer"].append(wer2) 1381 | history_validation["viterbi_wer_normalized"].append(wern2) 1382 | 1383 | is_best = sum_loss < best_loss 1384 | best_loss = min(sum_loss, best_loss) 1385 | save_checkpoint({ 1386 | 'epoch': epoch + 1, 1387 | 'state_dict': model.state_dict(), 1388 | 'best_loss': best_loss, 1389 | 'optimizer': optimizer.state_dict(), 1390 | 'scheduler': scheduler.state_dict(), 1391 | 'history_training': history_training, 1392 | 'history_validation': history_validation, 1393 | }, is_best) 1394 | 1395 | print(tabulate(history_training, headers="keys"), flush=True) 1396 | print(tabulate(history_validation, headers="keys"), flush=True) 1397 | print(torch.cuda.memory_summary(), flush=True) 1398 | 1399 | # scheduler.step(sum_loss) 1400 | 1401 | # Create an empty file HALT_filename, mark the job as finished 1402 | if epoch == args.epochs - 1: 1403 | open(HALT_filename, 'a').close() 1404 | 1405 | 1406 | # In[ ]: 1407 | 1408 | 1409 | print(tabulate(history_training, headers="keys"), flush=True) 1410 | print(tabulate(history_validation, headers="keys"), flush=True) 1411 | print(torch.cuda.memory_summary(), flush=True) 1412 | 1413 | 1414 | # In[ ]: 1415 | 1416 | 1417 | print(tabulate(history_loader, headers="keys"), flush=True) 1418 | 1419 | 1420 | # In[ ]: 1421 | 1422 | 1423 | plt.plot(history_loader["epoch"], 1424 | history_loader["memory"], label="memory") 1425 | 1426 | 1427 | # In[ ]: 1428 | 1429 | 1430 | history_validation["epoch"] 1431 
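# Note (not part of the original script): `collate_fn` above pads variable-length
# spectrograms and targets so CTC loss can consume a batch. A minimal sketch of
# the padding step with dummy tensors, assuming feature tensors shaped
# (time, n_features) as produced by `process_datapoint`:
#
#   import torch
#
#   feats = [torch.randn(50, 13), torch.randn(80, 13)]          # two utterances
#   labels = [torch.tensor([3, 4, 5]), torch.tensor([6, 7])]    # encoded targets
#
#   padded_feats = torch.nn.utils.rnn.pad_sequence(feats, batch_first=True)
#   padded_labels = torch.nn.utils.rnn.pad_sequence(labels, batch_first=True)
#   feat_lengths = torch.tensor([f.shape[0] for f in feats])
#   label_lengths = torch.tensor([l.shape[0] for l in labels])
#
#   # padded_feats: (batch, max_time, 13); lengths are passed to CTCLoss so the
#   # padded frames and padded label positions are ignored.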
| 1432 | 1433 | # In[ ]: 1434 | 1435 | 1436 | if not args.distributed or os.environ['SLURM_PROCID'] == '0': 1437 | 1438 | if "greedy_cer" in history_validation: 1439 | plt.plot(history_validation["epoch"], 1440 | history_validation["greedy_cer"], label="greedy") 1441 | if "viterbi_cer" in history_validation: 1442 | plt.plot(history_validation["epoch"], 1443 | history_validation["viterbi_cer"], label="viterbi") 1444 | plt.legend() 1445 | plt.savefig(os.path.join(args.figures, "cer.png") 1446 | 1447 | 1448 | # In[ ]: 1449 | 1450 | 1451 | if not args.distributed or os.environ['SLURM_PROCID'] == '0': 1452 | 1453 | if "greedy_wer" in history_validation: 1454 | plt.plot(history_validation["epoch"], 1455 | history_validation["greedy_wer"], label="greedy") 1456 | if "viterbi_wer" in history_validation: 1457 | plt.plot(history_validation["epoch"], 1458 | history_validation["viterbi_wer"], label="viterbi") 1459 | plt.legend() 1460 | plt.savefig(os.path.join(args.figures, "wer.png") 1461 | 1462 | 1463 | # In[ ]: 1464 | 1465 | 1466 | if not args.distributed or os.environ['SLURM_PROCID'] == '0': 1467 | 1468 | if "greedy_cer_normalized" in history_validation: 1469 | plt.plot(history_validation["epoch"], 1470 | history_validation["greedy_cer_normalized"], label="greedy") 1471 | if "viterbi_cer_normalized" in history_validation: 1472 | plt.plot(history_validation["epoch"], 1473 | history_validation["viterbi_cer_normalized"], label="viterbi") 1474 | plt.legend() 1475 | plt.savefig(os.path.join(args.figures, "cer_normalized.png") 1476 | 1477 | 1478 | # In[ ]: 1479 | 1480 | 1481 | if not args.distributed or os.environ['SLURM_PROCID'] == '0': 1482 | 1483 | if "greedy_wer_normalized" in history_validation: 1484 | plt.plot(history_validation["epoch"], 1485 | history_validation["greedy_wer_normalized"], label="greedy") 1486 | if "viterbi_wer_normalized" in history_validation: 1487 | plt.plot(history_validation["epoch"], 1488 | history_validation["viterbi_wer_normalized"], label="viterbi") 1489 | plt.legend() 1490 | plt.savefig(os.path.join(args.figures, "wer_normalized.png") 1491 | 1492 | 1493 | # In[ ]: 1494 | 1495 | 1496 | if not args.distributed or os.environ['SLURM_PROCID'] == '0': 1497 | 1498 | plt.plot(history_training["epoch"], 1499 | history_training["sum_loss"], label="training") 1500 | plt.plot(history_validation["epoch"], 1501 | history_validation["sum_loss"], label="validation") 1502 | plt.legend() 1503 | plt.savefig(os.path.join(args.figures, "sum_loss.png") 1504 | 1505 | 1506 | # In[ ]: 1507 | 1508 | 1509 | if not args.distributed or os.environ['SLURM_PROCID'] == '0': 1510 | 1511 | plt.plot(history_training["epoch"], 1512 | history_training["sum_loss"], label="training") 1513 | plt.plot(history_validation["epoch"], 1514 | history_validation["sum_loss"], label="validation") 1515 | plt.yscale("log") 1516 | plt.legend() 1517 | plt.savefig(os.path.join(args.figures, "log_sum_loss.png") 1518 | 1519 | 1520 | # In[ ]: 1521 | 1522 | 1523 | if not args.distributed or os.environ['SLURM_PROCID'] == '0': 1524 | print(torch.cuda.memory_summary(), flush=True) 1525 | 1526 | 1527 | # In[ ]: 1528 | 1529 | 1530 | # Print performance 1531 | pr.disable() 1532 | s = StringIO() 1533 | ps = ( 1534 | pstats 1535 | .Stats(pr, stream=s) 1536 | .strip_dirs() 1537 | .sort_stats("cumtime") 1538 | .print_stats(20) 1539 | ) 1540 | print(s.getvalue(), flush=True) 1541 | print("stop time: {}".format(str(datetime.now())), flush=True) 1542 | 1543 | -------------------------------------------------------------------------------- 
/Plot_JSON.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 128, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import json" 11 | ] 12 | }, 13 | { 14 | "cell_type": "raw", 15 | "metadata": {}, 16 | "source": [ 17 | "Namespace(\n", 18 | " batch_size=128,\n", 19 | " checkpoint='/checkpoint/vincentqb/checkpoint/checkpoint-27818024-128-.6-.99-13-.8-0.-adadelta-exponential.pth.tar',\n", 20 | " clip_grad=0.0, dataset='librispeech', decoder='greedy', distributed=False, epochs=200, eps=1e-08,\n", 21 | " gamma=0.99, jit=False, learning_rate=0.6, momentum=0.8, n_bins=13,\n", 22 | " optimizer='adadelta', print_freq=10, progress_bar=False, rho=0.95, scheduler='exponential',\n", 23 | " seed=0, start_epoch=0, weight_decay=1e-05, workers=0, world_size=8\n", 24 | ")" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 129, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "data": { 34 | "text/html": [ 35 | "
[rendered HTML DataFrame output removed; the same table is kept below in the text/plain representation]

44621 rows × 12 columns

\n", 236 | "
" 237 | ], 238 | "text/plain": [ 239 | " group loss n lr cer wer \\\n", 240 | "0 train_iteration 19.149326 0 NaN NaN NaN \n", 241 | "1 train_iteration 18.041286 0 NaN NaN NaN \n", 242 | "2 train_iteration 5.141327 0 NaN NaN NaN \n", 243 | "3 train_iteration 30.528746 0 NaN NaN NaN \n", 244 | "4 train_iteration 14.558473 0 NaN NaN NaN \n", 245 | "... ... ... .. ... ... ... \n", 246 | "44616 train_iteration 0.027564 0 NaN NaN NaN \n", 247 | "44617 train_iteration 0.032508 0 NaN NaN NaN \n", 248 | "44618 train_iteration 0.033625 0 NaN NaN NaN \n", 249 | "44619 train_epoch 0.023257 0 [0.08119980294421923] NaN NaN \n", 250 | "44620 validation 4.449527 0 NaN 94023.0 39818.0 \n", 251 | "\n", 252 | " cer over dataset length wer over dataset length \\\n", 253 | "0 NaN NaN \n", 254 | "1 NaN NaN \n", 255 | "2 NaN NaN \n", 256 | "3 NaN NaN \n", 257 | "4 NaN NaN \n", 258 | "... ... ... \n", 259 | "44616 NaN NaN \n", 260 | "44617 NaN NaN \n", 261 | "44618 NaN NaN \n", 262 | "44619 NaN NaN \n", 263 | "44620 34.978795 14.813244 \n", 264 | "\n", 265 | " cer over target length wer over target length target length \\\n", 266 | "0 NaN NaN NaN \n", 267 | "1 NaN NaN NaN \n", 268 | "2 NaN NaN NaN \n", 269 | "3 NaN NaN NaN \n", 270 | "4 NaN NaN NaN \n", 271 | "... ... ... ... \n", 272 | "44616 NaN NaN NaN \n", 273 | "44617 NaN NaN NaN \n", 274 | "44618 NaN NaN NaN \n", 275 | "44619 NaN NaN NaN \n", 276 | "44620 0.334693 14.813244 2688.0 \n", 277 | "\n", 278 | " dataset length \n", 279 | "0 NaN \n", 280 | "1 NaN \n", 281 | "2 NaN \n", 282 | "3 NaN \n", 283 | "4 NaN \n", 284 | "... ... \n", 285 | "44616 NaN \n", 286 | "44617 NaN \n", 287 | "44618 NaN \n", 288 | "44619 NaN \n", 289 | "44620 2688.0 \n", 290 | "\n", 291 | "[44621 rows x 12 columns]" 292 | ] 293 | }, 294 | "execution_count": 129, 295 | "metadata": {}, 296 | "output_type": "execute_result" 297 | } 298 | ], 299 | "source": [ 300 | "# Namespace(batch_size=128, checkpoint='/checkpoint/vincentqb/checkpoint/checkpoint-27818024-128-.6-.99-13-.8-0.-adadelta-exponential.pth.tar', clip_grad=0.0, dataset='librispeech', decoder='greedy', distributed=False, epochs=200, eps=1e-08, gamma=0.99, jit=False, learning_rate=0.6, momentum=0.8, n_bins=13, optimizer='adadelta', print_freq=10, progress_bar=False, rho=0.95, scheduler='exponential', seed=0, start_epoch=0, weight_decay=1e-05, workers=0, world_size=8)\n", 301 | "filename = \"/checkpoint/vincentqb/jobs/audio-27817666-1.out\"\n", 302 | "\n", 303 | "with open(filename, \"r\") as f:\n", 304 | " data = f.read()\n", 305 | " \n", 306 | "data = data.replace(\"0: {\", \"{\").replace(\"'\", '\"')\n", 307 | "data = [json.loads(l) for l in data.splitlines()]\n", 308 | "data = pd.DataFrame(data)\n", 309 | "\n", 310 | "data" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 130, 316 | "metadata": {}, 317 | "outputs": [ 318 | { 319 | "data": { 320 | "text/plain": [ 321 | "2688.0" 322 | ] 323 | }, 324 | "execution_count": 130, 325 | "metadata": {}, 326 | "output_type": "execute_result" 327 | } 328 | ], 329 | "source": [ 330 | "# This is the number of iterations for one epoch\n", 331 | "data[\"dataset length\"].max()" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 131, 337 | "metadata": {}, 338 | "outputs": [ 339 | { 340 | "data": { 341 | "text/plain": [ 342 | "" 343 | ] 344 | }, 345 | "execution_count": 131, 346 | "metadata": {}, 347 | "output_type": "execute_result" 348 | }, 349 | { 350 | "data": { 351 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXwAAAD4CAYAAADvsV2wAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nO3deXxU9b3/8dd3JnsiYQuIbAmLLCoiIosouAsi9brW7dprrcttbW8X6w+12traFq231d62V61ar7W12tZaFRE3tCogBlQWCTshAUKAkIWQfb6/P84kmYTsmck5mXk/H488MnNm5sx3jvKebz7f7/keY61FRESin8/tBoiISM9Q4IuIxAgFvohIjFDgi4jECAW+iEiMiHO7AW0ZOHCgzczMdLsZIiK9xurVqw9YazNaeszTgZ+ZmUl2drbbzRAR6TWMMbmtPaaSjohIjPBk4BtjFhhjnigpKXG7KSIiUcOTgW+tfdVae0t6errbTRERiRqeDHwREQk/Bb6ISIxQ4IuIxAgFvohIjIjKwM/eWcSmgjK3myEi4imePvGqq654bAUAOxfNd7klIiLe4ckevubhi4iEnycDX/PwRUTCz5OBLyIi4afAFxGJEQp8EZEYocAXEYkRCnwRkRihwBcRiREKfBGRGKHAFxGJEZ4MfJ1pKyISfp4MfJ1pKyISfp4MfBERCT8FvohIjFDgi4jECAW+iEiMUOCLiMQIBb6ISIxQ4IuIxAgFvohIjFDgi4jECAW+iEiMUOCLiMQIBb6ISIxQ4IuIxIgeC3xjzChjzFPGmL/11HuKiEijbgW+MeZpY0yhMWZ9s+1zjTGbjDFbjTELAay12621N3Xn/UREpOu628N/BpgbusEY4wd+C8wDJgLXGGMmdvN9RESkm7oV+NbafwFFzTZPA7YGe/TVwF+ASzq6T2PMLcaYbGNM9v79+7vTPBERCRGJGv5QIC/kfj4w1BgzwBjzGHCKMeau1l5srX3CWjvVWjs1IyMjAs0TEYlNcRHYp2lhm7XWHgRui8D7iYhIB0Sih58PDA+5PwzY05kd6CLmIiLhF4nA/wQYa4zJMsYkAFcDr3RmB7qIuYhI+HV3WubzwApgnDEm3xhzk7W2FrgdWApsBF601m7oflNFRKQ7ulXDt9Ze08r214HXu7pfY8wCYMGYMWO6ugsREWnGk0srqKQjIhJ+ngx8EREJPwW+iEiM8GTga1qmiEj4eTLwVcMXEQk/Twa+iIiEnwJfRCRGeDLwe1MNv7KmjtyD5W43Q0SkXZ4M/N5Uw//W858y5xfvUV0bcLspIiJt8mTg9ybvb3bW7K8LWJdbIiLSNgV+N/mMsxq0RYEvIt7mycAPVw3/UHl1mFrUumDeow6+iHidJwM/XDX8XUVHwtSi1tVf7cVaJb6IeJsnAz9cHlj8RcTfwzSUdEREwuBwIez8MCK7jsQlDj3jk52HIv4e9SUddfBFpFOshZJ82Pu581Ow1vldthcwcPduSEgN61tGdeADfJZXzOThfSO2f5V0RKRdgQAUbYe9nzUG+97PoSLYKTU+GDgOsubAkEkw5GTwJ4S9GVEf+I+8vZlnbpwWsf03lHSU9yJSWQrFu6A4Fw7lwqEdULDeCfnqw85z/AkwaCJMWOAE+5DJzv2ElIg3z5OBH84rXsX5IjtM0VDSiei7iIgn1FQ4gX4o1wn1+mCvD/mKZmXkhDQYfAJMvtYJ92MnQcZ4iAt/770jPBn41tpXgVenTp16c3f3lRBn2n9SN6ikIxKFyg865Ze9n8O+DY3BXl7Y9Hn+ROg7AvqNhKGnNt7uOwL6ZkJK/8ZeoQd4MvDDacn6gojuX7N0RHq5w4VOsO/5rDHkS/IaH08fAf0z4fgLg2Ge2RjsqYMgwlWEcIr6wI90x7uxhx/Z9xGRbrIWygoaQ31P8HfZnsbnDBgDw6fBtFuC9fVJkNzPvTaHWdQHfqQ11vCV+CKeUlsNe9bAjg8gf5UT8A0lGQMDj4fMM+C4yY319aQ+rjY50qIy8JfdcRYXPPw2CdRQTjKVNXUkxfsj9G4atRXxhLpap/e+41+w8wPYtRJqgmfbZ0yAMec5wX7cZBh8IiSmudteF0Rl4GcNTGWWbz2/j/9v1tix/PpHL7Otz3S+e8OVjBsS3iWXfcp7EXcE6pzpjjs+cAI+dwVUlzmPZUyAU66HzDOdXnxKf3fb6hFRGfgAu+wgnqy7iNm+tdwZ/yJUvMiBx+6HSXNhzLkw+hxIG9Tt9yksqwIgoCK+SGQFAlC4ISTgP4LK4AKLA8bCpCuDAX8mpGW421aP8mTgh2Me/psP3MSYe47jQa4hg2LO8K1jtn8tl257F9a96Dzp2JNg9LnOF8DwGd2aG1tbp8AXCStrnbNTty+D7e8768tUFDmP9cuCiZdA5mynB99niLtt7SWMl+ePT5061WZnZ3f59et3l3Dx/zRdhMgQYKLJ5d7xe5lctZrEgk8wgVqIT4WsM2H0uWw+ZjoZIyfQL7X9L4DMhYsB+ODOsxneP/JnyolEtbJ9sON9J+C3vwel+c72PsMga7bzbzTzTOg73NVmepkxZrW1dmpLj3myhx8uJw5N56Yzsnjqwx0N2yw+Ntgsrt6YBZxOKhXM9H3B7Nq1zM75lMzNb3A8kBfIYKPNYMKoEZSSSrkvjdr4dMaOHEZynwGQ1BeS+5Jl9lJiU6GuxrXPKdJrVZY6pZnt7zkhv3+jsz2prxPwZ34HRp0N/Ud56gSm3iqqe/j1DhyuYuoDb3fouSNNAbN9a5np+4IMU0w65aSbctIpJ8m0HerlNpESUimxqWyzQ/k4MJ6PAxPYYodi8XHysHSevWk66cnx3f5MIr1SbRXkf9IY8LtXg62DuCQYMRNGnQWj5jhTJH2RmlkX3drq4cdE4IfadfAIs3+xrEuvTaSaPhyhT/ALoP6LoPn9vqacE3w7OM449cYim8aqwARWBb8ANtoRBPDxzvfmkDUgFZ9PPReJYocLIWcx5LwGOz+C2gpndcjjpjQG/LBpEJ/kdkujggK/gw5X1VJSUcPBw1XUBAdhV2w7wMNvbu7C3izDTSEzfBuZZnKY7tvICJ9zwfNSm8KqwLiGL4D1Nov7vjSJ66aPIM7fe07TFmlV8S7Y+BpsfBV2rQCsM9A69gIn4EfOguTILVseyxT4HrBxbylfffRlpvs2Ms23kem+HEb79gJw2CaxOnB8Qwlo6AmzePS6aQ3r9Ij0Cvs3w8ZXnJDf+5mzbfCJzjLAExY4SwDr/+mIU+B7TFVtHeN+8AYZHGK6Lyf4JZDDOJ8zI6HIpvHXujnkZl3Nt6+8gEF99KeueJC1zlo0G191fg5scrYPO80J+PEXw4DR7rYxBinwPay6NsDCv6/lpU93059Spvly+JJ/ORf4sokzAd6vm8Sngy/n2/95O/ijelKV9AaBOshb1RjyJbvA+CFzFkz4EoyfD32Oc7uVMU2B3wtYa/ntsq0N4wWDKeJq/zKuiXuXY80h9tj+PF97Dt+84yck9NM/KOlBlaXOyU+b34QtbzoLkP
kTnLPVJyyA4+dB6gC3WylBvS7wQ860vXnLli1uN6fHlVbWMOlHbwLgp47zfGu4zv82s/3rqLF+lgamctF/3INv1GzVRCX8rIUDm2HzUifgd62AQC0kpTtnpk+4GMacH/UrS/ZWvS7w68VSD78l9bX+eplmL9f53+FK//v0NeVsDRxH2hm3UnXCVYwcql6/dENNhbN0QX3IF+c62wedAMdf4MyuGTZNZcVeQIHfyy1Zt5f//NOahvuJVHOxbyXXx73NKb6tHLGJmJMuJ/n0W+C4U1xsqfQqxbuCAf+Ws6RwbQXEp0DWHCfkx5yvJQx6IQV+lCirrOGkYKmn3glmB9f73+YS/3JSTBV2yGTMhAXO4FnGeJV8pNGRIsjPdlaa3PIm7M9xtvfLci7fN/Z8GHmGToDq5RT4UWbLvjLO/9W/mmzrQzmX+j/kMv8HnOzb7mzslwnjLnJ+RszUn+OxJFDnBHreKmcpg7xVcDA4HuaLh5GnB0P+AueyfuoYRA0FfhSy1jLj5++wr7TqqMcGcYjz/Gs4z7easxI24qurchajGnsBjJvnXPlHA27RpeKQ03vPW+Vczm/3GqgqdR5LGeDU34ef5vweOgUSUt1tr0SMAj+KVdbUMf7eN1p9PIVKzvSt5cET80nPexdTUeT08LLODPb+50H6sB5ssXRbIOCc5FQf7nmfNJ70ZHww+IRgwE9zToLSSpMxRYEfI+rX5m+NjwC/mF7BuWY1x+S+if9QsPRz7KRg+M+FY08Gn9bz8ZTqcmdVyV0fQ95KJ+Crgld6Su7vhPrw02D4dGdBshi8Vqs0UuDHkJyCUuY+8kGHnrvp26NJ3PoGbFoCeR8D1in9jDw9+DPL+TJQ7b9nle51gr0+4AvWOfPgwblW64jpTrgPm+YsXaDeu4RQ4MegtfnFfOk3H3XouS/cMoPpgwKw7R1nLnbuR86l5QAS0pxwyZzlfAEcN6Vbl4KUZgJ1UPiF84VbH/DFu5zH4pJh6KnBgJ/h9OKT+7nbXvE8BX4Me+C1L3gy5Ipf7Vl1z7kMOibJ6WXuWu6sX567vPFKRHHJMGyqcx3Rkac75YT45Ai1PkpYC0cOQuluKN0T/NkNez51BlrrB1fTBjtfriNmOAE/ZBL4dbEc6RwFvrDjQDlnP/xeh59/25zRLJw3vnFD+UHnCyB3ufNXQME6wDoDwENPbSwBDT/NOQU/VtTVwuF9jSFetjck2Pc2bqurbvo643POk2gI+OnONFqVZ6SbFPjSYG9JBTN//m6nXjMtqz8/vuQERg1MIyEuOKBbUeyUIXKDfwHs+TRYZzbOGugjZjT+9NZZQNZCZTGU5Ds/xXlQkhe8H/x9eB/YQNPX+ROdFSNDf46pvz0U+gyB1EEaG5GIUODLUd5Yv5fX1jo/nfHyN2YxeXgLVyqqLg/OA//YWWwrbxVUH3YeSx8e8gUw0xl4dHMmUKAOaiud66tWlTm98PoQL85rDPiSvMbPUM+f4HyBpQ9zPld9gPcZ2hjsKf3VUxfXKPClVdW1AVZuP8gNT6/q9Gu/dc4Y9pZU8sClJ5IY1+yC03W1ULgBdq10vgByV8DhAuexpPTGUsaImc5AcP3p/IGAE7LVh50vkaqykNuHobos+Lvc2V5V5iz8VR/grf2uq2q8Xz/jpSUpAxrDPH1403DvOxxSBmraqniaJwLfGJMK/A6oBt6z1v6pvdco8HtOXcCyOvcQVz2+okuvP2f8IA5X1fLirTNbfoK1zgqM9V8Au1Y2ruXiT3Cmg1YfhpojHX/T+FTnjNGEFGcwOS4R4pKCvxOb3U9qdjv4Oz7F6ZnXh3tCSpc+v4hXRCzwjTFPAxcDhdbaE0O2zwUeBfzAk9baRcaYfweKrbWvGmNesNZ+ub39K/Dd095JXK0ZmJbIO9+bw5pdhzh73KC2n3ykqLEEVFniTAFNSHNOHEpIhYRjQm6nQeIxjbcTUsHnb3v/IjEokoE/GzgMPFsf+MYYP7AZOB/IBz4BrgEuAZZYaz8zxvzZWntte/tX4LurpKKGl9bkc/+rX3Tp9WePyyDvUAUPXTGJKSP6UVhWyfb95cwYpasjiURKW4HfrWkC1tp/GWMym22eBmy11m4PvvlfcMI+HxgGfAaoCNoLpCfHc+OsLL4yM5N9ZZXM+cV7VNcG2n9h0LJN+wG47HfLSU+Op6SiBoCdi+ZHpL0i0rZIBO9QIC/kfn5w20vA5caY/wVebe3FxphbjDHZxpjs/fv3R6B50lk+n2FIejKbH5jHZ/edz9C+nT/Rqj7swSkXfeeFz7jl2WwKSirD2VQRaUMkJgK3NB/NWmvLgRvbe7G19gngCXBKOmFum3RT35QEPlp4DgC1dQHG3LOkS/v5x6e7AXjzi30ATBqWziu3nxGeRopIiyLRw88HQq+LNgzY05kdGGMWGGOeKCkpCWvDJLzi/D52LprP/ElDur2vtfklZC5cTObCxYy++3VezM7DWssfV+ZyqLy6/R2ISLu6PS0zWMN/LWTQNg5n0PZcYDfOoO211toNnd23Bm17h7LKGl5ft5erpjrf81l3vU7GMYnsLzv64izd8eebp3P66IFh3adItInkLJ3ngbOAgcA+4IfW2qeMMRcBj+BMy3zaWvvTruxfgd+71dQFGPeDJQTCXJh79qvTmD6qP1W1AfokaXExkVCeOPGqM4wxC4AFY8aMuXnLli1uN0e64eDhKl5fX8CwfsksWbeXF7Pzw7r/Z786jTPHDsQYQ/3/y0bLGkgM63WBX089/OjzWV4xt/1xNT9cMJFFb+SQe7ATZ9Z2wNC+yQ2DyiKxSIEvnvbw0k38ZtnWsO3vxKF92LCnlG+ePYbvXjAubPsV6Yg31hdw23Orefu7cxgzqOcvN9lW4OsEKHHdHReOY+ei+excNJ+Th3V/Lf31u0uxFn797taGmT/v5uwLQ0tF2rdkvbMC7frd3ptl6MkFuUNq+G43RXrYn26eQWFpJf1SErjsf5ez40B5WPb71Wea/qV46sh+/PXWmRRX1NA/NYGcglIeeWsL/3PtKcT71Q+S6OTJwLfWvgq8OnXq1Jvdbov0rLTEONIynD+Dl91xFpU1dQAkxftZvvUA1z75cVjeZ3XuIUbd/ToAiXE+MgeksmlfGZsKyjhxaAxdsUtiiicDX6ReUnzjipgzRw9g4bzxnJbZn+dW5jacrdtdVbUBNu0rAyD34BH2H65ib3El07L6kTUwDb9Ps36k8yzO+OiOA+WM7J+CzwP/HynwpdcwxnDbnNGAU5L50YIT+PmSjcyfNITfLdvGiu0Hu/0e3/jzmha333HB8dx+zthu7196n1U7inhuZS6PXj25Q1N+Q5+xqaCMCx/5F9+/cBzfONv9ErUni5VaWkE6Ij0lnkWXT+LMsRk8f8sMNtx/IV+dlRWR93r4zc1N7ltryT8U3imlEl4/emUDD72R0619VNbUcf2TH/PK53uoqKlj8dq9HH/PkoZSY3t2Fzv/j2TvLOpWO8LFk4FvrX3VWntLerpqqdJxqYlx3LdgYsSWX66f8XP2w+/x63e2csaDyzjn4fcIh
JxKbK2lLtynFkuXPLN8J797b1u39jH+3jeormtcEvzBN3Korguwr7R3rvLqycAX6a6HrzwZgL/cMoNh/ZLDssBbvR0HyvnV206Pf/uBcu742+es2lFE/qEjPPL2Fkbf/TrFR6qpqu1YL1B6XmVNHT96ZQOllTXtP7mLvHiKk2r4EpWuOHUYV5w6DIAP/59z5u1vr4XiI9U88a/tnJbVnxv/8ElY3uulNbt5aU3TAeTJP36LgWkJZP/g/LC8h4TXC5/k8czynfh9hnsvnhjWfbdU5w/N/n2llZz54DJe+vrpTWaE5RSUUlhaxezjM8LanlDq4UtM6ZuSwJ1zx3P2uEH88qqT+ey+yAXygcPVXPX4CjIXLuZ7L34esfeRzqsvu3W1/NZS733ZpkLW5ZcQunqBaeHyIO9tKqS6LsCzK3Y22T73kQ+44elVXWpPR3ky8DVoKz3hsinD6JuSQM5P5hIXoSlzq3Y4g3V/X5NP5sLFLN1QwIuf5LXzKvGqtibp3PiHT1jwmw879Fy3eDLwNWgrPSkp3s/mB+Zx3fQRLI/wwmu3/nE1d/59bcMA8E3PhKes5LbdxRVsLTwckX1f9fgKnluZ2+Hn5x86wud5xQ33z3zoXb72f59QUd3xMZWcgtJu/7dRDV/Eo3w+w08vPQmAHT+/CHBqseVVtZzww6URe993cgr58atfUF5VywvZefzb5OP41ZcnUxewxPWiJR5mLXoXiMwF6lftKGLVjiKunzGyQ88/48FlTe7nFVWQV1TBr97ezN0XTWjy2N9W5/Pl04YzYUifhm2vfL6Hbz3/aZfb22IN3yPhr8AXaSb0H2xqYhw7F82nsKySnL1lnD56QJev49uapz/a0XD75c/28PJnjVcEfebG0zhr3KCjXvP4+9v4+ZIcNv54LskJ/qMe97KV2w+y+1AFlwcH1Ttr9N2vc874Qfz+hhYXhGxVWWXtUdsOV9Uy79EPmnxRtRb2ofV4j+R3p/WeLoSIiwYdk8Ts4zOI8/voyTPk/yM4k6i2LsC8Rz/gxj+swlrb8CVRXOGt6/2WHGl/muPVT6zke39tfxD7SHUtb33RuMrpb5c5q5/WBSxvfbGP1bmHyD0YnsX1qmrruOx3H7V5gpTFNkT+NU+s5NSfvNXh/Xulnu/JHr5WyxQv+/S+C6isqWNwnySstWzbf5ii8hquenxFRN4vc+Fi3v3eHDbuLWXjXti0r6yht9mdUsGbGwoYlZEW1jXbT/7xm6y861zyDh1h6sh+R5U32rr+xo9e2UBe0RGe+o/TALj7pXVN/tp5+M1NTZ5/+f8uBzpTRnLeu7yqluKKpl9MP/jHetbsKuaef6zvwB6gQCdehY8GbcXL0pPjGdwnCXDKP2MGHcO0rP7c/6UTIvae5/z3+w23X/wkn7pgcO4prqC6NtDiawrbCaVb/ria8375fpvP6Ypfv7uFKx9bwevrCo56LOuu11t93TPLd/JOTmHD/R3NrobW3Tr44SpnmYQTfriUX7/T9NKpf13d/qU3W5pi2ZbQ9nqlhu/JwBfpjW6YOZIP7jw74u/z9Ec72F9WBcAVj63gzr855ZHKmjp+8PI6io9U89SHO5j2s3e4+x/rOHi4inX5JTz4Rk6bPex6gYDlTx/ntvpFUllT1+b89e37ndk6uUVOuaUuYFm0JIeDh6s6/BmrawPkFYV3raK3vijgw60HurWPjkS+afWOw83w92RJR6Q3MsYwvH9KkxJD5sLFEX/f5gO9K7YdZNt+J2z//PEu/vzxrobHLpl8HBlpiQxIS2x1f39bk889/1jPwcPVfOvco1cIHX/vG/gM3DJ7NAPTEvjamaNa3M9Db2xieL8UUhP9PPb+tg4H+Jpdh3hhVR5F5Z0bn6itC/DA4o2tPt7dJY5O/NHSVr/oessyGurhi0TQ984/noFpifzzG7N67D3rw74lcx/5gFMfeJt7X269Vl0arG8XH6nh4+0HmbXoXcqrms5wCVh47P1tLQbsyu2NA58PLc2hps4JyapW/mJYm1/Md1/4rOH+Zb9bzgvZHT85rf4vindzCnlm+c7Wn9iBwK+/LkJLWgv7FdsOMu4Hb7S/8yA3B3DVwxeJoG+eO5ZvBnvJOxfN75Eef0f8sdmJTMu3HWBMRhpxfh/LNjl19H2llXz5iZUA5BSUkVd0pMUpoBv3lrb6PnlFFa0+dvbD7zHxuD4sXru3Kx+hQf34xiNfntzm80JXvQynFdtaLhPZVm67SYEv0oOmjuxHdu6hhvsThvRpMzB7yrW//5hBxyQyqE8i63c77Vm8rmkQfzukFx5q3qMfdOk9dxwoD9s1iwHKIrjyZVtaC3NDx2r+PUmBL9KDbp0zmuxns/lo4TkM7ZsMwMNLN/GbZVtda1P9Xx2FZVUUlrU8sFo/BbIrnvrAOWfg7Y372nlm79TZQVg3B209WcPX4mkSrc6fOJidi+Y3hD3AHReO4/MfXuBiqyJrVQ9d7enef27okfdpzjbr43uxlFPPk4GvefgSa9KT49m5aD47F83nxVtnApDg9zFjVH+XWybt6WyPXYO2ItJgWlZ/3vrObMYMSqMuYMO+do+EV/O8N63c9gJP9vBFYt3YwcdgjCHO72PnovnMGjMAgB9fErmzeaVrmvfw2+vwq4YvIm36zTVT+NmlJ3HDzEy3myLNNK/h12uvdNPZpRrCQYEv0gv0S03g2ukjAHjl9lmcMWagyy2SBq302JuupXP0k1r7oogkBb5ILzNpWF+e+9r0hvsjB6Qw6JjWl0qQyGqzht9CN1+DtiLSaevvvxCfgZQE55+xV87ijTXNe+9em4oZSj18kV4qLTGuIewBfnvtFBdbE7taG4RtrSevQVsR6baLTjqWr5812u1mxJzW8ru9YHdj0FYlHZEoYYzhzrnjuXPueEoqaliz6xA3Bi+RKJHT1R67Bm2DtLSCSPekJ8czPcs5Szc53s9DV0xyuUXRq7PTMt0ctPVk4GtpBZHui/c7/7xPGpbOVVOHu9ya6OWVyxd2hEo6IlEq3u/jn9+YRVZGqttNkRC6xKGIRMTJw/s23P76WaPJGpjKiUPTu7yGvRytresEt1W90Zm2IhIxd84dz5VThzNhSB/e//5ZzBw1wO0mRYWudtg1aCsiPWLkgFQev+FU7r14YsO2288e42KLeq+jFk9raRmFkE0atBWRHtcnKZ6bzshquH/HheNcbE3v1VZPvaVwVw1fRFzzyu2z6JMU73Yzeq3mAd7S+jktUQ1fRHrcpGF9yRzozORZdsdZPPHvp7rcot6lrQ6716ZsqocvIg2yBqZSWVPndjN6lY7U8Ft8nQuDtgp8EWliwpA+LLrsJM6bOJhNBWVc9+THbjfJ4zpXw9fyyCLiKVdPcy62Yo517j94+UmkJydw23OrXWyVN3W2hq9BWxHxpAFpiexcNN/tZnhaV0s6Wi1TRDztpa+fzsj+KbyzsZA7/77W7eZ4wtb9h1vcHtrRd6Ne35Iem6VjjBlljHnKGPO3nnpPEQmvKSP6MSAtkatOG86ae89nWmZ/5k8a4nazXLU691CL261t
uxfv2TNtjTFPG2MKjTHrm22fa4zZZIzZaoxZ2NY+rLXbrbU3daexIuId/VMTePG2mfzP1adw/sTBbjdHOqCjPfxngLmhG4wxfuC3wDxgInCNMWaiMeYkY8xrzX4GhbXVIuIZPp/h9zdMdbsZntPebBzP1vCttf8yxmQ22zwN2Gqt3Q5gjPkLcIm19ufAxeFspIh434KTj+PVz/e43QxPCh3HdbOa351B26FAXsj9fGB6a082xgwAfgqcYoy5K/jF0NLzbgFuARgxYkQ3miciPenRL0/mwctPariw+ud5xVzy249cbpU3eGU+fncGbVtqbqtfXtbag9ba26y1o1sL++DznrDWTrXWTs3IyOhG80SkJ/l8piHswVmLf+ei+QxJT3KxVe7yyuycet0J/Hwg9Lppw4Cw/D2na9qKRI8P7jybx2N8fZ7Qer2bJ151J/A/AcYaY7XFCjUAAAmZSURBVLKMMQnA1cAr4WiUrmkrEj3i/D4uPOHYJlffEg+XdIwxzwMrgHHGmHxjzE3W2lrgdmApsBF40Vq7IXJNFZHe7J/fmMXORfO57JShbjclokI78C315t0s83R0ls41rWx/HXg9rC3CKekAC8aM0RV4RKLNL788mWPTk/jde9vcbkqPcXPBtFCeXA9fJR2R6Hbn3PHcOmeU282IiNBs99aQrUcDX0Si313zJrDpgblcNz26pl+3F/KNZR5d8UpEYkhinJ+fXnoS/3XuWLebEjZL1hUA7Z9J69lB256maZkiseWb54zhT19rPG/z5W/McrE13VNdFwBaH5x1s8zjycBXDV8ktsT5fcwaM5CNP57Lm9+ZzeThfVl197luN6tduQfL23y8o2vj9xRPBr6IxKbkBD/HDz4GgEF9vH+G7qaCslYfc2NxtPYo8EXEsyYO6eN2E9rU0f77B1sOsGpHUfBFzqvc+DrwZOCrhi8iACcPd8q6t8z25hTOtko2zWv4Vz2+osl9DdoGqYYvIgDfv3A8V5w6jO+efzw7F8333PV1dxUdafPxlr4ONGgrItKC/qkJPHzlySTF+xu2bXpgLlNGeGNdnp+9ntPqY6rhi4h0U2Kcnz/cOI3zJvS+yypmLlzMff90b8mx7lwARUTEFenJ8Tz5lalU1wY4Ul3LoSM1nP3we243q4mAtdz+pzWtPt7aXwDWWkyECvye7OFr0FZEOiIhzkfflASyBqYydlCa281poqC0kvLqulYfby3TAxEs8nsy8DVoKyKd5fc5CfqERy628tAbm7r0ukAET9byZOCLiHTWnHHOJVFPHt6XJ2+Y6nJrOid7Z1HD7boIdvEV+CISFe68cDwr7jqHwX2SOG/iYHJ+MpfrZ/SOlTgfWtr418DybQci9j4KfBGJCn6fYUh6csP9pHg/D/zbSS62qG3Prsilqtap8YeW89/6ojBi76nAF5Go9vzNM/j+hePcbkaLfvnm5ha2Rq6kY7y2mhs0ucThzVu2bHG7OSISBeoCltF3h/2KrBHRnTOKjTGrrbUtDmJ4soevWToiEm5+n2k4Q3fGqP4ut6ZtReXVEdmvTrwSkZjx7E3TKSytZFRGGrf+MZulG/YxLbM/q0JmyXhBpGbqeLKHLyISCWmJcYzKcE7Qqj/T9cZZmS62qGX/+dxqAhEIfQW+iMSkQX0SAUhNbFroiPO5v+hZdu6hiOxXJR0RiUl3XzSBScP6cubYgRw/OI3N+w5z6sh+1AYsn+cVu928iKyXr8AXkZiUFO/nilOHAfDEv0/lyQ+3c/+XTqSwrJIXP8mnoqaOx97f5lr7IrGAmqZlioi0InPhYtfeu6tTMzUtU0REvBn4IiISfgp8EZFWnDl2YJP7iy5rXJvn4klDero53aZBWxGRVvzfjdN4MTuPU0b0o29KPIP7JLHwpXUA/ObaKby21r0af1co8EVEWuHzGa6e1nSJ5WV3nEVyyEXVQ8078ViWrC/oiaZ1iUo6IiKdkDUwlWPTkwC4brrzZXD5FGd65znjB7nWro5QD19EpIt+eulJ/PRSp67/31edHLFFz8JFgS8iEib9UxN47PopDElPpqi8mikj+3Hy/W+63awGCnwRkTCae6J3Z++ohi8iEkFfP2u0201o4MnAN8YsMMY8UVJS4nZTRES65bLggG69Ef1TeObG0/j8vgt6vC2eXEun3tSpU212drbbzRAR6ZYj1bXkFJSRkZbI8P4pTbZPvG/pUc8/tk8SK+8+t0vv1dZaOqrhi4hEWEpCHFNG9Dtqe2vz+c+fODgi7fBkSUdEJBZEYgnktijwRURc9M735vD+98/qkfdS4IuIuGh0RhojB6SS85O5Ddvi/ZGJZgW+iIgHJMX72fzAPG6dPYrvnD82Iu+hQVsREY9IiPNx10UTIrZ/9fBFRGKEAl9EJEYo8EVEYoQCX0QkRijwRURihAJfRCRGKPBFRGKEAl9EJEZ4enlkY8x+ILeLLx8IHAhjc3o7HY+mdDyOpmPSVG89HiOttRktPeDpwO8OY0x2a2tCxyIdj6Z0PI6mY9JUNB4PlXRERGKEAl9EJEZEc+A/4XYDPEbHoykdj6PpmDQVdccjamv4IiLSVDT38EVEJIQCX0QkRkRd4Btj5hpjNhljthpjFrrdnnAyxjxtjCk0xqwP2dbfGPOWMWZL8He/kMfuCh6HTcaYC0O2n2qMWRd87NcmeCVlY0yiMeaF4PaPjTGZPfn5OssYM9wYs8wYs9EYs8EY81/B7TF5TIwxScaYVcaYz4PH4/7g9pg8HvWMMX5jzKfGmNeC92P3eFhro+YH8APbgFFAAvA5MNHtdoXx880GpgDrQ7Y9BCwM3l4IPBi8PTH4+ROBrOBx8QcfWwXMBAywBJgX3P514LHg7auBF9z+zO0cjyHAlODtY4DNwc8dk8ck2Pa04O144GNgRqwej5Dj8l3gz8Brwfsxezxcb0CY/8POBJaG3L8LuMvtdoX5M2Y2C/xNwJDg7SHAppY+O7A0eHyGADkh268BHg99TvB2HM5Zhsbtz9yJY/NP4HwdEwuQAqwBpsfy8QCGAe8A54QEfswej2gr6QwF8kLu5we3RbPB1tq9AMHfg4LbWzsWQ4O3m29v8hprbS1QAgyIWMvDKPin9Ck4vdqYPSbB8sVnQCHwlrU2po8H8AhwJxAI2RazxyPaAt+0sC1W5522dizaOka98vgZY9KAvwPfttaWtvXUFrZF1TGx1tZZayfj9GynGWNObOPpUX08jDEXA4XW2tUdfUkL26LmeED0BX4+MDzk/jBgj0tt6Sn7jDFDAIK/C4PbWzsW+cHbzbc3eY0xJg5IB4oi1vIwMMbE44T9n6y1LwU3x/QxAbDWFgPvAXOJ3eMxC/iSMWYn8BfgHGPMc8Tu8Yi6wP8EGGuMyTLGJOAMorzicpsi7RXgK8HbX8GpY9dvvzo4iyALGAusCv4JW2aMmRGcaXBDs9fU7+sK4F0bLE56UbD9TwEbrbW/DHkoJo+JMSbDGNM3eDsZOA/IIUaPh7X2LmvtMGttJk4WvGutvZ4YPR5AdA3aBo/zRTizNbYB97jdnjB/tueBvUA
NTs/iJpx64TvAluDv/iHPvyd4HDYRnFUQ3D4VWB987Dc0nnGdBPwV2IozK2GU25+5neNxBs6fz2uBz4I/F8XqMQEmAZ8Gj8d64L7g9pg8Hs2OzVk0DtrG7PHQ0goiIjEi2ko6IiLSCgW+iEiMUOCLiMQIBb6ISIxQ4IuIxAgFvohIjFDgi4jEiP8PDA/NxRltaGcAAAAASUVORK5CYII=\n", 352 | "text/plain": [ 353 | "
" 354 | ] 355 | }, 356 | "metadata": { 357 | "needs_background": "light" 358 | }, 359 | "output_type": "display_data" 360 | } 361 | ], 362 | "source": [ 363 | "data.loc[data[\"group\"] == \"train_iteration\", \"loss\"].plot(logy=True)\n", 364 | "data.loc[data[\"group\"] == \"validation\", \"loss\"].plot(logy=True)" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 132, 370 | "metadata": {}, 371 | "outputs": [ 372 | { 373 | "data": { 374 | "text/plain": [ 375 | "" 376 | ] 377 | }, 378 | "execution_count": 132, 379 | "metadata": {}, 380 | "output_type": "execute_result" 381 | }, 382 | { 383 | "data": { 384 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nO3deXgUVb7/8fdJZ+nsIQshkECCInsSIOyLbIIgoqMo6uAyLrhcR+8wo+LM/TmOM95xHEa96Lgw7jsq7qMjiiKiCCSaIJsgBEgkQBay753z+6MqIQlZOtCdTnd/X89TT1VXV3d/U5BPqk+dOqW01gghhHA/Pq4uQAghxKmRABdCCDclAS6EEG5KAlwIIdyUBLgQQrgp3+78sOjoaJ2YmNidHymEEG4vIyOjQGsd03p9twZ4YmIi6enp3fmRQgjh9pRSB9taL00oQgjhpiTAhRDCTUmACyGEm+rWNnAh3FFdXR25ublUV1e7uhTh4axWK/Hx8fj5+dm1vQS4EJ3Izc0lNDSUxMRElFKuLkd4KK01hYWF5ObmkpSUZNdrpAlFiE5UV1cTFRUl4S2cSilFVFRUl77pSYALYQcJb9Eduvr/TAK8hzpYWMGajFxkuF8hRHskwHsYrTUvfXuQcx/5it++mcX6PfmuLkmI0/L8889z6623drjN+vXr+eabbxz6ucXFxTz++OPtPh8SEuLQzwPIzMzko48+anp87733smLFCod/TiMJ8B4kr6SKq57dwv97dztpib2IDQvg6a/2u7os4cbq6+ud/hk2m+2038MVAe4MrQPc2ewKcKXUAaXUD0qpTKVUurkuUin1qVJqrznv5dxSPZfWmne+z2XOwxtIP3CcP184ghevHcevJifx9U+F7Dhc4uoShYu9+OKLJCcnk5KSwpVXXglAfn4+F198MWPHjmXs2LF8/fXXgHHUt3TpUubMmcNVV13V4n201txxxx2MGDGCkSNHsnr1agAWL17cIniuueYa1qxZg81m44477mDs2LEkJyfz1FNPAUbgzpgxgyuuuIKRI0eeVO9zzz3HWWedxdlnn91UF8AHH3zA+PHjGTVqFLNnz+bo0aMcOHCAJ598kocffpjU1FS++uqrNrcD+PLLL0lNTSU1NZVRo0ZRVlYGwN///vemGv/4xz8CsHz5cvbt20dqaip33HFHh/u3rdcfOHCAoUOHcsMNNzB8+HDmzJlDVVUVAFu3biU5OZmJEyc27c/a2lruueceVq9eTWpqatO+3blzJ9OnT2fgwIGsXLmy03/rruhKN8IZWuuCZo+XA+u01g8opZabj+9yaHVeoLC8hj+8s53/7DhC2oBerLgkhcToYAAuH9efR9ft5ZmvsnlocaqLKxUAf/pgBzsPlzr0PYf1DeOP5w9v9/kdO3Zw//338/XXXxMdHU1RUREAt99+O7/5zW+YMmUKhw4dYu7cuezatQuAjIwMNm7cSGBgYIv3evvtt8nMzCQrK4uCggLGjh3LtGnTuOyyy1i9ejXz58+ntraWdevW8cQTT/DMM88QHh7O1q1bqampYfLkycyZMweALVu2sH379pO6vOXl5fHHP/6RjIwMwsPDmTFjBqNGjQJgypQpfPvttyilePrpp3nwwQf5xz/+wU033URISAi/+93vADh+/Hib261YsYJ//vOfTJ48mfLycqxWK2vXrmXv3r1s2bIFrTULFy5kw4YNPPDAA2zfvp3MzMwO9397r+/fvz979+7ltdde41//+heXXnopa9asYcmSJfzqV79i1apVTJo0ieXLlwPg7+/PfffdR3p6Oo899hhg/DHdvXs3X3zxBWVlZQwePJibb77Z7n7enTmdfuAXANPN5ReA9UiAd8naHUf4/Ts/UFpVz93zhnD91IFYfE6chQ4P9OPSsQm8tOkgd547hD7hVhdWK1zl888/Z9GiRURHRwMQGRkJwGeffcbOnTubtistLW06Il24cOFJ4Q2wceNGLr/8ciwWC7GxsZx99tls3bqVefPmcdttt1FTU8N//vMfpk2bRmBgIGvXrmXbtm289dZbAJSUlLB37178/f0ZN25cm/2VN2/ezPTp04mJMQbPW7x4MXv27AGMPvWLFy8mLy+P2tradvs7t7fd5MmTWbZsGb/85S+56KKLiI+PZ+3ataxdu7bpj0R5eTl79+6lf//+du3fjl6flJREaqpx8DRmzBgOHDhAcXExZWVlTJo0CYArrriCDz/8sN33P++88wgICCAgIIDevXtz9OhR4uPj7aqtM/YGuAbWKqU08JTWehUQq7XOA9Ba5ymlejukIi9QUlXHnz7Ywdvf/czwvmG8cn0qg/uEtrnttZOTeOGbAzz/zQGWzxvSzZWK1jo6UnYWrXWb3csaGhrYtGlTm0EdHBzc7nu1xWq1Mn36dD755BNWr17N5Zdf3rT9o48+yty5c1tsv379+nY/A9rvDvfrX/+aZcuWsXDhQtavX8+9997bpe2WL1/Oeeedx0cffcSECRP47LPP0Fpz9913c+ONN7Z4jwMHDrRbX3MdvT4gIKDpscVioaqqqss9w1q/hyPPS9h7EnOy1no0MA/4L6XUNHs/QCm1VCmVrpRKz8+XHhUb9xZw7iMbeC/zMLfNPJN3bpncbngDJEQGMW9EHK9sPkh5jfNPSImeZ9asWbzxxhsUFhYCNDWhzJkzp+mrOtBpUwHAtGnTWL16NTabjfz8fDZs2MC4ceMAuOyyy3juuef46quvmgJ77ty5PPHEE9TV1QGwZ88eKioqOvyM8ePHs379egoLC6mrq+PNN99seq6kpIR+/foB8MILLzStDw0Nbfr20NF2+/btY+TIkdx1112kpaWxe/du5s6dy7PPPkt5eTkAP//8M8eOHTvp
PdvT3uvb06tXL0JDQ/n2228BeP3119v9OZzNrgDXWh8258eAd4BxwFGlVByAOW/zJ9Zar9Jap2mt0xq/Unmjytp67nlvO0ue2UyQv4U1N09i2ZzB+Pt2/k9w/dQkyqrreWNrTjdUKnqa4cOH84c//IGzzz6blJQUli1bBsDKlStJT08nOTmZYcOG8eSTT3b6Xr/4xS+aTobOnDmTBx98kD59+gDGH4QNGzYwe/Zs/P39Abj++usZNmwYo0ePZsSIEdx4442dHkHGxcVx7733MnHiRGbPns3o0aObnrv33nu55JJLmDp1alOTEMD555/PO++803QSs73tHnnkEUaMGEFKSgqBgYHMmzePOXPmcMUVVzBx4kRGjhzJokWLKCsrIyoqismTJzNixIgOT2K29/qOPPPMMyxdupSJEyeitSY8PByAGTNmsHPnzhYnMZ1JdfZ1QCkVDPhorcvM5U+B+4BZQGGzk5iRWus7O3qvtLQ07Y03dMg4WMRv38jiQGEl105O4s5zB2P1s3TpPRY98Q1HSqtZ/7vp+Fqk92d32rVrF0OHDnV1GaIHKS8vb+pH/sADD5CXl8f//d//OeS92/r/ppTK0Fqntd7WnjbwWOAds03LF3hVa/0fpdRW4A2l1HXAIeCS067cw9TU23jks7089eU++kYE8toNE5h4RtQpvdcN0wZy40sZfLLjKOclxzm4UiFEV/z73//mr3/9K/X19QwYMIDnn3/eJXV0GuBa6/1AShvrCzGOwkU77nl3B6vTc7hsbAL/s2AYIQGn3uln9tBYEqOC+NdX+5k/so+MzSGECy1evJjFixe7ugy5EtOZvtyTz4LkOB64OPm0whvA4qO4bkoSmTnFZBw87qAKhRDuTALcSY6UVHOktJoxAxx3geqiMQlEBPmxaoNcXi+EkAB3msycYgBSEiIc9p6B/haWjB/Ap7uOkl3QcVcuIYTnkwB3ksycYvwsimFxYQ5936smDcDPx4dnN2Y79H2FEO5HAtxJMnOOMywurMvdBTvTO9TKBal9eTMjh+MVtQ59byGcbfr06XTWlfiRRx6hsrLSoZ/b0WiH9gx3eyqef/55Dh8+3PQ4MTGRgoKCDl7RdRLgTmBr0PyQW0KqA5tPmrth2kCq6xp4ZfNBp7y/8EyOGPa1M464TLy7A9xZWge4M0iAO8FPx8qpqLU5tP27ubNiQzn7rBhe2HSQmnrn/1IK13rwwQebhiH9zW9+w8yZMwFYt24dS5YsAYwBmSZOnMjo0aO55JJLmi4LT0xM5L777mPKlCktLmkHOHjwILNmzSI5OZlZs2Zx6NAhSkpKSExMpKGhAYDKykoSEhKoq6tj3759nHvuuYwZM4apU6eye/duwBh6dtmyZcyYMYO77mo5nl1VVRWXXXYZycnJLF68uGk4VoCbb76ZtLQ0hg8f3jSE68qVKzl8+DAzZsxgxowZ7W4Hxrgow4YNIzk5uWkUw7aG2G1ruNr2dDRE77XXXtvmsLB//vOfGTJkCOeccw6XX345K1as4K233iI9PZ1f/vKXpKamNv3cjz76KKNHj2bkyJFN++90yF3pnSAzx+jm56wjcIAbpg5kyTObeS/zMJemJTjtc0QrHy+HIz849j37jIR5D7T79LRp0/jHP/7BbbfdRnp6OjU1NdTV1bFx40amTp1KQUEBf/nLX/jss88IDg7mb3/7Gw899BD33HMPYAxUtXHjxpPe99Zbb+Wqq67i6quv5tlnn+W2227j3XffJSUlhS+//JIZM2bwwQcfMHfuXPz8/Fi6dClPPvkkgwYNYvPmzdxyyy18/vnngDFGymeffYbF0rLJ8IknniAoKIht27axbdu2FpfV33///URGRmKz2Zg1axbbtm3jtttu46GHHuKLL75ouoS+re3i4+N555132L17N0opiouNTgPtDbHberja9nQ0RG9bw8JmZWWxZs0avv/+e+rr6xk9ejRjxoxh0aJFPPbYY6xYsYK0tBMXUEZHR/Pdd9/x+OOPs2LFCp5++ukO6+mMBLgTZOYUE2b1JSm6/dHaTtfkM6MY0ieUp7/azyVj4uXCHg82ZswYMjIyKCsrIyAggNGjR5Oens5XX33FypUr+fbbb9m5cyeTJ08GoLa2lokTJza9vr0LTjZt2sTbb78NwJVXXsmdd97ZtP3q1auZMWMGr7/+Orfccgvl5eV88803XHLJiQuua2pqmpYvueSSk8IbYMOGDdx2220AJCcnk5yc3PTcG2+8wapVq6ivrycvL4+dO3e2eL6j7YYNG4bVauX666/nvPPOY8GCBUDHQ+zao6PXtzUs7MaNG7nggguaRoQ8//zzO3z/iy66CDD+TRv3/emQAHeCzJwSUhIinBqqSilumDqQ376ZxZd78pk+WEbz7RYdHCk7i5+fH4mJiTz33HNMmjSJ5ORkvvjiC/bt28fQoUPZt28f55xzDq+99lqbr+9o2NfmGv+/Lly4kLvvvpuioiIyMjKYOXMmFRUVREREtDviYVeHls3OzmbFihVs3bqVXr16cc0111BdXW33dr6+vmzZsoV169bx+uuv89hjj/H55593OMSuPTp6fVvDwp7q0LKOGlZW2sAdrLK2nh+PlDLKic0njc5P6WveN1O6FHq6adOmsWLFCqZNm8bUqVN58sknSU1NRSnFhAkT+Prrr/npp58Ao9268QYKHZk0aVLTUKivvPIKU6ZMAYyb/Y4bN47bb7+dBQsWYLFYCAsLIykpqakdXWtNVlaWXXW/8sorAGzfvp1t27YBxpFtcHAw4eHhHD16lI8//rjpNc2HZG1vu/LyckpKSpg/fz6PPPJI0x+W9obYtXeY164O0TtlyhQ++OADqqurKS8v59///nebP4ezSIA72A+5JTRoSO3v/AD39/Xh6kmJbPypwOG3+RI9y9SpU8nLy2PixInExsZitVqZOnUqADExMTz//PNcfvnlJCcnM2HCBLtOkK1cuZLnnnuO5ORkXnrppRaj6S1evJiXX365RfPLK6+8wjPPPENKSgrDhw/nvffe6/Qzbr75ZsrLy0lOTubBBx9sGns8JSWFUaNGMXz4cK699tqm5h+ApUuXMm/ePGbMmNHudmVlZSxYsIDk5GTOPvtsHn744aafqa0hdlsPV9vRPunKEL1jx45l4cKFpKSkcNFFF5GWltY0tOw111zDTTfd1OIkpqN1OpysI3nDcLKrNuzjfz/aTcb/zCYqJKDzF5ymkso6Jj6wjnNH9OGhS+W+mc4gw8mKjjQOLVtZWcm0adNYtWpVi5O1XdWV4WTlCNzBMnOKSYgM7JbwBggP8uPStAQ+yDrMkZKT2xCFEM61dOlSUlNTGT16NBdffPFphXdXyUlMB8s8VMyYxMhu/czrpiTx4ia5b6YQrvDqq6+67LPlCNyBjpVWc7ik2qn9v9uSEBnEuSP68Ormg1TIfTOdojubGoX36ur/MwlwB2ocgTA1IbzbP/v6qQMpra7njXS5b6ajWa1WCgsLJcSFU2mtKSwsxGq12v0aaUJxoMycYnx9FMP7dn+Aj+7fizEDevHs19l
cNTERi49c2OMo8fHx5Obmkp+f7+pShIezWq3Ex8fbvb0EuANl5hQz1AkjENrrhqlJ3PTyd3yy4wjzR8p9Mx3Fz8+PpKQkV5chxEmkCcVBGho023JLSHFB80mjc4b1YYB530whhOeTAHeQffnllNfUk5rguFuodVXjfTO/P1RMxsEil9UhhOgeEuAO8n3TCczu7YHS2qIx8YQHyn0zhfAGEuAOkpVTTKjVl4FOHIHQHkH+viyZ0J+1O49yQO6bKYRHkwB3kMycYlLiI/DpAb0/rp6YiNXXwj3v75Cub0J4MAlwB6iqtbH7SJnLm08a9Q6zcvf8IWzYk8+rWw65uhwhhJNIgDvA9sMl2Bp0jwlwgCXjBzDlzGju//cuDhZKU4oQnkgC3AGyzBOYzroH5qnw8VE8uCgZi1Lc8eY2bA3SlCKEp5EAd4Dvc4rpFxFITGj3jEBor74Rgfxx4XC2HCjiua/lpg9CeBoJcAfIPFTcLTdwOBUXj+7H7KGxPPjJj+w96ty7gwghupcE+GnKL6vh5+IqUuN7ZoArpfjrRSMJ9rfw2zezqLM1uLokIYSDSICfpsb27556BA4QExrA/b8YybbcEh7/Yp+ryxFCOIgE+GnKzCnG4qMY4YIRCLti/sg4Lkjty6Of72X7zyWuLkcI4QAS4KcpM6eYwbGhBPq7ZgTCrvjTwuFEBvuz7I1Mqutsri5HCHGaJMBPQ0ODJiu3557AbC0iyJ+/LUpmz9FyHv5sj6vLEUKcJgnw07C/oIKy6voedQFPZ2YM7s3l4xJYtWG/jFgohJuTAD8NjbdQG+VGAQ7wh/OG0S8ikGVvZFFZK/fQFMJd2R3gSimLUup7pdSH5uNIpdSnSqm95tx1A2G7SFZOMSEBvgyMCXF1KV0SEuDLiktSOFRUyQMf73Z1OUKIU9SVI/DbgV3NHi8H1mmtBwHrzMdeJTOnmOT4cLe8/+SEgVFcOzmJFzcdZOPeAleXI4Q4BXYFuFIqHjgPeLrZ6guAF8zlF4ALHVtaz1ZdZ2NXXqlbtX+3dsfcwQyMCeaOt7Iora5zdTlCiC6y9wj8EeBOoPllfLFa6zwAc967rRcqpZYqpdKVUumedFfvHYdLqG/QPWoAq66y+ll46NJUjpXV8Kf3d7q6HCFEF3Ua4EqpBcAxrXXGqXyA1nqV1jpNa50WExNzKm/RI2XmGBfDuNsJzNZSEyK4ZfoZrPkul7U7jri6HCFEF9hzBD4ZWKiUOgC8DsxUSr0MHFVKxQGY82NOq7IHyswppm+4ld5hVleXctp+PXMQw+LC+P07P1BUUevqcoQQduo0wLXWd2ut47XWicBlwOda6yXA+8DV5mZXA+85rcoeKDPnuFs3nzTn7+vDQ4tTKKmq43/e/UFuwyaEmzidfuAPAOcopfYC55iPvUJheQ05RVVufQKztSF9wvjNOWfx0Q9HeD/rsKvLEULYwbcrG2ut1wPrzeVCYJbjS+r5snLNEQg9KMABbpx2Bp/uPMo97+1gwsAoYj2geUgITyZXYp6CzEPF+CgYGd+zRyDsKouP4qFLU6mpt/HrV7+XAa+E6OEkwE/B9znFnBUbSpB/l77AuIWk6GAeXJTC1oNF3PLKd9TWyw0ghOipJMC7SGtNVk4xo9xkBMJTsTClL/dfOJLPdx9j2RuZckNkIXoozzuEdLLsggpK3WwEwlNxxfj+lFXX8dePdxNq9eV/fzESpdxvyAAhPJkEeBc1jkDoKV0IO3Lj2WdQWl3HP7/YR6jVj7vnDZEQF6IHkQDvoqycYoL9LQzqHerqUrrF7+YMpqy6nlUb9hNm9eXWmYNcXZIQwiQB3kWZOcWMdNMRCE+FUop7zx9OeXU9K9buISTAl2smJ7m6LCEEchKzS6rrbOzMKyU1wbuGPvfxUTy4KJk5w2K594OdrMnIdXVJQggkwLtkZ14pdTZNaoJn9f+2h6/Fh5WXj2LymVHc8VYW/9kuA18J4WoS4F2QldN4BaZ3HYE3svpZWHVlGikJEdz22vd8tddzhgcWwh1JgHdBZk4xfcKs9An33kvMgwN8ef6acQyMCWbpixlyY2QhXEgCvAsyc4pJ8cLmk9bCg/x46brxxIYFcM1zW9l5uNTVJQnhlSTA7XS8opaDhZVe23zSWkxoAC9fP57QAF+uenYz+/PLXV2SEF5HAtxOmR46AuHpiO8VxEvXj0drWPL0Zn4urnJ1SUJ4FQlwO2UeKkZ54AiEp+uMmBBevG4cZTX1LHl6M/llNa4uSQivIQFup8ycYs7qHUpIgFz71NrwvuE8d81YjpRUc+UzmymplDvcC9EdJI3soLUmK7eYucP6uLqUHistMZKnrhzD9S+ks3jVJq6dnMTcEX0ID/RzdWlCdEprTU19A1W1NirrbFTV1lNZa6Oy1masq7VRWVtPVZ2N2voGam0N1NVr6mwN1NnMx7YGausbqLNp8/kG83nj8e/nD3V4E6wEuB0OFlZSXFlHqgcPIesI086K4Yklo7nvw53cuWYb//Puds4eHMPClL7MHhpLoL/F1SUKD6W1pqrOxvHKOooraymurON4ZS3HK+soMefHK2spqayjuKqOihojjE8EdD2nMmqyr4/Cz+KDn0Xh7+tjLhuP/Sw+zdYpp9xrVgLcDk0jEMZLgHdm1tBYZg7pzbbcEt7POsyH2w7z6c6jBPlbOGdYLAtT+jJ1UAz+vtJ658601hwtrSEowEJogK9TR6ksq64jp6iKnOOV5BRVknu8iiMl1RxvFtTFVXUd3nwk2N9CRJA/EUF+RAT5ERkcRJC/hSB/C4F+vsbcfGws+xLctM583s94LsDPYgS0jw8+Lh4TSQLcDpk5xQT6WTgrNsTVpbgFpRQpCRGkJETw+/lD2ZJdxPtZh/l4ex7vZR4mIsiPeSP6cH5KX8YnRXnNwGDuTGvNT8fK+Ta7iM37C9mSXcQx84S11c+HmNAAeodaiQkJoHdYQNO8d6jVfC6AyGB/fC0n/+Guqbfx8/Eqco5XkVNUSc7xSnKLqjhkLhe3OqcSEuBLXLiVXkH+DIgKIjUhgohgPyIC/ekV5EdEUMt5eJAfAb6e+e1POeOwvj1paWk6PT292z7PUS7859f4+/rwxo0TXV2KW6utb2DjT/m8l2kclVfW2ogNC+C8kX25ILUvyfHhMt54D9HQoPnxaBmb9xeyObuILdlFFFbUAhAbFsD4pChG94+gzqbJL6/hWGk1x8pqyC+r4VhZDSVVJ5/I9lEQGWyEeUxoAJW19eQUVXG0rJrmMeRv8aFfr0DiewXSPzKIhMggEnoFkRAZSEKvICKC/Lzu/4lSKkNrndZ6vRyBd6Km3sbOw6VcMznR1aW4PX9fH2YOiWXmkFiqam18tuso72cd5uVvD/Ls19kMiApi/sg4zogJafol7x0aQK8gf5d/VfV0tgbNrrxSNjceYR8oajry7RcRyNlnxTB+YCTjk6IYEBXUaYDW1Nuawrz5PL+smmOlNeSX12D1tTD5zOimYE6INEI6NtQq/952kgDvxO68MmptDXIBj4MF+ls4P6Uv56f0paSqjk+2H+
H9rMM89eW+k04m+fooolt9NY9p9tW8+dxTvyo7UmOTxaGiSvYcLWPz/iK2HCiirLoegP6RQZwzNJbxA6MYnxRJQmRQlz8jwNdCfK8g4nt1/bXCfhLgncjMkSswnS080I9LxyZw6dgEqmptHC2tNr+Wm0dszY7gDpdUk5VbQmFFDW21/vUK8iMuPJC+EVbiwgOJi7DSNzyQPuHGPDbcMSFfXWejuLKOkipjqqitx+prITig8USYL8H+vgT6W7r9hG1Dg9GskVNUySFzyik60b58pLRlk8XA6GAWJMcxPimK8QMjiQsP7NZ6xamTAO9EZk4xMaEBxHnxCITdKdDfQmJ0MInRwR1uV29roLCi1vx6Xm3MS2vIK60mr7iK3ONVbD1wvM222OiQAPpGWOkTZqVvRCBx4VbiIgIJ9rdQUlXXIphLzXlx1Yl1JZ30eGjNz6II9LMQHGAEemOwB5tB3xjyFqWw+Ch8fRQWizn38THnquXccmJ9aVWdGdJGWOcer6KmWX1KQWyolf6RQUw6w2iyaGxbTooOJjokwP5/INGjSIB3oKFB8/2h46QmRHjdSZOeztfiQ2yYldgwK9D+8AYVNfXklVRzpKSawyVV5BVXk1dSxeGSag4UVrBpXyFlNfVtvjbU6kt4oF/TNKh3iLEc5NdifXigH0H+vtTU26isMS4Eqaypb7r4o7LZhSAVtSeeKyivpbK2kspaG3U2ja2hgfoGja3ZVG9n5+TQAF8SIoMY1DuUmUN6nzj5FxlEv4hArH7StOSJJMDbUV1n4463tnGgsJKl085wdTniFAUH+HJm7xDO7N1+F9Cy6jrySqqpqrU1BXJYoF+P6N6otaZBQ31DQ1Og22y6KejrbA2EBPh6Zc8MIQHepsLyGpa+lEHGweMsnzeEy8cluLok4UShVj9CrT3zkn+lFBYFFh85ghYnkwBvZV9+Ob96bitHS6t5/JejmT8yztUlCSFEmyTAm9m0r5CbXs7Az6J4bekERveXmzcIIXouCXDTmoxclr+9jQFRwTx3zdhT6vsqhBDdyesDXGvNw5/uYeXnPzHpjCieWDJGhkAVQrgFrw7w6jobd63ZxnuZh7k0LZ6/XPHb1mYAABQJSURBVDhSRskTQrgNrw3woopalr6YTvrB49wxdzC3TD9DumEJIdyKVwb4/vxyfvX8VvJKqnnsilEsSO7r6pKEEKLLvC7AN+8vZOlLGVh8FK/dMIExA6SniRDCPXXa4KuUsiqltiilspRSO5RSfzLXRyqlPlVK7TXnPT4J3/4ulyXPbCY6xJ93b5ks4S2EcGv2nLGrAWZqrVOAVOBcpdQEYDmwTms9CFhnPu6RtNY89Okelr2RRdqASN6+eTL9o6SboBDCvXXahKKNW/aUmw/9zEkDFwDTzfUvAOuBuxxe4WmqtzXwuzezeDfzMIvGxPO/v5CeJkIIz2BXkimlLEqpTOAY8KnWejMQq7XOAzDnvdt57VKlVLpSKj0/P99Rddtt/Y/5vJt5mNtmDeLvi5IlvIUQHsOuNNNa27TWqUA8ME4pNcLeD9Bar9Jap2mt02JiYk61zlO295jx5eH6qUnSTVAI4VG6dDiqtS7GaCo5FziqlIoDMOfHHF6dA2QXlBMdEkBYDx1tTgghTpU9vVBilFIR5nIgMBvYDbwPXG1udjXwnrOKPB3ZBRUMjOn47i5CCOGO7OkHHge8oJSyYAT+G1rrD5VSm4A3lFLXAYeAS5xY5ynLLqhg9tBYV5chhBAOZ08vlG3AqDbWFwKznFGUo5RU1VFQXktSJ/dXFEIId+TRXTKyCyoAJMCFEB7JwwPc6IEibeBCCE/k2QGeX4GPgv6REuBCCM/j0QG+v6CChMgguXhHCOGRPDrZsgsqpP1bCOGxPDbAtdYS4EIIj+axAX60tIbKWhsDJcCFEB7KYwN8f1MPlBAXVyKEEM7hsQEufcCFEJ7OcwM8vwKrnw99wqyuLkUIIZzCcwO8oILEqGB8fGQIWSGEZ/LYAN8voxAKITycRwZ4na2BQ0WV0v4thPBoHhngOUWV2Bo0A6OlB4oQwnN5ZIA39UCRJhQhhAfz6ACXi3iEEJ7MIwN8X34FvYL8iAjyd3UpQgjhNB4Z4NkF5XICUwjh8Tw0wCvkEnohhMfzuACvqKnnaGmNHIELITyexwW4nMAUQngLjw1w6UIohPB0Hhfg+/ONAE+MkgAXQng2jwvw7IJy+kUEYvWzuLoUIYRwKg8McBnESgjhHTwqwLXW7Jf7YAohvIRHBXhhRS1l1fUS4EIIr+BRAd54AlMCXAjhDTwqwLMbb2Qsw8gKIbyARwX4/oIK/C0+9OsV6OpShBDC6TwqwLPzKxgQFYRF7oMphPACnhXg0gNFCOFFPCbAbQ2ag4WVcgm9EMJreEyA/3y8ilpbgwxiJYTwGh4T4PvNHihJ0gNFCOElPCbAm4aRlSYUIYSX6DTAlVIJSqkvlFK7lFI7lFK3m+sjlVKfKqX2mvNezi+3fdkFFYRafYkKlvtgCiG8gz1H4PXAb7XWQ4EJwH8ppYYBy4F1WutBwDrzsctkF1QwMDoYpaQLoRDCO3Qa4FrrPK31d+ZyGbAL6AdcALxgbvYCcKGzirTH/nzpQiiE8C5dagNXSiUCo4DNQKzWOg+MkAd6t/OapUqpdKVUen5+/ulV247qOhs/F1fJCUwhhFexO8CVUiHAGuC/tdal9r5Oa71Ka52mtU6LiYk5lRo7daBQTmAKIbyPXQGulPLDCO9XtNZvm6uPKqXizOfjgGPOKbFz2TIKoRDCC9nTC0UBzwC7tNYPNXvqfeBqc/lq4D3Hl2ef/QUS4EII7+NrxzaTgSuBH5RSmea63wMPAG8opa4DDgGXOKfEzu3PryA2LIDgAHt+HCGE8AydJp7WeiPQXt+8WY4t59RkF5TL0bcQwut4xCFrdkEF80bGOfZNbfWw813IeB7qqyEoypwimy1HQWCzx4ER4GNxbB1CCNEOtw/w4xW1HK+sc9wgVtWl8P1L8O0TUJIDkWdARH8oPQxHtkNVEdRVtvNiBYG9WgZ8WBxEn2VMMYMhNA7kYiMhhAO4fYBnFzroBGZJLmx+EjJegJpSGDAZ5j0IZ50LPq3O9dZWGkFeWWhOzZebrSs+CAc2Qk3Jidf6h0L0ICPMowdB9GBjuVciWPxO72cQQngVtw/w076R8eFM2PQY7HgHtIbhF8LEW6Hf6PZf4x9kTOHxnb+/1lB+DAp+hPwfoWCPMe3/ErJeO7Gdjx9EDoQY82g9ejDEDjcmOWIXQrTB7QM8u6Aci48iITLI/hc1NMDetUZwH/gK/ENg3I0w4SajucSRlILQWGNKmtbyuepSKNwL+XuMgC/YC8d2w+6PQNuMbcITYMh5MGQB9J8IFrf/JxNCOIjbp0F2QQX9I4Pws9hxTVJdNWx7HTb90zgKDusH5/wZxlwN1nDnF9uaNQz6jTGm5uproWg//JwOu/9tnEjd/KRxwnTwfBh6PgycDn7W7q9ZCNFjuH2A78+v6PwEZkUBbH0atvwLKgugTzJc9C8Y/oue2e7s6w+9h
xjTqCVQUw771sGuD2DX+5D5svGt4czZRpgPmmP8MRBCeBW3DvCGBs2BwgqmnBnd/kaHv4fn5hs9RwbNhUm3QuJU92pXDgiBYRcYU30tHNgAuz40js53vgsWf0g6G4YugMHnQYhzxpwRQvQsbh3geaXVVNc1dHwj46/+YQTcDV8YR7TuztffOPI+czac9w/I3Wocme/+ED64HT74b6OtfOgCGLoQIhJcXbEQwkncOsA7HcSqaL9xpDrlN54R3q35WKD/BGOa8xc4ut08Mv8QPvm9McWPhWEXGkfvEuZCeBT3DnDzRsYD2xsHfPNT4OML45Z2Y1UuohT0GWlMM+6Gwn2w8z2je+TaPxhT/Dijm+SwC+zrAimE6NHcOsD3F1QQ5G8hNizg5CerjsN3L8GIi42rIb1N1BkwdZkxFe4z2sp3vHPiyDxh/Ikj8/B+rq5WCHEK3DrAswuM26i1eR/MjBegrgIm/lf3F9bTRJ0BU39rTIX7jCDf8S58crcxJUw4cWQe1tfV1Qoh7OT2AT6yXxv9t211RvNJ0jSIS+7+wnqyqDNg2u+MqeAn2GmG+X+WG1P/icaR+eBzIWKAe/XWEcLLuG2A19TbyCmq5IKUNo4Yd7wLZYfh/Ee6vzB3En0mTLvDmPL3mM0s78J/7jKmsH4wYJI5TTYu8ZdAF6LHcNsAzymqpEFzchdCrWHToxA1CM48xzXFuaOYs+DsO40pfw9kfwkHv4bsDfDDm8Y2QVHGEfqAyUao9xkpw+cK4UJuG+AnBrFq1QPl4NeQlwULHjl5FEFhn5izjGncDcYfxKL9xn49uMmY7/7Q2C4gDBLGnThC7zsKfNs4oSyEI9nqoLbcGLO/oQ4a6o11DbZmjzt6rvFxfaup2TZNj1s/Xw/Kx+jdZvEzBqGz+BtjFPn4meuaP+d3Yjl+rMMvsnPbAM9u7z6Ym/5pjBmScpkLqvJAShnt5lFnwOirjHUluUaYH/oGDn4D6+4z1vtajf+k/UYbzS+hfSCkjzEP7eOacNfa+IW11ZpTnfFL2rjcNK8zfglD+kBI7545xIK707rlcl2F0VusqticH4fqZssnrTcf15Z3X82NYd04KR+j9oa6E/+X7PXLNTBotkPLc+sAjw7xJzyw2S9awU/w48dGm65foOuK83Th8ZB8iTGBMdbMoU1GmB/8xvgj2lB/8usCI0+EeWhcq4A3HwdHQ12VMSZ7dakxryk7sVxd0uy5spO3q6s68ctlq227jk4po46QPsYoko3z0DgIiTXrjnXdH6XOaG2EXNM49UXtj19fU2oeWdqMETAbjzJ1Q+frtK1lKKPbLanLLP7GzVECe4E1AsLiIXbEiXX+ISeOdpuOeNtYbn407GNp9pz5uPl2zR9b/EBZOv8Wr7WxL2y15v+7+pbhbmu2HDnQcfvH5LYBvj+/4uSj728fN3b8uBtcU5S3Co42BtUaer7xuKHBCIyyPCg7Ys6Ptnyc/6Ox3DhsbldY/I3mG2uYMQ8INX45AkKNP9wW/1Zfbxvn5lfdpuVW29TXQPkRo9bm86M7jDHd26o1sNeJo/bm9bSY2lnnH2IMjdBIa6OG2nJzqjCnZss1Zc3Wm89Vl5wc1LbadnaeMm4L2HgrwJA+Zmj5mIFlhlhjeCnLiXBrfL7xqFT5GFOLt299klu1/7xfkHEbwuZB3bjsF+geJ8yVMv9PuSZK3TfACyqYOaRZe1JlEWS+CsmXGr9MwnV8fIxQD442TnS2p8FmhE3zgK8sMH6xG0OvMRSt4SfWuWIY3aZajxhTi6A/YgR8+THzW0EZ1JYZR6yd8bWCf7Bx5FZb3rU/aP4hxmsDwoww7pUI/Ua1fb/Wxvu5WsPlxLMHccsAL62uo6C8puUJzPRnob4KJsiFO27Dx2L8sQ3pDT39YtkWtdpxbYHWxgiYjYHe2MTTYmq2zhJghLF/8IlgPmk52PgD5h8MvoFykl64Z4AfME9gDmzsQlhfA1tWwRkzIXaYCysTwqTUidAN7ePqaoSHcss/4Y09UJpu5LB9DZQfNe5lKYQQXsItA3xffgVKQf+oIPPCnX9CzFDjCFwIIbyEWwZ4dkEF8b0CCfC1GFcMHt1uDFrlDmethRDCQdw0wMtPnMD85jEI7m30PhFCCC/idgGutSa78UbGx3bDT58a/b574gUVQgjhRG4X4PllNVTU2oweKN8+bvSjTbvW1WUJIUS3c7sA32/2QDkrpBqyXjfGPAnu4K70QgjhodwvwM1RCIfmvgm2GrlwRwjhtdwuwLMLygnxrSfsh+dh0Fxj2FMhhPBCbhjgFVwbuhVVWSD3uxRCeDW3C/D9+eVcZnsfYkca97wUQggv5VZjodTbGkg8vom+fgdh0lNy4Y4Qwqu51RF4zvEqrvH5iKqAGBh+kavLEUIIl+o0wJVSzyqljimltjdbF6mU+lQptdec93JumYZjP2UwzfIDRSOuaTkQvhBCeCF7jsCfB85ttW45sE5rPQhYZz52ul5Z/6JSBxA0Ue64I4QQnQa41noDUNRq9QXAC+byC8CFDq7rZGVHGHjkI95X0+kVHev0jxNCiJ7uVNvAY7XWeQDmvN17mCmlliql0pVS6fn5+af4ccDWp/HRNjZELjr19xBCCA/i9JOYWutVWus0rXVaTExM5y9oS20lbH2Gr9RYrH3kwh0hhIBTD/CjSqk4AHN+zHEltSHrNagq4rHqc0/chUcIIbzcqQb4+8DV5vLVwHuOKacdVcepiBvPVj245Y2MhRDCi9nTjfA1YBMwWCmVq5S6DngAOEcptRc4x3zsPNN+xxcTngUUSXIELoQQgB1XYmqtL2/nqVkOrqVD2QVVABLgQghhcpsrMbMLKugbbiXQ3+LqUoQQokdwmwDfV1BBUowcfQshRCO3CHDjPpjl0nwihBDNuEWAF1XUUlpdLz1QhBCiGbcI8GzzPpgDpQlFCCGauEWAN97IWC7iEUKIE9wjwPMr8LMo+kUEuroUIYToMdwiwBOjgvjFqH74WtyiXCGE6BZucUu1y8b157Jx/V1dhhBC9ChySCuEEG5KAlwIIdyUBLgQQrgpCXAhhHBTEuBCCOGmJMCFEMJNSYALIYSbkgAXQgg3pbTW3fdhSuUDB0/hpdFAgYPLcWeyP04m+6Ql2R8nc+d9MkBrHdN6ZbcG+KlSSqVrrdNcXUdPIfvjZLJPWpL9cTJP3CfShCKEEG5KAlwIIdyUuwT4KlcX0MPI/jiZ7JOWZH+czOP2iVu0gQshhDiZuxyBCyGEaEUCXAgh3FSPDnCl1LlKqR+VUj8ppZa7uh5HUko9q5Q6ppTa3mxdpFLqU6XUXnPeq9lzd5v74Uel1Nxm68copX4wn1uplFLm+gCl1Gpz/WalVGJ3/nynQimVoJT6Qim1Sym1Qyl1u7neK/eLUsqqlNqilMoy98efzPVeuT8aKaUsSqnvlVIfmo+9d39orXvkBFiAfcBAwB/IAoa5ui4H/nzTgNHA9mbrHgSWm8vLgb+Zy8PMnz8ASDL3i8V8bgswEVDAx8A8c/0twJPm8mXAalf/zHbskzhgtLkcCuwx
f3av3C9m7SHmsh+wGZjgrfuj2X5ZBrwKfGg+9tr94fICOvhHmgh80uzx3cDdrq7LwT9jYqsA/xGIM5fjgB/b+tmBT8z9Ewfsbrb+cuCp5tuYy74YV6ApV//MXdw/7wHnyH7RAEHAd8B4b94fQDywDpjZLMC9dn/05CaUfkBOs8e55jpPFqu1zgMw573N9e3ti37mcuv1LV6jta4HSoAop1XuYOZX11EYR51eu1/M5oJM4Bjwqdbaq/cH8AhwJ9DQbJ3X7o+eHOCqjXXe2uexvX3R0T5y2/2nlAoB1gD/rbUu7WjTNtZ51H7RWtu01qkYR57jlFIjOtjco/eHUmoBcExrnWHvS9pY5zH7A3p2gOcCCc0exwOHXVRLdzmqlIoDMOfHzPXt7Ytcc7n1+havUUr5AuFAkdMqdxCllB9GeL+itX7bXO31+0VrXQysB87Fe/fHZGChUuoA8DowUyn1Mt67P3p0gG8FBimlkpRS/hgnFN53cU3O9j5wtbl8NUYbcOP6y8wz5EnAIGCL+XWxTCk1wTyLflWr1zS+1yLgc2027PVU5s/wDLBLa/1Qs6e8cr8opWKUUhHmciAwG9iNl+4PrfXdWut4rXUiRh58rrVegpfuD6DnnsQ099l8jJ4I+4A/uLoeB/9srwF5QB3GX/3rMNra1gF7zXlks+3/YO6HHzHPmJvr04Dt5nOPceLqWivwJvATxhn3ga7+me3YJ1Mwvq5uAzLNab637hcgGfje3B/bgXvM9V65P1rtm+mcOInptftDLqUXQgg31ZObUIQQQnRAAlwIIdyUBLgQQrgpCXAhhHBTEuBCCOGmJMCFEMJNSYALIYSb+v+jZg3+0YJo/AAAAABJRU5ErkJggg==\n", 385 | "text/plain": [ 386 | "
" 387 | ] 388 | }, 389 | "metadata": { 390 | "needs_background": "light" 391 | }, 392 | "output_type": "display_data" 393 | } 394 | ], 395 | "source": [ 396 | "data.loc[data[\"group\"] == \"validation\", [\"cer over dataset length\", \"wer over dataset length\"]].plot()" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": 133, 402 | "metadata": {}, 403 | "outputs": [ 404 | { 405 | "data": { 406 | "text/plain": [ 407 | "" 408 | ] 409 | }, 410 | "execution_count": 133, 411 | "metadata": {}, 412 | "output_type": "execute_result" 413 | }, 414 | { 415 | "data": { 416 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nO3deXxU9b3/8dcnkw1I2CMgW9AiSmIIu+z7WooLUKCu1Ra9amu1P9f+sGpv+3C77S3aK7Vlaa1VRNxupSpqLYIoBEWJrKKAyBZAAmFJMsnn/nFOhkmYrJNkkjOf5+Mxj3Pme7bvnMD7nPmeM98jqooxxhjviol0BYwxxtQtC3pjjPE4C3pjjPE4C3pjjPE4C3pjjPG42EhXIJS2bdtqampqpKthjDGNxvr16w+pakqoaQ0y6FNTU8nKyop0NYwxptEQkV3lTbOmG2OM8TgLemOM8TgLemOM8bgG2UZvjNcUFhayZ88eTp8+HemqmEYuMTGRTp06ERcXV+VlLOiNqQd79uwhOTmZ1NRURCTS1TGNlKpy+PBh9uzZQ7du3aq8nDXdGFMPTp8+TZs2bSzkTVhEhDZt2lT7m6EFvTH1xELe1Iaa/DuyppvakJcD2cugRSdofzG07AL2n9oY00DYGX24Tn0Lf70U3rgbllwJv8+Ah7vCosmw/C74+BnY+wkU2kU4Y6rjN7/5Tb1sZ8OGDSxfvjzktPfee48pU6bU+jZfeeUVNm3aFHg/cuTIOv2RqJ3Rh6PgBDz7fTi8HWYvgWYpsP8zOJAN+zfChmehIM+ZV3zQ9gLnjL99ujNsdzEkhfzFsjENht/vJza2bqOiqKgIn89Xquw3v/kN9913X9jrqcyGDRvIyspi8uTJ1VouHK+88gpTpkyhZ8+e9bI9O6OvKX8+LLkKvsmCaQugx0To1Bf6/RC++19ww1twz9fwk49hxl9g2B3Qqivs+gBW3A/PXA6Pfwce7wF/mw7r/gynjkb6UxkP++tf/0pGRga9evXi6quvBiAnJ4dp06bRv39/+vfvz+rVqwF44IEHmDNnDuPHj+eaa64ptR5V5c477yQ9PZ2LL76YJUuWADBz5sxSZ8bXXXcdy5Yto6ioiDvvvJP+/fuTkZHBH//4R8A5Wx41ahQ/+MEPuPjii0tt45577uHUqVNkZmZy5ZVXAnDZZZfRt29f0tLSePrppwPzJiUlcf/99zNw4EDWrFnD8uXLufDCCxk6dCg//elPA2fkJ06c4Prrr6d///707t2bV199lYKCAu6//36WLFlCZmZm4LOEEmp5gMWLF3PFFVcwceJEunfvzl133RVYZsGCBVxwwQWMHDmSH//4x9x666188MEHvPbaa9x5551kZmayY8cOAJYuXcqAAQO44IILeP/996v6Z60SO6OvieIieGkO7HgXpj4JPaeGni8mBtqc77zSLjtTfvLImbP+/dmwZx28/nN48xdw4RTofSV0G+ksbzznwf/9nE17j9XqOnue25xffi+t3Omff/45v/71r1m9ejVt27blyJEjANx2223cfvvtDB06lN27dzNhwgQ2b94MwPr161m1ahVNmjQpta6XXnqJDRs28Omnn3Lo0CH69+/P8OHDmTVrFkuWLGHy5MkUFBTwzjvv8NRTT7FgwQJatGjBunXryM/PZ8iQIYwfPx6AtWvXkp2dfdatgg8//DBPPvkkGzZsCJQtXLiQ1q1bc+rUKfr378+0adNo06YNJ06cID09nYceeojTp0/TvXt3Vq5cSbdu3Zg9e3Zg+V//+teMHj2ahQsXcvToUQYMGMDYsWN56KGHyMrK4sknn6xwH5e3PDjfCj755BMSEhLo0aMHP/nJT/D5fPzqV7/i448/Jjk5mdGjR9OrVy8GDx7M1KlTmTJlCtOnTw+s3+/3s3btWpYvX86DDz7I22+/XWF9qsOCvrpU4R+3w6ZXYPx/Qp+rq7+Opq2h23DnVbLOfRvgk2dh4wuQ/SI07wSZP3Berat+v6wxobz77rtMnz6dtm3bAtC6dWsA3n777VJtxceOHeP48eMATJ069ayQB1i1ahWzZ8/G5/PRrl07RowYwbp165g0aRI//elPyc/P54033mD48OE0adKEt956i88++4wXX3wRgNzcXLZv3058fDwDBgyo8v3g8+bN4+WXXwbg66+/Zvv27bRp0wafz8e0adMA2LJlC+edd15gnbNnzw6c/b/11lu89tprPP7444Bzy+vu3burvA8rWn7MmDG0aNECgJ49e7Jr1y4OHTrEiBEjAvt6xowZbNu2rdz1X3HFFQD07duXnTt3VrleVWFBX11vPwAf/wWG/RwG/6R21ikC5/Z2XuP/E7a+7oT+ysdg5aPQdSj0vsr55hDfrHa2aSKmojPvuqKqIW/LKy4uZs2aNSEDvVmz0P/WVDVkeWJiIiNHjuTNN99kyZIlgbNpVeWJJ55gwoQJpeZ/7733yt1GWe+99x5vv/02a9asoWnTpowcOTJwL3liYmKgXb68upVMW7ZsGT169ChV/tFHH1WpDhUtn5CQEHjv8/nw+/0V1iWUknWULF+brG2gOlb9Dlb/N/S7HkbPrZttxCVC+jS4+iW4PRtG/3849g28cpPTnv/qrbD7I+dbgDFVNGbMGF544QUOHz4MEGi6GT9+fKkmi+CmkvIMHz6cJUuWUFRURE5ODitXrmTAgAEAzJo1i0WLFvH+++8Hgn3ChAk89dRTFBYWArBt2zZOnDhR6Xbi4uICy+Tm5tKqVSuaNm3Kli1b+PDDD0Muc+GFF/Lll18GzoiD29wnTJjAE088EQjgTz75BIDk5OTAt5iKlLd8eQYMGMC///1vvv32W/x+P8uWLQtMq+o2a4sFfVWtX+yczadPg8mP18998i06wfA74aefwHXLnTP67GWwcDw82R/e/y0c21f39
TCNXlpaGr/4xS8YMWIEvXr14o477gCc5pCsrCwyMjLo2bMn8+fPr3Rdl19+eeCi7ujRo3n00Udp37494Bw4Vq5cydixY4mPjwfgRz/6ET179qRPnz6kp6dz4403VumMdc6cOWRkZHDllVcyceJE/H4/GRkZzJ07l0suuSTkMk2aNOF//ud/mDhxIkOHDqVdu3aBJpW5c+dSWFhIRkYG6enpzJ3rnKyNGjWKTZs2VXoxtrzly9OxY0fuu+8+Bg4cyNixY+nZs2egLrNmzeKxxx6jd+/egYuxdUkq+3ohIguBKcBBVU13y5YAJd9fWgJHVTUzxLI7geNAEeBX1X5VqVS/fv20QT14JPslePF6+M5YmPV3iI2PXF3yj8Pnrzi3bu5eAxIDvWbDpEchISly9TIV2rx5MxdddFGkqxEV8vLySEpKQlW55ZZb6N69O7fffntE6+L3+7n88su5/vrrufzyy8Neb6h/TyKyvryMrcoZ/WJgYnCBqs5U1Uw33JcBL1Ww/Ch33iqFfIPzxdvOHTadB8L3/xrZkAdISHYuAF//Bty6Hgb+B3z6HPxpFBzcHNm6GdMA/OlPfyIzM5O0tDRyc3O58cYbI1aXBx54gMzMTNLT0+nWrRuXXXZZ5QvVgUrP6AFEJBX4R8kZfVC5ALuB0aq6PcRyO4F+qnqoOpVqMGf0uz+CZy6D1ufDdf+AJi0jXaPQvvw3LPuRc7Y/5XeQObvyZUy9sjN6U5vq4oy+IsOAA6FC3qXAWyKyXkTmVLQiEZkjIlkikpWTkxNmtWrB/mz4+wxIbu9cGG2oIQ9w3gi46X3o2Ne5aPvqrVB4KtK1MsY0EOEG/WzguQqmD1HVPsAk4BYRGV7ejKr6tKr2U9V+KSkR7hbg8A7nl6txzeDqVyDpnMjWpyqS28M1rzq3fX7yDPx5rPM5jDFRr8ZBLyKxwBVAuZepVXWvOzwIvAwMqOn26s2xvU5zTbEfrn7Z6bagsfDFwpj74coXnVsy/zgCPn850rUyxkRYOGf0Y4Etqron1EQRaSYiySXjwHggO4zt1b2TR5wz+ZNH4KoX4ZwLI12jmuk+Dm5836n/0utg+Z1O3zzGmKhUadCLyHPAGqCHiOwRkRvcSbMo02wjIueKSEmvRu2AVSLyKbAWeF1V36i9qtey/Dx4djoc+QpmP+e0dzdmLTs7995fcgusfRoWToRvd0W6VsZUW0XdCNe2xYsXs3fv3pDTrrvuukA3DrUpuDvmnTt3kp6eXsHcNVNp0KvqbFXtoKpxqtpJVRe45dep6vwy8+5V1cnu+Jeq2st9panqr2u99rVpxf2wdwPMWHSmD5rGLjYeJv4GZv4NDn8BfxwOW/8Z6VqZRqyoqKjOt1H2x1Q1CfqadiFQUdDXlfrod99+GVti1wfOD6Iu/G6ka1L7Lvoe3Phv58lXz81yDmpFhZGulalHjz76KPPmzQPg9ttvZ/To0QC88847XHXVVYDTadegQYPo06cPM2bMIC/PeZZCamoqDz30EEOHDmXp0qWl1rtr1y7GjBlDRkYGY8aMYffu3eTm5pKamkpxcTEAJ0+epHPnzhQWFrJjxw4mTpxI3759GTZsGFu2bAGcs+U77riDUaNGcffddwfWH6ob4bVr1zJ48GB69+7N4MGD2bp1K+CE9IwZM/je977H+PHjKS4u5uabbyYtLY0pU6YwefLkwBn5+vXrGTFiBH379mXChAns27ePF198kaysLK688koyMzM5dar8O9dCLQ/OA0Tuvvvus7obPnnyJN///vfJyMhg5syZDBw4kKysrJDdMRcVFfHjH/+YtLQ0xo8fX2E9qkxVG9yrb9++Wq8KTqk+0Er1nV/V73brW8Ep1f/9meovm6sumKCa+02kaxQ1Nm3adObN8rtVF06u3dfyuyvc/po1a3T69Omqqjp06FDt37+/FhQU6AMPPKDz58/XnJwcHTZsmObl5amq6sMPP6wPPvigqqp27dpVH3nkkZDrnTJlii5evFhVVRcsWKCXXnqpqqpOnTpV3333XVVVff755/WGG25QVdXRo0frtm3bVFX1ww8/1FGjRqmq6rXXXqvf/e531e/3n7WNRYsW6S233BJ4n5ubq4WFhaqqumLFCr3iiisC83Xs2FEPHz6sqqpLly7VSZMmaVFRke7bt09btmypS5cu1YKCAh00aJAePHgwUL8f/vCHqqo6YsQIXbduXcjPeu2111Zp+TvuuENVVV9//XUdM2aMqqo+9thjOmfOHFVV3bhxo/p8vsB2mjVrFtjGV199pT6fTz/55BNVVZ0xY4Y+88wzZ9Wl1L8nF5Cl5WSq9V4JkLMZtMh56pOXxSU6P6jqMhj+9zaYPwym/QnOHx3pmpk61rdvX9avX8/x48dJSEigT58+ZGVl8f777zNv3jw+/PBDNm3axJAhQwDnTHrQoEGB5WfOnBlyvWvWrOGll5wfxl999dWBh27MnDmTJUuWMGrUKJ5//nluvvlm8vLy+OCDD5gxY0Zg+fz8MzcJzJgxo0pPh8rNzeXaa69l+/btiEig4zOAcePGBboFXrVqFTNmzCAmJob27dszatQoALZu3Up2djbjxo0DnDPoDh06VL4TXZUtH6q74VWrVnHbbbcBkJ6eTkZGRrnr79atG5mZmWetIxwW9OA8AASgXe1fBGmQMmZAh17wwjXwzBXOLZlDb7cHmteXSQ/X+ybj4uJITU1l0aJFDB48mIyMDP71r3+xY8cOLrroInbs2MG4ceN47rnQP4upanfCJV0hT506lXvvvZcjR46wfv16Ro8ezYkTJ2jZsmW5PWRWdRtz585l1KhRvPzyy+zcuZORI0eGXIeW86t/VSUtLY01a9ZUaXvVXT5Ud8Pl1aWi5UvWURtNN9ZGD07QxydBqyh6wEfKBfDjdyDtcnjnQec2zPy8SNfK1KHhw4fz+OOPM3z4cIYNG8b8+fPJzMxERLjkkktYvXo1X3zxBeC0KVf0kIwSgwcP5vnnnwfg2WefZejQoYDzeL8BAwZw2223MWXKFHw+H82bN6dbt26Bdn5V5dNPP610G2W79M3NzaVjx46A0y5fnqFDh7Js2TKKi4s5cOAA7733HgA9evQgJycnENSFhYV8/vnnIbcVSkXLV1SXF154AYBNmzaxcePGwLTg7pjrigU9OEHfLj36Ht0X3wymL4RxD8Hm12DBODjyZaRrZerIsGHD2LdvH4MGDaJdu3YkJiYybNgwAFJSUli8eDGzZ88mIyODSy65JHChtCLz5s1j0aJFZGRk8Mwzz/D73/8+MG3mzJn87W9/K9Xs8+yzz7JgwQJ69epFWlpa4LmrFSnbjfBdd93Fvffey5AhQyq8C2jatGl06tQp0DXywIEDadGiBfHx8bz44ovcfffd9OrVi8zMTD744APAuSh80003VXgxtqLly3PzzTeTk5NDRkYGjzzyCBkZGYEui4O7Y64rVerUrL7Va6dmxcXwcBfoNQu++3j9bLMh2vEuLP0hoE74f2dspGvkKdapWWSUdBN8+PBhBgwYwOrVqwN959enoqIiCgsLSUxM
ZMeOHYwZM4Zt27YF+uyvrup2amZt9Ed3QcFx71+Ircz5o2HOe7DkKnh2htNuP+Rn1m5vGrUpU6Zw9OhRCgoKmDt3bkRCHpymsFGjRlFYWIiq8tRTT9U45GvCgr7kQmz7KLkQW5HW3eCGt5zeL99+wPkB2aV/sAeamEarpF0+0pKTk4lk1+tR1igdwv6NzlOazukZ6Zo0DGe124+3dvta0hCbSU3jU5N/Rxb0+zdC2wsgrkmka9JwiMCQ2+CqZU4vmE+Pcp60ZWosMTGRw4cPW9ibsKgqhw8fJjExsVrLWdPNgWzoEvpBw1HP2u1rTadOndizZw8N4qE6plFLTEykU6dO1VomuoP+5BHI/Rra/SjSNWm4rN2+VsTFxdGtWxT9TsM0KNHddHPA7R4/2u+4qYy12xvTqEV30AfuuLGgr1RJu33J06us3d6YRiPKgz4bkto3jmfCNhTfGeO02zfv6LTbv/7/nCYwY0yDFeVBv9HO5muidTf40QrodwNkLYB5veGjP1of98Y0UNEb9P4CyNliP5SqqfhmTpcRN61yesL8510wf6jTlYIxpkGpyjNjF4rIQRHJDip7QES+EZEN7mtyOctOFJGtIvKFiNxTmxUPW84WKC60M/pwtUuDa16FWX8H/2nn4erPzYbDOyJdM2OMqypn9IuBiSHKf6eqme7rrAc6iogP+AMwCegJzBaRhvPz08CF2PIfAGCqSMR5BOMta2HsA/DVSvjDQHhrLpw+FunaGRP1qvJw8JVATa62DQC+UOch4QXA88ClNVhP3TiQDXFNofV5ka6Jd8QmOA8w+cl6yJgJH8yDJ/rAx884vYQaYyIinB9M3Soi1wBZwM9V9dsy0zsCXwe93wMMLG9lIjIHmAPQpUuXMKpVRfs3Ov3bxFT+6DJTTcnt4bI/QP8b4I174LVbYd2fYOIj0HVQ5csbE0mqTjNk4SkoPOkOT5V5H1TuDzGtqNA5kYxvBgnJzjC+mfOAo/ikM+MJSaWn+eLq5CPVNOifAn4FqDv8L+D6MvOE+p18uR19qOrTwNPg9Edfw3pVjSrs/wzSrqjTzUS9jn3g+jchexmsuB8WTYT0aTD2QWjZOdK1M15VcBIObXNeJw9DwQknfAtOQuEJd3iy/PLCk6A1+AbqS3D6zIprCr5YJ/Tz85wDQVUlnws/31z9bVeiRkGvqgdKxkXkT8A/Qsy2Bwj+39wJ2FuT7dW63K/hdK5diK0PInDxdOgxCVb/3nltWe78+GrQzZDYItI1NI1Vfh4c2go5W52bKw5ucYZHd3PWOaX4nLPmuKYQ3xTimjnD+CRIand2eVzToLKmEJt4JsTjmgS93PexieW3DhQXOQeVgrzSw/y8s8vqqIWhRkEvIh1UdZ/79nIgO8Rs64DuItIN+AaYBfygRrWsbXYhtv7FN4NR90Hvq2DFL+HfD8PKR51HOHYd4jTpdBkMSSmRrWdxsXN2l3889KvghHNdp3N/O0jVl9O5kLPNCfHAa6tzwlbCF+/0Qtupn/NvLKUHtO3h/BgyvpkzPVKd8cX4ILG584qQSoNeRJ4DRgJtRWQP8EtgpIhk4hw2dwI3uvOeC/xZVSerql9EbgXeBHzAQlWt+Am69WV/NiDQruHcBBQ1WnaBGYtg8K2wfQXsWg3rF8NHTznT23SHroPPvFqGeb1G1fnl7rdfOf3zHPkKcnc7dwOVCvC8M+PltzCeITHOraVdBjm9n3YZBM3PDa+uDU1xMRzdCQc+hxM54M932q79Be4wH4rKKwsqLyogsE9VqzDOmXF/Ppw4eKZOsYlOoHcZBCnXQcqFcM5F0LKr01xiQorOZ8Y+f6VzVvCT9XW3DVN1/gLY96kT+rvXOK/Tuc60Fp2d/9Qlwd/2grPPzIqL4fg+J8i//coJ8+Dx/DK3eCZ3gMSWzkWyhGTnglhCMiQ0dy+cJQVNa156ntgmkLMZdn/o1PPrdc43AHDCJjj4217QeB44X3ACDmyCAxudE6ED2U7AF+SFnj8m1gldX7wzjE0IepUp98UBEvR3q8Z4jM/5BpVyoXOW3rKr3UBRjoqeGRudQf/fGc6FwhmL624bpuaKi+HgJtj1wZnwz3MvCzVt6wRpi87w7U4n0I/ucs4eS8TEOoHQuhu06uYERWt32LIrxFXvoQ0VKvI7F/ZLgn/3GufsF6BJazf03eDvkAmx9fec0JBUIXePE+T7s88E+5EvCZxFJzR3mtTap58ZJp97JsRjEyxsGyAL+mCnjsIjXZ2HaAz7ed1sw9QuVSeIdn1wJvxP5ECrVCe8S4YlYd68U+S+xpfUtST0d38Ih79wpsUmOrf0tkqFVl2dg07JeIvOtXdrXZHf+YZz7Bsn1I99A0e/hoObnWAv+bYEzvbbpTs3JpSEesuu9nCZRqiioI++Rq0D7mUCuxDbeIhAm/OdV5+rI12bigXXtfdVTlneQfeM/0Pnm8q+DU6//sX+oOVinANUq67uQSDVHU91gjfpHGfdxcXOt5tj37hB/k3pQM/9BvL2n317YHyS0/yRdvmZYD+nZ0QvEJr6E4VB794g1M46MzP1JOkc6DnVeZUoLoJje51mp293wrfu8Ogu2P62E9bBYptA09ZOyAcfIMD5ptC8I7ToCOeNdIbNO0KLTs6w+bnOHUJ2lh61oi/o93/mtPMmt490TUw0i/E5Pxpr2RlSh549vfCUcz94yUHg6C7nxz9J7c4EeIuOzreApq0txE2FojDo3T7o7T+Gacjimjh3maT0iHRNjAc0knu/aklRoXNByn4Ra4yJItEV9Ie2OT/esAuxxpgoEl1Bv9+9EGtPlTLGRJEoC/rPnB7m2nSPdE2MMabeRFnQb3T6t7E+MYwxUSR6gl71zB03xhgTRaIn6I/vg1NHoJ0FvTEmukRP0Af6oLegN8ZElygK+s+cYbu0yNbDGGPqWRQF/Uany1rrxMkYE2WiK+it2cYYE4UqDXoRWSgiB0UkO6jsMRHZIiKficjLItKynGV3ishGEdkgInX4JJFK5B93njRkQW+MiUJVOaNfDEwsU7YCSFfVDGAbcG8Fy49S1czyOsSvFwc2AWpBb4yJSpUGvaquBI6UKXtLVUs6xf4Q6FQHdas9JRdiLeiNMVGoNtrorwf+Wc40Bd4SkfUiMqcWtlUz+zdCk1ZOH97GGBNlwuoLQER+AfiBZ8uZZYiq7hWRc4AVIrLF/YYQal1zgDkAXbp0CadaZ9u/0XmilPVBb4yJQjU+oxeRa4EpwJVazhPGVXWvOzwIvAwMKG99qvq0qvZT1X4pKSk1rdbZivzOczqta2JjTJSqUdCLyETgbmCqqp4sZ55mIpJcMg6MB7JDzVunjuwA/2lrnzfGRK2q3F75HLAG6CEie0TkBuBJIBmnOWaDiMx35z1XRJa7i7YDVonIp8Ba4HVVfaNOPkVFrOsDY0yUq7SNXlVnhyheUM68e4HJ7viXQK+walcb9n8Gvnhoe0Gka2KMMRHh/V/G7t/oPGA5Nj7SNTHGmIiIgqDPtguxxpio5u2gP34AThy09nljTFTzdtDbhVhjjPF60Jf0QZ8
e2XoYY0wEeTvoD2RDiy7QJGTnmsYYExW8HfTWB70xxng46AtOwKHtFvTGmKjn3aA/uBnrg94YY7wc9IE+6O1CrDEmunk46LMhoTm07BrpmhhjTER5OOjdC7HWB70xJsp5M+iLi+DA59Y+b4wxeDXoj3wFhScs6I0xBq8Gvf0i1hhjArwZ9AeyISYWUi6MdE2MMSbivBn0+zdC2x4QlxjpmhhjTMR5N+itfd4YY4CqPTN2oYgcFJHsoLLWIrJCRLa7w1blLDtRRLaKyBcick9tVrxceTlwfJ/9UMoYY1xVOaNfDEwsU3YP8I6qdgfecd+XIiI+4A/AJKAnMFtEeoZV26o4YH3QG2NMsEqDXlVXAkfKFF8K/MUd/wtwWYhFBwBfqOqXqloAPO8uV7f2u1882lnQG2MM1LyNvp2q7gNwh+eEmKcj8HXQ+z1uWUgiMkdEskQkKycnp4bVwmmfb94RmrWp+TqMMcZD6vJibKi+B7S8mVX1aVXtp6r9UlJSar5VuxBrjDGl1DToD4hIBwB3eDDEPHuAzkHvOwF7a7i9qik8BYe22Q+ljDEmSE2D/jXgWnf8WuDVEPOsA7qLSDcRiQdmucvVnYObQYvsjN4YY4JU5fbK54A1QA8R2SMiNwAPA+NEZDswzn2PiJwrIssBVNUP3Aq8CWwGXlDVz+vmY7gOuBdiLeiNMSYgtrIZVHV2OZPGhJh3LzA56P1yYHmNa1dd+zdCfBK06lZvmzTGmIbOW7+M3b8R2qVBjLc+ljHGhMM7iVhc7NxDb802xhhTSqVNN42GFsPU30PL1EjXxBhjGhTvBL0vFtKnRboWxhjT4Hin6cYYY0xIFvTGGONxFvTGGONxFvTGGONxFvTGGONxFvTGGONxFvTGGONxFvTGGONxFvTGGONxFvTGGONxFvTGGONxFvTGGONxFvTGGONxFvTGGONxNQ56EekhIhuCXsdE5Gdl5hkpIrlB89wffpWNMcZUR437o1fVrUAmgIj4gG+Al0PM+r6qTqnpdowxxoSntppuxgA7VHVXLa3PGGNMLamtoJ8FPFfOtJnCTLMAAAyHSURBVEEi8qmI/FNE0spbgYjMEZEsEcnKycmppWoZY4wJO+hFJB6YCiwNMfljoKuq9gKeAF4pbz2q+rSq9lPVfikpKeFWyxhjjKs2zugnAR+r6oGyE1T1mKrmuePLgTgRaVsL2zTGGFNFtRH0symn2UZE2ouIuOMD3O0droVtGmOMqaIa33UDICJNgXHAjUFlNwGo6nxgOvAfIuIHTgGzVFXD2aYxxpjqCSvoVfUk0KZM2fyg8SeBJ8PZhjHGmPDYL2ONMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjwgp6EdkpIhtFZIOIZIWYLiIyT0S+EJHPRKRPONszxhhTfWE9M9Y1SlUPlTNtEtDdfQ0EnnKHxhhj6kldN91cCvxVHR8CLUWkQx1v0xhjTJBwg16Bt0RkvYjMCTG9I/B10Ps9btlZRGSOiGSJSFZOTk6Y1TLGGFMi3KAfoqp9cJpobhGR4WWmS4hlNNSKVPVpVe2nqv1SUlLCrJYxxpgSYQW9qu51hweBl4EBZWbZA3QOet8J2BvONo0xxlRPjYNeRJqJSHLJODAeyC4z22vANe7dN5cAuaq6r8a1NcYYU23h3HXTDnhZRErW83dVfUNEbgJQ1fnAcmAy8AVwEvhheNU1xhhTXTUOelX9EugVonx+0LgCt9R0G8YYY8Jnv4w1xhiPs6A3xhiPs6A3xhiPs6A3xhiPs6A3xhiPs6A3xhiPs6A3xhiPs6A3xhiPs6A3xhiPs6A3xhiPs6A3xhiPs6A3xhiPs6A3xhiPs6A3xhiPs6A3xhiPs6A3xhiPs6A3xhiPs6A3xhiPC+fh4J1F5F8isllEPheR20LMM1JEckVkg/u6P7zqGmOMqa5wHg7uB36uqh+LSDKwXkRWqOqmMvO9r6pTwtiOMcaYMNT4jF5V96nqx+74cWAz0LG2KmaMMaZ21EobvYikAr2Bj0JMHiQin4rIP0UkrYJ1zBGRLBHJysnJqY1qGWOMoRaCXkSSgGXAz1T1WJnJHwNdVbUX8ATwSnnrUdWnVbWfqvZLSUkJt1rGGGNcYQW9iMThhPyzqvpS2emqekxV89zx5UCciLQNZ5vGGGOqJ5y7bgRYAGxW1d+WM097dz5EZIC7vcM13aYxxpjqC+eumyHA1cBGEdnglt0HdAFQ1fnAdOA/RMQPnAJmqaqGsU1jjDHVVOOgV9VVgFQyz5PAkzXdhjHGmPDZL2ONMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjLOiNMcbjwumP3tQCf1ExR04UcLqwGH9xMUXFir9Yg4bF+Is0dLn7PjYmhmYJPpISYmmWEBsYNkvwkRDri/RHNMZEmAV9HTpVUMT+Y6fZn3uaA8dOsy8wPMX+Y/kcyD3NweOnKa7DR7HE+cQJ/fiSA4Cv9MEg3kdinI+E2BgS3GFi0DAxRFnwMD42hjhfDHE+wX2YmDGmgbGgB1SVg8fzyTmej79Y8RcVU1ik+N2z6cIi5+y5sKS8qJhCdz5/kVJYXMzpgiIOHMtn/7EzoZ57qvCsbSUnxNK+RSLtWyTS/Zy2dGiRyDnNE2ka5yPWJ/hihNgYwRcT4w7lzNB3dnlMjOAvUvLy/ZxwX4HxgqLA+JnpRRw/7Wd/7unAPKcLi8j3F4e9H+PdwD8T/s6BIN4XQ1ysBMoS3Om+GCFGIEaEGBEkMI77Pmh6DKXeC5Q6sJSMChI0Xmaau1zJxJJ3wfOXux73TfChLHB8Dnpomp5dhFL+kVzKPLun7LHyrENnOAdTt1LqjioaqGdwWUmBU6ZueellVJVi932xEijTEGVlP0xl+71kWklZoI7B26d0GaXKNFDfwEcPmla2jFLzBU0PLi/zJyz7Nz17evkqesZe8yax/Pb7mRUsXTNhBb2ITAR+D/iAP6vqw2Wmizt9MnASuE5VPw5nm+E4XVjElzkn+PJQnjPMyWNHzgm+OnSCvHx/WOsWgbZJCbRvnkinVk3pn9raCfTmTqi3c4dJCQ3z2Kqq5PuLnZcb/KcLizhdWEy+P/TwdGGRe/ArpsA9IBb4iwNl+X7nwFjoL5nHmX4i309BUTFFxSWB4QRDsRsUxSVlxWcCpWSe4PlL/tMG/4ct+x/6rEAIjJ9ZxhmUDb3SoViZ4Pw9c4CRs8pK7fMQf4OKp1dej8qUDlc5O2Sl9AGy5IAXmN8dj4lxli05QAevK/hAXBLYwWFd9jOXPTiW/VuEOgCHOtCX1PdM3cv5G4T4WxE0b+my4HkrPiiXVdE33PKmnCqMq3ilNVTj1BERH/AHYBywB1gnIq+p6qag2SYB3d3XQOApd1hnVJ
X9x06XCvIdOU6w7809VeofWseWTTgvpRnT+nTk/HOSOCc5kfhYITYmhlifcwYaG+MOfU55nE+I9cUQF+MMY31CXFB5YyUigaYamtTNPzYvUFVrojKNTjinlwOAL1T1SwAReR64FAgO+kuBv7oPBP9QRFqKSAdV3RfGdkPyFxVzxVMfsONgHicKigLlzeJ9nJeSRL/UVpzXtjPnpTTjvJRmdGvbjKbxDfPs2jRcFvKmMQon6ToCXwe938PZZ+uh5ukI1HrQx/piOD8lib5dW3FeShLnt23GeSlJtGueYP85jTFRLZygr0qzY1XmcWYUmQPMAejSpUuNKvS7mbV/EcMYYxq7cBqV9wCdg953AvbWYB4AVPVpVe2nqv1SUlLCqJYxxphg4QT9OqC7iHQTkXhgFvBamXleA64RxyVAbl20zxtjjClfjZtuVNUvIrcCb+LcXrlQVT8XkZvc6fOB5Ti3Vn6Bc3vlD8OvsjHGmOoI67YTVV2OE+bBZfODxhW4JZxtGGOMCU/jvfHbGGNMlVjQG2OMx1nQG2OMx1nQG2OMx0nZjpQaAhHJAXZVc7G2wKE6qE5jZvukNNsfZ7N9Ulpj3h9dVTXkj5AaZNDXhIhkqWq/SNejIbF9Uprtj7PZPinNq/vDmm6MMcbjLOiNMcbjvBT0T0e6Ag2Q7ZPSbH+czfZJaZ7cH55pozfGGBOal87ojTHGhGBBb4wxHueJoBeRiSKyVUS+EJF7Il2f2iQiC0XkoIhkB5W1FpEVIrLdHbYKmnavux+2isiEoPK+IrLRnTbPfXA7IpIgIkvc8o9EJLU+P191iUhnEfmXiGwWkc9F5Da3PJr3SaKIrBWRT9198qBbHrX7BJznWovIJyLyD/d99O4PVW3UL5wukncA5wHxwKdAz0jXqxY/33CgD5AdVPYocI87fg/wiDve0/38CUA3d7/43GlrgUE4T/36JzDJLb8ZmO+OzwKWRPozV7I/OgB93PFkYJv7uaN5nwiQ5I7HAR8Bl0TzPnHreQfwd+Af7vuo3R8Rr0At/DEHAW8Gvb8XuDfS9arlz5haJui3Ah3c8Q7A1lCfHedZAYPcebYElc8G/hg8jzsei/OrQIn0Z67GvnkVGGf7JPA5mgIf4zy/OWr3Cc7T7N4BRgcFfdTuDy803ZT3AHIva6fuk7rc4TlueXn7oqM7Xra81DKq6gdygTZ1VvNa5H5d7o1zBhvV+8RtptgAHARWqGq075P/Bu4CioPKonZ/eCHoq/wA8ihQ3r6oaB81yv0nIknAMuBnqnqsollDlHlun6hqkapm4pzJDhCR9Apm9/Q+EZEpwEFVXV/VRUKUeWZ/gDeCvsoPIPeQAyLSAcAdHnTLy9sXe9zxsuWllhGRWKAFcKTOal4LRCQOJ+SfVdWX3OKo3iclVPUo8B4wkejdJ0OAqSKyE3geGC0ifyN694cngr4qDyn3mteAa93xa3HaqUvKZ7l3BHQDugNr3a+px0XkEveugWvKLFOyrunAu+o2PDZEbv0XAJtV9bdBk6J5n6SISEt3vAkwFthClO4TVb1XVTupaipOHryrqlcRpfsDaPwXY919Oxnn7osdwC8iXZ9a/mzPAfuAQpyziBtw2gLfAba7w9ZB8//C3Q9bce8QcMv7AdnutCc586voRGApzgPc1wLnRfozV7I/huJ8Rf4M2OC+Jkf5PskAPnH3STZwv1setfsk6POM5MzF2KjdH9YFgjHGeJwXmm6MMcZUwILeGGM8zoLeGGM8zoLeGGM8zoLeGGM8zoLeGGM8zoLeGGM87v8ASG+8vyTv1VEAAAAASUVORK5CYII=\n", 417 | "text/plain": [ 418 | "
" 419 | ] 420 | }, 421 | "metadata": { 422 | "needs_background": "light" 423 | }, 424 | "output_type": "display_data" 425 | } 426 | ], 427 | "source": [ 428 | "data.loc[data[\"group\"] == \"validation\", [\"cer over target length\", \"wer over target length\"]].plot()" 429 | ] 430 | } 431 | ], 432 | "metadata": { 433 | "kernelspec": { 434 | "display_name": "Python 3", 435 | "language": "python", 436 | "name": "python3" 437 | }, 438 | "language_info": { 439 | "codemirror_mode": { 440 | "name": "ipython", 441 | "version": 3 442 | }, 443 | "file_extension": ".py", 444 | "mimetype": "text/x-python", 445 | "name": "python", 446 | "nbconvert_exporter": "python", 447 | "pygments_lexer": "ipython3", 448 | "version": "3.7.6" 449 | } 450 | }, 451 | "nbformat": 4, 452 | "nbformat_minor": 4 453 | } 454 | --------------------------------------------------------------------------------