├── 0000-random-stuff
    ├── A Statistical Model that Predicts Listeners’ Preference Ratings of Around-Ear and On-Ear Headphones.pdf
    ├── AN-1112-v1.1.pdf
    ├── Deep Learning for Audio Signal Processing.pdf
    └── README.md
├── 0001-phase-aware-deep-complex-unet-dcu
    ├── DCUnet_pytorch (1).ipynb
    └── README.md
├── 0002-loss-metrics
    ├── DNN-Based-Monaural-Speech-Enhancement-with-Temporal-and-Spectral-Variations-Equalization.pdf
    └── README.md
├── 0003-clean-speech-corpuses
    └── README.md
├── 0004-reverberation-corpuses
    ├── Hadad Heese Vary Gannot MULTICHANNEL AUDIO DATABASE IN VARIOUS ACOUSTIC ENVIRONMENTS.pdf
    ├── README.md
    ├── jeub09a-binaural-room-impulse-database.pdf
    └── stewart2010.pdf
├── 0005-existing-applications
    └── README.md
├── 0006-small-rooms-classifier
    ├── README.md
    ├── rir-small-rooms-n.log
    ├── rir-small-rooms-y.log
    ├── rir-small-rooms.py
    ├── small-rooms-nn-predict.py
    └── small-rooms-nn.py
├── 0007-pretrained-models
    └── README.md
└── 9999-asteroid.md


/0000-random-stuff/A Statistical Model that Predicts Listeners’ Preference Ratings of Around-Ear and On-Ear Headphones.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonashaag/speech-enhancement/dcee451ce755ae7e881f77909d86f1204e3662ae/0000-random-stuff/A Statistical Model that Predicts Listeners’ Preference Ratings of Around-Ear and On-Ear Headphones.pdf


--------------------------------------------------------------------------------
/0000-random-stuff/AN-1112-v1.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonashaag/speech-enhancement/dcee451ce755ae7e881f77909d86f1204e3662ae/0000-random-stuff/AN-1112-v1.1.pdf


--------------------------------------------------------------------------------
/0000-random-stuff/Deep Learning for Audio Signal Processing.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonashaag/speech-enhancement/dcee451ce755ae7e881f77909d86f1204e3662ae/0000-random-stuff/Deep Learning for Audio Signal Processing.pdf


--------------------------------------------------------------------------------
/0000-random-stuff/README.md:
--------------------------------------------------------------------------------
 1 | https://reference.wolfram.com/language/ref/AudioDistance.html
 2 | 
 3 | https://kuleshov.github.io/audio-super-res/
 4 | 
 5 | https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
 6 | 
 7 | https://pyroomacoustics.readthedocs.io/
 8 | 
 9 | https://www.youtube.com/watch?v=frN_UsNtyGw
10 | 
11 | https://dsp.stackexchange.com/questions/35360/determine-sound-intensity-in-spl/35366
12 | 
13 | https://www.innerfidelity.com/content/harman-tweaks-its-headphone-target-response
14 | 
15 | https://dsp.stackexchange.com/questions/33957/what-is-the-difference-between-the-psd-and-the-power-spectrum
16 | 
17 | https://gist.github.com/danstowell/f2d81a897df9e23cc1da
18 | 
19 | https://github.com/titu1994/keras-squeeze-excite-network
20 | 
21 | https://github.com/ozan-oktay/Attention-Gated-Networks
22 | 


--------------------------------------------------------------------------------
/0001-phase-aware-deep-complex-unet-dcu/DCUnet_pytorch (1).ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "DCUnet.pytorch",
  7 |       "provenance": [],
  8 |       "collapsed_sections": [],
  9 |       "machine_shape": "hm"
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     },
 15 |     "accelerator": "GPU"
 16 |   },
 17 |   "cells": [
 18 |     {
 19 |       "cell_type": "code",
 20 |       "metadata": {
 21 |         "id": "SappraI2J_a8",
 22 |         "colab_type": "code",
 23 |         "colab": {}
 24 |       },
 25 |       "source": [
 26 |         "!pip install -q soundfile"
 27 |       ],
 28 |       "execution_count": 0,
 29 |       "outputs": []
 30 |     },
 31 |     {
 32 |       "cell_type": "code",
 33 |       "metadata": {
 34 |         "id": "jjam5f753TCw",
 35 |         "colab_type": "code",
 36 |         "colab": {}
 37 |       },
 38 |       "source": [
 39 |         "!git clone -q https://github.com/chanil1218/DCUnet.pytorch"
 40 |       ],
 41 |       "execution_count": 0,
 42 |       "outputs": []
 43 |     },
 44 |     {
 45 |       "cell_type": "code",
 46 |       "metadata": {
 47 |         "id": "tbGwn9QhJ4Vi",
 48 |         "colab_type": "code",
 49 |         "colab": {}
 50 |       },
 51 |       "source": [
 52 |         "!wget -q \"HIDDEN/einschlafen-chunks-10s/einschlafen2-chunks-10000.tar\" -O - | tar xf -"
 53 |       ],
 54 |       "execution_count": 0,
 55 |       "outputs": []
 56 |     },
 57 |     {
 58 |       "cell_type": "code",
 59 |       "metadata": {
 60 |         "id": "opNDdUO4HSJ7",
 61 |         "colab_type": "code",
 62 |         "colab": {}
 63 |       },
 64 |       "source": [
 65 |         "!wget \"HIDDEN/einschlafen-chunks-10s/AIR_1_4.tar\" -O - | tar xf -"
 66 |       ],
 67 |       "execution_count": 0,
 68 |       "outputs": []
 69 |     },
 70 |     {
 71 |       "cell_type": "code",
 72 |       "metadata": {
 73 |         "id": "H79jyI64G-ix",
 74 |         "colab_type": "code",
 75 |         "colab": {}
 76 |       },
 77 |       "source": [
 78 |         "from google.colab import drive\n",
 79 |         "drive.mount('/content/drive')"
 80 |       ],
 81 |       "execution_count": 0,
 82 |       "outputs": []
 83 |     },
 84 |     {
 85 |       "cell_type": "code",
 86 |       "metadata": {
 87 |         "id": "eKcshqC5IAVP",
 88 |         "colab_type": "code",
 89 |         "colab": {}
 90 |       },
 91 |       "source": [
 92 |         "import os\n",
 93 |         "import sys\n",
 94 |         "if 'DCUnet.pytorch' not in sys.path: sys.path.append('DCUnet.pytorch')\n",
 95 |         "\n",
 96 |         "import numpy as np\n",
 97 |         "import torch\n",
 98 |         "import torch.nn as nn\n",
 99 |         "import torch.nn.functional as F\n",
100 |         "import torch.optim as optim\n",
101 |         "from torch.optim.lr_scheduler import ExponentialLR\n",
102 |         "\n",
103 |         "from scipy.io import wavfile\n",
104 |         "import librosa\n",
105 |         "import tqdm\n",
106 |         "\n",
107 |         "import glob\n",
108 |         "import utils\n",
109 |         "from models.unet import Unet\n",
110 |         "from models.layers.istft import ISTFT\n",
111 |         "#from se_dataset import AudioDataset\n",
112 |         "from torch.utils.data import DataLoader\n",
113 |         "\n",
114 |         "\n",
115 |         "\"\"\"\n",
116 |         "parser = argparse.ArgumentParser()\n",
117 |         "parser.add_argument('--model_dir', default='experiments/base_model', help=\"Directory containing params.json\")\n",
118 |         "parser.add_argument('--restore_file', default=None, help=\"Optional, name of the file in --model_dir containing weights to reload before training\")  # 'best' or 'train'\n",
119 |         "parser.add_argument('--batch_size', default=32, type=int, help='train batch size')\n",
120 |         "parser.add_argument('--num_epochs', default=100, type=int, help='train epochs number')\n",
121 |         "args = parser.parse_args()\n",
122 |         "\"\"\"\n",
123 |         "\n",
124 |         "#n_fft, hop_length = 400, 160\n",
125 |         "n_fft, hop_length = 1024, 256\n",
126 |         "window = torch.hann_window(n_fft).cuda()\n",
127 |         "stft = lambda x: torch.stft(x, n_fft, hop_length, window=window)\n",
128 |         "istft = ISTFT(n_fft, hop_length, window='hanning').cuda()\n",
129 |         "\n",
130 |         "def wSDRLoss(mixed, clean, clean_est, eps=2e-7):\n",
131 |         "    # Used on signal level(time-domain). Backprop-able istft should be used.\n",
132 |         "    # Batched audio inputs shape (N x T) required.\n",
133 |         "    bsum = lambda x: torch.sum(x, dim=1) # Batch preserving sum for convenience.\n",
134 |         "    def mSDRLoss(orig, est):\n",
135 |         "        # Modified SDR loss, <x, x`> / (||x|| * ||x`||) : L2 Norm.\n",
136 |         "        # Original SDR Loss: <x, x`>**2 / <x`, x`> (== ||x`||**2)\n",
137 |         "        #  > Maximize Correlation while producing minimum energy output.\n",
138 |         "        correlation = bsum(orig * est)\n",
139 |         "        energies = torch.norm(orig, p=2, dim=1) * torch.norm(est, p=2, dim=1)\n",
140 |         "        return -(correlation / (energies + eps))\n",
141 |         "\n",
142 |         "    noise = mixed - clean\n",
143 |         "    noise_est = mixed - clean_est\n",
144 |         "\n",
145 |         "    a = bsum(clean**2) / (bsum(clean**2) + bsum(noise**2) + eps)\n",
146 |         "    wSDR = a * mSDRLoss(clean, clean_est) + (1 - a) * mSDRLoss(noise, noise_est)\n",
147 |         "    return torch.mean(wSDR)\n"
148 |       ],
149 |       "execution_count": 0,
150 |       "outputs": []
151 |     },
152 |     {
153 |       "cell_type": "code",
154 |       "metadata": {
155 |         "id": "kp6yNIPmEe-H",
156 |         "colab_type": "code",
157 |         "colab": {}
158 |       },
159 |       "source": [
160 |         "import glob\n",
161 |         "import tarfile\n",
162 |         "import soundfile as sf\n",
163 |         "import random\n",
164 |         "import pickle\n",
165 |         "\n",
166 |         "try:\n",
167 |         "    TARS, CLEAN = pickle.load(open('tars-clean.pk', 'rb'))\n",
168 |         "except IOError:\n",
169 |         "    TARS = glob.glob(\"/content/drive/My Drive/einschlafen-10s-AIRs/**/*.tar\", recursive=True)\n",
170 |         "    TARS.sort()\n",
171 |         "    CLEAN = glob.glob(\"einschlafen2/**/*.wav\", recursive=True)\n",
172 |         "    CLEAN.sort()\n",
173 |         "    pickle.dump((TARS, CLEAN), open('tars-clean.pk', 'wb'))\n",
174 |         "\n",
175 |         "SAMPLE_LEN = 3\n",
176 |         "FIRST_FRAME = 16_000\n",
177 |         "LAST_FRAME = SAMPLE_LEN * 16_000 + FIRST_FRAME\n",
178 |         "\n",
179 |         "TARS = TARS[:500]\n",
180 |         "\n",
181 |         "class MyAudioDataset(torch.utils.data.IterableDataset):\n",
182 |         "    def __init__(self, tars, clean_files):  # clean_files is accepted but not used\n",
183 |         "        self.tars = tars\n",
184 |         "        self.preloaded = []  # kept for compatibility; __iter__ uses its own buffer\n",
185 |         "\n",
186 |         "    def __iter__(self):\n",
187 |         "        tar_iter, buffer = iter(self.tars), []  # per-pass buffer: nothing leaks into the next epoch\n",
188 |         "        while True:\n",
189 |         "            if not buffer:\n",
190 |         "                for _ in range(3):  # refill from up to 3 tars at a time\n",
191 |         "                    try:\n",
192 |         "                        tar = next(tar_iter)\n",
193 |         "                    except StopIteration:\n",
194 |         "                        yield from buffer  # tars exhausted: flush what is left and stop\n",
195 |         "                        return\n",
196 |         "                    else:\n",
197 |         "                        try:\n",
198 |         "                            self.refill(tar, buffer)\n",
199 |         "                        except Exception as e:  # best effort: report the tar and carry on\n",
200 |         "                            print(\"Error reading\", tar, e)\n",
201 |         "                if not buffer:\n",
202 |         "                    raise RuntimeError(\"Could not read any tars\")\n",
203 |         "            yield buffer.pop()\n",
204 |         "\n",
205 |         "    def refill(self, tar, res):  # appends one (noisy, clean) pair per tar member to res\n",
206 |         "        episode, chunk = tar.split(\"/\")[-1].split(\"-\", 1)\n",
207 |         "        clean = f\"einschlafen2/{episode}/{episode}-{chunk[:-4]}\"  # chunk[:-4] drops the last 4 characters\n",
208 |         "        clean_data, clean_sr = sf.read(clean)\n",
209 |         "        assert clean_sr == 16_000\n",
210 |         "        clean_data = torch.from_numpy(clean_data[FIRST_FRAME:LAST_FRAME]).type(torch.FloatTensor)\n",
211 |         "        with tarfile.open(tar) as tarf:\n",
212 |         "            members = tarf.getmembers()\n",
213 |         "            random.shuffle(members)  # randomise member order within this tar\n",
214 |         "            for member in members:\n",
215 |         "                cur_f = tarf.extractfile(member)\n",
216 |         "                noisy_data, noisy_sr = sf.read(cur_f)\n",
217 |         "                assert noisy_sr == 16_000\n",
218 |         "                noisy_data = torch.from_numpy(noisy_data[FIRST_FRAME:LAST_FRAME]).type(torch.FloatTensor)\n",
219 |         "                res.append((noisy_data, clean_data))"
220 |       ],
221 |       "execution_count": 0,
222 |       "outputs": []
223 |     },
224 |     {
225 |       "cell_type": "code",
226 |       "metadata": {
227 |         "id": "-ieDnohcIDwe",
228 |         "colab_type": "code",
229 |         "colab": {}
230 |       },
231 |       "source": [
232 |         "if 0:\n",
233 |         "    params = utils.Params(\"DCUnet.pytorch/exp/unet16.json\")\n",
234 |         "    net = Unet(params.model).cuda()\n",
235 |         "else:\n",
236 |         "    MODEL = {\n",
237 |         "        \"leaky_slope\" : 0.1,\n",
238 |         "        \"ratio_mask\" : \"BDT\",\n",
239 |         "        \"encoders\" : [\n",
240 |         "            [1, 32, [7, 5], [2, 2], [3, 2]],\n",
241 |         "            [32, 64, [7, 5], [2, 2], [3, 2]],\n",
242 |         "            [64, 64, [5, 3], [2, 2], [2, 1]],\n",
243 |         "            [64, 64, [5, 3], [2, 2], [2, 1]],\n",
244 |         "            [64, 64, [5, 3], [2, 1], [2, 1]]\n",
245 |         "        ],\n",
246 |         "        \"decoders\" : [\n",
247 |         "            [64, 64, [5, 3], [2, 1], [2, 1]],\n",
248 |         "            [128, 64, [5, 3], [2, 2], [2, 1]],\n",
249 |         "            [128, 64, [5, 3], [2, 2], [2, 1]],\n",
250 |         "            [128, 32, [7, 5], [2, 2], [3, 2]],\n",
251 |         "            [64, 1, [7, 5], [2, 2], [3, 2]]\n",
252 |         "        ],\n",
253 |         "        \"__coder_keys\" : [\n",
254 |         "            \"in_channels\", \"out_channels\", \"kernel_size\", \"stride\", \"padding\"\n",
255 |         "        ]\n",
256 |         "    }\n",
257 |         "    net = Unet(MODEL).cuda()\n",
258 |         "\n",
259 |         "print(\"Model has\", sum(p.numel() for p in net.parameters() if p.requires_grad)/1e6, \"M params\")\n",
260 |         "\n",
261 |         "# TODO - check exists\n",
262 |         "START_EPOCH = 28\n",
263 |         "N_EPOCHS = 50\n",
264 |         "NET_NAME = 'net-2-'\n",
265 |         "if START_EPOCH > 0:\n",
266 |         "    checkpoint = torch.load(f'/content/drive/My Drive/{NET_NAME}{START_EPOCH-1}.pth.tar')\n",
267 |         "    net.load_state_dict(checkpoint)\n",
268 |         "\n",
269 |         "BATCH_SIZE = 32\n",
270 |         "\n",
271 |         "train_dataset = MyAudioDataset(TARS, CLEAN)\n",
272 |         "train_data_loader = DataLoader(\n",
273 |         "    dataset=train_dataset,\n",
274 |         "    batch_size=BATCH_SIZE,\n",
275 |         "    ) #collate_fn=train_dataset.collate,#, num_workers=1)"
276 |       ],
277 |       "execution_count": 0,
278 |       "outputs": []
279 |     },
280 |     {
281 |       "cell_type": "code",
282 |       "metadata": {
283 |         "id": "MKYWP1bGrC4c",
284 |         "colab_type": "code",
285 |         "colab": {}
286 |       },
287 |       "source": [
288 |         "def train():\n",
289 |         "    torch.set_printoptions(precision=10, profile=\"full\")\n",
290 |         "\n",
291 |         "    # Optimizer\n",
292 |         "    optimizer = optim.Adam(net.parameters(), lr=1e-3)\n",
293 |         "    # Learning rate scheduler\n",
294 |         "    scheduler = ExponentialLR(optimizer, 0.95)\n",
295 |         "\n",
296 |         "    #mse_loss = torch.nn.MSELoss()\n",
297 |         "\n",
298 |         "    for epoch in range(START_EPOCH, START_EPOCH+N_EPOCHS):\n",
299 |         "        train_bar = tqdm.notebook.tqdm(train_data_loader, total=int(len(TARS) * 214 / BATCH_SIZE))\n",
300 |         "        ct = 0\n",
301 |         "        for train_mixed_cpu, train_clean_cpu in train_bar:\n",
302 |         "            ct += 1\n",
303 |         "            train_mixed = train_mixed_cpu.cuda()\n",
304 |         "\n",
305 |         "            mixed_spec = stft(train_mixed).unsqueeze(dim=1)\n",
306 |         "            mixed_real, mixed_imag = mixed_spec[..., 0], mixed_spec[..., 1]\n",
307 |         "\n",
308 |         "            out_real, out_imag = net(mixed_real, mixed_imag)\n",
309 |         "            train_clean = train_clean_cpu.cuda()\n",
310 |         "\n",
311 |         "            #clean_spec = stft(train_clean).unsqueeze(dim=1)\n",
312 |         "            #clean_real, clean_imag = clean_spec[..., 0], clean_spec[..., 1]\n",
313 |         "            #out_spec = torch.cat([torch.unsqueeze(out_real, 4), torch.unsqueeze(out_imag, 4)], dim=4)\n",
314 |         "            #loss = mse_loss(clean_spec, out_spec)\n",
315 |         "\n",
316 |         "            out_real, out_imag = torch.squeeze(out_real, 1), torch.squeeze(out_imag, 1)\n",
317 |         "            out_audio = istft(out_real, out_imag, train_mixed.size(1))\n",
318 |         "            out_audio = torch.squeeze(out_audio, dim=1)\n",
319 |         "            #for i, l in enumerate(seq_len):\n",
320 |         "            #    out_audio[i, l:] = 0\n",
321 |         "            #librosa.output.write_wav('mixed.wav', train_mixed[0].cpu().data.numpy()[:seq_len[0].cpu().data.numpy()], 16000)\n",
322 |         "            #librosa.output.write_wav('clean.wav', train_clean[0].cpu().data.numpy()[:seq_len[0].cpu().data.numpy()], 16000)\n",
323 |         "            #librosa.output.write_wav('out.wav', out_audio[0].cpu().data.numpy()[:seq_len[0].cpu().data.numpy()], 16000)\n",
324 |         "            loss = wSDRLoss(train_mixed, train_clean, out_audio)\n",
325 |         "\n",
326 |         "            if ct % 30 == 0:\n",
327 |         "                print(epoch, loss)\n",
328 |         "            optimizer.zero_grad()\n",
329 |         "            loss.backward()\n",
330 |         "            optimizer.step()\n",
331 |         "        scheduler.step()\n",
332 |         "        torch.save(net.state_dict(), f'/content/drive/My Drive/{NET_NAME}{epoch}.pth.tar')\n",
333 |         "    #torch.save(net.state_dict(), './final.pth.tar')"
334 |       ],
335 |       "execution_count": 0,
336 |       "outputs": []
337 |     },
338 |     {
339 |       "cell_type": "code",
340 |       "metadata": {
341 |         "id": "kK_0HxHGzyx2",
342 |         "colab_type": "code",
343 |         "colab": {}
344 |       },
345 |       "source": [
346 |         "def eval_samples(mixed):\n",
347 |         "    mixed_spec = stft(mixed.cuda()).unsqueeze(dim=1)\n",
348 |         "    mixed_real, mixed_imag = mixed_spec[..., 0], mixed_spec[..., 1]\n",
349 |         "    out_real, out_imag = net(mixed_real, mixed_imag)\n",
350 |         "    out_real, out_imag = torch.squeeze(out_real, 1), torch.squeeze(out_imag, 1)\n",
351 |         "    out_audio = istft(out_real, out_imag, mixed.size(1))\n",
352 |         "    out_audio = torch.squeeze(out_audio, dim=1)\n",
353 |         "    return out_audio\n",
354 |         "\n",
355 |         "def eval_():\n",
356 |         "    train_data_loader_it = iter(train_data_loader)\n",
357 |         "    for _ in range(random.randint(0,5)*BATCH_SIZE):\n",
358 |         "        next(train_data_loader_it)\n",
359 |         "    train_mixed_cpu, train_clean_cpu = next(train_data_loader_it)\n",
360 |         "\n",
361 |         "    randidx = random.randint(0, train_mixed_cpu.shape[0]-1)\n",
362 |         "    mixed = train_mixed_cpu[randidx:randidx+1]\n",
363 |         "    out_audio = eval_samples(mixed)\n",
364 |         "    train_clean = train_clean_cpu[randidx:randidx+1].cuda()\n",
365 |         "    librosa.output.write_wav('mixed.wav', mixed[0].cpu().data.numpy(), 16000)\n",
366 |         "    librosa.output.write_wav('clean.wav', train_clean[0].cpu().data.numpy(), 16000)\n",
367 |         "    librosa.output.write_wav('out.wav', out_audio[0].cpu().data.numpy(), 16000)\n",
368 |         "\n",
369 |         "#eval_()"
370 |       ],
371 |       "execution_count": 0,
372 |       "outputs": []
373 |     },
374 |     {
375 |       "cell_type": "code",
376 |       "metadata": {
377 |         "id": "QR_ZsOJS7eNs",
378 |         "colab_type": "code",
379 |         "colab": {}
380 |       },
381 |       "source": [
382 |         "episodes = set(c.split('/')[1] for c in CLEAN)\n",
383 |         "\n",
384 |         "conseq = []\n",
385 |         "for episode in episodes:\n",
386 |         "    chunks = [c for c in CLEAN if episode in c]\n",
387 |         "    chunks = sorted(chunks, key=lambda c: (int(c.split('-')[-3]), int(c.split('-')[-2])))\n",
388 |         "    conseq.append([chunks[0]])\n",
389 |         "    for chunk in chunks[1:]:\n",
390 |         "        if not conseq:\n",
391 |         "            conseq.append([chunk])\n",
392 |         "        else:\n",
393 |         "            prev_rir = conseq[-1][-1].split('-')[-3]\n",
394 |         "            prev_end = conseq[-1][-1].split('-')[-1][:-4]\n",
395 |         "            cur_start = chunk.split('-')[-2]\n",
396 |         "            cur_rir = chunk.split('-')[-3]\n",
397 |         "            if prev_rir != cur_rir or prev_end != cur_start:\n",
398 |         "                conseq.append([chunk])\n",
399 |         "            else:\n",
400 |         "                conseq[-1].append(chunk)\n",
401 |         "\n",
402 |         "conseq = [c for c in conseq if len(c) > 2]"
403 |       ],
404 |       "execution_count": 0,
405 |       "outputs": []
406 |     },
407 |     {
408 |       "cell_type": "code",
409 |       "metadata": {
410 |         "id": "3C60QU9XGS3K",
411 |         "colab_type": "code",
412 |         "colab": {}
413 |       },
414 |       "source": [
415 |         "import scipy.signal\n",
416 |         "\n",
417 |         "def test_sample(clean):  # returns (trimmed clean, reverberated input, network output)\n",
418 |         "    mixed = scipy.signal.convolve(\n",
419 |         "        clean,\n",
420 |         "        librosa.core.load(random.choice(glob.glob(\"AIR_1_4/*wav\")), sr=None)[0][:16000]\n",
421 |         "    )[16000:-16000]\n",
422 |         "    # Only whole windows of SAMPLE_LEN*16000 samples are passed to the network; the rest is dropped.\n",
423 |         "    nsplit = len(mixed) // (SAMPLE_LEN*16000)\n",
424 |         "    audiolen = nsplit * (SAMPLE_LEN*16000)\n",
425 |         "    return clean[16000:-16000], mixed, eval_samples(\n",
426 |         "        torch.from_numpy(mixed[:audiolen].reshape((nsplit, -1))\n",
427 |         "                        ).type(torch.FloatTensor)\n",
428 |         "    ).reshape(audiolen).cpu().data.numpy()"
429 |       ],
430 |       "execution_count": 0,
431 |       "outputs": []
432 |     },
433 |     {
434 |       "cell_type": "code",
435 |       "metadata": {
436 |         "id": "VDhNRfBgOwRi",
437 |         "colab_type": "code",
438 |         "colab": {}
439 |       },
440 |       "source": [
441 |         "conseq_sample_clean, conseq_sample_mixed, conseq_sample_out = test_sample(\n",
442 |         "    np.concatenate([librosa.core.load(wav,sr=None)[0] for wav in conseq[1]])\n",
443 |         ")"
444 |       ],
445 |       "execution_count": 0,
446 |       "outputs": []
447 |     },
448 |     {
449 |       "cell_type": "code",
450 |       "metadata": {
451 |         "id": "t0k28WB3Qop8",
452 |         "colab_type": "code",
453 |         "colab": {}
454 |       },
455 |       "source": [
456 |         "librosa.output.write_wav(\"conseq-clean.wav\", conseq_sample_clean*0.1,sr=16000)\n",
457 |         "librosa.output.write_wav(\"conseq-mixed.wav\", conseq_sample_mixed*0.1,sr=16000)\n",
458 |         "librosa.output.write_wav(\"conseq-out.wav\", conseq_sample_out*0.1,sr=16000)"
459 |       ],
460 |       "execution_count": 0,
461 |       "outputs": []
462 |     },
463 |     {
464 |       "cell_type": "code",
465 |       "metadata": {
466 |         "id": "tVwrV_0UNqjA",
467 |         "colab_type": "code",
468 |         "colab": {}
469 |       },
470 |       "source": [
471 |         "Id.Audio(conseq_sample_mixed,rate=16000)"
472 |       ],
473 |       "execution_count": 0,
474 |       "outputs": []
475 |     },
476 |     {
477 |       "cell_type": "code",
478 |       "metadata": {
479 |         "id": "vDMoZyP2NsQG",
480 |         "colab_type": "code",
481 |         "colab": {}
482 |       },
483 |       "source": [
484 |         "Id.Audio(conseq_sample_out,rate=16000)"
485 |       ],
486 |       "execution_count": 0,
487 |       "outputs": []
488 |     },
489 |     {
490 |       "cell_type": "code",
491 |       "metadata": {
492 |         "id": "8SxBSjZWQ74w",
493 |         "colab_type": "code",
494 |         "colab": {}
495 |       },
496 |       "source": [
497 |         "tagesschau_clean, tagesschau_mixed, tagesschau_out = test_sample(\n",
498 |         "    librosa.core.load(\"tagesschau---orig.wav\",sr=16000, duration=20)[0]\n",
499 |         ")"
500 |       ],
501 |       "execution_count": 0,
502 |       "outputs": []
503 |     },
504 |     {
505 |       "cell_type": "code",
506 |       "metadata": {
507 |         "id": "kiq7x9cbRFq0",
508 |         "colab_type": "code",
509 |         "colab": {}
510 |       },
511 |       "source": [
512 |         "Id.Audio(tagesschau_mixed,rate=16000)"
513 |       ],
514 |       "execution_count": 0,
515 |       "outputs": []
516 |     },
517 |     {
518 |       "cell_type": "code",
519 |       "metadata": {
520 |         "id": "lEQgBkr0R3if",
521 |         "colab_type": "code",
522 |         "colab": {}
523 |       },
524 |       "source": [
525 |         "Id.Audio(tagesschau_out,rate=16000)"
526 |       ],
527 |       "execution_count": 0,
528 |       "outputs": []
529 |     },
530 |     {
531 |       "cell_type": "code",
532 |       "metadata": {
533 |         "id": "wNTwAQU0SRcs",
534 |         "colab_type": "code",
535 |         "colab": {}
536 |       },
537 |       "source": [
538 |         "librosa.output.write_wav(\"tagesschau-clean.wav\", tagesschau_clean*0.1,sr=16000)\n",
539 |         "librosa.output.write_wav(\"tagesschau-mixed.wav\", tagesschau_mixed*0.1,sr=16000)\n",
540 |         "librosa.output.write_wav(\"tagesschau-out.wav\", tagesschau_out*0.1,sr=16000)"
541 |       ],
542 |       "execution_count": 0,
543 |       "outputs": []
544 |     },
545 |     {
546 |       "cell_type": "code",
547 |       "metadata": {
548 |         "id": "J37La99n05p2",
549 |         "colab_type": "code",
550 |         "colab": {}
551 |       },
552 |       "source": [
553 |         "import IPython.display as Id\n",
554 |         "\n",
555 |         "Id.Audio(\"clean.wav\", rate=16000)"
556 |       ],
557 |       "execution_count": 0,
558 |       "outputs": []
559 |     },
560 |     {
561 |       "cell_type": "code",
562 |       "metadata": {
563 |         "id": "js9vA4hF1GRw",
564 |         "colab_type": "code",
565 |         "colab": {}
566 |       },
567 |       "source": [
568 |         "Id.Audio(\"mixed.wav\", rate=16000)"
569 |       ],
570 |       "execution_count": 0,
571 |       "outputs": []
572 |     },
573 |     {
574 |       "cell_type": "code",
575 |       "metadata": {
576 |         "id": "8C6zKnlT1HOl",
577 |         "colab_type": "code",
578 |         "colab": {}
579 |       },
580 |       "source": [
581 |         "Id.Audio(\"out.wav\", rate=16000)"
582 |       ],
583 |       "execution_count": 0,
584 |       "outputs": []
585 |     },
586 |     {
587 |       "cell_type": "code",
588 |       "metadata": {
589 |         "id": "RyLC2ZEPPgLk",
590 |         "colab_type": "code",
591 |         "colab": {}
592 |       },
593 |       "source": [
594 |         "torch.cuda.empty_cache()\n",
595 |         "import gc; gc.collect()\n",
596 |         "\n",
597 |         "def pretty_size(size):\n",
598 |         "\t\"\"\"Pretty prints a torch.Size object\"\"\"\n",
599 |         "\tassert(isinstance(size, torch.Size))\n",
600 |         "\treturn \" × \".join(map(str, size))\n",
601 |         "\n",
602 |         "def dump_tensors(gpu_only=True):  # gpu_only: list CUDA tensors only\n",
603 |         "\t\"\"\"Prints a list of the Tensors being tracked by the garbage collector.\"\"\"\n",
604 |         "\timport gc\n",
605 |         "\ttotal_size = 0  # sum of numel() over every tensor that gets printed\n",
606 |         "\tfor obj in gc.get_objects():\n",
607 |         "\t\ttry:\n",
608 |         "\t\t\tif torch.is_tensor(obj):\n",
609 |         "\t\t\t\tif not gpu_only or obj.is_cuda:\n",
610 |         "\t\t\t\t\tprint(\"%s:%s%s %s\" % (type(obj).__name__,\n",
611 |         "\t\t\t\t\t\t\t\t\t\t  \" GPU\" if obj.is_cuda else \"\",\n",
612 |         "\t\t\t\t\t\t\t\t\t\t  \" pinned\" if obj.is_pinned() else \"\",  # is_pinned is a method\n",
613 |         "\t\t\t\t\t\t\t\t\t\t  pretty_size(obj.size())))\n",
614 |         "\t\t\t\t\ttotal_size += obj.numel()\n",
615 |         "\t\t\telif hasattr(obj, \"data\") and torch.is_tensor(obj.data):\n",
616 |         "\t\t\t\tif not gpu_only or obj.is_cuda:\n",
617 |         "\t\t\t\t\tprint(\"%s → %s:%s%s%s%s %s\" % (type(obj).__name__,\n",
618 |         "\t\t\t\t\t\t\t\t\t\t\t\t   type(obj.data).__name__,\n",
619 |         "\t\t\t\t\t\t\t\t\t\t\t\t   \" GPU\" if obj.is_cuda else \"\",\n",
620 |         "\t\t\t\t\t\t\t\t\t\t\t\t   \" pinned\" if obj.data.is_pinned() else \"\",  # call it, as above\n",
621 |         "\t\t\t\t\t\t\t\t\t\t\t\t   \" grad\" if obj.requires_grad else \"\",\n",
622 |         "\t\t\t\t\t\t\t\t\t\t\t\t   \" volatile\" if obj.volatile else \"\",\n",
623 |         "\t\t\t\t\t\t\t\t\t\t\t\t   pretty_size(obj.data.size())))\n",
624 |         "\t\t\t\t\ttotal_size += obj.data.numel()\n",
625 |         "\t\texcept Exception:  # best effort: skip objects that fail while being inspected\n",
626 |         "\t\t\tpass\n",
627 |         "\tprint(\"Total size:\", total_size)\n",
628 |         " \n",
629 |         "dump_tensors()"
630 |       ],
631 |       "execution_count": 0,
632 |       "outputs": []
633 |     }
634 |   ]
635 | }
636 | 


--------------------------------------------------------------------------------
/0001-phase-aware-deep-complex-unet-dcu/README.md:
--------------------------------------------------------------------------------
 1 | https://arxiv.org/abs/1903.03107
 2 | 
 3 | Note: Result scores are INVALID (see openreview.net thread)
 4 | 
 5 | Pretty good reverb cancellation after 30 epochs. Training runs at 2 batches per second (batch size 32, 3-second clips, 16 kHz, 16-bit audio).
 6 | 
 7 | Maybe something off with phase? Sounds weird sometimes.
 8 | 
 9 | Remove churches etc from training data.
10 | 


--------------------------------------------------------------------------------
/0002-loss-metrics/DNN-Based-Monaural-Speech-Enhancement-with-Temporal-and-Spectral-Variations-Equalization.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonashaag/speech-enhancement/dcee451ce755ae7e881f77909d86f1204e3662ae/0002-loss-metrics/DNN-Based-Monaural-Speech-Enhancement-with-Temporal-and-Spectral-Variations-Equalization.pdf


--------------------------------------------------------------------------------
/0002-loss-metrics/README.md:
--------------------------------------------------------------------------------
 1 | Problem: PESQ is a well-studied speech quality metric and good for reverb scoring, but the reference implementation is CPU-only and no GPU implementation is available.
 2 | 
 3 | Maybe PMSQE works better? http://sigmat.ugr.es/PMSQE/
 4 | 
 5 | Also look into wMSE.
 6 | 
 7 | Other metrics that work ok are SI-SDR (but does not work well with phase information) and wSDR (SDR adapted for phase information). Or simply spectrogram MSE. (Wave MSE does not work well at all.)
 8 | 
 9 | Google's ViSQOL metric is good but slower than PESQ.
10 | 
11 | POLQA is commercial with no free reference implementation
12 | 
13 | Or simply use DCT-based spectrograms to avoid the phase problem? https://arxiv.org/pdf/1910.07840.pdf
14 | 
15 | PMSQE seems to not perform well; see the paper "On Loss Functions for Supervised Monaural Time-Domain Speech Enhancement".
16 | 
17 | SI-SDR seems to work fine for most cases, but should account for time-shifted signals when evaluating systems that were trained using a spectrogram loss.
18 | 
19 | https://ieeexplore.ieee.org/abstract/document/9052949
20 | 
21 | https://github.com/gabrielmittag/NISQA
22 | 
23 | https://ieeexplore.ieee.org/abstract/document/7602933
24 | 


--------------------------------------------------------------------------------
/0003-clean-speech-corpuses/README.md:
--------------------------------------------------------------------------------
 1 | SEGAN
 2 | 
 3 | VCTK (https://datashare.is.ed.ac.uk/handle/10283/3443): Has 44k samples of various lengths, ~2300 samples of length >= 4 seconds (with silence removed). Quality mostly good to very good (no noise etc.). ~14h in total.
 4 | 
 5 | https://github.com/microsoft/MS-SNSD : 500h from 2150 speakers, quality medium to good. Not the first choice for training.
 6 | 
 7 | https://catalog.ldc.upenn.edu/LDC95S24
 8 | 
 9 | https://catalog.ldc.upenn.edu/LDC2014S03
10 | 
11 | https://keithito.com/LJ-Speech-Dataset/
12 | 
13 | https://archive.org/details/daps_dataset
14 | 


--------------------------------------------------------------------------------
/0004-reverberation-corpuses/Hadad Heese Vary Gannot MULTICHANNEL AUDIO DATABASE IN VARIOUS ACOUSTIC ENVIRONMENTS.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonashaag/speech-enhancement/dcee451ce755ae7e881f77909d86f1204e3662ae/0004-reverberation-corpuses/Hadad Heese Vary Gannot MULTICHANNEL AUDIO DATABASE IN VARIOUS ACOUSTIC ENVIRONMENTS.pdf


--------------------------------------------------------------------------------
/0004-reverberation-corpuses/README.md:
--------------------------------------------------------------------------------
 1 | https://speech.fit.vutbr.cz/software/but-speech-fit-reverb-database
 2 | 
 3 | https://www.iks.rwth-aachen.de/en/research/tools-downloads/databases/aachen-impulse-response-database/
 4 | 
 5 | https://reverb2014.dereverberation.com/
 6 | 
 7 | https://acecorpus.ee.ic.ac.uk/
 8 | 
 9 | https://odeon.dk/
10 | 
11 | http://isophonics.net/content/room-impulse-response-data-set
12 | 
13 | https://www.iks.rwth-aachen.de/en/research/tools-downloads/databases/multi-channel-impulse-response-database/
14 | 
15 | http://www.dreams-itn.eu/index.php/dissemination/science-blogs/24-rir-databases
16 | 
17 | https://github.com/Marvin182/rir-database
18 | 
19 | https://www.vsl.co.at/
20 | 
21 | https://github.com/DavidDiazGuerra/gpuRIR
22 | 
23 | https://github.com/IoSR-Surrey/IoSR_ListeningRoom_BRIRs
24 | 
25 | https://voices18.github.io/
26 | 
27 | https://zenodo.org/record/2593714
28 | 
29 | https://zenodo.org/record/2660782
30 | 
31 | https://github.com/IoSR-Surrey/RealRoomBRIRs
32 | 
33 | https://oramics.github.io/sampled/
34 | 
35 | https://github.com/ShanonPearce/ASH-IR-Dataset
36 | 
37 | https://github.com/pyBinSim/HeadRelatedDatabase
38 | 
39 | https://zenodo.org/record/1209820
40 | 
41 | https://zenodo.org/record/1434786
42 | 
43 | https://zenodo.org/record/1321996
44 | 
45 | https://zenodo.org/record/160749
46 | 
47 | https://zenodo.org/record/3767044
48 | 
49 | https://zenodo.org/record/2558629
50 | 
51 | https://zenodo.org/record/61072
52 | 
53 | https://github.com/jpcima/HybridReverb2-impulse-response-database
54 | 
55 | https://zenodo.org/record/1417727
56 | 
57 | https://zenodo.org/record/2641166
58 | 
59 | https://zenodo.org/record/1169161
60 | 
61 | https://zenodo.org/record/2635758
62 | 
63 | https://zenodo.org/record/3833940
64 | 
65 | http://legacy.spa.aalto.fi/projects/poririrs/
66 | 
67 | http://web.archive.org/web/20041212110817/http://www.james-hopgood.net/Users/jrh/?doc=ImpulseResponses
68 | 
69 | https://zenodo.org/record/5464104
70 | 


--------------------------------------------------------------------------------
/0004-reverberation-corpuses/jeub09a-binaural-room-impulse-database.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonashaag/speech-enhancement/dcee451ce755ae7e881f77909d86f1204e3662ae/0004-reverberation-corpuses/jeub09a-binaural-room-impulse-database.pdf


--------------------------------------------------------------------------------
/0004-reverberation-corpuses/stewart2010.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonashaag/speech-enhancement/dcee451ce755ae7e881f77909d86f1204e3662ae/0004-reverberation-corpuses/stewart2010.pdf


--------------------------------------------------------------------------------
/0005-existing-applications/README.md:
--------------------------------------------------------------------------------
1 | Nvidia RTX Voice does not work well for reverberation.
2 | 


--------------------------------------------------------------------------------
/0006-small-rooms-classifier/README.md:
--------------------------------------------------------------------------------
1 | Reverb corpuses have many large rooms like churches. There are metrics (like RT60) that may help to classify small and large rooms. Or you throw ML at it!
2 | 


--------------------------------------------------------------------------------
/0006-small-rooms-classifier/rir-small-rooms-n.log:
--------------------------------------------------------------------------------
  1 | ../audio-experiments-data/IRs/IOSR/1821-1.wav;-1;
  2 | ../audio-experiments-data/IRs/isophonics/greathallY.zip1/Y/Yx00y03.wav;-1;
  3 | ../audio-experiments-data/IRs/IOSR/1723-0.wav;-1;
  4 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID11_20180110_T/31/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v02.wav;-1;
  5 | ../audio-experiments-data/IRs/isophonics/greathallW.zip1/W/Wx05y00.wav;-1;
  6 | ../audio-experiments-data/IRs/IOSR/505-0.wav;-1;
  7 | ../audio-experiments-data/IRs/IOSR/2787-1.wav;-1;
  8 | ../audio-experiments-data/IRs/openair/jack-lyons-concert-hall-university-york/b-format/rir_jack_lyons_lp3_96k.wav;3;
  9 | ../audio-experiments-data/IRs/IOSR/586-1.wav;-1;
 10 | ../audio-experiments-data/IRs/isophonics/octagonOmni.zip1/octagonOmni/Omni/x09y05.wav;-1;
 11 | ../audio-experiments-data/IRs/AIR_1_4/air_binaural_stairway_1_1_2_0-0.wav;-1;
 12 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__8-8-8-8-8-8-8/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_8-8-8-8-8-8-8_1m_000-6.wav;-1;
 13 | ../audio-experiments-data/IRs/IOSR/1117-1.wav;-1;
 14 | ../audio-experiments-data/IRs/IOSR/1260-0.wav;-1;
 15 | ../audio-experiments-data/IRs/IOSR/1905-1.wav;-1;
 16 | ../audio-experiments-data/IRs/IOSR/3022-0.wav;-1;
 17 | ../audio-experiments-data/IRs/IOSR/1798-0.wav;-1;
 18 | ../audio-experiments-data/IRs/IOSR/523-0.wav;-1;
 19 | ../audio-experiments-data/IRs/IOSR/740-0.wav;-1;
 20 | ../audio-experiments-data/IRs/IOSR/1895-1.wav;-1;
 21 | ../audio-experiments-data/IRs/IOSR/743-1.wav;-1;
 22 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID02_20180105_S/08/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
 23 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID11_20180110_T/06/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v02.wav;-1;
 24 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__4-4-4-8-4-4-4/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_4-4-4-8-4-4-4_1m_030-0.wav;-1;
 25 | ../audio-experiments-data/IRs/IOSR/1120-0.wav;-1;
 26 | ../audio-experiments-data/IRs/isophonics/octagonY.zip1/Y/Yx10y00.wav;-1;
 27 | ../audio-experiments-data/IRs/isophonics/greathallW.zip1/W/Wx07y11.wav;-1;
 28 | ../audio-experiments-data/IRs/IOSR/1305-1.wav;-1;
 29 | ../audio-experiments-data/IRs/IOSR/2060-1.wav;-1;
 30 | ../audio-experiments-data/IRs/AIR_1_4/air_binaural_stairway_0_1_2_45-0.wav;-1;
 31 | ../audio-experiments-data/IRs/isophonics/classroomW.zip1/classroomW/W/W50x25y.wav;-1;
 32 | ../audio-experiments-data/IRs/IOSR/2056-0.wav;-1;
 33 | ../audio-experiments-data/IRs/IOSR/1123-0.wav;-1;
 34 | ../audio-experiments-data/IRs/isophonics/greathallW.zip1/W/Wx04y06.wav;-1;
 35 | ../audio-experiments-data/IRs/isophonics/greathallW.zip1/W/Wx04y00.wav;-1;
 36 | ../audio-experiments-data/IRs/IOSR/2106-0.wav;-1;
 37 | ../audio-experiments-data/IRs/IOSR/3177-0.wav;-1;
 38 | ../audio-experiments-data/IRs/IOSR/3136-1.wav;-1;
 39 | ../audio-experiments-data/IRs/isophonics/greathallZ.zip1/Z/Zx00y07.wav;-1;
 40 | ../audio-experiments-data/IRs/isophonics/greathallW.zip1/W/Wx11y08.wav;-1;
 41 | ../audio-experiments-data/IRs/isophonics/octagonY.zip1/Y/Yx10y11.wav;-1;
 42 | ../audio-experiments-data/IRs/isophonics/classroomW.zip1/classroomW/W/W00x25y.wav;-1;
 43 | ../audio-experiments-data/IRs/IOSR/16-1.wav;-1;
 44 | ../audio-experiments-data/IRs/IOSR/2328-0.wav;-1;
 45 | ../audio-experiments-data/IRs/IOSR/3172-1.wav;-1;
 46 | ../audio-experiments-data/IRs/IOSR/2443-0.wav;-1;
 47 | ../audio-experiments-data/IRs/isophonics/greathallY.zip1/Y/Yx08y00.wav;-1;
 48 | ../audio-experiments-data/IRs/isophonics/octagonOmni.zip1/octagonOmni/Omni/x08y00.wav;-1;
 49 | ../audio-experiments-data/IRs/IOSR/2413-0.wav;-1;
 50 | ../audio-experiments-data/IRs/isophonics/octagonZ.zip1/Z/Zx12y00.wav;-1;
 51 | ../audio-experiments-data/IRs/IOSR/582-0.wav;-1;
 52 | ../audio-experiments-data/IRs/IOSR/2696-1.wav;-1;
 53 | ../audio-experiments-data/IRs/IOSR/2635-1.wav;-1;
 54 | ../audio-experiments-data/IRs/IOSR/1664-0.wav;-1;
 55 | ../audio-experiments-data/IRs/IOSR/2013-0.wav;-1;
 56 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID02_20180105_S/26/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
 57 | ../audio-experiments-data/IRs/IOSR/1767-0.wav;-1;
 58 | ../audio-experiments-data/IRs/IOSR/1316-1.wav;-1;
 59 | ../audio-experiments-data/IRs/IOSR/242-0.wav;-1;
 60 | ../audio-experiments-data/IRs/isophonics/greathallZ.zip1/Z/Zx02y01.wav;-1;
 61 | ../audio-experiments-data/IRs/IOSR/1917-1.wav;-1;
 62 | ../audio-experiments-data/IRs/IOSR/1459-1.wav;-1;
 63 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__4-4-4-8-4-4-4/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_4-4-4-8-4-4-4_2m_285-7.wav;-1;
 64 | ../audio-experiments-data/IRs/IOSR/534-0.wav;-1;
 65 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID06_20180105_S/26/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
 66 | ../audio-experiments-data/IRs/isophonics/greathallW.zip1/W/Wx00y01.wav;-1;
 67 | ../audio-experiments-data/IRs/IOSR/556-1.wav;-1;
 68 | ../audio-experiments-data/IRs/IOSR/1049-0.wav;-1;
 69 | ../audio-experiments-data/IRs/IOSR/147-0.wav;-1;
 70 | ../audio-experiments-data/IRs/IOSR/813-0.wav;-1;
 71 | ../audio-experiments-data/IRs/isophonics/greathallX.zip1/X/Xx01y01.wav;-1;
 72 | ../audio-experiments-data/IRs/isophonics/greathallZ.zip1/Z/Zx07y12.wav;-1;
 73 | ../audio-experiments-data/IRs/IOSR/1352-0.wav;-1;
 74 | ../audio-experiments-data/IRs/IOSR/866-1.wav;-1;
 75 | ../audio-experiments-data/IRs/isophonics/octagonY.zip1/Y/Yx08y07.wav;-1;
 76 | ../audio-experiments-data/IRs/IOSR/2789-0.wav;-1;
 77 | ../audio-experiments-data/IRs/IOSR/1549-0.wav;-1;
 78 | ../audio-experiments-data/IRs/IOSR/1829-1.wav;-1;
 79 | ../audio-experiments-data/IRs/IOSR/1704-1.wav;-1;
 80 | ../audio-experiments-data/IRs/IOSR/1535-1.wav;-1;
 81 | ../audio-experiments-data/IRs/IOSR/1735-0.wav;-1;
 82 | ../audio-experiments-data/IRs/IOSR/77-1.wav;-1;
 83 | ../audio-experiments-data/IRs/openair/r1-nuclear-reactor-hall/mono/r1_omni_48k.wav;-1;
 84 | ../audio-experiments-data/IRs/isophonics/classroomZ.zip1/Z/Z00x25y.wav;-1;
 85 | ../audio-experiments-data/IRs/isophonics/greathallY.zip1/Y/Yx08y07.wav;-1;
 86 | ../audio-experiments-data/IRs/IOSR/2562-0.wav;-1;
 87 | ../audio-experiments-data/IRs/isophonics/classroomOmni.zip1/classroomOmni/Omni/30x00y.wav;-1;
 88 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID11_20180110_T/13/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v01.wav;-1;
 89 | ../audio-experiments-data/IRs/IOSR/1855-1.wav;-1;
 90 | ../audio-experiments-data/IRs/IOSR/2974-1.wav;-1;
 91 | ../audio-experiments-data/IRs/IOSR/3166-0.wav;-1;
 92 | ../audio-experiments-data/IRs/IOSR/1278-1.wav;-1;
 93 | ../audio-experiments-data/IRs/IOSR/2016-1.wav;-1;
 94 | ../audio-experiments-data/IRs/IOSR/1518-1.wav;-1;
 95 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__4-4-4-8-4-4-4/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_4-4-4-8-4-4-4_2m_330-2.wav;-1;
 96 | ../audio-experiments-data/IRs/IOSR/2745-1.wav;-1;
 97 | ../audio-experiments-data/IRs/IOSR/1702-1.wav;-1;
 98 | ../audio-experiments-data/IRs/IOSR/471-1.wav;-1;
 99 | ../audio-experiments-data/IRs/isophonics/octagonX.zip1/X/Xx02y04.wav;-1;
100 | ../audio-experiments-data/IRs/IOSR/3010-1.wav;-1;
101 | ../audio-experiments-data/IRs/IOSR/1903-0.wav;-1;
102 | ../audio-experiments-data/IRs/IOSR/2650-1.wav;-1;
103 | ../audio-experiments-data/IRs/IOSR/2818-0.wav;-1;
104 | ../audio-experiments-data/IRs/openair/st-margarets-church-national-centre-early-music/b-format/r7_3rd_configuration.wav;1;
105 | ../audio-experiments-data/IRs/IOSR/1416-1.wav;-1;
106 | ../audio-experiments-data/IRs/isophonics/greathallX.zip1/X/Xx11y02.wav;-1;
107 | ../audio-experiments-data/IRs/isophonics/classroomY.zip1/Y/Y25x40y.wav;-1;
108 | ../audio-experiments-data/IRs/IOSR/1196-0.wav;-1;
109 | ../audio-experiments-data/IRs/IOSR/2182-0.wav;-1;
110 | ../audio-experiments-data/IRs/IOSR/1794-1.wav;-1;
111 | ../audio-experiments-data/IRs/isophonics/greathallX.zip1/X/Xx00y07.wav;-1;
112 | ../audio-experiments-data/IRs/IOSR/2695-1.wav;-1;
113 | ../audio-experiments-data/IRs/isophonics/octagonW.zip1/W/Wx08y11.wav;-1;
114 | ../audio-experiments-data/IRs/IOSR/2122-0.wav;-1;
115 | ../audio-experiments-data/IRs/isophonics/greathallW.zip1/W/Wx02y08.wav;-1;
116 | ../audio-experiments-data/IRs/IOSR/870-0.wav;-1;
117 | ../audio-experiments-data/IRs/isophonics/octagonX.zip1/X/Xx08y00.wav;-1;
118 | ../audio-experiments-data/IRs/isophonics/greathallW.zip1/W/Wx04y03.wav;-1;
119 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID10_20180107_T/27/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v01.wav;-1;
120 | ../audio-experiments-data/IRs/IOSR/651-1.wav;-1;
121 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID11_20180110_T/09/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v03.wav;-1;
122 | ../audio-experiments-data/IRs/IOSR/2518-1.wav;-1;
123 | ../audio-experiments-data/IRs/IOSR/611-0.wav;-1;
124 | ../audio-experiments-data/IRs/IOSR/2650-0.wav;-1;
125 | ../audio-experiments-data/IRs/BUT/VUT_FIT_C236/MicID01/SpkID06_20190503_S/26/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
126 | ../audio-experiments-data/IRs/IOSR/1767-0.wav;-1;
127 | ../audio-experiments-data/IRs/IOSR/2620-1.wav;-1;
128 | ../audio-experiments-data/IRs/IOSR/121-0.wav;-1;
129 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__8-8-8-8-8-8-8/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_8-8-8-8-8-8-8_2m_030-1.wav;-1;
130 | ../audio-experiments-data/IRs/IOSR/1315-1.wav;-1;
131 | ../audio-experiments-data/IRs/isophonics/classroomW.zip1/classroomW/W/W50x35y.wav;-1;
132 | ../audio-experiments-data/IRs/IOSR/531-1.wav;-1;
133 | ../audio-experiments-data/IRs/IOSR/370-0.wav;-1;
134 | ../audio-experiments-data/IRs/isophonics/greathallY.zip1/Y/Yx05y05.wav;-1;
135 | ../audio-experiments-data/IRs/IOSR/916-0.wav;-1;
136 | ../audio-experiments-data/IRs/isophonics/octagonY.zip1/Y/Yx00y09.wav;-1;
137 | ../audio-experiments-data/IRs/isophonics/octagonX.zip1/X/Xx02y00.wav;-1;
138 | ../audio-experiments-data/IRs/IOSR/310-0.wav;-1;
139 | ../audio-experiments-data/IRs/IOSR/2737-1.wav;-1;
140 | ../audio-experiments-data/IRs/IOSR/2071-1.wav;-1;
141 | ../audio-experiments-data/IRs/IOSR/2108-0.wav;-1;
142 | ../audio-experiments-data/IRs/IOSR/1384-0.wav;-1;
143 | ../audio-experiments-data/IRs/IOSR/49-1.wav;-1;
144 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__8-8-8-8-8-8-8/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_8-8-8-8-8-8-8_2m_030-4.wav;-1;
145 | ../audio-experiments-data/IRs/IOSR/324-1.wav;-1;
146 | ../audio-experiments-data/IRs/IOSR/2813-1.wav;-1;
147 | ../audio-experiments-data/IRs/IOSR/277-0.wav;-1;
148 | ../audio-experiments-data/IRs/isophonics/greathallY.zip1/Y/Yx09y12.wav;-1;
149 | ../audio-experiments-data/IRs/IOSR/967-1.wav;-1;
150 | ../audio-experiments-data/IRs/IOSR/995-0.wav;-1;
151 | ../audio-experiments-data/IRs/IOSR/2397-0.wav;-1;
152 | ../audio-experiments-data/IRs/isophonics/classroomZ.zip1/Z/Z60x15y.wav;-1;
153 | ../audio-experiments-data/IRs/IOSR/1232-1.wav;-1;
154 | ../audio-experiments-data/IRs/isophonics/octagonY.zip1/Y/Yx02y06.wav;-1;
155 | ../audio-experiments-data/IRs/isophonics/classroomY.zip1/Y/Y05x40y.wav;-1;
156 | ../audio-experiments-data/IRs/isophonics/greathallOmni.zip1/greathallOmni/Omni/x07y00.wav;-1;
157 | ../audio-experiments-data/IRs/isophonics/greathallY.zip1/Y/Yx04y05.wav;-1;
158 | ../audio-experiments-data/IRs/isophonics/classroomW.zip1/classroomW/W/W20x00y.wav;-1;
159 | ../audio-experiments-data/IRs/IOSR/2229-0.wav;-1;
160 | ../audio-experiments-data/IRs/IOSR/1522-0.wav;-1;
161 | ../audio-experiments-data/IRs/IOSR/1727-1.wav;-1;
162 | ../audio-experiments-data/IRs/IOSR/1508-0.wav;-1;
163 | ../audio-experiments-data/IRs/IOSR/897-1.wav;-1;
164 | ../audio-experiments-data/IRs/IOSR/2714-0.wav;-1;
165 | ../audio-experiments-data/IRs/isophonics/greathallOmni.zip1/greathallOmni/Omni/x08y05.wav;-1;
166 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID04_20180105_S/06/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
167 | ../audio-experiments-data/IRs/IOSR/2301-0.wav;-1;
168 | ../audio-experiments-data/IRs/isophonics/octagonY.zip1/Y/Yx03y12.wav;-1;
169 | ../audio-experiments-data/IRs/IOSR/19-1.wav;-1;
170 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__4-4-4-8-4-4-4/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_4-4-4-8-4-4-4_1m_345-4.wav;-1;
171 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_3-3-3-8-3-3-3_1m_285-4.wav;-1;
172 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__8-8-8-8-8-8-8/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_8-8-8-8-8-8-8_1m_300-0.wav;-1;
173 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__8-8-8-8-8-8-8/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_8-8-8-8-8-8-8_1m_270-2.wav;-1;
174 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_3-3-3-8-3-3-3_2m_285-4.wav;-1;
175 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_3-3-3-8-3-3-3_1m_030-3.wav;-1;
176 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_3-3-3-8-3-3-3_1m_270-0.wav;-1;
177 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__4-4-4-8-4-4-4/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_4-4-4-8-4-4-4_1m_270-0.wav;-1;
178 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_3-3-3-8-3-3-3_1m_015-0.wav;-1;
179 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_3-3-3-8-3-3-3_1m_045-4.wav;-1;
180 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID10_20180107_T/05/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
181 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID08_20180105_S/31/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
182 | ../audio-experiments-data/IRs/BUT/Hotel_SkalskyDvur_ConferenceRoom2/MicID01/SpkID02_20170906_S/18/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
183 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID04_20180105_S/28/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
184 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID10_20180107_T/03/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v01.wav;-1;
185 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID08_20180105_S/05/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
186 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID08_20180105_S/14/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
187 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID10_20180107_T/01/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v01.wav;-1;
188 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID06_20180105_S/01/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
189 | ../audio-experiments-data/IRs/BUT/VUT_FIT_C236/MicID01/SpkID10_20190503_S/16/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
190 | ../audio-experiments-data/IRs/openair/innocent-railway-tunnel/mono/tunnel_entrance_b_1way_mono.wav;-1;
191 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID06_20180105_S/05/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
192 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID09_20180105_S/06/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
193 | ../audio-experiments-data/IRs/openair/innocent-railway-tunnel/b-format/tunnel_entrance_e_4way_bformat.wav;3;
194 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__4-4-4-8-4-4-4/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_4-4-4-8-4-4-4_1m_315-1.wav;-1;
195 | ../audio-experiments-data/IRs/openair/innocent-railway-tunnel/mono/tunnel_entrance_c_4way_mono.wav;-1;
196 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_3-3-3-8-3-3-3_1m_000-2.wav;-1;
197 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__8-8-8-8-8-8-8/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_8-8-8-8-8-8-8_1m_315-4.wav;-1;
198 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__8-8-8-8-8-8-8/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_8-8-8-8-8-8-8_1m_000-2.wav;-1;
199 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_3-3-3-8-3-3-3_1m_075-1.wav;-1;
200 | ../audio-experiments-data/IRs/openair/innocent-railway-tunnel/mono/tunnel_entrance_c_1way_mono.wav;-1;
201 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID10_20180107_T/06/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
202 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_3-3-3-8-3-3-3_1m_315-3.wav;-1;
203 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID10_20180107_T/14/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v01.wav;-1;
204 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID02_20180105_S/09/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
205 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_3-3-3-8-3-3-3_1m_000-1.wav;-1;
206 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_3-3-3-8-3-3-3_1m_315-5.wav;-1;
207 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__4-4-4-8-4-4-4/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_4-4-4-8-4-4-4_1m_030-1.wav;-1;
208 | ../audio-experiments-data/IRs/BUT/Hotel_SkalskyDvur_ConferenceRoom2/MicID01/SpkID02_20170906_S/30/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
209 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_3-3-3-8-3-3-3_1m_345-2.wav;-1;
210 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__8-8-8-8-8-8-8/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_8-8-8-8-8-8-8_1m_345-4.wav;-1;
211 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID09_20180105_S/31/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
212 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID08_20180105_S/31/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
213 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.610s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.610s)_3-3-3-8-3-3-3_1m_330-4.wav;-1;
214 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L227/MicID01/SpkID06_20180105_S/07/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
215 | ../audio-experiments-data/IRs/BUT/Hotel_SkalskyDvur_ConferenceRoom2/MicID01/SpkID01_20170906_S/29/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;
216 | 


--------------------------------------------------------------------------------
/0006-small-rooms-classifier/rir-small-rooms-y.log:
--------------------------------------------------------------------------------
 1 | ../audio-experiments-data/IRs/AIR_1_4/air_binaural_meeting_0_1_1-0.wav;-1;y
 2 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__8-8-8-8-8-8-8/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_8-8-8-8-8-8-8_1m_060-5.wav;-1;y
 3 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__4-4-4-8-4-4-4/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_4-4-4-8-4-4-4_2m_345-7.wav;-1;y
 4 | ../audio-experiments-data/IRs/openair/trollers-gill/b-format/dales_site1_1way_bformat.wav;2;y
 5 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__8-8-8-8-8-8-8/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_8-8-8-8-8-8-8_2m_285-3.wav;-1;y
 6 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__4-4-4-8-4-4-4/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_4-4-4-8-4-4-4_2m_330-2.wav;-1;y
 7 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L207/MicID01/SpkID06_20171231_T/07/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v01.wav;-1;y
 8 | ../audio-experiments-data/IRs/BUT/VUT_FIT_Q301/MicID01/SpkID04_20170915_S/30/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;y
 9 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__8-8-8-8-8-8-8/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_8-8-8-8-8-8-8_2m_090-1.wav;-1;y
10 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L207/MicID01/SpkID06_20171231_T/13/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v01.wav;-1;y
11 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_3-3-3-8-3-3-3_2m_045-3.wav;-1;y
12 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_3-3-3-8-3-3-3_2m_330-2.wav;-1;y
13 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_3-3-3-8-3-3-3_2m_345-7.wav;-1;y
14 | ../audio-experiments-data/IRs/BUT/Hotel_SkalskyDvur_Room112/MicID01/SpkID01_20170906_S/30/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;y
15 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_3-3-3-8-3-3-3_1m_015-4.wav;-1;y
16 | ../audio-experiments-data/IRs/BUT/Hotel_SkalskyDvur_Room112/MicID01/SpkID03_20170906_S/26/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;y
17 | ../audio-experiments-data/IRs/AIR_1_4/air_binaural_office_1_0_2-0.wav;-1;y
18 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_3-3-3-8-3-3-3_1m_075-7.wav;-1;y
19 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L207/MicID01/SpkID01_20171225_T/09/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v02.wav;-1;y
20 | ../audio-experiments-data/IRs/BUT/VUT_FIT_Q301/MicID01/SpkID04_20170915_S/13/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;y
21 | ../audio-experiments-data/IRs/BUT/VUT_FIT_D105/MicID01/SpkID07_20170904_T/12/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v04.wav;-1;y
22 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L207/MicID01/SpkID02_20171229_T/08/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;y
23 | ../audio-experiments-data/IRs/BUT/Hotel_SkalskyDvur_Room112/MicID01/SpkID03_20170906_S/11/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;y
24 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__8-8-8-8-8-8-8/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_8-8-8-8-8-8-8_1m_330-5.wav;-1;y
25 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__8-8-8-8-8-8-8/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_8-8-8-8-8-8-8_2m_030-4.wav;-1;y
26 | ../audio-experiments-data/IRs/BUT/Hotel_SkalskyDvur_Room112/MicID01/SpkID04_20170906_S/27/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;y
27 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L207/MicID01/SpkID06_20171231_T/20/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;y
28 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__4-4-4-8-4-4-4/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_4-4-4-8-4-4-4_1m_045-6.wav;-1;y
29 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L207/MicID01/SpkID05_20171229_S/19/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;y
30 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_3-3-3-8-3-3-3_1m_015-3.wav;-1;y
31 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_3-3-3-8-3-3-3_2m_075-5.wav;-1;y
32 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.160s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.160s)_3-3-3-8-3-3-3_1m_090-2.wav;-1;y
33 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.160s__8-8-8-8-8-8-8/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.160s)_8-8-8-8-8-8-8_2m_075-1.wav;-1;y
34 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__4-4-4-8-4-4-4/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_4-4-4-8-4-4-4_2m_315-5.wav;-1;y
35 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_3-3-3-8-3-3-3_1m_345-1.wav;-1;y
36 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_3-3-3-8-3-3-3_1m_030-7.wav;-1;y
37 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L212/MicID01/SpkID02_20170820_T/21/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v01.wav;-1;y
38 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.160s__8-8-8-8-8-8-8/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.160s)_8-8-8-8-8-8-8_1m_300-4.wav;-1;y
39 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.160s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.160s)_3-3-3-8-3-3-3_2m_030-3.wav;-1;y
40 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__4-4-4-8-4-4-4/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_4-4-4-8-4-4-4_2m_300-6.wav;-1;y
41 | ../audio-experiments-data/IRs/BUT/VUT_FIT_D105/MicID01/SpkID05_20170901_S/16/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;y
42 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_3-3-3-8-3-3-3_2m_015-5.wav;-1;y
43 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__8-8-8-8-8-8-8/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_8-8-8-8-8-8-8_1m_075-2.wav;-1;y
44 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__8-8-8-8-8-8-8/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_8-8-8-8-8-8-8_2m_045-0.wav;-1;y
45 | ../audio-experiments-data/IRs/BUT/Hotel_SkalskyDvur_Room112/MicID01/SpkID05_20170906_S/09/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;y
46 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.160s__4-4-4-8-4-4-4/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.160s)_4-4-4-8-4-4-4_2m_015-2.wav;-1;y
47 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__4-4-4-8-4-4-4/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_4-4-4-8-4-4-4_2m_330-1.wav;-1;y
48 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L207/MicID01/SpkID02_20171227_S/20/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v00.wav;-1;y
49 | ../audio-experiments-data/IRs/BUT/VUT_FIT_Q301/MicID01/SpkID01_20170910_T/03/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v02.wav;-1;y
50 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__8-8-8-8-8-8-8/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_8-8-8-8-8-8-8_2m_330-7.wav;-1;y
51 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.160s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.160s)_3-3-3-8-3-3-3_1m_285-1.wav;-1;y
52 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_3-3-3-8-3-3-3_2m_015-1.wav;-1;y
53 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_3-3-3-8-3-3-3_2m_330-1.wav;-1;y
54 | ../audio-experiments-data/IRs/AIR_1_4/air_binaural_meeting_0_1_4-0.wav;-1;y
55 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L212/MicID01/SpkID02_20170820_T/18/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v01.wav;-1;y
56 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__4-4-4-8-4-4-4/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_4-4-4-8-4-4-4_2m_315-7.wav;-1;y
57 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.360s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.360s)_3-3-3-8-3-3-3_1m_330-6.wav;-1;y
58 | ../audio-experiments-data/IRs/BUT/VUT_FIT_L207/MicID01/SpkID06_20171231_T/11/RIR/IR_sweep_15s_45Hzto22kHz_FS16kHz.v02.wav;-1;y
59 | ../audio-experiments-data/IRs/MIRD/Impulse_response_Acoustic_Lab_Bar-Ilan_University__Reverberation_0.160s__3-3-3-8-3-3-3/Impulse_response_Acoustic_Lab_Bar-Ilan_University_(Reverberation_0.160s)_3-3-3-8-3-3-3_1m_345-4.wav;-1;y
60 | 


--------------------------------------------------------------------------------
/0006-small-rooms-classifier/rir-small-rooms.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import sys
 3 | import os
 4 | import soundfile as sf
 5 | import scipy.signal
 6 | import random
 7 | import pyloudnorm
 8 | import librosa
 9 | 
10 | voices = open(sys.argv[1]).read().strip().splitlines()
11 | #irs = open(sys.argv[2]).read().strip().splitlines()
12 | #irs = [l.split(';')[0] for l in open(sys.argv[2]).read().strip().splitlines() if float(l.split(';')[-1])>=0.5]
13 | irs = [l.split(';')[0] for l in open(sys.argv[2]).read().strip().splitlines() if l.endswith('y')]
14 | out = open(sys.argv[3], "a")
15 | 
16 | random.shuffle(irs)
17 | 
18 | #import tensorflow as tf
19 | #model = tf.keras.models.load_model('small-rooms.model')
20 | 
21 | while 1:
22 |     voice = random.choice(voices)
23 |     voicew = sf.read(voice)[0]
24 |     ir = irs.pop()
25 |     print(ir)
26 |     irw = librosa.core.load(ir,sr=16000,mono=False)[0]
27 |     if len(irw.shape) > 1:
28 |         chan = random.randint(0, irw.shape[0]-1)
29 |         irw = irw[chan]
30 |     else:
31 |         chan = -1
32 |     irw = irw[:32000]
33 |     ird = scipy.signal.convolve(voicew, irw)[len(irw):len(voicew)]
34 |     ird = pyloudnorm.normalize.peak(ird, -10)
35 |     if 0:
36 |         pred = model.predict(np.expand_dims(
37 |             np.abs(librosa.core.stft(np.asfortranarray(librosa.util.fix_length(irw,32000)),1024))
38 |             ,0
39 |         ))[0]
40 |     else:
41 |         pred = -1
42 |     sf.write("/tmp/wav.wav", ird, samplerate=16000)
43 |     os.system("ffplay -nodisp /tmp/wav.wav 2>/dev/null")
44 |     choice = input(f"Room? {pred}")
45 |     out.write(f"{ir};{chan};{choice}\n")
46 |     out.flush()
47 | 


--------------------------------------------------------------------------------
/0006-small-rooms-classifier/small-rooms-nn-predict.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import numpy as np
 3 | import tensorflow as tf
 4 | import librosa
 5 | import tqdm
 6 | import tqdm.contrib.concurrent
 7 | import random
 8 | 
 9 | model = tf.keras.models.load_model('small-rooms-magspec.model')
10 | 
11 | out = open(sys.argv[2], 'w')
12 | 
13 | def eval_(f):
14 |     r, _ = librosa.core.load(f,sr=16000,mono=False)
15 |     if len(r.shape) < 2:
16 |         r = [r]
17 |     s = np.array([
18 |         np.abs(librosa.core.stft(np.asfortranarray(librosa.util.fix_length(w,32000)),1024))
19 |         for w in r
20 |     ])
21 |     for i, p in enumerate(model.predict(s)):
22 |         out.write(f"{f};{i};{p[0]}\n")
23 | 
24 | files = open(sys.argv[1]).read().strip().splitlines()
25 | random.shuffle(files)
26 | tqdm.contrib.concurrent.thread_map(eval_, files, max_workers=4)
27 | 


--------------------------------------------------------------------------------
/0006-small-rooms-classifier/small-rooms-nn.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import numpy as np
 3 | import tensorflow as tf
 4 | import librosa
 5 | import tqdm
 6 | import pickle
 7 | import random
 8 | 
 9 | if int(sys.argv[1]):
10 |     def load(f, chan):
11 |         r, _ = librosa.core.load(f,sr=16000,mono=False)
12 |         if chan >= 0:
13 |             w = r[chan]
14 |         else:
15 |             w = r
16 |         w = np.asfortranarray(librosa.util.fix_length(w,32000))
17 |         return (w,np.abs(librosa.core.stft(w,1024)))
18 | 
19 |     X = [load(f, int(chan)) for f, chan, _ in
20 |          tqdm.tqdm([x.split(';') for x in open('rir-small-rooms-n.log').read().splitlines()])] \
21 |       + [load(f, int(chan)) for f, chan, _ in
22 |          tqdm.tqdm([x.split(';') for x in open('rir-small-rooms-y.log').read().splitlines()])]
23 |     Y = [False for _ in open('rir-small-rooms-n.log').read().splitlines()] \
24 |       + [True  for _ in open('rir-small-rooms-y.log').read().splitlines()]
25 |     pickle.dump((X,Y), open('small-rooms-xy.pkl', 'wb'))
26 | else:
27 |     X,Y = pickle.load(open('small-rooms-xy.pkl','rb'))
28 | 
29 | xy = list(zip(X,Y))
30 | random.shuffle(xy)
31 | X,Y=list(zip(*xy))
32 | 
33 | if 1:
34 |     X,Y=np.array([x[1] for x in X]),np.array(Y)
35 | 
36 |     inp = tf.keras.Input(X[0].shape)
37 |     dense1=tf.keras.layers.Dense(300,activation='relu')(tf.keras.layers.Flatten()(inp))
38 |     dense2=tf.keras.layers.Dense(50,activation='relu')(tf.keras.layers.Dropout(0.2)(dense1))
39 |     dense3=tf.keras.layers.Dense(50,activation='relu')(dense2)
40 |     out=tf.keras.layers.Dense(1,activation='sigmoid')(dense3)
41 | 
42 |     model = tf.keras.Model(inputs=inp,outputs=out)
43 |     model.compile(optimizer=tf.keras.optimizers.Adam(lr=1e-4),loss='binary_crossentropy', metrics=['acc'])
44 |     model.fit(X,Y,epochs=10,batch_size=5,validation_split=0.3,shuffle=True)
45 | 
46 |     model.save('small-rooms-magspec.model')
47 | elif 0:
48 |     X,Y=np.array([x[0] for x in X]),np.array(Y)
49 | 
50 |     inp = tf.keras.Input(X[0].shape)
51 |     dense1=tf.keras.layers.Dense(100,activation='relu')(tf.keras.layers.Flatten()(inp))
52 |     dense2=tf.keras.layers.Dense(50,activation='relu')(tf.keras.layers.Dropout(0.1)(dense1))
53 |     out=tf.keras.layers.Dense(1,activation='sigmoid')(dense2)
54 | 
55 |     model = tf.keras.Model(inputs=inp,outputs=out)
56 |     model.compile(optimizer=tf.keras.optimizers.Adam(lr=1e-3),loss='binary_crossentropy', metrics=['acc'])
57 |     model.fit(X,Y,epochs=20,batch_size=5,validation_split=0.3,shuffle=True)
58 | 
59 |     model.save('small-rooms-waveform.model')
60 | else:
61 |     X,Y=np.array([librosa.feature.rms(x[0], frame_length=1024)[0] for x in X]),np.array(Y)
62 | 
63 |     inp = tf.keras.Input(X[0].shape)
64 |     dense1=tf.keras.layers.Dense(100,activation='relu')(tf.keras.layers.Flatten()(inp))
65 |     dense2=tf.keras.layers.Dense(10,activation='relu')(tf.keras.layers.Dropout(0.1)(dense1))
66 |     out=tf.keras.layers.Dense(1,activation='sigmoid')(dense2)
67 | 
68 |     model = tf.keras.Model(inputs=inp,outputs=out)
69 |     model.compile(optimizer=tf.keras.optimizers.Adam(lr=1e-3),loss='binary_crossentropy', metrics=['acc'])
70 |     model.fit(X,Y,epochs=100,batch_size=5,validation_split=0.3,shuffle=True)
71 | 
72 |     model.save('small-rooms-rms.model')
73 | 


--------------------------------------------------------------------------------
/0007-pretrained-models/README.md:
--------------------------------------------------------------------------------
 1 | List of models with downloadable pretrained checkpoints
 2 | 
 3 | - https://github.com/ykoyama58/tcnse
 4 | - https://github.com/funcwj/conv-tasnet
 5 | - https://github.com/AppleHolic/source_separation
 6 | - https://github.com/anicolson/DeepXi (ResNet-1.0c)
 7 | - https://github.com/BYRTIMO/END-TO-END-SPEECH-ENHANCEMENT-BASED-ON-DISCRETE-COSINE-TRANSFORM
 8 | - https://github.com/mpariente/asteroid#pretrained-models
 9 | - https://github.com/francoisgermain/SpeechDenoisingWithDeepFeatureLosses
10 | - https://github.com/pheepa/DCUnet
11 | - https://github.com/JusperLee/Dual-Path-RNN-Pytorch
12 | - https://github.com/ShiZiqiang/dual-path-RNNs-DPRNNs-based-speech-separation
13 | - https://github.com/JusperLee/Conv-TasNet
14 | - https://github.com/JusperLee/Deep-Clustering-for-Speech-Separation
15 | - https://github.com/facebookresearch/demucs/
16 | 


--------------------------------------------------------------------------------
/9999-asteroid.md:
--------------------------------------------------------------------------------
1 | Great framework + community for speech separation and enhancement https://asteroid-team.github.io
2 | 


--------------------------------------------------------------------------------