├── .gitignore
├── README.md
├── code
├── conf_generalists.yaml
├── conf_specialists.yaml
├── datasets
│ ├── demand.csv
│ ├── fsd50k.csv
│ ├── librispeech.csv
│ └── musan.csv
├── exp_data.py
├── exp_models.py
├── exp_utils.py
├── finetune.py
├── notebooks
│ └── noise_sparsity.ipynb
├── requirements.txt
├── run.py
├── snr_predictor
├── speakers
│ ├── test.csv
│ ├── train.csv
│ └── validation.csv
├── test.py
├── train_generalists.py
└── train_specialists.py
└── docs
└── images
├── pse_ssl_overview.png
├── waveforms_cm.png
└── waveforms_pseudose.png
/.gitignore:
--------------------------------------------------------------------------------
1 | ## subdirectories
2 | notebooks/*
3 | notebooks/figures/
4 |
5 |
6 | # Created by https://www.toptal.com/developers/gitignore/api/audio,python,pycharm+all,jupyternotebooks,windows,macos,vim,visualstudio
7 | # Edit at https://www.toptal.com/developers/gitignore?templates=audio,python,pycharm+all,jupyternotebooks,windows,macos,vim,visualstudio
8 |
9 | ### Audio ###
10 | *.aif
11 | *.iff
12 | *.m3u
13 | *.m4a
14 | *.mid
15 | *.mp3
16 | *.mpa
17 | *.ra
18 | *.wav
19 | *.wma
20 | *.ogg
21 | *.flac
22 |
23 | ### JupyterNotebooks ###
24 | # gitignore template for Jupyter Notebooks
25 | # website: http://jupyter.org/
26 |
27 | .ipynb_checkpoints
28 | */.ipynb_checkpoints/*
29 |
30 | # IPython
31 | profile_default/
32 | ipython_config.py
33 |
34 | # Remove previous ipynb_checkpoints
35 | # git rm -r .ipynb_checkpoints/
36 |
37 | ### macOS ###
38 | # General
39 | .DS_Store
40 | .AppleDouble
41 | .LSOverride
42 |
43 | # Icon must end with two \r
44 | Icon
45 |
46 |
47 | # Thumbnails
48 | ._*
49 |
50 | # Files that might appear in the root of a volume
51 | .DocumentRevisions-V100
52 | .fseventsd
53 | .Spotlight-V100
54 | .TemporaryItems
55 | .Trashes
56 | .VolumeIcon.icns
57 | .com.apple.timemachine.donotpresent
58 |
59 | # Directories potentially created on remote AFP share
60 | .AppleDB
61 | .AppleDesktop
62 | Network Trash Folder
63 | Temporary Items
64 | .apdisk
65 |
66 | ### PyCharm+all ###
67 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
68 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
69 |
70 | # User-specific stuff
71 | .idea/**/workspace.xml
72 | .idea/**/tasks.xml
73 | .idea/**/usage.statistics.xml
74 | .idea/**/dictionaries
75 | .idea/**/shelf
76 |
77 | # AWS User-specific
78 | .idea/**/aws.xml
79 |
80 | # Generated files
81 | .idea/**/contentModel.xml
82 |
83 | # Sensitive or high-churn files
84 | .idea/**/dataSources/
85 | .idea/**/dataSources.ids
86 | .idea/**/dataSources.local.xml
87 | .idea/**/sqlDataSources.xml
88 | .idea/**/dynamic.xml
89 | .idea/**/uiDesigner.xml
90 | .idea/**/dbnavigator.xml
91 |
92 | # Gradle
93 | .idea/**/gradle.xml
94 | .idea/**/libraries
95 |
96 | # Gradle and Maven with auto-import
97 | # When using Gradle or Maven with auto-import, you should exclude module files,
98 | # since they will be recreated, and may cause churn. Uncomment if using
99 | # auto-import.
100 | # .idea/artifacts
101 | # .idea/compiler.xml
102 | # .idea/jarRepositories.xml
103 | # .idea/modules.xml
104 | # .idea/*.iml
105 | # .idea/modules
106 | # *.iml
107 | # *.ipr
108 |
109 | # CMake
110 | cmake-build-*/
111 |
112 | # Mongo Explorer plugin
113 | .idea/**/mongoSettings.xml
114 |
115 | # File-based project format
116 | *.iws
117 |
118 | # IntelliJ
119 | out/
120 |
121 | # mpeltonen/sbt-idea plugin
122 | .idea_modules/
123 |
124 | # JIRA plugin
125 | atlassian-ide-plugin.xml
126 |
127 | # Cursive Clojure plugin
128 | .idea/replstate.xml
129 |
130 | # Crashlytics plugin (for Android Studio and IntelliJ)
131 | com_crashlytics_export_strings.xml
132 | crashlytics.properties
133 | crashlytics-build.properties
134 | fabric.properties
135 |
136 | # Editor-based Rest Client
137 | .idea/httpRequests
138 |
139 | # Android studio 3.1+ serialized cache file
140 | .idea/caches/build_file_checksums.ser
141 |
142 | ### PyCharm+all Patch ###
143 | # Ignores the whole .idea folder and all .iml files
144 | # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360
145 |
146 | .idea/
147 |
148 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023
149 |
150 | *.iml
151 | modules.xml
152 | .idea/misc.xml
153 | *.ipr
154 |
155 | # Sonarlint plugin
156 | .idea/sonarlint
157 |
158 | ### Python ###
159 | # Byte-compiled / optimized / DLL files
160 | __pycache__/
161 | *.py[cod]
162 | *$py.class
163 |
164 | # C extensions
165 | *.so
166 |
167 | # Distribution / packaging
168 | .Python
169 | build/
170 | develop-eggs/
171 | dist/
172 | downloads/
173 | eggs/
174 | .eggs/
175 | lib/
176 | lib64/
177 | parts/
178 | sdist/
179 | var/
180 | wheels/
181 | share/python-wheels/
182 | *.egg-info/
183 | .installed.cfg
184 | *.egg
185 | MANIFEST
186 |
187 | # PyInstaller
188 | # Usually these files are written by a python script from a template
189 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
190 | *.manifest
191 | *.spec
192 |
193 | # Installer logs
194 | pip-log.txt
195 | pip-delete-this-directory.txt
196 |
197 | # Unit test / coverage reports
198 | htmlcov/
199 | .tox/
200 | .nox/
201 | .coverage
202 | .coverage.*
203 | .cache
204 | nosetests.xml
205 | coverage.xml
206 | *.cover
207 | *.py,cover
208 | .hypothesis/
209 | .pytest_cache/
210 | cover/
211 |
212 | # Translations
213 | *.mo
214 | *.pot
215 |
216 | # Django stuff:
217 | *.log
218 | local_settings.py
219 | db.sqlite3
220 | db.sqlite3-journal
221 |
222 | # Flask stuff:
223 | instance/
224 | .webassets-cache
225 |
226 | # Scrapy stuff:
227 | .scrapy
228 |
229 | # Sphinx documentation
230 | docs/_build/
231 |
232 | # PyBuilder
233 | .pybuilder/
234 | target/
235 |
236 | # Jupyter Notebook
237 |
238 | # IPython
239 |
240 | # pyenv
241 | # For a library or package, you might want to ignore these files since the code is
242 | # intended to run in multiple environments; otherwise, check them in:
243 | # .python-version
244 |
245 | # pipenv
246 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
247 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
248 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
249 | # install all needed dependencies.
250 | #Pipfile.lock
251 |
252 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
253 | __pypackages__/
254 |
255 | # Celery stuff
256 | celerybeat-schedule
257 | celerybeat.pid
258 |
259 | # SageMath parsed files
260 | *.sage.py
261 |
262 | # Environments
263 | .env
264 | .venv
265 | env/
266 | venv/
267 | ENV/
268 | env.bak/
269 | venv.bak/
270 |
271 | # Spyder project settings
272 | .spyderproject
273 | .spyproject
274 |
275 | # Rope project settings
276 | .ropeproject
277 |
278 | # mkdocs documentation
279 | /site
280 |
281 | # mypy
282 | .mypy_cache/
283 | .dmypy.json
284 | dmypy.json
285 |
286 | # Pyre type checker
287 | .pyre/
288 |
289 | # pytype static type analyzer
290 | .pytype/
291 |
292 | # Cython debug symbols
293 | cython_debug/
294 |
295 | ### Vim ###
296 | # Swap
297 | [._]*.s[a-v][a-z]
298 | !*.svg # comment out if you don't need vector files
299 | [._]*.sw[a-p]
300 | [._]s[a-rt-v][a-z]
301 | [._]ss[a-gi-z]
302 | [._]sw[a-p]
303 |
304 | # Session
305 | Session.vim
306 | Sessionx.vim
307 |
308 | # Temporary
309 | .netrwhist
310 | *~
311 | # Auto-generated tag files
312 | tags
313 | # Persistent undo
314 | [._]*.un~
315 |
316 | ### Windows ###
317 | # Windows thumbnail cache files
318 | Thumbs.db
319 | Thumbs.db:encryptable
320 | ehthumbs.db
321 | ehthumbs_vista.db
322 |
323 | # Dump file
324 | *.stackdump
325 |
326 | # Folder config file
327 | [Dd]esktop.ini
328 |
329 | # Recycle Bin used on file shares
330 | $RECYCLE.BIN/
331 |
332 | # Windows Installer files
333 | *.cab
334 | *.msi
335 | *.msix
336 | *.msm
337 | *.msp
338 |
339 | # Windows shortcuts
340 | *.lnk
341 |
342 | ### VisualStudio ###
343 | ## Ignore Visual Studio temporary files, build results, and
344 | ## files generated by popular Visual Studio add-ons.
345 | ##
346 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
347 |
348 | # User-specific files
349 | *.rsuser
350 | *.suo
351 | *.user
352 | *.userosscache
353 | *.sln.docstates
354 |
355 | # User-specific files (MonoDevelop/Xamarin Studio)
356 | *.userprefs
357 |
358 | # Mono auto generated files
359 | mono_crash.*
360 |
361 | # Build results
362 | [Dd]ebug/
363 | [Dd]ebugPublic/
364 | [Rr]elease/
365 | [Rr]eleases/
366 | x64/
367 | x86/
368 | [Ww][Ii][Nn]32/
369 | [Aa][Rr][Mm]/
370 | [Aa][Rr][Mm]64/
371 | bld/
372 | [Bb]in/
373 | [Oo]bj/
374 | [Ll]og/
375 | [Ll]ogs/
376 |
377 | # Visual Studio 2015/2017 cache/options directory
378 | .vs/
379 | # Uncomment if you have tasks that create the project's static files in wwwroot
380 | #wwwroot/
381 |
382 | # Visual Studio 2017 auto generated files
383 | Generated\ Files/
384 |
385 | # MSTest test Results
386 | [Tt]est[Rr]esult*/
387 | [Bb]uild[Ll]og.*
388 |
389 | # NUnit
390 | *.VisualState.xml
391 | TestResult.xml
392 | nunit-*.xml
393 |
394 | # Build Results of an ATL Project
395 | [Dd]ebugPS/
396 | [Rr]eleasePS/
397 | dlldata.c
398 |
399 | # Benchmark Results
400 | BenchmarkDotNet.Artifacts/
401 |
402 | # .NET Core
403 | project.lock.json
404 | project.fragment.lock.json
405 | artifacts/
406 |
407 | # ASP.NET Scaffolding
408 | ScaffoldingReadMe.txt
409 |
410 | # StyleCop
411 | StyleCopReport.xml
412 |
413 | # Files built by Visual Studio
414 | *_i.c
415 | *_p.c
416 | *_h.h
417 | *.ilk
418 | *.meta
419 | *.obj
420 | *.iobj
421 | *.pch
422 | *.pdb
423 | *.ipdb
424 | *.pgc
425 | *.pgd
426 | *.rsp
427 | *.sbr
428 | *.tlb
429 | *.tli
430 | *.tlh
431 | *.tmp
432 | *.tmp_proj
433 | *_wpftmp.csproj
434 | *.tlog
435 | *.vspscc
436 | *.vssscc
437 | .builds
438 | *.pidb
439 | *.svclog
440 | *.scc
441 |
442 | # Chutzpah Test files
443 | _Chutzpah*
444 |
445 | # Visual C++ cache files
446 | ipch/
447 | *.aps
448 | *.ncb
449 | *.opendb
450 | *.opensdf
451 | *.sdf
452 | *.cachefile
453 | *.VC.db
454 | *.VC.VC.opendb
455 |
456 | # Visual Studio profiler
457 | *.psess
458 | *.vsp
459 | *.vspx
460 | *.sap
461 |
462 | # Visual Studio Trace Files
463 | *.e2e
464 |
465 | # TFS 2012 Local Workspace
466 | $tf/
467 |
468 | # Guidance Automation Toolkit
469 | *.gpState
470 |
471 | # ReSharper is a .NET coding add-in
472 | _ReSharper*/
473 | *.[Rr]e[Ss]harper
474 | *.DotSettings.user
475 |
476 | # TeamCity is a build add-in
477 | _TeamCity*
478 |
479 | # DotCover is a Code Coverage Tool
480 | *.dotCover
481 |
482 | # AxoCover is a Code Coverage Tool
483 | .axoCover/*
484 | !.axoCover/settings.json
485 |
486 | # Coverlet is a free, cross platform Code Coverage Tool
487 | coverage*.json
488 | coverage*.xml
489 | coverage*.info
490 |
491 | # Visual Studio code coverage results
492 | *.coverage
493 | *.coveragexml
494 |
495 | # NCrunch
496 | _NCrunch_*
497 | .*crunch*.local.xml
498 | nCrunchTemp_*
499 |
500 | # MightyMoose
501 | *.mm.*
502 | AutoTest.Net/
503 |
504 | # Web workbench (sass)
505 | .sass-cache/
506 |
507 | # Installshield output folder
508 | [Ee]xpress/
509 |
510 | # DocProject is a documentation generator add-in
511 | DocProject/buildhelp/
512 | DocProject/Help/*.HxT
513 | DocProject/Help/*.HxC
514 | DocProject/Help/*.hhc
515 | DocProject/Help/*.hhk
516 | DocProject/Help/*.hhp
517 | DocProject/Help/Html2
518 | DocProject/Help/html
519 |
520 | # Click-Once directory
521 | publish/
522 |
523 | # Publish Web Output
524 | *.[Pp]ublish.xml
525 | *.azurePubxml
526 | # Note: Comment the next line if you want to checkin your web deploy settings,
527 | # but database connection strings (with potential passwords) will be unencrypted
528 | *.pubxml
529 | *.publishproj
530 |
531 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
532 | # checkin your Azure Web App publish settings, but sensitive information contained
533 | # in these scripts will be unencrypted
534 | PublishScripts/
535 |
536 | # NuGet Packages
537 | *.nupkg
538 | # NuGet Symbol Packages
539 | *.snupkg
540 | # The packages folder can be ignored because of Package Restore
541 | **/[Pp]ackages/*
542 | # except build/, which is used as an MSBuild target.
543 | !**/[Pp]ackages/build/
544 | # Uncomment if necessary however generally it will be regenerated when needed
545 | #!**/[Pp]ackages/repositories.config
546 | # NuGet v3's project.json files produces more ignorable files
547 | *.nuget.props
548 | *.nuget.targets
549 |
550 | # Nuget personal access tokens and Credentials
551 | nuget.config
552 |
553 | # Microsoft Azure Build Output
554 | csx/
555 | *.build.csdef
556 |
557 | # Microsoft Azure Emulator
558 | ecf/
559 | rcf/
560 |
561 | # Windows Store app package directories and files
562 | AppPackages/
563 | BundleArtifacts/
564 | Package.StoreAssociation.xml
565 | _pkginfo.txt
566 | *.appx
567 | *.appxbundle
568 | *.appxupload
569 |
570 | # Visual Studio cache files
571 | # files ending in .cache can be ignored
572 | *.[Cc]ache
573 | # but keep track of directories ending in .cache
574 | !?*.[Cc]ache/
575 |
576 | # Others
577 | ClientBin/
578 | ~$*
579 | *.dbmdl
580 | *.dbproj.schemaview
581 | *.jfm
582 | *.pfx
583 | *.publishsettings
584 | orleans.codegen.cs
585 |
586 | # Including strong name files can present a security risk
587 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
588 | #*.snk
589 |
590 | # Since there are multiple workflows, uncomment next line to ignore bower_components
591 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
592 | #bower_components/
593 |
594 | # RIA/Silverlight projects
595 | Generated_Code/
596 |
597 | # Backup & report files from converting an old project file
598 | # to a newer Visual Studio version. Backup files are not needed,
599 | # because we have git ;-)
600 | _UpgradeReport_Files/
601 | Backup*/
602 | UpgradeLog*.XML
603 | UpgradeLog*.htm
604 | ServiceFabricBackup/
605 | *.rptproj.bak
606 |
607 | # SQL Server files
608 | *.mdf
609 | *.ldf
610 | *.ndf
611 |
612 | # Business Intelligence projects
613 | *.rdl.data
614 | *.bim.layout
615 | *.bim_*.settings
616 | *.rptproj.rsuser
617 | *- [Bb]ackup.rdl
618 | *- [Bb]ackup ([0-9]).rdl
619 | *- [Bb]ackup ([0-9][0-9]).rdl
620 |
621 | # Microsoft Fakes
622 | FakesAssemblies/
623 |
624 | # GhostDoc plugin setting file
625 | *.GhostDoc.xml
626 |
627 | # Node.js Tools for Visual Studio
628 | .ntvs_analysis.dat
629 | node_modules/
630 |
631 | # Visual Studio 6 build log
632 | *.plg
633 |
634 | # Visual Studio 6 workspace options file
635 | *.opt
636 |
637 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
638 | *.vbw
639 |
640 | # Visual Studio LightSwitch build output
641 | **/*.HTMLClient/GeneratedArtifacts
642 | **/*.DesktopClient/GeneratedArtifacts
643 | **/*.DesktopClient/ModelManifest.xml
644 | **/*.Server/GeneratedArtifacts
645 | **/*.Server/ModelManifest.xml
646 | _Pvt_Extensions
647 |
648 | # Paket dependency manager
649 | .paket/paket.exe
650 | paket-files/
651 |
652 | # FAKE - F# Make
653 | .fake/
654 |
655 | # CodeRush personal settings
656 | .cr/personal
657 |
658 | # Python Tools for Visual Studio (PTVS)
659 | *.pyc
660 |
661 | # Cake - Uncomment if you are using it
662 | # tools/**
663 | # !tools/packages.config
664 |
665 | # Tabs Studio
666 | *.tss
667 |
668 | # Telerik's JustMock configuration file
669 | *.jmconfig
670 |
671 | # BizTalk build output
672 | *.btp.cs
673 | *.btm.cs
674 | *.odx.cs
675 | *.xsd.cs
676 |
677 | # OpenCover UI analysis results
678 | OpenCover/
679 |
680 | # Azure Stream Analytics local run output
681 | ASALocalRun/
682 |
683 | # MSBuild Binary and Structured Log
684 | *.binlog
685 |
686 | # NVidia Nsight GPU debugger configuration file
687 | *.nvuser
688 |
689 | # MFractors (Xamarin productivity tool) working folder
690 | .mfractor/
691 |
692 | # Local History for Visual Studio
693 | .localhistory/
694 |
695 | # BeatPulse healthcheck temp database
696 | healthchecksdb
697 |
698 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
699 | MigrationBackup/
700 |
701 | # Ionide (cross platform F# VS Code tools) working folder
702 | .ionide/
703 |
704 | # Fody - auto-generated XML schema
705 | FodyWeavers.xsd
706 |
707 | # VS Code files for those working on multiple tools
708 | .vscode/*
709 | !.vscode/settings.json
710 | !.vscode/tasks.json
711 | !.vscode/launch.json
712 | !.vscode/extensions.json
713 | *.code-workspace
714 |
715 | # Local History for Visual Studio Code
716 | .history/
717 |
718 | # Windows Installer files from build outputs
719 |
720 | # JetBrains Rider
721 | *.sln.iml
722 |
723 | ### VisualStudio Patch ###
724 | # Additional files built by Visual Studio
725 |
726 | # End of https://www.toptal.com/developers/gitignore/api/audio,python,pycharm+all,jupyternotebooks,windows,macos,vim,visualstudio
727 |
728 | # ignore everything inside of checkpoint directories
729 | *.json
730 | *.pt
731 | events.out.*
732 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Efficient Personalized Speech Enhancement through Self-Supervised Learning
2 |
3 | [Aswin Sivaraman](https://actuallyaswin.github.io/) and [Minje Kim](https://saige.sice.indiana.edu) (Indiana University)
4 |
5 |
6 | [](https://ieeexplore.ieee.org/abstract/document/9794565/)
7 |
8 | ## Abstract
9 | This work presents self-supervised learning (SSL) methods for developing monaural speaker-specific (i.e., personalized) speech enhancement (SE) models. While generalist models must broadly address many speakers, specialist models can adapt their enhancement function towards a particular speaker's voice, expecting to solve a narrower problem. Hence, specialists are capable of achieving more optimal performance in addition to reducing computational complexity. However, naive personalization methods can require clean speech from the target user, which is inconvenient to acquire, e.g., due to subpar recording conditions. To this end, we pose personalization as either a zero-shot learning (ZSL) task, in which no additional clean speech of the target speaker is used for training, or a few-shot learning (FSL) task, in which the goal is to minimize the duration of the clean speech used for transfer learning. With this paper, we propose self-supervised learning methods as a solution to both zero- and few-shot personalization tasks. The proposed methods are designed to learn the personalized speech features from unlabeled data (i.e., in-the-wild noisy recordings from the target user) without knowing the corresponding clean sources. Our experiments investigate three different self-supervised learning mechanisms. The results show that self-supervised models achieve zero-shot and few-shot personalization using fewer model parameters and less clean data from the target user, achieving the data efficiency and model compression goals.
10 |
11 |
12 |
14 | An overview of the baseline and proposed personalization methods. With the baseline, the SE model is first pretrained using a large speaker-agnostic dataset as a generalist and then finetuned using clean speech signals of the test user. This method relies entirely on the finetuning process for personalization. On the other hand, the proposed methods provide various SSL options to pretrain the model using noisy, but speaker-specific speech, which serve a better initialization point for the subsequent finetuning process, leading to better SE performance. The pretrained models can also conduct a certain level of SE as a ZSL model, while the FSL-based finetuning tends to improve the pretrained model.
15 |
16 |
17 |
18 | ## Proposed Methods
19 |
20 | ### Pseudo Speech Enhancement (PseudoSE)
21 |
22 |
23 | ### Contrastive Mixtures (CM)
24 |
25 |
26 | ### Data Purification (DP)
27 |
28 |
29 | Note that DP may be applied onto the loss functions of either PseudoSE or CM.
30 |
31 | ## Installation & Usage
32 |
33 | Use pip to install the necessary Python packages (e.g., [pytorch-lightning](https://pytorch-lightning.readthedocs.io/en/stable/), [ray[tune]](https://docs.ray.io/en/latest/tune/), and [asteroid](https://asteroid-team.github.io/)).
34 |
35 | ```
36 | pip install -r requirements.txt
37 | ```
38 |
39 | Additionally, the following datasets must be downloaded and un-zipped:
40 | + [Librispeech](http://www.openslr.org/12/)
41 | + [FSD50K](https://zenodo.org/record/4060432)
42 | + [MUSAN](http://www.openslr.org/17/)
43 |
44 | We define a _generalist_ model as one which is speaker-agnostic; it is trained to enhance the voices of many different speakers. A _specialist_ model is one which is trained to enhance the voice of a single speaker. In this experiment, we train specialist models entirely using degraded recordings (sampling a single speaker from **Librispeech** and unseen noises from **FSD50K**).
45 |
46 | To train generalist models, first modify `code/conf_generalists.yaml` with the correct folder paths, then run:
47 | ```
48 | python code/train_generalists.py
49 | ```
50 | Similarly, to train specialist models, first modify `code/conf_specialists.yaml` with the correct folder paths, then run:
51 | ```
52 | python code/train_specialists.py
53 | ```
54 |
55 | Each YAML configuration file defines the experiment search space, and all values provided in a list expand the search space. For example, the provided `conf_generalists.yaml` will run four different experiments:
56 |
57 | 1. *{batch_size=64, model_name=convtasnet, model_size=tiny, distance_func=snr}*
58 | 2. *{batch_size=64, model_name=convtasnet, model_size=small, distance_func=snr}*
59 | 3. *{batch_size=64, model_name=convtasnet, model_size=medium, distance_func=snr}*
60 | 4. *{batch_size=64, model_name=convtasnet, model_size=large, distance_func=snr}*
61 |
62 | The experiments may be run across multiple GPUs and CPUs, which can be specified in the above YAML files.
63 |
64 | ## Citation
65 |
66 | ```
67 | @article{SivaramanA2022jstsp,
68 | title={{Efficient Personalized Speech Enhancement Through Self-Supervised Learning}},
69 | author={Sivaraman, Aswin and Kim, Minje},
70 | journal={{IEEE Journal of Selected Topics in Signal Processing}},
71 | year={2022},
72 | volume={16},
73 | number={6},
74 | pages={1342-1356}
75 | }
76 | ```
77 |
--------------------------------------------------------------------------------
/code/conf_generalists.yaml:
--------------------------------------------------------------------------------
1 | # This YAML configuration file defines the generalists experiment search space.
2 | # All values provided in a list expand the search space.
3 | #
4 | # For example, if model_name is ["convtasnet", "grunet"] and batch_size is
5 | # [32, 64, 128], then there are six experiments total. As another example, if
6 | # model_name and batch_size are defined as before, and distance_func is
7 | # [0.001, 0.01, 0.1] and distance_func is ['snr', 'mse'], then there would
8 | # be thirty-six experiments total to run.
9 |
10 | # -----------------------------------------------------------------------------
11 | # ray configuration
12 | # -----------------------------------------------------------------------------
13 | available_devices: '4,5,6,7' # (these should be GPU IDs, check `nvidia-smi`)
14 | num_gpus_per_experiment: 0.5
15 | num_cpus_per_experiment: 0.5
16 | output_folder: "~/ray_results/"
17 |
18 | # -----------------------------------------------------------------------------
19 | # data configuration
20 | # -----------------------------------------------------------------------------
21 | folder_librispeech: "/data/asivara/librispeech/"
22 | folder_musan: "/data/asivara/musan/"
23 |
24 | sample_rate: 16000
25 | example_duration: 4 # (in seconds)
26 |
27 | batch_size: # (should be specified based on available GPU memory)
28 | - 64
29 |
30 | # -----------------------------------------------------------------------------
31 | # model configuration
32 | # -----------------------------------------------------------------------------
33 | model_name: # (choices: "convtasnet", "grunet")
34 | - 'convtasnet'
35 | # - 'grunet'
36 |
37 | model_size: # (choices: "tiny", "small", "medium", "large")
38 | - 'tiny'
39 | - 'small'
40 | - 'medium'
41 | - 'large'
42 |
43 | learning_rate:
44 | - 0.001
45 |
46 | distance_func: # (distance function used to compare estimate & target signals)
47 | - 'snr'
48 | # - 'sisdr'
49 | # - 'mse'
--------------------------------------------------------------------------------
/code/conf_specialists.yaml:
--------------------------------------------------------------------------------
1 | # This YAML configuration file defines the specialists experiment search space.
2 | # All values provided in a list expand the search space.
3 | #
4 | # For example, if model_name is ["convtasnet", "grunet"] and speaker_id is
5 | # [19, 26, 39], then there are six experiments total. As another example, if
6 | # model_name and speaker_id are defined as before, and use_loss_contrastive is
7 | # [False, True] and use_loss_purification is [False, True], then there would
8 | # be twenty-four experiments total to run.
9 | #
10 | # Most importantly, the speaker_id value designates the personalization target;
11 | # the self-supervised models are trained using already-noisy single-speaker
12 | # observations (e.g., 19 + FSD50K, 26 + FSD50K).
13 |
14 | # -----------------------------------------------------------------------------
15 | # ray configuration
16 | # -----------------------------------------------------------------------------
17 | available_devices: '4,5,6,7' # (these should be GPU IDs, check `nvidia-smi`)
18 | num_gpus_per_experiment: 0.5
19 | num_cpus_per_experiment: 0.5
20 | output_folder: "~/ray_results/"
21 |
22 | # -----------------------------------------------------------------------------
23 | # data configuration
24 | # -----------------------------------------------------------------------------
25 | folder_librispeech: "/data/asivara/librispeech/"
26 | folder_fsd50k: "/data/asivara/fsd50k_16khz/" # (manually resampled to 16kHz)
27 | folder_musan: "/data/asivara/musan/"
28 |
29 | sample_rate: 16000
30 | example_duration: 4 # (in seconds)
31 |
32 | batch_size: # (should be specified based on available GPU memory)
33 | - 64
34 |
35 | speaker_id: # (Librispeech IDs / must be from "speakers/test.csv")
36 | - 19
37 | - 26
38 | - 39
39 | - 40
40 | - 78
41 | - 83
42 | - 87
43 | - 89
44 | - 118
45 | - 125
46 | - 163
47 | - 196
48 | - 198
49 | - 200
50 | - 201
51 | - 250
52 | - 254
53 | - 307
54 | - 405
55 | - 446
56 |
57 | # -----------------------------------------------------------------------------
58 | # model configuration
59 | # -----------------------------------------------------------------------------
60 | model_name: # (choices: "convtasnet", "grunet")
61 | - 'convtasnet'
62 | # - 'grunet'
63 |
64 | model_size: # (choices: "tiny", "small", "medium", "large")
65 | - 'tiny'
66 | - 'small'
67 | - 'medium'
68 | - 'large'
69 |
70 | learning_rate:
71 | - 0.001
72 |
73 | distance_func: # (distance function used to compare estimate & target signals)
74 | - 'snr'
75 | # - 'sisdr'
76 | # - 'mse'
77 |
78 | # -----------------------------------------------------------------------------
79 | # self-supervised loss function configuration
80 | # -----------------------------------------------------------------------------
81 | use_loss_contrastive: # enables CM training (choices: False, True)
82 | - False
83 | - True
84 |
85 | use_loss_purification: # enables DP training (choices: False, True)
86 | - False
87 | - True
--------------------------------------------------------------------------------
/code/datasets/demand.csv:
--------------------------------------------------------------------------------
1 | category_id,location_id,filepath,duration,sparsity
2 | domestic,kitchen,DKITCHEN/ch01.wav,300.004,0.0029915303457528353
3 | domestic,living,DLIVING/ch01.wav,300.004,0.00037294832873158157
4 | domestic,washing,DWASHING/ch01.wav,300.004,0.005440954118967056
5 | nature,field,NFIELD/ch01.wav,300.004,0.002194696804508567
6 | nature,park,NPARK/ch01.wav,300.004,0.0009642631048336625
7 | nature,river,NRIVER/ch01.wav,300.004,0.006473191548138857
8 | office,hallway,OHALLWAY/ch01.wav,300.004,0.0022714261431246996
9 | office,meeting,OMEETING/ch01.wav,300.004,0.001002078759483993
10 | office,office,OOFFICE/ch01.wav,300.004,0.0047870515845716
11 | public,cafeter,PCAFETER/ch01.wav,300.004,0.0021682872902601957
12 | public,resto,PRESTO/ch01.wav,300.004,0.0015622287755832076
13 | public,station,PSTATION/ch01.wav,300.004,0.0021246219985187054
14 | street,psquare,SPSQUARE/ch01.wav,300.004,0.005154266022145748
15 | street,traffic,STRAFFIC/ch01.wav,300.004,0.007706298492848873
16 | transportation,bus,TBUS/ch01.wav,300.004,0.05816786736249924
17 | transportation,car,TCAR/ch01.wav,300.004,0.03642342612147331
18 | transportation,metro,TMETRO/ch01.wav,300.004,0.019693519920110703
19 |
--------------------------------------------------------------------------------
/code/datasets/musan.csv:
--------------------------------------------------------------------------------
1 | split,filepath,duration,sparsity
2 | train,noise/free-sound/noise-free-sound-0000.wav,17.65875,0.1557888239622116
3 | train,noise/free-sound/noise-free-sound-0001.wav,40.5365,0.027538767084479332
4 | train,noise/free-sound/noise-free-sound-0002.wav,71.2175,0.07518414407968521
5 | train,noise/free-sound/noise-free-sound-0003.wav,14.2358125,0.02093704789876938
6 | train,noise/free-sound/noise-free-sound-0004.wav,2.9865,0.08528786152601242
7 | train,noise/free-sound/noise-free-sound-0005.wav,21.1853125,0.17761875689029694
8 | train,noise/free-sound/noise-free-sound-0006.wav,7.7701875,0.192704975605011
9 | train,noise/free-sound/noise-free-sound-0007.wav,16.613875,0.05709356814622879
10 | train,noise/free-sound/noise-free-sound-0008.wav,20.8050625,0.01302085630595684
11 | train,noise/free-sound/noise-free-sound-0009.wav,8.3866875,0.017926516011357307
12 | train,noise/free-sound/noise-free-sound-0010.wav,8.2415625,0.10532404482364655
13 | train,noise/free-sound/noise-free-sound-0011.wav,34.8501875,0.019042056053876877
14 | train,noise/free-sound/noise-free-sound-0012.wav,32.15625,0.1082201898097992
15 | train,noise/free-sound/noise-free-sound-0013.wav,1.5445,0.14064553380012512
16 | train,noise/free-sound/noise-free-sound-0014.wav,3.4159375,0.11936260014772415
17 | train,noise/free-sound/noise-free-sound-0015.wav,91.244875,0.031160425394773483
18 | train,noise/free-sound/noise-free-sound-0016.wav,4.0,0.21168480813503265
19 | train,noise/free-sound/noise-free-sound-0017.wav,7.8773125,0.03246782720088959
20 | train,noise/free-sound/noise-free-sound-0018.wav,14.5763125,0.15054874122142792
21 | train,noise/free-sound/noise-free-sound-0019.wav,11.55775,0.03360776603221893
22 | train,noise/free-sound/noise-free-sound-0020.wav,60.18575,0.005166677292436361
23 | train,noise/free-sound/noise-free-sound-0021.wav,18.0453125,0.04779457673430443
24 | train,noise/free-sound/noise-free-sound-0022.wav,24.832,0.12185768783092499
25 | train,noise/free-sound/noise-free-sound-0023.wav,11.24,0.11853642016649246
26 | train,noise/free-sound/noise-free-sound-0024.wav,19.7790625,0.028153477236628532
27 | train,noise/free-sound/noise-free-sound-0025.wav,2.3466875,0.09568151831626892
28 | train,noise/free-sound/noise-free-sound-0026.wav,3.7973125,0.19289067387580872
29 | train,noise/free-sound/noise-free-sound-0027.wav,23.04,0.02183876745402813
30 | train,noise/free-sound/noise-free-sound-0028.wav,19.6179375,0.1307886242866516
31 | train,noise/free-sound/noise-free-sound-0029.wav,8.90775,0.1712813675403595
32 | train,noise/free-sound/noise-free-sound-0030.wav,73.01225,0.14316979050636292
33 | train,noise/free-sound/noise-free-sound-0031.wav,35.4743125,0.06783615797758102
34 | train,noise/free-sound/noise-free-sound-0032.wav,61.42,0.007194836623966694
35 | train,noise/free-sound/noise-free-sound-0033.wav,5.5263125,0.015329566784203053
36 | train,noise/free-sound/noise-free-sound-0034.wav,20.689,0.022331789135932922
37 | train,noise/free-sound/noise-free-sound-0035.wav,31.9333125,0.04504496231675148
38 | train,noise/free-sound/noise-free-sound-0036.wav,4.57,0.21145282685756683
39 | train,noise/free-sound/noise-free-sound-0037.wav,0.94,0.2134968787431717
40 | train,noise/free-sound/noise-free-sound-0038.wav,0.9133125,0.15371179580688477
41 | train,noise/free-sound/noise-free-sound-0039.wav,3.4466875,0.21756970882415771
42 | train,noise/free-sound/noise-free-sound-0040.wav,16.73,0.21240685880184174
43 | train,noise/free-sound/noise-free-sound-0041.wav,57.782875,0.034967090934515
44 | train,noise/free-sound/noise-free-sound-0042.wav,319.5559375,0.006530998274683952
45 | train,noise/free-sound/noise-free-sound-0043.wav,1.5325,0.22383399307727814
46 | train,noise/free-sound/noise-free-sound-0044.wav,4.5975625,0.2571035325527191
47 | train,noise/free-sound/noise-free-sound-0045.wav,4.644,0.21450121700763702
48 | train,noise/free-sound/noise-free-sound-0046.wav,4.62075,0.21786445379257202
49 | train,noise/free-sound/noise-free-sound-0047.wav,5.2966875,0.1274796426296234
50 | train,noise/free-sound/noise-free-sound-0048.wav,494.6599375,0.010682842694222927
51 | train,noise/free-sound/noise-free-sound-0049.wav,3.4666875,0.24981054663658142
52 | train,noise/free-sound/noise-free-sound-0050.wav,4.133125,0.020578665658831596
53 | train,noise/free-sound/noise-free-sound-0051.wav,6.52,0.207366943359375
54 | train,noise/free-sound/noise-free-sound-0052.wav,5.6641875,0.009431295096874237
55 | train,noise/free-sound/noise-free-sound-0053.wav,5.2719375,0.02078789472579956
56 | train,noise/free-sound/noise-free-sound-0054.wav,10.053375,0.02097940258681774
57 | train,noise/free-sound/noise-free-sound-0055.wav,40.09775,0.05011861026287079
58 | train,noise/free-sound/noise-free-sound-0056.wav,9.74275,0.05332687869668007
59 | train,noise/free-sound/noise-free-sound-0057.wav,8.1304375,0.14586451649665833
60 | train,noise/free-sound/noise-free-sound-0058.wav,2.0,0.03634824976325035
61 | train,noise/free-sound/noise-free-sound-0059.wav,30.4428125,0.01942417398095131
62 | train,noise/free-sound/noise-free-sound-0060.wav,1.044875,0.056959543377161026
63 | train,noise/free-sound/noise-free-sound-0061.wav,11.0498125,0.13177379965782166
64 | train,noise/free-sound/noise-free-sound-0062.wav,6.0666875,0.10277925431728363
65 | train,noise/free-sound/noise-free-sound-0063.wav,6.2666875,0.09506578743457794
66 | train,noise/free-sound/noise-free-sound-0064.wav,100.1333125,0.039749082177877426
67 | train,noise/free-sound/noise-free-sound-0065.wav,6.9753125,0.11899636685848236
68 | train,noise/free-sound/noise-free-sound-0066.wav,60.9436875,0.042037464678287506
69 | train,noise/free-sound/noise-free-sound-0067.wav,39.889,0.014235257171094418
70 | train,noise/free-sound/noise-free-sound-0068.wav,20.075,0.041847988963127136
71 | train,noise/free-sound/noise-free-sound-0069.wav,2.5714375,0.1221093162894249
72 | train,noise/free-sound/noise-free-sound-0070.wav,17.98025,0.13005472719669342
73 | train,noise/free-sound/noise-free-sound-0071.wav,9.3919375,0.1472863107919693
74 | train,noise/free-sound/noise-free-sound-0072.wav,26.6225625,0.08104076981544495
75 | train,noise/free-sound/noise-free-sound-0073.wav,124.5685625,0.032078467309474945
76 | train,noise/free-sound/noise-free-sound-0074.wav,17.24,0.01517713163048029
77 | train,noise/free-sound/noise-free-sound-0075.wav,20.31125,0.007535384967923164
78 | train,noise/free-sound/noise-free-sound-0076.wav,80.0,0.04280521720647812
79 | train,noise/free-sound/noise-free-sound-0077.wav,11.7233125,0.017991654574871063
80 | train,noise/free-sound/noise-free-sound-0078.wav,10.370625,0.022274833172559738
81 | train,noise/free-sound/noise-free-sound-0079.wav,6.0005625,0.1618564873933792
82 | train,noise/free-sound/noise-free-sound-0080.wav,30.374625,0.06642832607030869
83 | train,noise/free-sound/noise-free-sound-0081.wav,21.5824375,0.08998014777898788
84 | train,noise/free-sound/noise-free-sound-0082.wav,20.20375,0.0831134244799614
85 | train,noise/free-sound/noise-free-sound-0083.wav,0.59125,0.13372854888439178
86 | train,noise/free-sound/noise-free-sound-0084.wav,19.4194375,0.07963409274816513
87 | train,noise/free-sound/noise-free-sound-0085.wav,4.671375,0.01780180260539055
88 | train,noise/free-sound/noise-free-sound-0086.wav,51.685,0.009196894243359566
89 | train,noise/free-sound/noise-free-sound-0087.wav,2.8,0.051390185952186584
90 | train,noise/free-sound/noise-free-sound-0088.wav,44.8666875,0.006017704494297504
91 | train,noise/free-sound/noise-free-sound-0089.wav,13.871,0.012411841191351414
92 | train,noise/free-sound/noise-free-sound-0090.wav,5.8441875,0.09183882921934128
93 | train,noise/free-sound/noise-free-sound-0091.wav,5.062125,0.04581494256854057
94 | train,noise/free-sound/noise-free-sound-0092.wav,15.079625,0.019245458766818047
95 | train,noise/free-sound/noise-free-sound-0093.wav,5.317375,0.03296150639653206
96 | train,noise/free-sound/noise-free-sound-0094.wav,5.5898125,0.03426990285515785
97 | train,noise/free-sound/noise-free-sound-0095.wav,4.068375,0.010393367148935795
98 | train,noise/free-sound/noise-free-sound-0096.wav,7.7841875,0.030091799795627594
99 | train,noise/free-sound/noise-free-sound-0097.wav,2.7369375,0.026996226981282234
100 | train,noise/free-sound/noise-free-sound-0098.wav,2.766,0.04624796658754349
101 | train,noise/free-sound/noise-free-sound-0099.wav,7.4946875,0.05058348551392555
102 | train,noise/free-sound/noise-free-sound-0100.wav,7.323125,0.044734448194503784
103 | train,noise/free-sound/noise-free-sound-0101.wav,112.90125,0.018048448488116264
104 | train,noise/free-sound/noise-free-sound-0102.wav,143.62125,0.010415861383080482
105 | train,noise/free-sound/noise-free-sound-0103.wav,5.2381875,0.05413604527711868
106 | train,noise/free-sound/noise-free-sound-0104.wav,15.888,0.030909907072782516
107 | train,noise/free-sound/noise-free-sound-0105.wav,306.4163125,0.012084255926311016
108 | train,noise/free-sound/noise-free-sound-0106.wav,153.952,0.027553599327802658
109 | train,noise/free-sound/noise-free-sound-0107.wav,11.766,0.16753751039505005
110 | train,noise/free-sound/noise-free-sound-0108.wav,19.3963125,0.026959596201777458
111 | train,noise/free-sound/noise-free-sound-0109.wav,37.101875,0.008607765659689903
112 | train,noise/free-sound/noise-free-sound-0110.wav,40.8931875,0.006380096077919006
113 | train,noise/free-sound/noise-free-sound-0111.wav,16.4451875,0.015219585970044136
114 | train,noise/free-sound/noise-free-sound-0112.wav,1.0144375,0.015558876097202301
115 | train,noise/free-sound/noise-free-sound-0113.wav,5.8461875,0.07099143415689468
116 | train,noise/free-sound/noise-free-sound-0114.wav,99.9298125,0.006937652360647917
117 | train,noise/free-sound/noise-free-sound-0115.wav,2.577125,0.05000652000308037
118 | train,noise/free-sound/noise-free-sound-0116.wav,8.82075,0.09594030678272247
119 | train,noise/free-sound/noise-free-sound-0117.wav,33.6573125,0.08519165962934494
120 | train,noise/free-sound/noise-free-sound-0118.wav,17.41,0.044342827051877975
121 | train,noise/free-sound/noise-free-sound-0119.wav,70.728,0.052567992359399796
122 | train,noise/free-sound/noise-free-sound-0120.wav,18.4344375,0.2627156674861908
123 | train,noise/free-sound/noise-free-sound-0121.wav,44.0,0.03234189376235008
124 | train,noise/free-sound/noise-free-sound-0122.wav,8.0,0.031088661402463913
125 | train,noise/free-sound/noise-free-sound-0123.wav,7.5,0.05381748080253601
126 | train,noise/free-sound/noise-free-sound-0124.wav,5.0,0.06208133324980736
127 | train,noise/free-sound/noise-free-sound-0125.wav,1.125,0.05549873784184456
128 | train,noise/free-sound/noise-free-sound-0126.wav,19.7823125,0.016480645164847374
129 | train,noise/free-sound/noise-free-sound-0127.wav,2.448,0.041250959038734436
130 | train,noise/free-sound/noise-free-sound-0128.wav,4.056,0.033089909702539444
131 | train,noise/free-sound/noise-free-sound-0129.wav,8.2939375,0.02219713293015957
132 | train,noise/free-sound/noise-free-sound-0130.wav,38.8701875,0.05674542486667633
133 | train,noise/free-sound/noise-free-sound-0131.wav,55.597375,0.06609814614057541
134 | train,noise/free-sound/noise-free-sound-0132.wav,0.7158125,0.0999051108956337
135 | train,noise/free-sound/noise-free-sound-0133.wav,0.5209375,0.11084580421447754
136 | train,noise/free-sound/noise-free-sound-0134.wav,2.513375,0.026915693655610085
137 | train,noise/free-sound/noise-free-sound-0135.wav,3.0045625,0.0799274742603302
138 | train,noise/free-sound/noise-free-sound-0136.wav,0.625,0.08511532843112946
139 | train,noise/free-sound/noise-free-sound-0137.wav,113.25,0.011948485858738422
140 | train,noise/free-sound/noise-free-sound-0138.wav,3.25,0.04840674623847008
141 | train,noise/free-sound/noise-free-sound-0139.wav,15.5146875,0.025845933705568314
142 | train,noise/free-sound/noise-free-sound-0140.wav,24.77475,0.012172553688287735
143 | train,noise/free-sound/noise-free-sound-0141.wav,135.0720625,0.04096035286784172
144 | train,noise/free-sound/noise-free-sound-0142.wav,38.99975,0.033781711012125015
145 | train,noise/free-sound/noise-free-sound-0143.wav,13.0430625,0.013805116526782513
146 | train,noise/free-sound/noise-free-sound-0144.wav,25.54775,0.136098712682724
147 | train,noise/free-sound/noise-free-sound-0145.wav,2.9121875,0.07779847830533981
148 | train,noise/free-sound/noise-free-sound-0146.wav,2.0775,0.026216337457299232
149 | train,noise/free-sound/noise-free-sound-0147.wav,8.6333125,0.20758233964443207
150 | train,noise/free-sound/noise-free-sound-0148.wav,2.214875,0.020685670897364616
151 | train,noise/free-sound/noise-free-sound-0149.wav,1.7618125,0.0842508003115654
152 | train,noise/free-sound/noise-free-sound-0150.wav,0.4504375,0.03692903742194176
153 | train,noise/free-sound/noise-free-sound-0151.wav,53.76,0.05299970135092735
154 | train,noise/free-sound/noise-free-sound-0152.wav,97.156,0.018531352281570435
155 | train,noise/free-sound/noise-free-sound-0153.wav,72.1493125,0.008794168010354042
156 | train,noise/free-sound/noise-free-sound-0154.wav,62.464,0.06217845529317856
157 | train,noise/free-sound/noise-free-sound-0155.wav,72.6613125,0.0456816628575325
158 | train,noise/free-sound/noise-free-sound-0156.wav,90.197375,0.018847698345780373
159 | train,noise/free-sound/noise-free-sound-0157.wav,3.0933125,0.15810731053352356
160 | train,noise/free-sound/noise-free-sound-0158.wav,64.884,0.007497943937778473
161 | train,noise/free-sound/noise-free-sound-0159.wav,5.216,0.07187561690807343
162 | train,noise/free-sound/noise-free-sound-0160.wav,34.325375,0.0507843978703022
163 | train,noise/free-sound/noise-free-sound-0161.wav,76.476,0.058606624603271484
164 | train,noise/free-sound/noise-free-sound-0162.wav,14.9333125,0.014961466193199158
165 | train,noise/free-sound/noise-free-sound-0163.wav,0.3626875,0.1304340958595276
166 | train,noise/free-sound/noise-free-sound-0164.wav,24.96,0.02270853891968727
167 | train,noise/free-sound/noise-free-sound-0165.wav,33.837375,0.020268714055418968
168 | train,noise/free-sound/noise-free-sound-0166.wav,0.768,0.12463682144880295
169 | train,noise/free-sound/noise-free-sound-0167.wav,2.005375,0.09784073382616043
170 | train,noise/free-sound/noise-free-sound-0168.wav,1.344,0.09780868887901306
171 | train,noise/free-sound/noise-free-sound-0169.wav,2.304,0.07819738984107971
172 | train,noise/free-sound/noise-free-sound-0170.wav,5.2906875,0.0410243459045887
173 | train,noise/free-sound/noise-free-sound-0171.wav,4.6293125,0.09066470712423325
174 | train,noise/free-sound/noise-free-sound-0172.wav,49.5543125,0.15253888070583344
175 | train,noise/free-sound/noise-free-sound-0173.wav,106.802,0.03660242632031441
176 | train,noise/free-sound/noise-free-sound-0174.wav,9.109125,0.03291758522391319
177 | train,noise/free-sound/noise-free-sound-0175.wav,4.7188125,0.030510850250720978
178 | train,noise/free-sound/noise-free-sound-0176.wav,2.9843125,0.07392294704914093
179 | train,noise/free-sound/noise-free-sound-0177.wav,106.737875,0.014649895019829273
180 | train,noise/free-sound/noise-free-sound-0178.wav,4.884625,0.07338564842939377
181 | train,noise/free-sound/noise-free-sound-0179.wav,2.813,0.026941245421767235
182 | train,noise/free-sound/noise-free-sound-0180.wav,15.5155,0.052562348544597626
183 | train,noise/free-sound/noise-free-sound-0181.wav,4.7964375,0.017637288197875023
184 | train,noise/free-sound/noise-free-sound-0182.wav,2.25225,0.05241338163614273
185 | train,noise/free-sound/noise-free-sound-0183.wav,8.0914375,0.04602668061852455
186 | train,noise/free-sound/noise-free-sound-0184.wav,32.407375,0.05794139578938484
187 | train,noise/free-sound/noise-free-sound-0185.wav,2.6693125,0.08157354593276978
188 | train,noise/free-sound/noise-free-sound-0186.wav,2.8361875,0.01683889329433441
189 | train,noise/free-sound/noise-free-sound-0187.wav,6.4230625,0.016764214262366295
190 | train,noise/free-sound/noise-free-sound-0188.wav,9.5511875,0.042392484843730927
191 | train,noise/free-sound/noise-free-sound-0189.wav,6.8401875,0.03030153177678585
192 | train,noise/free-sound/noise-free-sound-0190.wav,6.131125,0.033692434430122375
193 | train,noise/free-sound/noise-free-sound-0191.wav,3.9623125,0.04725954681634903
194 | train,noise/free-sound/noise-free-sound-0192.wav,6.6733125,0.03271656110882759
195 | train,noise/free-sound/noise-free-sound-0193.wav,4.004,0.02426316775381565
196 | train,noise/free-sound/noise-free-sound-0194.wav,7.882875,0.06913600116968155
197 | train,noise/free-sound/noise-free-sound-0195.wav,4.2959375,0.03338419273495674
198 | train,noise/free-sound/noise-free-sound-0196.wav,3.628625,0.03880972042679787
199 | train,noise/free-sound/noise-free-sound-0197.wav,10.5939375,0.06445953249931335
200 | train,noise/free-sound/noise-free-sound-0198.wav,9.134125,0.010551821440458298
201 | train,noise/free-sound/noise-free-sound-0199.wav,6.25625,0.04261793941259384
202 | train,noise/free-sound/noise-free-sound-0200.wav,8.75875,0.05165497586131096
203 | train,noise/free-sound/noise-free-sound-0201.wav,7.0486875,0.03031056560575962
204 | train,noise/free-sound/noise-free-sound-0202.wav,10.5521875,0.01994105614721775
205 | train,noise/free-sound/noise-free-sound-0203.wav,11.0944375,0.05353863909840584
206 | train,noise/free-sound/noise-free-sound-0204.wav,6.381375,0.034790318459272385
207 | train,noise/free-sound/noise-free-sound-0205.wav,14.389375,0.03819683939218521
208 | train,noise/free-sound/noise-free-sound-0206.wav,2.3356875,0.08835189044475555
209 | train,noise/free-sound/noise-free-sound-0207.wav,7.0904375,0.015486874617636204
210 | train,noise/free-sound/noise-free-sound-0208.wav,6.4648125,0.043124981224536896
211 | train,noise/free-sound/noise-free-sound-0209.wav,9.6763125,0.03408803790807724
212 | train,noise/free-sound/noise-free-sound-0210.wav,5.6723125,0.022591933608055115
213 | train,noise/free-sound/noise-free-sound-0211.wav,6.2145625,0.03797854855656624
214 | train,noise/free-sound/noise-free-sound-0212.wav,9.5511875,0.07562563568353653
215 | train,noise/free-sound/noise-free-sound-0213.wav,5.7140625,0.06729837507009506
216 | train,noise/free-sound/noise-free-sound-0214.wav,8.25825,0.04794452711939812
217 | train,noise/free-sound/noise-free-sound-0215.wav,6.4375,0.02876589260995388
218 | train,noise/free-sound/noise-free-sound-0216.wav,38.4588125,0.08392564207315445
219 | train,noise/free-sound/noise-free-sound-0217.wav,4.8381875,0.023469315841794014
220 | train,noise/free-sound/noise-free-sound-0218.wav,6.131125,0.06398402899503708
221 | train,noise/free-sound/noise-free-sound-0219.wav,6.5899375,0.042622439563274384
222 | train,noise/free-sound/noise-free-sound-0220.wav,8.4668125,0.03878623619675636
223 | train,noise/free-sound/noise-free-sound-0221.wav,10.9275625,0.02385111153125763
224 | train,noise/free-sound/noise-free-sound-0222.wav,1.2095625,0.08227390795946121
225 | train,noise/free-sound/noise-free-sound-0223.wav,3.5869375,0.0693453848361969
226 | train,noise/free-sound/noise-free-sound-0224.wav,12.8044375,0.054351672530174255
227 | train,noise/free-sound/noise-free-sound-0225.wav,12.452,0.015415950678288937
228 | train,noise/free-sound/noise-free-sound-0226.wav,26.1973125,0.10926525294780731
229 | train,noise/free-sound/noise-free-sound-0227.wav,31.1654375,0.011057702824473381
230 | train,noise/free-sound/noise-free-sound-0228.wav,16.6486875,0.042658109217882156
231 | train,noise/free-sound/noise-free-sound-0229.wav,5.3339375,0.11092111468315125
232 | train,noise/free-sound/noise-free-sound-0230.wav,41.592,0.05706547573208809
233 | train,noise/free-sound/noise-free-sound-0231.wav,22.33075,0.022546730935573578
234 | train,noise/free-sound/noise-free-sound-0232.wav,44.2890625,0.042499084025621414
235 | train,noise/free-sound/noise-free-sound-0233.wav,44.3495,0.05787660926580429
236 | train,noise/free-sound/noise-free-sound-0234.wav,75.0171875,0.03603597730398178
237 | train,noise/free-sound/noise-free-sound-0235.wav,200.7829375,0.038586996495723724
238 | train,noise/free-sound/noise-free-sound-0236.wav,5.5640625,0.1896110475063324
239 | train,noise/free-sound/noise-free-sound-0237.wav,5.302875,0.03839379549026489
240 | train,noise/free-sound/noise-free-sound-0238.wav,13.944,0.039737407118082047
241 | train,noise/free-sound/noise-free-sound-0239.wav,37.50525,0.08794654905796051
242 | train,noise/free-sound/noise-free-sound-0240.wav,26.2261875,0.03328998014330864
243 | train,noise/free-sound/noise-free-sound-0241.wav,52.6185,0.026958363130688667
244 | train,noise/free-sound/noise-free-sound-0242.wav,23.0038125,0.04528762027621269
245 | train,noise/free-sound/noise-free-sound-0243.wav,90.0005625,0.0357666015625
246 | train,noise/free-sound/noise-free-sound-0244.wav,118.8401875,0.026809563860297203
247 | train,noise/free-sound/noise-free-sound-0245.wav,29.562875,0.010990160517394543
248 | train,noise/free-sound/noise-free-sound-0246.wav,24.7246875,0.029057923704385757
249 | train,noise/free-sound/noise-free-sound-0247.wav,77.7345625,0.13703718781471252
250 | train,noise/free-sound/noise-free-sound-0248.wav,60.30275,0.06144831329584122
251 | train,noise/free-sound/noise-free-sound-0249.wav,40.684625,0.01766958460211754
252 | train,noise/free-sound/noise-free-sound-0250.wav,8.6204375,0.09253757447004318
253 | train,noise/free-sound/noise-free-sound-0251.wav,31.973875,0.10958096385002136
254 | train,noise/free-sound/noise-free-sound-0252.wav,20.9501875,0.037849124521017075
255 | train,noise/free-sound/noise-free-sound-0253.wav,0.9750625,0.03105640597641468
256 | train,noise/free-sound/noise-free-sound-0254.wav,64.142125,0.03622563183307648
257 | train,noise/free-sound/noise-free-sound-0255.wav,5.5375,0.09842009842395782
258 | train,noise/free-sound/noise-free-sound-0256.wav,37.5069375,0.012584677897393703
259 | train,noise/free-sound/noise-free-sound-0257.wav,5.0,0.004914766643196344
260 | train,noise/free-sound/noise-free-sound-0258.wav,25.7580625,0.2934410572052002
261 | train,noise/free-sound/noise-free-sound-0259.wav,14.7069375,0.023536955937743187
262 | train,noise/free-sound/noise-free-sound-0260.wav,14.4,0.08175045251846313
263 | train,noise/free-sound/noise-free-sound-0261.wav,88.0,0.044494520872831345
264 | train,noise/free-sound/noise-free-sound-0262.wav,35.0301875,0.014555398374795914
265 | train,noise/free-sound/noise-free-sound-0263.wav,0.9043125,0.006316532380878925
266 | train,noise/free-sound/noise-free-sound-0264.wav,29.631375,0.008275926113128662
267 | train,noise/free-sound/noise-free-sound-0265.wav,1.3325625,0.031494367867708206
268 | train,noise/free-sound/noise-free-sound-0266.wav,34.674625,0.052908796817064285
269 | train,noise/free-sound/noise-free-sound-0267.wav,36.1389375,0.009712645784020424
270 | train,noise/free-sound/noise-free-sound-0268.wav,22.710875,0.07346329838037491
271 | train,noise/free-sound/noise-free-sound-0269.wav,49.125125,0.05663708969950676
272 | train,noise/free-sound/noise-free-sound-0270.wav,398.0610625,0.04078911244869232
273 | train,noise/free-sound/noise-free-sound-0271.wav,29.6983125,0.026326850056648254
274 | train,noise/free-sound/noise-free-sound-0272.wav,41.058125,0.3127070665359497
275 | train,noise/free-sound/noise-free-sound-0273.wav,10.6433125,0.031103668734431267
276 | train,noise/free-sound/noise-free-sound-0274.wav,3.96775,0.037425946444272995
277 | train,noise/free-sound/noise-free-sound-0275.wav,12.096,0.034055352210998535
278 | train,noise/free-sound/noise-free-sound-0276.wav,2.8715,0.3022034466266632
279 | train,noise/free-sound/noise-free-sound-0277.wav,5.705125,0.0423748753964901
280 | train,noise/free-sound/noise-free-sound-0278.wav,37.59825,0.026329314336180687
281 | train,noise/free-sound/noise-free-sound-0279.wav,17.3333125,0.1291254162788391
282 | train,noise/free-sound/noise-free-sound-0280.wav,62.10325,0.08421215415000916
283 | train,noise/free-sound/noise-free-sound-0281.wav,0.1,0.0008713952847756445
284 | train,noise/free-sound/noise-free-sound-0282.wav,37.72675,0.03869716450572014
285 | train,noise/free-sound/noise-free-sound-0283.wav,22.9636875,0.005775699391961098
286 | train,noise/free-sound/noise-free-sound-0284.wav,2.786,0.14792244136333466
287 | train,noise/free-sound/noise-free-sound-0285.wav,4.0925,0.017943454906344414
288 | train,noise/free-sound/noise-free-sound-0286.wav,9.5603125,0.054936982691287994
289 | train,noise/free-sound/noise-free-sound-0287.wav,60.6563125,0.02655193768441677
290 | train,noise/free-sound/noise-free-sound-0288.wav,59.688,0.009608861990272999
291 | train,noise/free-sound/noise-free-sound-0289.wav,22.2821875,0.09163817763328552
292 | train,noise/free-sound/noise-free-sound-0290.wav,44.304,0.030345182865858078
293 | train,noise/free-sound/noise-free-sound-0291.wav,9.090625,0.2115832418203354
294 | train,noise/free-sound/noise-free-sound-0292.wav,17.97225,0.2524002492427826
295 | train,noise/free-sound/noise-free-sound-0293.wav,43.0381875,0.015832778066396713
296 | train,noise/free-sound/noise-free-sound-0294.wav,6.5875,0.05032056197524071
297 | train,noise/free-sound/noise-free-sound-0295.wav,6.8963125,0.07769472151994705
298 | train,noise/free-sound/noise-free-sound-0296.wav,1.8616875,0.08366407454013824
299 | train,noise/free-sound/noise-free-sound-0297.wav,1.799,0.1571785807609558
300 | train,noise/free-sound/noise-free-sound-0298.wav,32.0,0.1518697738647461
301 | train,noise/free-sound/noise-free-sound-0299.wav,0.212625,0.07357385754585266
302 | train,noise/free-sound/noise-free-sound-0300.wav,6.112625,0.12782976031303406
303 | train,noise/free-sound/noise-free-sound-0301.wav,180.9495625,0.03669818118214607
304 | train,noise/free-sound/noise-free-sound-0302.wav,17.914125,0.07714607566595078
305 | train,noise/free-sound/noise-free-sound-0303.wav,8.1955,0.02229280211031437
306 | train,noise/free-sound/noise-free-sound-0304.wav,62.0408125,0.07652237266302109
307 | train,noise/free-sound/noise-free-sound-0305.wav,29.5,0.024681456387043
308 | train,noise/free-sound/noise-free-sound-0306.wav,24.5,0.023795874789357185
309 | train,noise/free-sound/noise-free-sound-0307.wav,10.704,0.025687742978334427
310 | train,noise/free-sound/noise-free-sound-0308.wav,58.28175,0.06723242998123169
311 | train,noise/free-sound/noise-free-sound-0309.wav,3.3729375,0.07240822911262512
312 | train,noise/free-sound/noise-free-sound-0310.wav,22.03575,0.022913042455911636
313 | train,noise/free-sound/noise-free-sound-0311.wav,13.746375,0.03471088036894798
314 | train,noise/free-sound/noise-free-sound-0312.wav,1.513,0.026560822501778603
315 | train,noise/free-sound/noise-free-sound-0313.wav,2.7875,0.053717926144599915
316 | train,noise/free-sound/noise-free-sound-0314.wav,0.6109375,0.03753811493515968
317 | train,noise/free-sound/noise-free-sound-0315.wav,2.7838125,0.03004627861082554
318 | train,noise/free-sound/noise-free-sound-0316.wav,11.1455625,0.010907059535384178
319 | train,noise/free-sound/noise-free-sound-0317.wav,7.256875,0.050118256360292435
320 | train,noise/free-sound/noise-free-sound-0318.wav,0.40925,0.05898527055978775
321 | train,noise/free-sound/noise-free-sound-0319.wav,1.515125,0.0426362082362175
322 | train,noise/free-sound/noise-free-sound-0320.wav,6.7395625,0.07185203582048416
323 | train,noise/free-sound/noise-free-sound-0321.wav,113.0,0.028845947235822678
324 | train,noise/free-sound/noise-free-sound-0322.wav,31.0335,0.018543396145105362
325 | train,noise/free-sound/noise-free-sound-0323.wav,10.73175,0.1975315362215042
326 | train,noise/free-sound/noise-free-sound-0324.wav,36.2056875,0.12887051701545715
327 | train,noise/free-sound/noise-free-sound-0325.wav,1.6456875,0.08280790597200394
328 | train,noise/free-sound/noise-free-sound-0326.wav,16.0775,0.1114916056394577
329 | train,noise/free-sound/noise-free-sound-0327.wav,5.1596875,0.026619240641593933
330 | train,noise/free-sound/noise-free-sound-0328.wav,0.7181875,0.07777177542448044
331 | train,noise/free-sound/noise-free-sound-0329.wav,9.69225,0.025797756388783455
332 | train,noise/free-sound/noise-free-sound-0330.wav,8.4288125,0.022084051743149757
333 | train,noise/free-sound/noise-free-sound-0331.wav,6.375,0.05982063338160515
334 | train,noise/free-sound/noise-free-sound-0332.wav,3.625,0.07769813388586044
335 | train,noise/free-sound/noise-free-sound-0333.wav,15.6735,0.04349810257554054
336 | train,noise/free-sound/noise-free-sound-0334.wav,178.1493125,0.032667893916368484
337 | train,noise/free-sound/noise-free-sound-0335.wav,34.0375625,0.04825016111135483
338 | train,noise/free-sound/noise-free-sound-0336.wav,48.08275,0.13560190796852112
339 | train,noise/free-sound/noise-free-sound-0337.wav,1.4255,0.2234208583831787
340 | train,noise/free-sound/noise-free-sound-0338.wav,9.24,0.018683331087231636
341 | train,noise/free-sound/noise-free-sound-0339.wav,20.328,0.02289791963994503
342 | train,noise/free-sound/noise-free-sound-0340.wav,33.672,0.03894010931253433
343 | train,noise/free-sound/noise-free-sound-0341.wav,23.071875,0.023344693705439568
344 | train,noise/free-sound/noise-free-sound-0342.wav,13.806,0.09261415898799896
345 | train,noise/free-sound/noise-free-sound-0343.wav,2.809625,0.0704667717218399
346 | train,noise/free-sound/noise-free-sound-0344.wav,20.544,0.026964064687490463
347 | train,noise/free-sound/noise-free-sound-0345.wav,5.9024375,0.11662321537733078
348 | train,noise/free-sound/noise-free-sound-0346.wav,18.5469375,0.021898966282606125
349 | train,noise/free-sound/noise-free-sound-0347.wav,13.7801875,0.2138994336128235
350 | train,noise/free-sound/noise-free-sound-0348.wav,50.1140625,0.03524171933531761
351 | train,noise/free-sound/noise-free-sound-0349.wav,1.0,0.1690613180398941
352 | train,noise/free-sound/noise-free-sound-0350.wav,9.0,0.040681108832359314
353 | train,noise/free-sound/noise-free-sound-0351.wav,4.968,0.027849895879626274
354 | train,noise/free-sound/noise-free-sound-0352.wav,10.0,0.07022271305322647
355 | train,noise/free-sound/noise-free-sound-0353.wav,26.0963125,0.2605692744255066
356 | train,noise/free-sound/noise-free-sound-0354.wav,0.9999375,0.05305643379688263
357 | train,noise/free-sound/noise-free-sound-0355.wav,14.664,0.08156871050596237
358 | train,noise/free-sound/noise-free-sound-0356.wav,24.96,0.1574023813009262
359 | train,noise/free-sound/noise-free-sound-0357.wav,8.1763125,0.2569054663181305
360 | train,noise/free-sound/noise-free-sound-0358.wav,21.24625,0.035855066031217575
361 | train,noise/free-sound/noise-free-sound-0359.wav,18.3078125,0.014943092130124569
362 | train,noise/free-sound/noise-free-sound-0360.wav,13.89375,0.022115277126431465
363 | train,noise/free-sound/noise-free-sound-0361.wav,7.7390625,0.0052618165500462055
364 | train,noise/free-sound/noise-free-sound-0362.wav,12.875,0.025662187486886978
365 | train,noise/free-sound/noise-free-sound-0363.wav,4.721875,0.011223689652979374
366 | train,noise/free-sound/noise-free-sound-0364.wav,11.8400625,0.012878494337201118
367 | train,noise/free-sound/noise-free-sound-0365.wav,10.057125,0.21046742796897888
368 | train,noise/free-sound/noise-free-sound-0366.wav,4.049,0.33490365743637085
369 | train,noise/free-sound/noise-free-sound-0367.wav,1.044875,0.22696106135845184
370 | train,noise/free-sound/noise-free-sound-0368.wav,1.6456875,0.2907167971134186
371 | train,noise/free-sound/noise-free-sound-0369.wav,4.049,0.2372066080570221
372 | train,noise/free-sound/noise-free-sound-0370.wav,72.2546875,0.0368385836482048
373 | train,noise/free-sound/noise-free-sound-0371.wav,21.28,0.09028219431638718
374 | train,noise/free-sound/noise-free-sound-0372.wav,22.88875,0.04608670249581337
375 | train,noise/free-sound/noise-free-sound-0373.wav,3.234625,0.06457925587892532
376 | train,noise/free-sound/noise-free-sound-0374.wav,0.7430625,2.589870007341233e-07
377 | train,noise/free-sound/noise-free-sound-0375.wav,0.75,0.008004882372915745
378 | train,noise/free-sound/noise-free-sound-0376.wav,5.0866875,0.04294144734740257
379 | train,noise/free-sound/noise-free-sound-0377.wav,8.986125,0.0908549502491951
380 | train,noise/free-sound/noise-free-sound-0378.wav,22.2824375,0.046287212520837784
381 | train,noise/free-sound/noise-free-sound-0379.wav,6.9893125,0.09434410184621811
382 | train,noise/free-sound/noise-free-sound-0380.wav,0.144,0.03649836778640747
383 | train,noise/free-sound/noise-free-sound-0381.wav,44.2775625,0.10409745573997498
384 | train,noise/free-sound/noise-free-sound-0382.wav,60.552,0.034003183245658875
385 | train,noise/free-sound/noise-free-sound-0383.wav,32.904,0.07692724466323853
386 | train,noise/free-sound/noise-free-sound-0384.wav,178.211375,0.025643497705459595
387 | train,noise/free-sound/noise-free-sound-0385.wav,8.408375,0.05071987211704254
388 | train,noise/free-sound/noise-free-sound-0386.wav,26.7350625,0.031059708446264267
389 | train,noise/free-sound/noise-free-sound-0387.wav,5.96025,0.06439970433712006
390 | train,noise/free-sound/noise-free-sound-0388.wav,4.382375,0.04423951730132103
391 | train,noise/free-sound/noise-free-sound-0389.wav,13.7295,0.10568549484014511
392 | train,noise/free-sound/noise-free-sound-0390.wav,15.7635,0.04382310435175896
393 | train,noise/free-sound/noise-free-sound-0391.wav,23.391,0.018330859020352364
394 | train,noise/free-sound/noise-free-sound-0392.wav,13.221,0.013229451142251492
395 | train,noise/free-sound/noise-free-sound-0393.wav,5.5379375,0.03775392845273018
396 | train,noise/free-sound/noise-free-sound-0394.wav,23.72,0.05215062201023102
397 | train,noise/free-sound/noise-free-sound-0395.wav,7.73225,0.03044125624001026
398 | train,noise/free-sound/noise-free-sound-0396.wav,9.7959375,0.03589799255132675
399 | train,noise/free-sound/noise-free-sound-0397.wav,45.328,0.011843379586935043
400 | train,noise/free-sound/noise-free-sound-0398.wav,44.8800625,0.010618491098284721
401 | train,noise/free-sound/noise-free-sound-0399.wav,16.9025,0.03693331032991409
402 | train,noise/free-sound/noise-free-sound-0400.wav,14.86075,0.012356513179838657
403 | train,noise/free-sound/noise-free-sound-0401.wav,10.3515625,0.01837407611310482
404 | train,noise/free-sound/noise-free-sound-0402.wav,29.6705625,0.013565191999077797
405 | train,noise/free-sound/noise-free-sound-0403.wav,13.009,0.06084136664867401
406 | train,noise/free-sound/noise-free-sound-0404.wav,19.040375,0.10327192395925522
407 | train,noise/free-sound/noise-free-sound-0405.wav,2.306875,0.03524678200483322
408 | train,noise/free-sound/noise-free-sound-0406.wav,19.8315,0.05282257869839668
409 | train,noise/free-sound/noise-free-sound-0407.wav,2.9594375,0.044623784720897675
410 | train,noise/free-sound/noise-free-sound-0408.wav,15.0,0.05332208797335625
411 | train,noise/free-sound/noise-free-sound-0409.wav,8.6186875,0.10226306319236755
412 | train,noise/free-sound/noise-free-sound-0410.wav,1.1808125,0.038361839950084686
413 | train,noise/free-sound/noise-free-sound-0411.wav,2.3625625,0.0322076678276062
414 | train,noise/free-sound/noise-free-sound-0412.wav,2.624875,0.044953443109989166
415 | train,noise/free-sound/noise-free-sound-0413.wav,21.995125,0.03321641683578491
416 | train,noise/free-sound/noise-free-sound-0414.wav,28.3785,0.03101469576358795
417 | train,noise/free-sound/noise-free-sound-0415.wav,6.624,0.017043698579072952
418 | train,noise/free-sound/noise-free-sound-0416.wav,14.9700625,0.07003152370452881
419 | train,noise/free-sound/noise-free-sound-0417.wav,10.1516875,0.024113353341817856
420 | train,noise/free-sound/noise-free-sound-0418.wav,40.99575,0.019238855689764023
421 | train,noise/free-sound/noise-free-sound-0419.wav,11.9901875,0.09793315082788467
422 | train,noise/free-sound/noise-free-sound-0420.wav,0.8214375,0.08973070979118347
423 | train,noise/free-sound/noise-free-sound-0421.wav,3.3698125,0.020990706980228424
424 | train,noise/free-sound/noise-free-sound-0422.wav,9.1626875,0.014323156327009201
425 | train,noise/free-sound/noise-free-sound-0423.wav,12.25,0.02368086576461792
426 | train,noise/free-sound/noise-free-sound-0424.wav,16.666125,0.09960763156414032
427 | train,noise/free-sound/noise-free-sound-0425.wav,1.410625,0.25385206937789917
428 | train,noise/free-sound/noise-free-sound-0426.wav,3.89225,0.2450893521308899
429 | train,noise/free-sound/noise-free-sound-0427.wav,21.995125,0.22187045216560364
430 | train,noise/free-sound/noise-free-sound-0428.wav,18.622375,0.06734079867601395
431 | train,noise/free-sound/noise-free-sound-0429.wav,24.0,0.05017399042844772
432 | train,noise/free-sound/noise-free-sound-0430.wav,67.2,0.039679136127233505
433 | train,noise/free-sound/noise-free-sound-0431.wav,22.9355,0.04259328544139862
434 | train,noise/free-sound/noise-free-sound-0432.wav,22.2540625,0.04090152680873871
435 | train,noise/free-sound/noise-free-sound-0433.wav,42.2370625,0.059927552938461304
436 | train,noise/free-sound/noise-free-sound-0434.wav,21.528,0.051974814385175705
437 | train,noise/free-sound/noise-free-sound-0435.wav,5.27675,0.015150045044720173
438 | train,noise/free-sound/noise-free-sound-0436.wav,51.2120625,0.03175858035683632
439 | train,noise/free-sound/noise-free-sound-0437.wav,1.19,3.958451202379365e-07
440 | train,noise/free-sound/noise-free-sound-0438.wav,7.52325,0.0758250281214714
441 | train,noise/free-sound/noise-free-sound-0439.wav,30.7875625,0.05084805563092232
442 | train,noise/free-sound/noise-free-sound-0440.wav,4.1883125,0.02127326838672161
443 | train,noise/free-sound/noise-free-sound-0441.wav,45.568,0.060903094708919525
444 | train,noise/free-sound/noise-free-sound-0442.wav,76.0721875,0.036283113062381744
445 | train,noise/free-sound/noise-free-sound-0443.wav,42.24,0.016077127307653427
446 | train,noise/free-sound/noise-free-sound-0444.wav,20.2478125,0.022445961833000183
447 | train,noise/free-sound/noise-free-sound-0445.wav,18.3901875,0.049584317952394485
448 | train,noise/free-sound/noise-free-sound-0446.wav,31.416,0.011185670271515846
449 | train,noise/free-sound/noise-free-sound-0447.wav,28.5779375,0.01004602387547493
450 | train,noise/free-sound/noise-free-sound-0448.wav,0.574,0.11578892171382904
451 | train,noise/free-sound/noise-free-sound-0449.wav,4.7380625,0.04884331300854683
452 | train,noise/free-sound/noise-free-sound-0450.wav,16.1713125,0.193118616938591
453 | train,noise/free-sound/noise-free-sound-0451.wav,44.04,0.06223294511437416
454 | train,noise/free-sound/noise-free-sound-0452.wav,17.8445625,0.15768374502658844
455 | train,noise/free-sound/noise-free-sound-0453.wav,0.9404375,0.0542965829372406
456 | train,noise/free-sound/noise-free-sound-0454.wav,41.885,0.03289172798395157
457 | train,noise/free-sound/noise-free-sound-0455.wav,168.2695,0.03914259746670723
458 | train,noise/free-sound/noise-free-sound-0456.wav,28.96325,0.05199239030480385
459 | train,noise/free-sound/noise-free-sound-0457.wav,44.3666875,0.017017558217048645
460 | train,noise/free-sound/noise-free-sound-0458.wav,20.7055625,0.06016414240002632
461 | train,noise/free-sound/noise-free-sound-0459.wav,40.3069375,0.09874601662158966
462 | train,noise/free-sound/noise-free-sound-0460.wav,13.104,0.038429416716098785
463 | train,noise/free-sound/noise-free-sound-0461.wav,53.3406875,0.04417074844241142
464 | train,noise/free-sound/noise-free-sound-0462.wav,18.76375,0.07823855429887772
465 | train,noise/free-sound/noise-free-sound-0463.wav,15.1105625,0.05224224925041199
466 | train,noise/free-sound/noise-free-sound-0464.wav,8.0831875,0.03142475709319115
467 | train,noise/free-sound/noise-free-sound-0465.wav,9.007375,0.13380466401576996
468 | train,noise/free-sound/noise-free-sound-0466.wav,13.9530625,0.04733031988143921
469 | train,noise/free-sound/noise-free-sound-0467.wav,14.1738125,0.04154488444328308
470 | train,noise/free-sound/noise-free-sound-0468.wav,23.12375,0.053234294056892395
471 | train,noise/free-sound/noise-free-sound-0469.wav,10.7805,0.0436321459710598
472 | train,noise/free-sound/noise-free-sound-0470.wav,4.757375,0.1546269804239273
473 | train,noise/free-sound/noise-free-sound-0471.wav,7.915125,0.02925923280417919
474 | train,noise/free-sound/noise-free-sound-0472.wav,94.0,0.04926766827702522
475 | train,noise/free-sound/noise-free-sound-0473.wav,9.11675,0.09038642793893814
476 | train,noise/free-sound/noise-free-sound-0474.wav,35.935,0.030509065836668015
477 | train,noise/free-sound/noise-free-sound-0475.wav,36.3624375,0.058621712028980255
478 | train,noise/free-sound/noise-free-sound-0476.wav,9.5608125,0.050260499119758606
479 | train,noise/free-sound/noise-free-sound-0477.wav,15.124875,0.07204657047986984
480 | train,noise/free-sound/noise-free-sound-0478.wav,8.5195,0.048078786581754684
481 | train,noise/free-sound/noise-free-sound-0479.wav,3.715625,0.10714650899171829
482 | train,noise/free-sound/noise-free-sound-0480.wav,35.2898125,0.04612552002072334
483 | train,noise/free-sound/noise-free-sound-0481.wav,7.907875,0.14924266934394836
484 | train,noise/free-sound/noise-free-sound-0482.wav,13.5314375,0.02954801917076111
485 | train,noise/free-sound/noise-free-sound-0483.wav,22.5959375,0.022938815876841545
486 | train,noise/free-sound/noise-free-sound-0484.wav,10.6318125,0.030818503350019455
487 | train,noise/free-sound/noise-free-sound-0485.wav,10.8930625,0.05822751671075821
488 | train,noise/free-sound/noise-free-sound-0486.wav,6.4098125,0.01118223275989294
489 | train,noise/free-sound/noise-free-sound-0487.wav,1.9301875,0.031102990731596947
490 | train,noise/free-sound/noise-free-sound-0488.wav,5.656875,0.040080949664115906
491 | train,noise/free-sound/noise-free-sound-0489.wav,62.8613125,0.023358652368187904
492 | train,noise/free-sound/noise-free-sound-0490.wav,12.9799375,0.013502035290002823
493 | train,noise/free-sound/noise-free-sound-0491.wav,1.544125,0.031686265021562576
494 | train,noise/free-sound/noise-free-sound-0492.wav,20.0,0.05861775949597359
495 | train,noise/free-sound/noise-free-sound-0493.wav,14.02775,0.029563307762145996
496 | train,noise/free-sound/noise-free-sound-0494.wav,56.05875,0.060667529702186584
497 | train,noise/free-sound/noise-free-sound-0495.wav,18.2066875,0.08725643903017044
498 | train,noise/free-sound/noise-free-sound-0496.wav,5.97325,0.055248506367206573
499 | train,noise/free-sound/noise-free-sound-0497.wav,302.0,0.045467883348464966
500 | train,noise/free-sound/noise-free-sound-0498.wav,5.0,0.016401100903749466
501 | train,noise/free-sound/noise-free-sound-0499.wav,7.889,0.017813293263316154
502 | train,noise/free-sound/noise-free-sound-0500.wav,34.374875,0.07433386147022247
503 | train,noise/free-sound/noise-free-sound-0501.wav,12.0079375,0.08994701504707336
504 | train,noise/free-sound/noise-free-sound-0502.wav,73.2,0.03240280598402023
505 | train,noise/free-sound/noise-free-sound-0503.wav,5.0648125,0.046961430460214615
506 | train,noise/free-sound/noise-free-sound-0504.wav,58.0034375,0.04388689994812012
507 | train,noise/free-sound/noise-free-sound-0505.wav,9.373875,0.09661614149808884
508 | train,noise/free-sound/noise-free-sound-0506.wav,24.58125,0.026245592162013054
509 | train,noise/free-sound/noise-free-sound-0507.wav,1.1199375,0.017153136432170868
510 | train,noise/free-sound/noise-free-sound-0508.wav,7.319875,0.009689533151686192
511 | train,noise/free-sound/noise-free-sound-0509.wav,10.79725,0.16952723264694214
512 | train,noise/free-sound/noise-free-sound-0510.wav,19.845125,0.04474690556526184
513 | train,noise/free-sound/noise-free-sound-0511.wav,4.0,0.05655713379383087
514 | train,noise/free-sound/noise-free-sound-0512.wav,3.78775,0.026339855045080185
515 | train,noise/free-sound/noise-free-sound-0513.wav,10.7885625,0.012656833045184612
516 | train,noise/free-sound/noise-free-sound-0514.wav,5.329,0.00939034391194582
517 | train,noise/free-sound/noise-free-sound-0515.wav,4.075125,0.02830776944756508
518 | train,noise/free-sound/noise-free-sound-0516.wav,1.0991875,0.07844716310501099
519 | train,noise/free-sound/noise-free-sound-0517.wav,9.962,0.014107516035437584
520 | train,noise/free-sound/noise-free-sound-0518.wav,18.7820625,0.04062504321336746
521 | train,noise/free-sound/noise-free-sound-0519.wav,33.5935,0.05104810744524002
522 | train,noise/free-sound/noise-free-sound-0520.wav,12.5,0.030691981315612793
523 | train,noise/free-sound/noise-free-sound-0521.wav,7.0008125,0.03188765048980713
524 | train,noise/free-sound/noise-free-sound-0522.wav,79.2853125,0.017952265217900276
525 | train,noise/free-sound/noise-free-sound-0523.wav,8.424,0.03407513350248337
526 | train,noise/free-sound/noise-free-sound-0524.wav,22.6495,0.035302065312862396
527 | train,noise/free-sound/noise-free-sound-0525.wav,0.9583125,0.07954972237348557
528 | train,noise/free-sound/noise-free-sound-0526.wav,1.4186875,0.03291534259915352
529 | train,noise/free-sound/noise-free-sound-0527.wav,9.3093125,0.027845852077007294
530 | train,noise/free-sound/noise-free-sound-0528.wav,3.06975,0.03752041608095169
531 | train,noise/free-sound/noise-free-sound-0529.wav,1.8594375,0.11263688653707504
532 | train,noise/free-sound/noise-free-sound-0530.wav,7.7573125,0.13751307129859924
533 | train,noise/free-sound/noise-free-sound-0531.wav,46.7388125,0.16500982642173767
534 | train,noise/free-sound/noise-free-sound-0532.wav,6.0,0.01770815998315811
535 | train,noise/free-sound/noise-free-sound-0533.wav,37.91875,0.1319381147623062
536 | train,noise/free-sound/noise-free-sound-0534.wav,17.477625,0.028349682688713074
537 | train,noise/free-sound/noise-free-sound-0535.wav,18.137625,0.02680165506899357
538 | train,noise/free-sound/noise-free-sound-0536.wav,0.69275,0.16672660410404205
539 | train,noise/free-sound/noise-free-sound-0537.wav,12.1733125,0.019201423972845078
540 | train,noise/free-sound/noise-free-sound-0538.wav,0.5738125,0.02545657753944397
541 | train,noise/free-sound/noise-free-sound-0539.wav,1.515125,0.0866997167468071
542 | train,noise/free-sound/noise-free-sound-0540.wav,3.0040625,0.016370726749300957
543 | train,noise/free-sound/noise-free-sound-0541.wav,16.37875,0.012002836912870407
544 | train,noise/free-sound/noise-free-sound-0542.wav,8.224,0.08064471930265427
545 | train,noise/free-sound/noise-free-sound-0543.wav,4.356,0.019136013463139534
546 | train,noise/free-sound/noise-free-sound-0544.wav,107.064,0.014804407954216003
547 | train,noise/free-sound/noise-free-sound-0545.wav,31.07325,0.03191414847970009
548 | train,noise/free-sound/noise-free-sound-0546.wav,117.6240625,0.016521433368325233
549 | train,noise/free-sound/noise-free-sound-0547.wav,4.848875,0.1954786479473114
550 | train,noise/free-sound/noise-free-sound-0548.wav,8.2024375,0.013446212746202946
551 | train,noise/free-sound/noise-free-sound-0549.wav,5.832,0.03358636796474457
552 | train,noise/free-sound/noise-free-sound-0550.wav,6.373875,0.08931571245193481
553 | train,noise/free-sound/noise-free-sound-0551.wav,33.18525,0.06328275054693222
554 | train,noise/free-sound/noise-free-sound-0552.wav,77.736,0.010960646905004978
555 | train,noise/free-sound/noise-free-sound-0553.wav,16.431,0.0317048504948616
556 | train,noise/free-sound/noise-free-sound-0554.wav,0.7836875,0.1763930767774582
557 | train,noise/free-sound/noise-free-sound-0555.wav,21.577125,0.054776307195425034
558 | train,noise/free-sound/noise-free-sound-0556.wav,19.9053125,0.08226145803928375
559 | train,noise/free-sound/noise-free-sound-0557.wav,9.11675,0.050691552460193634
560 | train,noise/free-sound/noise-free-sound-0558.wav,8.38575,0.06994509696960449
561 | train,noise/free-sound/noise-free-sound-0559.wav,14.204625,0.07384513318538666
562 | train,noise/free-sound/noise-free-sound-0560.wav,269.544,0.03145440295338631
563 | train,noise/free-sound/noise-free-sound-0561.wav,1.1755,0.02490137331187725
564 | train,noise/free-sound/noise-free-sound-0562.wav,9.0645,0.018991192802786827
565 | train,noise/free-sound/noise-free-sound-0563.wav,24.331875,0.05708249658346176
566 | train,noise/free-sound/noise-free-sound-0564.wav,0.1035625,0.011895965784788132
567 | train,noise/free-sound/noise-free-sound-0565.wav,7.429375,0.0369032584130764
568 | train,noise/free-sound/noise-free-sound-0566.wav,6.1883125,0.03060462884604931
569 | train,noise/free-sound/noise-free-sound-0567.wav,98.88,0.04714568704366684
570 | train,noise/free-sound/noise-free-sound-0568.wav,113.6384375,0.097239650785923
571 | train,noise/free-sound/noise-free-sound-0569.wav,3.84,0.020919956266880035
572 | train,noise/free-sound/noise-free-sound-0570.wav,6.373875,0.04353569820523262
573 | train,noise/free-sound/noise-free-sound-0571.wav,7.1314375,0.010439659468829632
574 | train,noise/free-sound/noise-free-sound-0572.wav,123.4285625,0.04965611547231674
575 | train,noise/free-sound/noise-free-sound-0573.wav,0.391,0.04393092170357704
576 | train,noise/free-sound/noise-free-sound-0574.wav,20.8456875,0.018946876749396324
577 | train,noise/free-sound/noise-free-sound-0575.wav,6.635125,0.11531470715999603
578 | train,noise/free-sound/noise-free-sound-0576.wav,3.732625,0.025617213919758797
579 | train,noise/free-sound/noise-free-sound-0577.wav,13.4530625,0.0597815178334713
580 | train,noise/free-sound/noise-free-sound-0578.wav,19.35675,0.03867294639348984
581 | train,noise/free-sound/noise-free-sound-0579.wav,17.2660625,0.037487927824258804
582 | train,noise/free-sound/noise-free-sound-0580.wav,135.68,0.02141750231385231
583 | train,noise/free-sound/noise-free-sound-0581.wav,51.4863125,0.046505432575941086
584 | train,noise/free-sound/noise-free-sound-0582.wav,1.4744375,0.09398862719535828
585 | train,noise/free-sound/noise-free-sound-0583.wav,2.04375,0.09569674730300903
586 | train,noise/free-sound/noise-free-sound-0584.wav,15.7739375,0.054114747792482376
587 | train,noise/free-sound/noise-free-sound-0585.wav,0.9288125,0.027606800198554993
588 | train,noise/free-sound/noise-free-sound-0586.wav,10.8333125,0.050648726522922516
589 | train,noise/free-sound/noise-free-sound-0587.wav,6.1226875,0.0615115761756897
590 | train,noise/free-sound/noise-free-sound-0588.wav,5.8026875,0.007875349372625351
591 | train,noise/free-sound/noise-free-sound-0589.wav,2.2465,0.07835634797811508
592 | train,noise/free-sound/noise-free-sound-0590.wav,3.78775,0.04725419357419014
593 | train,noise/free-sound/noise-free-sound-0591.wav,12.256875,0.011887668631970882
594 | train,noise/free-sound/noise-free-sound-0592.wav,32.875125,0.044854626059532166
595 | train,noise/free-sound/noise-free-sound-0593.wav,61.649,0.020986465737223625
596 | train,noise/free-sound/noise-free-sound-0594.wav,141.1773125,0.004017080180346966
597 | train,noise/free-sound/noise-free-sound-0595.wav,26.898625,0.06854495406150818
598 | train,noise/free-sound/noise-free-sound-0596.wav,13.33925,0.04375113174319267
599 | train,noise/free-sound/noise-free-sound-0597.wav,6.24325,0.06470189988613129
600 | train,noise/free-sound/noise-free-sound-0598.wav,57.920125,0.06305752694606781
601 | train,noise/free-sound/noise-free-sound-0599.wav,3.84,0.04084300249814987
602 | train,noise/free-sound/noise-free-sound-0600.wav,45.395,0.04135194420814514
603 | train,noise/free-sound/noise-free-sound-0601.wav,4.239875,0.2734590768814087
604 | train,noise/free-sound/noise-free-sound-0602.wav,4.125,0.21610486507415771
605 | train,noise/free-sound/noise-free-sound-0603.wav,7.3740625,0.3005824685096741
606 | train,noise/free-sound/noise-free-sound-0604.wav,7.358125,0.08612485975027084
607 | train,noise/free-sound/noise-free-sound-0605.wav,32.0,0.22763755917549133
608 | train,noise/free-sound/noise-free-sound-0606.wav,88.84,0.01871025189757347
609 | train,noise/free-sound/noise-free-sound-0607.wav,20.0098125,0.0681547224521637
610 | train,noise/free-sound/noise-free-sound-0608.wav,3.5265,0.0683574452996254
611 | train,noise/free-sound/noise-free-sound-0609.wav,10.161625,0.02855943888425827
612 | train,noise/free-sound/noise-free-sound-0610.wav,11.1804375,0.06024652719497681
613 | train,noise/free-sound/noise-free-sound-0611.wav,0.127,0.023479890078306198
614 | train,noise/free-sound/noise-free-sound-0612.wav,11.0498125,0.1207055076956749
615 | train,noise/free-sound/noise-free-sound-0613.wav,28.5518125,0.033192068338394165
616 | train,noise/free-sound/noise-free-sound-0614.wav,30.0,0.01605299487709999
617 | train,noise/free-sound/noise-free-sound-0615.wav,10.082875,0.08860311657190323
618 | train,noise/free-sound/noise-free-sound-0616.wav,22.6481875,0.006513164844363928
619 | train,noise/free-sound/noise-free-sound-0617.wav,8.2026875,0.016903722658753395
620 | train,noise/free-sound/noise-free-sound-0618.wav,3.0205625,0.059680454432964325
621 | train,noise/free-sound/noise-free-sound-0619.wav,0.5454375,0.005135993007570505
622 | train,noise/free-sound/noise-free-sound-0620.wav,11.5664375,0.03831050917506218
623 | train,noise/free-sound/noise-free-sound-0621.wav,6.724375,0.03734799474477768
624 | train,noise/free-sound/noise-free-sound-0622.wav,11.9901875,0.0628824383020401
625 | train,noise/free-sound/noise-free-sound-0623.wav,34.9518125,0.05226227641105652
626 | train,noise/free-sound/noise-free-sound-0624.wav,0.104,0.09036702662706375
627 | train,noise/free-sound/noise-free-sound-0625.wav,20.8195625,0.027273673564195633
628 | train,noise/free-sound/noise-free-sound-0626.wav,1.3235625,0.047352369874715805
629 | train,noise/free-sound/noise-free-sound-0627.wav,0.3135,0.14606566727161407
630 | train,noise/free-sound/noise-free-sound-0628.wav,29.0220625,0.03407597541809082
631 | train,noise/free-sound/noise-free-sound-0629.wav,3.5,0.3172118663787842
632 | train,noise/free-sound/noise-free-sound-0630.wav,3.1085625,0.08142469823360443
633 | train,noise/free-sound/noise-free-sound-0631.wav,5.6,0.09101958572864532
634 | train,noise/free-sound/noise-free-sound-0632.wav,2.568,0.16587309539318085
635 | train,noise/free-sound/noise-free-sound-0633.wav,18.272625,0.03823813423514366
636 | train,noise/free-sound/noise-free-sound-0634.wav,64.0,0.005076223518699408
637 | train,noise/free-sound/noise-free-sound-0635.wav,16.876,0.039830684661865234
638 | train,noise/free-sound/noise-free-sound-0636.wav,31.3991875,0.05804016813635826
639 | train,noise/free-sound/noise-free-sound-0637.wav,14.6841875,0.02190512977540493
640 | train,noise/free-sound/noise-free-sound-0638.wav,5.5926875,0.037356846034526825
641 | train,noise/free-sound/noise-free-sound-0639.wav,6.20375,0.03894766792654991
642 | train,noise/free-sound/noise-free-sound-0640.wav,32.7836875,0.056162264198064804
643 | train,noise/free-sound/noise-free-sound-0641.wav,35.1346875,0.03040275163948536
644 | train,noise/free-sound/noise-free-sound-0642.wav,4.4930625,0.03574768826365471
645 | train,noise/free-sound/noise-free-sound-0643.wav,2.78425,0.04011024534702301
646 | train,noise/free-sound/noise-free-sound-0644.wav,94.5981875,0.011430583894252777
647 | train,noise/free-sound/noise-free-sound-0645.wav,1.7240625,0.022321103140711784
648 | train,noise/free-sound/noise-free-sound-0646.wav,35.813,0.015494233928620815
649 | train,noise/free-sound/noise-free-sound-0647.wav,60.103375,0.027890006080269814
650 | train,noise/free-sound/noise-free-sound-0648.wav,12.288,0.020221997052431107
651 | train,noise/free-sound/noise-free-sound-0649.wav,50.4279375,0.042251117527484894
652 | train,noise/free-sound/noise-free-sound-0650.wav,16.236,0.10205979645252228
653 | train,noise/free-sound/noise-free-sound-0651.wav,65.2095,0.0796578973531723
654 | train,noise/free-sound/noise-free-sound-0652.wav,5.69925,0.014021473005414009
655 | train,noise/free-sound/noise-free-sound-0653.wav,29.125,0.05897703766822815
656 | train,noise/free-sound/noise-free-sound-0654.wav,5.9820625,0.1681751012802124
657 | train,noise/free-sound/noise-free-sound-0655.wav,21.34,0.01833922602236271
658 | train,noise/free-sound/noise-free-sound-0656.wav,4.884875,0.015281392261385918
659 | train,noise/free-sound/noise-free-sound-0657.wav,9.6670625,0.015602482482790947
660 | train,noise/free-sound/noise-free-sound-0658.wav,19.0,0.010228785686194897
661 | train,noise/free-sound/noise-free-sound-0659.wav,11.1241875,0.03744412958621979
662 | train,noise/free-sound/noise-free-sound-0660.wav,41.8373125,0.0351371206343174
663 | train,noise/free-sound/noise-free-sound-0661.wav,35.5150625,0.0055358815006911755
664 | train,noise/free-sound/noise-free-sound-0662.wav,1.410625,0.08759284019470215
665 | train,noise/free-sound/noise-free-sound-0663.wav,29.3544375,0.016564222052693367
666 | train,noise/free-sound/noise-free-sound-0664.wav,15.647375,0.06327983736991882
667 | train,noise/free-sound/noise-free-sound-0665.wav,17.2669375,0.03821848705410957
668 | train,noise/free-sound/noise-free-sound-0666.wav,13.685625,0.028673436492681503
669 | train,noise/free-sound/noise-free-sound-0667.wav,13.7598125,0.02481307089328766
670 | train,noise/free-sound/noise-free-sound-0668.wav,4.022875,0.07184793055057526
671 | train,noise/free-sound/noise-free-sound-0669.wav,4.022875,0.030827142298221588
672 | train,noise/free-sound/noise-free-sound-0670.wav,4.7600625,0.023740291595458984
673 | train,noise/free-sound/noise-free-sound-0671.wav,70.191,0.05368071794509888
674 | train,noise/free-sound/noise-free-sound-0672.wav,6.2955,0.00962742604315281
675 | train,noise/free-sound/noise-free-sound-0673.wav,3.8298125,0.045326001942157745
676 | train,noise/free-sound/noise-free-sound-0674.wav,31.131875,0.040758490562438965
677 | train,noise/free-sound/noise-free-sound-0675.wav,8.7690625,0.017590992152690887
678 | train,noise/free-sound/noise-free-sound-0676.wav,2.1630625,0.20533037185668945
679 | train,noise/free-sound/noise-free-sound-0677.wav,107.76,0.03978710621595383
680 | train,noise/free-sound/noise-free-sound-0678.wav,2.0636875,0.06593439728021622
681 | train,noise/free-sound/noise-free-sound-0679.wav,22.0,0.05452584847807884
682 | train,noise/free-sound/noise-free-sound-0680.wav,14.568,0.07073165476322174
683 | train,noise/free-sound/noise-free-sound-0681.wav,96.7053125,0.03480824455618858
684 | train,noise/free-sound/noise-free-sound-0682.wav,18.18125,0.026480460539460182
685 | train,noise/free-sound/noise-free-sound-0683.wav,27.616,0.03949678689241409
686 | train,noise/free-sound/noise-free-sound-0684.wav,139.00625,0.03529800847172737
687 | train,noise/free-sound/noise-free-sound-0685.wav,43.8785625,0.027088863775134087
688 | train,noise/free-sound/noise-free-sound-0686.wav,15.6038125,0.019841305911540985
689 | train,noise/free-sound/noise-free-sound-0687.wav,20.8020625,0.01232278160750866
690 | train,noise/free-sound/noise-free-sound-0688.wav,19.25225,0.06801490485668182
691 | train,noise/free-sound/noise-free-sound-0689.wav,1.05,0.05468634516000748
692 | train,noise/free-sound/noise-free-sound-0690.wav,5.2053125,0.036664824932813644
693 | train,noise/free-sound/noise-free-sound-0691.wav,10.0693125,0.009726029820740223
694 | train,noise/free-sound/noise-free-sound-0692.wav,0.927875,0.1667458862066269
695 | train,noise/free-sound/noise-free-sound-0693.wav,1.410625,0.13827519118785858
696 | train,noise/free-sound/noise-free-sound-0694.wav,19.8465,0.0698181539773941
697 | train,noise/free-sound/noise-free-sound-0695.wav,9.4238125,0.14697526395320892
698 | train,noise/free-sound/noise-free-sound-0696.wav,29.570625,0.016864970326423645
699 | train,noise/free-sound/noise-free-sound-0697.wav,6.112625,0.015389485284686089
700 | train,noise/free-sound/noise-free-sound-0698.wav,76.168875,0.040011338889598846
701 | train,noise/free-sound/noise-free-sound-0699.wav,119.5101875,0.01451085601001978
702 | train,noise/free-sound/noise-free-sound-0700.wav,41.04,0.027945347130298615
703 | train,noise/free-sound/noise-free-sound-0701.wav,6.057625,0.11962121725082397
704 | train,noise/free-sound/noise-free-sound-0702.wav,21.81225,0.01318186242133379
705 | train,noise/free-sound/noise-free-sound-0703.wav,8.1930625,0.03435487672686577
706 | train,noise/free-sound/noise-free-sound-0704.wav,20.16,0.07279205322265625
707 | train,noise/free-sound/noise-free-sound-0705.wav,11.3306875,0.16383984684944153
708 | train,noise/free-sound/noise-free-sound-0706.wav,28.290625,0.011244518682360649
709 | train,noise/free-sound/noise-free-sound-0707.wav,40.0718125,0.06048770621418953
710 | train,noise/free-sound/noise-free-sound-0708.wav,63.0595625,0.028981540352106094
711 | train,noise/free-sound/noise-free-sound-0709.wav,31.85775,0.032681904733181
712 | train,noise/free-sound/noise-free-sound-0710.wav,45.7404375,0.06862396746873856
713 | train,noise/free-sound/noise-free-sound-0711.wav,0.3926875,0.031020980328321457
714 | train,noise/free-sound/noise-free-sound-0712.wav,2.847375,0.09257514029741287
715 | train,noise/free-sound/noise-free-sound-0713.wav,1.6979375,0.08049245923757553
716 | train,noise/free-sound/noise-free-sound-0714.wav,1.12325,0.03631926700472832
717 | train,noise/free-sound/noise-free-sound-0715.wav,60.0555,0.06517808884382248
718 | train,noise/free-sound/noise-free-sound-0716.wav,0.624,0.03566426411271095
719 | train,noise/free-sound/noise-free-sound-0717.wav,5.9820625,0.016297001391649246
720 | train,noise/free-sound/noise-free-sound-0718.wav,13.035125,0.08569561690092087
721 | train,noise/free-sound/noise-free-sound-0719.wav,7.0008125,0.012838777154684067
722 | train,noise/free-sound/noise-free-sound-0720.wav,10.7363125,0.0775250568985939
723 | train,noise/free-sound/noise-free-sound-0721.wav,9.03,0.09511198103427887
724 | train,noise/free-sound/noise-free-sound-0722.wav,7.77,0.0734415054321289
725 | train,noise/free-sound/noise-free-sound-0723.wav,89.721875,0.019564535468816757
726 | train,noise/free-sound/noise-free-sound-0724.wav,0.9109375,0.12772886455059052
727 | train,noise/free-sound/noise-free-sound-0725.wav,33.2295625,0.05877486243844032
728 | train,noise/free-sound/noise-free-sound-0726.wav,120.0138125,0.010473191738128662
729 | train,noise/free-sound/noise-free-sound-0727.wav,29.022,0.023159675300121307
730 | train,noise/free-sound/noise-free-sound-0728.wav,123.84,0.025120077654719353
731 | train,noise/free-sound/noise-free-sound-0729.wav,187.14125,0.031593095511198044
732 | train,noise/free-sound/noise-free-sound-0730.wav,1.489,0.07127058506011963
733 | train,noise/free-sound/noise-free-sound-0731.wav,1.43675,0.07526694238185883
734 | train,noise/free-sound/noise-free-sound-0732.wav,31.73875,0.026971112936735153
735 | train,noise/free-sound/noise-free-sound-0733.wav,31.94775,0.026688866317272186
736 | train,noise/free-sound/noise-free-sound-0734.wav,21.1591875,0.01646844856441021
737 | train,noise/free-sound/noise-free-sound-0735.wav,21.81225,0.0156633909791708
738 | train,noise/free-sound/noise-free-sound-0736.wav,0.9194375,0.12268686294555664
739 | train,noise/free-sound/noise-free-sound-0737.wav,4.8469375,0.07719245553016663
740 | train,noise/free-sound/noise-free-sound-0738.wav,27.89875,0.09538252651691437
741 | train,noise/free-sound/noise-free-sound-0739.wav,19.224,0.04355339705944061
742 | train,noise/free-sound/noise-free-sound-0740.wav,64.0714375,0.14594630897045135
743 | train,noise/free-sound/noise-free-sound-0741.wav,18.696,0.07057242840528488
744 | train,noise/free-sound/noise-free-sound-0742.wav,22.5436875,0.03319770097732544
745 | train,noise/free-sound/noise-free-sound-0743.wav,43.9640625,0.059158992022275925
746 | train,noise/free-sound/noise-free-sound-0744.wav,1.044875,0.11951413005590439
747 | train,noise/free-sound/noise-free-sound-0745.wav,4.7333125,0.057827286422252655
748 | train,noise/free-sound/noise-free-sound-0746.wav,0.227,0.011438189074397087
749 | train,noise/free-sound/noise-free-sound-0747.wav,17.4236875,0.023010853677988052
750 | train,noise/free-sound/noise-free-sound-0748.wav,5.64,0.0689081996679306
751 | train,noise/free-sound/noise-free-sound-0749.wav,1.0,0.06701701134443283
752 | train,noise/free-sound/noise-free-sound-0750.wav,5.4595625,0.028374891728162766
753 | train,noise/free-sound/noise-free-sound-0751.wav,1.446375,0.2519388496875763
754 | train,noise/free-sound/noise-free-sound-0752.wav,33.6083125,0.030209451913833618
755 | train,noise/free-sound/noise-free-sound-0753.wav,24.696,0.12444719672203064
756 | train,noise/free-sound/noise-free-sound-0754.wav,80.72325,0.04299146309494972
757 | train,noise/free-sound/noise-free-sound-0755.wav,14.4195625,0.04165901988744736
758 | train,noise/free-sound/noise-free-sound-0756.wav,4.15875,0.030746938660740852
759 | train,noise/free-sound/noise-free-sound-0757.wav,13.7665,0.10464916378259659
760 | train,noise/free-sound/noise-free-sound-0758.wav,12.644375,0.05036643147468567
761 | train,noise/free-sound/noise-free-sound-0759.wav,2.731375,0.06787645071744919
762 | train,noise/free-sound/noise-free-sound-0760.wav,0.6233125,0.09675282984972
763 | train,noise/free-sound/noise-free-sound-0761.wav,32.3990625,0.03686794266104698
764 | train,noise/free-sound/noise-free-sound-0762.wav,12.8,0.20819808542728424
765 | train,noise/free-sound/noise-free-sound-0763.wav,20.491625,0.10151830315589905
766 | train,noise/free-sound/noise-free-sound-0764.wav,0.0645625,0.0005920132389292121
767 | train,noise/free-sound/noise-free-sound-0765.wav,0.20275,0.0034669337328523397
768 | train,noise/free-sound/noise-free-sound-0766.wav,0.026125,0.0
769 | train,noise/free-sound/noise-free-sound-0767.wav,29.6385625,0.05864330753684044
770 | train,noise/free-sound/noise-free-sound-0768.wav,0.4229375,0.06983917951583862
771 | train,noise/free-sound/noise-free-sound-0769.wav,25.761625,0.05337362736463547
772 | train,noise/free-sound/noise-free-sound-0770.wav,55.5624375,0.02889862284064293
773 | train,noise/free-sound/noise-free-sound-0771.wav,3.1579375,0.1223643571138382
774 | train,noise/free-sound/noise-free-sound-0772.wav,4.6535625,0.02311071753501892
775 | train,noise/free-sound/noise-free-sound-0773.wav,9.045375,0.034549664705991745
776 | train,noise/free-sound/noise-free-sound-0774.wav,3.1471875,0.19626016914844513
777 | train,noise/free-sound/noise-free-sound-0775.wav,29.5101875,0.038521576672792435
778 | train,noise/free-sound/noise-free-sound-0776.wav,15.661875,0.06775304675102234
779 | train,noise/free-sound/noise-free-sound-0777.wav,6.9224375,0.09624522179365158
780 | train,noise/free-sound/noise-free-sound-0778.wav,18.2613125,0.025550562888383865
781 | train,noise/free-sound/noise-free-sound-0779.wav,12.512625,0.044095106422901154
782 | train,noise/free-sound/noise-free-sound-0780.wav,30.2759375,0.046052154153585434
783 | train,noise/free-sound/noise-free-sound-0781.wav,60.7200625,0.0475756973028183
784 | train,noise/free-sound/noise-free-sound-0782.wav,9.8986875,0.030712144449353218
785 | train,noise/free-sound/noise-free-sound-0783.wav,8.614625,0.14119373261928558
786 | train,noise/free-sound/noise-free-sound-0784.wav,35.8040625,0.008375727571547031
787 | train,noise/free-sound/noise-free-sound-0785.wav,26.919875,0.00985590647906065
788 | train,noise/free-sound/noise-free-sound-0786.wav,19.8,0.09170135855674744
789 | train,noise/free-sound/noise-free-sound-0787.wav,34.9853125,0.05825154483318329
790 | train,noise/free-sound/noise-free-sound-0788.wav,34.8718125,0.0597430057823658
791 | train,noise/free-sound/noise-free-sound-0789.wav,8.8515,0.17519313097000122
792 | train,noise/free-sound/noise-free-sound-0790.wav,16.138125,0.03609300032258034
793 | train,noise/free-sound/noise-free-sound-0791.wav,9.0,0.021120132878422737
794 | train,noise/free-sound/noise-free-sound-0792.wav,41.5869375,0.05190187692642212
795 | train,noise/free-sound/noise-free-sound-0793.wav,9.4301875,0.0664939358830452
796 | train,noise/free-sound/noise-free-sound-0794.wav,4.434125,0.046769555658102036
797 | train,noise/free-sound/noise-free-sound-0795.wav,45.814125,0.031657714396715164
798 | train,noise/free-sound/noise-free-sound-0796.wav,10.24,0.09117131680250168
799 | train,noise/free-sound/noise-free-sound-0797.wav,16.384,0.1648857593536377
800 | train,noise/free-sound/noise-free-sound-0798.wav,12.9706875,0.18368032574653625
801 | train,noise/free-sound/noise-free-sound-0799.wav,5.6319375,0.03792005777359009
802 | train,noise/free-sound/noise-free-sound-0800.wav,9.6,0.018907127901911736
803 | train,noise/free-sound/noise-free-sound-0801.wav,6.01075,0.0029609836637973785
804 | train,noise/free-sound/noise-free-sound-0802.wav,5.1245,0.0005925616133026779
805 | train,noise/free-sound/noise-free-sound-0803.wav,14.02775,0.09098536521196365
806 | train,noise/free-sound/noise-free-sound-0804.wav,59.6985,0.006640758831053972
807 | train,noise/free-sound/noise-free-sound-0805.wav,15.16,0.05793767049908638
808 | train,noise/free-sound/noise-free-sound-0806.wav,11.78125,0.028702983632683754
809 | train,noise/free-sound/noise-free-sound-0807.wav,4.10125,0.03980659693479538
810 | train,noise/free-sound/noise-free-sound-0808.wav,1.6979375,0.1011076346039772
811 | train,noise/free-sound/noise-free-sound-0809.wav,25.1298125,0.09053251892328262
812 | train,noise/free-sound/noise-free-sound-0810.wav,4.008375,0.26876741647720337
813 | train,noise/free-sound/noise-free-sound-0811.wav,7.9760625,0.28619760274887085
814 | train,noise/free-sound/noise-free-sound-0812.wav,12.198,0.06944070011377335
815 | train,noise/free-sound/noise-free-sound-0813.wav,78.8018125,0.18343234062194824
816 | train,noise/free-sound/noise-free-sound-0814.wav,15.661625,0.09769771993160248
817 | train,noise/free-sound/noise-free-sound-0815.wav,2.4718125,0.12637928128242493
818 | train,noise/free-sound/noise-free-sound-0816.wav,1.324625,0.08751863986253738
819 | train,noise/free-sound/noise-free-sound-0817.wav,1.6123125,0.10826314985752106
820 | train,noise/free-sound/noise-free-sound-0818.wav,57.678375,0.015582166612148285
821 | train,noise/free-sound/noise-free-sound-0819.wav,109.9043125,0.013354653492569923
822 | train,noise/free-sound/noise-free-sound-0820.wav,18.450875,0.02505454793572426
823 | train,noise/free-sound/noise-free-sound-0821.wav,3.3436875,0.06855987757444382
824 | train,noise/free-sound/noise-free-sound-0822.wav,4.989375,0.1806216686964035
825 | train,noise/free-sound/noise-free-sound-0823.wav,2.749125,0.05665133148431778
826 | train,noise/free-sound/noise-free-sound-0824.wav,33.12325,0.04190948233008385
827 | train,noise/free-sound/noise-free-sound-0825.wav,2.6735625,0.07563027739524841
828 | train,noise/free-sound/noise-free-sound-0826.wav,11.18175,0.07534576207399368
829 | train,noise/free-sound/noise-free-sound-0827.wav,0.548,0.05151750519871712
830 | train,noise/free-sound/noise-free-sound-0828.wav,60.081625,0.011978531256318092
831 | train,noise/free-sound/noise-free-sound-0829.wav,6.7504375,0.06023939326405525
832 | train,noise/free-sound/noise-free-sound-0830.wav,0.9581875,0.06875304877758026
833 | train,noise/free-sound/noise-free-sound-0831.wav,11.441625,0.03155514597892761
834 | train,noise/free-sound/noise-free-sound-0832.wav,14.9420625,0.027190793305635452
835 | train,noise/free-sound/noise-free-sound-0833.wav,4.97375,0.07724756002426147
836 | train,noise/free-sound/noise-free-sound-0834.wav,17.0666875,0.0489436499774456
837 | train,noise/free-sound/noise-free-sound-0835.wav,5.0515625,0.13070523738861084
838 | train,noise/free-sound/noise-free-sound-0836.wav,64.896,0.019817186519503593
839 | train,noise/free-sound/noise-free-sound-0837.wav,13.6533125,0.010230294428765774
840 | train,noise/free-sound/noise-free-sound-0838.wav,1.0029375,0.0714196264743805
841 | train,noise/free-sound/noise-free-sound-0839.wav,2.1420625,0.1818079650402069
842 | train,noise/free-sound/noise-free-sound-0840.wav,70.191,0.05368071049451828
843 | train,noise/free-sound/noise-free-sound-0841.wav,90.0,0.015552169643342495
844 | train,noise/free-sound/noise-free-sound-0842.wav,300.0,0.0035129054449498653
845 | test,noise/sound-bible/noise-sound-bible-0000.wav,6.7499375,0.1441386342048645
846 | test,noise/sound-bible/noise-sound-bible-0001.wav,2.063,0.25420311093330383
847 | test,noise/sound-bible/noise-sound-bible-0002.wav,1.044,0.019799862056970596
848 | test,noise/sound-bible/noise-sound-bible-0003.wav,6.55675,0.06503631919622421
849 | test,noise/sound-bible/noise-sound-bible-0004.wav,11.049,0.05913013219833374
850 | test,noise/sound-bible/noise-sound-bible-0005.wav,12.408,0.042438969016075134
851 | test,noise/sound-bible/noise-sound-bible-0006.wav,0.768,0.04820356145501137
852 | test,noise/sound-bible/noise-sound-bible-0007.wav,21.06,0.1836613565683365
853 | test,noise/sound-bible/noise-sound-bible-0008.wav,9.247,0.19213145971298218
854 | test,noise/sound-bible/noise-sound-bible-0009.wav,2.115,0.0920131504535675
855 | test,noise/sound-bible/noise-sound-bible-0010.wav,7.497,0.020488228648900986
856 | test,noise/sound-bible/noise-sound-bible-0011.wav,5.093,0.06959190219640732
857 | test,noise/sound-bible/noise-sound-bible-0012.wav,3.134,0.14128883183002472
858 | test,noise/sound-bible/noise-sound-bible-0013.wav,5.12,0.08152676373720169
859 | test,noise/sound-bible/noise-sound-bible-0014.wav,27.506,0.05008871480822563
860 | test,noise/sound-bible/noise-sound-bible-0015.wav,22.23,0.05889064073562622
861 | test,noise/sound-bible/noise-sound-bible-0016.wav,7.026,0.040081411600112915
862 | test,noise/sound-bible/noise-sound-bible-0017.wav,3.2391875,0.0472022108733654
863 | test,noise/sound-bible/noise-sound-bible-0018.wav,3.683,0.07156568765640259
864 | test,noise/sound-bible/noise-sound-bible-0019.wav,6.922,0.0659433975815773
865 | test,noise/sound-bible/noise-sound-bible-0020.wav,4.336,0.08530513942241669
866 | test,noise/sound-bible/noise-sound-bible-0021.wav,12.721,0.03518088907003403
867 | test,noise/sound-bible/noise-sound-bible-0022.wav,4.153,0.11733922362327576
868 | test,noise/sound-bible/noise-sound-bible-0023.wav,2.56,0.036052703857421875
869 | test,noise/sound-bible/noise-sound-bible-0024.wav,2.455,0.07451076060533524
870 | test,noise/sound-bible/noise-sound-bible-0025.wav,5.537,0.1307377815246582
871 | test,noise/sound-bible/noise-sound-bible-0026.wav,0.653,0.16861580312252045
872 | test,noise/sound-bible/noise-sound-bible-0027.wav,3.004,0.09237220138311386
873 | test,noise/sound-bible/noise-sound-bible-0028.wav,12.617,0.09574437886476517
874 | test,noise/sound-bible/noise-sound-bible-0029.wav,3.0040625,0.013525698333978653
875 | test,noise/sound-bible/noise-sound-bible-0030.wav,14.889,0.03332362323999405
876 | test,noise/sound-bible/noise-sound-bible-0031.wav,9.769,0.035738807171583176
877 | test,noise/sound-bible/noise-sound-bible-0032.wav,25.152,0.010232904925942421
878 | test,noise/sound-bible/noise-sound-bible-0033.wav,6.687,0.02844083495438099
879 | test,noise/sound-bible/noise-sound-bible-0034.wav,20.21875,0.1358114778995514
880 | test,noise/sound-bible/noise-sound-bible-0035.wav,6.5045,0.04590274393558502
881 | test,noise/sound-bible/noise-sound-bible-0036.wav,20.035,0.10746238380670547
882 | test,noise/sound-bible/noise-sound-bible-0037.wav,1.044,0.03590782731771469
883 | test,noise/sound-bible/noise-sound-bible-0038.wav,16.248,0.1472569853067398
884 | test,noise/sound-bible/noise-sound-bible-0039.wav,4.048,0.03187888488173485
885 | test,noise/sound-bible/noise-sound-bible-0040.wav,9.142,0.026945974677801132
886 | test,noise/sound-bible/noise-sound-bible-0041.wav,2.638,0.05780833214521408
887 | test,noise/sound-bible/noise-sound-bible-0042.wav,6.269,0.14264912903308868
888 | test,noise/sound-bible/noise-sound-bible-0043.wav,23.928,0.050584856420755386
889 | test,noise/sound-bible/noise-sound-bible-0044.wav,30.0408125,0.002178351627662778
890 | test,noise/sound-bible/noise-sound-bible-0045.wav,61.413875,0.08873486518859863
891 | test,noise/sound-bible/noise-sound-bible-0046.wav,0.8098125,0.15509864687919617
892 | test,noise/sound-bible/noise-sound-bible-0047.wav,6.164,0.09232277423143387
893 | test,noise/sound-bible/noise-sound-bible-0048.wav,62.641625,0.011549966409802437
894 | test,noise/sound-bible/noise-sound-bible-0049.wav,49.3453125,0.07298757880926132
895 | test,noise/sound-bible/noise-sound-bible-0050.wav,1.056,0.042787592858076096
896 | test,noise/sound-bible/noise-sound-bible-0051.wav,2.063,0.01880437508225441
897 | test,noise/sound-bible/noise-sound-bible-0052.wav,6.765,0.2076394259929657
898 | test,noise/sound-bible/noise-sound-bible-0053.wav,12.878,0.05402258038520813
899 | test,noise/sound-bible/noise-sound-bible-0054.wav,37.172,0.31475576758384705
900 | test,noise/sound-bible/noise-sound-bible-0055.wav,18.102,0.07953961193561554
901 | test,noise/sound-bible/noise-sound-bible-0056.wav,7.888,0.274869441986084
902 | test,noise/sound-bible/noise-sound-bible-0057.wav,4.1795625,0.0715896338224411
903 | test,noise/sound-bible/noise-sound-bible-0058.wav,1.332,0.018020672723650932
904 | test,noise/sound-bible/noise-sound-bible-0059.wav,49.528,0.051448073238134384
905 | test,noise/sound-bible/noise-sound-bible-0060.wav,45.139,0.03268921747803688
906 | test,noise/sound-bible/noise-sound-bible-0061.wav,19.464,0.00734285730868578
907 | test,noise/sound-bible/noise-sound-bible-0062.wav,15.908,0.06429439783096313
908 | test,noise/sound-bible/noise-sound-bible-0063.wav,14.3935,0.03714064508676529
909 | test,noise/sound-bible/noise-sound-bible-0064.wav,2.847,0.10039804875850677
910 | test,noise/sound-bible/noise-sound-bible-0065.wav,7.235,0.10588019341230392
911 | test,noise/sound-bible/noise-sound-bible-0066.wav,2.716,0.05088731274008751
912 | test,noise/sound-bible/noise-sound-bible-0067.wav,27.7420625,0.08898525685071945
913 | test,noise/sound-bible/noise-sound-bible-0068.wav,5.784,0.03145468607544899
914 | test,noise/sound-bible/noise-sound-bible-0069.wav,1.175,0.10087023675441742
915 | test,noise/sound-bible/noise-sound-bible-0070.wav,2.063,0.21097871661186218
916 | test,noise/sound-bible/noise-sound-bible-0071.wav,4.049,0.08047668635845184
917 | test,noise/sound-bible/noise-sound-bible-0072.wav,4.832,0.06273679435253143
918 | test,noise/sound-bible/noise-sound-bible-0073.wav,7.896,0.08007873594760895
919 | test,noise/sound-bible/noise-sound-bible-0074.wav,2.063,0.018543394282460213
920 | test,noise/sound-bible/noise-sound-bible-0075.wav,32.679,0.06352479755878448
921 | test,noise/sound-bible/noise-sound-bible-0076.wav,57.704,0.09347757697105408
922 | test,noise/sound-bible/noise-sound-bible-0077.wav,5.146,0.19708338379859924
923 | test,noise/sound-bible/noise-sound-bible-0078.wav,23.4056875,0.09098900854587555
924 | test,noise/sound-bible/noise-sound-bible-0079.wav,21.394,0.08347520977258682
925 | test,noise/sound-bible/noise-sound-bible-0080.wav,4.702,0.08633428066968918
926 | test,noise/sound-bible/noise-sound-bible-0081.wav,16.848,0.07527593523263931
927 | test,noise/sound-bible/noise-sound-bible-0082.wav,3.604,0.06656266748905182
928 | test,noise/sound-bible/noise-sound-bible-0083.wav,58.61875,0.10153720527887344
929 | test,noise/sound-bible/noise-sound-bible-0084.wav,8.045,0.0457281619310379
930 | test,noise/sound-bible/noise-sound-bible-0085.wav,2.063,0.12232034653425217
931 | test,noise/sound-bible/noise-sound-bible-0086.wav,0.552,0.0682542473077774
932 |
--------------------------------------------------------------------------------
/code/exp_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import pathlib
4 | from collections import namedtuple
5 | from typing import List, Optional, Sequence, Tuple, Union, Callable
6 |
7 | import json
8 | import librosa
9 | import numpy as np
10 | import pandas as pd
11 | import socket
12 | import soundfile as sf
13 | import torch
14 | from asteroid.losses.sdr import singlesrc_neg_sisdr
15 | from asteroid.losses.sdr import singlesrc_neg_snr
16 | from numpy.random import Generator
17 | from scipy.signal import convolve
18 |
19 | from exp_utils import ExperimentError
20 |
21 | ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
22 |
23 | example_duration: float = 4
24 | sample_rate: int = 16000
25 | example_length: int = int(sample_rate * example_duration)
26 |
27 | _eps: float = 1e-8
28 | _rng = np.random.default_rng(0)
29 |
30 | Batch = namedtuple(
31 | 'Batch', ('inputs','targets','pre_snrs','post_snrs'))
32 | ContrastiveBatch = namedtuple(
33 | 'ContrastiveBatch', ('inputs_1','targets_1','inputs_2','targets_2',
34 | 'labels','pre_snrs','post_snrs'))
35 |
36 |
37 | def _make_2d(x: torch.Tensor):
38 | """Normalize shape of `x` to two dimensions: [batch, time]."""
39 | if isinstance(x, np.ndarray):
40 | x = torch.from_numpy(x)
41 | if x.ndim == 1:
42 | return x.reshape(1, -1)
43 | elif x.ndim == 3:
44 | return x.squeeze(1)
45 | else:
46 | if x.ndim != 2: raise ValueError('Could not force 2d.')
47 | return x
48 |
49 |
50 | def _make_3d(x: torch.Tensor):
51 | """Normalize shape of `x` to three dimensions: [batch, n_chan, time]."""
52 | if isinstance(x, np.ndarray):
53 | x = torch.from_numpy(x)
54 | if x.ndim == 1:
55 | return x.reshape(1, 1, -1)
56 | elif x.ndim == 2:
57 | return x.unsqueeze(1)
58 | else:
59 | if x.ndim != 3: raise ValueError('Could not force 3d.')
60 | return x
61 |
62 |
63 | def mix_signals(
64 | source: np.ndarray,
65 | noise: np.ndarray,
66 | snr_db: Union[float, np.ndarray]
67 | ) -> np.ndarray:
68 | """Function to mix signals.
69 |
70 | Args:
71 | source (np.ndarray): source signal
72 | noise (np.ndarray): noise signal
73 | snr_db (float): desired mixture SNR in decibels (scales noise)
74 |
75 | Returns:
76 | mixture (np.ndarray): mixture signal
77 | """
78 | energy_s = np.sum(source ** 2, axis=-1, keepdims=True)
79 | energy_n = np.sum(noise ** 2, axis=-1, keepdims=True)
80 | b = np.sqrt((energy_s / energy_n) * (10 ** (-snr_db / 10.)))
81 | return source + b * noise
82 |
83 |
84 | def sparsity_index(
85 | signal: np.ndarray
86 | ) -> float:
87 | """Defines a sparsity value for a given signal, by computing the
88 | standard deviation of the segmental root-mean-square (RMS).
89 | """
90 | return float(np.std(librosa.feature.rms(y=signal).reshape(-1)))
91 |
92 |
93 | def wav_read(
94 | filepath: Union[str, os.PathLike]
95 | ) -> Tuple[np.ndarray, float]:
96 | """Reads mono audio from WAV.
97 | """
98 | y, sr = sf.read(filepath, dtype='float32', always_2d=True)
99 | if sr != sample_rate:
100 | raise IOError(f'Expected sample_rate={sample_rate}, got {sr}.')
101 | # always pick up the first channel
102 | y = np.array(y[..., 0])
103 | return y, float(len(y) / sample_rate)
104 |
105 |
106 | def wav_write(
107 | filepath: Union[str, os.PathLike],
108 | array: np.ndarray
109 | ):
110 | sf.write(filepath, array, samplerate=sample_rate)
111 | return
112 |
113 |
114 | def wav_read_multiple(
115 | filepaths: Sequence[Union[str, os.PathLike]],
116 | concatenate: bool = False,
117 | randomly_offset: bool = True,
118 | seed: Optional[int] = None
119 | ) -> np.ndarray:
120 | """Loads multiple audio signals from file; may be batched or concatenated.
121 | """
122 | rng = np.random.default_rng(seed)
123 | signals = []
124 | collate_fn: Callable = np.concatenate if concatenate else np.stack
125 | for filepath in filepaths:
126 | s, duration = wav_read(filepath)
127 | if not concatenate:
128 | # pad shorter signals up to expected length
129 | if len(s) < example_length:
130 | lengths = [(0, 0)] * s.ndim
131 | lengths[-1] = (0, example_length - len(s))
132 | s = np.pad(s, lengths, mode='constant')
133 |
134 | # randomly offset longer signals if desired
135 | offset: int = 0
136 | remainder: int = len(s) - example_length
137 | if randomly_offset and remainder > 0:
138 | offset = rng.integers(0, remainder)
139 |
140 | # trim exactly to the expected length
141 | s = s[offset:offset + example_length]
142 | signals.append(s)
143 | return collate_fn(signals, axis=0)
144 |
145 |
146 | def wav_sample(
147 | data: np.ndarray,
148 | num_clips: int,
149 | seed: Optional[int] = None
150 | ) -> np.ndarray:
151 | rng = np.random.default_rng(seed)
152 | start_indices = rng.integers(0, len(data) - example_length - 1, num_clips)
153 | signals = [data[i:i+example_length] for i in start_indices]
154 | return np.stack(signals, axis=0)
155 |
156 | def sisdr(
157 | estimate: torch.Tensor,
158 | target: torch.Tensor,
159 | reduction: Optional[str] = None
160 | ) -> torch.Tensor:
161 | """Calculate single source SI-SDR."""
162 | return sdr(estimate, target, reduction=reduction, scale_invariant=True)
163 |
164 |
165 | def sisdr_improvement(
166 | estimate: torch.Tensor,
167 | target: torch.Tensor,
168 | mixture: torch.Tensor,
169 | reduction: Optional[str] = None
170 | ) -> torch.Tensor:
171 | """Calculate estimate to target SI-SDR improvement relative to mixture.
172 | """
173 | return sdr_improvement(
174 | estimate, target, mixture, reduction=reduction, scale_invariant=True)
175 |
176 |
177 | def sdr(
178 | estimate: torch.Tensor,
179 | target: torch.Tensor,
180 | scale_invariant: bool = False,
181 | reduction: Optional[str] = None
182 | ) -> torch.Tensor:
183 | """Calculate single source SDR."""
184 | ml = min(estimate.shape[-1], target.shape[-1])
185 | estimate = _make_2d(estimate)[..., :ml]
186 | target = _make_2d(target)[..., :ml]
187 | if scale_invariant:
188 | output = -1 * singlesrc_neg_sisdr(estimate, target)
189 | else:
190 | output = -1 * singlesrc_neg_snr(estimate, target)
191 | if reduction == 'mean':
192 | output = torch.mean(output)
193 | return output
194 |
195 |
196 | def sdr_improvement(
197 | estimate: torch.Tensor,
198 | target: torch.Tensor,
199 | mixture: torch.Tensor,
200 | reduction: Optional[str] = None,
201 | scale_invariant: bool = False
202 | ) -> torch.Tensor:
203 | """Calculate estimate to target SDR improvement relative to mixture.
204 | """
205 | output = (
206 | sdr(estimate, target, scale_invariant=scale_invariant)
207 | - sdr(mixture, target, scale_invariant=scale_invariant)
208 | )
209 | if reduction == 'mean':
210 | output = torch.mean(output)
211 | return output
212 |
213 |
214 | def dataframe_librispeech(
215 | dataset_directory: Optional[Union[str, os.PathLike]] = None,
216 | omit_clipped: bool = False
217 | ) -> pd.DataFrame:
218 | """Creates a Pandas DataFrame with files from the LibriSpeech corpus.
219 | Root directory should mimic archive-extracted folder structure.
220 | Dataset may be downloaded at ``_.
221 | """
222 | columns = [
223 | 'subset_id',
224 | 'speaker_id',
225 | 'chapter_id',
226 | 'utterance_id',
227 | 'filepath',
228 | 'duration',
229 | 'sparsity'
230 | ]
231 | if dataset_directory is None:
232 | return pd.DataFrame(columns=columns)
233 | dataset_directory = pathlib.Path(dataset_directory)
234 | dataset_dataframe = pathlib.Path(ROOT_DIR).joinpath('datasets', 'librispeech.csv')
235 | if not dataset_directory.exists():
236 | raise ValueError(f'{dataset_directory} does not exist.')
237 | valid_subsets = [
238 | 'train-clean-100',
239 | 'train-clean-360',
240 | 'dev-clean',
241 | 'test-clean'
242 | ]
243 | if not dataset_dataframe.exists():
244 | raise ValueError(f'{dataset_dataframe} does not exist')
245 | rows = []
246 | for filepath in dataset_directory.rglob('*.wav'):
247 | try:
248 | subset_id = [_ for _ in valid_subsets if _ in str(filepath)][0]
249 | speaker_id, chapter_id, utterance_id = filepath.stem.split('-')
250 | except ValueError:
251 | continue
252 | y, duration = wav_read(filepath)
253 | sparsity = sparsity_index(y)
254 | rows.append((subset_id, speaker_id, chapter_id,
255 | utterance_id, str(filepath), duration, sparsity))
256 | if not len(rows):
257 | raise ValueError(f'Could not find any .WAV files within '
258 | f'{dataset_directory}.')
259 | df = pd.DataFrame(rows, columns=columns)
260 | df.to_csv(dataset_dataframe,
261 | header=columns,
262 | index=False,
263 | index_label=False)
264 | else:
265 | df = pd.read_csv(dataset_dataframe)
266 |
267 | dataset_directory = str(dataset_directory)
268 | df = df.sort_values('filepath', ascending=True).reset_index(drop=True)
269 | df['filepath'] = df['filepath'].apply(lambda f: os.path.join(dataset_directory, f))
270 |
271 | if omit_clipped:
272 | # discard recordings from speakers who possess clipped recordings
273 | # (manually found using SoX, where 'volume adjustment' == 1.000)
274 | clipped_speakers = [
275 | '101', '1069', '1175', '118', '1290', '1379', '1456', '1552',
276 | '1578', '1629', '1754', '1933', '1943', '1963', '198', '204',
277 | '2094', '2113', '2149', '22', '2269', '2618', '2751', '307',
278 | '3168', '323', '3294', '3374', '345', '3486', '3490', '3615',
279 | '3738', '380', '4148', '446', '459', '4734', '481', '5002',
280 | '5012', '5333', '549', '5561', '5588', '559', '5678', '5740',
281 | '576', '593', '6295', '6673', '7139', '716', '7434', '7800',
282 | '781', '8329', '8347', '882'
283 | ]
284 | df = df[~df['speaker_id'].isin(clipped_speakers)]
285 |
286 | # omit recordings which are smaller than an example
287 | df = df.query('duration >= @example_duration')
288 |
289 | # organize by split
290 | def assign_split_per_speaker(
291 | sgroup,
292 | duration_s_test: int = 30,
293 | duration_s_validation: int = 30,
294 | duration_s_train: int = 60,
295 | duration_s_prevalidation: int = 30,
296 | ):
297 | # designate partition indices based on the nearest cumulative duration
298 | sp_id = set(sgroup['speaker_id']).pop()
299 | cs = sgroup['duration'].cumsum()
300 | offset = min(sgroup.index)
301 | _d = duration_s_test
302 | split_te = (cs - _d).abs().idxmin() - offset
303 | _d += duration_s_validation
304 | split_vl = (cs - _d).abs().idxmin() - offset
305 | if split_vl == split_te: split_vl += 1
306 | _d += duration_s_train
307 | split_tr = (cs - _d).abs().idxmin() - offset
308 | if split_tr == split_vl: split_tr += 1
309 | _d += duration_s_prevalidation
310 | split_pvl = (cs - _d).abs().idxmin() - offset
311 | if split_pvl == split_tr: split_pvl += 1
312 |
313 | assert (split_te != split_vl), (sp_id, split_te, split_vl)
314 | assert (split_vl != split_tr), (sp_id, split_vl, split_tr)
315 | assert (split_tr != split_pvl), (sp_id, split_tr, split_pvl)
316 |
317 | sgroup = sgroup.reset_index(drop=True)
318 |
319 | # assign split
320 | for i in range(0, split_te):
321 | sgroup.at[i, 'split'] = 'test'
322 | for i in range(split_te, split_vl):
323 | sgroup.at[i, 'split'] = 'val'
324 | for i in range(split_vl, split_tr):
325 | sgroup.at[i, 'split'] = 'train'
326 | for i in range(split_tr, split_pvl):
327 | sgroup.at[i, 'split'] = 'preval'
328 |
329 | # return the modified speaker group
330 | return sgroup
331 |
332 | df = df.assign(split='pretrain').sort_values(['speaker_id', 'duration'])
333 | g = df.reset_index(drop=True).groupby('speaker_id')
334 | df = g.apply(assign_split_per_speaker)
335 |
336 | # shuffle the recordings
337 | df = df.sample(frac=1, random_state=0)
338 |
339 | # organize by subset and split
340 | df['subset_id'] = pd.Categorical(df['subset_id'], valid_subsets)
341 | df['split'] = pd.Categorical(df['split'], ['pretrain', 'preval', 'train',
342 | 'val', 'test'])
343 | df = df.sort_values(['subset_id', 'split'])
344 |
345 | # ensure that all the audio files exist
346 | if not all([f for f in df.filepath if os.path.isfile(f)]):
347 | raise ValueError(f'Audio files missing, check {dataset_directory}.')
348 |
349 | # reindex and name the dataframe
350 | df = df[['filepath', 'subset_id', 'speaker_id',
351 | 'split', 'duration', 'sparsity']]
352 | df = df.reset_index(drop=True)
353 | df.index.name = 'LIBRISPEECH'
354 | return df
355 |
356 |
357 | def dataframe_demand(
358 | dataset_directory: Optional[Union[str, os.PathLike]] = None
359 | ) -> pd.DataFrame:
360 | """Creates a Pandas DataFrame with files from the DEMAND corpus.
361 | Root directory should mimic archive-extracted folder structure.
362 | Dataset may be downloaded at ``_.
363 | """
364 | columns = [
365 | 'category_id',
366 | 'location_id',
367 | 'filepath',
368 | 'duration',
369 | 'sparsity'
370 | ]
371 | if dataset_directory is None:
372 | return pd.DataFrame(columns=columns)
373 | dataset_directory = pathlib.Path(dataset_directory)
374 | dataset_dataframe = pathlib.Path(ROOT_DIR).joinpath('datasets', 'demand.csv')
375 | if not dataset_directory.exists():
376 | raise ValueError(f'{dataset_directory} does not exist.')
377 | valid_categories = [
378 | 'domestic',
379 | 'nature',
380 | 'office',
381 | 'public',
382 | 'street',
383 | 'transportation'
384 | ]
385 | valid_locations = [
386 | 'kitchen',
387 | 'washing',
388 | 'park',
389 | 'hallway',
390 | 'office',
391 | 'resto',
392 | 'psquare',
393 | 'bus',
394 | 'metro',
395 | 'living',
396 | 'field',
397 | 'river',
398 | 'meeting',
399 | 'cafeter',
400 | 'station',
401 | 'traffic',
402 | 'car'
403 | ]
404 | if not dataset_dataframe.exists():
405 | raise ValueError(f'{dataset_dataframe} does not exist')
406 | rows = []
407 | for filepath in sorted(dataset_directory.rglob('*.wav')):
408 | if 'ch01' not in filepath.stem:
409 | continue
410 | category_id = [_ for _ in valid_categories if
411 | _[0].upper() == filepath.parent.stem[0].upper()][0]
412 | location_id = [_ for _ in valid_locations if
413 | filepath.parent.stem[1:].upper() in _.upper()][0]
414 | y, duration = wav_read(filepath)
415 | sparsity = sparsity_index(y)
416 | rows.append((
417 | category_id,
418 | location_id,
419 | str(filepath),
420 | duration,
421 | sparsity))
422 | if not len(rows):
423 | raise ValueError(f'Could not find any .WAV files within '
424 | f'{dataset_directory}.')
425 | df = pd.DataFrame(rows, columns=columns)
426 | df.to_csv(dataset_dataframe,
427 | header=columns,
428 | index=False,
429 | index_label=False)
430 | else:
431 | df = pd.read_csv(dataset_dataframe)
432 |
433 | dataset_directory = str(dataset_directory)
434 | df = df.sort_values('filepath', ascending=True).reset_index(drop=True)
435 | df['filepath'] = df['filepath'].apply(lambda f: os.path.join(dataset_directory, f))
436 |
437 | # shuffle the recordings
438 | df = df.sample(frac=1, random_state=0)
439 |
440 | # ensure that all the audio files exist
441 | if not all([f for f in df.filepath if os.path.isfile(f)]):
442 | raise ValueError(f'Audio files missing, check {dataset_directory}.')
443 |
444 | # reindex and name the dataframe
445 | df = df[['filepath', 'duration', 'sparsity']]
446 | df = df.reset_index(drop=True)
447 | df.index.name = 'DEMAND'
448 | return df
449 |
450 |
451 | def dataframe_fsd50k(
452 | dataset_directory: Optional[Union[str, os.PathLike]] = None,
453 | ) -> pd.DataFrame:
454 | """Creates a Pandas DataFrame with files from the FSD50K corpus.
455 | Root directory should mimic archive-extracted folder structure.
456 | Dataset may be downloaded at ``_.
457 | """
458 | columns = [
459 | 'fname',
460 | 'labels',
461 | 'mids',
462 | 'split',
463 | 'filepath',
464 | 'duration',
465 | 'sparsity'
466 | ]
467 | if dataset_directory is None:
468 | return pd.DataFrame(columns=columns)
469 | dataset_directory = pathlib.Path(dataset_directory)
470 | dataset_dataframe = pathlib.Path(ROOT_DIR).joinpath('datasets', 'fsd50k.csv')
471 | if not dataset_directory.exists():
472 | raise ValueError(f'{dataset_directory} does not exist.')
473 | if not dataset_dataframe.exists():
474 | raise ValueError(f'{dataset_dataframe} does not exist')
475 |
476 | # merge separate dev and eval sets into one big table
477 | df1 = pd.read_csv(next(dataset_directory.rglob('dev.csv')))
478 | df2 = pd.read_csv(next(dataset_directory.rglob('eval.csv')))
479 | df2['split'] = 'test'
480 | df = pd.concat([df1, df2])
481 |
482 | durations, filepaths, sparsities = [], [], []
483 | for row in df.itertuples():
484 | subdir = ('FSD50K.eval_audio' if row.split == 'test'
485 | else 'FSD50K.dev_audio')
486 | filepath = dataset_directory.joinpath(subdir, str(row.fname) + '.wav')
487 | if not filepath.exists():
488 | raise ValueError(f'{filepath} does not exist.')
489 | y, duration = wav_read(filepath)
490 | sparsity = sparsity_index(y)
491 | durations.append(duration)
492 | sparsities.append(sparsity)
493 | filepaths.append(filepath)
494 | df['filepath'] = filepaths
495 | df['duration'] = durations
496 | df['sparsity'] = sparsities
497 | if not len(filepaths):
498 | raise ValueError(f'Could not find any .WAV files within '
499 | f'{dataset_directory}.')
500 | df.to_csv(dataset_dataframe,
501 | header=columns,
502 | index=False,
503 | index_label=False)
504 | else:
505 | df = pd.read_csv(dataset_dataframe)
506 |
507 | dataset_directory = str(dataset_directory)
508 | df = df.sort_values('filepath', ascending=True).reset_index(drop=True)
509 | df['filepath'] = df['filepath'].apply(lambda f: os.path.join(dataset_directory, f))
510 |
511 | # omit sounds labeled as containing speech or music
512 | df['labels'] = df['labels'].apply(str.lower)
513 | df = df[~df['labels'].str.contains('speech')]
514 | df = df[~df['labels'].str.contains('music')]
515 |
516 | # omit recordings which are smaller than an example
517 | df = df.query('duration >= @example_duration')
518 |
519 | # shuffle the recordings
520 | df = df.sample(frac=1, random_state=0)
521 |
522 | # organize by split
523 | df['split'] = pd.Categorical(df['split'], ['train', 'val', 'test'])
524 | df = df.sort_values('split')
525 |
526 | # ensure that all the audio files exist
527 | if not all([f for f in df.filepath if os.path.isfile(f)]):
528 | raise ValueError(f'Audio files missing, check {dataset_directory}.')
529 |
530 | # reindex and name the dataframe
531 | df = df[['filepath', 'split', 'duration', 'labels', 'sparsity']]
532 | df = df.reset_index(drop=True)
533 | df.index.name = 'FSD50K'
534 | return df
535 |
536 |
537 | def dataframe_musan(
538 | dataset_directory: Optional[Union[str, os.PathLike]] = None,
539 | ) -> pd.DataFrame:
540 | """Creates a Pandas DataFrame with files from the MUSAN corpus.
541 | Root directory should mimic archive-extracted folder structure.
542 | Dataset may be downloaded at ``_.
543 | """
544 | columns = [
545 | 'split',
546 | 'filepath',
547 | 'duration',
548 | 'sparsity'
549 | ]
550 | if dataset_directory is None:
551 | return pd.DataFrame(columns=columns)
552 | dataset_directory = pathlib.Path(dataset_directory)
553 | dataset_dataframe = pathlib.Path(ROOT_DIR).joinpath('datasets', 'musan.csv')
554 | if not dataset_directory.exists():
555 | raise ValueError(f'{dataset_directory} does not exist.')
556 | if not dataset_dataframe.exists():
557 | raise ValueError(f'{dataset_dataframe} does not exist')
558 | rows = []
559 | for filepath in sorted(dataset_directory.rglob('*.wav')):
560 | is_train = bool('FREE-SOUND' in str(filepath).upper())
561 | is_test = bool('SOUND-BIBLE' in str(filepath).upper())
562 | if not (is_train or is_test):
563 | continue
564 | split_id = 'train' if is_train else 'test'
565 | y, duration = wav_read(filepath)
566 | sparsity = sparsity_index(y)
567 | rows.append((split_id, str(filepath), duration, sparsity))
568 | if not len(rows):
569 | raise ValueError(f'Could not find any .WAV files within '
570 | f'{dataset_directory}.')
571 | df = pd.DataFrame(rows, columns=columns)
572 | df.to_csv(dataset_dataframe,
573 | header=columns,
574 | index=False,
575 | index_label=False)
576 | else:
577 | df = pd.read_csv(dataset_dataframe)
578 |
579 | dataset_directory = str(dataset_directory)
580 | df = df.sort_values('filepath', ascending=True).reset_index(drop=True)
581 | df['filepath'] = df['filepath'].apply(lambda f: os.path.join(dataset_directory, f))
582 |
583 | # omit recordings which are smaller than an example
584 | df = df.query('duration >= @example_duration')
585 |
586 | # set aside the last sixty training signals for validation
587 | val_indices = df.query('split == "train"').iloc[-60:].index
588 | df.loc[val_indices, 'split'] = 'val'
589 |
590 | # organize by subset and split
591 | df['split'] = pd.Categorical(df['split'], ['train', 'val', 'test'])
592 | df = df.sort_values(['split'])
593 |
594 | # shuffle the recordings
595 | df = df.sample(frac=1, random_state=0)
596 |
597 | # ensure that all the audio files exist
598 | if not all([f for f in df.filepath if os.path.isfile(f)]):
599 | raise ValueError(f'Audio files missing, check {dataset_directory}.')
600 |
601 | # reindex and name the dataframe
602 | df = df[['filepath', 'split', 'duration', 'sparsity']]
603 | df = df.reset_index(drop=True)
604 | df.index.name = 'MUSAN'
605 | return df
606 |
607 |
608 | class Mixtures:
609 | """Dataset for noisy speech signals.
610 | """
611 |
612 | def __init__(
613 | self,
614 | speaker_id_or_ids: Union[int, Sequence[int]],
615 | folder_librispeech: Optional[str] = None,
616 | folder_fsd50k: Optional[str] = None,
617 | folder_musan: Optional[str] = None,
618 | split_speech: Optional[str] = 'all',
619 | split_premixture: Optional[str] = 'train',
620 | split_mixture: Optional[str] = 'train',
621 | split_reverb: Optional[str] = None,
622 | frac_speech: Optional[float] = 1.,
623 | snr_premixture: Optional[Union[float, Tuple[float, float]]] = None,
624 | snr_mixture: Optional[Union[float, Tuple[float, float]]] = None,
625 | dataset_duration: Union[int, float] = 0
626 | ):
627 | # verify speaker ID(s)
628 | if isinstance(speaker_id_or_ids, int):
629 | speaker_id_or_ids = [speaker_id_or_ids]
630 | elif not isinstance(speaker_id_or_ids, (list, set)):
631 | raise ValueError('Expected one or a sequence of speaker IDs.')
632 | if len(speaker_id_or_ids) < 1:
633 | raise ValueError('Expected one or more speaker IDs.')
634 | self.speaker_ids = speaker_id_or_ids
635 | self.frac_speech = frac_speech
636 | self.speaker_ids_repr = repr(self.speaker_ids)
637 |
638 | # missing pairs of arguments
639 | if not split_premixture:
640 | if snr_premixture is not None:
641 | raise ValueError('Missing argument `split_premixture`.')
642 | if not split_mixture:
643 | if snr_mixture is not None:
644 | raise ValueError('Missing argument `split_mixture`.')
645 |
646 | # unpack mixture SNR values
647 | if isinstance(snr_premixture, tuple):
648 | snr_premixture_min = float(min(snr_premixture))
649 | snr_premixture_max = float(max(snr_premixture))
650 | elif isinstance(snr_premixture, (float, int)):
651 | snr_premixture_min = float(snr_premixture)
652 | snr_premixture_max = float(snr_premixture)
653 | elif snr_premixture is None:
654 | snr_premixture_min = None
655 | snr_premixture_max = None
656 | else:
657 | raise ValueError('Expected `snr_premixture` to be a float type or '
658 | 'a tuple of floats.')
659 | if isinstance(snr_mixture, tuple):
660 | snr_mixture_min = float(min(snr_mixture))
661 | snr_mixture_max = float(max(snr_mixture))
662 | elif isinstance(snr_mixture, (float, int)):
663 | snr_mixture_min = float(snr_mixture)
664 | snr_mixture_max = float(snr_mixture)
665 | elif snr_mixture is None:
666 | snr_mixture_min = None
667 | snr_mixture_max = None
668 | else:
669 | raise ValueError('Expected `snr_mixture` to be a float type or '
670 | 'a tuple of floats.')
671 | self.snr_premixture_min = snr_premixture_min
672 | self.snr_premixture_max = snr_premixture_max
673 | self.snr_mixture_min = snr_mixture_min
674 | self.snr_mixture_max = snr_mixture_max
675 |
676 | # verify corpus partitions
677 | if not (split_speech in
678 | ('all', 'pretrain', 'preval', 'train', 'val', 'test')):
679 | raise ValueError('Expected `split_speech` to be either "all", '
680 | '"pretrain", "preval", "train", "val", or "test".')
681 | if snr_premixture is not None:
682 | if not (split_premixture in ('train', 'val', 'test')):
683 | raise ValueError('Expected `split_premixture` to be either '
684 | '"train", "val", or "test".')
685 | if snr_mixture is not None:
686 | if not (split_mixture in ('train', 'val', 'test')):
687 | raise ValueError('Expected `split_mixture` to be either '
688 | '"train", "val", or "test".')
689 | if split_reverb is not None:
690 | if not (split_reverb in ('train', 'val', 'test')):
691 | raise ValueError('Expected `split_reverb` to be either '
692 | '"train", "val", or "test".')
693 | self.split_speech = split_speech
694 | self.split_premixture = split_premixture or ''
695 | self.split_mixture = split_mixture or ''
696 | self.split_reverb = split_reverb or ''
697 |
698 | # verify dataset duration
699 | if not isinstance(dataset_duration, (int, float, type(None))):
700 | raise ValueError('Expected `dataset_duration` to be a number.')
701 | self.dataset_duration = int(dataset_duration or 0)
702 |
703 | self.index = 0
704 | self.example_duration = example_duration
705 |
706 | # instantiate corpora
707 | self.instantiate_corpora(
708 | folder_librispeech,
709 | folder_fsd50k,
710 | folder_musan,
711 | )
712 |
713 | # calculate maximum random offset for all utterances
714 | max_offset_func = lambda d: d.assign(max_offset=(
715 | sample_rate * d['duration'] - example_length)).astype({
716 | 'max_offset': int})
717 | self.corpus_s = max_offset_func(self.corpus_s)
718 | self.corpus_m = max_offset_func(self.corpus_m)
719 | self.corpus_n = max_offset_func(self.corpus_n)
720 |
721 | # keep track of the number of utterances, premixture noises,
722 | # and injected noises
723 | self.len_s = len(self.corpus_s)
724 | self.len_m = len(self.corpus_m)
725 | self.len_n = len(self.corpus_n)
726 | self.len_r = len(self.corpus_r)
727 | if self.len_s < 1:
728 | raise ValueError('Invalid speaker_id')
729 |
730 | # if a dataset duration is provided,
731 | # truncate the audio data to the expected size
732 | self.speech_data = np.array([])
733 | if self.dataset_duration:
734 | self.speech_data = wav_read_multiple(
735 | self.corpus_s.filepath, concatenate=True)
736 | self.speech_data = self.speech_data[:(
737 | self.dataset_duration * sample_rate)]
738 |
739 | # define flags
740 | self.is_personalized = bool(len(self.speaker_ids) == 1)
741 | self.add_premixture_noise = bool(
742 | (snr_premixture is not None) and (self.len_m > 0))
743 | self.add_noise = bool(
744 | (snr_mixture is not None) and (self.len_n > 0))
745 | self.add_reverb = bool(self.len_r > 0)
746 |
747 | if not self.is_personalized and self.add_premixture_noise:
748 | raise ExperimentError('Non-personalized dataset contains '
749 | 'premixture noise.')
750 |
751 | if self.dataset_duration and self.add_premixture_noise:
752 | raise ExperimentError('Fine-tuning dataset contains '
753 | 'premixture noise.')
754 |
755 | def instantiate_corpora(self, folder_librispeech, folder_fsd50k, folder_musan):
756 |
757 | self.corpus_s = dataframe_librispeech(folder_librispeech).query(
758 | f'speaker_id in {self.speaker_ids}')
759 | if self.split_speech != 'all':
760 | self.corpus_s = self.corpus_s.query(
761 | f'split == "{self.split_speech}"')
762 | if 0 < self.frac_speech < 1:
763 | self.corpus_s = self.corpus_s.sample(
764 | frac=self.frac_speech, random_state=0)
765 | print('Length of subsampled dataset:', len(self.corpus_s))
766 |
767 | self.corpus_m = dataframe_fsd50k(folder_fsd50k).query(
768 | f'split == "{self.split_premixture}"')
769 |
770 | self.corpus_n = dataframe_musan(folder_musan).query(
771 | f'split == "{self.split_mixture}"')
772 |
773 | self.corpus_r = pd.DataFrame() # disable support for reverb
774 | # self.corpus_r = df_irsurvey.query(
775 | # f'split == "{self.split_reverb}"')
776 | return
777 |
778 | def __dict__(self):
779 | return {
780 | 'flags': {
781 | 'is_personalized': self.is_personalized,
782 | 'add_premixture_noise': self.add_premixture_noise,
783 | 'add_noise': self.add_noise,
784 | },
785 | 'speaker_ids': self.speaker_ids_repr,
786 | 'snr_premixture_min': self.snr_premixture_min,
787 | 'snr_premixture_max': self.snr_premixture_max,
788 | 'snr_mixture_min': self.snr_mixture_min,
789 | 'snr_mixture_max': self.snr_mixture_max,
790 | 'split_speech': self.split_speech,
791 | 'split_premixture': self.split_premixture,
792 | 'split_mixture': self.split_mixture,
793 | 'dataset_duration': self.dataset_duration
794 | }
795 |
796 | def __repr__(self):
797 | return json.dumps(self.__dict__(), indent=2, sort_keys=True)
798 |
799 | def __call__(self, batch_size: int, seed: Optional[int] = None):
800 |
801 | if batch_size < 1:
802 | raise ValueError('batch_size must be at least 1.')
803 |
804 | if seed is None: self.index += 1
805 | tmp_index: int = 0 if seed is not None else self.index
806 | tmp_rng: Generator = np.random.default_rng(tmp_index)
807 |
808 | indices = np.arange(batch_size * tmp_index,
809 | batch_size * (tmp_index + 1))
810 | s_filepaths = (list(self.corpus_s.filepath.iloc[indices % self.len_s])
811 | if self.len_s else [])
812 | m_filepaths = (list(self.corpus_m.filepath.iloc[indices % self.len_m])
813 | if self.len_m else [])
814 | n_filepaths = (list(self.corpus_n.filepath.iloc[indices % self.len_n])
815 | if self.len_n else [])
816 | r_filepaths = (list(self.corpus_r.filepath.iloc[indices % self.len_r])
817 | if self.len_r else [])
818 |
819 | if self.speech_data.size > 0:
820 | s = wav_sample(self.speech_data, batch_size, seed=seed)
821 | else:
822 | s = wav_read_multiple(s_filepaths, seed=seed)
823 | x = p = s
824 |
825 | pre_snrs = np.array([])
826 | if self.add_premixture_noise:
827 | m = wav_read_multiple(m_filepaths, seed=seed)
828 | pre_snrs = tmp_rng.uniform(
829 | self.snr_premixture_min, self.snr_premixture_max,
830 | (batch_size, 1))
831 | x = p = mix_signals(s, m, pre_snrs)
832 |
833 | if self.add_reverb:
834 | r = wav_read_multiple(r_filepaths, randomly_offset=False, seed=seed)
835 | p_rev = np.empty_like(p)
836 | p_len = p.shape[-1]
837 | for i, filt in enumerate(r):
838 | p_rev[i] = convolve(p[i], filt, mode='full')[:p_len]
839 | x = p = p_rev
840 |
841 | post_snrs = np.array([])
842 | if self.add_noise:
843 | n = wav_read_multiple(n_filepaths, seed=seed)
844 | post_snrs = tmp_rng.uniform(
845 | self.snr_mixture_min, self.snr_mixture_max,
846 | (batch_size, 1))
847 | x = mix_signals(p, n, post_snrs)
848 |
849 | scale_factor = float(np.abs(x).max() + _eps)
850 | return Batch(
851 | inputs=torch.cuda.FloatTensor(x) / scale_factor,
852 | targets=torch.cuda.FloatTensor(p) / scale_factor,
853 | pre_snrs=torch.cuda.FloatTensor(pre_snrs),
854 | post_snrs=torch.cuda.FloatTensor(post_snrs)
855 | )
856 |
857 |
858 | class ContrastiveMixtures(Mixtures):
859 |
860 | def __call__(
861 | self,
862 | batch_size: int,
863 | ratio_positive: float = 0.5,
864 | seed: Optional[int] = None
865 | ):
866 | if not (0 <= ratio_positive <= 1):
867 | raise ValueError('ratio_positive should be between 0 and 1.')
868 | if batch_size < 2:
869 | raise ValueError('batch_size must be at least 2.')
870 | if batch_size % 2:
871 | raise ValueError('batch_size must be an even number.')
872 |
873 | if seed is None: self.index += 1
874 | tmp_index: int = 0 if seed is not None else self.index
875 | tmp_rng: Generator = np.random.default_rng(tmp_index)
876 |
877 | indices = np.arange(batch_size * tmp_index,
878 | batch_size * (tmp_index + 1))
879 | s_filepaths = (list(self.corpus_s.filepath.iloc[indices % self.len_s])
880 | if self.len_s else [])
881 | m_filepaths = (list(self.corpus_m.filepath.iloc[indices % self.len_m])
882 | if self.len_m else [])
883 | n_filepaths = (list(self.corpus_n.filepath.iloc[indices % self.len_n])
884 | if self.len_n else [])
885 | r_filepaths = (list(self.corpus_r.filepath.iloc[indices % self.len_r])
886 | if self.len_r else [])
887 |
888 | ordering = tmp_rng.permutation(batch_size//2)
889 | num_positive = int(batch_size//2 * ratio_positive)
890 | num_negative = batch_size//2 - num_positive
891 | labels = np.array([1]*num_positive + [0]*num_negative)
892 |
893 | bx_1, bx_2, bp_1, bp_2, bs_1, bs_2 = [], [], [], [], [], []
894 | bpre_snrs, bpost_snrs = [], []
895 |
896 | # generate pairs
897 | for i in range(0, batch_size, 2):
898 |
899 | is_positive = bool(i/2 < num_positive)
900 |
901 | if self.speech_data.size > 0:
902 | if is_positive:
903 | s_1 = s_2 = wav_sample(self.speech_data, 1, seed=seed)
904 | else:
905 | s_1, s_2 = wav_sample(self.speech_data, 2, seed=seed)
906 | else:
907 | if is_positive:
908 | s_1 = s_2 = wav_read_multiple([s_filepaths[i]], seed=seed)
909 | else:
910 | s_1, s_2 = wav_read_multiple(s_filepaths[i:i+2], seed=seed)
911 |
912 | s_1, s_2 = s_1.reshape(-1), s_2.reshape(-1)
913 |
914 | p_1, p_2 = s_1, s_2
915 | pre_snr = [None, None]
916 | if self.add_premixture_noise:
917 | if is_positive:
918 | m_1 = m_2 = wav_read_multiple([m_filepaths[i]], seed=seed)
919 | pre_snr = [tmp_rng.uniform(
920 | self.snr_premixture_min, self.snr_premixture_max)] * 2
921 | else:
922 | m_1, m_2 = wav_read_multiple(m_filepaths[i:i+2], seed=seed)
923 | pre_snr = tmp_rng.uniform(
924 | self.snr_premixture_min, self.snr_premixture_max, 2)
925 | m_1, m_2 = m_1.reshape(-1), m_2.reshape(-1)
926 | p_1 = mix_signals(s_1, m_1, pre_snr[0])
927 | p_2 = mix_signals(s_2, m_2, pre_snr[1])
928 |
929 | if self.add_reverb:
930 | if is_positive:
931 | r_1 = r_2 = wav_read_multiple([r_filepaths[i]], seed=seed)
932 | else:
933 | r_1, r_2 = wav_read_multiple(r_filepaths[i:i+2], seed=seed)
934 | r_1, r_2 = r_1.reshape(-1), r_2.reshape(-1)
935 | p_len = p_1.shape[-1]
936 | p_1 = convolve(p_1, r_1, mode='full')[:p_len]
937 | p_2 = convolve(p_2, r_2, mode='full')[:p_len]
938 |
939 | x_1, x_2 = p_1, p_2
940 | post_snr = [None, None]
941 | if self.add_noise:
942 | if not is_positive:
943 | n_1 = n_2 = wav_read_multiple([n_filepaths[i]], seed=seed)
944 | post_snr = [tmp_rng.uniform(
945 | self.snr_mixture_min, self.snr_mixture_max)] * 2
946 | else:
947 | n_1, n_2 = wav_read_multiple(n_filepaths[i:i+2], seed=seed)
948 | post_snr = tmp_rng.uniform(
949 | self.snr_mixture_min, self.snr_mixture_max, 2)
950 | n_1, n_2 = n_1.reshape(-1), n_2.reshape(-1)
951 | x_1 = mix_signals(p_1, n_1, post_snr[0])
952 | x_2 = mix_signals(p_2, n_2, post_snr[1])
953 |
954 | bp_1.append(p_1)
955 | bp_2.append(p_2)
956 | bx_1.append(x_1)
957 | bx_2.append(x_2)
958 | if pre_snr[0]:
959 | bpre_snrs.append(pre_snr)
960 | if post_snr[0]:
961 | bpost_snrs.append(post_snr)
962 |
963 | # stack and shuffle all the data in the right order
964 | bp_1 = np.stack(bp_1)[ordering]
965 | bp_2 = np.stack(bp_2)[ordering]
966 | bx_1 = np.stack(bx_1)[ordering]
967 | bx_2 = np.stack(bx_2)[ordering]
968 | if bpre_snrs:
969 | bpre_snrs = np.stack(bpre_snrs)[ordering]
970 | if bpost_snrs:
971 | bpost_snrs = np.stack(bpost_snrs)[ordering]
972 | labels = labels[ordering]
973 |
974 | scale_factor_1 = float(np.abs(bx_1).max() + _eps)
975 | scale_factor_2 = float(np.abs(bx_2).max() + _eps)
976 | scale_factor = max([scale_factor_1, scale_factor_2])
977 | return ContrastiveBatch(
978 | inputs_1=torch.cuda.FloatTensor(bx_1) / scale_factor,
979 | inputs_2=torch.cuda.FloatTensor(bx_2) / scale_factor,
980 | targets_1=torch.cuda.FloatTensor(bp_1) / scale_factor,
981 | targets_2=torch.cuda.FloatTensor(bp_2) / scale_factor,
982 | labels=torch.cuda.BoolTensor(labels),
983 | pre_snrs=torch.cuda.FloatTensor(bpre_snrs),
984 | post_snrs=torch.cuda.FloatTensor(bpost_snrs)
985 | )
986 |
--------------------------------------------------------------------------------
/code/exp_models.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import pathlib
4 | from contextlib import suppress
5 | from typing import Any, Optional, Union, Sequence, Tuple, Dict, Callable
6 |
7 | import asteroid.models
8 | import torch
9 | import torch.nn.functional as tf
10 | from torch.nn.modules.loss import _Loss
11 |
12 | from exp_data import Mixtures, sample_rate, sisdr_improvement, sdr, sisdr, wav_write
13 | from exp_utils import make_2d, make_3d, pad_x_to_y, shape_reconstructed
14 |
15 |
16 | _fft_size: int = 1024
17 | _hop_size: int = 256
18 | _eps: float = 1e-8
19 | _recover_noise: bool = False
20 | _window: torch.Tensor = torch.hann_window(_fft_size)
21 | try:
22 | from pesq import pesq
23 | except ImportError:
24 | pesq = lambda *a, **k: 0
25 | print('Module `pesq` not installed, this metric will be a no-op.')
26 | try:
27 | from pystoi import stoi
28 | except ImportError:
29 | stoi = lambda *a, **k: 0
30 | print('Module `pystoi` not installed, this metric will be a no-op.')
31 |
32 |
33 | def _forward_single_mask(self, waveform: torch.Tensor):
34 | """Custom forward function to do single-mask two-source estimation.
35 | """
36 | # Remember shape to shape reconstruction
37 | shape = torch.tensor(waveform.shape)
38 |
39 | # Reshape to (batch, n_mix, time)
40 | waveform = make_3d(waveform)
41 |
42 | # Real forward
43 | tf_rep = self.forward_encoder(waveform)
44 | est_masks = self.forward_masker(tf_rep)
45 | est_masks = est_masks.repeat(1, 2, 1, 1)
46 | est_masks[:, 1] = 1 - est_masks[:, 1]
47 | masked_tf_rep = self.apply_masks(tf_rep, est_masks)
48 | decoded = self.forward_decoder(masked_tf_rep)
49 |
50 | reconstructed = pad_x_to_y(decoded, waveform)
51 | return shape_reconstructed(reconstructed, shape)
52 |
53 |
54 | def _logistic(v, beta: float = 1., offset: float = 0.):
55 | return 1 / (1 + torch.exp(-beta * (v - offset)))
56 |
57 |
58 | def _stft(waveform: torch.Tensor):
59 | """Calculates the Short-time Fourier transform (STFT)."""
60 |
61 | # perform the short-time Fourier transform
62 | spectrogram = torch.stft(
63 | waveform, _fft_size, _hop_size,
64 | window=_window.to(waveform.device),
65 | return_complex=False
66 | )
67 |
68 | # swap seq_len & feature_dim of the spectrogram (for RNN processing)
69 | spectrogram = spectrogram.permute(0, 2, 1, 3)
70 |
71 | # calculate the magnitude spectrogram
72 | magnitude_spectrogram = torch.sqrt(spectrogram[..., 0] ** 2 +
73 | spectrogram[..., 1] ** 2)
74 |
75 | return spectrogram, magnitude_spectrogram
76 |
77 |
78 | def _istft(spectrogram: torch.Tensor, mask: Optional[torch.Tensor] = None):
79 | """Calculates the inverse Short-time Fourier transform (ISTFT)."""
80 |
81 | # apply a time-frequency mask if provided
82 | if mask is not None:
83 | spectrogram[..., 0] *= mask
84 | spectrogram[..., 1] *= mask
85 |
86 | # swap seq_len & feature_dim of the spectrogram (undo RNN processing)
87 | spectrogram = spectrogram.permute(0, 2, 1, 3)
88 |
89 | # perform the inverse short-time Fourier transform
90 | waveform = torch.istft(
91 | spectrogram, _fft_size, _hop_size,
92 | window=_window.to(spectrogram.device),
93 | return_complex=False
94 | )
95 |
96 | return waveform
97 |
98 |
99 | class ConvTasNet(asteroid.models.ConvTasNet):
100 | if _recover_noise:
101 | forward = _forward_single_mask
102 |
103 |
104 | class DPRNNTasNet(asteroid.models.DPRNNTasNet):
105 | if _recover_noise:
106 | forward = _forward_single_mask
107 |
108 |
109 | class DPTNet(asteroid.models.DPTNet):
110 | if _recover_noise:
111 | forward = _forward_single_mask
112 |
113 |
114 | class GRUNet(torch.nn.Module):
115 |
116 | def __init__(self, hidden_size: int, num_layers: int = 2,
117 | bidirectional: bool = False):
118 | super().__init__()
119 | self.hidden_size = hidden_size
120 | self.num_layers = num_layers
121 | self.bidirectional = bidirectional
122 |
123 | # create a neural network which predicts a TF binary ratio mask
124 | self.rnn = torch.nn.GRU(
125 | input_size=int(_fft_size // 2 + 1),
126 | hidden_size=self.hidden_size,
127 | num_layers=self.num_layers,
128 | bidirectional=self.bidirectional,
129 | batch_first=True
130 | )
131 | self.dnn = torch.nn.Sequential(
132 | torch.nn.Linear(
133 | in_features=self.hidden_size * (1+self.bidirectional),
134 | out_features=int(_fft_size // 2 + 1)
135 | ),
136 | torch.nn.Sigmoid()
137 | )
138 |
139 | def forward(self, waveform: torch.Tensor):
140 | # convert waveform to spectrogram
141 | (x, x_magnitude) = _stft(waveform)
142 |
143 | # generate a time-frequency mask
144 | h = self.rnn(x_magnitude)[0]
145 | y = self.dnn(h)
146 | y = y.reshape_as(x_magnitude)
147 |
148 | # convert masked spectrogram back to waveform
149 | denoised = _istft(x, mask=y)
150 |
151 | return denoised
152 |
153 |
154 | class SNRPredictor(torch.nn.Module):
155 |
156 | def __init__(self, hidden_size: int = 1024, num_layers: int = 3):
157 | super().__init__()
158 | self.hidden_size: int = hidden_size
159 | self.num_layers: int = num_layers
160 |
161 | # layers
162 | self.rnn = torch.nn.GRU(
163 | input_size=int(_fft_size // 2 + 1),
164 | hidden_size=self.hidden_size,
165 | num_layers=self.num_layers,
166 | batch_first=True
167 | )
168 | self.dnn = torch.nn.Linear(
169 | in_features=self.hidden_size,
170 | out_features=1
171 | )
172 |
173 | def forward(self, waveform: torch.Tensor):
174 |
175 | # convert to time-frequency domain
176 | (_, X_magnitude) = _stft(waveform)
177 |
178 | # generate frame-by-frame SNR predictions
179 | predicted_snrs = self.dnn(self.rnn(X_magnitude)[0]).reshape(
180 | -1, X_magnitude.shape[1]).detach()
181 |
182 | return predicted_snrs if self.training else _logistic(predicted_snrs)
183 |
184 | def load(self):
185 | self.load_state_dict(torch.load('snr_predictor'), strict=False)
186 |
187 |
188 | class SegmentalLoss(_Loss):
189 | """Loss function applied to audio segmented frame by frame."""
190 |
191 | def __init__(
192 | self,
193 | loss_type: str = 'sisdr',
194 | reduction: str = 'none',
195 | segment_size: int = 1024,
196 | hop_length: int = 256,
197 | windowing: bool = True,
198 | centering: bool = True,
199 | pad_mode: str = 'reflect'
200 | ):
201 | super().__init__(reduction=reduction)
202 | assert loss_type in ('mse', 'snr', 'sisdr', 'sdsdr')
203 | assert pad_mode in ('constant', 'reflect')
204 | assert isinstance(centering, bool)
205 | assert isinstance(windowing, bool)
206 | assert segment_size > hop_length > 0
207 |
208 | self.loss_type = loss_type
209 | self.segment_size = segment_size
210 | self.hop_length = hop_length
211 | self.pad_mode = pad_mode
212 |
213 | self.centering = centering
214 | self.windowing = windowing
215 |
216 | self.unfold = torch.nn.Unfold(
217 | kernel_size=(1, segment_size),
218 | stride=(1, hop_length)
219 | )
220 | self.window = torch.hann_window(self.segment_size).view(1, 1, -1)
221 |
222 | def forward(
223 | self,
224 | estimate: torch.Tensor,
225 | target: torch.Tensor,
226 | weights: Optional[torch.Tensor] = None,
227 | ):
228 | assert target.size() == estimate.size()
229 | assert target.ndim == 2
230 | assert self.segment_size < target.size()[-1]
231 |
232 | # subtract signal means
233 | target -= torch.mean(target, dim=1, keepdim=True)
234 | estimate -= torch.mean(estimate, dim=1, keepdim=True)
235 |
236 | # center the signals using padding
237 | if self.centering:
238 | signal_dim = target.dim()
239 | ext_shape = [1] * (3 - signal_dim) + list(target.size())
240 | p = int(self.segment_size // 2)
241 | target = tf.pad(target.view(ext_shape), [p, p], self.pad_mode)
242 | target = target.view(target.shape[-signal_dim:])
243 | estimate = tf.pad(estimate.view(ext_shape), [p, p], self.pad_mode)
244 | estimate = estimate.view(estimate.shape[-signal_dim:])
245 |
246 | # use unfold to construct overlapping frames out of inputs
247 | n_batch = target.size()[0]
248 | target = self.unfold(target.view(n_batch,1,1,-1)).permute(0,2,1)
249 | estimate = self.unfold(estimate.view(n_batch,1,1,-1)).permute(0,2,1)
250 | losses: torch.Tensor
251 |
252 | # window all the frames
253 | if self.windowing:
254 | self.window = self.window.to(target.device)
255 | target = torch.multiply(target, self.window)
256 | estimate = torch.multiply(estimate, self.window)
257 |
258 | # MSE loss
259 | if self.loss_type == 'mse':
260 | losses = ((target - estimate)**2).sum(dim=2)
261 | losses /= self.segment_size
262 |
263 | # SDR based loss
264 | else:
265 |
266 | if self.loss_type == 'snr':
267 | scaled_target = target
268 | else:
269 | dot = (estimate * target).sum(dim=2, keepdim=True)
270 | s_target_energy = (target ** 2).sum(dim=2, keepdim=True) + _eps
271 | scaled_target = dot * target / s_target_energy
272 |
273 | if self.loss_type == 'sisdr':
274 | e_noise = estimate - scaled_target
275 | else:
276 | e_noise = estimate - target
277 |
278 | losses = (scaled_target ** 2).sum(dim=2)
279 | losses = losses / ((e_noise ** 2).sum(dim=2) + _eps)
280 | losses += _eps
281 | losses = torch.log10(losses)
282 | losses *= -10
283 |
284 | # apply weighting (if provided)
285 | if weights is not None:
286 | assert losses.size() == weights.size()
287 | weights = weights.detach()
288 | losses = torch.multiply(losses, weights).mean(dim=1)
289 |
290 | if self.reduction == 'mean':
291 | losses = losses.mean()
292 |
293 | return losses
294 |
295 |
296 | def feedforward(
297 | inputs: torch.Tensor,
298 | targets: torch.Tensor,
299 | model: torch.nn.Module,
300 | loss_reg: Callable,
301 | loss_segm: Callable,
302 | weights: Optional[torch.Tensor] = None,
303 | accumulation: bool = False,
304 | test: bool = False,
305 | skip_input_metrics: bool = False,
306 | num_examples_to_save: int = 0
307 | ) -> Dict[str, float]:
308 | """Runs a feedforward pass through a model by unraveling batched data.
309 | """
310 | batch_size = inputs.shape[0]
311 | validation = not bool(model.training)
312 | context = torch.no_grad() if (validation or test) else suppress()
313 | r_sisdr_inp: float = 0
314 | r_sisdr_enh: float = 0
315 | r_sdr_inp: float = 0
316 | r_sdr_enh: float = 0
317 | r_pesq_inp: float = 0
318 | r_pesq_enh: float = 0
319 | r_stoi_inp: float = 0
320 | r_stoi_enh: float = 0
321 | r_loss: float = 0
322 |
323 | with context:
324 | for i in range(batch_size):
325 |
326 | # unravel batch
327 | x = inputs[i].unsqueeze(0).cuda()
328 | t = targets[i].unsqueeze(0).cuda()
329 |
330 | # forward pass
331 | y = make_2d(model(x))
332 | if 0 <= i < num_examples_to_save:
333 | wav_write(f'example_{i:02d}.wav',
334 | y.reshape(-1).detach().cpu().numpy())
335 |
336 | # backwards pass
337 | if not test:
338 | if weights is not None:
339 | w = weights[i].unsqueeze(0)
340 | loss_tensor = torch.mean(
341 | loss_segm(y, t, w))
342 | else:
343 | loss_tensor = torch.mean(
344 | loss_reg(y, t))
345 | loss_tensor /= batch_size
346 | r_loss += float(loss_tensor)
347 | if not (validation or test):
348 | loss_tensor.backward()
349 |
350 | # compute PESQ and STOI (perceptual scores) only during testing
351 | if test:
352 | _x = x.detach().cpu().numpy().squeeze()
353 | _y = y.detach().cpu().numpy().squeeze()
354 | _t = t.detach().cpu().numpy().squeeze()
355 | if not skip_input_metrics:
356 | r_pesq_inp += pesq(sample_rate, _t, _x, 'wb')
357 | r_pesq_enh += pesq(sample_rate, _t, _y, 'wb')
358 | if not skip_input_metrics:
359 | r_stoi_inp += stoi(_t, _x, sample_rate, True)
360 | r_stoi_enh += stoi(_t, _y, sample_rate, True)
361 |
362 | # calculate signal improvement
363 | if not skip_input_metrics:
364 | r_sdr_inp += float(sdr(x, t, reduction='mean'))
365 | r_sdr_enh += float(sdr(y, t, reduction='mean'))
366 | r_sisdr_inp += float(sisdr(x, t, reduction='mean'))
367 | r_sisdr_enh += float(sisdr(y, t, reduction='mean'))
368 |
369 | r_sdr_inp /= batch_size
370 | r_sdr_enh /= batch_size
371 | r_sisdr_inp /= batch_size
372 | r_sisdr_enh /= batch_size
373 | r_pesq_inp /= batch_size
374 | r_pesq_enh /= batch_size
375 | r_stoi_inp /= batch_size
376 | r_stoi_enh /= batch_size
377 |
378 | return dict(loss=r_loss,
379 | sdr_inp=r_sdr_inp,
380 | sdr_enh=r_sdr_enh,
381 | sisdri=(r_sisdr_enh-r_sisdr_inp),
382 | sisdr_inp=r_sisdr_inp,
383 | sisdr_enh=r_sisdr_enh,
384 | pesq_inp=r_pesq_inp,
385 | pesq_enh=r_pesq_enh,
386 | stoi_inp=r_stoi_inp,
387 | stoi_enh=r_stoi_enh,
388 | )
389 |
390 |
391 | def contrastive_negative_term(ly, lt, term_type: str = 'max'):
392 | if term_type == 'max':
393 | return torch.mean(torch.max(ly, lt))
394 | elif term_type == 'abs':
395 | return torch.mean(torch.abs(ly - lt))
396 | else:
397 | return torch.mean(torch.pow(ly - lt, 2))
398 |
399 |
400 | def contrastive_feedforward(
401 | inputs_1: torch.Tensor,
402 | inputs_2: torch.Tensor,
403 | targets_1: torch.Tensor,
404 | targets_2: torch.Tensor,
405 | labels: torch.BoolTensor,
406 | loss_reg: Callable,
407 | loss_segm: Callable,
408 | lambda_positive: float,
409 | lambda_negative: float,
410 | model: torch.nn.Module,
411 | weights_1: Optional[torch.Tensor] = None,
412 | weights_2: Optional[torch.Tensor] = None,
413 | negative_term_type: str = 'max',
414 | accumulation: bool = False,
415 | validation: bool = False,
416 | test: bool = False
417 | ) -> Dict[str, float]:
418 | """Runs a feedforward pass through a model by unraveling batched data.
419 | """
420 | labels = labels.bool()
421 | batch_size = inputs_1.shape[0]
422 | context = torch.no_grad() if validation else suppress()
423 | ratio_pos = float(sum(labels) / batch_size)
424 | ratio_neg = float(sum(~labels) / batch_size)
425 | use_dp = bool(weights_1 is not None) and bool(weights_2 is not None)
426 | r_sisdri: float = 0
427 | r_loss: float = 0
428 | r_loss_sig: float = 0
429 | r_loss_pos: float = 0
430 | r_loss_neg: float = 0
431 |
432 | with context:
433 | for i in range(batch_size):
434 |
435 | loss_tensor_sig, loss_tensor_pos, loss_tensor_neg = 0, 0, 0
436 |
437 | # unravel batch
438 | x_1 = inputs_1[i].unsqueeze(0).cuda()
439 | x_2 = inputs_2[i].unsqueeze(0).cuda()
440 | t_1 = targets_1[i].unsqueeze(0).cuda()
441 | t_2 = targets_2[i].unsqueeze(0).cuda()
442 |
443 | # forward pass
444 | y_1 = make_2d(model(x_1))
445 | y_2 = make_2d(model(x_2))
446 |
447 | # stack for batchwise loss
448 | x = torch.cat([x_1, x_2], dim=0)
449 | t = torch.cat([t_1, t_2], dim=0)
450 | y = torch.cat([y_1, y_2], dim=0)
451 |
452 | # calculate loss
453 | if use_dp:
454 | w_1 = weights_1[i].unsqueeze(0).cuda()
455 | w_2 = weights_2[i].unsqueeze(0).cuda()
456 | w_p = w_1 * w_2
457 | w = torch.cat([w_1, w_2], dim=0)
458 | loss_tensor_sig = torch.mean(loss_segm(y, t, w))
459 | if labels[i]:
460 | loss_tensor_pos = torch.mean(
461 | loss_segm(y_1, y_2, w_1))
462 | else:
463 | loss_tensor_neg = contrastive_negative_term(
464 | loss_segm(y_1, y_2, w_p), loss_segm(t_1, t_2, w_p))
465 | else:
466 | loss_tensor_sig = torch.mean(loss_reg(y, t))
467 | if labels[i]:
468 | loss_tensor_pos = torch.mean(
469 | loss_reg(y_1, y_2))
470 | else:
471 | loss_tensor_neg = contrastive_negative_term(
472 | loss_reg(y_1, y_2), loss_reg(t_1, t_2))
473 |
474 | loss_tensor_sig /= batch_size
475 | loss_tensor_pos *= lambda_positive / (batch_size / 2)
476 | loss_tensor_neg *= lambda_negative / (batch_size / 2)
477 | loss_tensor_total = (
478 | loss_tensor_sig + loss_tensor_pos + loss_tensor_neg)
479 |
480 | r_loss += float(loss_tensor_total)
481 | r_loss_sig += float(loss_tensor_sig)
482 | r_loss_pos += float(loss_tensor_pos)
483 | r_loss_neg += float(loss_tensor_neg)
484 |
485 | # backwards pass
486 | if not validation:
487 | loss_tensor_total.backward()
488 |
489 | # calculate signal improvement
490 | r_sisdri += float(sisdr_improvement(y, t, x, 'mean'))
491 |
492 | r_sisdri /= batch_size
493 |
494 | return dict(loss=r_loss,
495 | loss_sig=r_loss_sig,
496 | loss_pos=r_loss_pos,
497 | loss_neg=r_loss_neg,
498 | sisdri=r_sisdri)
499 |
500 |
501 | def init_ctn(N=512, L=16, B=128, H=512, Sc=128, P=3, X=8, R=3, causal=False):
502 | model_config = locals()
503 | return (ConvTasNet(
504 | n_src=1,
505 | sample_rate=sample_rate,
506 | n_filters=N,
507 | kernel_size=L,
508 | bn_chan=B,
509 | hid_chan=H,
510 | skip_chan=Sc,
511 | conv_kernel_size=P,
512 | n_blocks=X,
513 | n_repeats=R,
514 | causal=causal
515 | ), model_config)
516 |
517 |
518 | def init_dprnn(N=64, L=2, B=128, H=128, R=6, K=250, T='lstm', causal=False):
519 | model_config = locals()
520 | return (DPRNNTasNet(
521 | n_src=1,
522 | sample_rate=sample_rate,
523 | n_filters=N,
524 | kernel_size=L,
525 | bn_chan=B,
526 | hid_size=H,
527 | n_repeats=R,
528 | chunk_size=K,
529 | rnn_type=T,
530 | bidirectional=(not causal)
531 | ), model_config)
532 |
533 |
534 | def init_gru(hidden_size=64, num_layers=2, bidirectional=True):
535 | model_config = locals()
536 | return (GRUNet(
537 | hidden_size=hidden_size,
538 | num_layers=num_layers,
539 | bidirectional=bidirectional
540 | ), model_config)
541 |
542 |
543 | def init_model(
544 | model_name: str,
545 | model_size: Optional[str] = None,
546 | model_config: Optional[dict] = None
547 | ) -> Tuple[torch.nn.Module, int, dict]:
548 | """Instantiates model based on name and size.
549 | """
550 | # instantiate network
551 | model: torch.nn.Module
552 | model_config: dict = model_config or {}
553 | if not bool(model_size or model_config):
554 | raise ValueError('Expected either `model_size` or `model_config`.')
555 | if not (model_size in {'tiny', 'small', 'medium', 'large'}):
556 | raise ValueError('Size must be either "small", "medium", or "large".')
557 | if model_name == 'convtasnet':
558 | if model_config:
559 | model, model_config = init_ctn(**model_config)
560 | else:
561 | model, model_config = init_ctn(**{
562 | 'tiny': dict(H=32, B=8, X=7, R=2),
563 | 'small': dict(H=64, B=16, X=7, R=2),
564 | 'medium': dict(H=128, B=32, X=7, R=2),
565 | 'large': dict(H=256, B=64, X=7, R=2),
566 | }.get(model_size))
567 | elif model_name == 'grunet':
568 | if model_config:
569 | model, model_config = init_gru(**model_config)
570 | else:
571 | model, model_config = init_gru(**{
572 | 'tiny': dict(hidden_size=32, num_layers=2),
573 | 'small': dict(hidden_size=64, num_layers=2),
574 | 'medium': dict(hidden_size=128, num_layers=2),
575 | 'large': dict(hidden_size=256, num_layers=2)
576 | }.get(model_size))
577 | else:
578 | raise ValueError(f'Unsupported model name: "{model_name}".')
579 | model_nparams: int = count_parameters(model)
580 |
581 | return model, model_nparams, model_config
582 |
583 |
584 | def load_checkpoint(
585 | path: Union[str, os.PathLike]
586 | ) -> (torch.nn.Module, dict, int):
587 |
588 | input_path = pathlib.Path(path)
589 | print(input_path)
590 |
591 | # If the path suffix is the PyTorch file extension,
592 | # then it's already a checkpoint
593 | if input_path.is_file() and input_path.suffix == '.pt':
594 | checkpoint_path = str(input_path)
595 |
596 | # If it's a directory, get the latest checkpoint
597 | # from that folder.
598 | elif input_path.is_dir():
599 | try:
600 | m = {
601 | input_path.joinpath('ckpt_best.pt'),
602 | input_path.joinpath('ckpt_last.pt')
603 | }
604 | checkpoints = set(input_path.glob('*.pt'))
605 | if m.issubset(checkpoints):
606 | checkpoints.remove(input_path.joinpath('ckpt_last.pt'))
607 | checkpoint_path = str(max(checkpoints, key=os.path.getctime))
608 | except ValueError:
609 | raise IOError(f'Input directory {str(input_path)} does not contain '
610 | f'checkpoints.')
611 | else:
612 | raise IOError(f'{str(input_path)} is not a checkpoint or directory.')
613 |
614 | # Get the appropriate config file.
615 | config_path = pathlib.Path(checkpoint_path).with_name('config.json')
616 | if not config_path.is_file():
617 | raise IOError(f'Missing config file at {str(input_path)}.')
618 |
619 | # Load the config file.
620 | with open(config_path, 'r') as fp:
621 | config: dict = json.load(fp)
622 |
623 | # Initialize the model
624 | model = init_model(model_name=config.get('model_name'),
625 | model_size=config.get('model_size'))[0]
626 | ckpt = torch.load(checkpoint_path)
627 | num_examples: int = ckpt.get('num_examples')
628 | try:
629 | model.load_state_dict(ckpt.get('model_state_dict'), strict=True)
630 | except RuntimeError as e:
631 | if 'state_dict' in str(e):
632 | raise RuntimeError(f'{str(checkpoint_path)} is a mismatched model.')
633 | model.cuda()
634 |
635 | return model, config, num_examples
636 |
637 |
638 | def count_parameters(network: Any) -> int:
639 | return sum(p.numel() for p in network.parameters() if p.requires_grad)
640 |
641 |
642 |
643 | def test_denoiser_with_speaker(
644 | model: torch.nn.Module,
645 | speaker_id: int = 200,
646 | num_examples_to_save: int = 0
647 | ) -> dict:
648 | no_op_loss = lambda *a, **k: 0
649 | dataset = Mixtures(speaker_id, split_speech='test', split_mixture='test',
650 | snr_mixture=(-5, 5))
651 | batch = dataset(100, seed=0)
652 | results = feedforward(batch.inputs, batch.targets, model,
653 | weights=None, accumulation=True,
654 | loss_reg=no_op_loss, loss_segm=no_op_loss,
655 | test=True, num_examples_to_save=num_examples_to_save)
656 | return results
657 |
658 |
659 | @torch.no_grad()
660 | def test_denoiser_from_module(
661 | model: torch.nn.Module,
662 | data_te: Union[Mixtures, Sequence[Mixtures]],
663 | accumulation: bool = False
664 | ) -> dict:
665 | """Evaluates speech enhancement model using provided dataset.
666 | """
667 | no_op_loss = lambda *a, **k: 0
668 | if not isinstance(data_te, (list, tuple)):
669 | data_te = [data_te]
670 | results = {}
671 | for dataset in data_te:
672 | batch = dataset(100, seed=0)
673 | key = dataset.speaker_ids_repr
674 | results[key] = feedforward(
675 | batch.inputs, batch.targets,
676 | model, weights=None, accumulation=accumulation, test=True,
677 | loss_reg=no_op_loss, loss_segm=no_op_loss
678 | )
679 | return results
680 |
681 |
682 | @torch.no_grad()
683 | def test_denoiser_from_file(
684 | checkpoint_path: Union[str, os.PathLike],
685 | data_te: Union[Mixtures, Sequence[Mixtures]],
686 | accumulation: bool = False
687 | ) -> dict:
688 | """Evaluates speech enhancement model checkpoint using provided dataset.
689 | """
690 | # load a config yaml file which should be in the same location
691 | config_file = pathlib.Path(checkpoint_path).with_name('config.json')
692 | if not config_file.exists():
693 | raise ValueError(f'Could not find {str(config_file)}.')
694 | with open(config_file, 'r') as fp:
695 | config: dict = json.load(fp)
696 |
697 | model = init_model(model_name=config.get('model_name'),
698 | model_size=config.get('model_size'))[0]
699 | ckpt = torch.load(checkpoint_path)
700 | num_examples: int = ckpt.get('num_examples')
701 | model.load_state_dict(ckpt.get('model_state_dict'), strict=True)
702 | model.cuda()
703 |
704 | results = test_denoiser_from_module(model, data_te, accumulation)
705 | results['num_examples'] = num_examples
706 |
707 | return results
708 |
709 |
710 | @torch.no_grad()
711 | def test_denoiser_from_folder(
712 | checkpoint_folder: Union[str, os.PathLike],
713 | data_te: Union[Mixtures, Sequence[Mixtures]],
714 | accumulation: bool = False,
715 | finetune: int = 0,
716 | use_last: bool = False
717 | ):
718 | """Selects speech enhancement model checkpoint from folder, and then
719 | evaluates using provided dataset.
720 | """
721 | finetune_suffix = f'_ft_{int(finetune):02d}' if finetune else ''
722 | # identify the best checkpoint using saved text file
723 | checkpoint_folder = pathlib.Path(checkpoint_folder)
724 | if use_last:
725 | checkpoint_path = checkpoint_folder.joinpath(f'ckpt_last.pt')
726 | elif checkpoint_folder.joinpath(f'ckpt_best{finetune_suffix}.pt').exists():
727 | checkpoint_path = checkpoint_folder.joinpath(
728 | f'ckpt_best{finetune_suffix}.pt')
729 | else:
730 | best_step_file = next(checkpoint_folder.glob('best_step*'))
731 | if not best_step_file.exists():
732 | raise ValueError(f'Could not find {str(best_step_file)}.')
733 | with open(best_step_file, 'r') as fp:
734 | best_step = int(fp.readline())
735 | checkpoint_path = checkpoint_folder.joinpath(
736 | f'ckpt_{best_step:08}{finetune_suffix}.pt')
737 | if not checkpoint_path.exists():
738 | raise IOError(f'{str(checkpoint_path)} does not exist.')
739 |
740 | return test_denoiser_from_file(checkpoint_path, data_te, accumulation)
741 |
742 |
743 | @torch.no_grad()
744 | def test_denoiser(
745 | model: Union[str, os.PathLike, torch.nn.Module],
746 | data_te: Union[Mixtures, Sequence[Mixtures]],
747 | accumulation: bool = False,
748 | finetune: int = 0,
749 | use_last: bool = False
750 | ):
751 | if isinstance(model, torch.nn.Module):
752 | return test_denoiser_from_module(model, data_te, accumulation)
753 | elif isinstance(model, (str, os.PathLike)):
754 | path = pathlib.Path(model)
755 | if path.is_dir():
756 | return test_denoiser_from_folder(path, data_te, accumulation,
757 | finetune, use_last)
758 | elif path.is_file():
759 | return test_denoiser_from_file(path, data_te, accumulation)
760 | else:
761 | raise ValueError(f'{str(path)} does not exist.')
762 | else:
763 | raise ValueError('Expected input to be PyTorch model or filepath.')
764 |
--------------------------------------------------------------------------------
/code/exp_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import torch
4 | import torch.nn.functional
5 | import yaml
6 |
7 |
8 | class EarlyStopping(Exception):
9 | pass
10 |
11 |
12 | class SmokeTest(Exception):
13 | pass
14 |
15 |
16 | class ExperimentError(Exception):
17 | pass
18 |
19 |
20 | def make_2d(x: torch.Tensor):
21 | """Normalize shape of `x` to two dimensions: [batch, time]."""
22 | if isinstance(x, np.ndarray):
23 | x = torch.from_numpy(x)
24 | if x.ndim == 1:
25 | return x.reshape(1, -1)
26 | elif x.ndim == 3:
27 | return x.squeeze(1)
28 | else:
29 | if x.ndim != 2: raise ValueError('Could not force 2d.')
30 | return x
31 |
32 |
33 | def make_3d(x: torch.Tensor):
34 | """Normalize shape of `x` to three dimensions: [batch, n_chan, time]."""
35 | if isinstance(x, np.ndarray):
36 | x = torch.from_numpy(x)
37 | if x.ndim == 1:
38 | return x.reshape(1, 1, -1)
39 | elif x.ndim == 2:
40 | return x.unsqueeze(1)
41 | else:
42 | if x.ndim != 3: raise ValueError('Could not force 3d.')
43 | return x
44 |
45 |
46 | def pad_x_to_y(x: torch.Tensor, y: torch.Tensor, axis: int = -1):
47 | """Right-pad or right-trim first argument to have same size as second argument
48 | Args:
49 | x (torch.Tensor): Tensor to be padded.
50 | y (torch.Tensor): Tensor to pad `x` to.
51 | axis (int): Axis to pad on.
52 | Returns:
53 | torch.Tensor, `x` padded to match `y`'s shape.
54 | """
55 | if axis != -1:
56 | raise NotImplementedError
57 | inp_len = y.shape[axis]
58 | output_len = x.shape[axis]
59 | return torch.nn.functional.pad(x, [0, inp_len - output_len])
60 |
61 |
62 | def shape_reconstructed(reconstructed: torch.Tensor, size: torch.Tensor):
63 | """Reshape `reconstructed` to have same size as `size`
64 | Args:
65 | reconstructed (torch.Tensor): Reconstructed waveform
66 | size (torch.Tensor): Size of desired waveform
67 | Returns:
68 | torch.Tensor: Reshaped waveform
69 | """
70 | if len(size) == 1:
71 | return reconstructed.squeeze(0)
72 | return reconstructed
73 |
74 |
75 | def get_config_from_yaml(yaml_filepath: str):
76 |
77 | if not os.path.exists(yaml_filepath):
78 | raise OSError(f'{yaml_filepath} not found')
79 |
80 | config = {}
81 | with open(yaml_filepath) as fp:
82 | config = yaml.safe_load(fp)
83 | nonlist_keys = (
84 | 'available_devices',
85 | 'num_gpus_per_experiment',
86 | 'num_cpus_per_experiment',
87 | 'output_folder',
88 | 'folder_librispeech',
89 | 'folder_fsd50k',
90 | 'folder_musan',
91 | 'sample_rate',
92 | 'example_duration',
93 | )
94 | for k in config.keys():
95 | if k not in nonlist_keys:
96 | if not isinstance(config[k], list):
97 | config[k] = [config[k],]
98 |
99 | return config
100 |
--------------------------------------------------------------------------------
/code/finetune.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from run import finetune_denoiser
3 |
4 | if __name__ == '__main__':
5 | finetune_denoiser(dataset_duration=float(sys.argv[1]),
6 | checkpoint_locations=sys.argv[2:])
7 |
8 |
--------------------------------------------------------------------------------
/code/requirements.txt:
--------------------------------------------------------------------------------
1 | asteroid==0.6.0
2 | librosa==0.10.0
3 | numpy==1.23.5
4 | pandas==1.5.3
5 | pesq==0.0.4
6 | pystoi==0.3.3
7 | pytorch_lightning==1.9.3
8 | PyYAML==6.0
9 | ray==2.3.0
10 | scipy==1.10.1
11 | soundfile==0.12.1
12 | torch==1.13.1
13 |
--------------------------------------------------------------------------------
/code/run.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import copy
3 | import itertools
4 | import json
5 | import os
6 | import socket
7 | import sys
8 | import time
9 | import warnings
10 | from ast import literal_eval
11 | from datetime import datetime
12 | from math import ceil
13 | from pathlib import Path
14 | from typing import Optional, List, Sequence
15 | from typing import Union
16 |
17 | import asteroid.losses
18 | import numpy as np
19 | import torch
20 | import yaml
21 | from pytorch_lightning import seed_everything
22 | from ray import tune
23 | from torch.utils.tensorboard import SummaryWriter
24 |
25 | from exp_data import ContrastiveMixtures, Mixtures
26 | from exp_data import example_duration, sample_rate
27 | from exp_models import SegmentalLoss, SNRPredictor, init_model, load_checkpoint
28 | from exp_models import contrastive_feedforward, feedforward
29 | from exp_utils import EarlyStopping, ExperimentError, SmokeTest
30 |
31 | warnings.filterwarnings('ignore')
32 | torch.backends.cuda.matmul.allow_tf32 = False
33 | torch.backends.cudnn.allow_tf32 = False
34 |
35 | _host = str(socket.gethostname().split('.')[-3:].pop(0))
36 | _snrp_path = Path(__file__).resolve().parent.joinpath('snr_predictor')
37 | _tune_kwargs = dict(
38 | reuse_actors=True,
39 | log_to_file=True,
40 | local_dir='.',
41 | fail_fast=True,
42 | verbose=1
43 | )
44 |
45 | def save_config(
46 | output_directory: Union[str, os.PathLike],
47 | config: dict
48 | ):
49 | """Saves the config dict to file."""
50 | output_directory = Path(output_directory)
51 | with open(output_directory.joinpath('config.json'), 'w',
52 | encoding='utf-8') as fp:
53 | json.dump(config, fp, indent=2, sort_keys=True)
54 | print(yaml.safe_dump(config, default_flow_style=False))
55 |
56 |
57 | # noinspection PyTypeChecker
58 | def train_denoiser(
59 | model_name: str,
60 | model_size: str,
61 | data_tr: Mixtures,
62 | data_vl: Mixtures,
63 | use_loss_purification: bool = False,
64 | lambda_p: float = 1.,
65 | lambda_n: float = 1.,
66 | learning_rate: float = 1e-3,
67 | batch_size: int = 64,
68 | checkpoint_path: Optional[str] = None,
69 | num_examples_validation: int = 1000,
70 | num_examples_minimum: int = 100000,
71 | num_examples_earlystopping: int = 100000,
72 | trial_name: Optional[str] = None,
73 | output_folder: Union[str, os.PathLike] = f'trials_{_host}',
74 | early_stopping_metric: str = 'sisdri',
75 | distance_func: str = 'mse',
76 | called_by_ray: bool = False,
77 | run_smoke_test: bool = False
78 | ) -> str:
79 |
80 | seed_everything(0)
81 |
82 | # prepare model, optimizer, and loss function
83 | current_time = datetime.now().strftime('%b%d_%H-%M-%S')
84 | model, nparams, model_config = init_model(model_name, model_size)
85 | model = model.cuda()
86 | optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
87 | predictor = torch.nn.Identity()
88 | if use_loss_purification:
89 | predictor = SNRPredictor()
90 | predictor.load_state_dict(torch.load(str(_snrp_path)), strict=False)
91 | predictor.cuda()
92 | predictor.eval()
93 |
94 | use_loss_contrastive: bool = bool(isinstance(data_tr, ContrastiveMixtures))
95 | if not type(data_tr) is type(data_vl):
96 | raise ValueError('`data_tr` and `data_vl` should be the same type.')
97 |
98 | # load a previous checkpoint if provided
99 | init_num_examples = 0
100 | output_directory: Optional[Path] = None
101 | is_finetuning = bool(data_tr.dataset_duration or 0)
102 | if checkpoint_path:
103 | # reuse output directory (to pick up experiment where left off)
104 | output_directory = Path(checkpoint_path).parent
105 | ckpt = torch.load(checkpoint_path)
106 | model.load_state_dict(ckpt['model_state_dict'])
107 | # if finetuning a generalist, make a subdirectory
108 | if is_finetuning:
109 | output_directory = output_directory.joinpath(
110 | current_time + '_ft_' + trial_name)
111 | # otherwise, resuming training so reuse the old optimizer
112 | else:
113 | optimizer.load_state_dict(ckpt['optimizer_state_dict'])
114 | init_num_examples = ckpt['num_examples']
115 |
116 | # define experiment configuration
117 | config = {
118 | 'batch_size': batch_size,
119 | 'checkpoint_path': str(checkpoint_path or ''),
120 | 'data_tr': data_tr.__dict__(),
121 | 'data_vl': data_vl.__dict__(),
122 | 'distance_func': distance_func,
123 | 'example_duration': example_duration,
124 | 'lambda_p': lambda_p,
125 | 'lambda_n': lambda_n,
126 | 'learning_rate': learning_rate,
127 | 'model_config': model_config,
128 | 'model_name': model_name,
129 | 'model_nparams': nparams,
130 | 'model_size': model_size,
131 | 'num_examples_minimum': num_examples_minimum,
132 | 'num_examples_earlystopping': num_examples_earlystopping,
133 | 'num_examples_validation': num_examples_validation,
134 | 'sample_rate': sample_rate,
135 | 'speaker_ids': data_tr.speaker_ids_repr,
136 | 'use_loss_contrastive': use_loss_contrastive,
137 | 'use_loss_purification': use_loss_purification,
138 | 'early_stopping_metric': early_stopping_metric,
139 | 'is_finetuning': is_finetuning
140 | }
141 |
142 | # instantiate tensorboard
143 | if called_by_ray:
144 | trial_name = tune.get_trial_name()
145 | if output_directory is None:
146 | output_directory = Path(output_folder).joinpath(
147 | current_time + '_' + trial_name)
148 | writer = SummaryWriter(str(output_directory))
149 | save_config(output_directory, config)
150 |
151 | # begin training (use gradient accumulation for TasNet models)
152 | num_examples: int = init_num_examples
153 | num_validations: int = ceil(num_examples / num_examples_validation)
154 | best_score: float = np.inf * (1 if early_stopping_metric == 'loss' else -1)
155 | best_score_step: int = init_num_examples
156 | use_gradient_accumulation: bool = not bool('grunet' in model_name)
157 | print(f'Output Directory: {str(output_directory)}')
158 |
159 | # define the distance function
160 | if distance_func == 'snr':
161 | distfunc_reg = asteroid.losses.sdr.SingleSrcNegSDR('snr')
162 | distfunc_segm = SegmentalLoss('snr', reduction='none')
163 | elif distance_func == 'sisdr':
164 | distfunc_reg = asteroid.losses.sdr.SingleSrcNegSDR('sisdr')
165 | distfunc_segm = SegmentalLoss('sisdr', reduction='none')
166 | else:
167 | distfunc_reg = torch.nn.MSELoss(reduction='none')
168 | distfunc_segm = SegmentalLoss('mse', reduction='none')
169 |
170 | try:
171 | for num_examples in itertools.count(start=init_num_examples,
172 | step=batch_size):
173 | model.train()
174 | if use_loss_contrastive:
175 |
176 | # pick up a training batch
177 | batch = data_tr(batch_size)
178 | x_1 = batch.inputs_1.cuda()
179 | x_2 = batch.inputs_2.cuda()
180 | p_1 = batch.targets_1.cuda()
181 | p_2 = batch.targets_2.cuda()
182 |
183 | # estimate data purification weights
184 | w_1 = predictor(p_1) if use_loss_purification else None
185 | w_2 = predictor(p_2) if use_loss_purification else None
186 |
187 | # forward propagation
188 | metrics_tr = contrastive_feedforward(
189 | inputs_1=x_1, inputs_2=x_2,
190 | targets_1=p_1, targets_2=p_2,
191 | weights_1=w_1, weights_2=w_2,
192 | lambda_positive=lambda_p, lambda_negative=lambda_n,
193 | loss_reg=distfunc_reg, loss_segm=distfunc_segm,
194 | labels=batch.labels.cuda(),
195 | model=model.cuda(),
196 | accumulation=use_gradient_accumulation,
197 | validation=False)
198 |
199 | else:
200 |
201 | # pick up a training batch
202 | batch = data_tr(batch_size)
203 | x = batch.inputs.cuda()
204 | p = batch.targets.cuda()
205 |
206 | # estimate data purification weights
207 | w = predictor(p) if use_loss_purification else None
208 |
209 | # forward propagation
210 | metrics_tr = feedforward(
211 | inputs=x, targets=p, model=model.train(),
212 | loss_reg=distfunc_reg, loss_segm=distfunc_segm,
213 | weights=w, accumulation=use_gradient_accumulation)
214 |
215 | # update parameters
216 | optimizer.step()
217 | optimizer.zero_grad(set_to_none=True)
218 |
219 | if num_examples < (num_validations * num_examples_validation):
220 | continue
221 |
222 | num_validations += 1
223 | model.eval()
224 |
225 | validation_time: float = 0
226 | if run_smoke_test:
227 | validation_time = time.time()
228 |
229 | with torch.no_grad():
230 |
231 | if use_loss_contrastive:
232 |
233 | # pick up a validation batch
234 | batch = data_vl(batch_size, seed=0)
235 | x_1 = batch.inputs_1.cuda()
236 | x_2 = batch.inputs_2.cuda()
237 | p_1 = batch.targets_1.cuda()
238 | p_2 = batch.targets_2.cuda()
239 |
240 | # estimate data purification weights
241 | w_1 = predictor(p_1) if use_loss_purification else None
242 | w_2 = predictor(p_2) if use_loss_purification else None
243 |
244 | # forward propagation
245 | metrics_vl = contrastive_feedforward(
246 | inputs_1=x_1, inputs_2=x_2,
247 | targets_1=p_1, targets_2=p_2,
248 | weights_1=w_1, weights_2=w_2,
249 | lambda_positive=lambda_p, lambda_negative=lambda_n,
250 | loss_reg=distfunc_reg, loss_segm=distfunc_segm,
251 | labels=batch.labels.cuda(),
252 | model=model.cuda(),
253 | accumulation=use_gradient_accumulation,
254 | validation=True)
255 |
256 | else:
257 |
258 | # pick up a validation batch
259 | batch = data_vl(batch_size, seed=0)
260 | x = batch.inputs.cuda()
261 | p = batch.targets.cuda()
262 |
263 | # estimate data purification weights
264 | w = predictor(p) if use_loss_purification else None
265 |
266 | # forward propagation
267 | metrics_vl = feedforward(
268 | inputs=x, targets=p, model=model.eval(),
269 | loss_reg=distfunc_reg, loss_segm=distfunc_segm,
270 | weights=w, accumulation=use_gradient_accumulation)
271 |
272 | # checkpoint whenever validation score improves
273 | if early_stopping_metric == 'loss':
274 | save_ckpt = bool(metrics_vl['loss']<=best_score)
275 | else:
276 | save_ckpt = bool(metrics_vl['sisdri']>=best_score)
277 |
278 | if save_ckpt:
279 | best_score = metrics_vl[early_stopping_metric]
280 | best_score_step = num_examples
281 | best_state_dict = model.state_dict()
282 | ckpt_path = output_directory.joinpath('ckpt_best.pt')
283 | torch.save({
284 | 'num_examples': num_examples,
285 | 'model_name': model_name,
286 | 'model_config': config,
287 | 'model_state_dict': best_state_dict,
288 | 'optimizer_state_dict': optimizer.state_dict()
289 | }, ckpt_path)
290 | if not called_by_ray:
291 | print(f'Examples: {num_examples:>10},\t'
292 | 'Validation SI-SDRi: '+str(metrics_vl['sisdri']))
293 | step_path = output_directory.joinpath('best_step.txt')
294 | with open(step_path, 'w') as fp:
295 | print(num_examples, file=fp)
296 |
297 | # write summaries
298 | for (k, v) in metrics_tr.items():
299 | if ('_inp' not in k) and ('_enh' not in k):
300 | writer.add_scalar(
301 | f'train/{k}', float(v), num_examples)
302 | for (k, v) in metrics_vl.items():
303 | if ('_inp' not in k) and ('_enh' not in k):
304 | writer.add_scalar(
305 | f'validation/{k}', float(v), num_examples)
306 | writer.add_scalar(
307 | f'validation/vl_score', best_score, num_examples)
308 | if called_by_ray:
309 | _e = early_stopping_metric
310 | tune.report(**{
311 | 'num_examples': num_examples,
312 | f'vl_{_e}': metrics_vl[_e],
313 | f'vl_score': best_score
314 | })
315 |
316 | if num_examples > num_examples_minimum:
317 | if num_examples - best_score_step > num_examples_earlystopping:
318 | raise EarlyStopping()
319 |
320 | if run_smoke_test:
321 | validation_time = time.time() - validation_time
322 | smoke_path = output_directory.joinpath(f'smoke_test.txt')
323 | with open(smoke_path, 'w') as fp:
324 | print('Validation Run-Time (in seconds):'
325 | f' {validation_time}', file=fp)
326 | raise SmokeTest()
327 |
328 | except EarlyStopping:
329 | step_path = output_directory.joinpath(f'early_stopping.txt')
330 | with open(step_path, 'w') as fp:
331 | print(f'{num_examples}\n{best_score_step}\n{best_score}', file=fp)
332 | print(f'Automatically exited after {num_examples_earlystopping} '
333 | f'examples; best model saw {best_score_step} examples.')
334 |
335 | except SmokeTest:
336 | print(f'Exiting due to smoke test.')
337 |
338 | except KeyboardInterrupt:
339 | print(f'Manually exited at {num_examples} examples; best model saw '
340 | f'{best_score_step} examples.')
341 | raise KeyboardInterrupt
342 |
343 | torch.save({
344 | 'num_examples': num_examples,
345 | 'model_name': model_name,
346 | 'model_config': model_config,
347 | 'model_state_dict': model.state_dict(),
348 | 'optimizer_state_dict': optimizer.state_dict()
349 | }, output_directory.joinpath(f'ckpt_last.pt'))
350 |
351 | # close the summary
352 | writer.close()
353 | print(f'Output Directory: {str(output_directory)}')
354 |
355 | # exit the trainer
356 | return
357 |
358 |
359 | def finetune_denoiser(
360 | dataset_duration: float,
361 | checkpoint_locations: Sequence[Union[str, os.PathLike]],
362 | learning_rate: float = 1e-4,
363 | num_examples_validation: int = 1000,
364 | num_examples_earlystopping: int = 10000,
365 | output_folder: Union[str, os.PathLike] = f'finetuning_{_host}',
366 | early_stopping_metric: str = 'sisdri',
367 | distance_func: str = 'mse'
368 | ):
369 | """Finetunes a denoiser, given checkpoint and dataset size.
370 | """
371 | if isinstance(checkpoint_locations, (str, os.PathLike)):
372 | checkpoint_locations = [checkpoint_locations]
373 | for checkpoint_location in checkpoint_locations:
374 |
375 | # Load checkpoint and previous settings from file.
376 | checkpoint_location = checkpoint_location.replace(
377 | 'early_stopping.txt', '')
378 | base_model, config = load_checkpoint(checkpoint_location)
379 | model_name = config.get('model_name')
380 | model_size = config.get('model_size')
381 | batch_size = config.get('batch_size')
382 | config['is_finetuning'] = True
383 | config['dataset_duration'] = dataset_duration
384 | config['learning_rate'] = learning_rate
385 | config['num_examples_validation'] = num_examples_validation
386 | config['num_examples_earlystopping'] = num_examples_earlystopping
387 | config['output_folder'] = output_folder
388 | config['early_stopping_metric'] = early_stopping_metric
389 | config['distance_func'] = distance_func
390 |
391 | # define the distance function
392 | if distance_func == 'snr':
393 | distfunc_reg = asteroid.losses.sdr.SingleSrcNegSDR('snr')
394 | distfunc_segm = SegmentalLoss('snr', reduction='none')
395 | elif distance_func == 'sisdr':
396 | distfunc_reg = asteroid.losses.sdr.SingleSrcNegSDR('sisdr')
397 | distfunc_segm = SegmentalLoss('sisdr', reduction='none')
398 | else:
399 | distfunc_reg = torch.nn.MSELoss(reduction='none')
400 | distfunc_segm = SegmentalLoss('mse', reduction='none')
401 |
402 | # If this is a generalist, loop through all the personalization targets.
403 | # Else, if it is a specialist, this loop will only run once.
404 | try:
405 | speaker_ids = sorted(map(
406 | int, config.get('speaker_ids').strip('][').split(', ')))
407 | config['is_generalist'] = False
408 | except ValueError:
409 | speaker_ids = speaker_ids_te
410 | config['is_generalist'] = True
411 | for speaker_id in speaker_ids:
412 |
413 | current_time = datetime.now().strftime('%b%d_%H-%M-%S')
414 | model = copy.deepcopy(base_model).cuda()
415 | optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
416 |
417 | data_tr = Mixtures(
418 | speaker_id, split_speech='train', split_mixture='train',
419 | snr_mixture=(-5, 5), dataset_duration=dataset_duration)
420 | data_vl = Mixtures(
421 | speaker_id, split_speech='val', split_mixture='val',
422 | snr_mixture=(-5, 5), dataset_duration=dataset_duration)
423 | config['data_tr'] = data_tr.__dict__()
424 | config['data_vl'] = data_vl.__dict__()
425 | config['speaker_ids'] = data_tr.speaker_ids_repr
426 |
427 | # Instantiate tensorboard
428 | trial_name = '{}_{}_{}p_{}c_{}{:03}_ft{:02}'.format(
429 | model_name, model_size,
430 | 'y' if config.get('use_loss_purification') else 'n',
431 | 'y' if config.get('use_loss_contrastive') else 'n',
432 | 'ge' if config.get('is_generalist') else 'sp',
433 | speaker_id, int(dataset_duration)
434 | )
435 | output_directory = Path(output_folder).joinpath(
436 | current_time + '_' + trial_name)
437 | writer = SummaryWriter(str(output_directory))
438 | save_config(output_directory, config)
439 |
440 | # Begin training
441 | num_examples: int = 0
442 | num_validations: int = 0
443 | best_score: float = np.inf * (1 if early_stopping_metric == 'loss'
444 | else
445 | -1)
446 | best_score_step: int = 0
447 | use_gradient_accumulation: bool = not bool('grunet' in model_name)
448 | print(f'Output Directory: {str(output_directory)}')
449 |
450 | try:
451 | for num_examples in itertools.count(start=0, step=batch_size):
452 |
453 | batch = data_tr(batch_size)
454 |
455 | metrics_tr = feedforward(
456 | batch.inputs, batch.targets, model.train(),
457 | loss_reg=distfunc_reg, loss_segm=distfunc_segm,
458 | accumulation=use_gradient_accumulation)
459 | optimizer.step()
460 | optimizer.zero_grad(set_to_none=True)
461 |
462 | if num_examples < (num_validations*num_examples_validation):
463 | continue
464 |
465 | num_validations += 1
466 | batch = data_vl(batch_size, seed=0)
467 | metrics_vl = feedforward(
468 | batch.inputs, batch.targets, model.eval(),
469 | loss_reg=distfunc_reg, loss_segm=distfunc_segm,
470 | accumulation=use_gradient_accumulation)
471 |
472 | # write summaries
473 | for (k, v) in metrics_tr.items():
474 | if ('_inp' not in k) and ('_enh' not in k):
475 | writer.add_scalar(
476 | f'train/{k}', float(v), num_examples)
477 | for (k, v) in metrics_vl.items():
478 | if ('_inp' not in k) and ('_enh' not in k):
479 | writer.add_scalar(
480 | f'validation/{k}', float(v), num_examples)
481 |
482 | do_save_checkpoint = {
483 | 'loss': bool(metrics_vl['loss'] <= best_score),
484 | 'sisdri': bool(metrics_vl['sisdri'] >= best_score)
485 | }.get(early_stopping_metric, False)
486 |
487 | if do_save_checkpoint:
488 | best_score = {
489 | 'loss': metrics_vl['loss'],
490 | 'sisdri': metrics_vl['sisdri']
491 | }.get(early_stopping_metric, 0)
492 | best_score_step = num_examples
493 | ckpt_path = output_directory.joinpath('ckpt_best.pt')
494 | torch.save({
495 | 'num_examples': num_examples,
496 | 'model_name': model_name,
497 | 'model_config': config,
498 | 'model_state_dict': model.state_dict(),
499 | 'optimizer_state_dict': optimizer.state_dict()
500 | }, ckpt_path)
501 | print(f'Examples: {num_examples:>10},\t'
502 | 'Validation SI-SDRi: '+str(metrics_vl['sisdri']))
503 | step_path = output_directory.joinpath('best_step.txt')
504 | with open(step_path, 'w') as fp:
505 | print(num_examples, file=fp)
506 |
507 | if (num_examples - best_score_step >
508 | num_examples_earlystopping):
509 | raise EarlyStopping()
510 |
511 | except EarlyStopping:
512 | step_path = output_directory.joinpath(f'early_stopping.txt')
513 | with open(step_path, 'w') as fp:
514 | print(f'{num_examples},{best_score_step}', file=fp)
515 | print(f'Automatically exited after {num_examples_earlystopping}'
516 | f' examples; best model saw {best_score_step} examples.')
517 |
518 | writer.close()
519 | print(f'Output Directory: {str(output_directory)}')
520 |
521 | return
522 |
523 |
524 | def parse_arguments(
525 | arg_list: Optional[List[str]] = None
526 | ) -> argparse.Namespace:
527 | """Parses arguments from a list."""
528 | # use system default arguments
529 | if arg_list is None: arg_list = sys.argv[1:]
530 | abs_path = lambda p: Path(p).absolute()
531 |
532 | def t_mixture_snr(string):
533 | try:
534 | return_val = float(string)
535 | except ValueError:
536 | return_val = literal_eval(string)
537 | return return_val
538 |
539 | parser = argparse.ArgumentParser()
540 | parser.add_argument('model_name', type=str)
541 | parser.add_argument('model_size', type=str,
542 | choices={'tiny', 'small', 'medium', 'large'})
543 | parser.add_argument('--speaker_id', type=int, nargs='+', required=False)
544 | parser.add_argument('-b', '--batch_size', type=int, default=64)
545 | parser.add_argument('-l', '--learning_rate', type=float, default=1e-3)
546 | parser.add_argument('--use_loss_purification', action='store_true')
547 | parser.add_argument('--use_loss_contrastive', action='store_true')
548 | parser.add_argument('--lambda_p', type=float, default=1.)
549 | parser.add_argument('--lambda_n', type=float, default=1.)
550 | parser.add_argument('--generalist_frac', type=float, default=1.)
551 | parser.add_argument('--distance_func', type=str,
552 | choices={'mse', 'snr', 'sisdr'}, required=True)
553 | parser.add_argument('--early_stopping_metric', type=str,
554 | choices={'loss', 'sisdri'}, default='sisdri')
555 | parser.add_argument("--premixture_snr",
556 | type=t_mixture_snr, default='(0, 15)')
557 | parser.add_argument("--mixture_snr",
558 | type=t_mixture_snr, default='(-5, 5)')
559 | parser.add_argument('--warm_start', type=abs_path)
560 | parser.add_argument('--trial_suffix', type=str, default='')
561 | parser.add_argument('--output_folder', type=abs_path,
562 | default=abs_path(__file__).parent / f'runs_{_host}')
563 | args = parser.parse_args(arg_list)
564 |
565 | # validate warm start argument
566 | if args.warm_start:
567 | if Path(args.warm_start).suffix != '.pt':
568 | raise IOError('Warm start checkpoint should have extension ".pt".')
569 | if not Path(args.warm_start).is_file():
570 | raise IOError('Warm start checkpoint does not exist.')
571 | args.warm_start = str(args.warm_start)
572 |
573 | # validate speaker IDs
574 | if args.speaker_id:
575 | # check that speaker IDs are valid for personalization experiments
576 | if not set(args.speaker_id).issubset(set(speaker_ids_te)):
577 | raise ExperimentError(
578 | 'Please choose speaker IDs specificed in "speakers/test.csv". '
579 | 'Allowed values are: {}.'.format(speaker_ids_te))
580 | return args
581 |
582 |
583 | def hparam_search_cm(
584 | speaker_id_or_ids: Union[int, Sequence[int]] = 200,
585 | num_cpus: int = 1,
586 | num_gpus: int = 1
587 | ):
588 | # define the hyperparameter search space
589 | search_space = {
590 | 'distance_func': tune.grid_search(['snr',]),
591 | 'use_loss_purification': tune.grid_search([False, True]),
592 | 'lambda_p': tune.grid_search([0, 0.0001, 0.0005, 0.001, 0.005,
593 | 0.01, 0.05, 0.1, 0.5, 1]),
594 | 'lambda_n': tune.grid_search([0, 0.0001, 0.0005, 0.001, 0.005,
595 | 0.01, 0.05, 0.1, 0.5, 1]),
596 | }
597 |
598 | def ray_search_cm(config):
599 | d_tr = ContrastiveMixtures(
600 | speaker_id_or_ids, split_speech='pretrain',
601 | split_premixture='train', snr_premixture=(0, 15),
602 | split_mixture='train', snr_mixture=(-5, 5))
603 | d_vl = ContrastiveMixtures(
604 | speaker_id_or_ids, split_speech='preval',
605 | split_premixture='val', snr_premixture=(0, 15),
606 | split_mixture='val', snr_mixture=(-5, 5))
607 | train_denoiser(
608 | model_name='convtasnet',
609 | model_size='small',
610 | data_tr=d_tr,
611 | data_vl=d_vl,
612 | use_loss_purification=config['use_loss_purification'],
613 | lambda_p=config['lambda_p'],
614 | lambda_n=config['lambda_n'],
615 | output_folder='.',
616 | distance_func=config['distance_func'],
617 | called_by_ray=True,
618 | )
619 | return
620 |
621 | analysis = tune.run(
622 | ray_search_cm,
623 | name='ray_search_cm',
624 | config=search_space,
625 | resources_per_trial={'cpu': num_cpus, 'gpu': num_gpus},
626 | reuse_actors=True,
627 | log_to_file=True,
628 | local_dir='.',
629 | fail_fast=True,
630 | verbose=1
631 | )
632 | ts = datetime.now().strftime('%b%d_%H-%M-%S')
633 | analysis.results_df.to_csv(f'ray_search_cm/results_{ts}.csv')
634 | return
635 |
636 |
637 | def hparam_search_df(
638 | speaker_id_or_ids: Union[int, Sequence[int]] = 200,
639 | num_cpus: int = 1,
640 | num_gpus: int = 1
641 | ):
642 | # define the hyperparameter search space
643 | search_space = {
644 | 'model_size': tune.grid_search(['tiny', 'small', 'medium', 'large']),
645 | 'distance_func': tune.grid_search(['mse', 'snr', 'sisdr']),
646 | 'use_loss_purification': tune.grid_search([False, True]),
647 | }
648 |
649 | def ray_search_distance_func(config):
650 | d_tr = Mixtures(
651 | speaker_id_or_ids, split_speech='pretrain',
652 | split_premixture='train', snr_premixture=(0, 15),
653 | split_mixture='train', snr_mixture=(-5, 5))
654 | d_vl = Mixtures(
655 | speaker_id_or_ids, split_speech='preval',
656 | split_premixture='val', snr_premixture=(0, 15),
657 | split_mixture='val', snr_mixture=(-5, 5))
658 | train_denoiser(
659 | model_name='convtasnet',
660 | model_size=config['model_size'],
661 | data_tr=d_tr,
662 | data_vl=d_vl,
663 | use_loss_purification=config['use_loss_purification'],
664 | output_folder='.',
665 | distance_func=config['distance_func'],
666 | called_by_ray=True,
667 | )
668 | return
669 |
670 | analysis = tune.run(
671 | ray_search_distance_func,
672 | name='ray_search_distance_func',
673 | config=search_space,
674 | resources_per_trial={'cpu': num_cpus, 'gpu': num_gpus},
675 | reuse_actors=True,
676 | log_to_file=True,
677 | local_dir='.',
678 | fail_fast=True,
679 | verbose=1
680 | )
681 | ts = datetime.now().strftime('%b%d_%H-%M-%S')
682 | analysis.results_df.to_csv(f'ray_search_distance_func/results_{ts}.csv')
683 | return
684 |
685 |
--------------------------------------------------------------------------------
/code/snr_predictor:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IU-SAIGE/pse/e9bf8f73f0d4a9a53dc1c0d19dd9b9b3d8979ce8/code/snr_predictor
--------------------------------------------------------------------------------
/code/speakers/test.csv:
--------------------------------------------------------------------------------
1 | speaker_id,speaker_name,gender
2 | 19,Kara Shallenberg,F
3 | 26,Denny Sayers,M
4 | 39,Sherry Crowther,F
5 | 40,Vicki Barbour,F
6 | 78,Hugh McGuire,M
7 | 83,Catharine Eastman,F
8 | 87,Rosalind Wills,F
9 | 89,Kristen McQuillin,F
10 | 118,Alex Buie,M
11 | 125,Claire Goget,F
12 | 163,Andrew Miller,M
13 | 196,Stewart Wills,M
14 | 198,Heather Barnett,F
15 | 200,Maureen S. O'Brien,F
16 | 201,Joplin James,M
17 | 250,Mary Reagan,F
18 | 254,Alan Davis Drake,M
19 | 307,Randy Phillips,M
20 | 405,Eric Dennison,M
21 | 446,Steve Hartzog,M
22 |
23 |
--------------------------------------------------------------------------------
/code/speakers/train.csv:
--------------------------------------------------------------------------------
1 | speaker_id,speaker_name,gender
2 | 27,Sean McKinley,M
3 | 32,Betsie Bush,F
4 | 60,|CBW|Simon,M
5 | 103,Karen Savage,F
6 | 150,Fox in the Stars,F
7 | 211,shanda_w,F
8 | 226,Deb Bacon-Ziegler,F
9 | 229,carnright,M
10 | 233,Steve Karafit,M
11 | 248,Becky Miller,F
12 | 289,Barbara Wedge,F
13 | 311,deadwhitemales,M
14 | 328,Elizabeth Palmer,F
15 | 332,Aaron Teiser,M
16 | 374,kumarei,M
17 | 403,Nocturna,F
18 | 412,Brian Roberg,M
19 | 441,Sandra,F
20 | 445,Dave Foss,M
21 | 458,Scott Splavec,M
22 | 460,Dave Ranson,M
23 | 481,Neal Foley,M
24 | 625,toriasuncle,M
25 | 669,Anne,F
26 | 696,Tamara R. Schwartz,F
27 | 831,Nick Gallant,M
28 | 839,rovert405,M
29 | 909,Greg Bryant,M
30 | 911,frankjf,M
31 | 1034,Kevin O'Coin,M
32 | 1040,John Garvin,M
33 | 1069,Dawn,F
34 | 1081,Fracture,M
35 | 1088,Christabel,F
36 | 1098,Merryb,F
37 | 1183,roolynninms,F
38 | 1246,Sandra,F
39 | 1334,John Schell,M
40 | 1355,Chris Gladis,M
41 | 1363,Tammy Sanders,F
42 | 1447,Luigina,F
43 | 1455,webslog,M
44 | 1553,Mim Ritty,F
45 | 1578,Lorelle Anderson,F
46 | 1624,Daniel Shorten,M
47 | 1737,Erin Hastings,F
48 | 1743,Bryan Ness,M
49 | 1841,Laura Caldwell,F
50 | 1898,Jennifer,F
51 | 1926,Nikki Sullivan,F
52 | 1963,Belinda Brown,F
53 | 1970,Dawn Larsen,F
54 | 1992,Michelle White,F
55 | 2002,Larry Maddocks,M
56 | 2007,Sheila Morton,F
57 | 2092,Elaine Hamby,F
58 | 2136,Great Plains,M
59 | 2182,Susan Umpleby,F
60 | 2196,Andrea Fiore,F
61 | 2384,Ger,M
62 | 2391,treefingers,F
63 | 2416,Julia Albath,F
64 | 2514,S. Young,M
65 | 2691,Donna Stewart,F
66 | 2764,Piper Hale,F
67 | 2817,Catherine Millward,F
68 | 2836,Linda McDaniel,F
69 | 2843,ricell,M
70 | 2893,Ryan Sutter,M
71 | 2910,Janna,F
72 | 2989,Jamie Strassenburg,F
73 | 3112,Jessica Louise,F
74 | 3168,David Anton,M
75 | 3214,fourteatoo,M
76 | 3235,Karen Commins,F
77 | 3240,flakker,M
78 | 3242,peac,M
79 | 3259,Kate West,F
80 | 3374,Craig Campbell,M
81 | 3436,Anders Lankford,M
82 | 3440,Heidi Will,F
83 | 3486,Robin Balmer,M
84 | 3526,Bereni,F
85 | 3607,Richard Wallis,M
86 | 3664,Barry Eads,M
87 | 3699,Bruce Pirie,M
88 | 3723,Kevin Lavin,M
89 | 3807,Jesse Noar,M
90 | 3830,rymd80,M
91 | 3857,Epistomolus,M
92 | 3879,Keneva,F
93 | 3947,johnell,F
94 | 3982,Kate Adams,F
95 | 3983,lavocedorata,F
96 | 4014,Tom Clifton,M
97 | 4018,Nicholas Clifford,M
98 | 4051,Liz Devens,F
99 | 4088,Blazin48,F
100 | 4137,Sarah LuAnn,F
101 | 4160,Rosie,F
102 | 4195,bj,F
103 | 4214,A. Janelle Risa,F
104 | 4267,Ric F,M
105 | 4297,Tina Horning,F
106 | 4340,kiwafruit,F
107 | 4362,Michelle Montano,F
108 | 4397,John Dennison,M
109 | 4406,Matthew Scott Surprenant,M
110 | 4441,William Peck,M
111 | 4481,margo zinberg,F
112 | 4640,Karen Mason,F
113 | 4680,pachayes,F
114 | 4788,Bill Boerst,M
115 | 4813,Steve Mattern,M
116 | 4830,George Aalto,M
117 | 4853,Barbara Derksen,F
118 | 4859,nathank,M
119 | 4898,greatbasinrain,M
120 | 5022,Kathleen Costa,F
121 | 5049,Bradley Smith,M
122 | 5104,Chuck Burke,M
123 | 5163,LilyAnne,F
124 | 5192,Jason Esteves,M
125 | 5322,Jay Bidal,M
126 | 5339,Lauren McCullough,F
127 | 5390,Charles Bice,M
128 | 5393,Amy Hengst,F
129 | 5456,e_scarab,M
130 | 5463,GLM,M
131 | 5514,Ella Jane Quentin,F
132 | 5561,Ellen Jones,F
133 | 5652,amicrazy2u,F
134 | 5678,jgoffena,M
135 | 5688,Jennifer Dionne,F
136 | 5703,Garth Comira,M
137 | 5750,laurencetrask,M
138 | 5778,Laura Victoria,F
139 | 5789,Kirsten Wever,F
140 | 5808,jeandelfrio,M
141 | 5867,Sharon Omi,F
142 | 6000,MissRose,F
143 | 6019,DerekP,M
144 | 6064,Deborah Knight,F
145 | 6078,dobsonfly,F
146 | 6081,Lazuli,M
147 | 6147,Liberty Stump,F
148 | 6181,Mike,M
149 | 6209,deckerteach,M
150 | 6272,jlenardon,F
151 | 6367,Vince Dee,M
152 | 6385,Novella Serena,F
153 | 6415,Daryl Wor,F
154 | 6437,John Hoerr,M
155 | 6454,David Wales,M
156 | 6476,Viridian,F
157 | 6529,Fred DeBerardinis,M
158 | 6531,janesandberg,F
159 | 6563,William Tomcho,M
160 | 6818,beckyboyd,F
161 | 6836,John,M
162 | 6848,KarlHenning,M
163 | 6880,Capybara,M
164 | 6925,Thomas Meaney,M
165 | 7059,Joannemmp,F
166 | 7067,Matthew Wall,M
167 | 7078,Mary in Arkansas,F
168 | 7113,Sukaina Jaffer,F
169 | 7148,Vickie Ranz,F
170 | 7178,J.K. Neely,F
171 | 7190,Tony Posante,M
172 | 7226,Jonathan Moore,M
173 | 7264,Sean McClain,M
174 | 7278,Jon Smith,M
175 | 7302,Asta1234,F
176 | 7312,nkneer,M
177 | 7367,NIneFive83,M
178 | 7402,Canby Ibarra,M
179 | 7447,dasbury,M
180 | 7505,Ron Lockhart,M
181 | 7511,Sherri Vance,F
182 | 7517,Raz Mason,F
183 | 7635,Judy Guinan,F
184 | 7780,tazzle,F
185 | 7794,mlcui,F
186 | 7800,Arie,F
187 | 7859,xinamarieuhl,F
188 | 8014,constatine,F
189 | 8051,Maria Kasper,F
190 | 8063,Robert Snoza,M
191 | 8088,Jason Bolestridge,M
192 | 8095,Theodulf,M
193 | 8098,Arnold,M
194 | 8108,drakaunus,M
195 | 8123,Sheila Wood,F
196 | 8226,Adam Picot,M
197 | 8238,Madam Fickle,F
198 | 8312,Jaimie Noy,F
199 | 8324,Kathy Wright,F
200 | 8419,Jon Kissack,M
201 | 8425,Larry Wilson,M
202 | 8465,TinaNygard2,F
203 | 8468,Jennifer Dorr,F
204 | 8580,Gary Dana,M
205 | 8609,noblesavage,M
206 | 8629,Shivansh Dhar,M
207 | 8630,Eduardo,M
208 | 8747,DeanOBuchanan,M
209 | 8770,Paul Simonin,M
210 | 8797,Sean Grabosky,M
211 | 8838,Kevin Owens,M
212 | 8975,Daisy Flaim,F
--------------------------------------------------------------------------------
/code/speakers/validation.csv:
--------------------------------------------------------------------------------
1 | speaker_id,speaker_name,gender
2 | 298,Caroline Morse,F
3 | 302,Chris Peterson,F
4 | 322,Elisabeth Shields,F
5 | 426,Norah Piehl,F
6 | 587,Joy Scaglione,F
7 | 730,Karen Labenz,F
8 | 887,Lana Taylor,F
9 | 1116,Megan Stemm-Wade,F
10 | 1235,Tim Gregory,M
11 | 1263,Leonie Rose,F
12 | 1502,Ann Boyer,F
13 | 1594,Jon Scott Jones,M
14 | 1723,Rob Whelan,M
15 | 1867,Rowdy Delaney,M
16 | 2159,Matthew Westra,M
17 | 2289,David Kleparek,M
18 | 2436,Seth Adam Sher,M
19 | 2518,Rob Powell,M
20 | 2911,David Lawrence,M
21 | 2952,Scott Carpenter,M
22 |
--------------------------------------------------------------------------------
/code/test.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | import re
3 | import sys
4 | from datetime import datetime
5 |
6 | import torch
7 | from ray import tune
8 |
9 | from exp_data import Mixtures
10 | from exp_models import load_checkpoint, feedforward
11 |
12 |
13 | def get_timestamp() -> str:
14 | # format_str = "%A, %d %b %Y %H:%M:%S %p"
15 | format_str = "%Y_%b_%d"
16 | result = str(datetime.now().strftime(format_str))
17 | return result
18 |
19 |
20 | def no_op_loss(*args, **kwargs):
21 | return 0
22 |
23 |
24 | @torch.no_grad()
25 | def test_function(
26 | filepath: str,
27 | print_to_console: bool = True,
28 | write_to_file: bool = True,
29 | called_by_tune: bool = True
30 | ):
31 | use_gradient_accumulation = not bool('grunet' in filepath)
32 | filepath = pathlib.Path(filepath.strip().replace('early_stopping.txt', ''))
33 |
34 | # load the experiment configuration (should be in the same directory)
35 | model, config, num_examples = load_checkpoint(filepath)
36 |
37 | # indentify the personalization target (if there is one)
38 | # and prepare the speaker-specific test sets
39 | speaker_id = 200
40 | if 'ray' not in str(filepath):
41 | try:
42 | match = re.match(r'.*_(sp|ge)(\d\d\d).*', str(filepath))
43 | speaker_id = int(match.group(2))
44 | except AttributeError:
45 | raise NotImplementedError('need to add support for generalists')
46 | dataset = Mixtures(speaker_id,
47 | split_speech='test',
48 | split_mixture='test',
49 | snr_mixture=(-5, 5))
50 |
51 | # run the test
52 | batch = dataset(100, seed=0)
53 | results = feedforward(batch.inputs, batch.targets, model,
54 | weights=None, accumulation=use_gradient_accumulation,
55 | test=True, loss_reg=no_op_loss, loss_segm=no_op_loss)
56 |
57 | if print_to_console:
58 | print(f'{filepath} ({num_examples}),{results}')
59 | if write_to_file:
60 | with open('log.txt', 'a') as op:
61 | print(f'{filepath} ({num_examples}),{results}', file=op)
62 | if called_by_tune:
63 | tune.report(**results)
64 | return
65 | else:
66 | return results
67 |
68 |
69 | def main(use_tune: bool = False):
70 |
71 | if len(sys.argv) > 1:
72 | folders = sys.argv[1:]
73 | else:
74 | p = pathlib.Path('/N/u/asivara/2022-jstsp/0408_hparams_cm').rglob(
75 | 'ckpt_best.pt')
76 | folders = [str(f) for f in p if '619b6' in str(f)]
77 | # else:
78 | # raise ValueError('Expected subsequent arguments to be checkpoint paths '
79 | # 'or directory.')
80 |
81 | def test_wrapper(config):
82 | return test_function(
83 | filepath=config['filepath'],
84 | called_by_tune=True
85 | )
86 |
87 | if use_tune:
88 | tune.run(
89 | test_wrapper,
90 | config={
91 | 'filepath': tune.grid_search(folders)
92 | },
93 | resources_per_trial={'cpu': 1, 'gpu': 0.25},
94 | local_dir=f'test_results-({get_timestamp()})'
95 | )
96 | pass
97 | else:
98 | for f in folders:
99 | test_function(f, called_by_tune=False)
100 |
101 |
102 | if __name__ == '__main__':
103 | main(False)
104 |
--------------------------------------------------------------------------------
/code/train_generalists.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import sys
4 | import pandas as pd
5 | from ray import tune
6 |
7 | from datetime import datetime
8 | from exp_data import Mixtures
9 | from exp_utils import get_config_from_yaml
10 | from run import train_denoiser
11 |
12 |
13 | ROOT_DIR = os.path.dirname(os.path.realpath(sys.argv[0]))
14 |
15 |
16 | def train_generalist(config: dict):
17 |
18 | speaker_ids_tr = pd.read_csv(
19 | ROOT_DIR+'/speakers/train.csv')['speaker_id'].to_list()
20 | speaker_ids_vl = pd.read_csv(
21 | ROOT_DIR+'/speakers/validation.csv')['speaker_id'].to_list()
22 |
23 | data_train = Mixtures(
24 | speaker_ids_tr,
25 | config['folder_librispeech'],
26 | None,
27 | config['folder_musan'],
28 | frac_speech=config.get('generalist_frac', 1),
29 | split_mixture='train',
30 | snr_mixture=(-5, 5)
31 | )
32 | data_validation = Mixtures(
33 | speaker_ids_vl,
34 | config['folder_librispeech'],
35 | None,
36 | config['folder_musan'],
37 | split_mixture='val',
38 | snr_mixture=(-5, 5)
39 | )
40 |
41 | train_denoiser(
42 | model_name=config['model_name'],
43 | model_size=config['model_size'],
44 | distance_func=config['distance_func'],
45 | data_tr=data_train,
46 | data_vl=data_validation,
47 | learning_rate=config['learning_rate'],
48 | batch_size=config['batch_size'],
49 | output_folder=config['output_folder'],
50 | called_by_ray=True,
51 | run_smoke_test=config['run_smoke_test']
52 | )
53 |
54 | return
55 |
56 |
57 | if __name__ == '__main__':
58 |
59 | parser = argparse.ArgumentParser()
60 | parser.add_argument("-t", "--run_smoke_test",
61 | help="check if a single training iteration runs succesfully",
62 | action="store_true")
63 | args = parser.parse_args()
64 |
65 | config = get_config_from_yaml(ROOT_DIR+'/conf_generalists.yaml')
66 | os.environ['CUDA_VISIBLE_DEVICES'] = config['available_devices']
67 |
68 | analysis = tune.run(
69 | train_generalist,
70 | name='train_generalist',
71 | config={
72 | 'model_name': tune.grid_search(config['model_name']),
73 | 'model_size': tune.grid_search(config['model_size']),
74 | 'distance_func': tune.grid_search(config['distance_func']),
75 | 'batch_size': tune.grid_search(config['batch_size']),
76 | 'learning_rate': tune.grid_search(config['learning_rate']),
77 | 'folder_librispeech': config['folder_librispeech'],
78 | 'folder_musan': config['folder_musan'],
79 | 'sample_rate': config['sample_rate'],
80 | 'example_duration': config['example_duration'],
81 | 'output_folder': config['output_folder'],
82 | 'run_smoke_test': args.run_smoke_test
83 | },
84 | resources_per_trial={
85 | 'cpu': config['num_cpus_per_experiment'],
86 | 'gpu': config['num_gpus_per_experiment']
87 | },
88 | reuse_actors=True,
89 | log_to_file=True,
90 | local_dir=config['output_folder'],
91 | fail_fast=True,
92 | verbose=3
93 | )
94 | ts = datetime.now().strftime('%b%d_%H-%M-%S')
95 | output_filepath = os.path.join(
96 | config['output_folder'], f'train_generalist/results_{ts}.csv')
97 | analysis.results_df.to_csv(output_filepath)
98 | print('Completed training generalist(s).')
99 |
100 |
--------------------------------------------------------------------------------
/code/train_specialists.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import sys
4 | from ray import tune
5 |
6 | from datetime import datetime
7 | from exp_data import ContrastiveMixtures, Mixtures
8 | from exp_utils import get_config_from_yaml
9 | from run import train_denoiser
10 |
11 |
12 | ROOT_DIR = os.path.dirname(os.path.realpath(sys.argv[0]))
13 |
14 |
15 | def train_specialist(config: dict):
16 |
17 | data_class = Mixtures
18 | if config.get('use_loss_contrastive', False):
19 | data_class = ContrastiveMixtures
20 |
21 | data_train = data_class(
22 | config['speaker_id'],
23 | config['folder_librispeech'],
24 | config['folder_fsd50k'],
25 | config['folder_musan'],
26 | split_speech='pretrain',
27 | split_premixture='train',
28 | snr_premixture=(0, 15),
29 | split_mixture='train',
30 | snr_mixture=(-5, 5)
31 | )
32 | data_validation = data_class(
33 | config['speaker_id'],
34 | config['folder_librispeech'],
35 | config['folder_fsd50k'],
36 | config['folder_musan'],
37 | split_speech='preval',
38 | split_premixture='val',
39 | snr_premixture=(0, 15),
40 | split_mixture='val',
41 | snr_mixture=(-5, 5)
42 | )
43 |
44 | train_denoiser(
45 | model_name=config['model_name'],
46 | model_size=config['model_size'],
47 | distance_func=config['distance_func'],
48 | data_tr=data_train,
49 | data_vl=data_validation,
50 | learning_rate=config['learning_rate'],
51 | use_loss_purification=config['use_loss_purification'],
52 | batch_size=config['batch_size'],
53 | output_folder=config['output_folder'],
54 | called_by_ray=True,
55 | run_smoke_test=config['run_smoke_test']
56 | )
57 |
58 | return
59 |
60 |
61 | if __name__ == '__main__':
62 |
63 | parser = argparse.ArgumentParser()
64 | parser.add_argument("-t", "--run_smoke_test",
65 | help="check if a single training iteration runs succesfully",
66 | action="store_true")
67 | args = parser.parse_args()
68 |
69 | config = get_config_from_yaml(ROOT_DIR+'/conf_specialists.yaml')
70 | os.environ['CUDA_VISIBLE_DEVICES'] = config['available_devices']
71 |
72 | analysis = tune.run(
73 | train_specialist,
74 | name='train_specialist',
75 | config={
76 | 'model_name': tune.grid_search(config['model_name']),
77 | 'model_size': tune.grid_search(config['model_size']),
78 | 'distance_func': tune.grid_search(config['distance_func']),
79 | 'speaker_id': tune.grid_search(config['speaker_id']),
80 | 'use_loss_contrastive': tune.grid_search(
81 | config['use_loss_contrastive']),
82 | 'use_loss_purification': tune.grid_search(
83 | config['use_loss_purification']),
84 | 'batch_size': tune.grid_search(config['batch_size']),
85 | 'learning_rate': tune.grid_search(config['learning_rate']),
86 | 'folder_librispeech': config['folder_librispeech'],
87 | 'folder_fsd50k': config['folder_fsd50k'],
88 | 'folder_musan': config['folder_musan'],
89 | 'sample_rate': config['sample_rate'],
90 | 'example_duration': config['example_duration'],
91 | 'output_folder': config['output_folder'],
92 | 'run_smoke_test': args.run_smoke_test
93 | },
94 | resources_per_trial={
95 | 'cpu': config['num_cpus_per_experiment'],
96 | 'gpu': config['num_gpus_per_experiment']
97 | },
98 | reuse_actors=True,
99 | log_to_file=True,
100 | local_dir=config['output_folder'],
101 | fail_fast=True,
102 | verbose=3
103 | )
104 | ts = datetime.now().strftime('%b%d_%H-%M-%S')
105 | output_filepath = os.path.join(
106 | config['output_folder'], f'train_specialist/results_{ts}.csv')
107 | analysis.results_df.to_csv(output_filepath)
108 | print('Completed training specialist(s).')
109 |
110 |
--------------------------------------------------------------------------------
/docs/images/pse_ssl_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IU-SAIGE/pse/e9bf8f73f0d4a9a53dc1c0d19dd9b9b3d8979ce8/docs/images/pse_ssl_overview.png
--------------------------------------------------------------------------------
/docs/images/waveforms_cm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IU-SAIGE/pse/e9bf8f73f0d4a9a53dc1c0d19dd9b9b3d8979ce8/docs/images/waveforms_cm.png
--------------------------------------------------------------------------------
/docs/images/waveforms_pseudose.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IU-SAIGE/pse/e9bf8f73f0d4a9a53dc1c0d19dd9b9b3d8979ce8/docs/images/waveforms_pseudose.png
--------------------------------------------------------------------------------