├── .bumpversion.cfg ├── .coveragerc ├── .github └── workflows │ └── tests.yml ├── .gitignore ├── DESCRIPTION.rst ├── LICENSE ├── README.md ├── doc ├── configurable.md ├── examples.md ├── padertorch.svg ├── sacred.md └── virtual_batch_size_multi_gpu.md ├── jenkins.bash ├── maintenance.md ├── padertorch ├── __init__.py ├── base.py ├── configurable.py ├── contrib │ ├── __init__.py │ ├── cb │ │ ├── __init__.py │ │ ├── array.py │ │ ├── complex.py │ │ ├── data.py │ │ ├── feature_extractor.py │ │ ├── hooks.py │ │ ├── io.py │ │ ├── summary.py │ │ ├── tensorboard_symlink_tree.py │ │ ├── track.py │ │ └── transform.py │ ├── data │ │ ├── __init__.py │ │ ├── utils.py │ │ └── wsj0_mix │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── create_json.py │ │ │ └── prepare_data.sh │ ├── examples │ │ ├── __init__.py │ │ ├── audio_synthesis │ │ │ └── wavenet │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── data.py │ │ │ │ ├── evaluate.py │ │ │ │ ├── model.py │ │ │ │ └── train.py │ │ ├── examples.md │ │ ├── sound_recognition │ │ │ ├── __init__.py │ │ │ └── audio_tagging │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── data.py │ │ │ │ ├── evaluate.py │ │ │ │ ├── model.py │ │ │ │ └── train.py │ │ ├── source_localization │ │ │ └── distance_estimator │ │ │ │ ├── Makefile │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── create_jsons.py │ │ │ │ ├── data.py │ │ │ │ ├── download.py │ │ │ │ ├── evaluate.py │ │ │ │ ├── model.py │ │ │ │ └── train.py │ │ ├── source_separation │ │ │ ├── __init__.py │ │ │ ├── or_pit │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── evaluate.py │ │ │ │ ├── model.py │ │ │ │ ├── templates.py │ │ │ │ └── train.py │ │ │ ├── pit │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── data.py │ │ │ │ ├── evaluate.py │ │ │ │ ├── model.py │ │ │ │ ├── templates.py │ │ │ │ └── train.py │ │ │ └── tasnet │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── evaluate.py │ │ │ │ ├── model.py │ │ │ │ ├── tas_coders.py │ │ │ │ ├── templates.py │ │ │ │ └── train.py │ │ ├── speaker_classification │ │ │ └── supervised │ │ │ │ ├── README.md │ │ │ │ ├── data.py │ │ │ │ ├── evaluate.py │ │ │ │ ├── model.py │ │ │ │ └── train.py │ │ ├── speech_enhancement │ │ │ ├── __init__.py │ │ │ └── mask_estimator │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── evaluate.py │ │ │ │ ├── model.py │ │ │ │ └── train.py │ │ └── toy_examples │ │ │ ├── configurable │ │ │ ├── __init__.py │ │ │ ├── configurable.py │ │ │ └── shared_parameter.py │ │ │ ├── mnist │ │ │ └── mnist_example.py │ │ │ └── multi_gpu │ │ │ └── train.py │ ├── je │ │ ├── __init__.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── filters.py │ │ │ ├── transforms.py │ │ │ └── utils.py │ │ ├── hooks │ │ │ └── swa.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ └── clf.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── augment.py │ │ │ ├── conv.py │ │ │ ├── conv_utils.py │ │ │ ├── features.py │ │ │ ├── hybrid.py │ │ │ ├── reduce.py │ │ │ ├── rnn.py │ │ │ └── transformer.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ └── test_conv.py │ ├── jensheit │ │ ├── __init__.py │ │ ├── base.py │ │ ├── batch.py │ │ ├── data.py │ │ ├── eval_sad.py │ │ ├── evaluation.py │ │ ├── mask_estimator_example │ │ │ ├── __init__.py │ │ │ ├── model.py │ │ │ └── modul.py │ │ ├── norm.py │ │ ├── tests │ │ │ ├── test_mask_estimator.py │ │ │ └── test_utils.py │ │ ├── train_convtasnet.py │ │ └── utils.py │ ├── ldrude │ │ ├── __init__.py │ │ ├── data.py │ │ └── utils.py │ ├── mk │ │ ├── __init__.py │ │ ├── alignments.py │ │ ├── io.py │ │ ├── modules │ │ │ ├── 
__init__.py │ │ │ ├── contrastive.py │ │ │ └── features │ │ │ │ ├── __init__.py │ │ │ │ ├── ssl │ │ │ │ ├── __init__.py │ │ │ │ ├── hubert.py │ │ │ │ └── wav2vec2.py │ │ │ │ └── timefreq.py │ │ ├── synthesis │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── parametric │ │ │ │ ├── __init__.py │ │ │ │ └── griffin_lim.py │ │ │ └── vocoder │ │ │ │ ├── __init__.py │ │ │ │ ├── bigvgan.py │ │ │ │ ├── nvidia_bigvgan │ │ │ │ ├── __init__.py │ │ │ │ ├── activations.py │ │ │ │ ├── alias_free_activation │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── cuda │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── activation1d.py │ │ │ │ │ │ ├── anti_alias_activation.cpp │ │ │ │ │ │ ├── anti_alias_activation_cuda.cu │ │ │ │ │ │ ├── compat.h │ │ │ │ │ │ ├── load.py │ │ │ │ │ │ └── type_shim.h │ │ │ │ │ └── torch │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── act.py │ │ │ │ │ │ ├── filter.py │ │ │ │ │ │ └── resample.py │ │ │ │ ├── bigvgan.py │ │ │ │ ├── env.py │ │ │ │ ├── meldataset.py │ │ │ │ └── utils.py │ │ │ │ └── pwg.py │ │ ├── tbx_utils.py │ │ ├── typing.py │ │ └── visualization.py │ ├── neumann │ │ ├── __init__.py │ │ ├── chunking.py │ │ └── evaluation.py │ └── tcl │ │ ├── __init__.py │ │ ├── dc.py │ │ ├── speaker_embeddings │ │ ├── __init__.py │ │ ├── dvectors.py │ │ ├── eer_metrics.py │ │ ├── loss.py │ │ ├── student_embeddings.py │ │ └── teacher_student.py │ │ └── utils │ │ └── augmentation.py ├── data │ ├── __init__.py │ ├── batch.py │ ├── segment.py │ └── utils.py ├── io.py ├── modules │ ├── __init__.py │ ├── convnet.py │ ├── dual_path_rnn.py │ ├── fully_connected.py │ ├── normalization.py │ ├── recurrent.py │ └── wavenet │ │ ├── __init__.py │ │ ├── nv_wavenet │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── build.py │ │ ├── matrix.cpp │ │ ├── matrix.h │ │ ├── matrix_math.cuh │ │ ├── nv_wavenet.cuh │ │ ├── nv_wavenet.py │ │ ├── nv_wavenet_conversions.cuh │ │ ├── nv_wavenet_dualblock.cuh │ │ ├── nv_wavenet_persistent.cuh │ │ ├── nv_wavenet_singleblock.cuh │ │ ├── nv_wavenet_util.cuh │ │ ├── softmax.cuh │ │ ├── wavenet_infer.cu │ │ ├── wavenet_infer.h │ │ └── wavenet_infer_wrapper.cpp │ │ └── wavenet.py ├── ops │ ├── __init__.py │ ├── _stft.py │ ├── einsum.py │ ├── losses │ │ ├── __init__.py │ │ ├── classification.py │ │ ├── kl_divergence.py │ │ ├── regression.py │ │ └── source_separation.py │ ├── mappings.py │ ├── mu_law.py │ ├── sequence │ │ ├── __init__.py │ │ ├── mask.py │ │ ├── pack_module.py │ │ ├── pointwise.py │ │ └── reduction.py │ └── tensor.py ├── summary │ ├── __init__.py │ ├── model_info.py │ ├── tbx_utils.py │ └── tfevents.py ├── testing │ ├── __init__.py │ └── test_db.py ├── train │ ├── __init__.py │ ├── hooks.py │ ├── optimizer.py │ ├── runtime_tests.py │ ├── trainer.py │ └── trigger.py └── utils.py ├── pyproject.toml ├── pytest.ini ├── setup.py └── tests ├── __init__.py ├── contrib └── __init__.py ├── test_configurable.py ├── test_data └── test_segmenter.py ├── test_models ├── __init__.py └── test_bss.py ├── test_modules ├── __init__.py └── test_norm.py ├── test_ops ├── __init__.py ├── test_losses.py ├── test_sequence.py └── test_stft.py ├── test_summary └── test_tbx_utils.py └── test_train ├── test_hooks.py ├── test_optimizer.py ├── test_runtime_tests.py └── test_trainer.py /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.0.1 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | -------------------------------------------------------------------------------- /.coveragerc: 
-------------------------------------------------------------------------------- 1 | [run] 2 | omit = padertorch/contrib/* 3 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ${{ matrix.os }} 13 | strategy: 14 | fail-fast: false # Let other jobs keep running even if one fails 15 | matrix: 16 | python-version: [3.8, 3.9, "3.10", "3.11", "3.12"] 17 | os: [ubuntu-latest] 18 | include: 19 | - os: ubuntu-22.04 20 | python-version: 3.7 21 | - os: macos-latest 22 | python-version: "3.12" 23 | 24 | env: 25 | TMPDIR: /private/tmp # Default TMPDIR on macOS is /var which pathlib.Path resolves to /private/var 26 | if: matrix.os == 'macos-latest' 27 | steps: 28 | - uses: actions/checkout@v3 29 | - name: Set up Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v4 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install linux dependencies 34 | run: | 35 | sudo apt-get install libsndfile1 36 | if: matrix.os == 'ubuntu-latest' || matrix.os == 'ubuntu-22.04' 37 | - name: Install macos dependencies 38 | run: | 39 | brew install libsndfile 40 | echo $TMPDIR 41 | if: matrix.os == 'macos-latest' 42 | - name: Install python dependencies 43 | run: | 44 | python -m pip install --upgrade pip setuptools wheel 45 | pip install flake8 pytest pytest-cov codecov 46 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 47 | pip install numpy scipy Cython 48 | pip install --editable .[all] 49 | - name: Lint with flake8 50 | run: | 51 | # stop the build if there are Python syntax errors or undefined names 52 | #flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 53 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 54 | #flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 55 | - name: Run unittest and doctest on Ubuntu 56 | run: | 57 | pytest -v "tests/" "padertorch/" 58 | if: matrix.os != 'macos-latest' 59 | - name: Run unittest on macOS # Some doctests fail because numeric precision is too high on macOS 60 | run: | 61 | pytest -v "tests/" 62 | if: matrix.os == 'macos-latest' 63 | - name: Codecov 64 | run: | 65 | codecov 66 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # IPython 78 | profile_default/ 79 | ipython_config.py 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # Environments 91 | .env 92 | .venv 93 | env/ 94 | venv/ 95 | ENV/ 96 | env.bak/ 97 | venv.bak/ 98 | 99 | # Spyder project settings 100 | .spyderproject 101 | .spyproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # mkdocs documentation 107 | /site 108 | 109 | # mypy 110 | .mypy_cache/ 111 | .dmypy.json 112 | dmypy.json 113 | 114 | # Pyre type checker 115 | .pyre/ 116 | -------------------------------------------------------------------------------- /DESCRIPTION.rst: -------------------------------------------------------------------------------- 1 | PyTorch Framework 2 | ================= 3 | 4 | When first working with padertorch, have a look at `padertorch/contrib/examples`. 5 | 6 | A simple example of how to use the `padertorch.Trainer` may be found in 7 | `padertorch/contrib/examples/mask_estimator/simple_train.py` 8 | 9 | For an example of how to use the `Configurable` in combination with the `Trainer`, 10 | refer to: `padertorch/contrib/examples/pit/train.py` 11 | 12 | All other examples show different approaches for using `padertorch` and may be 13 | interpreted as specific to the use case and the preferences of the example owner. 14 | 15 | # ToDo: 16 | 17 | This module contains functions and classes where the vanilla API is messed up. 18 | 19 | The general idea is to move all independent axes to the left if possible. The 20 | exception to this rule of thumb are sequences. It is computationally more 21 | efficient to use the steps as the outer axis. This also aligns well with how 22 | `torch.nn.utils.rnn.PackedSequence` is defined. 23 | 24 | Examples of why the API is seriously broken: 25 | - torch.Tensor.size() vs. torch.nn.utils.rnn.PackedSequence().batch_sizes 26 | - torch.randn(d1, d2, ...) vs. torch.randint(low, high, size=(d1, d2, ...)) 27 | - torch.transpose(input, dim0, dim1) although input is already defined 28 | 29 | Milestones: 30 | 2. Make it possible to decode (=predict) both models 31 | - Does the batch axis stay? Christoph always wants to allow independent axes. 32 | Christoph investigates if all ops support independent axes. 33 | - How do I reconstruct the trained model? 34 | 35 | 51. 
Sequence normalization and batch norm with tracking from batch to batch 36 | - Sequence norm 37 | - Batch norm 38 | 39 | 40 | Structures: 41 | - Module (comparable to chain or chain_list in Chainer, building_blocks in PF) 42 | - Ops (comparable to ops in PF) 43 | 44 | 45 | Definitions: 46 | packed: Uses `torch.nn.utils.rnn.PackedSequence` 47 | padded: Uses `padded` and `sequence_length` 48 | 49 | padded to packed: `pack_padded_sequence` yields `PackedSequence` 50 | packed to padded: `pad_packed_sequence` yields `Tensor` 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Communications Engineering Group, Paderborn University 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /jenkins.bash: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This file is only required for internal testing 4 | # cd dirname "$(readlink -f "$0")" 5 | 6 | git clone https://github.com/fgnt/paderbox 7 | 8 | # include common stuff (installation of toolbox, paths, traps, nice level...) 
9 | source paderbox/jenkins_common.bash 10 | 11 | # Cuda 12 | source paderbox/bash/cuda.bash 13 | 14 | pip install --user -e .[test] 15 | 16 | # Unittests 17 | # It seems that jenkins currently does not work with matlab: Error: Segmentation violation 18 | 19 | # nosetests --with-xunit --with-coverage --cover-package=padertorch -v -w "tests" # --processes=-1 20 | pytest -v "tests/" "padertorch/" 21 | # Use as many processes as you have cores: --processes=-1 22 | # According to https://gist.github.com/hangtwenty/1aeb36ee85f4bdce0899 23 | # `--cov-report term` solves the problem that doctests are not included 24 | # in coverage 25 | 26 | # Export coverage 27 | python -m coverage xml --include="padertorch*" 28 | 29 | # Pylint tests 30 | pylint --rcfile="paderbox/pylint.cfg" -f parseable padertorch > pylint.txt 31 | # --files-output=y is a bad option, because it produces hundreds of files 32 | 33 | pip freeze > pip.txt 34 | pip uninstall --quiet --yes padertorch 35 | 36 | # copy html code to lighttpd webserver 37 | # rsync -a --delete-after /var/lib/jenkins/jobs/python_toolbox/workspace/toolbox/doc/build/html/ /var/www/doku/html/python_toolbox/ 38 | -------------------------------------------------------------------------------- /maintenance.md: -------------------------------------------------------------------------------- 1 | 2 | # PyPi upload 3 | 4 | Packaging a Python package / version bump. See: https://packaging.python.org/tutorials/packaging-projects/ 5 | 6 | 1. Update `setup.py` to the new version number 7 | 2. Commit this change 8 | 3. Tag and upload 9 | 10 | ## pypirc 11 | 12 | Example `~/.pypirc` (see https://packaging.python.org/en/latest/specifications/pypirc/) 13 | ``` 14 | [distutils] 15 | index-servers = 16 | pypi 17 | testpypi 18 | 19 | [pypi] 20 | username = __token__ 21 | password = 22 | 23 | [testpypi] 24 | username = __token__ 25 | password = 26 | ``` 27 | 28 | ## Install dependencies: 29 | ```bash 30 | pip install --upgrade setuptools 31 | pip install --upgrade wheel 32 | pip install --upgrade twine 33 | # pip install --upgrade bleach html5lib # some versions do not work 34 | pip install --upgrade bump2version 35 | ``` 36 | 37 | `bump2version` takes care of increasing the version number and creating the commit and tag. 38 | 39 | 40 | ## Publish 41 | 42 | ```bash 43 | export SETUP_PY_IGNORE_GIT_DEPENDENCIES=1 44 | bump2version --verbose --tag patch # major, minor or patch 45 | python setup.py sdist # bdist_wheel # It is difficult to get bdist_wheel working with binary files 46 | git push origin --tags 47 | # Wait for the github action to build the windows wheels, ToDo: Fix wheels. 48 | twine upload --repository testpypi dist/* # 49 | twine upload dist/* 50 | git push 51 | ``` 52 | -------------------------------------------------------------------------------- /padertorch/__init__.py: -------------------------------------------------------------------------------- 1 | from padertorch import utils 2 | from padertorch.train import trainer, optimizer 3 | from padertorch.train.trainer import * 4 | 5 | from . import base 6 | from . import configurable 7 | from . import data 8 | from . import ops 9 | from . import summary 10 | from . import io 11 | from .base import * 12 | from .configurable import Configurable 13 | from .ops import * 14 | 15 | # This import has to be late, otherwise you cannot use pt.Models in models. 16 | from . 
import modules 17 | -------------------------------------------------------------------------------- /padertorch/contrib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/cb/__init__.py: -------------------------------------------------------------------------------- 1 | from .io import ( 2 | get_new_folder, 3 | write_makefile_and_config, 4 | ) 5 | from . import data 6 | 7 | -------------------------------------------------------------------------------- /padertorch/contrib/cb/array.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def overlap_add( 5 | tensor, 6 | shift, 7 | ): 8 | """ 9 | 10 | >>> overlap_add(torch.arange(12).to(torch.float).reshape(3, 4), 4) 11 | tensor([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11.]) 12 | >>> overlap_add(torch.arange(12).to(torch.float).reshape(3, 4), 2) 13 | tensor([ 0., 1., 6., 8., 14., 16., 10., 11.]) 14 | >>> overlap_add(torch.ones(12).to(torch.float).reshape(3, 4), 2) 15 | tensor([1., 1., 2., 2., 2., 2., 1., 1.]) 16 | >>> overlap_add(torch.ones(2, 3, 4, 5).to(torch.float), 2).shape 17 | torch.Size([2, 3, 11]) 18 | """ 19 | *independent, frames, frequencies = tensor.shape 20 | 21 | samples = frequencies + frames * shift - shift 22 | tensor = tensor.reshape(-1, frames, frequencies).transpose(-1, -2) 23 | out = torch.nn.Fold( 24 | output_size=(1, samples), 25 | kernel_size=(1, frequencies), 26 | dilation=1, 27 | padding=0, 28 | stride=(1, shift), 29 | )(tensor) 30 | return out.squeeze(-3).squeeze(-2).reshape(*independent, samples) 31 | -------------------------------------------------------------------------------- /padertorch/contrib/cb/complex.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | __all__ = { 5 | } 6 | 7 | 8 | def is_torch(obj): 9 | """ 10 | The namespace here is not torch, hence rename is_tensor to is_torch. 11 | 12 | >>> is_torch(np.zeros(3)) 13 | False 14 | >>> is_torch(torch.zeros(3)) 15 | True 16 | >>> from torch_complex import ComplexTensor 17 | >>> is_torch(ComplexTensor(np.zeros(3))) 18 | True 19 | """ 20 | if torch.is_tensor(obj): 21 | return True 22 | if type(obj).__name__ == 'ComplexTensor': 23 | from torch_complex import ComplexTensor 24 | if isinstance(obj, ComplexTensor): 25 | return True 26 | return False 27 | 28 | -------------------------------------------------------------------------------- /padertorch/contrib/cb/hooks.py: -------------------------------------------------------------------------------- 1 | import signal 2 | from padertorch.train.hooks import StopTrainingHook, StopTraining, Hook 3 | from padertorch.train.trigger import Trigger, IntervalTrigger 4 | 5 | 6 | class CPUTimeLimitExceededHookTrigger(Trigger): 7 | """ 8 | Graceful shutdown of training. 9 | 10 | Shutdown after next iteration (i.e. as fast as possible, finish validation) 11 | $ ccssignal XCPU 12 | Use `ccsalloc ... --notifyjob=XCPU,60m ...` to let ccs send the signal. 13 | 14 | Shutdown after next epoch (i.e. 
finish current epoch, good iterator state) 15 | $ ccssignal USR1 # Shutdown after next epoch 16 | 17 | """ 18 | def __init__(self): 19 | self._SIGXCPU_received = False 20 | self._SIGUSR1_received = False 21 | signal.signal(signal.SIGXCPU, self.handler_SIGXCPU) 22 | signal.signal(signal.SIGUSR1, self.handler_SIGUSR1) 23 | 24 | self.epoch_trigger = IntervalTrigger(1, 'epoch') 25 | 26 | def handler_SIGXCPU(self, signum, frame): 27 | print('Received SIGXCPU: CPU time limit exceeded') 28 | print('Gracefully shutting down training') 29 | self._SIGXCPU_received = True 30 | 31 | def handler_SIGUSR1(self, signum, frame): 32 | print('Received SIGUSR1: User-defined signal 1.') 33 | print(f'Gracefully shutting down training when epoch ' 34 | f'{self.epoch_trigger.last + 1} is finished') 35 | self._SIGUSR1_received = True 36 | 37 | def set_last(self, iteration, epoch): 38 | pass 39 | 40 | def __call__(self, iteration, epoch): 41 | return ( 42 | ( 43 | self.epoch_trigger(iteration, epoch) 44 | and self._SIGUSR1_received 45 | ) 46 | or self._SIGXCPU_received 47 | ) 48 | 49 | 50 | class CPUTimeLimitExceededHook(StopTrainingHook): 51 | def __init__(self): 52 | # Do not call super, to prevent a copy of this trigger 53 | self.trigger = CPUTimeLimitExceededHookTrigger() 54 | 55 | 56 | class PyroHook(Hook): 57 | 58 | pyro_inspector = None 59 | 60 | def pre_step(self, trainer): 61 | from cbj.pyro_inspect import PyroInspector 62 | if self.pyro_inspector is None: 63 | self.pyro_inspector = PyroInspector(2) 64 | self.pyro_inspector.__enter__() 65 | 66 | def close(self, trainer): 67 | if self.pyro_inspector is not None: 68 | self.pyro_inspector.__exit__() 69 | 70 | 71 | if __name__ == '__main__': 72 | 73 | import os 74 | import time 75 | from threading import Thread 76 | 77 | hook = CPUTimeLimitExceededHook() 78 | 79 | pid = os.getpid() 80 | 81 | def killer(): 82 | time.sleep(2.5) 83 | os.kill(pid, signal.SIGXCPU) 84 | 85 | thread = Thread(target=killer) 86 | thread.start() 87 | 88 | class Trainer: 89 | iteration = 0 90 | epoch = 0 91 | 92 | 93 | try: 94 | while True: 95 | print(time.perf_counter()) 96 | if hook.pre_step(Trainer()): 97 | break 98 | time.sleep(1) 99 | except StopTraining: 100 | print('StopTraining') 101 | thread.join() 102 | 103 | 104 | hook = CPUTimeLimitExceededHook() 105 | 106 | def killer(): 107 | time.sleep(0.5) 108 | os.kill(pid, signal.SIGUSR1) 109 | 110 | thread = Thread(target=killer) 111 | thread.start() 112 | 113 | class Trainer: 114 | iteration = 0 115 | epoch = 0 116 | 117 | try: 118 | while True: 119 | Trainer.iteration += 1 120 | if (Trainer.iteration % 5) == 0: 121 | Trainer.epoch += 1 122 | print(time.perf_counter(), Trainer.iteration, Trainer.epoch) 123 | if hook.pre_step(Trainer()): 124 | break 125 | time.sleep(1) 126 | except StopTraining: 127 | print('StopTraining') 128 | thread.join() 129 | -------------------------------------------------------------------------------- /padertorch/contrib/cb/tensorboard_symlink_tree.py: -------------------------------------------------------------------------------- 1 | """ 2 | Create a symlink tree for all specified files in the current folder. 3 | 4 | python -m padertorch.contrib.cb.tensorboard_symlink_tree ../*/*tfevents* --max_age=1days 5 | 6 | Use case: 7 | 8 | Tensorboard does a recursive search for all tfevent files. 9 | In many cases this works well and is better than this workaround. 10 | 11 | When you have a slow recursive search, this script can be used as a workaround. 
12 | This can be caused by a slow filesystem (usually remote) and too many files 13 | inside the tensorboard folder (e.g. a Kaldi experiment folder). 14 | 15 | The problem with tensorboard in this case is that it supports neither 16 | multiple tfevent files in the command line interface (only one is supported) nor a 17 | customisation of the search pattern for the event files (e.g. limited depth 18 | search). 19 | 20 | This workaround mirrors the folder tree, but only for the files that are the 21 | input of this file. On the command line you can use bash wildcards like `*`: 22 | 23 | python -m padertorch.contrib.cb.tensorboard_symlink_tree ../*/*tfevents* 24 | 25 | This command creates symlinks to all tfevent files that match the pattern 26 | `../*/*tfevents*` in the current folder. 27 | Sadly, this command has to be executed each time you create a new experiment. 28 | Because of this I created a Makefile in that folder: 29 | 30 | .../tensorboard$ cat Makefile 31 | symlink_tree1day: 32 | find . -xtype l -delete # Remove broken symlinks: https://unix.stackexchange.com/a/314975/283777 33 | python -m padertorch.contrib.cb.tensorboard_symlink_tree --prefix=.. ../*/*tfevents* --max_age=1days 34 | 35 | symlink_tree: 36 | find . -xtype l -delete # Remove broken symlinks: https://unix.stackexchange.com/a/314975/283777 37 | python -m padertorch.contrib.cb.tensorboard_symlink_tree --prefix=.. ../*/*tfevents* 38 | 39 | tensorboard: 40 | date && $(cd .../tensorboard && ulimit -v 10000000 && tensorboard --bind_all -v 1 --logdir=. --port=...) && date || date 41 | 42 | """ 43 | 44 | import os 45 | from pathlib import Path 46 | import datetime 47 | 48 | import paderbox as pb 49 | 50 | 51 | def main(*files, prefix=None, max_age=None): 52 | if prefix is None: 53 | prefix = os.path.commonpath(files) 54 | print('Common Prefix', prefix) 55 | print('Create') 56 | 57 | files = [Path(f) for f in files] 58 | 59 | if max_age is not None: 60 | # The pandas import is slow, but pd.Timedelta 61 | # accepts many styles for time 62 | # (e.g. '1day') 63 | import pandas as pd 64 | max_age = pd.Timedelta(max_age) 65 | now = pd.Timestamp('now') 66 | 67 | files = sorted(files, key=lambda file: file.stat().st_mtime) 68 | 69 | for file in files: 70 | link_name = file.relative_to(prefix) 71 | if max_age is not None: 72 | last_modified = file.stat().st_mtime 73 | last_modified = datetime.datetime.fromtimestamp(last_modified) 74 | 75 | if max_age > now - last_modified: 76 | # Create symlink if it doesn't exist. 77 | pass 78 | else: 79 | if not link_name.is_symlink(): 80 | print(f'Skip {file}, it is {now - last_modified} > {max_age} old.') 81 | continue 82 | 83 | link_name.parent.mkdir(exist_ok=True) 84 | source = os.path.relpath(file, link_name.parent) 85 | if not link_name.exists(): 86 | print(f'\t{link_name} -> {source}') 87 | 88 | # Create symlink if it does not exist, 89 | # or check that the symlink points to the 90 | # same file. 
91 | pb.io.symlink(source, link_name) 92 | print('Finish') 93 | 94 | 95 | if __name__ == '__main__': 96 | import fire 97 | fire.Fire(main) 98 | -------------------------------------------------------------------------------- /padertorch/contrib/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/data/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/data/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import paderbox as pb 3 | 4 | 5 | def check_audio_files_exist( 6 | database_dict, 7 | speedup=None, 8 | extensions=('.wav', '.wv2', '.wv1', '.flac'), 9 | ): 10 | """ 11 | No structure for the database_dict is assumed. It will just search for all 12 | string values ending with a certain file type (e.g. wav). 13 | 14 | >>> check_audio_files_exist({2: [1, '/net/db/timit/pcm/train/dr1/fcjf0/sa1.wav', 'abc.wav']}) 15 | Traceback (most recent call last): 16 | ... 17 | AssertionError: ('abc.wav', (2, '2')) 18 | >>> check_audio_files_exist(1) 19 | Traceback (most recent call last): 20 | ... 21 | AssertionError: Expect at least one wav file. It is likely that the database folder is empty and the greps did not work. to_check: {} 22 | >>> check_audio_files_exist('abc.wav') 23 | Traceback (most recent call last): 24 | ... 25 | AssertionError: ('abc.wav', ()) 26 | >>> check_audio_files_exist('/net/db/timit/pcm/train/dr1/fcjf0/sa1.wav') 27 | >>> check_audio_files_exist(1, speedup='thread') 28 | Traceback (most recent call last): 29 | ... 30 | AssertionError: Expect at least one wav file. It is likely that the database folder is empty and the greps did not work. to_check: {} 31 | >>> check_audio_files_exist('abc.wav', speedup='thread') 32 | Traceback (most recent call last): 33 | ... 34 | AssertionError: ('abc.wav', ()) 35 | >>> check_audio_files_exist('/net/db/timit/pcm/train/dr1/fcjf0/sa1.wav', speedup='thread') 36 | """ 37 | 38 | def path_exists(path): 39 | return Path(path).exists() 40 | 41 | def body(file_key_path): 42 | key_path, file = file_key_path 43 | assert path_exists(file), (file, key_path) 44 | 45 | def condition_fn(file): 46 | return isinstance(file, (str, Path)) and str(file).endswith(extensions) 47 | 48 | to_check = { 49 | k: v for k, v in pb.utils.nested.flatten(database_dict).items() 50 | if condition_fn(v) 51 | } 52 | 53 | assert len(to_check) > 0, ( 54 | f'Expect at least one wav file. ' 55 | f'It is likely that the database folder is empty ' 56 | f'and the greps did not work. to_check: {to_check}' 57 | ) 58 | 59 | if speedup and 'thread' == speedup: 60 | import os 61 | from multiprocessing.pool import ThreadPool 62 | 63 | # Use this number because ThreadPoolExecutor is often 64 | # used to overlap I/O instead of CPU work. 65 | # See: concurrent.futures.ThreadPoolExecutor 66 | # max_workers = (os.cpu_count() or 1) * 5 67 | 68 | # Both variants are not sufficiently benchmarked; this is the more conservative choice. 
69 | max_workers = (os.cpu_count() or 1) 70 | 71 | with ThreadPool(max_workers) as pool: 72 | for _ in pool.imap_unordered( 73 | body, 74 | to_check.items() 75 | ): 76 | pass 77 | 78 | elif speedup is None: 79 | for key_path, file in to_check.items(): 80 | assert path_exists(file), (file, key_path) 81 | else: 82 | raise ValueError(speedup, type(speedup)) 83 | -------------------------------------------------------------------------------- /padertorch/contrib/data/wsj0_mix/README.md: -------------------------------------------------------------------------------- 1 | # WSJ0-mix data preparation 2 | 3 | To prepare the wsj0-2mix and wsj0-3mix data, follow these steps: 4 | 1. Generate the mixtures using the matlab scripts. 5 | 2. Edit `prepare_data.sh` to match your paths. You need to specify paths to the generated data and to the WSJ(0) database. WSJ(0) is required to obtain transcriptions. You can edit the `--json_path` parameter to specify the path to the output JSON. 6 | 3. Run `prepare_data.sh`. 7 | 8 | This script creates a JSON file that can be used by the examples. 9 | The JSON file is compatible with `lazy_dataset.database.JsonDatabase`. 10 | An example of reading data: 11 | 12 | ```python 13 | from lazy_dataset.database import JsonDatabase 14 | import numpy as np 15 | import paderbox as pb 16 | 17 | db = JsonDatabase("/path/to/JSON.json") 18 | 19 | dataset = db.get_dataset("mix_2_spk_min") 20 | 21 | def pre_batch_transform(inputs): 22 | return { 23 | 's': np.ascontiguousarray([ 24 | pb.io.load_audio(p) 25 | for p in inputs['audio_path']['speech_source'] 26 | ], np.float32), 27 | 'y': np.ascontiguousarray( 28 | pb.io.load_audio(inputs['audio_path']['observation']), np.float32), 29 | 'num_samples': inputs['num_samples'], 30 | 'example_id': inputs['example_id'], 31 | 'audio_path': inputs['audio_path'], 32 | } 33 | dataset = dataset.map(pre_batch_transform) 34 | 35 | example = dataset[0] 36 | ``` 37 | -------------------------------------------------------------------------------- /padertorch/contrib/data/wsj0_mix/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/data/wsj0_mix/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/data/wsj0_mix/prepare_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Put your paths here! 
4 | if [[ "$(hostname -d)" == nt.uni-paderborn.de ]]; then 5 | # NT specific defaults 6 | database_path=/net/db/merl_speaker_mixtures/data 7 | wsj0_root=/net/db/wsj 8 | elif [[ "${PC2SYSNAME}" == OCULUS ]]; then 9 | database_path=/scratch/hpc-prf-nt1/cbj/net/db/merl_speaker_mixtures/data 10 | wsj0_root=/scratch/hpc-prf-nt1/cbj/net/db/wsj 11 | else 12 | # Path to the database as generated by the matlab scripts 13 | database_path= 14 | 15 | # If you need transcriptions, put the path to the WSJ root folder here 16 | wsj0_root= 17 | fi 18 | 19 | python -m padertorch.contrib.data.wsj0_mix.create_json \ 20 | --database_path ${database_path} \ 21 | --wsj0_root ${wsj0_root} \ 22 | --json_path wsj0_mix_min_8k.json \ 23 | --num_speakers 2 --num_speakers 3 \ 24 | --signal_length min \ 25 | --sample_rate wav8k -------------------------------------------------------------------------------- /padertorch/contrib/examples/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | import padercontrib.database 3 | except Exception: 4 | import warnings 5 | warnings.warn( 6 | "These examples depend on our internal database structure " 7 | "at the moment. " 8 | "Trying to execute them anyway may take considerable " 9 | "effort on your part." 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/audio_synthesis/wavenet/README.md: -------------------------------------------------------------------------------- 1 | # WaveNet Vocoder 2 | 3 | This example trains and evaluates a WaveNet vocoder synthesising waveforms 4 | from log mel spectrograms. The WaveNet is trained on the LibriSpeech corpus. 5 | 6 | ## Training 7 | 8 | The training script needs a JSON file that describes the structure of your 9 | database in the following format: 10 | ``` 11 | { 12 | "datasets": { 13 | <dataset name>: { 14 | <example id>: { 15 | "audio_path": { 16 | "observation": <path to audio file> 17 | }, 18 | "num_samples": <number of samples> 19 | }, 20 | <example id>: { 21 | ... 22 | }, 23 | ... 24 | }, 25 | <dataset name>: { 26 | <example id>: { 27 | ... 28 | }, 29 | ... 30 | }, 31 | ... 32 | } 33 | } 34 | ``` 35 | 36 | To start the training, first define a path to where the trained models should 37 | be saved: 38 | ```bash 39 | export STORAGE_ROOT=<path to storage root>; python -m padertorch.contrib.examples.audio_synthesis.wavenet.train 40 | ``` 41 | Your trained models can be found in `$STORAGE_ROOT/wavenet/`. 42 | 43 | Note that the data input pipeline only extracts STFTs while the log mel 44 | extraction and normalization are done in the model. 45 | 46 | ## Evaluation 47 | 48 | The evaluation script loads the best checkpoint (lowest achieved loss on the 49 | validation set) and performs autoregressive waveform synthesis. 50 | For test-time synthesis nv-wavenet needs to be installed. 51 | Do note that nv-wavenet requires a GPU with Compute Capability 6.0 or later 52 | (https://developer.nvidia.com/cuda-gpus), i.e., you can neither run the 53 | evaluation on a CPU nor, e.g., on a GTX 980. 54 | If nv-wavenet is not installed yet run 55 | ```bash 56 | cd /path/to/padertorch/padertorch/modules/wavenet/nv_wavenet 57 | ``` 58 | Update the Makefile with the appropriate ARCH, e.g., ARCH=sm_70 for Compute Capability 7.0. 
59 | Then run 60 | ```bash 61 | make 62 | python build.py install 63 | ``` 64 | 65 | To run an evaluation, provide the evaluation script with the path to your trained model: 66 | ```bash 67 | mpiexec -np $(nproc --all) python -m padertorch.contrib.examples.audio_synthesis.wavenet.evaluate with exp_dir=<path to trained model> 68 | ``` 69 | It requires [dlp_mpi](https://github.com/fgnt/dlp_mpi) to be installed. 70 | 71 | Evaluation results can be found in `<exp_dir>/eval/`. 72 | For each example the root mean squared error between the true waveform and the 73 | synthesised one is saved to a file `rmse.json`. 74 | The 10 best and worst synthesised waveforms are saved in a subdirectory `audio`. 75 | 76 | If you want to run evaluation on only a few examples, run 77 | ```bash 78 | python -m padertorch.contrib.examples.audio_synthesis.wavenet.evaluate with exp_dir=<path to trained model> max_examples=10 79 | ``` 80 | 81 | ## Results 82 | 83 | | Training set | Test set | RMSE | 84 | | :-----: | :-----: | :---: | 85 | | train_clean_100 + train_clean_360 | test_clean | 0.084 | 86 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/audio_synthesis/wavenet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/examples/audio_synthesis/wavenet/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/examples/audio_synthesis/wavenet/data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from padertorch.contrib.je.data.transforms import AudioReader, STFT, Collate 3 | from padertorch.data.segment import Segmenter 4 | 5 | 6 | def prepare_dataset( 7 | dataset, audio_reader, stft, max_length_in_sec=1., batch_size=3, 8 | is_train_set=False, shuffle=False 9 | ): 10 | 11 | def prepare_example(example): 12 | example['audio_path'] = example['audio_path']['observation'] 13 | return example 14 | 15 | dataset = dataset.map(prepare_example) 16 | 17 | audio_reader = AudioReader(**audio_reader) 18 | dataset = dataset.map(audio_reader) 19 | 20 | anchor = 'random' if is_train_set else 'centered_cutout' 21 | if max_length_in_sec is None: 22 | dataset = dataset.map(lambda ex: [ex]) 23 | else: 24 | segmenter = Segmenter( 25 | length=int(max_length_in_sec*audio_reader.target_sample_rate), 26 | include_keys=('audio_data',), mode='max', anchor=anchor 27 | ) 28 | dataset = dataset.map(segmenter) 29 | 30 | stft = STFT(**stft) 31 | dataset = dataset.batch_map(stft) 32 | 33 | def finalize(example): 34 | return { 35 | 'example_id': example['example_id'], 36 | 'audio_data': example['audio_data'].astype(np.float32), 37 | 'stft': example['stft'].astype(np.float32), 38 | 'seq_len': example['stft'].shape[1], 39 | } 40 | dataset = dataset.batch_map(finalize) 41 | 42 | if shuffle: 43 | dataset = dataset.shuffle(reshuffle=True) 44 | dataset = dataset.prefetch( 45 | num_workers=8, buffer_size=10*batch_size 46 | ).unbatch() 47 | if shuffle: 48 | dataset = dataset.shuffle( 49 | reshuffle=True, buffer_size=10*batch_size 50 | ) 51 | return dataset.batch_dynamic_time_series_bucket( 52 | batch_size=batch_size, len_key='seq_len', max_padding_rate=0.05, 53 | expiration=1000*batch_size, drop_incomplete=shuffle, 54 | sort_key='seq_len', reverse_sort=True 55 | ).map(Collate()) 56 | -------------------------------------------------------------------------------- 
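A minimal sketch of how `prepare_dataset` above can be driven; the JSON path and dataset name are placeholders, while the `audio_reader` and `stft` settings mirror the defaults used in `train.py` below:

```python
from lazy_dataset.database import JsonDatabase
from padertorch.contrib.examples.audio_synthesis.wavenet.data import (
    prepare_dataset,
)

db = JsonDatabase('/path/to/librispeech.json')  # placeholder path
training_data = prepare_dataset(
    db.get_dataset('train_clean_100'),  # placeholder dataset name
    audio_reader={'source_sample_rate': 16000, 'target_sample_rate': 16000},
    stft={'shift': 200, 'window_length': 800, 'size': 1024,
          'fading': 'full', 'pad': True},
    max_length_in_sec=1., batch_size=3, is_train_set=True, shuffle=True,
)
# Each batch is a dict with 'example_id', 'audio_data', 'stft' and 'seq_len'.
batch = next(iter(training_data))
```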
/padertorch/contrib/examples/audio_synthesis/wavenet/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from einops import rearrange 3 | from padertorch import modules 4 | from padertorch.base import Model 5 | from padertorch.contrib.je.modules.features import MelTransform 6 | from padertorch.modules.normalization import InputNormalization 7 | from padertorch.ops import mu_law_decode 8 | 9 | 10 | class WaveNet(Model): 11 | def __init__( 12 | self, 13 | wavenet, 14 | sample_rate, stft_size, 15 | number_of_mel_filters, lowest_frequency=50, highest_frequency=None 16 | ): 17 | super().__init__() 18 | self.wavenet = wavenet 19 | self.sample_rate = sample_rate 20 | self.mel_transform = MelTransform( 21 | number_of_filters=number_of_mel_filters, 22 | sample_rate=sample_rate, stft_size=stft_size, 23 | lowest_frequency=lowest_frequency, highest_frequency=highest_frequency, 24 | ) 25 | self.in_norm = InputNormalization( 26 | data_format='bcft', 27 | shape=(None, 1, number_of_mel_filters, None), 28 | statistics_axis='bt', 29 | independent_axis=None, 30 | ) 31 | 32 | def feature_extraction(self, x, seq_len=None): 33 | x = self.mel_transform(torch.sum(x**2, dim=(-1,))).transpose(-2, -1) 34 | x = self.in_norm(x, sequence_lengths=seq_len) 35 | x = rearrange(x, 'b c f t -> b (c f) t') 36 | return x 37 | 38 | def forward(self, inputs): 39 | x = inputs['stft'] 40 | seq_len = inputs['seq_len'] 41 | x = self.feature_extraction(x, seq_len) 42 | return self.wavenet(x.squeeze(1), inputs['audio_data'].squeeze(1)) 43 | 44 | def review(self, inputs, outputs): 45 | predictions, targets = outputs 46 | ce = torch.nn.CrossEntropyLoss(reduction='none')(predictions, targets) 47 | summary = dict( 48 | loss=ce.mean(), 49 | scalars=dict(), 50 | histograms=dict(reconstruction_ce=ce), 51 | audios=dict( 52 | target=(inputs['audio_data'][0], self.sample_rate), 53 | decode=( 54 | mu_law_decode( 55 | torch.argmax(outputs[0][0], dim=0), 56 | mu_quantization=self.wavenet.n_out_channels), 57 | self.sample_rate) 58 | ), 59 | images=dict() 60 | ) 61 | return summary 62 | 63 | @classmethod 64 | def finalize_dogmatic_config(cls, config): 65 | config['wavenet']['factory'] = modules.WaveNet 66 | config['wavenet']['n_cond_channels'] = config['number_of_mel_filters'] 67 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/audio_synthesis/wavenet/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example call: 3 | 4 | export STORAGE_ROOT=<your desired storage root> 5 | python -m padertorch.contrib.examples.audio_synthesis.wavenet.train 6 | """ 7 | import os 8 | from pathlib import Path 9 | 10 | from lazy_dataset.database import JsonDatabase 11 | from padertorch.contrib.examples.audio_synthesis.wavenet.data import \ 12 | prepare_dataset 13 | from padertorch.contrib.examples.audio_synthesis.wavenet.model import WaveNet 14 | from padertorch.io import get_new_storage_dir 15 | from padertorch.train.optimizer import Adam 16 | from padertorch.train.trainer import Trainer 17 | from sacred import Experiment, commands 18 | from sacred.observers import FileStorageObserver 19 | 20 | ex = Experiment('wavenet') 21 | 22 | 23 | @ex.config 24 | def config(): 25 | database_json = ( 26 | str((Path(os.environ['NT_DATABASE_JSONS_DIR']) / 'librispeech.json').expanduser()) 27 | if 'NT_DATABASE_JSONS_DIR' in os.environ else None 28 | ) 29 | assert database_json is not None, ( 30 | 'database_json cannot be None.\n' 31 | 'Either start the 
training with "python -m padertorch.contrib.examples.' 32 | 'audio_synthesis.wavenet.train with database_json=" ' 33 | 'or make sure there is an environment variable "NT_DATABASE_JSONS_DIR"' 34 | 'pointing to a directory with a "librispeech.json" in it (see README ' 35 | 'for the JSON format).' 36 | ) 37 | training_sets = ['train_clean_100', 'train_clean_360'] 38 | validation_sets = ['dev_clean'] 39 | audio_reader = { 40 | 'source_sample_rate': 16000, 41 | 'target_sample_rate': 16000, 42 | } 43 | stft = { 44 | 'shift': 200, 45 | 'window_length': 800, 46 | 'size': 1024, 47 | 'fading': 'full', 48 | 'pad': True, 49 | } 50 | max_length_in_sec = 1. 51 | batch_size = 3 52 | number_of_mel_filters = 80 53 | trainer = { 54 | 'model': { 55 | 'factory': WaveNet, 56 | 'wavenet': { 57 | 'n_cond_channels': number_of_mel_filters, 58 | 'upsamp_window': stft['window_length'], 59 | 'upsamp_stride': stft['shift'], 60 | 'fading': stft['fading'], 61 | }, 62 | 'sample_rate': audio_reader['target_sample_rate'], 63 | 'stft_size': stft['size'], 64 | 'number_of_mel_filters': number_of_mel_filters, 65 | 'lowest_frequency': 50 66 | }, 67 | 'optimizer': { 68 | 'factory': Adam, 69 | 'lr': 5e-4, 70 | }, 71 | 'storage_dir': get_new_storage_dir( 72 | 'wavenet', id_naming='time', mkdir=False 73 | ), 74 | 'summary_trigger': (1_000, 'iteration'), 75 | 'checkpoint_trigger': (10_000, 'iteration'), 76 | 'stop_trigger': (200_000, 'iteration'), 77 | } 78 | trainer = Trainer.get_config(trainer) 79 | resume = False 80 | ex.observers.append(FileStorageObserver.create(trainer['storage_dir'])) 81 | 82 | 83 | @ex.automain 84 | def main( 85 | _run, _log, trainer, database_json, training_sets, validation_sets, 86 | audio_reader, stft, max_length_in_sec, batch_size, resume 87 | ): 88 | commands.print_config(_run) 89 | trainer = Trainer.from_config(trainer) 90 | storage_dir = Path(trainer.storage_dir) 91 | storage_dir.mkdir(parents=True, exist_ok=True) 92 | commands.save_config( 93 | _run.config, _log, config_filename=str(storage_dir / 'config.json') 94 | ) 95 | 96 | db = JsonDatabase(database_json) 97 | training_data = db.get_dataset(training_sets) 98 | validation_data = db.get_dataset(validation_sets) 99 | training_data = prepare_dataset( 100 | training_data, audio_reader=audio_reader, stft=stft, 101 | max_length_in_sec=max_length_in_sec, batch_size=batch_size, shuffle=True 102 | ) 103 | validation_data = prepare_dataset( 104 | validation_data, audio_reader=audio_reader, stft=stft, 105 | max_length_in_sec=max_length_in_sec, batch_size=batch_size, shuffle=False 106 | ) 107 | 108 | trainer.test_run(training_data, validation_data) 109 | trainer.register_validation_hook(validation_data) 110 | trainer.train(training_data, resume=resume) 111 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/examples.md: -------------------------------------------------------------------------------- 1 | ../../../doc/examples.md -------------------------------------------------------------------------------- /padertorch/contrib/examples/sound_recognition/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/examples/sound_recognition/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/examples/sound_recognition/audio_tagging/README.md: 
-------------------------------------------------------------------------------- 1 | # Audio Tagging 2 | 3 | This example trains and evaluates an audio tagging system based on WALNet [1] 4 | trained on AudioSet. A more sophisticated model for audio tagging and weakly 5 | labeled sound event detection which is also based on padertorch can be found at 6 | https://github.com/fgnt/pb_sed. 7 | 8 | ## Training 9 | 10 | The training script needs a JSON file that describes the structure of your 11 | database in the following format: 12 | ``` 13 | { 14 | "datasets": { 15 | <dataset name>: { 16 | <example id>: { 17 | "audio_path": <path to audio file>, 18 | "audio_length": <audio length in seconds>, 19 | "events": <list of events>, 20 | }, 21 | <example id>: { 22 | ... 23 | }, 24 | ... 25 | }, 26 | <dataset name>: { 27 | <example id>: { 28 | ... 29 | }, 30 | ... 31 | }, 32 | ... 33 | } 34 | } 35 | ``` 36 | It is expected that it contains datasets "validate" and "eval" (in our case, 37 | validate is a small part of the unbalanced train set). 38 | 39 | To start the training, first define a path to where the trained models should 40 | be saved: 41 | ```bash 42 | export STORAGE_ROOT=<path to storage root>; python -m padertorch.contrib.examples.sound_recognition.audio_tagging.train 43 | ``` 44 | Your trained models can be found in `$STORAGE_ROOT/audio_tagging/`. 45 | 46 | Note that the data input pipeline only extracts STFTs while the log mel 47 | extraction and normalization are done in the model. 48 | 49 | ## Evaluation 50 | 51 | The evaluation script loads the best checkpoint (by default the checkpoint with 52 | the highest achieved mAP on the validation set) and runs evaluation on the 53 | eval set. 54 | 55 | To run an evaluation, provide the evaluation script with the path to your trained model: 56 | ```bash 57 | python -m padertorch.contrib.examples.sound_recognition.audio_tagging.evaluate with exp_dir=<path to trained model> 58 | ``` 59 | 60 | Evaluation results can be found in `<exp_dir>/eval/`. 61 | In the file `overall.json` metrics averaged over all events can be found for 62 | the validation and eval sets. In the file `event_wise.json` you can find 63 | metrics for each event separately sorted by AP performance on the eval set. 64 | Further, there are files `fn.json` and `fp.json` in which the system's false 65 | negative and false positive predictions are saved. 66 | 67 | 68 | ## Results 69 | 70 | | Training set | Decision threshold tuning | Test set | mAP | mAUC | lwlrap | mF1 | 71 | | :-----: | :-----: | :-----: | :---: | :---: | :---: | :---: | 72 | | balanced_train | validate | validate | 22.02 | 92.16 | 48.4 | 31.76 | 73 | | balanced_train | validate | eval | 23.28 | 93.55 | 49.69 | 25.73 | 74 | 75 | The table above reports mean Average Precision (mAP), mean Area Under ROC Curve 76 | (mAUC), label weighted label-ranking average precision (lwlrap) and mean 77 | F1-score (mF1) in %. Here, "mean" refers to macro-averaging over the 78 | event-wise metrics. While mAP, mAUC and lwlrap do not rely on decision 79 | thresholds, the computation of F1 scores requires thresholds. Therefore, the 80 | event-specific decision thresholds are tuned on the validation set to give the best 81 | F1 scores. The big gap (>6%) between mF1 performance on the validation set 82 | and eval set can be explained by the poor generalization of the decision 83 | thresholds. 84 | 85 | [1] Shah, Ankit and Kumar, Anurag and Hauptmann, Alexander G and Raj, Bhiksha. 
86 | "A closer look at weak label learning for audio events", 87 | arXiv preprint arXiv:1804.09288, 2018 88 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/sound_recognition/audio_tagging/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/examples/sound_recognition/audio_tagging/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/examples/sound_recognition/audio_tagging/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example call: 3 | 4 | export STORAGE_ROOT= 5 | python -m padertorch.contrib.examples.sound_recognition.audio_tagging.train 6 | """ 7 | import os 8 | from pathlib import Path 9 | 10 | from paderbox.utils.random_utils import Uniform 11 | from padertorch import Trainer 12 | from padertorch.contrib.examples.sound_recognition.audio_tagging.data import \ 13 | get_datasets 14 | from padertorch.contrib.examples.sound_recognition.audio_tagging.model import \ 15 | WALNet 16 | from padertorch.io import get_new_storage_dir 17 | from padertorch.train.optimizer import Adam 18 | from sacred import Experiment, commands 19 | from sacred.observers import FileStorageObserver 20 | 21 | ex = Experiment('audio_tagging') 22 | 23 | 24 | @ex.config 25 | def config(): 26 | database_json = ( 27 | str((Path(os.environ['NT_DATABASE_JSONS_DIR']) / 'audio_set.json').expanduser()) 28 | if 'NT_DATABASE_JSONS_DIR' in os.environ else None 29 | ) 30 | assert database_json is not None, ( 31 | 'database_json cannot be None.\n' 32 | 'Either start the training with "python -m padertorch.contrib.examples.' 33 | 'audio_synthesis.wavenet.train with database_json=" ' 34 | 'or make sure there is an environment variable "NT_DATABASE_JSONS_DIR"' 35 | 'pointing to a directory with a "audio_set.json" in it (see README ' 36 | 'for the JSON format).' 
37 | ) 38 | training_set = 'balanced_train' 39 | audio_reader = { 40 | 'source_sample_rate': 44_100, 41 | 'target_sample_rate': 44_100, 42 | } 43 | stft = { 44 | 'shift': 882, 45 | 'window_length': 2*882, 46 | 'size': 2048, 47 | 'fading': None, 48 | 'pad': False, 49 | } 50 | num_workers = 8 51 | batch_size = 24 52 | max_padding_rate = .05 53 | trainer = { 54 | 'model': { 55 | 'factory': WALNet, 56 | 'sample_rate': audio_reader['target_sample_rate'], 57 | 'stft_size': stft['size'], 58 | 'output_size': 527, 59 | }, 60 | 'optimizer': { 61 | 'factory': Adam, 62 | 'lr': 3e-4, 63 | 'gradient_clipping': 60., 64 | }, 65 | 'storage_dir': get_new_storage_dir( 66 | 'audio_tagging', id_naming='time', mkdir=False 67 | ), 68 | 'summary_trigger': (100, 'iteration'), 69 | 'checkpoint_trigger': (1_000, 'iteration'), 70 | 'stop_trigger': (50_000, 'iteration'), 71 | } 72 | trainer = Trainer.get_config(trainer) 73 | validation_metric = 'map' 74 | maximize_metric = True 75 | resume = False 76 | ex.observers.append(FileStorageObserver.create(trainer['storage_dir'])) 77 | 78 | 79 | @ex.automain 80 | def main( 81 | _run, _log, trainer, database_json, training_set, 82 | validation_metric, maximize_metric, 83 | audio_reader, stft, num_workers, batch_size, max_padding_rate, resume 84 | ): 85 | commands.print_config(_run) 86 | trainer = Trainer.from_config(trainer) 87 | storage_dir = Path(trainer.storage_dir) 88 | storage_dir.mkdir(parents=True, exist_ok=True) 89 | commands.save_config( 90 | _run.config, _log, config_filename=str(storage_dir / 'config.json') 91 | ) 92 | 93 | training_data, validation_data, _ = get_datasets( 94 | database_json=database_json, min_signal_length=1.5, 95 | audio_reader=audio_reader, stft=stft, num_workers=num_workers, 96 | batch_size=batch_size, max_padding_rate=max_padding_rate, 97 | training_set=training_set, storage_dir=storage_dir, 98 | stft_stretch_factor_sampling_fn=Uniform(low=0.5, high=1.5), 99 | stft_segment_length=audio_reader['target_sample_rate'], 100 | stft_segment_shuffle_prob=0., 101 | mixup_probs=(1/2, 1/2), max_mixup_length=15., min_mixup_overlap=.8, 102 | ) 103 | 104 | trainer.test_run(training_data, validation_data) 105 | trainer.register_validation_hook( 106 | validation_data, metric=validation_metric, maximize=maximize_metric 107 | ) 108 | trainer.train(training_data, resume=resume) 109 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/source_localization/distance_estimator/Makefile: -------------------------------------------------------------------------------- 1 | .DEFAULT_GOAL = complete 2 | 3 | # Add values for missing constants here or pass them via the terminal 4 | 5 | # path where the RIR database should be stored 6 | RIR_PATH = 7 | # path where the JSON of the RIR database should be stored (optional, otherwise stored in the same directory as the RIR database) 8 | RIR_JSON = 9 | # path where the LibriSpeech database should be stored, can be left empty if the database already exists 10 | LIBRI_PATH = 11 | # path where the JSON of the LibriSpeech database should be stored (optional if LibriSpeech does not already exist; otherwise stored in the same directory as LibriSpeech) 12 | LIBRI_JSON = 13 | # path where the JSON containing the VAD information for LibriSpeech should be stored (optional, unless LibriSpeech gets updated) 14 | VAD_JSON = 15 | 16 | complete: #prepare all prerequisites 17 | ifndef RIR_PATH 18 | $(error 'Please specify RIR_PATH, the path where the RIR database should be stored, or fill 
out the constants in the Makefile')
19 | endif
20 | ifndef LIBRI_PATH
21 | $(error 'Please specify LIBRI_PATH, the path where the LibriSpeech database should be stored, or fill out the constants in the Makefile')
22 | endif
23 | ifndef VAD_JSON
24 | $(eval VAD_JSON=$(LIBRI_PATH))
25 | endif
26 | 	python download.py with rir_path=$(RIR_PATH) libri_path=$(LIBRI_PATH) vad_json_path=$(VAD_JSON)
27 | # since tar_info of LibriSpeech creates its own LibriSpeech subdirectory in the specified path, add this to the path before the JSON is created
28 | $(eval override LIBRI_PATH=$(addsuffix /LibriSpeech,$(LIBRI_PATH)))
29 | 	python create_jsons.py with rir_path=$(RIR_PATH) rir_json_path=$(RIR_JSON) libri_path=$(LIBRI_PATH) libri_json_path=$(LIBRI_JSON) vad_json_path=$(VAD_JSON)
30 | 
31 | rir: # assumes that LibriSpeech already exists and should be updated with VAD information
32 | ifndef LIBRI_JSON
33 | $(error 'Please specify LIBRI_JSON, the path where the LibriSpeech JSON is stored, or fill out the constants in the Makefile')
34 | endif
35 | ifndef VAD_JSON
36 | $(error 'Please specify VAD_JSON, the path where the JSON with VAD information should be stored, or fill out the constants in the Makefile')
37 | endif
38 | ifndef RIR_PATH
39 | $(error 'Please specify RIR_PATH, the path where the RIR database should be stored, or fill out the constants in the Makefile')
40 | endif
41 | 	python download.py with rir_path=$(RIR_PATH) vad_json_path=$(VAD_JSON)
42 | 	python create_jsons.py with update_librispeech rir_path=$(RIR_PATH) rir_json_path=$(RIR_JSON) libri_json_path=$(LIBRI_JSON) vad_json_path=$(VAD_JSON)
43 | 
44 | 
45 | librispeech_full: # assumes that the RIR database already exists
46 | ifndef LIBRI_PATH
47 | $(error 'Please specify LIBRI_PATH, the path where the LibriSpeech database should be stored, or fill out the constants in the Makefile')
48 | endif
49 | ifndef VAD_JSON
50 | $(eval VAD_JSON=$(LIBRI_PATH))
51 | endif
52 | 	python download.py with libri_path=$(LIBRI_PATH) vad_json_path=$(VAD_JSON)
53 | # since tar_info of LibriSpeech creates its own LibriSpeech subdirectory in the specified path, add this to the path before the JSON is created
54 | $(eval override LIBRI_PATH=$(addsuffix /LibriSpeech,$(LIBRI_PATH)))
55 | 	python create_jsons.py with libri_path=$(LIBRI_PATH) libri_json_path=$(LIBRI_JSON) vad_json_path=$(VAD_JSON)
56 | 
57 | librispeech_update: # assumes that both databases exist; only LibriSpeech must be updated with VAD information if it should be used for the speech source signals
58 | ifndef LIBRI_JSON
59 | $(error 'Please specify LIBRI_JSON, the path where the LibriSpeech JSON is stored, or fill out the constants in the Makefile')
60 | endif
61 | ifndef VAD_JSON
62 | $(error 'Please specify VAD_JSON, the path where the JSON with VAD information should be stored, or fill out the constants in the Makefile')
63 | endif
64 | 	python download.py with vad_json_path=$(VAD_JSON)
65 | 	python create_jsons.py with update_librispeech libri_json_path=$(LIBRI_JSON) vad_json_path=$(VAD_JSON)
66 | 
67 | 
68 | 
--------------------------------------------------------------------------------
/padertorch/contrib/examples/source_localization/distance_estimator/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import DistanceEstimator
--------------------------------------------------------------------------------
/padertorch/contrib/examples/source_separation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/examples/source_separation/__init__.py
--------------------------------------------------------------------------------
/padertorch/contrib/examples/source_separation/or_pit/README.md:
--------------------------------------------------------------------------------
1 | One-and-Rest-PIT
2 | ================
3 | 
4 | This directory contains scripts to train and evaluate a One-and-Rest-PIT model [1].
5 | This is a recursive separation model that uses a time-domain separation network at its core.
6 | By default, it uses the DPRNN implementation from `padertorch.examples.tasnet` as a separator.
7 | 
8 | Training
9 | --------
10 | 
11 | Prerequisites
12 | 
13 | - Set `${STORAGE_ROOT}` to the location you want to store your experiment results
14 | - Set `OMP_NUM_THREADS=1` and `MKL_NUM_THREADS=1`
15 | - Prepare the JSON(s) for different numbers of speakers. Each example must have the key `'speaker_id'` with a list as
16 | value whose length corresponds to the number of speakers in the mixture
17 | 
18 | The training procedure of the OR-PIT consists of two steps: a base training without fine-tuning, followed by fine-tuning.
19 | The training for the first step can be run with:
20 | 
21 | ```bash
22 | $ python -m padertorch.contrib.examples.source_separation.or_pit.train with database_jsons=${path_to_your_jsons}
23 | ```
24 | 
25 | You can initialize an experiment directory with `python -m ...or_pit.train init with ...` and start it with:
26 | 
27 | ```bash
28 | $ make train
29 | ```
30 | 
31 | The `database_jsons` can be a single file or, if you want to supply multiple files, a comma-separated list of files.
32 | Make sure to set `train_datasets` and `validation_datasets` according to the datasets available in the supplied
33 | database JSONs (they are set to use WSJ0-2mix and WSJ0-3mix by default).
34 | 
35 | The fine-tune experiment can be initialized with:
36 | 
37 | ```bash
38 | $ make finetune
39 | ```
40 | 
41 | This command creates a new storage dir and uses the same configuration (including number of epochs, data, etc.) as the base training.
42 | 
43 | Evaluation
44 | ----------
45 | 
46 | Start an evaluation with
47 | 
48 | ```bash
49 | $ python -m padertorch.contrib.examples.source_separation.or_pit.evaluate with model_path=${path_to_the_model_dir} database_json=${path_to_the_json} "datasets=['your','datasets']"
50 | ```
51 | 
52 | Enable audio exporting with `dump_audio=True`.
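For example, a complete call that also exports the separated audio is `python -m padertorch.contrib.examples.source_separation.or_pit.evaluate with model_path=${path_to_the_model_dir} database_json=${path_to_the_json} "datasets=['your','datasets']" dump_audio=True`.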
53 | 
54 | Important configuration values
55 | ------------------------------
56 | 
57 | - `batch_size`: Set the batch size
58 | - `trainer.stop_trigger`: Set the number of iterations or epochs to perform (e.g., `trainer.stop_trigger=(100,'epoch')` for 100 epochs)
59 | - `trainer.model.finetune`: Enables fine-tuning
60 | - `trainer.model.stop_condition`: The criterion to use for stopping during evaluation. Can be `'flag'` or `'threshold'`.
61 | - `trainer.model.unroll_type`: Determines how many iterations to perform for a given number of speakers. Can be `'res-single'` (iterate until the residual output contains a single speaker), `'res-silent'` (iterate until the residual signal is silent) or `'est-silent'` (iterate until the estimated signal is silent)
62 | 
63 | 
64 | References
65 | ----------
66 | 
67 | [1] Takahashi, Naoya, Sudarsanam Parthasaarathy, Nabarun Goswami, and Yuki Mitsufuji. "Recursive speech
68 | separation for unknown number of speakers", April 5, 2019.
69 | 
--------------------------------------------------------------------------------
/padertorch/contrib/examples/source_separation/or_pit/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import OneAndRestPIT
--------------------------------------------------------------------------------
/padertorch/contrib/examples/source_separation/or_pit/templates.py:
--------------------------------------------------------------------------------
1 | MAKEFILE_TEMPLATE_TRAIN = """SHELL := /bin/bash
2 | MODEL_PATH := $(shell pwd)
3 | 
4 | export OMP_NUM_THREADS=1
5 | export MKL_NUM_THREADS=1
6 | 
7 | train:
8 | \tpython -m {main_python_path} with config.json
9 | 
10 | finetune:
11 | \tpython -m {main_python_path} init_with_new_storage_dir with config.json trainer.model.finetune=True load_model_from=$(MODEL_PATH)/checkpoints/ckpt_latest.pth batch_size=1
12 | 
13 | ccsalloc:
14 | \tccsalloc \\
15 | \t\t--res=rset=1:ncpus=4:gtx1080=1:ompthreads=1 \\
16 | \t\t--time=100h \\
17 | \t\t--stdout=%x.%reqid.out \\
18 | \t\t--stderr=%x.%reqid.err \\
19 | \t\t--tracefile=%x.%reqid.trace \\
20 | \t\t-N train_{experiment_name} \\
21 | \t\tpython -m {main_python_path} with config.json
22 | 
23 | evaluate:
24 | \tpython -m {eval_python_path} init with model_path=$(MODEL_PATH)
25 | 
26 | evaluate_oracle_num_spk:
27 | \tpython -m {eval_python_path} init with model_path=$(MODEL_PATH) oracle_num_spk=True
28 | """
29 | 
30 | MAKEFILE_TEMPLATE_EVAL = """SHELL := /bin/bash
31 | 
32 | evaluate:
33 | \tpython -m {main_python_path} with config.json
34 | 
35 | ccsalloc:
36 | \tccsalloc \\
37 | \t\t--res=rset=100:mpiprocs=1:ncpus=1:mem=4g:vmem=6g \\
38 | \t\t--time=1h \\
39 | \t\t--stdout=%x.%reqid.out \\
40 | \t\t--stderr=%x.%reqid.err \\
41 | \t\t--tracefile=%x.%reqid.trace \\
42 | \t\t-N evaluate_{experiment_name} \\
43 | \t\tompi ${{OMPI_PARAMS}} \\
44 | \t\t-- \\
45 | \t\tpython -m {main_python_path} with config.json
46 | """
47 | 
--------------------------------------------------------------------------------
/padertorch/contrib/examples/source_separation/pit/README.md:
--------------------------------------------------------------------------------
1 | BLSTM-based Permutation Invariant Training (PIT)
2 | ================
3 | 
4 | This directory contains scripts to train and evaluate the basic utterance-level permutation
5 | invariant training model (uPIT)
6 | for source separation [1].
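To make the core idea concrete, here is a minimal sketch of an utterance-level permutation-invariant loss for a fixed number of sources. It only illustrates the principle from [1] and is not necessarily the exact loss implemented in this example's `model.py`:

```python
import itertools

import torch


def upit_mse_loss(estimate, target):
    """Utterance-level PIT: choose one source permutation per utterance.

    estimate, target: tensors of shape (batch, sources, time, frequency).
    """
    num_sources = estimate.shape[1]
    # Loss of every permutation for every utterance: (num_permutations, batch)
    losses = torch.stack([
        ((estimate[:, list(perm)] - target) ** 2).mean(dim=(1, 2, 3))
        for perm in itertools.permutations(range(num_sources))
    ])
    # Best permutation per utterance, averaged over the batch
    return losses.min(dim=0).values.mean()
```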
7 | 
8 | 
9 | Training
10 | --------
11 | 
12 | Prerequisites
13 | 
14 | - Set `${STORAGE_ROOT}` to the location you want to store your experiment results
15 | - Set `OMP_NUM_THREADS=1` and `MKL_NUM_THREADS=1`
16 | - Prepare the JSON(s) for your database. Each example must be sorted by `num_samples`, as the model
17 | uses PyTorch's `PackedSequence`
18 | 
19 | ```bash
20 | $ python -m padertorch.contrib.examples.source_separation.pit.train with database_json=${path_to_your_jsons}
21 | ```
22 | 
23 | You can initialize an experiment directory with `python -m ...pit.train init with ...` and start it with:
24 | 
25 | ```bash
26 | $ make train
27 | ```
28 | 
29 | Make sure to set `train_dataset` and `validation_dataset` according to the datasets available in the supplied
30 | database JSONs (they are set to use WSJ0-2mix by default).
31 | 
32 | Evaluation
33 | ----------
34 | 
35 | Start an evaluation with
36 | 
37 | ```bash
38 | $ python -m padertorch.contrib.examples.source_separation.pit.evaluate with model_path=${path_to_the_model_dir} database_json=${path_to_the_json} "datasets=['your','datasets']"
39 | ```
40 | 
41 | If you want to speed up your evaluation, you can also call
42 | ```bash
43 | $ mpiexec -np ${n_jobs} python -m padertorch.contrib.examples.source_separation.pit.evaluate with model_path=${path_to_the_model_dir} database_json=${path_to_the_json} "datasets=['your','datasets']"
44 | ```
45 | to parallelize your evaluation over several CPU cores.
46 | 
47 | Important configuration values
48 | ------------------------------
49 | 
50 | - `batch_size`: Set the batch size
51 | - `trainer.stop_trigger`: Set the number of iterations or epochs to perform (e.g., `trainer.stop_trigger=(100,'epoch')` for 100 epochs)
52 | 
53 | 
54 | References
55 | ----------
56 | 
57 | [1] Morten Kolbæk, Dong Yu, Zheng-Hua Tan, Jesper Jensen. "Multi-talker Speech Separation with Utterance-level
58 | Permutation Invariant Training of Deep Recurrent Neural Networks", March 18, 2017.
59 | 
--------------------------------------------------------------------------------
/padertorch/contrib/examples/source_separation/pit/__init__.py:
--------------------------------------------------------------------------------
1 | from .
import model 2 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/source_separation/pit/data.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import einops 4 | import numpy as np 5 | 6 | import paderbox as pb 7 | import padertorch as pt 8 | from paderbox.transform import stft 9 | 10 | 11 | def prepare_dataset( 12 | db, dataset_name: str, batch_size, return_keys=None, prefetch=True, shuffle=True 13 | ): 14 | audio_keys = ['observation', 'speech_source'] 15 | dataset = db.get_dataset(dataset_name) 16 | 17 | dataset = ( 18 | dataset 19 | .map(partial(read_audio, audio_keys=audio_keys)) 20 | .map(partial(pre_batch_transform, return_keys=return_keys)) 21 | ) 22 | if shuffle: 23 | dataset = dataset.shuffle(reshuffle=True) 24 | dataset = ( 25 | dataset 26 | .batch(batch_size) 27 | .map(pt.data.batch.Sorter('num_frames')) 28 | .map(pt.data.utils.collate_fn) 29 | .map(post_batch_transform) 30 | ) 31 | 32 | if prefetch: 33 | dataset = dataset.prefetch(4, 8) 34 | 35 | return dataset 36 | 37 | 38 | def read_audio(example, src_key="audio_path", audio_keys=None): 39 | data = { 40 | audio_key: pb.io.audioread.recursive_load_audio( 41 | example[src_key][audio_key], 42 | ) 43 | for audio_key in audio_keys 44 | } 45 | example["audio_data"] = data 46 | return example 47 | 48 | 49 | def pre_batch_transform(inputs, return_keys=None): 50 | s = inputs['audio_data']['speech_source'] 51 | y = inputs['audio_data']['observation'] 52 | S = stft(s, 512, 128) 53 | Y = stft(y, 512, 128) 54 | Y = einops.rearrange(Y, 't f -> t f') 55 | S = einops.rearrange(S, 'k t f -> t k f') 56 | X = S # Same for WSJ0_2MIX database 57 | num_frames = Y.shape[0] 58 | 59 | return_dict = dict() 60 | 61 | def maybe_add(key, value): 62 | if return_keys is None or key in return_keys: 63 | return_dict[key] = value 64 | 65 | maybe_add('example_id', inputs['example_id']) 66 | maybe_add('s', np.ascontiguousarray(s, np.float32)) 67 | maybe_add('S', np.ascontiguousarray(S, np.float32)) 68 | maybe_add('y', np.ascontiguousarray(y, np.float32)) 69 | maybe_add('Y', np.ascontiguousarray(Y, np.complex64)) 70 | maybe_add('X_abs', np.ascontiguousarray(np.abs(X), np.float32)) 71 | maybe_add('Y_abs', np.ascontiguousarray(np.abs(Y), np.float32)) 72 | maybe_add('num_frames', num_frames) 73 | maybe_add('cos_phase_difference', np.ascontiguousarray( 74 | np.cos(np.angle(Y[:, None, :]) - np.angle(X)), np.float32) 75 | ) 76 | 77 | return return_dict 78 | 79 | 80 | def post_batch_transform(batch): 81 | return batch 82 | 83 | 84 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/source_separation/pit/templates.py: -------------------------------------------------------------------------------- 1 | MAKEFILE_TEMPLATE_TRAIN = """ 2 | SHELL := /bin/bash 3 | 4 | train: 5 | \tpython -m {main_python_path} with config.json 6 | 7 | ccsalloc: 8 | \tccsalloc \\ 9 | \t\t--res=rset=1:ncpus=4:gtx1080=1:ompthreads=1 \\ 10 | \t\t--time=100h \\ 11 | \t\t--stdout=%x.%reqid.out \\ 12 | \t\t--stderr=%x.%reqid.err \\ 13 | \t\t--tracefile=%x.%reqid.trace \\ 14 | \t\t-N train_{experiment_name} \\ 15 | \t\tpython -m {main_python_path} with config.json 16 | """ 17 | 18 | MAKEFILE_TEMPLATE_EVAL = """ 19 | SHELL := /bin/bash 20 | 21 | evaluate: 22 | \tpython -m {main_python_path} with config.json 23 | 24 | ccsalloc: 25 | \tccsalloc \\ 26 | \t\t--res=rset=200:mpiprocs=1:ncpus=1:mem=4g:vmem=6g \\ 27 
| \t\t--time=1h \\
28 | \t\t--stdout=%x.%reqid.out \\
29 | \t\t--stderr=%x.%reqid.err \\
30 | \t\t--tracefile=%x.%reqid.trace \\
31 | \t\t-N evaluate_{experiment_name} \\
32 | \t\tompi ${{OMPI_PARAMS}} \\
33 | \t\t-- \\
34 | \t\tpython -m {main_python_path} with config.json
35 | """
36 | 
--------------------------------------------------------------------------------
/padertorch/contrib/examples/source_separation/tasnet/__init__.py:
--------------------------------------------------------------------------------
1 | from . import tas_coders
2 | from .model import TasNet
--------------------------------------------------------------------------------
/padertorch/contrib/examples/source_separation/tasnet/templates.py:
--------------------------------------------------------------------------------
1 | MAKEFILE_TEMPLATE_TRAIN = """SHELL := /bin/bash
2 | MODEL_PATH := $(shell pwd)
3 | 
4 | export OMP_NUM_THREADS=1
5 | export MKL_NUM_THREADS=1
6 | 
7 | train:
8 | \tpython -m {main_python_path} with config.json
9 | 
10 | ccsalloc:
11 | \tccsalloc \\
12 | \t\t--res=rset=1:ncpus=4:gtx1080=1:ompthreads=1 \\
13 | \t\t--time=100h \\
14 | \t\t--stdout=%x.%reqid.out \\
15 | \t\t--stderr=%x.%reqid.err \\
16 | \t\t--tracefile=%x.%reqid.trace \\
17 | \t\t-N train_{experiment_name} \\
18 | \t\tpython -m {main_python_path} with config.json
19 | 
20 | evaluate:
21 | \tpython -m {eval_python_path} init with model_path=$(MODEL_PATH)"""
22 | 
23 | MAKEFILE_TEMPLATE_EVAL = """SHELL := /bin/bash
24 | 
25 | evaluate:
26 | \tpython -m {main_python_path} with config.json
27 | 
28 | ccsalloc:
29 | \tccsalloc \\
30 | \t\t--res=rset=100:mpiprocs=1:ncpus=1:mem=4g:vmem=6g \\
31 | \t\t--time=1h \\
32 | \t\t--stdout=%x.%reqid.out \\
33 | \t\t--stderr=%x.%reqid.err \\
34 | \t\t--tracefile=%x.%reqid.trace \\
35 | \t\t-N evaluate_{experiment_name} \\
36 | \t\tompi ${{OMPI_PARAMS}} \\
37 | \t\t-- \\
38 | \t\tpython -m {main_python_path} with config.json
39 | """
--------------------------------------------------------------------------------
/padertorch/contrib/examples/speaker_classification/supervised/README.md:
--------------------------------------------------------------------------------
1 | # Speaker Classification
2 | 
3 | This example performs a simple speaker classification on the *clean_100* and
4 | *clean_360* datasets of the LibriSpeech corpus.
5 | 
6 | ## Training
7 | To start the training, first define a path to where the trained models should be saved:
8 | ```bash
9 | export STORAGE_ROOT=<path/to/storage/root>; python -m padertorch.contrib.examples.speaker_classification.supervised.train with database_json=<path/to/database.json> dataset=<dataset_name>
10 | ```
11 | Your trained models can be found in `$STORAGE_ROOT/speaker_clf`. During training,
12 | only 80% of the dataset is used for training. 10% are left out for validation
13 | and another 10% for evaluation.
14 | 
15 | The training script needs a JSON file that describes the structure of your
16 | database in the following format:
17 | ```
18 | {
19 |     "datasets": {
20 |         <dataset_name>: {
21 |             <example_id>: {
22 |                 "audio_path": {
23 |                     "observation": <path/to/audio/file>
24 |                 },
25 |                 "speaker_id": <speaker_id>
26 |             },
27 |             <example_id>: {
28 |                 ...
29 |             },
30 |             ...
31 |         },
32 |         <dataset_name>: {
33 |             <example_id>: {
34 |                 ...
35 |             },
36 |             ...
37 |         },
38 |         ...
39 |     }
40 | }
41 | ```
42 | If you train on LibriSpeech like we did, be aware that the speaker ID is defined
43 | as `<speaker_id>-<chapter_id>` by LibriSpeech, where `<chapter_id>` is an
44 | identifier for a book chapter.
45 | Here, we perform a speaker identification across chapters, so we omit the chapter
46 | ID (the part of the speaker ID after the hyphen).
47 | This is taken care of during the data preparation.
48 | Generally, if the speaker ID contains one or more hyphens, the data preparation
49 | will take the part before the **first** hyphen as the final speaker label for
50 | classification.
51 | If the speaker ID does not contain any hyphens, it will use the complete speaker
52 | ID string as-is as the speaker label.
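The labeling rule described above boils down to the following (a hypothetical helper for illustration, not code from this example):

```python
def speaker_label(speaker_id: str) -> str:
    """'103-1240' -> '103'; IDs without a hyphen are returned unchanged."""
    return speaker_id.split('-')[0]
```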
53 | 
54 | ## Evaluation
55 | 
56 | To run an evaluation, provide the evaluation script with the path to your
57 | trained model:
58 | ```bash
59 | mpiexec -np $(nproc --all) python -m padertorch.contrib.examples.speaker_classification.supervised.evaluate with model_path=<path/to/trained/model/dir>
60 | ```
61 | The evaluation script loads the best checkpoint (lowest achieved loss) and
62 | performs a speaker classification on the evaluation data.
63 | It requires [dlp_mpi](https://github.com/fgnt/dlp_mpi) to be installed.
64 | For each misclassified example, symlinks to the example audio file and to an audio
65 | example of the wrongly classified speaker are stored.
66 | 
67 | ## Results
68 | 
69 | | Database | Dataset | Num. Speakers | Num. Eval Examples | Classification Accuracy |
70 | | :------: | :-----: | :-----------: | :----------------: | :---------------------: |
71 | | LibriSpeech | clean_100 | 251 | 2853 | 98.60% |
72 | | LibriSpeech | clean_360 | 921 | 10401 | 94.72% |
--------------------------------------------------------------------------------
/padertorch/contrib/examples/speaker_classification/supervised/model.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from einops import rearrange
4 | from padertorch.base import Model
5 | from torchvision.utils import make_grid
6 | 
7 | 
8 | class SpeakerClf(Model):
9 |     def __init__(self, feature_extractor, cnn, enc, fcn):
10 |         super().__init__()
11 |         self.feature_extractor = feature_extractor
12 |         self.cnn = cnn
13 |         self.enc = enc
14 |         self.fcn = fcn
15 | 
16 |     def forward(self, inputs):
17 |         x = inputs['features']
18 |         seq_len = inputs['seq_len']
19 | 
20 |         x = self.feature_extractor(x, seq_len)
21 | 
22 |         # cnn
23 |         x, seq_len = self.cnn(x, sequence_lengths=seq_len)
24 | 
25 |         # rnn
26 |         if self.enc.batch_first:
27 |             x = rearrange(x, 'b f t -> b t f')
28 |         else:
29 |             x = rearrange(x, 'b f t -> t b f')
30 |         x, _ = self.enc(x)
31 |         if not self.enc.batch_first:
32 |             x = rearrange(x, 't b f -> b t f')
33 |         x = x[torch.arange(len(seq_len)), seq_len - 1]
34 | 
35 |         x = self.fcn(x)
36 |         return x
37 | 
38 |     def review(self, inputs, outputs):
39 |         labels = inputs['speaker_id']
40 |         ce = torch.nn.CrossEntropyLoss(reduction='none')(outputs, labels)
41 |         summary = dict(
42 |             loss=ce.mean(),
43 |             scalars=dict(
44 |                 labels=labels,
45 |                 predictions=torch.argmax(outputs, dim=-1)
46 |             ),
47 |             images=dict(
48 |                 features=inputs['features'][:3]
49 |             )
50 |         )
51 |         return summary
52 | 
53 |     def modify_summary(self, summary):
54 |         if 'labels' in summary['scalars']:
55 |             labels = summary['scalars'].pop('labels')
56 |             predictions = summary['scalars'].pop('predictions')
57 |             summary['scalars']['accuracy'] = (
58 |                 np.array(predictions) == np.array(labels)
59 |             ).mean()
60 |         summary = super().modify_summary(summary)
61 |         for key, image in summary['images'].items():
62 |             summary['images'][key] = make_grid(
63 |                 image.flip(2), normalize=True, scale_each=False, nrow=1
64 |             )
65 |         return summary
66 | 
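As a small self-contained illustration (with made-up shapes), the gather `x[torch.arange(len(seq_len)), seq_len - 1]` in `SpeakerClf.forward` picks, for every batch entry, the RNN output at its last valid time step:

```python
import torch

x = torch.arange(24, dtype=torch.float32).view(2, 4, 3)  # (batch, time, features)
seq_len = torch.tensor([2, 4])  # valid lengths per batch entry
last = x[torch.arange(x.shape[0]), seq_len - 1]  # shape: (2, 3)
```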
--------------------------------------------------------------------------------
/padertorch/contrib/examples/speech_enhancement/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/examples/speech_enhancement/__init__.py
--------------------------------------------------------------------------------
/padertorch/contrib/examples/speech_enhancement/mask_estimator/README.md:
--------------------------------------------------------------------------------
1 | Simple Mask Estimator
2 | =============
3 | 
4 | This directory contains scripts to train and evaluate a simple mask estimator
5 | inspired by [1].
6 | 
7 | Results
8 | -------
9 | 
10 | The mask estimator model achieves the following results on
11 | the CHiME 3 simulated evaluation set:
12 | 
13 | 
14 | data type | PESQ | STOI | SDR
15 | :------------------|--------------|--------------|--------------:
16 | observed | 1.07 | 0.672 | -0.79 dB
17 | masked | 1.22 | 0.736 | 5.68 dB
18 | beamformed | 1.91 | 0.958 | 17.10 dB
19 | 
20 | Masked and observed are evaluated on the first channel of the 6ch track.
21 | 
22 | Training
23 | --------
24 | 
25 | A storage root must be set with `export STORAGE_ROOT=/path/to/your/storage`.
26 | After installing `padertorch`, a training can for example be started with
27 | 
28 | ```bash
29 | $ export STORAGE_ROOT=/path/to/your/storage; python -m padertorch.contrib.examples.speech_enhancement.mask_estimator.train with database_json=/path/to/json
30 | ```
31 | 
32 | The database json path should point to a json containing all information about
33 | the CHiME3 data in a format described in `lazy_dataset.database`.
34 | Each example should contain at least the following keys:
35 | ```
36 | audio_path:
37 |     speech_source:
38 |         <path/to/clean/speech>
39 |     observation:
40 |         array: [
41 |             <path/to/channel_1>
42 |             <path/to/channel_2>
43 |             ...
44 |         ]
45 |     # the following keys are not necessary during evaluation
46 |     speech_image: [
47 |         ...
48 |     ]
49 |     noise_image: [
50 |         ...
51 |     ]
52 | ```
53 | 
54 | Evaluation
55 | ----------
56 | 
57 | The evaluation requires `dlp_mpi` and `pb_bss` as additional dependencies.
58 | `dlp_mpi` can be installed via `pip install dlp_mpi` and `pb_bss` is available at [github.com/fgnt/pb_bss](https://github.com/fgnt/pb_bss).
59 | The evaluation can be started by
60 | 
61 | ```bash
62 | $ export STORAGE_ROOT=/path/to/your/storage; mpiexec -n $(nproc --all) python -m padertorch.contrib.examples.speech_enhancement.mask_estimator.evaluate with database_json=/path/to/json
63 | ```
64 | It always evaluates the latest model in the specified `STORAGE_ROOT`.
65 | 
66 | If you want to evaluate a specific checkpoint, specify the path as an
67 | additional argument to the call.
68 | 
69 | ```bash
70 | $ export STORAGE_ROOT=/path/to/your/storage; mpiexec -n $(nproc --all) python -m padertorch.contrib.examples.speech_enhancement.mask_estimator.evaluate with database_json=/path/to/json checkpoint_path=/path/to/checkpoint
71 | ```
72 | 
73 | References
74 | ----------
75 | 
76 | [1] J. Heymann, L. Drude, A. Chinaev, and R. Haeb-Umbach,
77 | "BLSTM supported GEV beamformer front-end for the 3rd CHiME challenge",
78 | Proc. Worksh. Automat.
Speech Recognition and Understanding, 2015.
79 | https://www.researchgate.net/publication/304407561_BLSTM_supported_GEV_beamformer_front-end_for_the_3RD_CHiME_challenge
80 | 
81 | 
--------------------------------------------------------------------------------
/padertorch/contrib/examples/speech_enhancement/mask_estimator/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import SimpleMaskEstimator
2 | 
--------------------------------------------------------------------------------
/padertorch/contrib/examples/speech_enhancement/mask_estimator/model.py:
--------------------------------------------------------------------------------
1 | import padertorch as pt
2 | import torch
3 | from padertorch.summary import mask_to_image, stft_to_image
4 | 
5 | 
6 | class SimpleMaskEstimator(pt.Model):
7 |     def __init__(self, num_features, num_units=1024, dropout=0.5,
8 |                  activation='elu'):
9 |         """
10 | 
11 |         Args:
12 |             num_features: number of input features
13 |             num_units: number of units in the linear layers
14 |             dropout: dropout probability
15 |             activation: activation for the linear layers except the output layer
16 | 
17 |         >>> net = SimpleMaskEstimator(513)
18 |         """
19 |         super().__init__()
20 |         self.num_features = num_features
21 |         self.net = torch.nn.Sequential(
22 |             pt.modules.Normalization(
23 |                 'btf', (1, 1, num_features), statistics_axis='t',
24 |                 independent_axis='f', batch_axis='b', sequence_axis='t'
25 |             ),
26 |             pt.modules.StatefulLSTM(
27 |                 num_features, num_units // 4,
28 |                 bidirectional=True, batch_first=True, save_states=False
29 |             ),
30 |             torch.nn.Dropout(dropout),
31 |             torch.nn.Linear((num_units // 4) * 2, num_units),
32 |             pt.mappings.ACTIVATION_FN_MAP[activation](),
33 |             torch.nn.Dropout(dropout),
34 |             torch.nn.Linear(num_units, num_units),
35 |             pt.mappings.ACTIVATION_FN_MAP[activation](),
36 |             # twice num_features for speech and noise mask
37 |             torch.nn.Linear(num_units, 2 * num_features),
38 |             # Output activation to force outputs between 0 and 1
39 |             torch.nn.Sigmoid()
40 |         )
41 | 
42 |     def forward(self, batch):
43 | 
44 |         x = batch['observation_abs']
45 |         out = self.net(x)
46 |         return dict(
47 |             speech_mask_prediction=out[..., :self.num_features],
48 |             noise_mask_prediction=out[..., self.num_features:],
49 |         )
50 | 
51 |     def review(self, batch, output):
52 |         noise_mask_loss = torch.nn.functional.binary_cross_entropy(
53 |             output['noise_mask_prediction'], batch['noise_mask_target']
54 |         )
55 |         speech_mask_loss = torch.nn.functional.binary_cross_entropy(
56 |             output['speech_mask_prediction'], batch['speech_mask_target']
57 |         )
58 |         return dict(loss=noise_mask_loss + speech_mask_loss,
59 |                     images=self.add_images(batch, output))
60 | 
61 |     @staticmethod
62 |     def add_images(batch, output):
63 |         speech_mask = output['speech_mask_prediction']
64 |         observation = batch['observation_abs']
65 |         images = dict()
66 |         images['speech_mask'] = mask_to_image(speech_mask, True)
67 |         images['observed_stft'] = stft_to_image(observation, True)
68 | 
69 |         if 'noise_mask_prediction' in output:
70 |             noise_mask = output['noise_mask_prediction']
71 |             images['noise_mask'] = mask_to_image(noise_mask, True)
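        # The mask targets are only available during training, hence the
        # membership checks below.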
72 |         if batch is not None and 'speech_mask_target' in batch:
73 |             images['speech_mask_target'] = mask_to_image(
74 |                 batch['speech_mask_target'], True)
75 |         if 'noise_mask_target' in batch:
76 |             images['noise_mask_target'] = mask_to_image(
77 |                 batch['noise_mask_target'], True)
78 |         return images
79 | 
--------------------------------------------------------------------------------
/padertorch/contrib/examples/speech_enhancement/mask_estimator/train.py:
--------------------------------------------------------------------------------
1 | """
2 | Very simple training script for a mask estimator.
3 | Saves checkpoints and summaries to $STORAGE_ROOT/speech_enhancement/simple_mask_estimator_{id}.
4 | May be called with:
5 | python -m padertorch.contrib.examples.speech_enhancement.mask_estimator.train with database_json=/path/to/json
6 | """
7 | 
8 | from pathlib import Path
9 | 
10 | import os
11 | import numpy as np
12 | import paderbox as pb
13 | import padertorch as pt
14 | from lazy_dataset.database import JsonDatabase
15 | from pb_bss.extraction.mask_module import biased_binary_mask
16 | from sacred import Experiment, observers
17 | 
18 | from .model import SimpleMaskEstimator
19 | 
20 | ex = Experiment('Train Simple Mask Estimator')
21 | 
22 | 
23 | @ex.config
24 | def config():
25 |     storage_dir = None
26 |     if storage_dir is None:
27 |         storage_dir = pt.io.get_new_storage_dir(
28 |             'speech_enhancement', prefix='simple_mask_estimator')
29 |     database_json = None
30 |     if database_json is None:
31 |         if 'NT_DATABASE_JSONS_DIR' in os.environ:
32 |             database_json = Path(
33 |                 os.environ['NT_DATABASE_JSONS_DIR']) / 'chime.json'
34 |     assert database_json is not None, (
35 |         'You have to specify a path to a json describing your database, '
36 |         'use "with database_json=/Path/To/Json" as suffix to your call'
37 |     )
38 |     assert Path(database_json).exists(), database_json
39 |     ex.observers.append(observers.FileStorageObserver(
40 |         Path(storage_dir).expanduser().resolve() / 'sacred')
41 |     )
42 | 
43 | 
44 | def prepare_data(example):
45 |     stft = pb.transform.STFT(shift=256, size=1024)
46 |     net_input = dict()
47 |     audio_data = dict()
48 |     for key in ['observation', 'speech_image', 'noise_image']:
49 |         audio_data[key] = stft(np.array([
50 |             pb.io.load_audio(audio) for audio in example['audio_path'][key]]))
51 |     net_input['observation_abs'] = np.abs(
52 |         audio_data['observation']).astype(np.float32)
53 |     target_mask, noise_mask = biased_binary_mask(np.stack(
54 |         [audio_data['speech_image'], audio_data['noise_image']], axis=0
55 |     ))
56 |     net_input['speech_mask_target'] = target_mask.astype(np.float32)
57 |     net_input['noise_mask_target'] = noise_mask.astype(np.float32)
58 |     return net_input
59 | 
60 | 
61 | def get_train_dataset(database: JsonDatabase):
62 |     train_ds = database.get_dataset('tr05_simu')
63 |     return (train_ds
64 |             .map(prepare_data)
65 |             .prefetch(num_workers=4, buffer_size=4))
66 | 
67 | 
68 | def get_validation_dataset(database: JsonDatabase):
69 |     # Prepare the validation data in the same way as the training data,
70 |     # here using the CHiME3 simulated development set
71 |     val_iterator = database.get_dataset('dt05_simu')
72 |     return val_iterator.map(prepare_data) \
73 |         .prefetch(num_workers=4, buffer_size=4)
74 | 
75 | 
76 | @ex.command
77 | def test_run(storage_dir, database_json):
78 |     model = SimpleMaskEstimator(513)
79 |     print(f'Simple training for the following model: {model}')
80 |     database = JsonDatabase(database_json)
81 |     train_dataset = get_train_dataset(database)
82 |
validation_dataset = get_validation_dataset(database) 83 | trainer = pt.train.trainer.Trainer( 84 | model, storage_dir, optimizer=pt.train.optimizer.Adam(), 85 | stop_trigger=(int(1e5), 'iteration') 86 | ) 87 | trainer.test_run(train_dataset, validation_dataset) 88 | 89 | 90 | @ex.automain 91 | def train(storage_dir, database_json): 92 | model = SimpleMaskEstimator(513) 93 | print(f'Simple training for the following model: {model}') 94 | database = JsonDatabase(database_json) 95 | train_dataset = get_train_dataset(database) 96 | validation_dataset = get_validation_dataset(database) 97 | trainer = pt.Trainer(model, storage_dir, 98 | optimizer=pt.train.optimizer.Adam(), 99 | stop_trigger=(int(1e5), 'iteration')) 100 | trainer.test_run(train_dataset, validation_dataset) 101 | trainer.register_validation_hook( 102 | validation_dataset, n_back_off=5, lr_update_factor=1 / 10, 103 | back_off_patience=1, early_stopping_patience=None) 104 | trainer.train(train_dataset) 105 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/toy_examples/configurable/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/examples/toy_examples/configurable/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/examples/toy_examples/configurable/configurable.py: -------------------------------------------------------------------------------- 1 | import padertorch as pt 2 | 3 | class GRU(pt.Configurable): 4 | def __init__(self, nonlinearity='tanh'): 5 | pass 6 | 7 | 8 | class LSTM(pt.Configurable): 9 | def __init__(self, peephole=False): 10 | pass 11 | 12 | 13 | class DenseEncoder(pt.Configurable): 14 | def __init__(self, layers=2, nonlinearity='elu'): 15 | pass 16 | 17 | 18 | class RecurrentEncoder(pt.Configurable): 19 | 20 | @classmethod 21 | def get_signature(cls): 22 | defaults = super().get_signature() 23 | defaults['recurrent'] = { 24 | 'cls': GRU, 25 | } 26 | return defaults 27 | 28 | def __init__( 29 | self, 30 | recurrent, 31 | layers=2, 32 | bidirectional=False, 33 | ): 34 | pass 35 | 36 | 37 | class VAE(pt.Configurable): 38 | """ 39 | >>> from pprint import pprint 40 | >>> pprint(VAE.get_config({})) 41 | {'cls': 'configurable.VAE', 42 | 'kwargs': {'encoder': {'cls': 'configurable.DenseEncoder', 43 | 'kwargs': {'layers': 3, 'nonlinearity': 'sigmoid'}}, 44 | 'vae_param': 2}} 45 | >>> pprint(VAE.get_config({'encoder': {'cls': RecurrentEncoder}})) 46 | {'cls': 'configurable.VAE', 47 | 'kwargs': {'encoder': {'cls': 'configurable.RecurrentEncoder', 48 | 'kwargs': {'bidirectional': False, 49 | 'layers': 4, 50 | 'recurrent': {'cls': 'configurable.GRU', 51 | 'kwargs': {'nonlinearity': 'tanh'}}}}, 52 | 'vae_param': 2}} 53 | """ 54 | @classmethod 55 | def get_signature(cls): 56 | defaults = super().get_signature() 57 | defaults['encoder'] = { 58 | 'cls': DenseEncoder, 59 | 'kwargs': {'layers': 5}, 60 | DenseEncoder: {'layers': 3, 'nonlinearity': 'sigmoid'}, 61 | RecurrentEncoder: {'layers': 4}, 62 | } 63 | return defaults 64 | 65 | def __init__(self, encoder, vae_param=2): 66 | self.encoder = encoder 67 | self.vae_param = vae_param 68 | 69 | 70 | import sacred 71 | import sacred.run 72 | import sacred.commands 73 | exp = sacred.Experiment('vae') 74 | from paderbox.utils.nested import deflatten 75 | 76 | @exp.config 77 | def config(): 78 | 79 | model = {} 80 
| VAE.get_config( 81 | dict( 82 | encoder={ 83 | 'cls': RecurrentEncoder, 84 | RecurrentEncoder: dict( 85 | recurrent={'cls': LSTM} 86 | ), 87 | }, 88 | ), 89 | model, 90 | ) 91 | VAE.get_config( # alternative dict update 92 | deflatten({ 93 | ('encoder', 'cls'): RecurrentEncoder, 94 | ('encoder', RecurrentEncoder, 'recurrent', 'cls'): LSTM, 95 | }, sep=None), 96 | model, 97 | ) 98 | VAE.get_config( # second alternative update 99 | deflatten({ 100 | 'encoder/cls': 'RecurrentEncoder', 101 | 'encoder/RecurrentEncoder/recurrent/cls': LSTM, 102 | }, sep='/'), 103 | model, 104 | ) 105 | 106 | 107 | @exp.automain 108 | def main(_config, _run: sacred.run.Run): 109 | """ 110 | python parametized.py print_config 111 | python parametized.py print_config with model.kwargs.encoder.cls=RecurrentEncoder model.kwargs.vae_param=10 112 | """ 113 | from IPython.lib.pretty import pprint 114 | sacred.commands.print_config(_run) 115 | 116 | model = VAE.from_config(_config['model']) 117 | 118 | print('Model config') 119 | pprint(model.config) 120 | print('Encoder config') 121 | pprint(model.encoder) 122 | 123 | 124 | if __name__ == '__main__': 125 | pass 126 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/toy_examples/configurable/shared_parameter.py: -------------------------------------------------------------------------------- 1 | import padertorch as pts 2 | from IPython.lib.pretty import pprint 3 | from paderbox.utils.nested import deflatten 4 | 5 | 6 | class Load(pts.configurable.Configurable): 7 | def __init__(self, sample_rate=16000): 8 | self.sample_rate = sample_rate 9 | def __call__(self, arg): 10 | print(self.__class__.__name__, arg, self.sample_rate) 11 | return arg + 5 12 | 13 | 14 | class FeatureExtractor(pts.configurable.Configurable): 15 | def __init__(self, sample_rate=16000): 16 | self.sample_rate = sample_rate 17 | def __call__(self, arg): 18 | print(self.__class__.__name__, arg, self.sample_rate) 19 | return arg + 7 20 | 21 | 22 | class Compose(pts.configurable.Configurable): 23 | def __init__(self, layer1, layer2, sample_rate=8000): 24 | self.layer1 = layer1 25 | self.layer2 = layer2 26 | 27 | def __call__(self, arg): 28 | print(self.__class__.__name__, arg) 29 | return self.layer2(self.layer1(arg)) + 11 30 | 31 | @classmethod 32 | def get_config( 33 | cls, 34 | updates=None, 35 | config=None, 36 | ): 37 | config = super().get_config(updates=updates, config=config) 38 | config['kwargs']['layer1']['kwargs']['sample_rate'] = config['kwargs']['sample_rate'] 39 | config['kwargs']['layer2']['kwargs']['sample_rate'] = config['kwargs']['sample_rate'] 40 | return config 41 | 42 | 43 | class Model(pts.configurable.Configurable): 44 | """ 45 | >>> pprint(Model.get_config()) 46 | {'cls': 'parametized_shared_parameter.Model', 47 | 'kwargs': {'transform': {'cls': 'parametized_shared_parameter.Compose', 48 | 'kwargs': {'sample_rate': 8000, 49 | 'layer1': {'cls': 'parametized_shared_parameter.Load', 50 | 'kwargs': {'sample_rate': 8000}}, 51 | 'layer2': {'cls': 'parametized_shared_parameter.FeatureExtractor', 52 | 'kwargs': {'sample_rate': 8000}}}}}} 53 | """ 54 | @classmethod 55 | def get_signature(self): 56 | defaults = super().get_signature() 57 | defaults['transform'] = deflatten({ 58 | 'cls': Compose, 59 | 'kwargs.sample_rate': 8000, 60 | 'kwargs.layer1.cls': Load, 61 | 'kwargs.layer2.cls': FeatureExtractor, 62 | 63 | }, sep='.') 64 | return defaults 65 | 66 | def __init__(self, transform): 67 | self.transform = transform 68 | 69 | 70 | 
import sacred 71 | import sacred.run 72 | import sacred.commands 73 | exp = sacred.Experiment('Shared Parameter') 74 | 75 | @exp.config 76 | def config(): 77 | 78 | model = {} 79 | Model.get_config( # second alternative update 80 | deflatten({ 81 | 'transform.kwargs.sample_rate': 44100, 82 | }, sep='.'), 83 | model, 84 | ) 85 | 86 | 87 | @exp.automain 88 | def main(_config, _run: sacred.run.Run): 89 | """ 90 | """ 91 | sacred.commands.print_config(_run) 92 | 93 | model = Model.from_config(_config['model']) 94 | 95 | print('Model config') 96 | pprint(model.config) 97 | 98 | 99 | if __name__ == '__main__': 100 | pass -------------------------------------------------------------------------------- /padertorch/contrib/je/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/je/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/je/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/je/data/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/je/data/filters.py: -------------------------------------------------------------------------------- 1 | from padertorch.utils import to_list 2 | 3 | 4 | class DiscardLabelsFilter: 5 | def __init__(self, key, names): 6 | self.key = key 7 | self.names = to_list(names) 8 | 9 | def __call__(self, example): 10 | return not any([name in to_list(example[self.key]) for name in self.names]) 11 | 12 | 13 | class RestrictLabelsFilter: 14 | def __init__(self, key, names): 15 | self.key = key 16 | self.names = to_list(names) 17 | 18 | def __call__(self, example): 19 | return any([name in to_list(example[self.key]) for name in self.names]) 20 | -------------------------------------------------------------------------------- /padertorch/contrib/je/hooks/swa.py: -------------------------------------------------------------------------------- 1 | from padertorch.train.hooks import TriggeredHook 2 | from paderbox.utils.nested import nested_op 3 | 4 | 5 | class SWAHook(TriggeredHook): 6 | """ 7 | performs stochastic weight averaging of the trainers model or a submodule of it 8 | """ 9 | def __init__(self, trigger, submodule=None): 10 | """ 11 | 12 | Args: 13 | trigger: 14 | submodule: 15 | """ 16 | super().__init__(trigger) 17 | self.submodule = [] if submodule is None else submodule.split('.') 18 | self.swa_module = None 19 | self.count = 0 20 | 21 | def state_dict(self): 22 | return { 23 | "swa_module": self.swa_module, 24 | "count": self.count 25 | } 26 | 27 | def load_state_dict(self, state_dict): 28 | self.swa_module = state_dict["swa_module"] 29 | self.count = state_dict["count"] 30 | 31 | def get_module(self, trainer): 32 | module = trainer.model 33 | for attr_name in self.submodule: 34 | module = getattr(module, attr_name) 35 | return module 36 | 37 | def pre_step(self, trainer): 38 | if self.trigger(iteration=trainer.iteration, epoch=trainer.epoch) \ 39 | and trainer.iteration != 0: 40 | print('SWA') 41 | module = self.get_module(trainer) 42 | self.count += 1 43 | if self.swa_module is None: 44 | self.swa_module = module.state_dict() 45 | else: 46 | r = 1 / self.count 47 | self.swa_module = nested_op( 48 | lambda x, y: (1-r) * x.to(y.device) + r 
* y, 49 | self.swa_module, 50 | module.state_dict() 51 | ) 52 | -------------------------------------------------------------------------------- /padertorch/contrib/je/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/je/models/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/je/models/clf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from padertorch.base import Model 4 | from padertorch.contrib.je.modules.conv import CNN1d 5 | from padertorch.contrib.je.modules.features import NormalizedLogMelExtractor 6 | from padertorch.contrib.je.modules.reduce import Mean 7 | from torchvision.utils import make_grid 8 | from einops import rearrange 9 | 10 | 11 | class Classifier(Model): 12 | def __init__( 13 | self, net: CNN1d, feature_extractor=None, *, 14 | input_key='stft', input_seq_len_key='seq_len', target_key, 15 | ): 16 | super().__init__() 17 | self.net = net 18 | self.feature_extractor = feature_extractor 19 | self.input_key = input_key 20 | self.input_seq_len_key = input_seq_len_key 21 | self.target_key = target_key 22 | 23 | def forward(self, inputs): 24 | x = inputs[self.input_key] 25 | seq_len = inputs[self.input_seq_len_key] 26 | if self.feature_extractor is not None: 27 | x = self.feature_extractor(x, seq_len) 28 | if x.dim() == 4 and isinstance(self.net, CNN1d): 29 | x = rearrange(x, 'b c f t -> b (c f) t') 30 | return x, self.net(x, seq_len) 31 | 32 | def review(self, inputs, outputs): 33 | targets = inputs[self.target_key].long() 34 | x, (logits, seq_len) = outputs 35 | if logits.dim() > 2 and targets.dim() == 1: 36 | assert logits.dim() == 3, logits.shape 37 | targets = targets.unsqueeze(-1) # add time axis 38 | targets = targets.expand((targets.shape[0], logits.shape[-1])) 39 | predictions = torch.argmax(logits, dim=1) 40 | ce = torch.nn.CrossEntropyLoss(reduction='none')(logits, targets) 41 | ce = Mean(axis=-1)(ce, seq_len) 42 | return dict( 43 | loss=ce.mean(), 44 | scalars=dict( 45 | predictions=predictions, 46 | targets=targets, 47 | ), 48 | histograms=dict( 49 | ce_=ce.flatten(), 50 | logits_=logits.flatten(), 51 | ), 52 | images=dict( 53 | features=x[:3], 54 | ) 55 | ) 56 | 57 | def modify_summary(self, summary): 58 | if 'targets' in summary['scalars']: 59 | targets = summary['scalars'].pop('targets') 60 | predictions = summary['scalars'].pop('predictions') 61 | summary['scalars']['accuracy'] = ( 62 | np.array(predictions) == np.array(targets) 63 | ).mean() 64 | for key, image in summary['images'].items(): 65 | if image.dim() == 3: 66 | image = image.unsqueeze(1) 67 | summary['images'][key] = make_grid( 68 | image.flip(2), normalize=True, scale_each=False, nrow=1 69 | ) 70 | summary = super().modify_summary(summary) 71 | return summary 72 | 73 | @classmethod 74 | def finalize_dogmatic_config(cls, config): 75 | config['net']['factory'] = CNN1d 76 | config['feature_extractor'] = { 77 | 'factory': NormalizedLogMelExtractor, 78 | } 79 | if config['net']['factory'] == CNN1d: 80 | if config['feature_extractor']['factory'] == NormalizedLogMelExtractor: 81 | config['net']['in_channels'] = config['feature_extractor']['n_mels'] 82 | else: 83 | raise ValueError(f'Factory {config["encoder"]["factory"]} not allowed.') 84 | 
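A hedged sketch of how such a model is typically constructed through padertorch's `Configurable` API, where `get_config` fills in the defaults from `finalize_dogmatic_config` above; the concrete update values are illustrative assumptions, not defaults of this module:

```python
from padertorch.contrib.je.models.clf import Classifier

config = Classifier.get_config(updates={
    'target_key': 'speaker_id',           # assumed label key in the examples
    'feature_extractor': {'n_mels': 64},  # assumed NormalizedLogMelExtractor kwarg
})
model = Classifier.from_config(config)
```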
-------------------------------------------------------------------------------- /padertorch/contrib/je/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/je/modules/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/je/modules/reduce.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from padertorch.ops.sequence.mask import compute_mask 5 | 6 | 7 | class Sum(nn.Module): 8 | """ 9 | >>> seq_axis = 1 10 | >>> x = torch.cumsum(torch.ones((3,7,4)), dim=seq_axis) 11 | >>> x = Sum(axis=seq_axis)(x, seq_len=[4,5,6]) 12 | """ 13 | def __init__(self, axis=-1, keepdims=False): 14 | self.axis = axis 15 | self.keepdims = keepdims 16 | super().__init__() 17 | 18 | def __call__(self, x, seq_len=None): 19 | if seq_len is None: 20 | x = x.sum(self.axis, keepdim=self.keepdims) 21 | else: 22 | mask = compute_mask(x, seq_len, 0, self.axis) 23 | x = (x * mask).sum(dim=self.axis, keepdim=self.keepdims) 24 | return x 25 | 26 | 27 | class Mean(Sum): 28 | """ 29 | >>> seq_axis = 1 30 | >>> x = torch.cumsum(torch.ones((3,7,4)), dim=seq_axis) 31 | >>> x = Mean(axis=seq_axis)(x, seq_len=[4,5,6]) 32 | >>> x.shape 33 | >>> x = torch.cumsum(torch.ones((3,7,4)), dim=seq_axis) 34 | >>> x = Mean(axis=seq_axis, keepdims=True)(x, seq_len=[4,5,6]) 35 | >>> x.shape 36 | """ 37 | def __call__(self, x, seq_len=None): 38 | if seq_len is None: 39 | x = x.mean(self.axis, keepdim=self.keepdims) 40 | else: 41 | mask = compute_mask(x, seq_len, 0, self.axis) 42 | x = (x * mask).sum(dim=self.axis, keepdim=self.keepdims) / (mask.sum(dim=self.axis, keepdim=self.keepdims) + 1e-6) 43 | return x 44 | 45 | 46 | class Max(nn.Module): 47 | """ 48 | >>> seq_axis = 1 49 | >>> x = torch.cumsum(torch.ones((3,7,4)), dim=seq_axis) 50 | >>> Max(axis=seq_axis)(x, seq_len=[4,5,6]) 51 | """ 52 | def __init__(self, axis=-1, keepdims=False): 53 | self.axis = axis 54 | self.keepdims = keepdims 55 | super().__init__() 56 | 57 | def __call__(self, x, seq_len=None): 58 | if seq_len is not None: 59 | mask = compute_mask(x, seq_len, 0, self.axis) 60 | x = (x + torch.log(mask)) 61 | x = x.max(self.axis, keepdim=self.keepdims) 62 | return x 63 | 64 | 65 | class TakeLast(nn.Module): 66 | """ 67 | >>> x = torch.Tensor([[[1,2,3]],[[4,5,6]]]) 68 | >>> TakeLast()(x, [2, 3]) 69 | tensor([[2.], 70 | [6.]]) 71 | """ 72 | def __init__(self, axis=-1, keepdims=False): 73 | self.axis = axis 74 | self.keepdims = keepdims 75 | super().__init__() 76 | 77 | def __call__(self, x, seq_len=None): 78 | axis = self.axis 79 | if axis < 0: 80 | axis = x.dim() + axis 81 | if axis != 1: 82 | assert axis > 1, axis 83 | x = x.unsqueeze(1).transpose(1, axis+1).squeeze(axis + 1) 84 | if seq_len is None: 85 | x = x[:, -1] 86 | else: 87 | x = x[torch.arange(x.shape[0]), np.array(seq_len) - 1] 88 | if self.keepdims: 89 | x = x.unsqueeze(self.axis) 90 | return x 91 | 92 | 93 | class AutoPool(nn.Module): 94 | """ 95 | 96 | >>> autopool = AutoPool(10) 97 | >>> autopool(torch.cumsum(torch.ones(4, 10, 17), dim=-1), seq_len=[17, 15, 12, 9]) 98 | """ 99 | def __init__(self, n_classes, alpha=1., trainable=False): 100 | super().__init__() 101 | self.trainable = trainable 102 | if trainable: 103 | self.alpha = nn.Parameter(alpha*torch.ones((n_classes, 1))) 104 | else: 105 | 
self.alpha = alpha 106 | 107 | def forward(self, x, seq_len=None): 108 | x_ = self.alpha*x 109 | if seq_len is not None: 110 | seq_len = torch.Tensor(seq_len).to(x.device)[:, None, None] 111 | mask = (torch.cumsum(torch.ones_like(x_), dim=-1) <= seq_len).float() 112 | x_ = x_ * mask + torch.log(mask) 113 | weights = nn.Softmax(dim=-1)(x_) 114 | return (weights*x).sum(dim=-1) 115 | -------------------------------------------------------------------------------- /padertorch/contrib/je/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/je/tests/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/jensheit/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import * 2 | -------------------------------------------------------------------------------- /padertorch/contrib/jensheit/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, asdict, field 2 | from padertorch.base import Module 3 | from padertorch.configurable import Configurable 4 | 5 | 6 | __all__ = [ 7 | 'Parameterized', 8 | 'BuildingBlock', 9 | 'dict_func' 10 | ] 11 | 12 | def dict_func(in_dict): 13 | return field(default_factory=lambda: in_dict) 14 | 15 | class Parameterized(Configurable): 16 | @dataclass 17 | class opts: 18 | pass 19 | 20 | def __init__(self, **kwargs): 21 | super().__init__() 22 | if 'opts' in kwargs: 23 | self.opts = kwargs['opts'] 24 | assert hasattr(self.opts, '__dataclass_fields__') 25 | else: 26 | self.opts = self.opts(**kwargs) 27 | 28 | def __repr__(self): 29 | return f'{type(self).__name__}:\n{str(self.opts)}' 30 | 31 | @classmethod 32 | def finalize_dogmatic_config(cls, config): 33 | for key, value in asdict(cls.opts()).items(): 34 | config[key] = value 35 | 36 | 37 | class BuildingBlock(Parameterized, Module): 38 | def __init__(self, **kwargs): 39 | super().__init__(**kwargs) 40 | super(Parameterized).__init__() 41 | self.build() 42 | 43 | def forward(self, *args, **kwargs): 44 | raise NotImplementedError 45 | 46 | def build(self, *args, **kwargs): 47 | pass 48 | -------------------------------------------------------------------------------- /padertorch/contrib/jensheit/evaluation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import paderbox as pb 4 | from padercontrib.database import keys as DB_K 5 | from paderbox.utils.numpy_utils import morph 6 | from padertorch.data import example_to_device 7 | from padertorch.modules.mask_estimator import MaskKeys as M_K 8 | 9 | __all__ = [ 10 | 'beamforming' 11 | ] 12 | 13 | 14 | def beamforming(observation, speech_mask, noise_mask, 15 | speech_image=None, noise_image=None, 16 | get_bf_fn=pb.speech_enhancement.get_mvdr_vector_souden): 17 | """ 18 | 19 | :param observation: ...xCxTxF 20 | :param speech_mask: ...xCxTxF 21 | :param noise_mask: ...xCxTxF 22 | :param speech_image: ...xCxTxF 23 | :param noise_image: ...xCxTxF 24 | :return: predicted speech signal: ...xTxF 25 | """ 26 | speech_mask = np.median(speech_mask, axis=-3).swapaxes(-2, -1) 27 | noise_mask = np.median(noise_mask, axis=-3).swapaxes(-2, -1) 28 | obs = morph('...ctf->...fct', observation) 29 | covariance = pb.speech_enhancement.get_power_spectral_density_matrix 30 | speech_psd = 
covariance(obs, speech_mask)
31 |     noise_psd = covariance(obs, noise_mask)
32 |     bf_vec = get_bf_fn(speech_psd, noise_psd)
33 |     speech_pred = pb.speech_enhancement.apply_beamforming_vector(
34 |         bf_vec, obs).swapaxes(-2, -1)
35 |     if speech_image is not None:
36 |         image_contribution = pb.speech_enhancement.apply_beamforming_vector(
37 |             bf_vec, morph('...ctf->...fct', speech_image)).swapaxes(-2, -1)
38 |     else:
39 |         image_contribution = None
40 |     if noise_image is not None:
41 |         noise_contribution = pb.speech_enhancement.apply_beamforming_vector(
42 |             bf_vec, morph('...ctf->...fct', noise_image)).swapaxes(-2, -1)
43 |     else:
44 |         noise_contribution = None
45 |     return speech_pred, image_contribution, noise_contribution
46 | 
47 | 
48 | def evaluate_masks(example, model, stft):
49 |     model_out = model(example_to_device(example))
50 |     speech_image = example[DB_K.SPEECH_IMAGE][0]
51 |     speech_pred, image_cont, noise_cont = beamforming(
52 |         example[M_K.OBSERVATION_STFT][0],
53 |         model_out[M_K.SPEECH_MASK_PRED][0].detach().numpy(),
54 |         model_out[M_K.NOISE_MASK_PRED][0].detach().numpy(),
55 |         stft(speech_image),
56 |         stft(example[DB_K.NOISE_IMAGE][0])
57 |     )
58 |     ex_id = example[DB_K.EXAMPLE_ID][0]
59 |     pesq = pb.evaluation.pesq(example[DB_K.SPEECH_IMAGE][0][0],
60 |                               stft.inverse(speech_pred))[0]
61 |     snr = np.mean(-10 * np.log10(np.abs(image_cont) ** 2
62 |                                  / np.abs(noise_cont) ** 2))
63 |     print(ex_id, snr, pesq)
64 |     return ex_id, snr, pesq
65 | 
--------------------------------------------------------------------------------
/padertorch/contrib/jensheit/mask_estimator_example/__init__.py:
--------------------------------------------------------------------------------
1 | from .modul import MaskEstimator
2 | from .model import MaskEstimatorModel
--------------------------------------------------------------------------------
/padertorch/contrib/jensheit/norm.py:
--------------------------------------------------------------------------------
1 | """
2 | This code is an adapted version of https://github.com/funcwj/conv-tasnet
3 | """
4 | 
5 | import torch
6 | import torch.nn as nn
7 | from einops import rearrange
8 | 
9 | 
10 | class TransposedLayerNorm(nn.LayerNorm):
11 |     """
12 |     Channel-wise layer normalization
13 |     >>> norm = TransposedLayerNorm(256)
14 |     >>> norm(torch.rand(5, 256, 343)).shape
15 |     torch.Size([5, 256, 343])
16 |     """
17 | 
18 |     def __init__(self, normalized_shape, eps=1e-5, elementwise_affine=True):
19 |         super().__init__(normalized_shape, eps, elementwise_affine)
20 | 
21 |     def forward(self, x):
22 |         """
23 |         x: N x F x T
24 |         """
25 |         if x.dim() != 3:
26 |             raise RuntimeError("{} accepts only 3D tensors as input".format(
27 |                 self.__class__.__name__))
28 |         x = rearrange(x, 'n f t -> n t f')
29 |         # LN
30 |         x = super().forward(x)
31 |         x = rearrange(x, 'n t f -> n f t')
32 |         return x
33 | 
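# TransposedLayerNorm ("cLN") above normalizes every time step independently
# over the feature axis, whereas GlobalChannelLayerNorm ("gLN") below uses a
# mean and variance computed jointly over the feature and time axes.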
x.dim() != 3: 60 | raise RuntimeError("{} accept 3D tensor as input".format( 61 | self.__name__)) 62 | # N x 1 x 1 63 | mean = torch.mean(x, (1, 2), keepdim=True) 64 | var = torch.mean((x - mean)**2, (1, 2), keepdim=True) 65 | # N x T x F 66 | if self.elementwise_affine: 67 | x = self.gamma * (x - mean) / torch.sqrt(var + self.eps) + self.beta 68 | else: 69 | x = (x - mean) / torch.sqrt(var + self.eps) 70 | return x 71 | 72 | def extra_repr(self): 73 | return "{normalized_dim}, eps={eps}, " \ 74 | "elementwise_affine={elementwise_affine}".format(**self.__dict__) 75 | 76 | 77 | def build_norm(norm, dim): 78 | """ 79 | Build normalize layer 80 | LN cost more memory than BN 81 | 82 | >>> norm = build_norm('cLN', 256) 83 | >>> norm(torch.rand(5, 256, 343)).shape 84 | torch.Size([5, 256, 343]) 85 | 86 | >>> norm = build_norm('gLN', 256) 87 | >>> norm(torch.rand(5, 256, 343)).shape 88 | torch.Size([5, 256, 343]) 89 | 90 | >>> norm = build_norm('BN', 256) 91 | >>> norm(torch.rand(5, 256, 343)).shape 92 | torch.Size([5, 256, 343]) 93 | """ 94 | if norm not in ["cLN", "gLN", "BN"]: 95 | raise RuntimeError("Unsupported normalize layer: {}".format(norm)) 96 | if norm == "cLN": 97 | return TransposedLayerNorm(dim, elementwise_affine=True) 98 | elif norm == "BN": 99 | return nn.BatchNorm1d(dim) 100 | else: 101 | return GlobalChannelLayerNorm(dim, elementwise_affine=True) -------------------------------------------------------------------------------- /padertorch/contrib/jensheit/tests/test_mask_estimator.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import padertorch as pt 3 | import numpy as np 4 | import torch 5 | 6 | K = pt.modules.mask_estimator.MaskKeys 7 | 8 | 9 | class TestMaskEstimatorModel(unittest.TestCase): 10 | # TODO: Test forward deterministic if not train 11 | C = 4 12 | 13 | def setUp(self): 14 | self.model_class= pt.models.mask_estimator.MaskEstimatorModel 15 | self.model = self.model_class.from_config( 16 | self.model_class.get_config()) 17 | self.T = 100 18 | self.B = 4 19 | self.F = 513 20 | self.num_frames = [100, 90, 80, 70] 21 | self.inputs = { 22 | K.OBSERVATION_ABS: [ 23 | np.abs(np.random.normal( 24 | size=(self.C, num_frames_, self.F) 25 | )).astype(np.float32) 26 | for num_frames_ in self.num_frames 27 | ], 28 | K.SPEECH_MASK_TARGET: [ 29 | np.abs(np.random.choice( 30 | [0, 1], 31 | size=(self.C, num_frames_, self.F) 32 | )).astype(np.float32) 33 | for num_frames_ in self.num_frames 34 | ], 35 | K.NOISE_MASK_TARGET: [ 36 | np.abs(np.random.choice( 37 | [0, 1], 38 | size=(self.C, num_frames_, self.F) 39 | )).astype(np.float32) 40 | for num_frames_ in self.num_frames 41 | ], 42 | K.NUM_FRAMES: [num_frames for num_frames in self.num_frames], 43 | } 44 | 45 | def test_signature(self): 46 | assert callable(getattr(self.model, 'forward', None)) 47 | assert callable(getattr(self.model, 'review', None)) 48 | 49 | def test_forward(self): 50 | inputs = pt.data.example_to_device(self.inputs) 51 | model_out = self.model(inputs) 52 | for mask, num_frames in zip(model_out[K.SPEECH_MASK_PRED], 53 | self.num_frames): 54 | expected_shape = (self.C, num_frames, self.F) 55 | assert mask.shape == expected_shape, mask.shape 56 | for mask, num_frames in zip(model_out[K.SPEECH_MASK_LOGITS], 57 | self.num_frames): 58 | expected_shape = (self.C, num_frames, self.F) 59 | assert mask.shape == expected_shape, mask.shape 60 | 61 | def test_review(self): 62 | inputs = pt.data.example_to_device(self.inputs) 63 | mask = self.model(inputs) 64 | 
review = self.model.review(inputs, mask) 65 | 66 | assert 'loss' in review, review.keys() 67 | assert 'loss' not in review['scalars'], review['scalars'].keys() 68 | 69 | def test_minibatch_equal_to_single_example(self): 70 | inputs = pt.data.example_to_device(self.inputs) 71 | model = self.model 72 | model.eval() 73 | mask = model(inputs) 74 | review = model.review(inputs, mask) 75 | actual_loss = review['loss'] 76 | 77 | reference_loss = list() 78 | 79 | for observation, target_mask, noise_mask in zip( 80 | self.inputs[K.OBSERVATION_ABS], 81 | self.inputs[K.SPEECH_MASK_TARGET], 82 | self.inputs[K.NOISE_MASK_TARGET], 83 | ): 84 | inputs = { 85 | K.OBSERVATION_ABS: [observation], 86 | K.SPEECH_MASK_TARGET: [target_mask], 87 | K.NOISE_MASK_TARGET: [noise_mask], 88 | K.NUM_FRAMES: [observation.shape[1]] 89 | } 90 | inputs = pt.data.example_to_device(inputs) 91 | mask = model(inputs) 92 | review = model.review(inputs, mask) 93 | reference_loss.append(review['loss']) 94 | 95 | reference_loss = torch.sum(torch.stack(reference_loss)) 96 | 97 | np.testing.assert_allclose( 98 | actual_loss.detach().numpy(), 99 | reference_loss.detach().numpy(), 100 | atol=1e-3 101 | ) 102 | 103 | 104 | class TestMaskEstimatorSingleChannelModel(TestMaskEstimatorModel): 105 | C = 1 106 | -------------------------------------------------------------------------------- /padertorch/contrib/jensheit/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from warnings import warn 3 | 4 | from paderbox.io import load_json 5 | from paderbox.utils.nested import flatten 6 | from padertorch.configurable import class_to_str 7 | 8 | 9 | def dict_compare(d1, d2): 10 | # From http://stackoverflow.com/questions/4527942/comparing-two-dictionaries-in-python 11 | d1_keys = set(d1.keys()) 12 | d2_keys = set(d2.keys()) 13 | intersect_keys = d1_keys.intersection(d2_keys) 14 | added = d1_keys - d2_keys 15 | removed = d2_keys - d1_keys 16 | 17 | # Init differs from defaults: 18 | modified = {o: (d1[o], d2[o]) for o in intersect_keys if d1[o] != d2[o]} 19 | 20 | same = set(o for o in intersect_keys if d1[o] == d2[o]) 21 | are_equal = not len(added) and not len(removed) and not len(modified) 22 | return added, removed, modified, same, are_equal 23 | 24 | 25 | def compare_configs(storage_dir, trainer_opts, provider_opts): 26 | opts = flatten(trainer_opts) 27 | opts.update(flatten(provider_opts)) 28 | init = load_json(Path(storage_dir) / 'init.json') 29 | 30 | added, removed, modified, _, _ = dict_compare(opts, init) 31 | if len(added): 32 | warn( 33 | f'The following options were added to the model: {added}' 34 | ) 35 | if len(removed): 36 | warn( 37 | f'The following options were removed from the model: {removed}' 38 | ) 39 | 40 | return init['trainer_opts'], init['provider_opts'] 41 | 42 | 43 | def get_experiment_name(model_opts, submodel=None): 44 | model_name = class_to_str(model_opts["factory"]) 45 | assert isinstance(model_name, str), (model_name, type(model_name)) 46 | model_name = model_name.split('.')[-1] 47 | if submodel is not None: 48 | sub_name = class_to_str(model_opts[submodel]["factory"]) 49 | assert isinstance(sub_name, str), (sub_name, type(sub_name)) 50 | sep_name = sub_name.split('.')[-1] 51 | else: 52 | sep_name = 'baseline' 53 | ex_name = f'{model_name}/{sep_name}' 54 | return ex_name 55 | -------------------------------------------------------------------------------- /padertorch/contrib/ldrude/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/ldrude/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/ldrude/data.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import einops 4 | import numpy as np 5 | import padertorch as pt 6 | from padercontrib.database.iterator import AudioReader 7 | from padercontrib.database.keys import * 8 | from paderbox.transform import stft 9 | from pb_bss.extraction import ideal_binary_mask 10 | 11 | 12 | def pre_batch_transform(inputs, return_keys=None): 13 | s = inputs['audio_data']['speech_source'] 14 | y = inputs['audio_data']['observation'] 15 | S = stft(s, 512, 128) 16 | Y = stft(y, 512, 128) 17 | Y = einops.rearrange(Y, 't f -> t f') 18 | S = einops.rearrange(S, 'k t f -> t k f') 19 | X = S # Same for MERL database 20 | num_frames = Y.shape[0] 21 | 22 | return_dict = dict() 23 | 24 | def maybe_add(key, value): 25 | if return_keys is None or key in return_keys: 26 | return_dict[key] = value 27 | 28 | maybe_add('example_id', inputs['example_id']) 29 | maybe_add('s', np.ascontiguousarray(s, np.float32)) 30 | maybe_add('y', np.ascontiguousarray(y, np.float32)) 31 | maybe_add('Y', np.ascontiguousarray(Y, np.complex64)) 32 | maybe_add('X_abs', np.ascontiguousarray(np.abs(X), np.float32)) 33 | maybe_add('Y_abs', np.ascontiguousarray(np.abs(Y), np.float32)) 34 | maybe_add('num_frames', num_frames) 35 | maybe_add('cos_phase_difference', np.ascontiguousarray( 36 | np.cos(np.angle(Y[:, None, :]) - np.angle(X)), np.float32) 37 | ) 38 | 39 | if return_keys is None or 'target_mask' in return_keys: 40 | return_dict['target_mask'] = np.ascontiguousarray( 41 | ideal_binary_mask(S, source_axis=-2), np.float32 42 | ) 43 | 44 | return return_dict 45 | 46 | 47 | def post_batch_transform(batch): 48 | return batch 49 | 50 | 51 | def prepare_iterable( 52 | db, dataset: str, batch_size, return_keys=None, prefetch=True, 53 | iterator_slice=None 54 | ): 55 | audio_keys = [OBSERVATION, SPEECH_SOURCE] 56 | audio_reader = AudioReader(audio_keys=audio_keys, read_fn=db.read_fn) 57 | iterator = db.get_iterator_by_names(dataset) 58 | 59 | if iterator_slice is not None: 60 | iterator = iterator[iterator_slice] 61 | 62 | iterator = ( 63 | iterator 64 | .map(audio_reader) 65 | .map(partial(pre_batch_transform, return_keys=return_keys)) 66 | .shuffle(reshuffle=False) 67 | .batch(batch_size) 68 | .map(lambda batch: sorted( 69 | batch, 70 | key=lambda example: example["num_frames"], 71 | reverse=True, 72 | )) 73 | .map(pt.data.utils.collate_fn) 74 | .map(post_batch_transform) 75 | .tile(reps=50, shuffle=True) # Simulates reshuffle to some degree 76 | ) 77 | 78 | if prefetch: 79 | iterator = iterator.prefetch(4, 8) 80 | 81 | return iterator 82 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/mk/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/mk/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing 
import List 4 | 5 | 6 | # https://stackoverflow.com/a/59803793/16085876 7 | def run_fast_scandir(dir: Path, ext: List[str]): 8 | subfolders, files = [], [] 9 | 10 | for f in os.scandir(dir): 11 | if f.is_dir(): 12 | subfolders.append(f.path) 13 | if f.is_file(): 14 | if os.path.splitext(f.name)[1].lower() in ext: 15 | files.append(Path(f.path)) 16 | 17 | 18 | for dir in list(subfolders): 19 | sf, f = run_fast_scandir(dir, ext) 20 | subfolders.extend(sf) 21 | files.extend(f) 22 | return subfolders, files 23 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/mk/modules/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/mk/modules/features/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/mk/modules/features/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/mk/modules/features/ssl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/mk/modules/features/ssl/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/__init__.py: -------------------------------------------------------------------------------- 1 | from .vocoder import Vocoder 2 | from .parametric import fast_griffin_lim, FGLA 3 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/base.py: -------------------------------------------------------------------------------- 1 | import typing 2 | from functools import partial 3 | 4 | import numpy as np 5 | import torch 6 | from paderbox.transform.module_resample import resample_sox 7 | import padertorch as pt 8 | 9 | 10 | class Synthesis(pt.Configurable): 11 | sampling_rate: int 12 | 13 | def __init__( 14 | self, 15 | postprocessing: typing.Optional[typing.Callable] = None, 16 | ): 17 | super().__init__() 18 | self.postprocessing = postprocessing 19 | 20 | def __call__( 21 | self, 22 | time_signal: typing.Union[ 23 | np.ndarray, torch.Tensor, typing.List[np.ndarray], 24 | typing.List[torch.Tensor] 25 | ], 26 | target_sampling_rate: typing.Optional[int] = None, 27 | ) -> typing.Union[ 28 | np.ndarray, torch.Tensor, typing.List[np.ndarray], 29 | typing.List[torch.Tensor] 30 | ]: 31 | if self.postprocessing is not None: 32 | if isinstance(time_signal, list) or time_signal.ndim == 2: 33 | time_signal = list(map(self.postprocessing, time_signal)) 34 | else: 35 | time_signal = self.postprocessing(time_signal) 36 | return self.resample(time_signal, target_sampling_rate) 37 | 38 | def _resample( 39 | self, 40 | wav: typing.Union[np.ndarray, torch.Tensor], 41 | target_sampling_rate: typing.Optional[int] = None, 42 | ) -> typing.Union[np.ndarray, torch.Tensor]: 43 | to_torch = False 44 | if ( 45 | target_sampling_rate is None 46 | or target_sampling_rate == self.sampling_rate 47 | ): 48 | return wav 49 | if isinstance(wav, torch.Tensor): 50 | to_torch = True 51 | wav = 
pt.utils.to_numpy(wav, detach=True) 52 | wav = resample_sox( 53 | wav, 54 | in_rate=self.sampling_rate, 55 | out_rate=target_sampling_rate 56 | ) 57 | if to_torch: 58 | wav = torch.from_numpy(wav) 59 | return wav 60 | 61 | def resample( 62 | self, 63 | wav: typing.Union[ 64 | np.ndarray, torch.Tensor, typing.List[np.ndarray], 65 | typing.List[torch.Tensor] 66 | ], 67 | target_sampling_rate: typing.Optional[int] = None, 68 | ) -> typing.Union[ 69 | np.ndarray, torch.Tensor, typing.List[np.ndarray], 70 | typing.List[torch.Tensor] 71 | ]: 72 | if isinstance(wav, list) or wav.ndim == 2: 73 | wav = list(map( 74 | partial( 75 | self._resample, target_sampling_rate=target_sampling_rate 76 | ), wav 77 | )) 78 | try: 79 | m = np if isinstance(wav[0], np.ndarray) else torch 80 | wav = m.stack(wav) 81 | except (ValueError, RuntimeError): 82 | pass 83 | return wav 84 | return self._resample(wav, target_sampling_rate=target_sampling_rate) 85 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/parametric/__init__.py: -------------------------------------------------------------------------------- 1 | from .griffin_lim import fast_griffin_lim, FGLA 2 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/__init__.py: -------------------------------------------------------------------------------- 1 | from .pwg import Vocoder 2 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/__init__.py: -------------------------------------------------------------------------------- 1 | # Copied from https://github.com/NVIDIA/BigVGAN/tree/main 2 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/cuda/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/cuda/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/cuda/activation1d.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 NVIDIA CORPORATION. 2 | # Licensed under the MIT license. 3 | 4 | import torch 5 | import torch.nn as nn 6 | from ..torch.resample import UpSample1d, DownSample1d 7 | 8 | # load fused CUDA kernel: this enables importing anti_alias_activation_cuda 9 | from .load import load 10 | 11 | anti_alias_activation_cuda = load() 12 | 13 | 14 | class FusedAntiAliasActivation(torch.autograd.Function): 15 | """ 16 | Assumes filter size 12, replication padding on upsampling/downsampling, and logscale alpha/beta parameters as inputs. 17 | The hyperparameters are hard-coded in the kernel to maximize speed. 
18 | NOTE: The fused kernel is incorrect for Activation1d with different hyperparameters. 19 | """ 20 | 21 | @staticmethod 22 | def forward(ctx, inputs, up_ftr, down_ftr, alpha, beta): 23 | activation_results = anti_alias_activation_cuda.forward( 24 | inputs, up_ftr, down_ftr, alpha, beta 25 | ) 26 | 27 | return activation_results 28 | 29 | @staticmethod 30 | def backward(ctx, output_grads): 31 | raise NotImplementedError  # no backward pass is implemented for the fused kernel 32 | # use Activation1d(fused=False) if gradients are required during training 33 | 34 | 35 | class Activation1d(nn.Module): 36 | def __init__( 37 | self, 38 | activation, 39 | up_ratio: int = 2, 40 | down_ratio: int = 2, 41 | up_kernel_size: int = 12, 42 | down_kernel_size: int = 12, 43 | fused: bool = True, 44 | ): 45 | super().__init__() 46 | self.up_ratio = up_ratio 47 | self.down_ratio = down_ratio 48 | self.act = activation 49 | self.upsample = UpSample1d(up_ratio, up_kernel_size) 50 | self.downsample = DownSample1d(down_ratio, down_kernel_size) 51 | 52 | self.fused = fused # Whether to use fused CUDA kernel or not 53 | 54 | def forward(self, x): 55 | if not self.fused: 56 | x = self.upsample(x) 57 | x = self.act(x) 58 | x = self.downsample(x) 59 | return x 60 | else: 61 | if self.act.__class__.__name__ == "Snake": 62 | beta = self.act.alpha.data # Snake uses same params for alpha and beta 63 | else: 64 | beta = ( 65 | self.act.beta.data 66 | ) # Snakebeta uses different params for alpha and beta 67 | alpha = self.act.alpha.data 68 | if ( 69 | not self.act.alpha_logscale 70 | ): # Exp baked into cuda kernel, cancel it out with a log 71 | alpha = torch.log(alpha) 72 | beta = torch.log(beta) 73 | 74 | x = FusedAntiAliasActivation.apply( 75 | x, self.upsample.filter, self.downsample.lowpass.filter, alpha, beta 76 | ) 77 | return x 78 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp: -------------------------------------------------------------------------------- 1 | /* coding=utf-8 2 | * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include <torch/extension.h> 18 | 19 | extern "C" torch::Tensor fwd_cuda(torch::Tensor const &input, torch::Tensor const &up_filter, torch::Tensor const &down_filter, torch::Tensor const &alpha, torch::Tensor const &beta); 20 | 21 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 22 | m.def("forward", &fwd_cuda, "Anti-Alias Activation forward (CUDA)"); 23 | } -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/cuda/compat.h: -------------------------------------------------------------------------------- 1 | /* coding=utf-8 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /*This code is copied from NVIDIA apex: 18 | * https://github.com/NVIDIA/apex 19 | * with minor changes. */ 20 | 21 | #ifndef TORCH_CHECK 22 | #define TORCH_CHECK AT_CHECK 23 | #endif 24 | 25 | #ifdef VERSION_GE_1_3 26 | #define DATA_PTR data_ptr 27 | #else 28 | #define DATA_PTR data 29 | #endif 30 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/cuda/load.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 NVIDIA CORPORATION. 2 | # Licensed under the MIT license. 3 | 4 | import os 5 | import pathlib 6 | import subprocess 7 | 8 | from torch.utils import cpp_extension 9 | 10 | """ 11 | Setting this param to a list can generate different compilation commands (with a different order of architectures) and lead to recompilation of the fused kernels. 12 | Set it to an empty string to avoid recompilation and assign arch flags explicitly in extra_cuda_cflags below. 13 | """ 14 | os.environ["TORCH_CUDA_ARCH_LIST"] = "" 15 | 16 | 17 | def load(): 18 | # Check if cuda 11 is installed for compute capability 8.0 19 | cc_flag = [] 20 | _, bare_metal_major, _ = _get_cuda_bare_metal_version(cpp_extension.CUDA_HOME) 21 | if int(bare_metal_major) >= 11: 22 | cc_flag.append("-gencode") 23 | cc_flag.append("arch=compute_80,code=sm_80") 24 | 25 | # Build path 26 | srcpath = pathlib.Path(__file__).parent.absolute() 27 | buildpath = srcpath / "build" 28 | _create_build_dir(buildpath) 29 | 30 | # Helper function to build the kernels. 
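# (Note on the call below: `torch.utils.cpp_extension.load` JIT-compiles the CUDA sources with nvcc/ninja on the first call and caches the resulting shared library in `buildpath`, so later calls reuse the compiled kernel as long as sources and flags are unchanged.)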
31 | def _cpp_extension_load_helper(name, sources, extra_cuda_flags): 32 | return cpp_extension.load( 33 | name=name, 34 | sources=sources, 35 | build_directory=buildpath, 36 | extra_cflags=[ 37 | "-O3", 38 | ], 39 | extra_cuda_cflags=[ 40 | "-O3", 41 | "-gencode", 42 | "arch=compute_70,code=sm_70", 43 | "--use_fast_math", 44 | ] 45 | + extra_cuda_flags 46 | + cc_flag, 47 | verbose=True, 48 | ) 49 | 50 | extra_cuda_flags = [ 51 | "-U__CUDA_NO_HALF_OPERATORS__", 52 | "-U__CUDA_NO_HALF_CONVERSIONS__", 53 | "--expt-relaxed-constexpr", 54 | "--expt-extended-lambda", 55 | ] 56 | 57 | sources = [ 58 | srcpath / "anti_alias_activation.cpp", 59 | srcpath / "anti_alias_activation_cuda.cu", 60 | ] 61 | anti_alias_activation_cuda = _cpp_extension_load_helper( 62 | "anti_alias_activation_cuda", sources, extra_cuda_flags 63 | ) 64 | 65 | return anti_alias_activation_cuda 66 | 67 | 68 | def _get_cuda_bare_metal_version(cuda_dir): 69 | raw_output = subprocess.check_output( 70 | [cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True 71 | ) 72 | output = raw_output.split() 73 | release_idx = output.index("release") + 1 74 | release = output[release_idx].split(".") 75 | bare_metal_major = release[0] 76 | bare_metal_minor = release[1][0] 77 | 78 | return raw_output, bare_metal_major, bare_metal_minor 79 | 80 | 81 | def _create_build_dir(buildpath): 82 | try: 83 | os.mkdir(buildpath) 84 | except OSError: 85 | if not os.path.isdir(buildpath): 86 | print(f"Creation of the build directory {buildpath} failed") 87 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/torch/__init__.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0 2 | # LICENSE is in incl_licenses directory. 3 | 4 | from .filter import * 5 | from .resample import * 6 | from .act import * 7 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/torch/act.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0 2 | # LICENSE is in incl_licenses directory. 3 | 4 | import torch.nn as nn 5 | from .resample import UpSample1d, DownSample1d 6 | 7 | 8 | class Activation1d(nn.Module): 9 | def __init__( 10 | self, 11 | activation, 12 | up_ratio: int = 2, 13 | down_ratio: int = 2, 14 | up_kernel_size: int = 12, 15 | down_kernel_size: int = 12, 16 | ): 17 | super().__init__() 18 | self.up_ratio = up_ratio 19 | self.down_ratio = down_ratio 20 | self.act = activation 21 | self.upsample = UpSample1d(up_ratio, up_kernel_size) 22 | self.downsample = DownSample1d(down_ratio, down_kernel_size) 23 | 24 | # x: [B,C,T] 25 | def forward(self, x): 26 | x = self.upsample(x) 27 | x = self.act(x) 28 | x = self.downsample(x) 29 | 30 | return x 31 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/torch/filter.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0 2 | # LICENSE is in incl_licenses directory. 
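# This module provides a Kaiser-windowed sinc low-pass filter (`kaiser_sinc_filter1d`, `LowPassFilter1d`); the anti-aliased up-/downsampling in `resample.py` is built on top of it.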
3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import math 8 | 9 | if "sinc" in dir(torch): 10 | sinc = torch.sinc 11 | else: 12 | # This code is adapted from adefossez's julius.core.sinc under the MIT License 13 | # https://adefossez.github.io/julius/julius/core.html 14 | # LICENSE is in incl_licenses directory. 15 | def sinc(x: torch.Tensor): 16 | """ 17 | Implementation of sinc, i.e. sin(pi * x) / (pi * x) 18 | __Warning__: Different from julius.sinc, the input is multiplied by `pi`! 19 | """ 20 | return torch.where( 21 | x == 0, 22 | torch.tensor(1.0, device=x.device, dtype=x.dtype), 23 | torch.sin(math.pi * x) / math.pi / x, 24 | ) 25 | 26 | 27 | # This code is adapted from adefossez's julius.lowpass.LowPassFilters under the MIT License 28 | # https://adefossez.github.io/julius/julius/lowpass.html 29 | # LICENSE is in incl_licenses directory. 30 | def kaiser_sinc_filter1d( 31 | cutoff, half_width, kernel_size 32 | ): # return filter [1,1,kernel_size] 33 | even = kernel_size % 2 == 0 34 | half_size = kernel_size // 2 35 | 36 | # For kaiser window 37 | delta_f = 4 * half_width 38 | A = 2.285 * (half_size - 1) * math.pi * delta_f + 7.95 39 | if A > 50.0: 40 | beta = 0.1102 * (A - 8.7) 41 | elif A >= 21.0: 42 | beta = 0.5842 * (A - 21) ** 0.4 + 0.07886 * (A - 21.0) 43 | else: 44 | beta = 0.0 45 | window = torch.kaiser_window(kernel_size, beta=beta, periodic=False) 46 | 47 | # ratio = 0.5/cutoff -> 2 * cutoff = 1 / ratio 48 | if even: 49 | time = torch.arange(-half_size, half_size) + 0.5 50 | else: 51 | time = torch.arange(kernel_size) - half_size 52 | if cutoff == 0: 53 | filter_ = torch.zeros_like(time) 54 | else: 55 | filter_ = 2 * cutoff * window * sinc(2 * cutoff * time) 56 | """ 57 | Normalize filter to have sum = 1, otherwise we will have a small leakage of the constant component in the input signal. 58 | """ 59 | filter_ /= filter_.sum() 60 | filter = filter_.view(1, 1, kernel_size) 61 | 62 | return filter 63 | 64 | 65 | class LowPassFilter1d(nn.Module): 66 | def __init__( 67 | self, 68 | cutoff=0.5, 69 | half_width=0.6, 70 | stride: int = 1, 71 | padding: bool = True, 72 | padding_mode: str = "replicate", 73 | kernel_size: int = 12, 74 | ): 75 | """ 76 | kernel_size should be an even number for the stylegan3 setup; in this implementation, an odd number is also possible. 
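A minimal usage sketch (the cutoff/half_width values are illustrative, not tuned):
>>> lp = LowPassFilter1d(cutoff=0.25, half_width=0.1, kernel_size=12)
>>> lp(torch.rand(1, 2, 100)).shape
torch.Size([1, 2, 100])
With the default stride=1 and padding=True, the sequence length is preserved.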
77 | """ 78 | super().__init__() 79 | if cutoff < -0.0: 80 | raise ValueError("Minimum cutoff must be larger than zero.") 81 | if cutoff > 0.5: 82 | raise ValueError("A cutoff above 0.5 does not make sense.") 83 | self.kernel_size = kernel_size 84 | self.even = kernel_size % 2 == 0 85 | self.pad_left = kernel_size // 2 - int(self.even) 86 | self.pad_right = kernel_size // 2 87 | self.stride = stride 88 | self.padding = padding 89 | self.padding_mode = padding_mode 90 | filter = kaiser_sinc_filter1d(cutoff, half_width, kernel_size) 91 | self.register_buffer("filter", filter) 92 | 93 | # Input [B, C, T] 94 | def forward(self, x): 95 | _, C, _ = x.shape 96 | 97 | if self.padding: 98 | x = F.pad(x, (self.pad_left, self.pad_right), mode=self.padding_mode) 99 | out = F.conv1d(x, self.filter.expand(C, -1, -1), stride=self.stride, groups=C) 100 | 101 | return out 102 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/torch/resample.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0 2 | # LICENSE is in incl_licenses directory. 3 | 4 | import torch.nn as nn 5 | from torch.nn import functional as F 6 | 7 | from .filter import LowPassFilter1d 8 | from .filter import kaiser_sinc_filter1d 9 | 10 | 11 | class UpSample1d(nn.Module): 12 | def __init__(self, ratio=2, kernel_size=None): 13 | super().__init__() 14 | self.ratio = ratio 15 | self.kernel_size = ( 16 | int(6 * ratio // 2) * 2 if kernel_size is None else kernel_size 17 | ) 18 | self.stride = ratio 19 | self.pad = self.kernel_size // ratio - 1 20 | self.pad_left = self.pad * self.stride + (self.kernel_size - self.stride) // 2 21 | self.pad_right = ( 22 | self.pad * self.stride + (self.kernel_size - self.stride + 1) // 2 23 | ) 24 | filter = kaiser_sinc_filter1d( 25 | cutoff=0.5 / ratio, half_width=0.6 / ratio, kernel_size=self.kernel_size 26 | ) 27 | self.register_buffer("filter", filter) 28 | 29 | # x: [B, C, T] 30 | def forward(self, x): 31 | _, C, _ = x.shape 32 | 33 | x = F.pad(x, (self.pad, self.pad), mode="replicate") 34 | x = self.ratio * F.conv_transpose1d( 35 | x, self.filter.expand(C, -1, -1), stride=self.stride, groups=C 36 | ) 37 | x = x[..., self.pad_left : -self.pad_right] 38 | 39 | return x 40 | 41 | 42 | class DownSample1d(nn.Module): 43 | def __init__(self, ratio=2, kernel_size=None): 44 | super().__init__() 45 | self.ratio = ratio 46 | self.kernel_size = ( 47 | int(6 * ratio // 2) * 2 if kernel_size is None else kernel_size 48 | ) 49 | self.lowpass = LowPassFilter1d( 50 | cutoff=0.5 / ratio, 51 | half_width=0.6 / ratio, 52 | stride=ratio, 53 | kernel_size=self.kernel_size, 54 | ) 55 | 56 | def forward(self, x): 57 | xx = self.lowpass(x) 58 | 59 | return xx 60 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/env.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/jik876/hifi-gan under the MIT license. 2 | # LICENSE is in incl_licenses directory. 
3 | 4 | import os 5 | import shutil 6 | 7 | 8 | class AttrDict(dict): 9 | def __init__(self, *args, **kwargs): 10 | super(AttrDict, self).__init__(*args, **kwargs) 11 | self.__dict__ = self 12 | 13 | 14 | def build_env(config, config_name, path): 15 | t_path = os.path.join(path, config_name) 16 | if config != t_path: 17 | os.makedirs(path, exist_ok=True) 18 | shutil.copyfile(config, os.path.join(path, config_name)) 19 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/utils.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/jik876/hifi-gan under the MIT license. 2 | # LICENSE is in incl_licenses directory. 3 | 4 | import glob 5 | import os 6 | import matplotlib 7 | import torch 8 | from torch.nn.utils import weight_norm 9 | 10 | matplotlib.use("Agg") 11 | import matplotlib.pylab as plt 12 | from scipy.io.wavfile import write 13 | 14 | from .meldataset import MAX_WAV_VALUE 15 | 16 | 17 | def plot_spectrogram(spectrogram): 18 | fig, ax = plt.subplots(figsize=(10, 2)) 19 | im = ax.imshow(spectrogram, aspect="auto", origin="lower", interpolation="none") 20 | plt.colorbar(im, ax=ax) 21 | 22 | fig.canvas.draw() 23 | plt.close() 24 | 25 | return fig 26 | 27 | 28 | def plot_spectrogram_clipped(spectrogram, clip_max=2.0): 29 | fig, ax = plt.subplots(figsize=(10, 2)) 30 | im = ax.imshow( 31 | spectrogram, 32 | aspect="auto", 33 | origin="lower", 34 | interpolation="none", 35 | vmin=1e-6, 36 | vmax=clip_max, 37 | ) 38 | plt.colorbar(im, ax=ax) 39 | 40 | fig.canvas.draw() 41 | plt.close() 42 | 43 | return fig 44 | 45 | 46 | def init_weights(m, mean=0.0, std=0.01): 47 | classname = m.__class__.__name__ 48 | if classname.find("Conv") != -1: 49 | m.weight.data.normal_(mean, std) 50 | 51 | 52 | def apply_weight_norm(m): 53 | classname = m.__class__.__name__ 54 | if classname.find("Conv") != -1: 55 | weight_norm(m) 56 | 57 | 58 | def get_padding(kernel_size, dilation=1): 59 | return int((kernel_size * dilation - dilation) / 2) 60 | 61 | 62 | def load_checkpoint(filepath, device): 63 | assert os.path.isfile(filepath) 64 | print(f"Loading '{filepath}'") 65 | checkpoint_dict = torch.load(filepath, map_location=device) 66 | print("Complete.") 67 | return checkpoint_dict 68 | 69 | 70 | def save_checkpoint(filepath, obj): 71 | print(f"Saving checkpoint to {filepath}") 72 | torch.save(obj, filepath) 73 | print("Complete.") 74 | 75 | 76 | def scan_checkpoint(cp_dir, prefix, renamed_file=None): 77 | # Fallback to original scanning logic first 78 | pattern = os.path.join(cp_dir, prefix + "????????") 79 | cp_list = glob.glob(pattern) 80 | 81 | if len(cp_list) > 0: 82 | last_checkpoint_path = sorted(cp_list)[-1] 83 | print(f"[INFO] Resuming from checkpoint: '{last_checkpoint_path}'") 84 | return last_checkpoint_path 85 | 86 | # If no pattern-based checkpoints are found, check for renamed file 87 | if renamed_file: 88 | renamed_path = os.path.join(cp_dir, renamed_file) 89 | if os.path.isfile(renamed_path): 90 | print(f"[INFO] Resuming from renamed checkpoint: '{renamed_file}'") 91 | return renamed_path 92 | 93 | return None 94 | 95 | 96 | def save_audio(audio, path, sr): 97 | # wav: torch with 1d shape 98 | audio = audio * MAX_WAV_VALUE 99 | audio = audio.cpu().numpy().astype("int16") 100 | write(path, sr, audio) 101 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/tbx_utils.py: 
-------------------------------------------------------------------------------- 1 | import typing as tp 2 | 3 | import numpy as np 4 | from padertorch.utils import to_numpy 5 | from padertorch.summary.tbx_utils import spectrogram_to_image 6 | import torch 7 | from torch import Tensor 8 | from torchvision.utils import make_grid 9 | 10 | 11 | def tensor_to_image( 12 | signal: Tensor, input_type: str, sequence_last: bool = True 13 | ): 14 | x = to_numpy(signal, detach=True) 15 | if input_type == 'image': 16 | x = (x * 255).astype(np.uint8) # assumes values in [0, 1] 17 | elif input_type == 'spectrogram': 18 | if sequence_last: 19 | x = x.transpose(-1, -2) 20 | x = spectrogram_to_image(x, batch_first=None, log=False) 21 | else: 22 | raise ValueError(f'Unknown input type {input_type}') 23 | return x 24 | 25 | 26 | def batch_image_to_grid( 27 | batch_image: torch.Tensor, 28 | input_shape_format: str = 'bchw', 29 | height_axis: tp.Optional[str] = None, 30 | width_axis: tp.Optional[str] = None, 31 | sequence_axis: tp.Optional[str] = None, 32 | stack: tp.Optional[str] = None, 33 | origin: str = 'upper', 34 | normalize: bool = True, 35 | scale_each: bool = False, 36 | ): 37 | """ 38 | >>> batch_image = torch.rand(4, 3, 32, 32) 39 | >>> grid = batch_image_to_grid(batch_image) 40 | >>> grid.shape 41 | torch.Size([3, 138, 36]) 42 | >>> grid = batch_image_to_grid(\ 43 | torch.rand(4, 32, 32),\ 44 | input_shape_format='b h w'\ 45 | ) 46 | >>> grid.shape 47 | torch.Size([138, 36]) 48 | 49 | Args: 50 | batch_image: Batched images of shape (batch, channel, height, width) or 51 | (batch, height, width). 52 | input_shape_format: Format of the input shape. Should be a string of 53 | dimension names, with or without spaces, e.g., 'bchw' or 'b c h w'. 54 | height_axis: Name of the height (frequency) axis. 55 | width_axis: Name of the width (time) axis. 56 | sequence_axis: Name of the sequence axis; if `stack` is None, it determines the stacking direction. 57 | stack: How to stack the images: `height_axis` stacks them vertically (one per row), `width_axis` horizontally. 58 | origin: Origin of the plot. Can be `'upper'` or `'lower'`. 
59 | normalize: See make_grid 60 | scale_each: See make_grid 61 | """ 62 | if origin not in ('upper', 'lower'): 63 | raise ValueError(f'"origin" should be "upper" or "lower" but got {origin}') 64 | 65 | dims = [d for d in input_shape_format if not d.isspace()] # accepts 'bchw' and 'b c h w' 66 | if height_axis is None: 67 | height_axis = dims[-2] 68 | if width_axis is None: 69 | width_axis = dims[-1] 70 | if height_axis == width_axis: 71 | raise ValueError( 72 | f'Height and width axis should be different but got {height_axis} ' 73 | 'for both "height_axis" and "width_axis"' 74 | ) 75 | if stack is None: 76 | if sequence_axis is not None: 77 | sequence_last = dims[-1] == sequence_axis 78 | stack = height_axis if sequence_last else width_axis 79 | else: 80 | stack = height_axis 81 | 82 | if stack not in (height_axis, width_axis): 83 | raise ValueError( 84 | f'"stack" should be "{height_axis}" or ' 85 | f'"{width_axis}" but got {stack}' 86 | ) 87 | 88 | if len(dims) != batch_image.ndim: 89 | raise ValueError(f'Shape format {input_shape_format} does not match input shape {batch_image.shape}') 90 | 91 | if batch_image.ndim == 3: 92 | # Add channel dimension 93 | batch_image = batch_image.unsqueeze(1) 94 | dims.insert(1, 'c') 95 | 96 | if origin == 'lower': 97 | # Reverse the order of the height (frequency) dimension 98 | batch_image = batch_image.flip(dims.index(height_axis)) 99 | 100 | grid = make_grid( 101 | batch_image, 102 | normalize=normalize, 103 | scale_each=scale_each, 104 | nrow=1 if stack == height_axis else batch_image.shape[0], 105 | ) 106 | if batch_image.shape[1] == 1: 107 | # Remove color dimension 108 | grid = grid[0] 109 | return grid 110 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/typing.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import typing as tp 3 | 4 | from torch import Tensor 5 | 6 | 7 | TPath = tp.Union[str, Path] 8 | TSeqLen = tp.Optional[tp.List[int]] 9 | TActivationFn = tp.Union[str, tp.Callable] 10 | TSeqReturn = tp.Tuple[Tensor, TSeqLen] 11 | TDevice = tp.Union[str, int, tp.Sequence[int]] 12 | -------------------------------------------------------------------------------- /padertorch/contrib/neumann/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/neumann/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/neumann/evaluation.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Iterable 2 | 3 | from logging import getLogger 4 | import paderbox as pb 5 | import numpy as np 6 | import pb_bss 7 | import operator 8 | import re 9 | 10 | logger = getLogger('evaluation') 11 | 12 | 13 | def compute_means( 14 | results: dict, 15 | mean_keys: Optional[Iterable] = None, 16 | exclude_keys: tuple = (r'.*selection', ), 17 | skip_invalid=False, 18 | ) -> dict: 19 | """ 20 | 21 | Args: 22 | results: Input data dict. Structure should be: 23 | `{'dataset_name': {'example_id': {...nested values...}}}` 24 | mean_keys: Keys (if nested, separate with '.') to compute a mean over. 25 | If `None`, computes mean over all keys found in the data. 26 | exclude_keys: Keys or key patterns to exclude when inferring mean keys 27 | from data. Has no effect if `mean_keys is not None`. 
28 | skip_invalid: If `True`, invalid keys are skipped (e.g., not all 29 | examples have this key) 30 | 31 | Returns: 32 | {'dataset_name': {... nested means ...}} 33 | """ 34 | means = {} 35 | for dataset, dataset_results in results.items(): 36 | means[dataset] = {} 37 | 38 | # Flatten to structure {'example_id': {'path.to.sub.entry': value}} 39 | flattened = { 40 | k: pb.utils.nested.flatten(v) for k, v in 41 | dataset_results.items() 42 | } 43 | 44 | if mean_keys is None: 45 | # Try to infer mean keys from first element in data 46 | _mean_keys = list(filter(lambda x: not any( 47 | re.fullmatch(pattern, x) for pattern in exclude_keys 48 | ), next(iter(flattened.values())).keys())) 49 | else: 50 | _mean_keys = mean_keys 51 | 52 | for mean_key in _mean_keys: 53 | try: 54 | means[dataset][mean_key] = np.mean(np.array([ 55 | v[mean_key] for v in flattened.values() 56 | ])) 57 | except KeyError: 58 | if skip_invalid: 59 | pass 60 | else: 61 | raise 62 | means[dataset] = pb.utils.nested.deflatten(means[dataset]) 63 | 64 | return means 65 | -------------------------------------------------------------------------------- /padertorch/contrib/tcl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/tcl/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/tcl/dc.py: -------------------------------------------------------------------------------- 1 | import einops 2 | import torch 3 | from torch.nn.utils.rnn import PackedSequence 4 | 5 | import padertorch as pt 6 | 7 | 8 | class DeepClusteringModel(pt.Model): 9 | def __init__( 10 | self, 11 | F=257, 12 | recurrent_layers=2, 13 | units=600, 14 | E=20, 15 | input_feature_transform='identity' 16 | ): 17 | """ 18 | 19 | TODO: Dropout 20 | TODO: Loss mask to avoid to assign embeddings to silent regions 21 | 22 | Args: 23 | F: Number of frequency bins, fft_size / 2 + 1 24 | recurrent_layers: 25 | units: results in `units` forward and `units` backward units 26 | E: Dimensionality of the embedding 27 | """ 28 | super().__init__() 29 | self.E = E 30 | self.F = F 31 | self.input_feature_transform = input_feature_transform 32 | self.blstm = torch.nn.LSTM( 33 | F, units, recurrent_layers, bidirectional=True 34 | ) 35 | self.linear = torch.nn.Linear(2 * units, F * E) 36 | 37 | def forward(self, batch): 38 | """ 39 | 40 | Args: 41 | batch: Dictionary with lists of tensors 42 | 43 | Returns: List of mask tensors 44 | 45 | """ 46 | 47 | h = pt.ops.pack_sequence(batch['Y_abs']) 48 | 49 | if self.input_feature_transform == 'identity': 50 | pass 51 | elif self.input_feature_transform == 'log1p': 52 | # This is equal to the mu-law for mu=1. 
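# log1p(x) = log(1 + x) compresses large magnitudes but, unlike a plain log, stays finite at x = 0.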
53 | h = pt.ops.sequence.log1p(h) 54 | elif self.input_feature_transform == 'log': 55 | h = PackedSequence(h.data + 1e-10, h.batch_sizes) # avoid log(0) 56 | h = pt.ops.sequence.log(h) 57 | else: 58 | raise NotImplementedError(self.input_feature_transform) 59 | 60 | _, F = h.data.size() 61 | assert F == self.F, f'self.F = {self.F} != F = {F}' 62 | 63 | # Returns tensor with shape (t, b, num_directions * hidden_size) 64 | h, _ = self.blstm(h) 65 | 66 | h = PackedSequence(self.linear(h.data), h.batch_sizes) 67 | h_data = einops.rearrange(h.data, 'tb (e f) -> tb e f', e=self.E) 68 | 69 | # Hershey 2016 page 2 top right paragraph: Unit norm 70 | h_data = torch.nn.functional.normalize(h_data, dim=-2) 71 | 72 | embedding = PackedSequence(h_data, h.batch_sizes) 73 | embedding = pt.ops.unpack_sequence(embedding) 74 | return embedding 75 | 76 | def review(self, batch, model_out): 77 | dc_loss = list() 78 | for embedding, target_mask in zip(model_out, batch['target_mask']): 79 | dc_loss.append(pt.ops.losses.deep_clustering_loss( 80 | einops.rearrange(embedding, 't e f -> (t f) e'), 81 | einops.rearrange(target_mask, 't k f -> (t f) k') 82 | )) 83 | 84 | return {'losses': {'dc_loss': torch.mean(torch.stack(dc_loss))}} 85 | -------------------------------------------------------------------------------- /padertorch/contrib/tcl/speaker_embeddings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/tcl/speaker_embeddings/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/tcl/speaker_embeddings/eer_metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.interpolate import interp1d 3 | from scipy.optimize import brentq 4 | from sklearn.metrics import roc_curve 5 | 6 | 7 | def get_eer(scores, labels): 8 | """ 9 | Slightly adapted version of the VoxSRC EER calculation script 10 | """ 11 | fpr, tpr, thresholds = roc_curve(labels, scores, pos_label=1) 12 | eer = brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.) 13 | return eer 14 | 15 | 16 | def get_dcf(scores, labels, p_target=0.05, c_miss=1, c_fa=1): 17 | """ 18 | Slightly adapted version of the VoxSRC DCF calculation script 19 | """ 20 | 21 | indices = np.argsort(scores) 22 | labels = np.array(labels).astype(np.int32)[indices] 23 | fnrs = [] 24 | fprs = [] 25 | for i in range(0, len(labels)): 26 | if i == 0: 27 | fnrs.append(labels[i]) 28 | fprs.append(1 - labels[i]) 29 | else: 30 | fnrs.append(fnrs[i - 1] + labels[i]) 31 | fprs.append(fprs[i - 1] + 1 - labels[i]) 32 | fnrs_norm = sum(labels) 33 | fprs_norm = len(labels) - fnrs_norm 34 | 35 | # Now divide by the total number of false negative errors to 36 | # obtain the false negative rates across all thresholds 37 | fnrs = [x / float(fnrs_norm) for x in fnrs] 38 | 39 | # Divide by the total number of correct positives to get the 40 | # true positive rate. Subtract these quantities from 1 to 41 | # get the false positive rates. 42 | fprs = [1 - x / float(fprs_norm) for x in fprs] 43 | 44 | min_c_det = float("inf") 45 | for i in range(0, len(fnrs)): 46 | # See Equation (2). It is a weighted sum of false negative 47 | # and false positive errors. 
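# i.e. c_det = c_miss * P_miss(threshold) * p_target + c_fa * P_fa(threshold) * (1 - p_target), minimized over all thresholds below.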
48 | c_det = c_miss * fnrs[i] * p_target + c_fa * fprs[i] * (1 - p_target) 49 | if c_det < min_c_det: 50 | min_c_det = c_det 51 | c_def = min(c_miss * p_target, c_fa * (1 - p_target)) 52 | min_dcf = min_c_det / c_def 53 | return min_dcf -------------------------------------------------------------------------------- /padertorch/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import batch 2 | from . import utils 3 | from . import segment 4 | 5 | from .batch import * 6 | -------------------------------------------------------------------------------- /padertorch/data/utils.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | 5 | def pad_tensor(vec, pad, axis): 6 | """ 7 | Args: 8 | vec - tensor to pad 9 | pad - the size to pad to 10 | axis - dimension to pad 11 | 12 | Returns: 13 | a new tensor padded to 'pad' in dimension 'axis' 14 | """ 15 | 16 | pad_size = list(vec.shape) 17 | pad_size[axis] = pad - vec.shape[axis] 18 | return np.concatenate([vec, np.zeros(pad_size, dtype=vec.dtype)], axis=axis) # zero-pad without changing the dtype 19 | 20 | 21 | def collate_fn(batch): 22 | """Moves list inside of dict/dataclass recursively. 23 | 24 | Can be used as map after batching of a dataset: 25 | `dataset.batch(...).map(collate_fn)` 26 | 27 | Args: 28 | batch: list of examples 29 | 30 | Returns: 31 | 32 | >>> batch = [{'a': 1}, {'a': 2}] 33 | >>> collate_fn(batch) 34 | {'a': [1, 2]} 35 | >>> collate_fn(tuple(batch)) 36 | {'a': (1, 2)} 37 | 38 | >>> batch = [{'a': {'b': [1, 2]}}, {'a': {'b': [3, 4]}}] 39 | >>> collate_fn(batch) 40 | {'a': {'b': [[1, 2], [3, 4]]}} 41 | 42 | >>> import dataclasses 43 | >>> Point = dataclasses.make_dataclass('Point', ['x', 'y']) 44 | >>> batch = [Point(1, 2), Point(3, 4)] 45 | >>> batch 46 | [Point(x=1, y=2), Point(x=3, y=4)] 47 | >>> collate_fn(batch) 48 | Point(x=[1, 3], y=[2, 4]) 49 | >>> collate_fn(tuple(batch)) 50 | Point(x=(1, 3), y=(2, 4)) 51 | """ 52 | assert isinstance(batch, (tuple, list)), (type(batch), batch) 53 | 54 | if isinstance(batch[0], dict): 55 | for b in batch[1:]: 56 | assert batch[0].keys() == b.keys(), batch 57 | return batch[0].__class__({ 58 | k: (collate_fn(batch.__class__([b[k] for b in batch]))) 59 | for k in batch[0] 60 | }) 61 | elif hasattr(batch[0], '__dataclass_fields__'): 62 | for b in batch[1:]: 63 | assert batch[0].__dataclass_fields__ == b.__dataclass_fields__, batch 64 | return batch[0].__class__(**{ 65 | k: (collate_fn(batch.__class__([getattr(b, k) for b in batch]))) 66 | for k in batch[0].__dataclass_fields__ 67 | }) 68 | else: 69 | return batch 70 | -------------------------------------------------------------------------------- /padertorch/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .fully_connected import fully_connected_stack 2 | from .normalization import Normalization 3 | from .recurrent import StatefulLSTM 4 | from .wavenet.wavenet import WaveNet 5 | from . 
import dual_path_rnn 6 | 7 | -------------------------------------------------------------------------------- /padertorch/modules/fully_connected.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from typing import List 3 | 4 | from torch import nn 5 | 6 | from padertorch.ops.mappings import ACTIVATION_FN_MAP 7 | 8 | 9 | def fully_connected_stack( 10 | input_size: int, 11 | hidden_size: List[int], 12 | output_size: int, 13 | activation: str = 'relu', 14 | dropout: float = 0.5, 15 | output_activation: str = None, 16 | ): 17 | """ 18 | 19 | dropout describes the forget-probability. 20 | More information to dropout: https://arxiv.org/pdf/1207.0580.pdf 21 | 22 | Args: 23 | input_size: has to be defined 24 | hidden_size: size of the hidden layers 25 | either None, int, list or tuple 26 | output_size: has to be defined 27 | activation: used in all layers except the last 28 | dropout: Dropout forget ratio (opposite to TensorFlow) 29 | default take from: 30 | https://www.reddit.com/r/MachineLearning/comments/3oztvk/why_50_when_using_dropout/ 31 | output_activation: applied after the last layer 32 | 33 | >>> fully_connected_stack(513, [1024, 1024], 1024) 34 | Sequential( 35 | (dropout_0): Dropout(p=0.5, inplace=False) 36 | (linear_0): Linear(in_features=513, out_features=1024, bias=True) 37 | (relu_0): ReLU() 38 | (dropout_1): Dropout(p=0.5, inplace=False) 39 | (linear_1): Linear(in_features=1024, out_features=1024, bias=True) 40 | (relu_1): ReLU() 41 | (dropout_2): Dropout(p=0.5, inplace=False) 42 | (linear_2): Linear(in_features=1024, out_features=1024, bias=True) 43 | ) 44 | """ 45 | assert input_size is not None, input_size 46 | assert output_size is not None, output_size 47 | 48 | layers = collections.OrderedDict() 49 | if hidden_size is None: 50 | l_n_units = [input_size, output_size] 51 | elif isinstance(hidden_size, (list, tuple)): 52 | l_n_units = [input_size] + list(hidden_size) + [output_size] 53 | elif isinstance(hidden_size, int): 54 | l_n_units = [input_size, hidden_size, output_size] 55 | else: 56 | raise TypeError(hidden_size) 57 | 58 | activation = [activation] * (len(l_n_units) - 2) + [output_activation] 59 | 60 | for l_idx, n_units in enumerate(l_n_units[:-1]): 61 | layers[f'dropout_{l_idx}'] = nn.Dropout(dropout) 62 | layers[f'linear_{l_idx}'] = nn.Linear(n_units, l_n_units[l_idx + 1]) 63 | if activation[l_idx] is not None and activation[l_idx] != 'identity': 64 | layers[f'{activation[l_idx]}_{l_idx}'] = \ 65 | ACTIVATION_FN_MAP[activation[l_idx]]() 66 | return nn.Sequential(layers) 67 | -------------------------------------------------------------------------------- /padertorch/modules/recurrent.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from padertorch.base import Module 3 | 4 | 5 | class StatefulLSTM(Module): 6 | _states = None 7 | 8 | def __init__( 9 | self, 10 | input_size: int, 11 | hidden_size: int, 12 | num_layers: int = 1, 13 | bidirectional: bool = False, 14 | dropout: float = 0., 15 | batch_first: bool = True, 16 | save_states: bool = True 17 | ): 18 | super().__init__() 19 | self.lstm = torch.nn.LSTM(input_size=input_size, 20 | hidden_size=hidden_size, 21 | num_layers=num_layers, 22 | bidirectional=bidirectional, 23 | dropout=dropout, 24 | batch_first=batch_first) 25 | self.hidden_size = hidden_size 26 | self.bidirectional = bidirectional 27 | self.num_layers = num_layers 28 | self.batch_first = batch_first 29 | self.save_states = save_states 
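# The hidden/cell state persists across forward() calls via the `states` property below; construct with save_states=False (or `del lstm.states`) to reset it after each call.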
30 | 31 | @property 32 | def states(self): 33 | return self._states 34 | 35 | @states.deleter 36 | def states(self): 37 | self._states = None 38 | 39 | @states.setter 40 | def states(self, states): 41 | self._states = states 42 | 43 | def forward(self, x): 44 | h, self.states = self.lstm(x, self.states) 45 | if not self.save_states: 46 | del self.states 47 | return h 48 | 49 | -------------------------------------------------------------------------------- /padertorch/modules/wavenet/__init__.py: -------------------------------------------------------------------------------- 1 | from .wavenet import * 2 | from . import nv_wavenet -------------------------------------------------------------------------------- /padertorch/modules/wavenet/nv_wavenet/Makefile: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of the NVIDIA CORPORATION nor the 12 | # names of its contributors may be used to endorse or promote products 13 | # derived from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | # DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 19 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | # 26 | # ****************************************************************************** 27 | 28 | NVCC = nvcc 29 | 30 | ARCH=sm_61 31 | NVCC_FLAGS = -arch=$(ARCH) -std=c++11 32 | NVCC_FLAGS += --use_fast_math 33 | 34 | MAX_REGS = 128 35 | 36 | HEADERS = ./nv_wavenet_util.cuh \ 37 | ./nv_wavenet_singleblock.cuh \ 38 | ./nv_wavenet_dualblock.cuh \ 39 | ./nv_wavenet_persistent.cuh \ 40 | ./nv_wavenet.cuh \ 41 | ./matrix_math.cuh \ 42 | ./softmax.cuh \ 43 | ./nv_wavenet_conversions.cuh 44 | 45 | default: wavenet_infer 46 | 47 | wavenet_infer: wavenet_infer.cu $(HEADERS) wavenet_infer.h 48 | $(NVCC) $(NVCC_FLAGS) -lineinfo -maxrregcount $(MAX_REGS) -I .. 
wavenet_infer.cu ./matrix.cpp -lz -Xcompiler -fPIC -shared -o libwavenet_infer.so 49 | -------------------------------------------------------------------------------- /padertorch/modules/wavenet/nv_wavenet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/modules/wavenet/nv_wavenet/__init__.py -------------------------------------------------------------------------------- /padertorch/modules/wavenet/nv_wavenet/build.py: -------------------------------------------------------------------------------- 1 | # ***************************************************************************** 2 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of the NVIDIA CORPORATION nor the 12 | # names of its contributors may be used to endorse or promote products 13 | # derived from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | # DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 19 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | # 26 | # ***************************************************************************** 27 | import os 28 | import torch 29 | from setuptools import setup 30 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 31 | 32 | abs_path = os.path.dirname(os.path.realpath(__file__)) 33 | library_dirs = [abs_path] 34 | extra_libraries = ['wavenet_infer'] 35 | extra_includes = [abs_path] 36 | 37 | setup( 38 | name='nv_wavenet_ext', 39 | ext_modules=[ 40 | CUDAExtension( 41 | name='nv_wavenet_ext', 42 | sources=['wavenet_infer_wrapper.cpp'], 43 | library_dirs=library_dirs, 44 | runtime_library_dirs=library_dirs, 45 | libraries=extra_libraries, 46 | include_dirs=extra_includes 47 | ) 48 | ], 49 | cmdclass={'build_ext': BuildExtension}, 50 | ) 51 | -------------------------------------------------------------------------------- /padertorch/modules/wavenet/nv_wavenet/matrix.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of the NVIDIA CORPORATION nor the 12 | * names of its contributors may be used to endorse or promote products 13 | * derived from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 19 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | * 26 | ******************************************************************************/ 27 | 28 | #ifndef __MATRIX__ 29 | #define __MATRIX__ 30 | 31 | class Matrix { 32 | private: 33 | float* m_data; 34 | bool m_isTransposed; 35 | int m_rows; 36 | int m_cols; 37 | 38 | public: 39 | Matrix(int rows, int cols, bool isTransposed=false); 40 | 41 | void randomize(float mean, float scale, int sparsity = 0); 42 | 43 | int index(int row, int col); 44 | 45 | void set(int row, int col, float val); 46 | 47 | float get(int row, int col); 48 | 49 | int rows(); 50 | 51 | int cols(); 52 | 53 | void print(const char* name); 54 | 55 | float* data(); 56 | }; 57 | 58 | void matrix_multiply(Matrix& C, Matrix& A, Matrix& B); 59 | void matrix_add(Matrix& C, Matrix& A, Matrix& B); 60 | void matrix_bias(Matrix& C, Matrix&A, Matrix& B); 61 | void matrix_compare(const char* name, Matrix& A, Matrix& B, float max_error=1.e-6, bool relu=false); 62 | void matrix_relu(Matrix& dst, Matrix& src); 63 | void matrix_softmax(Matrix& dst, Matrix& src); 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /padertorch/modules/wavenet/nv_wavenet/nv_wavenet_util.cuh: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 
11 | * * Neither the name of the NVIDIA CORPORATION nor the
12 | * names of its contributors may be used to endorse or promote products
13 | * derived from this software without specific prior written permission.
14 | *
15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
19 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | *
26 | ******************************************************************************/
27 |
28 | #ifndef __DEEPVOICE_UTIL_H__
29 | #define __DEEPVOICE_UTIL_H__
30 |
31 | #include <stdio.h>
32 | #include "cuda_occupancy.h"
33 |
34 | #define gpuErrChk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
35 | inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) {
36 |     if (code != cudaSuccess) {
37 |         fprintf(stderr, "GPUassert: %s %s %d. Do note that nv-wavenet requires Compute Capability 6.0 or later (https://developer.nvidia.com/cuda-gpus).\n", cudaGetErrorString(code), file, line);
38 |         if (abort) exit(code);
39 |     }
40 | }
41 |
42 | int getOccupancy(int deviceId, size_t blockSize, void* func) {
43 |     cudaDeviceProp prop;
44 |     gpuErrChk ( cudaGetDeviceProperties(&prop, deviceId) );
45 |     cudaOccDeviceProp occProp = prop;
46 |
47 |     cudaFuncAttributes attr;
48 |     gpuErrChk ( cudaFuncGetAttributes(&attr, func) );
49 |     cudaOccFuncAttributes occAttr = attr;
50 |
51 |     cudaOccDeviceState occState;
52 |
53 |     cudaOccResult result;
54 |     cudaOccMaxActiveBlocksPerMultiprocessor(&result, &occProp, &occAttr, &occState, blockSize, 0);
55 |
56 |     return result.activeBlocksPerMultiprocessor;
57 |
58 | }
59 |
60 | __device__ __forceinline__ half loadVolatile(const volatile half* y, int index) {
61 |     const volatile __half_raw* chr = (reinterpret_cast<const volatile __half_raw*>(y));
62 |     __half_raw hr;
63 |     hr.x = chr[index].x;
64 |     return half( hr );
65 | }
66 | __device__ __forceinline__ void storeVolatile(volatile half* y, int index, half val) {
67 |     half* y_nv = (half*)y;
68 |     y_nv[index] = val;
69 | }
70 |
71 | __device__ __forceinline__ float loadVolatile(const volatile float* y, int index) {
72 |     return y[index];
73 | }
74 | __device__ __forceinline__ void storeVolatile(volatile float* y, int index, float val) {
75 |     y[index] = val;
76 | }
77 |
78 | __forceinline__ __device__ float sigmoid(float in) {
79 |     float ans = 1.f / (1.f + expf(-in));
80 |     return ans;
81 | }
82 |
83 | __forceinline__ __device__ float _tanh(float in) {
84 |     float ans = tanhf(in);
85 |     return ans;
86 | }
87 |
88 | __device__ __forceinline__ float relu(float f) { return (f < 0.f) ? 0.f : f; }
89 | __device__ __forceinline__ half relu(half h) { half zero = 0.f; return (h < zero) ?
zero : h; } 90 | 91 | #endif 92 | -------------------------------------------------------------------------------- /padertorch/modules/wavenet/nv_wavenet/wavenet_infer.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of the NVIDIA CORPORATION nor the 12 | * names of its contributors may be used to endorse or promote products 13 | * derived from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 19 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | * 26 | ******************************************************************************/ 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif 30 | // ------------------------------------------------ 31 | // C-compatible function for wrapper 32 | // ------------------------------------------------ 33 | void wavenet_infer(int sample_count, 34 | int batch_size, 35 | float* embedding_prev, 36 | float* embedding_curr, 37 | int num_layers, 38 | int max_dilation, 39 | float** in_layer_weights_prev, 40 | float** in_layer_weights_curr, 41 | float** in_layer_biases, 42 | float** res_layer_weights, 43 | float** res_layer_biases, 44 | float** skip_layer_weights, 45 | float** skip_layer_biases, 46 | float* conv_out_weight, 47 | float* conv_end_weight, 48 | int use_embed_tanh, 49 | float* cond_input, 50 | int implementation, 51 | int* samples); 52 | 53 | // -------------------------------------------------------- 54 | // For checking the number of channels match current build 55 | // -------------------------------------------------------- 56 | int get_R(void); 57 | int get_S(void); 58 | int get_A(void); 59 | #ifdef __cplusplus 60 | } 61 | #endif 62 | -------------------------------------------------------------------------------- /padertorch/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .losses import * 2 | 3 | from . import sequence 4 | from . import mappings 5 | from . 
import tensor
6 |
7 | from ._stft import STFT
8 | from .einsum import *
9 | from .sequence import *
10 | from .tensor import *
11 | from .mu_law import *
12 |
-------------------------------------------------------------------------------- /padertorch/ops/einsum.py: --------------------------------------------------------------------------------
1 | import string
2 |
3 | import torch
4 |
5 | __all__ = [
6 |     'einsum'
7 | ]
8 |
9 |
10 | def einsum(operation: str, *operands):
11 |     """Wraps `torch.einsum`: allows capital letters and collects operands as in `np.einsum`."""
12 |     remaining_letters = set(string.ascii_lowercase)
13 |     remaining_letters = remaining_letters - set(operation)
14 |     for capital_letter, replacement in zip(set.intersection(
15 |             set(string.ascii_uppercase),
16 |             set(operation)
17 |     ), remaining_letters):
18 |         operation = operation.replace(capital_letter, replacement)
19 |     return torch.einsum(operation, operands)
20 |
-------------------------------------------------------------------------------- /padertorch/ops/losses/__init__.py: --------------------------------------------------------------------------------
1 | from . import classification
2 | from . import regression
3 | from . import source_separation
4 | from . import kl_divergence
5 |
6 | from .classification import *
7 | from .regression import *
8 | from .source_separation import *
9 | from .kl_divergence import *
10 |
-------------------------------------------------------------------------------- /padertorch/ops/losses/classification.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional
3 | from torch.nn.utils.rnn import PackedSequence
4 | import padertorch as pt
5 |
6 |
7 | __all__ = [
8 |     'softmax_cross_entropy',
9 | ]
10 |
11 |
12 | IGNORE_INDEX = -1
13 |
14 |
15 | def softmax_cross_entropy(x, t):
16 |     """Allow inputs to be of type `PackedSequence`.
17 |
18 |     All dimensions but the last are treated as independent
19 |     dimensions, i.e. x.size() == (..., K) where t.size() == (...).
20 |     Similarly, for sequences x.size() == (T, B, ..., K) and
21 |     t.size() == (T, B, ...).
22 |
23 |     Check the test case for typical usage.
24 |
25 |     Args:
26 |         x: `Tensor` or `PackedSequence` holding a multidimensional array whose
27 |             elements indicate unnormalized log probabilities (logits).
28 |         t: Same object type as `x`. Holds integers of ground truth labels.
29 |
30 |     Returns:
31 |         The scalar cross-entropy loss.
32 |     >>> x = torch.randn(100, 3)
33 |     >>> t = torch.randint(0, 3, size=(100,), dtype=torch.long)
34 |     >>> softmax_cross_entropy(x, t).size()
35 |     torch.Size([])
36 |     """
37 |     if isinstance(x, torch.Tensor) and isinstance(t, torch.Tensor):
38 |         pass
39 |     elif isinstance(x, PackedSequence) and isinstance(t, PackedSequence):
40 |         # Data is already organized such that no padding is necessary.
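        # `.data` holds all valid (non-padded) time steps of all sequences
        # flattened into one (N, K) logits tensor and one (N,) target tensor,
        # so the cross entropy below weights every frame equally and never
        # sees padding.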
41 |         x, t = x.data, t.data
42 |     else:
43 |         raise ValueError(f'Incompatible types: {type(x)}, {type(t)}')
44 |
45 |     assert x.size()[:-1] == t.size(), f'{x.size()}, {t.size()}'
46 |     # Remember: torch.nn.CrossEntropyLoss already includes the softmax.
47 |     loss_fn = torch.nn.CrossEntropyLoss(ignore_index=IGNORE_INDEX)
48 |     return loss_fn(pt.ops.move_axis(x, -1, 1), t)
49 |
-------------------------------------------------------------------------------- /padertorch/ops/losses/kl_divergence.py: --------------------------------------------------------------------------------
1 | from torch.distributions import Normal, MultivariateNormal
2 | from torch.distributions import kl_divergence as kld
3 |
4 |
5 | __all__ = [
6 |     'gaussian_kl_divergence',
7 | ]
8 |
9 |
10 | def _batch_diag(bmat):
11 |     """
12 |     Returns the diagonals of a batch of square matrices.
13 |     """
14 |     return bmat.reshape(bmat.shape[:-2] + (-1,))[..., ::bmat.size(-1) + 1]
15 |
16 |
17 | def gaussian_kl_divergence(q, p):
18 |     """
19 |     Args:
20 |         q: Normal posterior distributions (B1, ..., BN, D)
21 |         p: (Multivariate) Normal prior distributions (K1, ..., KN, D)
22 |
23 |     Returns: kl between all posteriors in batch and all components
24 |         (B1, ..., BN, K1, ..., KN)
25 |
26 |     """
27 |     assert isinstance(q, Normal), type(q)
28 |     batch_shape = q.loc.shape[:-1]
29 |     D = q.loc.shape[-1]
30 |     component_shape = p.loc.shape[:-1]
31 |     assert p.loc.shape[-1] == D, (p.loc.shape[-1], D)
32 |
33 |     p_loc = p.loc.contiguous().view(-1, D)
34 |     if isinstance(p, MultivariateNormal):
35 |         p_scale_tril = p.scale_tril.contiguous().view(-1, D, D)
36 |         q_loc = q.loc.contiguous().view(-1, D)
37 |         q_scale = q.scale.contiguous().view(-1, D)
38 |
39 |         term1 = (  # 0.5 * log(det(Cov_p) / det(Cov_q)), via the scale diagonals
40 |             _batch_diag(p_scale_tril).log().sum(-1)[:, None]
41 |             - q_scale.log().sum(-1)
42 |         )
43 |         L = p_scale_tril.inverse()
44 |         term2 = (L.pow(2).sum(-2)[:, None, :] * q_scale.pow(2)).sum(-1)  # trace(inv(Cov_p) @ Cov_q)
45 |         term3 = (  # Mahalanobis term: (mu_p - mu_q)^T inv(Cov_p) (mu_p - mu_q)
46 |             (p_loc[:, None, :] - q_loc) @ L.transpose(1, 2)
47 |         ).pow(2.0).sum(-1)
48 |         kl = (term1 + 0.5 * (term2 + term3 - D)).transpose(0, 1)
49 |     elif isinstance(p, Normal):
50 |         p_scale = p.scale.contiguous().view(-1, D)
51 |         q_loc = q.loc.contiguous().view(-1, 1, D)
52 |         q_scale = q.scale.contiguous().view(-1, 1, D)
53 |
54 |         kl = kld(
55 |             Normal(loc=q_loc, scale=q_scale), Normal(loc=p_loc, scale=p_scale)
56 |         ).sum(-1)
57 |     else:
58 |         raise ValueError(type(p))
59 |
60 |     return kl.view(*batch_shape, *component_shape)
61 |
-------------------------------------------------------------------------------- /padertorch/ops/mappings.py: --------------------------------------------------------------------------------
1 | import torch
2 | from torch import optim
3 | from paderbox.utils.mapping import Dispatcher
4 | import numpy as np
5 |
6 | __all__ = [
7 |     'ACTIVATION_FN_MAP',
8 | ]
9 |
10 | class _CallableDispatcher(Dispatcher):
11 |     """
12 |     If the input is a callable it is returned.
13 |     Otherwise, it is basically a dict
14 |     with a better error message on key error.
15 |     >>> from padertorch.ops.mappings import _CallableDispatcher
16 |     >>> d = _CallableDispatcher(abc=1, bcd=2)
17 |     >>> d['acd'] #doctest: +ELLIPSIS
18 |     Traceback (most recent call last):
19 |     ...
20 |     paderbox.utils.mapping.DispatchError: Invalid option 'acd'.
21 |     Close matches: ['bcd', 'abc'].
22 |     >>> from padertorch.ops.mappings import _CallableDispatcher
23 |     >>> d = _CallableDispatcher(abc=1, bcd=2)
24 |     >>> d[np.median] #doctest: +ELLIPSIS
25 |     <function median at 0x...>
26 |     """
27 |     def __getitem__(self, item):
28 |         if callable(item):
29 |             return item
30 |         else:
31 |             return super().__getitem__(item)
32 |
33 |
34 | ACTIVATION_FN_MAP = _CallableDispatcher(
35 |     relu=torch.nn.ReLU,
36 |     leaky_relu=torch.nn.LeakyReLU,
37 |     elu=torch.nn.ELU,
38 |     tanh=torch.nn.Tanh,
39 |     sigmoid=torch.nn.Sigmoid,
40 |     softmax=torch.nn.Softmax,
41 |     identity=torch.nn.Sequential,  # An empty Sequential acts as the identity.
42 | )
43 |
-------------------------------------------------------------------------------- /padertorch/ops/mu_law.py: --------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 |
4 | __all__ = [
5 |     'mu_law_encode',
6 |     'mu_law_decode',
7 | ]
8 |
9 |
10 | def mu_law_decode(x, mu_quantization=256):
11 |     assert(torch.max(x) < mu_quantization)
12 |     assert(torch.min(x) >= 0)
13 |     x = x.float()
14 |     mu = mu_quantization - 1.
15 |     # Map values back to [-1, 1].
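    # The next two statements invert
    #     encode(x) = sign(x) * log(1 + mu*|x|) / log(1 + mu)
    # via |x| = ((1 + mu)**|encode(x)| - 1) / mu, so
    # decode(encode(x)) ~ x up to quantization error.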
16 | signal = 2 * (x / mu) - 1 17 | # Perform inverse of mu-law transformation. 18 | magnitude = (1 / mu) * ((1 + mu)**torch.abs(signal) - 1) 19 | return torch.sign(signal) * magnitude 20 | 21 | 22 | def mu_law_encode(x, mu_quantization=256): 23 | assert(torch.max(x) <= 1.0) 24 | assert(torch.min(x) >= -1.0) 25 | mu = mu_quantization - 1. 26 | scaling = np.log1p(mu) 27 | x_mu = torch.sign(x) * torch.log1p(mu * torch.abs(x)) / scaling 28 | encoding = ((x_mu + 1) / 2 * mu + 0.5).long() 29 | return encoding 30 | -------------------------------------------------------------------------------- /padertorch/ops/sequence/__init__.py: -------------------------------------------------------------------------------- 1 | from . import pack_module 2 | from . import pointwise 3 | from . import reduction 4 | 5 | from .pack_module import * 6 | from .pointwise import * 7 | from .reduction import * 8 | -------------------------------------------------------------------------------- /padertorch/ops/sequence/mask.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def compute_mask(x, sequence_lengths, batch_axis=0, sequence_axis=1): 5 | """ 6 | This function calculates a mask which indicates the position of non-padded values. 7 | It can be used to do subsequent operations only on non-padded values. 8 | 9 | >>> x, seq_len = 2*torch.ones((3,1,10,4)), [1, 2, 3] 10 | >>> mask = compute_mask(x, sequence_lengths=seq_len, batch_axis=0, sequence_axis=-1) 11 | >>> mask[:,0] 12 | tensor([[[1., 0., 0., 0.], 13 | [1., 0., 0., 0.], 14 | [1., 0., 0., 0.], 15 | [1., 0., 0., 0.], 16 | [1., 0., 0., 0.], 17 | [1., 0., 0., 0.], 18 | [1., 0., 0., 0.], 19 | [1., 0., 0., 0.], 20 | [1., 0., 0., 0.], 21 | [1., 0., 0., 0.]], 22 | 23 | [[1., 1., 0., 0.], 24 | [1., 1., 0., 0.], 25 | [1., 1., 0., 0.], 26 | [1., 1., 0., 0.], 27 | [1., 1., 0., 0.], 28 | [1., 1., 0., 0.], 29 | [1., 1., 0., 0.], 30 | [1., 1., 0., 0.], 31 | [1., 1., 0., 0.], 32 | [1., 1., 0., 0.]], 33 | 34 | [[1., 1., 1., 0.], 35 | [1., 1., 1., 0.], 36 | [1., 1., 1., 0.], 37 | [1., 1., 1., 0.], 38 | [1., 1., 1., 0.], 39 | [1., 1., 1., 0.], 40 | [1., 1., 1., 0.], 41 | [1., 1., 1., 0.], 42 | [1., 1., 1., 0.], 43 | [1., 1., 1., 0.]]]) 44 | 45 | Args: 46 | x: tensor to be masked 47 | sequence_lengths: list of int stating sequence length for each sequence 48 | in the mini-batch. If None a one-mask is returned, i.e., 49 | no values in x are masked. 
50 |         batch_axis: axis along which sequences are stacked
51 |         sequence_axis: axis which may contain padding (of different lengths
52 |             for each sequence)
53 |
54 |     Returns:
55 |         A float mask of the same shape as x: 1 marks valid positions, 0 padding.
56 |     """
57 |     if sequence_lengths is None:
58 |         return torch.ones_like(x)
59 |     if batch_axis < 0:
60 |         batch_axis = x.dim() + batch_axis
61 |     if sequence_axis < 0:
62 |         sequence_axis = x.dim() + sequence_axis
63 |     if not torch.is_tensor(sequence_lengths):
64 |         sequence_lengths = torch.Tensor(sequence_lengths).long().to(x.device)
65 |     assert sequence_lengths.device == x.device, (sequence_lengths.device, x.device)
66 |     for dim in range(batch_axis + 1, x.dim()):
67 |         sequence_lengths = sequence_lengths.unsqueeze(-1)
68 |     idx = torch.arange(x.shape[sequence_axis], device=x.device)
69 |     for dim in range(sequence_axis + 1, x.dim()):
70 |         idx = idx.unsqueeze(-1)
71 |     mask = (idx < sequence_lengths).float().expand(x.shape)
72 |     return mask
73 |
-------------------------------------------------------------------------------- /padertorch/ops/sequence/pointwise.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn
3 | from functools import partial
4 |
5 |
6 | __all__ = [
7 |     'sequence_elementwise',
8 |     'abs',
9 |     'ceil',
10 |     'clamp',
11 |     'exp',
12 |     'log',
13 |     'log1p',
14 |     'log10',
15 |     'sigmoid',
16 |     'sqrt',
17 | ]
18 |
19 |
20 | def sequence_elementwise(function, x, *args, **kwargs):
21 |     """Expects the desired function and a `Tensor` or `PackedSequence`."""
22 |     if isinstance(x, torch.nn.utils.rnn.PackedSequence):
23 |         return torch.nn.utils.rnn.PackedSequence(
24 |             function(x.data, *args, **kwargs),
25 |             x.batch_sizes
26 |         )
27 |     else:
28 |         return function(x, *args, **kwargs)
29 |
30 |
31 | abs = partial(sequence_elementwise, torch.abs)
32 | ceil = partial(sequence_elementwise, torch.ceil)
33 | clamp = partial(sequence_elementwise, torch.clamp)
34 | exp = partial(sequence_elementwise, torch.exp)
35 | log = partial(sequence_elementwise, torch.log)
36 | log10 = partial(sequence_elementwise, torch.log10)
37 | log1p = partial(sequence_elementwise, torch.log1p)
38 | sigmoid = partial(sequence_elementwise, torch.sigmoid)
39 | sqrt = partial(sequence_elementwise, torch.sqrt)
40 |
-------------------------------------------------------------------------------- /padertorch/ops/tensor.py: --------------------------------------------------------------------------------
1 | import torch
2 |
3 | __all__ = [
4 |     'move_axis'
5 | ]
6 |
7 |
8 | def move_axis(a: torch.Tensor, source: int, destination: int):
9 |     """Move an axis from source location to destination location.
10 |
11 |     The API is close to `np.moveaxis`, but allows only a single source axis.
12 |
13 |     Args:
14 |         a: The Tensor whose axes should be reordered.
15 |         source: Original position of the axis to move.
16 |         destination: Destination position of the axis.
17 |     Returns: Tensor with moved axis.
18 |
19 |     >>> x = torch.zeros((3, 4, 5))
20 |     >>> move_axis(x, 0, -1).size()
21 |     torch.Size([4, 5, 3])
22 |
23 |     >>> move_axis(x, -1, 0).size()
24 |     torch.Size([5, 3, 4])
25 |     """
26 |     source = source % len(a.size())
27 |     destination = destination % len(a.size())
28 |     permutation = [d for d in range(len(a.size())) if not d == source]
29 |     permutation.insert(destination, source)
30 |     return a.permute(permutation)
31 |
32 |
33 | def broadcast_to(tensor: torch.Tensor, shape):
34 |     """
35 |     Alias for `torch.Tensor.expand`. Prefer calling `torch.Tensor.expand` directly.
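    Note: like `torch.Tensor.expand`, this returns a view and only broadcasts
    singleton dimensions (or prepends new leading ones); no memory is copied.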
36 |
37 |     >>> broadcast_to(torch.ones(3), (4, 3)).shape
38 |     torch.Size([4, 3])
39 |     >>> broadcast_to(torch.ones(1, 3), (4, 3)).shape
40 |     torch.Size([4, 3])
41 |     >>> broadcast_to(torch.ones(4, 1), (4, 3)).shape
42 |     torch.Size([4, 3])
43 |     """
44 |     return tensor.expand(shape)
45 |
46 |
47 | def matrix_diag(x):
48 |     """
49 |     Apply the diag matrix operation on the last axis.
50 |
51 |     >>> matrix_diag(torch.ones(2))
52 |     tensor([[1., 0.],
53 |             [0., 1.]])
54 |     >>> matrix_diag(torch.ones(3, 4)).shape
55 |     torch.Size([3, 4, 4])
56 |
57 |     """
58 |     if x.dim() == 1:
59 |         return torch.diag(x)
60 |     feature_dim = x.shape[-1]
61 |     mat = x.reshape((-1, feature_dim))
62 |
63 |     # TODO: Remove the python loop, e.g. with torch.diag_embed, which
64 |     # performs exactly this batched diag operation.
65 |     diags = torch.stack([torch.diag(vec) for vec in mat])
66 |     return diags.reshape((*x.shape, feature_dim))
67 |
68 |
69 | def matrix_eye_like(x):
70 |     """
71 |     Returns an eye matrix with `x.dim() + 1` dimensions.
72 |
73 |     Note: Usually the matrix from torch.eye is enough, because torch supports
74 |     broadcasting.
75 |
76 |     >>> matrix_eye_like(torch.ones(2) + 10)
77 |     tensor([[1., 0.],
78 |             [0., 1.]])
79 |     >>> matrix_eye_like(torch.ones(3, 2)).shape
80 |     torch.Size([3, 2, 2])
81 |     >>> matrix_eye_like(torch.ones(4, 3, 2)).shape
82 |     torch.Size([4, 3, 2, 2])
83 |
84 |     """
85 |     feature_dim = x.shape[-1]
86 |     eye = torch.eye(feature_dim)
87 |     if x.dim() == 1:
88 |         return eye
89 |     else:
90 |         return broadcast_to(eye, [*x.shape, feature_dim])
91 |
92 |
93 | def batch_tril(x):
94 |     """Apply torch.tril along the minibatch axis."""
95 |     matrix_dims = x.shape[-2:]
96 |     mats = x.reshape((-1, *matrix_dims))
97 |     trils = torch.stack([torch.tril(mat) for mat in mats])
98 |     return trils.reshape(x.shape)
99 |
-------------------------------------------------------------------------------- /padertorch/summary/__init__.py: --------------------------------------------------------------------------------
1 | from .tbx_utils import *
2 | from . import tfevents
3 | from .model_info import *
-------------------------------------------------------------------------------- /padertorch/summary/model_info.py: --------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from torch import nn
3 |
4 |
5 | __all__ = [
6 |     'num_parameters',
7 | ]
8 |
9 |
10 | @dataclass(repr=False)
11 | class ModelParameterSize:
12 |     total_count: int = 0
13 |     trainable_count: int = 0
14 |     total_bytes: int = 0
15 |     trainable_bytes: int = 0
16 |
17 |     def __repr__(self):
18 |         try:
19 |             import humanize
20 |             return (
21 |                 f'{self.__class__.__name__}('
22 |                 f'total_count={humanize.intword(self.total_count)}, '
23 |                 f'trainable_count={humanize.intword(self.trainable_count)}, '
24 |                 f'total_bytes={humanize.naturalsize(self.total_bytes)}, '
25 |                 f'trainable_bytes={humanize.naturalsize(self.trainable_bytes)})'
26 |             )
27 |         except ImportError:
28 |             return (
29 |                 f'{self.__class__.__name__}('
30 |                 f'total_count={self.total_count}, '
31 |                 f'trainable_count={self.trainable_count}, '
32 |                 f'total_bytes={self.total_bytes}, '
33 |                 f'trainable_bytes={self.trainable_bytes})'
34 |             )
35 |
36 |
37 | def num_parameters(module: nn.Module) -> ModelParameterSize:
38 |     """Counts the number of parameters for `module`.
39 |
40 |     Args:
41 |         module: The module to count the number of parameters for
42 |
43 |     Returns: The total number of parameters and the number of trainable
44 |         parameters.
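        Byte sizes are computed per parameter as numel() * element_size().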
45 | 46 | Examples: 47 | >>> num_parameters(nn.Linear(10, 10)) 48 | ModelParameterSize(total_count=110, trainable_count=110, total_bytes=440 Bytes, trainable_bytes=440 Bytes) 49 | >>> net = nn.Sequential(nn.Linear(10, 10).requires_grad_(False), nn.Linear(10, 10)) 50 | >>> num_parameters(net) 51 | ModelParameterSize(total_count=220, trainable_count=110, total_bytes=880 Bytes, trainable_bytes=440 Bytes) 52 | """ 53 | result = ModelParameterSize() 54 | 55 | for parameter in module.parameters(): 56 | size = parameter.numel() 57 | bytes = parameter.element_size() 58 | 59 | if parameter.requires_grad: 60 | result.trainable_count += size 61 | result.trainable_bytes += size * bytes 62 | result.total_count += size 63 | result.total_bytes += size * bytes 64 | 65 | return result 66 | -------------------------------------------------------------------------------- /padertorch/summary/tfevents.py: -------------------------------------------------------------------------------- 1 | import struct 2 | 3 | ''' 4 | Event structure: 5 | 6 | { 7 | 'wall_time': ..., 8 | 'step': ..., 9 | 'summary': 10 | 'value': [{ # length is 1 11 | 'tag': ..., 12 | 'simple_value': ..., 13 | 'histo': { 14 | 'min': ..., 15 | 'max': ..., 16 | 'num': ..., 17 | 'sum': ..., 18 | 'sum_squares': ..., 19 | 'bucket_llimit': ..., 20 | 'bucket': ..., 21 | } 22 | }] 23 | } 24 | ''' 25 | 26 | def load_events_as_dict( 27 | path, 28 | backend='tbX', 29 | ): 30 | """ 31 | 32 | Args: 33 | path: 34 | Path to a tfevent file 35 | backend: 36 | 'tbX' or 'tf' 37 | Use tensorboardX or tensorflow to load the tfevents file. 38 | 39 | Returns: 40 | generator that yields the events as dict 41 | 42 | >>> path = '/net/vol/boeddeker/sacred/torch/am/32/events.out.tfevents.1545605113.ntsim1' 43 | >>> list(load_events_as_dict(path))[2] # doctest: +SKIP 44 | {'wall_time': 1545605119.7274427, 'step': 1, 'summary': {'value': [{'tag': 'training/grad_norm', 'simple_value': 0.21423661708831787}]}} 45 | >>> list(load_events_as_dict(path, backend='tf'))[2] # doctest: +SKIP 46 | {'wall_time': 1545605119.7274427, 'step': 1, 'summary': {'value': [{'tag': 'training/grad_norm', 'simple_value': 0.21423661708831787}]}} 47 | 48 | """ 49 | try: 50 | # protobuf3-to-dict (PyPI) 51 | from protobuf_to_dict import protobuf_to_dict 52 | except NameError as e: 53 | raise RuntimeError( 54 | 'protobuf3-to-dict is required for load_events_as_dict to work, ' 55 | 'but you seem to have installed protobuf-to-dict. You can ' 56 | 'install it with:\n pip install protobuf3-to-dict' 57 | ) from e 58 | 59 | # from google.protobuf.json_format import MessageToDict 60 | # MessageToDict(e, preserving_proto_field_name=True) 61 | # Converts int to str -> Bad behaviour 62 | if backend == 'tf': 63 | import tensorflow as tf 64 | return [ 65 | protobuf_to_dict(e) 66 | for e in tf.train.summary_iterator(str(path)) 67 | ] 68 | elif backend == 'tbX': 69 | from tensorboardX.event_file_writer import event_pb2 70 | 71 | def read(fd): 72 | # Original 73 | # https://github.com/lanpa/tensorboard-dumper/blob/master/dump.py 74 | # Remove this code, once 75 | # https://github.com/lanpa/tensorboardX/issues/318 76 | # has a solution. 
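            # A tfevents file is a stream of TFRecords, each framed as:
            #     uint64 payload length, uint32 masked CRC of the length,
            #     payload bytes, uint32 masked CRC of the payload.
            # The two CRC fields are read below but not verified.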
77 | header_data = fd.read(8) 78 | if header_data == b'': 79 | return None 80 | header, = struct.unpack('Q', header_data) 81 | crc_hdr = struct.unpack('I', fd.read(4)) 82 | event_str = fd.read(header) # 8+4 83 | crc_ev = struct.unpack('>I', fd.read(4)) 84 | 85 | event = event_pb2.Event() 86 | event.ParseFromString(event_str) 87 | return event 88 | 89 | def read_all(path): 90 | with open(path, 'rb') as fd: 91 | event = read(fd) 92 | while event is not None: 93 | yield protobuf_to_dict(event) 94 | event = read(fd) 95 | 96 | return read_all(path) 97 | else: 98 | raise ValueError(backend) 99 | -------------------------------------------------------------------------------- /padertorch/testing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/testing/__init__.py -------------------------------------------------------------------------------- /padertorch/train/__init__.py: -------------------------------------------------------------------------------- 1 | from . import optimizer 2 | from . import trigger 3 | from . import hooks 4 | from . import trainer 5 | from . import runtime_tests 6 | -------------------------------------------------------------------------------- /padertorch/train/optimizer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import optim 3 | 4 | 5 | class Optimizer: 6 | optimizer_cls = None 7 | optimizer = None 8 | parameters = None 9 | 10 | def __init__( 11 | self, gradient_clipping, **kwargs 12 | ): 13 | self.gradient_clipping = gradient_clipping 14 | self.optimizer_kwargs = kwargs 15 | 16 | def set_parameters(self, parameters): 17 | self.parameters = tuple(parameters) 18 | self.optimizer = self.optimizer_cls( 19 | self.parameters, **self.optimizer_kwargs 20 | ) 21 | 22 | def check_if_set(self): 23 | assert self.optimizer is not None, \ 24 | 'The optimizer is not initialized, call set_parameter before' \ 25 | ' using any of the optimizer functions' 26 | 27 | def zero_grad(self): 28 | self.check_if_set() 29 | return self.optimizer.zero_grad() 30 | 31 | def step(self): 32 | self.check_if_set() 33 | return self.optimizer.step() 34 | 35 | def clip_grad(self): 36 | self.check_if_set() 37 | # Todo: report clipped and unclipped 38 | # Todo: allow clip=None but still report grad_norm 39 | grad_clips = self.gradient_clipping 40 | return torch.nn.utils.clip_grad_norm_( 41 | self.parameters, grad_clips 42 | ) 43 | 44 | def to(self, device): 45 | if device is None: 46 | return 47 | self.check_if_set() 48 | for state in self.optimizer.state.values(): 49 | for k, v in state.items(): 50 | if torch.is_tensor(v): 51 | state[k] = v.to(device) 52 | 53 | def cpu(self): 54 | return self.to('cpu') 55 | 56 | def cuda(self, device=None): 57 | assert device is None or isinstance(device, int), device 58 | if device is None: 59 | device = torch.device('cuda') 60 | return self.to(device) 61 | 62 | def load_state_dict(self, state_dict): 63 | self.check_if_set() 64 | return self.optimizer.load_state_dict(state_dict) 65 | 66 | def state_dict(self): 67 | self.check_if_set() 68 | return self.optimizer.state_dict() 69 | 70 | 71 | class Adam(Optimizer): 72 | optimizer_cls = optim.Adam 73 | 74 | def __init__( 75 | self, 76 | gradient_clipping=1e10, 77 | lr=1e-3, 78 | betas=(0.9, 0.999), 79 | eps=1e-8, 80 | weight_decay=0, 81 | amsgrad=False 82 | ): 83 | super().__init__( 84 | gradient_clipping, 85 | 
lr=lr, 86 | betas=betas, 87 | eps=eps, 88 | weight_decay=weight_decay, 89 | amsgrad=amsgrad 90 | ) 91 | 92 | 93 | class Adadelta(Optimizer): 94 | optimizer_cls = optim.Adadelta 95 | 96 | def __init__( 97 | self, 98 | gradient_clipping=1e10, 99 | lr=1.0, 100 | rho=0.9, 101 | eps=1e-6, 102 | weight_decay=0 103 | ): 104 | super().__init__( 105 | gradient_clipping, 106 | lr=lr, 107 | rho=rho, 108 | eps=eps, 109 | weight_decay=weight_decay, 110 | ) 111 | 112 | 113 | class SGD(Optimizer): 114 | optimizer_cls = optim.SGD 115 | 116 | def __init__( 117 | self, 118 | gradient_clipping=1e10, 119 | lr=1e-3, 120 | momentum=0, 121 | dampening=0, 122 | weight_decay=0, 123 | nesterov=False 124 | ): 125 | super().__init__( 126 | gradient_clipping, 127 | lr=lr, 128 | momentum=momentum, 129 | dampening=dampening, 130 | weight_decay=weight_decay, 131 | nesterov=nesterov 132 | ) 133 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel", "Cython", "numpy", "scipy"] 3 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = 3 | --ignore "padertorch/modules/wavenet/nv_wavenet" 4 | --ignore "padertorch/contrib" 5 | -m "not matlab" 6 | --doctest-modules 7 | --doctest-continue-on-failure 8 | --junitxml=junit/test-results.xml 9 | --cov=padertorch 10 | --cov-report=xml 11 | --cov-report=html 12 | 13 | markers = 14 | matlab: marks matlab tests, they are slow (deselect with '-m "not matlab"') 15 | torch: marks (py)torch tests, import torch fails in the moment on azure (deselect with '-m "not torch"') 16 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/tests/__init__.py -------------------------------------------------------------------------------- /tests/contrib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/tests/contrib/__init__.py -------------------------------------------------------------------------------- /tests/test_configurable.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | import paderbox as pb 6 | import padertorch as pt 7 | 8 | 9 | def foo(b=1, c=2): 10 | pass 11 | 12 | 13 | def bar(a, b=3, d=4): 14 | pass 15 | 16 | 17 | class A(pt.configurable.Configurable): 18 | @classmethod 19 | def finalize_dogmatic_config(cls, config): 20 | config['e'] = { 21 | 'factory': foo, 22 | 'b': 5 23 | } 24 | if config['e']['factory'] == foo: 25 | cfg_e = config['e'] 26 | cfg_e['c'] = 6 27 | elif config['e']['factory'] == bar: 28 | config['e']['d'] = 7 29 | else: 30 | raise ValueError(config['e']['factory']) 31 | return config 32 | 33 | def __init__(self, e, f=0): 34 | pass 35 | 36 | 37 | class Test: 38 | def test_(self): 39 | config = A.get_config() 40 | expect = { 41 | 'factory': 'tests.test_configurable.A', 42 | 'f': 0, 43 | 'e': { 44 | 'factory': 'tests.test_configurable.foo', 45 | 'b': 5, 46 | 'c': 6 47 | } 48 | } 49 | assert config == expect 50 | 51 
| with np.testing.assert_raises_regex(Exception, "missing keys: {'a'}"):
52 |             config = A.get_config({'e': {'factory': bar}})
53 |
54 |         config = A.get_config({'e': {'factory': bar, 'a': 10}})
55 |         expect = {
56 |             'factory': 'tests.test_configurable.A',
57 |             'f': 0,
58 |             'e': {
59 |                 'factory': 'tests.test_configurable.bar',
60 |                 'b': 5,
61 |                 'd': 7,
62 |                 'a': 10
63 |             }
64 |         }
65 |         assert config == expect
66 |
67 |         config = A.get_config({'e': {'factory': bar, 'a': 10}})
68 |         expect = {
69 |             'factory': 'tests.test_configurable.A',
70 |             'f': 0,
71 |             'e': {
72 |                 'factory': 'tests.test_configurable.bar',
73 |                 'b': 5,
74 |                 'd': 7,
75 |                 'a': 10
76 |             }
77 |         }
78 |         assert config == expect
79 |
80 |
81 | class B(pt.Configurable):
82 |
83 |     @classmethod
84 |     def finalize_dogmatic_config(cls, config):
85 |         config['a'] = 1
86 |         config['b'] = 2  # Should raise an Exception
87 |
88 |     def __init__(self, a):
89 |         pass
90 |
91 |
92 | def test_wrong_finalize_dogmatic_config():
93 |
94 |     with pytest.raises(Exception) as exc_info:
95 |         B.get_config()
96 |
97 |     pb.testing.assert_doctest_like_equal(
98 |         """
99 | Tried to set an unexpected keyword argument for <class 'tests.test_configurable.B'> in finalize_dogmatic_config.
100 | See details below and stacktrace above.
101 |
102 | Too many keywords for the factory <class 'tests.test_configurable.B'>.
103 | Redundant keys: {'b'}
104 | Signature: (a)
105 | Current config with fallbacks:
106 | NestedChainMap({'factory': tests.test_configurable.B}, {'a': 1, 'b': 2})
107 |         """.strip(),
108 |         str(exc_info.value)
109 |     )
110 |
111 |     with pytest.raises(Exception) as exc_info:
112 |         B.get_config(updates={'C': 3})
113 |
114 |     pb.testing.assert_doctest_like_equal(
115 |         """
116 | padertorch.Configurable.get_config(updates=...) got an unexpected keyword argument in updates for <class 'tests.test_configurable.B'>.
117 | See details below.
118 |
119 | Too many keywords for the factory <class 'tests.test_configurable.B'>.
120 | Redundant keys: {'C'} 121 | Signature: (a) 122 | Current config with fallbacks: 123 | NestedChainMap({'C': 3, 'factory': tests.test_configurable.B}, {}) 124 | """.strip(), 125 | str(exc_info.value) 126 | ) 127 | -------------------------------------------------------------------------------- /tests/test_models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/tests/test_models/__init__.py -------------------------------------------------------------------------------- /tests/test_modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/tests/test_modules/__init__.py -------------------------------------------------------------------------------- /tests/test_modules/test_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from padertorch.ops.sequence.mask import compute_mask 3 | from padertorch.modules.normalization import normalize 4 | import paderbox.testing as tc 5 | 6 | 7 | def normalize_ref(x, gamma, beta, statistics_axis, batch_axis, sequence_axis, seq_len, shift, scale, eps): 8 | # compute mask 9 | if seq_len is not None: 10 | mask = compute_mask(x, seq_len, batch_axis, sequence_axis) 11 | else: 12 | mask = torch.ones_like(x) 13 | 14 | # compute statistics 15 | n_values = mask.sum(dim=statistics_axis, keepdim=True) 16 | x = x * mask 17 | mean = x.sum(dim=statistics_axis, keepdim=True) / torch.max(n_values, torch.ones_like(n_values)) 18 | power = (x ** 2).sum(dim=statistics_axis, keepdim=True) / torch.max(n_values, torch.ones_like(n_values)) 19 | y = x 20 | if shift: 21 | y = y - mean 22 | power_scale = power - mean**2 23 | else: 24 | power_scale = power 25 | power_scale = torch.clamp(power_scale, min=0.) 
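    # The clamp guards against the slightly negative variances that
    # E[x^2] - E[x]^2 can produce through floating-point cancellation.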
26 | if scale: 27 | y = y / torch.sqrt(power_scale + eps) 28 | 29 | if gamma is not None: 30 | assert gamma.dim() == x.dim(), gamma.shape 31 | y = y * gamma 32 | if beta is not None: 33 | assert beta.dim() == x.dim(), beta.shape 34 | y = y + beta 35 | return y*mask, mean, power, n_values 36 | 37 | 38 | def test_outputs_and_grads(): 39 | x = torch.randn((2, 3, 5), requires_grad=True) 40 | gamma = 1+torch.randn((1, 3, 1)) 41 | gamma.requires_grad = True 42 | beta = torch.randn((1, 3, 1), requires_grad=True) 43 | seq_len = [5, 3] 44 | x_ref = x.clone().detach() 45 | x_ref.requires_grad = True 46 | gamma_ref = gamma.clone().detach() 47 | gamma_ref.requires_grad = True 48 | beta_ref = beta.clone().detach() 49 | beta_ref.requires_grad = True 50 | 51 | for shift in [True, False]: 52 | for scale in [True, False]: 53 | if x.grad is not None: 54 | x.grad.zero_() 55 | x_ref.grad.zero_() 56 | gamma.grad.zero_() 57 | gamma_ref.grad.zero_() 58 | beta.grad.zero_() 59 | beta_ref.grad.zero_() 60 | outs = normalize(x, gamma, beta, [0, 2], 0, 2, seq_len, shift, scale, 1e-3) 61 | y = outs[0] 62 | (y[0, [0, 1]] - y[0, 2]).sum().backward() 63 | outs_ref = normalize_ref(x_ref, gamma_ref, beta_ref, [0, 2], 0, 2, seq_len, shift, scale, 1e-3) 64 | y_ref = outs_ref[0] 65 | (y_ref[0, [0, 1]] - y_ref[0, 2]).sum().backward() 66 | 67 | for out, out_ref in zip(outs, outs_ref): 68 | tc.assert_array_almost_equal(out.detach().numpy(), out_ref.detach().numpy()) 69 | tc.assert_array_almost_equal(x.grad.numpy(), x_ref.grad.numpy(), decimal=4) 70 | tc.assert_array_almost_equal(gamma.grad.numpy(), gamma_ref.grad.numpy(), decimal=4) 71 | tc.assert_array_almost_equal(beta.grad.numpy(), beta_ref.grad.numpy(), decimal=4) 72 | -------------------------------------------------------------------------------- /tests/test_ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/tests/test_ops/__init__.py -------------------------------------------------------------------------------- /tests/test_ops/test_sequence.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import padertorch as pts 3 | import numpy as np 4 | import torch 5 | from torch.nn.utils.rnn import PackedSequence 6 | 7 | 8 | class TestPackModule(unittest.TestCase): 9 | def setUp(self): 10 | self.sequence = [torch.zeros(5, 3), torch.ones(4, 3)] 11 | 12 | self.padded = torch.zeros(5, 2, 3) 13 | self.padded[:4, 1, :] = torch.ones(4, 3) 14 | 15 | self.lengths = torch.LongTensor([5, 4]) 16 | 17 | self.packed = PackedSequence( 18 | torch.stack([self.sequence[0][0], 19 | self.sequence[1][0], 20 | self.sequence[0][1], 21 | self.sequence[1][1], 22 | self.sequence[0][2], 23 | self.sequence[1][2], 24 | self.sequence[0][3], 25 | self.sequence[1][3], 26 | self.sequence[0][4], 27 | ], dim=0), 28 | torch.LongTensor(4 * [2] + 1 * [1]) 29 | ) 30 | 31 | def test_pack_sequence(self): 32 | actual = pts.ops.pack_sequence(self.sequence) 33 | assert isinstance(actual, type(self.packed)) 34 | np.testing.assert_equal( 35 | actual[0].data.numpy(), 36 | self.packed.data.numpy(), 37 | ) 38 | 39 | def test_unpack_sequence(self): 40 | actual = pts.ops.unpack_sequence(self.packed) 41 | assert isinstance(actual, type(self.sequence)) 42 | for actual_, reference_ in zip(actual, self.sequence): 43 | np.testing.assert_equal(actual_.numpy(), reference_.numpy()) 44 | 45 | def test_pad_sequence(self): 46 | actual = 
pts.ops.pad_sequence(self.sequence)
47 |         assert isinstance(actual, type(self.padded))
48 |         np.testing.assert_equal(actual.numpy(), self.padded.numpy())
49 |
50 |     def test_unpad_sequence(self):
51 |         actual = pts.ops.unpad_sequence(self.padded, self.lengths)
52 |         assert isinstance(actual, type(self.sequence))
53 |         for actual_, reference_ in zip(actual, self.sequence):
54 |             np.testing.assert_equal(actual_.numpy(), reference_.numpy())
55 |
56 |     def test_pad_packed_sequence(self):
57 |         actual, lengths = pts.ops.pad_packed_sequence(self.packed)
58 |         assert isinstance(actual, type(self.padded))
59 |         np.testing.assert_equal(lengths.numpy(), self.lengths.numpy())
60 |         np.testing.assert_equal(actual.numpy(), self.padded.numpy())
61 |
62 |     def test_pack_padded_sequence(self):
63 |         actual = pts.ops.pack_padded_sequence(self.padded, self.lengths)
64 |         assert isinstance(actual, type(self.packed))
65 |         np.testing.assert_equal(actual.data.numpy(), self.packed.data.numpy())
66 |
-------------------------------------------------------------------------------- /tests/test_summary/test_tbx_utils.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | from padertorch.summary.tbx_utils import audio
5 |
6 |
7 | def test_audio():
8 |     # A CPU tensor and a numpy array share the data.
9 |     # Verify that the input is not changed.
10 |
11 |     # Test normalization
12 |     tensor = torch.ones((16000,))
13 |     array, _ = audio(tensor)
14 |     np.testing.assert_allclose(tensor.numpy(), 1)
15 |     np.testing.assert_allclose(array, 0.95)
16 |
17 |     # Test zero signal
18 |     tensor = torch.zeros((16000,))
19 |     array, _ = audio(tensor)
20 |     np.testing.assert_allclose(tensor.numpy(), 0)
21 |     np.testing.assert_allclose(array, 0)
22 |
-------------------------------------------------------------------------------- /tests/test_train/test_optimizer.py: --------------------------------------------------------------------------------
1 | import padertorch as pt
2 | import torch
3 |
4 |
5 | def test_grad_norm():
6 |     lin = torch.nn.Linear(16, 8)
7 |     opti = pt.optimizer.Adam()
8 |     opti.set_parameters(lin.parameters())
9 |     opti.zero_grad()
10 |     loss = lin.weight.sum()
11 |     loss.backward()
12 |     grad_norm = opti.clip_grad()
13 |     grad_norm_ref = torch.nn.utils.clip_grad_norm_(
14 |         lin.parameters(), 10.
15 |     )
16 |     assert grad_norm == grad_norm_ref and grad_norm_ref > 0., \
17 |         (grad_norm, grad_norm_ref)
18 |
--------------------------------------------------------------------------------
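For reference, a minimal sketch of how the `Optimizer` wrapper from padertorch/train/optimizer.py is driven in a training step; the model and loop below are placeholders, not repository code:

    import torch
    import padertorch as pt

    model = torch.nn.Linear(16, 8)                # placeholder model
    optimizer = pt.optimizer.Adam(gradient_clipping=5., lr=1e-3)
    optimizer.set_parameters(model.parameters())  # instantiates torch.optim.Adam

    for _ in range(3):                            # placeholder training loop
        loss = model(torch.randn(4, 16)).pow(2).mean()
        optimizer.zero_grad()
        loss.backward()
        grad_norm = optimizer.clip_grad()         # clip_grad_norm_ over all parameters
        optimizer.step()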