├── .bumpversion.cfg ├── .coveragerc ├── .github └── workflows │ └── tests.yml ├── .gitignore ├── DESCRIPTION.rst ├── LICENSE ├── README.md ├── doc ├── configurable.md ├── examples.md ├── padertorch.svg ├── sacred.md └── virtual_batch_size_multi_gpu.md ├── jenkins.bash ├── maintenance.md ├── padertorch ├── __init__.py ├── base.py ├── configurable.py ├── contrib │ ├── __init__.py │ ├── cb │ │ ├── __init__.py │ │ ├── array.py │ │ ├── complex.py │ │ ├── data.py │ │ ├── feature_extractor.py │ │ ├── hooks.py │ │ ├── io.py │ │ ├── summary.py │ │ ├── tensorboard_symlink_tree.py │ │ ├── track.py │ │ └── transform.py │ ├── data │ │ ├── __init__.py │ │ ├── utils.py │ │ └── wsj0_mix │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── create_json.py │ │ │ └── prepare_data.sh │ ├── examples │ │ ├── __init__.py │ │ ├── audio_synthesis │ │ │ └── wavenet │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── data.py │ │ │ │ ├── evaluate.py │ │ │ │ ├── model.py │ │ │ │ └── train.py │ │ ├── examples.md │ │ ├── sound_recognition │ │ │ ├── __init__.py │ │ │ └── audio_tagging │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── data.py │ │ │ │ ├── evaluate.py │ │ │ │ ├── model.py │ │ │ │ └── train.py │ │ ├── source_localization │ │ │ └── distance_estimator │ │ │ │ ├── Makefile │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── create_jsons.py │ │ │ │ ├── data.py │ │ │ │ ├── download.py │ │ │ │ ├── evaluate.py │ │ │ │ ├── model.py │ │ │ │ └── train.py │ │ ├── source_separation │ │ │ ├── __init__.py │ │ │ ├── or_pit │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── evaluate.py │ │ │ │ ├── model.py │ │ │ │ ├── templates.py │ │ │ │ └── train.py │ │ │ ├── pit │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── data.py │ │ │ │ ├── evaluate.py │ │ │ │ ├── model.py │ │ │ │ ├── templates.py │ │ │ │ └── train.py │ │ │ └── tasnet │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── evaluate.py │ │ │ │ ├── model.py │ │ │ │ ├── tas_coders.py │ │ │ │ ├── templates.py │ │ │ │ └── train.py │ │ ├── speaker_classification │ │ │ └── supervised │ │ │ │ ├── README.md │ │ │ │ ├── data.py │ │ │ │ ├── evaluate.py │ │ │ │ ├── model.py │ │ │ │ └── train.py │ │ ├── speech_enhancement │ │ │ ├── __init__.py │ │ │ └── mask_estimator │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── evaluate.py │ │ │ │ ├── model.py │ │ │ │ └── train.py │ │ └── toy_examples │ │ │ ├── configurable │ │ │ ├── __init__.py │ │ │ ├── configurable.py │ │ │ └── shared_parameter.py │ │ │ ├── mnist │ │ │ └── mnist_example.py │ │ │ └── multi_gpu │ │ │ └── train.py │ ├── je │ │ ├── __init__.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── filters.py │ │ │ ├── transforms.py │ │ │ └── utils.py │ │ ├── hooks │ │ │ └── swa.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ └── clf.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── augment.py │ │ │ ├── conv.py │ │ │ ├── conv_utils.py │ │ │ ├── features.py │ │ │ ├── hybrid.py │ │ │ ├── reduce.py │ │ │ ├── rnn.py │ │ │ └── transformer.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ └── test_conv.py │ ├── jensheit │ │ ├── __init__.py │ │ ├── base.py │ │ ├── batch.py │ │ ├── data.py │ │ ├── eval_sad.py │ │ ├── evaluation.py │ │ ├── mask_estimator_example │ │ │ ├── __init__.py │ │ │ ├── model.py │ │ │ └── modul.py │ │ ├── norm.py │ │ ├── tests │ │ │ ├── test_mask_estimator.py │ │ │ └── test_utils.py │ │ ├── train_convtasnet.py │ │ └── utils.py │ ├── ldrude │ │ ├── __init__.py │ │ ├── data.py │ │ └── utils.py │ ├── mk │ │ ├── __init__.py │ │ ├── alignments.py │ │ ├── io.py │ │ ├── modules │ │ │ ├── 
__init__.py │ │ │ ├── contrastive.py │ │ │ └── features │ │ │ │ ├── __init__.py │ │ │ │ ├── ssl │ │ │ │ ├── __init__.py │ │ │ │ ├── hubert.py │ │ │ │ └── wav2vec2.py │ │ │ │ └── timefreq.py │ │ ├── synthesis │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── parametric │ │ │ │ ├── __init__.py │ │ │ │ └── griffin_lim.py │ │ │ └── vocoder │ │ │ │ ├── __init__.py │ │ │ │ ├── bigvgan.py │ │ │ │ ├── nvidia_bigvgan │ │ │ │ ├── __init__.py │ │ │ │ ├── activations.py │ │ │ │ ├── alias_free_activation │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── cuda │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── activation1d.py │ │ │ │ │ │ ├── anti_alias_activation.cpp │ │ │ │ │ │ ├── anti_alias_activation_cuda.cu │ │ │ │ │ │ ├── compat.h │ │ │ │ │ │ ├── load.py │ │ │ │ │ │ └── type_shim.h │ │ │ │ │ └── torch │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── act.py │ │ │ │ │ │ ├── filter.py │ │ │ │ │ │ └── resample.py │ │ │ │ ├── bigvgan.py │ │ │ │ ├── env.py │ │ │ │ ├── meldataset.py │ │ │ │ └── utils.py │ │ │ │ └── pwg.py │ │ ├── tbx_utils.py │ │ ├── typing.py │ │ └── visualization.py │ ├── neumann │ │ ├── __init__.py │ │ ├── chunking.py │ │ └── evaluation.py │ └── tcl │ │ ├── __init__.py │ │ ├── dc.py │ │ ├── speaker_embeddings │ │ ├── __init__.py │ │ ├── dvectors.py │ │ ├── eer_metrics.py │ │ ├── loss.py │ │ ├── student_embeddings.py │ │ └── teacher_student.py │ │ └── utils │ │ └── augmentation.py ├── data │ ├── __init__.py │ ├── batch.py │ ├── segment.py │ └── utils.py ├── io.py ├── modules │ ├── __init__.py │ ├── convnet.py │ ├── dual_path_rnn.py │ ├── fully_connected.py │ ├── normalization.py │ ├── recurrent.py │ └── wavenet │ │ ├── __init__.py │ │ ├── nv_wavenet │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── build.py │ │ ├── matrix.cpp │ │ ├── matrix.h │ │ ├── matrix_math.cuh │ │ ├── nv_wavenet.cuh │ │ ├── nv_wavenet.py │ │ ├── nv_wavenet_conversions.cuh │ │ ├── nv_wavenet_dualblock.cuh │ │ ├── nv_wavenet_persistent.cuh │ │ ├── nv_wavenet_singleblock.cuh │ │ ├── nv_wavenet_util.cuh │ │ ├── softmax.cuh │ │ ├── wavenet_infer.cu │ │ ├── wavenet_infer.h │ │ └── wavenet_infer_wrapper.cpp │ │ └── wavenet.py ├── ops │ ├── __init__.py │ ├── _stft.py │ ├── einsum.py │ ├── losses │ │ ├── __init__.py │ │ ├── classification.py │ │ ├── kl_divergence.py │ │ ├── regression.py │ │ └── source_separation.py │ ├── mappings.py │ ├── mu_law.py │ ├── sequence │ │ ├── __init__.py │ │ ├── mask.py │ │ ├── pack_module.py │ │ ├── pointwise.py │ │ └── reduction.py │ └── tensor.py ├── summary │ ├── __init__.py │ ├── model_info.py │ ├── tbx_utils.py │ └── tfevents.py ├── testing │ ├── __init__.py │ └── test_db.py ├── train │ ├── __init__.py │ ├── hooks.py │ ├── optimizer.py │ ├── runtime_tests.py │ ├── trainer.py │ └── trigger.py └── utils.py ├── pyproject.toml ├── pytest.ini ├── setup.py └── tests ├── __init__.py ├── contrib └── __init__.py ├── test_configurable.py ├── test_data └── test_segmenter.py ├── test_models ├── __init__.py └── test_bss.py ├── test_modules ├── __init__.py └── test_norm.py ├── test_ops ├── __init__.py ├── test_losses.py ├── test_sequence.py └── test_stft.py ├── test_summary └── test_tbx_utils.py └── test_train ├── test_hooks.py ├── test_optimizer.py ├── test_runtime_tests.py └── test_trainer.py /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.0.1 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | -------------------------------------------------------------------------------- /.coveragerc: 
-------------------------------------------------------------------------------- 1 | [run] 2 | omit = padertorch/contrib/* 3 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ${{ matrix.os }} 13 | strategy: 14 | fail-fast: false # Let other jobs keep running even if one fails 15 | matrix: 16 | python-version: [3.8, 3.9, "3.10", "3.11", "3.12"] 17 | os: [ubuntu-latest] 18 | include: 19 | - os: ubuntu-22.04 20 | python-version: 3.7 21 | - os: macos-latest 22 | python-version: "3.12" 23 | 24 | env: 25 | TMPDIR: /private/tmp # Default TMPDIR on macOS is /var which pathlib.Path resolves to /private/var 26 | if: matrix.os == 'macos-latest' 27 | steps: 28 | - uses: actions/checkout@v3 29 | - name: Set up Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v4 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install linux dependencies 34 | run: | 35 | sudo apt-get install libsndfile1 36 | if: matrix.os == 'ubuntu-latest' || matrix.os == 'ubuntu-22.04' 37 | - name: Install macos dependencies 38 | run: | 39 | brew install libsndfile 40 | echo $TMPDIR 41 | if: matrix.os == 'macos-latest' 42 | - name: Install python dependencies 43 | run: | 44 | python -m pip install --upgrade pip setuptools wheel 45 | pip install flake8 pytest pytest-cov codecov 46 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 47 | pip install numpy scipy Cython 48 | pip install --editable .[all] 49 | - name: Lint with flake8 50 | run: | 51 | # stop the build if there are Python syntax errors or undefined names 52 | #flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 53 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 54 | #flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 55 | - name: Run unittest and doctest on Ubuntu 56 | run: | 57 | pytest -v "tests/" "padertorch/" 58 | if: matrix.os != 'macos-latest' 59 | - name: Run unittest on macOS # Some doctests fail because numeric precision is too high on macOS 60 | run: | 61 | pytest -v "tests/" 62 | if: matrix.os == 'macos-latest' 63 | - name: Codecov 64 | run: | 65 | codecov 66 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # IPython 78 | profile_default/ 79 | ipython_config.py 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # Environments 91 | .env 92 | .venv 93 | env/ 94 | venv/ 95 | ENV/ 96 | env.bak/ 97 | venv.bak/ 98 | 99 | # Spyder project settings 100 | .spyderproject 101 | .spyproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # mkdocs documentation 107 | /site 108 | 109 | # mypy 110 | .mypy_cache/ 111 | .dmypy.json 112 | dmypy.json 113 | 114 | # Pyre type checker 115 | .pyre/ 116 | -------------------------------------------------------------------------------- /DESCRIPTION.rst: -------------------------------------------------------------------------------- 1 | PyTorch Framework 2 | ================= 3 | 4 | When first working with padertorch, have a look at `padertorch/contrib/examples`. 5 | 6 | A simple example of how to use the `padertorch.Trainer` may be found in 7 | `padertorch/contrib/examples/mask_estimator/simple_train.py` 8 | 9 | For an example of how to use the `Configurable` in combination with the `Trainer`, 10 | refer to: `padertorch/contrib/examples/pit/train.py` 11 | 12 | All other examples show different approaches for using `padertorch` and may be 13 | interpreted as specific to the use case and the preferences of the example owner. 14 | 15 | # ToDo: 16 | 17 | This module contains functions and classes where the vanilla API is messed up. 18 | 19 | The general idea is to move all independent axes to the left if possible. The 20 | exception to this rule of thumb are sequences. It is computationally more 21 | efficient to use the steps as the outer axis. This also aligns well with how 22 | `torch.nn.utils.rnn.PackedSequence` is defined. 23 | 24 | Examples of why the API is seriously broken: 25 | - torch.Tensor.size() vs. torch.nn.utils.rnn.PackedSequence().batch_sizes 26 | - torch.randn(d1, d2, ...) vs. torch.randint(low, high, size=(d1, d2, ...)) 27 | - torch.transpose(input, dim0, dim1) although input is already defined 28 | 29 | Milestones: 30 | 2. Make it possible to decode (=predict) both models 31 | - Does the batch axis stay? Christoph always wants to allow independent axes. 32 | Christoph investigates if all ops support independent axes. 33 | - How do I reconstruct the trained model? 34 | 35 | 51. 
Sequence normalization and batch norm with tracking from batch to batch 36 | - Sequence norm 37 | - Batch norm 38 | 39 | 40 | Structures: 41 | - Module (comparable to chain or chain_list in Chainer, building_blocks in PF) 42 | - Ops (comparable to ops in PF) 43 | 44 | 45 | Definitions: 46 | packed: Uses `torch.nn.utils.rnn.PackedSequence` 47 | padded: Uses `padded` and `sequence_length` 48 | 49 | padded to packed: `pack_padded_sequence` yields `PackedSequence` 50 | packed to padded: `pad_packed_sequence` yields `Tensor` 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Communications Engineering Group, Paderborn University 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /jenkins.bash: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This file is only required for internal testing 4 | # cd dirname "$(readlink -f "$0")" 5 | 6 | git clone https://github.com/fgnt/paderbox 7 | 8 | # include common stuff (installation of toolbox, paths, traps, nice level...) 
9 | source paderbox/jenkins_common.bash 10 | 11 | # Cuda 12 | source paderbox/bash/cuda.bash 13 | 14 | pip install --user -e .[test] 15 | 16 | # Unittests 17 | # It seems that jenkins currently does not work with matlab: Error: Segmentation violation 18 | 19 | # nosetests --with-xunit --with-coverage --cover-package=padertorch -v -w "tests" # --processes=-1 20 | pytest -v "tests/" "padertorch/" 21 | # Use as many processes as you have cores: --processes=-1 22 | # According to https://gist.github.com/hangtwenty/1aeb36ee85f4bdce0899 23 | # `--cov-report term` solves the problem that doctests are not included 24 | # in coverage 25 | 26 | # Export coverage 27 | python -m coverage xml --include="padertorch*" 28 | 29 | # Pylint tests 30 | pylint --rcfile="paderbox/pylint.cfg" -f parseable padertorch > pylint.txt 31 | # --files-output=y is a bad option, because it produces hundreds of files 32 | 33 | pip freeze > pip.txt 34 | pip uninstall --quiet --yes padertorch 35 | 36 | # copy html code to lighttpd webserver 37 | # rsync -a --delete-after /var/lib/jenkins/jobs/python_toolbox/workspace/toolbox/doc/build/html/ /var/www/doku/html/python_toolbox/ 38 | -------------------------------------------------------------------------------- /maintenance.md: -------------------------------------------------------------------------------- 1 | 2 | # PyPi upload 3 | 4 | Packaging a Python package / version bump. See: https://packaging.python.org/tutorials/packaging-projects/ 5 | 6 | 1. Update `setup.py` to the new version number 7 | 2. Commit this change 8 | 3. Tag and upload 9 | 10 | ## pypirc 11 | 12 | Example `~/.pypirc` (see https://packaging.python.org/en/latest/specifications/pypirc/) 13 | ``` 14 | [distutils] 15 | index-servers = 16 | pypi 17 | testpypi 18 | 19 | [pypi] 20 | username = __token__ 21 | password = 22 | 23 | [testpypi] 24 | username = __token__ 25 | password = 26 | ``` 27 | 28 | ## Install dependencies: 29 | ```bash 30 | pip install --upgrade setuptools 31 | pip install --upgrade wheel 32 | pip install --upgrade twine 33 | # pip install --upgrade bleach html5lib # some versions do not work 34 | pip install --upgrade bump2version 35 | ``` 36 | 37 | `bump2version` takes care of increasing the version number and creating the commit and tag. 38 | 39 | 40 | ## Publish 41 | 42 | ```bash 43 | export SETUP_PY_IGNORE_GIT_DEPENDENCIES=1 44 | bump2version --verbose --tag patch # major, minor or patch 45 | python setup.py sdist # bdist_wheel # It is difficult to get bdist_wheel working with binary files 46 | git push origin --tags 47 | # Wait for the github action to build the windows wheels, ToDo: Fix wheels. 48 | twine upload --repository testpypi dist/* # 49 | twine upload dist/* 50 | git push 51 | ``` 52 | -------------------------------------------------------------------------------- /padertorch/__init__.py: -------------------------------------------------------------------------------- 1 | from padertorch import utils 2 | from padertorch.train import trainer, optimizer 3 | from padertorch.train.trainer import * 4 | 5 | from . import base 6 | from . import configurable 7 | from . import data 8 | from . import ops 9 | from . import summary 10 | from . import io 11 | from .base import * 12 | from .configurable import Configurable 13 | from .ops import * 14 | 15 | # This import has to be late, otherwise you cannot use pt.Models in models. 16 | from . 
import modules 17 | -------------------------------------------------------------------------------- /padertorch/contrib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/cb/__init__.py: -------------------------------------------------------------------------------- 1 | from .io import ( 2 | get_new_folder, 3 | write_makefile_and_config, 4 | ) 5 | from . import data 6 | 7 | -------------------------------------------------------------------------------- /padertorch/contrib/cb/array.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def overlap_add( 5 | tensor, 6 | shift, 7 | ): 8 | """ 9 | 10 | >>> overlap_add(torch.arange(12).to(torch.float).reshape(3, 4), 4) 11 | tensor([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11.]) 12 | >>> overlap_add(torch.arange(12).to(torch.float).reshape(3, 4), 2) 13 | tensor([ 0., 1., 6., 8., 14., 16., 10., 11.]) 14 | >>> overlap_add(torch.ones(12).to(torch.float).reshape(3, 4), 2) 15 | tensor([1., 1., 2., 2., 2., 2., 1., 1.]) 16 | >>> overlap_add(torch.ones(2, 3, 4, 5).to(torch.float), 2).shape 17 | torch.Size([2, 3, 11]) 18 | """ 19 | *independent, frames, frequencies = tensor.shape 20 | 21 | samples = frequencies + frames * shift - shift 22 | tensor = tensor.reshape(-1, frames, frequencies).transpose(-1, -2) 23 | out = torch.nn.Fold( 24 | output_size=(1, samples), 25 | kernel_size=(1, frequencies), 26 | dilation=1, 27 | padding=0, 28 | stride=(1, shift), 29 | )(tensor) 30 | return out.squeeze(-3).squeeze(-2).reshape(*independent, samples) 31 | -------------------------------------------------------------------------------- /padertorch/contrib/cb/complex.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | __all__ = { 5 | } 6 | 7 | 8 | def is_torch(obj): 9 | """ 10 | The namespace here is not torch, hence rename is_tensor to is_torch. 11 | 12 | >>> is_torch(np.zeros(3)) 13 | False 14 | >>> is_torch(torch.zeros(3)) 15 | True 16 | >>> from torch_complex import ComplexTensor 17 | >>> is_torch(ComplexTensor(np.zeros(3))) 18 | True 19 | """ 20 | if torch.is_tensor(obj): 21 | return True 22 | if type(obj).__name__ == 'ComplexTensor': 23 | from torch_complex import ComplexTensor 24 | if isinstance(obj, ComplexTensor): 25 | return True 26 | return False 27 | 28 | -------------------------------------------------------------------------------- /padertorch/contrib/cb/hooks.py: -------------------------------------------------------------------------------- 1 | import signal 2 | from padertorch.train.hooks import StopTrainingHook, StopTraining, Hook 3 | from padertorch.train.trigger import Trigger, IntervalTrigger 4 | 5 | 6 | class CPUTimeLimitExceededHookTrigger(Trigger): 7 | """ 8 | Graceful shutdown of training. 9 | 10 | Shutdown after next iteration (i.e. as fast as possible, finish validation) 11 | $ ccssignal XCPU 12 | Use `ccsalloc ... --notifyjob=XCPU,60m ...` to let ccs send the signal. 13 | 14 | Shutdown after next epoch (i.e. 
finish current epoch, good iterator state) 15 | $ ccssignal USR1 # Shutdown after next epoch 16 | 17 | """ 18 | def __init__(self): 19 | self._SIGXCPU_received = False 20 | self._SIGUSR1_received = False 21 | signal.signal(signal.SIGXCPU, self.handler_SIGXCPU) 22 | signal.signal(signal.SIGUSR1, self.handler_SIGUSR1) 23 | 24 | self.epoch_trigger = IntervalTrigger(1, 'epoch') 25 | 26 | def handler_SIGXCPU(self, signum, frame): 27 | print('Received SIGXCPU: CPU time limit exceeded') 28 | print('Gracefully shutting down training') 29 | self._SIGXCPU_received = True 30 | 31 | def handler_SIGUSR1(self, signum, frame): 32 | print('Received SIGUSR1: User-defined signal 1.') 33 | print(f'Gracefully shutting down training when epoch ' 34 | f'{self.epoch_trigger.last + 1} is finished') 35 | self._SIGUSR1_received = True 36 | 37 | def set_last(self, iteration, epoch): 38 | pass 39 | 40 | def __call__(self, iteration, epoch): 41 | return ( 42 | ( 43 | self.epoch_trigger(iteration, epoch) 44 | and self._SIGUSR1_received 45 | ) 46 | or self._SIGXCPU_received 47 | ) 48 | 49 | 50 | class CPUTimeLimitExceededHook(StopTrainingHook): 51 | def __init__(self): 52 | # Do not call super, to prevent a copy of this trigger 53 | self.trigger = CPUTimeLimitExceededHookTrigger() 54 | 55 | 56 | class PyroHook(Hook): 57 | 58 | pyro_inspector = None 59 | 60 | def pre_step(self, trainer): 61 | from cbj.pyro_inspect import PyroInspector 62 | if self.pyro_inspector is None: 63 | self.pyro_inspector = PyroInspector(2) 64 | self.pyro_inspector.__enter__() 65 | 66 | def close(self, trainer): 67 | if self.pyro_inspector is not None: 68 | self.pyro_inspector.__exit__() 69 | 70 | 71 | if __name__ == '__main__': 72 | 73 | import os 74 | import time 75 | from threading import Thread 76 | 77 | hook = CPUTimeLimitExceededHook() 78 | 79 | pid = os.getpid() 80 | 81 | def killer(): 82 | time.sleep(2.5) 83 | os.kill(pid, signal.SIGXCPU) 84 | 85 | thread = Thread(target=killer) 86 | thread.start() 87 | 88 | class Trainer: 89 | iteration = 0 90 | epoch = 0 91 | 92 | 93 | try: 94 | while True: 95 | print(time.perf_counter()) 96 | if hook.pre_step(Trainer()): 97 | break 98 | time.sleep(1) 99 | except StopTraining: 100 | print('StopTraining') 101 | thread.join() 102 | 103 | 104 | hook = CPUTimeLimitExceededHook() 105 | 106 | def killer(): 107 | time.sleep(0.5) 108 | os.kill(pid, signal.SIGUSR1) 109 | 110 | thread = Thread(target=killer) 111 | thread.start() 112 | 113 | class Trainer: 114 | iteration = 0 115 | epoch = 0 116 | 117 | try: 118 | while True: 119 | Trainer.iteration += 1 120 | if (Trainer.iteration % 5) == 0: 121 | Trainer.epoch += 1 122 | print(time.perf_counter(), Trainer.iteration, Trainer.epoch) 123 | if hook.pre_step(Trainer()): 124 | break 125 | time.sleep(1) 126 | except StopTraining: 127 | print('StopTraining') 128 | thread.join() 129 | -------------------------------------------------------------------------------- /padertorch/contrib/cb/tensorboard_symlink_tree.py: -------------------------------------------------------------------------------- 1 | """ 2 | Create a symlink tree for all specified files in the current folder. 3 | 4 | python -m padertorch.contrib.cb.tensorboard_symlink_tree ../*/*tfevents* --max_age=1days 5 | 6 | Use case: 7 | 8 | Tensorboard does a recursive search for all tfevent files. 9 | In many cases this works well and is better than this workaround. 10 | 11 | When you have a slow recursive search, this script can be used as a workaround. 
12 | This can be caused by a slow filesystem (usually remote) and too many files 13 | inside the tensorboard folder (e.g. a Kaldi experiment folder). 14 | 15 | The problem with tensorboard in this case is that it supports neither 16 | multiple tfevent files in the command line interface (only one is supported) nor a 17 | customisation of the search pattern for the event files (e.g. limited depth 18 | search). 19 | 20 | This workaround mirrors the folder tree, but only for the files that are the 21 | input of this file. On the command line you can use bash wildcards like `*`: 22 | 23 | python -m padertorch.contrib.cb.tensorboard_symlink_tree ../*/*tfevents* 24 | 25 | This command creates symlinks to all tfevent files that match the pattern 26 | `../*/*tfevents*` in the current folder. 27 | Sadly, this command has to be executed each time you create a new experiment. 28 | Because of this I created a Makefile in that folder: 29 | 30 | .../tensorboard$ cat Makefile 31 | symlink_tree1day: 32 | find . -xtype l -delete # Remove broken symlinks: https://unix.stackexchange.com/a/314975/283777 33 | python -m padertorch.contrib.cb.tensorboard_symlink_tree --prefix=.. ../*/*tfevents* --max_age=1days 34 | 35 | symlink_tree: 36 | find . -xtype l -delete # Remove broken symlinks: https://unix.stackexchange.com/a/314975/283777 37 | python -m padertorch.contrib.cb.tensorboard_symlink_tree --prefix=.. ../*/*tfevents* 38 | 39 | tensorboard: 40 | date && $(cd .../tensorboard && ulimit -v 10000000 && tensorboard --bind_all -v 1 --logdir=. --port=...) && date || date 41 | 42 | """ 43 | 44 | import os 45 | from pathlib import Path 46 | import datetime 47 | 48 | import paderbox as pb 49 | 50 | 51 | def main(*files, prefix=None, max_age=None): 52 | if prefix is None: 53 | prefix = os.path.commonpath(files) 54 | print('Common Prefix', prefix) 55 | print('Create') 56 | 57 | files = [Path(f) for f in files] 58 | 59 | if max_age is not None: 60 | # The pandas import is slow, but pd.Timedelta 61 | # accepts many styles for time 62 | # (e.g. '1day') 63 | import pandas as pd 64 | max_age = pd.Timedelta(max_age) 65 | now = pd.Timestamp('now') 66 | 67 | files = sorted(files, key=lambda file: file.stat().st_mtime) 68 | 69 | for file in files: 70 | link_name = file.relative_to(prefix) 71 | if max_age is not None: 72 | last_modified = file.stat().st_mtime 73 | last_modified = datetime.datetime.fromtimestamp(last_modified) 74 | 75 | if max_age > now - last_modified: 76 | # Create symlink if it doesn't exist. 77 | pass 78 | else: 79 | if not link_name.is_symlink(): 80 | print(f'Skip {file}, it is {now - last_modified} > {max_age} old.') 81 | continue 82 | 83 | link_name.parent.mkdir(exist_ok=True) 84 | source = os.path.relpath(file, link_name.parent) 85 | if not link_name.exists(): 86 | print(f'\t{link_name} -> {source}') 87 | 88 | # Create symlink if it does not exist, 89 | # or check that the symlink points to the 90 | # same file. 
91 | pb.io.symlink(source, link_name) 92 | print('Finish') 93 | 94 | 95 | if __name__ == '__main__': 96 | import fire 97 | fire.Fire(main) 98 | -------------------------------------------------------------------------------- /padertorch/contrib/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/data/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/data/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import paderbox as pb 3 | 4 | 5 | def check_audio_files_exist( 6 | database_dict, 7 | speedup=None, 8 | extensions=('.wav', '.wv2', '.wv1', '.flac'), 9 | ): 10 | """ 11 | No structure for the database_dict is assumed. It will just search for all 12 | string values ending with a certain file type (e.g. wav). 13 | 14 | >>> check_audio_files_exist({2: [1, '/net/db/timit/pcm/train/dr1/fcjf0/sa1.wav', 'abc.wav']}) 15 | Traceback (most recent call last): 16 | ... 17 | AssertionError: ('abc.wav', (2, '2')) 18 | >>> check_audio_files_exist(1) 19 | Traceback (most recent call last): 20 | ... 21 | AssertionError: Expect at least one wav file. It is likely that the database folder is empty and the greps did not work. to_check: {} 22 | >>> check_audio_files_exist('abc.wav') 23 | Traceback (most recent call last): 24 | ... 25 | AssertionError: ('abc.wav', ()) 26 | >>> check_audio_files_exist('/net/db/timit/pcm/train/dr1/fcjf0/sa1.wav') 27 | >>> check_audio_files_exist(1, speedup='thread') 28 | Traceback (most recent call last): 29 | ... 30 | AssertionError: Expect at least one wav file. It is likely that the database folder is empty and the greps did not work. to_check: {} 31 | >>> check_audio_files_exist('abc.wav', speedup='thread') 32 | Traceback (most recent call last): 33 | ... 34 | AssertionError: ('abc.wav', ()) 35 | >>> check_audio_files_exist('/net/db/timit/pcm/train/dr1/fcjf0/sa1.wav', speedup='thread') 36 | """ 37 | 38 | def path_exists(path): 39 | return Path(path).exists() 40 | 41 | def body(file_key_path): 42 | key_path, file = file_key_path 43 | assert path_exists(file), (file, key_path) 44 | 45 | def condition_fn(file): 46 | return isinstance(file, (str, Path)) and str(file).endswith(extensions) 47 | 48 | to_check = { 49 | k: v for k, v in pb.utils.nested.flatten(database_dict).items() 50 | if condition_fn(v) 51 | } 52 | 53 | assert len(to_check) > 0, ( 54 | f'Expect at least one wav file. ' 55 | f'It is likely that the database folder is empty ' 56 | f'and the greps did not work. to_check: {to_check}' 57 | ) 58 | 59 | if speedup and 'thread' == speedup: 60 | import os 61 | from multiprocessing.pool import ThreadPool 62 | 63 | # Use this number because ThreadPoolExecutor is often 64 | # used to overlap I/O instead of CPU work. 65 | # See: concurrent.futures.ThreadPoolExecutor 66 | # max_workers = (os.cpu_count() or 1) * 5 67 | 68 | # Both variants are not sufficiently benchmarked; this is the more conservative choice. 
69 | max_workers = (os.cpu_count() or 1) 70 | 71 | with ThreadPool(max_workers) as pool: 72 | for _ in pool.imap_unordered( 73 | body, 74 | to_check.items() 75 | ): 76 | pass 77 | 78 | elif speedup is None: 79 | for key_path, file in to_check.items(): 80 | assert path_exists(file), (file, key_path) 81 | else: 82 | raise ValueError(speedup, type(speedup)) 83 | -------------------------------------------------------------------------------- /padertorch/contrib/data/wsj0_mix/README.md: -------------------------------------------------------------------------------- 1 | # WSJ0-mix data preparation 2 | 3 | To prepare the wsj0-2mix and wsj0-3mix data, follow these steps: 4 | 1. Generate the mixtures using the matlab scripts. 5 | 2. Edit `prepare_data.sh` to match your paths. You need to specify paths to the generated data and to the WSJ(0) database. WSJ(0) is required to obtain transcriptions. You can edit the `--json_path` parameter to specify the path to the output JSON. 6 | 3. Run `prepare_data.sh`. 7 | 8 | This script creates a JSON file that can be used by the examples. 9 | The JSON file is compatible with `lazy_dataset.database.JsonDatabase`. 10 | An example of reading data: 11 | 12 | ```python 13 | from lazy_dataset.database import JsonDatabase 14 | import numpy as np 15 | import paderbox as pb 16 | 17 | db = JsonDatabase("/path/to/JSON.json") 18 | 19 | dataset = db.get_dataset("mix_2_spk_min") 20 | 21 | def pre_batch_transform(inputs): 22 | return { 23 | 's': np.ascontiguousarray([ 24 | pb.io.load_audio(p) 25 | for p in inputs['audio_path']['speech_source'] 26 | ], np.float32), 27 | 'y': np.ascontiguousarray( 28 | pb.io.load_audio(inputs['audio_path']['observation']), np.float32), 29 | 'num_samples': inputs['num_samples'], 30 | 'example_id': inputs['example_id'], 31 | 'audio_path': inputs['audio_path'], 32 | } 33 | dataset = dataset.map(pre_batch_transform) 34 | 35 | example = dataset[0] 36 | ``` 37 | -------------------------------------------------------------------------------- /padertorch/contrib/data/wsj0_mix/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/data/wsj0_mix/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/data/wsj0_mix/prepare_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Put your paths here! 
4 | if [[ "$(hostname -d)" == nt.uni-paderborn.de ]]; then 5 | # NT specific defaults 6 | database_path=/net/db/merl_speaker_mixtures/data 7 | wsj0_root=/net/db/wsj 8 | elif [[ "${PC2SYSNAME}" == OCULUS ]]; then 9 | database_path=/scratch/hpc-prf-nt1/cbj/net/db/merl_speaker_mixtures/data 10 | wsj0_root=/scratch/hpc-prf-nt1/cbj/net/db/wsj 11 | else 12 | # Path to the database as generated by the matlab scripts 13 | database_path= 14 | 15 | # If you need transcriptions, put the path to the WSJ root folder here 16 | wsj0_root= 17 | fi 18 | 19 | python -m padertorch.contrib.data.wsj0_mix.create_json \ 20 | --database_path ${database_path} \ 21 | --wsj0_root ${wsj0_root} \ 22 | --json_path wsj0_mix_min_8k.json \ 23 | --num_speakers 2 --num_speakers 3 \ 24 | --signal_length min \ 25 | --sample_rate wav8k -------------------------------------------------------------------------------- /padertorch/contrib/examples/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | import padercontrib.database 3 | except Exception: 4 | import warnings 5 | warnings.warn( 6 | "These examples depend on our internal database structure " 7 | "at the moment. " 8 | "Trying to execute them anyway may take considerable " 9 | "effort on your part." 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/audio_synthesis/wavenet/README.md: -------------------------------------------------------------------------------- 1 | # WaveNet Vocoder 2 | 3 | This example trains and evaluates a WaveNet vocoder synthesising waveforms 4 | from log mel spectrograms. The WaveNet is trained on the LibriSpeech corpus. 5 | 6 | ## Training 7 | 8 | The training script needs a JSON file that describes the structure of your 9 | database in the following format: 10 | ``` 11 | { 12 | "datasets": { 13 | <dataset name>: { 14 | <example id>: { 15 | "audio_path": { 16 | "observation": <path to audio file> 17 | }, 18 | "num_samples": <number of samples> 19 | }, 20 | <example id>: { 21 | ... 22 | }, 23 | ... 24 | }, 25 | <dataset name>: { 26 | <example id>: { 27 | ... 28 | }, 29 | ... 30 | }, 31 | ... 32 | } 33 | } 34 | ``` 35 | 36 | To start the training, first define a path to where the trained models should 37 | be saved: 38 | ```bash 39 | export STORAGE_ROOT=<path to storage root>; python -m padertorch.contrib.examples.audio_synthesis.wavenet.train 40 | ``` 41 | Your trained models can be found in `$STORAGE_ROOT/wavenet/`. 42 | 43 | Note that the data input pipeline only extracts STFTs while the log mel 44 | extraction and normalization are done in the model. 45 | 46 | ## Evaluation 47 | 48 | The evaluation script loads the best checkpoint (lowest achieved loss on the 49 | validation set) and performs autoregressive waveform synthesis. 50 | For test-time synthesis nv-wavenet needs to be installed. 51 | Do note that nv-wavenet requires a GPU with Compute Capability 6.0 or later 52 | (https://developer.nvidia.com/cuda-gpus), i.e., you can neither run the 53 | evaluation on a CPU nor, e.g., on a GTX 980. 54 | If nv-wavenet is not installed yet run 55 | ```bash 56 | cd /path/to/padertorch/padertorch/modules/wavenet/nv_wavenet 57 | ``` 58 | Update the Makefile with the appropriate ARCH, e.g., ARCH=sm_70 for Compute Capability 7.0. 
59 | Then run 60 | ```bash 61 | make 62 | python build.py install 63 | ``` 64 | 65 | To run an evaluation, provide the evaluation script with the path to your trained model: 66 | ```bash 67 | mpiexec -np $(nproc --all) python -m padertorch.contrib.examples.audio_synthesis.wavenet.evaluate with exp_dir=<path to trained model> 68 | ``` 69 | It requires [dlp_mpi](https://github.com/fgnt/dlp_mpi) to be installed. 70 | 71 | Evaluation results can be found in `<exp_dir>/eval/`. 72 | For each example the root mean squared error between the true waveform and the 73 | synthesised one is saved to a file `rmse.json`. 74 | The 10 best and worst synthesised waveforms are saved in a subdirectory `audio`. 75 | 76 | If you want to run evaluation on only a few examples, run 77 | ```bash 78 | python -m padertorch.contrib.examples.audio_synthesis.wavenet.evaluate with exp_dir=<path to trained model> max_examples=10 79 | ``` 80 | 81 | ## Results 82 | 83 | | Training set | Test set | RMSE | 84 | | :-----: | :-----: | :---: | 85 | | train_clean_100 + train_clean_360 | test_clean | 0.084 | 86 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/audio_synthesis/wavenet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/examples/audio_synthesis/wavenet/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/examples/audio_synthesis/wavenet/data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from padertorch.contrib.je.data.transforms import AudioReader, STFT, Collate 3 | from padertorch.data.segment import Segmenter 4 | 5 | 6 | def prepare_dataset( 7 | dataset, audio_reader, stft, max_length_in_sec=1., batch_size=3, 8 | is_train_set=False, shuffle=False 9 | ): 10 | 11 | def prepare_example(example): 12 | example['audio_path'] = example['audio_path']['observation'] 13 | return example 14 | 15 | dataset = dataset.map(prepare_example) 16 | 17 | audio_reader = AudioReader(**audio_reader) 18 | dataset = dataset.map(audio_reader) 19 | 20 | anchor = 'random' if is_train_set else 'centered_cutout' 21 | if max_length_in_sec is None: 22 | dataset = dataset.map(lambda ex: [ex]) 23 | else: 24 | segmenter = Segmenter( 25 | length=int(max_length_in_sec*audio_reader.target_sample_rate), 26 | include_keys=('audio_data',), mode='max', anchor=anchor 27 | ) 28 | dataset = dataset.map(segmenter) 29 | 30 | stft = STFT(**stft) 31 | dataset = dataset.batch_map(stft) 32 | 33 | def finalize(example): 34 | return { 35 | 'example_id': example['example_id'], 36 | 'audio_data': example['audio_data'].astype(np.float32), 37 | 'stft': example['stft'].astype(np.float32), 38 | 'seq_len': example['stft'].shape[1], 39 | } 40 | dataset = dataset.batch_map(finalize) 41 | 42 | if shuffle: 43 | dataset = dataset.shuffle(reshuffle=True) 44 | dataset = dataset.prefetch( 45 | num_workers=8, buffer_size=10*batch_size 46 | ).unbatch() 47 | if shuffle: 48 | dataset = dataset.shuffle( 49 | reshuffle=True, buffer_size=10*batch_size 50 | ) 51 | return dataset.batch_dynamic_time_series_bucket( 52 | batch_size=batch_size, len_key='seq_len', max_padding_rate=0.05, 53 | expiration=1000*batch_size, drop_incomplete=shuffle, 54 | sort_key='seq_len', reverse_sort=True 55 | ).map(Collate()) 56 | -------------------------------------------------------------------------------- 
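A minimal sketch of how `prepare_dataset` above can be driven; the JSON path and dataset name are placeholders, while the `audio_reader` and `stft` settings mirror the defaults used in `train.py` below:

```python
from lazy_dataset.database import JsonDatabase
from padertorch.contrib.examples.audio_synthesis.wavenet.data import (
    prepare_dataset,
)

db = JsonDatabase('/path/to/librispeech.json')  # placeholder path
training_data = prepare_dataset(
    db.get_dataset('train_clean_100'),  # placeholder dataset name
    audio_reader={'source_sample_rate': 16000, 'target_sample_rate': 16000},
    stft={'shift': 200, 'window_length': 800, 'size': 1024,
          'fading': 'full', 'pad': True},
    max_length_in_sec=1., batch_size=3, is_train_set=True, shuffle=True,
)
# Each batch is a dict with 'example_id', 'audio_data', 'stft' and 'seq_len'.
batch = next(iter(training_data))
```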
/padertorch/contrib/examples/audio_synthesis/wavenet/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from einops import rearrange 3 | from padertorch import modules 4 | from padertorch.base import Model 5 | from padertorch.contrib.je.modules.features import MelTransform 6 | from padertorch.modules.normalization import InputNormalization 7 | from padertorch.ops import mu_law_decode 8 | 9 | 10 | class WaveNet(Model): 11 | def __init__( 12 | self, 13 | wavenet, 14 | sample_rate, stft_size, 15 | number_of_mel_filters, lowest_frequency=50, highest_frequency=None 16 | ): 17 | super().__init__() 18 | self.wavenet = wavenet 19 | self.sample_rate = sample_rate 20 | self.mel_transform = MelTransform( 21 | number_of_filters=number_of_mel_filters, 22 | sample_rate=sample_rate, stft_size=stft_size, 23 | lowest_frequency=lowest_frequency, highest_frequency=highest_frequency, 24 | ) 25 | self.in_norm = InputNormalization( 26 | data_format='bcft', 27 | shape=(None, 1, number_of_mel_filters, None), 28 | statistics_axis='bt', 29 | independent_axis=None, 30 | ) 31 | 32 | def feature_extraction(self, x, seq_len=None): 33 | x = self.mel_transform(torch.sum(x**2, dim=(-1,))).transpose(-2, -1) 34 | x = self.in_norm(x, sequence_lengths=seq_len) 35 | x = rearrange(x, 'b c f t -> b (c f) t') 36 | return x 37 | 38 | def forward(self, inputs): 39 | x = inputs['stft'] 40 | seq_len = inputs['seq_len'] 41 | x = self.feature_extraction(x, seq_len) 42 | return self.wavenet(x.squeeze(1), inputs['audio_data'].squeeze(1)) 43 | 44 | def review(self, inputs, outputs): 45 | predictions, targets = outputs 46 | ce = torch.nn.CrossEntropyLoss(reduction='none')(predictions, targets) 47 | summary = dict( 48 | loss=ce.mean(), 49 | scalars=dict(), 50 | histograms=dict(reconstruction_ce=ce), 51 | audios=dict( 52 | target=(inputs['audio_data'][0], self.sample_rate), 53 | decode=( 54 | mu_law_decode( 55 | torch.argmax(outputs[0][0], dim=0), 56 | mu_quantization=self.wavenet.n_out_channels), 57 | self.sample_rate) 58 | ), 59 | images=dict() 60 | ) 61 | return summary 62 | 63 | @classmethod 64 | def finalize_dogmatic_config(cls, config): 65 | config['wavenet']['factory'] = modules.WaveNet 66 | config['wavenet']['n_cond_channels'] = config['number_of_mel_filters'] 67 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/audio_synthesis/wavenet/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example call: 3 | 4 | export STORAGE_ROOT=<your desired storage root> 5 | python -m padertorch.contrib.examples.audio_synthesis.wavenet.train 6 | """ 7 | import os 8 | from pathlib import Path 9 | 10 | from lazy_dataset.database import JsonDatabase 11 | from padertorch.contrib.examples.audio_synthesis.wavenet.data import \ 12 | prepare_dataset 13 | from padertorch.contrib.examples.audio_synthesis.wavenet.model import WaveNet 14 | from padertorch.io import get_new_storage_dir 15 | from padertorch.train.optimizer import Adam 16 | from padertorch.train.trainer import Trainer 17 | from sacred import Experiment, commands 18 | from sacred.observers import FileStorageObserver 19 | 20 | ex = Experiment('wavenet') 21 | 22 | 23 | @ex.config 24 | def config(): 25 | database_json = ( 26 | str((Path(os.environ['NT_DATABASE_JSONS_DIR']) / 'librispeech.json').expanduser()) 27 | if 'NT_DATABASE_JSONS_DIR' in os.environ else None 28 | ) 29 | assert database_json is not None, ( 30 | 'database_json cannot be None.\n' 31 | 'Either start the 
training with "python -m padertorch.contrib.examples.' 32 | 'audio_synthesis.wavenet.train with database_json=" ' 33 | 'or make sure there is an environment variable "NT_DATABASE_JSONS_DIR"' 34 | 'pointing to a directory with a "librispeech.json" in it (see README ' 35 | 'for the JSON format).' 36 | ) 37 | training_sets = ['train_clean_100', 'train_clean_360'] 38 | validation_sets = ['dev_clean'] 39 | audio_reader = { 40 | 'source_sample_rate': 16000, 41 | 'target_sample_rate': 16000, 42 | } 43 | stft = { 44 | 'shift': 200, 45 | 'window_length': 800, 46 | 'size': 1024, 47 | 'fading': 'full', 48 | 'pad': True, 49 | } 50 | max_length_in_sec = 1. 51 | batch_size = 3 52 | number_of_mel_filters = 80 53 | trainer = { 54 | 'model': { 55 | 'factory': WaveNet, 56 | 'wavenet': { 57 | 'n_cond_channels': number_of_mel_filters, 58 | 'upsamp_window': stft['window_length'], 59 | 'upsamp_stride': stft['shift'], 60 | 'fading': stft['fading'], 61 | }, 62 | 'sample_rate': audio_reader['target_sample_rate'], 63 | 'stft_size': stft['size'], 64 | 'number_of_mel_filters': number_of_mel_filters, 65 | 'lowest_frequency': 50 66 | }, 67 | 'optimizer': { 68 | 'factory': Adam, 69 | 'lr': 5e-4, 70 | }, 71 | 'storage_dir': get_new_storage_dir( 72 | 'wavenet', id_naming='time', mkdir=False 73 | ), 74 | 'summary_trigger': (1_000, 'iteration'), 75 | 'checkpoint_trigger': (10_000, 'iteration'), 76 | 'stop_trigger': (200_000, 'iteration'), 77 | } 78 | trainer = Trainer.get_config(trainer) 79 | resume = False 80 | ex.observers.append(FileStorageObserver.create(trainer['storage_dir'])) 81 | 82 | 83 | @ex.automain 84 | def main( 85 | _run, _log, trainer, database_json, training_sets, validation_sets, 86 | audio_reader, stft, max_length_in_sec, batch_size, resume 87 | ): 88 | commands.print_config(_run) 89 | trainer = Trainer.from_config(trainer) 90 | storage_dir = Path(trainer.storage_dir) 91 | storage_dir.mkdir(parents=True, exist_ok=True) 92 | commands.save_config( 93 | _run.config, _log, config_filename=str(storage_dir / 'config.json') 94 | ) 95 | 96 | db = JsonDatabase(database_json) 97 | training_data = db.get_dataset(training_sets) 98 | validation_data = db.get_dataset(validation_sets) 99 | training_data = prepare_dataset( 100 | training_data, audio_reader=audio_reader, stft=stft, 101 | max_length_in_sec=max_length_in_sec, batch_size=batch_size, shuffle=True 102 | ) 103 | validation_data = prepare_dataset( 104 | validation_data, audio_reader=audio_reader, stft=stft, 105 | max_length_in_sec=max_length_in_sec, batch_size=batch_size, shuffle=False 106 | ) 107 | 108 | trainer.test_run(training_data, validation_data) 109 | trainer.register_validation_hook(validation_data) 110 | trainer.train(training_data, resume=resume) 111 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/examples.md: -------------------------------------------------------------------------------- 1 | ../../../doc/examples.md -------------------------------------------------------------------------------- /padertorch/contrib/examples/sound_recognition/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/examples/sound_recognition/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/examples/sound_recognition/audio_tagging/README.md: 
-------------------------------------------------------------------------------- 1 | # Audio Tagging 2 | 3 | This example trains and evaluates an audio tagging system based on WALNet [1] 4 | trained on AudioSet. A more sophisticated model for audio tagging and weakly 5 | labeled sound event detection which is also based on padertorch can be found at 6 | https://github.com/fgnt/pb_sed. 7 | 8 | ## Training 9 | 10 | The training script needs a JSON file that describes the structure of your 11 | database in the following format: 12 | ``` 13 | { 14 | "datasets": { 15 | <dataset name>: { 16 | <example id>: { 17 | "audio_path": <path to audio file>, 18 | "audio_length": <audio length in seconds>, 19 | "events": <list of events>, 20 | }, 21 | <example id>: { 22 | ... 23 | }, 24 | ... 25 | }, 26 | <dataset name>: { 27 | <example id>: { 28 | ... 29 | }, 30 | ... 31 | }, 32 | ... 33 | } 34 | } 35 | ``` 36 | It is expected that it contains datasets "validate" and "eval" (in our case, 37 | validate is a small part of the unbalanced train set). 38 | 39 | To start the training, first define a path to where the trained models should 40 | be saved: 41 | ```bash 42 | export STORAGE_ROOT=<path to storage root>; python -m padertorch.contrib.examples.sound_recognition.audio_tagging.train 43 | ``` 44 | Your trained models can be found in `$STORAGE_ROOT/audio_tagging/`. 45 | 46 | Note that the data input pipeline only extracts STFTs while the log mel 47 | extraction and normalization are done in the model. 48 | 49 | ## Evaluation 50 | 51 | The evaluation script loads the best checkpoint (by default the checkpoint with 52 | the highest achieved mAP on the validation set) and runs evaluation on the 53 | eval set. 54 | 55 | To run an evaluation, provide the evaluation script with the path to your trained model: 56 | ```bash 57 | python -m padertorch.contrib.examples.sound_recognition.audio_tagging.evaluate with exp_dir=<path to trained model> 58 | ``` 59 | 60 | Evaluation results can be found in `<exp_dir>/eval/`. 61 | In the file `overall.json` metrics averaged over all events can be found for 62 | the validation and eval sets. In the file `event_wise.json` you can find 63 | metrics for each event separately sorted by AP performance on the eval set. 64 | Further, there are files `fn.json` and `fp.json` in which the system's false 65 | negative and false positive predictions are saved. 66 | 67 | 68 | ## Results 69 | 70 | | Training set | Decision threshold tuning | Test set | mAP | mAUC | lwlrap | mF1 | 71 | | :-----: | :-----: | :-----: | :---: | :---: | :---: | :---: | 72 | | balanced_train | validate | validate | 22.02 | 92.16 | 48.4 | 31.76 | 73 | | balanced_train | validate | eval | 23.28 | 93.55 | 49.69 | 25.73 | 74 | 75 | The table above reports mean Average Precision (mAP), mean Area Under ROC Curve 76 | (mAUC), label weighted label-ranking average precision (lwlrap) and mean 77 | F1-score (mF1) in %. Here, "mean" refers to macro-averaging over the 78 | event-wise metrics. While mAP, mAUC and lwlrap do not rely on decision 79 | thresholds, the computation of F1 scores requires thresholds. Therefore, the 80 | event-specific decision thresholds are tuned on the validation set to give the best 81 | F1 scores. The big gap (>6%) between mF1 performance on the validation set 82 | and eval set can be explained by the poor generalization of the decision 83 | thresholds. 84 | 85 | [1] Shah, Ankit and Kumar, Anurag and Hauptmann, Alexander G and Raj, Bhiksha. 
86 | "A closer look at weak label learning for audio events", 87 | arXiv preprint arXiv:1804.09288, 2018 88 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/sound_recognition/audio_tagging/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/examples/sound_recognition/audio_tagging/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/examples/sound_recognition/audio_tagging/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example call: 3 | 4 | export STORAGE_ROOT= 5 | python -m padertorch.contrib.examples.sound_recognition.audio_tagging.train 6 | """ 7 | import os 8 | from pathlib import Path 9 | 10 | from paderbox.utils.random_utils import Uniform 11 | from padertorch import Trainer 12 | from padertorch.contrib.examples.sound_recognition.audio_tagging.data import \ 13 | get_datasets 14 | from padertorch.contrib.examples.sound_recognition.audio_tagging.model import \ 15 | WALNet 16 | from padertorch.io import get_new_storage_dir 17 | from padertorch.train.optimizer import Adam 18 | from sacred import Experiment, commands 19 | from sacred.observers import FileStorageObserver 20 | 21 | ex = Experiment('audio_tagging') 22 | 23 | 24 | @ex.config 25 | def config(): 26 | database_json = ( 27 | str((Path(os.environ['NT_DATABASE_JSONS_DIR']) / 'audio_set.json').expanduser()) 28 | if 'NT_DATABASE_JSONS_DIR' in os.environ else None 29 | ) 30 | assert database_json is not None, ( 31 | 'database_json cannot be None.\n' 32 | 'Either start the training with "python -m padertorch.contrib.examples.' 33 | 'audio_synthesis.wavenet.train with database_json=" ' 34 | 'or make sure there is an environment variable "NT_DATABASE_JSONS_DIR"' 35 | 'pointing to a directory with a "audio_set.json" in it (see README ' 36 | 'for the JSON format).' 
37 | ) 38 | training_set = 'balanced_train' 39 | audio_reader = { 40 | 'source_sample_rate': 44_100, 41 | 'target_sample_rate': 44_100, 42 | } 43 | stft = { 44 | 'shift': 882, 45 | 'window_length': 2*882, 46 | 'size': 2048, 47 | 'fading': None, 48 | 'pad': False, 49 | } 50 | num_workers = 8 51 | batch_size = 24 52 | max_padding_rate = .05 53 | trainer = { 54 | 'model': { 55 | 'factory': WALNet, 56 | 'sample_rate': audio_reader['target_sample_rate'], 57 | 'stft_size': stft['size'], 58 | 'output_size': 527, 59 | }, 60 | 'optimizer': { 61 | 'factory': Adam, 62 | 'lr': 3e-4, 63 | 'gradient_clipping': 60., 64 | }, 65 | 'storage_dir': get_new_storage_dir( 66 | 'audio_tagging', id_naming='time', mkdir=False 67 | ), 68 | 'summary_trigger': (100, 'iteration'), 69 | 'checkpoint_trigger': (1_000, 'iteration'), 70 | 'stop_trigger': (50_000, 'iteration'), 71 | } 72 | trainer = Trainer.get_config(trainer) 73 | validation_metric = 'map' 74 | maximize_metric = True 75 | resume = False 76 | ex.observers.append(FileStorageObserver.create(trainer['storage_dir'])) 77 | 78 | 79 | @ex.automain 80 | def main( 81 | _run, _log, trainer, database_json, training_set, 82 | validation_metric, maximize_metric, 83 | audio_reader, stft, num_workers, batch_size, max_padding_rate, resume 84 | ): 85 | commands.print_config(_run) 86 | trainer = Trainer.from_config(trainer) 87 | storage_dir = Path(trainer.storage_dir) 88 | storage_dir.mkdir(parents=True, exist_ok=True) 89 | commands.save_config( 90 | _run.config, _log, config_filename=str(storage_dir / 'config.json') 91 | ) 92 | 93 | training_data, validation_data, _ = get_datasets( 94 | database_json=database_json, min_signal_length=1.5, 95 | audio_reader=audio_reader, stft=stft, num_workers=num_workers, 96 | batch_size=batch_size, max_padding_rate=max_padding_rate, 97 | training_set=training_set, storage_dir=storage_dir, 98 | stft_stretch_factor_sampling_fn=Uniform(low=0.5, high=1.5), 99 | stft_segment_length=audio_reader['target_sample_rate'], 100 | stft_segment_shuffle_prob=0., 101 | mixup_probs=(1/2, 1/2), max_mixup_length=15., min_mixup_overlap=.8, 102 | ) 103 | 104 | trainer.test_run(training_data, validation_data) 105 | trainer.register_validation_hook( 106 | validation_data, metric=validation_metric, maximize=maximize_metric 107 | ) 108 | trainer.train(training_data, resume=resume) 109 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/source_localization/distance_estimator/Makefile: -------------------------------------------------------------------------------- 1 | .DEFAULT_GOAL = complete 2 | 3 | # Add values for missing constants here or pass them via the terminal 4 | 5 | # path where the RIR database should be stored 6 | RIR_PATH = 7 | # path where the JSON of the RIR database should be stored (optional, otherwise stored in the same directory as the RIR database) 8 | RIR_JSON = 9 | # path where the LibriSpeech database should be stored, can be left empty if the database already exists 10 | LIBRI_PATH = 11 | # path where the JSON of the LibriSpeech database should be stored (optional if LibriSpeech does not already exist; otherwise stored in the same directory as LibriSpeech) 12 | LIBRI_JSON = 13 | # path where the JSON containing the VAD information for LibriSpeech should be stored (optional, unless LibriSpeech gets updated) 14 | VAD_JSON = 15 | 16 | complete: #prepare all prerequisites 17 | ifndef RIR_PATH 18 | $(error 'Please specify RIR_PATH, the path where the RIR database should be stored, or fill 
out the constants in the Makefile')
19 | endif
20 | ifndef LIBRI_PATH
21 | $(error 'Please specify LIBRI_PATH, the path where the LibriSpeech database should be stored, or fill out the constants in the Makefile')
22 | endif
23 | ifndef VAD_JSON
24 | $(eval VAD_JSON=$(LIBRI_PATH))
25 | endif
26 | 	python download.py with rir_path=$(RIR_PATH) libri_path=$(LIBRI_PATH) vad_json_path=$(VAD_JSON)
27 | # since tar_info of LibriSpeech creates its own LibriSpeech subdirectory in the specified path, add this to the path before the JSON is created
28 | $(eval override LIBRI_PATH=$(addsuffix /LibriSpeech,$(LIBRI_PATH)))
29 | 	python create_jsons.py with rir_path=$(RIR_PATH) rir_json_path=$(RIR_JSON) libri_path=$(LIBRI_PATH) libri_json_path=$(LIBRI_JSON) vad_json_path=$(VAD_JSON)
30 | 
31 | rir: # assumes that LibriSpeech already exists and should be updated with VAD information
32 | ifndef LIBRI_JSON
33 | $(error 'Please specify LIBRI_JSON, the path where the LibriSpeech JSON is stored, or fill out the constants in the Makefile')
34 | endif
35 | ifndef VAD_JSON
36 | $(error 'Please specify VAD_JSON, the path where the JSON with VAD information should be stored, or fill out the constants in the Makefile')
37 | endif
38 | ifndef RIR_PATH
39 | $(error 'Please specify RIR_PATH, the path where the RIR database should be stored, or fill out the constants in the Makefile')
40 | endif
41 | 	python download.py with rir_path=$(RIR_PATH) vad_json_path=$(VAD_JSON)
42 | 	python create_jsons.py with update_librispeech rir_path=$(RIR_PATH) rir_json_path=$(RIR_JSON) libri_json_path=$(LIBRI_JSON) vad_json_path=$(VAD_JSON)
43 | 
44 | 
45 | librispeech_full: # assumes that the RIR database already exists
46 | ifndef LIBRI_PATH
47 | $(error 'Please specify LIBRI_PATH, the path where the LibriSpeech database should be stored, or fill out the constants in the Makefile')
48 | endif
49 | ifndef VAD_JSON
50 | $(eval VAD_JSON=$(LIBRI_PATH))
51 | endif
52 | 	python download.py with libri_path=$(LIBRI_PATH) vad_json_path=$(VAD_JSON)
53 | # since tar_info of LibriSpeech creates its own LibriSpeech subdirectory in the specified path, add this to the path before the JSON is created
54 | $(eval override LIBRI_PATH=$(addsuffix /LibriSpeech,$(LIBRI_PATH)))
55 | 	python create_jsons.py with libri_path=$(LIBRI_PATH) libri_json_path=$(LIBRI_JSON) vad_json_path=$(VAD_JSON)
56 | 
57 | librispeech_update: # assumes that both databases exist; only LibriSpeech must be updated with VAD information if it should be used for the speech source signals
58 | ifndef LIBRI_JSON
59 | $(error 'Please specify LIBRI_JSON, the path where the LibriSpeech JSON is stored, or fill out the constants in the Makefile')
60 | endif
61 | ifndef VAD_JSON
62 | $(error 'Please specify VAD_JSON, the path where the JSON with VAD information should be stored, or fill out the constants in the Makefile')
63 | endif
64 | 	python download.py with vad_json_path=$(VAD_JSON)
65 | 	python create_jsons.py with update_librispeech libri_json_path=$(LIBRI_JSON) vad_json_path=$(VAD_JSON)
66 | 
67 | 
68 | 
--------------------------------------------------------------------------------
/padertorch/contrib/examples/source_localization/distance_estimator/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import DistanceEstimator
--------------------------------------------------------------------------------
/padertorch/contrib/examples/source_separation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/examples/source_separation/__init__.py
--------------------------------------------------------------------------------
/padertorch/contrib/examples/source_separation/or_pit/README.md:
--------------------------------------------------------------------------------
1 | One-and-Rest-PIT
2 | ================
3 | 
4 | This directory contains scripts to train and evaluate a One-and-Rest-PIT model [1].
5 | This is a recursive separation model that uses a time-domain separation network at its core.
6 | By default, it uses the DPRNN implementation from `padertorch.examples.tasnet` as a separator.
7 | 
8 | Training
9 | --------
10 | 
11 | Prerequisites
12 | 
13 | - Set `${STORAGE_ROOT}` to the location you want to store your experiment results
14 | - Set `OMP_NUM_THREADS=1` and `MKL_NUM_THREADS=1`
15 | - Prepare the JSON(s) for different numbers of speakers. Each example must have the key `'speaker_id'` with a list as
16 | value whose length corresponds to the number of speakers in the mixture
17 | 
18 | The training procedure of the OR-PIT consists of two steps: a base training without fine-tuning, followed by fine-tuning.
19 | The training for the first step can be run with:
20 | 
21 | ```bash
22 | $ python -m padertorch.contrib.examples.source_separation.or_pit.train with database_jsons=${path_to_your_jsons}
23 | ```
24 | 
25 | You can initialize an experiment directory with `python -m ...or_pit.train init with ...` and start it with:
26 | 
27 | ```bash
28 | $ make train
29 | ```
30 | 
31 | The `database_jsons` can be a single file or, if you want to supply multiple files, a comma-separated list of files.
32 | Make sure to set `train_datasets` and `validation_datasets` according to the datasets available in the supplied
33 | database JSONs (they are set to use WSJ0-2mix and WSJ0-3mix by default).
34 | 
35 | The fine-tune experiment can be initialized with:
36 | 
37 | ```bash
38 | $ make finetune
39 | ```
40 | 
41 | This command creates a new storage dir and uses the same configuration (including number of epochs, data, etc.) as the base training.
42 | 
43 | Evaluation
44 | ----------
45 | 
46 | Start an evaluation with
47 | 
48 | ```bash
49 | $ python -m padertorch.contrib.examples.source_separation.or_pit.evaluate with model_path=${path_to_the_model_dir} database_json=${path_to_the_json} "datasets=['your','datasets']"
50 | ```
51 | 
52 | Enable audio exporting with `dump_audio=True`.
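For example, a complete call that also exports the separated audio is `python -m padertorch.contrib.examples.source_separation.or_pit.evaluate with model_path=${path_to_the_model_dir} database_json=${path_to_the_json} "datasets=['your','datasets']" dump_audio=True`.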
53 | 
54 | Important configuration values
55 | ------------------------------
56 | 
57 | - `batch_size`: Set the batch size
58 | - `trainer.stop_trigger`: Set the number of iterations or epochs to perform (e.g., `trainer.stop_trigger=(100,'epoch')` for 100 epochs)
59 | - `trainer.model.finetune`: Enables fine-tuning
60 | - `trainer.model.stop_condition`: The criterion to use for stopping during evaluation. Can be `'flag'` or `'threshold'`.
61 | - `trainer.model.unroll_type`: Determines how many iterations to perform for a given number of speakers. Can be `'res-single'` (iterate until the residual output contains a single speaker), `'res-silent'` (iterate until the residual signal is silent) or `'est-silent'` (iterate until the estimated signal is silent)
62 | 
63 | 
64 | References
65 | ----------
66 | 
67 | [1] Takahashi, Naoya, Sudarsanam Parthasaarathy, Nabarun Goswami, and Yuki Mitsufuji. "Recursive speech
68 | separation for unknown number of speakers", April 5, 2019.
69 | 
--------------------------------------------------------------------------------
/padertorch/contrib/examples/source_separation/or_pit/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import OneAndRestPIT
--------------------------------------------------------------------------------
/padertorch/contrib/examples/source_separation/or_pit/templates.py:
--------------------------------------------------------------------------------
1 | MAKEFILE_TEMPLATE_TRAIN = """SHELL := /bin/bash
2 | MODEL_PATH := $(shell pwd)
3 | 
4 | export OMP_NUM_THREADS=1
5 | export MKL_NUM_THREADS=1
6 | 
7 | train:
8 | \tpython -m {main_python_path} with config.json
9 | 
10 | finetune:
11 | \tpython -m {main_python_path} init_with_new_storage_dir with config.json trainer.model.finetune=True load_model_from=$(MODEL_PATH)/checkpoints/ckpt_latest.pth batch_size=1
12 | 
13 | ccsalloc:
14 | \tccsalloc \\
15 | \t\t--res=rset=1:ncpus=4:gtx1080=1:ompthreads=1 \\
16 | \t\t--time=100h \\
17 | \t\t--stdout=%x.%reqid.out \\
18 | \t\t--stderr=%x.%reqid.err \\
19 | \t\t--tracefile=%x.%reqid.trace \\
20 | \t\t-N train_{experiment_name} \\
21 | \t\tpython -m {main_python_path} with config.json
22 | 
23 | evaluate:
24 | \tpython -m {eval_python_path} init with model_path=$(MODEL_PATH)
25 | 
26 | evaluate_oracle_num_spk:
27 | \tpython -m {eval_python_path} init with model_path=$(MODEL_PATH) oracle_num_spk=True
28 | """
29 | 
30 | MAKEFILE_TEMPLATE_EVAL = """SHELL := /bin/bash
31 | 
32 | evaluate:
33 | \tpython -m {main_python_path} with config.json
34 | 
35 | ccsalloc:
36 | \tccsalloc \\
37 | \t\t--res=rset=100:mpiprocs=1:ncpus=1:mem=4g:vmem=6g \\
38 | \t\t--time=1h \\
39 | \t\t--stdout=%x.%reqid.out \\
40 | \t\t--stderr=%x.%reqid.err \\
41 | \t\t--tracefile=%x.%reqid.trace \\
42 | \t\t-N evaluate_{experiment_name} \\
43 | \t\tompi ${{OMPI_PARAMS}} \\
44 | \t\t-- \\
45 | \t\tpython -m {main_python_path} with config.json
46 | """
47 | 
--------------------------------------------------------------------------------
/padertorch/contrib/examples/source_separation/pit/README.md:
--------------------------------------------------------------------------------
1 | BLSTM-based Permutation Invariant Training (PIT)
2 | ================
3 | 
4 | This directory contains scripts to train and evaluate the basic utterance-level permutation
5 | invariant training model (uPIT)
6 | for source separation [1].
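To make the core idea concrete, here is a minimal sketch of an utterance-level permutation-invariant loss for a fixed number of sources. It only illustrates the principle from [1] and is not necessarily the exact loss implemented in this example's `model.py`:

```python
import itertools

import torch


def upit_mse_loss(estimate, target):
    """Utterance-level PIT: choose one source permutation per utterance.

    estimate, target: tensors of shape (batch, sources, time, frequency).
    """
    num_sources = estimate.shape[1]
    # Loss of every permutation for every utterance: (num_permutations, batch)
    losses = torch.stack([
        ((estimate[:, list(perm)] - target) ** 2).mean(dim=(1, 2, 3))
        for perm in itertools.permutations(range(num_sources))
    ])
    # Best permutation per utterance, averaged over the batch
    return losses.min(dim=0).values.mean()
```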
7 | 
8 | 
9 | Training
10 | --------
11 | 
12 | Prerequisites
13 | 
14 | - Set `${STORAGE_ROOT}` to the location you want to store your experiment results
15 | - Set `OMP_NUM_THREADS=1` and `MKL_NUM_THREADS=1`
16 | - Prepare the JSON(s) for your database. Each example must be sorted by `num_samples`, as the model
17 | uses PyTorch's `PackedSequence`
18 | 
19 | ```bash
20 | $ python -m padertorch.contrib.examples.source_separation.pit.train with database_json=${path_to_your_jsons}
21 | ```
22 | 
23 | You can initialize an experiment directory with `python -m ...pit.train init with ...` and start it with:
24 | 
25 | ```bash
26 | $ make train
27 | ```
28 | 
29 | Make sure to set `train_dataset` and `validation_dataset` according to the datasets available in the supplied
30 | database JSONs (they are set to use WSJ0-2mix by default).
31 | 
32 | Evaluation
33 | ----------
34 | 
35 | Start an evaluation with
36 | 
37 | ```bash
38 | $ python -m padertorch.contrib.examples.source_separation.pit.evaluate with model_path=${path_to_the_model_dir} database_json=${path_to_the_json} "datasets=['your','datasets']"
39 | ```
40 | 
41 | If you want to speed up your evaluation, you can also call
42 | ```bash
43 | $ mpiexec -np ${n_jobs} python -m padertorch.contrib.examples.source_separation.pit.evaluate with model_path=${path_to_the_model_dir} database_json=${path_to_the_json} "datasets=['your','datasets']"
44 | ```
45 | to parallelize your evaluation over several CPU cores.
46 | 
47 | Important configuration values
48 | ------------------------------
49 | 
50 | - `batch_size`: Set the batch size
51 | - `trainer.stop_trigger`: Set the number of iterations or epochs to perform (e.g., `trainer.stop_trigger=(100,'epoch')` for 100 epochs)
52 | 
53 | 
54 | References
55 | ----------
56 | 
57 | [1] Morten Kolbæk, Dong Yu, Zheng-Hua Tan, Jesper Jensen. "Multi-talker Speech Separation with Utterance-level
58 | Permutation Invariant Training of Deep Recurrent Neural Networks", March 18, 2017.
59 | 
--------------------------------------------------------------------------------
/padertorch/contrib/examples/source_separation/pit/__init__.py:
--------------------------------------------------------------------------------
1 | from .
import model 2 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/source_separation/pit/data.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import einops 4 | import numpy as np 5 | 6 | import paderbox as pb 7 | import padertorch as pt 8 | from paderbox.transform import stft 9 | 10 | 11 | def prepare_dataset( 12 | db, dataset_name: str, batch_size, return_keys=None, prefetch=True, shuffle=True 13 | ): 14 | audio_keys = ['observation', 'speech_source'] 15 | dataset = db.get_dataset(dataset_name) 16 | 17 | dataset = ( 18 | dataset 19 | .map(partial(read_audio, audio_keys=audio_keys)) 20 | .map(partial(pre_batch_transform, return_keys=return_keys)) 21 | ) 22 | if shuffle: 23 | dataset = dataset.shuffle(reshuffle=True) 24 | dataset = ( 25 | dataset 26 | .batch(batch_size) 27 | .map(pt.data.batch.Sorter('num_frames')) 28 | .map(pt.data.utils.collate_fn) 29 | .map(post_batch_transform) 30 | ) 31 | 32 | if prefetch: 33 | dataset = dataset.prefetch(4, 8) 34 | 35 | return dataset 36 | 37 | 38 | def read_audio(example, src_key="audio_path", audio_keys=None): 39 | data = { 40 | audio_key: pb.io.audioread.recursive_load_audio( 41 | example[src_key][audio_key], 42 | ) 43 | for audio_key in audio_keys 44 | } 45 | example["audio_data"] = data 46 | return example 47 | 48 | 49 | def pre_batch_transform(inputs, return_keys=None): 50 | s = inputs['audio_data']['speech_source'] 51 | y = inputs['audio_data']['observation'] 52 | S = stft(s, 512, 128) 53 | Y = stft(y, 512, 128) 54 | Y = einops.rearrange(Y, 't f -> t f') 55 | S = einops.rearrange(S, 'k t f -> t k f') 56 | X = S # Same for WSJ0_2MIX database 57 | num_frames = Y.shape[0] 58 | 59 | return_dict = dict() 60 | 61 | def maybe_add(key, value): 62 | if return_keys is None or key in return_keys: 63 | return_dict[key] = value 64 | 65 | maybe_add('example_id', inputs['example_id']) 66 | maybe_add('s', np.ascontiguousarray(s, np.float32)) 67 | maybe_add('S', np.ascontiguousarray(S, np.float32)) 68 | maybe_add('y', np.ascontiguousarray(y, np.float32)) 69 | maybe_add('Y', np.ascontiguousarray(Y, np.complex64)) 70 | maybe_add('X_abs', np.ascontiguousarray(np.abs(X), np.float32)) 71 | maybe_add('Y_abs', np.ascontiguousarray(np.abs(Y), np.float32)) 72 | maybe_add('num_frames', num_frames) 73 | maybe_add('cos_phase_difference', np.ascontiguousarray( 74 | np.cos(np.angle(Y[:, None, :]) - np.angle(X)), np.float32) 75 | ) 76 | 77 | return return_dict 78 | 79 | 80 | def post_batch_transform(batch): 81 | return batch 82 | 83 | 84 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/source_separation/pit/templates.py: -------------------------------------------------------------------------------- 1 | MAKEFILE_TEMPLATE_TRAIN = """ 2 | SHELL := /bin/bash 3 | 4 | train: 5 | \tpython -m {main_python_path} with config.json 6 | 7 | ccsalloc: 8 | \tccsalloc \\ 9 | \t\t--res=rset=1:ncpus=4:gtx1080=1:ompthreads=1 \\ 10 | \t\t--time=100h \\ 11 | \t\t--stdout=%x.%reqid.out \\ 12 | \t\t--stderr=%x.%reqid.err \\ 13 | \t\t--tracefile=%x.%reqid.trace \\ 14 | \t\t-N train_{experiment_name} \\ 15 | \t\tpython -m {main_python_path} with config.json 16 | """ 17 | 18 | MAKEFILE_TEMPLATE_EVAL = """ 19 | SHELL := /bin/bash 20 | 21 | evaluate: 22 | \tpython -m {main_python_path} with config.json 23 | 24 | ccsalloc: 25 | \tccsalloc \\ 26 | \t\t--res=rset=200:mpiprocs=1:ncpus=1:mem=4g:vmem=6g \\ 27 
| \t\t--time=1h \\
28 | \t\t--stdout=%x.%reqid.out \\
29 | \t\t--stderr=%x.%reqid.err \\
30 | \t\t--tracefile=%x.%reqid.trace \\
31 | \t\t-N evaluate_{experiment_name} \\
32 | \t\tompi ${{OMPI_PARAMS}} \\
33 | \t\t-- \\
34 | \t\tpython -m {main_python_path} with config.json
35 | """
36 | 
--------------------------------------------------------------------------------
/padertorch/contrib/examples/source_separation/tasnet/__init__.py:
--------------------------------------------------------------------------------
1 | from . import tas_coders
2 | from .model import TasNet
--------------------------------------------------------------------------------
/padertorch/contrib/examples/source_separation/tasnet/templates.py:
--------------------------------------------------------------------------------
1 | MAKEFILE_TEMPLATE_TRAIN = """SHELL := /bin/bash
2 | MODEL_PATH := $(shell pwd)
3 | 
4 | export OMP_NUM_THREADS=1
5 | export MKL_NUM_THREADS=1
6 | 
7 | train:
8 | \tpython -m {main_python_path} with config.json
9 | 
10 | ccsalloc:
11 | \tccsalloc \\
12 | \t\t--res=rset=1:ncpus=4:gtx1080=1:ompthreads=1 \\
13 | \t\t--time=100h \\
14 | \t\t--stdout=%x.%reqid.out \\
15 | \t\t--stderr=%x.%reqid.err \\
16 | \t\t--tracefile=%x.%reqid.trace \\
17 | \t\t-N train_{experiment_name} \\
18 | \t\tpython -m {main_python_path} with config.json
19 | 
20 | evaluate:
21 | \tpython -m {eval_python_path} init with model_path=$(MODEL_PATH)"""
22 | 
23 | MAKEFILE_TEMPLATE_EVAL = """SHELL := /bin/bash
24 | 
25 | evaluate:
26 | \tpython -m {main_python_path} with config.json
27 | 
28 | ccsalloc:
29 | \tccsalloc \\
30 | \t\t--res=rset=100:mpiprocs=1:ncpus=1:mem=4g:vmem=6g \\
31 | \t\t--time=1h \\
32 | \t\t--stdout=%x.%reqid.out \\
33 | \t\t--stderr=%x.%reqid.err \\
34 | \t\t--tracefile=%x.%reqid.trace \\
35 | \t\t-N evaluate_{experiment_name} \\
36 | \t\tompi ${{OMPI_PARAMS}} \\
37 | \t\t-- \\
38 | \t\tpython -m {main_python_path} with config.json
39 | """
--------------------------------------------------------------------------------
/padertorch/contrib/examples/speaker_classification/supervised/README.md:
--------------------------------------------------------------------------------
1 | # Speaker Classification
2 | 
3 | This example performs a simple speaker classification on the *clean_100* and
4 | *clean_360* datasets of the LibriSpeech corpus.
5 | 
6 | ## Training
7 | To start the training, first define a path to where the trained models should be saved:
8 | ```bash
9 | export STORAGE_ROOT=<path/to/storage/root>; python -m padertorch.contrib.examples.speaker_classification.supervised.train with database_json=<path/to/database.json> dataset=<dataset_name>
10 | ```
11 | Your trained models can be found in `$STORAGE_ROOT/speaker_clf`. During training,
12 | only 80% of the dataset is used for training. 10% are left out for validation
13 | and another 10% for evaluation.
14 | 
15 | The training script needs a JSON file that describes the structure of your
16 | database in the following format:
17 | ```
18 | {
19 |     "datasets": {
20 |         <dataset_name>: {
21 |             <example_id>: {
22 |                 "audio_path": {
23 |                     "observation": <path/to/audio/file>
24 |                 },
25 |                 "speaker_id": <speaker_id>
26 |             },
27 |             <example_id>: {
28 |                 ...
29 |             },
30 |             ...
31 |         },
32 |         <dataset_name>: {
33 |             <example_id>: {
34 |                 ...
35 |             },
36 |             ...
37 |         },
38 |         ...
39 |     }
40 | }
41 | ```
42 | If you train on LibriSpeech like we did, be aware that the speaker ID is defined
43 | as `<speaker_id>-<chapter_id>` by LibriSpeech, where `<chapter_id>` is an
44 | identifier for a book chapter.
45 | Here, we perform a speaker identification across chapters, so we omit the chapter
46 | ID (the part of the speaker ID after the hyphen).
47 | This is taken care of during the data preparation.
48 | Generally, if the speaker ID contains one or more hyphens, the data preparation
49 | will take the part before the **first** hyphen as the final speaker label for
50 | classification.
51 | If the speaker ID does not contain any hyphens, it will use the complete speaker
52 | ID string as-is as the speaker label.
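The labeling rule described above boils down to the following (a hypothetical helper for illustration, not code from this example):

```python
def speaker_label(speaker_id: str) -> str:
    """'103-1240' -> '103'; IDs without a hyphen are returned unchanged."""
    return speaker_id.split('-')[0]
```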
53 | 
54 | ## Evaluation
55 | 
56 | To run an evaluation, provide the evaluation script with the path to your
57 | trained model:
58 | ```bash
59 | mpiexec -np $(nproc --all) python -m padertorch.contrib.examples.speaker_classification.supervised.evaluate with model_path=<path/to/trained/model/dir>
60 | ```
61 | The evaluation script loads the best checkpoint (lowest achieved loss) and
62 | performs a speaker classification on the evaluation data.
63 | It requires [dlp_mpi](https://github.com/fgnt/dlp_mpi) to be installed.
64 | For each misclassified example, symlinks to the example audio file and to an audio
65 | example of the wrongly classified speaker are stored.
66 | 
67 | ## Results
68 | 
69 | | Database | Dataset | Num. Speakers | Num. Eval Examples | Classification Accuracy |
70 | | :------: | :-----: | :-----------: | :----------------: | :---------------------: |
71 | | LibriSpeech | clean_100 | 251 | 2853 | 98.60% |
72 | | LibriSpeech | clean_360 | 921 | 10401 | 94.72% |
--------------------------------------------------------------------------------
/padertorch/contrib/examples/speaker_classification/supervised/model.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from einops import rearrange
4 | from padertorch.base import Model
5 | from torchvision.utils import make_grid
6 | 
7 | 
8 | class SpeakerClf(Model):
9 |     def __init__(self, feature_extractor, cnn, enc, fcn):
10 |         super().__init__()
11 |         self.feature_extractor = feature_extractor
12 |         self.cnn = cnn
13 |         self.enc = enc
14 |         self.fcn = fcn
15 | 
16 |     def forward(self, inputs):
17 |         x = inputs['features']
18 |         seq_len = inputs['seq_len']
19 | 
20 |         x = self.feature_extractor(x, seq_len)
21 | 
22 |         # cnn
23 |         x, seq_len = self.cnn(x, sequence_lengths=seq_len)
24 | 
25 |         # rnn
26 |         if self.enc.batch_first:
27 |             x = rearrange(x, 'b f t -> b t f')
28 |         else:
29 |             x = rearrange(x, 'b f t -> t b f')
30 |         x, _ = self.enc(x)
31 |         if not self.enc.batch_first:
32 |             x = rearrange(x, 't b f -> b t f')
33 |         x = x[torch.arange(len(seq_len)), seq_len - 1]
34 | 
35 |         x = self.fcn(x)
36 |         return x
37 | 
38 |     def review(self, inputs, outputs):
39 |         labels = inputs['speaker_id']
40 |         ce = torch.nn.CrossEntropyLoss(reduction='none')(outputs, labels)
41 |         summary = dict(
42 |             loss=ce.mean(),
43 |             scalars=dict(
44 |                 labels=labels,
45 |                 predictions=torch.argmax(outputs, dim=-1)
46 |             ),
47 |             images=dict(
48 |                 features=inputs['features'][:3]
49 |             )
50 |         )
51 |         return summary
52 | 
53 |     def modify_summary(self, summary):
54 |         if 'labels' in summary['scalars']:
55 |             labels = summary['scalars'].pop('labels')
56 |             predictions = summary['scalars'].pop('predictions')
57 |             summary['scalars']['accuracy'] = (
58 |                 np.array(predictions) == np.array(labels)
59 |             ).mean()
60 |         summary = super().modify_summary(summary)
61 |         for key, image in summary['images'].items():
62 |             summary['images'][key] = make_grid(
63 |                 image.flip(2), normalize=True, scale_each=False, nrow=1
64 |             )
65 |         return summary
66 | 
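As a small self-contained illustration (with made-up shapes), the gather `x[torch.arange(len(seq_len)), seq_len - 1]` in `SpeakerClf.forward` picks, for every batch entry, the RNN output at its last valid time step:

```python
import torch

x = torch.arange(24, dtype=torch.float32).view(2, 4, 3)  # (batch, time, features)
seq_len = torch.tensor([2, 4])  # valid lengths per batch entry
last = x[torch.arange(x.shape[0]), seq_len - 1]  # shape: (2, 3)
```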
--------------------------------------------------------------------------------
/padertorch/contrib/examples/speech_enhancement/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/examples/speech_enhancement/__init__.py
--------------------------------------------------------------------------------
/padertorch/contrib/examples/speech_enhancement/mask_estimator/README.md:
--------------------------------------------------------------------------------
1 | Simple Mask Estimator
2 | =============
3 | 
4 | This directory contains scripts to train and evaluate a simple mask estimator
5 | inspired by [1].
6 | 
7 | Results
8 | -------
9 | 
10 | The mask estimator model achieves the following results on
11 | the CHiME 3 simulated evaluation set:
12 | 
13 | 
14 | data type | PESQ | STOI | SDR
15 | :------------------|--------------|--------------|--------------:
16 | observed | 1.07 | 0.672 | -0.79 dB
17 | masked | 1.22 | 0.736 | 5.68 dB
18 | beamformed | 1.91 | 0.958 | 17.10 dB
19 | 
20 | Masked and observed are evaluated on the first channel of the 6ch track.
21 | 
22 | Training
23 | --------
24 | 
25 | A storage root must be set with `export STORAGE_ROOT=/path/to/your/storage`.
26 | After installing `padertorch`, a training can for example be started with
27 | 
28 | ```bash
29 | $ export STORAGE_ROOT=/path/to/your/storage; python -m padertorch.contrib.examples.speech_enhancement.mask_estimator.train with database_json=/path/to/json
30 | ```
31 | 
32 | The database json path should point to a json containing all information about
33 | the CHiME3 data in a format described in `lazy_dataset.database`.
34 | Each example should contain at least the following keys:
35 | ```
36 | audio_path:
37 |     speech_source:
38 |         <path/to/clean/speech>
39 |     observation:
40 |         array: [
41 |             <path/to/channel_1>
42 |             <path/to/channel_2>
43 |             ...
44 |         ]
45 |     # the following keys are not necessary during evaluation
46 |     speech_image: [
47 |         ...
48 |     ]
49 |     noise_image: [
50 |         ...
51 |     ]
52 | ```
53 | 
54 | Evaluation
55 | ----------
56 | 
57 | The evaluation requires `dlp_mpi` and `pb_bss` as additional dependencies.
58 | `dlp_mpi` can be installed via `pip install dlp_mpi` and `pb_bss` is available at [github.com/fgnt/pb_bss](https://github.com/fgnt/pb_bss).
59 | The evaluation can be started by
60 | 
61 | ```bash
62 | $ export STORAGE_ROOT=/path/to/your/storage; mpiexec -n $(nproc --all) python -m padertorch.contrib.examples.speech_enhancement.mask_estimator.evaluate with database_json=/path/to/json
63 | ```
64 | It always evaluates the latest model in the specified `STORAGE_ROOT`.
65 | 
66 | If you want to evaluate a specific checkpoint, specify the path as an
67 | additional argument to the call.
68 | 
69 | ```bash
70 | $ export STORAGE_ROOT=/path/to/your/storage; mpiexec -n $(nproc --all) python -m padertorch.contrib.examples.speech_enhancement.mask_estimator.evaluate with database_json=/path/to/json checkpoint_path=/path/to/checkpoint
71 | ```
72 | 
73 | References
74 | ----------
75 | 
76 | [1] J. Heymann, L. Drude, A. Chinaev, and R. Haeb-Umbach,
77 | "BLSTM supported GEV beamformer front-end for the 3rd CHiME challenge",
78 | Proc. Worksh. Automat.
Speech Recognition and Understanding, 2015.
79 | https://www.researchgate.net/publication/304407561_BLSTM_supported_GEV_beamformer_front-end_for_the_3RD_CHiME_challenge
80 | 
81 | 
--------------------------------------------------------------------------------
/padertorch/contrib/examples/speech_enhancement/mask_estimator/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import SimpleMaskEstimator
2 | 
--------------------------------------------------------------------------------
/padertorch/contrib/examples/speech_enhancement/mask_estimator/model.py:
--------------------------------------------------------------------------------
1 | import padertorch as pt
2 | import torch
3 | from padertorch.summary import mask_to_image, stft_to_image
4 | 
5 | 
6 | class SimpleMaskEstimator(pt.Model):
7 |     def __init__(self, num_features, num_units=1024, dropout=0.5,
8 |                  activation='elu'):
9 |         """
10 | 
11 |         Args:
12 |             num_features: number of input features
13 |             num_units: number of units in the linear layers
14 |             dropout: dropout probability
15 |             activation: activation for the linear layers except the output layer
16 | 
17 |         >>> net = SimpleMaskEstimator(513)
18 |         """
19 |         super().__init__()
20 |         self.num_features = num_features
21 |         self.net = torch.nn.Sequential(
22 |             pt.modules.Normalization(
23 |                 'btf', (1, 1, num_features), statistics_axis='t',
24 |                 independent_axis='f', batch_axis='b', sequence_axis='t'
25 |             ),
26 |             pt.modules.StatefulLSTM(
27 |                 num_features, num_units // 4,
28 |                 bidirectional=True, batch_first=True, save_states=False
29 |             ),
30 |             torch.nn.Dropout(dropout),
31 |             torch.nn.Linear((num_units // 4) * 2, num_units),
32 |             pt.mappings.ACTIVATION_FN_MAP[activation](),
33 |             torch.nn.Dropout(dropout),
34 |             torch.nn.Linear(num_units, num_units),
35 |             pt.mappings.ACTIVATION_FN_MAP[activation](),
36 |             # twice num_features for speech and noise mask
37 |             torch.nn.Linear(num_units, 2 * num_features),
38 |             # Output activation to force outputs between 0 and 1
39 |             torch.nn.Sigmoid()
40 |         )
41 | 
42 |     def forward(self, batch):
43 | 
44 |         x = batch['observation_abs']
45 |         out = self.net(x)
46 |         return dict(
47 |             speech_mask_prediction=out[..., :self.num_features],
48 |             noise_mask_prediction=out[..., self.num_features:],
49 |         )
50 | 
51 |     def review(self, batch, output):
52 |         noise_mask_loss = torch.nn.functional.binary_cross_entropy(
53 |             output['noise_mask_prediction'], batch['noise_mask_target']
54 |         )
55 |         speech_mask_loss = torch.nn.functional.binary_cross_entropy(
56 |             output['speech_mask_prediction'], batch['speech_mask_target']
57 |         )
58 |         return dict(loss=noise_mask_loss + speech_mask_loss,
59 |                     images=self.add_images(batch, output))
60 | 
61 |     @staticmethod
62 |     def add_images(batch, output):
63 |         speech_mask = output['speech_mask_prediction']
64 |         observation = batch['observation_abs']
65 |         images = dict()
66 |         images['speech_mask'] = mask_to_image(speech_mask, True)
67 |         images['observed_stft'] = stft_to_image(observation, True)
68 | 
69 |         if 'noise_mask_prediction' in output:
70 |             noise_mask = output['noise_mask_prediction']
71 |             images['noise_mask'] = mask_to_image(noise_mask, True)
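        # The mask targets are only available during training, hence the
        # membership checks below.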
72 |         if batch is not None and 'speech_mask_target' in batch:
73 |             images['speech_mask_target'] = mask_to_image(
74 |                 batch['speech_mask_target'], True)
75 |         if 'noise_mask_target' in batch:
76 |             images['noise_mask_target'] = mask_to_image(
77 |                 batch['noise_mask_target'], True)
78 |         return images
79 | 
--------------------------------------------------------------------------------
/padertorch/contrib/examples/speech_enhancement/mask_estimator/train.py:
--------------------------------------------------------------------------------
1 | """
2 | Very simple training script for a mask estimator.
3 | Saves checkpoints and summaries to $STORAGE_ROOT/speech_enhancement/simple_mask_estimator_{id}.
4 | May be called with:
5 | python -m padertorch.contrib.examples.speech_enhancement.mask_estimator.train with database_json=/path/to/json
6 | """
7 | 
8 | from pathlib import Path
9 | 
10 | import os
11 | import numpy as np
12 | import paderbox as pb
13 | import padertorch as pt
14 | from lazy_dataset.database import JsonDatabase
15 | from pb_bss.extraction.mask_module import biased_binary_mask
16 | from sacred import Experiment, observers
17 | 
18 | from .model import SimpleMaskEstimator
19 | 
20 | ex = Experiment('Train Simple Mask Estimator')
21 | 
22 | 
23 | @ex.config
24 | def config():
25 |     storage_dir = None
26 |     if storage_dir is None:
27 |         storage_dir = pt.io.get_new_storage_dir(
28 |             'speech_enhancement', prefix='simple_mask_estimator')
29 |     database_json = None
30 |     if database_json is None:
31 |         if 'NT_DATABASE_JSONS_DIR' in os.environ:
32 |             database_json = Path(
33 |                 os.environ['NT_DATABASE_JSONS_DIR']) / 'chime.json'
34 |     assert database_json is not None, (
35 |         'You have to specify a path to a json describing your database, '
36 |         'use "with database_json=/Path/To/Json" as suffix to your call'
37 |     )
38 |     assert Path(database_json).exists(), database_json
39 |     ex.observers.append(observers.FileStorageObserver(
40 |         Path(storage_dir).expanduser().resolve() / 'sacred')
41 |     )
42 | 
43 | 
44 | def prepare_data(example):
45 |     stft = pb.transform.STFT(shift=256, size=1024)
46 |     net_input = dict()
47 |     audio_data = dict()
48 |     for key in ['observation', 'speech_image', 'noise_image']:
49 |         audio_data[key] = stft(np.array([
50 |             pb.io.load_audio(audio) for audio in example['audio_path'][key]]))
51 |     net_input['observation_abs'] = np.abs(
52 |         audio_data['observation']).astype(np.float32)
53 |     target_mask, noise_mask = biased_binary_mask(np.stack(
54 |         [audio_data['speech_image'], audio_data['noise_image']], axis=0
55 |     ))
56 |     net_input['speech_mask_target'] = target_mask.astype(np.float32)
57 |     net_input['noise_mask_target'] = noise_mask.astype(np.float32)
58 |     return net_input
59 | 
60 | 
61 | def get_train_dataset(database: JsonDatabase):
62 |     train_ds = database.get_dataset('tr05_simu')
63 |     return (train_ds
64 |             .map(prepare_data)
65 |             .prefetch(num_workers=4, buffer_size=4))
66 | 
67 | 
68 | def get_validation_dataset(database: JsonDatabase):
69 |     # Prepare the validation data in the same way as the training data,
70 |     # here using the CHiME3 simulated development set
71 |     val_iterator = database.get_dataset('dt05_simu')
72 |     return val_iterator.map(prepare_data) \
73 |         .prefetch(num_workers=4, buffer_size=4)
74 | 
75 | 
76 | @ex.command
77 | def test_run(storage_dir, database_json):
78 |     model = SimpleMaskEstimator(513)
79 |     print(f'Simple training for the following model: {model}')
80 |     database = JsonDatabase(database_json)
81 |     train_dataset = get_train_dataset(database)
82 |
validation_dataset = get_validation_dataset(database) 83 | trainer = pt.train.trainer.Trainer( 84 | model, storage_dir, optimizer=pt.train.optimizer.Adam(), 85 | stop_trigger=(int(1e5), 'iteration') 86 | ) 87 | trainer.test_run(train_dataset, validation_dataset) 88 | 89 | 90 | @ex.automain 91 | def train(storage_dir, database_json): 92 | model = SimpleMaskEstimator(513) 93 | print(f'Simple training for the following model: {model}') 94 | database = JsonDatabase(database_json) 95 | train_dataset = get_train_dataset(database) 96 | validation_dataset = get_validation_dataset(database) 97 | trainer = pt.Trainer(model, storage_dir, 98 | optimizer=pt.train.optimizer.Adam(), 99 | stop_trigger=(int(1e5), 'iteration')) 100 | trainer.test_run(train_dataset, validation_dataset) 101 | trainer.register_validation_hook( 102 | validation_dataset, n_back_off=5, lr_update_factor=1 / 10, 103 | back_off_patience=1, early_stopping_patience=None) 104 | trainer.train(train_dataset) 105 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/toy_examples/configurable/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/examples/toy_examples/configurable/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/examples/toy_examples/configurable/configurable.py: -------------------------------------------------------------------------------- 1 | import padertorch as pt 2 | 3 | class GRU(pt.Configurable): 4 | def __init__(self, nonlinearity='tanh'): 5 | pass 6 | 7 | 8 | class LSTM(pt.Configurable): 9 | def __init__(self, peephole=False): 10 | pass 11 | 12 | 13 | class DenseEncoder(pt.Configurable): 14 | def __init__(self, layers=2, nonlinearity='elu'): 15 | pass 16 | 17 | 18 | class RecurrentEncoder(pt.Configurable): 19 | 20 | @classmethod 21 | def get_signature(cls): 22 | defaults = super().get_signature() 23 | defaults['recurrent'] = { 24 | 'cls': GRU, 25 | } 26 | return defaults 27 | 28 | def __init__( 29 | self, 30 | recurrent, 31 | layers=2, 32 | bidirectional=False, 33 | ): 34 | pass 35 | 36 | 37 | class VAE(pt.Configurable): 38 | """ 39 | >>> from pprint import pprint 40 | >>> pprint(VAE.get_config({})) 41 | {'cls': 'configurable.VAE', 42 | 'kwargs': {'encoder': {'cls': 'configurable.DenseEncoder', 43 | 'kwargs': {'layers': 3, 'nonlinearity': 'sigmoid'}}, 44 | 'vae_param': 2}} 45 | >>> pprint(VAE.get_config({'encoder': {'cls': RecurrentEncoder}})) 46 | {'cls': 'configurable.VAE', 47 | 'kwargs': {'encoder': {'cls': 'configurable.RecurrentEncoder', 48 | 'kwargs': {'bidirectional': False, 49 | 'layers': 4, 50 | 'recurrent': {'cls': 'configurable.GRU', 51 | 'kwargs': {'nonlinearity': 'tanh'}}}}, 52 | 'vae_param': 2}} 53 | """ 54 | @classmethod 55 | def get_signature(cls): 56 | defaults = super().get_signature() 57 | defaults['encoder'] = { 58 | 'cls': DenseEncoder, 59 | 'kwargs': {'layers': 5}, 60 | DenseEncoder: {'layers': 3, 'nonlinearity': 'sigmoid'}, 61 | RecurrentEncoder: {'layers': 4}, 62 | } 63 | return defaults 64 | 65 | def __init__(self, encoder, vae_param=2): 66 | self.encoder = encoder 67 | self.vae_param = vae_param 68 | 69 | 70 | import sacred 71 | import sacred.run 72 | import sacred.commands 73 | exp = sacred.Experiment('vae') 74 | from paderbox.utils.nested import deflatten 75 | 76 | @exp.config 77 | def config(): 78 | 79 | model = {} 80 
| VAE.get_config( 81 | dict( 82 | encoder={ 83 | 'cls': RecurrentEncoder, 84 | RecurrentEncoder: dict( 85 | recurrent={'cls': LSTM} 86 | ), 87 | }, 88 | ), 89 | model, 90 | ) 91 | VAE.get_config( # alternative dict update 92 | deflatten({ 93 | ('encoder', 'cls'): RecurrentEncoder, 94 | ('encoder', RecurrentEncoder, 'recurrent', 'cls'): LSTM, 95 | }, sep=None), 96 | model, 97 | ) 98 | VAE.get_config( # second alternative update 99 | deflatten({ 100 | 'encoder/cls': 'RecurrentEncoder', 101 | 'encoder/RecurrentEncoder/recurrent/cls': LSTM, 102 | }, sep='/'), 103 | model, 104 | ) 105 | 106 | 107 | @exp.automain 108 | def main(_config, _run: sacred.run.Run): 109 | """ 110 | python parametized.py print_config 111 | python parametized.py print_config with model.kwargs.encoder.cls=RecurrentEncoder model.kwargs.vae_param=10 112 | """ 113 | from IPython.lib.pretty import pprint 114 | sacred.commands.print_config(_run) 115 | 116 | model = VAE.from_config(_config['model']) 117 | 118 | print('Model config') 119 | pprint(model.config) 120 | print('Encoder config') 121 | pprint(model.encoder) 122 | 123 | 124 | if __name__ == '__main__': 125 | pass 126 | -------------------------------------------------------------------------------- /padertorch/contrib/examples/toy_examples/configurable/shared_parameter.py: -------------------------------------------------------------------------------- 1 | import padertorch as pts 2 | from IPython.lib.pretty import pprint 3 | from paderbox.utils.nested import deflatten 4 | 5 | 6 | class Load(pts.configurable.Configurable): 7 | def __init__(self, sample_rate=16000): 8 | self.sample_rate = sample_rate 9 | def __call__(self, arg): 10 | print(self.__class__.__name__, arg, self.sample_rate) 11 | return arg + 5 12 | 13 | 14 | class FeatureExtractor(pts.configurable.Configurable): 15 | def __init__(self, sample_rate=16000): 16 | self.sample_rate = sample_rate 17 | def __call__(self, arg): 18 | print(self.__class__.__name__, arg, self.sample_rate) 19 | return arg + 7 20 | 21 | 22 | class Compose(pts.configurable.Configurable): 23 | def __init__(self, layer1, layer2, sample_rate=8000): 24 | self.layer1 = layer1 25 | self.layer2 = layer2 26 | 27 | def __call__(self, arg): 28 | print(self.__class__.__name__, arg) 29 | return self.layer2(self.layer1(arg)) + 11 30 | 31 | @classmethod 32 | def get_config( 33 | cls, 34 | updates=None, 35 | config=None, 36 | ): 37 | config = super().get_config(updates=updates, config=config) 38 | config['kwargs']['layer1']['kwargs']['sample_rate'] = config['kwargs']['sample_rate'] 39 | config['kwargs']['layer2']['kwargs']['sample_rate'] = config['kwargs']['sample_rate'] 40 | return config 41 | 42 | 43 | class Model(pts.configurable.Configurable): 44 | """ 45 | >>> pprint(Model.get_config()) 46 | {'cls': 'parametized_shared_parameter.Model', 47 | 'kwargs': {'transform': {'cls': 'parametized_shared_parameter.Compose', 48 | 'kwargs': {'sample_rate': 8000, 49 | 'layer1': {'cls': 'parametized_shared_parameter.Load', 50 | 'kwargs': {'sample_rate': 8000}}, 51 | 'layer2': {'cls': 'parametized_shared_parameter.FeatureExtractor', 52 | 'kwargs': {'sample_rate': 8000}}}}}} 53 | """ 54 | @classmethod 55 | def get_signature(self): 56 | defaults = super().get_signature() 57 | defaults['transform'] = deflatten({ 58 | 'cls': Compose, 59 | 'kwargs.sample_rate': 8000, 60 | 'kwargs.layer1.cls': Load, 61 | 'kwargs.layer2.cls': FeatureExtractor, 62 | 63 | }, sep='.') 64 | return defaults 65 | 66 | def __init__(self, transform): 67 | self.transform = transform 68 | 69 | 70 | 
import sacred 71 | import sacred.run 72 | import sacred.commands 73 | exp = sacred.Experiment('Shared Parameter') 74 | 75 | @exp.config 76 | def config(): 77 | 78 | model = {} 79 | Model.get_config( # second alternative update 80 | deflatten({ 81 | 'transform.kwargs.sample_rate': 44100, 82 | }, sep='.'), 83 | model, 84 | ) 85 | 86 | 87 | @exp.automain 88 | def main(_config, _run: sacred.run.Run): 89 | """ 90 | """ 91 | sacred.commands.print_config(_run) 92 | 93 | model = Model.from_config(_config['model']) 94 | 95 | print('Model config') 96 | pprint(model.config) 97 | 98 | 99 | if __name__ == '__main__': 100 | pass -------------------------------------------------------------------------------- /padertorch/contrib/je/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/je/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/je/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/je/data/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/je/data/filters.py: -------------------------------------------------------------------------------- 1 | from padertorch.utils import to_list 2 | 3 | 4 | class DiscardLabelsFilter: 5 | def __init__(self, key, names): 6 | self.key = key 7 | self.names = to_list(names) 8 | 9 | def __call__(self, example): 10 | return not any([name in to_list(example[self.key]) for name in self.names]) 11 | 12 | 13 | class RestrictLabelsFilter: 14 | def __init__(self, key, names): 15 | self.key = key 16 | self.names = to_list(names) 17 | 18 | def __call__(self, example): 19 | return any([name in to_list(example[self.key]) for name in self.names]) 20 | -------------------------------------------------------------------------------- /padertorch/contrib/je/hooks/swa.py: -------------------------------------------------------------------------------- 1 | from padertorch.train.hooks import TriggeredHook 2 | from paderbox.utils.nested import nested_op 3 | 4 | 5 | class SWAHook(TriggeredHook): 6 | """ 7 | performs stochastic weight averaging of the trainers model or a submodule of it 8 | """ 9 | def __init__(self, trigger, submodule=None): 10 | """ 11 | 12 | Args: 13 | trigger: 14 | submodule: 15 | """ 16 | super().__init__(trigger) 17 | self.submodule = [] if submodule is None else submodule.split('.') 18 | self.swa_module = None 19 | self.count = 0 20 | 21 | def state_dict(self): 22 | return { 23 | "swa_module": self.swa_module, 24 | "count": self.count 25 | } 26 | 27 | def load_state_dict(self, state_dict): 28 | self.swa_module = state_dict["swa_module"] 29 | self.count = state_dict["count"] 30 | 31 | def get_module(self, trainer): 32 | module = trainer.model 33 | for attr_name in self.submodule: 34 | module = getattr(module, attr_name) 35 | return module 36 | 37 | def pre_step(self, trainer): 38 | if self.trigger(iteration=trainer.iteration, epoch=trainer.epoch) \ 39 | and trainer.iteration != 0: 40 | print('SWA') 41 | module = self.get_module(trainer) 42 | self.count += 1 43 | if self.swa_module is None: 44 | self.swa_module = module.state_dict() 45 | else: 46 | r = 1 / self.count 47 | self.swa_module = nested_op( 48 | lambda x, y: (1-r) * x.to(y.device) + r 
* y, 49 | self.swa_module, 50 | module.state_dict() 51 | ) 52 | -------------------------------------------------------------------------------- /padertorch/contrib/je/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/je/models/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/je/models/clf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from padertorch.base import Model 4 | from padertorch.contrib.je.modules.conv import CNN1d 5 | from padertorch.contrib.je.modules.features import NormalizedLogMelExtractor 6 | from padertorch.contrib.je.modules.reduce import Mean 7 | from torchvision.utils import make_grid 8 | from einops import rearrange 9 | 10 | 11 | class Classifier(Model): 12 | def __init__( 13 | self, net: CNN1d, feature_extractor=None, *, 14 | input_key='stft', input_seq_len_key='seq_len', target_key, 15 | ): 16 | super().__init__() 17 | self.net = net 18 | self.feature_extractor = feature_extractor 19 | self.input_key = input_key 20 | self.input_seq_len_key = input_seq_len_key 21 | self.target_key = target_key 22 | 23 | def forward(self, inputs): 24 | x = inputs[self.input_key] 25 | seq_len = inputs[self.input_seq_len_key] 26 | if self.feature_extractor is not None: 27 | x = self.feature_extractor(x, seq_len) 28 | if x.dim() == 4 and isinstance(self.net, CNN1d): 29 | x = rearrange(x, 'b c f t -> b (c f) t') 30 | return x, self.net(x, seq_len) 31 | 32 | def review(self, inputs, outputs): 33 | targets = inputs[self.target_key].long() 34 | x, (logits, seq_len) = outputs 35 | if logits.dim() > 2 and targets.dim() == 1: 36 | assert logits.dim() == 3, logits.shape 37 | targets = targets.unsqueeze(-1) # add time axis 38 | targets = targets.expand((targets.shape[0], logits.shape[-1])) 39 | predictions = torch.argmax(logits, dim=1) 40 | ce = torch.nn.CrossEntropyLoss(reduction='none')(logits, targets) 41 | ce = Mean(axis=-1)(ce, seq_len) 42 | return dict( 43 | loss=ce.mean(), 44 | scalars=dict( 45 | predictions=predictions, 46 | targets=targets, 47 | ), 48 | histograms=dict( 49 | ce_=ce.flatten(), 50 | logits_=logits.flatten(), 51 | ), 52 | images=dict( 53 | features=x[:3], 54 | ) 55 | ) 56 | 57 | def modify_summary(self, summary): 58 | if 'targets' in summary['scalars']: 59 | targets = summary['scalars'].pop('targets') 60 | predictions = summary['scalars'].pop('predictions') 61 | summary['scalars']['accuracy'] = ( 62 | np.array(predictions) == np.array(targets) 63 | ).mean() 64 | for key, image in summary['images'].items(): 65 | if image.dim() == 3: 66 | image = image.unsqueeze(1) 67 | summary['images'][key] = make_grid( 68 | image.flip(2), normalize=True, scale_each=False, nrow=1 69 | ) 70 | summary = super().modify_summary(summary) 71 | return summary 72 | 73 | @classmethod 74 | def finalize_dogmatic_config(cls, config): 75 | config['net']['factory'] = CNN1d 76 | config['feature_extractor'] = { 77 | 'factory': NormalizedLogMelExtractor, 78 | } 79 | if config['net']['factory'] == CNN1d: 80 | if config['feature_extractor']['factory'] == NormalizedLogMelExtractor: 81 | config['net']['in_channels'] = config['feature_extractor']['n_mels'] 82 | else: 83 | raise ValueError(f'Factory {config["encoder"]["factory"]} not allowed.') 84 | 
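A hedged sketch of how such a model is typically constructed through padertorch's `Configurable` API, where `get_config` fills in the defaults from `finalize_dogmatic_config` above; the concrete update values are illustrative assumptions, not defaults of this module:

```python
from padertorch.contrib.je.models.clf import Classifier

config = Classifier.get_config(updates={
    'target_key': 'speaker_id',           # assumed label key in the examples
    'feature_extractor': {'n_mels': 64},  # assumed NormalizedLogMelExtractor kwarg
})
model = Classifier.from_config(config)
```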
-------------------------------------------------------------------------------- /padertorch/contrib/je/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/je/modules/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/je/modules/reduce.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from padertorch.ops.sequence.mask import compute_mask 5 | 6 | 7 | class Sum(nn.Module): 8 | """ 9 | >>> seq_axis = 1 10 | >>> x = torch.cumsum(torch.ones((3,7,4)), dim=seq_axis) 11 | >>> x = Sum(axis=seq_axis)(x, seq_len=[4,5,6]) 12 | """ 13 | def __init__(self, axis=-1, keepdims=False): 14 | self.axis = axis 15 | self.keepdims = keepdims 16 | super().__init__() 17 | 18 | def __call__(self, x, seq_len=None): 19 | if seq_len is None: 20 | x = x.sum(self.axis, keepdim=self.keepdims) 21 | else: 22 | mask = compute_mask(x, seq_len, 0, self.axis) 23 | x = (x * mask).sum(dim=self.axis, keepdim=self.keepdims) 24 | return x 25 | 26 | 27 | class Mean(Sum): 28 | """ 29 | >>> seq_axis = 1 30 | >>> x = torch.cumsum(torch.ones((3,7,4)), dim=seq_axis) 31 | >>> x = Mean(axis=seq_axis)(x, seq_len=[4,5,6]) 32 | >>> x.shape 33 | >>> x = torch.cumsum(torch.ones((3,7,4)), dim=seq_axis) 34 | >>> x = Mean(axis=seq_axis, keepdims=True)(x, seq_len=[4,5,6]) 35 | >>> x.shape 36 | """ 37 | def __call__(self, x, seq_len=None): 38 | if seq_len is None: 39 | x = x.mean(self.axis, keepdim=self.keepdims) 40 | else: 41 | mask = compute_mask(x, seq_len, 0, self.axis) 42 | x = (x * mask).sum(dim=self.axis, keepdim=self.keepdims) / (mask.sum(dim=self.axis, keepdim=self.keepdims) + 1e-6) 43 | return x 44 | 45 | 46 | class Max(nn.Module): 47 | """ 48 | >>> seq_axis = 1 49 | >>> x = torch.cumsum(torch.ones((3,7,4)), dim=seq_axis) 50 | >>> Max(axis=seq_axis)(x, seq_len=[4,5,6]) 51 | """ 52 | def __init__(self, axis=-1, keepdims=False): 53 | self.axis = axis 54 | self.keepdims = keepdims 55 | super().__init__() 56 | 57 | def __call__(self, x, seq_len=None): 58 | if seq_len is not None: 59 | mask = compute_mask(x, seq_len, 0, self.axis) 60 | x = (x + torch.log(mask)) 61 | x = x.max(self.axis, keepdim=self.keepdims) 62 | return x 63 | 64 | 65 | class TakeLast(nn.Module): 66 | """ 67 | >>> x = torch.Tensor([[[1,2,3]],[[4,5,6]]]) 68 | >>> TakeLast()(x, [2, 3]) 69 | tensor([[2.], 70 | [6.]]) 71 | """ 72 | def __init__(self, axis=-1, keepdims=False): 73 | self.axis = axis 74 | self.keepdims = keepdims 75 | super().__init__() 76 | 77 | def __call__(self, x, seq_len=None): 78 | axis = self.axis 79 | if axis < 0: 80 | axis = x.dim() + axis 81 | if axis != 1: 82 | assert axis > 1, axis 83 | x = x.unsqueeze(1).transpose(1, axis+1).squeeze(axis + 1) 84 | if seq_len is None: 85 | x = x[:, -1] 86 | else: 87 | x = x[torch.arange(x.shape[0]), np.array(seq_len) - 1] 88 | if self.keepdims: 89 | x = x.unsqueeze(self.axis) 90 | return x 91 | 92 | 93 | class AutoPool(nn.Module): 94 | """ 95 | 96 | >>> autopool = AutoPool(10) 97 | >>> autopool(torch.cumsum(torch.ones(4, 10, 17), dim=-1), seq_len=[17, 15, 12, 9]) 98 | """ 99 | def __init__(self, n_classes, alpha=1., trainable=False): 100 | super().__init__() 101 | self.trainable = trainable 102 | if trainable: 103 | self.alpha = nn.Parameter(alpha*torch.ones((n_classes, 1))) 104 | else: 105 | 
self.alpha = alpha 106 | 107 | def forward(self, x, seq_len=None): 108 | x_ = self.alpha*x 109 | if seq_len is not None: 110 | seq_len = torch.Tensor(seq_len).to(x.device)[:, None, None] 111 | mask = (torch.cumsum(torch.ones_like(x_), dim=-1) <= seq_len).float() 112 | x_ = x_ * mask + torch.log(mask) 113 | weights = nn.Softmax(dim=-1)(x_) 114 | return (weights*x).sum(dim=-1) 115 | -------------------------------------------------------------------------------- /padertorch/contrib/je/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/je/tests/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/jensheit/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import * 2 | -------------------------------------------------------------------------------- /padertorch/contrib/jensheit/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, asdict, field 2 | from padertorch.base import Module 3 | from padertorch.configurable import Configurable 4 | 5 | 6 | __all__ = [ 7 | 'Parameterized', 8 | 'BuildingBlock', 9 | 'dict_func' 10 | ] 11 | 12 | def dict_func(in_dict): 13 | return field(default_factory=lambda: in_dict) 14 | 15 | class Parameterized(Configurable): 16 | @dataclass 17 | class opts: 18 | pass 19 | 20 | def __init__(self, **kwargs): 21 | super().__init__() 22 | if 'opts' in kwargs: 23 | self.opts = kwargs['opts'] 24 | assert hasattr(self.opts, '__dataclass_fields__') 25 | else: 26 | self.opts = self.opts(**kwargs) 27 | 28 | def __repr__(self): 29 | return f'{type(self).__name__}:\n{str(self.opts)}' 30 | 31 | @classmethod 32 | def finalize_dogmatic_config(cls, config): 33 | for key, value in asdict(cls.opts()).items(): 34 | config[key] = value 35 | 36 | 37 | class BuildingBlock(Parameterized, Module): 38 | def __init__(self, **kwargs): 39 | super().__init__(**kwargs) 40 | super(Parameterized).__init__() 41 | self.build() 42 | 43 | def forward(self, *args, **kwargs): 44 | raise NotImplementedError 45 | 46 | def build(self, *args, **kwargs): 47 | pass 48 | -------------------------------------------------------------------------------- /padertorch/contrib/jensheit/evaluation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import paderbox as pb 4 | from padercontrib.database import keys as DB_K 5 | from paderbox.utils.numpy_utils import morph 6 | from padertorch.data import example_to_device 7 | from padertorch.modules.mask_estimator import MaskKeys as M_K 8 | 9 | __all__ = [ 10 | 'beamforming' 11 | ] 12 | 13 | 14 | def beamforming(observation, speech_mask, noise_mask, 15 | speech_image=None, noise_image=None, 16 | get_bf_fn=pb.speech_enhancement.get_mvdr_vector_souden): 17 | """ 18 | 19 | :param observation: ...xCxTxF 20 | :param speech_mask: ...xCxTxF 21 | :param noise_mask: ...xCxTxF 22 | :param speech_image: ...xCxTxF 23 | :param noise_image: ...xCxTxF 24 | :return: predicted speech signal: ...xTxF 25 | """ 26 | speech_mask = np.median(speech_mask, axis=-3).swapaxes(-2, -1) 27 | noise_mask = np.median(noise_mask, axis=-3).swapaxes(-2, -1) 28 | obs = morph('...ctf->...fct', observation) 29 | covariance = pb.speech_enhancement.get_power_spectral_density_matrix 30 | speech_psd = 
covariance(obs, speech_mask)
31 |     noise_psd = covariance(obs, noise_mask)
32 |     bf_vec = get_bf_fn(speech_psd, noise_psd)
33 |     speech_pred = pb.speech_enhancement.apply_beamforming_vector(
34 |         bf_vec, obs).swapaxes(-2, -1)
35 |     if speech_image is not None:
36 |         image_contribution = pb.speech_enhancement.apply_beamforming_vector(
37 |             bf_vec, morph('...ctf->...fct', speech_image)).swapaxes(-2, -1)
38 |     else:
39 |         image_contribution = None
40 |     if noise_image is not None:
41 |         noise_contribution = pb.speech_enhancement.apply_beamforming_vector(
42 |             bf_vec, morph('...ctf->...fct', noise_image)).swapaxes(-2, -1)
43 |     else:
44 |         noise_contribution = None
45 |     return speech_pred, image_contribution, noise_contribution
46 | 
47 | 
48 | def evaluate_masks(example, model, stft):
49 |     model_out = model(example_to_device(example))
50 |     speech_image = example[DB_K.SPEECH_IMAGE][0]
51 |     speech_pred, image_cont, noise_cont = beamforming(
52 |         example[M_K.OBSERVATION_STFT][0],
53 |         model_out[M_K.SPEECH_MASK_PRED][0].detach().numpy(),
54 |         model_out[M_K.NOISE_MASK_PRED][0].detach().numpy(),
55 |         stft(speech_image),
56 |         stft(example[DB_K.NOISE_IMAGE][0])
57 |     )
58 |     ex_id = example[DB_K.EXAMPLE_ID][0]
59 |     pesq = pb.evaluation.pesq(example[DB_K.SPEECH_IMAGE][0][0],
60 |                               stft.inverse(speech_pred))[0]
61 |     snr = np.mean(-10 * np.log10(np.abs(image_cont) ** 2
62 |                                  / np.abs(noise_cont) ** 2))
63 |     print(ex_id, snr, pesq)
64 |     return ex_id, snr, pesq
65 | 
--------------------------------------------------------------------------------
/padertorch/contrib/jensheit/mask_estimator_example/__init__.py:
--------------------------------------------------------------------------------
1 | from .modul import MaskEstimator
2 | from .model import MaskEstimatorModel
--------------------------------------------------------------------------------
/padertorch/contrib/jensheit/norm.py:
--------------------------------------------------------------------------------
1 | """
2 | This code is an adapted version of https://github.com/funcwj/conv-tasnet
3 | """
4 | 
5 | import torch
6 | import torch.nn as nn
7 | from einops import rearrange
8 | 
9 | 
10 | class TransposedLayerNorm(nn.LayerNorm):
11 |     """
12 |     Channel-wise layer normalization
13 |     >>> norm = TransposedLayerNorm(256)
14 |     >>> norm(torch.rand(5, 256, 343)).shape
15 |     torch.Size([5, 256, 343])
16 |     """
17 | 
18 |     def __init__(self, normalized_shape, eps=1e-5, elementwise_affine=True):
19 |         super().__init__(normalized_shape, eps, elementwise_affine)
20 | 
21 |     def forward(self, x):
22 |         """
23 |         x: N x F x T
24 |         """
25 |         if x.dim() != 3:
26 |             raise RuntimeError("{} accepts only 3D tensors as input".format(
27 |                 self.__class__.__name__))
28 |         x = rearrange(x, 'n f t -> n t f')
29 |         # LN
30 |         x = super().forward(x)
31 |         x = rearrange(x, 'n t f -> n f t')
32 |         return x
33 | 
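# TransposedLayerNorm ("cLN") above normalizes every time step independently
# over the feature axis, whereas GlobalChannelLayerNorm ("gLN") below uses a
# mean and variance computed jointly over the feature and time axes.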
x.dim() != 3: 60 | raise RuntimeError("{} accept 3D tensor as input".format( 61 | self.__name__)) 62 | # N x 1 x 1 63 | mean = torch.mean(x, (1, 2), keepdim=True) 64 | var = torch.mean((x - mean)**2, (1, 2), keepdim=True) 65 | # N x T x F 66 | if self.elementwise_affine: 67 | x = self.gamma * (x - mean) / torch.sqrt(var + self.eps) + self.beta 68 | else: 69 | x = (x - mean) / torch.sqrt(var + self.eps) 70 | return x 71 | 72 | def extra_repr(self): 73 | return "{normalized_dim}, eps={eps}, " \ 74 | "elementwise_affine={elementwise_affine}".format(**self.__dict__) 75 | 76 | 77 | def build_norm(norm, dim): 78 | """ 79 | Build normalize layer 80 | LN cost more memory than BN 81 | 82 | >>> norm = build_norm('cLN', 256) 83 | >>> norm(torch.rand(5, 256, 343)).shape 84 | torch.Size([5, 256, 343]) 85 | 86 | >>> norm = build_norm('gLN', 256) 87 | >>> norm(torch.rand(5, 256, 343)).shape 88 | torch.Size([5, 256, 343]) 89 | 90 | >>> norm = build_norm('BN', 256) 91 | >>> norm(torch.rand(5, 256, 343)).shape 92 | torch.Size([5, 256, 343]) 93 | """ 94 | if norm not in ["cLN", "gLN", "BN"]: 95 | raise RuntimeError("Unsupported normalize layer: {}".format(norm)) 96 | if norm == "cLN": 97 | return TransposedLayerNorm(dim, elementwise_affine=True) 98 | elif norm == "BN": 99 | return nn.BatchNorm1d(dim) 100 | else: 101 | return GlobalChannelLayerNorm(dim, elementwise_affine=True) -------------------------------------------------------------------------------- /padertorch/contrib/jensheit/tests/test_mask_estimator.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import padertorch as pt 3 | import numpy as np 4 | import torch 5 | 6 | K = pt.modules.mask_estimator.MaskKeys 7 | 8 | 9 | class TestMaskEstimatorModel(unittest.TestCase): 10 | # TODO: Test forward deterministic if not train 11 | C = 4 12 | 13 | def setUp(self): 14 | self.model_class= pt.models.mask_estimator.MaskEstimatorModel 15 | self.model = self.model_class.from_config( 16 | self.model_class.get_config()) 17 | self.T = 100 18 | self.B = 4 19 | self.F = 513 20 | self.num_frames = [100, 90, 80, 70] 21 | self.inputs = { 22 | K.OBSERVATION_ABS: [ 23 | np.abs(np.random.normal( 24 | size=(self.C, num_frames_, self.F) 25 | )).astype(np.float32) 26 | for num_frames_ in self.num_frames 27 | ], 28 | K.SPEECH_MASK_TARGET: [ 29 | np.abs(np.random.choice( 30 | [0, 1], 31 | size=(self.C, num_frames_, self.F) 32 | )).astype(np.float32) 33 | for num_frames_ in self.num_frames 34 | ], 35 | K.NOISE_MASK_TARGET: [ 36 | np.abs(np.random.choice( 37 | [0, 1], 38 | size=(self.C, num_frames_, self.F) 39 | )).astype(np.float32) 40 | for num_frames_ in self.num_frames 41 | ], 42 | K.NUM_FRAMES: [num_frames for num_frames in self.num_frames], 43 | } 44 | 45 | def test_signature(self): 46 | assert callable(getattr(self.model, 'forward', None)) 47 | assert callable(getattr(self.model, 'review', None)) 48 | 49 | def test_forward(self): 50 | inputs = pt.data.example_to_device(self.inputs) 51 | model_out = self.model(inputs) 52 | for mask, num_frames in zip(model_out[K.SPEECH_MASK_PRED], 53 | self.num_frames): 54 | expected_shape = (self.C, num_frames, self.F) 55 | assert mask.shape == expected_shape, mask.shape 56 | for mask, num_frames in zip(model_out[K.SPEECH_MASK_LOGITS], 57 | self.num_frames): 58 | expected_shape = (self.C, num_frames, self.F) 59 | assert mask.shape == expected_shape, mask.shape 60 | 61 | def test_review(self): 62 | inputs = pt.data.example_to_device(self.inputs) 63 | mask = self.model(inputs) 64 | 
review = self.model.review(inputs, mask) 65 | 66 | assert 'loss' in review, review.keys() 67 | assert 'loss' not in review['scalars'], review['scalars'].keys() 68 | 69 | def test_minibatch_equal_to_single_example(self): 70 | inputs = pt.data.example_to_device(self.inputs) 71 | model = self.model 72 | model.eval() 73 | mask = model(inputs) 74 | review = model.review(inputs, mask) 75 | actual_loss = review['loss'] 76 | 77 | reference_loss = list() 78 | 79 | for observation, target_mask, noise_mask in zip( 80 | self.inputs[K.OBSERVATION_ABS], 81 | self.inputs[K.SPEECH_MASK_TARGET], 82 | self.inputs[K.NOISE_MASK_TARGET], 83 | ): 84 | inputs = { 85 | K.OBSERVATION_ABS: [observation], 86 | K.SPEECH_MASK_TARGET: [target_mask], 87 | K.NOISE_MASK_TARGET: [noise_mask], 88 | K.NUM_FRAMES: [observation.shape[1]] 89 | } 90 | inputs = pt.data.example_to_device(inputs) 91 | mask = model(inputs) 92 | review = model.review(inputs, mask) 93 | reference_loss.append(review['loss']) 94 | 95 | reference_loss = torch.sum(torch.stack(reference_loss)) 96 | 97 | np.testing.assert_allclose( 98 | actual_loss.detach().numpy(), 99 | reference_loss.detach().numpy(), 100 | atol=1e-3 101 | ) 102 | 103 | 104 | class TestMaskEstimatorSingleChannelModel(TestMaskEstimatorModel): 105 | C = 1 106 | -------------------------------------------------------------------------------- /padertorch/contrib/jensheit/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from warnings import warn 3 | 4 | from paderbox.io import load_json 5 | from paderbox.utils.nested import flatten 6 | from padertorch.configurable import class_to_str 7 | 8 | 9 | def dict_compare(d1, d2): 10 | # From http://stackoverflow.com/questions/4527942/comparing-two-dictionaries-in-python 11 | d1_keys = set(d1.keys()) 12 | d2_keys = set(d2.keys()) 13 | intersect_keys = d1_keys.intersection(d2_keys) 14 | added = d1_keys - d2_keys 15 | removed = d2_keys - d1_keys 16 | 17 | # Init differs from defaults: 18 | modified = {o: (d1[o], d2[o]) for o in intersect_keys if d1[o] != d2[o]} 19 | 20 | same = set(o for o in intersect_keys if d1[o] == d2[o]) 21 | are_equal = not len(added) and not len(removed) and not len(modified) 22 | return added, removed, modified, same, are_equal 23 | 24 | 25 | def compare_configs(storage_dir, trainer_opts, provider_opts): 26 | opts = flatten(trainer_opts) 27 | opts.update(flatten(provider_opts)) 28 | init = load_json(Path(storage_dir) / 'init.json') 29 | 30 | added, removed, modified, _, _ = dict_compare(opts, init) 31 | if len(added): 32 | warn( 33 | f'The following options were added to the model: {added}' 34 | ) 35 | if len(removed): 36 | warn( 37 | f'The following options were removed from the model: {removed}' 38 | ) 39 | 40 | return init['trainer_opts'], init['provider_opts'] 41 | 42 | 43 | def get_experiment_name(model_opts, submodel=None): 44 | model_name = class_to_str(model_opts["factory"]) 45 | assert isinstance(model_name, str), (model_name, type(model_name)) 46 | model_name = model_name.split('.')[-1] 47 | if submodel is not None: 48 | sub_name = class_to_str(model_opts[submodel]["factory"]) 49 | assert isinstance(sub_name, str), (sub_name, type(sub_name)) 50 | sep_name = sub_name.split('.')[-1] 51 | else: 52 | sep_name = 'baseline' 53 | ex_name = f'{model_name}/{sep_name}' 54 | return ex_name 55 | -------------------------------------------------------------------------------- /padertorch/contrib/ldrude/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/ldrude/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/ldrude/data.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import einops 4 | import numpy as np 5 | import padertorch as pt 6 | from padercontrib.database.iterator import AudioReader 7 | from padercontrib.database.keys import * 8 | from paderbox.transform import stft 9 | from pb_bss.extraction import ideal_binary_mask 10 | 11 | 12 | def pre_batch_transform(inputs, return_keys=None): 13 | s = inputs['audio_data']['speech_source'] 14 | y = inputs['audio_data']['observation'] 15 | S = stft(s, 512, 128) 16 | Y = stft(y, 512, 128) 17 | Y = einops.rearrange(Y, 't f -> t f') 18 | S = einops.rearrange(S, 'k t f -> t k f') 19 | X = S # Same for MERL database 20 | num_frames = Y.shape[0] 21 | 22 | return_dict = dict() 23 | 24 | def maybe_add(key, value): 25 | if return_keys is None or key in return_keys: 26 | return_dict[key] = value 27 | 28 | maybe_add('example_id', inputs['example_id']) 29 | maybe_add('s', np.ascontiguousarray(s, np.float32)) 30 | maybe_add('y', np.ascontiguousarray(y, np.float32)) 31 | maybe_add('Y', np.ascontiguousarray(Y, np.complex64)) 32 | maybe_add('X_abs', np.ascontiguousarray(np.abs(X), np.float32)) 33 | maybe_add('Y_abs', np.ascontiguousarray(np.abs(Y), np.float32)) 34 | maybe_add('num_frames', num_frames) 35 | maybe_add('cos_phase_difference', np.ascontiguousarray( 36 | np.cos(np.angle(Y[:, None, :]) - np.angle(X)), np.float32) 37 | ) 38 | 39 | if return_keys is None or 'target_mask' in return_keys: 40 | return_dict['target_mask'] = np.ascontiguousarray( 41 | ideal_binary_mask(S, source_axis=-2), np.float32 42 | ) 43 | 44 | return return_dict 45 | 46 | 47 | def post_batch_transform(batch): 48 | return batch 49 | 50 | 51 | def prepare_iterable( 52 | db, dataset: str, batch_size, return_keys=None, prefetch=True, 53 | iterator_slice=None 54 | ): 55 | audio_keys = [OBSERVATION, SPEECH_SOURCE] 56 | audio_reader = AudioReader(audio_keys=audio_keys, read_fn=db.read_fn) 57 | iterator = db.get_iterator_by_names(dataset) 58 | 59 | if iterator_slice is not None: 60 | iterator = iterator[iterator_slice] 61 | 62 | iterator = ( 63 | iterator 64 | .map(audio_reader) 65 | .map(partial(pre_batch_transform, return_keys=return_keys)) 66 | .shuffle(reshuffle=False) 67 | .batch(batch_size) 68 | .map(lambda batch: sorted( 69 | batch, 70 | key=lambda example: example["num_frames"], 71 | reverse=True, 72 | )) 73 | .map(pt.data.utils.collate_fn) 74 | .map(post_batch_transform) 75 | .tile(reps=50, shuffle=True) # Simulates reshuffle to some degree 76 | ) 77 | 78 | if prefetch: 79 | iterator = iterator.prefetch(4, 8) 80 | 81 | return iterator 82 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/mk/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/mk/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing 
import List 4 | 5 | 6 | # https://stackoverflow.com/a/59803793/16085876 7 | def run_fast_scandir(dir: Path, ext: List[str]): 8 | subfolders, files = [], [] 9 | 10 | for f in os.scandir(dir): 11 | if f.is_dir(): 12 | subfolders.append(f.path) 13 | if f.is_file(): 14 | if os.path.splitext(f.name)[1].lower() in ext: 15 | files.append(Path(f.path)) 16 | 17 | 18 | for dir in list(subfolders): 19 | sf, f = run_fast_scandir(dir, ext) 20 | subfolders.extend(sf) 21 | files.extend(f) 22 | return subfolders, files 23 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/mk/modules/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/mk/modules/features/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/mk/modules/features/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/mk/modules/features/ssl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/mk/modules/features/ssl/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/__init__.py: -------------------------------------------------------------------------------- 1 | from .vocoder import Vocoder 2 | from .parametric import fast_griffin_lim, FGLA 3 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/base.py: -------------------------------------------------------------------------------- 1 | import typing 2 | from functools import partial 3 | 4 | import numpy as np 5 | import torch 6 | from paderbox.transform.module_resample import resample_sox 7 | import padertorch as pt 8 | 9 | 10 | class Synthesis(pt.Configurable): 11 | sampling_rate: int 12 | 13 | def __init__( 14 | self, 15 | postprocessing: typing.Optional[typing.Callable] = None, 16 | ): 17 | super().__init__() 18 | self.postprocessing = postprocessing 19 | 20 | def __call__( 21 | self, 22 | time_signal: typing.Union[ 23 | np.ndarray, torch.Tensor, typing.List[np.ndarray], 24 | typing.List[torch.Tensor] 25 | ], 26 | target_sampling_rate: typing.Optional[int] = None, 27 | ) -> typing.Union[ 28 | np.ndarray, torch.Tensor, typing.List[np.ndarray], 29 | typing.List[torch.Tensor] 30 | ]: 31 | if self.postprocessing is not None: 32 | if isinstance(time_signal, list) or time_signal.ndim == 2: 33 | time_signal = list(map(self.postprocessing, time_signal)) 34 | else: 35 | time_signal = self.postprocessing(time_signal) 36 | return self.resample(time_signal, target_sampling_rate) 37 | 38 | def _resample( 39 | self, 40 | wav: typing.Union[np.ndarray, torch.Tensor], 41 | target_sampling_rate: typing.Optional[int] = None, 42 | ) -> typing.Union[np.ndarray, torch.Tensor]: 43 | to_torch = False 44 | if ( 45 | target_sampling_rate is None 46 | or target_sampling_rate == self.sampling_rate 47 | ): 48 | return wav 49 | if isinstance(wav, torch.Tensor): 50 | to_torch = True 51 | wav = 
pt.utils.to_numpy(wav, detach=True) 52 | wav = resample_sox( 53 | wav, 54 | in_rate=self.sampling_rate, 55 | out_rate=target_sampling_rate 56 | ) 57 | if to_torch: 58 | wav = torch.from_numpy(wav) 59 | return wav 60 | 61 | def resample( 62 | self, 63 | wav: typing.Union[ 64 | np.ndarray, torch.Tensor, typing.List[np.ndarray], 65 | typing.List[torch.Tensor] 66 | ], 67 | target_sampling_rate: typing.Optional[int] = None, 68 | ) -> typing.Union[ 69 | np.ndarray, torch.Tensor, typing.List[np.ndarray], 70 | typing.List[torch.Tensor] 71 | ]: 72 | if isinstance(wav, list) or wav.ndim == 2: 73 | wav = list(map( 74 | partial( 75 | self._resample, target_sampling_rate=target_sampling_rate 76 | ), wav 77 | )) 78 | try: 79 | m = np if isinstance(wav[0], np.ndarray) else torch 80 | wav = m.stack(wav) 81 | except (ValueError, RuntimeError): 82 | pass 83 | return wav 84 | return self._resample(wav, target_sampling_rate=target_sampling_rate) 85 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/parametric/__init__.py: -------------------------------------------------------------------------------- 1 | from .griffin_lim import fast_griffin_lim, FGLA 2 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/__init__.py: -------------------------------------------------------------------------------- 1 | from .pwg import Vocoder 2 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/__init__.py: -------------------------------------------------------------------------------- 1 | # Copied from https://github.com/NVIDIA/BigVGAN/tree/main 2 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/cuda/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/cuda/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/cuda/activation1d.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 NVIDIA CORPORATION. 2 | # Licensed under the MIT license. 3 | 4 | import torch 5 | import torch.nn as nn 6 | from ..torch.resample import UpSample1d, DownSample1d 7 | 8 | # load fused CUDA kernel: this enables importing anti_alias_activation_cuda 9 | from .load import load 10 | 11 | anti_alias_activation_cuda = load() 12 | 13 | 14 | class FusedAntiAliasActivation(torch.autograd.Function): 15 | """ 16 | Assumes filter size 12, replication padding on upsampling/downsampling, and logscale alpha/beta parameters as inputs. 17 | The hyperparameters are hard-coded in the kernel to maximize speed. 
18 | NOTE: The fused kernel is incorrect for Activation1d with different hyperparameters. 19 | """ 20 | 21 | @staticmethod 22 | def forward(ctx, inputs, up_ftr, down_ftr, alpha, beta): 23 | activation_results = anti_alias_activation_cuda.forward( 24 | inputs, up_ftr, down_ftr, alpha, beta 25 | ) 26 | 27 | return activation_results 28 | 29 | @staticmethod 30 | def backward(ctx, output_grads): 31 | raise NotImplementedError  # no backward pass is implemented for the fused kernel 32 | # use Activation1d(fused=False) if gradients are required during training 33 | 34 | 35 | class Activation1d(nn.Module): 36 | def __init__( 37 | self, 38 | activation, 39 | up_ratio: int = 2, 40 | down_ratio: int = 2, 41 | up_kernel_size: int = 12, 42 | down_kernel_size: int = 12, 43 | fused: bool = True, 44 | ): 45 | super().__init__() 46 | self.up_ratio = up_ratio 47 | self.down_ratio = down_ratio 48 | self.act = activation 49 | self.upsample = UpSample1d(up_ratio, up_kernel_size) 50 | self.downsample = DownSample1d(down_ratio, down_kernel_size) 51 | 52 | self.fused = fused # Whether to use fused CUDA kernel or not 53 | 54 | def forward(self, x): 55 | if not self.fused: 56 | x = self.upsample(x) 57 | x = self.act(x) 58 | x = self.downsample(x) 59 | return x 60 | else: 61 | if self.act.__class__.__name__ == "Snake": 62 | beta = self.act.alpha.data # Snake uses same params for alpha and beta 63 | else: 64 | beta = ( 65 | self.act.beta.data 66 | ) # Snakebeta uses different params for alpha and beta 67 | alpha = self.act.alpha.data 68 | if ( 69 | not self.act.alpha_logscale 70 | ): # Exp baked into cuda kernel, cancel it out with a log 71 | alpha = torch.log(alpha) 72 | beta = torch.log(beta) 73 | 74 | x = FusedAntiAliasActivation.apply( 75 | x, self.upsample.filter, self.downsample.lowpass.filter, alpha, beta 76 | ) 77 | return x 78 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/cuda/anti_alias_activation.cpp: -------------------------------------------------------------------------------- 1 | /* coding=utf-8 2 | * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include <torch/extension.h> 18 | 19 | extern "C" torch::Tensor fwd_cuda(torch::Tensor const &input, torch::Tensor const &up_filter, torch::Tensor const &down_filter, torch::Tensor const &alpha, torch::Tensor const &beta); 20 | 21 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 22 | m.def("forward", &fwd_cuda, "Anti-Alias Activation forward (CUDA)"); 23 | } -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/cuda/compat.h: -------------------------------------------------------------------------------- 1 | /* coding=utf-8 2 | * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /*This code is copied from NVIDIA apex: 18 | * https://github.com/NVIDIA/apex 19 | * with minor changes. */ 20 | 21 | #ifndef TORCH_CHECK 22 | #define TORCH_CHECK AT_CHECK 23 | #endif 24 | 25 | #ifdef VERSION_GE_1_3 26 | #define DATA_PTR data_ptr 27 | #else 28 | #define DATA_PTR data 29 | #endif 30 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/cuda/load.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 NVIDIA CORPORATION. 2 | # Licensed under the MIT license. 3 | 4 | import os 5 | import pathlib 6 | import subprocess 7 | 8 | from torch.utils import cpp_extension 9 | 10 | """ 11 | Setting this param to a list can generate different compilation commands (with a different order of architectures) and lead to recompilation of the fused kernels. 12 | Set it to an empty string to avoid recompilation and assign arch flags explicitly in extra_cuda_cflags below. 13 | """ 14 | os.environ["TORCH_CUDA_ARCH_LIST"] = "" 15 | 16 | 17 | def load(): 18 | # Check if cuda 11 is installed for compute capability 8.0 19 | cc_flag = [] 20 | _, bare_metal_major, _ = _get_cuda_bare_metal_version(cpp_extension.CUDA_HOME) 21 | if int(bare_metal_major) >= 11: 22 | cc_flag.append("-gencode") 23 | cc_flag.append("arch=compute_80,code=sm_80") 24 | 25 | # Build path 26 | srcpath = pathlib.Path(__file__).parent.absolute() 27 | buildpath = srcpath / "build" 28 | _create_build_dir(buildpath) 29 | 30 | # Helper function to build the kernels. 
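# (Note on the call below: `torch.utils.cpp_extension.load` JIT-compiles the CUDA sources with nvcc/ninja on the first call and caches the resulting shared library in `buildpath`, so later calls reuse the compiled kernel as long as sources and flags are unchanged.)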
31 | def _cpp_extension_load_helper(name, sources, extra_cuda_flags): 32 | return cpp_extension.load( 33 | name=name, 34 | sources=sources, 35 | build_directory=buildpath, 36 | extra_cflags=[ 37 | "-O3", 38 | ], 39 | extra_cuda_cflags=[ 40 | "-O3", 41 | "-gencode", 42 | "arch=compute_70,code=sm_70", 43 | "--use_fast_math", 44 | ] 45 | + extra_cuda_flags 46 | + cc_flag, 47 | verbose=True, 48 | ) 49 | 50 | extra_cuda_flags = [ 51 | "-U__CUDA_NO_HALF_OPERATORS__", 52 | "-U__CUDA_NO_HALF_CONVERSIONS__", 53 | "--expt-relaxed-constexpr", 54 | "--expt-extended-lambda", 55 | ] 56 | 57 | sources = [ 58 | srcpath / "anti_alias_activation.cpp", 59 | srcpath / "anti_alias_activation_cuda.cu", 60 | ] 61 | anti_alias_activation_cuda = _cpp_extension_load_helper( 62 | "anti_alias_activation_cuda", sources, extra_cuda_flags 63 | ) 64 | 65 | return anti_alias_activation_cuda 66 | 67 | 68 | def _get_cuda_bare_metal_version(cuda_dir): 69 | raw_output = subprocess.check_output( 70 | [cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True 71 | ) 72 | output = raw_output.split() 73 | release_idx = output.index("release") + 1 74 | release = output[release_idx].split(".") 75 | bare_metal_major = release[0] 76 | bare_metal_minor = release[1][0] 77 | 78 | return raw_output, bare_metal_major, bare_metal_minor 79 | 80 | 81 | def _create_build_dir(buildpath): 82 | try: 83 | os.mkdir(buildpath) 84 | except OSError: 85 | if not os.path.isdir(buildpath): 86 | print(f"Creation of the build directory {buildpath} failed") 87 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/torch/__init__.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0 2 | # LICENSE is in incl_licenses directory. 3 | 4 | from .filter import * 5 | from .resample import * 6 | from .act import * 7 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/torch/act.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0 2 | # LICENSE is in incl_licenses directory. 3 | 4 | import torch.nn as nn 5 | from .resample import UpSample1d, DownSample1d 6 | 7 | 8 | class Activation1d(nn.Module): 9 | def __init__( 10 | self, 11 | activation, 12 | up_ratio: int = 2, 13 | down_ratio: int = 2, 14 | up_kernel_size: int = 12, 15 | down_kernel_size: int = 12, 16 | ): 17 | super().__init__() 18 | self.up_ratio = up_ratio 19 | self.down_ratio = down_ratio 20 | self.act = activation 21 | self.upsample = UpSample1d(up_ratio, up_kernel_size) 22 | self.downsample = DownSample1d(down_ratio, down_kernel_size) 23 | 24 | # x: [B,C,T] 25 | def forward(self, x): 26 | x = self.upsample(x) 27 | x = self.act(x) 28 | x = self.downsample(x) 29 | 30 | return x 31 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/torch/filter.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0 2 | # LICENSE is in incl_licenses directory. 
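# This module provides a Kaiser-windowed sinc low-pass filter (`kaiser_sinc_filter1d`, `LowPassFilter1d`); the anti-aliased up-/downsampling in `resample.py` is built on top of it.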
3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import math 8 | 9 | if "sinc" in dir(torch): 10 | sinc = torch.sinc 11 | else: 12 | # This code is adapted from adefossez's julius.core.sinc under the MIT License 13 | # https://adefossez.github.io/julius/julius/core.html 14 | # LICENSE is in incl_licenses directory. 15 | def sinc(x: torch.Tensor): 16 | """ 17 | Implementation of sinc, i.e. sin(pi * x) / (pi * x) 18 | __Warning__: Different from julius.sinc, the input is multiplied by `pi`! 19 | """ 20 | return torch.where( 21 | x == 0, 22 | torch.tensor(1.0, device=x.device, dtype=x.dtype), 23 | torch.sin(math.pi * x) / math.pi / x, 24 | ) 25 | 26 | 27 | # This code is adapted from adefossez's julius.lowpass.LowPassFilters under the MIT License 28 | # https://adefossez.github.io/julius/julius/lowpass.html 29 | # LICENSE is in incl_licenses directory. 30 | def kaiser_sinc_filter1d( 31 | cutoff, half_width, kernel_size 32 | ): # return filter [1,1,kernel_size] 33 | even = kernel_size % 2 == 0 34 | half_size = kernel_size // 2 35 | 36 | # For kaiser window 37 | delta_f = 4 * half_width 38 | A = 2.285 * (half_size - 1) * math.pi * delta_f + 7.95 39 | if A > 50.0: 40 | beta = 0.1102 * (A - 8.7) 41 | elif A >= 21.0: 42 | beta = 0.5842 * (A - 21) ** 0.4 + 0.07886 * (A - 21.0) 43 | else: 44 | beta = 0.0 45 | window = torch.kaiser_window(kernel_size, beta=beta, periodic=False) 46 | 47 | # ratio = 0.5/cutoff -> 2 * cutoff = 1 / ratio 48 | if even: 49 | time = torch.arange(-half_size, half_size) + 0.5 50 | else: 51 | time = torch.arange(kernel_size) - half_size 52 | if cutoff == 0: 53 | filter_ = torch.zeros_like(time) 54 | else: 55 | filter_ = 2 * cutoff * window * sinc(2 * cutoff * time) 56 | """ 57 | Normalize filter to have sum = 1, otherwise we will have a small leakage of the constant component in the input signal. 58 | """ 59 | filter_ /= filter_.sum() 60 | filter = filter_.view(1, 1, kernel_size) 61 | 62 | return filter 63 | 64 | 65 | class LowPassFilter1d(nn.Module): 66 | def __init__( 67 | self, 68 | cutoff=0.5, 69 | half_width=0.6, 70 | stride: int = 1, 71 | padding: bool = True, 72 | padding_mode: str = "replicate", 73 | kernel_size: int = 12, 74 | ): 75 | """ 76 | kernel_size should be an even number for the stylegan3 setup; in this implementation, an odd number is also possible. 
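A minimal usage sketch (the cutoff/half_width values are illustrative, not tuned):
>>> lp = LowPassFilter1d(cutoff=0.25, half_width=0.1, kernel_size=12)
>>> lp(torch.rand(1, 2, 100)).shape
torch.Size([1, 2, 100])
With the default stride=1 and padding=True, the sequence length is preserved.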
77 | """ 78 | super().__init__() 79 | if cutoff < -0.0: 80 | raise ValueError("Minimum cutoff must be larger than zero.") 81 | if cutoff > 0.5: 82 | raise ValueError("A cutoff above 0.5 does not make sense.") 83 | self.kernel_size = kernel_size 84 | self.even = kernel_size % 2 == 0 85 | self.pad_left = kernel_size // 2 - int(self.even) 86 | self.pad_right = kernel_size // 2 87 | self.stride = stride 88 | self.padding = padding 89 | self.padding_mode = padding_mode 90 | filter = kaiser_sinc_filter1d(cutoff, half_width, kernel_size) 91 | self.register_buffer("filter", filter) 92 | 93 | # Input [B, C, T] 94 | def forward(self, x): 95 | _, C, _ = x.shape 96 | 97 | if self.padding: 98 | x = F.pad(x, (self.pad_left, self.pad_right), mode=self.padding_mode) 99 | out = F.conv1d(x, self.filter.expand(C, -1, -1), stride=self.stride, groups=C) 100 | 101 | return out 102 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/alias_free_activation/torch/resample.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0 2 | # LICENSE is in incl_licenses directory. 3 | 4 | import torch.nn as nn 5 | from torch.nn import functional as F 6 | 7 | from .filter import LowPassFilter1d 8 | from .filter import kaiser_sinc_filter1d 9 | 10 | 11 | class UpSample1d(nn.Module): 12 | def __init__(self, ratio=2, kernel_size=None): 13 | super().__init__() 14 | self.ratio = ratio 15 | self.kernel_size = ( 16 | int(6 * ratio // 2) * 2 if kernel_size is None else kernel_size 17 | ) 18 | self.stride = ratio 19 | self.pad = self.kernel_size // ratio - 1 20 | self.pad_left = self.pad * self.stride + (self.kernel_size - self.stride) // 2 21 | self.pad_right = ( 22 | self.pad * self.stride + (self.kernel_size - self.stride + 1) // 2 23 | ) 24 | filter = kaiser_sinc_filter1d( 25 | cutoff=0.5 / ratio, half_width=0.6 / ratio, kernel_size=self.kernel_size 26 | ) 27 | self.register_buffer("filter", filter) 28 | 29 | # x: [B, C, T] 30 | def forward(self, x): 31 | _, C, _ = x.shape 32 | 33 | x = F.pad(x, (self.pad, self.pad), mode="replicate") 34 | x = self.ratio * F.conv_transpose1d( 35 | x, self.filter.expand(C, -1, -1), stride=self.stride, groups=C 36 | ) 37 | x = x[..., self.pad_left : -self.pad_right] 38 | 39 | return x 40 | 41 | 42 | class DownSample1d(nn.Module): 43 | def __init__(self, ratio=2, kernel_size=None): 44 | super().__init__() 45 | self.ratio = ratio 46 | self.kernel_size = ( 47 | int(6 * ratio // 2) * 2 if kernel_size is None else kernel_size 48 | ) 49 | self.lowpass = LowPassFilter1d( 50 | cutoff=0.5 / ratio, 51 | half_width=0.6 / ratio, 52 | stride=ratio, 53 | kernel_size=self.kernel_size, 54 | ) 55 | 56 | def forward(self, x): 57 | xx = self.lowpass(x) 58 | 59 | return xx 60 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/env.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/jik876/hifi-gan under the MIT license. 2 | # LICENSE is in incl_licenses directory. 
3 | 4 | import os 5 | import shutil 6 | 7 | 8 | class AttrDict(dict): 9 | def __init__(self, *args, **kwargs): 10 | super(AttrDict, self).__init__(*args, **kwargs) 11 | self.__dict__ = self 12 | 13 | 14 | def build_env(config, config_name, path): 15 | t_path = os.path.join(path, config_name) 16 | if config != t_path: 17 | os.makedirs(path, exist_ok=True) 18 | shutil.copyfile(config, os.path.join(path, config_name)) 19 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/synthesis/vocoder/nvidia_bigvgan/utils.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/jik876/hifi-gan under the MIT license. 2 | # LICENSE is in incl_licenses directory. 3 | 4 | import glob 5 | import os 6 | import matplotlib 7 | import torch 8 | from torch.nn.utils import weight_norm 9 | 10 | matplotlib.use("Agg") 11 | import matplotlib.pylab as plt 12 | from scipy.io.wavfile import write 13 | 14 | from .meldataset import MAX_WAV_VALUE 15 | 16 | 17 | def plot_spectrogram(spectrogram): 18 | fig, ax = plt.subplots(figsize=(10, 2)) 19 | im = ax.imshow(spectrogram, aspect="auto", origin="lower", interpolation="none") 20 | plt.colorbar(im, ax=ax) 21 | 22 | fig.canvas.draw() 23 | plt.close() 24 | 25 | return fig 26 | 27 | 28 | def plot_spectrogram_clipped(spectrogram, clip_max=2.0): 29 | fig, ax = plt.subplots(figsize=(10, 2)) 30 | im = ax.imshow( 31 | spectrogram, 32 | aspect="auto", 33 | origin="lower", 34 | interpolation="none", 35 | vmin=1e-6, 36 | vmax=clip_max, 37 | ) 38 | plt.colorbar(im, ax=ax) 39 | 40 | fig.canvas.draw() 41 | plt.close() 42 | 43 | return fig 44 | 45 | 46 | def init_weights(m, mean=0.0, std=0.01): 47 | classname = m.__class__.__name__ 48 | if classname.find("Conv") != -1: 49 | m.weight.data.normal_(mean, std) 50 | 51 | 52 | def apply_weight_norm(m): 53 | classname = m.__class__.__name__ 54 | if classname.find("Conv") != -1: 55 | weight_norm(m) 56 | 57 | 58 | def get_padding(kernel_size, dilation=1): 59 | return int((kernel_size * dilation - dilation) / 2) 60 | 61 | 62 | def load_checkpoint(filepath, device): 63 | assert os.path.isfile(filepath) 64 | print(f"Loading '{filepath}'") 65 | checkpoint_dict = torch.load(filepath, map_location=device) 66 | print("Complete.") 67 | return checkpoint_dict 68 | 69 | 70 | def save_checkpoint(filepath, obj): 71 | print(f"Saving checkpoint to {filepath}") 72 | torch.save(obj, filepath) 73 | print("Complete.") 74 | 75 | 76 | def scan_checkpoint(cp_dir, prefix, renamed_file=None): 77 | # Fallback to original scanning logic first 78 | pattern = os.path.join(cp_dir, prefix + "????????") 79 | cp_list = glob.glob(pattern) 80 | 81 | if len(cp_list) > 0: 82 | last_checkpoint_path = sorted(cp_list)[-1] 83 | print(f"[INFO] Resuming from checkpoint: '{last_checkpoint_path}'") 84 | return last_checkpoint_path 85 | 86 | # If no pattern-based checkpoints are found, check for renamed file 87 | if renamed_file: 88 | renamed_path = os.path.join(cp_dir, renamed_file) 89 | if os.path.isfile(renamed_path): 90 | print(f"[INFO] Resuming from renamed checkpoint: '{renamed_file}'") 91 | return renamed_path 92 | 93 | return None 94 | 95 | 96 | def save_audio(audio, path, sr): 97 | # wav: torch with 1d shape 98 | audio = audio * MAX_WAV_VALUE 99 | audio = audio.cpu().numpy().astype("int16") 100 | write(path, sr, audio) 101 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/tbx_utils.py: 
-------------------------------------------------------------------------------- 1 | import typing as tp 2 | 3 | import numpy as np 4 | from padertorch.utils import to_numpy 5 | from padertorch.summary.tbx_utils import spectrogram_to_image 6 | import torch 7 | from torch import Tensor 8 | from torchvision.utils import make_grid 9 | 10 | 11 | def tensor_to_image( 12 | signal: Tensor, input_type: str, sequence_last: bool = True 13 | ): 14 | x = to_numpy(signal, detach=True) 15 | if input_type == 'image': 16 | x = (x * 255).astype(np.uint8) # assumes values in [0, 1] 17 | elif input_type == 'spectrogram': 18 | if sequence_last: 19 | x = x.transpose(-1, -2) 20 | x = spectrogram_to_image(x, batch_first=None, log=False) 21 | else: 22 | raise ValueError(f'Unknown input type {input_type}') 23 | return x 24 | 25 | 26 | def batch_image_to_grid( 27 | batch_image: torch.Tensor, 28 | input_shape_format: str = 'bchw', 29 | height_axis: tp.Optional[str] = None, 30 | width_axis: tp.Optional[str] = None, 31 | sequence_axis: tp.Optional[str] = None, 32 | stack: tp.Optional[str] = None, 33 | origin: str = 'upper', 34 | normalize: bool = True, 35 | scale_each: bool = False, 36 | ): 37 | """ 38 | >>> batch_image = torch.rand(4, 3, 32, 32) 39 | >>> grid = batch_image_to_grid(batch_image) 40 | >>> grid.shape 41 | torch.Size([3, 138, 36]) 42 | >>> grid = batch_image_to_grid(\ 43 | torch.rand(4, 32, 32),\ 44 | input_shape_format='b h w'\ 45 | ) 46 | >>> grid.shape 47 | torch.Size([138, 36]) 48 | 49 | Args: 50 | batch_image: Batched images of shape (batch, channel, height, width) or 51 | (batch, height, width). 52 | input_shape_format: Format of the input shape. Should be a string of 53 | dimension names, with or without spaces, e.g., 'bchw' or 'b c h w'. 54 | height_axis: Name of the height (frequency) axis. 55 | width_axis: Name of the width (time) axis. 56 | sequence_axis: Name of the sequence axis; if `stack` is None, it determines the stacking direction. 57 | stack: How to stack the images: `height_axis` stacks them vertically (one per row), `width_axis` horizontally. 58 | origin: Origin of the plot. Can be `'upper'` or `'lower'`. 
59 | normalize: See make_grid 60 | scale_each: See make_grid 61 | """ 62 | if origin not in ('upper', 'lower'): 63 | raise ValueError(f'"origin" should be "upper" or "lower" but got {origin}') 64 | 65 | dims = [d for d in input_shape_format if not d.isspace()] # accepts 'bchw' and 'b c h w' 66 | if height_axis is None: 67 | height_axis = dims[-2] 68 | if width_axis is None: 69 | width_axis = dims[-1] 70 | if height_axis == width_axis: 71 | raise ValueError( 72 | f'Height and width axis should be different but got {height_axis} ' 73 | 'for both "height_axis" and "width_axis"' 74 | ) 75 | if stack is None: 76 | if sequence_axis is not None: 77 | sequence_last = dims[-1] == sequence_axis 78 | stack = height_axis if sequence_last else width_axis 79 | else: 80 | stack = height_axis 81 | 82 | if stack not in (height_axis, width_axis): 83 | raise ValueError( 84 | f'"stack" should be "{height_axis}" or ' 85 | f'"{width_axis}" but got {stack}' 86 | ) 87 | 88 | if len(dims) != batch_image.ndim: 89 | raise ValueError(f'Shape format {input_shape_format} does not match input shape {batch_image.shape}') 90 | 91 | if batch_image.ndim == 3: 92 | # Add channel dimension 93 | batch_image = batch_image.unsqueeze(1) 94 | dims.insert(1, 'c') 95 | 96 | if origin == 'lower': 97 | # Reverse the order of the height (frequency) dimension 98 | batch_image = batch_image.flip(dims.index(height_axis)) 99 | 100 | grid = make_grid( 101 | batch_image, 102 | normalize=normalize, 103 | scale_each=scale_each, 104 | nrow=1 if stack == height_axis else batch_image.shape[0], 105 | ) 106 | if batch_image.shape[1] == 1: 107 | # Remove color dimension 108 | grid = grid[0] 109 | return grid 110 | -------------------------------------------------------------------------------- /padertorch/contrib/mk/typing.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import typing as tp 3 | 4 | from torch import Tensor 5 | 6 | 7 | TPath = tp.Union[str, Path] 8 | TSeqLen = tp.Optional[tp.List[int]] 9 | TActivationFn = tp.Union[str, tp.Callable] 10 | TSeqReturn = tp.Tuple[Tensor, TSeqLen] 11 | TDevice = tp.Union[str, int, tp.Sequence[int]] 12 | -------------------------------------------------------------------------------- /padertorch/contrib/neumann/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/neumann/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/neumann/evaluation.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Iterable 2 | 3 | from logging import getLogger 4 | import paderbox as pb 5 | import numpy as np 6 | import pb_bss 7 | import operator 8 | import re 9 | 10 | logger = getLogger('evaluation') 11 | 12 | 13 | def compute_means( 14 | results: dict, 15 | mean_keys: Optional[Iterable] = None, 16 | exclude_keys: tuple = (r'.*selection', ), 17 | skip_invalid=False, 18 | ) -> dict: 19 | """ 20 | 21 | Args: 22 | results: Input data dict. Structure should be: 23 | `{'dataset_name': {'example_id': {...nested values...}}}` 24 | mean_keys: Keys (if nested, separate with '.') to compute a mean over. 25 | If `None`, computes mean over all keys found in the data. 26 | exclude_keys: Keys or key patterns to exclude when inferring mean keys 27 | from data. Has no effect if `mean_keys is not None`. 
28 | skip_invalid: If `True`, invalid keys are skipped (e.g., not all 29 | examples have this key) 30 | 31 | Returns: 32 | {'dataset_name': {... nested means ...}} 33 | """ 34 | means = {} 35 | for dataset, dataset_results in results.items(): 36 | means[dataset] = {} 37 | 38 | # Flatten to structure {'example_id': {'path.to.sub.entry': value}} 39 | flattened = { 40 | k: pb.utils.nested.flatten(v) for k, v in 41 | dataset_results.items() 42 | } 43 | 44 | if mean_keys is None: 45 | # Try to infer mean keys from first element in data 46 | _mean_keys = list(filter(lambda x: not any( 47 | re.fullmatch(pattern, x) for pattern in exclude_keys 48 | ), next(iter(flattened.values())).keys())) 49 | else: 50 | _mean_keys = mean_keys 51 | 52 | for mean_key in _mean_keys: 53 | try: 54 | means[dataset][mean_key] = np.mean(np.array([ 55 | v[mean_key] for v in flattened.values() 56 | ])) 57 | except KeyError: 58 | if skip_invalid: 59 | pass 60 | else: 61 | raise 62 | means[dataset] = pb.utils.nested.deflatten(means[dataset]) 63 | 64 | return means 65 | -------------------------------------------------------------------------------- /padertorch/contrib/tcl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/tcl/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/tcl/dc.py: -------------------------------------------------------------------------------- 1 | import einops 2 | import torch 3 | from torch.nn.utils.rnn import PackedSequence 4 | 5 | import padertorch as pt 6 | 7 | 8 | class DeepClusteringModel(pt.Model): 9 | def __init__( 10 | self, 11 | F=257, 12 | recurrent_layers=2, 13 | units=600, 14 | E=20, 15 | input_feature_transform='identity' 16 | ): 17 | """ 18 | 19 | TODO: Dropout 20 | TODO: Loss mask to avoid to assign embeddings to silent regions 21 | 22 | Args: 23 | F: Number of frequency bins, fft_size / 2 + 1 24 | recurrent_layers: 25 | units: results in `units` forward and `units` backward units 26 | E: Dimensionality of the embedding 27 | """ 28 | super().__init__() 29 | self.E = E 30 | self.F = F 31 | self.input_feature_transform = input_feature_transform 32 | self.blstm = torch.nn.LSTM( 33 | F, units, recurrent_layers, bidirectional=True 34 | ) 35 | self.linear = torch.nn.Linear(2 * units, F * E) 36 | 37 | def forward(self, batch): 38 | """ 39 | 40 | Args: 41 | batch: Dictionary with lists of tensors 42 | 43 | Returns: List of mask tensors 44 | 45 | """ 46 | 47 | h = pt.ops.pack_sequence(batch['Y_abs']) 48 | 49 | if self.input_feature_transform == 'identity': 50 | pass 51 | elif self.input_feature_transform == 'log1p': 52 | # This is equal to the mu-law for mu=1. 
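# log1p(x) = log(1 + x) compresses large magnitudes but, unlike a plain log, stays finite at x = 0.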
53 | h = pt.ops.sequence.log1p(h) 54 | elif self.input_feature_transform == 'log': 55 | h = PackedSequence(h.data + 1e-10, h.batch_sizes) # avoid log(0) 56 | h = pt.ops.sequence.log(h) 57 | else: 58 | raise NotImplementedError(self.input_feature_transform) 59 | 60 | _, F = h.data.size() 61 | assert F == self.F, f'self.F = {self.F} != F = {F}' 62 | 63 | # Returns tensor with shape (t, b, num_directions * hidden_size) 64 | h, _ = self.blstm(h) 65 | 66 | h = PackedSequence(self.linear(h.data), h.batch_sizes) 67 | h_data = einops.rearrange(h.data, 'tb (e f) -> tb e f', e=self.E) 68 | 69 | # Hershey 2016 page 2 top right paragraph: Unit norm 70 | h_data = torch.nn.functional.normalize(h_data, dim=-2) 71 | 72 | embedding = PackedSequence(h_data, h.batch_sizes) 73 | embedding = pt.ops.unpack_sequence(embedding) 74 | return embedding 75 | 76 | def review(self, batch, model_out): 77 | dc_loss = list() 78 | for embedding, target_mask in zip(model_out, batch['target_mask']): 79 | dc_loss.append(pt.ops.losses.deep_clustering_loss( 80 | einops.rearrange(embedding, 't e f -> (t f) e'), 81 | einops.rearrange(target_mask, 't k f -> (t f) k') 82 | )) 83 | 84 | return {'losses': {'dc_loss': torch.mean(torch.stack(dc_loss))}} 85 | -------------------------------------------------------------------------------- /padertorch/contrib/tcl/speaker_embeddings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/contrib/tcl/speaker_embeddings/__init__.py -------------------------------------------------------------------------------- /padertorch/contrib/tcl/speaker_embeddings/eer_metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.interpolate import interp1d 3 | from scipy.optimize import brentq 4 | from sklearn.metrics import roc_curve 5 | 6 | 7 | def get_eer(scores, labels): 8 | """ 9 | Slightly adapted version of the VoxSRC EER calculation script 10 | """ 11 | fpr, tpr, thresholds = roc_curve(labels, scores, pos_label=1) 12 | eer = brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.) 13 | return eer 14 | 15 | 16 | def get_dcf(scores, labels, p_target=0.05, c_miss=1, c_fa=1): 17 | """ 18 | Slightly adapted version of the VoxSRC DCF calculation script 19 | """ 20 | 21 | indices = np.argsort(scores) 22 | labels = np.array(labels).astype(np.int32)[indices] 23 | fnrs = [] 24 | fprs = [] 25 | for i in range(0, len(labels)): 26 | if i == 0: 27 | fnrs.append(labels[i]) 28 | fprs.append(1 - labels[i]) 29 | else: 30 | fnrs.append(fnrs[i - 1] + labels[i]) 31 | fprs.append(fprs[i - 1] + 1 - labels[i]) 32 | fnrs_norm = sum(labels) 33 | fprs_norm = len(labels) - fnrs_norm 34 | 35 | # Now divide by the total number of false negative errors to 36 | # obtain the false negative rates across all thresholds 37 | fnrs = [x / float(fnrs_norm) for x in fnrs] 38 | 39 | # Divide by the total number of correct positives to get the 40 | # true positive rate. Subtract these quantities from 1 to 41 | # get the false positive rates. 42 | fprs = [1 - x / float(fprs_norm) for x in fprs] 43 | 44 | min_c_det = float("inf") 45 | for i in range(0, len(fnrs)): 46 | # See Equation (2). It is a weighted sum of false negative 47 | # and false positive errors. 
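# i.e. c_det = c_miss * P_miss(threshold) * p_target + c_fa * P_fa(threshold) * (1 - p_target), minimized over all thresholds below.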
48 | c_det = c_miss * fnrs[i] * p_target + c_fa * fprs[i] * (1 - p_target) 49 | if c_det < min_c_det: 50 | min_c_det = c_det 51 | c_def = min(c_miss * p_target, c_fa * (1 - p_target)) 52 | min_dcf = min_c_det / c_def 53 | return min_dcf -------------------------------------------------------------------------------- /padertorch/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import batch 2 | from . import utils 3 | from . import segment 4 | 5 | from .batch import * 6 | -------------------------------------------------------------------------------- /padertorch/data/utils.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | 5 | def pad_tensor(vec, pad, axis): 6 | """ 7 | Args: 8 | vec - tensor to pad 9 | pad - the size to pad to 10 | axis - dimension to pad 11 | 12 | Returns: 13 | a new tensor padded to 'pad' in dimension 'axis' 14 | """ 15 | 16 | pad_size = list(vec.shape) 17 | pad_size[axis] = pad - vec.shape[axis] 18 | return np.concatenate([vec, np.zeros(pad_size, dtype=vec.dtype)], axis=axis) # zero-pad without changing the dtype 19 | 20 | 21 | def collate_fn(batch): 22 | """Moves list inside of dict/dataclass recursively. 23 | 24 | Can be used as map after batching of a dataset: 25 | `dataset.batch(...).map(collate_fn)` 26 | 27 | Args: 28 | batch: list of examples 29 | 30 | Returns: 31 | 32 | >>> batch = [{'a': 1}, {'a': 2}] 33 | >>> collate_fn(batch) 34 | {'a': [1, 2]} 35 | >>> collate_fn(tuple(batch)) 36 | {'a': (1, 2)} 37 | 38 | >>> batch = [{'a': {'b': [1, 2]}}, {'a': {'b': [3, 4]}}] 39 | >>> collate_fn(batch) 40 | {'a': {'b': [[1, 2], [3, 4]]}} 41 | 42 | >>> import dataclasses 43 | >>> Point = dataclasses.make_dataclass('Point', ['x', 'y']) 44 | >>> batch = [Point(1, 2), Point(3, 4)] 45 | >>> batch 46 | [Point(x=1, y=2), Point(x=3, y=4)] 47 | >>> collate_fn(batch) 48 | Point(x=[1, 3], y=[2, 4]) 49 | >>> collate_fn(tuple(batch)) 50 | Point(x=(1, 3), y=(2, 4)) 51 | """ 52 | assert isinstance(batch, (tuple, list)), (type(batch), batch) 53 | 54 | if isinstance(batch[0], dict): 55 | for b in batch[1:]: 56 | assert batch[0].keys() == b.keys(), batch 57 | return batch[0].__class__({ 58 | k: (collate_fn(batch.__class__([b[k] for b in batch]))) 59 | for k in batch[0] 60 | }) 61 | elif hasattr(batch[0], '__dataclass_fields__'): 62 | for b in batch[1:]: 63 | assert batch[0].__dataclass_fields__ == b.__dataclass_fields__, batch 64 | return batch[0].__class__(**{ 65 | k: (collate_fn(batch.__class__([getattr(b, k) for b in batch]))) 66 | for k in batch[0].__dataclass_fields__ 67 | }) 68 | else: 69 | return batch 70 | -------------------------------------------------------------------------------- /padertorch/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .fully_connected import fully_connected_stack 2 | from .normalization import Normalization 3 | from .recurrent import StatefulLSTM 4 | from .wavenet.wavenet import WaveNet 5 | from . 
import dual_path_rnn 6 | 7 | -------------------------------------------------------------------------------- /padertorch/modules/fully_connected.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from typing import List 3 | 4 | from torch import nn 5 | 6 | from padertorch.ops.mappings import ACTIVATION_FN_MAP 7 | 8 | 9 | def fully_connected_stack( 10 | input_size: int, 11 | hidden_size: List[int], 12 | output_size: int, 13 | activation: str = 'relu', 14 | dropout: float = 0.5, 15 | output_activation: str = None, 16 | ): 17 | """ 18 | 19 | dropout describes the forget-probability. 20 | More information to dropout: https://arxiv.org/pdf/1207.0580.pdf 21 | 22 | Args: 23 | input_size: has to be defined 24 | hidden_size: size of the hidden layers 25 | either None, int, list or tuple 26 | output_size: has to be defined 27 | activation: used in all layers except the last 28 | dropout: Dropout forget ratio (opposite to TensorFlow) 29 | default take from: 30 | https://www.reddit.com/r/MachineLearning/comments/3oztvk/why_50_when_using_dropout/ 31 | output_activation: applied after the last layer 32 | 33 | >>> fully_connected_stack(513, [1024, 1024], 1024) 34 | Sequential( 35 | (dropout_0): Dropout(p=0.5, inplace=False) 36 | (linear_0): Linear(in_features=513, out_features=1024, bias=True) 37 | (relu_0): ReLU() 38 | (dropout_1): Dropout(p=0.5, inplace=False) 39 | (linear_1): Linear(in_features=1024, out_features=1024, bias=True) 40 | (relu_1): ReLU() 41 | (dropout_2): Dropout(p=0.5, inplace=False) 42 | (linear_2): Linear(in_features=1024, out_features=1024, bias=True) 43 | ) 44 | """ 45 | assert input_size is not None, input_size 46 | assert output_size is not None, output_size 47 | 48 | layers = collections.OrderedDict() 49 | if hidden_size is None: 50 | l_n_units = [input_size, output_size] 51 | elif isinstance(hidden_size, (list, tuple)): 52 | l_n_units = [input_size] + list(hidden_size) + [output_size] 53 | elif isinstance(hidden_size, int): 54 | l_n_units = [input_size, hidden_size, output_size] 55 | else: 56 | raise TypeError(hidden_size) 57 | 58 | activation = [activation] * (len(l_n_units) - 2) + [output_activation] 59 | 60 | for l_idx, n_units in enumerate(l_n_units[:-1]): 61 | layers[f'dropout_{l_idx}'] = nn.Dropout(dropout) 62 | layers[f'linear_{l_idx}'] = nn.Linear(n_units, l_n_units[l_idx + 1]) 63 | if activation[l_idx] is not None and activation[l_idx] != 'identity': 64 | layers[f'{activation[l_idx]}_{l_idx}'] = \ 65 | ACTIVATION_FN_MAP[activation[l_idx]]() 66 | return nn.Sequential(layers) 67 | -------------------------------------------------------------------------------- /padertorch/modules/recurrent.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from padertorch.base import Module 3 | 4 | 5 | class StatefulLSTM(Module): 6 | _states = None 7 | 8 | def __init__( 9 | self, 10 | input_size: int, 11 | hidden_size: int, 12 | num_layers: int = 1, 13 | bidirectional: bool = False, 14 | dropout: float = 0., 15 | batch_first: bool = True, 16 | save_states: bool = True 17 | ): 18 | super().__init__() 19 | self.lstm = torch.nn.LSTM(input_size=input_size, 20 | hidden_size=hidden_size, 21 | num_layers=num_layers, 22 | bidirectional=bidirectional, 23 | dropout=dropout, 24 | batch_first=batch_first) 25 | self.hidden_size = hidden_size 26 | self.bidirectional = bidirectional 27 | self.num_layers = num_layers 28 | self.batch_first = batch_first 29 | self.save_states = save_states 
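# The hidden/cell state persists across forward() calls via the `states` property below; construct with save_states=False (or `del lstm.states`) to reset it after each call.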
30 | 31 | @property 32 | def states(self): 33 | return self._states 34 | 35 | @states.deleter 36 | def states(self): 37 | self._states = None 38 | 39 | @states.setter 40 | def states(self, states): 41 | self._states = states 42 | 43 | def forward(self, x): 44 | h, self.states = self.lstm(x, self.states) 45 | if not self.save_states: 46 | del self.states 47 | return h 48 | 49 | -------------------------------------------------------------------------------- /padertorch/modules/wavenet/__init__.py: -------------------------------------------------------------------------------- 1 | from .wavenet import * 2 | from . import nv_wavenet -------------------------------------------------------------------------------- /padertorch/modules/wavenet/nv_wavenet/Makefile: -------------------------------------------------------------------------------- 1 | # ****************************************************************************** 2 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of the NVIDIA CORPORATION nor the 12 | # names of its contributors may be used to endorse or promote products 13 | # derived from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | # DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 19 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | # 26 | # ****************************************************************************** 27 | 28 | NVCC = nvcc 29 | 30 | ARCH=sm_61 31 | NVCC_FLAGS = -arch=$(ARCH) -std=c++11 32 | NVCC_FLAGS += --use_fast_math 33 | 34 | MAX_REGS = 128 35 | 36 | HEADERS = ./nv_wavenet_util.cuh \ 37 | ./nv_wavenet_singleblock.cuh \ 38 | ./nv_wavenet_dualblock.cuh \ 39 | ./nv_wavenet_persistent.cuh \ 40 | ./nv_wavenet.cuh \ 41 | ./matrix_math.cuh \ 42 | ./softmax.cuh \ 43 | ./nv_wavenet_conversions.cuh 44 | 45 | default: wavenet_infer 46 | 47 | wavenet_infer: wavenet_infer.cu $(HEADERS) wavenet_infer.h 48 | $(NVCC) $(NVCC_FLAGS) -lineinfo -maxrregcount $(MAX_REGS) -I .. 
wavenet_infer.cu ./matrix.cpp -lz -Xcompiler -fPIC -shared -o libwavenet_infer.so 49 | -------------------------------------------------------------------------------- /padertorch/modules/wavenet/nv_wavenet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/modules/wavenet/nv_wavenet/__init__.py -------------------------------------------------------------------------------- /padertorch/modules/wavenet/nv_wavenet/build.py: -------------------------------------------------------------------------------- 1 | # ***************************************************************************** 2 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # * Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # * Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # * Neither the name of the NVIDIA CORPORATION nor the 12 | # names of its contributors may be used to endorse or promote products 13 | # derived from this software without specific prior written permission. 14 | # 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | # DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 19 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | # 26 | # ***************************************************************************** 27 | import os 28 | import torch 29 | from setuptools import setup 30 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 31 | 32 | abs_path = os.path.dirname(os.path.realpath(__file__)) 33 | library_dirs = [abs_path] 34 | extra_libraries = ['wavenet_infer'] 35 | extra_includes = [abs_path] 36 | 37 | setup( 38 | name='nv_wavenet_ext', 39 | ext_modules=[ 40 | CUDAExtension( 41 | name='nv_wavenet_ext', 42 | sources=['wavenet_infer_wrapper.cpp'], 43 | library_dirs=library_dirs, 44 | runtime_library_dirs=library_dirs, 45 | libraries=extra_libraries, 46 | include_dirs=extra_includes 47 | ) 48 | ], 49 | cmdclass={'build_ext': BuildExtension}, 50 | ) 51 | -------------------------------------------------------------------------------- /padertorch/modules/wavenet/nv_wavenet/matrix.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of the NVIDIA CORPORATION nor the 12 | * names of its contributors may be used to endorse or promote products 13 | * derived from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 19 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | * 26 | ******************************************************************************/ 27 | 28 | #ifndef __MATRIX__ 29 | #define __MATRIX__ 30 | 31 | class Matrix { 32 | private: 33 | float* m_data; 34 | bool m_isTransposed; 35 | int m_rows; 36 | int m_cols; 37 | 38 | public: 39 | Matrix(int rows, int cols, bool isTransposed=false); 40 | 41 | void randomize(float mean, float scale, int sparsity = 0); 42 | 43 | int index(int row, int col); 44 | 45 | void set(int row, int col, float val); 46 | 47 | float get(int row, int col); 48 | 49 | int rows(); 50 | 51 | int cols(); 52 | 53 | void print(const char* name); 54 | 55 | float* data(); 56 | }; 57 | 58 | void matrix_multiply(Matrix& C, Matrix& A, Matrix& B); 59 | void matrix_add(Matrix& C, Matrix& A, Matrix& B); 60 | void matrix_bias(Matrix& C, Matrix&A, Matrix& B); 61 | void matrix_compare(const char* name, Matrix& A, Matrix& B, float max_error=1.e-6, bool relu=false); 62 | void matrix_relu(Matrix& dst, Matrix& src); 63 | void matrix_softmax(Matrix& dst, Matrix& src); 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /padertorch/modules/wavenet/nv_wavenet/nv_wavenet_util.cuh: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 
11 | * * Neither the name of the NVIDIA CORPORATION nor the
12 | * names of its contributors may be used to endorse or promote products
13 | * derived from this software without specific prior written permission.
14 | *
15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
19 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | *
26 | ******************************************************************************/
27 |
28 | #ifndef __DEEPVOICE_UTIL_H__
29 | #define __DEEPVOICE_UTIL_H__
30 |
31 | #include <stdio.h>
32 | #include "cuda_occupancy.h"
33 |
34 | #define gpuErrChk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
35 | inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) {
36 |     if (code != cudaSuccess) {
37 |         fprintf(stderr, "GPUassert: %s %s %d. Do note that nv-wavenet requires Compute Capability 6.0 or later (https://developer.nvidia.com/cuda-gpus).\n", cudaGetErrorString(code), file, line);
38 |         if (abort) exit(code);
39 |     }
40 | }
41 |
42 | int getOccupancy(int deviceId, size_t blockSize, void* func) {
43 |     cudaDeviceProp prop;
44 |     gpuErrChk ( cudaGetDeviceProperties(&prop, deviceId) );
45 |     cudaOccDeviceProp occProp = prop;
46 |
47 |     cudaFuncAttributes attr;
48 |     gpuErrChk ( cudaFuncGetAttributes(&attr, func) );
49 |     cudaOccFuncAttributes occAttr = attr;
50 |
51 |     cudaOccDeviceState occState;
52 |
53 |     cudaOccResult result;
54 |     cudaOccMaxActiveBlocksPerMultiprocessor(&result, &occProp, &occAttr, &occState, blockSize, 0);
55 |
56 |     return result.activeBlocksPerMultiprocessor;
57 |
58 | }
59 |
60 | __device__ __forceinline__ half loadVolatile(const volatile half* y, int index) {
61 |     const volatile __half_raw* chr = (reinterpret_cast<const volatile __half_raw*>(y));
62 |     __half_raw hr;
63 |     hr.x = chr[index].x;
64 |     return half( hr );
65 | }
66 | __device__ __forceinline__ void storeVolatile(volatile half* y, int index, half val) {
67 |     half* y_nv = (half*)y;
68 |     y_nv[index] = val;
69 | }
70 |
71 | __device__ __forceinline__ float loadVolatile(const volatile float* y, int index) {
72 |     return y[index];
73 | }
74 | __device__ __forceinline__ void storeVolatile(volatile float* y, int index, float val) {
75 |     y[index] = val;
76 | }
77 |
78 | __forceinline__ __device__ float sigmoid(float in) {
79 |     float ans = 1.f / (1.f + expf(-in));
80 |     return ans;
81 | }
82 |
83 | __forceinline__ __device__ float _tanh(float in) {
84 |     float ans = tanhf(in);
85 |     return ans;
86 | }
87 |
88 | __device__ __forceinline__ float relu(float f) { return (f < 0.f) ? 0.f : f; }
89 | __device__ __forceinline__ half relu(half h) { half zero = 0.f; return (h < zero) ?
zero : h; } 90 | 91 | #endif 92 | -------------------------------------------------------------------------------- /padertorch/modules/wavenet/nv_wavenet/wavenet_infer.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * * Redistributions of source code must retain the above copyright 7 | * notice, this list of conditions and the following disclaimer. 8 | * * Redistributions in binary form must reproduce the above copyright 9 | * notice, this list of conditions and the following disclaimer in the 10 | * documentation and/or other materials provided with the distribution. 11 | * * Neither the name of the NVIDIA CORPORATION nor the 12 | * names of its contributors may be used to endorse or promote products 13 | * derived from this software without specific prior written permission. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 19 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | * 26 | ******************************************************************************/ 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif 30 | // ------------------------------------------------ 31 | // C-compatible function for wrapper 32 | // ------------------------------------------------ 33 | void wavenet_infer(int sample_count, 34 | int batch_size, 35 | float* embedding_prev, 36 | float* embedding_curr, 37 | int num_layers, 38 | int max_dilation, 39 | float** in_layer_weights_prev, 40 | float** in_layer_weights_curr, 41 | float** in_layer_biases, 42 | float** res_layer_weights, 43 | float** res_layer_biases, 44 | float** skip_layer_weights, 45 | float** skip_layer_biases, 46 | float* conv_out_weight, 47 | float* conv_end_weight, 48 | int use_embed_tanh, 49 | float* cond_input, 50 | int implementation, 51 | int* samples); 52 | 53 | // -------------------------------------------------------- 54 | // For checking the number of channels match current build 55 | // -------------------------------------------------------- 56 | int get_R(void); 57 | int get_S(void); 58 | int get_A(void); 59 | #ifdef __cplusplus 60 | } 61 | #endif 62 | -------------------------------------------------------------------------------- /padertorch/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .losses import * 2 | 3 | from . import sequence 4 | from . import mappings 5 | from . 
import tensor
6 |
7 | from ._stft import STFT
8 | from .einsum import *
9 | from .sequence import *
10 | from .tensor import *
11 | from .mu_law import *
12 |
-------------------------------------------------------------------------------- /padertorch/ops/einsum.py: --------------------------------------------------------------------------------
1 | import string
2 |
3 | import torch
4 |
5 | __all__ = [
6 |     'einsum'
7 | ]
8 |
9 |
10 | def einsum(operation: str, *operands):
11 |     """Wraps `torch.einsum`: allows capital letters and collects operands as in `np.einsum`."""
12 |     remaining_letters = set(string.ascii_lowercase)
13 |     remaining_letters = remaining_letters - set(operation)
14 |     for capital_letter, replacement in zip(set.intersection(
15 |             set(string.ascii_uppercase),
16 |             set(operation)
17 |     ), remaining_letters):
18 |         operation = operation.replace(capital_letter, replacement)
19 |     return torch.einsum(operation, operands)
20 |
-------------------------------------------------------------------------------- /padertorch/ops/losses/__init__.py: --------------------------------------------------------------------------------
1 | from . import classification
2 | from . import regression
3 | from . import source_separation
4 | from . import kl_divergence
5 |
6 | from .classification import *
7 | from .regression import *
8 | from .source_separation import *
9 | from .kl_divergence import *
10 |
-------------------------------------------------------------------------------- /padertorch/ops/losses/classification.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional
3 | from torch.nn.utils.rnn import PackedSequence
4 | import padertorch as pt
5 |
6 |
7 | __all__ = [
8 |     'softmax_cross_entropy',
9 | ]
10 |
11 |
12 | IGNORE_INDEX = -1
13 |
14 |
15 | def softmax_cross_entropy(x, t):
16 |     """Allow inputs to be of type `PackedSequence`.
17 |
18 |     All dimensions but the last are treated as independent
19 |     dimensions, i.e. x.size() == (..., K) where t.size() == (...).
20 |     Similarly, for sequences x.size() == (T, B, ..., K) and
21 |     t.size() == (T, B, ...).
22 |
23 |     Check the test case for typical usage.
24 |
25 |     Args:
26 |         x: `Tensor` or `PackedSequence` holding a multidimensional array whose
27 |             elements indicate unnormalized log probabilities (logits).
28 |         t: Same object type as `x`. Holds integers of ground truth labels.
29 |
30 |     Returns:
31 |         The scalar cross-entropy loss.
32 |     >>> x = torch.randn(100, 3)
33 |     >>> t = torch.randint(0, 3, size=(100,), dtype=torch.long)
34 |     >>> softmax_cross_entropy(x, t).size()
35 |     torch.Size([])
36 |     """
37 |     if isinstance(x, torch.Tensor) and isinstance(t, torch.Tensor):
38 |         pass
39 |     elif isinstance(x, PackedSequence) and isinstance(t, PackedSequence):
40 |         # Data is already organized such that no padding is necessary.
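        # `.data` holds all valid (non-padded) time steps of all sequences
        # flattened into one (N, K) logits tensor and one (N,) target tensor,
        # so the cross entropy below weights every frame equally and never
        # sees padding.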
41 |         x, t = x.data, t.data
42 |     else:
43 |         raise ValueError(f'Incompatible types: {type(x)}, {type(t)}')
44 |
45 |     assert x.size()[:-1] == t.size(), f'{x.size()}, {t.size()}'
46 |     # Remember: torch.nn.CrossEntropyLoss already includes the softmax.
47 |     loss_fn = torch.nn.CrossEntropyLoss(ignore_index=IGNORE_INDEX)
48 |     return loss_fn(pt.ops.move_axis(x, -1, 1), t)
49 |
-------------------------------------------------------------------------------- /padertorch/ops/losses/kl_divergence.py: --------------------------------------------------------------------------------
1 | from torch.distributions import Normal, MultivariateNormal
2 | from torch.distributions import kl_divergence as kld
3 |
4 |
5 | __all__ = [
6 |     'gaussian_kl_divergence',
7 | ]
8 |
9 |
10 | def _batch_diag(bmat):
11 |     """
12 |     Returns the diagonals of a batch of square matrices.
13 |     """
14 |     return bmat.reshape(bmat.shape[:-2] + (-1,))[..., ::bmat.size(-1) + 1]
15 |
16 |
17 | def gaussian_kl_divergence(q, p):
18 |     """
19 |     Args:
20 |         q: Normal posterior distributions (B1, ..., BN, D)
21 |         p: (Multivariate) Normal prior distributions (K1, ..., KN, D)
22 |
23 |     Returns: kl between all posteriors in batch and all components
24 |         (B1, ..., BN, K1, ..., KN)
25 |
26 |     """
27 |     assert isinstance(q, Normal), type(q)
28 |     batch_shape = q.loc.shape[:-1]
29 |     D = q.loc.shape[-1]
30 |     component_shape = p.loc.shape[:-1]
31 |     assert p.loc.shape[-1] == D, (p.loc.shape[-1], D)
32 |
33 |     p_loc = p.loc.contiguous().view(-1, D)
34 |     if isinstance(p, MultivariateNormal):
35 |         p_scale_tril = p.scale_tril.contiguous().view(-1, D, D)
36 |         q_loc = q.loc.contiguous().view(-1, D)
37 |         q_scale = q.scale.contiguous().view(-1, D)
38 |
39 |         term1 = (  # 0.5 * log(det(Cov_p) / det(Cov_q)), via the scale diagonals
40 |             _batch_diag(p_scale_tril).log().sum(-1)[:, None]
41 |             - q_scale.log().sum(-1)
42 |         )
43 |         L = p_scale_tril.inverse()
44 |         term2 = (L.pow(2).sum(-2)[:, None, :] * q_scale.pow(2)).sum(-1)  # trace(inv(Cov_p) @ Cov_q)
45 |         term3 = (  # Mahalanobis term: (mu_p - mu_q)^T inv(Cov_p) (mu_p - mu_q)
46 |             (p_loc[:, None, :] - q_loc) @ L.transpose(1, 2)
47 |         ).pow(2.0).sum(-1)
48 |         kl = (term1 + 0.5 * (term2 + term3 - D)).transpose(0, 1)
49 |     elif isinstance(p, Normal):
50 |         p_scale = p.scale.contiguous().view(-1, D)
51 |         q_loc = q.loc.contiguous().view(-1, 1, D)
52 |         q_scale = q.scale.contiguous().view(-1, 1, D)
53 |
54 |         kl = kld(
55 |             Normal(loc=q_loc, scale=q_scale), Normal(loc=p_loc, scale=p_scale)
56 |         ).sum(-1)
57 |     else:
58 |         raise ValueError(type(p))
59 |
60 |     return kl.view(*batch_shape, *component_shape)
61 |
-------------------------------------------------------------------------------- /padertorch/ops/mappings.py: --------------------------------------------------------------------------------
1 | import torch
2 | from torch import optim
3 | from paderbox.utils.mapping import Dispatcher
4 | import numpy as np
5 |
6 | __all__ = [
7 |     'ACTIVATION_FN_MAP',
8 | ]
9 |
10 | class _CallableDispatcher(Dispatcher):
11 |     """
12 |     If the input is a callable it is returned.
13 |     Otherwise, it is basically a dict
14 |     with a better error message on key error.
15 |     >>> from padertorch.ops.mappings import _CallableDispatcher
16 |     >>> d = _CallableDispatcher(abc=1, bcd=2)
17 |     >>> d['acd'] #doctest: +ELLIPSIS
18 |     Traceback (most recent call last):
19 |     ...
20 |     paderbox.utils.mapping.DispatchError: Invalid option 'acd'.
21 |     Close matches: ['bcd', 'abc'].
22 |     >>> from padertorch.ops.mappings import _CallableDispatcher
23 |     >>> d = _CallableDispatcher(abc=1, bcd=2)
24 |     >>> d[np.median] #doctest: +ELLIPSIS
25 |     <function median at 0x...>
26 |     """
27 |     def __getitem__(self, item):
28 |         if callable(item):
29 |             return item
30 |         else:
31 |             return super().__getitem__(item)
32 |
33 |
34 | ACTIVATION_FN_MAP = _CallableDispatcher(
35 |     relu=torch.nn.ReLU,
36 |     leaky_relu=torch.nn.LeakyReLU,
37 |     elu=torch.nn.ELU,
38 |     tanh=torch.nn.Tanh,
39 |     sigmoid=torch.nn.Sigmoid,
40 |     softmax=torch.nn.Softmax,
41 |     identity=torch.nn.Sequential,  # An empty Sequential acts as the identity.
42 | )
43 |
-------------------------------------------------------------------------------- /padertorch/ops/mu_law.py: --------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 |
4 | __all__ = [
5 |     'mu_law_encode',
6 |     'mu_law_decode',
7 | ]
8 |
9 |
10 | def mu_law_decode(x, mu_quantization=256):
11 |     assert(torch.max(x) < mu_quantization)
12 |     assert(torch.min(x) >= 0)
13 |     x = x.float()
14 |     mu = mu_quantization - 1.
15 |     # Map values back to [-1, 1].
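    # The next two statements invert
    #     encode(x) = sign(x) * log(1 + mu*|x|) / log(1 + mu)
    # via |x| = ((1 + mu)**|encode(x)| - 1) / mu, so
    # decode(encode(x)) ~ x up to quantization error.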
16 | signal = 2 * (x / mu) - 1 17 | # Perform inverse of mu-law transformation. 18 | magnitude = (1 / mu) * ((1 + mu)**torch.abs(signal) - 1) 19 | return torch.sign(signal) * magnitude 20 | 21 | 22 | def mu_law_encode(x, mu_quantization=256): 23 | assert(torch.max(x) <= 1.0) 24 | assert(torch.min(x) >= -1.0) 25 | mu = mu_quantization - 1. 26 | scaling = np.log1p(mu) 27 | x_mu = torch.sign(x) * torch.log1p(mu * torch.abs(x)) / scaling 28 | encoding = ((x_mu + 1) / 2 * mu + 0.5).long() 29 | return encoding 30 | -------------------------------------------------------------------------------- /padertorch/ops/sequence/__init__.py: -------------------------------------------------------------------------------- 1 | from . import pack_module 2 | from . import pointwise 3 | from . import reduction 4 | 5 | from .pack_module import * 6 | from .pointwise import * 7 | from .reduction import * 8 | -------------------------------------------------------------------------------- /padertorch/ops/sequence/mask.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def compute_mask(x, sequence_lengths, batch_axis=0, sequence_axis=1): 5 | """ 6 | This function calculates a mask which indicates the position of non-padded values. 7 | It can be used to do subsequent operations only on non-padded values. 8 | 9 | >>> x, seq_len = 2*torch.ones((3,1,10,4)), [1, 2, 3] 10 | >>> mask = compute_mask(x, sequence_lengths=seq_len, batch_axis=0, sequence_axis=-1) 11 | >>> mask[:,0] 12 | tensor([[[1., 0., 0., 0.], 13 | [1., 0., 0., 0.], 14 | [1., 0., 0., 0.], 15 | [1., 0., 0., 0.], 16 | [1., 0., 0., 0.], 17 | [1., 0., 0., 0.], 18 | [1., 0., 0., 0.], 19 | [1., 0., 0., 0.], 20 | [1., 0., 0., 0.], 21 | [1., 0., 0., 0.]], 22 | 23 | [[1., 1., 0., 0.], 24 | [1., 1., 0., 0.], 25 | [1., 1., 0., 0.], 26 | [1., 1., 0., 0.], 27 | [1., 1., 0., 0.], 28 | [1., 1., 0., 0.], 29 | [1., 1., 0., 0.], 30 | [1., 1., 0., 0.], 31 | [1., 1., 0., 0.], 32 | [1., 1., 0., 0.]], 33 | 34 | [[1., 1., 1., 0.], 35 | [1., 1., 1., 0.], 36 | [1., 1., 1., 0.], 37 | [1., 1., 1., 0.], 38 | [1., 1., 1., 0.], 39 | [1., 1., 1., 0.], 40 | [1., 1., 1., 0.], 41 | [1., 1., 1., 0.], 42 | [1., 1., 1., 0.], 43 | [1., 1., 1., 0.]]]) 44 | 45 | Args: 46 | x: tensor to be masked 47 | sequence_lengths: list of int stating sequence length for each sequence 48 | in the mini-batch. If None a one-mask is returned, i.e., 49 | no values in x are masked. 
50 |         batch_axis: axis along which sequences are stacked
51 |         sequence_axis: axis which may contain padding (of different lengths
52 |             for each sequence)
53 |
54 |     Returns:
55 |         A float mask of the same shape as x: 1 marks valid positions, 0 padding.
56 |     """
57 |     if sequence_lengths is None:
58 |         return torch.ones_like(x)
59 |     if batch_axis < 0:
60 |         batch_axis = x.dim() + batch_axis
61 |     if sequence_axis < 0:
62 |         sequence_axis = x.dim() + sequence_axis
63 |     if not torch.is_tensor(sequence_lengths):
64 |         sequence_lengths = torch.Tensor(sequence_lengths).long().to(x.device)
65 |     assert sequence_lengths.device == x.device, (sequence_lengths.device, x.device)
66 |     for dim in range(batch_axis + 1, x.dim()):
67 |         sequence_lengths = sequence_lengths.unsqueeze(-1)
68 |     idx = torch.arange(x.shape[sequence_axis], device=x.device)
69 |     for dim in range(sequence_axis + 1, x.dim()):
70 |         idx = idx.unsqueeze(-1)
71 |     mask = (idx < sequence_lengths).float().expand(x.shape)
72 |     return mask
73 |
-------------------------------------------------------------------------------- /padertorch/ops/sequence/pointwise.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn
3 | from functools import partial
4 |
5 |
6 | __all__ = [
7 |     'sequence_elementwise',
8 |     'abs',
9 |     'ceil',
10 |     'clamp',
11 |     'exp',
12 |     'log',
13 |     'log1p',
14 |     'log10',
15 |     'sigmoid',
16 |     'sqrt',
17 | ]
18 |
19 |
20 | def sequence_elementwise(function, x, *args, **kwargs):
21 |     """Expects the desired function and a `Tensor` or `PackedSequence`."""
22 |     if isinstance(x, torch.nn.utils.rnn.PackedSequence):
23 |         return torch.nn.utils.rnn.PackedSequence(
24 |             function(x.data, *args, **kwargs),
25 |             x.batch_sizes
26 |         )
27 |     else:
28 |         return function(x, *args, **kwargs)
29 |
30 |
31 | abs = partial(sequence_elementwise, torch.abs)
32 | ceil = partial(sequence_elementwise, torch.ceil)
33 | clamp = partial(sequence_elementwise, torch.clamp)
34 | exp = partial(sequence_elementwise, torch.exp)
35 | log = partial(sequence_elementwise, torch.log)
36 | log10 = partial(sequence_elementwise, torch.log10)
37 | log1p = partial(sequence_elementwise, torch.log1p)
38 | sigmoid = partial(sequence_elementwise, torch.sigmoid)
39 | sqrt = partial(sequence_elementwise, torch.sqrt)
40 |
-------------------------------------------------------------------------------- /padertorch/ops/tensor.py: --------------------------------------------------------------------------------
1 | import torch
2 |
3 | __all__ = [
4 |     'move_axis'
5 | ]
6 |
7 |
8 | def move_axis(a: torch.Tensor, source: int, destination: int):
9 |     """Move an axis from source location to destination location.
10 |
11 |     The API is close to `np.moveaxis`, but allows only a single source axis.
12 |
13 |     Args:
14 |         a: The Tensor whose axes should be reordered.
15 |         source: Original position of the axis to move.
16 |         destination: Destination position of the axis.
17 |     Returns: Tensor with moved axis.
18 |
19 |     >>> x = torch.zeros((3, 4, 5))
20 |     >>> move_axis(x, 0, -1).size()
21 |     torch.Size([4, 5, 3])
22 |
23 |     >>> move_axis(x, -1, 0).size()
24 |     torch.Size([5, 3, 4])
25 |     """
26 |     source = source % len(a.size())
27 |     destination = destination % len(a.size())
28 |     permutation = [d for d in range(len(a.size())) if not d == source]
29 |     permutation.insert(destination, source)
30 |     return a.permute(permutation)
31 |
32 |
33 | def broadcast_to(tensor: torch.Tensor, shape):
34 |     """
35 |     Alias for `torch.Tensor.expand`. Prefer calling `torch.Tensor.expand` directly.
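    Note: like `torch.Tensor.expand`, this returns a view and only broadcasts
    singleton dimensions (or prepends new leading ones); no memory is copied.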
36 |
37 |     >>> broadcast_to(torch.ones(3), (4, 3)).shape
38 |     torch.Size([4, 3])
39 |     >>> broadcast_to(torch.ones(1, 3), (4, 3)).shape
40 |     torch.Size([4, 3])
41 |     >>> broadcast_to(torch.ones(4, 1), (4, 3)).shape
42 |     torch.Size([4, 3])
43 |     """
44 |     return tensor.expand(shape)
45 |
46 |
47 | def matrix_diag(x):
48 |     """
49 |     Apply the diag matrix operation on the last axis.
50 |
51 |     >>> matrix_diag(torch.ones(2))
52 |     tensor([[1., 0.],
53 |             [0., 1.]])
54 |     >>> matrix_diag(torch.ones(3, 4)).shape
55 |     torch.Size([3, 4, 4])
56 |
57 |     """
58 |     if x.dim() == 1:
59 |         return torch.diag(x)
60 |     feature_dim = x.shape[-1]
61 |     mat = x.reshape((-1, feature_dim))
62 |
63 |     # TODO: Remove the python loop, e.g. with torch.diag_embed, which
64 |     # performs exactly this batched diag operation.
65 |     diags = torch.stack([torch.diag(vec) for vec in mat])
66 |     return diags.reshape((*x.shape, feature_dim))
67 |
68 |
69 | def matrix_eye_like(x):
70 |     """
71 |     Returns an eye matrix with `x.dim() + 1` dimensions.
72 |
73 |     Note: Usually the matrix from torch.eye is enough, because torch supports
74 |     broadcasting.
75 |
76 |     >>> matrix_eye_like(torch.ones(2) + 10)
77 |     tensor([[1., 0.],
78 |             [0., 1.]])
79 |     >>> matrix_eye_like(torch.ones(3, 2)).shape
80 |     torch.Size([3, 2, 2])
81 |     >>> matrix_eye_like(torch.ones(4, 3, 2)).shape
82 |     torch.Size([4, 3, 2, 2])
83 |
84 |     """
85 |     feature_dim = x.shape[-1]
86 |     eye = torch.eye(feature_dim)
87 |     if x.dim() == 1:
88 |         return eye
89 |     else:
90 |         return broadcast_to(eye, [*x.shape, feature_dim])
91 |
92 |
93 | def batch_tril(x):
94 |     """Apply torch.tril along the minibatch axis."""
95 |     matrix_dims = x.shape[-2:]
96 |     mats = x.reshape((-1, *matrix_dims))
97 |     trils = torch.stack([torch.tril(mat) for mat in mats])
98 |     return trils.reshape(x.shape)
99 |
-------------------------------------------------------------------------------- /padertorch/summary/__init__.py: --------------------------------------------------------------------------------
1 | from .tbx_utils import *
2 | from . import tfevents
3 | from .model_info import *
-------------------------------------------------------------------------------- /padertorch/summary/model_info.py: --------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from torch import nn
3 |
4 |
5 | __all__ = [
6 |     'num_parameters',
7 | ]
8 |
9 |
10 | @dataclass(repr=False)
11 | class ModelParameterSize:
12 |     total_count: int = 0
13 |     trainable_count: int = 0
14 |     total_bytes: int = 0
15 |     trainable_bytes: int = 0
16 |
17 |     def __repr__(self):
18 |         try:
19 |             import humanize
20 |             return (
21 |                 f'{self.__class__.__name__}('
22 |                 f'total_count={humanize.intword(self.total_count)}, '
23 |                 f'trainable_count={humanize.intword(self.trainable_count)}, '
24 |                 f'total_bytes={humanize.naturalsize(self.total_bytes)}, '
25 |                 f'trainable_bytes={humanize.naturalsize(self.trainable_bytes)})'
26 |             )
27 |         except ImportError:
28 |             return (
29 |                 f'{self.__class__.__name__}('
30 |                 f'total_count={self.total_count}, '
31 |                 f'trainable_count={self.trainable_count}, '
32 |                 f'total_bytes={self.total_bytes}, '
33 |                 f'trainable_bytes={self.trainable_bytes})'
34 |             )
35 |
36 |
37 | def num_parameters(module: nn.Module) -> ModelParameterSize:
38 |     """Counts the number of parameters for `module`.
39 |
40 |     Args:
41 |         module: The module to count the number of parameters for
42 |
43 |     Returns: The total number of parameters and the number of trainable
44 |         parameters.
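        Byte sizes are computed per parameter as numel() * element_size().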
45 | 46 | Examples: 47 | >>> num_parameters(nn.Linear(10, 10)) 48 | ModelParameterSize(total_count=110, trainable_count=110, total_bytes=440 Bytes, trainable_bytes=440 Bytes) 49 | >>> net = nn.Sequential(nn.Linear(10, 10).requires_grad_(False), nn.Linear(10, 10)) 50 | >>> num_parameters(net) 51 | ModelParameterSize(total_count=220, trainable_count=110, total_bytes=880 Bytes, trainable_bytes=440 Bytes) 52 | """ 53 | result = ModelParameterSize() 54 | 55 | for parameter in module.parameters(): 56 | size = parameter.numel() 57 | bytes = parameter.element_size() 58 | 59 | if parameter.requires_grad: 60 | result.trainable_count += size 61 | result.trainable_bytes += size * bytes 62 | result.total_count += size 63 | result.total_bytes += size * bytes 64 | 65 | return result 66 | -------------------------------------------------------------------------------- /padertorch/summary/tfevents.py: -------------------------------------------------------------------------------- 1 | import struct 2 | 3 | ''' 4 | Event structure: 5 | 6 | { 7 | 'wall_time': ..., 8 | 'step': ..., 9 | 'summary': 10 | 'value': [{ # length is 1 11 | 'tag': ..., 12 | 'simple_value': ..., 13 | 'histo': { 14 | 'min': ..., 15 | 'max': ..., 16 | 'num': ..., 17 | 'sum': ..., 18 | 'sum_squares': ..., 19 | 'bucket_llimit': ..., 20 | 'bucket': ..., 21 | } 22 | }] 23 | } 24 | ''' 25 | 26 | def load_events_as_dict( 27 | path, 28 | backend='tbX', 29 | ): 30 | """ 31 | 32 | Args: 33 | path: 34 | Path to a tfevent file 35 | backend: 36 | 'tbX' or 'tf' 37 | Use tensorboardX or tensorflow to load the tfevents file. 38 | 39 | Returns: 40 | generator that yields the events as dict 41 | 42 | >>> path = '/net/vol/boeddeker/sacred/torch/am/32/events.out.tfevents.1545605113.ntsim1' 43 | >>> list(load_events_as_dict(path))[2] # doctest: +SKIP 44 | {'wall_time': 1545605119.7274427, 'step': 1, 'summary': {'value': [{'tag': 'training/grad_norm', 'simple_value': 0.21423661708831787}]}} 45 | >>> list(load_events_as_dict(path, backend='tf'))[2] # doctest: +SKIP 46 | {'wall_time': 1545605119.7274427, 'step': 1, 'summary': {'value': [{'tag': 'training/grad_norm', 'simple_value': 0.21423661708831787}]}} 47 | 48 | """ 49 | try: 50 | # protobuf3-to-dict (PyPI) 51 | from protobuf_to_dict import protobuf_to_dict 52 | except NameError as e: 53 | raise RuntimeError( 54 | 'protobuf3-to-dict is required for load_events_as_dict to work, ' 55 | 'but you seem to have installed protobuf-to-dict. You can ' 56 | 'install it with:\n pip install protobuf3-to-dict' 57 | ) from e 58 | 59 | # from google.protobuf.json_format import MessageToDict 60 | # MessageToDict(e, preserving_proto_field_name=True) 61 | # Converts int to str -> Bad behaviour 62 | if backend == 'tf': 63 | import tensorflow as tf 64 | return [ 65 | protobuf_to_dict(e) 66 | for e in tf.train.summary_iterator(str(path)) 67 | ] 68 | elif backend == 'tbX': 69 | from tensorboardX.event_file_writer import event_pb2 70 | 71 | def read(fd): 72 | # Original 73 | # https://github.com/lanpa/tensorboard-dumper/blob/master/dump.py 74 | # Remove this code, once 75 | # https://github.com/lanpa/tensorboardX/issues/318 76 | # has a solution. 
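            # A tfevents file is a stream of TFRecords, each framed as:
            #     uint64 payload length, uint32 masked CRC of the length,
            #     payload bytes, uint32 masked CRC of the payload.
            # The two CRC fields are read below but not verified.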
77 | header_data = fd.read(8) 78 | if header_data == b'': 79 | return None 80 | header, = struct.unpack('Q', header_data) 81 | crc_hdr = struct.unpack('I', fd.read(4)) 82 | event_str = fd.read(header) # 8+4 83 | crc_ev = struct.unpack('>I', fd.read(4)) 84 | 85 | event = event_pb2.Event() 86 | event.ParseFromString(event_str) 87 | return event 88 | 89 | def read_all(path): 90 | with open(path, 'rb') as fd: 91 | event = read(fd) 92 | while event is not None: 93 | yield protobuf_to_dict(event) 94 | event = read(fd) 95 | 96 | return read_all(path) 97 | else: 98 | raise ValueError(backend) 99 | -------------------------------------------------------------------------------- /padertorch/testing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/padertorch/testing/__init__.py -------------------------------------------------------------------------------- /padertorch/train/__init__.py: -------------------------------------------------------------------------------- 1 | from . import optimizer 2 | from . import trigger 3 | from . import hooks 4 | from . import trainer 5 | from . import runtime_tests 6 | -------------------------------------------------------------------------------- /padertorch/train/optimizer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import optim 3 | 4 | 5 | class Optimizer: 6 | optimizer_cls = None 7 | optimizer = None 8 | parameters = None 9 | 10 | def __init__( 11 | self, gradient_clipping, **kwargs 12 | ): 13 | self.gradient_clipping = gradient_clipping 14 | self.optimizer_kwargs = kwargs 15 | 16 | def set_parameters(self, parameters): 17 | self.parameters = tuple(parameters) 18 | self.optimizer = self.optimizer_cls( 19 | self.parameters, **self.optimizer_kwargs 20 | ) 21 | 22 | def check_if_set(self): 23 | assert self.optimizer is not None, \ 24 | 'The optimizer is not initialized, call set_parameter before' \ 25 | ' using any of the optimizer functions' 26 | 27 | def zero_grad(self): 28 | self.check_if_set() 29 | return self.optimizer.zero_grad() 30 | 31 | def step(self): 32 | self.check_if_set() 33 | return self.optimizer.step() 34 | 35 | def clip_grad(self): 36 | self.check_if_set() 37 | # Todo: report clipped and unclipped 38 | # Todo: allow clip=None but still report grad_norm 39 | grad_clips = self.gradient_clipping 40 | return torch.nn.utils.clip_grad_norm_( 41 | self.parameters, grad_clips 42 | ) 43 | 44 | def to(self, device): 45 | if device is None: 46 | return 47 | self.check_if_set() 48 | for state in self.optimizer.state.values(): 49 | for k, v in state.items(): 50 | if torch.is_tensor(v): 51 | state[k] = v.to(device) 52 | 53 | def cpu(self): 54 | return self.to('cpu') 55 | 56 | def cuda(self, device=None): 57 | assert device is None or isinstance(device, int), device 58 | if device is None: 59 | device = torch.device('cuda') 60 | return self.to(device) 61 | 62 | def load_state_dict(self, state_dict): 63 | self.check_if_set() 64 | return self.optimizer.load_state_dict(state_dict) 65 | 66 | def state_dict(self): 67 | self.check_if_set() 68 | return self.optimizer.state_dict() 69 | 70 | 71 | class Adam(Optimizer): 72 | optimizer_cls = optim.Adam 73 | 74 | def __init__( 75 | self, 76 | gradient_clipping=1e10, 77 | lr=1e-3, 78 | betas=(0.9, 0.999), 79 | eps=1e-8, 80 | weight_decay=0, 81 | amsgrad=False 82 | ): 83 | super().__init__( 84 | gradient_clipping, 85 | 
lr=lr, 86 | betas=betas, 87 | eps=eps, 88 | weight_decay=weight_decay, 89 | amsgrad=amsgrad 90 | ) 91 | 92 | 93 | class Adadelta(Optimizer): 94 | optimizer_cls = optim.Adadelta 95 | 96 | def __init__( 97 | self, 98 | gradient_clipping=1e10, 99 | lr=1.0, 100 | rho=0.9, 101 | eps=1e-6, 102 | weight_decay=0 103 | ): 104 | super().__init__( 105 | gradient_clipping, 106 | lr=lr, 107 | rho=rho, 108 | eps=eps, 109 | weight_decay=weight_decay, 110 | ) 111 | 112 | 113 | class SGD(Optimizer): 114 | optimizer_cls = optim.SGD 115 | 116 | def __init__( 117 | self, 118 | gradient_clipping=1e10, 119 | lr=1e-3, 120 | momentum=0, 121 | dampening=0, 122 | weight_decay=0, 123 | nesterov=False 124 | ): 125 | super().__init__( 126 | gradient_clipping, 127 | lr=lr, 128 | momentum=momentum, 129 | dampening=dampening, 130 | weight_decay=weight_decay, 131 | nesterov=nesterov 132 | ) 133 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel", "Cython", "numpy", "scipy"] 3 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = 3 | --ignore "padertorch/modules/wavenet/nv_wavenet" 4 | --ignore "padertorch/contrib" 5 | -m "not matlab" 6 | --doctest-modules 7 | --doctest-continue-on-failure 8 | --junitxml=junit/test-results.xml 9 | --cov=padertorch 10 | --cov-report=xml 11 | --cov-report=html 12 | 13 | markers = 14 | matlab: marks matlab tests, they are slow (deselect with '-m "not matlab"') 15 | torch: marks (py)torch tests, import torch fails in the moment on azure (deselect with '-m "not torch"') 16 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/tests/__init__.py -------------------------------------------------------------------------------- /tests/contrib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/tests/contrib/__init__.py -------------------------------------------------------------------------------- /tests/test_configurable.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | import paderbox as pb 6 | import padertorch as pt 7 | 8 | 9 | def foo(b=1, c=2): 10 | pass 11 | 12 | 13 | def bar(a, b=3, d=4): 14 | pass 15 | 16 | 17 | class A(pt.configurable.Configurable): 18 | @classmethod 19 | def finalize_dogmatic_config(cls, config): 20 | config['e'] = { 21 | 'factory': foo, 22 | 'b': 5 23 | } 24 | if config['e']['factory'] == foo: 25 | cfg_e = config['e'] 26 | cfg_e['c'] = 6 27 | elif config['e']['factory'] == bar: 28 | config['e']['d'] = 7 29 | else: 30 | raise ValueError(config['e']['factory']) 31 | return config 32 | 33 | def __init__(self, e, f=0): 34 | pass 35 | 36 | 37 | class Test: 38 | def test_(self): 39 | config = A.get_config() 40 | expect = { 41 | 'factory': 'tests.test_configurable.A', 42 | 'f': 0, 43 | 'e': { 44 | 'factory': 'tests.test_configurable.foo', 45 | 'b': 5, 46 | 'c': 6 47 | } 48 | } 49 | assert config == expect 50 | 51 
| with np.testing.assert_raises_regex(Exception, "missing keys: {'a'}"):
52 |             config = A.get_config({'e': {'factory': bar}})
53 |
54 |         config = A.get_config({'e': {'factory': bar, 'a': 10}})
55 |         expect = {
56 |             'factory': 'tests.test_configurable.A',
57 |             'f': 0,
58 |             'e': {
59 |                 'factory': 'tests.test_configurable.bar',
60 |                 'b': 5,
61 |                 'd': 7,
62 |                 'a': 10
63 |             }
64 |         }
65 |         assert config == expect
66 |
67 |         config = A.get_config({'e': {'factory': bar, 'a': 10}})
68 |         expect = {
69 |             'factory': 'tests.test_configurable.A',
70 |             'f': 0,
71 |             'e': {
72 |                 'factory': 'tests.test_configurable.bar',
73 |                 'b': 5,
74 |                 'd': 7,
75 |                 'a': 10
76 |             }
77 |         }
78 |         assert config == expect
79 |
80 |
81 | class B(pt.Configurable):
82 |
83 |     @classmethod
84 |     def finalize_dogmatic_config(cls, config):
85 |         config['a'] = 1
86 |         config['b'] = 2  # Should raise an Exception
87 |
88 |     def __init__(self, a):
89 |         pass
90 |
91 |
92 | def test_wrong_finalize_dogmatic_config():
93 |
94 |     with pytest.raises(Exception) as exc_info:
95 |         B.get_config()
96 |
97 |     pb.testing.assert_doctest_like_equal(
98 |         """
99 | Tried to set an unexpected keyword argument for <class 'tests.test_configurable.B'> in finalize_dogmatic_config.
100 | See details below and stacktrace above.
101 |
102 | Too many keywords for the factory <class 'tests.test_configurable.B'>.
103 | Redundant keys: {'b'}
104 | Signature: (a)
105 | Current config with fallbacks:
106 | NestedChainMap({'factory': tests.test_configurable.B}, {'a': 1, 'b': 2})
107 |         """.strip(),
108 |         str(exc_info.value)
109 |     )
110 |
111 |     with pytest.raises(Exception) as exc_info:
112 |         B.get_config(updates={'C': 3})
113 |
114 |     pb.testing.assert_doctest_like_equal(
115 |         """
116 | padertorch.Configurable.get_config(updates=...) got an unexpected keyword argument in updates for <class 'tests.test_configurable.B'>.
117 | See details below.
118 |
119 | Too many keywords for the factory <class 'tests.test_configurable.B'>.
120 | Redundant keys: {'C'} 121 | Signature: (a) 122 | Current config with fallbacks: 123 | NestedChainMap({'C': 3, 'factory': tests.test_configurable.B}, {}) 124 | """.strip(), 125 | str(exc_info.value) 126 | ) 127 | -------------------------------------------------------------------------------- /tests/test_models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/tests/test_models/__init__.py -------------------------------------------------------------------------------- /tests/test_modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/tests/test_modules/__init__.py -------------------------------------------------------------------------------- /tests/test_modules/test_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from padertorch.ops.sequence.mask import compute_mask 3 | from padertorch.modules.normalization import normalize 4 | import paderbox.testing as tc 5 | 6 | 7 | def normalize_ref(x, gamma, beta, statistics_axis, batch_axis, sequence_axis, seq_len, shift, scale, eps): 8 | # compute mask 9 | if seq_len is not None: 10 | mask = compute_mask(x, seq_len, batch_axis, sequence_axis) 11 | else: 12 | mask = torch.ones_like(x) 13 | 14 | # compute statistics 15 | n_values = mask.sum(dim=statistics_axis, keepdim=True) 16 | x = x * mask 17 | mean = x.sum(dim=statistics_axis, keepdim=True) / torch.max(n_values, torch.ones_like(n_values)) 18 | power = (x ** 2).sum(dim=statistics_axis, keepdim=True) / torch.max(n_values, torch.ones_like(n_values)) 19 | y = x 20 | if shift: 21 | y = y - mean 22 | power_scale = power - mean**2 23 | else: 24 | power_scale = power 25 | power_scale = torch.clamp(power_scale, min=0.) 
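    # The clamp guards against the slightly negative variances that
    # E[x^2] - E[x]^2 can produce through floating-point cancellation.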
26 | if scale: 27 | y = y / torch.sqrt(power_scale + eps) 28 | 29 | if gamma is not None: 30 | assert gamma.dim() == x.dim(), gamma.shape 31 | y = y * gamma 32 | if beta is not None: 33 | assert beta.dim() == x.dim(), beta.shape 34 | y = y + beta 35 | return y*mask, mean, power, n_values 36 | 37 | 38 | def test_outputs_and_grads(): 39 | x = torch.randn((2, 3, 5), requires_grad=True) 40 | gamma = 1+torch.randn((1, 3, 1)) 41 | gamma.requires_grad = True 42 | beta = torch.randn((1, 3, 1), requires_grad=True) 43 | seq_len = [5, 3] 44 | x_ref = x.clone().detach() 45 | x_ref.requires_grad = True 46 | gamma_ref = gamma.clone().detach() 47 | gamma_ref.requires_grad = True 48 | beta_ref = beta.clone().detach() 49 | beta_ref.requires_grad = True 50 | 51 | for shift in [True, False]: 52 | for scale in [True, False]: 53 | if x.grad is not None: 54 | x.grad.zero_() 55 | x_ref.grad.zero_() 56 | gamma.grad.zero_() 57 | gamma_ref.grad.zero_() 58 | beta.grad.zero_() 59 | beta_ref.grad.zero_() 60 | outs = normalize(x, gamma, beta, [0, 2], 0, 2, seq_len, shift, scale, 1e-3) 61 | y = outs[0] 62 | (y[0, [0, 1]] - y[0, 2]).sum().backward() 63 | outs_ref = normalize_ref(x_ref, gamma_ref, beta_ref, [0, 2], 0, 2, seq_len, shift, scale, 1e-3) 64 | y_ref = outs_ref[0] 65 | (y_ref[0, [0, 1]] - y_ref[0, 2]).sum().backward() 66 | 67 | for out, out_ref in zip(outs, outs_ref): 68 | tc.assert_array_almost_equal(out.detach().numpy(), out_ref.detach().numpy()) 69 | tc.assert_array_almost_equal(x.grad.numpy(), x_ref.grad.numpy(), decimal=4) 70 | tc.assert_array_almost_equal(gamma.grad.numpy(), gamma_ref.grad.numpy(), decimal=4) 71 | tc.assert_array_almost_equal(beta.grad.numpy(), beta_ref.grad.numpy(), decimal=4) 72 | -------------------------------------------------------------------------------- /tests/test_ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fgnt/padertorch/abfca00d7f0393f7c5e5c3a08819a7fca99dec54/tests/test_ops/__init__.py -------------------------------------------------------------------------------- /tests/test_ops/test_sequence.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import padertorch as pts 3 | import numpy as np 4 | import torch 5 | from torch.nn.utils.rnn import PackedSequence 6 | 7 | 8 | class TestPackModule(unittest.TestCase): 9 | def setUp(self): 10 | self.sequence = [torch.zeros(5, 3), torch.ones(4, 3)] 11 | 12 | self.padded = torch.zeros(5, 2, 3) 13 | self.padded[:4, 1, :] = torch.ones(4, 3) 14 | 15 | self.lengths = torch.LongTensor([5, 4]) 16 | 17 | self.packed = PackedSequence( 18 | torch.stack([self.sequence[0][0], 19 | self.sequence[1][0], 20 | self.sequence[0][1], 21 | self.sequence[1][1], 22 | self.sequence[0][2], 23 | self.sequence[1][2], 24 | self.sequence[0][3], 25 | self.sequence[1][3], 26 | self.sequence[0][4], 27 | ], dim=0), 28 | torch.LongTensor(4 * [2] + 1 * [1]) 29 | ) 30 | 31 | def test_pack_sequence(self): 32 | actual = pts.ops.pack_sequence(self.sequence) 33 | assert isinstance(actual, type(self.packed)) 34 | np.testing.assert_equal( 35 | actual[0].data.numpy(), 36 | self.packed.data.numpy(), 37 | ) 38 | 39 | def test_unpack_sequence(self): 40 | actual = pts.ops.unpack_sequence(self.packed) 41 | assert isinstance(actual, type(self.sequence)) 42 | for actual_, reference_ in zip(actual, self.sequence): 43 | np.testing.assert_equal(actual_.numpy(), reference_.numpy()) 44 | 45 | def test_pad_sequence(self): 46 | actual = 
pts.ops.pad_sequence(self.sequence)
47 |         assert isinstance(actual, type(self.padded))
48 |         np.testing.assert_equal(actual.numpy(), self.padded.numpy())
49 |
50 |     def test_unpad_sequence(self):
51 |         actual = pts.ops.unpad_sequence(self.padded, self.lengths)
52 |         assert isinstance(actual, type(self.sequence))
53 |         for actual_, reference_ in zip(actual, self.sequence):
54 |             np.testing.assert_equal(actual_.numpy(), reference_.numpy())
55 |
56 |     def test_pad_packed_sequence(self):
57 |         actual, lengths = pts.ops.pad_packed_sequence(self.packed)
58 |         assert isinstance(actual, type(self.padded))
59 |         np.testing.assert_equal(lengths.numpy(), self.lengths.numpy())
60 |         np.testing.assert_equal(actual.numpy(), self.padded.numpy())
61 |
62 |     def test_pack_padded_sequence(self):
63 |         actual = pts.ops.pack_padded_sequence(self.padded, self.lengths)
64 |         assert isinstance(actual, type(self.packed))
65 |         np.testing.assert_equal(actual.data.numpy(), self.packed.data.numpy())
66 |
-------------------------------------------------------------------------------- /tests/test_summary/test_tbx_utils.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | from padertorch.summary.tbx_utils import audio
5 |
6 |
7 | def test_audio():
8 |     # A CPU tensor and a numpy array share the data.
9 |     # Verify that the input is not changed.
10 |
11 |     # Test normalization
12 |     tensor = torch.ones((16000,))
13 |     array, _ = audio(tensor)
14 |     np.testing.assert_allclose(tensor.numpy(), 1)
15 |     np.testing.assert_allclose(array, 0.95)
16 |
17 |     # Test zero signal
18 |     tensor = torch.zeros((16000,))
19 |     array, _ = audio(tensor)
20 |     np.testing.assert_allclose(tensor.numpy(), 0)
21 |     np.testing.assert_allclose(array, 0)
22 |
-------------------------------------------------------------------------------- /tests/test_train/test_optimizer.py: --------------------------------------------------------------------------------
1 | import padertorch as pt
2 | import torch
3 |
4 |
5 | def test_grad_norm():
6 |     lin = torch.nn.Linear(16, 8)
7 |     opti = pt.optimizer.Adam()
8 |     opti.set_parameters(lin.parameters())
9 |     opti.zero_grad()
10 |     loss = lin.weight.sum()
11 |     loss.backward()
12 |     grad_norm = opti.clip_grad()
13 |     grad_norm_ref = torch.nn.utils.clip_grad_norm_(
14 |         lin.parameters(), 10.
15 |     )
16 |     assert grad_norm == grad_norm_ref and grad_norm_ref > 0., \
17 |         (grad_norm, grad_norm_ref)
18 |
--------------------------------------------------------------------------------
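For reference, a minimal sketch of how the `Optimizer` wrapper from padertorch/train/optimizer.py is driven in a training step; the model and loop below are placeholders, not repository code:

    import torch
    import padertorch as pt

    model = torch.nn.Linear(16, 8)                # placeholder model
    optimizer = pt.optimizer.Adam(gradient_clipping=5., lr=1e-3)
    optimizer.set_parameters(model.parameters())  # instantiates torch.optim.Adam

    for _ in range(3):                            # placeholder training loop
        loss = model(torch.randn(4, 16)).pow(2).mean()
        optimizer.zero_grad()
        loss.backward()
        grad_norm = optimizer.clip_grad()         # clip_grad_norm_ over all parameters
        optimizer.step()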