├── .github ├── PULL_REQUEST_TEMPLATE.md ├── environment-ci.yml └── workflows │ ├── ci.yml │ └── publish.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE.md ├── LICENSE.md ├── README.md ├── docs ├── Makefile ├── api.rst ├── changes.rst ├── conf.py ├── index.rst └── requirements.txt ├── pumpp ├── __init__.py ├── base.py ├── core.py ├── exceptions.py ├── feature │ ├── __init__.py │ ├── _utils.py │ ├── base.py │ ├── cqt.py │ ├── fft.py │ ├── mel.py │ ├── rhythm.py │ └── time.py ├── labels.py ├── sampler.py ├── task │ ├── __init__.py │ ├── base.py │ ├── beat.py │ ├── chord.py │ ├── key.py │ ├── regression.py │ ├── structure.py │ └── tags.py └── version.py ├── pyproject.toml ├── setup.cfg ├── setup.py └── tests ├── data ├── test.jams └── test.ogg ├── test_core.py ├── test_decode.py ├── test_feature.py ├── test_misc.py ├── test_sampler.py ├── test_task.py └── test_utils.py /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 5 | #### Reference Issue 6 | 7 | 8 | 9 | #### What does this implement/fix? Explain your changes. 10 | 11 | 12 | #### Any other comments? 
13 | 14 | -------------------------------------------------------------------------------- /.github/environment-ci.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | # required 7 | - pip 8 | - librosa 9 | - keras>=2.6 10 | - tensorflow>=2.0 11 | - scikit-learn>=0.20 12 | # optional, but required for testing 13 | - pytest-mpl 14 | - pytest-cov 15 | - pytest-faulthandler 16 | - pytest 17 | - contextlib2 18 | - coverage 19 | - pip: 20 | - soxr 21 | - samplerate 22 | - jams>=0.3 23 | - mir_eval>=0.5 24 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.ref }} 13 | cancel-in-progress: True 14 | 15 | jobs: 16 | test: 17 | name: "Python ${{ matrix.python-version }} on ${{ matrix.os }}" 18 | runs-on: ${{ matrix.os }} 19 | 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | include: 24 | - os: ubuntu-latest 25 | python-version: "3.7" 26 | channel-priority: "strict" 27 | envfile: ".github/environment-ci.yml" 28 | 29 | - os: ubuntu-latest 30 | python-version: "3.8" 31 | channel-priority: "strict" 32 | envfile: ".github/environment-ci.yml" 33 | 34 | - os: ubuntu-latest 35 | python-version: "3.9" 36 | channel-priority: "strict" 37 | envfile: ".github/environment-ci.yml" 38 | 39 | steps: 40 | - uses: actions/checkout@v2 41 | with: 42 | submodules: true 43 | 44 | - name: Install OS dependencies 45 | shell: bash -l {0} 46 | run: | 47 | case "${{ runner.os }}" in 48 | Linux) 49 | sudo apt-get update -yy 50 | sudo apt-get install -yy libsamplerate0 51 | ;; 52 | macOS) 53 | brew install libsamplerate 54 | ;; 55 | esac 56 | 57 | - name: Cache conda 58 | uses: 
actions/cache@v2 59 | env: 60 | # Increase this value to reset cache if .github/environment-ci.yml has not changed 61 | CACHE_NUMBER: 1 62 | with: 63 | path: ~/conda_pkgs_dir 64 | key: ${{ runner.os }}-${{ matrix.python-version }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles( matrix.envfile ) }} 65 | 66 | - name: Install Conda environment 67 | uses: conda-incubator/setup-miniconda@v2 68 | with: 69 | auto-update-conda: true 70 | python-version: ${{ matrix.python-version }} 71 | add-pip-as-python-dependency: true 72 | auto-activate-base: false 73 | activate-environment: test 74 | channel-priority: ${{ matrix.channel-priority }} 75 | environment-file: ${{ matrix.envfile }} 76 | use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly! 77 | 78 | - name: Conda info 79 | shell: bash -l {0} 80 | run: | 81 | conda info -a 82 | conda list 83 | 84 | - name: Install pumpp 85 | shell: bash -l {0} 86 | run: python -m pip install --upgrade-strategy only-if-needed -e .[tests] 87 | 88 | - name: Run pytest 89 | shell: bash -l {0} 90 | run: pytest 91 | 92 | - name: Upload coverage to Codecov 93 | uses: codecov/codecov-action@v1 94 | with: 95 | token: ${{ secrets.CODECOV_TOKEN }} 96 | files: ./coverage.xml 97 | directory: ./coverage/reports/ 98 | flags: unittests 99 | env_vars: OS,PYTHON 100 | name: codecov-umbrella 101 | fail_ci_if_error: true 102 | path_to_write_report: ./coverage/codecov_report.txt 103 | verbose: true 104 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distributions 📦 to PyPI and TestPyPI 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | build-n-publish: 9 | name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@master 14 | - name: Set up Python 3.7 15 | uses: 
actions/setup-python@v1 16 | with: 17 | python-version: 3.7 18 | 19 | - name: Install pypa/build 20 | run: >- 21 | python -m 22 | pip install 23 | build 24 | --user 25 | - name: Build a binary wheel and a source tarball 26 | run: >- 27 | python -m 28 | build 29 | --sdist 30 | --wheel 31 | --outdir dist/ 32 | . 33 | - name: Publish distribution 📦 to Test PyPI 34 | uses: pypa/gh-action-pypi-publish@master 35 | with: 36 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 37 | repository_url: https://test.pypi.org/legacy/ 38 | - name: Publish distribution 📦 to PyPI 39 | if: startsWith(github.ref, 'refs/tags') 40 | uses: pypa/gh-action-pypi-publish@master 41 | with: 42 | password: ${{ secrets.PYPI_API_TOKEN }} 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | docs/_* 66 | docs/generated/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # IPython Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # dotenv 81 | .env 82 | 83 | # virtualenv 84 | venv/ 85 | ENV/ 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | 93 | # VS Code 94 | .vscode 95 | 96 | # pytest 97 | .pytest_cache 98 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at brian.mcfee@nyu.edu. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | Contributing code 3 | ================= 4 | 5 | How to contribute 6 | ----------------- 7 | 8 | The preferred way to contribute to pumpp is to fork the 9 | [main repository](http://github.com/bmcfee/pumpp/) on 10 | GitHub: 11 | 12 | 1. Fork the [project repository](http://github.com/bmcfee/pumpp): 13 | click on the 'Fork' button near the top of the page. This creates 14 | a copy of the code under your account on the GitHub server. 15 | 16 | 2. Clone this copy to your local disk: 17 | 18 | $ git clone git@github.com:YourLogin/pumpp.git 19 | $ cd pumpp 20 | 21 | 3. Create a branch to hold your changes: 22 | 23 | $ git checkout -b my-feature 24 | 25 | and start making changes. Never work in the ``master`` branch! 26 | 27 | 4. Work on this copy on your computer using Git to do the version 28 | control. 
When you're done editing, do: 29 | 30 | $ git add modified_files 31 | $ git commit 32 | 33 | to record your changes in Git, then push them to GitHub with: 34 | 35 | $ git push -u origin my-feature 36 | 37 | Finally, go to the web page of your fork of the pumpp repo, 38 | and click 'Pull request' to send your changes to the maintainers for 39 | review. This will send an email to the committers. 40 | 41 | (If any of the above seems like magic to you, then look up the 42 | [Git documentation](http://git-scm.com/documentation) on the web.) 43 | 44 | It is recommended to check that your contribution complies with the 45 | following rules before submitting a pull request: 46 | 47 | - All public methods should have informative docstrings with sample 48 | usage presented. 49 | 50 | You can also check for common programming errors with the following 51 | tools: 52 | 53 | - Code with good unittest coverage (at least 80%), check with: 54 | 55 | $ pip install pytest pytest-cov 56 | $ pytest 57 | 58 | - No pyflakes warnings, check with: 59 | 60 | $ pip install pyflakes 61 | $ pyflakes path/to/module.py 62 | 63 | - No PEP8 warnings, check with: 64 | 65 | $ pip install pep8 66 | $ pep8 path/to/module.py 67 | 68 | - AutoPEP8 can help you fix some of the easy redundant errors: 69 | 70 | $ pip install autopep8 71 | $ autopep8 path/to/pep8.py 72 | 73 | Filing bugs 74 | ----------- 75 | We use Github issues to track all bugs and feature requests; feel free to 76 | open an issue if you have found a bug or wish to see a feature implemented. 77 | 78 | It is recommended to check that your issue complies with the 79 | following rules before submitting: 80 | 81 | - Verify that your issue is not being currently addressed by other 82 | [issues](https://github.com/bmcfee/pumpp/issues?q=) 83 | or [pull requests](https://github.com/bmcfee/pumpp/pulls?q=). 84 | 85 | - Please ensure all code snippets and error messages are formatted in 86 | appropriate code blocks. 
87 | See [Creating and highlighting code blocks](https://help.github.com/articles/creating-and-highlighting-code-blocks). 88 | 89 | - Please include your operating system type and version number, as well 90 | as your Python, scikit-learn, numpy, and scipy versions. This information 91 | can be found by running the following code snippet: 92 | 93 | ```python 94 | import platform; print(platform.platform()) 95 | import sys; print("Python", sys.version) 96 | import numpy; print("NumPy", numpy.__version__) 97 | import scipy; print("SciPy", scipy.__version__) 98 | import sklearn; print("sklearn", sklearn.__version__) 99 | import librosa; print("librosa", librosa.__version__) 100 | import jams; print("jams", jams.__version__) 101 | import mir_eval; print("mir_eval", mir_eval.__version__) 102 | ``` 103 | 104 | Documentation 105 | ------------- 106 | 107 | You can edit the documentation using any text editor and then generate 108 | the HTML output by typing ``make html`` from the docs/ directory. 109 | The resulting HTML files will be placed in _build/html/ and are viewable 110 | in a web browser. See the README file in the docs/ directory for more information. 111 | 112 | For building the documentation, you will need 113 | [sphinx](http://sphinx.pocoo.org/), 114 | [matplotlib](http://matplotlib.sourceforge.net/), and [numpydoc](https://pypi.python.org/pypi/numpydoc). 115 | 116 | Note 117 | ---- 118 | This document was gleefully borrowed from [scikit-learn](http://scikit-learn.org/). 
119 | -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | #### Description 4 | 5 | 6 | #### Steps/Code to Reproduce 7 | 11 | 12 | #### Expected Results 13 | 14 | 15 | #### Actual Results 16 | 17 | 18 | #### Versions 19 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | ISC License 2 | 3 | Copyright (c) 2016, 2017-, Brian McFee 4 | 5 | Permission to use, copy, modify, and/or distribute this software for any 6 | purpose with or without fee is hereby granted, provided that the above 7 | copyright notice and this permission notice appear in all copies. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pumpp 2 | [![GitHub license](https://img.shields.io/badge/license-ISC-blue.svg)](https://raw.githubusercontent.com/bmcfee/pumpp/master/LICENSE) 3 | [![CI](https://github.com/bmcfee/pumpp/actions/workflows/ci.yml/badge.svg)](https://github.com/bmcfee/pumpp/actions/workflows/ci.yml) 4 | [![codecov](https://codecov.io/gh/bmcfee/pumpp/branch/main/graph/badge.svg?token=i7YxIwweBQ)](https://codecov.io/gh/bmcfee/pumpp) 5 | [![Documentation Status](http://readthedocs.org/projects/pumpp/badge/?version=latest)](http://pumpp.readthedocs.io/en/latest/?badge=latest) 6 | 7 | 8 | 9 | practically universal music pre-processor 10 | 11 | ### pumpp up the jams 12 | 13 | The goal of this package is to make it easy to convert pairs of `(audio, jams)` into data that can 14 | be easily consumed by statistical algorithms. Some desired features: 15 | 16 | - Converting tags to sparse encoding vectors 17 | - Sampling `(start, end, label)` to frame-level annotations at a specific frame rate 18 | - Extracting input features (eg, Mel spectra or CQT) from audio 19 | - Converting between annotation spaces for a given task 20 | 21 | ## Example usage 22 | 23 | ```python 24 | 25 | >>> import jams 26 | >>> import pumpp 27 | 28 | >>> audio_f = '/path/to/audio/myfile.ogg' 29 | >>> jams_f = '/path/to/annotations/myfile.jamz' 30 | 31 | >>> # Set up sampling and frame rate parameters 32 | >>> sr, hop_length = 44100, 512 33 | 34 | >>> # Create a feature extraction object 35 | >>> p_cqt = pumpp.feature.CQT(name='cqt', sr=sr, hop_length=hop_length) 36 | 37 | >>> # Create some annotation extractors 38 | >>> p_beat = pumpp.task.BeatTransformer(sr=sr, hop_length=hop_length) 39 | >>> p_chord = pumpp.task.SimpleChordTransformer(sr=sr, hop_length=hop_length) 40 | 41 | >>> # Collect the operators in a pump 42 | >>> pump = 
pumpp.Pump(p_cqt, p_beat, p_chord) 43 | 44 | >>> # Apply the extractors to generate training data 45 | >>> data = pump(audio_f=audio_f, jam=jams_f) 46 | 47 | >>> # Or test data 48 | >>> test_data = pump(audio_f='/my/test/audio.ogg') 49 | 50 | >>> # Or in-memory 51 | >>> y, sr = librosa.load(audio_f) 52 | >>> test_data = pump(y=y, sr=sr) 53 | ``` 54 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make <target>' where <target> is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " epub3 to make an epub3" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | @echo " dummy to check syntax errors of document sources" 51 | 52 | .PHONY: clean 53 | clean: 54 | rm -rf $(BUILDDIR)/* 55 | 56 | .PHONY: html 57 | html: 58 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
61 | 62 | .PHONY: dirhtml 63 | dirhtml: 64 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 65 | @echo 66 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 67 | 68 | .PHONY: singlehtml 69 | singlehtml: 70 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 71 | @echo 72 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 73 | 74 | .PHONY: pickle 75 | pickle: 76 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 77 | @echo 78 | @echo "Build finished; now you can process the pickle files." 79 | 80 | .PHONY: json 81 | json: 82 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 83 | @echo 84 | @echo "Build finished; now you can process the JSON files." 85 | 86 | .PHONY: htmlhelp 87 | htmlhelp: 88 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 89 | @echo 90 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 91 | ".hhp project file in $(BUILDDIR)/htmlhelp." 92 | 93 | .PHONY: qthelp 94 | qthelp: 95 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 96 | @echo 97 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 98 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 99 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pumpp.qhcp" 100 | @echo "To view the help file:" 101 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pumpp.qhc" 102 | 103 | .PHONY: applehelp 104 | applehelp: 105 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 106 | @echo 107 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 108 | @echo "N.B. You won't be able to view it unless you put it in" \ 109 | "~/Library/Documentation/Help or install it in your application" \ 110 | "bundle." 111 | 112 | .PHONY: devhelp 113 | devhelp: 114 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 115 | @echo 116 | @echo "Build finished." 
117 | @echo "To view the help file:" 118 | @echo "# mkdir -p $$HOME/.local/share/devhelp/pumpp" 119 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pumpp" 120 | @echo "# devhelp" 121 | 122 | .PHONY: epub 123 | epub: 124 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 125 | @echo 126 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 127 | 128 | .PHONY: epub3 129 | epub3: 130 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 131 | @echo 132 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 133 | 134 | .PHONY: latex 135 | latex: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo 138 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 139 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 140 | "(use \`make latexpdf' here to do that automatically)." 141 | 142 | .PHONY: latexpdf 143 | latexpdf: 144 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 145 | @echo "Running LaTeX files through pdflatex..." 146 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 147 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 148 | 149 | .PHONY: latexpdfja 150 | latexpdfja: 151 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 152 | @echo "Running LaTeX files through platex and dvipdfmx..." 153 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 154 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 155 | 156 | .PHONY: text 157 | text: 158 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 159 | @echo 160 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 161 | 162 | .PHONY: man 163 | man: 164 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 165 | @echo 166 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 167 | 168 | .PHONY: texinfo 169 | texinfo: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo 172 | @echo "Build finished. 
The Texinfo files are in $(BUILDDIR)/texinfo." 173 | @echo "Run \`make' in that directory to run these through makeinfo" \ 174 | "(use \`make info' here to do that automatically)." 175 | 176 | .PHONY: info 177 | info: 178 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 179 | @echo "Running Texinfo files through makeinfo..." 180 | make -C $(BUILDDIR)/texinfo info 181 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 182 | 183 | .PHONY: gettext 184 | gettext: 185 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 186 | @echo 187 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 188 | 189 | .PHONY: changes 190 | changes: 191 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 192 | @echo 193 | @echo "The overview file is in $(BUILDDIR)/changes." 194 | 195 | .PHONY: linkcheck 196 | linkcheck: 197 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 198 | @echo 199 | @echo "Link check complete; look for any errors in the above output " \ 200 | "or in $(BUILDDIR)/linkcheck/output.txt." 201 | 202 | .PHONY: doctest 203 | doctest: 204 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 205 | @echo "Testing of doctests in the sources finished, look at the " \ 206 | "results in $(BUILDDIR)/doctest/output.txt." 207 | 208 | .PHONY: coverage 209 | coverage: 210 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 211 | @echo "Testing of coverage in the sources finished, look at the " \ 212 | "results in $(BUILDDIR)/coverage/python.txt." 213 | 214 | .PHONY: xml 215 | xml: 216 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 217 | @echo 218 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 219 | 220 | .PHONY: pseudoxml 221 | pseudoxml: 222 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 223 | @echo 224 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
225 | 226 | .PHONY: dummy 227 | dummy: 228 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 229 | @echo 230 | @echo "Build finished. Dummy builder generates no files." 231 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. _api: 2 | 3 | .. module:: pump 4 | 5 | .. automodule:: pumpp.core 6 | :members: 7 | :noindex: 8 | 9 | .. automodule:: pumpp.feature 10 | :members: 11 | 12 | .. automodule:: pumpp.task 13 | :members: 14 | 15 | .. automodule:: pumpp.sampler 16 | :members: 17 | :special-members: 18 | :noindex: 19 | 20 | -------------------------------------------------------------------------------- /docs/changes.rst: -------------------------------------------------------------------------------- 1 | Changes 2 | ------- 3 | 4 | 0.6.0 5 | ===== 6 | - `#136`_ Fixed a bug in serialization of objects with randomstate 7 | - `#135`_ Fixed deprecation warnings for numpy 1.20 and librosa 0.9 8 | - `#134`_ Added tensorflow-keras layer constructors 9 | - `#133`_ Fixed a bug in operator maps with missing keys 10 | - `#131`_ Update to modern dependencies (tensorflow 2, librosa 0.8+). Dropped support for python < 11 | 3.6. 12 | - `#128`_ Avoid redundant computation of features 13 | - `#117`_ Added default names for transformations 14 | 15 | .. _#136: https://github.com/bmcfee/pumpp/pull/136 16 | .. _#135: https://github.com/bmcfee/pumpp/pull/135 17 | .. _#134: https://github.com/bmcfee/pumpp/pull/134 18 | .. _#133: https://github.com/bmcfee/pumpp/pull/133 19 | .. _#131: https://github.com/bmcfee/pumpp/pull/131 20 | .. _#128: https://github.com/bmcfee/pumpp/pull/128 21 | .. _#117: https://github.com/bmcfee/pumpp/pull/117 22 | 23 | 24 | 0.5.0 25 | ===== 26 | - `#105`_ Expanding API for layer construction to eventually support alternative frameworks. 27 | - `#104`_ Added API for explicit data types in feature modules. 
28 | - `#103`_ Added quantization support for feature modules. 29 | - `#106`_ Dropped support for python 2.7. 30 | 31 | .. _#106: https://github.com/bmcfee/pumpp/pull/106 32 | .. _#103: https://github.com/bmcfee/pumpp/pull/103 33 | .. _#104: https://github.com/bmcfee/pumpp/pull/104 34 | .. _#105: https://github.com/bmcfee/pumpp/pull/105 35 | 36 | 0.4.0 37 | ===== 38 | - `#100`_ Added viterbi decoding options for tags, chords, and beat transformers 39 | - `#99`_ Updated test suite 40 | 41 | .. _#100: https://github.com/bmcfee/pumpp/pull/100 42 | .. _#99: https://github.com/bmcfee/pumpp/pull/99 43 | 44 | 0.3.3 45 | ===== 46 | - `#95`_ Data durations are now checked before sampling 47 | 48 | .. _#95: https://github.com/bmcfee/pumpp/pull/95 49 | 50 | 0.3.2 51 | ===== 52 | - `#91`_ JAMS annotations are now populated with confidence fields 53 | - `#92`_ Pump objects can pretty-print in jupyter notebooks 54 | 55 | .. _#91: https://github.com/bmcfee/pumpp/pull/91 56 | .. _#92: https://github.com/bmcfee/pumpp/pull/92 57 | 58 | 0.3.1 59 | ===== 60 | - `#88`_ Added time-position coding 61 | - `#87`_ Added variable-length sampler 62 | 63 | .. _#88: https://github.com/bmcfee/pumpp/pull/88 64 | .. _#87: https://github.com/bmcfee/pumpp/pull/87 65 | 66 | 0.3.0 67 | ===== 68 | - `#85`_ Fixed a bug in BeatPosition transforms 69 | - `#84`_ Fixed a bug in the documentation build on readthedocs 70 | - `#83`_ Fixed an off-by-one error in sampler 71 | - `#81`_ Support multiple time-like dimensions in sampling 72 | - `#80`_ Added `crop=` parameter to `Pump.transform`, which can slice temporal data down to a common duration. 73 | 74 | .. _#85: https://github.com/bmcfee/pumpp/pull/85 75 | .. _#84: https://github.com/bmcfee/pumpp/pull/84 76 | .. _#83: https://github.com/bmcfee/pumpp/pull/83 77 | .. _#81: https://github.com/bmcfee/pumpp/pull/81 78 | .. _#80: https://github.com/bmcfee/pumpp/pull/80 79 | 80 | 0.2.4 81 | ===== 82 | - `#76`_ Implemented the beat-position task 83 | 84 | .. 
_#76: https://github.com/bmcfee/pumpp/pull/76 85 | 86 | 87 | 0.2.3 88 | ===== 89 | - `#74`_ Implemented segmentation agreement task 90 | 91 | .. _#74: https://github.com/bmcfee/pumpp/pull/74 92 | 93 | 94 | 0.2.2 95 | ===== 96 | 97 | - `#70`_ Future-proofing against jams 0.3 98 | 99 | .. _#70: https://github.com/bmcfee/pumpp/pull/70 100 | 101 | 0.2.1 102 | ===== 103 | 104 | - `#68`_ Fixed a frame alignment error in task transformers 105 | - `#66`_ Remove warnings for improperly cast STFT data 106 | 107 | .. _#68: https://github.com/bmcfee/pumpp/pull/68 108 | .. _#66: https://github.com/bmcfee/pumpp/pull/66 109 | 110 | 0.2.0 111 | ===== 112 | - `#65`_ Removed old-style (function) transform API in favor of object interface 113 | - `#65`_ Support in-memory analysis 114 | 115 | .. _#65: https://github.com/bmcfee/pumpp/pull/65 116 | 117 | 0.1.5 118 | ===== 119 | - `#61`_ Fixed an alignment bug in feature extractors 120 | 121 | .. _#61: https://github.com/bmcfee/pumpp/pull/61 122 | 123 | 0.1.4 124 | ===== 125 | - `#59`_ harmonic CQT 126 | - `#58`_ Sparse chord output for chord labels 127 | - `#57`_ Updated sampler bindings for Pump object 128 | 129 | .. _#59: https://github.com/bmcfee/pumpp/pull/59 130 | .. _#58: https://github.com/bmcfee/pumpp/pull/58 131 | .. _#57: https://github.com/bmcfee/pumpp/pull/57 132 | 133 | 0.1.3 134 | ===== 135 | 136 | - `#55`_ Refactored the `Sampler` class, added support for random states and the `SequentialSampler` class 137 | 138 | .. _#55: https://github.com/bmcfee/pumpp/pull/55 139 | 140 | 0.1.2 141 | ===== 142 | 143 | - `#51`_ Added named operator index to `Pump` objects 144 | 145 | .. _#51: https://github.com/bmcfee/pumpp/pull/51 146 | 147 | 0.1.1 148 | ===== 149 | 150 | - `#49`_ Added `Pump.layers` constructor for Keras layers on pump containers 151 | - `#47`_ Fixed a bug in `Sampler` that caused a shape mismatch on input/output tensors 152 | when the input JAMS had multiple matching annotations for a given task. 153 | 154 | .. 
_#49: https://github.com/bmcfee/pumpp/pull/49 155 | .. _#47: https://github.com/bmcfee/pumpp/pull/47 156 | 157 | 0.1.0 158 | ===== 159 | 160 | - Initial public release 161 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # pumpp documentation build configuration file, created by 5 | # sphinx-quickstart on Thu Jul 7 21:27:51 2016. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | 19 | # If extensions (or modules to document with autodoc) are in another directory, 20 | # add these directories to sys.path here. If the directory is relative to the 21 | # documentation root, use os.path.abspath to make it absolute, like shown here. 22 | sys.path.insert(0, os.path.abspath('../')) 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | #needs_sphinx = '1.0' 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | 'sphinx.ext.autodoc', 34 | 'sphinx.ext.autosummary', 35 | 'sphinx.ext.intersphinx', 36 | 'sphinx.ext.viewcode', 37 | 'numpydoc', 38 | ] 39 | 40 | from glob import glob 41 | autosummary_generate = glob('*.rst') 42 | 43 | # Add any paths that contain templates here, relative to this directory. 44 | templates_path = ['_templates'] 45 | 46 | # The suffix(es) of source filenames. 
# Mock out the heavy runtime dependencies so Sphinx autodoc can import pumpp
# on documentation builders (e.g. readthedocs) without installing them.
# Use the stdlib `unittest.mock` (available since Python 3.3) rather than the
# third-party `mock` backport.
from unittest import mock
MOCK_MODULES = ['numpy', 'scipy', 'scipy.sparse',
                'sklearn', 'sklearn.preprocessing', 'sklearn.utils', 'sklearn.utils.sparsefuncs',
                'sklearn.utils.validation', 'sklearn.utils.multiclass',
                'mir_eval', 'mir_eval.util',
                'jams', 'librosa', 'librosa.util', 'librosa.feature', 'librosa.sequence',
                'librosa.note_to_midi', 'librosa.midi_to_note', 'librosa.time_to_frames']
sys.modules.update((mod_name, mock.Mock()) for mod_name in MOCK_MODULES)

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The `imp` module was deprecated in Python 3.4 and removed in 3.12, so load
# the version module by path using importlib instead of imp.load_source.
import importlib.util
_spec = importlib.util.spec_from_file_location('pumpp.version',
                                               '../pumpp/version.py')
pumpp_version = importlib.util.module_from_spec(_spec)
_spec.loader.exec_module(pumpp_version)
# The short X.Y version.
version = pumpp_version.short_version
# The full version, including alpha/beta/rc tags.
release = pumpp_version.version
93 | #today_fmt = '%B %d, %Y' 94 | 95 | # List of patterns, relative to source directory, that match files and 96 | # directories to ignore when looking for source files. 97 | # This patterns also effect to html_static_path and html_extra_path 98 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 99 | 100 | # The reST default role (used for this markup: `text`) to use for all 101 | # documents. 102 | #default_role = None 103 | 104 | # If true, '()' will be appended to :func: etc. cross-reference text. 105 | #add_function_parentheses = True 106 | 107 | # If true, the current module name will be prepended to all description 108 | # unit titles (such as .. function::). 109 | #add_module_names = True 110 | 111 | # If true, sectionauthor and moduleauthor directives will be shown in the 112 | # output. They are ignored by default. 113 | #show_authors = False 114 | 115 | # The name of the Pygments (syntax highlighting) style to use. 116 | pygments_style = 'sphinx' 117 | 118 | # A list of ignored prefixes for module index sorting. 119 | #modindex_common_prefix = [] 120 | 121 | # If true, keep warnings as "system message" paragraphs in the built documents. 122 | #keep_warnings = False 123 | 124 | # If true, `todo` and `todoList` produce output, else they produce nothing. 125 | todo_include_todos = False 126 | 127 | 128 | # -- Options for HTML output ---------------------------------------------- 129 | 130 | # The theme to use for HTML and HTML Help pages. See the documentation for 131 | # a list of builtin themes. 132 | html_theme = 'default' 133 | 134 | # Theme options are theme-specific and customize the look and feel of a theme 135 | # further. For a list of options available for each theme, see the 136 | # documentation. 137 | #html_theme_options = {} 138 | 139 | # Add any paths that contain custom themes here, relative to this directory. 140 | #html_theme_path = [] 141 | 142 | # The name for this set of Sphinx documents. 143 | # " v documentation" by default. 
144 | #html_title = 'pumpp v0.0.0pre' 145 | 146 | # A shorter title for the navigation bar. Default is the same as html_title. 147 | #html_short_title = None 148 | 149 | # The name of an image file (relative to this directory) to place at the top 150 | # of the sidebar. 151 | #html_logo = None 152 | 153 | # The name of an image file (relative to this directory) to use as a favicon of 154 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 155 | # pixels large. 156 | #html_favicon = None 157 | 158 | # Add any paths that contain custom static files (such as style sheets) here, 159 | # relative to this directory. They are copied after the builtin static files, 160 | # so a file named "default.css" will overwrite the builtin "default.css". 161 | # html_static_path = ['_static'] 162 | 163 | # Add any extra paths that contain custom files (such as robots.txt or 164 | # .htaccess) here, relative to this directory. These files are copied 165 | # directly to the root of the documentation. 166 | #html_extra_path = [] 167 | 168 | # If not None, a 'Last updated on:' timestamp is inserted at every page 169 | # bottom, using the given strftime format. 170 | # The empty string is equivalent to '%b %d, %Y'. 171 | #html_last_updated_fmt = None 172 | 173 | # If true, SmartyPants will be used to convert quotes and dashes to 174 | # typographically correct entities. 175 | #html_use_smartypants = True 176 | 177 | # Custom sidebar templates, maps document names to template names. 178 | #html_sidebars = {} 179 | 180 | # Additional templates that should be rendered to pages, maps page names to 181 | # template names. 182 | #html_additional_pages = {} 183 | 184 | # If false, no module index is generated. 185 | #html_domain_indices = True 186 | 187 | # If false, no index is generated. 188 | #html_use_index = True 189 | 190 | # If true, the index is split into individual pages for each letter. 
191 | #html_split_index = False 192 | 193 | # If true, links to the reST sources are added to the pages. 194 | #html_show_sourcelink = True 195 | 196 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 197 | #html_show_sphinx = True 198 | 199 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 200 | #html_show_copyright = True 201 | 202 | # If true, an OpenSearch description file will be output, and all pages will 203 | # contain a tag referring to it. The value of this option must be the 204 | # base URL from which the finished HTML is served. 205 | #html_use_opensearch = '' 206 | 207 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 208 | #html_file_suffix = None 209 | 210 | # Language to be used for generating the HTML full-text search index. 211 | # Sphinx supports the following languages: 212 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' 213 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' 214 | #html_search_language = 'en' 215 | 216 | # A dictionary with options for the search language support, empty by default. 217 | # 'ja' uses this config value. 218 | # 'zh' user can custom change `jieba` dictionary path. 219 | #html_search_options = {'type': 'default'} 220 | 221 | # The name of a javascript file (relative to the configuration directory) that 222 | # implements a search results scorer. If empty, the default will be used. 223 | #html_search_scorer = 'scorer.js' 224 | 225 | # Output file base name for HTML help builder. 226 | htmlhelp_basename = 'pumppdoc' 227 | 228 | # -- Options for LaTeX output --------------------------------------------- 229 | 230 | latex_elements = { 231 | # The paper size ('letterpaper' or 'a4paper'). 232 | #'papersize': 'letterpaper', 233 | 234 | # The font size ('10pt', '11pt' or '12pt'). 235 | #'pointsize': '10pt', 236 | 237 | # Additional stuff for the LaTeX preamble. 
238 | #'preamble': '', 239 | 240 | # Latex figure (float) alignment 241 | #'figure_align': 'htbp', 242 | } 243 | 244 | # Grouping the document tree into LaTeX files. List of tuples 245 | # (source start file, target name, title, 246 | # author, documentclass [howto, manual, or own class]). 247 | latex_documents = [ 248 | (master_doc, 'pumpp.tex', 'pumpp Documentation', 249 | 'Brian McFee', 'manual'), 250 | ] 251 | 252 | # The name of an image file (relative to this directory) to place at the top of 253 | # the title page. 254 | #latex_logo = None 255 | 256 | # For "manual" documents, if this is true, then toplevel headings are parts, 257 | # not chapters. 258 | #latex_use_parts = False 259 | 260 | # If true, show page references after internal links. 261 | #latex_show_pagerefs = False 262 | 263 | # If true, show URL addresses after external links. 264 | #latex_show_urls = False 265 | 266 | # Documents to append as an appendix to all manuals. 267 | #latex_appendices = [] 268 | 269 | # If false, no module index is generated. 270 | #latex_domain_indices = True 271 | 272 | 273 | # -- Options for manual page output --------------------------------------- 274 | 275 | # One entry per manual page. List of tuples 276 | # (source start file, name, description, authors, manual section). 277 | man_pages = [ 278 | (master_doc, 'pumpp', 'pumpp Documentation', 279 | [author], 1) 280 | ] 281 | 282 | # If true, show URL addresses after external links. 283 | #man_show_urls = False 284 | 285 | 286 | # -- Options for Texinfo output ------------------------------------------- 287 | 288 | # Grouping the document tree into Texinfo files. List of tuples 289 | # (source start file, target name, title, author, 290 | # dir menu entry, description, category) 291 | texinfo_documents = [ 292 | (master_doc, 'pumpp', 'pumpp Documentation', 293 | author, 'pumpp', 'One line description of project.', 294 | 'Miscellaneous'), 295 | ] 296 | 297 | # Documents to append as an appendix to all manuals. 
298 | #texinfo_appendices = [] 299 | 300 | # If false, no module index is generated. 301 | #texinfo_domain_indices = True 302 | 303 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 304 | #texinfo_show_urls = 'footnote' 305 | 306 | # If true, do not generate a @detailmenu in the "Top" node's menu. 307 | #texinfo_no_detailmenu = False 308 | 309 | 310 | # Example configuration for intersphinx: refer to the Python standard library. 311 | intersphinx_mapping = {'python': ('https://docs.python.org/3', None), 312 | 'numpy': ('https://docs.scipy.org/doc/numpy/', None), 313 | 'librosa': ('https://librosa.org/doc/latest/', None), 314 | 'jams': ('https://jams.readthedocs.io/en/latest/', None)} 315 | 316 | numpydoc_show_class_members = False 317 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. pumpp documentation master file, created by 2 | sphinx-quickstart on Thu Jul 7 21:27:51 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | 7 | Practically Universal Music Pre-Processor 8 | ========================================= 9 | 10 | Pumpp is designed to make it easy to convert pairs of `(audio, jams)` into data that can 11 | be easily consumed by statistical algorithms. Some desired features: 12 | 13 | - Converting tags to sparse encoding vectors 14 | - Sampling `(start, end, label)` to frame-level annotations at a specific sampling rate 15 | - Extracting first-level features (eg, Mel spectra or CQT) from audio 16 | - Aligning and storing the results in a simple data structure (npz, hdf5) 17 | - Converting between annotation spaces for a given task 18 | - Helper variables for semi-supervised learning 19 | 20 | 21 | API 22 | === 23 | .. toctree:: 24 | :maxdepth: 2 25 | 26 | api 27 | 28 | 29 | Changes 30 | ======= 31 | .. 
class Scope:
    '''Base class for managing a namespace of named tensor fields.

    Attributes
    ----------
    name : str or None
        The name of this object.  When not `None`, every field key
        is prefixed by `name/`.

    fields : dict of str : Tensor
        Maps each scoped field name produced by this object to a
        `Tensor` descriptor giving its shape and data type.
    '''
    def __init__(self, name):
        self.name = name
        self.fields = dict()

    def __repr__(self):
        return '<{}({}) fields={}>'.format(self.__class__.__name__,
                                           self.name, self.fields)

    def scope(self, key):
        '''Prefix a key with this object's name scope.

        Parameters
        ----------
        key : string

        Returns
        -------
        `name/key` if `name` is not `None`;
        otherwise, `key` unchanged.
        '''
        return key if self.name is None else '{:s}/{:s}'.format(self.name, key)

    def register(self, field, shape, dtype):
        '''Register a field as a tensor with a specified shape and type.

        A `Tensor` descriptor of the given shape and type is stored in
        this object's `fields` dict under the scoped field name.

        Parameters
        ----------
        field : str
            The name of the field

        shape : iterable of `int` or `None`
            The shape of the output variable, excluding any dimension
            for multiple outputs.  `None` entries mark variable-length
            (time-like) axes.

        dtype : type
            The data type of the field

        Raises
        ------
        ParameterError
            If `dtype` or `shape` are improperly specified
        '''
        if not isinstance(dtype, (type, np.dtype)):
            raise ParameterError('dtype={} must be a type or np.dtype'.format(dtype))

        shape_ok = (isinstance(shape, Iterable) and
                    all(s is None or isinstance(s, int) for s in shape))
        if not shape_ok:
            raise ParameterError('shape={} must be an iterable of integers'.format(shape))

        self.fields[self.scope(field)] = Tensor(tuple(shape), dtype)

    def pop(self, field):
        '''Remove and return the descriptor registered under a (scoped) field name.'''
        return self.fields.pop(self.scope(field))

    def merge(self, data):
        '''Stack a list of output dictionaries into one scoped dictionary.

        Parameters
        ----------
        data : list of dict
            Output dicts as produced by `pumpp.task.BaseTaskTransformer.transform`
            or `pumpp.feature.FeatureExtractor.transform`.

        Returns
        -------
        data_out : dict
            Each key appearing in any input dict is re-mapped through
            `scope`, and the corresponding values are stacked along a
            new leading (axis 0) dimension.
        '''
        all_keys = set().union(*data)
        return {self.scope(key): np.stack([np.asarray(d[key]) for d in data],
                                          axis=0)
                for key in all_keys}
class Slicer:
    '''Slicer can compute the duration of data with time-like fields,
    and slice down to the common time index.

    This class serves as a base for Sampler and Pump, and should not
    be used directly.

    Parameters
    ----------
    ops : one or more Scope (TaskTransformer or FeatureExtractor)
    '''
    def __init__(self, *ops):
        # Maps each field key to the list of its time-like axis indices
        self._time = dict()

        for op in ops:
            self.add(op)

    def add(self, operator):
        '''Add an operator to the Slicer

        Parameters
        ----------
        operator : Scope (TaskTransformer or FeatureExtractor)
            The new operator to add
        '''
        if not isinstance(operator, Scope):
            raise ParameterError('Operator {} must be a TaskTransformer '
                                 'or FeatureExtractor'.format(operator))

        for key, tensor in operator.fields.items():
            # Axes with size None are variable-length (time-like).
            # Offset by 1 to account for the leading batch dimension.
            self._time[key] = [axis
                               for axis, size in enumerate(tensor.shape, 1)
                               if size is None]

    def data_duration(self, data):
        '''Compute the valid data duration of a dict

        Parameters
        ----------
        data : dict
            As produced by pumpp.transform

        Returns
        -------
        length : int
            The minimum temporal extent of a dynamic observation in data
        '''
        # Collect the lengths of every time-like axis across all fields
        lengths = [data[key].shape[axis]
                   for key in self._time
                   for axis in self._time[key]]

        return min(lengths)

    def crop(self, data):
        '''Crop a data dictionary down to its common time

        Parameters
        ----------
        data : dict
            As produced by pumpp.transform

        Returns
        -------
        data_cropped : dict
            Like `data` but with all time-like axes truncated to the
            minimum common duration
        '''
        duration = self.data_duration(data)

        data_out = dict()
        for key, value in data.items():
            slicer = [slice(None)] * value.ndim
            for axis in self._time.get(key, []):
                slicer[axis] = slice(duration)
            data_out[key] = value[tuple(slicer)]

        return data_out
24 | 25 | This class is used to collect feature and task transformers 26 | 27 | Attributes 28 | ---------- 29 | ops : list of (BaseTaskTransformer, FeatureExtractor) 30 | The operations to apply 31 | 32 | Examples 33 | -------- 34 | Create a CQT and chord transformer 35 | 36 | >>> p_cqt = pumpp.feature.CQT('cqt', sr=44100, hop_length=1024) 37 | >>> p_chord = pumpp.task.ChordTagTransformer(sr=44100, hop_length=1024) 38 | >>> pump = pumpp.Pump(p_cqt, p_chord) 39 | >>> data = pump.transform(audio_f='/my/audio/file.mp3', 40 | ... jam='/my/jams/annotation.jams') 41 | 42 | Or use the call interface: 43 | 44 | >>> data = pump(audio_f='/my/audio/file.mp3', 45 | ... jam='/my/jams/annotation.jams') 46 | 47 | Or apply to audio in memory, and without existing annotations: 48 | 49 | >>> y, sr = librosa.load('/my/audio/file.mp3') 50 | >>> data = pump(y=y, sr=sr) 51 | 52 | Access all the fields produced by this pump: 53 | 54 | >>> pump.fields 55 | {'chord/chord': Tensor(shape=(None, 170), dtype=), 56 | 'cqt/mag': Tensor(shape=(None, 288), dtype=), 57 | 'cqt/phase': Tensor(shape=(None, 288), dtype=)} 58 | 59 | Access a constituent operator by name: 60 | 61 | >>> pump['chord'].fields 62 | {'chord/chord': Tensor(shape=(None, 170), dtype=)} 63 | ''' 64 | 65 | def __init__(self, *ops): 66 | 67 | self.ops = [] 68 | self.opmap = dict() 69 | super(Pump, self).__init__(*ops) 70 | 71 | def add(self, operator): 72 | '''Add an operation to this pump. 
    def transform(self, audio_f=None, jam=None, y=None, sr=None, crop=False,
                  data=None):
        '''Apply the transformations to an audio file, and optionally JAMS object.

        Parameters
        ----------
        audio_f : str
            Path to audio file

        jam : optional, `jams.JAMS`, str or file-like
            Optional JAMS object/path to JAMS file/open file descriptor.

            If provided, this will provide data for task transformers.

        y : np.ndarray
        sr : number > 0
            If provided, operate directly on an existing audio buffer `y` at
            sampling rate `sr` rather than load from `audio_f`.

        crop : bool
            If `True`, then data are cropped to a common time index across all
            fields.  Otherwise, data may have different time extents.

        data : None or dict
            Optional dict of already-computed features.  Any operator whose
            output fields are all already present in `data` is skipped, and
            the dict is updated in place with the newly computed fields.

        Returns
        -------
        data : dict
            Data dictionary containing the transformed audio (and annotations)

        Raises
        ------
        ParameterError
            At least one of `audio_f` or `(y, sr)` must be provided.

        '''
        # Only run operators that would produce at least one field
        # not already present in `data`.
        data = dict() if data is None else data
        existing_keys = set(data)
        ops = [op for op in self.ops if set(op.fields) - existing_keys]

        if any(isinstance(op, FeatureExtractor) for op in ops):
            if y is None:
                if audio_f is None:
                    raise ParameterError('At least one of `y` or `audio_f` '
                                         'must be provided')

                # Load the audio
                y, sr = librosa.load(audio_f, sr=sr, mono=True)

            if sr is None:
                raise ParameterError('If audio is provided as `y`, you must '
                                     'specify the sampling rate as sr=')

        if any(isinstance(op, BaseTaskTransformer) for op in ops):
            if jam is None:
                # No annotations given: build an empty JAMS whose duration
                # matches the audio, so task transformers can still run.
                jam = jams.JAMS()
                jam.file_metadata.duration = librosa.get_duration(y=y, sr=sr)

            # Load the jams
            if not isinstance(jam, jams.JAMS):
                jam = jams.load(jam)

        for op in ops:
            if isinstance(op, BaseTaskTransformer):
                data.update(op.transform(jam))
            elif isinstance(op, FeatureExtractor):
                data.update(op.transform(y, sr))
        if crop:
            data = self.crop(data)
        return data
    @property
    def fields(self):
        '''A dictionary of all fields constructed by this pump.

        Returns
        -------
        fields : dict of str : Tensor
            The union of the `fields` dicts of every operator in this
            pump, keyed by scoped field name (e.g. ``'cqt/mag'``).
        '''
        out = dict()
        for operator in self.ops:
            out.update(**operator.fields)

        return out
' 253 | for key in self.opmap: 254 | rstr += '\n
{:s}
'.format(key) 255 | rstr += '\n
{}'.format(self.opmap[key]) 256 | 257 | rstr += '
    ' 258 | for fkey, field in self.opmap[key].fields.items(): 259 | rstr += '\n
  • {:s} [shape={}, dtype={}]
  • '.format(fkey, 260 | field.shape, 261 | repr(field.dtype)) 262 | rstr += '
' 263 | rstr += '
' 264 | return rstr 265 | -------------------------------------------------------------------------------- /pumpp/exceptions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | '''Exception classes for pumpp''' 4 | 5 | 6 | class PumppError(Exception): 7 | '''The root pumpp exception class''' 8 | pass 9 | 10 | 11 | class DataError(PumppError): 12 | '''Exceptions relating to data errors''' 13 | pass 14 | 15 | 16 | class ParameterError(PumppError): 17 | '''Exceptions relating to function and method parameters''' 18 | pass 19 | -------------------------------------------------------------------------------- /pumpp/feature/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | Feature extractors 5 | ================== 6 | 7 | .. autosummary:: 8 | :toctree: generated/ 9 | 10 | FeatureExtractor 11 | CQT 12 | CQTMag 13 | CQTPhaseDiff 14 | HCQT 15 | HCQTMag 16 | HCQTPhaseDiff 17 | STFT 18 | STFTMag 19 | STFTPhaseDiff 20 | Mel 21 | Tempogram 22 | TempoScale 23 | TimePosition 24 | ''' 25 | 26 | from .base import * 27 | from .cqt import * 28 | from .fft import * 29 | from .mel import * 30 | from .rhythm import * 31 | from .time import * 32 | -------------------------------------------------------------------------------- /pumpp/feature/_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | '''Utilities for feature extraction classes''' 4 | 5 | import numpy as np 6 | 7 | from ..exceptions import ParameterError 8 | 9 | 10 | def phase_diff(phase, conv): 11 | '''Compute the phase differential along a given axis 12 | 13 | Parameters 14 | ---------- 15 | phase : np.ndarray 16 | Input phase (in radians) 17 | 18 | conv: {None, 'tf', 'th', 'channels_last', 'channels_first'} 19 | Convolution 
def phase_diff(phase, conv):
    '''Compute the phase differential along the time axis.

    Parameters
    ----------
    phase : np.ndarray
        Input phase (in radians)

    conv : {None, 'tf', 'th', 'channels_last', 'channels_first'}
        Convolution mode; determines which axis of `phase` is time.

    Returns
    -------
    dphase : np.ndarray like `phase`
        The phase differential: the first frame is passed through
        unchanged, and each subsequent frame holds the unwrapped
        frame-to-frame phase difference.

    Raises
    ------
    ParameterError
        If `conv` is not one of the supported modes.
    '''
    if conv is None or conv in ('channels_last', 'tf'):
        axis = 0
    elif conv in ('channels_first', 'th'):
        axis = 1
    else:
        # Previously an unrecognized mode fell through the if/elif chain and
        # triggered an UnboundLocalError on `axis`; fail loudly instead,
        # consistent with the conv validation in FeatureExtractor and HCQT.
        raise ParameterError('Invalid conv={}'.format(conv))

    # First frame keeps its absolute phase; the remaining frames store the
    # unwrapped differential along the time axis.
    dphase = np.empty(phase.shape, dtype=phase.dtype)
    zero_idx = [slice(None)] * phase.ndim
    zero_idx[axis] = slice(1)
    else_idx = [slice(None)] * phase.ndim
    else_idx[axis] = slice(1, None)
    zero_idx = tuple(zero_idx)
    else_idx = tuple(else_idx)
    dphase[zero_idx] = phase[zero_idx]
    dphase[else_idx] = np.diff(np.unwrap(phase, axis=axis), axis=axis)
    return dphase
def quantize(x, ref_min=None, ref_max=None, dtype='uint8'):
    '''Quantize array entries to a fixed unsigned-integer dtype.

    Parameters
    ----------
    x : np.ndarray
        The data to quantize

    ref_min : None or float

    ref_max : None or float
        The reference minimum (maximum) value for quantization.
        By default, `x.min()` (`x.max()`)

    dtype : np.dtype {'uint8', 'uint16'}
        The target data type.  Any unsigned integer type is supported,
        but most cases will call for `uint8`.

    Returns
    -------
    y : np.ndarray, dtype=dtype
        The values of `x` mapped onto the integer range of `dtype`

    Raises
    ------
    ParameterError
        If `dtype` is not an unsigned integer type.
    '''
    if ref_min is None:
        ref_min = np.min(x)

    if ref_max is None:
        ref_max = np.max(x)

    try:
        target = np.iinfo(dtype)
    except ValueError as exc:
        raise ParameterError('dtype={} must be an unsigned integer type'.format(dtype)) from exc

    if target.kind != 'u':
        raise ParameterError('dtype={} must be an unsigned integer type'.format(dtype))

    # One bin edge per representable level of the target type
    n_levels = target.max - target.min + 1
    edges = np.linspace(ref_min, ref_max, num=n_levels)

    out = np.empty_like(x, dtype=np.dtype(dtype))
    out[:] = np.digitize(x, edges, right=True)

    # Clamp out-of-range inputs to the extremes of the integer range
    out[x > ref_max] = target.max
    out[x < ref_min] = target.min
    return out


def to_dtype(x, dtype):
    '''Convert an array to a target dtype, quantizing if it is integral.

    Parameters
    ----------
    x : np.ndarray
        The input data

    dtype : np.dtype or type specification
        The target dtype

    Returns
    -------
    x_dtype : np.ndarray, dtype=dtype
        The converted data.

        If dtype is integrable, `x_dtype` will be quantized.

    See Also
    --------
    quantize
    '''
    if np.issubdtype(dtype, np.integer):
        return quantize(x, dtype=dtype)
    return x.astype(dtype)
class FeatureExtractor(Scope):
    '''The base feature extractor class.

    Attributes
    ----------
    name : str
        The name for this feature extractor

    sr : number > 0
        The sampling rate of audio for analysis

    hop_length : int > 0
        The hop length between analysis frames

    conv : {'tf', 'th', 'channels_last', 'channels_first', None}
        convolution dimension ordering:

        - 'channels_last' for tensorflow-style 2D convolution
        - 'tf' equivalent to 'channels_last'
        - 'channels_first' for theano-style 2D convolution
        - 'th' equivalent to 'channels_first'
        - None for 1D or non-convolutional representations

    dtype : str or np.dtype
        The data type for features produced by this object. Default is `float32`.

        Setting to `uint8` will produce quantized features.
    '''
    def __init__(self, name=None, sr=22050, hop_length=512, conv=None, dtype='float32'):

        super(FeatureExtractor, self).__init__(name)

        # Validate conv early so downstream register()/idx never see an
        # unsupported mode
        if conv not in ('tf', 'th', 'channels_last', 'channels_first', None):
            raise ParameterError('conv="{}", must be one of '
                                 '("channels_last", "tf", '
                                 '"channels_first", "th", None)'.format(conv))

        self.sr = sr
        self.hop_length = hop_length
        self.conv = conv
        self.dtype = np.dtype(dtype)

    def register(self, key, dimension, dtype, channels=1):
        '''Register an output field of shape (time, dimension), adding a
        channel axis according to the convolution mode.'''
        shape = [None, dimension]

        if self.conv in ('channels_last', 'tf'):
            shape.append(channels)

        elif self.conv in ('channels_first', 'th'):
            shape.insert(0, channels)

        super(FeatureExtractor, self).register(key, shape, dtype)

    @property
    def idx(self):
        # Index expression that inserts the channel axis implied by `conv`
        # when applied to a (time, dimension) array
        if self.conv is None:
            return Ellipsis

        elif self.conv in ('channels_last', 'tf'):
            return (slice(None), slice(None), np.newaxis)

        elif self.conv in ('channels_first', 'th'):
            return (np.newaxis, slice(None), slice(None))

    def transform(self, y, sr):
        '''Transform an audio signal

        Parameters
        ----------
        y : np.ndarray
            The audio signal

        sr : number > 0
            The native sampling rate of y

        Returns
        -------
        dict
            Data dictionary containing features extracted from y

        See Also
        --------
        transform_audio
        '''
        # Resample to this extractor's rate before analysis
        if sr != self.sr:
            y = resample(y, orig_sr=sr, target_sr=self.sr)

        return self.merge([self.transform_audio(y)])

    def transform_audio(self, y):
        # Subclasses implement the actual feature computation
        raise NotImplementedError

    def layers(self, api='keras'):
        '''Construct input layers for the given transformer

        Parameters
        ----------
        api : string
            One of 'k', 'keras' (for Keras mode)
            'tf.keras', 'tensorflow.keras', 'tfk' for tensorflow.keras mode
            'tf', 'tensorflow' for tensorflow (v1 compatible)

            Note that 'tensorflow' mode uses v1 compatibility, and disables eager execution.

        Returns
        -------
        layers : {field: layer object}
            A dictionary of keras or tensorflow input layers, keyed by the corresponding
            field keys.

        Raises
        ------
        ParameterError
            If `api` is not one of the supported modes.
        '''
        if api in ('k', 'keras'):
            return self.layers_keras()
        elif api in ('tf.keras', 'tensorflow.keras', 'tfk'):
            return self.layers_tfkeras()
        elif api in ('tf', 'tensorflow'):
            return self.layers_tensorflow()
        else:
            raise ParameterError('Unsupported layer api={}'.format(api))

    def layers_tensorflow(self):
        # Build v1-style placeholders, one per registered field.
        # Import is deferred so tensorflow is only required in this mode.
        import tensorflow.compat.v1 as tf
        tf.disable_v2_behavior()

        L = dict()
        for key in self.fields:
            # Prepend a batch axis to the registered field shape
            shape = tuple([None] + list(self.fields[key].shape))
            L[key] = tf.placeholder(self.fields[key].dtype,
                                    shape=shape, name=key)
        return L

    def layers_keras(self):
        # Deferred import: keras is only required in this mode
        from keras.layers import Input

        L = dict()
        for key in self.fields:
            L[key] = Input(name=key,
                           shape=self.fields[key].shape,
                           dtype=np.dtype(self.fields[key].dtype).name)

        return L

    def layers_tfkeras(self):
        # Deferred import: tensorflow is only required in this mode
        from tensorflow.keras.layers import Input

        L = dict()
        for key in self.fields:
            L[key] = Input(name=key,
                           shape=self.fields[key].shape,
                           dtype=np.dtype(self.fields[key].dtype).name)

        return L

    def n_frames(self, duration):
        '''Get the number of frames for a given duration

        Parameters
        ----------
        duration : number >= 0
            The duration, in seconds

        Returns
        -------
        n_frames : int >= 0
            The number of frames at this extractor's sampling rate and
            hop length
        '''
        return int(time_to_frames(duration, sr=self.sr,
                                  hop_length=self.hop_length))
class CQT(FeatureExtractor):
    '''Constant-Q transform

    Attributes
    ----------
    name : str
        The name for this feature extractor

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The number of samples between CQT frames

    n_octaves : int > 0
        The number of octaves in the CQT

    over_sample : int > 0
        The amount of frequency oversampling (bins per semitone)

    fmin : float > 0
        The minimum frequency of the CQT.  Defaults to `C1` (~32.7 Hz).

    log : boolean
        If `True`, scale the magnitude to decibels

        Otherwise, use linear magnitude

    conv : {'tf', 'th', 'channels_last', 'channels_first', None}
        convolution dimension ordering (see FeatureExtractor)

    dtype : np.dtype
        The data type for the output features. Default is `float32`.

        Setting to `uint8` will produce quantized features.
    '''
    def __init__(self, name='cqt', sr=22050, hop_length=512, n_octaves=8, over_sample=3,
                 fmin=None, log=False, conv=None, dtype='float32'):
        super(CQT, self).__init__(name, sr, hop_length, conv=conv, dtype=dtype)

        if fmin is None:
            fmin = note_to_hz('C1')

        self.n_octaves = n_octaves
        self.over_sample = over_sample
        self.fmin = fmin
        self.log = log

        # over_sample bins per semitone, 12 semitones per octave
        n_bins = n_octaves * 12 * over_sample
        self.register('mag', n_bins, self.dtype)
        self.register('phase', n_bins, self.dtype)

    def transform_audio(self, y):
        '''Compute the CQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins)
                The CQT magnitude

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase
        '''
        # Trim/pad to the frame count implied by the track duration
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                fmin=self.fmin,
                n_bins=(self.n_octaves * self.over_sample * 12),
                bins_per_octave=(self.over_sample * 12))

        C = fix_length(C, size=n_frames)

        cqtm, phase = magphase(C)
        if self.log:
            cqtm = amplitude_to_db(cqtm, ref=np.max)

        # Transpose to (time, frequency), add channel axis per conv mode,
        # then convert (and possibly quantize) to the output dtype
        return {'mag': to_dtype(cqtm.T[self.idx], self.dtype),
                'phase': to_dtype(np.angle(phase).T[self.idx], self.dtype)}


class CQTMag(CQT):
    '''Magnitude CQT: CQT with the phase output removed.

    See Also
    --------
    CQT
    '''

    def __init__(self, *args, **kwargs):
        super(CQTMag, self).__init__(*args, **kwargs)
        # Drop the registered phase field; only magnitude is exposed
        self.pop('phase')

    def transform_audio(self, y):
        '''Compute CQT magnitude.

        Parameters
        ----------
        y : np.ndarray
            the audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, n_bins)
                The CQT magnitude
        '''
        data = super(CQTMag, self).transform_audio(y)
        # Discard the phase computed by the parent
        data.pop('phase')
        return data
class CQTPhaseDiff(CQT):
    '''CQT with unwrapped phase differentials.

    Replaces the parent's 'phase' output with 'dphase', the unwrapped
    frame-to-frame phase difference.

    See Also
    --------
    CQT
    '''
    def __init__(self, *args, **kwargs):
        super(CQTPhaseDiff, self).__init__(*args, **kwargs)
        # Swap the registered 'phase' field for a differential field with
        # the same dimension and dtype
        phase_field = self.pop('phase')

        self.register('dphase',
                      self.n_octaves * 12 * self.over_sample,
                      phase_field.dtype)

    def transform_audio(self, y):
        '''Compute the CQT magnitude and phase differential.

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins)
                The CQT magnitude

            data['dphase'] : np.ndarray, shape = mag.shape
                The unwrapped CQT phase differential
        '''
        # NOTE: this recomputes the CQT rather than delegating to
        # CQT.transform_audio, because the parent applies to_dtype (possibly
        # quantization) to the phase before we could difference it.
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                fmin=self.fmin,
                n_bins=(self.n_octaves * self.over_sample * 12),
                bins_per_octave=(self.over_sample * 12))

        C = fix_length(C, size=n_frames)

        cqtm, phase = magphase(C)
        if self.log:
            cqtm = amplitude_to_db(cqtm, ref=np.max)

        # Difference the raw (float) phase along the time axis
        dphase = phase_diff(np.angle(phase).T[self.idx], self.conv)

        return {'mag': to_dtype(cqtm.T[self.idx], self.dtype),
                'dphase': to_dtype(dphase, self.dtype)}
class HCQT(FeatureExtractor):
    '''Harmonic Constant-Q transform

    Attributes
    ----------
    name : str
        The name for this feature extractor

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The number of samples between CQT frames

    n_octaves : int > 0
        The number of octaves in the CQT

    over_sample : int > 0
        The amount of frequency oversampling (bins per semitone)

    fmin : float > 0
        The minimum frequency of the CQT

    harmonics : list of int >= 1
        The list of harmonics to compute

    log : boolean
        If `True`, scale the magnitude to decibels

        Otherwise, use linear magnitude

    conv : {'tf', 'th', 'channels_last', 'channels_first'}
        convolution dimension ordering:

        - 'channels_last' for tensorflow-style 2D convolution
        - 'tf' equivalent to 'channels_last'
        - 'channels_first' for theano-style 2D convolution
        - 'th' equivalent to 'channels_first'

    dtype : np.dtype
        The data type for the output features. Default is `float32`.

        Setting to `uint8` will produce quantized features.
    '''
    def __init__(self, name='hcqt', sr=22050, hop_length=512, n_octaves=8,
                 over_sample=3, fmin=None, harmonics=None, log=False,
                 conv='channels_last', dtype='float32'):
        # Default values for name/sr/hop_length added for consistency with
        # the other extractors (CQT, STFT, Mel, Tempogram); existing callers
        # passing them positionally are unaffected.

        # The HCQT output is inherently 3D (time, frequency, harmonic), so a
        # channeled convolution mode is required: conv=None is not allowed.
        if conv not in ('channels_last', 'tf', 'channels_first', 'th'):
            raise ParameterError('Invalid conv={}'.format(conv))

        super(HCQT, self).__init__(name, sr, hop_length, conv=conv, dtype=dtype)

        if fmin is None:
            fmin = note_to_hz('C1')

        if harmonics is None:
            harmonics = [1]
        else:
            harmonics = list(harmonics)
            if not all(isinstance(_, int) and _ > 0 for _ in harmonics):
                raise ParameterError('Invalid harmonics={}'.format(harmonics))

        self.n_octaves = n_octaves
        self.over_sample = over_sample
        self.fmin = fmin
        self.log = log
        self.harmonics = harmonics

        # over_sample bins per semitone, 12 semitones per octave; one
        # channel per harmonic
        n_bins = n_octaves * 12 * over_sample
        self.register('mag', n_bins, self.dtype, channels=len(harmonics))
        self.register('phase', n_bins, self.dtype, channels=len(harmonics))

    def transform_audio(self, y):
        '''Compute the HCQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins, n_harmonics)
                The CQT magnitude

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase
        '''
        cqtm, phase = [], []

        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        for h in self.harmonics:
            # One CQT per harmonic, rooted at h * fmin
            C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                    fmin=self.fmin * h,
                    n_bins=(self.n_octaves * self.over_sample * 12),
                    bins_per_octave=(self.over_sample * 12))

            C = fix_length(C, size=n_frames)

            C, P = magphase(C)
            if self.log:
                C = amplitude_to_db(C, ref=np.max)
            cqtm.append(C)
            phase.append(P)

        cqtm = to_dtype(np.asarray(cqtm), self.dtype)
        phase = to_dtype(np.angle(np.asarray(phase)), self.dtype)

        return {'mag': self._index(cqtm),
                'phase': self._index(phase)}

    def _index(self, value):
        '''Rearrange a tensor according to the convolution mode

        Input is assumed to be in (channels, bins, time) format.
        '''
        if self.conv in ('channels_last', 'tf'):
            # -> (time, bins, channels)
            return np.transpose(value, (2, 1, 0))

        else:  # self.conv in ('channels_first', 'th')
            # -> (channels, time, bins)
            return np.transpose(value, (0, 2, 1))


class HCQTMag(HCQT):
    '''Magnitude HCQT: HCQT with the phase output removed.

    See Also
    --------
    HCQT
    '''

    def __init__(self, *args, **kwargs):
        super(HCQTMag, self).__init__(*args, **kwargs)
        # Drop the registered phase field; only magnitude is exposed
        self.pop('phase')

    def transform_audio(self, y):
        '''Compute HCQT magnitude.

        Parameters
        ----------
        y : np.ndarray
            the audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, n_bins, n_harmonics)
                The CQT magnitude
        '''
        data = super(HCQTMag, self).transform_audio(y)
        data.pop('phase')
        return data


class HCQTPhaseDiff(HCQT):
    '''HCQT with unwrapped phase differentials

    See Also
    --------
    HCQT
    '''
    def __init__(self, *args, **kwargs):
        super(HCQTPhaseDiff, self).__init__(*args, **kwargs)
        # Swap the registered 'phase' field for a differential field with
        # the same dimension, dtype, and channel count
        phase_field = self.pop('phase')

        self.register('dphase',
                      self.n_octaves * 12 * self.over_sample,
                      phase_field.dtype,
                      channels=len(self.harmonics))

    def transform_audio(self, y):
        '''Compute the HCQT magnitude and phase differential

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins, n_harmonics)
                The CQT magnitude

            data['dphase'] : np.ndarray, shape = mag.shape
                The unwrapped phase differential
        '''
        cqtm, phase = [], []

        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        for h in self.harmonics:
            C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                    fmin=self.fmin * h,
                    n_bins=(self.n_octaves * self.over_sample * 12),
                    bins_per_octave=(self.over_sample * 12))

            C = fix_length(C, size=n_frames)

            C, P = magphase(C)
            if self.log:
                C = amplitude_to_db(C, ref=np.max)
            cqtm.append(C)
            phase.append(P)

        cqtm = to_dtype(np.asarray(cqtm), self.dtype)
        # Difference the raw (float) phase, then convert dtype
        phase = np.angle(np.asarray(phase))

        dphase = to_dtype(phase_diff(self._index(phase), self.conv),
                          self.dtype)

        return {'mag': self._index(cqtm),
                'dphase': dphase}
class STFT(FeatureExtractor):
    '''Short-time Fourier Transform (STFT) with both magnitude
    and phase.

    Attributes
    ----------
    name : str
        The name of this transformer

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The hop length of STFT frames

    n_fft : int > 0
        The number of FFT bins per frame

    log : bool
        If `True`, scale magnitude in decibels.

        Otherwise use linear magnitude.

    conv : str
        Convolution mode

    dtype : np.dtype
        The data type for the output features. Default is `float32`.

        Setting to `uint8` will produce quantized features.

    See Also
    --------
    STFTMag
    STFTPhaseDiff
    '''
    def __init__(self, name='stft', sr=22050, hop_length=512, n_fft=2048, log=False,
                 conv=None, dtype='float32'):
        super(STFT, self).__init__(name, sr, hop_length, conv=conv, dtype=dtype)

        self.n_fft = n_fft
        self.log = log

        # Real-valued FFT produces 1 + n_fft // 2 frequency bins
        n_bins = 1 + n_fft // 2
        self.register('mag', n_bins, self.dtype)
        self.register('phase', n_bins, self.dtype)

    def transform_audio(self, y):
        '''Compute the STFT magnitude and phase.

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                STFT magnitude

            data['phase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                STFT phase
        '''
        # Trim/pad to the frame count implied by the track duration
        target_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        spec = stft(y, hop_length=self.hop_length,
                    n_fft=self.n_fft)
        spec = fix_length(spec, size=target_frames)

        mag, phase = magphase(spec)
        if self.log:
            mag = amplitude_to_db(mag, ref=np.max)

        # Lay out as (time, frequency), add the channel axis per conv mode,
        # then convert (and possibly quantize) to the output dtype
        return {'mag': to_dtype(mag.T[self.idx], self.dtype),
                'phase': to_dtype(np.angle(phase.T)[self.idx], self.dtype)}
class STFTPhaseDiff(STFT):
    '''STFT with phase differentials.

    Replaces the parent's 'phase' output with 'dphase', the unwrapped
    frame-to-frame phase difference.

    See Also
    --------
    STFT
    '''
    def __init__(self, *args, **kwargs):
        super(STFTPhaseDiff, self).__init__(*args, **kwargs)
        # Swap the registered 'phase' field for a differential field with
        # the same dimension and dtype
        phase_field = self.pop('phase')
        self.register('dphase', 1 + self.n_fft // 2, phase_field.dtype)

    def transform_audio(self, y):
        '''Compute the STFT magnitude and phase differential.

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                STFT magnitude

            data['dphase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                The unwrapped STFT phase differential
        '''
        # NOTE: recomputes the STFT rather than delegating to
        # STFT.transform_audio, because the parent applies to_dtype (possibly
        # quantization) to the phase before we could difference it.
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        D = stft(y, hop_length=self.hop_length,
                 n_fft=self.n_fft)

        D = fix_length(D, size=n_frames)

        mag, phase = magphase(D)
        if self.log:
            mag = amplitude_to_db(mag, ref=np.max)

        # Difference the raw (float) phase along the time axis
        phase = phase_diff(np.angle(phase.T)[self.idx], self.conv)

        return {'mag': to_dtype(mag.T[self.idx], self.dtype),
                'dphase': to_dtype(phase, self.dtype)}


class STFTMag(STFT):
    '''STFT with only magnitude.

    See Also
    --------
    STFT
    '''
    def __init__(self, *args, **kwargs):
        super(STFTMag, self).__init__(*args, **kwargs)
        # Drop the registered phase field; only magnitude is exposed
        self.pop('phase')

    def transform_audio(self, y):
        '''Compute the STFT magnitude

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                The STFT magnitude
        '''
        data = super(STFTMag, self).transform_audio(y)
        # Discard the phase computed by the parent
        data.pop('phase')

        return data
class Mel(FeatureExtractor):
    '''Mel spectra feature extraction

    Attributes
    ----------
    name : str or None
        naming scope for this feature extractor

    sr : number > 0
        Sampling rate of the audio (in Hz)

    hop_length : int > 0
        Number of samples to advance between frames

    n_fft : int > 0
        Number of samples per frame

    n_mels : int > 0
        Number of Mel frequency bins

    fmax : number > 0
        The maximum frequency bin.
        Defaults to `0.5 * sr`

    log : bool
        If `True`, scale magnitude in decibels.

        Otherwise, use a linear amplitude scale.

    dtype : np.dtype
        The data type for the output features. Default is `float32`.

        Setting to `uint8` will produce quantized features.
    '''
    def __init__(self, name='mel', sr=22050, hop_length=512, n_fft=2048, n_mels=128,
                 fmax=None, log=False, conv=None, dtype='float32'):
        super(Mel, self).__init__(name, sr, hop_length, conv=conv, dtype=dtype)

        self.n_fft = n_fft
        self.n_mels = n_mels
        self.fmax = fmax
        self.log = log

        self.register('mag', n_mels, self.dtype)

    def transform_audio(self, y):
        '''Compute the Mel spectrogram

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, n_mels)
                The Mel spectrogram
        '''
        # Trim/pad to the frame count implied by the track duration
        target_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        # Square root of the power spectrogram gives a magnitude scale
        spec = np.sqrt(melspectrogram(y=y, sr=self.sr,
                                      n_fft=self.n_fft,
                                      hop_length=self.hop_length,
                                      n_mels=self.n_mels,
                                      fmax=self.fmax))
        spec = fix_length(spec, size=target_frames)

        if self.log:
            spec = amplitude_to_db(spec, ref=np.max)

        # Convert (and possibly quantize), then lay out as (time, mel)
        spec = to_dtype(spec, self.dtype)

        return {'mag': spec.T[self.idx]}
class Tempogram(FeatureExtractor):
    '''Tempogram: the short-time autocorrelation of the accent signal

    Attributes
    ----------
    name : str
        The name of this feature extractor

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The hop length of analysis windows

    win_length : int > 0
        The length of the analysis window (in frames)

    conv : str
        Convolution mode

    dtype : np.dtype
        The data type for the output features. Default is `float32`.

        Setting to `uint8` will produce quantized features.
    '''
    def __init__(self, name='tempogram', sr=22050, hop_length=512, win_length=384,
                 conv=None, dtype='float32'):
        super(Tempogram, self).__init__(name, sr, hop_length, conv=conv, dtype=dtype)

        self.win_length = win_length

        # One output field: the autocorrelation tempogram, win_length lags wide
        self.register('tempogram', win_length, self.dtype)

    def transform_audio(self, y):
        '''Compute the tempogram

        Parameters
        ----------
        y : np.ndarray
            Audio buffer

        Returns
        -------
        data : dict
            data['tempogram'] : np.ndarray, shape=(n_frames, win_length)
                The tempogram
        '''
        # Trim/pad to the frame count implied by the track duration
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        tgram = tempogram(y=y, sr=self.sr,
                          hop_length=self.hop_length,
                          win_length=self.win_length)

        # Convert (and possibly quantize), then lay out as (time, lag)
        tgram = to_dtype(fix_length(tgram, size=n_frames), self.dtype)
        return {'tempogram': tgram.T[self.idx]}
class TempoScale(Tempogram):
    '''Tempogram scale transform.

    Mellin scale transform magnitude of the Tempogram.

    Attributes
    ----------
    name : str
        Name of this extractor

    sr : number > 0
        Sampling rate of audio

    hop_length : int > 0
        Hop length for analysis frames

    win_length : int > 0
        Number of frames per analysis window

    n_fmt : int > 0
        Number of scale coefficients to retain

    conv : str
        Convolution mode

    dtype : np.dtype
        The data type for the output features. Default is `float32`.

        Setting to `uint8` will produce quantized features.
    '''
    def __init__(self, name='temposcale', sr=22050, hop_length=512, win_length=384,
                 n_fmt=128, conv=None, dtype='float32'):
        # Default values added for consistency with Tempogram and the other
        # extractors; existing positional callers are unaffected.
        super(TempoScale, self).__init__(name, sr, hop_length, win_length,
                                         conv=conv, dtype=dtype)

        self.n_fmt = n_fmt

        # Replace the raw tempogram field with the scale-transform field
        self.pop('tempogram')
        self.register('temposcale', 1 + n_fmt // 2, self.dtype)

    def transform_audio(self, y):
        '''Apply the scale transform to the tempogram

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['temposcale'] : np.ndarray, shape=(n_frames, 1 + n_fmt // 2)
                The scale transform magnitude coefficients
        '''
        data = super(TempoScale, self).transform_audio(y)
        # NOTE(review): librosa.fmt was removed in librosa 0.10; this feature
        # requires an older librosa — confirm the pinned version.
        data['temposcale'] = np.abs(fmt(data.pop('tempogram'),
                                        axis=1,
                                        n_fmt=self.n_fmt))[self.idx]
        data['temposcale'] = to_dtype(data['temposcale'], self.dtype)

        return data
class TimePosition(FeatureExtractor):
    '''TimePosition: encode frame position as features.

    Each frame is represented by a two-dimensional (cosine, sine)
    positional encoding, in both track-relative and duration-scaled
    (absolute) form.

    Attributes
    ----------
    name : str
        The name of this feature extractor

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The hop length of analysis windows
    '''

    def __init__(self, name='time_position', sr=22050, hop_length=512,
                 conv=None, dtype='float32'):
        super(TimePosition, self).__init__(name, sr, hop_length, conv=conv,
                                           dtype=dtype)

        self.register('relative', 2, self.dtype)
        self.register('absolute', 2, self.dtype)

    def transform_audio(self, y):
        '''Compute the time position encoding

        Parameters
        ----------
        y : np.ndarray
            Audio buffer

        Returns
        -------
        data : dict
            data['relative'] = np.ndarray, shape=(n_frames, 2)
            data['absolute'] = np.ndarray, shape=(n_frames, 2)

            Relative and absolute time positional encodings.
        '''
        duration = get_duration(y=y, sr=self.sr)
        n_frames = self.n_frames(duration)

        # Phase ramp covering [0, pi] across the track
        angles = np.pi * np.linspace(0, 1, num=n_frames)

        relative = np.zeros((n_frames, 2), dtype=np.float32)
        relative[:, 0] = np.cos(angles)
        relative[:, 1] = np.sin(angles)

        # Scale by sqrt(duration) to encode absolute position
        absolute = relative * np.sqrt(duration)

        return {'relative': to_dtype(relative[self.idx], self.dtype),
                'absolute': to_dtype(absolute[self.idx], self.dtype)}
    def __init__(self, n_samples, duration, *ops, **kwargs):
        '''Construct a sampler over the given operators.

        Parameters
        ----------
        n_samples : int or None
            The number of samples to generate (None = indefinitely)

        duration : int > 0
            The duration (in frames) of each sample patch

        *ops : FeatureExtractor or BaseTaskTransformer
            The operators to include when sampling data

        **kwargs
            random_state : None, int, or np.random.RandomState
                Seed or generator for reproducible sampling

        Raises
        ------
        ParameterError
            If `random_state` is not None, an int, or a RandomState.
        '''
        super(Sampler, self).__init__(*ops)

        self.n_samples = n_samples
        self.duration = duration

        random_state = kwargs.pop('random_state', None)
        # Accept a seed (None/int) or an existing generator
        if random_state is None or isinstance(random_state, int):
            self.rng = np.random.RandomState(seed=random_state)
        elif isinstance(random_state, np.random.RandomState):
            self.rng = random_state
        else:
            raise ParameterError('Invalid random_state={}'.format(random_state))

    def sample(self, data, interval):
        '''Sample a patch from the data object

        Parameters
        ----------
        data : dict
            A data dict as produced by pumpp.Pump.transform

        interval : slice
            The time interval to sample

        Returns
        -------
        data_slice : dict
            `data` restricted to `interval`.
        '''
        data_slice = dict()

        for key in data:
            # '_valid' entries are bookkeeping fields, not sampled data
            if '_valid' in key:
                continue

            index = [slice(None)] * data[key].ndim

            # if we have multiple observations for this key, pick one
            # (keep a length-1 slice so the observation axis is preserved)
            index[0] = self.rng.randint(0, data[key].shape[0])
            index[0] = slice(index[0], index[0] + 1)

            # Restrict every time-like axis of this field to the interval
            for tdim in self._time[key]:
                index[tdim] = interval

            data_slice[key] = data[key][tuple(index)]

        return data_slice

    def indices(self, data):
        '''Generate patch indices

        Parameters
        ----------
        data : dict of np.ndarray
            As produced by pumpp.transform

        Yields
        ------
        start : int >= 0
            The start index of a sample patch

        Raises
        ------
        DataError
            If the data is shorter than the sample duration.
        '''
        duration = self.data_duration(data)

        if self.duration > duration:
            raise DataError('Data duration={} is less than '
                            'sample duration={}'.format(duration, self.duration))

        # Infinite stream of uniformly random start positions; the caller
        # is responsible for limiting how many are consumed
        while True:
            # Generate a sampling interval
            yield self.rng.randint(0, duration - self.duration + 1)
__call__(self, data): 142 | '''Generate samples from a data dict. 143 | 144 | Parameters 145 | ---------- 146 | data : dict 147 | As produced by pumpp.transform 148 | 149 | Yields 150 | ------ 151 | data_sample : dict 152 | A sequence of patch samples from `data`, 153 | as parameterized by the sampler object. 154 | ''' 155 | if self.n_samples: 156 | counter = range(self.n_samples) 157 | else: 158 | counter = count(0) 159 | 160 | for _, start in zip(counter, self.indices(data)): 161 | yield self.sample(data, slice(start, start + self.duration)) 162 | 163 | 164 | class SequentialSampler(Sampler): 165 | '''Sample patches in sequential (temporal) order 166 | 167 | Attributes 168 | ---------- 169 | duration : int > 0 170 | the duration (in frames) of each sample 171 | 172 | stride : int > 0 173 | The number of frames to advance between samples. 174 | By default, matches `duration` so there is no overlap. 175 | 176 | ops : array of pumpp.feature.FeatureExtractor or pumpp.task.BaseTaskTransformer 177 | The operators to include when sampling data. 
178 | 179 | random_state : None, int, or np.random.RandomState 180 | If int, random_state is the seed used by the random number 181 | generator; 182 | 183 | If RandomState instance, random_state is the random number 184 | generator; 185 | 186 | If None, the random number generator is the RandomState instance 187 | 188 | See Also 189 | -------- 190 | Sampler 191 | ''' 192 | 193 | def __init__(self, duration, *ops, **kwargs): 194 | 195 | stride = kwargs.pop('stride', None) 196 | 197 | super(SequentialSampler, self).__init__(None, duration, *ops, **kwargs) 198 | 199 | if stride is None: 200 | stride = duration 201 | 202 | if not stride > 0: 203 | raise ParameterError('Invalid patch stride={}'.format(stride)) 204 | self.stride = stride 205 | 206 | def indices(self, data): 207 | '''Generate patch start indices 208 | 209 | Parameters 210 | ---------- 211 | data : dict of np.ndarray 212 | As produced by pumpp.transform 213 | 214 | Yields 215 | ------ 216 | start : int >= 0 217 | The start index of a sample patch 218 | ''' 219 | duration = self.data_duration(data) 220 | 221 | for start in range(0, duration - self.duration, self.stride): 222 | yield start 223 | 224 | 225 | class VariableLengthSampler(Sampler): 226 | '''Sample random patches like a `Sampler`, but allow for 227 | output patches to be less than the target duration when the 228 | data is too short. 229 | 230 | Attributes 231 | ---------- 232 | n_samples : int or None 233 | the number of samples to generate. 234 | If `None`, generate indefinitely. 
235 | 236 | min_duration : int > 0 237 | The minimum duration (in frames) of each sample 238 | 239 | max_duration : int > 0 240 | the maximum duration (in frames) of each sample 241 | 242 | random_state : None, int, or np.random.RandomState 243 | If int, random_state is the seed used by the random number 244 | generator; 245 | 246 | If RandomState instance, random_state is the random number 247 | generator; 248 | 249 | If None, the random number generator is the RandomState instance 250 | used by np.random. 251 | 252 | ops : array of pumpp.feature.FeatureExtractor or pumpp.task.BaseTaskTransformer 253 | The operators to include when sampling data. 254 | 255 | 256 | See Also 257 | -------- 258 | Sampler 259 | ''' 260 | def __init__(self, n_samples, min_duration, max_duration, *ops, **kwargs): 261 | super(VariableLengthSampler, self).__init__(n_samples, max_duration, 262 | *ops, **kwargs) 263 | 264 | if min_duration < 1: 265 | raise ParameterError('min_duration={} must be ' 266 | 'at least 1.'.format(min_duration)) 267 | 268 | if max_duration < min_duration: 269 | raise ParameterError('max_duration={} must be at least ' 270 | 'min_duration={}'.format(max_duration, 271 | min_duration)) 272 | 273 | self.min_duration = min_duration 274 | 275 | def indices(self, data): 276 | '''Generate patch indices 277 | 278 | Parameters 279 | ---------- 280 | data : dict of np.ndarray 281 | As produced by pumpp.transform 282 | 283 | Yields 284 | ------ 285 | start : int >= 0 286 | The start index of a sample patch 287 | ''' 288 | duration = self.data_duration(data) 289 | 290 | while True: 291 | # Generate a sampling interval 292 | yield self.rng.randint(0, duration - self.min_duration + 1) 293 | 294 | def __call__(self, data): 295 | '''Generate samples from a data dict. 
296 | 297 | Parameters 298 | ---------- 299 | data : dict 300 | As produced by pumpp.transform 301 | 302 | Yields 303 | ------ 304 | data_sample : dict 305 | A sequence of patch samples from `data`, 306 | as parameterized by the sampler object. 307 | ''' 308 | if self.n_samples: 309 | counter = range(self.n_samples) 310 | else: 311 | counter = count(0) 312 | 313 | duration = self.data_duration(data) 314 | 315 | for _, start in zip(counter, self.indices(data)): 316 | yield self.sample(data, 317 | slice(start, min(duration, start + self.duration))) 318 | -------------------------------------------------------------------------------- /pumpp/task/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | Task transformations 5 | ==================== 6 | .. autosummary:: 7 | :toctree: generated/ 8 | 9 | BaseTaskTransformer 10 | BeatTransformer 11 | BeatPositionTransformer 12 | ChordTransformer 13 | SimpleChordTransformer 14 | ChordTagTransformer 15 | VectorTransformer 16 | DynamicLabelTransformer 17 | StaticLabelTransformer 18 | StructureTransformer 19 | ''' 20 | 21 | from .base import * 22 | from .chord import * 23 | from .beat import * 24 | from .regression import * 25 | from .tags import * 26 | from .structure import * 27 | from .key import * 28 | -------------------------------------------------------------------------------- /pumpp/task/base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | '''The base class for task transformer objects''' 4 | 5 | import numpy as np 6 | from librosa import time_to_frames, times_like 7 | from librosa.sequence import viterbi_binary, viterbi_discriminative 8 | import jams 9 | 10 | from ..base import Scope 11 | 12 | __all__ = ['BaseTaskTransformer'] 13 | 14 | 15 | def fill_value(dtype): 16 | '''Get a fill-value for a given dtype 17 | 18 | 
Parameters 19 | ---------- 20 | dtype : type 21 | 22 | Returns 23 | ------- 24 | `np.nan` if `dtype` is real or complex 25 | 26 | 0 otherwise 27 | ''' 28 | if np.issubdtype(dtype, np.floating) or np.issubdtype(dtype, np.complexfloating): 29 | return dtype(np.nan) 30 | 31 | return dtype(0) 32 | 33 | 34 | class BaseTaskTransformer(Scope): 35 | '''Base class for task transformer objects 36 | 37 | Attributes 38 | ---------- 39 | name : str 40 | The name prefix for this transformer object 41 | 42 | namespace : str 43 | The JAMS namespace for annotations in this task 44 | 45 | sr : number > 0 46 | The sampling rate for audio 47 | 48 | hop_length : int > 0 49 | The number of samples between frames 50 | ''' 51 | 52 | def __init__(self, name, namespace, sr, hop_length): 53 | super(BaseTaskTransformer, self).__init__(name) 54 | 55 | # This will trigger an exception if the namespace is not found 56 | jams.schema.is_dense(namespace) 57 | 58 | self.namespace = namespace 59 | self.sr = sr 60 | self.hop_length = hop_length 61 | 62 | def empty(self, duration): 63 | '''Create an empty jams.Annotation for this task. 64 | 65 | This method should be overridden by derived classes. 66 | 67 | Parameters 68 | ---------- 69 | duration : int >= 0 70 | Duration of the annotation 71 | ''' 72 | return jams.Annotation(namespace=self.namespace, time=0, duration=0) 73 | 74 | def transform(self, jam, query=None): 75 | '''Transform jam object to make data for this task 76 | 77 | Parameters 78 | ---------- 79 | jam : jams.JAMS 80 | The jams container object 81 | 82 | query : string, dict, or callable [optional] 83 | An optional query to narrow the elements of `jam.annotations` 84 | to be considered. 85 | 86 | If not provided, all annotations are considered. 87 | 88 | Returns 89 | ------- 90 | data : dict 91 | A dictionary of transformed annotations. 92 | All annotations which can be converted to the target namespace 93 | will be converted. 
94 | ''' 95 | anns = [] 96 | if query: 97 | results = jam.search(**query) 98 | else: 99 | results = jam.annotations 100 | 101 | # Find annotations that can be coerced to our target namespace 102 | for ann in results: 103 | try: 104 | anns.append(jams.nsconvert.convert(ann, self.namespace)) 105 | except jams.NamespaceError: 106 | pass 107 | 108 | duration = jam.file_metadata.duration 109 | 110 | # If none, make a fake one 111 | if not anns: 112 | anns = [self.empty(duration)] 113 | 114 | # Apply transformations 115 | results = [] 116 | for ann in anns: 117 | 118 | results.append(self.transform_annotation(ann, duration)) 119 | # If the annotation range is None, it spans the entire track 120 | if ann.time is None or ann.duration is None: 121 | valid = [0, duration] 122 | else: 123 | valid = [ann.time, ann.time + ann.duration] 124 | 125 | results[-1]['_valid'] = time_to_frames(valid, sr=self.sr, 126 | hop_length=self.hop_length) 127 | 128 | # Prefix and collect 129 | return self.merge(results) 130 | 131 | def transform_annotation(self, ann, duration): 132 | '''Transform jams.Annotation to make data for a given task. 133 | 134 | Parameters 135 | ---------- 136 | ann : jams.Annotation 137 | The jams annotation containing the data 138 | 139 | duration : number > 0 140 | time in seconds of the output duration 141 | 142 | Returns 143 | ------- 144 | data : dict 145 | A dictionary of transformed annotation. 146 | ''' 147 | raise NotImplementedError 148 | 149 | def encode_events(self, duration, events, values, dtype=bool): 150 | '''Encode labeled events as a time-series matrix. 151 | 152 | Parameters 153 | ---------- 154 | duration : number 155 | The duration of the track 156 | 157 | events : ndarray, shape=(n,) 158 | Time index of the events 159 | 160 | values : ndarray, shape=(n, m) 161 | Values array. Must have the same first index as `events`. 
162 | 163 | dtype : numpy data type 164 | 165 | Returns 166 | ------- 167 | target : ndarray, shape=(n_frames, n_values) 168 | ''' 169 | 170 | frames = time_to_frames(events, sr=self.sr, 171 | hop_length=self.hop_length) 172 | 173 | n_total = int(time_to_frames(duration, sr=self.sr, 174 | hop_length=self.hop_length)) 175 | 176 | n_alloc = n_total 177 | if np.any(frames): 178 | n_alloc = max(n_total, 1 + int(frames.max())) 179 | 180 | target = np.empty((n_alloc, values.shape[1]), 181 | dtype=dtype) 182 | 183 | target.fill(fill_value(dtype)) 184 | values = values.astype(dtype) 185 | for column, event in zip(values, frames): 186 | target[event] += column 187 | 188 | return target[:n_total] 189 | 190 | def encode_intervals(self, duration, intervals, values, dtype=bool, 191 | multi=True, fill=None): 192 | '''Encode labeled intervals as a time-series matrix. 193 | 194 | Parameters 195 | ---------- 196 | duration : number 197 | The duration (in frames) of the track 198 | 199 | intervals : np.ndarray, shape=(n, 2) 200 | The list of intervals 201 | 202 | values : np.ndarray, shape=(n, m) 203 | The (encoded) values corresponding to each interval 204 | 205 | dtype : np.dtype 206 | The desired output type 207 | 208 | multi : bool 209 | If `True`, allow multiple labels per interval. 210 | 211 | fill : dtype (optional) 212 | Optional default fill value for missing data. 213 | 214 | If not provided, the default is inferred from `dtype`. 
215 | 216 | Returns 217 | ------- 218 | target : np.ndarray, shape=(duration * sr / hop_length, m) 219 | The labeled interval encoding, sampled at the desired frame rate 220 | ''' 221 | if fill is None: 222 | fill = fill_value(dtype) 223 | 224 | frames = time_to_frames(intervals, sr=self.sr, 225 | hop_length=self.hop_length) 226 | 227 | n_total = int(time_to_frames(duration, sr=self.sr, 228 | hop_length=self.hop_length)) 229 | 230 | values = values.astype(dtype) 231 | 232 | n_alloc = n_total 233 | if np.any(frames): 234 | n_alloc = max(n_total, 1 + int(frames.max())) 235 | 236 | target = np.empty((n_alloc, values.shape[1]), 237 | 238 | dtype=dtype) 239 | 240 | target.fill(fill) 241 | 242 | for column, interval in zip(values, frames): 243 | if multi: 244 | target[interval[0]:interval[1]] += column 245 | else: 246 | target[interval[0]:interval[1]] = column 247 | 248 | return target[:n_total] 249 | 250 | def decode_events(self, encoded, transition=None, p_state=None, p_init=None): 251 | '''Decode labeled events into (time, value) pairs 252 | 253 | Real-valued inputs are thresholded at 0.5. 254 | 255 | Optionally, viterbi decoding can be applied to each event class. 256 | 257 | Parameters 258 | ---------- 259 | encoded : np.ndarray, shape=(n_frames, m) 260 | Frame-level annotation encodings as produced by ``encode_events``. 
261 | 262 | transition : None or np.ndarray [shape=(2, 2) or (m, 2, 2)] 263 | Optional transition matrix for each event, used for Viterbi 264 | 265 | p_state : None or np.ndarray [shape=(m,)] 266 | Optional marginal probability for each event 267 | 268 | p_init : None or np.ndarray [shape=(m,)] 269 | Optional marginal probability for each event 270 | 271 | Returns 272 | ------- 273 | [(time, value)] : iterable of tuples 274 | where `time` is the event time and `value` is an 275 | np.ndarray, shape=(m,) of the encoded value at that time 276 | 277 | See Also 278 | -------- 279 | librosa.sequence.viterbi_binary 280 | ''' 281 | if np.isrealobj(encoded): 282 | if transition is None: 283 | encoded = (encoded >= 0.5) 284 | else: 285 | encoded = viterbi_binary(encoded.T, transition, 286 | p_state=p_state, 287 | p_init=p_init).T 288 | 289 | times = times_like(encoded, 290 | sr=self.sr, 291 | hop_length=self.hop_length, 292 | axis=0) 293 | 294 | return zip(times, encoded) 295 | 296 | def decode_intervals(self, encoded, duration=None, multi=True, sparse=False, 297 | transition=None, p_state=None, p_init=None): 298 | '''Decode labeled intervals into (start, end, value) triples 299 | 300 | Parameters 301 | ---------- 302 | encoded : np.ndarray, shape=(n_frames, m) 303 | Frame-level annotation encodings as produced by 304 | ``encode_intervals`` 305 | 306 | duration : None or float > 0 307 | The max duration of the annotation (in seconds) 308 | Must be greater than the length of encoded array. 309 | 310 | multi : bool 311 | If true, allow multiple labels per input frame. 312 | If false, take the most likely label per input frame. 313 | 314 | sparse : bool 315 | If true, values are returned as indices, not one-hot. 316 | If false, values are returned as one-hot encodings. 317 | 318 | Only applies when `multi=False`. 319 | 320 | transition : None or np.ndarray [shape=(m, m) or (2, 2) or (m, 2, 2)] 321 | Optional transition matrix for each interval, used for Viterbi 322 | decoding. 
If `multi=True`, then transition should be `(2, 2)` or 323 | `(m, 2, 2)`-shaped. If `multi=False`, then transition should be 324 | `(m, m)`-shaped. 325 | 326 | p_state : None or np.ndarray [shape=(m,)] 327 | Optional marginal probability for each label. 328 | 329 | p_init : None or np.ndarray [shape=(m,)] 330 | Optional marginal probability for each label. 331 | 332 | Returns 333 | ------- 334 | [(start, end, value)] : iterable of tuples 335 | where `start` and `end` are the interval boundaries (in seconds) 336 | and `value` is an np.ndarray, shape=(m,) of the encoded value 337 | for this interval. 338 | ''' 339 | if np.isrealobj(encoded): 340 | if multi: 341 | if transition is None: 342 | encoded = encoded >= 0.5 343 | else: 344 | encoded = viterbi_binary(encoded.T, transition, 345 | p_init=p_init, p_state=p_state).T 346 | elif sparse and encoded.shape[1] > 1: 347 | # map to argmax if it's densely encoded (logits) 348 | if transition is None: 349 | encoded = np.argmax(encoded, axis=1)[:, np.newaxis] 350 | else: 351 | encoded = viterbi_discriminative(encoded.T, transition, 352 | p_init=p_init, 353 | p_state=p_state)[:, np.newaxis] 354 | elif not sparse: 355 | # if dense and multi, map to one-hot encoding 356 | if transition is None: 357 | encoded = (encoded == np.max(encoded, axis=1, keepdims=True)) 358 | else: 359 | encoded_ = viterbi_discriminative(encoded.T, transition, 360 | p_init=p_init, 361 | p_state=p_state) 362 | # Map to one-hot encoding 363 | encoded = np.zeros(encoded.shape, dtype=bool) 364 | encoded[np.arange(len(encoded_)), encoded_] = True 365 | 366 | if duration is None: 367 | # 1+ is fair here, because encode_intervals already pads 368 | duration = 1 + encoded.shape[0] 369 | else: 370 | duration = 1 + time_to_frames(duration, 371 | sr=self.sr, 372 | hop_length=self.hop_length) 373 | 374 | # [0, duration] inclusive 375 | times = times_like(duration + 1, 376 | sr=self.sr, hop_length=self.hop_length) 377 | 378 | # Find the change-points of the rows 
379 | if sparse: 380 | idx = np.where(encoded[1:] != encoded[:-1])[0] 381 | else: 382 | idx = np.where(np.max(encoded[1:] != encoded[:-1], axis=-1))[0] 383 | 384 | idx = np.unique(np.append(idx, encoded.shape[0])) 385 | delta = np.diff(np.append(-1, idx)) 386 | 387 | # Starting positions can be integrated from changes 388 | position = np.cumsum(np.append(0, delta)) 389 | 390 | return [(times[p], times[p + d], encoded[p]) 391 | for (p, d) in zip(position, delta)] 392 | -------------------------------------------------------------------------------- /pumpp/task/beat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | '''Instantaneous event coding''' 4 | 5 | import numpy as np 6 | import jams 7 | 8 | from librosa import time_to_frames 9 | from librosa.sequence import transition_loop, transition_cycle 10 | from mir_eval.util import boundaries_to_intervals, adjust_intervals 11 | 12 | from .base import BaseTaskTransformer 13 | from ..exceptions import ParameterError 14 | from ..labels import LabelBinarizer, LabelEncoder 15 | 16 | __all__ = ['BeatTransformer', 'BeatPositionTransformer'] 17 | 18 | 19 | class BeatTransformer(BaseTaskTransformer): 20 | '''Task transformation for beat tracking 21 | 22 | Attributes 23 | ---------- 24 | name : str 25 | The name of this transformer 26 | 27 | sr : number > 0 28 | The audio sampling rate 29 | 30 | hop_length : int > 0 31 | The hop length for annotation frames 32 | 33 | p_self_beat : None, float in (0, 1), or np.ndarray [shape=(2,)] 34 | Optional self-loop probability(ies), used for Viterbi decoding 35 | 36 | p_state_beat : None or float in (0, 1) 37 | Optional marginal probability for beat state 38 | 39 | p_init_beat : None or float in (0, 1) 40 | Optional initial probability for beat state 41 | 42 | p_self_down : None, float in (0, 1), or np.ndarray [shape=(2,)] 43 | Optional self-loop probability(ies), used for Viterbi decoding 44 | 45 | 
p_state_down : None or float in (0, 1) 46 | Optional marginal probability for downbeat state 47 | 48 | p_init_down : None or float in (0, 1) 49 | Optional initial probability for downbeat state 50 | 51 | ''' 52 | def __init__(self, name='beat', sr=22050, hop_length=512, 53 | p_self_beat=None, p_init_beat=None, p_state_beat=None, 54 | p_self_down=None, p_init_down=None, p_state_down=None): 55 | 56 | super(BeatTransformer, self).__init__(name=name, 57 | namespace='beat', 58 | sr=sr, hop_length=hop_length) 59 | 60 | self.set_transition_beat(p_self_beat) 61 | 62 | if p_init_beat is not None: 63 | if not np.isscalar(p_init_beat): 64 | raise ParameterError('Invalid p_init_beat={}'.format(p_init_beat)) 65 | 66 | self.beat_p_init = p_init_beat 67 | 68 | if p_state_beat is not None: 69 | if not np.isscalar(p_state_beat): 70 | raise ParameterError('Invalid p_state_beat={}'.format(p_state_beat)) 71 | 72 | self.beat_p_state = p_state_beat 73 | 74 | self.set_transition_down(p_self_beat) 75 | 76 | if p_init_down is not None: 77 | if not np.isscalar(p_init_down): 78 | raise ParameterError('Invalid p_init_down={}'.format(p_init_down)) 79 | 80 | self.down_p_init = p_init_down 81 | 82 | if p_state_down is not None: 83 | if not np.isscalar(p_state_down): 84 | raise ParameterError('Invalid p_state_down={}'.format(p_state_down)) 85 | 86 | self.down_p_state = p_state_down 87 | 88 | self.register('beat', [None], bool) 89 | self.register('downbeat', [None], bool) 90 | self.register('mask_downbeat', [1], bool) 91 | 92 | def set_transition_beat(self, p_self): 93 | '''Set the beat-tracking transition matrix according to 94 | self-loop probabilities. 
95 | 96 | Parameters 97 | ---------- 98 | p_self : None, float in (0, 1), or np.ndarray [shape=(2,)] 99 | Optional self-loop probability(ies), used for Viterbi decoding 100 | ''' 101 | if p_self is None: 102 | self.beat_transition = None 103 | else: 104 | self.beat_transition = transition_loop(2, p_self) 105 | 106 | def set_transition_down(self, p_self): 107 | '''Set the downbeat-tracking transition matrix according to 108 | self-loop probabilities. 109 | 110 | Parameters 111 | ---------- 112 | p_self : None, float in (0, 1), or np.ndarray [shape=(2,)] 113 | Optional self-loop probability(ies), used for Viterbi decoding 114 | ''' 115 | if p_self is None: 116 | self.down_transition = None 117 | else: 118 | self.down_transition = transition_loop(2, p_self) 119 | 120 | def transform_annotation(self, ann, duration): 121 | '''Apply the beat transformer 122 | 123 | Parameters 124 | ---------- 125 | ann : jams.Annotation 126 | The input annotation 127 | 128 | duration : number > 0 129 | The duration of the audio 130 | 131 | Returns 132 | ------- 133 | data : dict 134 | data['beat'] : np.ndarray, shape=(n, 1) 135 | Binary indicator of beat/non-beat 136 | 137 | data['downbeat'] : np.ndarray, shape=(n, 1) 138 | Binary indicator of downbeat/non-downbeat 139 | 140 | mask_downbeat : bool 141 | True if downbeat annotations are present 142 | ''' 143 | 144 | mask_downbeat = False 145 | 146 | intervals, values = ann.to_interval_values() 147 | values = np.asarray(values) 148 | 149 | beat_events = intervals[:, 0] 150 | beat_labels = np.ones((len(beat_events), 1)) 151 | 152 | idx = (values == 1) 153 | if np.any(idx): 154 | downbeat_events = beat_events[idx] 155 | downbeat_labels = np.ones((len(downbeat_events), 1)) 156 | mask_downbeat = True 157 | else: 158 | downbeat_events = np.zeros(0) 159 | downbeat_labels = np.zeros((0, 1)) 160 | 161 | target_beat = self.encode_events(duration, 162 | beat_events, 163 | beat_labels) 164 | 165 | target_downbeat = self.encode_events(duration, 166 | 
downbeat_events, 167 | downbeat_labels) 168 | 169 | return {'beat': target_beat, 170 | 'downbeat': target_downbeat, 171 | 'mask_downbeat': mask_downbeat} 172 | 173 | def inverse(self, encoded, downbeat=None, duration=None): 174 | '''Inverse transformation for beats and optional downbeats''' 175 | 176 | ann = jams.Annotation(namespace=self.namespace, duration=duration) 177 | 178 | beat_times = np.asarray([t for t, _ in self.decode_events(encoded, 179 | transition=self.beat_transition, 180 | p_init=self.beat_p_init, 181 | p_state=self.beat_p_state) if _]) 182 | beat_frames = time_to_frames(beat_times, 183 | sr=self.sr, 184 | hop_length=self.hop_length) 185 | 186 | if downbeat is not None: 187 | downbeat_times = set([t for t, _ in self.decode_events(downbeat, 188 | transition=self.down_transition, 189 | p_init=self.down_p_init, 190 | p_state=self.down_p_state) if _]) 191 | pickup_beats = len([t for t in beat_times 192 | if t < min(downbeat_times)]) 193 | else: 194 | downbeat_times = set() 195 | pickup_beats = 0 196 | 197 | value = - pickup_beats - 1 198 | for beat_t, beat_f in zip(beat_times, beat_frames): 199 | if beat_t in downbeat_times: 200 | value = 1 201 | else: 202 | value += 1 203 | confidence = encoded[beat_f] 204 | ann.append(time=beat_t, 205 | duration=0, 206 | value=value, 207 | confidence=confidence) 208 | 209 | return ann 210 | 211 | 212 | class BeatPositionTransformer(BaseTaskTransformer): 213 | '''Encode beat- and downbeat-annotations as labeled intervals. 214 | 215 | This transformer assumes that the `value` field of a beat annotation 216 | encodes its metrical position (1, 2, 3, 4, ...). 217 | 218 | A `value` of 0 indicates that the beat does not belong to a bar, 219 | and should be used to indicate pickup beats. 220 | 221 | Beat position strings are coded as SUBDIVISION/POSITION 222 | 223 | For example, in 4/4 time, the 2 beat would be coded as "04/02". 
224 | ''' 225 | def __init__(self, name, max_divisions=12, 226 | sr=22050, hop_length=512, sparse=False): 227 | 228 | super(BeatPositionTransformer, self).__init__(name=name, 229 | namespace='beat', 230 | sr=sr, 231 | hop_length=hop_length) 232 | 233 | # Make the vocab set 234 | if not isinstance(max_divisions, int) or max_divisions < 1: 235 | raise ParameterError('Invalid max_divisions={}'.format(max_divisions)) 236 | 237 | self.max_divisions = max_divisions 238 | labels = self.vocabulary() 239 | self.sparse = sparse 240 | 241 | if self.sparse: 242 | self.encoder = LabelEncoder() 243 | else: 244 | self.encoder = LabelBinarizer() 245 | self.encoder.fit(labels) 246 | self._classes = set(self.encoder.classes_) 247 | 248 | # transitions should use transition_loop here 249 | # construct block-wise for each metrical length 250 | # initial-state distributions should be over X 251 | # X -> **/01 s 252 | 253 | if self.sparse: 254 | self.register('position', [None, 1], int) 255 | else: 256 | self.register('position', [None, len(self._classes)], bool) 257 | 258 | def vocabulary(self): 259 | states = ['X'] 260 | for d in range(1, self.max_divisions + 1): 261 | for n in range(1, d + 1): 262 | states.append('{:02d}/{:02d}'.format(d, n)) 263 | return states 264 | 265 | def transform_annotation(self, ann, duration): 266 | '''Transform an annotation to the beat-position encoding 267 | 268 | Parameters 269 | ---------- 270 | ann : jams.Annotation 271 | The annotation to convert 272 | 273 | duration : number > 0 274 | The duration of the track 275 | 276 | Returns 277 | ------- 278 | data : dict 279 | data['position'] : np.ndarray, shape=(n, n_labels) or (n, 1) 280 | A time-varying label encoding of beat position 281 | ''' 282 | 283 | # 1. get all the events 284 | # 2. find all the downbeats 285 | # 3. map each downbeat to a subdivision counter 286 | # number of beats until the next downbeat 287 | # 4. pad out events to intervals 288 | # 5. 
encode each beat interval to its position 289 | 290 | boundaries, values = ann.to_interval_values() 291 | # Convert to intervals and span the duration 292 | # padding at the end of track does not propagate the right label 293 | # this is an artifact of inferring end-of-track from boundaries though 294 | boundaries = list(boundaries[:, 0]) 295 | if boundaries and boundaries[-1] < duration: 296 | boundaries.append(duration) 297 | intervals = boundaries_to_intervals(boundaries) 298 | intervals, values = adjust_intervals(intervals, values, 299 | t_min=0, 300 | t_max=duration, 301 | start_label=0, 302 | end_label=0) 303 | 304 | values = np.asarray(values, dtype=int) 305 | downbeats = np.flatnonzero(values == 1) 306 | 307 | position = [] 308 | for i, v in enumerate(values): 309 | # If the value is a 0, mark it as X and move on 310 | if v == 0: 311 | position.extend(self.encoder.transform(['X'])) 312 | continue 313 | 314 | # Otherwise, let's try to find the surrounding downbeats 315 | prev_idx = np.searchsorted(downbeats, i, side='right') - 1 316 | next_idx = 1 + prev_idx 317 | 318 | if prev_idx >= 0 and next_idx < len(downbeats): 319 | # In this case, the subdivision is well-defined 320 | subdivision = downbeats[next_idx] - downbeats[prev_idx] 321 | elif prev_idx < 0 and next_idx < len(downbeats): 322 | subdivision = np.max(values[:downbeats[0]+1]) 323 | elif next_idx >= len(downbeats): 324 | subdivision = len(values) - downbeats[prev_idx] 325 | 326 | if subdivision > self.max_divisions or subdivision < 1: 327 | position.extend(self.encoder.transform(['X'])) 328 | else: 329 | position.extend(self.encoder.transform(['{:02d}/{:02d}'.format(subdivision, v)])) 330 | 331 | dtype = self.fields[self.scope('position')].dtype 332 | 333 | position = np.asarray(position) 334 | if self.sparse: 335 | position = position[:, np.newaxis] 336 | 337 | target = self.encode_intervals(duration, intervals, position, 338 | multi=False, dtype=dtype) 339 | return {'position': target} 340 | 341 | 
def inverse(self, encoded, duration=None): 342 | '''Inverse transformation''' 343 | 344 | raise NotImplementedError 345 | -------------------------------------------------------------------------------- /pumpp/task/chord.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | '''Chord recognition task transformer''' 4 | 5 | import re 6 | from itertools import product 7 | 8 | import numpy as np 9 | import mir_eval 10 | import jams 11 | 12 | from librosa import time_to_frames 13 | from librosa.sequence import transition_loop 14 | 15 | from .base import BaseTaskTransformer 16 | from ..exceptions import ParameterError 17 | from ..labels import LabelBinarizer, LabelEncoder, MultiLabelBinarizer 18 | 19 | __all__ = ['ChordTransformer', 'SimpleChordTransformer', 'ChordTagTransformer'] 20 | 21 | 22 | def _pad_nochord(target, axis=-1): 23 | '''Pad a chord annotation with no-chord flags. 24 | 25 | Parameters 26 | ---------- 27 | target : np.ndarray 28 | the input data 29 | 30 | axis : int 31 | the axis along which to pad 32 | 33 | Returns 34 | ------- 35 | target_pad 36 | `target` expanded by 1 along the specified `axis`. 37 | The expanded dimension will be 0 when `target` is non-zero 38 | before padding, and 1 otherwise. 39 | ''' 40 | ncmask = ~np.max(target, axis=axis, keepdims=True) 41 | 42 | return np.concatenate([target, ncmask], axis=axis) 43 | 44 | 45 | class ChordTransformer(BaseTaskTransformer): 46 | '''Chord annotation transformers. 47 | 48 | This transformer uses a (pitch, root, bass) decomposition of 49 | chord annotations. 50 | 51 | Attributes 52 | ---------- 53 | name : str 54 | The name of the chord transformer 55 | 56 | sr : number > 0 57 | The sampling rate of audio 58 | 59 | hop_length : int > 0 60 | The number of samples between each annotation frame 61 | 62 | sparse : bool 63 | If True, root and bass values are sparsely encoded as integers in [0, 12]. 
class ChordTransformer(BaseTaskTransformer):
    '''Chord annotation transformers.

    This transformer uses a (pitch, root, bass) decomposition of
    chord annotations.

    Attributes
    ----------
    name : str
        The name of the chord transformer

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The number of samples between each annotation frame

    sparse : bool
        If True, root and bass values are sparsely encoded as integers in [0, 12].
        If False, root and bass values are densely encoded as 13-dimensional booleans.

    See Also
    --------
    SimpleChordTransformer
    '''
    def __init__(self, name='chord', sr=22050, hop_length=512, sparse=False):
        '''Initialize a chord task transformer'''

        super(ChordTransformer, self).__init__(name=name,
                                               namespace='chord',
                                               sr=sr, hop_length=hop_length)

        # Binarizer over the 12 pitch classes (0=C, ..., 11=B)
        self.encoder = MultiLabelBinarizer()
        self.encoder.fit([list(range(12))])
        self._classes = set(self.encoder.classes_)
        self.sparse = sparse

        # Pitch activations are always a dense 12-d boolean vector.
        # Root and bass are either a single integer in [0, 12] (sparse)
        # or a 13-d one-hot boolean vector (dense); index 12 / the extra
        # final dimension encodes "no chord".
        self.register('pitch', [None, 12], bool)
        if self.sparse:
            self.register('root', [None, 1], int)
            self.register('bass', [None, 1], int)
        else:
            self.register('root', [None, 13], bool)
            self.register('bass', [None, 13], bool)

    def empty(self, duration):
        '''Empty chord annotations

        Parameters
        ----------
        duration : number
            The length (in seconds) of the empty annotation

        Returns
        -------
        ann : jams.Annotation
            A chord annotation consisting of a single `no-chord` observation.
        '''
        ann = super(ChordTransformer, self).empty(duration)

        ann.append(time=0,
                   duration=duration,
                   value='N', confidence=0)

        return ann

    def transform_annotation(self, ann, duration):
        '''Apply the chord transformation.

        Parameters
        ----------
        ann : jams.Annotation
            The chord annotation

        duration : number > 0
            The target duration

        Returns
        -------
        data : dict
            data['pitch'] : np.ndarray, shape=(n, 12)
            data['root'] : np.ndarray, shape=(n, 13) or (n, 1)
            data['bass'] : np.ndarray, shape=(n, 13) or (n, 1)

            `pitch` is a binary matrix indicating pitch class
            activation at each frame.

            `root` is a one-hot matrix indicating the chord
            root's pitch class at each frame.

            `bass` is a one-hot matrix indicating the chord
            bass (lowest note) pitch class at each frame.

            If sparsely encoded, `root` and `bass` are integers
            in the range [0, 12] where 12 indicates no chord.

            If densely encoded, `root` and `bass` have an extra
            final dimension which is active when there is no chord
            sounding.
        '''
        # Construct a blank annotation with mask = 0
        intervals, chords = ann.to_interval_values()

        # Get the dtype for root/bass
        if self.sparse:
            dtype = int
        else:
            dtype = bool

        # If we don't have any labeled intervals, fill in a no-chord
        # spanning the full duration
        if not chords:
            intervals = np.asarray([[0, duration]])
            chords = ['N']

        # Suppress all intervals not in the encoder
        pitches = []
        roots = []
        basses = []

        # default value when data is missing
        if self.sparse:
            fill = 12
        else:
            fill = False

        for chord in chords:
            # Encode the pitches: mir_eval returns the root pitch class
            # and a root-relative semitone bitmap, so roll the bitmap
            # into absolute pitch-class positions.
            root, semi, bass = mir_eval.chord.encode(chord)
            pitches.append(np.roll(semi, root))

            if self.sparse:
                if root in self._classes:
                    roots.append([root])
                    # bass is encoded relative to the root
                    basses.append([(root + bass) % 12])
                else:
                    # Unknown root (e.g. 'N' or 'X'): use the no-chord fill
                    roots.append([fill])
                    basses.append([fill])
            else:
                if root in self._classes:
                    roots.extend(self.encoder.transform([[root]]))
                    basses.extend(self.encoder.transform([[(root + bass) % 12]]))
                else:
                    # Empty label set -> all-zero one-hot row
                    roots.extend(self.encoder.transform([[]]))
                    basses.extend(self.encoder.transform([[]]))

        pitches = np.asarray(pitches, dtype=bool)
        roots = np.asarray(roots, dtype=dtype)
        basses = np.asarray(basses, dtype=dtype)

        target_pitch = self.encode_intervals(duration, intervals, pitches)

        target_root = self.encode_intervals(duration, intervals, roots,
                                            multi=False,
                                            dtype=dtype,
                                            fill=fill)
        target_bass = self.encode_intervals(duration, intervals, basses,
                                            multi=False,
                                            dtype=dtype,
                                            fill=fill)

        # Dense encoding gets an explicit extra "no chord" dimension
        if not self.sparse:
            target_root = _pad_nochord(target_root)
            target_bass = _pad_nochord(target_bass)

        return {'pitch': target_pitch,
                'root': target_root,
                'bass': target_bass}

    def inverse(self, pitch, root, bass, duration=None):
        # Decomposed (pitch, root, bass) targets cannot be uniquely
        # mapped back to chord labels.
        raise NotImplementedError('Chord cannot be inverted')
class SimpleChordTransformer(ChordTransformer):
    '''Simplified chord transformations.  Only pitch class activity is encoded.

    Attributes
    ----------
    name : str
        name of the transformer

    sr : number > 0
        Sampling rate of audio

    hop_length : int > 0
        Hop length for annotation frames

    See Also
    --------
    ChordTransformer
    '''
    def __init__(self, name='chord', sr=22050, hop_length=512):
        super(SimpleChordTransformer, self).__init__(name=name,
                                                     sr=sr,
                                                     hop_length=hop_length)
        # The root and bass fields inherited from ChordTransformer are
        # not used by this simplified task; drop them from the registry.
        for field in ('root', 'bass'):
            self.pop(field)

    def transform_annotation(self, ann, duration):
        '''Apply the chord transformation.

        Parameters
        ----------
        ann : jams.Annotation
            The chord annotation

        duration : number > 0
            The target duration

        Returns
        -------
        data : dict
            data['pitch'] : np.ndarray, shape=(n, 12)

            `pitch` is a binary matrix indicating pitch class
            activation at each frame.
        '''
        # Run the full (pitch, root, bass) transformation, then keep
        # only the pitch-class activations.
        data = super(SimpleChordTransformer,
                     self).transform_annotation(ann, duration)

        for field in ('root', 'bass'):
            data.pop(field, None)

        return data

    def inverse(self, *args, **kwargs):
        raise NotImplementedError('SimpleChord cannot be inverted')


'''A list of normalized pitch class names'''
PITCHES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']


'''A mapping of chord quality encodings to their names'''
QUALITIES = {
    0b000100000000: 'min',
    0b000010000000: 'maj',
    0b000100010000: 'min',
    0b000010010000: 'maj',
    0b000100100000: 'dim',
    0b000010001000: 'aug',
    0b000100010010: 'min7',
    0b000010010001: 'maj7',
    0b000010010010: '7',
    0b000100100100: 'dim7',
    0b000100100010: 'hdim7',
    0b000100010001: 'minmaj7',
    0b000100010100: 'min6',
    0b000010010100: 'maj6',
    0b001000010000: 'sus2',
    0b000001010000: 'sus4'
}
class ChordTagTransformer(BaseTaskTransformer):
    '''Chord transformer that uses a tag-space encoding for chord labels.

    Attributes
    ----------
    name : str
        name of the transformer

    vocab : str

        A string of chord quality indicators to include:

            - '3': maj/min
            - '5': '3' + aug/dim
            - '6': '3' + '5' + maj6/min6
            - '7': '3' + '5' + '6' + 7/min7/maj7/dim7/hdim7/minmaj7
            - 's': sus2/sus4

        Note: 5 requires 3, 6 requires 5, 7 requires 6.

    sr : number > 0
        Sampling rate of audio

    hop_length : int > 0
        Hop length for annotation frames

    sparse : Bool
        Whether or not to use sparse encoding for the labels

    p_self : None, float in (0, 1), or np.ndarray [shape=(n_labels,)]
        Optional self-loop probability(ies), used for Viterbi decoding

    p_state : None or np.ndarray [shape=(n_labels,)]
        Optional marginal probability for each chord class

    p_init : None or np.ndarray [shape=(n_labels,)]
        Optional initial probability for each chord class

    Notes
    -----
    The number of chord classes (`n_labels`) depends on the vocabulary:

        - '3' => 2 + 12 * 2 = 26
        - '35' => 2 + 12 * 4 = 50
        - '356' => 2 + 12 * 6 = 74
        - '3567' => 2 + 12 * 12 = 146
        - '3567s' => 2 + 12 * 14 = 170

    See Also
    --------
    ChordTransformer
    SimpleChordTransformer
    '''
    def __init__(self, name='chord', vocab='3567s',
                 sr=22050, hop_length=512, sparse=False,
                 p_self=None, p_init=None, p_state=None):

        super(ChordTagTransformer, self).__init__(name=name,
                                                  namespace='chord',
                                                  sr=sr,
                                                  hop_length=hop_length)

        # Validate the vocabulary string: only characters from '3567s'
        # are allowed, and each level requires the one below it
        # ('5' needs '3', '6' needs '5', '7' needs '6').
        if set(vocab) - set('3567s'):
            raise ParameterError('Invalid vocabulary string: {}'.format(vocab))

        if '5' in vocab and '3' not in vocab:
            raise ParameterError('Invalid vocabulary string: {}'.format(vocab))

        if '6' in vocab and '5' not in vocab:
            raise ParameterError('Invalid vocabulary string: {}'.format(vocab))

        if '7' in vocab and '6' not in vocab:
            raise ParameterError('Invalid vocabulary string: {}'.format(vocab))

        self.vocab = vocab.lower()
        labels = self.vocabulary()
        self.sparse = sparse

        # Sparse targets are integer class indices; dense targets are
        # one-hot boolean vectors.
        if self.sparse:
            self.encoder = LabelEncoder()
        else:
            self.encoder = LabelBinarizer()
        self.encoder.fit(labels)
        self._classes = set(self.encoder.classes_)

        self.set_transition(p_self)

        if p_init is not None:
            if len(p_init) != len(self._classes):
                raise ParameterError('Invalid p_init.shape={} for vocabulary {} size={}'.format(p_init.shape, vocab, len(self._classes)))

        self.p_init = p_init

        if p_state is not None:
            if len(p_state) != len(self._classes):
                raise ParameterError('Invalid p_state.shape={} for vocabulary {} size={}'.format(p_state.shape, vocab, len(self._classes)))

        self.p_state = p_state

        # Construct the quality mask for chord encoding.
        # Each quality is a 12-bit integer where bit (11 - pc) is set
        # when relative pitch class `pc` sounds (see `simplify`, which
        # hashes a chord by dotting the reversed pitch vector against
        # powers of two).  The mask keeps only the scale degrees that
        # the chosen vocabulary can distinguish.
        self.mask_ = 0b000000000000
        if '3' in self.vocab:
            self.mask_ |= 0b000110000000
        if '5' in self.vocab:
            self.mask_ |= 0b000110111000
        if '6' in self.vocab:
            self.mask_ |= 0b000110010100
        if '7' in self.vocab:
            self.mask_ |= 0b000110110111
        if 's' in self.vocab:
            self.mask_ |= 0b001001010000

        if self.sparse:
            self.register('chord', [None, 1], int)
        else:
            self.register('chord', [None, len(self._classes)], bool)

    def set_transition(self, p_self):
        '''Set the transition matrix according to self-loop probabilities.

        Parameters
        ----------
        p_self : None, float in (0, 1), or np.ndarray [shape=(n_labels,)]
            Optional self-loop probability(ies), used for Viterbi decoding
        '''
        if p_self is None:
            self.transition = None
        else:
            self.transition = transition_loop(len(self._classes), p_self)

    def empty(self, duration):
        '''Empty chord annotations

        Parameters
        ----------
        duration : number
            The length (in seconds) of the empty annotation

        Returns
        -------
        ann : jams.Annotation
            A chord annotation consisting of a single `no-chord` observation.
        '''
        ann = super(ChordTagTransformer, self).empty(duration)

        ann.append(time=0,
                   duration=duration,
                   value='X', confidence=0)

        return ann

    def vocabulary(self):
        '''Build the chord label list for the current vocabulary.

        Returns
        -------
        labels : list of str
            'N' and 'X', plus one '<pitch>:<quality>' label for every
            (pitch class, quality) pair in the vocabulary.
        '''
        qualities = []

        # '5' implies '3' (enforced in __init__), so this disjunction
        # is equivalent to testing '3' alone; kept for clarity.
        if '3' in self.vocab or '5' in self.vocab:
            qualities.extend(['min', 'maj'])

        if '5' in self.vocab:
            qualities.extend(['dim', 'aug'])

        if '6' in self.vocab:
            qualities.extend(['min6', 'maj6'])

        if '7' in self.vocab:
            qualities.extend(['min7', 'maj7', '7', 'dim7', 'hdim7', 'minmaj7'])

        if 's' in self.vocab:
            qualities.extend(['sus2', 'sus4'])

        labels = ['N', 'X']

        for chord in product(PITCHES, qualities):
            labels.append('{}:{}'.format(*chord))

        return labels

    def simplify(self, chord):
        '''Simplify a chord string down to the vocabulary space'''
        # Drop inversions
        chord = re.sub(r'/.*$', r'', chord)
        # Drop any additional or suppressed tones
        chord = re.sub(r'\(.*?\)', r'', chord)
        # Drop dangling : indicators
        chord = re.sub(r':$', r'', chord)

        # Encode the chord: root pitch class + root-relative pitch vector
        root, pitches, _ = mir_eval.chord.encode(chord)

        # Build the query
        # To map the binary vector pitches down to bit masked integer,
        # we just dot against powers of 2
        P = 2**np.arange(12, dtype=int)
        query = self.mask_ & pitches[::-1].dot(P)

        # No root and an explicit 'N' label -> no-chord
        if root < 0 and chord[0].upper() == 'N':
            return 'N'
        # Anything the vocabulary cannot express -> unknown chord
        if query not in QUALITIES:
            return 'X'

        return '{}:{}'.format(PITCHES[root], QUALITIES[query])

    def transform_annotation(self, ann, duration):
        '''Transform an annotation to chord-tag encoding

        Parameters
        ----------
        ann : jams.Annotation
            The annotation to convert

        duration : number > 0
            The duration of the track

        Returns
        -------
        data : dict
            data['chord'] : np.ndarray, shape=(n, n_labels) or (n, 1)
                A time-varying binary (or sparse integer) encoding
                of the chords, depending on self.sparse.
        '''

        intervals, values = ann.to_interval_values()

        chords = []
        for v in values:
            chords.extend(self.encoder.transform([self.simplify(v)]))

        dtype = self.fields[self.scope('chord')].dtype

        chords = np.asarray(chords)

        if self.sparse:
            # LabelEncoder yields a flat index array; add a column axis
            chords = chords[:, np.newaxis]

        target = self.encode_intervals(duration, intervals, chords,
                                       multi=False, dtype=dtype)

        return {'chord': target}

    def inverse(self, encoded, duration=None):
        '''Inverse transformation'''

        ann = jams.Annotation(self.namespace, duration=duration)

        for start, end, value in self.decode_intervals(encoded,
                                                       duration=duration,
                                                       multi=False,
                                                       sparse=self.sparse,
                                                       transition=self.transition,
                                                       p_init=self.p_init,
                                                       p_state=self.p_state):

            # Map start:end to frames
            f_start, f_end = time_to_frames([start, end],
                                            sr=self.sr,
                                            hop_length=self.hop_length)

            # Reverse the index
            if self.sparse:
                # Compute the confidence
                if encoded.shape[1] == 1:
                    # This case is for full-confidence prediction (just the index)
                    confidence = 1.
                else:
                    confidence = np.mean(encoded[f_start:f_end+1, value])

                value_dec = self.encoder.inverse_transform(value)
            else:
                confidence = np.mean(encoded[f_start:f_end+1, np.argmax(value)])
                value_dec = self.encoder.inverse_transform(np.atleast_2d(value))

            for vd in value_dec:
                ann.append(time=start,
                           duration=end-start,
                           value=vd,
                           confidence=float(confidence))

        return ann


#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''Key recognition task transformer'''

from itertools import product
import logging

import numpy as np
import mir_eval
import jams

from librosa import note_to_midi, midi_to_note, time_to_frames, key_to_degrees
from librosa.sequence import transition_loop

from .base import BaseTaskTransformer
from ..exceptions import ParameterError
from ..labels import LabelBinarizer, LabelEncoder, MultiLabelBinarizer

__all__ = ['KeyTransformer', 'KeyTagTransformer']

C_MAJOR_PITCHES = key_to_degrees('C:maj')
MODES = ['ionian', 'dorian', 'phrygian', 'lydian', 'mixolydian', 'aeolian', 'locrian']
QUALITY = {'major' : 0, 'minor' : -3}
def _encode_key_str(key_str, sparse):
    '''Helper function to go from jams `key_mode` annotation value strings to 12-D
    numpy membership vec, representing the pitch profile.

    Parameters
    ----------
    key_str : str
        String in the style of 'key_mode' jams annotation values.
    sparse : bool
        Whether or not to use sparse encoding for the tonic field.

    Returns
    -------
    (pitch_profile, tonic) : tuple
        pitch_profile : np.ndarray, shape = (12,)
            a 12-D vector encoding the membership of each pitch class for
            a given `key_str`.  All zeros when the key is atonal ('N').
            NOTE: the dtype is float for tonal keys and bool for the
            atonal case; callers cast to bool.
        tonic : int or np.ndarray, shape = (13,), dtype = bool
            an int in the range [0, 12] to indicate the pitch class of the
            tonic, 12 being atonal.  The type depends on the `sparse`
            parameter: integer when sparse, 13-D one-hot boolean otherwise.
    '''

    key_str_split = key_str.split(':')

    # Look at the Tonic first
    if key_str_split[0] == 'N':
        tonic = 12
    else:
        tonic = note_to_midi(key_str_split[0]) % 12

    # Now look at quality/mode and build pitch_profile
    # First construct the profile in C for a given mode/quality
    c_major_profile = np.zeros(12)
    for pc in C_MAJOR_PITCHES:
        c_major_profile[pc] = 1

    # When there is no tonal center, pitch profile is all zeros.
    if tonic == 12:
        pitch_profile = np.zeros(12, dtype=bool)
    else:
        # When there is no quality, major assumed.
        if len(key_str_split) == 1:
            quality = 'major'
        else:
            quality = key_str_split[1]

        if quality in MODES:
            # Transpose down by the mode's starting degree in C major
            mode_transpose_int = -1 * C_MAJOR_PITCHES[MODES.index(quality)]
        elif quality in QUALITY.keys():
            mode_transpose_int = -1 * QUALITY[quality]
        else:
            # Unrecognized qualities fall back to major (no transposition)
            logging.info(
                '{} is not a recognized quality. Using major instead.'.format(quality)
            )
            mode_transpose_int = 0

        # roll the profile to fit different modes.
        mode_profile_in_c = np.roll(c_major_profile, mode_transpose_int)
        # Add the leading tone to the minor profiles
        if quality == 'minor':
            mode_profile_in_c[11] = 1

        # Now roll the profile again to get the right tonic.
        pitch_profile = np.roll(mode_profile_in_c, tonic)

    if not sparse:
        # Densify the tonic into a 13-D one-hot vector
        tonic_vec = np.zeros(13, dtype=bool)
        tonic_vec[tonic] = 1
        tonic = tonic_vec

    return (pitch_profile, tonic)


class KeyTransformer(BaseTaskTransformer):
    '''Key annotation transformer.

    This transformer uses a (pitch_profile, tonic) decomposition of key_mode
    annotation, where the mode is reflected in the 12-D pitch_profile vector.

    Attributes
    ----------
    name : str
        The name of the key transformer

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The number of samples between each annotation frame

    sparse : bool
        If True, tonic value is sparsely encoded as integers in [0, 12].
        If False, tonic value is densely encoded as 13-dimensional booleans.
    '''
    def __init__(self, name='key', sr=22050, hop_length=512, sparse=False):
        '''Initialize a key task transformer'''

        super(KeyTransformer, self).__init__(name=name,
                                             namespace='key_mode',
                                             sr=sr, hop_length=hop_length)
        self.sparse = sparse

        self.register('pitch_profile', [None, 12], bool)
        if self.sparse:
            self.register('tonic', [None, 1], int)
        else:
            self.register('tonic', [None, 13], bool)


    def empty(self, duration):
        '''Empty key annotation

        Parameters
        ----------
        duration : number
            The length (in seconds) of the empty annotation

        Returns
        -------
        ann : jams.Annotation
            A key_mode annotation consisting of a single `no-key` observation.
        '''
        ann = super(KeyTransformer, self).empty(duration)

        ann.append(time=0,
                   duration=duration,
                   value='N', confidence=0)

        return ann

    def transform_annotation(self, ann, duration):
        '''Apply the key transformation.

        Parameters
        ----------
        ann : jams.Annotation
            The key_mode annotation

        duration : number > 0
            The target duration

        Returns
        -------
        data : dict
            data['pitch_profile'] : np.ndarray, shape=(n, 12)
            data['tonic'] : np.ndarray, shape=(n, 13) or (n, 1)

            `pitch_profile` is a binary matrix indicating pitch class
            activation at each frame.

            `tonic` is a one-hot matrix indicating the tonal center's
            pitch class at each frame.

            If sparsely encoded, `tonic` is a integer
            in the range [0, 12] where 12 indicates atonal.

            If densely encoded, `tonic` has an extra
            final dimension which is active when it is atonal.
        '''
        # get list of observations
        intervals, keys = ann.to_interval_values()

        # Get the dtype for tonic
        if self.sparse:
            dtype = int
        else:
            dtype = bool

        # If we don't have any labeled intervals, fill in a 'N'
        if not keys:
            intervals = np.asarray([[0, duration]])
            keys = ['N']

        # Suppress all intervals not in the encoder
        pitch_profiles = []
        tonics = []

        # default value when data is missing
        if self.sparse:
            fill = 12
        else:
            fill = False

        for key in keys:
            pitch_profile, tonic = _encode_key_str(key, self.sparse)
            pitch_profiles.append(pitch_profile)
            # Sparse tonics come back as bare ints; wrap them so every
            # row has a consistent (1,)-shaped entry.
            tonics.append(tonic if isinstance(tonic, np.ndarray) else [tonic])

        pitch_profiles = np.asarray(pitch_profiles, dtype=bool)
        tonics = np.asarray(tonics, dtype=dtype)

        target_pitch_profile = self.encode_intervals(duration, intervals, pitch_profiles)

        target_tonic = self.encode_intervals(duration, intervals, tonics,
                                             multi=False,
                                             dtype=dtype,
                                             fill=fill)

        return {'pitch_profile': target_pitch_profile,
                'tonic': target_tonic}

    def inverse(self, pitch_profile, tonic, duration=None):
        raise NotImplementedError('There are some ambiguities, also streaming profiles are difficult')
class KeyTagTransformer(BaseTaskTransformer):
    '''Key transformer that uses a tag-space encoding for key labels.

    Attributes
    ----------
    name : str
        name of the transformer

    sr : number > 0
        Sampling rate of audio

    hop_length : int > 0
        Hop length for annotation frames

    sparse : Bool
        Whether or not to use sparse encoding for the labels

    p_self : None, float in (0, 1), or np.ndarray [shape=(n_labels,)]
        Optional self-loop probability(ies), used for Viterbi decoding

    p_state : None or np.ndarray [shape=(n_labels,)]
        Optional marginal probability for each key class

    p_init : None or np.ndarray [shape=(n_labels,)]
        Optional initial probability for each key class

    See Also
    --------
    KeyTransformer
    ChordTagTransformer
    '''
    def __init__(self, name='key_tag',
                 sr=22050, hop_length=512, sparse=False,
                 p_self=None, p_init=None, p_state=None):

        super(KeyTagTransformer, self).__init__(name=name,
                                                namespace='key_mode',
                                                sr=sr,
                                                hop_length=hop_length)

        labels = self.vocabulary()
        self.sparse = sparse

        # Sparse targets are integer class indices; dense targets are
        # one-hot boolean vectors.
        if self.sparse:
            self.encoder = LabelEncoder()
        else:
            self.encoder = LabelBinarizer()
        self.encoder.fit(labels)
        self._classes = set(self.encoder.classes_)

        self.set_transition(p_self)

        if p_init is not None:
            if len(p_init) != len(self._classes):
                raise ParameterError('Invalid p_init.shape={} for vocabulary of size {}'.format(p_init.shape, len(self._classes)))

        self.p_init = p_init

        if p_state is not None:
            if len(p_state) != len(self._classes):
                raise ParameterError('Invalid p_state.shape={} for vocabulary of size {}'.format(p_state.shape, len(self._classes)))

        self.p_state = p_state

        if self.sparse:
            self.register('tag', [None, 1], int)
        else:
            self.register('tag', [None, len(self._classes)], bool)

    def set_transition(self, p_self):
        '''Set the transition matrix according to self-loop probabilities.

        Parameters
        ----------
        p_self : None, float in (0, 1), or np.ndarray [shape=(n_labels,)]
            Optional self-loop probability(ies), used for Viterbi decoding
        '''
        if p_self is None:
            self.transition = None
        else:
            self.transition = transition_loop(len(self._classes), p_self)

    def empty(self, duration):
        '''Empty key annotations

        Parameters
        ----------
        duration : number
            The length (in seconds) of the empty annotation

        Returns
        -------
        ann : jams.Annotation
            A key annotation consisting of a single `N` observation.
        '''
        ann = super(KeyTagTransformer, self).empty(duration)

        ann.append(time=0,
                   duration=duration,
                   value='N', confidence=0)

        return ann

    def vocabulary(self):
        ''' Build the vocabulary for all key_mode strings

        Returns
        -------
        labels : list
            list of string labels.
        '''
        qualities = MODES + list(QUALITY.keys())
        # 12 canonical tonic spellings (no octave, ASCII accidentals)
        tonics = midi_to_note(list(range(12)), octave=False, unicode=False)

        labels = ['N']

        for key_mode in product(tonics, qualities):
            labels.append('{}:{}'.format(*key_mode))

        return labels

    def enharmonic(self, key_str):
        '''Force the tonic spelling to fit our tonic list
        by spelling out of vocab keys enharmonically.

        Parameters
        ----------
        key_str : str
            The key_mode string in jams style.

        Returns
        -------
        key_str : str
            The key_mode string spelled enharmonically to fit our vocab.
        '''
        key_list = key_str.split(':')
        # spell the tonic enharmonically if necessary
        if key_list[0] != 'N':
            key_list[0] = midi_to_note(note_to_midi(key_list[0]), octave=False, unicode=False)
            # No explicit quality defaults to major
            if len(key_list) == 1:
                key_list.append('major')

        return ':'.join(key_list)

    def transform_annotation(self, ann, duration):
        '''Transform an annotation to key-tag encoding

        Parameters
        ----------
        ann : jams.Annotation
            The annotation to convert

        duration : number > 0
            The duration of the track

        Returns
        -------
        data : dict
            data['tag'] : np.ndarray, shape=(n, n_labels) or (n, 1)
                A time-varying binary (or sparse integer) encoding of
                the keys.  The shape depends on self.sparse.
        '''
        intervals, values = ann.to_interval_values()

        keys = []
        for v in values:
            keys.extend(self.encoder.transform([self.enharmonic(v)]))

        dtype = self.fields[self.scope('tag')].dtype

        keys = np.asarray(keys)

        if self.sparse:
            # LabelEncoder yields a flat index array; add a column axis
            keys = keys[:, np.newaxis]

        target = self.encode_intervals(duration, intervals, keys,
                                       multi=False, dtype=dtype)

        return {'tag': target}

    def inverse(self, encoded, duration=None):
        '''Inverse transformation'''

        ann = jams.Annotation(self.namespace, duration=duration)

        for start, end, value in self.decode_intervals(encoded,
                                                       duration=duration,
                                                       multi=False,
                                                       sparse=self.sparse,
                                                       transition=self.transition,
                                                       p_init=self.p_init,
                                                       p_state=self.p_state):

            # Map start:end to frames
            f_start, f_end = time_to_frames([start, end],
                                            sr=self.sr,
                                            hop_length=self.hop_length)

            # Reverse the index
            if self.sparse:
                # Compute the confidence
                if encoded.shape[1] == 1:
                    # This case is for full-confidence prediction (just the index)
                    confidence = 1.
                else:
                    confidence = np.mean(encoded[f_start:f_end+1, value])

                value_dec = self.encoder.inverse_transform(value)
            else:
                confidence = np.mean(encoded[f_start:f_end+1, np.argmax(value)])
                value_dec = self.encoder.inverse_transform(np.atleast_2d(value))

            for vd in value_dec:
                ann.append(time=start,
                           duration=end-start,
                           value=vd,
                           confidence=float(confidence))

        return ann


#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''Regression task transformers'''

import numpy as np

import jams

from .base import BaseTaskTransformer
from ..exceptions import DataError

__all__ = ['VectorTransformer']
class VectorTransformer(BaseTaskTransformer):
    '''Vector regression transformer.

    Attributes
    ----------
    name : str
        The name of this transformer

    namespace : str
        The target namespace of this transformer

    dimension : int > 0
        The dimension of the vector data

    dtype : np.dtype
        The desired data type of the output
    '''
    def __init__(self, name, namespace, dimension, dtype=np.float32):
        # sr=1, hop_length=1: vector targets are static (not framed)
        super(VectorTransformer, self).__init__(name=name,
                                                namespace=namespace,
                                                sr=1, hop_length=1)

        self.dimension = dimension
        self.dtype = dtype

        self.register('vector', [1, self.dimension], self.dtype)

    def empty(self, duration):
        '''Empty vector annotations.

        This returns an annotation with a single observation
        vector consisting of all-zeroes.

        Parameters
        ----------
        duration : number >0
            Length of the track

        Returns
        -------
        ann : jams.Annotation
            The empty annotation
        '''
        ann = super(VectorTransformer, self).empty(duration)

        # Use self.dtype rather than a hard-coded float32 so the empty
        # observation matches the dtype registered in __init__.
        ann.append(time=0, duration=duration, confidence=0,
                   value=np.zeros(self.dimension, dtype=self.dtype))
        return ann

    def transform_annotation(self, ann, duration):
        '''Apply the vector transformation.

        Parameters
        ----------
        ann : jams.Annotation
            The input annotation

        duration : number > 0
            The duration of the track

        Returns
        -------
        data : dict
            data['vector'] : np.ndarray, shape=(dimension,)

        Raises
        ------
        DataError
            If the input dimension does not match

        Notes
        -----
        Only the first observation of the annotation is used.
        '''
        _, values = ann.to_interval_values()
        vector = np.asarray(values[0], dtype=self.dtype)
        if len(vector) != self.dimension:
            # Fixed: the original used '{:0}'/'{:1}', which are format
            # *specs* (zero-pad / min-width), not field indices, and
            # only rendered correctly by coincidence.
            raise DataError('vector dimension({0}) '
                            '!= self.dimension({1})'
                            .format(len(vector), self.dimension))

        return {'vector': vector}

    def inverse(self, vector, duration=None):
        '''Inverse vector transformer'''

        ann = jams.Annotation(namespace=self.namespace, duration=duration)

        if duration is None:
            duration = 0
        ann.append(time=0, duration=duration, value=vector)

        return ann


#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''Segment and structure tasks'''

import numpy as np
from mir_eval.util import intervals_to_samples, index_labels, adjust_intervals

from .base import BaseTaskTransformer

__all__ = ['StructureTransformer']
class StructureTransformer(BaseTaskTransformer):
    '''Structure agreement transformer.

    This transformer maps a labeled, flat structural segmentation
    to an `n*n` boolean matrix indicating whether two frames
    belong to a similarly labeled segment or not.

    Attributes
    ----------
    name : str
        The name of this transformer

    sr : number > 0
        The audio sampling rate

    hop_length : int > 0
        The number of samples between each annotation frame
    '''

    def __init__(self, name='structure', sr=22050, hop_length=512):
        '''Initialize a structure agreement transformer'''

        super(StructureTransformer, self).__init__(name=name,
                                                   namespace='segment_open',
                                                   sr=sr,
                                                   hop_length=hop_length)

        self.register('agree', [None, None], bool)

    def empty(self, duration):
        # Default annotation: one catch-all segment labeled 'none'
        # spanning the full track
        ann = super(StructureTransformer, self).empty(duration)
        ann.append(time=0, duration=duration, value='none', confidence=0)
        return ann

    def transform_annotation(self, ann, duration):
        '''Apply the structure agreement transformation.

        Parameters
        ----------
        ann : jams.Annotation
            The segment annotation

        duration : number > 0
            The target duration

        Returns
        -------
        data : dict
            data['agree'] : np.ndarray, shape=(n, n), dtype=bool
        '''
        raw_intervals, raw_labels = ann.to_interval_values()

        # Pad/clip the segmentation so it spans exactly [0, duration]
        seg_intervals, seg_labels = adjust_intervals(raw_intervals,
                                                     raw_labels,
                                                     t_min=0,
                                                     t_max=duration)

        # Re-index the string labels as integer ids
        label_ids, _ = index_labels(seg_labels)

        # Sample the segment ids on our frame grid
        frame_period = float(self.hop_length) / self.sr
        _, frame_ids = intervals_to_samples(seg_intervals,
                                            label_ids,
                                            sample_size=frame_period)

        # Two frames agree iff they carry the same segment id
        return {'agree': np.equal.outer(frame_ids, frame_ids)}

    def inverse(self, agree, duration=None):
        # The agreement matrix discards label identity, so there is no
        # well-defined inverse mapping back to a segmentation
        raise NotImplementedError('Segment agreement cannot be inverted')
class DynamicLabelTransformer(BaseTaskTransformer):
    '''Time-series label transformer.

    Attributes
    ----------
    name : str
        The name of this transformer object

    namespace : str
        The JAMS namespace for this task

    labels : list of str [optional]
        The list of labels for this task.

        If not provided, it will attempt to infer the label set from the
        namespace definition.

    sr : number > 0
        The audio sampling rate

    hop_length : int > 0
        The hop length for annotation frames

    p_self : None, float in (0, 1), or np.ndarray [shape=(n_labels,)]
        Optional self-loop probability(ies), used for Viterbi decoding

    p_state : None or np.ndarray [shape=(n_labels,)]
        Optional marginal probability for each class

    p_init : None or np.ndarray [shape=(n_labels,)]
        Optional initial probability for each class


    See Also
    --------
    StaticLabelTransformer
    '''
    def __init__(self, name, namespace, labels=None, sr=22050, hop_length=512,
                 p_self=None, p_init=None, p_state=None):
        super(DynamicLabelTransformer, self).__init__(name=name,
                                                      namespace=namespace,
                                                      sr=sr,
                                                      hop_length=hop_length)

        if labels is None:
            labels = jams.schema.values(namespace)

        self.encoder = MultiLabelBinarizer()
        self.encoder.fit([labels])
        self._classes = set(self.encoder.classes_)

        self.set_transition(p_self)

        if p_init is not None:
            if len(p_init) != len(self._classes):
                # np.shape() also handles plain sequences; `p_init.shape`
                # would raise AttributeError on a list input
                raise ParameterError('Invalid p_init.shape={} for '
                                     'vocabulary size={}'.format(
                                         np.shape(p_init),
                                         len(self._classes)))

        self.p_init = p_init

        if p_state is not None:
            if len(p_state) != len(self._classes):
                raise ParameterError('Invalid p_state.shape={} for '
                                     'vocabulary size={}'.format(
                                         np.shape(p_state),
                                         len(self._classes)))

        self.p_state = p_state

        self.register('tags', [None, len(self._classes)], bool)

    def set_transition(self, p_self):
        '''Set the transition matrix according to self-loop probabilities.

        Parameters
        ----------
        p_self : None, float in (0, 1), or np.ndarray [shape=(n_labels,)]
            Optional self-loop probability(ies), used for Viterbi decoding
        '''
        if p_self is None:
            self.transition = None
        elif np.isscalar(p_self):
            # A single probability shared by all classes: one 2x2 matrix
            self.transition = transition_loop(2, p_self)
        elif len(p_self) != len(self._classes):
            # As in __init__, use np.shape() so list inputs produce the
            # intended error message instead of an AttributeError
            raise ParameterError('Invalid p_self.shape={} for '
                                 'vocabulary size={}'.format(
                                     np.shape(p_self), len(self._classes)))
        else:
            # Per-class on/off transition matrices.
            # (Allocate only on this path; previously the array was built
            # unconditionally and discarded in the scalar case.)
            self.transition = np.empty((len(self._classes), 2, 2))
            for i in range(len(self._classes)):
                self.transition[i] = transition_loop(2, p_self[i])

    def empty(self, duration):
        '''Empty label annotations.

        Constructs a single observation with an empty value (None).

        Parameters
        ----------
        duration : number > 0
            The duration of the annotation
        '''
        ann = super(DynamicLabelTransformer, self).empty(duration)
        ann.append(time=0, duration=duration, value=None)
        return ann

    def transform_annotation(self, ann, duration):
        '''Transform an annotation to dynamic label encoding.

        Parameters
        ----------
        ann : jams.Annotation
            The annotation to convert

        duration : number > 0
            The duration of the track

        Returns
        -------
        data : dict
            data['tags'] : np.ndarray, shape=(n, n_labels)
                A time-varying binary encoding of the labels
        '''
        intervals, values = ann.to_interval_values()

        # Suppress all intervals not in the encoder:
        # unknown labels are encoded as the all-zeros vector
        tags = []
        for v in values:
            if v in self._classes:
                tags.extend(self.encoder.transform([[v]]))
            else:
                tags.extend(self.encoder.transform([[]]))

        tags = np.asarray(tags)
        target = self.encode_intervals(duration, intervals, tags)

        return {'tags': target}

    def inverse(self, encoded, duration=None):
        '''Inverse transformation'''

        ann = jams.Annotation(namespace=self.namespace, duration=duration)
        for start, end, value in self.decode_intervals(encoded,
                                                       duration=duration,
                                                       transition=self.transition,
                                                       p_init=self.p_init,
                                                       p_state=self.p_state):
            # Map start:end to frames
            f_start, f_end = time_to_frames([start, end],
                                            sr=self.sr,
                                            hop_length=self.hop_length)

            confidence = np.mean(encoded[f_start:f_end+1, value])

            value_dec = self.encoder.inverse_transform(np.atleast_2d(value))[0]

            for vd in value_dec:
                ann.append(time=start,
                           duration=end-start,
                           value=vd,
                           # Cast the numpy scalar to a native float so the
                           # annotation serializes cleanly (consistent with
                           # the chord transformer's inverse)
                           confidence=float(confidence))

        return ann
194 | 195 | See Also 196 | -------- 197 | DynamicLabelTransformer 198 | ''' 199 | 200 | def __init__(self, name, namespace, labels=None): 201 | super(StaticLabelTransformer, self).__init__(name=name, 202 | namespace=namespace, 203 | sr=1, hop_length=1) 204 | 205 | if labels is None: 206 | labels = jams.schema.values(namespace) 207 | 208 | self.encoder = MultiLabelBinarizer() 209 | self.encoder.fit([labels]) 210 | self._classes = set(self.encoder.classes_) 211 | self.register('tags', [len(self._classes)], bool) 212 | 213 | def transform_annotation(self, ann, duration): 214 | '''Transform an annotation to static label encoding. 215 | 216 | Parameters 217 | ---------- 218 | ann : jams.Annotation 219 | The annotation to convert 220 | 221 | duration : number > 0 222 | The duration of the track 223 | 224 | Returns 225 | ------- 226 | data : dict 227 | data['tags'] : np.ndarray, shape=(n_labels,) 228 | A static binary encoding of the labels 229 | ''' 230 | intervals = np.asarray([[0, 1]]) 231 | values = list([obs.value for obs in ann]) 232 | intervals = np.tile(intervals, [len(values), 1]) 233 | 234 | # Suppress all intervals not in the encoder 235 | tags = [v for v in values if v in self._classes] 236 | if len(tags): 237 | target = self.encoder.transform([tags]).astype(bool).max(axis=0) 238 | else: 239 | target = np.zeros(len(self._classes), dtype=bool) 240 | 241 | return {'tags': target} 242 | 243 | def inverse(self, encoded, duration=None): 244 | '''Inverse static tag transformation''' 245 | 246 | ann = jams.Annotation(namespace=self.namespace, duration=duration) 247 | 248 | if np.isrealobj(encoded): 249 | detected = (encoded >= 0.5) 250 | else: 251 | detected = encoded 252 | 253 | for vd in self.encoder.inverse_transform(np.atleast_2d(detected))[0]: 254 | vid = np.flatnonzero(self.encoder.transform(np.atleast_2d(vd))) 255 | ann.append(time=0, 256 | duration=duration, 257 | value=vd, 258 | confidence=encoded[vid]) 259 | return ann 260 | 
-------------------------------------------------------------------------------- /pumpp/version.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Version info""" 4 | 5 | short_version = '0.6' 6 | version = '0.6.0' 7 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools >= 48", 4 | "wheel >= 0.29.0", 5 | ] 6 | build-backend = 'setuptools.build_meta' 7 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [pycodestyle] 2 | max-line-length=119 3 | 4 | [tool:pytest] 5 | addopts = --cov-report term-missing --cov pumpp --cov-report=xml 6 | xfail_strict = true 7 | filterwarnings = 8 | ignore:Using a non-tuple sequence:FutureWarning:scipy.* 9 | 10 | 11 | [coverage:report] 12 | omit = 13 | */python?.?/* 14 | */site-packages/nose/* 15 | pumpp/labels.py 16 | 17 | 18 | [metadata] 19 | name = pumpp 20 | version = attr: pumpp.version.version 21 | description = A practically universal music pre-processor 22 | long_description = file: README.md 23 | long_description_content_type = text/markdown; charset=UTF-8 24 | url = https://github.com/bmcfee/pumpp 25 | author = Brian McFee 26 | license = ISC 27 | license_file = LICENSE.md 28 | license_file_content_type = text/markdown; charset=UTF-8 29 | project_urls = 30 | Source = https://github.com/bmcfee/pumpp 31 | Download = https://github.com/bmcfee/pumpp/releases 32 | classifiers = 33 | License :: OSI Approved :: ISC License (ISCL) 34 | Programming Language :: Python 35 | Development Status :: 3 - Alpha 36 | Intended Audience :: Developers 37 | Topic :: Software Development 38 | Programming Language :: Python :: 3 39 | Programming Language :: 
Python :: 3.7 40 | Programming Language :: Python :: 3.8 41 | Programming Language :: Python :: 3.9 42 | 43 | [options] 44 | packages = find: 45 | install_requires = 46 | librosa >= 0.8.0 47 | jams >= 0.3 48 | scikit-learn >= 1.0 49 | mir_eval >= 0.5 50 | python_requires = >= 3.6 51 | 52 | [options.extras_require] 53 | docs = 54 | numpydoc 55 | sphinx 56 | tests = 57 | pytest 58 | pytest-cov 59 | keras >= 2.6 60 | tensorflow >= 2.0 61 | keras = 62 | keras >= 2.6 63 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | if __name__ == '__main__': 4 | setup() 5 | -------------------------------------------------------------------------------- /tests/data/test.jams: -------------------------------------------------------------------------------- 1 | { 2 | "sandbox": {}, 3 | "annotations": [ 4 | { 5 | "namespace": "chord", 6 | "sandbox": {}, 7 | "time": 0, 8 | "duration": 3.5, 9 | "annotation_metadata": { 10 | "annotation_tools": "", 11 | "curator": { 12 | "name": "", 13 | "email": "" 14 | }, 15 | "annotator": {}, 16 | "version": "", 17 | "corpus": "", 18 | "annotation_rules": "", 19 | "validation": "", 20 | "data_source": "" 21 | }, 22 | "data": [ 23 | { 24 | "duration": 0.5, 25 | "confidence": null, 26 | "value": "C:1", 27 | "time": 0.0 28 | }, 29 | { 30 | "duration": 0.5, 31 | "confidence": null, 32 | "value": "D:1", 33 | "time": 0.5 34 | }, 35 | { 36 | "duration": 0.5, 37 | "confidence": null, 38 | "value": "E:1", 39 | "time": 1.0 40 | }, 41 | { 42 | "duration": 0.5, 43 | "confidence": null, 44 | "value": "F:1", 45 | "time": 1.5 46 | }, 47 | { 48 | "duration": 0.5, 49 | "confidence": null, 50 | "value": "G:1", 51 | "time": 2.0 52 | }, 53 | { 54 | "duration": 0.5, 55 | "confidence": null, 56 | "value": "A:1", 57 | "time": 2.5 58 | }, 59 | { 60 | "duration": 0.5, 61 | "confidence": null, 62 | "value": "B:1", 
63 | "time": 3.0 64 | } 65 | ] 66 | }, 67 | { 68 | "namespace": "chord_harte", 69 | "sandbox": {}, 70 | "time": 0, 71 | "duration": 3.5, 72 | "annotation_metadata": { 73 | "annotation_tools": "", 74 | "curator": { 75 | "name": "", 76 | "email": "" 77 | }, 78 | "annotator": {}, 79 | "version": "", 80 | "corpus": "", 81 | "annotation_rules": "", 82 | "validation": "", 83 | "data_source": "" 84 | }, 85 | "data": [ 86 | { 87 | "duration": 0.5, 88 | "confidence": null, 89 | "value": "C:maj(*3,*5)", 90 | "time": 0.0 91 | }, 92 | { 93 | "duration": 0.5, 94 | "confidence": null, 95 | "value": "D:maj(*3,*5)", 96 | "time": 0.5 97 | }, 98 | { 99 | "duration": 0.5, 100 | "confidence": null, 101 | "value": "E:maj(*3,*5)", 102 | "time": 1.0 103 | }, 104 | { 105 | "duration": 0.5, 106 | "confidence": null, 107 | "value": "F:maj(*3,*5)", 108 | "time": 1.5 109 | }, 110 | { 111 | "duration": 0.5, 112 | "confidence": null, 113 | "value": "G:maj(*3,*5)", 114 | "time": 2.0 115 | }, 116 | { 117 | "duration": 0.5, 118 | "confidence": null, 119 | "value": "A:maj(*3,*5)", 120 | "time": 2.5 121 | }, 122 | { 123 | "duration": 0.5, 124 | "confidence": null, 125 | "value": "B:maj(*3,*5)", 126 | "time": 3.0 127 | } 128 | ] 129 | }, 130 | { 131 | "namespace": "chord_roman", 132 | "sandbox": {}, 133 | "time": 0, 134 | "duration": 3.5, 135 | "annotation_metadata": { 136 | "annotation_tools": "", 137 | "curator": { 138 | "name": "", 139 | "email": "" 140 | }, 141 | "annotator": {}, 142 | "version": "", 143 | "corpus": "", 144 | "annotation_rules": "", 145 | "validation": "", 146 | "data_source": "" 147 | }, 148 | "data": [ 149 | { 150 | "duration": 0.5, 151 | "confidence": null, 152 | "value": { 153 | "tonic": "C", 154 | "chord": "I" 155 | }, 156 | "time": 0.0 157 | }, 158 | { 159 | "duration": 0.5, 160 | "confidence": null, 161 | "value": { 162 | "tonic": "C", 163 | "chord": "ii" 164 | }, 165 | "time": 0.5 166 | }, 167 | { 168 | "duration": 0.5, 169 | "confidence": null, 170 | "value": { 171 | 
"tonic": "C", 172 | "chord": "iii" 173 | }, 174 | "time": 1.0 175 | }, 176 | { 177 | "duration": 0.5, 178 | "confidence": null, 179 | "value": { 180 | "tonic": "C", 181 | "chord": "iv" 182 | }, 183 | "time": 1.5 184 | }, 185 | { 186 | "duration": 0.5, 187 | "confidence": null, 188 | "value": { 189 | "tonic": "C", 190 | "chord": "V" 191 | }, 192 | "time": 2.0 193 | }, 194 | { 195 | "duration": 0.5, 196 | "confidence": null, 197 | "value": { 198 | "tonic": "C", 199 | "chord": "vi" 200 | }, 201 | "time": 2.5 202 | }, 203 | { 204 | "duration": 0.5, 205 | "confidence": null, 206 | "value": { 207 | "tonic": "C", 208 | "chord": "VII" 209 | }, 210 | "time": 3.0 211 | } 212 | ] 213 | }, 214 | { 215 | "namespace": "key_mode", 216 | "sandbox": {}, 217 | "time": 0, 218 | "duration": 3.5, 219 | "annotation_metadata": { 220 | "annotation_tools": "", 221 | "curator": { 222 | "name": "", 223 | "email": "" 224 | }, 225 | "annotator": {}, 226 | "version": "", 227 | "corpus": "", 228 | "annotation_rules": "", 229 | "validation": "", 230 | "data_source": "" 231 | }, 232 | "data": [ 233 | { 234 | "duration": 3.5, 235 | "confidence": 1.0, 236 | "value": "C:major", 237 | "time": 0.0 238 | } 239 | ] 240 | }, 241 | { 242 | "namespace": "pitch_class", 243 | "sandbox": {}, 244 | "time": 0, 245 | "duration": 3.5, 246 | "annotation_metadata": { 247 | "annotation_tools": "", 248 | "curator": { 249 | "name": "", 250 | "email": "" 251 | }, 252 | "annotator": {}, 253 | "version": "", 254 | "corpus": "", 255 | "annotation_rules": "", 256 | "validation": "", 257 | "data_source": "" 258 | }, 259 | "data": { 260 | "duration": [ 261 | 0.5, 262 | 0.5, 263 | 0.5, 264 | 0.5, 265 | 0.5, 266 | 0.5, 267 | 0.5 268 | ], 269 | "confidence": [ 270 | null, 271 | null, 272 | null, 273 | null, 274 | null, 275 | null, 276 | null 277 | ], 278 | "value": [ 279 | { 280 | "tonic": "C", 281 | "pitch": 1 282 | }, 283 | { 284 | "tonic": "C", 285 | "pitch": 2 286 | }, 287 | { 288 | "tonic": "C", 289 | "pitch": 3 290 | }, 
291 | { 292 | "tonic": "C", 293 | "pitch": 4 294 | }, 295 | { 296 | "tonic": "C", 297 | "pitch": 5 298 | }, 299 | { 300 | "tonic": "C", 301 | "pitch": 6 302 | }, 303 | { 304 | "tonic": "C", 305 | "pitch": 7 306 | } 307 | ], 308 | "time": [ 309 | 0.0, 310 | 0.5, 311 | 1.0, 312 | 1.5, 313 | 2.0, 314 | 2.5, 315 | 3.0 316 | ] 317 | } 318 | }, 319 | { 320 | "namespace": "pitch_hz", 321 | "sandbox": {}, 322 | "time": 0, 323 | "duration": 3.5, 324 | "annotation_metadata": { 325 | "annotation_tools": "", 326 | "curator": { 327 | "name": "", 328 | "email": "" 329 | }, 330 | "annotator": {}, 331 | "version": "", 332 | "corpus": "", 333 | "annotation_rules": "", 334 | "validation": "", 335 | "data_source": "" 336 | }, 337 | "data": { 338 | "duration": [ 339 | 0.5, 340 | 0.5, 341 | 0.5, 342 | 0.5, 343 | 0.5, 344 | 0.5, 345 | 0.5 346 | ], 347 | "confidence": [ 348 | NaN, 349 | NaN, 350 | NaN, 351 | NaN, 352 | NaN, 353 | NaN, 354 | NaN 355 | ], 356 | "value": [ 357 | 130.81278265029931, 358 | 146.83238395870379, 359 | 164.81377845643496, 360 | 174.61411571650194, 361 | 195.99771799087463, 362 | 220.0, 363 | 246.94165062806206 364 | ], 365 | "time": [ 366 | 0.0, 367 | 0.5, 368 | 1.0, 369 | 1.5, 370 | 2.0, 371 | 2.5, 372 | 3.0 373 | ] 374 | } 375 | }, 376 | { 377 | "namespace": "pitch_midi", 378 | "sandbox": {}, 379 | "time": 0, 380 | "duration": 3.5, 381 | "annotation_metadata": { 382 | "annotation_tools": "", 383 | "curator": { 384 | "name": "", 385 | "email": "" 386 | }, 387 | "annotator": {}, 388 | "version": "", 389 | "corpus": "", 390 | "annotation_rules": "", 391 | "validation": "", 392 | "data_source": "" 393 | }, 394 | "data": { 395 | "duration": [ 396 | 0.5, 397 | 0.5, 398 | 0.5, 399 | 0.5, 400 | 0.5, 401 | 0.5, 402 | 0.5 403 | ], 404 | "confidence": [ 405 | NaN, 406 | NaN, 407 | NaN, 408 | NaN, 409 | NaN, 410 | NaN, 411 | NaN 412 | ], 413 | "value": [ 414 | 48.0, 415 | 50.0, 416 | 52.0, 417 | 53.0, 418 | 55.0, 419 | 57.0, 420 | 59.0 421 | ], 422 | "time": [ 423 | 0.0, 
424 | 0.5, 425 | 1.0, 426 | 1.5, 427 | 2.0, 428 | 2.5, 429 | 3.0 430 | ] 431 | } 432 | }, 433 | { 434 | "namespace": "tempo", 435 | "sandbox": {}, 436 | "time": 0, 437 | "duration": 3.5, 438 | "annotation_metadata": { 439 | "annotation_tools": "", 440 | "curator": { 441 | "name": "", 442 | "email": "" 443 | }, 444 | "annotator": {}, 445 | "version": "", 446 | "corpus": "", 447 | "annotation_rules": "", 448 | "validation": "", 449 | "data_source": "" 450 | }, 451 | "data": [ 452 | { 453 | "duration": 3.5, 454 | "confidence": 1.0, 455 | "value": 120.0, 456 | "time": 0.0 457 | } 458 | ] 459 | } 460 | ], 461 | "file_metadata": { 462 | "jams_version": "0.2.0", 463 | "title": "", 464 | "identifiers": {}, 465 | "release": "", 466 | "duration": 3.485170068027211, 467 | "artist": "" 468 | } 469 | } -------------------------------------------------------------------------------- /tests/data/test.ogg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmcfee/pumpp/1ca952e72e1d7f1e08514060821872465459c7f3/tests/data/test.ogg -------------------------------------------------------------------------------- /tests/test_core.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | '''Miscellaneous utility tests''' 4 | import pytest 5 | import numpy as np 6 | 7 | import librosa 8 | import jams 9 | 10 | import pumpp 11 | 12 | 13 | @pytest.fixture(params=[11025, 22050]) 14 | def sr(request): 15 | return request.param 16 | 17 | 18 | @pytest.fixture(params=[128, 512]) 19 | def hop_length(request): 20 | return request.param 21 | 22 | 23 | @pytest.fixture(params=[None, 24 | 'tests/data/test.jams', 25 | jams.load('tests/data/test.jams')]) 26 | def jam(request): 27 | return request.param 28 | 29 | 30 | @pytest.mark.parametrize('audio_f', [None, 'tests/data/test.ogg']) 31 | @pytest.mark.parametrize('y', [None, 'tests/data/test.ogg']) 32 | 
@pytest.mark.parametrize('sr2', [None, 22050]) 33 | @pytest.mark.parametrize('crop', [False, True]) 34 | def test_pump(audio_f, jam, y, sr, sr2, hop_length, crop): 35 | 36 | ops = [pumpp.feature.STFT(name='stft', sr=sr, 37 | hop_length=hop_length, 38 | n_fft=2*hop_length), 39 | 40 | pumpp.task.BeatTransformer(name='beat', sr=sr, 41 | hop_length=hop_length), 42 | 43 | pumpp.task.ChordTransformer(name='chord', sr=sr, 44 | hop_length=hop_length), 45 | 46 | pumpp.task.StaticLabelTransformer(name='tags', 47 | namespace='tag_open', 48 | labels=['rock', 'jazz'])] 49 | 50 | P = pumpp.Pump(*ops) 51 | 52 | if audio_f is None and y is None: 53 | # no input 54 | with pytest.raises(pumpp.ParameterError): 55 | data = P.transform(audio_f=audio_f, jam=jam, y=y, sr=sr2) 56 | elif y is not None and sr2 is None: 57 | # input buffer, but no sampling rate 58 | y = librosa.load(y, sr=sr2)[0] 59 | with pytest.raises(pumpp.ParameterError): 60 | data = P.transform(audio_f=audio_f, jam=jam, y=y, sr=sr2) 61 | elif y is not None: 62 | y = librosa.load(y, sr=sr2)[0] 63 | data = P.transform(audio_f=audio_f, jam=jam, y=y, sr=sr2) 64 | else: 65 | 66 | fields = set(['stft/mag', 67 | 'stft/phase', 68 | 'beat/beat', 69 | 'beat/downbeat', 70 | 'beat/mask_downbeat', 71 | 'chord/pitch', 72 | 'chord/root', 73 | 'chord/bass', 74 | 'tags/tags']) 75 | 76 | valids = set(['beat/_valid', 'chord/_valid', 'tags/_valid']) 77 | 78 | assert set(P.fields.keys()) == fields 79 | 80 | data = P.transform(audio_f=audio_f, jam=jam, y=y, sr=sr2, crop=crop) 81 | data2 = P(audio_f=audio_f, jam=jam, y=y, sr=sr2, crop=crop) 82 | 83 | # Fields we should have: 84 | assert set(data.keys()) == fields | valids 85 | 86 | # time shapes should be the same for annotations 87 | assert data['beat/beat'].shape[1] == data['beat/downbeat'].shape[1] 88 | assert data['beat/beat'].shape[1] == data['chord/pitch'].shape[1] 89 | assert data['beat/beat'].shape[1] == data['chord/root'].shape[1] 90 | assert data['beat/beat'].shape[1] == 
data['chord/bass'].shape[1] 91 | 92 | # Audio features can be off by at most a frame 93 | if crop: 94 | assert data['stft/mag'].shape[1] == data['beat/beat'].shape[1] 95 | assert data['stft/mag'].shape[1] == data['chord/pitch'].shape[1] 96 | else: 97 | assert (np.abs(data['stft/mag'].shape[1] - data['beat/beat'].shape[1]) 98 | * hop_length / float(sr)) <= 0.05 99 | 100 | assert data.keys() == data2.keys() 101 | for k in data: 102 | assert np.allclose(data[k], data2[k]) 103 | 104 | 105 | @pytest.mark.parametrize('audio_f', ['tests/data/test.ogg']) 106 | def test_pump_empty(audio_f, jam, sr, hop_length): 107 | 108 | pump = pumpp.Pump() 109 | data = pump.transform(audio_f, jam) 110 | assert data == dict() 111 | 112 | 113 | def test_pump_add(sr, hop_length): 114 | 115 | ops = [pumpp.feature.STFT(name='stft', sr=sr, 116 | hop_length=hop_length, 117 | n_fft=2*hop_length), 118 | 119 | pumpp.task.BeatTransformer(name='beat', sr=sr, 120 | hop_length=hop_length), 121 | 122 | pumpp.task.ChordTransformer(name='chord', sr=sr, 123 | hop_length=hop_length), 124 | 125 | pumpp.task.StaticLabelTransformer(name='tags', 126 | namespace='tag_open', 127 | labels=['rock', 'jazz'])] 128 | 129 | pump = pumpp.Pump() 130 | assert pump.ops == [] 131 | 132 | for op in ops: 133 | pump.add(op) 134 | assert op in pump.ops 135 | 136 | 137 | @pytest.mark.xfail(raises=pumpp.ParameterError) 138 | def test_pump_add_bad(): 139 | 140 | pumpp.Pump('foo') 141 | 142 | 143 | @pytest.mark.xfail(raises=pumpp.ParameterError) 144 | def test_pump_add_twice(sr, hop_length): 145 | 146 | op = pumpp.feature.STFT(name='stft', sr=sr, 147 | hop_length=hop_length, 148 | n_fft=2*hop_length) 149 | 150 | P = pumpp.Pump() 151 | 152 | P.add(op) 153 | P.add(op) 154 | 155 | 156 | @pytest.mark.xfail(raises=KeyError) 157 | def test_pump_badkey(sr, hop_length): 158 | 159 | op = pumpp.feature.STFT(name='stft', sr=sr, 160 | hop_length=hop_length, 161 | n_fft=2*hop_length) 162 | 163 | P = pumpp.Pump(op) 164 | 165 | P['bad key'] 166 
| 167 | 168 | @pytest.mark.parametrize('n_samples', [None, 10]) 169 | @pytest.mark.parametrize('duration', [1, 5]) 170 | @pytest.mark.parametrize('rng', [None, 1]) 171 | def test_pump_sampler(sr, hop_length, n_samples, duration, rng): 172 | ops = [pumpp.feature.STFT(name='stft', sr=sr, 173 | hop_length=hop_length, 174 | n_fft=2*hop_length), 175 | 176 | pumpp.task.BeatTransformer(name='beat', sr=sr, 177 | hop_length=hop_length)] 178 | 179 | P = pumpp.Pump(*ops) 180 | 181 | S1 = pumpp.Sampler(n_samples, duration, random_state=rng, *ops) 182 | S2 = P.sampler(n_samples, duration, random_state=rng) 183 | 184 | assert S1._time == S2._time 185 | assert S1.n_samples == S2.n_samples 186 | assert S1.duration == S2.duration 187 | 188 | 189 | #@pytest.mark.skip 190 | def test_pump_layers(sr, hop_length): 191 | ops = [pumpp.feature.STFT(name='stft', sr=sr, 192 | hop_length=hop_length, 193 | n_fft=2*hop_length), 194 | 195 | pumpp.feature.CQT(name='cqt', sr=sr, 196 | hop_length=hop_length), 197 | 198 | pumpp.task.BeatTransformer(name='beat', sr=sr, 199 | hop_length=hop_length)] 200 | 201 | P = pumpp.Pump(*ops) 202 | 203 | L1 = P.layers() 204 | L2 = dict() 205 | L2.update(ops[0].layers()) 206 | L2.update(ops[1].layers()) 207 | 208 | assert L1.keys() == L2.keys() 209 | 210 | for k in L1: 211 | assert L1[k].dtype == L2[k].dtype 212 | for d1, d2 in zip(L1[k].shape, L2[k].shape): 213 | assert str(d1) == str(d2) 214 | 215 | # test other input layers 216 | P.layers('tf.keras') 217 | P.layers('tf') 218 | 219 | 220 | def test_pump_str(sr, hop_length): 221 | 222 | ops = [pumpp.feature.STFT(name='stft', sr=sr, 223 | hop_length=hop_length, 224 | n_fft=2*hop_length), 225 | 226 | pumpp.task.BeatTransformer(name='beat', sr=sr, 227 | hop_length=hop_length), 228 | 229 | pumpp.task.ChordTransformer(name='chord', sr=sr, 230 | hop_length=hop_length), 231 | 232 | pumpp.task.StaticLabelTransformer(name='tags', 233 | namespace='tag_open', 234 | labels=['rock', 'jazz'])] 235 | 236 | pump = 
pumpp.Pump(*ops) 237 | 238 | assert isinstance(str(pump), str) 239 | 240 | 241 | def test_pump_repr_html(sr, hop_length): 242 | 243 | ops = [pumpp.feature.STFT(name='stft', sr=sr, 244 | hop_length=hop_length, 245 | n_fft=2*hop_length), 246 | 247 | pumpp.task.BeatTransformer(name='beat', sr=sr, 248 | hop_length=hop_length), 249 | 250 | pumpp.task.ChordTransformer(name='chord', sr=sr, 251 | hop_length=hop_length), 252 | 253 | pumpp.task.StaticLabelTransformer(name='tags', 254 | namespace='tag_open', 255 | labels=['rock', 'jazz'])] 256 | 257 | pump = pumpp.Pump(*ops) 258 | 259 | assert isinstance(pump._repr_html_(), str) 260 | 261 | 262 | def test_pump_skip(sr, hop_length, tmp_path): 263 | ops = [pumpp.feature.STFT(name='stft', sr=sr, 264 | hop_length=hop_length, 265 | n_fft=2*hop_length), 266 | 267 | pumpp.feature.Tempogram(name='tempo', sr=sr, 268 | win_length=384, 269 | hop_length=hop_length), 270 | 271 | pumpp.task.BeatTransformer(name='beat', sr=sr, 272 | hop_length=hop_length)] 273 | 274 | audio_f = 'tests/data/test.ogg' 275 | jam_f = 'tests/data/test.jams' 276 | KEY = 'tempo/tempogram' 277 | SENTINEL = (None,) 278 | data = {KEY: SENTINEL} 279 | 280 | P = pumpp.Pump(*ops) 281 | fields = set(P.fields) 282 | 283 | get_valid_fields = lambda x: {f for f in set(x) if not f.endswith('_valid')} 284 | 285 | # see if existing keys are skipped 286 | X = P.transform(audio_f, data=dict(data)) 287 | assert X[KEY] is SENTINEL, 'field was overwritten' 288 | assert get_valid_fields(X) == fields 289 | 290 | # make sure fields are computed normally 291 | X = P.transform(audio_f) 292 | assert X[KEY] is not SENTINEL, 'field was not computed' 293 | assert get_valid_fields(X) == fields 294 | 295 | # see if loading audio is skipped if we don't need it 296 | 297 | feature_ops = [op for op in P.ops if isinstance(op, pumpp.FeatureExtractor)] 298 | data = {k: SENTINEL for op in feature_ops for k in op.fields} 299 | 300 | X = P.transform(None, jam_f, data=data) 301 | assert all(X[k] is 
SENTINEL 302 | for op in feature_ops 303 | for k in op.fields), 'field should not have been computed' 304 | assert get_valid_fields(X) == fields 305 | -------------------------------------------------------------------------------- /tests/test_misc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | '''Miscellaneous utility tests''' 4 | 5 | import pytest 6 | import numpy as np 7 | 8 | import pumpp 9 | 10 | from pumpp import ParameterError 11 | 12 | xfail = pytest.mark.xfail 13 | 14 | 15 | @pytest.mark.parametrize('dtype', 16 | [int, np.int64, 17 | pytest.param('not a type', 18 | marks=xfail(raises=ParameterError))]) 19 | def test_scope_type(dtype): 20 | 21 | scope = pumpp.base.Scope(None) 22 | scope.register('foo', [None], dtype) 23 | 24 | 25 | @pytest.mark.parametrize('shape', 26 | [[None], [1], [1, None], 27 | pytest.param(1, marks=xfail(raises=ParameterError)), 28 | pytest.param(None, marks=xfail(raises=ParameterError)), 29 | pytest.param(23.5, marks=xfail(raises=ParameterError)), 30 | pytest.param('not a shape', marks=xfail(raises=ParameterError))]) 31 | def test_scope_badshape(shape): 32 | 33 | scope = pumpp.base.Scope(None) 34 | scope.register('foo', shape, int) 35 | 36 | 37 | def test_bad_extractor(): 38 | ext = pumpp.feature.FeatureExtractor(None, 22050, 512) 39 | 40 | with pytest.raises(NotImplementedError): 41 | ext.transform(np.zeros(1024), 22050) 42 | 43 | 44 | @pytest.mark.parametrize('dtype, fill', 45 | [(int, 0), 46 | (bool, False), 47 | (float, np.nan), 48 | (complex, np.nan)]) 49 | def test_fill_value(dtype, fill): 50 | 51 | v = pumpp.task.base.fill_value(dtype) 52 | 53 | assert isinstance(v, dtype) 54 | assert v == fill or np.isnan(v) and np.isnan(fill) 55 | -------------------------------------------------------------------------------- /tests/test_sampler.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''Testing the sampler module'''

import numpy as np

import pytest

import pumpp
xfail = pytest.mark.xfail


# Make a fixture with some audio and task output
@pytest.fixture(params=[11025], scope='module')
def sr(request):
    '''Sampling rate for the test signal.'''
    return request.param


@pytest.fixture(params=[512], scope='module')
def hop_length(request):
    '''Hop length (in samples) shared by all operators.'''
    return request.param


@pytest.fixture(scope='module')
def ops(sr, hop_length):
    '''Build a representative set of pump operators:

    two feature extractors, a time-varying task, and a static task.
    '''
    ops = []

    # Let's put on two feature extractors
    ops.append(pumpp.feature.STFT(name='stft', sr=sr,
                                  hop_length=hop_length,
                                  n_fft=hop_length))

    ops.append(pumpp.feature.Tempogram(name='rhythm', sr=sr,
                                       hop_length=hop_length,
                                       win_length=hop_length))

    # A time-varying annotation
    ops.append(pumpp.task.ChordTransformer(name='chord', sr=sr,
                                           hop_length=hop_length))

    # And a static annotation
    ops.append(pumpp.task.VectorTransformer(namespace='vector',
                                            dimension=32,
                                            name='vec'))

    # No teardown is required, so return (rather than yield) the list
    return ops


@pytest.fixture(scope='module')
def data(ops):
    '''Transform the test audio/annotation pair through all operators.'''
    audio_f = 'tests/data/test.ogg'
    jams_f = 'tests/data/test.jams'

    P = pumpp.Pump(*ops)
    return P.transform(audio_f=audio_f, jam=jams_f)


@pytest.fixture(params=[4, 16, None], scope='module')
def n_samples(request):
    '''Number of patches to draw; None means unbounded.'''
    return request.param


@pytest.fixture(params=[16, 32], scope='module')
def duration(request):
    '''Patch duration, in frames.'''
    return request.param


@pytest.fixture(params=[None, 16, 256,
                        pytest.param(-1, marks=xfail(raises=pumpp.ParameterError))],
                scope='module')
def stride(request):
    '''Stride for the sequential sampler; negative values are invalid.'''
    return request.param


@pytest.fixture(params=[None, 20170401, np.random.RandomState(100),
                        pytest.param('bad rng', marks=xfail(raises=pumpp.ParameterError))],
                scope='module')
def rng(request):
    '''Random state: None, an int seed, a RandomState, or an invalid value.'''
    return request.param


def _reference_keys(ops):
    '''Collect the full set of field names produced by a list of operators.

    Shared by all sampler tests below, which previously each repeated
    this accumulation loop inline.
    '''
    keys = set()
    for op in ops:
        keys |= set(op.fields.keys())
    return keys


def test_sampler(data, ops, n_samples, duration, rng):
    '''Patches from Sampler carry the right fields, shapes, and count.'''
    MAX_SAMPLES = 30
    sampler = pumpp.Sampler(n_samples, duration, *ops, random_state=rng)

    # Build the set of reference keys that we want to track
    ref_keys = _reference_keys(ops)

    for datum, n in zip(sampler(data), range(MAX_SAMPLES)):
        # First, test that we have the right fields
        assert set(datum.keys()) == ref_keys

        # Now test that shape is preserved in the right way
        for key in datum:
            ref_shape = list(data[key].shape)
            for tdim in sampler._time[key]:
                ref_shape[tdim] = duration

            # Check that all keys have length=1
            assert datum[key].shape[0] == 1
            assert list(datum[key].shape[1:]) == ref_shape[1:]

    # Test that we got the right number of samples out
    if n_samples is None:
        assert n == MAX_SAMPLES - 1
    else:
        assert n == n_samples - 1


def test_sequential_sampler(data, ops, duration, stride, rng):
    '''SequentialSampler yields patches with the right fields and shapes.'''
    sampler = pumpp.SequentialSampler(duration, *ops, stride=stride, random_state=rng)

    # Build the set of reference keys that we want to track
    ref_keys = _reference_keys(ops)

    for datum in sampler(data):
        # First, test that we have the right fields
        assert set(datum.keys()) == ref_keys

        # Now test that shape is preserved in the right way
        for key in datum:
            ref_shape = list(data[key].shape)
            for tdim in sampler._time[key]:
                ref_shape[tdim] = duration

            # Check that all keys have length=1
            assert datum[key].shape[0] == 1
            assert list(datum[key].shape[1:]) == ref_shape[1:]


def test_slicer():
    '''Slicer.crop truncates every time-like axis to the common minimum.'''
    scope1 = pumpp.base.Scope('test1')
    scope1.register('first', (None, 10), int)
    scope1.register('second', (2, None), int)
    scope1.register('none', (16, 16), int)

    scope2 = pumpp.base.Scope('test2')
    scope2.register('first', (None, 5), int)
    scope2.register('second', (20, None), int)
    scope2.register('square', (None, None, 3), int)

    slicer = pumpp.base.Slicer(scope1, scope2)

    # Minimum time for all of these is 8
    data_in = {'test1/first': np.random.randint(0, 7, size=(1, 8, 10)),
               'test1/second': np.random.randint(0, 7, size=(1, 2, 100)),
               'test1/none': np.random.randint(0, 7, size=(1, 16, 16)),
               'test2/first': np.random.randint(0, 7, size=(1, 9, 5)),
               'test2/second': np.random.randint(0, 7, (1, 20, 105)),
               'test2/square': np.random.randint(0, 7, (1, 20, 20, 3))}

    data_out = slicer.crop(data_in)
    assert set(data_out.keys()) == set(data_in.keys())

    assert data_out['test1/first'].shape == (1, 8, 10)
    assert np.all(data_out['test1/first'] == data_in['test1/first'][:, :8, :])

    assert data_out['test1/second'].shape == (1, 2, 8)
    assert np.all(data_out['test1/second'] == data_in['test1/second'][:, :, :8])

    assert data_out['test1/none'].shape == (1, 16, 16)
    assert np.all(data_out['test1/none'] == data_in['test1/none'])

    assert data_out['test2/first'].shape == (1, 8, 5)
    assert np.all(data_out['test2/first'] == data_in['test2/first'][:, :8, :])

    assert data_out['test2/second'].shape == (1, 20, 8)
    assert np.all(data_out['test2/second'] == data_in['test2/second'][:, :, :8])

    assert data_out['test2/square'].shape == (1, 8, 8, 3)
    assert np.all(data_out['test2/square'] == data_in['test2/square'][:, :8, :8, :])


@pytest.mark.xfail(raises=pumpp.ParameterError)
def test_slicer_fail():
    '''Constructing a Slicer from a non-Scope must raise ParameterError.'''
    pumpp.base.Slicer('not a scope')


@pytest.mark.parametrize('durations',
                         [(8, 16), (16, 16),
                          pytest.param((0, 8), marks=xfail(raises=pumpp.ParameterError)),
                          pytest.param((8, 4), marks=xfail(raises=pumpp.ParameterError))])
def test_vlsampler(data, ops, n_samples, durations, rng):
    '''VariableLengthSampler patches fall within [min, max] duration.'''
    MAX_SAMPLES = 30
    min_duration, max_duration = durations
    sampler = pumpp.VariableLengthSampler(n_samples, min_duration,
                                          max_duration,
                                          *ops, random_state=rng)

    # Build the set of reference keys that we want to track
    ref_keys = _reference_keys(ops)

    n = 0
    for datum, n in zip(sampler(data), range(MAX_SAMPLES)):
        # First, test that we have the right fields
        assert set(datum.keys()) == ref_keys

        # Now test that shape is preserved in the right way
        for key in datum:
            for tdim in sampler._time[key]:
                assert datum[key].shape[tdim] >= min_duration
                assert datum[key].shape[tdim] <= max_duration

            # Check that all keys have length=1
            assert datum[key].shape[0] == 1

    # Test that we got the right number of samples out
    if n_samples is None:
        assert n == MAX_SAMPLES - 1
    else:
        assert n == n_samples - 1


@pytest.mark.xfail(raises=pumpp.DataError)
def test_sampler_short_error(data, ops):
    '''Requesting patches longer than the input must raise DataError.

    BUGFIX: the original loop body referenced the undefined names
    ``duration`` and ``n_samples`` (copy-pasted from test_sampler, which
    takes them as fixtures); had the expected DataError ever failed to
    fire, this test would have died with a NameError instead of failing
    cleanly.  The dead shape-check is removed.
    '''
    MAX_SAMPLES = 2
    # 5000 frames is far longer than the test clip, so iteration
    # should raise DataError before yielding anything
    sampler = pumpp.Sampler(MAX_SAMPLES, 5000, *ops)

    ref_keys = _reference_keys(ops)

    for datum, n in zip(sampler(data), range(MAX_SAMPLES)):
        # If anything is yielded at all, it should still be well-formed
        assert set(datum.keys()) == ref_keys


# ---------------------------------------------------------------------
# tests/test_utils.py
# ---------------------------------------------------------------------
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''Tests for feature utility helpers'''

import pumpp.feature._utils


@pytest.mark.parametrize('dtype', ['uint8', np.uint8])
def test_quantize(dtype):
    '''Quantize maps [-5, 5] linearly onto the uint8 range.'''
    # The range -5 to 5 is broken into 256 equal pieces
    # -5/3 lands at 85 (1/3)
    # 5/3 lands at 2*85 = 170  (the original comment said 270, which is
    # both arithmetically wrong and out of uint8 range)
    # 5 lands at the max
    x = np.asarray([-5, -5/3, 5/3, 5])
    y = pumpp.feature._utils.quantize(x, dtype=dtype)
    assert np.allclose(y, [0, 85, 170, 255])


def test_quantize_min():
    '''An explicit ref_min clamps everything below it to 0.'''
    x = np.asarray([-5, -5/3, 5/3, 5])
    y = pumpp.feature._utils.quantize(x, ref_min=0)
    assert np.allclose(y, [0, 0, 85, 255])


def test_quantize_max():
    '''An explicit ref_max clamps everything above it to 255.'''
    x = np.asarray([-5, -5/3, 5/3, 5])
    y = pumpp.feature._utils.quantize(x, ref_max=0)
    assert np.allclose(y, [0, 170, 255, 255])


@pytest.mark.xfail(raises=pumpp.ParameterError)
@pytest.mark.parametrize('dtype', ['int8', 'float32'])
def test_quantize_bad_dtype(dtype):
    '''Non-unsigned-integer target dtypes must raise ParameterError.'''
    x = np.asarray([-5, -5/3, 5/3, 5])
    pumpp.feature._utils.quantize(x, dtype=dtype)