├── .github ├── PULL_REQUEST_TEMPLATE.md ├── environment-ci.yml └── workflows │ ├── ci.yml │ └── publish.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE.md ├── LICENSE.md ├── README.md ├── docs ├── Makefile ├── api.rst ├── changes.rst ├── conf.py ├── index.rst └── requirements.txt ├── pumpp ├── __init__.py ├── base.py ├── core.py ├── exceptions.py ├── feature │ ├── __init__.py │ ├── _utils.py │ ├── base.py │ ├── cqt.py │ ├── fft.py │ ├── mel.py │ ├── rhythm.py │ └── time.py ├── labels.py ├── sampler.py ├── task │ ├── __init__.py │ ├── base.py │ ├── beat.py │ ├── chord.py │ ├── key.py │ ├── regression.py │ ├── structure.py │ └── tags.py └── version.py ├── pyproject.toml ├── setup.cfg ├── setup.py └── tests ├── data ├── test.jams └── test.ogg ├── test_core.py ├── test_decode.py ├── test_feature.py ├── test_misc.py ├── test_sampler.py ├── test_task.py └── test_utils.py /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 5 | #### Reference Issue 6 | 7 | 8 | 9 | #### What does this implement/fix? Explain your changes. 10 | 11 | 12 | #### Any other comments? 
13 | 14 | -------------------------------------------------------------------------------- /.github/environment-ci.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | # required 7 | - pip 8 | - librosa 9 | - keras>=2.6 10 | - tensorflow>=2.0 11 | - scikit-learn>=0.20 12 | # optional, but required for testing 13 | - pytest-mpl 14 | - pytest-cov 15 | - pytest-faulthandler 16 | - pytest 17 | - contextlib2 18 | - coverage 19 | - pip: 20 | - soxr 21 | - samplerate 22 | - jams>=0.3 23 | - mir_eval>=0.5 24 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.ref }} 13 | cancel-in-progress: True 14 | 15 | jobs: 16 | test: 17 | name: "Python ${{ matrix.python-version }} on ${{ matrix.os }}" 18 | runs-on: ${{ matrix.os }} 19 | 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | include: 24 | - os: ubuntu-latest 25 | python-version: "3.7" 26 | channel-priority: "strict" 27 | envfile: ".github/environment-ci.yml" 28 | 29 | - os: ubuntu-latest 30 | python-version: "3.8" 31 | channel-priority: "strict" 32 | envfile: ".github/environment-ci.yml" 33 | 34 | - os: ubuntu-latest 35 | python-version: "3.9" 36 | channel-priority: "strict" 37 | envfile: ".github/environment-ci.yml" 38 | 39 | steps: 40 | - uses: actions/checkout@v2 41 | with: 42 | submodules: true 43 | 44 | - name: Install OS dependencies 45 | shell: bash -l {0} 46 | run: | 47 | case "${{ runner.os }}" in 48 | Linux) 49 | sudo apt-get update -yy 50 | sudo apt-get install -yy libsamplerate0 51 | ;; 52 | macOS) 53 | brew install libsamplerate 54 | ;; 55 | esac 56 | 57 | - name: Cache conda 58 | uses: 
actions/cache@v2 59 | env: 60 | # Increase this value to reset cache if .github/environment-ci.yml has not changed 61 | CACHE_NUMBER: 1 62 | with: 63 | path: ~/conda_pkgs_dir 64 | key: ${{ runner.os }}-${{ matrix.python-version }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles( matrix.envfile ) }} 65 | 66 | - name: Install Conda environment 67 | uses: conda-incubator/setup-miniconda@v2 68 | with: 69 | auto-update-conda: true 70 | python-version: ${{ matrix.python-version }} 71 | add-pip-as-python-dependency: true 72 | auto-activate-base: false 73 | activate-environment: test 74 | channel-priority: ${{ matrix.channel-priority }} 75 | environment-file: ${{ matrix.envfile }} 76 | use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly! 77 | 78 | - name: Conda info 79 | shell: bash -l {0} 80 | run: | 81 | conda info -a 82 | conda list 83 | 84 | - name: Install pumpp 85 | shell: bash -l {0} 86 | run: python -m pip install --upgrade-strategy only-if-needed -e .[tests] 87 | 88 | - name: Run pytest 89 | shell: bash -l {0} 90 | run: pytest 91 | 92 | - name: Upload coverage to Codecov 93 | uses: codecov/codecov-action@v1 94 | with: 95 | token: ${{ secrets.CODECOV_TOKEN }} 96 | files: ./coverage.xml 97 | directory: ./coverage/reports/ 98 | flags: unittests 99 | env_vars: OS,PYTHON 100 | name: codecov-umbrella 101 | fail_ci_if_error: true 102 | path_to_write_report: ./coverage/codecov_report.txt 103 | verbose: true 104 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distributions 📦 to PyPI and TestPyPI 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | build-n-publish: 9 | name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@master 14 | - name: Set up Python 3.7 15 | uses: 
actions/setup-python@v1 16 | with: 17 | python-version: 3.7 18 | 19 | - name: Install pypa/build 20 | run: >- 21 | python -m 22 | pip install 23 | build 24 | --user 25 | - name: Build a binary wheel and a source tarball 26 | run: >- 27 | python -m 28 | build 29 | --sdist 30 | --wheel 31 | --outdir dist/ 32 | . 33 | - name: Publish distribution 📦 to Test PyPI 34 | uses: pypa/gh-action-pypi-publish@master 35 | with: 36 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 37 | repository_url: https://test.pypi.org/legacy/ 38 | - name: Publish distribution 📦 to PyPI 39 | if: startsWith(github.ref, 'refs/tags') 40 | uses: pypa/gh-action-pypi-publish@master 41 | with: 42 | password: ${{ secrets.PYPI_API_TOKEN }} 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | docs/_* 66 | docs/generated/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # IPython Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # dotenv 81 | .env 82 | 83 | # virtualenv 84 | venv/ 85 | ENV/ 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | 93 | # VS Code 94 | .vscode 95 | 96 | # pytest 97 | .pytest_cache 98 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at brian.mcfee@nyu.edu. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | Contributing code 3 | ================= 4 | 5 | How to contribute 6 | ----------------- 7 | 8 | The preferred way to contribute to pumpp is to fork the 9 | [main repository](http://github.com/bmcfee/pumpp/) on 10 | GitHub: 11 | 12 | 1. Fork the [project repository](http://github.com/bmcfee/pumpp): 13 | click on the 'Fork' button near the top of the page. This creates 14 | a copy of the code under your account on the GitHub server. 15 | 16 | 2. Clone this copy to your local disk: 17 | 18 | $ git clone git@github.com:YourLogin/pumpp.git 19 | $ cd pumpp 20 | 21 | 3. Create a branch to hold your changes: 22 | 23 | $ git checkout -b my-feature 24 | 25 | and start making changes. Never work in the ``master`` branch! 26 | 27 | 4. Work on this copy on your computer using Git to do the version 28 | control. 
When you're done editing, do: 29 | 30 | $ git add modified_files 31 | $ git commit 32 | 33 | to record your changes in Git, then push them to GitHub with: 34 | 35 | $ git push -u origin my-feature 36 | 37 | Finally, go to the web page of your fork of the pumpp repo, 38 | and click 'Pull request' to send your changes to the maintainers for 39 | review. This will send an email to the committers. 40 | 41 | (If any of the above seems like magic to you, then look up the 42 | [Git documentation](http://git-scm.com/documentation) on the web.) 43 | 44 | It is recommended to check that your contribution complies with the 45 | following rules before submitting a pull request: 46 | 47 | - All public methods should have informative docstrings with sample 48 | usage presented. 49 | 50 | You can also check for common programming errors with the following 51 | tools: 52 | 53 | - Code with good unittest coverage (at least 80%), check with: 54 | 55 | $ pip install pytest pytest-cov 56 | $ pytest 57 | 58 | - No pyflakes warnings, check with: 59 | 60 | $ pip install pyflakes 61 | $ pyflakes path/to/module.py 62 | 63 | - No PEP8 warnings, check with: 64 | 65 | $ pip install pep8 66 | $ pep8 path/to/module.py 67 | 68 | - AutoPEP8 can help you fix some of the easy redundant errors: 69 | 70 | $ pip install autopep8 71 | $ autopep8 path/to/pep8.py 72 | 73 | Filing bugs 74 | ----------- 75 | We use Github issues to track all bugs and feature requests; feel free to 76 | open an issue if you have found a bug or wish to see a feature implemented. 77 | 78 | It is recommended to check that your issue complies with the 79 | following rules before submitting: 80 | 81 | - Verify that your issue is not being currently addressed by other 82 | [issues](https://github.com/bmcfee/pumpp/issues?q=) 83 | or [pull requests](https://github.com/bmcfee/pumpp/pulls?q=). 84 | 85 | - Please ensure all code snippets and error messages are formatted in 86 | appropriate code blocks. 
87 | See [Creating and highlighting code blocks](https://help.github.com/articles/creating-and-highlighting-code-blocks). 88 | 89 | - Please include your operating system type and version number, as well 90 | as your Python, scikit-learn, numpy, and scipy versions. This information 91 | can be found by running the following code snippet: 92 | 93 | ```python 94 | import platform; print(platform.platform()) 95 | import sys; print("Python", sys.version) 96 | import numpy; print("NumPy", numpy.__version__) 97 | import scipy; print("SciPy", scipy.__version__) 98 | import sklearn; print("sklearn", sklearn.__version__) 99 | import librosa; print("librosa", librosa.__version__) 100 | import jams; print("jams", jams.__version__) 101 | import mir_eval; print("mir_eval", mir_eval.__version__) 102 | ``` 103 | 104 | Documentation 105 | ------------- 106 | 107 | You can edit the documentation using any text editor and then generate 108 | the HTML output by typing ``make html`` from the docs/ directory. 109 | The resulting HTML files will be placed in _build/html/ and are viewable 110 | in a web browser. See the README file in the docs/ directory for more information. 111 | 112 | For building the documentation, you will need 113 | [sphinx](http://sphinx.pocoo.org/), 114 | [matplotlib](http://matplotlib.sourceforge.net/), and [numpydoc](https://pypi.python.org/pypi/numpydoc). 115 | 116 | Note 117 | ---- 118 | This document was gleefully borrowed from [scikit-learn](http://scikit-learn.org/). 
119 | -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | #### Description 4 | 5 | 6 | #### Steps/Code to Reproduce 7 | 11 | 12 | #### Expected Results 13 | 14 | 15 | #### Actual Results 16 | 17 | 18 | #### Versions 19 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | ISC License 2 | 3 | Copyright (c) 2016, 2017-, Brian McFee 4 | 5 | Permission to use, copy, modify, and/or distribute this software for any 6 | purpose with or without fee is hereby granted, provided that the above 7 | copyright notice and this permission notice appear in all copies. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pumpp 2 | [![GitHub license](https://img.shields.io/badge/license-ISC-blue.svg)](https://raw.githubusercontent.com/bmcfee/pumpp/master/LICENSE) 3 | [![CI](https://github.com/bmcfee/pumpp/actions/workflows/ci.yml/badge.svg)](https://github.com/bmcfee/pumpp/actions/workflows/ci.yml) 4 | [![codecov](https://codecov.io/gh/bmcfee/pumpp/branch/main/graph/badge.svg?token=i7YxIwweBQ)](https://codecov.io/gh/bmcfee/pumpp) 5 | [![Documentation Status](http://readthedocs.org/projects/pumpp/badge/?version=latest)](http://pumpp.readthedocs.io/en/latest/?badge=latest) 6 | 7 | 8 | 9 | practically universal music pre-processor 10 | 11 | ### pumpp up the jams 12 | 13 | The goal of this package is to make it easy to convert pairs of `(audio, jams)` into data that can 14 | be easily consumed by statistical algorithms. Some desired features: 15 | 16 | - Converting tags to sparse encoding vectors 17 | - Sampling `(start, end, label)` to frame-level annotations at a specific frame rate 18 | - Extracting input features (eg, Mel spectra or CQT) from audio 19 | - Converting between annotation spaces for a given task 20 | 21 | ## Example usage 22 | 23 | ```python 24 | 25 | >>> import jams 26 | >>> import pumpp 27 | 28 | >>> audio_f = '/path/to/audio/myfile.ogg' 29 | >>> jams_f = '/path/to/annotations/myfile.jamz' 30 | 31 | >>> # Set up sampling and frame rate parameters 32 | >>> sr, hop_length = 44100, 512 33 | 34 | >>> # Create a feature extraction object 35 | >>> p_cqt = pumpp.feature.CQT(name='cqt', sr=sr, hop_length=hop_length) 36 | 37 | >>> # Create some annotation extractors 38 | >>> p_beat = pumpp.task.BeatTransformer(sr=sr, hop_length=hop_length) 39 | >>> p_chord = pumpp.task.SimpleChordTransformer(sr=sr, hop_length=hop_length) 40 | 41 | >>> # Collect the operators in a pump 42 | >>> pump = 
pumpp.Pump(p_cqt, p_beat, p_chord) 43 | 44 | >>> # Apply the extractors to generate training data 45 | >>> data = pump(audio_f=audio_f, jam=jams_f) 46 | 47 | >>> # Or test data 48 | >>> test_data = pump(audio_f='/my/test/audio.ogg') 49 | 50 | >>> # Or in-memory 51 | >>> y, sr = librosa.load(audio_f) 52 | >>> test_data = pump(y=y, sr=sr) 53 | ``` 54 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make <target>' where <target> is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " epub3 to make an epub3" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | @echo " dummy to check syntax errors of document sources" 51 | 52 | .PHONY: clean 53 | clean: 54 | rm -rf $(BUILDDIR)/* 55 | 56 | .PHONY: html 57 | html: 58 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
61 | 62 | .PHONY: dirhtml 63 | dirhtml: 64 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 65 | @echo 66 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 67 | 68 | .PHONY: singlehtml 69 | singlehtml: 70 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 71 | @echo 72 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 73 | 74 | .PHONY: pickle 75 | pickle: 76 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 77 | @echo 78 | @echo "Build finished; now you can process the pickle files." 79 | 80 | .PHONY: json 81 | json: 82 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 83 | @echo 84 | @echo "Build finished; now you can process the JSON files." 85 | 86 | .PHONY: htmlhelp 87 | htmlhelp: 88 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 89 | @echo 90 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 91 | ".hhp project file in $(BUILDDIR)/htmlhelp." 92 | 93 | .PHONY: qthelp 94 | qthelp: 95 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 96 | @echo 97 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 98 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 99 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pumpp.qhcp" 100 | @echo "To view the help file:" 101 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pumpp.qhc" 102 | 103 | .PHONY: applehelp 104 | applehelp: 105 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 106 | @echo 107 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 108 | @echo "N.B. You won't be able to view it unless you put it in" \ 109 | "~/Library/Documentation/Help or install it in your application" \ 110 | "bundle." 111 | 112 | .PHONY: devhelp 113 | devhelp: 114 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 115 | @echo 116 | @echo "Build finished." 
117 | @echo "To view the help file:" 118 | @echo "# mkdir -p $$HOME/.local/share/devhelp/pumpp" 119 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pumpp" 120 | @echo "# devhelp" 121 | 122 | .PHONY: epub 123 | epub: 124 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 125 | @echo 126 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 127 | 128 | .PHONY: epub3 129 | epub3: 130 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 131 | @echo 132 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 133 | 134 | .PHONY: latex 135 | latex: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo 138 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 139 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 140 | "(use \`make latexpdf' here to do that automatically)." 141 | 142 | .PHONY: latexpdf 143 | latexpdf: 144 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 145 | @echo "Running LaTeX files through pdflatex..." 146 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 147 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 148 | 149 | .PHONY: latexpdfja 150 | latexpdfja: 151 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 152 | @echo "Running LaTeX files through platex and dvipdfmx..." 153 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 154 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 155 | 156 | .PHONY: text 157 | text: 158 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 159 | @echo 160 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 161 | 162 | .PHONY: man 163 | man: 164 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 165 | @echo 166 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 167 | 168 | .PHONY: texinfo 169 | texinfo: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo 172 | @echo "Build finished. 
The Texinfo files are in $(BUILDDIR)/texinfo." 173 | @echo "Run \`make' in that directory to run these through makeinfo" \ 174 | "(use \`make info' here to do that automatically)." 175 | 176 | .PHONY: info 177 | info: 178 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 179 | @echo "Running Texinfo files through makeinfo..." 180 | make -C $(BUILDDIR)/texinfo info 181 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 182 | 183 | .PHONY: gettext 184 | gettext: 185 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 186 | @echo 187 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 188 | 189 | .PHONY: changes 190 | changes: 191 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 192 | @echo 193 | @echo "The overview file is in $(BUILDDIR)/changes." 194 | 195 | .PHONY: linkcheck 196 | linkcheck: 197 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 198 | @echo 199 | @echo "Link check complete; look for any errors in the above output " \ 200 | "or in $(BUILDDIR)/linkcheck/output.txt." 201 | 202 | .PHONY: doctest 203 | doctest: 204 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 205 | @echo "Testing of doctests in the sources finished, look at the " \ 206 | "results in $(BUILDDIR)/doctest/output.txt." 207 | 208 | .PHONY: coverage 209 | coverage: 210 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 211 | @echo "Testing of coverage in the sources finished, look at the " \ 212 | "results in $(BUILDDIR)/coverage/python.txt." 213 | 214 | .PHONY: xml 215 | xml: 216 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 217 | @echo 218 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 219 | 220 | .PHONY: pseudoxml 221 | pseudoxml: 222 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 223 | @echo 224 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
225 | 226 | .PHONY: dummy 227 | dummy: 228 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 229 | @echo 230 | @echo "Build finished. Dummy builder generates no files." 231 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. _api: 2 | 3 | .. module:: pump 4 | 5 | .. automodule:: pumpp.core 6 | :members: 7 | :noindex: 8 | 9 | .. automodule:: pumpp.feature 10 | :members: 11 | 12 | .. automodule:: pumpp.task 13 | :members: 14 | 15 | .. automodule:: pumpp.sampler 16 | :members: 17 | :special-members: 18 | :noindex: 19 | 20 | -------------------------------------------------------------------------------- /docs/changes.rst: -------------------------------------------------------------------------------- 1 | Changes 2 | ------- 3 | 4 | 0.6.0 5 | ===== 6 | - `#136`_ Fixed a bug in serialization of objects with randomstate 7 | - `#135`_ Fixed deprecation warnings for numpy 1.20 and librosa 0.9 8 | - `#134`_ Added tensorflow-keras layer constructors 9 | - `#133`_ Fixed a bug in operator maps with missing keys 10 | - `#131`_ Update to modern dependencies (tensorflow 2, librosa 0.8+). Dropped support for python < 11 | 3.6. 12 | - `#128`_ Avoid redundant computation of features 13 | - `#117`_ Added default names for transformations 14 | 15 | .. _#136: https://github.com/bmcfee/pumpp/pull/136 16 | .. _#135: https://github.com/bmcfee/pumpp/pull/135 17 | .. _#134: https://github.com/bmcfee/pumpp/pull/134 18 | .. _#133: https://github.com/bmcfee/pumpp/pull/133 19 | .. _#131: https://github.com/bmcfee/pumpp/pull/131 20 | .. _#128: https://github.com/bmcfee/pumpp/pull/128 21 | .. _#117: https://github.com/bmcfee/pumpp/pull/117 22 | 23 | 24 | 0.5.0 25 | ===== 26 | - `#105`_ Expanding API for layer construction to eventually support alternative frameworks. 27 | - `#104`_ Added API for explicit data types in feature modules. 
28 | - `#103`_ Added quantization support for feature modules. 29 | - `#106`_ Dropped support for python 2.7. 30 | 31 | .. _#106: https://github.com/bmcfee/pumpp/pull/106 32 | .. _#103: https://github.com/bmcfee/pumpp/pull/103 33 | .. _#104: https://github.com/bmcfee/pumpp/pull/104 34 | .. _#105: https://github.com/bmcfee/pumpp/pull/105 35 | 36 | 0.4.0 37 | ===== 38 | - `#100`_ Added viterbi decoding options for tags, chords, and beat transformers 39 | - `#99`_ Updated test suite 40 | 41 | .. _#100: https://github.com/bmcfee/pumpp/pull/100 42 | .. _#99: https://github.com/bmcfee/pumpp/pull/99 43 | 44 | 0.3.3 45 | ===== 46 | - `#95`_ Data durations are now checked before sampling 47 | 48 | .. _#95: https://github.com/bmcfee/pumpp/pull/95 49 | 50 | 0.3.2 51 | ===== 52 | - `#91`_ JAMS annotations are now populated with confidence fields 53 | - `#92`_ Pump objects can pretty-print in jupyter notebooks 54 | 55 | .. _#91: https://github.com/bmcfee/pumpp/pull/91 56 | .. _#92: https://github.com/bmcfee/pumpp/pull/92 57 | 58 | 0.3.1 59 | ===== 60 | - `#88`_ Added time-position coding 61 | - `#87`_ Added variable-length sampler 62 | 63 | .. _#88: https://github.com/bmcfee/pumpp/pull/88 64 | .. _#87: https://github.com/bmcfee/pumpp/pull/87 65 | 66 | 0.3.0 67 | ===== 68 | - `#85`_ Fixed a bug in BeatPosition transforms 69 | - `#84`_ Fixed a bug in the documentation build on readthedocs 70 | - `#83`_ Fixed an off-by-one error in sampler 71 | - `#81`_ Support multiple time-like dimensions in sampling 72 | - `#80`_ Added `crop=` parameter to `Pump.transform`, which can slice temporal data down to a common duration. 73 | 74 | .. _#85: https://github.com/bmcfee/pumpp/pull/85 75 | .. _#84: https://github.com/bmcfee/pumpp/pull/84 76 | .. _#83: https://github.com/bmcfee/pumpp/pull/83 77 | .. _#81: https://github.com/bmcfee/pumpp/pull/81 78 | .. _#80: https://github.com/bmcfee/pumpp/pull/80 79 | 80 | 0.2.4 81 | ===== 82 | - `#76`_ Implemented the beat-position task 83 | 84 | .. 
_#76: https://github.com/bmcfee/pumpp/pull/76 85 | 86 | 87 | 0.2.3 88 | ===== 89 | - `#74`_ Implemented segmentation agreement task 90 | 91 | .. _#74: https://github.com/bmcfee/pumpp/pull/74 92 | 93 | 94 | 0.2.2 95 | ===== 96 | 97 | - `#70`_ Future-proofing against jams 0.3 98 | 99 | .. _#70: https://github.com/bmcfee/pumpp/pull/70 100 | 101 | 0.2.1 102 | ===== 103 | 104 | - `#68`_ Fixed a frame alignment error in task transformers 105 | - `#66`_ Remove warnings for improperly cast STFT data 106 | 107 | .. _#68: https://github.com/bmcfee/pumpp/pull/68 108 | .. _#66: https://github.com/bmcfee/pumpp/pull/66 109 | 110 | 0.2.0 111 | ===== 112 | - `#65`_ Removed old-style (function) transform API in favor of object interface 113 | - `#65`_ Support in-memory analysis 114 | 115 | .. _#65: https://github.com/bmcfee/pumpp/pull/65 116 | 117 | 0.1.5 118 | ===== 119 | - `#61`_ Fixed an alignment bug in feature extractors 120 | 121 | .. _#61: https://github.com/bmcfee/pumpp/pull/61 122 | 123 | 0.1.4 124 | ===== 125 | - `#59`_ harmonic CQT 126 | - `#58`_ Sparse chord output for chord labels 127 | - `#57`_ Updated sampler bindings for Pump object 128 | 129 | .. _#59: https://github.com/bmcfee/pumpp/pull/59 130 | .. _#58: https://github.com/bmcfee/pumpp/pull/58 131 | .. _#57: https://github.com/bmcfee/pumpp/pull/57 132 | 133 | 0.1.3 134 | ===== 135 | 136 | - `#55`_ Refactored the `Sampler` class, added support for random states and the `SequentialSampler` class 137 | 138 | .. _#55: https://github.com/bmcfee/pumpp/pull/55 139 | 140 | 0.1.2 141 | ===== 142 | 143 | - `#51`_ Added named operator index to `Pump` objects 144 | 145 | .. _#51: https://github.com/bmcfee/pumpp/pull/51 146 | 147 | 0.1.1 148 | ===== 149 | 150 | - `#49`_ Added `Pump.layers` constructor for Keras layers on pump containers 151 | - `#47`_ Fixed a bug in `Sampler` that caused a shape mismatch on input/output tensors 152 | when the input JAMS had multiple matching annotations for a given task. 153 | 154 | .. 
_#49: https://github.com/bmcfee/pumpp/pull/49 155 | .. _#47: https://github.com/bmcfee/pumpp/pull/47 156 | 157 | 0.1.0 158 | ===== 159 | 160 | - Initial public release 161 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # pumpp documentation build configuration file, created by 5 | # sphinx-quickstart on Thu Jul 7 21:27:51 2016. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | 19 | # If extensions (or modules to document with autodoc) are in another directory, 20 | # add these directories to sys.path here. If the directory is relative to the 21 | # documentation root, use os.path.abspath to make it absolute, like shown here. 22 | sys.path.insert(0, os.path.abspath('../')) 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | #needs_sphinx = '1.0' 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | 'sphinx.ext.autodoc', 34 | 'sphinx.ext.autosummary', 35 | 'sphinx.ext.intersphinx', 36 | 'sphinx.ext.viewcode', 37 | 'numpydoc', 38 | ] 39 | 40 | from glob import glob 41 | autosummary_generate = glob('*.rst') 42 | 43 | # Add any paths that contain templates here, relative to this directory. 44 | templates_path = ['_templates'] 45 | 46 | # The suffix(es) of source filenames. 
# Mock out the heavy runtime dependencies so Sphinx autodoc can import pumpp
# on documentation builders (e.g. readthedocs) without installing them.
# Use the stdlib `unittest.mock` (available since Python 3.3) rather than the
# third-party `mock` backport.
from unittest import mock
MOCK_MODULES = ['numpy', 'scipy', 'scipy.sparse',
                'sklearn', 'sklearn.preprocessing', 'sklearn.utils', 'sklearn.utils.sparsefuncs',
                'sklearn.utils.validation', 'sklearn.utils.multiclass',
                'mir_eval', 'mir_eval.util',
                'jams', 'librosa', 'librosa.util', 'librosa.feature', 'librosa.sequence',
                'librosa.note_to_midi', 'librosa.midi_to_note', 'librosa.time_to_frames']
sys.modules.update((mod_name, mock.Mock()) for mod_name in MOCK_MODULES)

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The `imp` module was deprecated in Python 3.4 and removed in 3.12, so load
# the version module by path using importlib instead of imp.load_source.
import importlib.util
_spec = importlib.util.spec_from_file_location('pumpp.version',
                                               '../pumpp/version.py')
pumpp_version = importlib.util.module_from_spec(_spec)
_spec.loader.exec_module(pumpp_version)
# The short X.Y version.
version = pumpp_version.short_version
# The full version, including alpha/beta/rc tags.
release = pumpp_version.version
93 | #today_fmt = '%B %d, %Y' 94 | 95 | # List of patterns, relative to source directory, that match files and 96 | # directories to ignore when looking for source files. 97 | # This patterns also effect to html_static_path and html_extra_path 98 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 99 | 100 | # The reST default role (used for this markup: `text`) to use for all 101 | # documents. 102 | #default_role = None 103 | 104 | # If true, '()' will be appended to :func: etc. cross-reference text. 105 | #add_function_parentheses = True 106 | 107 | # If true, the current module name will be prepended to all description 108 | # unit titles (such as .. function::). 109 | #add_module_names = True 110 | 111 | # If true, sectionauthor and moduleauthor directives will be shown in the 112 | # output. They are ignored by default. 113 | #show_authors = False 114 | 115 | # The name of the Pygments (syntax highlighting) style to use. 116 | pygments_style = 'sphinx' 117 | 118 | # A list of ignored prefixes for module index sorting. 119 | #modindex_common_prefix = [] 120 | 121 | # If true, keep warnings as "system message" paragraphs in the built documents. 122 | #keep_warnings = False 123 | 124 | # If true, `todo` and `todoList` produce output, else they produce nothing. 125 | todo_include_todos = False 126 | 127 | 128 | # -- Options for HTML output ---------------------------------------------- 129 | 130 | # The theme to use for HTML and HTML Help pages. See the documentation for 131 | # a list of builtin themes. 132 | html_theme = 'default' 133 | 134 | # Theme options are theme-specific and customize the look and feel of a theme 135 | # further. For a list of options available for each theme, see the 136 | # documentation. 137 | #html_theme_options = {} 138 | 139 | # Add any paths that contain custom themes here, relative to this directory. 140 | #html_theme_path = [] 141 | 142 | # The name for this set of Sphinx documents. 143 | # " v documentation" by default. 
144 | #html_title = 'pumpp v0.0.0pre' 145 | 146 | # A shorter title for the navigation bar. Default is the same as html_title. 147 | #html_short_title = None 148 | 149 | # The name of an image file (relative to this directory) to place at the top 150 | # of the sidebar. 151 | #html_logo = None 152 | 153 | # The name of an image file (relative to this directory) to use as a favicon of 154 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 155 | # pixels large. 156 | #html_favicon = None 157 | 158 | # Add any paths that contain custom static files (such as style sheets) here, 159 | # relative to this directory. They are copied after the builtin static files, 160 | # so a file named "default.css" will overwrite the builtin "default.css". 161 | # html_static_path = ['_static'] 162 | 163 | # Add any extra paths that contain custom files (such as robots.txt or 164 | # .htaccess) here, relative to this directory. These files are copied 165 | # directly to the root of the documentation. 166 | #html_extra_path = [] 167 | 168 | # If not None, a 'Last updated on:' timestamp is inserted at every page 169 | # bottom, using the given strftime format. 170 | # The empty string is equivalent to '%b %d, %Y'. 171 | #html_last_updated_fmt = None 172 | 173 | # If true, SmartyPants will be used to convert quotes and dashes to 174 | # typographically correct entities. 175 | #html_use_smartypants = True 176 | 177 | # Custom sidebar templates, maps document names to template names. 178 | #html_sidebars = {} 179 | 180 | # Additional templates that should be rendered to pages, maps page names to 181 | # template names. 182 | #html_additional_pages = {} 183 | 184 | # If false, no module index is generated. 185 | #html_domain_indices = True 186 | 187 | # If false, no index is generated. 188 | #html_use_index = True 189 | 190 | # If true, the index is split into individual pages for each letter. 
191 | #html_split_index = False 192 | 193 | # If true, links to the reST sources are added to the pages. 194 | #html_show_sourcelink = True 195 | 196 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 197 | #html_show_sphinx = True 198 | 199 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 200 | #html_show_copyright = True 201 | 202 | # If true, an OpenSearch description file will be output, and all pages will 203 | # contain a tag referring to it. The value of this option must be the 204 | # base URL from which the finished HTML is served. 205 | #html_use_opensearch = '' 206 | 207 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 208 | #html_file_suffix = None 209 | 210 | # Language to be used for generating the HTML full-text search index. 211 | # Sphinx supports the following languages: 212 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' 213 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' 214 | #html_search_language = 'en' 215 | 216 | # A dictionary with options for the search language support, empty by default. 217 | # 'ja' uses this config value. 218 | # 'zh' user can custom change `jieba` dictionary path. 219 | #html_search_options = {'type': 'default'} 220 | 221 | # The name of a javascript file (relative to the configuration directory) that 222 | # implements a search results scorer. If empty, the default will be used. 223 | #html_search_scorer = 'scorer.js' 224 | 225 | # Output file base name for HTML help builder. 226 | htmlhelp_basename = 'pumppdoc' 227 | 228 | # -- Options for LaTeX output --------------------------------------------- 229 | 230 | latex_elements = { 231 | # The paper size ('letterpaper' or 'a4paper'). 232 | #'papersize': 'letterpaper', 233 | 234 | # The font size ('10pt', '11pt' or '12pt'). 235 | #'pointsize': '10pt', 236 | 237 | # Additional stuff for the LaTeX preamble. 
238 | #'preamble': '', 239 | 240 | # Latex figure (float) alignment 241 | #'figure_align': 'htbp', 242 | } 243 | 244 | # Grouping the document tree into LaTeX files. List of tuples 245 | # (source start file, target name, title, 246 | # author, documentclass [howto, manual, or own class]). 247 | latex_documents = [ 248 | (master_doc, 'pumpp.tex', 'pumpp Documentation', 249 | 'Brian McFee', 'manual'), 250 | ] 251 | 252 | # The name of an image file (relative to this directory) to place at the top of 253 | # the title page. 254 | #latex_logo = None 255 | 256 | # For "manual" documents, if this is true, then toplevel headings are parts, 257 | # not chapters. 258 | #latex_use_parts = False 259 | 260 | # If true, show page references after internal links. 261 | #latex_show_pagerefs = False 262 | 263 | # If true, show URL addresses after external links. 264 | #latex_show_urls = False 265 | 266 | # Documents to append as an appendix to all manuals. 267 | #latex_appendices = [] 268 | 269 | # If false, no module index is generated. 270 | #latex_domain_indices = True 271 | 272 | 273 | # -- Options for manual page output --------------------------------------- 274 | 275 | # One entry per manual page. List of tuples 276 | # (source start file, name, description, authors, manual section). 277 | man_pages = [ 278 | (master_doc, 'pumpp', 'pumpp Documentation', 279 | [author], 1) 280 | ] 281 | 282 | # If true, show URL addresses after external links. 283 | #man_show_urls = False 284 | 285 | 286 | # -- Options for Texinfo output ------------------------------------------- 287 | 288 | # Grouping the document tree into Texinfo files. List of tuples 289 | # (source start file, target name, title, author, 290 | # dir menu entry, description, category) 291 | texinfo_documents = [ 292 | (master_doc, 'pumpp', 'pumpp Documentation', 293 | author, 'pumpp', 'One line description of project.', 294 | 'Miscellaneous'), 295 | ] 296 | 297 | # Documents to append as an appendix to all manuals. 
298 | #texinfo_appendices = [] 299 | 300 | # If false, no module index is generated. 301 | #texinfo_domain_indices = True 302 | 303 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 304 | #texinfo_show_urls = 'footnote' 305 | 306 | # If true, do not generate a @detailmenu in the "Top" node's menu. 307 | #texinfo_no_detailmenu = False 308 | 309 | 310 | # Example configuration for intersphinx: refer to the Python standard library. 311 | intersphinx_mapping = {'python': ('https://docs.python.org/3', None), 312 | 'numpy': ('https://docs.scipy.org/doc/numpy/', None), 313 | 'librosa': ('https://librosa.org/doc/latest/', None), 314 | 'jams': ('https://jams.readthedocs.io/en/latest/', None)} 315 | 316 | numpydoc_show_class_members = False 317 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. pumpp documentation master file, created by 2 | sphinx-quickstart on Thu Jul 7 21:27:51 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | 7 | Practically Universal Music Pre-Processor 8 | ========================================= 9 | 10 | Pumpp is designed to make it easy to convert pairs of `(audio, jams)` into data that can 11 | be easily consumed by statistical algorithms. Some desired features: 12 | 13 | - Converting tags to sparse encoding vectors 14 | - Sampling `(start, end, label)` to frame-level annotations at a specific sampling rate 15 | - Extracting first-level features (eg, Mel spectra or CQT) from audio 16 | - Aligning and storing the results in a simple data structure (npz, hdf5) 17 | - Converting between annotation spaces for a given task 18 | - Helper variables for semi-supervised learning 19 | 20 | 21 | API 22 | === 23 | .. toctree:: 24 | :maxdepth: 2 25 | 26 | api 27 | 28 | 29 | Changes 30 | ======= 31 | .. 
class Scope:
    '''Base class for managing a namespace of named tensor fields.

    Attributes
    ----------
    name : str or None
        The name of this object.  When not `None`, every field key
        is prefixed by `name/`.

    fields : dict of str : Tensor
        Maps each scoped field name produced by this object to a
        `Tensor` descriptor giving its shape and data type.
    '''
    def __init__(self, name):
        self.name = name
        self.fields = dict()

    def __repr__(self):
        return '<{}({}) fields={}>'.format(self.__class__.__name__,
                                           self.name, self.fields)

    def scope(self, key):
        '''Prefix a key with this object's name scope.

        Parameters
        ----------
        key : string

        Returns
        -------
        `name/key` if `name` is not `None`;
        otherwise, `key` unchanged.
        '''
        return key if self.name is None else '{:s}/{:s}'.format(self.name, key)

    def register(self, field, shape, dtype):
        '''Register a field as a tensor with a specified shape and type.

        A `Tensor` descriptor of the given shape and type is stored in
        this object's `fields` dict under the scoped field name.

        Parameters
        ----------
        field : str
            The name of the field

        shape : iterable of `int` or `None`
            The shape of the output variable, excluding any dimension
            for multiple outputs.  `None` entries mark variable-length
            (time-like) axes.

        dtype : type
            The data type of the field

        Raises
        ------
        ParameterError
            If `dtype` or `shape` are improperly specified
        '''
        if not isinstance(dtype, (type, np.dtype)):
            raise ParameterError('dtype={} must be a type or np.dtype'.format(dtype))

        shape_ok = (isinstance(shape, Iterable) and
                    all(s is None or isinstance(s, int) for s in shape))
        if not shape_ok:
            raise ParameterError('shape={} must be an iterable of integers'.format(shape))

        self.fields[self.scope(field)] = Tensor(tuple(shape), dtype)

    def pop(self, field):
        '''Remove and return the descriptor registered under a (scoped) field name.'''
        return self.fields.pop(self.scope(field))

    def merge(self, data):
        '''Stack a list of output dictionaries into one scoped dictionary.

        Parameters
        ----------
        data : list of dict
            Output dicts as produced by `pumpp.task.BaseTaskTransformer.transform`
            or `pumpp.feature.FeatureExtractor.transform`.

        Returns
        -------
        data_out : dict
            Each key appearing in any input dict is re-mapped through
            `scope`, and the corresponding values are stacked along a
            new leading (axis 0) dimension.
        '''
        all_keys = set().union(*data)
        return {self.scope(key): np.stack([np.asarray(d[key]) for d in data],
                                          axis=0)
                for key in all_keys}
class Slicer:
    '''Slicer can compute the duration of data with time-like fields,
    and slice down to the common time index.

    This class serves as a base for Sampler and Pump, and should not
    be used directly.

    Parameters
    ----------
    ops : one or more Scope (TaskTransformer or FeatureExtractor)
    '''
    def __init__(self, *ops):
        # Maps each field key to the list of its time-like axis indices
        self._time = dict()

        for op in ops:
            self.add(op)

    def add(self, operator):
        '''Add an operator to the Slicer

        Parameters
        ----------
        operator : Scope (TaskTransformer or FeatureExtractor)
            The new operator to add
        '''
        if not isinstance(operator, Scope):
            raise ParameterError('Operator {} must be a TaskTransformer '
                                 'or FeatureExtractor'.format(operator))

        for key, tensor in operator.fields.items():
            # Axes with size None are variable-length (time-like).
            # Offset by 1 to account for the leading batch dimension.
            self._time[key] = [axis
                               for axis, size in enumerate(tensor.shape, 1)
                               if size is None]

    def data_duration(self, data):
        '''Compute the valid data duration of a dict

        Parameters
        ----------
        data : dict
            As produced by pumpp.transform

        Returns
        -------
        length : int
            The minimum temporal extent of a dynamic observation in data
        '''
        # Collect the lengths of every time-like axis across all fields
        lengths = [data[key].shape[axis]
                   for key in self._time
                   for axis in self._time[key]]

        return min(lengths)

    def crop(self, data):
        '''Crop a data dictionary down to its common time

        Parameters
        ----------
        data : dict
            As produced by pumpp.transform

        Returns
        -------
        data_cropped : dict
            Like `data` but with all time-like axes truncated to the
            minimum common duration
        '''
        duration = self.data_duration(data)

        data_out = dict()
        for key, value in data.items():
            slicer = [slice(None)] * value.ndim
            for axis in self._time.get(key, []):
                slicer[axis] = slice(duration)
            data_out[key] = value[tuple(slicer)]

        return data_out
24 | 25 | This class is used to collect feature and task transformers 26 | 27 | Attributes 28 | ---------- 29 | ops : list of (BaseTaskTransformer, FeatureExtractor) 30 | The operations to apply 31 | 32 | Examples 33 | -------- 34 | Create a CQT and chord transformer 35 | 36 | >>> p_cqt = pumpp.feature.CQT('cqt', sr=44100, hop_length=1024) 37 | >>> p_chord = pumpp.task.ChordTagTransformer(sr=44100, hop_length=1024) 38 | >>> pump = pumpp.Pump(p_cqt, p_chord) 39 | >>> data = pump.transform(audio_f='/my/audio/file.mp3', 40 | ... jam='/my/jams/annotation.jams') 41 | 42 | Or use the call interface: 43 | 44 | >>> data = pump(audio_f='/my/audio/file.mp3', 45 | ... jam='/my/jams/annotation.jams') 46 | 47 | Or apply to audio in memory, and without existing annotations: 48 | 49 | >>> y, sr = librosa.load('/my/audio/file.mp3') 50 | >>> data = pump(y=y, sr=sr) 51 | 52 | Access all the fields produced by this pump: 53 | 54 | >>> pump.fields 55 | {'chord/chord': Tensor(shape=(None, 170), dtype=), 56 | 'cqt/mag': Tensor(shape=(None, 288), dtype=), 57 | 'cqt/phase': Tensor(shape=(None, 288), dtype=)} 58 | 59 | Access a constituent operator by name: 60 | 61 | >>> pump['chord'].fields 62 | {'chord/chord': Tensor(shape=(None, 170), dtype=)} 63 | ''' 64 | 65 | def __init__(self, *ops): 66 | 67 | self.ops = [] 68 | self.opmap = dict() 69 | super(Pump, self).__init__(*ops) 70 | 71 | def add(self, operator): 72 | '''Add an operation to this pump. 
    def transform(self, audio_f=None, jam=None, y=None, sr=None, crop=False,
                  data=None):
        '''Apply the transformations to an audio file, and optionally JAMS object.

        Parameters
        ----------
        audio_f : str
            Path to audio file

        jam : optional, `jams.JAMS`, str or file-like
            Optional JAMS object/path to JAMS file/open file descriptor.

            If provided, this will provide data for task transformers.

        y : np.ndarray
        sr : number > 0
            If provided, operate directly on an existing audio buffer `y` at
            sampling rate `sr` rather than load from `audio_f`.

        crop : bool
            If `True`, then data are cropped to a common time index across all
            fields.  Otherwise, data may have different time extents.

        data : None or dict
            Optional dict of already-computed features.  Any operator whose
            output fields are all already present in `data` is skipped, and
            the dict is updated in place with the newly computed fields.

        Returns
        -------
        data : dict
            Data dictionary containing the transformed audio (and annotations)

        Raises
        ------
        ParameterError
            At least one of `audio_f` or `(y, sr)` must be provided.

        '''
        # Only run operators that would produce at least one field
        # not already present in `data`.
        data = dict() if data is None else data
        existing_keys = set(data)
        ops = [op for op in self.ops if set(op.fields) - existing_keys]

        if any(isinstance(op, FeatureExtractor) for op in ops):
            if y is None:
                if audio_f is None:
                    raise ParameterError('At least one of `y` or `audio_f` '
                                         'must be provided')

                # Load the audio
                y, sr = librosa.load(audio_f, sr=sr, mono=True)

            if sr is None:
                raise ParameterError('If audio is provided as `y`, you must '
                                     'specify the sampling rate as sr=')

        if any(isinstance(op, BaseTaskTransformer) for op in ops):
            if jam is None:
                # No annotations given: build an empty JAMS whose duration
                # matches the audio, so task transformers can still run.
                jam = jams.JAMS()
                jam.file_metadata.duration = librosa.get_duration(y=y, sr=sr)

            # Load the jams
            if not isinstance(jam, jams.JAMS):
                jam = jams.load(jam)

        for op in ops:
            if isinstance(op, BaseTaskTransformer):
                data.update(op.transform(jam))
            elif isinstance(op, FeatureExtractor):
                data.update(op.transform(y, sr))
        if crop:
            data = self.crop(data)
        return data
    @property
    def fields(self):
        '''A dictionary of all fields constructed by this pump.

        Returns
        -------
        fields : dict of str : Tensor
            The union of the `fields` dicts of every operator in this
            pump, keyed by scoped field name (e.g. ``'cqt/mag'``).
        '''
        out = dict()
        for operator in self.ops:
            out.update(**operator.fields)

        return out
' 253 | for key in self.opmap: 254 | rstr += '\n
{:s}
'.format(key) 255 | rstr += '\n
{}'.format(self.opmap[key]) 256 | 257 | rstr += '
    ' 258 | for fkey, field in self.opmap[key].fields.items(): 259 | rstr += '\n
  • {:s} [shape={}, dtype={}]
  • '.format(fkey, 260 | field.shape, 261 | repr(field.dtype)) 262 | rstr += '
' 263 | rstr += '
' 264 | return rstr 265 | -------------------------------------------------------------------------------- /pumpp/exceptions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | '''Exception classes for pumpp''' 4 | 5 | 6 | class PumppError(Exception): 7 | '''The root pumpp exception class''' 8 | pass 9 | 10 | 11 | class DataError(PumppError): 12 | '''Exceptions relating to data errors''' 13 | pass 14 | 15 | 16 | class ParameterError(PumppError): 17 | '''Exceptions relating to function and method parameters''' 18 | pass 19 | -------------------------------------------------------------------------------- /pumpp/feature/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | Feature extractors 5 | ================== 6 | 7 | .. autosummary:: 8 | :toctree: generated/ 9 | 10 | FeatureExtractor 11 | CQT 12 | CQTMag 13 | CQTPhaseDiff 14 | HCQT 15 | HCQTMag 16 | HCQTPhaseDiff 17 | STFT 18 | STFTMag 19 | STFTPhaseDiff 20 | Mel 21 | Tempogram 22 | TempoScale 23 | TimePosition 24 | ''' 25 | 26 | from .base import * 27 | from .cqt import * 28 | from .fft import * 29 | from .mel import * 30 | from .rhythm import * 31 | from .time import * 32 | -------------------------------------------------------------------------------- /pumpp/feature/_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | '''Utilities for feature extraction classes''' 4 | 5 | import numpy as np 6 | 7 | from ..exceptions import ParameterError 8 | 9 | 10 | def phase_diff(phase, conv): 11 | '''Compute the phase differential along a given axis 12 | 13 | Parameters 14 | ---------- 15 | phase : np.ndarray 16 | Input phase (in radians) 17 | 18 | conv: {None, 'tf', 'th', 'channels_last', 'channels_first'} 19 | Convolution 
def phase_diff(phase, conv):
    '''Compute the phase differential along the time axis.

    Parameters
    ----------
    phase : np.ndarray
        Input phase (in radians)

    conv : {None, 'tf', 'th', 'channels_last', 'channels_first'}
        Convolution mode; determines which axis of `phase` is time.

    Returns
    -------
    dphase : np.ndarray like `phase`
        The phase differential: the first frame is passed through
        unchanged, and each subsequent frame holds the unwrapped
        frame-to-frame phase difference.

    Raises
    ------
    ParameterError
        If `conv` is not one of the supported modes.
    '''
    if conv is None or conv in ('channels_last', 'tf'):
        axis = 0
    elif conv in ('channels_first', 'th'):
        axis = 1
    else:
        # Previously an unrecognized mode fell through the if/elif chain and
        # triggered an UnboundLocalError on `axis`; fail loudly instead,
        # consistent with the conv validation in FeatureExtractor and HCQT.
        raise ParameterError('Invalid conv={}'.format(conv))

    # First frame keeps its absolute phase; the remaining frames store the
    # unwrapped differential along the time axis.
    dphase = np.empty(phase.shape, dtype=phase.dtype)
    zero_idx = [slice(None)] * phase.ndim
    zero_idx[axis] = slice(1)
    else_idx = [slice(None)] * phase.ndim
    else_idx[axis] = slice(1, None)
    zero_idx = tuple(zero_idx)
    else_idx = tuple(else_idx)
    dphase[zero_idx] = phase[zero_idx]
    dphase[else_idx] = np.diff(np.unwrap(phase, axis=axis), axis=axis)
    return dphase
def quantize(x, ref_min=None, ref_max=None, dtype='uint8'):
    '''Quantize array entries to a fixed unsigned-integer dtype.

    Parameters
    ----------
    x : np.ndarray
        The data to quantize

    ref_min : None or float

    ref_max : None or float
        The reference minimum (maximum) value for quantization.
        By default, `x.min()` (`x.max()`)

    dtype : np.dtype {'uint8', 'uint16'}
        The target data type.  Any unsigned integer type is supported,
        but most cases will call for `uint8`.

    Returns
    -------
    y : np.ndarray, dtype=dtype
        The values of `x` mapped onto the integer range of `dtype`

    Raises
    ------
    ParameterError
        If `dtype` is not an unsigned integer type.
    '''
    if ref_min is None:
        ref_min = np.min(x)

    if ref_max is None:
        ref_max = np.max(x)

    try:
        target = np.iinfo(dtype)
    except ValueError as exc:
        raise ParameterError('dtype={} must be an unsigned integer type'.format(dtype)) from exc

    if target.kind != 'u':
        raise ParameterError('dtype={} must be an unsigned integer type'.format(dtype))

    # One bin edge per representable level of the target type
    n_levels = target.max - target.min + 1
    edges = np.linspace(ref_min, ref_max, num=n_levels)

    out = np.empty_like(x, dtype=np.dtype(dtype))
    out[:] = np.digitize(x, edges, right=True)

    # Clamp out-of-range inputs to the extremes of the integer range
    out[x > ref_max] = target.max
    out[x < ref_min] = target.min
    return out


def to_dtype(x, dtype):
    '''Convert an array to a target dtype, quantizing if it is integral.

    Parameters
    ----------
    x : np.ndarray
        The input data

    dtype : np.dtype or type specification
        The target dtype

    Returns
    -------
    x_dtype : np.ndarray, dtype=dtype
        The converted data.

        If dtype is integrable, `x_dtype` will be quantized.

    See Also
    --------
    quantize
    '''
    if np.issubdtype(dtype, np.integer):
        return quantize(x, dtype=dtype)
    return x.astype(dtype)
class FeatureExtractor(Scope):
    '''The base feature extractor class.

    Attributes
    ----------
    name : str
        The name for this feature extractor

    sr : number > 0
        The sampling rate of audio for analysis

    hop_length : int > 0
        The hop length between analysis frames

    conv : {'tf', 'th', 'channels_last', 'channels_first', None}
        convolution dimension ordering:

        - 'channels_last' for tensorflow-style 2D convolution
        - 'tf' equivalent to 'channels_last'
        - 'channels_first' for theano-style 2D convolution
        - 'th' equivalent to 'channels_first'
        - None for 1D or non-convolutional representations

    dtype : str or np.dtype
        The data type for features produced by this object. Default is `float32`.

        Setting to `uint8` will produce quantized features.
    '''
    def __init__(self, name=None, sr=22050, hop_length=512, conv=None, dtype='float32'):

        super(FeatureExtractor, self).__init__(name)

        # Validate conv early so downstream register()/idx never see an
        # unsupported mode
        if conv not in ('tf', 'th', 'channels_last', 'channels_first', None):
            raise ParameterError('conv="{}", must be one of '
                                 '("channels_last", "tf", '
                                 '"channels_first", "th", None)'.format(conv))

        self.sr = sr
        self.hop_length = hop_length
        self.conv = conv
        self.dtype = np.dtype(dtype)

    def register(self, key, dimension, dtype, channels=1):
        '''Register an output field of shape (time, dimension), adding a
        channel axis according to the convolution mode.'''
        shape = [None, dimension]

        if self.conv in ('channels_last', 'tf'):
            shape.append(channels)

        elif self.conv in ('channels_first', 'th'):
            shape.insert(0, channels)

        super(FeatureExtractor, self).register(key, shape, dtype)

    @property
    def idx(self):
        # Index expression that inserts the channel axis implied by `conv`
        # when applied to a (time, dimension) array
        if self.conv is None:
            return Ellipsis

        elif self.conv in ('channels_last', 'tf'):
            return (slice(None), slice(None), np.newaxis)

        elif self.conv in ('channels_first', 'th'):
            return (np.newaxis, slice(None), slice(None))

    def transform(self, y, sr):
        '''Transform an audio signal

        Parameters
        ----------
        y : np.ndarray
            The audio signal

        sr : number > 0
            The native sampling rate of y

        Returns
        -------
        dict
            Data dictionary containing features extracted from y

        See Also
        --------
        transform_audio
        '''
        # Resample to this extractor's rate before analysis
        if sr != self.sr:
            y = resample(y, orig_sr=sr, target_sr=self.sr)

        return self.merge([self.transform_audio(y)])

    def transform_audio(self, y):
        # Subclasses implement the actual feature computation
        raise NotImplementedError

    def layers(self, api='keras'):
        '''Construct input layers for the given transformer

        Parameters
        ----------
        api : string
            One of 'k', 'keras' (for Keras mode)
            'tf.keras', 'tensorflow.keras', 'tfk' for tensorflow.keras mode
            'tf', 'tensorflow' for tensorflow (v1 compatible)

            Note that 'tensorflow' mode uses v1 compatibility, and disables eager execution.

        Returns
        -------
        layers : {field: layer object}
            A dictionary of keras or tensorflow input layers, keyed by the corresponding
            field keys.

        Raises
        ------
        ParameterError
            If `api` is not one of the supported modes.
        '''
        if api in ('k', 'keras'):
            return self.layers_keras()
        elif api in ('tf.keras', 'tensorflow.keras', 'tfk'):
            return self.layers_tfkeras()
        elif api in ('tf', 'tensorflow'):
            return self.layers_tensorflow()
        else:
            raise ParameterError('Unsupported layer api={}'.format(api))

    def layers_tensorflow(self):
        # Build v1-style placeholders, one per registered field.
        # Import is deferred so tensorflow is only required in this mode.
        import tensorflow.compat.v1 as tf
        tf.disable_v2_behavior()

        L = dict()
        for key in self.fields:
            # Prepend a batch axis to the registered field shape
            shape = tuple([None] + list(self.fields[key].shape))
            L[key] = tf.placeholder(self.fields[key].dtype,
                                    shape=shape, name=key)
        return L

    def layers_keras(self):
        # Deferred import: keras is only required in this mode
        from keras.layers import Input

        L = dict()
        for key in self.fields:
            L[key] = Input(name=key,
                           shape=self.fields[key].shape,
                           dtype=np.dtype(self.fields[key].dtype).name)

        return L

    def layers_tfkeras(self):
        # Deferred import: tensorflow is only required in this mode
        from tensorflow.keras.layers import Input

        L = dict()
        for key in self.fields:
            L[key] = Input(name=key,
                           shape=self.fields[key].shape,
                           dtype=np.dtype(self.fields[key].dtype).name)

        return L

    def n_frames(self, duration):
        '''Get the number of frames for a given duration

        Parameters
        ----------
        duration : number >= 0
            The duration, in seconds

        Returns
        -------
        n_frames : int >= 0
            The number of frames at this extractor's sampling rate and
            hop length
        '''
        return int(time_to_frames(duration, sr=self.sr,
                                  hop_length=self.hop_length))
class CQT(FeatureExtractor):
    '''Constant-Q transform

    Attributes
    ----------
    name : str
        The name for this feature extractor

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The number of samples between CQT frames

    n_octaves : int > 0
        The number of octaves in the CQT

    over_sample : int > 0
        The amount of frequency oversampling (bins per semitone)

    fmin : float > 0
        The minimum frequency of the CQT.  Defaults to `C1` (~32.7 Hz).

    log : boolean
        If `True`, scale the magnitude to decibels

        Otherwise, use linear magnitude

    conv : {'tf', 'th', 'channels_last', 'channels_first', None}
        convolution dimension ordering (see FeatureExtractor)

    dtype : np.dtype
        The data type for the output features. Default is `float32`.

        Setting to `uint8` will produce quantized features.
    '''
    def __init__(self, name='cqt', sr=22050, hop_length=512, n_octaves=8, over_sample=3,
                 fmin=None, log=False, conv=None, dtype='float32'):
        super(CQT, self).__init__(name, sr, hop_length, conv=conv, dtype=dtype)

        if fmin is None:
            fmin = note_to_hz('C1')

        self.n_octaves = n_octaves
        self.over_sample = over_sample
        self.fmin = fmin
        self.log = log

        # over_sample bins per semitone, 12 semitones per octave
        n_bins = n_octaves * 12 * over_sample
        self.register('mag', n_bins, self.dtype)
        self.register('phase', n_bins, self.dtype)

    def transform_audio(self, y):
        '''Compute the CQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins)
                The CQT magnitude

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase
        '''
        # Trim/pad to the frame count implied by the track duration
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                fmin=self.fmin,
                n_bins=(self.n_octaves * self.over_sample * 12),
                bins_per_octave=(self.over_sample * 12))

        C = fix_length(C, size=n_frames)

        cqtm, phase = magphase(C)
        if self.log:
            cqtm = amplitude_to_db(cqtm, ref=np.max)

        # Transpose to (time, frequency), add channel axis per conv mode,
        # then convert (and possibly quantize) to the output dtype
        return {'mag': to_dtype(cqtm.T[self.idx], self.dtype),
                'phase': to_dtype(np.angle(phase).T[self.idx], self.dtype)}


class CQTMag(CQT):
    '''Magnitude CQT: CQT with the phase output removed.

    See Also
    --------
    CQT
    '''

    def __init__(self, *args, **kwargs):
        super(CQTMag, self).__init__(*args, **kwargs)
        # Drop the registered phase field; only magnitude is exposed
        self.pop('phase')

    def transform_audio(self, y):
        '''Compute CQT magnitude.

        Parameters
        ----------
        y : np.ndarray
            the audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, n_bins)
                The CQT magnitude
        '''
        data = super(CQTMag, self).transform_audio(y)
        # Discard the phase computed by the parent
        data.pop('phase')
        return data
class CQTPhaseDiff(CQT):
    '''CQT with unwrapped phase differentials.

    Replaces the parent's 'phase' output with 'dphase', the unwrapped
    frame-to-frame phase difference.

    See Also
    --------
    CQT
    '''
    def __init__(self, *args, **kwargs):
        super(CQTPhaseDiff, self).__init__(*args, **kwargs)
        # Swap the registered 'phase' field for a differential field with
        # the same dimension and dtype
        phase_field = self.pop('phase')

        self.register('dphase',
                      self.n_octaves * 12 * self.over_sample,
                      phase_field.dtype)

    def transform_audio(self, y):
        '''Compute the CQT magnitude and phase differential.

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins)
                The CQT magnitude

            data['dphase'] : np.ndarray, shape = mag.shape
                The unwrapped CQT phase differential
        '''
        # NOTE: this recomputes the CQT rather than delegating to
        # CQT.transform_audio, because the parent applies to_dtype (possibly
        # quantization) to the phase before we could difference it.
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                fmin=self.fmin,
                n_bins=(self.n_octaves * self.over_sample * 12),
                bins_per_octave=(self.over_sample * 12))

        C = fix_length(C, size=n_frames)

        cqtm, phase = magphase(C)
        if self.log:
            cqtm = amplitude_to_db(cqtm, ref=np.max)

        # Difference the raw (float) phase along the time axis
        dphase = phase_diff(np.angle(phase).T[self.idx], self.conv)

        return {'mag': to_dtype(cqtm.T[self.idx], self.dtype),
                'dphase': to_dtype(dphase, self.dtype)}
class HCQT(FeatureExtractor):
    '''Harmonic Constant-Q transform

    Attributes
    ----------
    name : str
        The name for this feature extractor

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The number of samples between CQT frames

    n_octaves : int > 0
        The number of octaves in the CQT

    over_sample : int > 0
        The amount of frequency oversampling (bins per semitone)

    fmin : float > 0
        The minimum frequency of the CQT

    harmonics : list of int >= 1
        The list of harmonics to compute

    log : boolean
        If `True`, scale the magnitude to decibels

        Otherwise, use linear magnitude

    conv : {'tf', 'th', 'channels_last', 'channels_first'}
        convolution dimension ordering:

        - 'channels_last' for tensorflow-style 2D convolution
        - 'tf' equivalent to 'channels_last'
        - 'channels_first' for theano-style 2D convolution
        - 'th' equivalent to 'channels_first'

    dtype : np.dtype
        The data type for the output features. Default is `float32`.

        Setting to `uint8` will produce quantized features.
    '''
    def __init__(self, name='hcqt', sr=22050, hop_length=512, n_octaves=8,
                 over_sample=3, fmin=None, harmonics=None, log=False,
                 conv='channels_last', dtype='float32'):
        # Default values for name/sr/hop_length added for consistency with
        # the other extractors (CQT, STFT, Mel, Tempogram); existing callers
        # passing them positionally are unaffected.

        # The HCQT output is inherently 3D (time, frequency, harmonic), so a
        # channeled convolution mode is required: conv=None is not allowed.
        if conv not in ('channels_last', 'tf', 'channels_first', 'th'):
            raise ParameterError('Invalid conv={}'.format(conv))

        super(HCQT, self).__init__(name, sr, hop_length, conv=conv, dtype=dtype)

        if fmin is None:
            fmin = note_to_hz('C1')

        if harmonics is None:
            harmonics = [1]
        else:
            harmonics = list(harmonics)
            if not all(isinstance(_, int) and _ > 0 for _ in harmonics):
                raise ParameterError('Invalid harmonics={}'.format(harmonics))

        self.n_octaves = n_octaves
        self.over_sample = over_sample
        self.fmin = fmin
        self.log = log
        self.harmonics = harmonics

        # over_sample bins per semitone, 12 semitones per octave; one
        # channel per harmonic
        n_bins = n_octaves * 12 * over_sample
        self.register('mag', n_bins, self.dtype, channels=len(harmonics))
        self.register('phase', n_bins, self.dtype, channels=len(harmonics))

    def transform_audio(self, y):
        '''Compute the HCQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins, n_harmonics)
                The CQT magnitude

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase
        '''
        cqtm, phase = [], []

        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        for h in self.harmonics:
            # One CQT per harmonic, rooted at h * fmin
            C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                    fmin=self.fmin * h,
                    n_bins=(self.n_octaves * self.over_sample * 12),
                    bins_per_octave=(self.over_sample * 12))

            C = fix_length(C, size=n_frames)

            C, P = magphase(C)
            if self.log:
                C = amplitude_to_db(C, ref=np.max)
            cqtm.append(C)
            phase.append(P)

        cqtm = to_dtype(np.asarray(cqtm), self.dtype)
        phase = to_dtype(np.angle(np.asarray(phase)), self.dtype)

        return {'mag': self._index(cqtm),
                'phase': self._index(phase)}

    def _index(self, value):
        '''Rearrange a tensor according to the convolution mode

        Input is assumed to be in (channels, bins, time) format.
        '''
        if self.conv in ('channels_last', 'tf'):
            # -> (time, bins, channels)
            return np.transpose(value, (2, 1, 0))

        else:  # self.conv in ('channels_first', 'th')
            # -> (channels, time, bins)
            return np.transpose(value, (0, 2, 1))


class HCQTMag(HCQT):
    '''Magnitude HCQT: HCQT with the phase output removed.

    See Also
    --------
    HCQT
    '''

    def __init__(self, *args, **kwargs):
        super(HCQTMag, self).__init__(*args, **kwargs)
        # Drop the registered phase field; only magnitude is exposed
        self.pop('phase')

    def transform_audio(self, y):
        '''Compute HCQT magnitude.

        Parameters
        ----------
        y : np.ndarray
            the audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, n_bins, n_harmonics)
                The CQT magnitude
        '''
        data = super(HCQTMag, self).transform_audio(y)
        data.pop('phase')
        return data


class HCQTPhaseDiff(HCQT):
    '''HCQT with unwrapped phase differentials

    See Also
    --------
    HCQT
    '''
    def __init__(self, *args, **kwargs):
        super(HCQTPhaseDiff, self).__init__(*args, **kwargs)
        # Swap the registered 'phase' field for a differential field with
        # the same dimension, dtype, and channel count
        phase_field = self.pop('phase')

        self.register('dphase',
                      self.n_octaves * 12 * self.over_sample,
                      phase_field.dtype,
                      channels=len(self.harmonics))

    def transform_audio(self, y):
        '''Compute the HCQT magnitude and phase differential

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins, n_harmonics)
                The CQT magnitude

            data['dphase'] : np.ndarray, shape = mag.shape
                The unwrapped phase differential
        '''
        cqtm, phase = [], []

        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        for h in self.harmonics:
            C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                    fmin=self.fmin * h,
                    n_bins=(self.n_octaves * self.over_sample * 12),
                    bins_per_octave=(self.over_sample * 12))

            C = fix_length(C, size=n_frames)

            C, P = magphase(C)
            if self.log:
                C = amplitude_to_db(C, ref=np.max)
            cqtm.append(C)
            phase.append(P)

        cqtm = to_dtype(np.asarray(cqtm), self.dtype)
        # Difference the raw (float) phase, then convert dtype
        phase = np.angle(np.asarray(phase))

        dphase = to_dtype(phase_diff(self._index(phase), self.conv),
                          self.dtype)

        return {'mag': self._index(cqtm),
                'dphase': dphase}
class STFT(FeatureExtractor):
    '''Short-time Fourier Transform (STFT) with both magnitude
    and phase.

    Attributes
    ----------
    name : str
        The name of this transformer

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The hop length of STFT frames

    n_fft : int > 0
        The number of FFT bins per frame

    log : bool
        If `True`, scale magnitude in decibels.

        Otherwise use linear magnitude.

    conv : str
        Convolution mode

    dtype : np.dtype
        The data type for the output features. Default is `float32`.

        Setting to `uint8` will produce quantized features.

    See Also
    --------
    STFTMag
    STFTPhaseDiff
    '''
    def __init__(self, name='stft', sr=22050, hop_length=512, n_fft=2048, log=False,
                 conv=None, dtype='float32'):
        super(STFT, self).__init__(name, sr, hop_length, conv=conv, dtype=dtype)

        self.n_fft = n_fft
        self.log = log

        # Real-valued FFT produces 1 + n_fft // 2 frequency bins
        n_bins = 1 + n_fft // 2
        self.register('mag', n_bins, self.dtype)
        self.register('phase', n_bins, self.dtype)

    def transform_audio(self, y):
        '''Compute the STFT magnitude and phase.

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                STFT magnitude

            data['phase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                STFT phase
        '''
        # Trim/pad to the frame count implied by the track duration
        target_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        spec = stft(y, hop_length=self.hop_length,
                    n_fft=self.n_fft)
        spec = fix_length(spec, size=target_frames)

        mag, phase = magphase(spec)
        if self.log:
            mag = amplitude_to_db(mag, ref=np.max)

        # Lay out as (time, frequency), add the channel axis per conv mode,
        # then convert (and possibly quantize) to the output dtype
        return {'mag': to_dtype(mag.T[self.idx], self.dtype),
                'phase': to_dtype(np.angle(phase.T)[self.idx], self.dtype)}
class STFTPhaseDiff(STFT):
    '''STFT with phase differentials.

    Replaces the parent's 'phase' output with 'dphase', the unwrapped
    frame-to-frame phase difference.

    See Also
    --------
    STFT
    '''
    def __init__(self, *args, **kwargs):
        super(STFTPhaseDiff, self).__init__(*args, **kwargs)
        # Swap the registered 'phase' field for a differential field with
        # the same dimension and dtype
        phase_field = self.pop('phase')
        self.register('dphase', 1 + self.n_fft // 2, phase_field.dtype)

    def transform_audio(self, y):
        '''Compute the STFT magnitude and phase differential.

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                STFT magnitude

            data['dphase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                The unwrapped STFT phase differential
        '''
        # NOTE: recomputes the STFT rather than delegating to
        # STFT.transform_audio, because the parent applies to_dtype (possibly
        # quantization) to the phase before we could difference it.
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        D = stft(y, hop_length=self.hop_length,
                 n_fft=self.n_fft)

        D = fix_length(D, size=n_frames)

        mag, phase = magphase(D)
        if self.log:
            mag = amplitude_to_db(mag, ref=np.max)

        # Difference the raw (float) phase along the time axis
        phase = phase_diff(np.angle(phase.T)[self.idx], self.conv)

        return {'mag': to_dtype(mag.T[self.idx], self.dtype),
                'dphase': to_dtype(phase, self.dtype)}


class STFTMag(STFT):
    '''STFT with only magnitude.

    See Also
    --------
    STFT
    '''
    def __init__(self, *args, **kwargs):
        super(STFTMag, self).__init__(*args, **kwargs)
        # Drop the registered phase field; only magnitude is exposed
        self.pop('phase')

    def transform_audio(self, y):
        '''Compute the STFT magnitude

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                The STFT magnitude
        '''
        data = super(STFTMag, self).transform_audio(y)
        # Discard the phase computed by the parent
        data.pop('phase')

        return data
class Mel(FeatureExtractor):
    '''Mel spectra feature extraction

    Attributes
    ----------
    name : str or None
        naming scope for this feature extractor

    sr : number > 0
        Sampling rate of the audio (in Hz)

    hop_length : int > 0
        Number of samples to advance between frames

    n_fft : int > 0
        Number of samples per frame

    n_mels : int > 0
        Number of Mel frequency bins

    fmax : number > 0
        The maximum frequency bin.
        Defaults to `0.5 * sr`

    log : bool
        If `True`, scale magnitude in decibels.

        Otherwise, use a linear amplitude scale.

    dtype : np.dtype
        The data type for the output features. Default is `float32`.

        Setting to `uint8` will produce quantized features.
    '''
    def __init__(self, name='mel', sr=22050, hop_length=512, n_fft=2048, n_mels=128,
                 fmax=None, log=False, conv=None, dtype='float32'):
        super(Mel, self).__init__(name, sr, hop_length, conv=conv, dtype=dtype)

        self.n_fft = n_fft
        self.n_mels = n_mels
        self.fmax = fmax
        self.log = log

        self.register('mag', n_mels, self.dtype)

    def transform_audio(self, y):
        '''Compute the Mel spectrogram

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, n_mels)
                The Mel spectrogram
        '''
        # Trim/pad to the frame count implied by the track duration
        target_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        # Square root of the power spectrogram gives a magnitude scale
        spec = np.sqrt(melspectrogram(y=y, sr=self.sr,
                                      n_fft=self.n_fft,
                                      hop_length=self.hop_length,
                                      n_mels=self.n_mels,
                                      fmax=self.fmax))
        spec = fix_length(spec, size=target_frames)

        if self.log:
            spec = amplitude_to_db(spec, ref=np.max)

        # Convert (and possibly quantize), then lay out as (time, mel)
        spec = to_dtype(spec, self.dtype)

        return {'mag': spec.T[self.idx]}
class Tempogram(FeatureExtractor):
    '''Tempogram: the short-time autocorrelation of the accent signal

    Attributes
    ----------
    name : str
        The name of this feature extractor

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The hop length of analysis windows

    win_length : int > 0
        The length of the analysis window (in frames)

    conv : str
        Convolution mode

    dtype : np.dtype
        The data type for the output features. Default is `float32`.

        Setting to `uint8` will produce quantized features.
    '''
    def __init__(self, name='tempogram', sr=22050, hop_length=512, win_length=384,
                 conv=None, dtype='float32'):
        super(Tempogram, self).__init__(name, sr, hop_length, conv=conv, dtype=dtype)

        self.win_length = win_length

        # One output field: the autocorrelation tempogram, win_length lags wide
        self.register('tempogram', win_length, self.dtype)

    def transform_audio(self, y):
        '''Compute the tempogram

        Parameters
        ----------
        y : np.ndarray
            Audio buffer

        Returns
        -------
        data : dict
            data['tempogram'] : np.ndarray, shape=(n_frames, win_length)
                The tempogram
        '''
        # Trim/pad to the frame count implied by the track duration
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        tgram = tempogram(y=y, sr=self.sr,
                          hop_length=self.hop_length,
                          win_length=self.win_length)

        # Convert (and possibly quantize), then lay out as (time, lag)
        tgram = to_dtype(fix_length(tgram, size=n_frames), self.dtype)
        return {'tempogram': tgram.T[self.idx]}
class TempoScale(Tempogram):
    '''Tempogram scale transform.

    Mellin scale transform magnitude of the Tempogram.

    Attributes
    ----------
    name : str
        Name of this extractor

    sr : number > 0
        Sampling rate of audio

    hop_length : int > 0
        Hop length for analysis frames

    win_length : int > 0
        Number of frames per analysis window

    n_fmt : int > 0
        Number of scale coefficients to retain

    conv : str
        Convolution mode

    dtype : np.dtype
        The data type for the output features. Default is `float32`.

        Setting to `uint8` will produce quantized features.
    '''
    def __init__(self, name='temposcale', sr=22050, hop_length=512, win_length=384,
                 n_fmt=128, conv=None, dtype='float32'):
        # Default values added for consistency with Tempogram and the other
        # extractors; existing positional callers are unaffected.
        super(TempoScale, self).__init__(name, sr, hop_length, win_length,
                                         conv=conv, dtype=dtype)

        self.n_fmt = n_fmt

        # Replace the raw tempogram field with the scale-transform field
        self.pop('tempogram')
        self.register('temposcale', 1 + n_fmt // 2, self.dtype)

    def transform_audio(self, y):
        '''Apply the scale transform to the tempogram

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['temposcale'] : np.ndarray, shape=(n_frames, 1 + n_fmt // 2)
                The scale transform magnitude coefficients
        '''
        data = super(TempoScale, self).transform_audio(y)
        # NOTE(review): librosa.fmt was removed in librosa 0.10; this feature
        # requires an older librosa — confirm the pinned version.
        data['temposcale'] = np.abs(fmt(data.pop('tempogram'),
                                        axis=1,
                                        n_fmt=self.n_fmt))[self.idx]
        data['temposcale'] = to_dtype(data['temposcale'], self.dtype)

        return data
class TimePosition(FeatureExtractor):
    '''TimePosition: encode frame position as features.

    Each frame is represented by a two-dimensional (cosine, sine)
    positional encoding, in both track-relative and duration-scaled
    (absolute) form.

    Attributes
    ----------
    name : str
        The name of this feature extractor

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The hop length of analysis windows
    '''

    def __init__(self, name='time_position', sr=22050, hop_length=512,
                 conv=None, dtype='float32'):
        super(TimePosition, self).__init__(name, sr, hop_length, conv=conv,
                                           dtype=dtype)

        self.register('relative', 2, self.dtype)
        self.register('absolute', 2, self.dtype)

    def transform_audio(self, y):
        '''Compute the time position encoding

        Parameters
        ----------
        y : np.ndarray
            Audio buffer

        Returns
        -------
        data : dict
            data['relative'] = np.ndarray, shape=(n_frames, 2)
            data['absolute'] = np.ndarray, shape=(n_frames, 2)

            Relative and absolute time positional encodings.
        '''
        duration = get_duration(y=y, sr=self.sr)
        n_frames = self.n_frames(duration)

        # Phase ramp covering [0, pi] across the track
        angles = np.pi * np.linspace(0, 1, num=n_frames)

        relative = np.zeros((n_frames, 2), dtype=np.float32)
        relative[:, 0] = np.cos(angles)
        relative[:, 1] = np.sin(angles)

        # Scale by sqrt(duration) to encode absolute position
        absolute = relative * np.sqrt(duration)

        return {'relative': to_dtype(relative[self.idx], self.dtype),
                'absolute': to_dtype(absolute[self.idx], self.dtype)}
    def __init__(self, n_samples, duration, *ops, **kwargs):
        '''Construct a sampler over the given operators.

        Parameters
        ----------
        n_samples : int or None
            The number of samples to generate (None = indefinitely)

        duration : int > 0
            The duration (in frames) of each sample patch

        *ops : FeatureExtractor or BaseTaskTransformer
            The operators to include when sampling data

        **kwargs
            random_state : None, int, or np.random.RandomState
                Seed or generator for reproducible sampling

        Raises
        ------
        ParameterError
            If `random_state` is not None, an int, or a RandomState.
        '''
        super(Sampler, self).__init__(*ops)

        self.n_samples = n_samples
        self.duration = duration

        random_state = kwargs.pop('random_state', None)
        # Accept a seed (None/int) or an existing generator
        if random_state is None or isinstance(random_state, int):
            self.rng = np.random.RandomState(seed=random_state)
        elif isinstance(random_state, np.random.RandomState):
            self.rng = random_state
        else:
            raise ParameterError('Invalid random_state={}'.format(random_state))

    def sample(self, data, interval):
        '''Sample a patch from the data object

        Parameters
        ----------
        data : dict
            A data dict as produced by pumpp.Pump.transform

        interval : slice
            The time interval to sample

        Returns
        -------
        data_slice : dict
            `data` restricted to `interval`.
        '''
        data_slice = dict()

        for key in data:
            # '_valid' entries are bookkeeping fields, not sampled data
            if '_valid' in key:
                continue

            index = [slice(None)] * data[key].ndim

            # if we have multiple observations for this key, pick one
            # (keep a length-1 slice so the observation axis is preserved)
            index[0] = self.rng.randint(0, data[key].shape[0])
            index[0] = slice(index[0], index[0] + 1)

            # Restrict every time-like axis of this field to the interval
            for tdim in self._time[key]:
                index[tdim] = interval

            data_slice[key] = data[key][tuple(index)]

        return data_slice

    def indices(self, data):
        '''Generate patch indices

        Parameters
        ----------
        data : dict of np.ndarray
            As produced by pumpp.transform

        Yields
        ------
        start : int >= 0
            The start index of a sample patch

        Raises
        ------
        DataError
            If the data is shorter than the sample duration.
        '''
        duration = self.data_duration(data)

        if self.duration > duration:
            raise DataError('Data duration={} is less than '
                            'sample duration={}'.format(duration, self.duration))

        # Infinite stream of uniformly random start positions; the caller
        # is responsible for limiting how many are consumed
        while True:
            # Generate a sampling interval
            yield self.rng.randint(0, duration - self.duration + 1)
__call__(self, data): 142 | '''Generate samples from a data dict. 143 | 144 | Parameters 145 | ---------- 146 | data : dict 147 | As produced by pumpp.transform 148 | 149 | Yields 150 | ------ 151 | data_sample : dict 152 | A sequence of patch samples from `data`, 153 | as parameterized by the sampler object. 154 | ''' 155 | if self.n_samples: 156 | counter = range(self.n_samples) 157 | else: 158 | counter = count(0) 159 | 160 | for _, start in zip(counter, self.indices(data)): 161 | yield self.sample(data, slice(start, start + self.duration)) 162 | 163 | 164 | class SequentialSampler(Sampler): 165 | '''Sample patches in sequential (temporal) order 166 | 167 | Attributes 168 | ---------- 169 | duration : int > 0 170 | the duration (in frames) of each sample 171 | 172 | stride : int > 0 173 | The number of frames to advance between samples. 174 | By default, matches `duration` so there is no overlap. 175 | 176 | ops : array of pumpp.feature.FeatureExtractor or pumpp.task.BaseTaskTransformer 177 | The operators to include when sampling data. 
178 | 179 | random_state : None, int, or np.random.RandomState 180 | If int, random_state is the seed used by the random number 181 | generator; 182 | 183 | If RandomState instance, random_state is the random number 184 | generator; 185 | 186 | If None, the random number generator is the RandomState instance 187 | 188 | See Also 189 | -------- 190 | Sampler 191 | ''' 192 | 193 | def __init__(self, duration, *ops, **kwargs): 194 | 195 | stride = kwargs.pop('stride', None) 196 | 197 | super(SequentialSampler, self).__init__(None, duration, *ops, **kwargs) 198 | 199 | if stride is None: 200 | stride = duration 201 | 202 | if not stride > 0: 203 | raise ParameterError('Invalid patch stride={}'.format(stride)) 204 | self.stride = stride 205 | 206 | def indices(self, data): 207 | '''Generate patch start indices 208 | 209 | Parameters 210 | ---------- 211 | data : dict of np.ndarray 212 | As produced by pumpp.transform 213 | 214 | Yields 215 | ------ 216 | start : int >= 0 217 | The start index of a sample patch 218 | ''' 219 | duration = self.data_duration(data) 220 | 221 | for start in range(0, duration - self.duration, self.stride): 222 | yield start 223 | 224 | 225 | class VariableLengthSampler(Sampler): 226 | '''Sample random patches like a `Sampler`, but allow for 227 | output patches to be less than the target duration when the 228 | data is too short. 229 | 230 | Attributes 231 | ---------- 232 | n_samples : int or None 233 | the number of samples to generate. 234 | If `None`, generate indefinitely. 
235 | 236 | min_duration : int > 0 237 | The minimum duration (in frames) of each sample 238 | 239 | max_duration : int > 0 240 | the maximum duration (in frames) of each sample 241 | 242 | random_state : None, int, or np.random.RandomState 243 | If int, random_state is the seed used by the random number 244 | generator; 245 | 246 | If RandomState instance, random_state is the random number 247 | generator; 248 | 249 | If None, the random number generator is the RandomState instance 250 | used by np.random. 251 | 252 | ops : array of pumpp.feature.FeatureExtractor or pumpp.task.BaseTaskTransformer 253 | The operators to include when sampling data. 254 | 255 | 256 | See Also 257 | -------- 258 | Sampler 259 | ''' 260 | def __init__(self, n_samples, min_duration, max_duration, *ops, **kwargs): 261 | super(VariableLengthSampler, self).__init__(n_samples, max_duration, 262 | *ops, **kwargs) 263 | 264 | if min_duration < 1: 265 | raise ParameterError('min_duration={} must be ' 266 | 'at least 1.'.format(min_duration)) 267 | 268 | if max_duration < min_duration: 269 | raise ParameterError('max_duration={} must be at least ' 270 | 'min_duration={}'.format(max_duration, 271 | min_duration)) 272 | 273 | self.min_duration = min_duration 274 | 275 | def indices(self, data): 276 | '''Generate patch indices 277 | 278 | Parameters 279 | ---------- 280 | data : dict of np.ndarray 281 | As produced by pumpp.transform 282 | 283 | Yields 284 | ------ 285 | start : int >= 0 286 | The start index of a sample patch 287 | ''' 288 | duration = self.data_duration(data) 289 | 290 | while True: 291 | # Generate a sampling interval 292 | yield self.rng.randint(0, duration - self.min_duration + 1) 293 | 294 | def __call__(self, data): 295 | '''Generate samples from a data dict. 
296 | 297 | Parameters 298 | ---------- 299 | data : dict 300 | As produced by pumpp.transform 301 | 302 | Yields 303 | ------ 304 | data_sample : dict 305 | A sequence of patch samples from `data`, 306 | as parameterized by the sampler object. 307 | ''' 308 | if self.n_samples: 309 | counter = range(self.n_samples) 310 | else: 311 | counter = count(0) 312 | 313 | duration = self.data_duration(data) 314 | 315 | for _, start in zip(counter, self.indices(data)): 316 | yield self.sample(data, 317 | slice(start, min(duration, start + self.duration))) 318 | -------------------------------------------------------------------------------- /pumpp/task/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | Task transformations 5 | ==================== 6 | .. autosummary:: 7 | :toctree: generated/ 8 | 9 | BaseTaskTransformer 10 | BeatTransformer 11 | BeatPositionTransformer 12 | ChordTransformer 13 | SimpleChordTransformer 14 | ChordTagTransformer 15 | VectorTransformer 16 | DynamicLabelTransformer 17 | StaticLabelTransformer 18 | StructureTransformer 19 | ''' 20 | 21 | from .base import * 22 | from .chord import * 23 | from .beat import * 24 | from .regression import * 25 | from .tags import * 26 | from .structure import * 27 | from .key import * 28 | -------------------------------------------------------------------------------- /pumpp/task/base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | '''The base class for task transformer objects''' 4 | 5 | import numpy as np 6 | from librosa import time_to_frames, times_like 7 | from librosa.sequence import viterbi_binary, viterbi_discriminative 8 | import jams 9 | 10 | from ..base import Scope 11 | 12 | __all__ = ['BaseTaskTransformer'] 13 | 14 | 15 | def fill_value(dtype): 16 | '''Get a fill-value for a given dtype 17 | 18 | 
Parameters 19 | ---------- 20 | dtype : type 21 | 22 | Returns 23 | ------- 24 | `np.nan` if `dtype` is real or complex 25 | 26 | 0 otherwise 27 | ''' 28 | if np.issubdtype(dtype, np.floating) or np.issubdtype(dtype, np.complexfloating): 29 | return dtype(np.nan) 30 | 31 | return dtype(0) 32 | 33 | 34 | class BaseTaskTransformer(Scope): 35 | '''Base class for task transformer objects 36 | 37 | Attributes 38 | ---------- 39 | name : str 40 | The name prefix for this transformer object 41 | 42 | namespace : str 43 | The JAMS namespace for annotations in this task 44 | 45 | sr : number > 0 46 | The sampling rate for audio 47 | 48 | hop_length : int > 0 49 | The number of samples between frames 50 | ''' 51 | 52 | def __init__(self, name, namespace, sr, hop_length): 53 | super(BaseTaskTransformer, self).__init__(name) 54 | 55 | # This will trigger an exception if the namespace is not found 56 | jams.schema.is_dense(namespace) 57 | 58 | self.namespace = namespace 59 | self.sr = sr 60 | self.hop_length = hop_length 61 | 62 | def empty(self, duration): 63 | '''Create an empty jams.Annotation for this task. 64 | 65 | This method should be overridden by derived classes. 66 | 67 | Parameters 68 | ---------- 69 | duration : int >= 0 70 | Duration of the annotation 71 | ''' 72 | return jams.Annotation(namespace=self.namespace, time=0, duration=0) 73 | 74 | def transform(self, jam, query=None): 75 | '''Transform jam object to make data for this task 76 | 77 | Parameters 78 | ---------- 79 | jam : jams.JAMS 80 | The jams container object 81 | 82 | query : string, dict, or callable [optional] 83 | An optional query to narrow the elements of `jam.annotations` 84 | to be considered. 85 | 86 | If not provided, all annotations are considered. 87 | 88 | Returns 89 | ------- 90 | data : dict 91 | A dictionary of transformed annotations. 92 | All annotations which can be converted to the target namespace 93 | will be converted. 
94 | ''' 95 | anns = [] 96 | if query: 97 | results = jam.search(**query) 98 | else: 99 | results = jam.annotations 100 | 101 | # Find annotations that can be coerced to our target namespace 102 | for ann in results: 103 | try: 104 | anns.append(jams.nsconvert.convert(ann, self.namespace)) 105 | except jams.NamespaceError: 106 | pass 107 | 108 | duration = jam.file_metadata.duration 109 | 110 | # If none, make a fake one 111 | if not anns: 112 | anns = [self.empty(duration)] 113 | 114 | # Apply transformations 115 | results = [] 116 | for ann in anns: 117 | 118 | results.append(self.transform_annotation(ann, duration)) 119 | # If the annotation range is None, it spans the entire track 120 | if ann.time is None or ann.duration is None: 121 | valid = [0, duration] 122 | else: 123 | valid = [ann.time, ann.time + ann.duration] 124 | 125 | results[-1]['_valid'] = time_to_frames(valid, sr=self.sr, 126 | hop_length=self.hop_length) 127 | 128 | # Prefix and collect 129 | return self.merge(results) 130 | 131 | def transform_annotation(self, ann, duration): 132 | '''Transform jams.Annotation to make data for a given task. 133 | 134 | Parameters 135 | ---------- 136 | ann : jams.Annotation 137 | The jams annotation containing the data 138 | 139 | duration : number > 0 140 | time in seconds of the output duration 141 | 142 | Returns 143 | ------- 144 | data : dict 145 | A dictionary of transformed annotation. 146 | ''' 147 | raise NotImplementedError 148 | 149 | def encode_events(self, duration, events, values, dtype=bool): 150 | '''Encode labeled events as a time-series matrix. 151 | 152 | Parameters 153 | ---------- 154 | duration : number 155 | The duration of the track 156 | 157 | events : ndarray, shape=(n,) 158 | Time index of the events 159 | 160 | values : ndarray, shape=(n, m) 161 | Values array. Must have the same first index as `events`. 
162 | 163 | dtype : numpy data type 164 | 165 | Returns 166 | ------- 167 | target : ndarray, shape=(n_frames, n_values) 168 | ''' 169 | 170 | frames = time_to_frames(events, sr=self.sr, 171 | hop_length=self.hop_length) 172 | 173 | n_total = int(time_to_frames(duration, sr=self.sr, 174 | hop_length=self.hop_length)) 175 | 176 | n_alloc = n_total 177 | if np.any(frames): 178 | n_alloc = max(n_total, 1 + int(frames.max())) 179 | 180 | target = np.empty((n_alloc, values.shape[1]), 181 | dtype=dtype) 182 | 183 | target.fill(fill_value(dtype)) 184 | values = values.astype(dtype) 185 | for column, event in zip(values, frames): 186 | target[event] += column 187 | 188 | return target[:n_total] 189 | 190 | def encode_intervals(self, duration, intervals, values, dtype=bool, 191 | multi=True, fill=None): 192 | '''Encode labeled intervals as a time-series matrix. 193 | 194 | Parameters 195 | ---------- 196 | duration : number 197 | The duration (in frames) of the track 198 | 199 | intervals : np.ndarray, shape=(n, 2) 200 | The list of intervals 201 | 202 | values : np.ndarray, shape=(n, m) 203 | The (encoded) values corresponding to each interval 204 | 205 | dtype : np.dtype 206 | The desired output type 207 | 208 | multi : bool 209 | If `True`, allow multiple labels per interval. 210 | 211 | fill : dtype (optional) 212 | Optional default fill value for missing data. 213 | 214 | If not provided, the default is inferred from `dtype`. 
215 | 216 | Returns 217 | ------- 218 | target : np.ndarray, shape=(duration * sr / hop_length, m) 219 | The labeled interval encoding, sampled at the desired frame rate 220 | ''' 221 | if fill is None: 222 | fill = fill_value(dtype) 223 | 224 | frames = time_to_frames(intervals, sr=self.sr, 225 | hop_length=self.hop_length) 226 | 227 | n_total = int(time_to_frames(duration, sr=self.sr, 228 | hop_length=self.hop_length)) 229 | 230 | values = values.astype(dtype) 231 | 232 | n_alloc = n_total 233 | if np.any(frames): 234 | n_alloc = max(n_total, 1 + int(frames.max())) 235 | 236 | target = np.empty((n_alloc, values.shape[1]), 237 | 238 | dtype=dtype) 239 | 240 | target.fill(fill) 241 | 242 | for column, interval in zip(values, frames): 243 | if multi: 244 | target[interval[0]:interval[1]] += column 245 | else: 246 | target[interval[0]:interval[1]] = column 247 | 248 | return target[:n_total] 249 | 250 | def decode_events(self, encoded, transition=None, p_state=None, p_init=None): 251 | '''Decode labeled events into (time, value) pairs 252 | 253 | Real-valued inputs are thresholded at 0.5. 254 | 255 | Optionally, viterbi decoding can be applied to each event class. 256 | 257 | Parameters 258 | ---------- 259 | encoded : np.ndarray, shape=(n_frames, m) 260 | Frame-level annotation encodings as produced by ``encode_events``. 
261 | 262 | transition : None or np.ndarray [shape=(2, 2) or (m, 2, 2)] 263 | Optional transition matrix for each event, used for Viterbi 264 | 265 | p_state : None or np.ndarray [shape=(m,)] 266 | Optional marginal probability for each event 267 | 268 | p_init : None or np.ndarray [shape=(m,)] 269 | Optional marginal probability for each event 270 | 271 | Returns 272 | ------- 273 | [(time, value)] : iterable of tuples 274 | where `time` is the event time and `value` is an 275 | np.ndarray, shape=(m,) of the encoded value at that time 276 | 277 | See Also 278 | -------- 279 | librosa.sequence.viterbi_binary 280 | ''' 281 | if np.isrealobj(encoded): 282 | if transition is None: 283 | encoded = (encoded >= 0.5) 284 | else: 285 | encoded = viterbi_binary(encoded.T, transition, 286 | p_state=p_state, 287 | p_init=p_init).T 288 | 289 | times = times_like(encoded, 290 | sr=self.sr, 291 | hop_length=self.hop_length, 292 | axis=0) 293 | 294 | return zip(times, encoded) 295 | 296 | def decode_intervals(self, encoded, duration=None, multi=True, sparse=False, 297 | transition=None, p_state=None, p_init=None): 298 | '''Decode labeled intervals into (start, end, value) triples 299 | 300 | Parameters 301 | ---------- 302 | encoded : np.ndarray, shape=(n_frames, m) 303 | Frame-level annotation encodings as produced by 304 | ``encode_intervals`` 305 | 306 | duration : None or float > 0 307 | The max duration of the annotation (in seconds) 308 | Must be greater than the length of encoded array. 309 | 310 | multi : bool 311 | If true, allow multiple labels per input frame. 312 | If false, take the most likely label per input frame. 313 | 314 | sparse : bool 315 | If true, values are returned as indices, not one-hot. 316 | If false, values are returned as one-hot encodings. 317 | 318 | Only applies when `multi=False`. 319 | 320 | transition : None or np.ndarray [shape=(m, m) or (2, 2) or (m, 2, 2)] 321 | Optional transition matrix for each interval, used for Viterbi 322 | decoding. 
If `multi=True`, then transition should be `(2, 2)` or 323 | `(m, 2, 2)`-shaped. If `multi=False`, then transition should be 324 | `(m, m)`-shaped. 325 | 326 | p_state : None or np.ndarray [shape=(m,)] 327 | Optional marginal probability for each label. 328 | 329 | p_init : None or np.ndarray [shape=(m,)] 330 | Optional marginal probability for each label. 331 | 332 | Returns 333 | ------- 334 | [(start, end, value)] : iterable of tuples 335 | where `start` and `end` are the interval boundaries (in seconds) 336 | and `value` is an np.ndarray, shape=(m,) of the encoded value 337 | for this interval. 338 | ''' 339 | if np.isrealobj(encoded): 340 | if multi: 341 | if transition is None: 342 | encoded = encoded >= 0.5 343 | else: 344 | encoded = viterbi_binary(encoded.T, transition, 345 | p_init=p_init, p_state=p_state).T 346 | elif sparse and encoded.shape[1] > 1: 347 | # map to argmax if it's densely encoded (logits) 348 | if transition is None: 349 | encoded = np.argmax(encoded, axis=1)[:, np.newaxis] 350 | else: 351 | encoded = viterbi_discriminative(encoded.T, transition, 352 | p_init=p_init, 353 | p_state=p_state)[:, np.newaxis] 354 | elif not sparse: 355 | # if dense and multi, map to one-hot encoding 356 | if transition is None: 357 | encoded = (encoded == np.max(encoded, axis=1, keepdims=True)) 358 | else: 359 | encoded_ = viterbi_discriminative(encoded.T, transition, 360 | p_init=p_init, 361 | p_state=p_state) 362 | # Map to one-hot encoding 363 | encoded = np.zeros(encoded.shape, dtype=bool) 364 | encoded[np.arange(len(encoded_)), encoded_] = True 365 | 366 | if duration is None: 367 | # 1+ is fair here, because encode_intervals already pads 368 | duration = 1 + encoded.shape[0] 369 | else: 370 | duration = 1 + time_to_frames(duration, 371 | sr=self.sr, 372 | hop_length=self.hop_length) 373 | 374 | # [0, duration] inclusive 375 | times = times_like(duration + 1, 376 | sr=self.sr, hop_length=self.hop_length) 377 | 378 | # Find the change-points of the rows 
379 | if sparse: 380 | idx = np.where(encoded[1:] != encoded[:-1])[0] 381 | else: 382 | idx = np.where(np.max(encoded[1:] != encoded[:-1], axis=-1))[0] 383 | 384 | idx = np.unique(np.append(idx, encoded.shape[0])) 385 | delta = np.diff(np.append(-1, idx)) 386 | 387 | # Starting positions can be integrated from changes 388 | position = np.cumsum(np.append(0, delta)) 389 | 390 | return [(times[p], times[p + d], encoded[p]) 391 | for (p, d) in zip(position, delta)] 392 | -------------------------------------------------------------------------------- /pumpp/task/beat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | '''Instantaneous event coding''' 4 | 5 | import numpy as np 6 | import jams 7 | 8 | from librosa import time_to_frames 9 | from librosa.sequence import transition_loop, transition_cycle 10 | from mir_eval.util import boundaries_to_intervals, adjust_intervals 11 | 12 | from .base import BaseTaskTransformer 13 | from ..exceptions import ParameterError 14 | from ..labels import LabelBinarizer, LabelEncoder 15 | 16 | __all__ = ['BeatTransformer', 'BeatPositionTransformer'] 17 | 18 | 19 | class BeatTransformer(BaseTaskTransformer): 20 | '''Task transformation for beat tracking 21 | 22 | Attributes 23 | ---------- 24 | name : str 25 | The name of this transformer 26 | 27 | sr : number > 0 28 | The audio sampling rate 29 | 30 | hop_length : int > 0 31 | The hop length for annotation frames 32 | 33 | p_self_beat : None, float in (0, 1), or np.ndarray [shape=(2,)] 34 | Optional self-loop probability(ies), used for Viterbi decoding 35 | 36 | p_state_beat : None or float in (0, 1) 37 | Optional marginal probability for beat state 38 | 39 | p_init_beat : None or float in (0, 1) 40 | Optional initial probability for beat state 41 | 42 | p_self_down : None, float in (0, 1), or np.ndarray [shape=(2,)] 43 | Optional self-loop probability(ies), used for Viterbi decoding 44 | 45 | 
p_state_down : None or float in (0, 1) 46 | Optional marginal probability for downbeat state 47 | 48 | p_init_down : None or float in (0, 1) 49 | Optional initial probability for downbeat state 50 | 51 | ''' 52 | def __init__(self, name='beat', sr=22050, hop_length=512, 53 | p_self_beat=None, p_init_beat=None, p_state_beat=None, 54 | p_self_down=None, p_init_down=None, p_state_down=None): 55 | 56 | super(BeatTransformer, self).__init__(name=name, 57 | namespace='beat', 58 | sr=sr, hop_length=hop_length) 59 | 60 | self.set_transition_beat(p_self_beat) 61 | 62 | if p_init_beat is not None: 63 | if not np.isscalar(p_init_beat): 64 | raise ParameterError('Invalid p_init_beat={}'.format(p_init_beat)) 65 | 66 | self.beat_p_init = p_init_beat 67 | 68 | if p_state_beat is not None: 69 | if not np.isscalar(p_state_beat): 70 | raise ParameterError('Invalid p_state_beat={}'.format(p_state_beat)) 71 | 72 | self.beat_p_state = p_state_beat 73 | 74 | self.set_transition_down(p_self_beat) 75 | 76 | if p_init_down is not None: 77 | if not np.isscalar(p_init_down): 78 | raise ParameterError('Invalid p_init_down={}'.format(p_init_down)) 79 | 80 | self.down_p_init = p_init_down 81 | 82 | if p_state_down is not None: 83 | if not np.isscalar(p_state_down): 84 | raise ParameterError('Invalid p_state_down={}'.format(p_state_down)) 85 | 86 | self.down_p_state = p_state_down 87 | 88 | self.register('beat', [None], bool) 89 | self.register('downbeat', [None], bool) 90 | self.register('mask_downbeat', [1], bool) 91 | 92 | def set_transition_beat(self, p_self): 93 | '''Set the beat-tracking transition matrix according to 94 | self-loop probabilities. 
95 | 96 | Parameters 97 | ---------- 98 | p_self : None, float in (0, 1), or np.ndarray [shape=(2,)] 99 | Optional self-loop probability(ies), used for Viterbi decoding 100 | ''' 101 | if p_self is None: 102 | self.beat_transition = None 103 | else: 104 | self.beat_transition = transition_loop(2, p_self) 105 | 106 | def set_transition_down(self, p_self): 107 | '''Set the downbeat-tracking transition matrix according to 108 | self-loop probabilities. 109 | 110 | Parameters 111 | ---------- 112 | p_self : None, float in (0, 1), or np.ndarray [shape=(2,)] 113 | Optional self-loop probability(ies), used for Viterbi decoding 114 | ''' 115 | if p_self is None: 116 | self.down_transition = None 117 | else: 118 | self.down_transition = transition_loop(2, p_self) 119 | 120 | def transform_annotation(self, ann, duration): 121 | '''Apply the beat transformer 122 | 123 | Parameters 124 | ---------- 125 | ann : jams.Annotation 126 | The input annotation 127 | 128 | duration : number > 0 129 | The duration of the audio 130 | 131 | Returns 132 | ------- 133 | data : dict 134 | data['beat'] : np.ndarray, shape=(n, 1) 135 | Binary indicator of beat/non-beat 136 | 137 | data['downbeat'] : np.ndarray, shape=(n, 1) 138 | Binary indicator of downbeat/non-downbeat 139 | 140 | mask_downbeat : bool 141 | True if downbeat annotations are present 142 | ''' 143 | 144 | mask_downbeat = False 145 | 146 | intervals, values = ann.to_interval_values() 147 | values = np.asarray(values) 148 | 149 | beat_events = intervals[:, 0] 150 | beat_labels = np.ones((len(beat_events), 1)) 151 | 152 | idx = (values == 1) 153 | if np.any(idx): 154 | downbeat_events = beat_events[idx] 155 | downbeat_labels = np.ones((len(downbeat_events), 1)) 156 | mask_downbeat = True 157 | else: 158 | downbeat_events = np.zeros(0) 159 | downbeat_labels = np.zeros((0, 1)) 160 | 161 | target_beat = self.encode_events(duration, 162 | beat_events, 163 | beat_labels) 164 | 165 | target_downbeat = self.encode_events(duration, 166 | 
downbeat_events, 167 | downbeat_labels) 168 | 169 | return {'beat': target_beat, 170 | 'downbeat': target_downbeat, 171 | 'mask_downbeat': mask_downbeat} 172 | 173 | def inverse(self, encoded, downbeat=None, duration=None): 174 | '''Inverse transformation for beats and optional downbeats''' 175 | 176 | ann = jams.Annotation(namespace=self.namespace, duration=duration) 177 | 178 | beat_times = np.asarray([t for t, _ in self.decode_events(encoded, 179 | transition=self.beat_transition, 180 | p_init=self.beat_p_init, 181 | p_state=self.beat_p_state) if _]) 182 | beat_frames = time_to_frames(beat_times, 183 | sr=self.sr, 184 | hop_length=self.hop_length) 185 | 186 | if downbeat is not None: 187 | downbeat_times = set([t for t, _ in self.decode_events(downbeat, 188 | transition=self.down_transition, 189 | p_init=self.down_p_init, 190 | p_state=self.down_p_state) if _]) 191 | pickup_beats = len([t for t in beat_times 192 | if t < min(downbeat_times)]) 193 | else: 194 | downbeat_times = set() 195 | pickup_beats = 0 196 | 197 | value = - pickup_beats - 1 198 | for beat_t, beat_f in zip(beat_times, beat_frames): 199 | if beat_t in downbeat_times: 200 | value = 1 201 | else: 202 | value += 1 203 | confidence = encoded[beat_f] 204 | ann.append(time=beat_t, 205 | duration=0, 206 | value=value, 207 | confidence=confidence) 208 | 209 | return ann 210 | 211 | 212 | class BeatPositionTransformer(BaseTaskTransformer): 213 | '''Encode beat- and downbeat-annotations as labeled intervals. 214 | 215 | This transformer assumes that the `value` field of a beat annotation 216 | encodes its metrical position (1, 2, 3, 4, ...). 217 | 218 | A `value` of 0 indicates that the beat does not belong to a bar, 219 | and should be used to indicate pickup beats. 220 | 221 | Beat position strings are coded as SUBDIVISION/POSITION 222 | 223 | For example, in 4/4 time, the 2 beat would be coded as "04/02". 
224 | ''' 225 | def __init__(self, name, max_divisions=12, 226 | sr=22050, hop_length=512, sparse=False): 227 | 228 | super(BeatPositionTransformer, self).__init__(name=name, 229 | namespace='beat', 230 | sr=sr, 231 | hop_length=hop_length) 232 | 233 | # Make the vocab set 234 | if not isinstance(max_divisions, int) or max_divisions < 1: 235 | raise ParameterError('Invalid max_divisions={}'.format(max_divisions)) 236 | 237 | self.max_divisions = max_divisions 238 | labels = self.vocabulary() 239 | self.sparse = sparse 240 | 241 | if self.sparse: 242 | self.encoder = LabelEncoder() 243 | else: 244 | self.encoder = LabelBinarizer() 245 | self.encoder.fit(labels) 246 | self._classes = set(self.encoder.classes_) 247 | 248 | # transitions should use transition_loop here 249 | # construct block-wise for each metrical length 250 | # initial-state distributions should be over X 251 | # X -> **/01 s 252 | 253 | if self.sparse: 254 | self.register('position', [None, 1], int) 255 | else: 256 | self.register('position', [None, len(self._classes)], bool) 257 | 258 | def vocabulary(self): 259 | states = ['X'] 260 | for d in range(1, self.max_divisions + 1): 261 | for n in range(1, d + 1): 262 | states.append('{:02d}/{:02d}'.format(d, n)) 263 | return states 264 | 265 | def transform_annotation(self, ann, duration): 266 | '''Transform an annotation to the beat-position encoding 267 | 268 | Parameters 269 | ---------- 270 | ann : jams.Annotation 271 | The annotation to convert 272 | 273 | duration : number > 0 274 | The duration of the track 275 | 276 | Returns 277 | ------- 278 | data : dict 279 | data['position'] : np.ndarray, shape=(n, n_labels) or (n, 1) 280 | A time-varying label encoding of beat position 281 | ''' 282 | 283 | # 1. get all the events 284 | # 2. find all the downbeats 285 | # 3. map each downbeat to a subdivision counter 286 | # number of beats until the next downbeat 287 | # 4. pad out events to intervals 288 | # 5. 
encode each beat interval to its position 289 | 290 | boundaries, values = ann.to_interval_values() 291 | # Convert to intervals and span the duration 292 | # padding at the end of track does not propagate the right label 293 | # this is an artifact of inferring end-of-track from boundaries though 294 | boundaries = list(boundaries[:, 0]) 295 | if boundaries and boundaries[-1] < duration: 296 | boundaries.append(duration) 297 | intervals = boundaries_to_intervals(boundaries) 298 | intervals, values = adjust_intervals(intervals, values, 299 | t_min=0, 300 | t_max=duration, 301 | start_label=0, 302 | end_label=0) 303 | 304 | values = np.asarray(values, dtype=int) 305 | downbeats = np.flatnonzero(values == 1) 306 | 307 | position = [] 308 | for i, v in enumerate(values): 309 | # If the value is a 0, mark it as X and move on 310 | if v == 0: 311 | position.extend(self.encoder.transform(['X'])) 312 | continue 313 | 314 | # Otherwise, let's try to find the surrounding downbeats 315 | prev_idx = np.searchsorted(downbeats, i, side='right') - 1 316 | next_idx = 1 + prev_idx 317 | 318 | if prev_idx >= 0 and next_idx < len(downbeats): 319 | # In this case, the subdivision is well-defined 320 | subdivision = downbeats[next_idx] - downbeats[prev_idx] 321 | elif prev_idx < 0 and next_idx < len(downbeats): 322 | subdivision = np.max(values[:downbeats[0]+1]) 323 | elif next_idx >= len(downbeats): 324 | subdivision = len(values) - downbeats[prev_idx] 325 | 326 | if subdivision > self.max_divisions or subdivision < 1: 327 | position.extend(self.encoder.transform(['X'])) 328 | else: 329 | position.extend(self.encoder.transform(['{:02d}/{:02d}'.format(subdivision, v)])) 330 | 331 | dtype = self.fields[self.scope('position')].dtype 332 | 333 | position = np.asarray(position) 334 | if self.sparse: 335 | position = position[:, np.newaxis] 336 | 337 | target = self.encode_intervals(duration, intervals, position, 338 | multi=False, dtype=dtype) 339 | return {'position': target} 340 | 341 | 
def inverse(self, encoded, duration=None): 342 | '''Inverse transformation''' 343 | 344 | raise NotImplementedError 345 | -------------------------------------------------------------------------------- /pumpp/task/chord.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | '''Chord recognition task transformer''' 4 | 5 | import re 6 | from itertools import product 7 | 8 | import numpy as np 9 | import mir_eval 10 | import jams 11 | 12 | from librosa import time_to_frames 13 | from librosa.sequence import transition_loop 14 | 15 | from .base import BaseTaskTransformer 16 | from ..exceptions import ParameterError 17 | from ..labels import LabelBinarizer, LabelEncoder, MultiLabelBinarizer 18 | 19 | __all__ = ['ChordTransformer', 'SimpleChordTransformer', 'ChordTagTransformer'] 20 | 21 | 22 | def _pad_nochord(target, axis=-1): 23 | '''Pad a chord annotation with no-chord flags. 24 | 25 | Parameters 26 | ---------- 27 | target : np.ndarray 28 | the input data 29 | 30 | axis : int 31 | the axis along which to pad 32 | 33 | Returns 34 | ------- 35 | target_pad 36 | `target` expanded by 1 along the specified `axis`. 37 | The expanded dimension will be 0 when `target` is non-zero 38 | before padding, and 1 otherwise. 39 | ''' 40 | ncmask = ~np.max(target, axis=axis, keepdims=True) 41 | 42 | return np.concatenate([target, ncmask], axis=axis) 43 | 44 | 45 | class ChordTransformer(BaseTaskTransformer): 46 | '''Chord annotation transformers. 47 | 48 | This transformer uses a (pitch, root, bass) decomposition of 49 | chord annotations. 50 | 51 | Attributes 52 | ---------- 53 | name : str 54 | The name of the chord transformer 55 | 56 | sr : number > 0 57 | The sampling rate of audio 58 | 59 | hop_length : int > 0 60 | The number of samples between each annotation frame 61 | 62 | sparse : bool 63 | If True, root and bass values are sparsely encoded as integers in [0, 12]. 
class ChordTransformer(BaseTaskTransformer):
    '''Chord annotation transformers.

    This transformer uses a (pitch, root, bass) decomposition of
    chord annotations.

    Attributes
    ----------
    name : str
        The name of the chord transformer

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The number of samples between each annotation frame

    sparse : bool
        If True, root and bass values are sparsely encoded as integers in [0, 12].
        If False, root and bass values are densely encoded as 13-dimensional booleans.

    See Also
    --------
    SimpleChordTransformer
    '''
    def __init__(self, name='chord', sr=22050, hop_length=512, sparse=False):
        '''Initialize a chord task transformer'''

        super(ChordTransformer, self).__init__(name=name,
                                               namespace='chord',
                                               sr=sr, hop_length=hop_length)

        # Binarizer over the 12 pitch classes (0=C, ..., 11=B)
        self.encoder = MultiLabelBinarizer()
        self.encoder.fit([list(range(12))])
        self._classes = set(self.encoder.classes_)
        self.sparse = sparse

        # Pitch activations are always a dense 12-d boolean vector.
        # Root and bass are either a single integer in [0, 12] (sparse)
        # or a 13-d one-hot boolean vector (dense); index 12 / the extra
        # final dimension encodes "no chord".
        self.register('pitch', [None, 12], bool)
        if self.sparse:
            self.register('root', [None, 1], int)
            self.register('bass', [None, 1], int)
        else:
            self.register('root', [None, 13], bool)
            self.register('bass', [None, 13], bool)

    def empty(self, duration):
        '''Empty chord annotations

        Parameters
        ----------
        duration : number
            The length (in seconds) of the empty annotation

        Returns
        -------
        ann : jams.Annotation
            A chord annotation consisting of a single `no-chord` observation.
        '''
        ann = super(ChordTransformer, self).empty(duration)

        ann.append(time=0,
                   duration=duration,
                   value='N', confidence=0)

        return ann

    def transform_annotation(self, ann, duration):
        '''Apply the chord transformation.

        Parameters
        ----------
        ann : jams.Annotation
            The chord annotation

        duration : number > 0
            The target duration

        Returns
        -------
        data : dict
            data['pitch'] : np.ndarray, shape=(n, 12)
            data['root'] : np.ndarray, shape=(n, 13) or (n, 1)
            data['bass'] : np.ndarray, shape=(n, 13) or (n, 1)

            `pitch` is a binary matrix indicating pitch class
            activation at each frame.

            `root` is a one-hot matrix indicating the chord
            root's pitch class at each frame.

            `bass` is a one-hot matrix indicating the chord
            bass (lowest note) pitch class at each frame.

            If sparsely encoded, `root` and `bass` are integers
            in the range [0, 12] where 12 indicates no chord.

            If densely encoded, `root` and `bass` have an extra
            final dimension which is active when there is no chord
            sounding.
        '''
        # Construct a blank annotation with mask = 0
        intervals, chords = ann.to_interval_values()

        # Get the dtype for root/bass
        if self.sparse:
            dtype = int
        else:
            dtype = bool

        # If we don't have any labeled intervals, fill in a no-chord
        # spanning the full duration
        if not chords:
            intervals = np.asarray([[0, duration]])
            chords = ['N']

        # Suppress all intervals not in the encoder
        pitches = []
        roots = []
        basses = []

        # default value when data is missing
        if self.sparse:
            fill = 12
        else:
            fill = False

        for chord in chords:
            # Encode the pitches: mir_eval returns the root pitch class
            # and a root-relative semitone bitmap, so roll the bitmap
            # into absolute pitch-class positions.
            root, semi, bass = mir_eval.chord.encode(chord)
            pitches.append(np.roll(semi, root))

            if self.sparse:
                if root in self._classes:
                    roots.append([root])
                    # bass is encoded relative to the root
                    basses.append([(root + bass) % 12])
                else:
                    # Unknown root (e.g. 'N' or 'X'): use the no-chord fill
                    roots.append([fill])
                    basses.append([fill])
            else:
                if root in self._classes:
                    roots.extend(self.encoder.transform([[root]]))
                    basses.extend(self.encoder.transform([[(root + bass) % 12]]))
                else:
                    # Empty label set -> all-zero one-hot row
                    roots.extend(self.encoder.transform([[]]))
                    basses.extend(self.encoder.transform([[]]))

        pitches = np.asarray(pitches, dtype=bool)
        roots = np.asarray(roots, dtype=dtype)
        basses = np.asarray(basses, dtype=dtype)

        target_pitch = self.encode_intervals(duration, intervals, pitches)

        target_root = self.encode_intervals(duration, intervals, roots,
                                            multi=False,
                                            dtype=dtype,
                                            fill=fill)
        target_bass = self.encode_intervals(duration, intervals, basses,
                                            multi=False,
                                            dtype=dtype,
                                            fill=fill)

        # Dense encoding gets an explicit extra "no chord" dimension
        if not self.sparse:
            target_root = _pad_nochord(target_root)
            target_bass = _pad_nochord(target_bass)

        return {'pitch': target_pitch,
                'root': target_root,
                'bass': target_bass}

    def inverse(self, pitch, root, bass, duration=None):
        # Decomposed (pitch, root, bass) targets cannot be uniquely
        # mapped back to chord labels.
        raise NotImplementedError('Chord cannot be inverted')
class SimpleChordTransformer(ChordTransformer):
    '''Simplified chord transformations.  Only pitch class activity is encoded.

    Attributes
    ----------
    name : str
        name of the transformer

    sr : number > 0
        Sampling rate of audio

    hop_length : int > 0
        Hop length for annotation frames

    See Also
    --------
    ChordTransformer
    '''
    def __init__(self, name='chord', sr=22050, hop_length=512):
        super(SimpleChordTransformer, self).__init__(name=name,
                                                     sr=sr,
                                                     hop_length=hop_length)
        # The root and bass fields inherited from ChordTransformer are
        # not used by this simplified task; drop them from the registry.
        for field in ('root', 'bass'):
            self.pop(field)

    def transform_annotation(self, ann, duration):
        '''Apply the chord transformation.

        Parameters
        ----------
        ann : jams.Annotation
            The chord annotation

        duration : number > 0
            The target duration

        Returns
        -------
        data : dict
            data['pitch'] : np.ndarray, shape=(n, 12)

            `pitch` is a binary matrix indicating pitch class
            activation at each frame.
        '''
        # Run the full (pitch, root, bass) transformation, then keep
        # only the pitch-class activations.
        data = super(SimpleChordTransformer,
                     self).transform_annotation(ann, duration)

        for field in ('root', 'bass'):
            data.pop(field, None)

        return data

    def inverse(self, *args, **kwargs):
        raise NotImplementedError('SimpleChord cannot be inverted')


'''A list of normalized pitch class names'''
PITCHES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']


'''A mapping of chord quality encodings to their names'''
QUALITIES = {
    0b000100000000: 'min',
    0b000010000000: 'maj',
    0b000100010000: 'min',
    0b000010010000: 'maj',
    0b000100100000: 'dim',
    0b000010001000: 'aug',
    0b000100010010: 'min7',
    0b000010010001: 'maj7',
    0b000010010010: '7',
    0b000100100100: 'dim7',
    0b000100100010: 'hdim7',
    0b000100010001: 'minmaj7',
    0b000100010100: 'min6',
    0b000010010100: 'maj6',
    0b001000010000: 'sus2',
    0b000001010000: 'sus4'
}
class ChordTagTransformer(BaseTaskTransformer):
    '''Chord transformer that uses a tag-space encoding for chord labels.

    Attributes
    ----------
    name : str
        name of the transformer

    vocab : str

        A string of chord quality indicators to include:

            - '3': maj/min
            - '5': '3' + aug/dim
            - '6': '3' + '5' + maj6/min6
            - '7': '3' + '5' + '6' + 7/min7/maj7/dim7/hdim7/minmaj7
            - 's': sus2/sus4

        Note: 5 requires 3, 6 requires 5, 7 requires 6.

    sr : number > 0
        Sampling rate of audio

    hop_length : int > 0
        Hop length for annotation frames

    sparse : Bool
        Whether or not to use sparse encoding for the labels

    p_self : None, float in (0, 1), or np.ndarray [shape=(n_labels,)]
        Optional self-loop probability(ies), used for Viterbi decoding

    p_state : None or np.ndarray [shape=(n_labels,)]
        Optional marginal probability for each chord class

    p_init : None or np.ndarray [shape=(n_labels,)]
        Optional initial probability for each chord class

    Notes
    -----
    The number of chord classes (`n_labels`) depends on the vocabulary:

        - '3' => 2 + 12 * 2 = 26
        - '35' => 2 + 12 * 4 = 50
        - '356' => 2 + 12 * 6 = 74
        - '3567' => 2 + 12 * 12 = 146
        - '3567s' => 2 + 12 * 14 = 170

    See Also
    --------
    ChordTransformer
    SimpleChordTransformer
    '''
    def __init__(self, name='chord', vocab='3567s',
                 sr=22050, hop_length=512, sparse=False,
                 p_self=None, p_init=None, p_state=None):

        super(ChordTagTransformer, self).__init__(name=name,
                                                  namespace='chord',
                                                  sr=sr,
                                                  hop_length=hop_length)

        # Validate the vocabulary string: only characters from '3567s'
        # are allowed, and each level requires the one below it
        # ('5' needs '3', '6' needs '5', '7' needs '6').
        if set(vocab) - set('3567s'):
            raise ParameterError('Invalid vocabulary string: {}'.format(vocab))

        if '5' in vocab and '3' not in vocab:
            raise ParameterError('Invalid vocabulary string: {}'.format(vocab))

        if '6' in vocab and '5' not in vocab:
            raise ParameterError('Invalid vocabulary string: {}'.format(vocab))

        if '7' in vocab and '6' not in vocab:
            raise ParameterError('Invalid vocabulary string: {}'.format(vocab))

        self.vocab = vocab.lower()
        labels = self.vocabulary()
        self.sparse = sparse

        # Sparse targets are integer class indices; dense targets are
        # one-hot boolean vectors.
        if self.sparse:
            self.encoder = LabelEncoder()
        else:
            self.encoder = LabelBinarizer()
        self.encoder.fit(labels)
        self._classes = set(self.encoder.classes_)

        self.set_transition(p_self)

        if p_init is not None:
            if len(p_init) != len(self._classes):
                raise ParameterError('Invalid p_init.shape={} for vocabulary {} size={}'.format(p_init.shape, vocab, len(self._classes)))

        self.p_init = p_init

        if p_state is not None:
            if len(p_state) != len(self._classes):
                raise ParameterError('Invalid p_state.shape={} for vocabulary {} size={}'.format(p_state.shape, vocab, len(self._classes)))

        self.p_state = p_state

        # Construct the quality mask for chord encoding.
        # Each quality is a 12-bit integer where bit (11 - pc) is set
        # when relative pitch class `pc` sounds (see `simplify`, which
        # hashes a chord by dotting the reversed pitch vector against
        # powers of two).  The mask keeps only the scale degrees that
        # the chosen vocabulary can distinguish.
        self.mask_ = 0b000000000000
        if '3' in self.vocab:
            self.mask_ |= 0b000110000000
        if '5' in self.vocab:
            self.mask_ |= 0b000110111000
        if '6' in self.vocab:
            self.mask_ |= 0b000110010100
        if '7' in self.vocab:
            self.mask_ |= 0b000110110111
        if 's' in self.vocab:
            self.mask_ |= 0b001001010000

        if self.sparse:
            self.register('chord', [None, 1], int)
        else:
            self.register('chord', [None, len(self._classes)], bool)

    def set_transition(self, p_self):
        '''Set the transition matrix according to self-loop probabilities.

        Parameters
        ----------
        p_self : None, float in (0, 1), or np.ndarray [shape=(n_labels,)]
            Optional self-loop probability(ies), used for Viterbi decoding
        '''
        if p_self is None:
            self.transition = None
        else:
            self.transition = transition_loop(len(self._classes), p_self)

    def empty(self, duration):
        '''Empty chord annotations

        Parameters
        ----------
        duration : number
            The length (in seconds) of the empty annotation

        Returns
        -------
        ann : jams.Annotation
            A chord annotation consisting of a single `no-chord` observation.
        '''
        ann = super(ChordTagTransformer, self).empty(duration)

        ann.append(time=0,
                   duration=duration,
                   value='X', confidence=0)

        return ann

    def vocabulary(self):
        '''Build the chord label list for the current vocabulary.

        Returns
        -------
        labels : list of str
            'N' and 'X', plus one '<pitch>:<quality>' label for every
            (pitch class, quality) pair in the vocabulary.
        '''
        qualities = []

        # '5' implies '3' (enforced in __init__), so this disjunction
        # is equivalent to testing '3' alone; kept for clarity.
        if '3' in self.vocab or '5' in self.vocab:
            qualities.extend(['min', 'maj'])

        if '5' in self.vocab:
            qualities.extend(['dim', 'aug'])

        if '6' in self.vocab:
            qualities.extend(['min6', 'maj6'])

        if '7' in self.vocab:
            qualities.extend(['min7', 'maj7', '7', 'dim7', 'hdim7', 'minmaj7'])

        if 's' in self.vocab:
            qualities.extend(['sus2', 'sus4'])

        labels = ['N', 'X']

        for chord in product(PITCHES, qualities):
            labels.append('{}:{}'.format(*chord))

        return labels

    def simplify(self, chord):
        '''Simplify a chord string down to the vocabulary space'''
        # Drop inversions
        chord = re.sub(r'/.*$', r'', chord)
        # Drop any additional or suppressed tones
        chord = re.sub(r'\(.*?\)', r'', chord)
        # Drop dangling : indicators
        chord = re.sub(r':$', r'', chord)

        # Encode the chord: root pitch class + root-relative pitch vector
        root, pitches, _ = mir_eval.chord.encode(chord)

        # Build the query
        # To map the binary vector pitches down to bit masked integer,
        # we just dot against powers of 2
        P = 2**np.arange(12, dtype=int)
        query = self.mask_ & pitches[::-1].dot(P)

        # No root and an explicit 'N' label -> no-chord
        if root < 0 and chord[0].upper() == 'N':
            return 'N'
        # Anything the vocabulary cannot express -> unknown chord
        if query not in QUALITIES:
            return 'X'

        return '{}:{}'.format(PITCHES[root], QUALITIES[query])

    def transform_annotation(self, ann, duration):
        '''Transform an annotation to chord-tag encoding

        Parameters
        ----------
        ann : jams.Annotation
            The annotation to convert

        duration : number > 0
            The duration of the track

        Returns
        -------
        data : dict
            data['chord'] : np.ndarray, shape=(n, n_labels) or (n, 1)
                A time-varying binary (or sparse integer) encoding
                of the chords, depending on self.sparse.
        '''

        intervals, values = ann.to_interval_values()

        chords = []
        for v in values:
            chords.extend(self.encoder.transform([self.simplify(v)]))

        dtype = self.fields[self.scope('chord')].dtype

        chords = np.asarray(chords)

        if self.sparse:
            # LabelEncoder yields a flat index array; add a column axis
            chords = chords[:, np.newaxis]

        target = self.encode_intervals(duration, intervals, chords,
                                       multi=False, dtype=dtype)

        return {'chord': target}

    def inverse(self, encoded, duration=None):
        '''Inverse transformation'''

        ann = jams.Annotation(self.namespace, duration=duration)

        for start, end, value in self.decode_intervals(encoded,
                                                       duration=duration,
                                                       multi=False,
                                                       sparse=self.sparse,
                                                       transition=self.transition,
                                                       p_init=self.p_init,
                                                       p_state=self.p_state):

            # Map start:end to frames
            f_start, f_end = time_to_frames([start, end],
                                            sr=self.sr,
                                            hop_length=self.hop_length)

            # Reverse the index
            if self.sparse:
                # Compute the confidence
                if encoded.shape[1] == 1:
                    # This case is for full-confidence prediction (just the index)
                    confidence = 1.
                else:
                    confidence = np.mean(encoded[f_start:f_end+1, value])

                value_dec = self.encoder.inverse_transform(value)
            else:
                confidence = np.mean(encoded[f_start:f_end+1, np.argmax(value)])
                value_dec = self.encoder.inverse_transform(np.atleast_2d(value))

            for vd in value_dec:
                ann.append(time=start,
                           duration=end-start,
                           value=vd,
                           confidence=float(confidence))

        return ann


#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''Key recognition task transformer'''

from itertools import product
import logging

import numpy as np
import mir_eval
import jams

from librosa import note_to_midi, midi_to_note, time_to_frames, key_to_degrees
from librosa.sequence import transition_loop

from .base import BaseTaskTransformer
from ..exceptions import ParameterError
from ..labels import LabelBinarizer, LabelEncoder, MultiLabelBinarizer

__all__ = ['KeyTransformer', 'KeyTagTransformer']

C_MAJOR_PITCHES = key_to_degrees('C:maj')
MODES = ['ionian', 'dorian', 'phrygian', 'lydian', 'mixolydian', 'aeolian', 'locrian']
QUALITY = {'major' : 0, 'minor' : -3}
def _encode_key_str(key_str, sparse):
    '''Helper function to go from jams `key_mode` annotation value strings to 12-D
    numpy membership vec, representing the pitch profile.

    Parameters
    ----------
    key_str : str
        String in the style of 'key_mode' jams annotation values.
    sparse : bool
        Whether or not to use sparse encoding for the tonic field.

    Returns
    -------
    (pitch_profile, tonic) : tuple
        pitch_profile : np.ndarray, shape = (12,)
            a 12-D vector encoding the membership of each pitch class for
            a given `key_str`.  All zeros when the key is atonal ('N').
            NOTE: the dtype is float for tonal keys and bool for the
            atonal case; callers cast to bool.
        tonic : int or np.ndarray, shape = (13,), dtype = bool
            an int in the range [0, 12] to indicate the pitch class of the
            tonic, 12 being atonal.  The type depends on the `sparse`
            parameter: integer when sparse, 13-D one-hot boolean otherwise.
    '''

    key_str_split = key_str.split(':')

    # Look at the Tonic first
    if key_str_split[0] == 'N':
        tonic = 12
    else:
        tonic = note_to_midi(key_str_split[0]) % 12

    # Now look at quality/mode and build pitch_profile
    # First construct the profile in C for a given mode/quality
    c_major_profile = np.zeros(12)
    for pc in C_MAJOR_PITCHES:
        c_major_profile[pc] = 1

    # When there is no tonal center, pitch profile is all zeros.
    if tonic == 12:
        pitch_profile = np.zeros(12, dtype=bool)
    else:
        # When there is no quality, major assumed.
        if len(key_str_split) == 1:
            quality = 'major'
        else:
            quality = key_str_split[1]

        if quality in MODES:
            # Transpose down by the mode's starting degree in C major
            mode_transpose_int = -1 * C_MAJOR_PITCHES[MODES.index(quality)]
        elif quality in QUALITY.keys():
            mode_transpose_int = -1 * QUALITY[quality]
        else:
            # Unrecognized qualities fall back to major (no transposition)
            logging.info(
                '{} is not a recognized quality. Using major instead.'.format(quality)
            )
            mode_transpose_int = 0

        # roll the profile to fit different modes.
        mode_profile_in_c = np.roll(c_major_profile, mode_transpose_int)
        # Add the leading tone to the minor profiles
        if quality == 'minor':
            mode_profile_in_c[11] = 1

        # Now roll the profile again to get the right tonic.
        pitch_profile = np.roll(mode_profile_in_c, tonic)

    if not sparse:
        # Densify the tonic into a 13-D one-hot vector
        tonic_vec = np.zeros(13, dtype=bool)
        tonic_vec[tonic] = 1
        tonic = tonic_vec

    return (pitch_profile, tonic)


class KeyTransformer(BaseTaskTransformer):
    '''Key annotation transformer.

    This transformer uses a (pitch_profile, tonic) decomposition of key_mode
    annotation, where the mode is reflected in the 12-D pitch_profile vector.

    Attributes
    ----------
    name : str
        The name of the key transformer

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The number of samples between each annotation frame

    sparse : bool
        If True, tonic value is sparsely encoded as integers in [0, 12].
        If False, tonic value is densely encoded as 13-dimensional booleans.
    '''
    def __init__(self, name='key', sr=22050, hop_length=512, sparse=False):
        '''Initialize a key task transformer'''

        super(KeyTransformer, self).__init__(name=name,
                                             namespace='key_mode',
                                             sr=sr, hop_length=hop_length)
        self.sparse = sparse

        self.register('pitch_profile', [None, 12], bool)
        if self.sparse:
            self.register('tonic', [None, 1], int)
        else:
            self.register('tonic', [None, 13], bool)


    def empty(self, duration):
        '''Empty key annotation

        Parameters
        ----------
        duration : number
            The length (in seconds) of the empty annotation

        Returns
        -------
        ann : jams.Annotation
            A key_mode annotation consisting of a single `no-key` observation.
        '''
        ann = super(KeyTransformer, self).empty(duration)

        ann.append(time=0,
                   duration=duration,
                   value='N', confidence=0)

        return ann

    def transform_annotation(self, ann, duration):
        '''Apply the key transformation.

        Parameters
        ----------
        ann : jams.Annotation
            The key_mode annotation

        duration : number > 0
            The target duration

        Returns
        -------
        data : dict
            data['pitch_profile'] : np.ndarray, shape=(n, 12)
            data['tonic'] : np.ndarray, shape=(n, 13) or (n, 1)

            `pitch_profile` is a binary matrix indicating pitch class
            activation at each frame.

            `tonic` is a one-hot matrix indicating the tonal center's
            pitch class at each frame.

            If sparsely encoded, `tonic` is a integer
            in the range [0, 12] where 12 indicates atonal.

            If densely encoded, `tonic` has an extra
            final dimension which is active when it is atonal.
        '''
        # get list of observations
        intervals, keys = ann.to_interval_values()

        # Get the dtype for tonic
        if self.sparse:
            dtype = int
        else:
            dtype = bool

        # If we don't have any labeled intervals, fill in a 'N'
        if not keys:
            intervals = np.asarray([[0, duration]])
            keys = ['N']

        # Suppress all intervals not in the encoder
        pitch_profiles = []
        tonics = []

        # default value when data is missing
        if self.sparse:
            fill = 12
        else:
            fill = False

        for key in keys:
            pitch_profile, tonic = _encode_key_str(key, self.sparse)
            pitch_profiles.append(pitch_profile)
            # Sparse tonics come back as bare ints; wrap them so every
            # row has a consistent (1,)-shaped entry.
            tonics.append(tonic if isinstance(tonic, np.ndarray) else [tonic])

        pitch_profiles = np.asarray(pitch_profiles, dtype=bool)
        tonics = np.asarray(tonics, dtype=dtype)

        target_pitch_profile = self.encode_intervals(duration, intervals, pitch_profiles)

        target_tonic = self.encode_intervals(duration, intervals, tonics,
                                             multi=False,
                                             dtype=dtype,
                                             fill=fill)

        return {'pitch_profile': target_pitch_profile,
                'tonic': target_tonic}

    def inverse(self, pitch_profile, tonic, duration=None):
        raise NotImplementedError('There are some ambiguities, also streaming profiles are difficult')
class KeyTagTransformer(BaseTaskTransformer):
    '''Key transformer that uses a tag-space encoding for key labels.

    Attributes
    ----------
    name : str
        name of the transformer

    sr : number > 0
        Sampling rate of audio

    hop_length : int > 0
        Hop length for annotation frames

    sparse : Bool
        Whether or not to use sparse encoding for the labels

    p_self : None, float in (0, 1), or np.ndarray [shape=(n_labels,)]
        Optional self-loop probability(ies), used for Viterbi decoding

    p_state : None or np.ndarray [shape=(n_labels,)]
        Optional marginal probability for each key class

    p_init : None or np.ndarray [shape=(n_labels,)]
        Optional initial probability for each key class

    See Also
    --------
    KeyTransformer
    ChordTagTransformer
    '''
    def __init__(self, name='key_tag',
                 sr=22050, hop_length=512, sparse=False,
                 p_self=None, p_init=None, p_state=None):

        super(KeyTagTransformer, self).__init__(name=name,
                                                namespace='key_mode',
                                                sr=sr,
                                                hop_length=hop_length)

        labels = self.vocabulary()
        self.sparse = sparse

        # Sparse targets are integer class indices; dense targets are
        # one-hot boolean vectors.
        if self.sparse:
            self.encoder = LabelEncoder()
        else:
            self.encoder = LabelBinarizer()
        self.encoder.fit(labels)
        self._classes = set(self.encoder.classes_)

        self.set_transition(p_self)

        if p_init is not None:
            if len(p_init) != len(self._classes):
                raise ParameterError('Invalid p_init.shape={} for vocabulary of size {}'.format(p_init.shape, len(self._classes)))

        self.p_init = p_init

        if p_state is not None:
            if len(p_state) != len(self._classes):
                raise ParameterError('Invalid p_state.shape={} for vocabulary of size {}'.format(p_state.shape, len(self._classes)))

        self.p_state = p_state

        if self.sparse:
            self.register('tag', [None, 1], int)
        else:
            self.register('tag', [None, len(self._classes)], bool)

    def set_transition(self, p_self):
        '''Set the transition matrix according to self-loop probabilities.

        Parameters
        ----------
        p_self : None, float in (0, 1), or np.ndarray [shape=(n_labels,)]
            Optional self-loop probability(ies), used for Viterbi decoding
        '''
        if p_self is None:
            self.transition = None
        else:
            self.transition = transition_loop(len(self._classes), p_self)

    def empty(self, duration):
        '''Empty key annotations

        Parameters
        ----------
        duration : number
            The length (in seconds) of the empty annotation

        Returns
        -------
        ann : jams.Annotation
            A key annotation consisting of a single `N` observation.
        '''
        ann = super(KeyTagTransformer, self).empty(duration)

        ann.append(time=0,
                   duration=duration,
                   value='N', confidence=0)

        return ann

    def vocabulary(self):
        ''' Build the vocabulary for all key_mode strings

        Returns
        -------
        labels : list
            list of string labels.
        '''
        qualities = MODES + list(QUALITY.keys())
        # 12 canonical tonic spellings (no octave, ASCII accidentals)
        tonics = midi_to_note(list(range(12)), octave=False, unicode=False)

        labels = ['N']

        for key_mode in product(tonics, qualities):
            labels.append('{}:{}'.format(*key_mode))

        return labels

    def enharmonic(self, key_str):
        '''Force the tonic spelling to fit our tonic list
        by spelling out of vocab keys enharmonically.

        Parameters
        ----------
        key_str : str
            The key_mode string in jams style.

        Returns
        -------
        key_str : str
            The key_mode string spelled enharmonically to fit our vocab.
        '''
        key_list = key_str.split(':')
        # spell the tonic enharmonically if necessary
        if key_list[0] != 'N':
            key_list[0] = midi_to_note(note_to_midi(key_list[0]), octave=False, unicode=False)
            # No explicit quality defaults to major
            if len(key_list) == 1:
                key_list.append('major')

        return ':'.join(key_list)

    def transform_annotation(self, ann, duration):
        '''Transform an annotation to key-tag encoding

        Parameters
        ----------
        ann : jams.Annotation
            The annotation to convert

        duration : number > 0
            The duration of the track

        Returns
        -------
        data : dict
            data['tag'] : np.ndarray, shape=(n, n_labels) or (n, 1)
                A time-varying binary (or sparse integer) encoding of
                the keys.  The shape depends on self.sparse.
        '''
        intervals, values = ann.to_interval_values()

        keys = []
        for v in values:
            keys.extend(self.encoder.transform([self.enharmonic(v)]))

        dtype = self.fields[self.scope('tag')].dtype

        keys = np.asarray(keys)

        if self.sparse:
            # LabelEncoder yields a flat index array; add a column axis
            keys = keys[:, np.newaxis]

        target = self.encode_intervals(duration, intervals, keys,
                                       multi=False, dtype=dtype)

        return {'tag': target}

    def inverse(self, encoded, duration=None):
        '''Inverse transformation'''

        ann = jams.Annotation(self.namespace, duration=duration)

        for start, end, value in self.decode_intervals(encoded,
                                                       duration=duration,
                                                       multi=False,
                                                       sparse=self.sparse,
                                                       transition=self.transition,
                                                       p_init=self.p_init,
                                                       p_state=self.p_state):

            # Map start:end to frames
            f_start, f_end = time_to_frames([start, end],
                                            sr=self.sr,
                                            hop_length=self.hop_length)

            # Reverse the index
            if self.sparse:
                # Compute the confidence
                if encoded.shape[1] == 1:
                    # This case is for full-confidence prediction (just the index)
                    confidence = 1.
                else:
                    confidence = np.mean(encoded[f_start:f_end+1, value])

                value_dec = self.encoder.inverse_transform(value)
            else:
                confidence = np.mean(encoded[f_start:f_end+1, np.argmax(value)])
                value_dec = self.encoder.inverse_transform(np.atleast_2d(value))

            for vd in value_dec:
                ann.append(time=start,
                           duration=end-start,
                           value=vd,
                           confidence=float(confidence))

        return ann


#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''Regression task transformers'''

import numpy as np

import jams

from .base import BaseTaskTransformer
from ..exceptions import DataError

__all__ = ['VectorTransformer']
class VectorTransformer(BaseTaskTransformer):
    '''Vector regression transformer.

    Attributes
    ----------
    name : str
        The name of this transformer

    namespace : str
        The target namespace of this transformer

    dimension : int > 0
        The dimension of the vector data

    dtype : np.dtype
        The desired data type of the output
    '''
    def __init__(self, name, namespace, dimension, dtype=np.float32):
        # sr=1, hop_length=1: vector targets are static (not framed)
        super(VectorTransformer, self).__init__(name=name,
                                                namespace=namespace,
                                                sr=1, hop_length=1)

        self.dimension = dimension
        self.dtype = dtype

        self.register('vector', [1, self.dimension], self.dtype)

    def empty(self, duration):
        '''Empty vector annotations.

        This returns an annotation with a single observation
        vector consisting of all-zeroes.

        Parameters
        ----------
        duration : number >0
            Length of the track

        Returns
        -------
        ann : jams.Annotation
            The empty annotation
        '''
        ann = super(VectorTransformer, self).empty(duration)

        # Use self.dtype rather than a hard-coded float32 so the empty
        # observation matches the dtype registered in __init__.
        ann.append(time=0, duration=duration, confidence=0,
                   value=np.zeros(self.dimension, dtype=self.dtype))
        return ann

    def transform_annotation(self, ann, duration):
        '''Apply the vector transformation.

        Parameters
        ----------
        ann : jams.Annotation
            The input annotation

        duration : number > 0
            The duration of the track

        Returns
        -------
        data : dict
            data['vector'] : np.ndarray, shape=(dimension,)

        Raises
        ------
        DataError
            If the input dimension does not match

        Notes
        -----
        Only the first observation of the annotation is used.
        '''
        _, values = ann.to_interval_values()
        vector = np.asarray(values[0], dtype=self.dtype)
        if len(vector) != self.dimension:
            # Fixed: the original used '{:0}'/'{:1}', which are format
            # *specs* (zero-pad / min-width), not field indices, and
            # only rendered correctly by coincidence.
            raise DataError('vector dimension({0}) '
                            '!= self.dimension({1})'
                            .format(len(vector), self.dimension))

        return {'vector': vector}

    def inverse(self, vector, duration=None):
        '''Inverse vector transformer'''

        ann = jams.Annotation(namespace=self.namespace, duration=duration)

        if duration is None:
            duration = 0
        ann.append(time=0, duration=duration, value=vector)

        return ann


#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''Segment and structure tasks'''

import numpy as np
from mir_eval.util import intervals_to_samples, index_labels, adjust_intervals

from .base import BaseTaskTransformer

__all__ = ['StructureTransformer']
class StructureTransformer(BaseTaskTransformer):
    '''Structure agreement transformer.

    This transformer maps a labeled, flat structural segmentation
    to an `n*n` boolean matrix indicating whether two frames
    belong to a similarly labeled segment or not.

    Attributes
    ----------
    name : str
        The name of this transformer

    sr : number > 0
        The audio sampling rate

    hop_length : int > 0
        The number of samples between each annotation frame
    '''

    def __init__(self, name='structure', sr=22050, hop_length=512):
        '''Initialize a structure agreement transformer'''

        super(StructureTransformer, self).__init__(name=name,
                                                   namespace='segment_open',
                                                   sr=sr,
                                                   hop_length=hop_length)

        self.register('agree', [None, None], bool)

    def empty(self, duration):
        # Default annotation: one catch-all segment labeled 'none'
        # spanning the full track
        ann = super(StructureTransformer, self).empty(duration)
        ann.append(time=0, duration=duration, value='none', confidence=0)
        return ann

    def transform_annotation(self, ann, duration):
        '''Apply the structure agreement transformation.

        Parameters
        ----------
        ann : jams.Annotation
            The segment annotation

        duration : number > 0
            The target duration

        Returns
        -------
        data : dict
            data['agree'] : np.ndarray, shape=(n, n), dtype=bool
        '''
        raw_intervals, raw_labels = ann.to_interval_values()

        # Pad/clip the segmentation so it spans exactly [0, duration]
        seg_intervals, seg_labels = adjust_intervals(raw_intervals,
                                                     raw_labels,
                                                     t_min=0,
                                                     t_max=duration)

        # Re-index the string labels as integer ids
        label_ids, _ = index_labels(seg_labels)

        # Sample the segment ids on our frame grid
        frame_period = float(self.hop_length) / self.sr
        _, frame_ids = intervals_to_samples(seg_intervals,
                                            label_ids,
                                            sample_size=frame_period)

        # Two frames agree iff they carry the same segment id
        return {'agree': np.equal.outer(frame_ids, frame_ids)}

    def inverse(self, agree, duration=None):
        # The agreement matrix discards label identity, so there is no
        # well-defined inverse mapping back to a segmentation
        raise NotImplementedError('Segment agreement cannot be inverted')
class DynamicLabelTransformer(BaseTaskTransformer):
    '''Time-series label transformer.

    Attributes
    ----------
    name : str
        The name of this transformer object

    namespace : str
        The JAMS namespace for this task

    labels : list of str [optional]
        The list of labels for this task.

        If not provided, it will attempt to infer the label set from the
        namespace definition.

    sr : number > 0
        The audio sampling rate

    hop_length : int > 0
        The hop length for annotation frames

    p_self : None, float in (0, 1), or np.ndarray [shape=(n_labels,)]
        Optional self-loop probability(ies), used for Viterbi decoding

    p_state : None or np.ndarray [shape=(n_labels,)]
        Optional marginal probability for each class

    p_init : None or np.ndarray [shape=(n_labels,)]
        Optional initial probability for each class


    See Also
    --------
    StaticLabelTransformer
    '''
    def __init__(self, name, namespace, labels=None, sr=22050, hop_length=512,
                 p_self=None, p_init=None, p_state=None):
        super(DynamicLabelTransformer, self).__init__(name=name,
                                                      namespace=namespace,
                                                      sr=sr,
                                                      hop_length=hop_length)

        if labels is None:
            labels = jams.schema.values(namespace)

        self.encoder = MultiLabelBinarizer()
        self.encoder.fit([labels])
        self._classes = set(self.encoder.classes_)

        self.set_transition(p_self)

        if p_init is not None:
            if len(p_init) != len(self._classes):
                # np.shape() also handles plain sequences; `p_init.shape`
                # would raise AttributeError on a list input
                raise ParameterError('Invalid p_init.shape={} for '
                                     'vocabulary size={}'.format(
                                         np.shape(p_init),
                                         len(self._classes)))

        self.p_init = p_init

        if p_state is not None:
            if len(p_state) != len(self._classes):
                raise ParameterError('Invalid p_state.shape={} for '
                                     'vocabulary size={}'.format(
                                         np.shape(p_state),
                                         len(self._classes)))

        self.p_state = p_state

        self.register('tags', [None, len(self._classes)], bool)

    def set_transition(self, p_self):
        '''Set the transition matrix according to self-loop probabilities.

        Parameters
        ----------
        p_self : None, float in (0, 1), or np.ndarray [shape=(n_labels,)]
            Optional self-loop probability(ies), used for Viterbi decoding
        '''
        if p_self is None:
            self.transition = None
        elif np.isscalar(p_self):
            # A single probability shared by all classes: one 2x2 matrix
            self.transition = transition_loop(2, p_self)
        elif len(p_self) != len(self._classes):
            # As in __init__, use np.shape() so list inputs produce the
            # intended error message instead of an AttributeError
            raise ParameterError('Invalid p_self.shape={} for '
                                 'vocabulary size={}'.format(
                                     np.shape(p_self), len(self._classes)))
        else:
            # Per-class on/off transition matrices.
            # (Allocate only on this path; previously the array was built
            # unconditionally and discarded in the scalar case.)
            self.transition = np.empty((len(self._classes), 2, 2))
            for i in range(len(self._classes)):
                self.transition[i] = transition_loop(2, p_self[i])

    def empty(self, duration):
        '''Empty label annotations.

        Constructs a single observation with an empty value (None).

        Parameters
        ----------
        duration : number > 0
            The duration of the annotation
        '''
        ann = super(DynamicLabelTransformer, self).empty(duration)
        ann.append(time=0, duration=duration, value=None)
        return ann

    def transform_annotation(self, ann, duration):
        '''Transform an annotation to dynamic label encoding.

        Parameters
        ----------
        ann : jams.Annotation
            The annotation to convert

        duration : number > 0
            The duration of the track

        Returns
        -------
        data : dict
            data['tags'] : np.ndarray, shape=(n, n_labels)
                A time-varying binary encoding of the labels
        '''
        intervals, values = ann.to_interval_values()

        # Suppress all intervals not in the encoder:
        # unknown labels are encoded as the all-zeros vector
        tags = []
        for v in values:
            if v in self._classes:
                tags.extend(self.encoder.transform([[v]]))
            else:
                tags.extend(self.encoder.transform([[]]))

        tags = np.asarray(tags)
        target = self.encode_intervals(duration, intervals, tags)

        return {'tags': target}

    def inverse(self, encoded, duration=None):
        '''Inverse transformation'''

        ann = jams.Annotation(namespace=self.namespace, duration=duration)
        for start, end, value in self.decode_intervals(encoded,
                                                       duration=duration,
                                                       transition=self.transition,
                                                       p_init=self.p_init,
                                                       p_state=self.p_state):
            # Map start:end to frames
            f_start, f_end = time_to_frames([start, end],
                                            sr=self.sr,
                                            hop_length=self.hop_length)

            confidence = np.mean(encoded[f_start:f_end+1, value])

            value_dec = self.encoder.inverse_transform(np.atleast_2d(value))[0]

            for vd in value_dec:
                ann.append(time=start,
                           duration=end-start,
                           value=vd,
                           # Cast the numpy scalar to a native float so the
                           # annotation serializes cleanly (consistent with
                           # the chord transformer's inverse)
                           confidence=float(confidence))

        return ann
194 | 195 | See Also 196 | -------- 197 | DynamicLabelTransformer 198 | ''' 199 | 200 | def __init__(self, name, namespace, labels=None): 201 | super(StaticLabelTransformer, self).__init__(name=name, 202 | namespace=namespace, 203 | sr=1, hop_length=1) 204 | 205 | if labels is None: 206 | labels = jams.schema.values(namespace) 207 | 208 | self.encoder = MultiLabelBinarizer() 209 | self.encoder.fit([labels]) 210 | self._classes = set(self.encoder.classes_) 211 | self.register('tags', [len(self._classes)], bool) 212 | 213 | def transform_annotation(self, ann, duration): 214 | '''Transform an annotation to static label encoding. 215 | 216 | Parameters 217 | ---------- 218 | ann : jams.Annotation 219 | The annotation to convert 220 | 221 | duration : number > 0 222 | The duration of the track 223 | 224 | Returns 225 | ------- 226 | data : dict 227 | data['tags'] : np.ndarray, shape=(n_labels,) 228 | A static binary encoding of the labels 229 | ''' 230 | intervals = np.asarray([[0, 1]]) 231 | values = list([obs.value for obs in ann]) 232 | intervals = np.tile(intervals, [len(values), 1]) 233 | 234 | # Suppress all intervals not in the encoder 235 | tags = [v for v in values if v in self._classes] 236 | if len(tags): 237 | target = self.encoder.transform([tags]).astype(bool).max(axis=0) 238 | else: 239 | target = np.zeros(len(self._classes), dtype=bool) 240 | 241 | return {'tags': target} 242 | 243 | def inverse(self, encoded, duration=None): 244 | '''Inverse static tag transformation''' 245 | 246 | ann = jams.Annotation(namespace=self.namespace, duration=duration) 247 | 248 | if np.isrealobj(encoded): 249 | detected = (encoded >= 0.5) 250 | else: 251 | detected = encoded 252 | 253 | for vd in self.encoder.inverse_transform(np.atleast_2d(detected))[0]: 254 | vid = np.flatnonzero(self.encoder.transform(np.atleast_2d(vd))) 255 | ann.append(time=0, 256 | duration=duration, 257 | value=vd, 258 | confidence=encoded[vid]) 259 | return ann 260 | 
-------------------------------------------------------------------------------- /pumpp/version.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Version info""" 4 | 5 | short_version = '0.6' 6 | version = '0.6.0' 7 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools >= 48", 4 | "wheel >= 0.29.0", 5 | ] 6 | build-backend = 'setuptools.build_meta' 7 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [pycodestyle] 2 | max-line-length=119 3 | 4 | [tool:pytest] 5 | addopts = --cov-report term-missing --cov pumpp --cov-report=xml 6 | xfail_strict = true 7 | filterwarnings = 8 | ignore:Using a non-tuple sequence:FutureWarning:scipy.* 9 | 10 | 11 | [coverage:report] 12 | omit = 13 | */python?.?/* 14 | */site-packages/nose/* 15 | pumpp/labels.py 16 | 17 | 18 | [metadata] 19 | name = pumpp 20 | version = attr: pumpp.version.version 21 | description = A practically universal music pre-processor 22 | long_description = file: README.md 23 | long_description_content_type = text/markdown; charset=UTF-8 24 | url = https://github.com/bmcfee/pumpp 25 | author = Brian McFee 26 | license = ISC 27 | license_file = LICENSE.md 28 | license_file_content_type = text/markdown; charset=UTF-8 29 | project_urls = 30 | Source = https://github.com/bmcfee/pumpp 31 | Download = https://github.com/bmcfee/pumpp/releases 32 | classifiers = 33 | License :: OSI Approved :: ISC License (ISCL) 34 | Programming Language :: Python 35 | Development Status :: 3 - Alpha 36 | Intended Audience :: Developers 37 | Topic :: Software Development 38 | Programming Language :: Python :: 3 39 | Programming Language :: 
Python :: 3.7 40 | Programming Language :: Python :: 3.8 41 | Programming Language :: Python :: 3.9 42 | 43 | [options] 44 | packages = find: 45 | install_requires = 46 | librosa >= 0.8.0 47 | jams >= 0.3 48 | scikit-learn >= 1.0 49 | mir_eval >= 0.5 50 | python_requires = >= 3.6 51 | 52 | [options.extras_require] 53 | docs = 54 | numpydoc 55 | sphinx 56 | tests = 57 | pytest 58 | pytest-cov 59 | keras >= 2.6 60 | tensorflow >= 2.0 61 | keras = 62 | keras >= 2.6 63 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | if __name__ == '__main__': 4 | setup() 5 | -------------------------------------------------------------------------------- /tests/data/test.jams: -------------------------------------------------------------------------------- 1 | { 2 | "sandbox": {}, 3 | "annotations": [ 4 | { 5 | "namespace": "chord", 6 | "sandbox": {}, 7 | "time": 0, 8 | "duration": 3.5, 9 | "annotation_metadata": { 10 | "annotation_tools": "", 11 | "curator": { 12 | "name": "", 13 | "email": "" 14 | }, 15 | "annotator": {}, 16 | "version": "", 17 | "corpus": "", 18 | "annotation_rules": "", 19 | "validation": "", 20 | "data_source": "" 21 | }, 22 | "data": [ 23 | { 24 | "duration": 0.5, 25 | "confidence": null, 26 | "value": "C:1", 27 | "time": 0.0 28 | }, 29 | { 30 | "duration": 0.5, 31 | "confidence": null, 32 | "value": "D:1", 33 | "time": 0.5 34 | }, 35 | { 36 | "duration": 0.5, 37 | "confidence": null, 38 | "value": "E:1", 39 | "time": 1.0 40 | }, 41 | { 42 | "duration": 0.5, 43 | "confidence": null, 44 | "value": "F:1", 45 | "time": 1.5 46 | }, 47 | { 48 | "duration": 0.5, 49 | "confidence": null, 50 | "value": "G:1", 51 | "time": 2.0 52 | }, 53 | { 54 | "duration": 0.5, 55 | "confidence": null, 56 | "value": "A:1", 57 | "time": 2.5 58 | }, 59 | { 60 | "duration": 0.5, 61 | "confidence": null, 62 | "value": "B:1", 
63 | "time": 3.0 64 | } 65 | ] 66 | }, 67 | { 68 | "namespace": "chord_harte", 69 | "sandbox": {}, 70 | "time": 0, 71 | "duration": 3.5, 72 | "annotation_metadata": { 73 | "annotation_tools": "", 74 | "curator": { 75 | "name": "", 76 | "email": "" 77 | }, 78 | "annotator": {}, 79 | "version": "", 80 | "corpus": "", 81 | "annotation_rules": "", 82 | "validation": "", 83 | "data_source": "" 84 | }, 85 | "data": [ 86 | { 87 | "duration": 0.5, 88 | "confidence": null, 89 | "value": "C:maj(*3,*5)", 90 | "time": 0.0 91 | }, 92 | { 93 | "duration": 0.5, 94 | "confidence": null, 95 | "value": "D:maj(*3,*5)", 96 | "time": 0.5 97 | }, 98 | { 99 | "duration": 0.5, 100 | "confidence": null, 101 | "value": "E:maj(*3,*5)", 102 | "time": 1.0 103 | }, 104 | { 105 | "duration": 0.5, 106 | "confidence": null, 107 | "value": "F:maj(*3,*5)", 108 | "time": 1.5 109 | }, 110 | { 111 | "duration": 0.5, 112 | "confidence": null, 113 | "value": "G:maj(*3,*5)", 114 | "time": 2.0 115 | }, 116 | { 117 | "duration": 0.5, 118 | "confidence": null, 119 | "value": "A:maj(*3,*5)", 120 | "time": 2.5 121 | }, 122 | { 123 | "duration": 0.5, 124 | "confidence": null, 125 | "value": "B:maj(*3,*5)", 126 | "time": 3.0 127 | } 128 | ] 129 | }, 130 | { 131 | "namespace": "chord_roman", 132 | "sandbox": {}, 133 | "time": 0, 134 | "duration": 3.5, 135 | "annotation_metadata": { 136 | "annotation_tools": "", 137 | "curator": { 138 | "name": "", 139 | "email": "" 140 | }, 141 | "annotator": {}, 142 | "version": "", 143 | "corpus": "", 144 | "annotation_rules": "", 145 | "validation": "", 146 | "data_source": "" 147 | }, 148 | "data": [ 149 | { 150 | "duration": 0.5, 151 | "confidence": null, 152 | "value": { 153 | "tonic": "C", 154 | "chord": "I" 155 | }, 156 | "time": 0.0 157 | }, 158 | { 159 | "duration": 0.5, 160 | "confidence": null, 161 | "value": { 162 | "tonic": "C", 163 | "chord": "ii" 164 | }, 165 | "time": 0.5 166 | }, 167 | { 168 | "duration": 0.5, 169 | "confidence": null, 170 | "value": { 171 | 
"tonic": "C", 172 | "chord": "iii" 173 | }, 174 | "time": 1.0 175 | }, 176 | { 177 | "duration": 0.5, 178 | "confidence": null, 179 | "value": { 180 | "tonic": "C", 181 | "chord": "iv" 182 | }, 183 | "time": 1.5 184 | }, 185 | { 186 | "duration": 0.5, 187 | "confidence": null, 188 | "value": { 189 | "tonic": "C", 190 | "chord": "V" 191 | }, 192 | "time": 2.0 193 | }, 194 | { 195 | "duration": 0.5, 196 | "confidence": null, 197 | "value": { 198 | "tonic": "C", 199 | "chord": "vi" 200 | }, 201 | "time": 2.5 202 | }, 203 | { 204 | "duration": 0.5, 205 | "confidence": null, 206 | "value": { 207 | "tonic": "C", 208 | "chord": "VII" 209 | }, 210 | "time": 3.0 211 | } 212 | ] 213 | }, 214 | { 215 | "namespace": "key_mode", 216 | "sandbox": {}, 217 | "time": 0, 218 | "duration": 3.5, 219 | "annotation_metadata": { 220 | "annotation_tools": "", 221 | "curator": { 222 | "name": "", 223 | "email": "" 224 | }, 225 | "annotator": {}, 226 | "version": "", 227 | "corpus": "", 228 | "annotation_rules": "", 229 | "validation": "", 230 | "data_source": "" 231 | }, 232 | "data": [ 233 | { 234 | "duration": 3.5, 235 | "confidence": 1.0, 236 | "value": "C:major", 237 | "time": 0.0 238 | } 239 | ] 240 | }, 241 | { 242 | "namespace": "pitch_class", 243 | "sandbox": {}, 244 | "time": 0, 245 | "duration": 3.5, 246 | "annotation_metadata": { 247 | "annotation_tools": "", 248 | "curator": { 249 | "name": "", 250 | "email": "" 251 | }, 252 | "annotator": {}, 253 | "version": "", 254 | "corpus": "", 255 | "annotation_rules": "", 256 | "validation": "", 257 | "data_source": "" 258 | }, 259 | "data": { 260 | "duration": [ 261 | 0.5, 262 | 0.5, 263 | 0.5, 264 | 0.5, 265 | 0.5, 266 | 0.5, 267 | 0.5 268 | ], 269 | "confidence": [ 270 | null, 271 | null, 272 | null, 273 | null, 274 | null, 275 | null, 276 | null 277 | ], 278 | "value": [ 279 | { 280 | "tonic": "C", 281 | "pitch": 1 282 | }, 283 | { 284 | "tonic": "C", 285 | "pitch": 2 286 | }, 287 | { 288 | "tonic": "C", 289 | "pitch": 3 290 | }, 
291 | { 292 | "tonic": "C", 293 | "pitch": 4 294 | }, 295 | { 296 | "tonic": "C", 297 | "pitch": 5 298 | }, 299 | { 300 | "tonic": "C", 301 | "pitch": 6 302 | }, 303 | { 304 | "tonic": "C", 305 | "pitch": 7 306 | } 307 | ], 308 | "time": [ 309 | 0.0, 310 | 0.5, 311 | 1.0, 312 | 1.5, 313 | 2.0, 314 | 2.5, 315 | 3.0 316 | ] 317 | } 318 | }, 319 | { 320 | "namespace": "pitch_hz", 321 | "sandbox": {}, 322 | "time": 0, 323 | "duration": 3.5, 324 | "annotation_metadata": { 325 | "annotation_tools": "", 326 | "curator": { 327 | "name": "", 328 | "email": "" 329 | }, 330 | "annotator": {}, 331 | "version": "", 332 | "corpus": "", 333 | "annotation_rules": "", 334 | "validation": "", 335 | "data_source": "" 336 | }, 337 | "data": { 338 | "duration": [ 339 | 0.5, 340 | 0.5, 341 | 0.5, 342 | 0.5, 343 | 0.5, 344 | 0.5, 345 | 0.5 346 | ], 347 | "confidence": [ 348 | NaN, 349 | NaN, 350 | NaN, 351 | NaN, 352 | NaN, 353 | NaN, 354 | NaN 355 | ], 356 | "value": [ 357 | 130.81278265029931, 358 | 146.83238395870379, 359 | 164.81377845643496, 360 | 174.61411571650194, 361 | 195.99771799087463, 362 | 220.0, 363 | 246.94165062806206 364 | ], 365 | "time": [ 366 | 0.0, 367 | 0.5, 368 | 1.0, 369 | 1.5, 370 | 2.0, 371 | 2.5, 372 | 3.0 373 | ] 374 | } 375 | }, 376 | { 377 | "namespace": "pitch_midi", 378 | "sandbox": {}, 379 | "time": 0, 380 | "duration": 3.5, 381 | "annotation_metadata": { 382 | "annotation_tools": "", 383 | "curator": { 384 | "name": "", 385 | "email": "" 386 | }, 387 | "annotator": {}, 388 | "version": "", 389 | "corpus": "", 390 | "annotation_rules": "", 391 | "validation": "", 392 | "data_source": "" 393 | }, 394 | "data": { 395 | "duration": [ 396 | 0.5, 397 | 0.5, 398 | 0.5, 399 | 0.5, 400 | 0.5, 401 | 0.5, 402 | 0.5 403 | ], 404 | "confidence": [ 405 | NaN, 406 | NaN, 407 | NaN, 408 | NaN, 409 | NaN, 410 | NaN, 411 | NaN 412 | ], 413 | "value": [ 414 | 48.0, 415 | 50.0, 416 | 52.0, 417 | 53.0, 418 | 55.0, 419 | 57.0, 420 | 59.0 421 | ], 422 | "time": [ 423 | 0.0, 
424 | 0.5, 425 | 1.0, 426 | 1.5, 427 | 2.0, 428 | 2.5, 429 | 3.0 430 | ] 431 | } 432 | }, 433 | { 434 | "namespace": "tempo", 435 | "sandbox": {}, 436 | "time": 0, 437 | "duration": 3.5, 438 | "annotation_metadata": { 439 | "annotation_tools": "", 440 | "curator": { 441 | "name": "", 442 | "email": "" 443 | }, 444 | "annotator": {}, 445 | "version": "", 446 | "corpus": "", 447 | "annotation_rules": "", 448 | "validation": "", 449 | "data_source": "" 450 | }, 451 | "data": [ 452 | { 453 | "duration": 3.5, 454 | "confidence": 1.0, 455 | "value": 120.0, 456 | "time": 0.0 457 | } 458 | ] 459 | } 460 | ], 461 | "file_metadata": { 462 | "jams_version": "0.2.0", 463 | "title": "", 464 | "identifiers": {}, 465 | "release": "", 466 | "duration": 3.485170068027211, 467 | "artist": "" 468 | } 469 | } -------------------------------------------------------------------------------- /tests/data/test.ogg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmcfee/pumpp/1ca952e72e1d7f1e08514060821872465459c7f3/tests/data/test.ogg -------------------------------------------------------------------------------- /tests/test_core.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | '''Miscellaneous utility tests''' 4 | import pytest 5 | import numpy as np 6 | 7 | import librosa 8 | import jams 9 | 10 | import pumpp 11 | 12 | 13 | @pytest.fixture(params=[11025, 22050]) 14 | def sr(request): 15 | return request.param 16 | 17 | 18 | @pytest.fixture(params=[128, 512]) 19 | def hop_length(request): 20 | return request.param 21 | 22 | 23 | @pytest.fixture(params=[None, 24 | 'tests/data/test.jams', 25 | jams.load('tests/data/test.jams')]) 26 | def jam(request): 27 | return request.param 28 | 29 | 30 | @pytest.mark.parametrize('audio_f', [None, 'tests/data/test.ogg']) 31 | @pytest.mark.parametrize('y', [None, 'tests/data/test.ogg']) 32 | 
@pytest.mark.parametrize('sr2', [None, 22050]) 33 | @pytest.mark.parametrize('crop', [False, True]) 34 | def test_pump(audio_f, jam, y, sr, sr2, hop_length, crop): 35 | 36 | ops = [pumpp.feature.STFT(name='stft', sr=sr, 37 | hop_length=hop_length, 38 | n_fft=2*hop_length), 39 | 40 | pumpp.task.BeatTransformer(name='beat', sr=sr, 41 | hop_length=hop_length), 42 | 43 | pumpp.task.ChordTransformer(name='chord', sr=sr, 44 | hop_length=hop_length), 45 | 46 | pumpp.task.StaticLabelTransformer(name='tags', 47 | namespace='tag_open', 48 | labels=['rock', 'jazz'])] 49 | 50 | P = pumpp.Pump(*ops) 51 | 52 | if audio_f is None and y is None: 53 | # no input 54 | with pytest.raises(pumpp.ParameterError): 55 | data = P.transform(audio_f=audio_f, jam=jam, y=y, sr=sr2) 56 | elif y is not None and sr2 is None: 57 | # input buffer, but no sampling rate 58 | y = librosa.load(y, sr=sr2)[0] 59 | with pytest.raises(pumpp.ParameterError): 60 | data = P.transform(audio_f=audio_f, jam=jam, y=y, sr=sr2) 61 | elif y is not None: 62 | y = librosa.load(y, sr=sr2)[0] 63 | data = P.transform(audio_f=audio_f, jam=jam, y=y, sr=sr2) 64 | else: 65 | 66 | fields = set(['stft/mag', 67 | 'stft/phase', 68 | 'beat/beat', 69 | 'beat/downbeat', 70 | 'beat/mask_downbeat', 71 | 'chord/pitch', 72 | 'chord/root', 73 | 'chord/bass', 74 | 'tags/tags']) 75 | 76 | valids = set(['beat/_valid', 'chord/_valid', 'tags/_valid']) 77 | 78 | assert set(P.fields.keys()) == fields 79 | 80 | data = P.transform(audio_f=audio_f, jam=jam, y=y, sr=sr2, crop=crop) 81 | data2 = P(audio_f=audio_f, jam=jam, y=y, sr=sr2, crop=crop) 82 | 83 | # Fields we should have: 84 | assert set(data.keys()) == fields | valids 85 | 86 | # time shapes should be the same for annotations 87 | assert data['beat/beat'].shape[1] == data['beat/downbeat'].shape[1] 88 | assert data['beat/beat'].shape[1] == data['chord/pitch'].shape[1] 89 | assert data['beat/beat'].shape[1] == data['chord/root'].shape[1] 90 | assert data['beat/beat'].shape[1] == 
data['chord/bass'].shape[1] 91 | 92 | # Audio features can be off by at most a frame 93 | if crop: 94 | assert data['stft/mag'].shape[1] == data['beat/beat'].shape[1] 95 | assert data['stft/mag'].shape[1] == data['chord/pitch'].shape[1] 96 | else: 97 | assert (np.abs(data['stft/mag'].shape[1] - data['beat/beat'].shape[1]) 98 | * hop_length / float(sr)) <= 0.05 99 | 100 | assert data.keys() == data2.keys() 101 | for k in data: 102 | assert np.allclose(data[k], data2[k]) 103 | 104 | 105 | @pytest.mark.parametrize('audio_f', ['tests/data/test.ogg']) 106 | def test_pump_empty(audio_f, jam, sr, hop_length): 107 | 108 | pump = pumpp.Pump() 109 | data = pump.transform(audio_f, jam) 110 | assert data == dict() 111 | 112 | 113 | def test_pump_add(sr, hop_length): 114 | 115 | ops = [pumpp.feature.STFT(name='stft', sr=sr, 116 | hop_length=hop_length, 117 | n_fft=2*hop_length), 118 | 119 | pumpp.task.BeatTransformer(name='beat', sr=sr, 120 | hop_length=hop_length), 121 | 122 | pumpp.task.ChordTransformer(name='chord', sr=sr, 123 | hop_length=hop_length), 124 | 125 | pumpp.task.StaticLabelTransformer(name='tags', 126 | namespace='tag_open', 127 | labels=['rock', 'jazz'])] 128 | 129 | pump = pumpp.Pump() 130 | assert pump.ops == [] 131 | 132 | for op in ops: 133 | pump.add(op) 134 | assert op in pump.ops 135 | 136 | 137 | @pytest.mark.xfail(raises=pumpp.ParameterError) 138 | def test_pump_add_bad(): 139 | 140 | pumpp.Pump('foo') 141 | 142 | 143 | @pytest.mark.xfail(raises=pumpp.ParameterError) 144 | def test_pump_add_twice(sr, hop_length): 145 | 146 | op = pumpp.feature.STFT(name='stft', sr=sr, 147 | hop_length=hop_length, 148 | n_fft=2*hop_length) 149 | 150 | P = pumpp.Pump() 151 | 152 | P.add(op) 153 | P.add(op) 154 | 155 | 156 | @pytest.mark.xfail(raises=KeyError) 157 | def test_pump_badkey(sr, hop_length): 158 | 159 | op = pumpp.feature.STFT(name='stft', sr=sr, 160 | hop_length=hop_length, 161 | n_fft=2*hop_length) 162 | 163 | P = pumpp.Pump(op) 164 | 165 | P['bad key'] 166 
| 167 | 168 | @pytest.mark.parametrize('n_samples', [None, 10]) 169 | @pytest.mark.parametrize('duration', [1, 5]) 170 | @pytest.mark.parametrize('rng', [None, 1]) 171 | def test_pump_sampler(sr, hop_length, n_samples, duration, rng): 172 | ops = [pumpp.feature.STFT(name='stft', sr=sr, 173 | hop_length=hop_length, 174 | n_fft=2*hop_length), 175 | 176 | pumpp.task.BeatTransformer(name='beat', sr=sr, 177 | hop_length=hop_length)] 178 | 179 | P = pumpp.Pump(*ops) 180 | 181 | S1 = pumpp.Sampler(n_samples, duration, random_state=rng, *ops) 182 | S2 = P.sampler(n_samples, duration, random_state=rng) 183 | 184 | assert S1._time == S2._time 185 | assert S1.n_samples == S2.n_samples 186 | assert S1.duration == S2.duration 187 | 188 | 189 | #@pytest.mark.skip 190 | def test_pump_layers(sr, hop_length): 191 | ops = [pumpp.feature.STFT(name='stft', sr=sr, 192 | hop_length=hop_length, 193 | n_fft=2*hop_length), 194 | 195 | pumpp.feature.CQT(name='cqt', sr=sr, 196 | hop_length=hop_length), 197 | 198 | pumpp.task.BeatTransformer(name='beat', sr=sr, 199 | hop_length=hop_length)] 200 | 201 | P = pumpp.Pump(*ops) 202 | 203 | L1 = P.layers() 204 | L2 = dict() 205 | L2.update(ops[0].layers()) 206 | L2.update(ops[1].layers()) 207 | 208 | assert L1.keys() == L2.keys() 209 | 210 | for k in L1: 211 | assert L1[k].dtype == L2[k].dtype 212 | for d1, d2 in zip(L1[k].shape, L2[k].shape): 213 | assert str(d1) == str(d2) 214 | 215 | # test other input layers 216 | P.layers('tf.keras') 217 | P.layers('tf') 218 | 219 | 220 | def test_pump_str(sr, hop_length): 221 | 222 | ops = [pumpp.feature.STFT(name='stft', sr=sr, 223 | hop_length=hop_length, 224 | n_fft=2*hop_length), 225 | 226 | pumpp.task.BeatTransformer(name='beat', sr=sr, 227 | hop_length=hop_length), 228 | 229 | pumpp.task.ChordTransformer(name='chord', sr=sr, 230 | hop_length=hop_length), 231 | 232 | pumpp.task.StaticLabelTransformer(name='tags', 233 | namespace='tag_open', 234 | labels=['rock', 'jazz'])] 235 | 236 | pump = 
pumpp.Pump(*ops) 237 | 238 | assert isinstance(str(pump), str) 239 | 240 | 241 | def test_pump_repr_html(sr, hop_length): 242 | 243 | ops = [pumpp.feature.STFT(name='stft', sr=sr, 244 | hop_length=hop_length, 245 | n_fft=2*hop_length), 246 | 247 | pumpp.task.BeatTransformer(name='beat', sr=sr, 248 | hop_length=hop_length), 249 | 250 | pumpp.task.ChordTransformer(name='chord', sr=sr, 251 | hop_length=hop_length), 252 | 253 | pumpp.task.StaticLabelTransformer(name='tags', 254 | namespace='tag_open', 255 | labels=['rock', 'jazz'])] 256 | 257 | pump = pumpp.Pump(*ops) 258 | 259 | assert isinstance(pump._repr_html_(), str) 260 | 261 | 262 | def test_pump_skip(sr, hop_length, tmp_path): 263 | ops = [pumpp.feature.STFT(name='stft', sr=sr, 264 | hop_length=hop_length, 265 | n_fft=2*hop_length), 266 | 267 | pumpp.feature.Tempogram(name='tempo', sr=sr, 268 | win_length=384, 269 | hop_length=hop_length), 270 | 271 | pumpp.task.BeatTransformer(name='beat', sr=sr, 272 | hop_length=hop_length)] 273 | 274 | audio_f = 'tests/data/test.ogg' 275 | jam_f = 'tests/data/test.jams' 276 | KEY = 'tempo/tempogram' 277 | SENTINEL = (None,) 278 | data = {KEY: SENTINEL} 279 | 280 | P = pumpp.Pump(*ops) 281 | fields = set(P.fields) 282 | 283 | get_valid_fields = lambda x: {f for f in set(x) if not f.endswith('_valid')} 284 | 285 | # see if existing keys are skipped 286 | X = P.transform(audio_f, data=dict(data)) 287 | assert X[KEY] is SENTINEL, 'field was overwritten' 288 | assert get_valid_fields(X) == fields 289 | 290 | # make sure fields are computed normally 291 | X = P.transform(audio_f) 292 | assert X[KEY] is not SENTINEL, 'field was not computed' 293 | assert get_valid_fields(X) == fields 294 | 295 | # see if loading audio is skipped if we don't need it 296 | 297 | feature_ops = [op for op in P.ops if isinstance(op, pumpp.FeatureExtractor)] 298 | data = {k: SENTINEL for op in feature_ops for k in op.fields} 299 | 300 | X = P.transform(None, jam_f, data=data) 301 | assert all(X[k] is 
SENTINEL 302 | for op in feature_ops 303 | for k in op.fields), 'field should not have been computed' 304 | assert get_valid_fields(X) == fields 305 | -------------------------------------------------------------------------------- /tests/test_misc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | '''Miscellaneous utility tests''' 4 | 5 | import pytest 6 | import numpy as np 7 | 8 | import pumpp 9 | 10 | from pumpp import ParameterError 11 | 12 | xfail = pytest.mark.xfail 13 | 14 | 15 | @pytest.mark.parametrize('dtype', 16 | [int, np.int64, 17 | pytest.param('not a type', 18 | marks=xfail(raises=ParameterError))]) 19 | def test_scope_type(dtype): 20 | 21 | scope = pumpp.base.Scope(None) 22 | scope.register('foo', [None], dtype) 23 | 24 | 25 | @pytest.mark.parametrize('shape', 26 | [[None], [1], [1, None], 27 | pytest.param(1, marks=xfail(raises=ParameterError)), 28 | pytest.param(None, marks=xfail(raises=ParameterError)), 29 | pytest.param(23.5, marks=xfail(raises=ParameterError)), 30 | pytest.param('not a shape', marks=xfail(raises=ParameterError))]) 31 | def test_scope_badshape(shape): 32 | 33 | scope = pumpp.base.Scope(None) 34 | scope.register('foo', shape, int) 35 | 36 | 37 | def test_bad_extractor(): 38 | ext = pumpp.feature.FeatureExtractor(None, 22050, 512) 39 | 40 | with pytest.raises(NotImplementedError): 41 | ext.transform(np.zeros(1024), 22050) 42 | 43 | 44 | @pytest.mark.parametrize('dtype, fill', 45 | [(int, 0), 46 | (bool, False), 47 | (float, np.nan), 48 | (complex, np.nan)]) 49 | def test_fill_value(dtype, fill): 50 | 51 | v = pumpp.task.base.fill_value(dtype) 52 | 53 | assert isinstance(v, dtype) 54 | assert v == fill or np.isnan(v) and np.isnan(fill) 55 | -------------------------------------------------------------------------------- /tests/test_sampler.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''Testing the sampler module'''

import numpy as np

import pytest

import pumpp
xfail = pytest.mark.xfail


# Make a fixture with some audio and task output
@pytest.fixture(params=[11025], scope='module')
def sr(request):
    '''Sampling rate for the test signal.'''
    return request.param


@pytest.fixture(params=[512], scope='module')
def hop_length(request):
    '''Hop length (in samples) shared by all operators.'''
    return request.param


@pytest.fixture(scope='module')
def ops(sr, hop_length):
    '''Build a representative set of pump operators:

    two feature extractors, a time-varying task, and a static task.
    '''
    ops = []

    # Let's put on two feature extractors
    ops.append(pumpp.feature.STFT(name='stft', sr=sr,
                                  hop_length=hop_length,
                                  n_fft=hop_length))

    ops.append(pumpp.feature.Tempogram(name='rhythm', sr=sr,
                                       hop_length=hop_length,
                                       win_length=hop_length))

    # A time-varying annotation
    ops.append(pumpp.task.ChordTransformer(name='chord', sr=sr,
                                           hop_length=hop_length))

    # And a static annotation
    ops.append(pumpp.task.VectorTransformer(namespace='vector',
                                            dimension=32,
                                            name='vec'))

    # No teardown is required, so return (rather than yield) the list
    return ops


@pytest.fixture(scope='module')
def data(ops):
    '''Transform the test audio/annotation pair through all operators.'''
    audio_f = 'tests/data/test.ogg'
    jams_f = 'tests/data/test.jams'

    P = pumpp.Pump(*ops)
    return P.transform(audio_f=audio_f, jam=jams_f)


@pytest.fixture(params=[4, 16, None], scope='module')
def n_samples(request):
    '''Number of patches to draw; None means unbounded.'''
    return request.param


@pytest.fixture(params=[16, 32], scope='module')
def duration(request):
    '''Patch duration, in frames.'''
    return request.param


@pytest.fixture(params=[None, 16, 256,
                        pytest.param(-1, marks=xfail(raises=pumpp.ParameterError))],
                scope='module')
def stride(request):
    '''Stride for the sequential sampler; negative values are invalid.'''
    return request.param


@pytest.fixture(params=[None, 20170401, np.random.RandomState(100),
                        pytest.param('bad rng', marks=xfail(raises=pumpp.ParameterError))],
                scope='module')
def rng(request):
    '''Random state: None, an int seed, a RandomState, or an invalid value.'''
    return request.param


def _reference_keys(ops):
    '''Collect the full set of field names produced by a list of operators.

    Shared by all sampler tests below, which previously each repeated
    this accumulation loop inline.
    '''
    keys = set()
    for op in ops:
        keys |= set(op.fields.keys())
    return keys


def test_sampler(data, ops, n_samples, duration, rng):
    '''Patches from Sampler carry the right fields, shapes, and count.'''
    MAX_SAMPLES = 30
    sampler = pumpp.Sampler(n_samples, duration, *ops, random_state=rng)

    # Build the set of reference keys that we want to track
    ref_keys = _reference_keys(ops)

    for datum, n in zip(sampler(data), range(MAX_SAMPLES)):
        # First, test that we have the right fields
        assert set(datum.keys()) == ref_keys

        # Now test that shape is preserved in the right way
        for key in datum:
            ref_shape = list(data[key].shape)
            for tdim in sampler._time[key]:
                ref_shape[tdim] = duration

            # Check that all keys have length=1
            assert datum[key].shape[0] == 1
            assert list(datum[key].shape[1:]) == ref_shape[1:]

    # Test that we got the right number of samples out
    if n_samples is None:
        assert n == MAX_SAMPLES - 1
    else:
        assert n == n_samples - 1


def test_sequential_sampler(data, ops, duration, stride, rng):
    '''SequentialSampler yields patches with the right fields and shapes.'''
    sampler = pumpp.SequentialSampler(duration, *ops, stride=stride, random_state=rng)

    # Build the set of reference keys that we want to track
    ref_keys = _reference_keys(ops)

    for datum in sampler(data):
        # First, test that we have the right fields
        assert set(datum.keys()) == ref_keys

        # Now test that shape is preserved in the right way
        for key in datum:
            ref_shape = list(data[key].shape)
            for tdim in sampler._time[key]:
                ref_shape[tdim] = duration

            # Check that all keys have length=1
            assert datum[key].shape[0] == 1
            assert list(datum[key].shape[1:]) == ref_shape[1:]


def test_slicer():
    '''Slicer.crop truncates every time-like axis to the common minimum.'''
    scope1 = pumpp.base.Scope('test1')
    scope1.register('first', (None, 10), int)
    scope1.register('second', (2, None), int)
    scope1.register('none', (16, 16), int)

    scope2 = pumpp.base.Scope('test2')
    scope2.register('first', (None, 5), int)
    scope2.register('second', (20, None), int)
    scope2.register('square', (None, None, 3), int)

    slicer = pumpp.base.Slicer(scope1, scope2)

    # Minimum time for all of these is 8
    data_in = {'test1/first': np.random.randint(0, 7, size=(1, 8, 10)),
               'test1/second': np.random.randint(0, 7, size=(1, 2, 100)),
               'test1/none': np.random.randint(0, 7, size=(1, 16, 16)),
               'test2/first': np.random.randint(0, 7, size=(1, 9, 5)),
               'test2/second': np.random.randint(0, 7, (1, 20, 105)),
               'test2/square': np.random.randint(0, 7, (1, 20, 20, 3))}

    data_out = slicer.crop(data_in)
    assert set(data_out.keys()) == set(data_in.keys())

    assert data_out['test1/first'].shape == (1, 8, 10)
    assert np.all(data_out['test1/first'] == data_in['test1/first'][:, :8, :])

    assert data_out['test1/second'].shape == (1, 2, 8)
    assert np.all(data_out['test1/second'] == data_in['test1/second'][:, :, :8])

    assert data_out['test1/none'].shape == (1, 16, 16)
    assert np.all(data_out['test1/none'] == data_in['test1/none'])

    assert data_out['test2/first'].shape == (1, 8, 5)
    assert np.all(data_out['test2/first'] == data_in['test2/first'][:, :8, :])

    assert data_out['test2/second'].shape == (1, 20, 8)
    assert np.all(data_out['test2/second'] == data_in['test2/second'][:, :, :8])

    assert data_out['test2/square'].shape == (1, 8, 8, 3)
    assert np.all(data_out['test2/square'] == data_in['test2/square'][:, :8, :8, :])


@pytest.mark.xfail(raises=pumpp.ParameterError)
def test_slicer_fail():
    '''Constructing a Slicer from a non-Scope must raise ParameterError.'''
    pumpp.base.Slicer('not a scope')


@pytest.mark.parametrize('durations',
                         [(8, 16), (16, 16),
                          pytest.param((0, 8), marks=xfail(raises=pumpp.ParameterError)),
                          pytest.param((8, 4), marks=xfail(raises=pumpp.ParameterError))])
def test_vlsampler(data, ops, n_samples, durations, rng):
    '''VariableLengthSampler patches fall within [min, max] duration.'''
    MAX_SAMPLES = 30
    min_duration, max_duration = durations
    sampler = pumpp.VariableLengthSampler(n_samples, min_duration,
                                          max_duration,
                                          *ops, random_state=rng)

    # Build the set of reference keys that we want to track
    ref_keys = _reference_keys(ops)

    n = 0
    for datum, n in zip(sampler(data), range(MAX_SAMPLES)):
        # First, test that we have the right fields
        assert set(datum.keys()) == ref_keys

        # Now test that shape is preserved in the right way
        for key in datum:
            for tdim in sampler._time[key]:
                assert datum[key].shape[tdim] >= min_duration
                assert datum[key].shape[tdim] <= max_duration

            # Check that all keys have length=1
            assert datum[key].shape[0] == 1

    # Test that we got the right number of samples out
    if n_samples is None:
        assert n == MAX_SAMPLES - 1
    else:
        assert n == n_samples - 1


@pytest.mark.xfail(raises=pumpp.DataError)
def test_sampler_short_error(data, ops):
    '''Requesting patches longer than the input must raise DataError.

    BUGFIX: the original loop body referenced the undefined names
    ``duration`` and ``n_samples`` (copy-pasted from test_sampler, which
    takes them as fixtures); had the expected DataError ever failed to
    fire, this test would have died with a NameError instead of failing
    cleanly.  The dead shape-check is removed.
    '''
    MAX_SAMPLES = 2
    # 5000 frames is far longer than the test clip, so iteration
    # should raise DataError before yielding anything
    sampler = pumpp.Sampler(MAX_SAMPLES, 5000, *ops)

    ref_keys = _reference_keys(ops)

    for datum, n in zip(sampler(data), range(MAX_SAMPLES)):
        # If anything is yielded at all, it should still be well-formed
        assert set(datum.keys()) == ref_keys


# ---------------------------------------------------------------------
# tests/test_utils.py
# ---------------------------------------------------------------------
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''Tests for feature utility helpers'''

import pumpp.feature._utils


@pytest.mark.parametrize('dtype', ['uint8', np.uint8])
def test_quantize(dtype):
    '''Quantize maps [-5, 5] linearly onto the uint8 range.'''
    # The range -5 to 5 is broken into 256 equal pieces
    # -5/3 lands at 85 (1/3)
    # 5/3 lands at 2*85 = 170  (the original comment said 270, which is
    # both arithmetically wrong and out of uint8 range)
    # 5 lands at the max
    x = np.asarray([-5, -5/3, 5/3, 5])
    y = pumpp.feature._utils.quantize(x, dtype=dtype)
    assert np.allclose(y, [0, 85, 170, 255])


def test_quantize_min():
    '''An explicit ref_min clamps everything below it to 0.'''
    x = np.asarray([-5, -5/3, 5/3, 5])
    y = pumpp.feature._utils.quantize(x, ref_min=0)
    assert np.allclose(y, [0, 0, 85, 255])


def test_quantize_max():
    '''An explicit ref_max clamps everything above it to 255.'''
    x = np.asarray([-5, -5/3, 5/3, 5])
    y = pumpp.feature._utils.quantize(x, ref_max=0)
    assert np.allclose(y, [0, 170, 255, 255])


@pytest.mark.xfail(raises=pumpp.ParameterError)
@pytest.mark.parametrize('dtype', ['int8', 'float32'])
def test_quantize_bad_dtype(dtype):
    '''Non-unsigned-integer target dtypes must raise ParameterError.'''
    x = np.asarray([-5, -5/3, 5/3, 5])
    pumpp.feature._utils.quantize(x, dtype=dtype)