├── .github ├── environment-ci.yml └── workflows │ ├── ci.yml │ └── documentation.yaml ├── .gitignore ├── ACKNOWLEDGEMENTS ├── AUTHORS ├── COPYING ├── MANIFEST.in ├── README.md ├── compiam ├── __init__.py ├── annotations │ ├── 12_Sharanu_Janakana.txt │ ├── 143_Sundari_Nee_Divya.txt │ ├── 188_Vanajaksha_Ninne_Kori.txt │ ├── 47_Koti_Janmani.txt │ └── README.md ├── conf │ └── raga │ │ ├── carnatic.yaml │ │ └── svara_cents.yaml ├── data.py ├── dunya │ ├── README.md │ └── __init__.py ├── exceptions.py ├── io.py ├── melody │ ├── README.md │ ├── __init__.py │ ├── pattern │ │ ├── __init__.py │ │ └── sancara_search │ │ │ ├── __init__.py │ │ │ ├── complex_auto │ │ │ ├── __init__.py │ │ │ ├── complex.py │ │ │ ├── cqt.py │ │ │ └── util.py │ │ │ └── extraction │ │ │ ├── __init__.py │ │ │ ├── evaluation.py │ │ │ ├── img.py │ │ │ ├── io.py │ │ │ ├── pitch.py │ │ │ ├── segments.py │ │ │ ├── self_sim.py │ │ │ ├── sequence.py │ │ │ ├── utils.py │ │ │ └── visualisation.py │ ├── pitch_extraction │ │ ├── __init__.py │ │ ├── ftanet_carnatic │ │ │ ├── __init__.py │ │ │ ├── cfp.py │ │ │ └── pitch_processing.py │ │ ├── ftaresnet_carnatic │ │ │ ├── __init__.py │ │ │ └── model.py │ │ └── melodia.py │ ├── raga_recognition │ │ ├── __init__.py │ │ └── deepsrgm │ │ │ ├── __init__.py │ │ │ ├── attention_layer.py │ │ │ ├── model.py │ │ │ └── raga_mapping.py │ └── tonic_identification │ │ ├── __init__.py │ │ └── tonic_multipitch.py ├── models │ └── README.md ├── rhythm │ ├── README.md │ ├── __init__.py │ ├── meter │ │ ├── __init__.py │ │ └── akshara_pulse_tracker │ │ │ ├── __init__.py │ │ │ ├── models.py │ │ │ └── parameters.py │ └── transcription │ │ ├── __init__.py │ │ └── mnemonic_transcription │ │ ├── __init__.py │ │ └── bol_mappings.csv ├── separation │ ├── README.md │ ├── __init__.py │ ├── music_source_separation │ │ ├── __init__.py │ │ └── mixer_model │ │ │ ├── __init__.py │ │ │ ├── models.py │ │ │ └── modules.py │ └── singing_voice_extraction │ │ ├── __init__.py │ │ └── cold_diff_sep │ │ ├── __init__.py │ │ └── model │ │ ├── __init__.py │ │ ├── clustering.py │ │ ├── config.py │ │ ├── estnoise_ms.py │ │ ├── signal_processing.py │ │ ├── unet.py │ │ ├── unet_utils.py │ │ └── vad.py ├── structure │ ├── README.md │ ├── __init__.py │ └── segmentation │ │ ├── __init__.py │ │ └── dhrupad_bandish_segmentation │ │ ├── __init__.py │ │ ├── audio_processing.py │ │ ├── feature_extraction.py │ │ ├── model_utils.py │ │ └── params.py ├── timbre │ ├── README.md │ ├── __init__.py │ └── stroke_classification │ │ ├── __init__.py │ │ └── mridangam_stroke_classification │ │ ├── __init__.py │ │ ├── model.py │ │ └── stroke_features.py ├── utils │ ├── NMFtoolbox │ │ ├── NEMA.py │ │ ├── NMF.py │ │ ├── README.md │ │ ├── __init__.py │ │ ├── alphaWienerFilter.py │ │ ├── forwardSTFT.py │ │ ├── initActivations.py │ │ ├── initTemplates.py │ │ ├── inverseSTFT.py │ │ ├── midi2freq.py │ │ └── utils.py │ ├── __init__.py │ ├── augment │ │ ├── __init__.py │ │ ├── augment_data_ar.py │ │ ├── augment_data_sf.py │ │ ├── augment_data_sr.py │ │ └── templates.npy │ ├── download.py │ └── pitch.py ├── version.py └── visualisation │ ├── __init__.py │ ├── audio.py │ ├── peaks │ ├── __init__.py │ ├── data.py │ ├── intervals.py │ └── slope.py │ ├── pitch.py │ ├── training.py │ └── waveform_player │ ├── __init__.py │ ├── core.py │ └── waveform-playlist │ ├── annotations.html │ ├── css │ └── main.css │ ├── js │ ├── annotations.js │ ├── emitter.js │ ├── waveform-playlist.js │ └── waveform-playlist.js.LICENSE.txt │ └── multi-channel.html ├── docs ├── .nojekyll ├── 
Makefile ├── _static │ └── style.css ├── conf.py ├── index.rst ├── make.bat └── source │ ├── basic_usage.rst │ ├── contributing.rst │ ├── datasets.rst │ ├── melody.rst │ ├── rhythm.rst │ ├── separation.rst │ ├── structure.rst │ ├── timbre.rst │ ├── utils.rst │ └── visualisation.rst ├── notebooks ├── melody │ ├── Exploring_raga.ipynb │ ├── KDE.png │ ├── Melodic_Pattern_Finding_Carnatic.ipynb │ ├── emphasizing_diagonals.png │ ├── extracting_segments.png │ ├── grouping1.png │ └── test.png └── rhythm │ └── Transcription.ipynb ├── pyproject.toml ├── setup.py └── tests ├── README.md ├── __init__.py ├── conftest.py ├── melody ├── __init__.py ├── test_deepsrgm.py ├── test_essentia_extractors.py ├── test_ftanet.py └── test_ftaresnet.py ├── resources ├── melody │ ├── pitch_test.wav │ └── raga_mapping.json └── timbre │ ├── 224030__akshaylaya__bheem-b-001.wav │ └── 225359__akshaylaya__cha-c-001.wav ├── rhythm ├── __init__.py └── test_akshara_pulse.py ├── separation ├── __init__.py ├── test_cold_diff_sep.py └── test_mixer_model.py ├── structure ├── __init__.py └── test_dhrupad_segmentation.py ├── test_wrappers.py └── timbre └── test_mridangam_stroke_classification.py /.github/environment-ci.yml: -------------------------------------------------------------------------------- 1 | name: compiam-dev 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - pip 7 | - "attrs>=23.1.0" 8 | - "matplotlib>=3.0.0" 9 | - "numpy>=1.20.3,<=1.26.4" 10 | - "joblib>=1.2.0" 11 | - "pathlib~=1.0.1" 12 | - "tqdm>=4.66.1" 13 | - "IPython>=7.34.0" 14 | - "ipywidgets>=7.0.0,<8" 15 | - "Jinja2~=3.1.2" 16 | - "configobj~=5.0.6" 17 | - "seaborn" 18 | - "librosa>=0.10.1" 19 | - "scikit-learn==1.5.2" 20 | - "scikit-image~=0.24.0" 21 | - "hmmlearn==0.3.3" 22 | - "fastdtw~=0.3.4" 23 | ####### 24 | - libvorbis 25 | - pytest>=7.4.3 26 | ####### 27 | - pip: 28 | - "keras<3.0.0" 29 | - "tensorflow>=2.12.0,<2.16" 30 | - "torch==2.0.0" 31 | - "torchaudio==2.0.1" 32 | - "essentia" 33 | - "soundfile>=0.12.1" 34 | - "opencv-python~=4.6.0" 35 | - "mirdata==0.3.9" 36 | - "compmusic==0.4" 37 | - "attrs>=23.1.0" 38 | - "black>=23.3.0" 39 | - "decorator>=5.1.1" 40 | - "future>=0.18.3" 41 | - "testcontainers>=3.7.1" -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.ref }} 13 | cancel-in-progress: True 14 | 15 | jobs: 16 | test: 17 | name: "Python ${{ matrix.python-version }} on ${{ matrix.os }}" 18 | runs-on: ${{ matrix.os }} 19 | timeout-minutes: 60 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | include: 25 | - os: ubuntu-latest 26 | python-version: "3.9" 27 | channel-priority: "strict" 28 | envfile: ".github/environment-ci.yml" 29 | 30 | - os: ubuntu-latest 31 | python-version: "3.10" 32 | channel-priority: "strict" 33 | envfile: ".github/environment-ci.yml" 34 | 35 | - os: ubuntu-latest 36 | python-version: "3.11" 37 | channel-priority: "strict" 38 | envfile: ".github/environment-ci.yml" 39 | 40 | ### NOTE: Coming soon when jams in mirdata is updated or removed 41 | #- os: ubuntu-latest 42 | # python-version: "3.12" 43 | # channel-priority: "strict" 44 | # envfile: ".github/environment-ci.yml" 45 | 46 | #- os: ubuntu-latest 47 | # python-version: "3.13" 48 | # channel-priority: "strict" 49 | # envfile: 
".github/environment-ci.yml" 50 | 51 | steps: 52 | - uses: actions/checkout@v3 53 | with: 54 | submodules: true 55 | 56 | - name: Install OS dependencies 57 | shell: bash -l {0} 58 | run: | 59 | case "${{ runner.os }}" in 60 | Linux) 61 | sudo apt-get update -yy 62 | sudo apt-get install -yy wget libsndfile-dev libsox-fmt-mp3 ffmpeg zip 63 | ;; 64 | macOS) 65 | brew install libsamplerate wget libsndfile sox ffmpeg zip 66 | ;; 67 | esac 68 | 69 | - name: Cache conda 70 | uses: actions/cache@v3 71 | env: 72 | CACHE_NUMBER: 2 73 | with: 74 | path: ~/conda_pkgs_dir 75 | key: ${{ runner.os }}-${{ matrix.python-version }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles( matrix.envfile ) }} 76 | restore-keys: | 77 | ${{ runner.os }}-${{ matrix.python-version }}-conda-${{ env.CACHE_NUMBER }}- 78 | ${{ runner.os }}-${{ matrix.python-version }}-conda- 79 | ${{ runner.os }}-conda- 80 | 81 | - name: Setup conda 82 | uses: conda-incubator/setup-miniconda@v2 83 | with: 84 | miniforge-variant: Miniforge3 85 | miniforge-version: latest 86 | channel-priority: ${{ matrix.channel-priority }} 87 | activate-environment: test 88 | auto-update-conda: false 89 | python-version: ${{ matrix.python-version }} 90 | use-only-tar-bz2: false 91 | 92 | 93 | - name: Install conda dependencies 94 | run: mamba env update -n test -f ${{ matrix.envfile }} 95 | if: steps.cache.outputs.cache-hit != 'true' 96 | 97 | - name: Conda info 98 | shell: bash -l {0} 99 | run: | 100 | conda info -a 101 | conda list 102 | 103 | - name: Run basic tests 104 | shell: bash -l {0} 105 | run: pytest tests/ 106 | 107 | - name: Run tensorflow tests 108 | shell: bash -l {0} 109 | run: pytest tests/ --tensorflow 110 | 111 | - name: Run torch tests 112 | shell: bash -l {0} 113 | run: pytest tests/ --torch 114 | 115 | - name: Run full ML tests 116 | shell: bash -l {0} 117 | run: pytest tests/ --full_ml 118 | 119 | - name: Run all tests 120 | shell: bash -l {0} 121 | run: pytest tests/ --all -------------------------------------------------------------------------------- /.github/workflows/documentation.yaml: -------------------------------------------------------------------------------- 1 | name: Docs 2 | on: 3 | push: 4 | branches: 5 | - master 6 | jobs: 7 | docs: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v2 11 | - uses: actions/setup-python@v2 12 | with: 13 | python-version: '3.10' 14 | - name: Install dependencies 15 | run: | 16 | sudo apt-get update 17 | sudo apt-get install libsndfile1-dev 18 | pip install sphinx sphinx_rtd_theme numpydoc recommonmark 19 | pip install mirdata 20 | pip install scikit-learn 21 | pip install configobj 22 | pip install hmmlearn 23 | pip install fastdtw 24 | pip install matplotlib 25 | pip install scipy 26 | pip install essentia 27 | pip install tensorflow 28 | pip install torch 29 | pip install ipython 30 | pip install scikit-image 31 | pip install seaborn 32 | pip install opencv-python 33 | pip install smart_open 34 | - name: Sphinx build 35 | run: | 36 | sphinx-build docs _build 37 | - name: Deploy 38 | uses: peaceiris/actions-gh-pages@v3 39 | #if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} 40 | with: 41 | publish_branch: gh-pages 42 | github_token: ${{ secrets.GITHUB_TOKEN }} 43 | publish_dir: _build/ 44 | force_orphan: true -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | 3 | # Byte-compiled / optimized / DLL files 4 | 
__pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | .idea/ 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos 33 | into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *,cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | 59 | # Sphinx documentation 60 | docs/_build/ 61 | 62 | # PyBuilder 63 | target/ 64 | 65 | #Ipython Notebook 66 | .ipynb_checkpoints 67 | 68 | # Project specific directories 69 | output/ 70 | compiam/rhythm/meter/new_tool/ 71 | compiam/models/timbre/mridangam_stroke_classification/pre-computed_features.csv 72 | compiam/models/structure/dhrupad_bandish_segmentation/annotations/section_boundaries_labels.csv 73 | compiam/models/structure/dhrupad_bandish_segmentation/annotations/cycle_boundaries/ 74 | tests/resources/mir_datasets/* 75 | compiam/models/separation/ 76 | compiam/models/structure/ 77 | compiam/models/melody/ 78 | compiam/models/rhythm/ 79 | 80 | # For next release 81 | compiam/models/rhythm/4wayTabla/ 82 | tests/rhythm/test_4way_tabla.py 83 | compiam/rhythm/transcription/tabla_transcription/ 84 | 85 | # To avoid dependency clashes 86 | compiam/utils/augment/augment_data_ps.py 87 | compiam/utils/augment/augment_data_ts.py -------------------------------------------------------------------------------- /ACKNOWLEDGEMENTS: -------------------------------------------------------------------------------- 1 | Many thanks goes to 2 | ------------------- 3 | 4 | Marius Miron 5 | Lara Pearson 6 | Brindha Manickavasakan 7 | Preeti Rao 8 | Hema A Murthy 9 | Alastair Porter 10 | Ajay Srinivasamurthy 11 | Sankalp Gulati 12 | Gopala Krishna Koduri 13 | Kaustuv Kanti Ganguli 14 | Rohit MA 15 | Ranjani HG 16 | Venkatasubramanian Viraraghavan 17 | Jom Kuriakose 18 | Shrey Dutta 19 | Shubham Lohiya 20 | Swarada Bharadwaj 21 | 22 | Project Musical AI - PID2019-111403GB-I00/AEI/10.13039/501100011033 funded by the Spanish 23 | Ministerio de Ciencia, Innovación y Universidades (MCIU) and the Agencia Estatal de 24 | Investigación (AEI). 
25 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | compIAM developers and contributors 2 | ------------------------------------ 3 | Genís Plaja-Roglans (genis.plaja@upf.edu) 4 | general architecture and design, tool integration, tests, documentation 5 | Thomas Nuttall (thomas.nuttall@upf.edu) 6 | general architecture and design, tool integration, tests, documentation -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include ACKNOWLEDGEMENTS 2 | include COPYING 3 | include AUTHORS 4 | recursive-include compiam/models * 5 | recursive-include compiam/conf * 6 | recursive-include compiam/visualisation/waveform_player/waveform-playlist * 7 | recursive-include compiam/utils/augment * 8 | recursive-include compiam/utils/NMFtoolbox * -------------------------------------------------------------------------------- /compiam/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import mirdata 3 | 4 | from importlib import import_module 5 | 6 | from compiam import melody, rhythm, structure, timbre 7 | from compiam.dunya import Corpora 8 | from compiam.data import models_dict, datasets_list, WORKDIR 9 | from compiam.exceptions import ModelNotDefinedError 10 | 11 | 12 | def load_model( 13 | model_name, data_home=None, models_dict=models_dict, version=None, **kwargs 14 | ): 15 | """Wrapper for loading pre-trained models. 16 | 17 | :param model_name: name of the model, extractors, or algorithm to load. 18 | :param data_home: path where the data lives. If None uses the default. 19 | :param models_dict: dict object including the available models. 20 | :param version: which version of the model to load. 21 | :returns: specific Class of the selected model. 22 | """ 23 | if not model_name in models_dict: 24 | raise ModelNotDefinedError( 25 | ( 26 | f"Model, {model_name} does not exist in compiam.data.models_dict, please follow " 27 | "instructions for adding new model to in ``data.py`` documentation" 28 | ) 29 | ) 30 | m_dict = models_dict[model_name] 31 | version_ = m_dict["default_version"] if not version else version 32 | if version_ not in m_dict["kwargs"]: 33 | raise ValueError( 34 | f""" 35 | Model {model_name} does not have a version {version_}. 36 | Available versions are: {list(m_dict['kwargs'].keys())}. 37 | """ 38 | ) 39 | model_kwargs = m_dict["kwargs"][version_] 40 | kwarg_paths = [x for x in list(model_kwargs.keys()) if "_path" in x] 41 | for kp in kwarg_paths: 42 | if isinstance(model_kwargs[kp], dict): 43 | for k in list(model_kwargs[kp].keys()): 44 | model_kwargs[kp][k] = ( 45 | os.path.join(data_home, model_kwargs[kp][k]) 46 | if data_home is not None 47 | else os.path.join(WORKDIR, model_kwargs[kp][k]) 48 | ) 49 | else: 50 | model_kwargs[kp] = ( 51 | os.path.join(data_home, model_kwargs[kp]) 52 | if data_home is not None 53 | else os.path.join(WORKDIR, model_kwargs[kp]) 54 | ) 55 | 56 | module = getattr(import_module(m_dict["module_name"]), m_dict["class_name"]) 57 | model_kwargs.update(kwargs) 58 | return module(**model_kwargs) 59 | 60 | 61 | def load_dataset(dataset_name, data_home=None, version="default"): 62 | """Alias function to load a mirdata Dataset class. 63 | 64 | :param dataset_name: the dataset's name, see mirdata.DATASETS for a 65 | complete list of possibilities. 
66 | :param data_home: path where the data lives. If None uses the default 67 | home location. 68 | :param version: which version of the dataset to load. If None, the 69 | default version is loaded. 70 | :returns: a mirdata.core.Dataset object. 71 | """ 72 | if dataset_name not in datasets_list: 73 | raise ValueError("Invalid dataset {}".format(dataset_name)) 74 | dataloader = mirdata.initialize( 75 | dataset_name=dataset_name, data_home=data_home, version=version 76 | ) 77 | dataloader.download(["index"]) # Download index file 78 | return dataloader 79 | 80 | 81 | def load_corpora(tradition, token=None): 82 | """Wrapper to load access to the Dunya corpora. 83 | 84 | :param tradition: carnatic or hindustani. 85 | :param token: Dunya personal access token. 86 | :returns: a compiam.Corpora object. 87 | """ 88 | if token is None: 89 | raise ImportError( 90 | """Please initialize the Corpora introducing your Dunya API token as parameter. 91 | To get your token, first register to https://dunya.compmusic.upf.edu/ and then go to your user 92 | page by clicking at your username at te right top of the webpage. You will find the API token 93 | in the "API Access" section. Request restricted access if needed. Thanks. 94 | """ 95 | ) 96 | return Corpora(tradition=tradition, token=token) 97 | 98 | 99 | def list_models(): 100 | """Just listing the available models. 101 | 102 | :returns: a list of available models. 103 | """ 104 | return list(models_dict.keys()) 105 | 106 | 107 | def get_model_info(model_key): 108 | """Get complete info in data/models_dict for a particular pre-trained model 109 | 110 | :param model_key: model key from models_dict 111 | :returns: information about a particular model. 112 | """ 113 | if model_key not in list(models_dict.keys()): 114 | raise ValueError( 115 | "Please enter a valid model key from {}".format(list(models_dict.keys())) 116 | ) 117 | return models_dict[model_key] 118 | 119 | 120 | def list_datasets(): 121 | """Just listing the available datasets. 122 | 123 | :returns: a list of available datasets. 124 | """ 125 | return datasets_list 126 | -------------------------------------------------------------------------------- /compiam/annotations/README.md: -------------------------------------------------------------------------------- 1 | ## Sañcāra Annotations 2 | 3 | Annotations documentation 4 | 5 | | mbid | Performance | Filename | Raga | 6 | |--------------------------------------|---------------------------------------------|---------------------------------|-----------| 7 | | b4ede827-1d97-44a9-8523-794758c362e5 | Salem Guyatri Venkatesan - Sharanu Janakana | `12_Sharanu_Janakana.txt` | Bilahari | 8 | | 5fa0bcfd-c71e-4d6f-940e-0cef6fbc2a32 | Akkarai Sisters - Koti Janmani | `47_Koti_Janmani.txt` | Rītigauḷa | 9 | | 8fed9a77-2f4c-4a26-b0f6-6919fd469e2b | Modhumudi Sudhakar - Sundari Nee Divya | `143_Sundari_Nee_Divya.txt` | Kalyāṇi | 10 | | 3845625a-e186-44c3-b8f9-b47c1ca155bb | Sumitra Vasudev - Vanajaksha Ninne Kori | `188_Vanajaksha_Ninne_Kori.txt` | Rītigauḷa | 11 | 12 | 13 | Ground-truth annotations of all sañcāras (musical phrases) in the audio recordings were created by a professional Carnatic vocalist, based in South India, with 21 years of performance experience. The annotations are written in the Indian music notation system, known as sargam, and were created using the software ELAN [1]. 14 | 15 | Sañcāras can be defined as coherent segments of melodic movement that follow the grammar of a particular rāga (melodic framework). 
There exists no definitive lists of all possible sañcāras in each rāga, rather the body of existing compositions and the living oral tradition of rāga performance act as repositories for that knowledge. Therefore, the segmentations are based on the annotator’s experience as a professional performer and student of a highly esteemed musical lineage. These annotations are subjective to some degree, but have the benefit of being based on expert performer knowledge rather than on an externally imposed metric that may be irrelevant to musical concepts held by culture bearers. 16 | 17 | In kritis, one of the most popular compositional formats, initial phrases are repeated with variations, know as saṇgatis. This means that there will be initial sañcāras, versions of which appear again later but with some variations in the melodic line. These musical connections between related but not identical melodic patterns are captured in these annotations by grouping the related musical material together with an "underlying sañcāra" annotation, which refers to the first occurrence and typically simplest version of the phrase. To reflect the hierarchical nature of plausible musical segmentations, longer phrase-level annotations that can comprise several shorter sañcāras are also created, and named as “full-phrases”. 18 | 19 | [1] Lausberg, H., & Sloetjes, H. (2016). The revised NEUROGES–ELAN system: An objective and reliable interdisciplinary analysis tool for nonverbal behavior and gesture. Behavior Research Methods, 48(3), 973–993. -------------------------------------------------------------------------------- /compiam/conf/raga/carnatic.yaml: -------------------------------------------------------------------------------- 1 | anandabhairavi: 2 | arohana: ['S', 'G2', 'R2', 'G2', 'M1', 'P', 'D2', 'P', 'N2', 'S'] 3 | avorohana: ['S', 'N2', 'D2', 'P', 'M1', 'G2', 'R2', 'S'] 4 | 5 | begada: 6 | arohana: ['S', 'G3', 'R2', 'G3', 'M1', 'P', 'D2', 'N2', 'D2', 'P', 'S'] 7 | avorohana: ['S', 'N3', 'D2', 'P', 'M1', 'G3', 'R2', 'S'] 8 | 9 | bhairavi: 10 | arohana: ['S', 'R2', 'G2', 'M1', 'P', 'D2', 'N2', 'S'] 11 | avorohana: ['S', 'N2', 'D1', 'P', 'M1', 'G2', 'R2', 'S'] 12 | 13 | bilahari: 14 | arohana: ['S', 'R2', 'G3', 'P', 'D2', 'S'] 15 | avorohana: ['S', 'N3', 'D2', 'P', 'M1', 'G3', 'R2', 'S'] 16 | 17 | kalyani: 18 | arohana: ['S', 'R2', 'G3', 'M2', 'P', 'D2', 'N3', 'S'] 19 | avorohana: ['S', 'N3', 'D2', 'P', 'M2', 'G3', 'R2', 'S'] 20 | 21 | ritigaula: 22 | arohana: ['S', 'G2', 'R2', 'G2', 'M1', 'N2', 'D2', 'M1', 'N2', 'N2', 'S'] 23 | avorohana: ['S', 'N2', 'D2', 'M1', 'G2', 'M1', 'P', 'M1', 'G2', 'R2', 'S'] 24 | 25 | sankarabharanam: 26 | arohana: ['S', 'R2', 'G3', 'M1', 'P', 'D2', 'N3', 'S'] 27 | avorohana: ['S', 'N3', 'D2', 'P', 'M1', 'G3', 'R2', 'S'] 28 | 29 | todi: 30 | arohana: ['S', 'R1', 'G2', 'M1', 'P', 'D1', 'N2', 'S'] 31 | avorohana: ['S', 'N2', 'D1', 'P', 'M1', 'G2', 'R1', 'S'] 32 | 33 | varaali: 34 | arohana: ['S', 'G1', 'R1', 'G1', 'M2', 'P', 'D1', 'N3', 'S'] 35 | avorohana: ['S', 'N3', 'D1', 'P', 'M2', 'G1', 'R1', 'S'] 36 | 37 | kamas: 38 | arohana: ['S', 'M1', 'G3', 'M1', 'P', 'D2', 'N2', 'S'] 39 | avorohana: ['S', 'N2', 'D2', 'P', 'M1', 'G3', 'R2', 'S'] 40 | 41 | saurastram: 42 | arohana: ['S', 'R1', 'G3', 'M1', 'P', 'M1', 'D2', 'N3', 'S'] 43 | avorohana: ['S', 'N3', 'D2', 'N2', 'D2', 'P', 'M1', 'G3', 'R1', 'S'] 44 | 45 | behag: 46 | arohana: ['S', 'G3', 'M1', 'P', 'N3', 'D2', 'N3', 'S'] 47 | avorohana: ['S', 'N3', 'D2', 'P', 'M2', 'G3', 'M1', 'G3', 'R2', 'S'] 48 | 49 | surati: 50 | arohana: ['S', 
'R2', 'M1', 'P', 'N2', 'S'] 51 | avorohana: ['S', 'N2', 'D2', 'P', 'M1', 'G3', 'P', 'M1', 'R2', 'S'] 52 | 53 | sanmukhapriya: 54 | arohana: ['S', 'R2', 'G2', 'M2', 'P', 'D1', 'N2', 'S'] 55 | avorohana: ['S', 'N2', 'D1', 'P', 'M2', 'G2', 'R2', 'S'] 56 | 57 | mohanam: 58 | arohana: ['S', 'R2', 'G3', 'P', 'D2', 'S'] 59 | avorohana: ['S', 'D2', 'P', 'G3', 'R2', 'S'] 60 | 61 | kamavardani: 62 | arohana: ['S', 'R1', 'G3', 'M2', 'P', 'D1', 'N3', 'S'] 63 | avorohana: ['S', 'N3', 'D1', 'P', 'M2', 'G3', 'R1', 'S'] 64 | -------------------------------------------------------------------------------- /compiam/conf/raga/svara_cents.yaml: -------------------------------------------------------------------------------- 1 | -2400: ['S'] 2 | -2300: ['R1'] 3 | -2200: ['R2', 'G1'] 4 | -2100: ['R3', 'G2'] 5 | -2000: ['G3'] 6 | -1900: ['M1'] 7 | -1800: ['M2'] 8 | -1700: ['P'] 9 | -1600: ['D1'] 10 | -1500: ['D2', 'N1'] 11 | -1400: ['D3', 'N2'] 12 | -1300: ['N3'] 13 | 14 | -1200: ['S'] 15 | -1100: ['R1'] 16 | -1000: ['R2', 'G1'] 17 | -900: ['R3', 'G2'] 18 | -800: ['G3'] 19 | -700: ['M1'] 20 | -600: ['M2'] 21 | -500: ['P'] 22 | -400: ['D1'] 23 | -300: ['D2', 'N1'] 24 | -200: ['D3', 'N2'] 25 | -100: ['N3'] 26 | 27 | 0: ['S'] 28 | 100: ['R1'] 29 | 200: ['R2', 'G1'] 30 | 300: ['R3', 'G2'] 31 | 400: ['G3'] 32 | 500: ['M1'] 33 | 600: ['M2'] 34 | 700: ['P'] 35 | 800: ['D1'] 36 | 900: ['D2', 'N1'] 37 | 1000: ['D3', 'N2'] 38 | 1100: ['N3'] 39 | 40 | 1200: ['S'] 41 | 1300: ['R1'] 42 | 1400: ['R2', 'G1'] 43 | 1500: ['R3', 'G2'] 44 | 1600: ['G3'] 45 | 1700: ['M1'] 46 | 1800: ['M2'] 47 | 1900: ['P'] 48 | 2000: ['D1'] 49 | 2100: ['D2', 'N1'] 50 | 2200: ['D3', 'N2'] 51 | 2300: ['N3'] 52 | 53 | 2400: ['S'] 54 | 2500: ['R1'] 55 | 2600: ['R2', 'G1'] 56 | 2700: ['R3', 'G2'] 57 | 2800: ['G3'] 58 | 2900: ['M1'] 59 | 3000: ['M2'] 60 | 3100: ['P'] 61 | 3200: ['D1'] 62 | 3300: ['D2', 'N1'] 63 | 3400: ['D3', 'N2'] 64 | 3500: ['N3'] -------------------------------------------------------------------------------- /compiam/dunya/README.md: -------------------------------------------------------------------------------- 1 | # Accessing the Dunya corpora 2 | 3 | Built within the framework of the [CompMusic project](https://compmusic.upf.edu/), the Dunya corpora cover several World music traditions and can be accessed through different ways. In compIAM we provide tools to easily access to the *two* Indian Art Music corpora in Dunya: one for Carnatic music, and one for the Hindustani repertoire. 4 | 5 | When initializing the corpora access through compIAM you will be asked to provide an *access token*. To get the *access token* you need to register to the [Dunya web portal](https://dunya.compmusic.upf.edu/). Once your registration is validation by the CompMusic team, you can access your user account information where an access token is provided. Use this token to initialize the ``compiam.Corpora`` objects and access the corpora. 
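
For reference, here is a minimal sketch of that initialization using the ``compiam.load_corpora`` wrapper and the ``Corpora`` methods defined in this package (the token string and the MusicBrainz recording ID below are placeholders, not real values):

```python
from compiam import load_corpora

# Tradition is either "carnatic" or "hindustani"; omitting the token raises an error.
corpora = load_corpora("carnatic", token="<your-dunya-access-token>")

# List the recordings in the collection (pass recording_detail=True for full metadata).
recordings = corpora.get_collection()

# Check which derived filetypes are available for a given MusicBrainz recording ID.
available = corpora.list_available_types("<musicbrainz-recording-id>")
```
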
-------------------------------------------------------------------------------- /compiam/dunya/__init__.py: -------------------------------------------------------------------------------- 1 | from compiam.io import ( 2 | write_csv, 3 | write_json, 4 | write_scalar_txt, 5 | ) 6 | from compmusic import dunya # Using compmusic API to access the Dunya database 7 | from compmusic.dunya import carnatic, hindustani 8 | 9 | from compiam.utils import get_logger 10 | 11 | logger = get_logger(__name__) 12 | 13 | 14 | class Corpora: 15 | """Dunya corpora class with access functions""" 16 | 17 | def __init__(self, tradition, token): 18 | """Dunya corpora class init method. 19 | 20 | :param tradition: the name of the tradition. 21 | :param token: Dunya personal token to access te database. 22 | """ 23 | # Load and set token 24 | self.token = token 25 | dunya.set_token(self.token) 26 | 27 | if tradition not in ["carnatic", "hindustani"]: 28 | raise ValueError( 29 | "Please choose a valid tradition: carnatic or hindustani" 30 | ) 31 | self.tradition = carnatic if tradition == "carnatic" else hindustani 32 | 33 | # Functions from the compmusic API are added as a method in the Corpora class 34 | for name in dir(self.tradition): 35 | func = getattr(self.tradition, name) 36 | if callable(func): 37 | setattr(self, name, func) 38 | 39 | logger.warning(""" 40 | Note that a part of the collection is under restricted access. 41 | To access the full collection please request permission at https://dunya.compmusic.upf.edu/user/profile/ 42 | """) 43 | 44 | def get_collection(self, recording_detail=False): 45 | """Get the documents (recordings) in a collection. 46 | 47 | :param recording_detail: Include additional information about each recording. 48 | :returns: dictionary with the recordings in the collection. 49 | """ 50 | if recording_detail is False: 51 | logger.info( 52 | "To parse the entire collection with all recording details, " 53 | + "please use the .get_collection(recording_detail=True) method. " 54 | + "Please note that it might take a few moments..." 55 | ) 56 | return self.tradition.get_recordings(recording_detail) 57 | 58 | @staticmethod 59 | def list_available_types(recording_id): 60 | """Get the available source filetypes for a Musicbrainz recording. 61 | :param recording_id: Musicbrainz recording ID. 62 | :returns: a list of filetypes in the database for this recording. 63 | """ 64 | document = dunya.conn._dunya_query_json("document/by-id/%s" % recording_id) 65 | return { 66 | x: list(document["derivedfiles"][x].keys()) 67 | for x in list(document["derivedfiles"].keys()) 68 | } 69 | 70 | @staticmethod 71 | def get_annotation(recording_id, thetype, subtype=None, part=None, version=None): 72 | """Alias function of _file_for_document in the Corpora class. 73 | :param recording_id: Musicbrainz recording ID. 74 | :param thetype: the computed filetype. 75 | :param subtype: a subtype if the module has one. 76 | :param part: the file part if the module has one. 77 | :param version: a specific version, otherwise the most recent one will be used. 78 | :returns: The contents of the most recent version of the derived file. 79 | """ 80 | return dunya.file_for_document( 81 | recording_id, thetype, subtype=subtype, part=part, version=version 82 | ) 83 | 84 | @staticmethod 85 | def save_annotation( 86 | recording_id, thetype, location, subtype=None, part=None, version=None 87 | ): 88 | """A version of get_annotation that writes the parsed data into a file. 89 | :param recording_id: Musicbrainz recording ID. 
90 | :param thetype: the computed filetype. 91 | :param subtype: a subtype if the module has one. 92 | :param part: the file part if the module has one. 93 | :param version: a specific version, otherwise the most recent one will be used. 94 | :returns: None (a file containing the parsed data is written). 95 | """ 96 | data = dunya.file_for_document( 97 | recording_id, thetype, subtype=subtype, part=part, version=version 98 | ) 99 | if ("tonic" in subtype) or ("aksharaPeriod" in subtype): 100 | write_scalar_txt(data, location) 101 | elif "section" in subtype: 102 | write_json(data, location) 103 | elif "APcurve" in subtype: 104 | write_csv(data, location) 105 | elif ("pitch" in subtype) or ("aksharaTicks" in subtype): 106 | write_csv(data, location) 107 | else: 108 | raise ValueError( 109 | "No writing method available for data type: {} and {}", thetype, subtype 110 | ) 111 | -------------------------------------------------------------------------------- /compiam/exceptions.py: -------------------------------------------------------------------------------- 1 | class ModelNotFoundError(Exception): 2 | pass 3 | 4 | 5 | class ModelNotDefinedError(Exception): 6 | pass 7 | 8 | 9 | class ModelNotTrainedError(Exception): 10 | pass 11 | 12 | 13 | class DatasetNotLoadedError(Exception): 14 | pass 15 | 16 | 17 | class HTTPError(Exception): 18 | pass 19 | 20 | 21 | class ConnectionError(Exception): 22 | pass 23 | -------------------------------------------------------------------------------- /compiam/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import json 4 | import yaml 5 | import pickle 6 | import importlib 7 | 8 | import numpy as np 9 | 10 | from compiam import utils 11 | 12 | 13 | def write_csv(data, out_path, header=None): 14 | """Writing multi-dimensional data into a file (.csv) 15 | 16 | :param data: the data to write 17 | :param output_path: the path where the data is going to be stored 18 | 19 | :returns: None 20 | """ 21 | data = np.array(data) 22 | with open(out_path, "w") as f: 23 | writer = csv.writer(f, delimiter=",") 24 | if header: 25 | if len(header) != len(data[0, :]): 26 | raise ValueError("Header and row length mismatch") 27 | writer.writerow(header) 28 | writer.writerows(data) 29 | 30 | 31 | def read_csv(file_path): 32 | """Reading a csv file (.csv) 33 | 34 | :param file_path: path to the csv 35 | 36 | :returns: numpy array containing the data from the read CSV 37 | """ 38 | output = np.genfromtxt(file_path, delimiter=",") 39 | return output[~np.isnan(output)] 40 | 41 | 42 | def save_object(obj, file_path): 43 | """Saves object to pickle file 44 | 45 | :param obj: an object to save to pickle file 46 | :param file_path: path to save the object 47 | """ 48 | with open(file_path, "wb") as outp: # Overwrites any existing file. 
49 | pickle.dump(obj, outp, pickle.HIGHEST_PROTOCOL) 50 | 51 | 52 | def write_json(j, path): 53 | """Write json, , to 54 | 55 | :param j: json 56 | :type path: json 57 | :param path: path to write to, 58 | if the directory doesn't exist, one will be created 59 | :type path: str 60 | """ 61 | utils.create_if_not_exists(path) 62 | # Opening JSON file 63 | with open(path, "w") as f: 64 | json.dump(j, f) 65 | 66 | 67 | def write_json(sections, output_path): 68 | """Writing json-based data into a file (.json) 69 | 70 | :param data: the data to write 71 | :param output_path: the path where the data is going to be stored 72 | 73 | :returns: None 74 | """ 75 | output_path = output_path + ".json" 76 | with open(output_path, "w") as fhandle: 77 | json.dump(sections, fhandle) 78 | fhandle.close() 79 | 80 | 81 | def write_scalar_txt(data, output_path): 82 | """Writing scalar data into a file (.txt) 83 | 84 | :param data: the data to write 85 | :param output_path: the path where the data is going to be stored 86 | 87 | :returns: None 88 | """ 89 | output_path = output_path + ".txt" 90 | with open(output_path, "w") as f: 91 | f.write("{}".format(data)) 92 | f.close() 93 | 94 | 95 | def resolve_dottedname(dotted_name): 96 | """Resolve a dotted name to an actual object, similar to zope.dottedname.resolve 97 | 98 | :param dotted_name: a dotted name 99 | :returns: the object the dotted name refers to 100 | """ 101 | module_name, _, attribute_name = dotted_name.rpartition('.') 102 | if not module_name: 103 | raise ImportError(f"Invalid dotted name: '{dotted_name}'") 104 | module = importlib.import_module(module_name) 105 | return getattr(module, attribute_name) 106 | 107 | 108 | def load_yaml(path): 109 | """Load yaml at to dictionary, d 110 | 111 | :param path: input file 112 | :returns: loaded yaml information 113 | """ 114 | def constructor_dottedname(loader, node): 115 | value = loader.construct_scalar(node) 116 | return resolve_dottedname(value) 117 | 118 | yaml.add_constructor("!dottedname", constructor_dottedname) 119 | 120 | if not os.path.isfile(path): 121 | return None 122 | with open(path) as f: 123 | d = yaml.load(f, Loader=yaml.FullLoader) 124 | return d 125 | -------------------------------------------------------------------------------- /compiam/melody/README.md: -------------------------------------------------------------------------------- 1 | # Melodic analysis tools 2 | 3 | | **Tool** | **Task** | **Paper** | 4 | |---------------------------|----------------------------------|-----------| 5 | | Melodia | Predominant pitch extraction | [1] | 6 | | FTANet-Carnatic | Vocal pitch extraction | [2] | 7 | | FTAResNet-Carnatic-Violin | Violin pitch extraction | [3] | 8 | | TonicIndianArtMusic | Tonic idendification | [4] | 9 | | CAE + Matrix profile | Melodic pattern discovery | [5] | 10 | | DEEPSRGM | DL-based raga recognition | [6] | 11 | 12 | 13 | [1] J. Salamon and E. Gómez, "Melody extraction from polyphonic music signals using pitch contour characteristics", IEEE Transactions on Audio, Speech, and Language Processing, vol. 20, no. 6, pp. 1759–1770, 2012. 14 | 15 | [2, 3] G. Plaja-Roglans, T. Nuttall, L. Pearson, X. Serra, M. Miron, "Repertoire-Specific Vocal Pitch Data Generation for Improved Melodic Analysis of Carnatic Music", Transactions of the International Society for Music Information Retrieval, vol. 6, no. 1, pp. 13–26, 2023. 16 | 17 | [4] J. Salamon, S. Gulati, and X. 
Serra, "A Multipitch Approach to Tonic Identification in Indian Classical Music", in International Society for Music Information Retrieval Conference (ISMIR 12), 2012. 18 | 19 | [5] T. Nuttall, G. Plaja-Roglans, L. Pearson, and X. Serra, "In search of sañcaras: tradition-informed repeated melodic pattern recognition in Carnatic Music", in International Society for Music Information Retrieval Conference (ISMIR 22), 2022. 20 | 21 | [6] S. Madhusudhan and G. Chowdhary, "DeepSRGM - Sequence Classification and Ranking in Indian Classical Music Via Deep Learning", in International Society for Music Information Retrieval Conference (ISMIR 20), 2020. -------------------------------------------------------------------------------- /compiam/melody/__init__.py: -------------------------------------------------------------------------------- 1 | ### IMPORT HERE FUNCTIONALITIES 2 | import inspect, importlib as implib 3 | from compiam.data import models_dict 4 | 5 | TO_AVOID = [ 6 | x[0] 7 | for x in inspect.getmembers( 8 | implib.import_module("compiam.melody"), inspect.ismodule 9 | ) 10 | ] 11 | 12 | 13 | ### IMPORT HERE THE CONSIDERED TASKS 14 | from compiam.melody import tonic_identification 15 | from compiam.melody import pitch_extraction 16 | from compiam.melody import raga_recognition 17 | from compiam.melody import pattern 18 | 19 | 20 | # Show user the available tasks 21 | def list_tasks(): 22 | return [ 23 | x[0] 24 | for x in inspect.getmembers( 25 | implib.import_module("compiam.melody"), inspect.ismodule 26 | ) 27 | if x[0] not in TO_AVOID 28 | ] 29 | 30 | 31 | # Show user the available tools 32 | def list_tools(): 33 | tasks = [ 34 | x[0] 35 | for x in inspect.getmembers( 36 | implib.import_module("compiam.melody"), inspect.ismodule 37 | ) 38 | if x[0] not in TO_AVOID 39 | ] 40 | tools_for_tasks = [ 41 | inspect.getmembers( 42 | implib.import_module("compiam.melody." + task), inspect.isclass 43 | ) 44 | for task in tasks 45 | ] 46 | tools_for_tasks = [ 47 | tool[1].__module__.split(".")[-2] + "." 
+ tool[0] 48 | for tool_list in tools_for_tasks 49 | for tool in tool_list 50 | ] # Get task.tool 51 | pre_trained_models = [ 52 | x["class_name"] for x in list(models_dict.values()) 53 | ] # Get list of pre-trained_models 54 | return [ 55 | tool + "*" if tool.split(".")[1] in pre_trained_models else tool 56 | for tool in tools_for_tasks 57 | ] 58 | -------------------------------------------------------------------------------- /compiam/melody/pattern/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from compiam.utils import get_tool_list 4 | from compiam.data import models_dict 5 | 6 | from compiam.melody.pattern.sancara_search import CAEWrapper 7 | from compiam.melody.pattern.sancara_search.extraction.self_sim import ( 8 | self_similarity, 9 | segmentExtractor, 10 | ) 11 | from compiam.melody.pattern.sancara_search.extraction.evaluation import ( 12 | load_annotations, 13 | to_aeneas, 14 | evaluate, 15 | ) 16 | 17 | 18 | # Show user the available tools 19 | def list_tools(): 20 | pre_trained_models = [ 21 | x["class_name"] for x in list(models_dict.values()) 22 | ] # Get list of pre-trained_models 23 | return [ 24 | tool + "*" if tool in pre_trained_models else tool 25 | for tool in get_tool_list(modules=sys.modules[__name__]) 26 | ] 27 | -------------------------------------------------------------------------------- /compiam/melody/pattern/sancara_search/complex_auto/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MTG/compIAM/7efdfa86ea8e20ab4950d0b78c9080bb9dc35448/compiam/melody/pattern/sancara_search/complex_auto/__init__.py -------------------------------------------------------------------------------- /compiam/melody/pattern/sancara_search/complex_auto/complex.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on July 05, 2019 3 | 4 | @author: Stefan Lattner 5 | 6 | Sony CSL Paris, France 7 | 8 | """ 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | 15 | class Complex(nn.Module): 16 | """CAE model.""" 17 | 18 | def __init__(self, n_in, n_out, dropout=0.5, learn_norm=False): 19 | super(Complex, self).__init__() 20 | 21 | self.layer = nn.Linear(n_in, n_out * 2, bias=False) 22 | 23 | self.drop = nn.Dropout(dropout) 24 | self.learn_norm = learn_norm 25 | self.n_out = n_out 26 | self.norm_val = nn.Parameter(torch.Tensor([0.43])) # any start value 27 | 28 | def drop_gauss(self, x): 29 | return torch.normal(mean=x, std=0.5) 30 | 31 | def forward(self, x): 32 | out = torch.matmul( 33 | self.drop(x), self.set_to_norm_graph(self.norm_val).transpose(0, 1) 34 | ) 35 | real = out[:, : self.n_out] 36 | imag = out[:, self.n_out :] 37 | amplitudes = (real**2 + imag**2) ** 0.5 38 | phases = torch.atan2(real, imag) 39 | return amplitudes, phases 40 | 41 | def backward(self, amplitudes, phases): 42 | real = torch.sin(phases) * amplitudes 43 | imag = torch.cos(phases) * amplitudes 44 | cat_ = torch.cat((real, imag), dim=1) 45 | recon = torch.matmul(cat_, self.set_to_norm_graph(self.norm_val)) 46 | return recon 47 | 48 | def set_to_norm(self, val): 49 | """ 50 | Sets the norms of all convolutional kernels of the C-GAE to a specific 51 | value. 
52 | 53 | :param val: norms of kernels are set to this value 54 | """ 55 | if val == -1: 56 | val = self.norm_val 57 | shape_x = self.layer.weight.size() 58 | conv_x_reshape = self.layer.weight.view(shape_x[0], -1) 59 | norms_x = ((conv_x_reshape**2).sum(1) ** 0.5).view(-1, 1) 60 | conv_x_reshape = conv_x_reshape / norms_x 61 | weight_x_new = (conv_x_reshape.view(*shape_x) * val).clone() 62 | self.layer.weight.data = weight_x_new 63 | 64 | def set_to_norm_graph(self, val): 65 | if not self.learn_norm: 66 | return self.layer.weight 67 | """ 68 | Sets the norms of all convolutional kernels of the C-GAE to a learned 69 | value. 70 | 71 | :param val: norms of kernels are set to this value 72 | """ 73 | if val == -1: 74 | val = self.norm_val 75 | shape_x = self.layer.weight.size() 76 | conv_x_reshape = self.layer.weight.view(shape_x[0], -1) 77 | norms_x = ((conv_x_reshape**2).sum(1) ** 0.5).view(-1, 1) 78 | conv_x_reshape = conv_x_reshape / norms_x 79 | weight_x_new = (conv_x_reshape.view(*shape_x) * val).clone() 80 | return weight_x_new 81 | -------------------------------------------------------------------------------- /compiam/melody/pattern/sancara_search/complex_auto/cqt.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on July 05, 2019 3 | 4 | @author: Stefan Lattner 5 | 6 | Sony CSL Paris, France 7 | 8 | """ 9 | 10 | import os 11 | import librosa 12 | import numpy as np 13 | 14 | from functools import partial 15 | from multiprocessing.pool import Pool 16 | 17 | from compiam.melody.pattern.sancara_search.complex_auto.util import ( 18 | normalize, 19 | load_pyc_bz, 20 | save_pyc_bz, 21 | ) 22 | from compiam.utils import get_logger 23 | 24 | logger = get_logger(__name__) 25 | 26 | 27 | def standardize(x, axis=-1): 28 | """ 29 | Performs contrast normalization (zero mean, unit variance) 30 | along the given axis. 
31 | 32 | :param x: array to normalize 33 | :param axis: normalize along that axis 34 | :return: contrast-normalized array 35 | """ 36 | stds_avg = np.std(x, axis=axis, keepdims=True) 37 | x -= np.mean(x, axis=axis, keepdims=True) 38 | x /= stds_avg + 1e-8 39 | return x 40 | 41 | 42 | def load_audio(fn, sr=-1): 43 | file = fn 44 | logger.info(f"loading file {file}") 45 | audio, fs = librosa.load(fn, sr=sr) 46 | return audio, fs 47 | 48 | 49 | def to_mono(signal): 50 | if len(signal.shape) == 1: 51 | return signal 52 | return signal[:, 0] / 2 + signal[:, 1] / 2 53 | 54 | 55 | def get_signal(fn, use_nr_samples=-1, rand_midpoint=False, sr=-1): 56 | audio, fs = load_audio(fn, sr) 57 | audio = to_mono(audio) 58 | if use_nr_samples > 0 and use_nr_samples < len(audio): 59 | if rand_midpoint: 60 | mid_point = np.random.randint( 61 | use_nr_samples // 2, len(audio) - 1 - use_nr_samples // 2 62 | ) 63 | else: 64 | mid_point = len(audio) // 2 65 | audio_snippet = audio[ 66 | mid_point - use_nr_samples // 2 : mid_point + use_nr_samples // 2 67 | ] 68 | else: 69 | audio_snippet = audio 70 | 71 | return audio_snippet, fs 72 | 73 | 74 | def to_cqt_repr( 75 | fn, 76 | n_bins, 77 | bins_per_oct, 78 | fmin, 79 | hop_length, 80 | use_nr_samples, 81 | rand_midpoint=False, 82 | standard=False, 83 | normal=False, 84 | mult=1.0, 85 | sr=-1, 86 | ): 87 | audio, sr = get_signal(fn, use_nr_samples, rand_midpoint, sr=sr) 88 | 89 | cqt = librosa.cqt( 90 | audio, 91 | sr=sr, 92 | n_bins=n_bins, 93 | bins_per_octave=bins_per_oct, 94 | fmin=fmin, 95 | hop_length=hop_length, 96 | ) 97 | mag = librosa.magphase(cqt)[0] 98 | 99 | if standard: 100 | mag = standardize(mag, axis=0) 101 | 102 | if normal: 103 | mag = normalize(mag) 104 | 105 | return mag * mult 106 | 107 | 108 | def get_cqts( 109 | files, 110 | cache_key="train", 111 | rebuild=False, 112 | use_nr_samples=-1, 113 | processes=10, 114 | sr=-1, 115 | args=None, 116 | ): 117 | assert args is not None, "args are needed." 
118 | cache_fn = os.path.join(args.cache_dir, "hist_cache_" + cache_key + ".pyc.bz") 119 | cqts = [] 120 | if not os.path.isfile(cache_fn) or rebuild: 121 | if processes > 1: 122 | calc_cqt_f = partial( 123 | to_cqt_repr, 124 | n_bins=args.n_bins, 125 | bins_per_oct=args.bins_per_oct, 126 | fmin=args.fmin, 127 | hop_length=args.hop_length, 128 | use_nr_samples=use_nr_samples, 129 | rand_midpoint=False, 130 | standard=True, 131 | sr=sr, 132 | mult=10, 133 | ) 134 | 135 | with Pool(processes=processes) as pool: 136 | cqts = pool.map(calc_cqt_f, files) 137 | 138 | save_pyc_bz(cqts, cache_fn) 139 | else: 140 | cqts = load_pyc_bz(cache_fn) 141 | 142 | return cqts 143 | -------------------------------------------------------------------------------- /compiam/melody/pattern/sancara_search/complex_auto/util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on April 13, 2018 3 | Edited on July 05, 2019 4 | 5 | @author: Gaetan Hadjeres & Stefan Lattner 6 | 7 | Sony CSL Paris, France 8 | """ 9 | 10 | import os 11 | import bz2 12 | 13 | import soundfile 14 | import torch 15 | import pickle 16 | import logging 17 | 18 | import numpy as np 19 | 20 | from pickle import UnpicklingError 21 | from torch.autograd import Variable 22 | 23 | LOGGER = logging.getLogger(__name__) 24 | 25 | 26 | def normalize(x): 27 | x -= np.min(x) 28 | x /= np.max(x) + 1e-8 29 | return x 30 | 31 | 32 | def read_file(fn): 33 | with open(fn, "r") as f: 34 | lines = f.readlines() 35 | for i, line in enumerate(lines): 36 | lines[i] = line.strip() 37 | 38 | return lines 39 | 40 | 41 | def check_audio_files(filelist): 42 | for file in filelist: 43 | assert os.path.exists(file), f"File does not exist: {file}" 44 | assert os.path.isfile(file), f"Not a file: {file}" 45 | 46 | 47 | def prepare_audio_inputs(input_files): 48 | input_files = read_file(input_files) 49 | check_audio_files(input_files) 50 | return input_files 51 | 52 | 53 | def cuda_tensor(data, device): 54 | return torch.FloatTensor(data).to(device) 55 | 56 | 57 | def cuda_variable(tensor, volatile=False, device="cpu"): 58 | try: 59 | return Variable(tensor.to(device), volatile=volatile) 60 | except TypeError: 61 | return Variable(torch.Tensor(tensor).to(device), volatile=volatile) 62 | 63 | 64 | def to_numpy(variable): 65 | if type(variable) == np.ndarray: 66 | return variable 67 | try: 68 | if torch.cuda.is_available(): 69 | return variable.data.cpu().numpy() 70 | else: 71 | return variable.data.numpy() 72 | except: 73 | try: 74 | return variable.numpy() 75 | except: 76 | LOGGER.warning("Could not 'to_numpy' variable of type " f"{type(variable)}") 77 | return variable 78 | 79 | 80 | def save_pyc_bz(data, fn): 81 | """ 82 | Saves data to file (bz2 compressed) 83 | 84 | :param data: data to save 85 | :param fn: file name of dumped data 86 | """ 87 | pickle.dump(data, bz2.BZ2File(fn, "w")) 88 | 89 | 90 | def load_pyc_bz(fn): 91 | """ 92 | Loads data from file (bz2 compressed) 93 | 94 | :param fn: file name of dumped data 95 | :return: loaded data 96 | """ 97 | try: 98 | return pickle.load(bz2.BZ2File(fn, "r"), encoding="latin1") 99 | except EOFError: 100 | return pickle.load(bz2.BZ2File(fn, "r")) 101 | 102 | 103 | def cached(cache_fn, func, args=(), kwargs={}, refresh_cache=False, logger=None): 104 | """ 105 | If `cache_fn` exists, return the unpickled contents of that file 106 | (the cache file is treated as a bzipped pickle file). 
If this 107 | fails, compute `func`(*`args`), pickle the result to `cache_fn`, 108 | and return the result. 109 | 110 | Parameters 111 | ---------- 112 | 113 | func : function 114 | function to compute 115 | 116 | args : tuple 117 | argument for which to evaluate `func` 118 | 119 | cache_fn : str 120 | file name to load the computed value `func`(*`args`) from 121 | 122 | refresh_cache : boolean 123 | if True, ignore the cache file, compute function, and store the result 124 | in the cache file 125 | 126 | Returns 127 | ------- 128 | 129 | object 130 | 131 | the result of `func`(*`args`) 132 | 133 | """ 134 | if logger == None: 135 | LOGGER = logging.getLogger(__name__) 136 | else: 137 | LOGGER = logger 138 | 139 | result = None 140 | if cache_fn is not None and os.path.exists(cache_fn): 141 | if refresh_cache: 142 | os.remove(cache_fn) 143 | else: 144 | try: 145 | LOGGER.info(f"Loading cache file {cache_fn}...") 146 | result = load_pyc_bz(cache_fn) 147 | except UnpicklingError as e: 148 | LOGGER.error( 149 | ( 150 | "The file {0} exists, but cannot be unpickled." 151 | "Is it readable? Is this a pickle file? Try " 152 | "with numpy.." 153 | "" 154 | ).format(cache_fn) 155 | ) 156 | try: 157 | result = np.load(cache_fn) 158 | except Exception as g: 159 | LOGGER.error("Did not work, either.") 160 | raise e 161 | 162 | if result is None: 163 | result = func(*args, **kwargs) 164 | if cache_fn is not None: 165 | try: 166 | save_pyc_bz(result, cache_fn) 167 | except Exception as e: 168 | LOGGER.error("Could not save, try with numpy..") 169 | try: 170 | np.save(cache_fn, result) 171 | except Exception as g: 172 | LOGGER.error("Did not work, either.") 173 | raise e 174 | return result 175 | -------------------------------------------------------------------------------- /compiam/melody/pattern/sancara_search/extraction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MTG/compIAM/7efdfa86ea8e20ab4950d0b78c9080bb9dc35448/compiam/melody/pattern/sancara_search/extraction/__init__.py -------------------------------------------------------------------------------- /compiam/melody/pattern/sancara_search/extraction/img.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import cv2 4 | 5 | from scipy import signal 6 | from scipy import misc 7 | from scipy.ndimage import binary_opening 8 | 9 | from scipy.ndimage import gaussian_filter 10 | 11 | import matplotlib.pyplot as plt 12 | from matplotlib import cm 13 | 14 | scharr = np.array( 15 | [ 16 | [-3 - 3j, 0 - 10j, +3 - 3j], 17 | [-10 + 0j, 0 + 0j, +10 + 0j], 18 | [-3 + 3j, 0 + 10j, +3 + 3j], 19 | ] 20 | ) # Gx + j*Gy 21 | 22 | sobel = np.asarray([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]) 23 | 24 | sobel_x = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], dtype=float) 25 | 26 | sobel_y = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]], dtype=float) 27 | 28 | 29 | def remove_diagonal(X): 30 | X_ = X.copy() 31 | n = X.shape[0] 32 | 33 | for i in range(-30, 30): 34 | x = range(n) 35 | y = [x_ + i for x_ in x] 36 | 37 | if i != 0: 38 | x = x[abs(i) : -abs(i)] 39 | y = y[abs(i) : -abs(i)] 40 | X_[x, y] = 0 41 | return X_ 42 | 43 | 44 | def convolve_array(X, cfilter=scharr): 45 | grad = signal.convolve2d(X, cfilter, boundary="symm", mode="same") 46 | X_conv = np.absolute(grad) 47 | return X_conv 48 | 49 | 50 | def convolve_array_tile(X, cfilter=sobel, divisor=49): 51 | """ 52 | Iteratively convolve equal sized tiles in X, 
rejoining for fast convolution of the whole 53 | """ 54 | x_height, x_width = X.shape 55 | 56 | assert x_height == x_width, "convolve_array expects square matrix" 57 | 58 | # Find even split for array 59 | divisor = divisor 60 | tile_height = None 61 | while (not tile_height) or (int(tile_height) != tile_height): 62 | # iterate divisor until whole number is found 63 | divisor += 1 64 | tile_height = x_height / divisor 65 | 66 | tile_height = int(tile_height) 67 | 68 | # Get list of tiles 69 | tiled_array = ( 70 | X.reshape(divisor, tile_height, -1, tile_height) 71 | .swapaxes(1, 2) 72 | .reshape(-1, tile_height, tile_height) 73 | ) 74 | 75 | # Convolve tiles iteratively 76 | tiled_array_conv = np.array( 77 | [convolve_array(x, cfilter=cfilter) for x in tiled_array] 78 | ) 79 | 80 | # Reconstruct original array using convolved tiles 81 | X_conv = ( 82 | tiled_array_conv.reshape(divisor, divisor, tile_height, tile_height) 83 | .swapaxes(1, 2) 84 | .reshape(x_height, x_width) 85 | ) 86 | 87 | return X_conv 88 | 89 | 90 | def binarize(X, bin_thresh): 91 | X_bin = X.copy() 92 | X_bin[X_bin < bin_thresh] = 0 93 | X_bin[X_bin >= bin_thresh] = 1 94 | return X_bin 95 | 96 | 97 | def diagonal_gaussian(X, gauss_sigma): 98 | d = X.shape[0] 99 | X_gauss = X.copy() 100 | 101 | diag_indices_x, diag_indices_y = np.diag_indices_from(X_gauss) 102 | for i in range(1, d): 103 | diy = np.append(diag_indices_y, diag_indices_y[:i]) 104 | diy = diy[i:] 105 | X_gauss[diag_indices_x, diy] = gaussian_filter( 106 | X_gauss[diag_indices_x, diy], sigma=gauss_sigma 107 | ) 108 | 109 | diag_indices_x, diag_indices_y = np.diag_indices_from(X_gauss) 110 | for i in range(1, d): 111 | dix = np.append(diag_indices_x, diag_indices_x[:i]) 112 | dix = dix[i:] 113 | X_gauss[dix, diag_indices_y] = gaussian_filter( 114 | X_gauss[dix, diag_indices_y], sigma=gauss_sigma 115 | ) 116 | 117 | return X_gauss 118 | 119 | 120 | def make_symmetric(X): 121 | return np.maximum(X, X.transpose()) 122 | 123 | 124 | def edges_to_contours(X, kernel_size=10): 125 | X_copy = X.copy() 126 | kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size)) 127 | close = cv2.morphologyEx(X_copy, cv2.MORPH_CLOSE, kernel) 128 | X_copy = close - X_copy 129 | X_copy[X_copy == -1] = 0 130 | return close # X_copy 131 | 132 | 133 | def apply_bin_op(X, binop_dim): 134 | binop_struct = np.zeros((binop_dim, binop_dim)) 135 | np.fill_diagonal(binop_struct, 1) 136 | X_binop = binary_opening(X, structure=binop_struct).astype(int) 137 | 138 | return X_binop 139 | -------------------------------------------------------------------------------- /compiam/melody/pattern/sancara_search/extraction/pitch.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | from compiam.melody.pattern.sancara_search.extraction.sequence import ( 5 | get_stability_mask, 6 | add_center_to_mask, 7 | ) 8 | from compiam.melody.pattern.sancara_search.extraction.io import ( 9 | get_timeseries, 10 | write_timeseries, 11 | ) 12 | from compiam.utils.pitch import interpolate_below_length 13 | from compiam.utils import get_logger 14 | 15 | logger = get_logger(__name__) 16 | 17 | 18 | def pitch_to_cents(p, tonic): 19 | """ 20 | Convert pitch value,

to cents above . 21 | 22 | :param p: Pitch value in Hz 23 | :type p: float 24 | :param tonic: Tonic value in Hz 25 | :type tonic: float 26 | 27 | :return: Pitch value,

in cents above 28 | :rtype: float 29 | """ 30 | return 1200 * math.log(p / tonic, 2) if p else None 31 | 32 | 33 | def cents_to_pitch(c, tonic): 34 | """ 35 | Convert cents value, to pitch in Hz 36 | 37 | :param c: Pitch value in cents above 38 | :type c: float/int 39 | :param tonic: Tonic value in Hz 40 | :type tonic: float 41 | 42 | :return: Pitch value, in Hz 43 | :rtype: float 44 | """ 45 | return (2 ** (c / 1200)) * tonic 46 | 47 | 48 | def pitch_seq_to_cents(pseq, tonic): 49 | """ 50 | Convert sequence of pitch values to sequence of 51 | cents above values 52 | 53 | :param pseq: Array of pitch values in Hz 54 | :type pseq: np.array 55 | :param tonic: Tonic value in Hz 56 | :type tonic: float 57 | 58 | :return: Sequence of original pitch value in cents above 59 | :rtype: np.array 60 | """ 61 | return np.vectorize(lambda y: pitch_to_cents(y, tonic))(pseq) 62 | 63 | 64 | def silence_stability_from_file( 65 | inpath, 66 | outpath, 67 | tonic=None, 68 | min_stability_length_secs=1, 69 | stab_hop_secs=0.2, 70 | freq_var_thresh_stab=8, 71 | gap_interp=0.250, 72 | ): 73 | pitch, time, timestep = get_timeseries(inpath) 74 | pitch_interp = interpolate_below_length(pitch, 0, (gap_interp / timestep)) 75 | 76 | logger.info("Computing stability/silence mask") 77 | if tonic: 78 | pi = pitch_seq_to_cents(pitch_interp, tonic) 79 | else: 80 | pi = pitch_interp 81 | stable_mask = get_stability_mask( 82 | pi, min_stability_length_secs, stab_hop_secs, freq_var_thresh_stab, timestep 83 | ) 84 | silence_mask = (pitch_interp == 0).astype(int) 85 | silence_mask = add_center_to_mask(silence_mask) 86 | silence_and_stable_mask = np.array( 87 | [int(any([i, j])) for i, j in zip(silence_mask, stable_mask)] 88 | ) 89 | write_timeseries([time, silence_and_stable_mask], outpath) 90 | -------------------------------------------------------------------------------- /compiam/melody/pattern/sancara_search/extraction/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from compiam.utils import get_logger 5 | 6 | logger = get_logger(__name__) 7 | 8 | 9 | def find_nearest(array, value, index=True): 10 | """ 11 | Find the closest element of to 12 | 13 | :param array: array of values 14 | :type array: numpy.array 15 | :param value: value to check 16 | :type value: float 17 | :param index: True or False, return index or value in of closest element? 
18 | :type index: bool 19 | 20 | :return: index/value of element in closest to 21 | :rtype: number 22 | """ 23 | array = np.asarray(array) 24 | idx = (np.abs(array - value)).argmin() 25 | return idx if index else array[idx] 26 | 27 | 28 | def myround(x, base=5): 29 | return base * round(x / base) 30 | 31 | 32 | def check_stability(this_seq, thresh=130): 33 | return True if np.var(this_seq) < thresh else False 34 | 35 | 36 | def get_timestamp(secs, divider="-"): 37 | """ 38 | Convert seconds into timestamp 39 | 40 | :param secs: seconds 41 | :type secs: int 42 | :param divider: divider between minute and second, default "-" 43 | :type divider: str 44 | 45 | :return: timestamp 46 | :rtype: str 47 | """ 48 | minutes = int(secs / 60) 49 | seconds = round(secs % 60, 2) 50 | return f"{minutes}min{divider}{seconds}sec" 51 | 52 | 53 | def interpolate_below_length(arr, val, gap): 54 | """ 55 | Interpolate gaps of value, of 56 | length equal to or shorter than in 57 | 58 | :param arr: Array to interpolate 59 | :type arr: np.array 60 | :param val: Value expected in gaps to interpolate 61 | :type val: number 62 | :param gap: Maximum gap length to interpolate, gaps of longer than will not be interpolated 63 | :type gap: number 64 | 65 | :return: interpolated array 66 | :rtype: np.array 67 | """ 68 | s = np.copy(arr) 69 | is_zero = s == val 70 | cumsum = np.cumsum(is_zero).astype("float") 71 | diff = np.zeros_like(s) 72 | diff[~is_zero] = np.diff(cumsum[~is_zero], prepend=0) 73 | for i, d in enumerate(diff): 74 | if d <= gap: 75 | s[int(i - d) : i] = np.nan 76 | interp = pd.Series(s).interpolate(method="linear", axis=0).ffill().bfill().values 77 | return interp 78 | -------------------------------------------------------------------------------- /compiam/melody/pitch_extraction/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from compiam.utils import get_tool_list 4 | from compiam.data import models_dict 5 | 6 | from compiam.melody.pitch_extraction.melodia import Melodia 7 | from compiam.melody.pitch_extraction.ftanet_carnatic import FTANetCarnatic 8 | from compiam.melody.pitch_extraction.ftaresnet_carnatic import FTAResNetCarnatic 9 | 10 | 11 | # Show user the available tools 12 | def list_tools(): 13 | pre_trained_models = [ 14 | x["class_name"] for x in list(models_dict.values()) 15 | ] # Get list of pre-trained_models 16 | return [ 17 | tool + "*" if tool in pre_trained_models else tool 18 | for tool in get_tool_list(modules=sys.modules[__name__]) 19 | ] 20 | -------------------------------------------------------------------------------- /compiam/melody/pitch_extraction/ftanet_carnatic/pitch_processing.py: -------------------------------------------------------------------------------- 1 | # The functions in this file were directly ported (and in some cases adapted) 2 | # from the original repoository ofthe FTA-Net (https://github.com/yushuai/FTANet-melodic). 3 | # We documented these for a more clear usage of the code by the CompIAM users. For direct 4 | # use of the FTA-Net as designed and published by the authors, please refer to the original 5 | # mentioned GitHub repository. 6 | 7 | import numpy as np 8 | 9 | from compiam.melody.pitch_extraction.ftanet_carnatic.cfp import get_CenFreq 10 | 11 | 12 | def batchize_test(data, size=430): 13 | """Re-arrange CFP features to fit the FTA-Net model. 14 | 15 | :param data: input CFP features for the FTA-Net. 16 | :param size: size of the batches. 17 | :returns: batched features. 
18 | """ 19 | xlist = [] 20 | num = int(data.shape[-1] / size) 21 | if data.shape[-1] % size != 0: 22 | num += 1 23 | for i in range(num): 24 | if (i + 1) * size > data.shape[-1]: 25 | batch_x = np.zeros((data.shape[0], data.shape[1], size)) 26 | 27 | tmp_x = data[:, :, i * size :] 28 | 29 | batch_x[:, :, : tmp_x.shape[-1]] += tmp_x 30 | xlist.append(batch_x.transpose(1, 2, 0)) 31 | break 32 | else: 33 | batch_x = data[:, :, i * size : (i + 1) * size] 34 | xlist.append(batch_x.transpose(1, 2, 0)) 35 | 36 | return np.array(xlist) 37 | 38 | 39 | def est(output, CenFreq, time_arr): 40 | """Re-arrange FTA-Net output to a versatile pitch time-series. 41 | 42 | :param data: input CFP features for the FTA-Net. 43 | :param size: size of the batches. 44 | :returns: batched features. 45 | """ 46 | CenFreq[0] = 0 47 | est_time = time_arr 48 | est_freq = np.argmax(output, axis=0) 49 | 50 | for j in range(len(est_freq)): 51 | est_freq[j] = CenFreq[int(est_freq[j])] 52 | 53 | if len(est_freq) != len(est_time): 54 | new_length = min(len(est_freq), len(est_time)) 55 | est_freq = est_freq[:new_length] 56 | est_time = est_time[:new_length] 57 | 58 | est_arr = np.concatenate((est_time[:, None], est_freq[:, None]), axis=1) 59 | 60 | return est_arr 61 | 62 | 63 | def iseg(data): 64 | """Re-shape data. 65 | 66 | :param data: input features. 67 | :returns: re-shaped data. 68 | """ 69 | # data: (batch_size, freq_bins, seg_len) 70 | new_length = data.shape[0] * data.shape[-1] # T = batch_size * seg_len 71 | new_data = np.zeros((data.shape[1], new_length)) # (freq_bins, T) 72 | for i in range(len(data)): 73 | new_data[:, i * data.shape[-1] : (i + 1) * data.shape[-1]] = data[i] 74 | return new_data 75 | 76 | 77 | def get_est_arr(model, x_list, y_list, batch_size): 78 | """Run the FTA-Net model in batches and construct the final pitch time-series. 79 | 80 | :param model: built and trained model. 81 | :param x_list: features. 82 | :param y_list: timestamps. 83 | :param batch_size: batch size of the input data. 84 | :returns: output pitch time-series. 85 | """ 86 | for i in range(len(x_list)): 87 | x = x_list[i] 88 | y = y_list[i] 89 | 90 | # predict and concat 91 | num = x.shape[0] // batch_size 92 | if x.shape[0] % batch_size != 0: 93 | num += 1 94 | preds = [] 95 | for j in range(num): 96 | # x: (batch_size, freq_bins, seg_len) 97 | if j == num - 1: 98 | X = x[j * batch_size :] 99 | length = x.shape[0] - j * batch_size 100 | else: 101 | X = x[j * batch_size : (j + 1) * batch_size] 102 | length = batch_size 103 | 104 | prediction = model.predict(X, length) 105 | preds.append(prediction) 106 | 107 | # (num*bs, freq_bins, seg_len) to (freq_bins, T) 108 | preds = np.concatenate(preds, axis=0) 109 | preds = iseg(preds) 110 | # transform to f0ref 111 | CenFreq = get_CenFreq(StartFreq=31, StopFreq=1250, NumPerOct=60) 112 | est_arr = est(preds, CenFreq, y) 113 | 114 | return est_arr 115 | 116 | 117 | def std_normalize(data): 118 | """Standardize the input data. 119 | 120 | :param data: input data. 121 | :returns: standardized data. 
122 | """ 123 | data = data.astype(np.float64) 124 | mean = np.mean(data) 125 | std = np.std(data) 126 | data = data.copy() - mean 127 | if std != 0.0: 128 | data = data / std 129 | return data.astype(np.float32) 130 | -------------------------------------------------------------------------------- /compiam/melody/raga_recognition/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from compiam.utils import get_tool_list 4 | from compiam.data import models_dict 5 | 6 | from compiam.melody.raga_recognition.deepsrgm import DEEPSRGM 7 | 8 | 9 | # Show user the available tools 10 | def list_tools(): 11 | pre_trained_models = [ 12 | x["class_name"] for x in list(models_dict.values()) 13 | ] # Get list of pre-trained_models 14 | return [ 15 | tool + "*" if tool in pre_trained_models else tool 16 | for tool in get_tool_list(modules=sys.modules[__name__]) 17 | ] 18 | -------------------------------------------------------------------------------- /compiam/melody/raga_recognition/deepsrgm/attention_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | # Attention layer inspired from the following blog post 6 | # https://towardsdatascience.com/nlp-learning-series-part-3-attention-cnn-and-what-not-for-text-classification-4313930ed566 7 | class Attention(nn.Module): 8 | def __init__(self, feature_dim, step_dim, bias=True, **kwargs): 9 | """Attention module init method 10 | 11 | :param feature_dim: input size at every step 12 | :param step_dim: total number of steps in the sequence 13 | :param bias: Bool param to indicate whether bias should be added 14 | :param kwargs: keyword arguments for the parent class 15 | """ 16 | 17 | super(Attention, self).__init__(**kwargs) 18 | self.supports_masking = True 19 | 20 | self.bias = bias 21 | self.feature_dim = feature_dim 22 | self.step_dim = step_dim 23 | self.features_dim = 0 24 | 25 | weight = torch.zeros(feature_dim, 1) 26 | nn.init.kaiming_uniform_(weight) 27 | self.weight = nn.Parameter(weight) 28 | if bias: 29 | self.b = nn.Parameter(torch.zeros(step_dim)) 30 | 31 | def forward(self, x, mask=None): 32 | feature_dim = self.feature_dim 33 | step_dim = self.step_dim 34 | 35 | eij = torch.mm(x.contiguous().view(-1, feature_dim), self.weight).view( 36 | -1, step_dim 37 | ) 38 | 39 | if self.bias: 40 | eij = eij + self.b 41 | 42 | eij = torch.tanh(eij) 43 | a = torch.exp(eij) 44 | 45 | if mask is not None: 46 | a = a * mask 47 | 48 | a = a / (torch.sum(a, 1, keepdim=True) + 1e-10) 49 | 50 | weighted_input = x * torch.unsqueeze(a, -1) 51 | return torch.sum(weighted_input, 1) 52 | -------------------------------------------------------------------------------- /compiam/melody/raga_recognition/deepsrgm/model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from compiam.melody.raga_recognition.deepsrgm.attention_layer import Attention 3 | 4 | 5 | class deepsrgmModel(nn.Module): 6 | def __init__( 7 | self, 8 | rnn="lstm", 9 | input_length=5000, 10 | embedding_size=128, 11 | hidden_size=768, 12 | num_layers=1, 13 | num_classes=10, 14 | vocab_size=209, 15 | drop_prob=0.3, 16 | ): 17 | """DEEPSRGM model init class 18 | 19 | :param rnn: indicates whether to use an LSTM or a GRU 20 | :param input_length: length of input subsequence 21 | :param embedding_size: dim of the embedding for each element in input 22 | :param hidden_size: number of features in 
the hidden state of LSTM 23 | :param num_layers: number of LSTM layers 24 | :param num_classes: number of classes for classification task 25 | :param vocab_size: size of vocabulary for embedding layer 26 | :param drop_prob: dropour probability 27 | """ 28 | 29 | super(deepsrgmModel, self).__init__() 30 | self.num_layers = num_layers 31 | self.hidden_size = hidden_size 32 | 33 | if rnn == "lstm": 34 | self.rnn = nn.LSTM( 35 | embedding_size, 36 | hidden_size, 37 | num_layers, 38 | dropout=drop_prob, 39 | batch_first=True, 40 | ) 41 | elif rnn == "gru": 42 | self.rnn = nn.GRU( 43 | embedding_size, 44 | hidden_size, 45 | num_layers, 46 | dropout=drop_prob, 47 | batch_first=True, 48 | ) 49 | 50 | self.embeddings = nn.Embedding(vocab_size, embedding_size) 51 | self.attention_layer = Attention(hidden_size, input_length) 52 | 53 | self.fc1 = nn.Linear(hidden_size, 384) 54 | self.fc2 = nn.Linear(384, num_classes) 55 | 56 | # self.batchNorm1d = nn.BatchNorm1d(input_length) 57 | self.dropout = nn.Dropout(drop_prob) 58 | self.relu = nn.ReLU() 59 | 60 | def forward(self, x): 61 | # batch_size = x.size(0) 62 | embeds = self.embeddings(x) 63 | out, _ = self.rnn(embeds) 64 | # out = self.batchNorm1d(out) 65 | out = self.attention_layer(out) 66 | 67 | out = self.relu(self.fc1(out)) 68 | out = self.dropout(out) 69 | out = self.fc2(out) 70 | 71 | return out 72 | -------------------------------------------------------------------------------- /compiam/melody/raga_recognition/deepsrgm/raga_mapping.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | def create_mapping(mapping_path, selection): 5 | """Creating a map for the ragas available in the dataset (40 out of 71) 6 | 7 | :param mapping_path: Path to raga mapping JSON file 8 | :param selection: ids to select the given ragas 9 | """ 10 | 11 | with open(mapping_path, "r") as fhandle: 12 | legend = json.load(fhandle) 13 | 14 | # Create mapping with raga and iids 15 | keys = list(legend.keys()) 16 | mapping = dict() 17 | for key in keys: 18 | if key in legend.keys(): 19 | mapping[key] = legend[key] 20 | 21 | # integer and simple ID per raga 22 | index2hash = dict() 23 | for i, cls in enumerate(mapping.keys()): 24 | index2hash[i] = cls 25 | 26 | # Select determined raagas 27 | final_map = dict() 28 | for i, cls in enumerate(selection): 29 | final_map[i] = mapping[index2hash[cls]] 30 | 31 | return final_map 32 | -------------------------------------------------------------------------------- /compiam/melody/tonic_identification/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from compiam.utils import get_tool_list 4 | from compiam.data import models_dict 5 | 6 | from compiam.melody.tonic_identification.tonic_multipitch import TonicIndianMultiPitch 7 | 8 | 9 | # Show user the available tools 10 | def list_tools(): 11 | pre_trained_models = [ 12 | x["class_name"] for x in list(models_dict.values()) 13 | ] # Get list of pre-trained_models 14 | return [ 15 | tool + "*" if tool in pre_trained_models else tool 16 | for tool in get_tool_list(modules=sys.modules[__name__]) 17 | ] 18 | -------------------------------------------------------------------------------- /compiam/melody/tonic_identification/tonic_multipitch.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | from compiam.utils import get_logger, stereo_to_mono 6 | 7 | logger = get_logger(__name__) 8 | 
9 | 10 | class TonicIndianMultiPitch: 11 | """MultiPitch approach to extract the tonic from IAM music signals.""" 12 | 13 | def __init__( 14 | self, 15 | bin_resolution=10, 16 | frame_size=2048, 17 | harmonic_weight=0.8, 18 | hop_size=128, 19 | magnitude_compression=1, 20 | magnitude_threshold=40, 21 | max_tonic_frequency=375, 22 | min_tonic_frequency=100, 23 | num_harmonics=20, 24 | ref_frequency=55, 25 | sample_rate=44100, 26 | ): 27 | """Tonic extraction init method. 28 | For a complete and detailed list of the parameters see the documentation on the 29 | following link: https://essentia.upf.edu/reference/std_TonicIndianArtMusic.html. 30 | Naming convention of the arguments has been stadardized to compIAM-friendly format. 31 | """ 32 | ### IMPORTING OPTIONAL DEPENDENCIES 33 | try: 34 | global estd 35 | import essentia.standard as estd 36 | except: 37 | raise ImportError( 38 | "In order to use this tool you need to have essentia installed. " 39 | "Install compIAM with essentia support: pip install 'compiam[essentia]'" 40 | ) 41 | ### 42 | 43 | self.bin_resolution = bin_resolution 44 | self.frame_size = frame_size 45 | self.harmonic_weight = harmonic_weight 46 | self.hop_size = hop_size 47 | self.magnitude_compression = magnitude_compression 48 | self.magnitude_threshold = magnitude_threshold 49 | self.max_tonic_frequency = max_tonic_frequency 50 | self.min_tonic_frequency = min_tonic_frequency 51 | self.num_harmonics = num_harmonics 52 | self.ref_frequency = ref_frequency 53 | self.sample_rate = sample_rate 54 | 55 | def extract(self, input_data, input_sr=44100): 56 | """Extract the tonic from a given file. 57 | 58 | :param input_data: path to audio file or numpy array like audio signal 59 | :param input_sr: sampling rate of the input array of data (if any). This variable is only 60 | relevant if the input is an array of data instead of a filepath. 61 | :returns: a floating point number representing the tonic of the input recording. 62 | """ 63 | if isinstance(input_data, str): 64 | if not os.path.exists(input_data): 65 | raise FileNotFoundError("Target audio not found.") 66 | audio = estd.MonoLoader(filename=input_data, sampleRate=self.sample_rate)() 67 | elif isinstance(input_data, np.ndarray): 68 | if len(input_data.shape) == 2: 69 | input_data = stereo_to_mono(input_data) 70 | if len(input_data.shape) > 2: 71 | raise ValueError("Input must be an unbatched audio signal") 72 | logger.warning( 73 | f"Resampling... 
(input sampling rate is {input_sr}Hz, make sure this is correct)" 74 | ) 75 | resampling = estd.Resample( 76 | inputSampleRate=input_sr, outputSampleRate=self.sample_rate 77 | ) 78 | audio = resampling(input_data) 79 | else: 80 | raise ValueError("Input must be path to audio signal or an audio array") 81 | 82 | extractor = estd.TonicIndianArtMusic( 83 | binResolution=self.bin_resolution, 84 | frameSize=self.frame_size, 85 | harmonicWeight=self.harmonic_weight, 86 | hopSize=self.hop_size, 87 | magnitudeCompression=self.magnitude_compression, 88 | magnitudeThreshold=self.magnitude_threshold, 89 | maxTonicFrequency=self.max_tonic_frequency, 90 | minTonicFrequency=self.min_tonic_frequency, 91 | numberHarmonics=self.num_harmonics, 92 | referenceFrequency=self.ref_frequency, 93 | sampleRate=self.sample_rate, 94 | ) 95 | return extractor(audio) 96 | -------------------------------------------------------------------------------- /compiam/models/README.md: -------------------------------------------------------------------------------- 1 | # compIAM pre-trained models default storing 2 | This folder is meant to be the default storing folder of the pre-trained models in compIAM. Unless indicated otherwise, when initializing a pre-trained model in compIAN, the weights are going to be downloaded and stored in this folder. -------------------------------------------------------------------------------- /compiam/rhythm/README.md: -------------------------------------------------------------------------------- 1 | # Rhythmic analysis tools 2 | 3 | | **Tool** | **Task** | **Paper** | 4 | |--------------------------------|------------------------------------------------|-----------| 5 | | Akshara Pulse Tracker Detector | Detect onsets of aksharas in tabla recordings | [1] | 6 | | Mnemonic Stroke Transcription | Bol/Solkattu trasncription using HMM | [2] | 7 | 8 | 9 | [1] Originally implemented by Ajay Srinivasamurthy as part of PyCompMusic - https://github.com/MTG/pycompmusic 10 | 11 | [2] Gupta, S., Srinivasamurthy, A., Kumar, M., Murthy, H., & Serra, X. (2015, October). Discovery of Syllabic Percussion Patterns in Tabla Solo Recordings. In Proceedings of the 16th International Society for Music Information Retrieval Conference (ISMIR 2015) (pp. 385–391). Malaga, Spain. -------------------------------------------------------------------------------- /compiam/rhythm/__init__.py: -------------------------------------------------------------------------------- 1 | ### IMPORT HERE FUNCTIONALITIES 2 | import inspect, importlib as implib 3 | from compiam.data import models_dict 4 | 5 | TO_AVOID = [ 6 | x[0] 7 | for x in inspect.getmembers( 8 | implib.import_module("compiam.rhythm"), inspect.ismodule 9 | ) 10 | ] 11 | 12 | 13 | ### IMPORT HERE THE CONSIDERED TASKS 14 | from compiam.rhythm import meter 15 | from compiam.rhythm import transcription 16 | 17 | 18 | # Show user the available tasks 19 | def list_tasks(): 20 | return [ 21 | x[0] 22 | for x in inspect.getmembers( 23 | implib.import_module("compiam.rhythm"), inspect.ismodule 24 | ) 25 | if x[0] not in TO_AVOID 26 | ] 27 | 28 | 29 | # Show user the available tools 30 | def list_tools(): 31 | tasks = [ 32 | x[0] 33 | for x in inspect.getmembers( 34 | implib.import_module("compiam.rhythm"), inspect.ismodule 35 | ) 36 | if x[0] not in TO_AVOID 37 | ] 38 | tools_for_tasks = [ 39 | inspect.getmembers( 40 | implib.import_module("compiam.rhythm." 
+ task), inspect.isclass 41 | ) 42 | for task in tasks 43 | ] 44 | tools_for_tasks = [ 45 | tool[1].__module__.split(".")[-2] + "." + tool[0] 46 | for tool_list in tools_for_tasks 47 | for tool in tool_list 48 | ] # Get task.tool 49 | pre_trained_models = [ 50 | x["class_name"] for x in list(models_dict.values()) 51 | ] # Get list of pre-trained_models 52 | return [ 53 | tool + "*" if tool.split(".")[1] in pre_trained_models else tool 54 | for tool in tools_for_tasks 55 | ] 56 | -------------------------------------------------------------------------------- /compiam/rhythm/meter/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from compiam.utils import get_tool_list 4 | from compiam.data import models_dict 5 | 6 | from compiam.rhythm.meter.akshara_pulse_tracker import AksharaPulseTracker 7 | 8 | 9 | # Show user the available tools 10 | def list_tools(): 11 | pre_trained_models = [ 12 | x["class_name"] for x in list(models_dict.values()) 13 | ] # Get list of pre-trained_models 14 | return [ 15 | tool + "*" if tool in pre_trained_models else tool 16 | for tool in get_tool_list(modules=sys.modules[__name__]) 17 | ] 18 | -------------------------------------------------------------------------------- /compiam/rhythm/transcription/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from compiam.utils import get_tool_list 4 | from compiam.data import models_dict 5 | 6 | from compiam.rhythm.transcription.mnemonic_transcription import MnemonicTranscription 7 | 8 | 9 | # Show user the available tools 10 | def list_tools(): 11 | pre_trained_models = [ 12 | x["class_name"] for x in list(models_dict.values()) 13 | ] # Get list of pre-trained_models 14 | return [ 15 | tool + "*" if tool in pre_trained_models else tool 16 | for tool in get_tool_list(modules=sys.modules[__name__]) 17 | ] 18 | -------------------------------------------------------------------------------- /compiam/rhythm/transcription/mnemonic_transcription/bol_mappings.csv: -------------------------------------------------------------------------------- 1 | TA,TA,1 2 | NA,NA,2 3 | DHA,DHA,3 4 | KI,KI,4 5 | TI,TA,1 6 | GE,GE,5 7 | TAA,NA,2 8 | KA,KI,4 9 | RA,TA,1 10 | GA,GE,5 11 | GHI,GE,5 12 | RE,RE,6 13 | DHI,DHI,7 14 | DA,DA,8 15 | DHE,DHE,9 16 | KE,KI,4 17 | DHIN,DHIN,10 18 | DHET,DHET,11 19 | TII,TII,12 20 | KDA,KDA,13 21 | GI,GE,5 22 | TRA,TRA,14 23 | TIN,TIN,15 24 | TIT,TIT,16 25 | GHE,GE,5 26 | KAT,KI,4 27 | DIN,DIN,17 28 | DAA,DA,8 29 | KII,KI,4 30 | TE,TE,18 31 | N,NA,2 32 | DI,DIN,17 33 | KRU,KDA,13 34 | CHAP,TIT,16 35 | DING,DIN,17 36 | D,DA,8 37 | KRA,KDA,13 38 | KRI,KDA,13 -------------------------------------------------------------------------------- /compiam/separation/README.md: -------------------------------------------------------------------------------- 1 | # Source separation tools 2 | 3 | | **Tool** | **Task** | **Paper** | 4 | |---------------------------|----------------------------------|-----------| 5 | | ColdDiffSep | Singing voice extraction | [1] | 6 | | MDXNet w/ mixer model | Music source separation | [2] | 7 | 8 | 9 | [1] G. Plaja-Roglans, M. Miron, A. Shankar, and X. Serra, "Carnatic Singing Voice Separation using Cold Diffusion on Training Data with Bleeding", in International Society for Music Information Retrieval Conference (ISMIR 23), 2023. 10 | 11 | [2] Work under review. 
-------------------------------------------------------------------------------- /compiam/separation/__init__.py: -------------------------------------------------------------------------------- 1 | ### IMPORT HERE FUNCTIONALITIES 2 | import inspect, importlib as implib 3 | from compiam.data import models_dict 4 | 5 | TO_AVOID = [ 6 | x[0] 7 | for x in inspect.getmembers( 8 | implib.import_module("compiam.separation"), inspect.ismodule 9 | ) 10 | ] 11 | 12 | 13 | ### IMPORT HERE THE CONSIDERED TASKS 14 | from compiam.separation import singing_voice_extraction 15 | 16 | 17 | # Show user the available tasks 18 | def list_tasks(): 19 | return [ 20 | x[0] 21 | for x in inspect.getmembers( 22 | implib.import_module("compiam.separation"), inspect.ismodule 23 | ) 24 | if x[0] not in TO_AVOID 25 | ] 26 | 27 | 28 | # Show user the available tools 29 | def list_tools(): 30 | tasks = [ 31 | x[0] 32 | for x in inspect.getmembers( 33 | implib.import_module("compiam.separation"), inspect.ismodule 34 | ) 35 | if x[0] not in TO_AVOID 36 | ] 37 | tools_for_tasks = [ 38 | inspect.getmembers( 39 | implib.import_module("compiam.separation." + task), inspect.isclass 40 | ) 41 | for task in tasks 42 | ] 43 | tools_for_tasks = [ 44 | tool[1].__module__.split(".")[-2] + "." + tool[0] 45 | for tool_list in tools_for_tasks 46 | for tool in tool_list 47 | ] # Get task.tool 48 | pre_trained_models = [ 49 | x["class_name"] for x in list(models_dict.values()) 50 | ] # Get list of pre-trained_models 51 | return [ 52 | tool + "*" if tool.split(".")[1] in pre_trained_models else tool 53 | for tool in tools_for_tasks 54 | ] 55 | -------------------------------------------------------------------------------- /compiam/separation/music_source_separation/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from compiam.utils import get_tool_list 4 | from compiam.data import models_dict 5 | 6 | # Import tasks 7 | from compiam.separation.music_source_separation.mixer_model import ( 8 | MixerModel, 9 | ) 10 | 11 | 12 | # Show user the available tools 13 | def list_tools(): 14 | pre_trained_models = [ 15 | x["class_name"] for x in list(models_dict.values()) 16 | ] # Get list of pre-trained_models 17 | return [ 18 | tool + "*" if tool in pre_trained_models else tool 19 | for tool in get_tool_list(modules=sys.modules[__name__]) 20 | ] 21 | -------------------------------------------------------------------------------- /compiam/separation/music_source_separation/mixer_model/modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | # Time-Frequency Modulation (directly ported from original code by KUIELab/TFC-TDF) 6 | 7 | 8 | class TFC(nn.Module): 9 | def __init__(self, c, l, k): 10 | super(TFC, self).__init__() 11 | 12 | self.H = nn.ModuleList() 13 | for i in range(l): 14 | self.H.append( 15 | nn.Sequential( 16 | nn.Conv2d( 17 | in_channels=c, 18 | out_channels=c, 19 | kernel_size=k, 20 | stride=1, 21 | padding=k // 2, 22 | ), 23 | nn.BatchNorm2d(c), 24 | nn.ReLU(), 25 | ) 26 | ) 27 | 28 | def forward(self, x): 29 | for h in self.H: 30 | x = h(x) 31 | return x 32 | 33 | 34 | # Dense TFC Block (directly ported from original code by KUIELab/TFC-TDF) 35 | 36 | 37 | class DenseTFC(nn.Module): 38 | def __init__(self, c, l, k): 39 | super(DenseTFC, self).__init__() 40 | 41 | self.conv = nn.ModuleList() 42 | for i in range(l): 43 | 44 | self.conv.append( 45 | nn.Sequential( 46 | 
nn.Conv2d( 47 | in_channels=c, 48 | out_channels=c, 49 | kernel_size=k, 50 | stride=1, 51 | padding=k // 2, 52 | ), 53 | nn.BatchNorm2d(c), 54 | nn.ReLU(), 55 | ) 56 | ) 57 | 58 | def forward(self, x): 59 | for layer in self.conv[:-1]: 60 | out = layer(x) 61 | x = torch.cat([out, x], 1) 62 | return self.conv[-1](x) 63 | 64 | 65 | # TFC TDF module (directly ported from original code by KUIELab/TFC-TDF) 66 | 67 | 68 | class TFC_TDF(nn.Module): 69 | def __init__(self, c, l, f, k, bn, dense=False, bias=True): 70 | super(TFC_TDF, self).__init__() 71 | 72 | self.use_tdf = bn is not None 73 | self.tfc = DenseTFC(c, l, k) if dense else TFC(c, l, k) 74 | self.bn = bn 75 | 76 | if self.use_tdf: 77 | if bn == 0: 78 | self.tdf = nn.Sequential( 79 | nn.Linear(f, f, bias=bias), nn.BatchNorm2d(c), nn.ReLU() 80 | ) 81 | else: 82 | self.tdf = nn.Sequential( 83 | nn.Linear(f, f // bn, bias=bias), 84 | nn.BatchNorm2d(c), 85 | nn.ReLU(), 86 | nn.Linear(f // bn, f, bias=bias), 87 | nn.BatchNorm2d(c), 88 | nn.ReLU(), 89 | ) 90 | 91 | def forward(self, x): 92 | out = self.tdf(x) 93 | x = self.tfc(x) 94 | return x + out if self.use_tdf else x 95 | -------------------------------------------------------------------------------- /compiam/separation/singing_voice_extraction/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from compiam.utils import get_tool_list 4 | from compiam.data import models_dict 5 | 6 | # Import tasks 7 | from compiam.separation.singing_voice_extraction.cold_diff_sep import ( 8 | ColdDiffSep, 9 | ) 10 | 11 | 12 | # Show user the available tools 13 | def list_tools(): 14 | pre_trained_models = [ 15 | x["class_name"] for x in list(models_dict.values()) 16 | ] # Get list of pre-trained_models 17 | return [ 18 | tool + "*" if tool in pre_trained_models else tool 19 | for tool in get_tool_list(modules=sys.modules[__name__]) 20 | ] 21 | -------------------------------------------------------------------------------- /compiam/separation/singing_voice_extraction/cold_diff_sep/model/clustering.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.cluster import KMeans 3 | 4 | 5 | def get_mask(normalized_feat, clusters, scheduler): 6 | kmeans = KMeans(n_clusters=clusters, random_state=0).fit(normalized_feat) 7 | centers = kmeans.cluster_centers_ 8 | original_means = np.mean(centers, axis=1) 9 | ordered_means = np.sort(np.mean(centers, axis=1)) 10 | means_and_pos = {} 11 | manual_weights = np.linspace(0, 1, clusters) ** scheduler 12 | for idx, j in zip(manual_weights, ordered_means): 13 | means_and_pos[j] = idx 14 | label_and_dist = [] 15 | for j in original_means: 16 | label_and_dist.append(means_and_pos[j]) 17 | weights = [] 18 | for j in kmeans.labels_: 19 | weights.append(label_and_dist[j]) 20 | return np.array(weights, dtype=np.float32) / float(clusters - 1) 21 | -------------------------------------------------------------------------------- /compiam/separation/singing_voice_extraction/cold_diff_sep/model/config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | class Config: 6 | """Configuration for DiffWave implementation.""" 7 | 8 | def __init__(self): 9 | self.model_type = None 10 | 11 | self.sr = 22050 12 | 13 | self.hop = 256 14 | self.win = 1024 15 | 16 | # mel-scale filter bank 17 | self.mel = 80 18 | self.fmin = 0 19 | self.fmax = 8000 20 | 21 | self.eps = 
1e-5 22 | 23 | # sample size 24 | self.frames = (self.hop + 6) * 128 # 16384 25 | self.batch = 8 26 | 27 | # leaky relu coefficient 28 | self.leak = 0.4 29 | 30 | # embdding config 31 | self.embedding_size = 128 32 | self.embedding_proj = 512 33 | self.embedding_layers = 2 34 | self.embedding_factor = 4 35 | 36 | # upsampler config 37 | self.upsample_stride = [4, 1] 38 | self.upsample_kernel = [32, 3] 39 | self.upsample_layers = 4 40 | # computed hop size 41 | # block config 42 | self.channels = 64 43 | self.kernel_size = 3 44 | self.dilation_rate = 2 45 | self.num_layers = 30 46 | self.num_cycles = 3 47 | 48 | # noise schedule 49 | self.iter = 8 # 20, 40, 50 50 | self.noise_policy = "linear" 51 | self.noise_start = 1e-4 52 | self.noise_end = 0.5 # 0.02 for 200 53 | 54 | def beta(self): 55 | """Generate beta-sequence. 56 | Returns: 57 | List[float], [iter], beta values. 58 | """ 59 | mapper = { 60 | "linear": self._linear_sched, 61 | } 62 | if self.noise_policy not in mapper: 63 | raise ValueError("invalid beta policy") 64 | return mapper[self.noise_policy]() 65 | 66 | def _linear_sched(self): 67 | """Linearly generated noise. 68 | Returns: 69 | List[float], [iter], beta values. 70 | """ 71 | return np.linspace( 72 | self.noise_start, self.noise_end, self.iter, dtype=np.float32 73 | ) 74 | 75 | def window_fn(self): 76 | """Return window generator. 77 | Returns: 78 | Callable, window function of tf.signal 79 | , which corresponds to self.win_fn. 80 | """ 81 | mapper = {"hann": tf.signal.hann_window, "hamming": tf.signal.hamming_window} 82 | if self.win_fn in mapper: 83 | return mapper[self.win_fn] 84 | 85 | raise ValueError("invalid window function: " + self.win_fn) 86 | -------------------------------------------------------------------------------- /compiam/separation/singing_voice_extraction/cold_diff_sep/model/signal_processing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def get_overlap_window(signal, boundary=None): 6 | window_out = np.ones(signal.shape) 7 | midpoint = window_out.shape[0] // 2 8 | if boundary == "start": 9 | window_out[midpoint:] = np.linspace(1, 0, window_out.shape[0] - midpoint) 10 | elif boundary == "end": 11 | window_out[:midpoint] = np.linspace(0, 1, window_out.shape[0] - midpoint) 12 | else: 13 | window_out[:midpoint] = np.linspace(0, 1, window_out.shape[0] - midpoint) 14 | window_out[midpoint:] = np.linspace(1, 0, window_out.shape[0] - midpoint) 15 | return window_out 16 | 17 | 18 | def compute_stft(signal, unet_config): 19 | signal_stft = check_shape_3d( 20 | check_shape_3d( 21 | tf.signal.stft( 22 | signal, 23 | frame_length=unet_config.win, 24 | frame_step=unet_config.hop, 25 | fft_length=unet_config.win, 26 | window_fn=tf.signal.hann_window, 27 | ), 28 | 1, 29 | ), 30 | 2, 31 | ) 32 | mag = tf.abs(signal_stft) 33 | phase = tf.math.angle(signal_stft) 34 | return mag, phase 35 | 36 | 37 | def compute_signal_from_stft(spec, phase, config): 38 | polar_spec = tf.complex( 39 | tf.multiply(spec, tf.math.cos(phase)), tf.zeros(spec.shape) 40 | ) + tf.multiply( 41 | tf.complex(spec, tf.zeros(spec.shape)), 42 | tf.complex(tf.zeros(phase.shape), tf.math.sin(phase)), 43 | ) 44 | return tf.signal.inverse_stft( 45 | polar_spec, 46 | frame_length=config.win, 47 | frame_step=config.hop, 48 | window_fn=tf.signal.inverse_stft_window_fn( 49 | config.hop, forward_window_fn=tf.signal.hann_window 50 | ), 51 | ) 52 | 53 | 54 | def log2(x, base): 55 | return int(np.log(x) / 
np.log(base)) 56 | 57 | 58 | def next_power_of_2(n): 59 | # decrement `n` (to handle the case when `n` itself is a power of 2) 60 | n = n - 1 61 | # calculate the position of the last set bit of `n` 62 | lg = log2(n, 2) 63 | # next power of two will have a bit set at position `lg+1`. 64 | return 1 << lg # + 1 65 | 66 | 67 | def check_shape_3d(data, dim): 68 | n = data.shape[dim] 69 | if n % 2 != 0: 70 | n = data.shape[dim] - 1 71 | if dim == 0: 72 | return data[:n, :, :] 73 | if dim == 1: 74 | return data[:, :n, :] 75 | if dim == 2: 76 | return data[:, :, :n] 77 | 78 | 79 | def load_audio(paths): 80 | mixture = tf.io.read_file(paths[0]) 81 | vocals = tf.io.read_file(paths[1]) 82 | mixture_audio, _ = tf.audio.decode_wav(mixture, desired_channels=1) 83 | vocal_audio, _ = tf.audio.decode_wav(vocals, desired_channels=1) 84 | return tf.squeeze(mixture_audio, axis=-1), tf.squeeze(vocal_audio, axis=-1) 85 | -------------------------------------------------------------------------------- /compiam/separation/singing_voice_extraction/cold_diff_sep/model/unet_utils.py: -------------------------------------------------------------------------------- 1 | # nn.py 2 | # Source: https://github.com/hojonathanho/diffusion/blob/master/ 3 | # diffusion_tf/nn.py 4 | 5 | 6 | import math 7 | import tensorflow as tf 8 | 9 | 10 | def default_init(scale): 11 | return tf.initializers.variance_scaling( 12 | scale=1e-10 if scale == 0 else scale, mode="fan_avg", distribution="uniform" 13 | ) 14 | 15 | 16 | def meanflat(x): 17 | return tf.math.reduce_mean(x, axis=list(range(1, len(x.shape)))) 18 | 19 | 20 | def get_timestep_embedding(timesteps, embedding_dim): 21 | # From fairseq. Build sinusoidal embeddings. This matches the 22 | # implementation in tensor2tensor, but differs slightly from the 23 | # description in Section 3.5 of "Attention Is All You Need". 24 | assert len(timesteps.shape) == 1 # and timesteps.dtype == tf.int32 25 | 26 | half_dim = embedding_dim // 2 27 | emb = math.log(10000) / (half_dim - 1) 28 | emb = tf.math.exp(tf.range(half_dim, dtype=tf.float32) * -emb) 29 | # emb = tf.range(num_embeddings, dtype=tf.float32)[:, None] * emb[None, :] 30 | emb = tf.cast(timesteps, dtype=tf.float32)[:, None] * emb[None, :] 31 | emb = tf.concat([tf.math.sin(emb), tf.math.cos(emb)], axis=1) 32 | if embedding_dim % 2 == 1: # zero pad. 
33 | # emb = tf.concat([emb, tf.zeros([num_embeddings, 1])], axis=1) 34 | emb = tf.pad(emb, [[0, 0], [0, 1]]) 35 | assert emb.shape == [timesteps.shape[0], embedding_dim] 36 | return emb 37 | -------------------------------------------------------------------------------- /compiam/separation/singing_voice_extraction/cold_diff_sep/model/vad.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue May 1 20:43:28 2018 4 | @author: eesungkim 5 | """ 6 | 7 | import math 8 | import numpy as np 9 | from compiam.separation.singing_voice_extraction.cold_diff_sep.model.estnoise_ms import * 10 | 11 | 12 | def VAD(signal, sr, nFFT=512, win_length=0.025, hop_length=0.01, threshold=0.7): 13 | """Voice Activity Detector 14 | Parameters 15 | ---------- 16 | signal : audio time series 17 | sr : sampling rate of `signal` 18 | nFFT : length of the FFT window 19 | win_length : window size in sec 20 | hop_length : hop size in sec 21 | 22 | Returns 23 | ------- 24 | probRatio : frame-based voice activity probability sequence 25 | """ 26 | signal = signal.astype("float") 27 | 28 | maxPosteriorSNR = 1000 29 | minPosteriorSNR = 0.0001 30 | 31 | win_length_sample = round(win_length * sr) 32 | hop_length_sample = round(hop_length * sr) 33 | 34 | # the variance of the speech; lambda_x(k) 35 | _stft = stft( 36 | signal, n_fft=nFFT, win_length=win_length_sample, hop_length=hop_length_sample 37 | ) 38 | pSpectrum = np.abs(_stft) ** 2 39 | 40 | # estimate the variance of the noise using minimum statistics noise PSD estimation ; lambda_d(k). 41 | estNoise = estnoisem(pSpectrum, hop_length) 42 | estNoise = estNoise 43 | 44 | aPosterioriSNR = pSpectrum / estNoise 45 | aPosterioriSNR = aPosterioriSNR 46 | aPosterioriSNR[aPosterioriSNR > maxPosteriorSNR] = maxPosteriorSNR 47 | aPosterioriSNR[aPosterioriSNR < minPosteriorSNR] = minPosteriorSNR 48 | 49 | a01 = ( 50 | hop_length / 0.05 51 | ) # a01=P(signallence->speech) hop_length/mean signallence length (50 ms) 52 | a00 = 1 - a01 # a00=P(signallence->signallence) 53 | a10 = ( 54 | hop_length / 0.1 55 | ) # a10=P(speech->signallence) hop/mean talkspurt length (100 ms) 56 | a11 = 1 - a10 # a11=P(speech->speech) 57 | 58 | b01 = a01 / a00 59 | b10 = a11 - a10 * a01 / a00 60 | 61 | smoothFactorDD = 0.99 62 | previousGainedaPosSNR = 1 63 | (nFrames, nFFT2) = pSpectrum.shape 64 | probRatio = np.zeros((nFrames, 1)) 65 | logGamma_frame = 0 66 | for i in range(nFrames): 67 | aPosterioriSNR_frame = aPosterioriSNR[i, :] 68 | 69 | # operator [2](52) 70 | oper = aPosterioriSNR_frame - 1 71 | oper[oper < 0] = 0 72 | smoothed_a_priori_SNR = ( 73 | smoothFactorDD * previousGainedaPosSNR + (1 - smoothFactorDD) * oper 74 | ) 75 | 76 | # V for MMSE estimate ([2](8)) 77 | V = ( 78 | 0.1 79 | * smoothed_a_priori_SNR 80 | * aPosterioriSNR_frame 81 | / (1 + smoothed_a_priori_SNR) 82 | ) 83 | 84 | # geometric mean of log likelihood ratios for individual frequency band [1](4) 85 | logLRforFreqBins = 2 * V - np.log(smoothed_a_priori_SNR + 1) 86 | # logLRforFreqBins=np.exp(smoothed_a_priori_SNR*aPosterioriSNR_frame/(1+smoothed_a_priori_SNR))/(1+smoothed_a_priori_SNR) 87 | gMeanLogLRT = np.mean(logLRforFreqBins) 88 | logGamma_frame = ( 89 | np.log(a10 / a01) 90 | + gMeanLogLRT 91 | + np.log(b01 + b10 / (a10 + a00 * np.exp(-logGamma_frame))) 92 | ) 93 | probRatio[i] = 1 / (1 + np.exp(-logGamma_frame)) 94 | 95 | # Calculate Gain function which results from the MMSE [2](7). 
96 | gain = ( 97 | (math.gamma(1.5) * np.sqrt(V)) 98 | / aPosterioriSNR_frame 99 | * np.exp(-1 * V / 2) 100 | * ((1 + V) * bessel(0, V / 2) + V * bessel(1, V / 2)) 101 | ) 102 | 103 | previousGainedaPosSNR = (gain**2) * aPosterioriSNR_frame 104 | probRatio[probRatio > threshold] = 1 105 | probRatio[probRatio < threshold] = 0 106 | 107 | return probRatio 108 | -------------------------------------------------------------------------------- /compiam/structure/README.md: -------------------------------------------------------------------------------- 1 | # Structure analysis tools 2 | 3 | | **Tool** | **Task** | **Paper** | 4 | |---------------------------------|---------------------------------------------------|-----------| 5 | | Dhrupad Bandish Segmentation | Automatic segmentation of Bandish performances | [1] | 6 | 7 | 8 | [1] Rohit M. A., Vinutha T. P., Preeti Rao. “Structural Segmentation of Dhrupad Vocal Bandish Audio based on Tempo”, in Proc. of the 21st International Society for Music Information Retrieval Conference (Montréal, Canada), 2020 -------------------------------------------------------------------------------- /compiam/structure/__init__.py: -------------------------------------------------------------------------------- 1 | ### IMPORT HERE FUNCTIONALITIES 2 | import inspect, importlib as implib 3 | from compiam.data import models_dict 4 | 5 | TO_AVOID = [ 6 | x[0] 7 | for x in inspect.getmembers( 8 | implib.import_module("compiam.structure"), inspect.ismodule 9 | ) 10 | ] 11 | 12 | 13 | ### IMPORT HERE THE CONSIDERED TASKS 14 | from compiam.structure import segmentation 15 | 16 | 17 | # Show user the available tasks 18 | def list_tasks(): 19 | return [ 20 | x[0] 21 | for x in inspect.getmembers( 22 | implib.import_module("compiam.structure"), inspect.ismodule 23 | ) 24 | if x[0] not in TO_AVOID 25 | ] 26 | 27 | 28 | # Show user the available tools 29 | def list_tools(): 30 | tasks = [ 31 | x[0] 32 | for x in inspect.getmembers( 33 | implib.import_module("compiam.structure"), inspect.ismodule 34 | ) 35 | if x[0] not in TO_AVOID 36 | ] 37 | tools_for_tasks = [ 38 | inspect.getmembers( 39 | implib.import_module("compiam.structure." + task), inspect.isclass 40 | ) 41 | for task in tasks 42 | ] 43 | tools_for_tasks = [ 44 | tool[1].__module__.split(".")[-2] + "." 
+ tool[0] 45 | for tool_list in tools_for_tasks 46 | for tool in tool_list 47 | ] # Get task.tool 48 | pre_trained_models = [ 49 | x["class_name"] for x in list(models_dict.values()) 50 | ] # Get list of pre-trained_models 51 | return [ 52 | tool + "*" if tool.split(".")[1] in pre_trained_models else tool 53 | for tool in tools_for_tasks 54 | ] 55 | -------------------------------------------------------------------------------- /compiam/structure/segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from compiam.utils import get_tool_list 4 | from compiam.data import models_dict 5 | 6 | from compiam.structure.segmentation.dhrupad_bandish_segmentation import ( 7 | DhrupadBandishSegmentation, 8 | ) 9 | 10 | 11 | # Show user the available tools 12 | def list_tools(): 13 | pre_trained_models = [ 14 | x["class_name"] for x in list(models_dict.values()) 15 | ] # Get list of pre-trained_models 16 | return [ 17 | tool + "*" if tool in pre_trained_models else tool 18 | for tool in get_tool_list(modules=sys.modules[__name__]) 19 | ] 20 | -------------------------------------------------------------------------------- /compiam/structure/segmentation/dhrupad_bandish_segmentation/audio_processing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import librosa 3 | 4 | import numpy as np 5 | import soundfile as sf 6 | 7 | from compiam.structure.segmentation.dhrupad_bandish_segmentation.params import fs 8 | from compiam.utils import get_logger 9 | 10 | logger = get_logger(__name__) 11 | 12 | 13 | 14 | def split_audios(save_dir=None, annotations_path=None, audios_path=None): 15 | """Split audio of Dhrupad dataset 16 | 17 | :param save_dir: path where to save the splits 18 | :param annotations_path: path where to find the annotations 19 | :param audios_path: path where to find the original audios 20 | """ 21 | if not os.path.exists(save_dir): 22 | logger.warning( 23 | """Save directory not found. Creating it... 24 | """ 25 | ) 26 | os.mkdir(save_dir) 27 | 28 | if not os.path.exists(annotations_path): 29 | raise ValueError( 30 | """ 31 | Path to annotations not found.""" 32 | ) 33 | 34 | if not os.path.exists(audios_path): 35 | raise ValueError( 36 | """ 37 | Path to original audios not found.""" 38 | ) 39 | 40 | annotations = np.loadtxt( 41 | os.path.join(annotations_path, "section_boundaries_labels.csv"), 42 | delimiter=",", 43 | dtype=str, 44 | ) 45 | 46 | song = "" # please leave this line as it is 47 | for item in annotations: 48 | if "_".join(item[0].split("_")[:-1]) != song: 49 | song = "_".join(item[0].split("_")[:-1]) 50 | try: 51 | x, _ = librosa.load(os.path.join(audios_path, song + ".wav"), sr=fs) 52 | except FileNotFoundError: 53 | logger.error( 54 | f""" 55 | Audio for {song} not found. 
Please make sure you check: 56 | models/structure/dhrupad_bandish_segmentation/original_audio/README.md 57 | """ 58 | ) 59 | continue 60 | 61 | start = int(float(item[1]) * fs) 62 | end = int(float(item[2]) * fs) 63 | y = x[start:end] 64 | sf.write(os.path.join(save_dir, item[0] + ".wav"), y, fs) 65 | -------------------------------------------------------------------------------- /compiam/structure/segmentation/dhrupad_bandish_segmentation/model_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | try: 4 | import torch 5 | import torch.nn as nn 6 | from torch.utils import data 7 | except: 8 | raise ImportError( 9 | "In order to use this tool you need to have torch installed. " 10 | "Install compIAM with torch support: pip install 'compiam[torch]'" 11 | ) 12 | 13 | 14 | # class for sf layers 15 | class sfmodule(nn.Module): 16 | def __init__(self, n_ch_in): 17 | super(sfmodule, self).__init__() 18 | n_filters = 16 19 | self.bn1 = nn.BatchNorm2d(n_ch_in, track_running_stats=True) 20 | self.conv1 = nn.Conv2d(n_ch_in, n_filters, (1, 5), stride=1, padding=(0, 2)) 21 | self.elu = nn.ELU() 22 | self.do = nn.Dropout(p=0.1) 23 | 24 | def forward(self, x): 25 | y = self.bn1(x) 26 | y = self.conv1(x) 27 | y = self.elu(y) 28 | y = self.do(y) 29 | return y 30 | 31 | 32 | # class for multi-filter module 33 | class mfmodule(nn.Module): 34 | def __init__(self, pool_height, n_ch, kernel_widths, n_filters): 35 | super(mfmodule, self).__init__() 36 | self.avgpool1 = nn.AvgPool2d((pool_height, 1)) 37 | self.bn1 = nn.BatchNorm2d(n_ch, track_running_stats=True) 38 | 39 | self.conv1s = nn.ModuleList([]) 40 | for kw in kernel_widths: 41 | self.conv1s.append( 42 | nn.Conv2d(n_ch, n_filters[0], (1, kw), stride=1, padding=(0, kw // 2)) 43 | ) 44 | 45 | self.do = nn.Dropout(0.5) 46 | self.conv2 = nn.Conv2d( 47 | n_filters[0] * len(kernel_widths), n_filters[1], (1, 1), stride=1 48 | ) 49 | 50 | def forward(self, x): 51 | y = self.avgpool1(x) 52 | y = self.bn1(y) 53 | z = [] 54 | for conv1 in self.conv1s: 55 | z.append(conv1(y)) 56 | 57 | # trim last column to keep width = input_len (needed if filter width is even) 58 | for i in range(len(z)): 59 | z[i] = z[i][:, :, :, :-1] 60 | 61 | y = torch.cat(z, dim=1) 62 | y = self.do(y) 63 | y = self.conv2(y) 64 | return y 65 | 66 | 67 | class densemodule(nn.Module): 68 | def __init__(self, n_ch_in, input_len, input_height, n_classes): 69 | super(densemodule, self).__init__() 70 | n_linear1_in = n_ch_in * input_height 71 | 72 | self.dense_mod = nn.ModuleList( 73 | [ 74 | nn.AvgPool2d((1, input_len)), 75 | nn.BatchNorm2d(n_ch_in, track_running_stats=True), 76 | nn.Dropout(p=0.5), 77 | nn.Flatten(), 78 | nn.Linear(n_linear1_in, n_classes), 79 | ] 80 | ) 81 | 82 | def forward(self, x): 83 | for layer in self.dense_mod: 84 | x = layer(x) 85 | return x 86 | 87 | 88 | def build_model(input_height, input_len, n_classes): 89 | model = nn.Sequential() 90 | i_module = 0 91 | 92 | # add sf layers 93 | sfmod_ch_sizes = [1, 16, 16] 94 | for ch in sfmod_ch_sizes: 95 | sfmod_i = sfmodule(ch) 96 | model.add_module(str(i_module), sfmod_i) 97 | i_module += 1 98 | 99 | # add mfmods 100 | pool_height = 5 101 | kernel_widths = [16, 32, 64, 96] 102 | ch_in, ch_out = 16, 16 103 | mfmod_n_filters = [12, 16] 104 | 105 | mfmod_i = mfmodule(pool_height, ch_in, kernel_widths, mfmod_n_filters) 106 | model.add_module(str(i_module), mfmod_i) 107 | input_height //= pool_height 108 | i_module += 1 109 | 110 | # add densemod 111 | ch_in = 16 
112 | densemod = densemodule(ch_in, input_len, input_height, n_classes) 113 | model.add_module(str(i_module), densemod) 114 | return model 115 | 116 | 117 | # data-loader(https://stanford.edu/~shervine/blog/pytorch-how-to-generate-data-parallel) 118 | class Dataset(data.Dataset): 119 | """Characterizes a dataset for PyTorch""" 120 | 121 | def __init__(self, datadir, list_IDs, labels): 122 | "Initialization" 123 | self.datadir = datadir 124 | self.labels = labels 125 | self.list_IDs = list_IDs 126 | 127 | def __len__(self): 128 | """Denotes the total number of samples""" 129 | return len(self.list_IDs) 130 | 131 | def __getitem__(self, index): 132 | """Generates one sample of data""" 133 | # Select sample 134 | ID = self.list_IDs[index] # os.path.join(self.datadir, self.list_IDs[index]) 135 | 136 | # Load data and get label 137 | # X = torch.tensor(np.load(ID)) 138 | X = torch.load(ID) 139 | y = self.labels[ID.replace(self.datadir, "")] # .replace(".npy","")] 140 | id = ID.replace(self.datadir, "") 141 | 142 | return X, y, id 143 | 144 | 145 | def class_to_categorical(labels, classes): 146 | map = dict(zip(classes, np.arange(0, len(classes)).tolist())) 147 | for i in range(len(labels)): 148 | labels[i] = map[labels[i].item()] 149 | return labels 150 | 151 | 152 | def categorical_to_class(class_ids, classes): 153 | map = dict(zip(np.arange(0, len(classes)).tolist(), classes)) 154 | for i in range(len(class_ids)): 155 | class_ids[i] = map[class_ids[i].item()] 156 | return class_ids 157 | 158 | 159 | # function to smooth predicted s.t.m. estimates by constraining minimum section duration 160 | def smooth_boundaries(stmvstime_track, min_dur): 161 | stmvstime_track_smu = np.copy(stmvstime_track) 162 | prev_stm = stmvstime_track_smu[0] 163 | curr_stm_dur = 1 164 | i = 1 165 | while i < len(stmvstime_track_smu): 166 | if stmvstime_track_smu[i] != stmvstime_track_smu[i - 1]: 167 | if curr_stm_dur >= min_dur: 168 | curr_stm_dur = 1 169 | prev_stm = stmvstime_track_smu[i - 1] 170 | 171 | else: 172 | # if stmvstime_track_smu[i] = = prev_stm: 173 | stmvstime_track_smu[i - curr_stm_dur : i] = prev_stm 174 | # else: 175 | # prev_stm = stmvstime_track_smu[i-1] 176 | curr_stm_dur = 1 177 | else: 178 | curr_stm_dur += 1 179 | i += 1 180 | return stmvstime_track_smu 181 | -------------------------------------------------------------------------------- /compiam/structure/segmentation/dhrupad_bandish_segmentation/params.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # parameters for data loader 4 | batch_size = 32 5 | params = {"batch_size": batch_size, "shuffle": True, "num_workers": 4} 6 | max_epochs = 500 7 | 8 | # input-output parameters 9 | fs = 16000 10 | 11 | winsize_sec = 0.04 12 | winsize = int(winsize_sec * fs) 13 | hopsize_sec = 0.02 14 | hopsize = int(hopsize_sec * fs) 15 | nfft = int(2 ** (np.ceil(np.log2(winsize)))) 16 | 17 | input_len_sec = 8 18 | input_len = int(input_len_sec / hopsize_sec) 19 | input_hop_sec = 0.5 20 | input_hop = int(input_hop_sec / hopsize_sec) 21 | input_height = 40 22 | 23 | classes_dict = { 24 | "voc": [1.0, 2.0, 4.0, 8.0], 25 | "pakh": [1.0, 2.0, 4.0, 8.0, 16.0], 26 | "net": [1.0, 2.0, 4.0, 8.0, 16.0], 27 | } 28 | 29 | # minimum section duration for smoothing s.t.m. 
estimates 30 | min_sec_dur = 5 # in seconds 31 | min_sec_dur /= input_hop_sec 32 | -------------------------------------------------------------------------------- /compiam/timbre/README.md: -------------------------------------------------------------------------------- 1 | # Timbre analysis tools 2 | 3 | | **Tool** | **Task** | **Paper** | 4 | |---------------------------------|---------------------------------------|-----------| 5 | | Mridangam Stroke Classification | Timbre-based stroke classification | [1] | 6 | 7 | 8 | [1] Akshay Anantapadmanabhan, Ashwin Bellur, Hema A. Murthy, "Modal analysis and transcription of strokes of the mridangam using non-negative matrix factorization," in Proc. of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013), pp.181-185, 2013 -------------------------------------------------------------------------------- /compiam/timbre/__init__.py: -------------------------------------------------------------------------------- 1 | ### IMPORT HERE FUNCTIONALITIES 2 | import inspect, importlib as implib 3 | from compiam.data import models_dict 4 | 5 | TO_AVOID = [ 6 | x[0] 7 | for x in inspect.getmembers( 8 | implib.import_module("compiam.timbre"), inspect.ismodule 9 | ) 10 | ] 11 | 12 | 13 | ### IMPORT HERE THE CONSIDERED TASKS 14 | from compiam.timbre import stroke_classification 15 | 16 | 17 | # Show user the available tasks 18 | def list_tasks(): 19 | return [ 20 | x[0] 21 | for x in inspect.getmembers( 22 | implib.import_module("compiam.timbre"), inspect.ismodule 23 | ) 24 | if x[0] not in TO_AVOID 25 | ] 26 | 27 | 28 | # Show user the available tools 29 | def list_tools(): 30 | tasks = [ 31 | x[0] 32 | for x in inspect.getmembers( 33 | implib.import_module("compiam.timbre"), inspect.ismodule 34 | ) 35 | if x[0] not in TO_AVOID 36 | ] 37 | tools_for_tasks = [ 38 | inspect.getmembers( 39 | implib.import_module("compiam.timbre." + task), inspect.isclass 40 | ) 41 | for task in tasks 42 | ] 43 | tools_for_tasks = [ 44 | tool[1].__module__.split(".")[-2] + "." 
+ tool[0] 45 | for tool_list in tools_for_tasks 46 | for tool in tool_list 47 | ] # Get task.tool 48 | pre_trained_models = [ 49 | x["class_name"] for x in list(models_dict.values()) 50 | ] # Get list of pre-trained_models 51 | return [ 52 | tool + "*" if tool.split(".")[1] in pre_trained_models else tool 53 | for tool in tools_for_tasks 54 | ] 55 | -------------------------------------------------------------------------------- /compiam/timbre/stroke_classification/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from compiam.utils import get_tool_list 4 | from compiam.data import models_dict 5 | 6 | # Import tasks 7 | from compiam.timbre.stroke_classification.mridangam_stroke_classification import ( 8 | MridangamStrokeClassification, 9 | ) 10 | 11 | 12 | # Show user the available tools 13 | def list_tools(): 14 | pre_trained_models = [ 15 | x["class_name"] for x in list(models_dict.values()) 16 | ] # Get list of pre-trained_models 17 | return [ 18 | tool + "*" if tool in pre_trained_models else tool 19 | for tool in get_tool_list(modules=sys.modules[__name__]) 20 | ] 21 | -------------------------------------------------------------------------------- /compiam/timbre/stroke_classification/mridangam_stroke_classification/model.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import pandas as pd 4 | 5 | from sklearn.model_selection import train_test_split 6 | from sklearn import svm 7 | from sklearn.neural_network import MLPClassifier 8 | 9 | from compiam.timbre.stroke_classification.mridangam_stroke_classification import ( 10 | normalise_features, 11 | ) 12 | from compiam.utils import get_logger 13 | 14 | logger = get_logger(__name__) 15 | 16 | 17 | class StrokeClassification: 18 | """Mridangam stroke classification.""" 19 | 20 | def __init__(self): 21 | """Mridangam stroke classification init method.""" 22 | 23 | def train( 24 | self, 25 | training_data, 26 | feature_list, 27 | model_type="svm", 28 | balance=False, 29 | balance_ref="random", 30 | ): 31 | """Train a support vector machine for stroke classification. 32 | 33 | :param training_data: DataFrame including features to train. 34 | :param feature_list: list of features considered for training. 35 | :param model_type: type of model to train. 36 | :param balance: balance the number of instances per class to prevent biases. 37 | :param balance_ref: reference class for data balancement. 38 | :returns: a trained scikit learn classificator object. 
39 | """ 40 | 41 | if training_data is None: 42 | raise ValueError( 43 | "Prior to train the model please load the dataset using .process_strokes()" 44 | ) 45 | 46 | # Let's use sklearn's preprocessing tools for applying normalisation to features 47 | data_modif = normalise_features(training_data, feature_list) 48 | 49 | if balance == True: 50 | strokes = training_data.stroke.unique() 51 | count_dict = training_data["stroke"].value_counts().to_dict() 52 | min_stroke = min(count_dict, key=count_dict.get) 53 | min_number = ( 54 | data_modif.stroke.value_counts()[min_stroke] 55 | if balance_ref == "lower" 56 | else data_modif.stroke.value_counts()[random.choice(strokes)] 57 | ) 58 | reshaped_stroke_list = [] 59 | for strk in strokes: 60 | if count_dict[strk] > min_number: 61 | reshaped_stroke_list.append( 62 | data_modif[data_modif.stroke == strk].sample(n=min_number) 63 | ) 64 | else: 65 | reshaped_stroke_list.append(data_modif[data_modif.stroke == strk]) 66 | # Merging after downsampling 67 | data_modif = pd.concat(reshaped_stroke_list) 68 | 69 | X = data_modif.iloc[:, : len(feature_list) - 1].values 70 | # Creating output values 71 | data_modif.stroke = pd.Categorical( 72 | data_modif.stroke 73 | ) # convert to categorical data 74 | y = np.array(data_modif.stroke.cat.codes) # create label encoded outputs 75 | 76 | X_train, X_test, y_train, y_test = train_test_split( 77 | X, y, test_size=0.33, random_state=42 78 | ) 79 | 80 | if model_type == "svm": 81 | clf = svm.SVC(gamma=1 / (X_train.shape[-1] * X_train.var())) 82 | elif model_type == "mlp": 83 | clf = MLPClassifier(alpha=1, max_iter=1000) 84 | else: 85 | raise ValueError( 86 | "Model not available. Please check the available options in the documentation." 87 | ) 88 | 89 | # Fit model with training data 90 | clf.fit(X_train, y_train) 91 | 92 | # Evaluate 93 | y_pred = clf.predict(X_test) 94 | logger.info( 95 | "{} model successfully trained with accuracy {}% in the testing set".format( 96 | model_type.upper(), 97 | round(np.sum(y_test == y_pred) / len(y_pred) * 100), 98 | 2, 99 | ) 100 | ) 101 | return clf 102 | -------------------------------------------------------------------------------- /compiam/utils/NMFtoolbox/NEMA.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name: NEMA 3 | Date: Jun 2019 4 | Programmer: Christian Dittmar, Yiğitcan Özer 5 | 6 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 7 | If you use the 'NMF toolbox' please refer to: 8 | [1] Patricio López-Serrano, Christian Dittmar, Yiğitcan Özer, and Meinard 9 | Müller 10 | NMF Toolbox: Music Processing Applications of Nonnegative Matrix 11 | Factorization 12 | In Proceedings of the International Conference on Digital Audio Effects 13 | (DAFx), 2019. 14 | 15 | License: 16 | This file is part of 'NMF toolbox'. 17 | https://www.audiolabs-erlangen.de/resources/MIR/NMFtoolbox/ 18 | 'NMF toolbox' is free software: you can redistribute it and/or modify it 19 | under the terms of the GNU General Public License as published by the 20 | the Free Software Foundation, either version 3 of the License, or (at 21 | your option) any later version. 22 | 23 | 'NMF toolbox' is distributed in the hope that it will be useful, but 24 | WITHOUT ANY WARRANTY; without even the implied warranty of 25 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 26 | Public License for more details. 
27 | 28 | You should have received a copy of the GNU General Public License along 29 | with 'NMF toolbox'. If not, see http://www.gnu.org/licenses/. 30 | """ 31 | 32 | from copy import deepcopy 33 | import numpy as np 34 | 35 | 36 | def NEMA(A, lamb=0.9): 37 | """This function takes a matrix of row-wise time series and applies a 38 | non-linear exponential moving average (NEMA) to each row. This filter 39 | introduces exponentially decaying slopes and is defined in eq. (3) from [2]. 40 | 41 | The difference equation of that filter would be: 42 | y(n) = max( x(n), y(n-1)*(decay) + x(n)*(1-decay) ) 43 | 44 | References 45 | ---------- 46 | [2] Christian Dittmar, Patricio López-Serrano, Meinard Müller: "Unifying 47 | Local and Global Methods for Harmonic-Percussive Source Separation" 48 | In Proceedings of the IEEE International Conference on Acoustics, 49 | Speech, and Signal Processing (ICASSP), 2018. 50 | 51 | Parameters 52 | ---------- 53 | A: array-like 54 | The matrix with time series in its rows 55 | 56 | lamb: array-like / float 57 | The decay parameter in the range [0 ... 1], this can be 58 | given as a column-vector with individual decays per row 59 | or as a scalar 60 | 61 | Results 62 | ------- 63 | filtered: array-like 64 | The result after application of the NEMA filter 65 | """ 66 | 67 | # Prevent instable filter 68 | lamb = max(0.0, min(0.9999999, lamb)) 69 | 70 | numRows, numCols = A.shape 71 | filtered = deepcopy(A) 72 | 73 | for k in range(1, numCols): 74 | storeRow = deepcopy(filtered[:, k]) 75 | filtered[:, k] = lamb * filtered[:, k - 1] + filtered[:, k] * (1 - lamb) 76 | filtered[:, k] = np.maximum(filtered[:, k], storeRow) 77 | 78 | return filtered 79 | -------------------------------------------------------------------------------- /compiam/utils/NMFtoolbox/NMF.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name: NMF 3 | Date: Jun 2019 4 | Programmer: Christian Dittmar, Yiğitcan Özer 5 | 6 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 7 | If you use the 'NMF toolbox' please refer to: 8 | [1] Patricio López-Serrano, Christian Dittmar, Yiğitcan Özer, and Meinard 9 | Müller 10 | NMF Toolbox: Music Processing Applications of Nonnegative Matrix 11 | Factorization 12 | In Proceedings of the International Conference on Digital Audio Effects 13 | (DAFx), 2019. 14 | 15 | License: 16 | This file is part of 'NMF toolbox'. 17 | https://www.audiolabs-erlangen.de/resources/MIR/NMFtoolbox/ 18 | 'NMF toolbox' is free software: you can redistribute it and/or modify it 19 | under the terms of the GNU General Public License as published by the 20 | the Free Software Foundation, either version 3 of the License, or (at 21 | your option) any later version. 22 | 23 | 'NMF toolbox' is distributed in the hope that it will be useful, but 24 | WITHOUT ANY WARRANTY; without even the implied warranty of 25 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 26 | Public License for more details. 27 | 28 | You should have received a copy of the GNU General Public License along 29 | with 'NMF toolbox'. If not, see http://www.gnu.org/licenses/. 30 | """ 31 | 32 | import numpy as np 33 | from copy import deepcopy 34 | from tqdm import tnrange 35 | 36 | from compiam.utils.NMFtoolbox.utils import EPS 37 | 38 | 39 | def NMF(V, parameter): 40 | """Given a non-negative matrix V, find non-negative templates W and activations 41 | H that approximate V. 
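    The factors are refined with multiplicative update rules; for the
    Kullback-Leibler cost implemented below this amounts to (illustrative
    notation: Lambda is the current approximation W @ H, and "ones" is a
    K x M matrix of ones):

        W <- W * ((V / Lambda) @ H.T) / (ones @ H.T)
        H <- H * (W.T @ (V / Lambda)) / (W.T @ ones)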
42 | 43 | References 44 | ---------- 45 | [2] Lee, DD & Seung, HS. "Algorithms for Non-negative Matrix Factorization" 46 | 47 | [3] Andrzej Cichocki, Rafal Zdunek, Anh Huy Phan, and Shun-ichi Amari 48 | "Nonnegative Matrix and Tensor Factorizations: Applications to 49 | Exploratory Multi-Way Data Analysis and Blind Source Separation" 50 | John Wiley and Sons, 2009. 51 | 52 | Parameters 53 | ---------- 54 | V: array-like 55 | K x M non-negative matrix to be factorized 56 | 57 | parameter: dict 58 | costFunc Cost function used for the optimization, currently 59 | supported are: 60 | 'EucDdist' for Euclidean Distance 61 | 'KLDiv' for Kullback Leibler Divergence 62 | 'ISDiv' for Itakura Saito Divergence 63 | numIter Number of iterations the algorithm will run. 64 | numComp The rank of the approximation 65 | 66 | Returns 67 | ------- 68 | W: array-like 69 | K x R non-negative templates 70 | H: array-like 71 | R x M non-negative activations 72 | nmfV: array-like 73 | List with approximated component matrices 74 | """ 75 | parameter = init_parameters(parameter) 76 | 77 | # get important params 78 | K, M = V.shape 79 | R = parameter["numComp"] 80 | L = parameter["numIter"] 81 | 82 | # initialization of W and H 83 | if isinstance(parameter["initW"], list): 84 | W = np.array(parameter["initW"]) 85 | else: 86 | W = deepcopy(parameter["initW"]) 87 | 88 | H = deepcopy(parameter["initH"]) 89 | 90 | # create helper matrix of all ones 91 | onesMatrix = np.ones((K, M)) 92 | 93 | # normalize to unit sum 94 | V /= EPS + V.sum() 95 | 96 | # main iterations 97 | for iter in tnrange(L, desc="Processing"): 98 | # compute approximation 99 | Lambda = EPS + W @ H 100 | 101 | # switch between pre-defined update rules 102 | if parameter["costFunc"] == "EucDist": # euclidean update rules 103 | if not parameter["fixW"]: 104 | W *= V @ H.T / (Lambda @ H.T + EPS) 105 | 106 | H *= W.T @ V / (W.T @ Lambda + EPS) 107 | 108 | elif ( 109 | parameter["costFunc"] == "KLDiv" 110 | ): # Kullback Leibler divergence update rules 111 | if not parameter["fixW"]: 112 | W *= ((V / Lambda) @ H.T) / (onesMatrix @ H.T + EPS) 113 | 114 | H *= (W.T @ (V / Lambda)) / (W.T @ onesMatrix + EPS) 115 | 116 | elif parameter["costFunc"] == "ISDiv": # Itakura Saito divergence update rules 117 | if not parameter["fixW"]: 118 | W *= ((Lambda**-2 * V) @ H.T) / ((Lambda**-1) @ H.T + EPS) 119 | 120 | H *= (W.T @ (Lambda**-2 * V)) / (W.T @ (Lambda**-1) + EPS) 121 | 122 | else: 123 | raise ValueError("Unknown cost function") 124 | 125 | # normalize templates to unit sum 126 | if not parameter["fixW"]: 127 | normVec = W.sum(axis=0) 128 | W *= 1.0 / (EPS + normVec) 129 | 130 | nmfV = list() 131 | 132 | # compute final output approximation 133 | for r in range(R): 134 | nmfV.append(W[:, r].reshape(-1, 1) @ H[r, :].reshape(1, -1)) 135 | 136 | return W, H, nmfV 137 | 138 | 139 | def init_parameters(parameter): 140 | """Auxiliary function to set the parameter dictionary 141 | 142 | Parameters 143 | ---------- 144 | parameter: dict 145 | See the above function inverseSTFT for further information 146 | 147 | Returns 148 | ------- 149 | parameter: dict 150 | """ 151 | parameter["costFunc"] = ( 152 | "KLDiv" if "costFunc" not in parameter else parameter["costFunc"] 153 | ) 154 | parameter["numIter"] = 30 if "numIter" not in parameter else parameter["numIter"] 155 | parameter["fixW"] = False if "fixW" not in parameter else parameter["fixW"] 156 | 157 | return parameter 158 | -------------------------------------------------------------------------------- 
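# Illustrative usage sketch: a minimal end-to-end run of the NMF routine above
# on a toy non-negative matrix, followed by NEMA smoothing of the activations.
# All shapes, the number of components, and the cost function are arbitrary
# example values, not settings recommended by the toolbox.
import numpy as np

from compiam.utils.NMFtoolbox.NMF import NMF
from compiam.utils.NMFtoolbox.NEMA import NEMA

K, M, R = 513, 200, 4  # frequency bins, time frames, components
V = np.abs(np.random.randn(K, M))  # toy magnitude "spectrogram"

parameter = {
    "numComp": R,
    "numIter": 30,
    "costFunc": "KLDiv",  # one of "EucDist", "KLDiv", "ISDiv"
    "initW": np.abs(np.random.randn(K, R)),  # initial templates (K x R)
    "initH": np.abs(np.random.randn(R, M)),  # initial activations (R x M)
}

W, H, nmfV = NMF(V, parameter)  # templates, activations, per-component approximations
H_smooth = NEMA(H, lamb=0.9)  # exponentially decaying slopes on each activation row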
/compiam/utils/NMFtoolbox/README.md: -------------------------------------------------------------------------------- 1 | ## NMFtoolbox 2 | 3 | Code copied directly from https://www.audiolabs-erlangen.de/resources/MIR/NMFtoolbox/#Python -------------------------------------------------------------------------------- /compiam/utils/NMFtoolbox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MTG/compIAM/7efdfa86ea8e20ab4950d0b78c9080bb9dc35448/compiam/utils/NMFtoolbox/__init__.py -------------------------------------------------------------------------------- /compiam/utils/NMFtoolbox/alphaWienerFilter.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name: alphaWienerFilter 3 | Date: Jun 2019 4 | Programmer: Christian Dittmar, Yiğitcan Özer 5 | 6 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 7 | If you use the 'NMF toolbox' please refer to: 8 | [1] Patricio López-Serrano, Christian Dittmar, Yiğitcan Özer, and Meinard 9 | Müller 10 | NMF Toolbox: Music Processing Applications of Nonnegative Matrix 11 | Factorization 12 | In Proceedings of the International Conference on Digital Audio Effects 13 | (DAFx), 2019. 14 | 15 | License: 16 | This file is part of 'NMF toolbox'. 17 | https://www.audiolabs-erlangen.de/resources/MIR/NMFtoolbox/ 18 | 'NMF toolbox' is free software: you can redistribute it and/or modify it 19 | under the terms of the GNU General Public License as published by the 20 | the Free Software Foundation, either version 3 of the License, or (at 21 | your option) any later version. 22 | 23 | 'NMF toolbox' is distributed in the hope that it will be useful, but 24 | WITHOUT ANY WARRANTY; without even the implied warranty of 25 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 26 | Public License for more details. 27 | 28 | You should have received a copy of the GNU General Public License along 29 | with 'NMF toolbox'. If not, see http://www.gnu.org/licenses/. 30 | """ 31 | 32 | import numpy as np 33 | 34 | from compiam.utils.NMFtoolbox.utils import EPS 35 | 36 | 37 | def alphaWienerFilter(mixtureX, sourceA, alpha=1.2, binarize=False): 38 | """Given a cell-array of spectrogram estimates as input, this function 39 | computes the alpha-related soft masks for extracting the sources. Details 40 | about this procedure are given in [2], further experimental studies in [3]. 41 | 42 | References 43 | ---------- 44 | [2] Antoine Liutkus and Roland Badeau: Generalized Wiener filtering with 45 | fractional power spectrograms, ICASPP 2015 46 | 47 | [3] Christian Dittmar et al.: An Experimental Approach to Generalized 48 | Wiener Filtering in Music Source Separation, EUSIPCO 2016 49 | 50 | Parameters 51 | ---------- 52 | mixtureX: array_like 53 | The mixture spectrogram (numBins x numFrames) (may be real-or complex-valued) 54 | 55 | sourceA: array_like 56 | A list holding the equally sized spectrogram estimates of single sound sources (aka components) 57 | 58 | alpha: float 59 | The fractional power in rand [0 ... 
2] 60 | 61 | binarize: bool 62 | If this is set to True, we binarize the masks 63 | 64 | 65 | Returns 66 | ------- 67 | sourceX: array_like 68 | A list of extracted source spectrograms 69 | 70 | softMasks: array_like 71 | A list with the extracted masks 72 | """ 73 | 74 | numBins, numFrames = mixtureX.shape 75 | numComp = len(sourceA) 76 | 77 | # Initialize the mixture of the sources / components with a small constant 78 | mixtureA = EPS + np.zeros((numBins, numFrames)) 79 | 80 | softMasks = list() 81 | sourceX = list() 82 | 83 | # Make superposition 84 | for k in range(numComp): 85 | mixtureA += sourceA[k] ** alpha 86 | 87 | # Compute soft masks and spectrogram estimates 88 | for k in range(numComp): 89 | currSoftMask = (sourceA[k] ** alpha) / mixtureA 90 | softMasks.append(currSoftMask.astype(np.float32)) 91 | 92 | # If desired, make this a binary mask 93 | if binarize: 94 | tmp = softMasks[k] 95 | softMasks[k] = tmp[tmp > (1.0 / numComp)] * 1 96 | 97 | # And apply it to the mixture 98 | sourceX.append(mixtureX * currSoftMask) 99 | 100 | return sourceX, softMasks 101 | -------------------------------------------------------------------------------- /compiam/utils/NMFtoolbox/forwardSTFT.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name: forwardSTFT 3 | Date of Revision: Jun 2019 4 | Programmer: Christian Dittmar, Yiğitcan Özer 5 | 6 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 7 | If you use the 'NMF toolbox' please refer to: 8 | [1] Patricio López-Serrano, Christian Dittmar, Yiğitcan Özer, and Meinard 9 | Müller 10 | NMF Toolbox: Music Processing Applications of Nonnegative Matrix 11 | Factorization 12 | In Proceedings of the International Conference on Digital Audio Effects 13 | (DAFx), 2019. 14 | 15 | License: 16 | This file is part of 'NMF toolbox'. 17 | https://www.audiolabs-erlangen.de/resources/MIR/NMFtoolbox/ 18 | 'NMF toolbox' is free software: you can redistribute it and/or modify it 19 | under the terms of the GNU General Public License as published by the 20 | the Free Software Foundation, either version 3 of the License, or (at 21 | your option) any later version. 22 | 23 | 'NMF toolbox' is distributed in the hope that it will be useful, but 24 | WITHOUT ANY WARRANTY; without even the implied warranty of 25 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 26 | Public License for more details. 27 | 28 | You should have received a copy of the GNU General Public License along 29 | with 'NMF toolbox'. If not, see http://www.gnu.org/licenses/. 30 | """ 31 | 32 | import numpy as np 33 | from scipy.fftpack import fft 34 | 35 | 36 | def forwardSTFT(x, parameter=None): 37 | """Given a time signal as input, this computes the spectrogram by means of 38 | the Short-time fourier transform 39 | 40 | Parameters 41 | ---------- 42 | x: array-like 43 | The time signal oriented as numSamples x 1 44 | 45 | parameter: dict 46 | blockSize The blocksize to use during analysis 47 | hopSize The hopsize to use during analysis 48 | winFunc The analysis window 49 | reconstMirror This switch decides whether to discard the mirror 50 | spectrum or not 51 | appendFrame This switch decides if we use silence in the 52 | beginning and the end 53 | 54 | 55 | Returns 56 | ------- 57 | X: array-like 58 | The complex valued spectrogram in numBins x numFrames 59 | 60 | A: array-like 61 | The magnitude spectrogram 62 | 63 | P: array-like 64 | The phase spectrogram (wrapped in -pi ... 
+pi) 65 | """ 66 | parameter = init_parameters(parameter) 67 | blockSize = parameter["blockSize"] 68 | halfBlockSize = round(blockSize / 2) 69 | hopSize = parameter["hopSize"] 70 | winFunc = parameter["winFunc"] 71 | reconstMirror = parameter["reconstMirror"] 72 | appendFrame = parameter["appendFrame"] 73 | 74 | # the number of bins needs to be corrected 75 | # if we want to discard the mirror spectrum 76 | if parameter["reconstMirror"]: 77 | numBins = round(parameter["blockSize"] / 2) + 1 78 | else: 79 | numBins = parameter["blockSize"] 80 | 81 | # append safety space in the beginning and end 82 | if appendFrame: 83 | x = np.concatenate( 84 | (np.zeros(halfBlockSize), x, np.zeros(halfBlockSize)), axis=0 85 | ) 86 | 87 | numSamples = len(x) 88 | 89 | # pre-compute the number of frames 90 | numFrames = round(numSamples / hopSize) 91 | 92 | # initialize with correct size 93 | X = np.zeros((np.int(numBins), numFrames), dtype=np.complex64) 94 | 95 | counter = 0 96 | 97 | for k in range(0, len(x) - blockSize, hopSize): 98 | # where to pick 99 | ind = range(k, k + blockSize) 100 | 101 | # pick signal frame 102 | snip = x[ind] 103 | 104 | # apply windowing 105 | snip *= winFunc 106 | 107 | # do FFT 108 | f = fft(snip, axis=0) 109 | 110 | # if required, remove the upper half of spectrum 111 | if reconstMirror: 112 | f = np.delete(f, range(numBins, blockSize), axis=0) 113 | 114 | # store into STFT matrix 115 | X[:, counter] = f 116 | counter += 1 117 | 118 | # after constructing the STFT array, remove excessive frames 119 | X = np.delete(X, range(counter, numFrames), axis=1) 120 | 121 | # compute derived matrices 122 | # get magnitude 123 | A = np.abs(X) 124 | 125 | # get phase 126 | P = np.angle(X) 127 | 128 | # return complex-valued STFT, magnitude STFT, and phase STFT 129 | return X, A, P 130 | 131 | 132 | def init_parameters(parameter): 133 | """Auxiliary function to set the parameter dictionary 134 | 135 | Parameters 136 | ---------- 137 | parameter: dict 138 | See the above function forwardSTFT for further information 139 | 140 | Returns 141 | ------- 142 | parameter: dict 143 | """ 144 | parameter = dict() if not parameter else parameter 145 | parameter["blockSize"] = ( 146 | 2048 if "blockSize" not in parameter else parameter["blockSize"] 147 | ) 148 | parameter["hopSize"] = 512 if "hopSize" not in parameter else parameter["hopSize"] 149 | parameter["winFunc"] = ( 150 | np.hanning(parameter["blockSize"]) 151 | if "winFunc" not in parameter 152 | else parameter["winFunc"] 153 | ) 154 | parameter["reconstMirror"] = ( 155 | True if "reconstMirror" not in parameter else parameter["reconstMirror"] 156 | ) 157 | parameter["appendFrame"] = ( 158 | True if "appendFrame" not in parameter else parameter["appendFrame"] 159 | ) 160 | 161 | return parameter 162 | -------------------------------------------------------------------------------- /compiam/utils/NMFtoolbox/midi2freq.py: -------------------------------------------------------------------------------- 1 | """ 2 | Name: midi2freq 3 | Date: Jun 2019 4 | Programmer: Christian Dittmar, Yiğitcan Özer 5 | 6 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 7 | If you use the 'NMF toolbox' please refer to: 8 | [1] Patricio López-Serrano, Christian Dittmar, Yiğitcan Özer, and Meinard 9 | Müller 10 | NMF Toolbox: Music Processing Applications of Nonnegative Matrix 11 | Factorization 12 | In Proceedings of the International Conference on Digital Audio Effects 13 | (DAFx), 2019. 
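    A quick worked example of the conversion implemented below (concert A,
    MIDI pitch 69):

        freq = (440.0 / 32) * 2 ** ((69 - 9) / 12) = 13.75 * 2 ** 5 = 440.0 Hz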
14 | 15 | License: 16 | This file is part of 'NMF toolbox'. 17 | https://www.audiolabs-erlangen.de/resources/MIR/NMFtoolbox/ 18 | 'NMF toolbox' is free software: you can redistribute it and/or modify it 19 | under the terms of the GNU General Public License as published by the 20 | the Free Software Foundation, either version 3 of the License, or (at 21 | your option) any later version. 22 | 23 | 'NMF toolbox' is distributed in the hope that it will be useful, but 24 | WITHOUT ANY WARRANTY; without even the implied warranty of 25 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 26 | Public License for more details. 27 | 28 | You should have received a copy of the GNU General Public License along 29 | with 'NMF toolbox'. If not, see http://www.gnu.org/licenses/. 30 | """ 31 | 32 | 33 | def midi2freq(midi): 34 | """Converts a given MIDI pitch to the corresponding frequency in Hz. No 35 | sanity checks on the validity of the input are performed. 36 | 37 | Parameters 38 | ---------- 39 | midi: array-like / float 40 | The MIDI pitch, can also be floating value 41 | 42 | Returns 43 | ------- 44 | freq: array-like / float 45 | The frequency in Hz 46 | """ 47 | freq = (440.0 / 32) * 2 ** ((midi - 9) / 12) 48 | 49 | return freq 50 | -------------------------------------------------------------------------------- /compiam/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import inspect 4 | import pathlib 5 | import pickle 6 | import difflib 7 | 8 | import IPython.display as ipd 9 | import numpy as np 10 | 11 | from compiam.io import save_object, load_yaml 12 | from compiam.utils.pitch import cents_to_pitch 13 | 14 | WORKDIR = os.path.dirname(pathlib.Path(__file__).parent.resolve()) 15 | 16 | svara_cents_carnatic_path = os.path.join(WORKDIR, "conf", "raga", "svara_cents.yaml") 17 | svara_lookup_carnatic_path = os.path.join(WORKDIR, "conf", "raga", "carnatic.yaml") 18 | 19 | 20 | def get_logger(name): 21 | """Create logger 22 | 23 | :param name: logger name 24 | """ 25 | logging.basicConfig( 26 | format="[%(asctime)s] %(levelname)s [%(name)s.%(funcName)s:%(lineno)d] %(message)s", 27 | ) 28 | logger = logging.getLogger(name) 29 | logger.setLevel(logging.DEBUG) 30 | return logger 31 | 32 | 33 | logger = get_logger(__name__) 34 | 35 | 36 | def create_if_not_exists(path): 37 | """If the directory at does not exist, create it empty 38 | 39 | :param path: path to folder 40 | """ 41 | directory = os.path.dirname(path) 42 | # Do not try and create directory if path is just a filename 43 | if (not os.path.exists(directory)) and (directory != ""): 44 | os.makedirs(directory) 45 | 46 | 47 | def get_tool_list(modules): 48 | """Given sys.modules[__name__], prints out the imported classes 49 | 50 | :param modules: basically the sys.modules[__name__] of a file 51 | """ 52 | list_of_tools = [] 53 | for _, obj in inspect.getmembers(modules): 54 | if inspect.isclass(obj): 55 | list_of_tools.append(obj.__name__) 56 | return list_of_tools 57 | 58 | 59 | def run_or_cache(func, inputs, cache): 60 | """ 61 | Run function, with inputs, and save 62 | to . 
If already exists, load rather than 63 | run anew 64 | 65 | :param func: python function 66 | :type func: function 67 | :param inputs: parameters to pass to , in order 68 | :type inputs: tuple 69 | :param cache: .pkl filepath 70 | :type cache: str or None 71 | 72 | :returns: output of 73 | :rtype: equal to type returned by 74 | """ 75 | if cache: 76 | if os.path.isfile(cache): 77 | try: 78 | file = open(cache, "rb") 79 | results = pickle.load(file) 80 | return results 81 | except: 82 | logger.warning("Error loading from cache, recomputing") 83 | results = func(*inputs) 84 | 85 | if cache: 86 | try: 87 | create_if_not_exists(cache) 88 | save_object(results, cache) 89 | except Exception as e: 90 | logger.error(f"Error saving object: {e}") 91 | 92 | return results 93 | 94 | 95 | def myround(x, base=5): 96 | return base * round(x / base) 97 | 98 | 99 | def get_timestamp(secs, divider="-"): 100 | """ 101 | Convert seconds into timestamp 102 | 103 | :param secs: seconds 104 | :type secs: int 105 | :param divider: divider between minute and second, default "-" 106 | :type divider: str 107 | 108 | :return: timestamp 109 | :rtype: str 110 | """ 111 | minutes = int(secs / 60) 112 | seconds = round(secs % 60, 2) 113 | return f"{minutes}min{divider}{seconds}sec" 114 | 115 | 116 | def ipy_audio(y, t1, t2, sr=44100): 117 | y_ = y[round(t1 * sr) : round(t2 * sr)] 118 | return ipd.Audio(y_, rate=sr, autoplay=False) 119 | 120 | 121 | def get_svara_pitch_carnatic(raga, tonic=None): 122 | svara_pitch = get_svara_pitch( 123 | raga, tonic, svara_cents_carnatic_path, svara_lookup_carnatic_path 124 | ) 125 | return svara_pitch 126 | 127 | 128 | def get_svara_pitch(raga, tonic, svara_cents_path, svara_lookup_path): 129 | svara_cents = load_yaml(svara_cents_path) 130 | svara_lookup = load_yaml(svara_lookup_path) 131 | 132 | if not raga in svara_lookup: 133 | all_ragas = list(svara_lookup.keys()) 134 | close = difflib.get_close_matches(raga, all_ragas) 135 | error_message = f"Raga, {raga} not available in conf." 
136 | if close: 137 | error_message += f" Nearest matches: {close}" 138 | raise ValueError(error_message) 139 | 140 | arohana = svara_lookup[raga]["arohana"] 141 | avorohana = svara_lookup[raga]["avorohana"] 142 | all_svaras = list(set(arohana + avorohana)) 143 | 144 | if tonic: 145 | svara_pitch = {cents_to_pitch(k, tonic): v for k, v in svara_cents.items()} 146 | else: 147 | svara_pitch = svara_cents 148 | 149 | final_dict = {} 150 | for svara in all_svaras: 151 | for c, sl in svara_pitch.items(): 152 | for s in sl: 153 | if svara == s: 154 | final_dict[c] = s 155 | 156 | return final_dict 157 | 158 | 159 | def add_center_to_mask(mask): 160 | num_one = 0 161 | indices = [] 162 | for i, s in enumerate(mask): 163 | if s == 1: 164 | num_one += 1 165 | indices.append(i) 166 | else: 167 | li = len(indices) 168 | if li: 169 | middle = indices[int(li / 2)] 170 | mask[middle] = 2 171 | num_one = 0 172 | indices = [] 173 | return mask 174 | 175 | 176 | def stereo_to_mono(audio): 177 | """Assuming numpy array as input""" 178 | if len(audio.shape) == 2: 179 | # Put channels first 180 | if audio.shape[0] > audio.shape[1]: 181 | audio = audio.T 182 | if audio.shape[0] > 2: 183 | raise ValueError("Expected mono or stereo audio, got multi-channel audio") 184 | # If stereo, average the channels 185 | if audio.shape[0] == 2: 186 | audio = np.mean(audio, axis=0) 187 | if audio.shape[0] == 1: 188 | audio = np.squeeze(audio, axis=0) 189 | if len(audio.shape) > 2: 190 | raise ValueError("Input must be an unbatched audio signal") 191 | return audio 192 | -------------------------------------------------------------------------------- /compiam/utils/augment/augment_data_ar.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import librosa 4 | import soundfile as sf 5 | from joblib import Parallel, delayed 6 | import argparse 7 | from pathlib import PurePath 8 | 9 | 10 | def attack_remix_hps(x, fs, winSize, hopSize, window_type, nFFT, attack_gain_factor): 11 | S = librosa.stft( 12 | x, 13 | n_fft=nFFT, 14 | hop_length=hopSize, 15 | win_length=winSize, 16 | window=window_type, 17 | center=True, 18 | pad_mode="reflect", 19 | ) 20 | H, P = librosa.decompose.hpss(S, kernel_size=31, power=1.0, mask=False, margin=1.0) 21 | x_harm = librosa.istft( 22 | H, hop_length=hopSize, win_length=winSize, window="hann", center=True 23 | ) 24 | x_perc = librosa.istft( 25 | P, hop_length=hopSize, win_length=winSize, window="hann", center=True 26 | ) 27 | x_remixed = x_harm + attack_gain_factor * x_perc 28 | return x_remixed 29 | 30 | 31 | def augment_data_attack_remix( 32 | audio_path, out_dir, fs, attack_gain_factor, winDur, hopDur 33 | ): 34 | winSize = int(np.ceil(winDur * fs)) 35 | hopSize = int(np.ceil(hopDur * fs)) 36 | nFFT = 2 ** (int(np.ceil(np.log2(winSize))) + 1) 37 | 38 | x, fs = librosa.load(audio_path, sr=fs) 39 | x /= np.max(np.abs(x)) 40 | 41 | x_remixed = attack_remix_hps( 42 | x, fs, winSize, hopSize, "hann", nFFT, attack_gain_factor 43 | ) 44 | x_remixed /= np.max(np.abs(x_remixed)) 45 | sf.write( 46 | os.path.join( 47 | out_dir, 48 | PurePath(audio_path).name.replace( 49 | ".wav", "_ar_%2.2f.wav" % attack_gain_factor 50 | ), 51 | ), 52 | x_remixed, 53 | fs, 54 | ) 55 | return 56 | 57 | 58 | if __name__ == "__main__": 59 | parser = argparse.ArgumentParser(description="attack-remix") 60 | parser.add_argument("--input", type=str, default="", help="path to input audio") 61 | parser.add_argument( 62 | "--output", 63 | type=str, 64 | 
default="./outputs/", 65 | help="folder to save modified audios", 66 | ) 67 | parser.add_argument("--fs", type=int, default=16000, help="sampling rate") 68 | parser.add_argument( 69 | "--win_dur", type=float, default=46.4, help="window size in milliseconds" 70 | ) 71 | parser.add_argument( 72 | "--hop_dur", type=str, default=5, help="hop size in milliseconds" 73 | ) 74 | parser.add_argument( 75 | "--params", 76 | nargs="+", 77 | type=float, 78 | default=[0.3, 0.5, 2, 3], 79 | help="list of gain factors to scale attack portion with", 80 | ) 81 | parser.add_argument( 82 | "--n_jobs", type=int, default=4, help="number of cores to run program on" 83 | ) 84 | 85 | args, _ = parser.parse_known_args() 86 | 87 | winDur = args.win_dur * 1e-3 88 | hopDur = args.hop_dur * 1e-3 89 | 90 | if not os.path.exists(args.output): 91 | os.makedirs(args.output) 92 | 93 | _ = Parallel(n_jobs=args.n_jobs)( 94 | delayed(augment_data_attack_remix)( 95 | args.input, args.output, args.fs, G, winDur, hopDur 96 | ) 97 | for G in args.params 98 | ) 99 | -------------------------------------------------------------------------------- /compiam/utils/augment/augment_data_sf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import librosa 4 | import soundfile as sf 5 | import scipy 6 | from joblib import Parallel, delayed 7 | from pathlib import PurePath 8 | import argparse 9 | 10 | 11 | def tuple_list(s): 12 | try: 13 | x, y, z = map(float, s.split(",")) 14 | return x, y, z 15 | except: 16 | raise argparse.ArgumentTypeError("Gains must be x,y,z") 17 | 18 | 19 | def make_gain_curve( 20 | gain_curve_center, 21 | gain_curve_width, 22 | gain_factor, 23 | fs, 24 | nFFT, 25 | curve_func=scipy.signal.windows.hann, 26 | ): 27 | gain_curve = np.ones(nFFT // 2 + 1) 28 | if gain_factor == 1.0: 29 | return gain_curve 30 | 31 | gain_curve_center = int(np.floor(gain_curve_center * (nFFT // 2 + 1) / (fs / 2))) 32 | gain_curve_width = int(np.floor(gain_curve_width * (nFFT // 2 + 1) / (fs / 2))) 33 | low_end = np.max([0, gain_curve_center - gain_curve_width // 2]) 34 | high_end = np.min([nFFT // 2 + 1, gain_curve_center + gain_curve_width // 2]) 35 | 36 | try: 37 | gain_curve[low_end:high_end] += (gain_factor - 1) * curve_func(gain_curve_width) 38 | except ValueError: 39 | gain_curve[low_end : high_end + 1] += (gain_factor - 1) * curve_func( 40 | gain_curve_width 41 | ) 42 | 43 | return gain_curve 44 | 45 | 46 | def augment_data_spectral_shape( 47 | audio_path, out_dir, fs, gain_curve_params, winDur=46.4e-3, hopDur=5e-3 48 | ): 49 | # analysis parameters 50 | winSize = int(np.ceil(winDur * fs)) 51 | hopSize = int(np.ceil(hopDur * fs)) 52 | nFFT = 2 ** (int(np.ceil(np.log2(winSize))) + 1) 53 | 54 | # load audio 55 | x, fs = librosa.load(audio_path, sr=fs) 56 | x /= np.max(np.abs(x)) 57 | 58 | # apply hps 59 | components = ["harm", "perc"] 60 | S = librosa.stft( 61 | x, 62 | n_fft=nFFT, 63 | hop_length=hopSize, 64 | win_length=winSize, 65 | window="hann", 66 | center=True, 67 | pad_mode="reflect", 68 | ) 69 | S = dict( 70 | zip( 71 | components, 72 | librosa.decompose.hpss( 73 | S, kernel_size=31, power=1.0, mask=False, margin=1.0 74 | ), 75 | ) 76 | ) 77 | 78 | x = {} 79 | x_stft = {} 80 | x_resyn = [] 81 | for comp in components: 82 | x[comp] = librosa.istft( 83 | S[comp], hop_length=hopSize, win_length=winSize, window="hann", center=True 84 | ) 85 | x_stft[comp] = librosa.stft( 86 | x[comp], 87 | n_fft=nFFT, 88 | hop_length=hopSize, 89 | win_length=winSize, 90 | 
window="hann", 91 | center=True, 92 | pad_mode="reflect", 93 | ) 94 | S = [] 95 | 96 | gain_curves = {} 97 | for comp in components: 98 | # make gain_curve 99 | if comp == "perc": 100 | gain_curves[comp] = make_gain_curve( 101 | fs / 4, fs / 2, gain_curve_params[2], fs, nFFT 102 | ) 103 | else: 104 | # bass 105 | gain_curves[comp] = make_gain_curve( 106 | 100, 100, gain_curve_params[0], fs, nFFT 107 | ) 108 | # treble 109 | gain_curves[comp] *= make_gain_curve( 110 | 1100, 1800, gain_curve_params[1], fs, nFFT 111 | ) 112 | 113 | # modify shape 114 | x_stft[comp] *= np.atleast_2d(gain_curves[comp]).T 115 | 116 | x[comp] = librosa.istft( 117 | x_stft[comp], 118 | hop_length=hopSize, 119 | win_length=None, 120 | window="hann", 121 | center=True, 122 | ) 123 | 124 | if len(x_resyn) == 0: 125 | x_resyn = x[comp] 126 | else: 127 | x_resyn += x[comp] 128 | 129 | x_resyn /= np.max(x_resyn) 130 | 131 | audio_save_name = PurePath(audio_path).name.replace( 132 | ".wav", "_sf_bass_%2.2f_treble_%2.2f_tilt_%2.2f.wav" % tuple(gain_curve_params) 133 | ) 134 | 135 | sf.write(os.path.join(out_dir, audio_save_name), x_resyn, fs) 136 | 137 | return 138 | 139 | 140 | if __name__ == "__main__": 141 | parser = argparse.ArgumentParser(description="attack-remix") 142 | parser.add_argument("--input", type=str, default="", help="path to input audio") 143 | parser.add_argument( 144 | "--output", 145 | type=str, 146 | default="./outputs/", 147 | help="folder to save modified audios", 148 | ) 149 | parser.add_argument("--fs", type=int, default=16000, help="sampling rate") 150 | parser.add_argument( 151 | "--win_dur", type=float, default=46.4, help="window size in milliseconds" 152 | ) 153 | parser.add_argument( 154 | "--hop_dur", type=str, default=5e-3, help="hop size in milliseconds" 155 | ) 156 | parser.add_argument( 157 | "--params", 158 | nargs="+", 159 | type=tuple_list, 160 | default=[(0.6, 2, 0.2), (0.6, 2, 3), (2, 0.5, 0.2), (2, 0.5, 3)], 161 | help="list of 3-tuples with gain factors for filtering. Tuple entries correspond to each of bass, treble, & tilt filters.", 162 | ) 163 | parser.add_argument( 164 | "--n_jobs", type=int, default=4, help="number of cores to run program on" 165 | ) 166 | 167 | args, _ = parser.parse_known_args() 168 | 169 | winDur = args.win_dur * 1e-3 170 | hopDur = args.hop_dur * 1e-3 171 | 172 | if not os.path.exists(args.output): 173 | os.makedirs(args.output) 174 | 175 | _ = Parallel(n_jobs=args.n_jobs)( 176 | delayed(augment_data_spectral_shape)( 177 | args.input, args.output, args.fs, gain_set, winDur=winDur, hopDur=hopDur 178 | ) 179 | for gain_set in args.params 180 | ) 181 | -------------------------------------------------------------------------------- /compiam/utils/augment/templates.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MTG/compIAM/7efdfa86ea8e20ab4950d0b78c9080bb9dc35448/compiam/utils/augment/templates.npy -------------------------------------------------------------------------------- /compiam/utils/download.py: -------------------------------------------------------------------------------- 1 | """Utilities for downloading from the web. 2 | PART OF THE CODE IS TAKEN FROM mir-dataset-loaders/mirdata. 
Kudos to all authors :) 3 | """ 4 | 5 | import os 6 | import zipfile 7 | import hashlib 8 | import requests 9 | 10 | from tqdm import tqdm 11 | from smart_open import open 12 | 13 | from compiam.utils import get_logger 14 | 15 | logger = get_logger(__name__) 16 | 17 | 18 | def md5(file_path): 19 | """Get md5 hash of a file. 20 | 21 | Args: 22 | file_path (str): File path 23 | 24 | Returns: 25 | str: md5 hash of data in file_path 26 | 27 | """ 28 | hash_md5 = hashlib.md5() 29 | with open(file_path, "rb", compression="disable") as fhandle: 30 | for chunk in iter(lambda: fhandle.read(4096), b""): 31 | hash_md5.update(chunk) 32 | return hash_md5.hexdigest() 33 | 34 | 35 | def download_remote_model( 36 | download_link, download_checksum, download_path, force_overwrite=False 37 | ): 38 | """Elegantly download model from Zenodo. 39 | 40 | IMPORTANT DISCLAIMER: Part of the code is taken from mir-dataset-loders/mirdata :) 41 | 42 | :param download_link: link to remote model to download 43 | :param download_checksum: checksum of the downloaded model 44 | :param download_path: path to save the downloaded model 45 | :param force_overwrite: if True, overwrite existing file 46 | """ 47 | if "zenodo.org" not in download_link: 48 | raise ValueError("Only Zenodo download link are supported.") 49 | if len(os.listdir(download_path)) > 0 and not force_overwrite: 50 | logger.warning( 51 | f"""Files already exist at {download_path}. Skipping download. 52 | Please make sure these are correct. 53 | Otherwise, run the .download_model() method with force_overwrite=True. 54 | """ 55 | ) 56 | return 57 | else: 58 | local_filename = download_zip(download_link, download_path) 59 | # Check the checksum 60 | checksum = md5(local_filename) 61 | if download_checksum != checksum: 62 | raise IOError( 63 | "{} has an MD5 checksum ({}) " 64 | "differing from expected ({}), " 65 | "file may be corrupted.".format( 66 | download_path, checksum, download_checksum 67 | ) 68 | ) 69 | # Unzip it 70 | extract_zip(local_filename, download_path, cleanup=True) 71 | logger.info("Files downloaded and extracted successfully.") 72 | return 73 | 74 | 75 | def download_zip(url, root_path): 76 | """Download a ZIP file from a URL.""" 77 | # Get the file name from the URL 78 | local_filename = os.path.join( 79 | root_path, 80 | url.split("/")[-1].split("?")[0] 81 | ) 82 | 83 | # Stream the download and save the file 84 | with requests.get(url, stream=True) as r: 85 | r.raise_for_status() 86 | total_size = int(r.headers.get("content-length", 0)) 87 | chunk_size = 8192 88 | with open(local_filename, "wb") as f, tqdm( 89 | total=total_size, unit="iB", unit_scale=True 90 | ) as pbar: 91 | for chunk in r.iter_content(chunk_size=chunk_size): 92 | if chunk: # filter out keep-alive new chunks 93 | f.write(chunk) 94 | pbar.update(len(chunk)) 95 | logger.info(f"Download complete: {local_filename}") 96 | return local_filename 97 | 98 | 99 | def extract_zip(local_filename, extract_to=".", cleanup=True): 100 | """Extract a ZIP file into a given folder.""" 101 | # Check if it's a zip file 102 | if zipfile.is_zipfile(local_filename): 103 | logger.info(f"Extracting {local_filename}...") 104 | with zipfile.ZipFile(local_filename, "r") as zip_ref: 105 | zip_ref.extractall(extract_to) 106 | logger.info(f"Extraction complete: Files extracted to {extract_to}") 107 | else: 108 | logger.info(f"{local_filename} is not a valid ZIP file.") 109 | if cleanup: 110 | os.remove(local_filename) 111 | 
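# Illustrative usage sketch for the helpers above (not executed): the Zenodo
# URL, checksum, and target folder below are hypothetical placeholder values.
#
#   download_remote_model(
#       download_link="https://zenodo.org/records/0000000/files/model.zip?download=1",
#       download_checksum="d41d8cd98f00b204e9800998ecf8427e",
#       download_path="./compiam/models/example_model/",  # folder must already exist
#       force_overwrite=False,
#   )
#
# download_remote_model() checks that the link points to Zenodo, downloads the
# archive with download_zip(), verifies its MD5 with md5(), and finally unpacks
# it with extract_zip(), removing the archive when cleanup=True.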
-------------------------------------------------------------------------------- /compiam/version.py: -------------------------------------------------------------------------------- 1 | """Version info""" 2 | 3 | short_version = "0.4" 4 | version = "0.4.1" -------------------------------------------------------------------------------- /compiam/visualisation/__init__.py: -------------------------------------------------------------------------------- 1 | # Importing visualisation modules 2 | from compiam.visualisation.audio import * 3 | from compiam.visualisation.training import * 4 | from compiam.visualisation.peaks import * 5 | from compiam.visualisation.waveform_player import * 6 | from compiam.visualisation.pitch import * 7 | -------------------------------------------------------------------------------- /compiam/visualisation/audio.py: -------------------------------------------------------------------------------- 1 | import os 2 | import librosa 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | from compiam.utils import get_logger 8 | 9 | logger = get_logger(__name__) 10 | 11 | 12 | def plot_waveform( 13 | input_data, 14 | t1, 15 | t2, 16 | labels=None, 17 | input_sr=44100, 18 | sr=44100, 19 | output_path=None, 20 | verbose=False, 21 | ): 22 | """Plotting waveform between two given points with optional labels 23 | 24 | :param input_data: path to audio file or numpy array like audio signal 25 | :param input_sr: sampling rate of the input array of data (if any). This variable is only 26 | relevant if the input is an array of data instead of a filepath. 27 | :param t1: starting point for plotting 28 | :param t2: ending point for plotting 29 | :param labels: dictionary {time_stamp:label} to plot on top of waveform 30 | :param sr: sampling rate 31 | :param output_path: optional path (finished with .png) where the plot is saved 32 | """ 33 | if verbose is False: 34 | logger.setLevel("ERROR") 35 | if isinstance(input_data, str): 36 | if not os.path.exists(input_data): 37 | raise FileNotFoundError("Target audio not found.") 38 | audio, _ = librosa.load(input_data, sr=sr) 39 | elif isinstance(input_data, np.ndarray): 40 | logger.warning( 41 | f"Resampling... 
(input sampling rate is {input_sr}Hz, make sure this is correct)" 42 | ) 43 | audio = librosa.resample(input_data, orig_sr=input_sr, target_sr=sr) 44 | else: 45 | raise ValueError("Input must be path to audio signal or an audio array") 46 | 47 | y1 = t1 * sr 48 | y2 = t2 * sr 49 | audio = audio[y1:y2] 50 | max_y = max(audio) 51 | min_y = min(audio) 52 | t = np.linspace(t1, t2, len(audio)) 53 | 54 | # Plot 55 | plt.figure(figsize=(20, 5)) 56 | fig, ax = plt.subplots() 57 | 58 | ax.set_facecolor("#dbdbdb") 59 | plt.plot(t, audio, color="darkgreen") 60 | plt.ylabel("Signal Value") 61 | plt.xlabel("Time (s)") 62 | plt.ylim((min_y - min_y * 0.1, max_y + max_y * 0.1)) 63 | 64 | if labels is not None: 65 | for o, l in labels.items(): 66 | if t1 <= o <= t2: 67 | logger.info(f"{o}:{l}") 68 | plt.axvline(o, color="firebrick", linestyle="--") 69 | plt.text(o, max_y + max_y * 0.11, l, color="firebrick") 70 | 71 | if output_path: 72 | plt.savefig(output_path) 73 | else: 74 | plt.show() 75 | -------------------------------------------------------------------------------- /compiam/visualisation/peaks/__init__.py: -------------------------------------------------------------------------------- 1 | from compiam.visualisation.peaks.data import PeakData 2 | -------------------------------------------------------------------------------- /compiam/visualisation/peaks/intervals.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code taken with permission from https://github.com/gopalkoduri/pypeaks 3 | 4 | Author: Gopal Koduri 5 | Date: Oct 2022 6 | """ 7 | 8 | import numpy as np 9 | from compiam.visualisation.peaks.slope import find_nearest_index 10 | 11 | 12 | class Intervals: 13 | def __init__(self, intervals): 14 | """ 15 | Initializes the Intervals object with a set of given intervals. 16 | """ 17 | self.intervals = np.array(intervals) 18 | 19 | def prev_interval(self, interval): 20 | """ 21 | Given a value of an interval, this function returns the 22 | previous interval value 23 | """ 24 | index = np.where(self.intervals == interval) 25 | if index[0][0] - 1 < len(self.intervals): 26 | return self.intervals[index[0][0] - 1] 27 | else: 28 | raise IndexError("Ran out of intervals!") 29 | 30 | def next_interval(self, interval): 31 | """ 32 | Given a value of an interval, this function returns the 33 | next interval value 34 | """ 35 | index = np.where(self.intervals == interval) 36 | if index[0][0] + 1 < len(self.intervals): 37 | return self.intervals[index[0][0] + 1] 38 | else: 39 | raise IndexError("Ran out of intervals!") 40 | 41 | def nearest_interval(self, interval): 42 | """ 43 | This function returns the nearest interval to any given interval. 44 | """ 45 | thresh_range = 25 # in cents 46 | if ( 47 | interval < self.intervals[0] - thresh_range 48 | or interval > self.intervals[-1] + thresh_range 49 | ): 50 | raise IndexError( 51 | "The interval given is beyond " 52 | + str(thresh_range) 53 | + " cents over the range of intervals defined." 
54 | ) 55 | 56 | index = find_nearest_index(self.intervals, interval) 57 | return self.intervals[index] 58 | -------------------------------------------------------------------------------- /compiam/visualisation/training.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | 4 | def plot_losses(train_loss, val_loss, output_path): 5 | """Plotting loss curves 6 | 7 | :param train_loss: training loss curve 8 | :param val_loss: validation loss curve (same length as training curve) 9 | :param output_path: optional path (finished with .png) where the plot is saved 10 | """ 11 | plt.plot(train_loss, label="train") 12 | plt.plot(val_loss, label="val") 13 | plt.legend() 14 | 15 | if output_path: 16 | plt.savefig(output_path) 17 | plt.clf() 18 | else: 19 | plt.show() 20 | -------------------------------------------------------------------------------- /compiam/visualisation/waveform_player/__init__.py: -------------------------------------------------------------------------------- 1 | from compiam.visualisation.waveform_player.core import Player 2 | -------------------------------------------------------------------------------- /compiam/visualisation/waveform_player/core.py: -------------------------------------------------------------------------------- 1 | from ipywidgets import HTML 2 | from pathlib import Path 3 | import mimetypes 4 | import json 5 | import base64 6 | import jinja2 7 | import os 8 | import html 9 | import pathlib 10 | 11 | WORKDIR = os.path.dirname(pathlib.Path(__file__).parent.resolve()) 12 | TOOLDIR = os.path.join(WORKDIR, "waveform_player", "") 13 | WPDIR = os.path.join(TOOLDIR, "waveform-playlist", "") 14 | 15 | 16 | def audio_file_to_base64(filename): 17 | mimetype = mimetypes.guess_type(filename) 18 | return ( 19 | "data:" 20 | + mimetype[0] 21 | + ";base64," 22 | + base64.b64encode(Path(filename).read_bytes()).decode("ascii") 23 | ) 24 | 25 | 26 | def json_track_list(titles, files, gains, mutes, solos): 27 | assert ( 28 | len(titles) == len(files) 29 | and (gains == None or len(gains) == len(titles)) 30 | and (mutes == None or len(mutes) == len(titles)) 31 | and (solos == None or len(solos) == len(titles)) 32 | ) 33 | res = [] 34 | for i in range(len(titles)): 35 | entry = {} 36 | entry["src"] = audio_file_to_base64(files[i]) 37 | entry["name"] = titles[i] 38 | if gains is not None: 39 | entry["gain"] = gains[i] 40 | if mutes is not None: 41 | entry["muted"] = mutes[i] 42 | if solos is not None: 43 | entry["soloed"] = solos[i] 44 | res.append(entry) 45 | return json.dumps(res) 46 | 47 | 48 | def local_text(filename): 49 | if filename[0] == "/": 50 | filename = filename[1:] # this is a bit ghetto 51 | full_path = os.path.join(TOOLDIR, filename) 52 | with open(full_path, "r") as f: 53 | return f.read() 54 | 55 | 56 | def make_playlist_iframe( 57 | titles, 58 | files, 59 | gains=None, 60 | mutes=None, 61 | solos=None, 62 | annotations=None, 63 | template_name="multi-channel.html", 64 | ): 65 | templateLoader = jinja2.FileSystemLoader(searchpath=WPDIR) 66 | templateEnv = jinja2.Environment(loader=templateLoader) 67 | print(WPDIR) 68 | print(template_name) 69 | template = templateEnv.get_template(template_name) 70 | 71 | return template.render( 72 | { 73 | "local_text": local_text, 74 | "play_list_json": lambda: json_track_list( 75 | titles, files, gains, mutes, solos 76 | ), 77 | "annotations_data": lambda: json.dumps(annotations), 78 | } 79 | ) 80 | 81 | 82 | class Player(HTML): 83 | def __init__( 
84 | self, 85 | titles, 86 | files, 87 | annotations=None, 88 | height=None, 89 | width=1000, 90 | gains=None, 91 | mutes=None, 92 | solos=None, 93 | ): 94 | if isinstance(titles, str): 95 | titles = [titles] 96 | if isinstance(files, str): 97 | files = [files] 98 | assert len(titles) == len( 99 | files 100 | ), "Requires titles and files to be equal in length" 101 | if not height: 102 | # 180 headers and buttons, about 150 for each audio 103 | height = 180 + 150 * len(titles) 104 | if annotations: 105 | height += 200 106 | template_name = "annotations.html" if annotations else "multi-channel.html" 107 | super().__init__( 108 | '' 121 | ) 122 | -------------------------------------------------------------------------------- /compiam/visualisation/waveform_player/waveform-playlist/js/annotations.js: -------------------------------------------------------------------------------- 1 | var actions = [ 2 | { 3 | class: 'fas.fa-minus', 4 | title: 'Reduce annotation end by 0.010s', 5 | action: (annotation, i, annotations, opts) => { 6 | var next; 7 | var delta = 0.010; 8 | annotation.end -= delta; 9 | 10 | if (opts.linkEndpoints) { 11 | next = annotations[i + 1]; 12 | next && (next.start -= delta); 13 | } 14 | } 15 | }, 16 | { 17 | class: 'fas.fa-plus', 18 | title: 'Increase annotation end by 0.010s', 19 | action: (annotation, i, annotations, opts) => { 20 | var next; 21 | var delta = 0.010; 22 | annotation.end += delta; 23 | 24 | if (opts.linkEndpoints) { 25 | next = annotations[i + 1]; 26 | next && (next.start += delta); 27 | } 28 | } 29 | }, 30 | { 31 | class: 'fas.fa-cut', 32 | title: 'Split annotation in half', 33 | action: (annotation, i, annotations) => { 34 | const halfDuration = (annotation.end - annotation.start) / 2; 35 | 36 | annotations.splice(i + 1, 0, { 37 | id: 'test', 38 | start: annotation.end - halfDuration, 39 | end: annotation.end, 40 | lines: ['----'], 41 | lang: 'en', 42 | }); 43 | 44 | annotation.end = annotation.start + halfDuration; 45 | } 46 | }, 47 | { 48 | class: 'fas.fa-trash', 49 | title: 'Delete annotation', 50 | action: (annotation, i, annotations) => { 51 | annotations.splice(i, 1); 52 | } 53 | } 54 | ]; 55 | -------------------------------------------------------------------------------- /compiam/visualisation/waveform_player/waveform-playlist/js/waveform-playlist.js.LICENSE.txt: -------------------------------------------------------------------------------- 1 | /*! 2 | * Cross-Browser Split 1.1.1 3 | * Copyright 2007-2012 Steven Levithan 4 | * Available under the MIT License 5 | * ECMAScript compliant, uniform cross-browser split method 6 | */ -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MTG/compIAM/7efdfa86ea8e20ab4950d0b78c9080bb9dc35448/docs/.nojekyll -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/_static/style.css: -------------------------------------------------------------------------------- 1 | .wy-nav-content { 2 | max-width: 85%; 3 | } -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | import os 7 | import sys 8 | 9 | sys.path.insert(0, os.path.abspath("../")) 10 | 11 | 12 | # -- Project information ----------------------------------------------------- 13 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 14 | 15 | project = "compiam" 16 | copyright = "2024, Music Technology Group, Universitat Pompeu Fabra \ 17 | (Genís Plaja-Roglans, Thomas Nuttall, Xavier Serra)" 18 | author = "Genís Plaja-Roglans, Thomas Nuttall, Xavier Serra" 19 | release = "0.3.0" 20 | 21 | # -- General configuration --------------------------------------------------- 22 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 23 | 24 | extensions = [ 25 | "sphinx.ext.autodoc", 26 | "sphinx.ext.coverage", 27 | "sphinx.ext.napoleon", 28 | "sphinx.ext.viewcode", 29 | "sphinx.ext.autosummary", 30 | "sphinx.ext.intersphinx", 31 | "sphinx.ext.extlinks", 32 | "sphinx.ext.githubpages", 33 | ] 34 | 35 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 36 | 37 | # -- Options for HTML output ------------------------------------------------- 38 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 39 | html_theme = "sphinx_rtd_theme" 40 | html_static_path = ["_static"] 41 | html_css_files = [ 42 | "style.css", 43 | ] -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to compiam's documentation! 2 | =================================== 3 | 4 | compIAM is a collaborative initiative lead by the MTG and involving many researchers that aims at putting together data 5 | loaders, tools, and models for the computational analysis of two main Indian Art Music traditions: Carnatic and Hindustani. 6 | 7 | Installing compIAM 8 | ++++++++++++++++++ 9 | 10 | compIAM is registered to PyPI, therefore the latest release can be installed with: 11 | 12 | .. code-block:: bash 13 | 14 | pip install compiam 15 | 16 | 17 | Nonetheless, to get the latest version of the library with the fresher updates, proceed as follows: 18 | 19 | .. code-block:: bash 20 | 21 | git clone https://github.com/MTG/compIAM.git 22 | cd compIAM 23 | 24 | virtualenv -p python3 compiam_env 25 | source compiam_env/bin/activate 26 | 27 | pip install -e . 
28 | pip install -r requirements.txt 29 | 30 | 31 | License 32 | +++++++ 33 | compIAM is Copyright 2022 Music Technology Group - Universitat Pompeu Fabra 34 | 35 | compIAM is released under the terms of the GNU Affero General Public License (v3 or later). 36 | See the COPYING file for more information. For the case of a particular tool or implementation 37 | that has a specific different licence, this is explicitly specified in the files related to this 38 | tool, and these terms must be followed. 39 | 40 | For any licensing enquires, please contact us at `mtg-info@upf.edu`_. 41 | 42 | .. _mtg-info@upf.edu: mailto:mtg-info@upf.edu 43 | 44 | 45 | Citation 46 | ++++++++ 47 | If you use compIAM for your research, please consider citing our work as: 48 | 49 | .. code-block:: bibtex 50 | 51 | @software{compiam_mtg, 52 | author = {{Genís Plaja-Roglans and Thomas Nuttall and Xavier Serra}}, 53 | title = {compIAM}, 54 | url = {https://mtg.github.io/compIAM/}, 55 | version = {0.3.0}, 56 | year = {2023} 57 | } 58 | 59 | 60 | .. toctree:: 61 | :caption: Basic usage 62 | :hidden: 63 | 64 | source/basic_usage 65 | 66 | .. toctree:: 67 | :caption: Tools and models 68 | :hidden: 69 | 70 | source/melody 71 | source/rhythm 72 | source/structure 73 | source/timbre 74 | source/separation 75 | 76 | .. toctree:: 77 | :caption: Datasets 78 | :hidden: 79 | 80 | source/datasets 81 | 82 | .. toctree:: 83 | :caption: Miscellaneous 84 | :hidden: 85 | 86 | source/visualisation 87 | source/utils 88 | 89 | .. toctree:: 90 | :caption: Contributing 91 | :hidden: 92 | 93 | source/contributing -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/basic_usage.rst: -------------------------------------------------------------------------------- 1 | Basic usage 2 | =========== 3 | 4 | Loading the tools 5 | +++++++++++++++++ 6 | 7 | compIAM does not have terminal functionalities but it is to be used within Python based-projects. First, import the library to 8 | your Python project with: ``import compiam``. 9 | 10 | The integrated tools and models are organized by: 11 | 12 | #. First, the following fundamental musical aspects: melody, rhythm, structure, timbre, and from v0.3.0 we have included separation as well. 13 | #. Then, the tools are grouped by tasks. 14 | 15 | You can access the several included tools by importing them from their corresponding modules: 16 | 17 | .. 
code-block:: python
18 | 
19 |     from compiam.melody.pitch_extraction import FTANetCarnatic
20 |     from compiam.rhythm.transcription import FourWayTabla
21 | 
22 | 
23 | We provide convenient functions to explore where the tools live:
24 | 
25 | #. Print out the available tasks for each category: ``compiam.melody.list_tasks()``
26 | #. Print out the available tools for each module using: ``compiam.melody.list_tools()``
27 | #. Print out only the tools for a particular task: ``compiam.melody.pitch_extraction.list_tools()``
28 | 
29 | .. important::
30 |     Some tools (especially the ML/DL models) require specific dependencies that are not installed by default,
31 |     because of their size or compatibility issues. If a tool is loaded and a particular dependency is missing,
32 |     an alert is displayed informing the user which dependency is missing and how to install it in
33 |     the right version. See ``optional_requirements.txt`` where the optional dependencies and
34 |     the specific versions we use in ``compiam`` are listed.
35 | 
36 | 
37 | Wrappers
38 | ++++++++
39 | 
40 | compIAM also includes wrappers to easily initialize relevant datasets, corpora, and pre-trained models for particular problems.
41 | 
42 | .. autosummary::
43 | 
44 |     compiam.load_dataset
45 |     compiam.load_corpora
46 |     compiam.load_model
47 | 
48 | 
49 | .. tip::
50 |     When listing available tools using the ``list_tools()`` functions, some will appear with a "*" at the end. This indicates
51 |     that such tools have pre-trained models available, which may be loaded using the wrapper ``compiam.load_model()``.
52 | 
53 | Model weights are large in size and are therefore not included in the library from v0.2.1 on. We have added a ``.download_model()``
54 | method to all ML/DL models that require pre-trained weights, so that the user can download them on demand. This method is
55 | automatically run when invoking the model through the ``compiam.load_model()`` wrapper. The model weights are then stored in the
56 | corresponding default folder ``./compiam/model/``. If the model is already downloaded, the function will not download it again.
57 | 
58 | .. note::
59 |     From v0.3.0 on, the ``compiam.load_model()`` wrapper has an argument ``data_home``, where you can specify the folder to which
60 |     the models are downloaded and from which they are read.
61 | 
62 | .. note::
63 |     From v0.4.0 on, the ``compiam.load_model()`` wrapper has an argument ``version``, where you can specify which version of the
64 |     pre-trained model you want to use. You may want to use ``compiam.get_model_info()`` to print out the entire model
65 |     information in compiam/data.py and visualise the available versions. The model contributor selects a default version
66 |     to be loaded without the user having to specify it, so this argument is optional. If you try to load a non-existing version, an
67 |     error will be thrown.
-------------------------------------------------------------------------------- /docs/source/datasets.rst: --------------------------------------------------------------------------------
1 | .. _datasets:
2 | 
3 | Load IAM datasets using mirdata
4 | ===============================
5 | 
6 | compIAM includes an alias function for ``mirdata.initialize()`` to directly
7 | initialize the mirdata loaders of Indian Art Music datasets.
8 | 
9 | .. note::
10 |     Print out the available datasets to load: ``compiam.list_datasets()``.
11 | 
12 | ..
-------------------------------------------------------------------------------- /docs/source/datasets.rst: -------------------------------------------------------------------------------- 1 | .. _datasets: 2 | 3 | Load IAM datasets using mirdata 4 | =============================== 5 | 6 | compIAM includes an alias function for ``mirdata.initialize()`` to directly 7 | initialize the mirdata loaders of Indian Art Music datasets. 8 | 9 | .. note:: 10 | Print out the available datasets to load: ``compiam.list_datasets()``. 11 | 12 | .. autofunction:: compiam.load_dataset 13 | 14 | 15 | Access the Dunya corpora 16 | ======================== 17 | 18 | Use the Corpora class to access the Indian Art Music corpora in CompMusic. 19 | Please note that to access the corpora you need to first register and 20 | get a personal access token. This token is required by the functions 21 | that access the database. 22 | 23 | 24 | .. automodule:: compiam.dunya 25 | :members: 26 | 27 | 28 | .. automodule:: compiam.dunya.conn 29 | :members: -------------------------------------------------------------------------------- /docs/source/melody.rst: -------------------------------------------------------------------------------- 1 | .. _melody: 2 | 3 | Melodic analysis 4 | ================ 5 | 6 | Tonic Identification 7 | ++++++++++++++++++++ 8 | 9 | Tonic Indian Art Music (Multipitch approach) 10 | -------------------------------------------- 11 | 12 | .. note:: 13 | REQUIRES: essentia 14 | 15 | .. automodule:: compiam.melody.tonic_identification.tonic_multipitch 16 | :members: 17 | 18 | 19 | Pitch Extraction 20 | ++++++++++++++++ 21 | 22 | Melodia 23 | ------- 24 | 25 | .. note:: 26 | REQUIRES: essentia 27 | 28 | .. automodule:: compiam.melody.pitch_extraction.melodia 29 | :members: 30 | 31 | FTANet-Carnatic 32 | --------------- 33 | 34 | .. note:: 35 | REQUIRES: tensorflow 36 | 37 | .. autoclass:: compiam.melody.pitch_extraction.FTANetCarnatic 38 | :members: 39 | 40 | FTAResNet-Carnatic 41 | ------------------ 42 | 43 | .. note:: 44 | REQUIRES: torch 45 | 46 | .. autoclass:: compiam.melody.pitch_extraction.FTAResNetCarnatic 47 | :members: 48 | 49 | 50 | Melodic Pattern Discovery 51 | +++++++++++++++++++++++++ 52 | 53 | CAE-Carnatic (Wrapper) 54 | ---------------------- 55 | 56 | .. note:: 57 | REQUIRES: torch 58 | 59 | .. autoclass:: compiam.melody.pattern.sancara_search.CAEWrapper 60 | :members: 61 | 62 | Self-similarity matrix 63 | ---------------------- 64 | 65 | .. note:: 66 | REQUIRES: torch 67 | 68 | .. automodule:: compiam.melody.pattern.sancara_search.extraction.self_sim 69 | :members: 70 | 71 | 72 | Raga Recognition 73 | ++++++++++++++++ 74 | 75 | DEEPSRGM 76 | -------- 77 | 78 | .. note:: 79 | REQUIRES: torch 80 | 81 | .. autoclass:: compiam.melody.raga_recognition.deepsrgm.DEEPSRGM 82 | :members: 83 | -------------------------------------------------------------------------------- /docs/source/rhythm.rst: -------------------------------------------------------------------------------- 1 | .. _rhythm: 2 | 3 | Rhythm analysis 4 | =============== 5 | 6 | Percussion Transcription 7 | ++++++++++++++++++++++++ 8 | 9 | 10 | Mnemonic Transcription 11 | ---------------------- 12 | 13 | .. note:: 14 | REQUIRES: torch 15 | 16 | .. autoclass:: compiam.rhythm.transcription.mnemonic_transcription.MnemonicTranscription 17 | :members: 18 | 19 | 20 | Meter tracking 21 | ++++++++++++++ 22 | 23 | Akshara Pulse Tracker 24 | --------------------- 25 | 26 | .. autoclass:: compiam.rhythm.meter.akshara_pulse_tracker.AksharaPulseTracker 27 | :members: -------------------------------------------------------------------------------- /docs/source/separation.rst: -------------------------------------------------------------------------------- 1 | .. _separation: 2 | 3 | Separation 4 | ========== 5 | 6 | Singing voice extraction 7 | ++++++++++++++++++++++++ 8 | 9 | Leakage-aware Carnatic Singing Voice Separation 10 | ----------------------------------------------- 11 | 12 | .. note:: 13 | REQUIRES: tensorflow 14 | 15 | .. 
autoclass:: compiam.separation.singing_voice_extraction.cold_diff_sep.ColdDiffSep 16 | :members: 17 | 18 | 19 | Vocals and violin separation 20 | ++++++++++++++++++++++++++++ 21 | 22 | MDXNet mixer model to separate vocals and violin 23 | ------------------------------------------------ 24 | 25 | .. note:: 26 | REQUIRES: torch 27 | 28 | .. autoclass:: compiam.separation.music_source_separation.mixer_model.MixerModel 29 | :members: -------------------------------------------------------------------------------- /docs/source/structure.rst: -------------------------------------------------------------------------------- 1 | .. _structure: 2 | 3 | Structure analysis 4 | ================== 5 | 6 | Segmentation 7 | +++++++++++++ 8 | 9 | Dhrupad Bandish Segmentation 10 | ---------------------------- 11 | 12 | .. note:: 13 | REQUIRES: torch 14 | 15 | .. autoclass:: compiam.structure.segmentation.dhrupad_bandish_segmentation.DhrupadBandishSegmentation 16 | :members: 17 | :inherited-members: -------------------------------------------------------------------------------- /docs/source/timbre.rst: -------------------------------------------------------------------------------- 1 | .. _timbre: 2 | 3 | Timbre analysis 4 | =============== 5 | 6 | Stroke Classification 7 | +++++++++++++++++++++ 8 | 9 | Mridangam Stroke Classification 10 | ------------------------------- 11 | 12 | .. note:: 13 | REQUIRES: essentia 14 | 15 | .. autoclass:: compiam.timbre.stroke_classification.mridangam_stroke_classification.MridangamStrokeClassification 16 | :members: -------------------------------------------------------------------------------- /docs/source/utils.rst: -------------------------------------------------------------------------------- 1 | Util functions 2 | ============== 3 | 4 | Data augmentation utils 5 | +++++++++++++++++++++++ 6 | 7 | .. automodule:: compiam.utils.augment 8 | :members: 9 | 10 | Pitch-related utils 11 | +++++++++++++++++++ 12 | 13 | .. automodule:: compiam.utils.pitch 14 | :members: -------------------------------------------------------------------------------- /docs/source/visualisation.rst: -------------------------------------------------------------------------------- 1 | Visualisation tools 2 | =================== 3 | 4 | PyPeaks for Indian Art Music 5 | ++++++++++++++++++++++++++++ 6 | 7 | .. automodule:: compiam.visualisation.peaks 8 | :members: 9 | 10 | Audio visualisation utils 11 | ++++++++++++++++++++++++++ 12 | 13 | .. automodule:: compiam.visualisation.audio 14 | :members: 15 | 16 | Training visualisation utils 17 | ++++++++++++++++++++++++++++ 18 | 19 | .. 
automodule:: compiam.visualisation.training 20 | :members: -------------------------------------------------------------------------------- /notebooks/melody/KDE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MTG/compIAM/7efdfa86ea8e20ab4950d0b78c9080bb9dc35448/notebooks/melody/KDE.png -------------------------------------------------------------------------------- /notebooks/melody/emphasizing_diagonals.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MTG/compIAM/7efdfa86ea8e20ab4950d0b78c9080bb9dc35448/notebooks/melody/emphasizing_diagonals.png -------------------------------------------------------------------------------- /notebooks/melody/extracting_segments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MTG/compIAM/7efdfa86ea8e20ab4950d0b78c9080bb9dc35448/notebooks/melody/extracting_segments.png -------------------------------------------------------------------------------- /notebooks/melody/grouping1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MTG/compIAM/7efdfa86ea8e20ab4950d0b78c9080bb9dc35448/notebooks/melody/grouping1.png -------------------------------------------------------------------------------- /notebooks/melody/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MTG/compIAM/7efdfa86ea8e20ab4950d0b78c9080bb9dc35448/notebooks/melody/test.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools >= 61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "compiam" 7 | dynamic = ["version"] 8 | description = "Common tools for the computational analysis of Indian Art Music." 
9 | readme = "README.md" 10 | license = {file = "COPYING"} 11 | requires-python = ">=3.9" 12 | keywords = ["carnatic", "hindustani", "models", "datasets", "tools"] 13 | classifiers = [ 14 | "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", 15 | "Programming Language :: Python", 16 | "Development Status :: 3 - Alpha", 17 | "Intended Audience :: Science/Research", 18 | "Topic :: Multimedia :: Sound/Audio :: Analysis", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11" 22 | ] 23 | dependencies = [ 24 | "attrs>=23.1.0", 25 | "matplotlib>=3.0.0", 26 | "numpy>=1.20.3,<=1.26.4", 27 | "joblib>=1.2.0", 28 | "pathlib~=1.0.1", 29 | "tqdm>=4.66.1", 30 | "IPython>=7.34.0", 31 | "ipywidgets>=7.0.0,<8", 32 | "Jinja2~=3.1.2", 33 | "configobj~=5.0.6", 34 | "seaborn", 35 | "librosa>=0.10.1", 36 | "soundfile>=0.12.1", 37 | "scikit-learn~=1.5.2", 38 | "scikit-image~=0.24.0", 39 | "opencv-python~=4.6.0", 40 | "hmmlearn==0.3.3", 41 | "fastdtw~=0.3.4", 42 | "compmusic==0.4", 43 | "mirdata==0.3.9", 44 | ] 45 | 46 | [tool.pytest.ini_options] 47 | testpaths = [ 48 | "tests", 49 | ] 50 | 51 | [project.optional-dependencies] 52 | tests = [ 53 | "decorator>=5.1.1", 54 | "pytest>=7.2.0", 55 | "future>=0.18.3", 56 | "black>=23.3.0", 57 | ] 58 | docs = [ 59 | "numpydoc", 60 | "recommonmark", 61 | "sphinx>=3.4.0", 62 | "sphinxcontrib-napoleon", 63 | "sphinx_rtd_theme", 64 | "sphinx-togglebutton", 65 | ] 66 | tensorflow = [ 67 | "keras<3.0.0", 68 | "tensorflow==2.15.0", 69 | ] # Fixing tf versions to avoid issues 70 | torch = [ 71 | "torch==2.0.0", 72 | "torchaudio==2.0.1", 73 | ] 74 | essentia = [ 75 | "essentia", 76 | ] 77 | 78 | [project.urls] 79 | Homepage = "https://github.com/MTG/compIAM" 80 | Documentation = "https://mtg.github.io/compIAM/" 81 | Issues = "https://github.com/MTG/compIAM/issues/" 82 | Releases = "https://github.com/MTG/compIAM/releases/" -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ Setup script for compiam. 
""" 2 | from importlib.machinery import SourceFileLoader 3 | from setuptools import setup, find_packages 4 | 5 | version_sfl = SourceFileLoader("compiam.version", "compiam/version.py") 6 | version_module = version_sfl.load_module() 7 | 8 | if __name__ == "__main__": 9 | setup( 10 | version=version_module.version, 11 | packages=find_packages(exclude=["test", "*.test", "*.test.*","tests", "tests.*", "*.tests.*", "tests/__pycache__", "tests/__pycache__/*", "notebooks/*", "notebooks/__pycache__/*"]), 12 | ) -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Tests -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MTG/compIAM/7efdfa86ea8e20ab4950d0b78c9080bb9dc35448/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | OPTIONS = [ 4 | "tensorflow", 5 | "torch", 6 | "essentia", 7 | "full_ml", 8 | "all", 9 | ] 10 | 11 | 12 | def pytest_addoption(parser): 13 | for option in OPTIONS: 14 | parser.addoption( 15 | "--" + option, 16 | action="store_true", 17 | default=False, 18 | help="test setting for " + option.replace("_", " and "), 19 | ) 20 | 21 | 22 | @pytest.fixture(scope="session") 23 | def skip_tensorflow(request): 24 | return request.config.getoption("--tensorflow") 25 | 26 | 27 | @pytest.fixture(scope="session") 28 | def skip_torch(request): 29 | return request.config.getoption("--torch") 30 | 31 | 32 | @pytest.fixture(scope="session") 33 | def skip_essentia(request): 34 | return request.config.getoption("--essentia") 35 | 36 | 37 | @pytest.fixture(scope="session") 38 | def skip_full_ml(request): 39 | return request.config.getoption("--full_ml") 40 | 41 | 42 | @pytest.fixture(scope="session") 43 | def skip_all(request): 44 | return request.config.getoption("--all") 45 | 46 | 47 | def pytest_configure(config): 48 | for option in OPTIONS: 49 | config.addinivalue_line( 50 | "markers", 51 | option + ": run optional " + option.replace("_", "and") + " tests", 52 | ) 53 | 54 | 55 | def _skip_tests_or_no(config, items, option, option_flag, running_test): 56 | # No option has been added 57 | if option_flag == -1: 58 | if not config.getoption("--" + option): 59 | skip_option = pytest.mark.skip(reason="need --" + option + " option to run") 60 | for item in items: 61 | if option in item.keywords: 62 | item.add_marker(skip_option) 63 | else: 64 | running_test = option 65 | option_flag = 0 66 | return config, items, option_flag, running_test 67 | 68 | # One option already added. Warning user... 69 | elif option_flag == 0: 70 | if not config.getoption("--" + option): 71 | skip_option = pytest.mark.skip(reason="need --" + option + " option to run") 72 | for item in items: 73 | if option in item.keywords: 74 | item.add_marker(skip_option) 75 | else: 76 | print( 77 | "\n\nIMPORTANT: You have entered two testing markers. " 78 | + "Please do run test options one by one. You will find the " 79 | + "available options in the docs or by running pytest --markers " 80 | + "in the terminal. 
Running tests only for: " 81 | + running_test 82 | + "\n" 83 | ) 84 | skip_option = pytest.mark.skip(reason="need --" + option + " option to run") 85 | for item in items: 86 | if option in item.keywords: 87 | item.add_marker(skip_option) 88 | option_flag = 1 89 | return config, items, option_flag, running_test 90 | 91 | # User already warned. No need for multiple warnings. 92 | elif option_flag == 1: 93 | if not config.getoption("--" + option): 94 | skip_option = pytest.mark.skip(reason="need --" + option + " option to run") 95 | for item in items: 96 | if option in item.keywords: 97 | item.add_marker(skip_option) 98 | else: 99 | skip_option = pytest.mark.skip(reason="need --" + option + " option to run") 100 | for item in items: 101 | if option in item.keywords: 102 | item.add_marker(skip_option) 103 | return config, items, option_flag, running_test 104 | 105 | 106 | def pytest_collection_modifyitems(config, items): 107 | running_test = None 108 | option_flag = -1 109 | for option in OPTIONS: 110 | config, items, option_flag, running_test = _skip_tests_or_no( 111 | config, items, option, option_flag, running_test 112 | ) 113 | return 114 | -------------------------------------------------------------------------------- /tests/melody/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MTG/compIAM/7efdfa86ea8e20ab4950d0b78c9080bb9dc35448/tests/melody/__init__.py -------------------------------------------------------------------------------- /tests/melody/test_deepsrgm.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | import librosa 4 | 5 | import numpy as np 6 | 7 | from compiam import load_model 8 | from compiam.data import TESTDIR 9 | 10 | 11 | def _load_model(): 12 | from compiam.melody.raga_recognition import DEEPSRGM 13 | 14 | deepsrgm = DEEPSRGM( 15 | mapping_path=os.path.join(TESTDIR, "resources", "melody", "raga_mapping.json") 16 | ) 17 | raga_mapping = deepsrgm.mapping 18 | assert raga_mapping == { 19 | 0: "Bhairav", 20 | 1: "Madhukauns", 21 | 2: "Mōhanaṁ", 22 | 3: "Hamsadhvāni", 23 | 4: "Varāḷi", 24 | 5: "Dēś", 25 | 6: "Kamās", 26 | 7: "Yaman kalyāṇ", 27 | 8: "Bilahari", 28 | 9: "Ahira bhairav", 29 | } 30 | 31 | 32 | def _get_features(): 33 | from compiam.melody.raga_recognition import DEEPSRGM 34 | 35 | deepsrgm = DEEPSRGM( 36 | mapping_path=os.path.join(TESTDIR, "resources", "melody", "raga_mapping.json") 37 | ) 38 | raga_mapping = deepsrgm.mapping 39 | with pytest.raises(FileNotFoundError): 40 | feat = deepsrgm.get_features( 41 | os.path.join(TESTDIR, "resources", "melody", "hola.wav") 42 | ) 43 | with pytest.raises(ValueError): 44 | feat = deepsrgm.get_features( 45 | os.path.join(TESTDIR, "resources", "melody", "pitch_test.wav") 46 | ) 47 | audio = librosa.load(os.path.join(TESTDIR, "resources", "melody", "pitch_test.wav"), sr=44100)[0] 48 | audio = np.tile(audio, 9) 49 | feat_1 = deepsrgm.get_features(audio) 50 | feat_2 = deepsrgm.get_features(np.stack([audio, audio])) 51 | feat_3 = deepsrgm.get_features(np.stack([audio, audio]).T) 52 | 53 | 54 | @pytest.mark.torch 55 | def test_predict_tf(): 56 | _load_model() 57 | 58 | 59 | @pytest.mark.full_ml 60 | def test_predict_full(): 61 | _load_model() 62 | 63 | 64 | @pytest.mark.all 65 | def test_predict_all(): 66 | _load_model() 67 | _get_features() 68 | -------------------------------------------------------------------------------- /tests/melody/test_essentia_extractors.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | import librosa 4 | 5 | import numpy as np 6 | 7 | from compiam.data import TESTDIR 8 | 9 | 10 | def _predict_normalized_pitch(): 11 | from compiam.melody.pitch_extraction import Melodia 12 | 13 | melodia = Melodia() 14 | with pytest.raises(FileNotFoundError): 15 | melodia.extract(os.path.join(TESTDIR, "resources", "melody", "hola.wav")) 16 | pitch = melodia.extract( 17 | os.path.join(TESTDIR, "resources", "melody", "pitch_test.wav") 18 | ) 19 | pitch_2 = melodia.extract(np.zeros([44100])) 20 | pitch_3 = melodia.extract(np.zeros([2, 44100])) # Testing input array 21 | pitch_4 = melodia.extract(np.zeros([44100, 2])) # Testing input array 22 | 23 | assert isinstance(pitch, np.ndarray) 24 | assert np.shape(pitch) == (699, 2) 25 | assert np.all( 26 | np.isclose( 27 | pitch[:10, 0], 28 | np.array( 29 | [ 30 | 0.0, 31 | 0.00290249, 32 | 0.00580499, 33 | 0.00870748, 34 | 0.01160998, 35 | 0.01451247, 36 | 0.01741497, 37 | 0.02031746, 38 | 0.02321995, 39 | 0.02612245, 40 | ] 41 | ), 42 | ) 43 | ) 44 | assert np.all( 45 | np.isclose( 46 | pitch[140:150, 1], 47 | np.array( 48 | [ 49 | 274.00152588, 50 | 270.85430908, 51 | 269.29431152, 52 | 267.74328613, 53 | 266.20120239, 54 | 263.14358521, 55 | 261.62796021, 56 | 260.12109375, 57 | 258.6229248, 58 | 257.13336182, 59 | ] 60 | ), 61 | ) 62 | ) 63 | 64 | from compiam.melody.tonic_identification import TonicIndianMultiPitch 65 | 66 | tonic_multipitch = TonicIndianMultiPitch() 67 | with pytest.raises(FileNotFoundError): 68 | tonic_multipitch.extract( 69 | os.path.join(TESTDIR, "resources", "melody", "hola.wav") 70 | ) 71 | tonic = tonic_multipitch.extract( 72 | os.path.join(TESTDIR, "resources", "melody", "pitch_test.wav") 73 | ) 74 | audio = librosa.load(os.path.join(TESTDIR, "resources", "melody", "pitch_test.wav"), sr=44100)[0] 75 | tonic_2 = tonic_multipitch.extract(audio) # Testing input array 76 | tonic_3 = tonic_multipitch.extract(np.stack([audio, audio])) # Testing input array 77 | tonic_4 = tonic_multipitch.extract(np.stack([audio, audio]).T) # Testing input array 78 | 79 | assert isinstance(tonic, float) 80 | assert tonic == 157.64892578125 81 | 82 | normalised_pitch = melodia.normalise_pitch(pitch, tonic) 83 | assert isinstance(normalised_pitch, np.ndarray) 84 | assert np.shape(normalised_pitch) == np.shape(pitch) 85 | assert np.all( 86 | np.isclose( 87 | normalised_pitch[140:150, 1], 88 | np.array([4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0]), 89 | ) 90 | ) 91 | 92 | 93 | @pytest.mark.essentia 94 | def test_ess_extractors_ess(): 95 | _predict_normalized_pitch() 96 | 97 | 98 | @pytest.mark.all 99 | def test_ess_extractors_ess_all(): 100 | _predict_normalized_pitch() 101 | -------------------------------------------------------------------------------- /tests/melody/test_ftanet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | import librosa 4 | 5 | import numpy as np 6 | 7 | from compiam.data import TESTDIR 8 | from compiam.exceptions import ModelNotTrainedError 9 | 10 | 11 | def _predict_pitch(): 12 | from compiam.melody.pitch_extraction import FTANetCarnatic 13 | 14 | ftanet = FTANetCarnatic() 15 | with pytest.raises(ModelNotTrainedError): 16 | ftanet.predict(os.path.join(TESTDIR, "resources", "melody", "hola.wav")) 17 | ftanet.trained = True 18 | with pytest.raises(FileNotFoundError): 19 | ftanet.predict(os.path.join(TESTDIR, "resources", "melody", 
"hola.wav")) 20 | pitch = ftanet.predict( 21 | os.path.join(TESTDIR, "resources", "melody", "pitch_test.wav") 22 | ) 23 | 24 | audio_in, sr = librosa.load( 25 | os.path.join(TESTDIR, "resources", "melody", "pitch_test.wav") 26 | ) 27 | pitch_2 = ftanet.predict(audio_in, input_sr=sr) 28 | 29 | assert np.all(np.isclose(pitch, pitch_2)) 30 | 31 | assert isinstance(pitch, np.ndarray) 32 | assert np.shape(pitch) == (202, 2) 33 | assert np.all( 34 | np.isclose( 35 | pitch[:10, 0], 36 | np.array( 37 | [ 38 | 0.0, 39 | 0.01007774, 40 | 0.02015547, 41 | 0.03023321, 42 | 0.04031095, 43 | 0.05038868, 44 | 0.06046642, 45 | 0.07054415, 46 | 0.08062189, 47 | 0.09069963, 48 | ] 49 | ), 50 | ) 51 | ) 52 | assert np.all( 53 | np.isclose( 54 | pitch[140:150, 1], 55 | np.array( 56 | # [354.0, 350.0, 350.0, 354.0, 354.0, 358.0, 367.0, 371.0, 375.0, 375.0] 57 | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] 58 | ), 59 | ) 60 | ) 61 | 62 | pitch = ftanet.predict( 63 | os.path.join(TESTDIR, "resources", "melody", "pitch_test.wav"), 64 | out_step=0.001, 65 | ) 66 | 67 | assert np.all( 68 | np.isclose( 69 | pitch[:10, 0], 70 | np.array( 71 | [ 72 | 0.0, 73 | 0.0010008, 74 | 0.00200161, 75 | 0.00300241, 76 | 0.00400321, 77 | 0.00500401, 78 | 0.00600482, 79 | 0.00700562, 80 | 0.00800642, 81 | 0.00900723, 82 | ] 83 | ), 84 | ) 85 | ) 86 | assert np.all( 87 | np.isclose( 88 | pitch[1000:1010, 1], 89 | np.array( 90 | # [327.0, 327.0, 327.0, 327.0, 327.0, 327.0, 327.0, 327.0, 327.0, 327.0] 91 | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] 92 | ), 93 | ) 94 | ) 95 | 96 | 97 | def _predict_normalized_pitch(): 98 | from compiam.melody.pitch_extraction import FTANetCarnatic 99 | 100 | ftanet = FTANetCarnatic() 101 | ftanet.trained = True 102 | with pytest.raises(FileNotFoundError): 103 | ftanet.predict(os.path.join(TESTDIR, "resources", "melody", "hola.wav")) 104 | pitch = ftanet.predict( 105 | os.path.join(TESTDIR, "resources", "melody", "pitch_test.wav") 106 | ) 107 | 108 | from compiam.melody.tonic_identification import TonicIndianMultiPitch 109 | 110 | tonic_multipitch = TonicIndianMultiPitch() 111 | tonic = tonic_multipitch.extract( 112 | os.path.join(TESTDIR, "resources", "melody", "pitch_test.wav") 113 | ) 114 | 115 | assert isinstance(tonic, float) 116 | assert tonic == 157.64892578125 117 | 118 | normalised_pitch = ftanet.normalise_pitch(pitch, tonic) 119 | assert isinstance(normalised_pitch, np.ndarray) 120 | assert np.shape(normalised_pitch) == np.shape(pitch) 121 | 122 | 123 | @pytest.mark.tensorflow 124 | def test_predict_tf(): 125 | _predict_pitch() 126 | 127 | 128 | @pytest.mark.full_ml 129 | def test_predict_full(): 130 | _predict_pitch() 131 | 132 | 133 | @pytest.mark.all 134 | def test_predict_all(): 135 | _predict_pitch() 136 | _predict_normalized_pitch() 137 | -------------------------------------------------------------------------------- /tests/melody/test_ftaresnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | import librosa 4 | 5 | import numpy as np 6 | 7 | from compiam.data import TESTDIR 8 | from compiam.exceptions import ModelNotTrainedError 9 | 10 | 11 | def _predict_pitch(): 12 | from compiam.melody.pitch_extraction import FTAResNetCarnatic 13 | 14 | ftaresnet = FTAResNetCarnatic() 15 | with pytest.raises(ModelNotTrainedError): 16 | ftaresnet.predict(os.path.join(TESTDIR, "resources", "melody", "hola.wav")) 17 | ftaresnet.trained = True 18 | with pytest.raises(FileNotFoundError): 19 | 
ftaresnet.predict(os.path.join(TESTDIR, "resources", "melody", "hola.wav")) 20 | pitch = ftaresnet.predict( 21 | os.path.join(TESTDIR, "resources", "melody", "pitch_test.wav") 22 | ) 23 | 24 | audio_in, sr = librosa.load( 25 | os.path.join(TESTDIR, "resources", "melody", "pitch_test.wav") 26 | ) 27 | pitch_2 = ftaresnet.predict(audio_in, input_sr=sr) 28 | 29 | assert isinstance(pitch, np.ndarray) 30 | assert isinstance(pitch_2, np.ndarray) 31 | assert np.shape(pitch) == (128, 2) 32 | assert np.shape(pitch_2) == (128, 2) 33 | 34 | assert np.all( 35 | np.isclose( 36 | pitch[:10, 0], 37 | np.array( 38 | [ 39 | 0.0, 40 | 0.01007874, 41 | 0.02015748, 42 | 0.03023622, 43 | 0.04031496, 44 | 0.0503937, 45 | 0.06047244, 46 | 0.07055118, 47 | 0.08062992, 48 | 0.09070867, 49 | ] 50 | ), 51 | ) 52 | ) 53 | 54 | pitch = ftaresnet.predict( 55 | os.path.join(TESTDIR, "resources", "melody", "pitch_test.wav"), 56 | out_step=0.001, 57 | ) 58 | 59 | print(pitch.shape) 60 | print(pitch[:10, 0].shape) 61 | 62 | assert np.all( 63 | np.isclose( 64 | pitch[:10, 0], 65 | np.array( 66 | [ 67 | 0.0, 68 | 0.00063241, 69 | 0.00126482, 70 | 0.00189723, 71 | 0.00252964, 72 | 0.00316206, 73 | 0.00379447, 74 | 0.00442688, 75 | 0.00505929, 76 | 0.0056917, 77 | ] 78 | ), 79 | ) 80 | ) 81 | 82 | 83 | def _predict_normalized_pitch(): 84 | from compiam.melody.pitch_extraction import FTAResNetCarnatic 85 | 86 | ftaresnet = FTAResNetCarnatic() 87 | ftaresnet.trained = True 88 | with pytest.raises(FileNotFoundError): 89 | ftaresnet.predict(os.path.join(TESTDIR, "resources", "melody", "hola.wav")) 90 | pitch = ftaresnet.predict( 91 | os.path.join(TESTDIR, "resources", "melody", "pitch_test.wav") 92 | ) 93 | 94 | from compiam.melody.tonic_identification import TonicIndianMultiPitch 95 | 96 | tonic_multipitch = TonicIndianMultiPitch() 97 | tonic = tonic_multipitch.extract( 98 | os.path.join(TESTDIR, "resources", "melody", "pitch_test.wav") 99 | ) 100 | 101 | assert isinstance(tonic, float) 102 | assert tonic == 157.64892578125 103 | 104 | normalised_pitch = ftaresnet.normalise_pitch(pitch, tonic) 105 | assert isinstance(normalised_pitch, np.ndarray) 106 | assert np.shape(normalised_pitch) == np.shape(pitch) 107 | 108 | 109 | @pytest.mark.torch 110 | def test_predict_torch(): 111 | _predict_pitch() 112 | 113 | 114 | @pytest.mark.full_ml 115 | def test_predict_full(): 116 | _predict_pitch() 117 | 118 | 119 | @pytest.mark.all 120 | def test_predict_all(): 121 | _predict_pitch() 122 | _predict_normalized_pitch() 123 | -------------------------------------------------------------------------------- /tests/resources/melody/pitch_test.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MTG/compIAM/7efdfa86ea8e20ab4950d0b78c9080bb9dc35448/tests/resources/melody/pitch_test.wav -------------------------------------------------------------------------------- /tests/resources/melody/raga_mapping.json: -------------------------------------------------------------------------------- 1 | {"6345e8fe-7061-4bdc-842c-dcfd4a379820": "A\u1e6d\u0101na", "98d46d9e-5100-4f24-bcd4-d0f95966c7cb": "Sindhubhairavi", "5ce23030-f71d-4f5d-9d76-f91c2c182392": "Dhany\u0101si", "cda9cbe9-c1aa-42bb-8b4c-e9dfc8af133c": "N\u0101\u1e6dakurinji", "bf4662d4-25c3-4cad-9249-4de2dc513c06": "Kaly\u0101\u1e47i", "1e7de02f-e77f-405a-a033-f31117aaf955": "Bhairav", "4ce0b18d-f4df-41cf-9b40-9166199506b4": "R\u012btigau\u1e37a", "64e5fb9e-5569-4e80-8e6c-f543af9469c7": "M\u0101lkauns", 
"0b3bbf97-0ec3-41da-add4-722d87329ec3": "Madhukauns", "a99e07d5-20a0-467b-8dcd-aa5a095177fd": "Lalit", "39821826-3327-41d7-9cd5-e22fe7b08360": "M\u014dhana\u1e41", "18b1acb9-dff6-47ec-873a-b2086c8d268d": "Madhyam\u0101vati", "c6b5f8d9-ebb4-46af-a020-6646fce2c77d": "S\u0101ma", "595de771-fcc2-414c-9df6-ae4ffd898549": "Hamsadhv\u0101ni", "7591faad-e68a-4550-b675-8082842c6056": "B\u0101g\u0113\u015br\u012b", "b6989a44-e85d-43cf-8b95-2eae5dcd28a2": "P\u016brv\u012bka\u1e37y\u0101\u1e47i", "db085e26-665d-4f4c-a2a5-95251c22b69e": "P\u016brv\u012bka\u1e37y\u0101\u1e47i", "e8a0bf54-13c6-4a09-922a-bfc744ddf38a": "Var\u0101\u1e37i", "3af5a361-923a-465d-864d-9c7ba0c04a47": "Sencuru\u1e6d\u1e6di", "e771a74d-545d-41d5-816b-43403a818b0c": "Bh\u016bp", "62b79291-73b0-4c77-a353-0f8bc6ed8362": "D\u0113\u015b", "e18fcaa7-1f9c-4f09-b627-0687481f4ec7": "\u0100nandabhairavi", "a4ec6633-1050-4207-8313-b017194c8fa0": "Kam\u0101s", "48b37bed-e847-4882-8a01-5c721e07f07d": "Yaman kaly\u0101\u1e47", "2165542c-45da-4301-af82-c2e7ddbe4768": "Bilahari", "f7fddfc0-8c1d-4dd2-90d5-5d51a99d61f8": "R\u0101g\u0113\u015br\u012b", "3eb7ba30-4b94-432e-9618-875ee57e01ab": "M\u0101rv\u0101", "1b05a564-059f-445b-b325-cf26318367e3": "Miy\u0101n malh\u0101r", "f6432fec-e9c2-4b09-9e73-c46086cbd8ea": "Ahira bhairav", "09c179f3-8b19-4792-a852-e9fa0090e409": "K\u0101pi", "54c4214c-05b9-4acc-8f77-6d5786e43a2e": "M\u0101r\u016bbih\u0101g", "ddff55ae-20f6-4d7d-ba9e-a6c10eeebd41": "Yadukula k\u0101\u1e41b\u014dji", "a9413dff-91d1-4e29-ad92-c04019dce5b8": "T\u014d\u1e0di", "defad4cc-48aa-4372-a31a-c43624930713": "D\u0113vag\u0101ndh\u0101ri", "a47e9d22-847a-46e8-b589-2a3537789f5f": "S\u0101v\u0113ri", "978f4c3c-6a12-43fd-8427-c414ee17b252": "Mukh\u0101ri", "40dbe1db-858f-4366-bef8-9076eb67340d": "\u015auddh s\u0101ra\u1e45g", "118401e7-8de8-4d81-9d8e-8070889e3fa8": "Darb\u0101ri", "9cedca68-4a9d-4170-bec3-0d1db1ff730e": "M\u0101y\u0101m\u0101\u1e37avagau\u1e37a", "839bb6b4-1577-4a78-9e5d-4906c6453274": "Sura\u1e6di", "a7d98897-e2fe-4d75-b9dc-e5b4dc88e1c6": "Bih\u0101g", "6f13484e-6fdd-402d-baf3-3835835454d0": "T\u014d\u1e0d\u012b", "42dd0ccb-f92a-4622-ae5d-a3be571b4939": "\u015ar\u012branjani", "2ed9379f-14c9-49af-8e4d-f9b63e96801f": "Alahaiy\u0101 bil\u0101val", "b143adaa-f1a6-4de4-8985-a5bd35e96279": "Bair\u0101gi", "700e1d92-7094-4c21-8a3b-d7b60c550edf": "Beh\u0101g", "a9ee554f-f146-43e9-b2d0-9bb31fd4b57d": "K\u0113d\u0101r", "0277eae5-3411-4b22-9fa8-1b347e7528d1": "\u1e62anmukhapriya", "df85a0a5-b1a8-42f1-a87b-3d7c7ee33fb4": "K\u0113d\u0101ragau\u1e37a", "ba3242d0-8d40-4d93-865e-d2e2497ea2a8": "Gau\u1e0d malh\u0101r", "46997b02-f09c-4969-8138-4e1861f61967": "\u015ar\u012b", "290674e0-d94c-41c1-ad99-f65fa22a1447": "Madhuvanti", "85ccf631-4cdf-4f6c-a841-0edfcf4255d1": "K\u0101mavardani", "a2f9f182-0ceb-4531-b286-b840b47a54b8": "\u015aankar\u0101bhara\u1e47a\u1e41", "e5c4d94a-b34a-42ef-acd7-f235612350e4": "Gau\u1e37a", "fa28470c-d413-44c7-94da-181f530cbfdd": "Puriy\u0101 dhana\u015br\u012b", "dd59147d-8775-44ff-a36b-0d9f15b31319": "Bil\u0101sakh\u0101n\u012b t\u014d\u1e0d\u012b", "bdd80890-44f1-4a93-8d93-06a418781f97": "\u015ar\u012b", "d9c603fa-875f-4b84-b851-c6a345427898": "\u0100bh\u014dg\u012b", "123b09bd-9901-4e64-a65a-10b02c9e0597": "Bhairavi", "f0866e71-33b2-47db-ab75-0808c41f2401": "K\u0101\u1e41bh\u014dji", "f972db4d-5d16-4f9a-9841-f313e1601aaa": "Karaharapriya", "ecd04ceb-b46c-47fc-9045-84ac9160e527": "Basant", "93c73081-bdf8-4eca-b325-d736b71e9b4b": "Kham\u0101j", "b08475a2-1049-433a-be8f-105bf14718fb": "Harik\u0101mbh\u014dji", 
"aa5f376f-06cd-4a69-9cc9-b1077104e0b0": "B\u0113ga\u1e0da", "9ebeb536-30a7-403f-8042-f7c1445c4b87": "N\u0101\u1e6da", "50bd048f-4482-4c5b-850c-9ad5e5ec46f1": "Huss\u0113n\u012b", "993d6cf6-dc89-4d23-9a9f-8eeed1524872": "Sah\u0101n\u0101", "063ea5a0-23b1-4bb5-8537-3d924fe8ebb3": "J\u014dg", "9a071e54-3eed-48b2-83a3-1a3fd683b8e0": "K\u0101na\u1e0da"} -------------------------------------------------------------------------------- /tests/resources/timbre/224030__akshaylaya__bheem-b-001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MTG/compIAM/7efdfa86ea8e20ab4950d0b78c9080bb9dc35448/tests/resources/timbre/224030__akshaylaya__bheem-b-001.wav -------------------------------------------------------------------------------- /tests/resources/timbre/225359__akshaylaya__cha-c-001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MTG/compIAM/7efdfa86ea8e20ab4950d0b78c9080bb9dc35448/tests/resources/timbre/225359__akshaylaya__cha-c-001.wav -------------------------------------------------------------------------------- /tests/rhythm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MTG/compIAM/7efdfa86ea8e20ab4950d0b78c9080bb9dc35448/tests/rhythm/__init__.py -------------------------------------------------------------------------------- /tests/rhythm/test_akshara_pulse.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | 4 | import numpy as np 5 | 6 | from compiam.data import TESTDIR 7 | 8 | 9 | def test_tool(): 10 | from compiam.rhythm.meter import AksharaPulseTracker 11 | 12 | apt = AksharaPulseTracker() 13 | 14 | with pytest.raises(FileNotFoundError): 15 | apt.extract(os.path.join(TESTDIR, "resources", "melody", "hola.wav")) 16 | 17 | pulses = apt.extract(os.path.join(TESTDIR, "resources", "melody", "pitch_test.wav")) 18 | assert pulses["aksharaPeriod"] == 0.316 19 | assert pulses["sections"] == { 20 | "startTime": np.array([0.0]), 21 | "endTime": np.array([2.009]), 22 | "label": "Kriti", 23 | } 24 | assert np.all( 25 | np.isclose( 26 | pulses["aksharaPulses"], np.array([0.75403026, 1.70514967, 1.97136237]) 27 | ) 28 | ) 29 | assert np.all( 30 | np.isclose( 31 | pulses["APcurve"], 32 | np.array( 33 | [ 34 | [0.0, 0.31496062992125984], 35 | [0.4992290249433111, 0.31496062992125984], 36 | [0.9984580498866213, 0.31496062992125984], 37 | [1.4976870748299325, 0.31496062992125984], 38 | ] 39 | ), 40 | ) 41 | ) 42 | -------------------------------------------------------------------------------- /tests/separation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MTG/compIAM/7efdfa86ea8e20ab4950d0b78c9080bb9dc35448/tests/separation/__init__.py -------------------------------------------------------------------------------- /tests/separation/test_cold_diff_sep.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | import compiam 8 | from compiam.data import TESTDIR 9 | from compiam.exceptions import ModelNotTrainedError 10 | 11 | 12 | def _separate(): 13 | from compiam.separation.singing_voice_extraction import ColdDiffSep 14 | 15 | cold_diff_sep = ColdDiffSep() 16 | with pytest.raises(ModelNotTrainedError): 17 | 
cold_diff_sep.separate(os.path.join(TESTDIR, "resources", "melody", "hola.wav")) 18 | cold_diff_sep.trained = True 19 | with pytest.raises(FileNotFoundError): 20 | cold_diff_sep.separate(os.path.join(TESTDIR, "resources", "melody", "hola.wav")) 21 | 22 | cold_diff_sep = compiam.load_model("separation:cold-diff-sep", data_home=TESTDIR) 23 | audio_in, sr = np.array(np.ones([2, 44150 * 10]), dtype=np.float32), 44100 24 | separation = cold_diff_sep.separate(audio_in, input_sr=sr) 25 | shutil.rmtree(os.path.join(TESTDIR, "models")) 26 | 27 | 28 | @pytest.mark.tensorflow 29 | def test_predict_tf(): 30 | _separate() 31 | 32 | 33 | @pytest.mark.full_ml 34 | def test_predict_full(): 35 | _separate() 36 | 37 | 38 | @pytest.mark.all 39 | def test_predict_all(): 40 | _separate() 41 | -------------------------------------------------------------------------------- /tests/separation/test_mixer_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | import shutil 4 | 5 | import numpy as np 6 | 7 | import compiam 8 | from compiam.data import TESTDIR 9 | from compiam.exceptions import ModelNotTrainedError 10 | 11 | 12 | def _separate(): 13 | from compiam.separation.music_source_separation import MixerModel 14 | 15 | mixer_model = MixerModel() 16 | with pytest.raises(ModelNotTrainedError): 17 | mixer_model.separate(os.path.join(TESTDIR, "resources", "melody", "hola.wav")) 18 | mixer_model.trained = True 19 | with pytest.raises(FileNotFoundError): 20 | mixer_model.separate(os.path.join(TESTDIR, "resources", "melody", "hola.wav")) 21 | 22 | mixer_model = compiam.load_model("separation:mixer-model", data_home=TESTDIR) 23 | audio_in, sr = np.array(np.ones([1, 44100]), dtype=np.float32), 44100 24 | separation = mixer_model.separate(audio_in, input_sr=sr) 25 | assert isinstance(separation, tuple) 26 | assert isinstance(separation[0], np.ndarray) 27 | assert isinstance(separation[1], np.ndarray) 28 | shutil.rmtree(os.path.join(TESTDIR, "models")) 29 | 30 | 31 | @pytest.mark.torch 32 | def test_predict_torch(): 33 | _separate() 34 | 35 | 36 | @pytest.mark.full_ml 37 | def test_predict_full(): 38 | _separate() 39 | 40 | 41 | @pytest.mark.all 42 | def test_predict_all(): 43 | _separate() 44 | -------------------------------------------------------------------------------- /tests/structure/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MTG/compIAM/7efdfa86ea8e20ab4950d0b78c9080bb9dc35448/tests/structure/__init__.py -------------------------------------------------------------------------------- /tests/structure/test_dhrupad_segmentation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | import subprocess 4 | 5 | import numpy as np 6 | 7 | from compiam import load_model 8 | from compiam.data import WORKDIR, TESTDIR 9 | from compiam.exceptions import ModelNotTrainedError 10 | 11 | 12 | def _test_model(): 13 | dbs = load_model("structure:dhrupad-bandish-segmentation") 14 | 15 | assert dbs.mode == "net" 16 | assert dbs.fold == 0 17 | 18 | dbs.update_fold(fold=1) 19 | assert dbs.fold == 1 20 | 21 | dbs.update_mode(mode="voc") 22 | assert dbs.mode == "voc" 23 | assert isinstance(dbs.model_path, dict) 24 | assert isinstance(dbs.loaded_model_path, str) 25 | 26 | with pytest.raises(FileNotFoundError): 27 | dbs.predict_stm(os.path.join(TESTDIR, "resources", "melody", "hola.wav")) 28 | 29 | # dbs.predict_stm( 
30 | # file_path=os.path.join(TESTDIR, "resources", "melody", "pitch_test.wav"), 31 | # output_dir=os.path.join(TESTDIR, "resources", "melody") 32 | # ) 33 | 34 | # subprocess.run( 35 | # ["rm", os.path.join(TESTDIR, "resources", "melody", "pitch_test.png")] 36 | # ) 37 | 38 | from compiam.structure.segmentation import DhrupadBandishSegmentation 39 | 40 | dbs = DhrupadBandishSegmentation() 41 | 42 | with pytest.raises(ModelNotTrainedError): 43 | dbs.predict_stm(os.path.join(TESTDIR, "resources", "melody", "pitch_test.wav")) 44 | 45 | 46 | @pytest.mark.torch 47 | def test_predict_tf(): 48 | _test_model() 49 | 50 | 51 | @pytest.mark.full_ml 52 | def test_predict_full(): 53 | _test_model() 54 | 55 | 56 | @pytest.mark.all 57 | def test_predict_all(): 58 | _test_model() 59 | -------------------------------------------------------------------------------- /tests/test_wrappers.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import mirdata 3 | from compiam import list_models, load_dataset, load_corpora, list_datasets 4 | 5 | 6 | ###################### 7 | # Test base operations 8 | ###################### 9 | 10 | 11 | def test_load_dataset(): 12 | dataset = load_dataset("mridangam_stroke") 13 | dataset_mirdata = mirdata.initialize("mridangam_stroke") 14 | assert type(dataset) == type(dataset_mirdata) 15 | with pytest.raises(ValueError): 16 | load_dataset("hola") 17 | 18 | 19 | def test_load_corpora(): 20 | with pytest.raises(ValueError): 21 | load_corpora(tradition="hola", token="test") 22 | with pytest.raises(ImportError): 23 | load_corpora("carnatic", token=None) 24 | 25 | 26 | def test_lists(): 27 | assert type(list_models()) is list 28 | assert type(list_datasets()) is list 29 | assert "melody:ftanet-carnatic" in list_models() 30 | assert "saraga_carnatic" in list_datasets() 31 | 32 | 33 | ######################## 34 | # Defining wrapper utils 35 | ######################## 36 | 37 | 38 | def _load_torch_models(): 39 | from compiam import load_model 40 | from compiam.structure.segmentation.dhrupad_bandish_segmentation import ( 41 | DhrupadBandishSegmentation, 42 | ) 43 | 44 | dhrupad_segmentation = load_model("structure:dhrupad-bandish-segmentation") 45 | assert type(dhrupad_segmentation) == DhrupadBandishSegmentation 46 | 47 | 48 | ############################################# 49 | # Load model with optional dependency testing 50 | ############################################# 51 | 52 | 53 | @pytest.mark.torch 54 | def test_load_torch_models_torch(): 55 | _load_torch_models() 56 | 57 | 58 | @pytest.mark.essentia_torch 59 | def test_load_torch_models_ess_torch(): 60 | _load_torch_models() 61 | 62 | 63 | @pytest.mark.full_ml 64 | def test_load_torch_models_full(): 65 | _load_torch_models() 66 | 67 | 68 | @pytest.mark.all 69 | def test_load_torch_models_all(): 70 | _load_torch_models() 71 | -------------------------------------------------------------------------------- /tests/timbre/test_mridangam_stroke_classification.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | import subprocess 4 | 5 | from compiam.data import TESTDIR 6 | from compiam.exceptions import DatasetNotLoadedError, ModelNotTrainedError 7 | 8 | test_files = [ 9 | os.path.join(TESTDIR, "resources", "timbre", "224030__akshaylaya__bheem-b-001.wav"), 10 | os.path.join(TESTDIR, "resources", "timbre", "225359__akshaylaya__cha-c-001.wav"), 11 | ] 12 | 13 | 14 | def _predict_strokes(): 15 | if not 
os.path.exists(os.path.join(TESTDIR, "resources", "mir_datasets")): 16 | subprocess.run(["mkdir", os.path.join(TESTDIR, "resources", "mir_datasets")]) 17 | from compiam.timbre.stroke_classification import MridangamStrokeClassification 18 | 19 | mridangam_stroke_class = MridangamStrokeClassification() 20 | with pytest.raises(DatasetNotLoadedError): 21 | mridangam_stroke_class.train_model() 22 | with pytest.raises(ValueError): 23 | mridangam_stroke_class.train_model(load_computed=True) 24 | with pytest.raises(ModelNotTrainedError): 25 | mridangam_stroke_class.predict(test_files) 26 | mridangam_stroke_class.load_mridangam_dataset( 27 | data_home=os.path.join(TESTDIR, "resources", "mir_datasets"), 28 | download=True, 29 | ) 30 | assert mridangam_stroke_class.list_strokes() == [ 31 | "bheem", 32 | "cha", 33 | "dheem", 34 | "dhin", 35 | "num", 36 | "ta", 37 | "tha", 38 | "tham", 39 | "thi", 40 | "thom", 41 | ] 42 | assert mridangam_stroke_class.dict_strokes() == { 43 | 0: "bheem", 44 | 1: "cha", 45 | 2: "dheem", 46 | 3: "dhin", 47 | 4: "num", 48 | 5: "ta", 49 | 6: "tha", 50 | 7: "tham", 51 | 8: "thi", 52 | 9: "thom", 53 | } 54 | acc = mridangam_stroke_class.train_model() 55 | assert acc > 90 56 | preds = mridangam_stroke_class.predict(test_files) 57 | assert isinstance(preds, dict) 58 | assert len(list(preds.keys())) == 2 59 | subprocess.run(["rm", "-r", os.path.join(TESTDIR, "resources", "mir_datasets")]) 60 | 61 | 62 | @pytest.mark.essentia 63 | def test_strokes_ess(): 64 | _predict_strokes() 65 | 66 | 67 | @pytest.mark.all 68 | def test_strokes_ess_all(): 69 | _predict_strokes() 70 | --------------------------------------------------------------------------------