├── .gitattributes ├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── __init__.py ├── requirements.txt ├── setup.py ├── src ├── __init__.py ├── audiotools.py ├── ensembles.py ├── models.py ├── models_dir │ ├── __init__.py │ ├── demucs │ │ ├── .gitignore │ │ ├── CODE_OF_CONDUCT.md │ │ ├── CONTRIBUTING.md │ │ ├── LICENSE │ │ ├── MANIFEST.in │ │ ├── Makefile │ │ ├── README.md │ │ ├── __init__.py │ │ ├── conf │ │ │ ├── config.yaml │ │ │ ├── dset │ │ │ │ ├── aetl.yaml │ │ │ │ ├── auto_extra_test.yaml │ │ │ │ ├── auto_mus.yaml │ │ │ │ ├── extra44.yaml │ │ │ │ ├── extra_mmi_goodclean.yaml │ │ │ │ ├── extra_test.yaml │ │ │ │ ├── musdb44.yaml │ │ │ │ ├── sdx23_bleeding.yaml │ │ │ │ └── sdx23_labelnoise.yaml │ │ │ ├── svd │ │ │ │ ├── base.yaml │ │ │ │ ├── base2.yaml │ │ │ │ └── default.yaml │ │ │ └── variant │ │ │ │ ├── default.yaml │ │ │ │ ├── example.yaml │ │ │ │ └── finetune.yaml │ │ ├── demucs.png │ │ ├── demucs │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ ├── apply.py │ │ │ ├── audio.py │ │ │ ├── augment.py │ │ │ ├── demucs.py │ │ │ ├── distrib.py │ │ │ ├── ema.py │ │ │ ├── evaluate.py │ │ │ ├── grids │ │ │ │ ├── __init__.py │ │ │ │ ├── _explorers.py │ │ │ │ ├── mdx.py │ │ │ │ ├── mdx_extra.py │ │ │ │ ├── mdx_refine.py │ │ │ │ ├── mmi.py │ │ │ │ ├── mmi_ft.py │ │ │ │ ├── repro.py │ │ │ │ ├── repro_ft.py │ │ │ │ └── sdx23.py │ │ │ ├── hdemucs.py │ │ │ ├── htdemucs.py │ │ │ ├── pretrained.py │ │ │ ├── py.typed │ │ │ ├── remote │ │ │ │ ├── files.txt │ │ │ │ ├── hdemucs_mmi.yaml │ │ │ │ ├── htdemucs.yaml │ │ │ │ ├── htdemucs_6s.yaml │ │ │ │ ├── htdemucs_ft.yaml │ │ │ │ ├── mdx.yaml │ │ │ │ ├── mdx_extra.yaml │ │ │ │ ├── mdx_extra_q.yaml │ │ │ │ ├── mdx_q.yaml │ │ │ │ ├── repro_mdx_a.yaml │ │ │ │ ├── repro_mdx_a_hybrid_only.yaml │ │ │ │ └── repro_mdx_a_time_only.yaml │ │ │ ├── repitch.py │ │ │ ├── repo.py │ │ │ ├── separate.py │ │ │ ├── solver.py │ │ │ ├── spec.py │ │ │ ├── states.py │ │ │ ├── svd.py │ │ │ ├── train.py │ │ │ ├── transformer.py │ │ │ ├── utils.py │ │ │ ├── wav.py │ │ │ └── wdemucs.py │ │ ├── docs │ │ │ ├── api.md │ │ │ ├── linux.md │ │ │ ├── mac.md │ │ │ ├── mdx.md │ │ │ ├── release.md │ │ │ ├── sdx23.md │ │ │ ├── training.md │ │ │ └── windows.md │ │ ├── environment-cpu.yml │ │ ├── environment-cuda.yml │ │ ├── hdemucs.py │ │ ├── hubconf.py │ │ ├── mypy.ini │ │ ├── outputs.tar.gz │ │ ├── requirements.txt │ │ ├── requirements_minimal.txt │ │ ├── setup.cfg │ │ ├── setup.py │ │ ├── test.mp3 │ │ └── tools │ │ │ ├── __init__.py │ │ │ ├── automix.py │ │ │ ├── bench.py │ │ │ ├── convert.py │ │ │ ├── export.py │ │ │ └── notpytest_test_pretrained.py │ ├── mdx │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── mdx_interface.py │ │ ├── mdxnet.py │ │ ├── modelparams │ │ │ ├── model_data.json │ │ │ └── model_name_mapper.json │ │ ├── modules.py │ │ ├── pyrb.py │ │ ├── spec_utils.py │ │ └── tfc_tdf_v3.py │ ├── mdxc │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── mdxc_interface.py │ │ ├── mdxnet.py │ │ ├── modelparams │ │ │ ├── mdx_c_configs │ │ │ │ ├── model1.yaml │ │ │ │ ├── model2.yaml │ │ │ │ ├── model3.yaml │ │ │ │ ├── modelA.yaml │ │ │ │ ├── modelB.yaml │ │ │ │ ├── model_2_stem_061321.yaml │ │ │ │ ├── model_2_stem_full_band.yaml │ │ │ │ ├── model_2_stem_full_band_2.yaml │ │ │ │ ├── model_2_stem_full_band_3.yaml │ │ │ │ ├── model_2_stem_full_band_4.yaml │ │ │ │ ├── model_2_stem_full_band_8k.yaml │ │ │ │ └── sndfx.yaml │ │ │ ├── model_data.json │ │ │ └── model_name_mapper.json │ │ ├── modules.py │ │ ├── pyrb.py │ │ ├── spec_utils.py │ │ └── tfc_tdf_v3.py │ ├── models.json │ └── 
vr_network │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── layers.py │ │ ├── layers_new.py │ │ ├── model_param_init.py │ │ ├── modelparams │ │ ├── 1band_sr16000_hl512.json │ │ ├── 1band_sr32000_hl512.json │ │ ├── 1band_sr33075_hl384.json │ │ ├── 1band_sr44100_hl1024.json │ │ ├── 1band_sr44100_hl256.json │ │ ├── 1band_sr44100_hl512.json │ │ ├── 1band_sr44100_hl512_cut.json │ │ ├── 1band_sr44100_hl512_nf1024.json │ │ ├── 2band_32000.json │ │ ├── 2band_44100_lofi.json │ │ ├── 2band_48000.json │ │ ├── 3band_44100.json │ │ ├── 3band_44100_mid.json │ │ ├── 3band_44100_msb2.json │ │ ├── 4band_44100.json │ │ ├── 4band_44100_mid.json │ │ ├── 4band_44100_msb.json │ │ ├── 4band_44100_msb2.json │ │ ├── 4band_44100_reverse.json │ │ ├── 4band_44100_sw.json │ │ ├── 4band_v2.json │ │ ├── 4band_v2_sn.json │ │ ├── 4band_v3.json │ │ ├── 4band_v3_sn.json │ │ ├── ensemble.json │ │ └── model_data.json │ │ ├── nets.py │ │ ├── nets_new.py │ │ ├── pyrb.py │ │ ├── spec_utils.py │ │ └── vr_interface.py ├── pipelines.py └── utils │ ├── __init__.py │ ├── fastio.py │ └── get_models.py └── tests ├── __init__.py ├── models_status.json ├── test_models.py └── utils ├── __init__.py ├── test_fastio.py └── test_get_models.py /.gitattributes: -------------------------------------------------------------------------------- 1 | GUI/lib_v5/demo.ipynb filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore virtual environment 2 | venv/ 3 | 4 | # Ignore compiled Python files 5 | *.pyc 6 | 7 | # Ignore logs 8 | *.log 9 | 10 | # Ignore database files 11 | *.db 12 | 13 | # Ignore cache files 14 | __pycache__/ 15 | 16 | # Ignore environment variables file 17 | .env 18 | 19 | # Ignore local development settings 20 | settings_local.py 21 | 22 | # Ignore IDE files 23 | .vscode/ 24 | .idea/ 25 | 26 | # Ignore package dependencies 27 | venv/ 28 | 29 | *.pkl 30 | ffmpeg* 31 | 32 | **/weights/ 33 | 34 | base.mp3 35 | drums.mp3 36 | vocals.mp3 37 | other.mp3 38 | 39 | **/*.wav 40 | **/*.mp3 41 | **/*.flac 42 | **/.pytest_cache 43 | 44 | build/ 45 | dependency_links.txt 46 | PKG-INFO 47 | requires.txt 48 | SOURCES.txt 49 | top_level.txt -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | Mohannad.Barakat@fau.de. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. 
This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 NextAudioGen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Static Badge](https://img.shields.io/badge/passing-tests-blue) 2 | ![Static Badge](https://img.shields.io/badge/pre_release-red) 3 | [Buy Me A Coffee](https://www.buymeacoffee.com/mohannadbarakat) 4 | 5 | 6 | # Ultimate Vocal Remover API v0.1 7 | 8 | This is an API for ultimate vocal removal. It is designed to be expandable with new models/algorithms while maintaining a simple interface. 9 | [Colab demo](https://colab.research.google.com/drive/1qf17AV5KU_8v0f29zUnPHQBbr3iX8bu6?usp=sharing) 10 | 11 | 12 | # Install 13 | If you intend to edit the code: 14 | ```bash 15 | git clone https://github.com/NextAudioGen/ultimatevocalremover_api.git 16 | cd ultimatevocalremover_api 17 | pip install . 18 | ``` 19 | # Usage 20 | ```python 21 | import uvr 22 | from uvr import models 23 | from uvr.utils.get_models import download_all_models 24 | import torch 25 | import audiofile 26 | import json 27 | 28 | models_json = json.load(open("/content/ultimatevocalremover_api/src/models_dir/models.json", "r")) 29 | download_all_models(models_json) 30 | name = "<path_to_your_audio_file>" 31 | device = "cuda" 32 | 33 | demucs = models.Demucs(name="hdemucs_mmi", other_metadata={"segment":2, "split":True}, device=device, logger=None) 34 | 35 | # Separating an audio file 36 | res = demucs(name) 37 | separated_audio = res["separated"] 38 | vocals = separated_audio["vocals"] 39 | bass = separated_audio["bass"] 40 | drums = separated_audio["drums"] 41 | other = separated_audio["other"] 42 | ``` 43 | # Architecture 44 | ```text 45 | Ultimate Vocal Remover API 46 | ├── src 47 | │ ├── audiotools.py 48 | │ ├── models.py 49 | │ ├── ensembles.py 50 | │ ├── pipelines.py 51 | │ ├── utils/ 52 | │ ├── audio_tools/ 53 | │ └── models_dir 54 | │ ├── Each implementation of a model is added here as a single directory. 55 | │ └── models.json (this is used to download the models) 56 | ├── docs 57 | │ ├── models/ 58 | │ │ └── All model docs go here, each in its own directory. 59 | │ ├── ensembles/ 60 | │ │ └── All ensemble docs go here, each in its own directory. 61 | │ ├── pipelines/ 62 | │ │ └── All pipeline docs go here, each in its own directory. 63 | │ ├── audio_tools/ 64 | │ └── utils/ 65 | └── tests/ 66 | ├── test_models.py 67 | ├── test_ensembles.py 68 | ├── test_pipelines.py 69 | ├── test_audiotools.py 70 | └── utils/ 71 | ``` 72 | **audiotools.py:** Interface for all audio tools \ 73 | **models.py:** Interface for all models, all following the consistent interface below \ 74 | **utils/:** Read and write utilities for audio, models, etc.
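For example, to save the separated stems from the Usage snippet above, here is a minimal sketch (it assumes the stems come back as torch tensors or NumPy arrays, and uses 44100 Hz, Demucs' native sample rate):

```python
import numpy as np
import audiofile

# Save each stem from the `separated_audio` dict in the Usage example.
for stem, waveform in separated_audio.items():
    # Stems may be torch tensors; convert to NumPy before writing.
    data = waveform.cpu().numpy() if hasattr(waveform, "cpu") else np.asarray(waveform)
    audiofile.write(f"{stem}.wav", data, 44100)
```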
\ 75 | 76 | ## All models, pipelines and ensembles follow this interface: 77 | ```python 78 | class BaseModel: 79 | def __init__(self, name:str, architecture:str, other_metadata:dict, device=None, logger=None) 80 | def __call__(self, audio:Union[npt.NDArray, str], sampling_rate:int=None, **kwargs)->dict 81 | # @singledispatch 82 | def predict(self, audio:npt.NDArray, sampling_rate:int, **kwargs)->dict 83 | def predict_path(self, audio:str, **kwargs)->dict 84 | def separate(self, audio:npt.NDArray, sampling_rate:int=None)->dict 85 | def __repr__(self) 86 | def to(self, device:str) 87 | def update_metadata(self, metadata:dict) 88 | @staticmethod 89 | def list_models()->list 90 | 91 | ``` 92 | 93 | # Contribution 94 | If you like this, leave a star, fork it, and definitely you are welcomed to [buy me a coffee](https://www.buymeacoffee.com/mohannadbarakat). 95 | 96 | Also, please open issues, make pull requests but remember to follow the structure and interfaces. Moreover, we are trying to build automated testing, we are aware that the current tests are so naive but we are working on it. So please make sure to add some tests to your new code as well. 97 | 98 | # Refrences 99 | ## code 100 | Code and weights from these sources used in developing this library: 101 | - [MDX-Net](https://github.com/kuielab/mdx-net/tree/main) This is the original MDX architecture implementation. 102 | - [MDXC and demucs](https://github.com/ZFTurbo/MVSEP-MDX23-music-separation-model/tree/main) This repo has a clever ensumbling methods for MDX, Demucs 3, and Demucs 4. Moreover they have the wieghts for their finetuned MDX open (available under MDXC implementation [here](/src/models_dir/mdxc/)). 103 | - [Demucs](https://github.com/facebookresearch/demucs/tree/e976d93ecc3865e5757426930257e200846a520a) This is the original implementation of the model. 104 | - [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui/tree/master) This is one of the best vocal removers. A lot of ideas in this repo were borrowed from here. 105 | - [weights](https://github.com/TRvlvr/model_repo/releases/tag/all_public_uvr_models) Most of the models right now are comming from this repo. 106 | 107 | ## Papers 108 | - [Benchmarks and leaderboards for sound demixing 109 | tasks](https://arxiv.org/pdf/2305.07489.pdf) 110 | - [MULTI-SCALE MULTI-BAND DENSENETS FOR AUDIO SOURCE SEPARATION](https://arxiv.org/pdf/1706.09588.pdf) 111 | - [HYBRID TRANSFORMERS FOR MUSIC SOURCE SEPARATION](https://arxiv.org/pdf/2211.08553.pdf) 112 | - [KUIELab-MDX-Net: A Two-Stream Neural Network for Music Demixing](https://arxiv.org/abs/2111.12203) 113 | 114 | # Core Developers 115 | 116 | - [Mohannad Barakat](https://github.com/mohannadEhabBarakat/) 117 | - [Noha Magdy](https://github.com/Noha-Magdy) 118 | - [Mohtady Ehab](https://github.com/Mohtady-Ehab) 119 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/__init__.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # please make sure you have already a pytorch install that is cuda enabled! 
2 | dora-search>=0.1.12 3 | diffq>=0.2.1 4 | einops 5 | flake8 6 | hydra-colorlog>=1.1 7 | hydra-core>=1.1 8 | julius>=0.2.3 9 | lameenc>=1.2 10 | museval 11 | mypy 12 | openunmix 13 | pyyaml 14 | submitit 15 | torch>=1.8.1 16 | torchaudio>=0.8,<2.1 17 | tqdm 18 | treetable 19 | soundfile>=0.10.3 20 | pytest 21 | librosa 22 | audiofile 23 | pytorch_lightning 24 | onnxruntime 25 | onnx 26 | onnx2pytorch 27 | ml_collections 28 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open('requirements.txt') as f: 4 | required = f.read().splitlines() 5 | 6 | setup( 7 | name='uvr', 8 | version='0.1', 9 | description='Ultimate Vocal Remover API', 10 | author='Mohannad Barakat', 11 | author_email="Mohannad.Barakat@fau.de", 12 | license='MIT', 13 | # map the src/ layout onto the 'uvr' package so subpackages are installed 14 | packages=['uvr'] + ['uvr.' + p for p in find_packages(where='src')], 15 | package_dir={'uvr': 'src'}, 16 | long_description=open('README.md').read(), 17 | long_description_content_type='text/markdown', 18 | install_requires=required, 19 | url="https://github.com/NextAudioGen/ultimatevocalremover_api.git", 20 | package_data={ 21 | 'uvr': ['**/*.txt', '**/*.t7', '**/*.pth', '**/*.json', '**/*.yaml', '**/*.yml'] 22 | } 23 | ) -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/__init__.py -------------------------------------------------------------------------------- /src/audiotools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/audiotools.py -------------------------------------------------------------------------------- /src/ensembles.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/ensembles.py -------------------------------------------------------------------------------- /src/models_dir/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/models_dir/__init__.py -------------------------------------------------------------------------------- /src/models_dir/demucs/.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | __pycache__ 3 | Session.vim 4 | /build 5 | /dist 6 | /lab 7 | /metadata 8 | /notebooks 9 | /outputs 10 | /release 11 | /release_models 12 | /separated 13 | /tests 14 | /trash 15 | /misc 16 | /mdx 17 | .mypy_cache 18 | -------------------------------------------------------------------------------- /src/models_dir/demucs/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics,
gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at . All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /src/models_dir/demucs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Demucs 2 | 3 | ## Pull Requests 4 | 5 | In order to accept your pull request, we need you to submit a CLA. You only need 6 | to do this once to work on any of Facebook's open source projects. 7 | 8 | Complete your CLA here: 9 | 10 | Demucs is the implementation of a research paper. 11 | Therefore, we do not plan on accepting many pull requests for new features. 12 | We certainly welcome them for bug fixes. 13 | 14 | 15 | ## Issues 16 | 17 | We use GitHub issues to track public bugs. Please ensure your description is 18 | clear and has sufficient instructions to be able to reproduce the issue. 19 | 20 | 21 | ## License 22 | By contributing to this repository, you agree that your contributions will be licensed 23 | under the LICENSE file in the root directory of this source tree. 24 | -------------------------------------------------------------------------------- /src/models_dir/demucs/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Meta Platforms, Inc. and affiliates. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /src/models_dir/demucs/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-exclude env * 2 | recursive-include conf *.yaml 3 | include Makefile 4 | include LICENSE 5 | include demucs.png 6 | include outputs.tar.gz 7 | include test.mp3 8 | include requirements.txt 9 | include requirements_minimal.txt 10 | include mypy.ini 11 | include demucs/py.typed 12 | include demucs/remote/*.txt 13 | include demucs/remote/*.yaml 14 | -------------------------------------------------------------------------------- /src/models_dir/demucs/Makefile: -------------------------------------------------------------------------------- 1 | all: linter tests 2 | 3 | linter: 4 | flake8 demucs 5 | mypy demucs 6 | 7 | tests: test_train test_eval 8 | 9 | test_train: tests/musdb 10 | _DORA_TEST_PATH=/tmp/demucs python3 -m dora run --clear \ 11 | dset.musdb=./tests/musdb dset.segment=4 dset.shift=2 epochs=2 model=demucs \ 12 | demucs.depth=2 demucs.channels=4 test.sdr=false misc.num_workers=0 test.workers=0 \ 13 | test.shifts=0 14 | 15 | test_eval: 16 | python3 -m demucs -n demucs_unittest test.mp3 17 | python3 -m demucs -n demucs_unittest --two-stems=vocals test.mp3 18 | python3 -m demucs -n demucs_unittest --mp3 test.mp3 19 | python3 -m demucs -n demucs_unittest --flac --int24 test.mp3 20 | python3 -m demucs -n demucs_unittest --int24 --clip-mode clamp test.mp3 21 | python3 -m demucs -n demucs_unittest --segment 8 test.mp3 22 | python3 -m demucs.api -n demucs_unittest --segment 8 test.mp3 23 | python3 -m demucs --list-models 24 | 25 | tests/musdb: 26 | test -e tests || mkdir tests 27 | python3 -c 'import musdb; musdb.DB("tests/tmp", download=True)' 28 | musdbconvert tests/tmp tests/musdb 29 | 30 | dist: 31 | python3 setup.py sdist 32 | 33 | clean: 34 | rm -r dist build *.egg-info 35 | 36 | .PHONY: linter dist test_train test_eval 37 | -------------------------------------------------------------------------------- /src/models_dir/demucs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/models_dir/demucs/__init__.py -------------------------------------------------------------------------------- /src/models_dir/demucs/conf/dset/aetl.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # automix dataset with Musdb, extra training data and the test set of Musdb. 4 | # This used even more remixes than auto_extra_test. 5 | dset: 6 | wav: /checkpoint/defossez/datasets/aetl 7 | samplerate: 44100 8 | channels: 2 9 | epochs: 320 10 | max_batches: 500 11 | 12 | augment: 13 | shift_same: true 14 | scale: 15 | proba: 0. 16 | remix: 17 | proba: 0 18 | repitch: 19 | proba: 0 20 | -------------------------------------------------------------------------------- /src/models_dir/demucs/conf/dset/auto_extra_test.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # automix dataset with Musdb, extra training data and the test set of Musdb. 4 | dset: 5 | wav: /checkpoint/defossez/datasets/automix_extra_test2 6 | samplerate: 44100 7 | channels: 2 8 | epochs: 320 9 | max_batches: 500 10 | 11 | augment: 12 | shift_same: true 13 | scale: 14 | proba: 0. 
15 | remix: 16 | proba: 0 17 | repitch: 18 | proba: 0 19 | -------------------------------------------------------------------------------- /src/models_dir/demucs/conf/dset/auto_mus.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Automix dataset based on musdb train set. 4 | dset: 5 | wav: /checkpoint/defossez/datasets/automix_musdb 6 | samplerate: 44100 7 | channels: 2 8 | epochs: 360 9 | max_batches: 300 10 | test: 11 | every: 4 12 | 13 | augment: 14 | shift_same: true 15 | scale: 16 | proba: 0.5 17 | remix: 18 | proba: 0 19 | repitch: 20 | proba: 0 21 | -------------------------------------------------------------------------------- /src/models_dir/demucs/conf/dset/extra44.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Musdb + extra tracks 4 | dset: 5 | wav: /checkpoint/defossez/datasets/allstems_44/ 6 | samplerate: 44100 7 | channels: 2 8 | epochs: 320 9 | -------------------------------------------------------------------------------- /src/models_dir/demucs/conf/dset/extra_mmi_goodclean.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Musdb + extra tracks 4 | dset: 5 | wav: /checkpoint/defossez/datasets/allstems_44/ 6 | wav2: /checkpoint/defossez/datasets/mmi44_goodclean 7 | samplerate: 44100 8 | channels: 2 9 | wav2_weight: null 10 | wav2_valid: false 11 | valid_samples: 100 12 | epochs: 1200 13 | -------------------------------------------------------------------------------- /src/models_dir/demucs/conf/dset/extra_test.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Musdb + extra tracks + test set from musdb. 4 | dset: 5 | wav: /checkpoint/defossez/datasets/allstems_test_44/ 6 | samplerate: 44100 7 | channels: 2 8 | epochs: 320 9 | max_batches: 700 10 | test: 11 | sdr: false 12 | every: 500 13 | -------------------------------------------------------------------------------- /src/models_dir/demucs/conf/dset/musdb44.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | dset: 4 | samplerate: 44100 5 | channels: 2 -------------------------------------------------------------------------------- /src/models_dir/demucs/conf/dset/sdx23_bleeding.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Musdb + extra tracks 4 | dset: 5 | wav: /shared/home/defossez/data/datasets/moisesdb23_bleeding_v1.0/ 6 | use_musdb: false 7 | samplerate: 44100 8 | channels: 2 9 | backend: soundfile # must use soundfile as some mixture would clip with sox. 10 | epochs: 320 11 | -------------------------------------------------------------------------------- /src/models_dir/demucs/conf/dset/sdx23_labelnoise.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Musdb + extra tracks 4 | dset: 5 | wav: /shared/home/defossez/data/datasets/moisesdb23_labelnoise_v1.0 6 | use_musdb: false 7 | samplerate: 44100 8 | channels: 2 9 | backend: soundfile # must use soundfile as some mixture would clip with sox. 
10 | epochs: 320 11 | -------------------------------------------------------------------------------- /src/models_dir/demucs/conf/svd/base.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | svd: 4 | penalty: 0 5 | min_size: 1 6 | dim: 50 7 | niters: 4 8 | powm: false 9 | proba: 1 10 | conv_only: false 11 | convtr: false # ideally this should be true, but some models were trained with this to false. 12 | 13 | optim: 14 | beta2: 0.9998 -------------------------------------------------------------------------------- /src/models_dir/demucs/conf/svd/base2.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | svd: 4 | penalty: 0 5 | min_size: 1 6 | dim: 100 7 | niters: 4 8 | powm: false 9 | proba: 1 10 | conv_only: false 11 | convtr: true 12 | 13 | optim: 14 | beta2: 0.9998 -------------------------------------------------------------------------------- /src/models_dir/demucs/conf/svd/default.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | -------------------------------------------------------------------------------- /src/models_dir/demucs/conf/variant/default.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | -------------------------------------------------------------------------------- /src/models_dir/demucs/conf/variant/example.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | model: hdemucs 4 | hdemucs: 5 | channels: 32 -------------------------------------------------------------------------------- /src/models_dir/demucs/conf/variant/finetune.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | epochs: 4 4 | batch_size: 16 5 | optim: 6 | lr: 0.0006 7 | test: 8 | every: 1 9 | sdr: false 10 | dset: 11 | segment: 28 12 | shift: 2 13 | 14 | augment: 15 | scale: 16 | proba: 0 17 | shift_same: true 18 | remix: 19 | proba: 0 20 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/models_dir/demucs/demucs.png -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | __version__ = "4.1.0a2" 8 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/augment.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """Data augmentations. 
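All transforms below operate on waveform tensors of shape (batch, sources, channels, time); the random behavior is only applied in training mode.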
7 | """ 8 | 9 | import random 10 | import torch as th 11 | from torch import nn 12 | 13 | 14 | class Shift(nn.Module): 15 | """ 16 | Randomly shift audio in time by up to `shift` samples. 17 | """ 18 | def __init__(self, shift=8192, same=False): 19 | super().__init__() 20 | self.shift = shift 21 | self.same = same 22 | 23 | def forward(self, wav): 24 | batch, sources, channels, time = wav.size() 25 | length = time - self.shift 26 | if self.shift > 0: 27 | if not self.training: 28 | wav = wav[..., :length] 29 | else: 30 | srcs = 1 if self.same else sources 31 | offsets = th.randint(self.shift, [batch, srcs, 1, 1], device=wav.device) 32 | offsets = offsets.expand(-1, sources, channels, -1) 33 | indexes = th.arange(length, device=wav.device) 34 | wav = wav.gather(3, indexes + offsets) 35 | return wav 36 | 37 | 38 | class FlipChannels(nn.Module): 39 | """ 40 | Flip left-right channels. 41 | """ 42 | def forward(self, wav): 43 | batch, sources, channels, time = wav.size() 44 | if self.training and wav.size(2) == 2: 45 | left = th.randint(2, (batch, sources, 1, 1), device=wav.device) 46 | left = left.expand(-1, -1, -1, time) 47 | right = 1 - left 48 | wav = th.cat([wav.gather(2, left), wav.gather(2, right)], dim=2) 49 | return wav 50 | 51 | 52 | class FlipSign(nn.Module): 53 | """ 54 | Random sign flip. 55 | """ 56 | def forward(self, wav): 57 | batch, sources, channels, time = wav.size() 58 | if self.training: 59 | signs = th.randint(2, (batch, sources, 1, 1), device=wav.device, dtype=th.float32) 60 | wav = wav * (2 * signs - 1) 61 | return wav 62 | 63 | 64 | class Remix(nn.Module): 65 | """ 66 | Shuffle sources to make new mixes. 67 | """ 68 | def __init__(self, proba=1, group_size=4): 69 | """ 70 | Shuffle sources within one batch. 71 | Each batch is divided into groups of size `group_size` and shuffling is done within 72 | each group separatly. This allow to keep the same probability distribution no matter 73 | the number of GPUs. Without this grouping, using more GPUs would lead to a higher 74 | probability of keeping two sources from the same track together which can impact 75 | performance. 
76 | """ 77 | super().__init__() 78 | self.proba = proba 79 | self.group_size = group_size 80 | 81 | def forward(self, wav): 82 | batch, streams, channels, time = wav.size() 83 | device = wav.device 84 | 85 | if self.training and random.random() < self.proba: 86 | group_size = self.group_size or batch 87 | if batch % group_size != 0: 88 | raise ValueError(f"Batch size {batch} must be divisible by group size {group_size}") 89 | groups = batch // group_size 90 | wav = wav.view(groups, group_size, streams, channels, time) 91 | permutations = th.argsort(th.rand(groups, group_size, streams, 1, 1, device=device), 92 | dim=1) 93 | wav = wav.gather(1, permutations.expand(-1, -1, -1, channels, time)) 94 | wav = wav.view(batch, streams, channels, time) 95 | return wav 96 | 97 | 98 | class Scale(nn.Module): 99 | def __init__(self, proba=1., min=0.25, max=1.25): 100 | super().__init__() 101 | self.proba = proba 102 | self.min = min 103 | self.max = max 104 | 105 | def forward(self, wav): 106 | batch, streams, channels, time = wav.size() 107 | device = wav.device 108 | if self.training and random.random() < self.proba: 109 | scales = th.empty(batch, streams, 1, 1, device=device).uniform_(self.min, self.max) 110 | wav *= scales 111 | return wav 112 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/distrib.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """Distributed training utilities. 7 | """ 8 | import logging 9 | import pickle 10 | 11 | import numpy as np 12 | import torch 13 | from torch.utils.data.distributed import DistributedSampler 14 | from torch.utils.data import DataLoader, Subset 15 | from torch.nn.parallel.distributed import DistributedDataParallel 16 | 17 | from dora import distrib as dora_distrib 18 | 19 | logger = logging.getLogger(__name__) 20 | rank = 0 21 | world_size = 1 22 | 23 | 24 | def init(): 25 | global rank, world_size 26 | if not torch.distributed.is_initialized(): 27 | dora_distrib.init() 28 | rank = dora_distrib.rank() 29 | world_size = dora_distrib.world_size() 30 | 31 | 32 | def average(metrics, count=1.): 33 | if isinstance(metrics, dict): 34 | keys, values = zip(*sorted(metrics.items())) 35 | values = average(values, count) 36 | return dict(zip(keys, values)) 37 | if world_size == 1: 38 | return metrics 39 | tensor = torch.tensor(list(metrics) + [1], device='cuda', dtype=torch.float32) 40 | tensor *= count 41 | torch.distributed.all_reduce(tensor, op=torch.distributed.ReduceOp.SUM) 42 | return (tensor[:-1] / tensor[-1]).cpu().numpy().tolist() 43 | 44 | 45 | def wrap(model): 46 | if world_size == 1: 47 | return model 48 | else: 49 | return DistributedDataParallel( 50 | model, 51 | # find_unused_parameters=True, 52 | device_ids=[torch.cuda.current_device()], 53 | output_device=torch.cuda.current_device()) 54 | 55 | 56 | def barrier(): 57 | if world_size > 1: 58 | torch.distributed.barrier() 59 | 60 | 61 | def share(obj=None, src=0): 62 | if world_size == 1: 63 | return obj 64 | size = torch.empty(1, device='cuda', dtype=torch.long) 65 | if rank == src: 66 | dump = pickle.dumps(obj) 67 | size[0] = len(dump) 68 | torch.distributed.broadcast(size, src=src) 69 | # size variable is now set to the length of pickled obj in all processes 70 | 71 | if rank == src: 
72 | buffer = torch.from_numpy(np.frombuffer(dump, dtype=np.uint8).copy()).cuda() 73 | else: 74 | buffer = torch.empty(size[0].item(), device='cuda', dtype=torch.uint8) 75 | torch.distributed.broadcast(buffer, src=src) 76 | # buffer variable is now set to pickled obj in all processes 77 | 78 | if rank != src: 79 | obj = pickle.loads(buffer.cpu().numpy().tobytes()) 80 | logger.debug(f"Shared object of size {len(buffer)}") 81 | return obj 82 | 83 | 84 | def loader(dataset, *args, shuffle=False, klass=DataLoader, **kwargs): 85 | """ 86 | Create a dataloader properly in case of distributed training. 87 | If a gradient is going to be computed you must set `shuffle=True`. 88 | """ 89 | if world_size == 1: 90 | return klass(dataset, *args, shuffle=shuffle, **kwargs) 91 | 92 | if shuffle: 93 | # train means we will compute backward; we use DistributedSampler 94 | sampler = DistributedSampler(dataset) 95 | # We ignore shuffle, DistributedSampler already shuffles 96 | return klass(dataset, *args, **kwargs, sampler=sampler) 97 | else: 98 | # We make a manual shard, as DistributedSampler otherwise replicates some examples 99 | dataset = Subset(dataset, list(range(rank, len(dataset), world_size))) 100 | return klass(dataset, *args, shuffle=shuffle, **kwargs) 101 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/ema.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # Inspired by https://github.com/rwightman/pytorch-image-models 8 | from contextlib import contextmanager 9 | 10 | import torch 11 | 12 | from .states import swap_state 13 | 14 | 15 | class ModelEMA: 16 | """ 17 | Perform EMA on a model. You can switch to the EMA weights temporarily 18 | with the `swap` method. 19 | 20 | ema = ModelEMA(model) 21 | with ema.swap(): 22 | # compute valid metrics with averaged model.
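    Call `ema.update()` after each training step to refresh the running average.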
23 | """ 24 | def __init__(self, model, decay=0.9999, unbias=True, device='cpu'): 25 | self.decay = decay 26 | self.model = model 27 | self.state = {} 28 | self.count = 0 29 | self.device = device 30 | self.unbias = unbias 31 | 32 | self._init() 33 | 34 | def _init(self): 35 | for key, val in self.model.state_dict().items(): 36 | if val.dtype != torch.float32: 37 | continue 38 | device = self.device or val.device 39 | if key not in self.state: 40 | self.state[key] = val.detach().to(device, copy=True) 41 | 42 | def update(self): 43 | if self.unbias: 44 | self.count = self.count * self.decay + 1 45 | w = 1 / self.count 46 | else: 47 | w = 1 - self.decay 48 | for key, val in self.model.state_dict().items(): 49 | if val.dtype != torch.float32: 50 | continue 51 | device = self.device or val.device 52 | self.state[key].mul_(1 - w) 53 | self.state[key].add_(val.detach().to(device), alpha=w) 54 | 55 | @contextmanager 56 | def swap(self): 57 | with swap_state(self.model, self.state): 58 | yield 59 | 60 | def state_dict(self): 61 | return {'state': self.state, 'count': self.count} 62 | 63 | def load_state_dict(self, state): 64 | self.count = state['count'] 65 | for k, v in state['state'].items(): 66 | self.state[k].copy_(v) 67 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/grids/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/models_dir/demucs/demucs/grids/__init__.py -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/grids/_explorers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | from dora import Explorer 7 | import treetable as tt 8 | 9 | 10 | class MyExplorer(Explorer): 11 | test_metrics = ['nsdr', 'sdr_med'] 12 | 13 | def get_grid_metrics(self): 14 | """Return the metrics that should be displayed in the tracking table. 
15 | """ 16 | return [ 17 | tt.group("train", [ 18 | tt.leaf("epoch"), 19 | tt.leaf("reco", ".3f"), 20 | ], align=">"), 21 | tt.group("valid", [ 22 | tt.leaf("penalty", ".1f"), 23 | tt.leaf("ms", ".1f"), 24 | tt.leaf("reco", ".2%"), 25 | tt.leaf("breco", ".2%"), 26 | tt.leaf("b_nsdr", ".2f"), 27 | # tt.leaf("b_nsdr_drums", ".2f"), 28 | # tt.leaf("b_nsdr_bass", ".2f"), 29 | # tt.leaf("b_nsdr_other", ".2f"), 30 | # tt.leaf("b_nsdr_vocals", ".2f"), 31 | ], align=">"), 32 | tt.group("test", [ 33 | tt.leaf(name, ".2f") 34 | for name in self.test_metrics 35 | ], align=">") 36 | ] 37 | 38 | def process_history(self, history): 39 | train = { 40 | 'epoch': len(history), 41 | } 42 | valid = {} 43 | test = {} 44 | best_v_main = float('inf') 45 | breco = float('inf') 46 | for metrics in history: 47 | train.update(metrics['train']) 48 | valid.update(metrics['valid']) 49 | if 'main' in metrics['valid']: 50 | best_v_main = min(best_v_main, metrics['valid']['main']['loss']) 51 | valid['bmain'] = best_v_main 52 | valid['breco'] = min(breco, metrics['valid']['reco']) 53 | breco = valid['breco'] 54 | if (metrics['valid']['loss'] == metrics['valid']['best'] or 55 | metrics['valid'].get('nsdr') == metrics['valid']['best']): 56 | for k, v in metrics['valid'].items(): 57 | if k.startswith('reco_'): 58 | valid['b_' + k[len('reco_'):]] = v 59 | if k.startswith('nsdr'): 60 | valid[f'b_{k}'] = v 61 | if 'test' in metrics: 62 | test.update(metrics['test']) 63 | metrics = history[-1] 64 | return {"train": train, "valid": valid, "test": test} 65 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/grids/mdx.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """ 7 | Main training for the Track A MDX models. 8 | """ 9 | 10 | from ._explorers import MyExplorer 11 | from ..train import main 12 | 13 | 14 | TRACK_A = ['0d19c1c6', '7ecf8ec1', 'c511e2ab', '7d865c68'] 15 | 16 | 17 | @MyExplorer 18 | def explorer(launcher): 19 | launcher.slurm_( 20 | gpus=8, 21 | time=3 * 24 * 60, 22 | partition='learnlab') 23 | 24 | # Reproduce results from MDX competition Track A 25 | # This trains the first round of models. Once this is trained, 26 | # you will need to schedule `mdx_refine`. 27 | for sig in TRACK_A: 28 | xp = main.get_xp_from_sig(sig) 29 | parent = xp.cfg.continue_from 30 | xp = main.get_xp_from_sig(parent) 31 | launcher(xp.argv) 32 | launcher(xp.argv, {'quant.diffq': 1e-4}) 33 | launcher(xp.argv, {'quant.diffq': 3e-4}) 34 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/grids/mdx_extra.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """ 7 | Main training for the Track A MDX models. 
8 | """ 9 | 10 | from ._explorers import MyExplorer 11 | from ..train import main 12 | 13 | TRACK_B = ['e51eebcc', 'a1d90b5c', '5d2d6c55', 'cfa93e08'] 14 | 15 | 16 | @MyExplorer 17 | def explorer(launcher): 18 | launcher.slurm_( 19 | gpus=8, 20 | time=3 * 24 * 60, 21 | partition='learnlab') 22 | 23 | # Reproduce results from MDX competition Track A 24 | # This trains the first round of models. Once this is trained, 25 | # you will need to schedule `mdx_refine`. 26 | for sig in TRACK_B: 27 | while sig is not None: 28 | xp = main.get_xp_from_sig(sig) 29 | sig = xp.cfg.continue_from 30 | 31 | for dset in ['extra44', 'extra_test']: 32 | sub = launcher.bind(xp.argv, dset=dset) 33 | sub() 34 | if dset == 'extra_test': 35 | sub({'quant.diffq': 1e-4}) 36 | sub({'quant.diffq': 3e-4}) 37 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/grids/mdx_refine.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """ 7 | Main training for the Track A MDX models. 8 | """ 9 | 10 | from ._explorers import MyExplorer 11 | from .mdx import TRACK_A 12 | from ..train import main 13 | 14 | 15 | @MyExplorer 16 | def explorer(launcher): 17 | launcher.slurm_( 18 | gpus=8, 19 | time=3 * 24 * 60, 20 | partition='learnlab') 21 | 22 | # Reproduce results from MDX competition Track A 23 | # WARNING: all the experiments in the `mdx` grid must have completed. 24 | for sig in TRACK_A: 25 | xp = main.get_xp_from_sig(sig) 26 | launcher(xp.argv) 27 | for diffq in [1e-4, 3e-4]: 28 | xp_src = main.get_xp_from_sig(xp.cfg.continue_from) 29 | q_argv = [f'quant.diffq={diffq}'] 30 | actual_src = main.get_xp(xp_src.argv + q_argv) 31 | actual_src.link.load() 32 | assert len(actual_src.link.history) == actual_src.cfg.epochs 33 | argv = xp.argv + q_argv + [f'continue_from="{actual_src.sig}"'] 34 | launcher(argv) 35 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/grids/mmi.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from ._explorers import MyExplorer 8 | from dora import Launcher 9 | 10 | 11 | @MyExplorer 12 | def explorer(launcher: Launcher): 13 | launcher.slurm_(gpus=8, time=3 * 24 * 60, partition="devlab,learnlab,learnfair") # 3 days 14 | 15 | sub = launcher.bind_( 16 | { 17 | "dset": "extra_mmi_goodclean", 18 | "test.shifts": 0, 19 | "model": "htdemucs", 20 | "htdemucs.dconv_mode": 3, 21 | "htdemucs.depth": 4, 22 | "htdemucs.t_dropout": 0.02, 23 | "htdemucs.t_layers": 5, 24 | "max_batches": 800, 25 | "ema.epoch": [0.9, 0.95], 26 | "ema.batch": [0.9995, 0.9999], 27 | "dset.segment": 10, 28 | "batch_size": 32, 29 | } 30 | ) 31 | sub({"model": "hdemucs"}) 32 | sub({"model": "hdemucs", "dset": "extra44"}) 33 | sub({"model": "hdemucs", "dset": "musdb44"}) 34 | 35 | sparse = { 36 | 'batch_size': 3 * 8, 37 | 'augment.remix.group_size': 3, 38 | 'htdemucs.t_auto_sparsity': True, 39 | 'htdemucs.t_sparse_self_attn': True, 40 | 'htdemucs.t_sparse_cross_attn': True, 41 | 'htdemucs.t_sparsity': 0.9, 42 | "htdemucs.t_layers": 7 43 | } 44 | 45 | with launcher.job_array(): 46 | for transf_layers in [5, 7]: 47 | for bottom_channels in [0, 512]: 48 | sub = launcher.bind({ 49 | "htdemucs.t_layers": transf_layers, 50 | "htdemucs.bottom_channels": bottom_channels, 51 | }) 52 | if bottom_channels == 0 and transf_layers == 5: 53 | sub({"augment.remix.proba": 0.0}) 54 | sub({ 55 | "augment.repitch.proba": 0.0, 56 | # when doing repitching, we trim the output to align on the 57 | # highest change of BPM. When removing repitching, 58 | # we simulate it here to ensure the training context is the same. 59 | # Another second is lost for all experiments due to the random 60 | # shift augmentation. 61 | "dset.segment": 10 * 0.88}) 62 | elif bottom_channels == 512 and transf_layers == 5: 63 | sub(dset="musdb44") 64 | sub(dset="extra44") 65 | # Sparse kernel XP, currently not released as kernels are still experimental. 66 | sub(sparse, {'dset.segment': 15, "htdemucs.t_layers": 7}) 67 | 68 | for duration in [5, 10, 15]: 69 | sub({"dset.segment": duration}) 70 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/grids/mmi_ft.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree.
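# Per-source fine-tuning grid: each run resumes from a trained signature and puts
# all loss weight on a single source.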
6 | 7 | from ._explorers import MyExplorer 8 | from dora import Launcher 9 | from demucs import train 10 | 11 | 12 | def get_sub(launcher, sig): 13 | xp = train.main.get_xp_from_sig(sig) 14 | sub = launcher.bind(xp.argv) 15 | sub() 16 | sub.bind_({ 17 | 'continue_from': sig, 18 | 'continue_best': True}) 19 | return sub 20 | 21 | 22 | @MyExplorer 23 | def explorer(launcher: Launcher): 24 | launcher.slurm_(gpus=4, time=3 * 24 * 60, partition="devlab,learnlab,learnfair") # 3 days 25 | ft = { 26 | 'optim.lr': 1e-4, 27 | 'augment.remix.proba': 0, 28 | 'augment.scale.proba': 0, 29 | 'augment.shift_same': True, 30 | 'htdemucs.t_weight_decay': 0.05, 31 | 'batch_size': 8, 32 | 'optim.clip_grad': 5, 33 | 'optim.optim': 'adamw', 34 | 'epochs': 50, 35 | 'dset.wav2_valid': True, 36 | 'ema.epoch': [], # let's make valid a bit faster 37 | } 38 | with launcher.job_array(): 39 | for sig in ['2899e11a']: 40 | sub = get_sub(launcher, sig) 41 | sub.bind_(ft) 42 | for segment in [15, 18]: 43 | for source in range(4): 44 | w = [0] * 4 45 | w[source] = 1 46 | sub({'weights': w, 'dset.segment': segment}) 47 | 48 | for sig in ['955717e8']: 49 | sub = get_sub(launcher, sig) 50 | sub.bind_(ft) 51 | for segment in [10, 15]: 52 | for source in range(4): 53 | w = [0] * 4 54 | w[source] = 1 55 | sub({'weights': w, 'dset.segment': segment}) 56 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/grids/repro.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """ 7 | Easier training for reproducibility 8 | """ 9 | 10 | from ._explorers import MyExplorer 11 | 12 | 13 | @MyExplorer 14 | def explorer(launcher): 15 | launcher.slurm_( 16 | gpus=8, 17 | time=3 * 24 * 60, 18 | partition='devlab,learnlab') 19 | 20 | launcher.bind_({'ema.epoch': [0.9, 0.95]}) 21 | launcher.bind_({'ema.batch': [0.9995, 0.9999]}) 22 | launcher.bind_({'epochs': 600}) 23 | 24 | base = {'model': 'demucs', 'demucs.dconv_mode': 0, 'demucs.gelu': False, 25 | 'demucs.lstm_layers': 2} 26 | newt = {'model': 'demucs', 'demucs.normalize': True} 27 | hdem = {'model': 'hdemucs'} 28 | svd = {'svd.penalty': 1e-5, 'svd': 'base2'} 29 | 30 | with launcher.job_array(): 31 | for model in [base, newt, hdem]: 32 | sub = launcher.bind(model) 33 | if model is base: 34 | # Training the v2 Demucs on MusDB HQ 35 | sub(epochs=360) 36 | continue 37 | 38 | # those two will be used in the repro_mdx_a bag of models. 39 | sub(svd) 40 | sub(svd, seed=43) 41 | if model == newt: 42 | # Ablation study 43 | sub() 44 | abl = sub.bind(svd) 45 | abl({'ema.epoch': [], 'ema.batch': []}) 46 | abl({'demucs.dconv_lstm': 10}) 47 | abl({'demucs.dconv_attn': 10}) 48 | abl({'demucs.dconv_attn': 10, 'demucs.dconv_lstm': 10, 'demucs.lstm_layers': 2}) 49 | abl({'demucs.dconv_mode': 0}) 50 | abl({'demucs.gelu': False}) 51 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/grids/repro_ft.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
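# Resumes every completed experiment from the `repro` grid (the symlinks
# iterated below) and fine-tunes it for a few epochs with longer segments
# and the automixed dataset (`dset=auto_mus`).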
6 | """ 7 | Fine tuning experiments 8 | """ 9 | 10 | from ._explorers import MyExplorer 11 | from ..train import main 12 | 13 | 14 | @MyExplorer 15 | def explorer(launcher): 16 | launcher.slurm_( 17 | gpus=8, 18 | time=300, 19 | partition='devlab,learnlab') 20 | 21 | # Mus 22 | launcher.slurm_(constraint='volta32gb') 23 | 24 | grid = "repro" 25 | folder = main.dora.dir / "grids" / grid 26 | 27 | for sig in folder.iterdir(): 28 | if not sig.is_symlink(): 29 | continue 30 | xp = main.get_xp_from_sig(sig) 31 | xp.link.load() 32 | if len(xp.link.history) != xp.cfg.epochs: 33 | continue 34 | sub = launcher.bind(xp.argv, [f'continue_from="{xp.sig}"']) 35 | sub.bind_({'ema.epoch': [0.9, 0.95], 'ema.batch': [0.9995, 0.9999]}) 36 | sub.bind_({'test.every': 1, 'test.sdr': True, 'epochs': 4}) 37 | sub.bind_({'dset.segment': 28, 'dset.shift': 2}) 38 | sub.bind_({'batch_size': 32}) 39 | auto = {'dset': 'auto_mus'} 40 | auto.update({'augment.remix.proba': 0, 'augment.scale.proba': 0, 41 | 'augment.shift_same': True}) 42 | sub.bind_(auto) 43 | sub.bind_({'batch_size': 16}) 44 | sub.bind_({'optim.lr': 1e-4}) 45 | sub.bind_({'model_segment': 44}) 46 | sub() 47 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/grids/sdx23.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from ._explorers import MyExplorer 8 | from dora import Launcher 9 | 10 | 11 | @MyExplorer 12 | def explorer(launcher: Launcher): 13 | launcher.slurm_(gpus=8, time=3 * 24 * 60, partition="speechgpt,learnfair", 14 | mem_per_gpu=None, constraint='') 15 | launcher.bind_({"dset.use_musdb": False}) 16 | 17 | with launcher.job_array(): 18 | launcher(dset='sdx23_bleeding') 19 | launcher(dset='sdx23_labelnoise') 20 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/pretrained.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """Loading pretrained models. 7 | """ 8 | 9 | import logging 10 | from pathlib import Path 11 | import typing as tp 12 | 13 | # from dora.log import fatal, bold 14 | 15 | from .hdemucs import HDemucs 16 | from .repo import RemoteRepo, LocalRepo, ModelOnlyRepo, BagOnlyRepo, AnyModelRepo, ModelLoadingError # noqa 17 | from .states import _check_diffq 18 | 19 | logger = logging.getLogger(__name__) 20 | ROOT_URL = "https://dl.fbaipublicfiles.com/demucs/" 21 | REMOTE_ROOT = Path(__file__).parent / 'remote' 22 | 23 | SOURCES = ["drums", "bass", "other", "vocals"] 24 | DEFAULT_MODEL = 'htdemucs' 25 | 26 | 27 | def demucs_unittest(): 28 | model = HDemucs(channels=4, sources=SOURCES) 29 | return model 30 | 31 | 32 | def add_model_flags(parser): 33 | group = parser.add_mutually_exclusive_group(required=False) 34 | group.add_argument("-s", "--sig", help="Locally trained XP signature.") 35 | group.add_argument("-n", "--name", default="htdemucs", 36 | help="Pretrained model name or signature. 
Default is htdemucs.") 37 | parser.add_argument("--repo", type=Path, 38 | help="Folder containing all pre-trained models for use with -n.") 39 | 40 | 41 | def _parse_remote_files(remote_file_list) -> tp.Dict[str, str]: 42 | root: str = '' 43 | models: tp.Dict[str, str] = {} 44 | for line in remote_file_list.read_text().split('\n'): 45 | line = line.strip() 46 | if line.startswith('#'): 47 | continue 48 | elif len(line) == 0: 49 | continue 50 | elif line.startswith('root:'): 51 | root = line.split(':', 1)[1].strip() 52 | else: 53 | sig = line.split('-', 1)[0] 54 | assert sig not in models 55 | models[sig] = ROOT_URL + root + line 56 | return models 57 | 58 | 59 | def get_model(name: str, 60 | repo: tp.Optional[Path] = None): 61 | """`name` must be a bag of models name or a pretrained signature 62 | from the remote AWS model repo or the specified local repo if `repo` is not None. 63 | """ 64 | if name == 'demucs_unittest': 65 | return demucs_unittest() 66 | model_repo: ModelOnlyRepo 67 | if repo is None: 68 | models = _parse_remote_files(REMOTE_ROOT / 'files.txt') 69 | model_repo = RemoteRepo(models) 70 | bag_repo = BagOnlyRepo(REMOTE_ROOT, model_repo) 71 | else: 72 | if not repo.is_dir(): 73 | # fatal(f"{repo} must exist and be a directory.") 74 | pass 75 | model_repo = LocalRepo(repo) 76 | bag_repo = BagOnlyRepo(repo, model_repo) 77 | any_repo = AnyModelRepo(model_repo, bag_repo) 78 | try: 79 | model = any_repo.get_model(name) 80 | except ImportError as exc: 81 | if 'diffq' in exc.args[0]: 82 | _check_diffq() 83 | raise 84 | 85 | model.eval() 86 | return model 87 | 88 | 89 | def get_model_from_args(args): 90 | """ 91 | Load local model package or pre-trained model. 92 | """ 93 | if args.name is None: 94 | args.name = DEFAULT_MODEL 95 | # print(bold("Important: the default model was recently changed to `htdemucs`"), 96 | # "the latest Hybrid Transformer Demucs model. In some cases, this model can " 97 | # "actually perform worse than previous models. To get back the old default model " 98 | # "use `-n mdx_extra_q`.") 99 | print("Important: the default model was recently changed to `htdemucs`", 100 | "the latest Hybrid Transformer Demucs model. In some cases, this model can " 101 | "actually perform worse than previous models. 
To get back the old default model " 102 | "use `-n mdx_extra_q`.") 103 | return get_model(name=args.name, repo=args.repo) 104 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/models_dir/demucs/demucs/py.typed -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/remote/files.txt: -------------------------------------------------------------------------------- 1 | # MDX Models 2 | root: mdx_final/ 3 | 0d19c1c6-0f06f20e.th 4 | 5d2d6c55-db83574e.th 5 | 7d865c68-3d5dd56b.th 6 | 7ecf8ec1-70f50cc9.th 7 | a1d90b5c-ae9d2452.th 8 | c511e2ab-fe698775.th 9 | cfa93e08-61801ae1.th 10 | e51eebcc-c1b80bdd.th 11 | 6b9c2ca1-3fd82607.th 12 | b72baf4e-8778635e.th 13 | 42e558d4-196e0e1b.th 14 | 305bc58f-18378783.th 15 | 14fc6a69-a89dd0ee.th 16 | 464b36d7-e5a9386e.th 17 | 7fd6ef75-a905dd85.th 18 | 83fc094f-4a16d450.th 19 | 1ef250f1-592467ce.th 20 | 902315c2-b39ce9c9.th 21 | 9a6b4851-03af0aa6.th 22 | fa0cb7f9-100d8bf4.th 23 | # Hybrid Transformer models 24 | root: hybrid_transformer/ 25 | 955717e8-8726e21a.th 26 | f7e0c4bc-ba3fe64a.th 27 | d12395a8-e57c48e6.th 28 | 92cfc3b6-ef3bcb9c.th 29 | 04573f0d-f3cf25b2.th 30 | 75fc33f5-1941ce65.th 31 | # Experimental 6 sources model 32 | 5c90dfd2-34c22ccb.th 33 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/remote/hdemucs_mmi.yaml: -------------------------------------------------------------------------------- 1 | models: ['75fc33f5'] 2 | segment: 44 3 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/remote/htdemucs.yaml: -------------------------------------------------------------------------------- 1 | models: ['955717e8'] 2 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/remote/htdemucs_6s.yaml: -------------------------------------------------------------------------------- 1 | models: ['5c90dfd2'] 2 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/remote/htdemucs_ft.yaml: -------------------------------------------------------------------------------- 1 | models: ['f7e0c4bc', 'd12395a8', '92cfc3b6', '04573f0d'] 2 | weights: [ 3 | [1., 0., 0., 0.], 4 | [0., 1., 0., 0.], 5 | [0., 0., 1., 0.], 6 | [0., 0., 0., 1.], 7 | ] -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/remote/mdx.yaml: -------------------------------------------------------------------------------- 1 | models: ['0d19c1c6', '7ecf8ec1', 'c511e2ab', '7d865c68'] 2 | weights: [ 3 | [1., 1., 0., 0.], 4 | [0., 1., 0., 0.], 5 | [1., 0., 1., 1.], 6 | [1., 0., 1., 1.], 7 | ] 8 | segment: 44 9 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/remote/mdx_extra.yaml: -------------------------------------------------------------------------------- 1 | models: ['e51eebcc', 'a1d90b5c', '5d2d6c55', 'cfa93e08'] 2 | segment: 44 -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/remote/mdx_extra_q.yaml: 
-------------------------------------------------------------------------------- 1 | models: ['83fc094f', '464b36d7', '14fc6a69', '7fd6ef75'] 2 | segment: 44 3 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/remote/mdx_q.yaml: -------------------------------------------------------------------------------- 1 | models: ['6b9c2ca1', 'b72baf4e', '42e558d4', '305bc58f'] 2 | weights: [ 3 | [1., 1., 0., 0.], 4 | [0., 1., 0., 0.], 5 | [1., 0., 1., 1.], 6 | [1., 0., 1., 1.], 7 | ] 8 | segment: 44 9 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/remote/repro_mdx_a.yaml: -------------------------------------------------------------------------------- 1 | models: ['9a6b4851', '1ef250f1', 'fa0cb7f9', '902315c2'] 2 | segment: 44 3 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/remote/repro_mdx_a_hybrid_only.yaml: -------------------------------------------------------------------------------- 1 | models: ['fa0cb7f9', '902315c2', 'fa0cb7f9', '902315c2'] 2 | segment: 44 3 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/remote/repro_mdx_a_time_only.yaml: -------------------------------------------------------------------------------- 1 | models: ['9a6b4851', '9a6b4851', '1ef250f1', '1ef250f1'] 2 | segment: 44 3 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/repitch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """Utility for on the fly pitch/tempo change for data augmentation.""" 7 | 8 | import random 9 | import subprocess as sp 10 | import tempfile 11 | 12 | import torch 13 | import torchaudio as ta 14 | 15 | from .audio import save_audio 16 | 17 | 18 | class RepitchedWrapper: 19 | """ 20 | Wrap a dataset to apply online change of pitch / tempo. 21 | """ 22 | def __init__(self, dataset, proba=0.2, max_pitch=2, max_tempo=12, 23 | tempo_std=5, vocals=[3], same=True): 24 | self.dataset = dataset 25 | self.proba = proba 26 | self.max_pitch = max_pitch 27 | self.max_tempo = max_tempo 28 | self.tempo_std = tempo_std 29 | self.same = same 30 | self.vocals = vocals 31 | 32 | def __len__(self): 33 | return len(self.dataset) 34 | 35 | def __getitem__(self, index): 36 | streams = self.dataset[index] 37 | in_length = streams.shape[-1] 38 | out_length = int((1 - 0.01 * self.max_tempo) * in_length) 39 | 40 | if random.random() < self.proba: 41 | outs = [] 42 | for idx, stream in enumerate(streams): 43 | if idx == 0 or not self.same: 44 | delta_pitch = random.randint(-self.max_pitch, self.max_pitch) 45 | delta_tempo = random.gauss(0, self.tempo_std) 46 | delta_tempo = min(max(-self.max_tempo, delta_tempo), self.max_tempo) 47 | stream = repitch( 48 | stream, 49 | delta_pitch, 50 | delta_tempo, 51 | voice=idx in self.vocals) 52 | outs.append(stream[:, :out_length]) 53 | streams = torch.stack(outs) 54 | else: 55 | streams = streams[..., :out_length] 56 | return streams 57 | 58 | 59 | def repitch(wav, pitch, tempo, voice=False, quick=False, samplerate=44100): 60 | """ 61 | tempo is a relative delta in percentage, so tempo=10 means tempo at 110%! 
62 | pitch is in semitones. 63 | Requires `soundstretch` to be installed, see 64 | https://www.surina.net/soundtouch/soundstretch.html 65 | """ 66 | infile = tempfile.NamedTemporaryFile(suffix=".wav") 67 | outfile = tempfile.NamedTemporaryFile(suffix=".wav") 68 | save_audio(wav, infile.name, samplerate, clip='clamp') 69 | command = [ 70 | "soundstretch", 71 | infile.name, 72 | outfile.name, 73 | f"-pitch={pitch}", 74 | f"-tempo={tempo:.6f}", 75 | ] 76 | if quick: 77 | command += ["-quick"] 78 | if voice: 79 | command += ["-speech"] 80 | try: 81 | sp.run(command, capture_output=True, check=True) 82 | except sp.CalledProcessError as error: 83 | raise RuntimeError(f"Could not change bpm because {error.stderr.decode('utf-8')}") 84 | wav, sr = ta.load(outfile.name) 85 | assert sr == samplerate 86 | return wav 87 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/spec.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """Convenience wrapper to perform STFT and iSTFT""" 7 | 8 | import torch as th 9 | 10 | 11 | def spectro(x, n_fft=512, hop_length=None, pad=0): 12 | *other, length = x.shape 13 | x = x.reshape(-1, length) 14 | is_mps = x.device.type == 'mps' 15 | if is_mps: 16 | x = x.cpu() 17 | z = th.stft(x, 18 | n_fft * (1 + pad), 19 | hop_length or n_fft // 4, 20 | window=th.hann_window(n_fft).to(x), 21 | win_length=n_fft, 22 | normalized=True, 23 | center=True, 24 | return_complex=True, 25 | pad_mode='reflect') 26 | _, freqs, frame = z.shape 27 | return z.view(*other, freqs, frame) 28 | 29 | 30 | def ispectro(z, hop_length=None, length=None, pad=0): 31 | *other, freqs, frames = z.shape 32 | n_fft = 2 * freqs - 2 33 | z = z.view(-1, freqs, frames) 34 | win_length = n_fft // (1 + pad) 35 | is_mps = z.device.type == 'mps' 36 | if is_mps: 37 | z = z.cpu() 38 | x = th.istft(z, 39 | n_fft, 40 | hop_length, 41 | window=th.hann_window(win_length).to(z.real), 42 | win_length=win_length, 43 | normalized=True, 44 | length=length, 45 | center=True) 46 | _, length = x.shape 47 | return x.view(*other, length) 48 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/states.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """ 7 | Utilities to save and load models.
8 | """ 9 | from contextlib import contextmanager 10 | 11 | import functools 12 | import hashlib 13 | import inspect 14 | import io 15 | from pathlib import Path 16 | import warnings 17 | 18 | from omegaconf import OmegaConf 19 | # from dora.log import fatal 20 | import torch 21 | 22 | 23 | def _check_diffq(): 24 | try: 25 | import diffq # noqa 26 | except ImportError: 27 | # fatal('Trying to use DiffQ, but diffq is not installed.\n' 28 | # 'On Windows run: python.exe -m pip install diffq \n' 29 | # 'On Linux/Mac, run: python3 -m pip install diffq') 30 | pass 31 | 32 | 33 | def get_quantizer(model, args, optimizer=None): 34 | """Return the quantizer given the XP quantization args.""" 35 | quantizer = None 36 | if args.diffq: 37 | _check_diffq() 38 | from diffq import DiffQuantizer 39 | quantizer = DiffQuantizer( 40 | model, min_size=args.min_size, group_size=args.group_size) 41 | if optimizer is not None: 42 | quantizer.setup_optimizer(optimizer) 43 | elif args.qat: 44 | _check_diffq() 45 | from diffq import UniformQuantizer 46 | quantizer = UniformQuantizer( 47 | model, bits=args.qat, min_size=args.min_size) 48 | return quantizer 49 | 50 | 51 | def load_model(path_or_package, strict=False): 52 | """Load a model from the given serialized model, either given as a dict (already loaded) 53 | or a path to a file on disk.""" 54 | if isinstance(path_or_package, dict): 55 | package = path_or_package 56 | elif isinstance(path_or_package, (str, Path)): 57 | with warnings.catch_warnings(): 58 | warnings.simplefilter("ignore") 59 | path = path_or_package 60 | package = torch.load(path, 'cpu') 61 | else: 62 | raise ValueError(f"Invalid type for {path_or_package}.") 63 | 64 | klass = package["klass"] 65 | args = package["args"] 66 | kwargs = package["kwargs"] 67 | 68 | if strict: 69 | model = klass(*args, **kwargs) 70 | else: 71 | sig = inspect.signature(klass) 72 | for key in list(kwargs): 73 | if key not in sig.parameters: 74 | warnings.warn("Dropping inexistant parameter " + key) 75 | del kwargs[key] 76 | model = klass(*args, **kwargs) 77 | 78 | state = package["state"] 79 | 80 | set_state(model, state) 81 | return model 82 | 83 | 84 | def get_state(model, quantizer, half=False): 85 | """Get the state from a model, potentially with quantization applied. 86 | If `half` is True, model are stored as half precision, which shouldn't impact performance 87 | but half the state size.""" 88 | if quantizer is None: 89 | dtype = torch.half if half else None 90 | state = {k: p.data.to(device='cpu', dtype=dtype) for k, p in model.state_dict().items()} 91 | else: 92 | state = quantizer.get_quantized_state() 93 | state['__quantized'] = True 94 | return state 95 | 96 | 97 | def set_state(model, state, quantizer=None): 98 | """Set the state on a given model.""" 99 | if state.get('__quantized'): 100 | if quantizer is not None: 101 | quantizer.restore_quantized_state(model, state['quantized']) 102 | else: 103 | _check_diffq() 104 | from diffq import restore_quantized_state 105 | restore_quantized_state(model, state) 106 | else: 107 | model.load_state_dict(state) 108 | return state 109 | 110 | 111 | def save_with_checksum(content, path): 112 | """Save the given value on disk, along with a sha256 hash. 
113 | Should be used with the output of either `serialize_model` or `get_state`.""" 114 | buf = io.BytesIO() 115 | torch.save(content, buf) 116 | sig = hashlib.sha256(buf.getvalue()).hexdigest()[:8] 117 | 118 | path = path.parent / (path.stem + "-" + sig + path.suffix) 119 | path.write_bytes(buf.getvalue()) 120 | 121 | 122 | def serialize_model(model, training_args, quantizer=None, half=True): 123 | args, kwargs = model._init_args_kwargs 124 | klass = model.__class__ 125 | 126 | state = get_state(model, quantizer, half) 127 | return { 128 | 'klass': klass, 129 | 'args': args, 130 | 'kwargs': kwargs, 131 | 'state': state, 132 | 'training_args': OmegaConf.to_container(training_args, resolve=True), 133 | } 134 | 135 | 136 | def copy_state(state): 137 | return {k: v.cpu().clone() for k, v in state.items()} 138 | 139 | 140 | @contextmanager 141 | def swap_state(model, state): 142 | """ 143 | Context manager that swaps the state of a model, e.g.: 144 | 145 | # model is in old state 146 | with swap_state(model, new_state): 147 | # model in new state 148 | # model back to old state 149 | """ 150 | old_state = copy_state(model.state_dict()) 151 | model.load_state_dict(state, strict=False) 152 | try: 153 | yield 154 | finally: 155 | model.load_state_dict(old_state) 156 | 157 | 158 | def capture_init(init): 159 | @functools.wraps(init) 160 | def __init__(self, *args, **kwargs): 161 | self._init_args_kwargs = (args, kwargs) 162 | init(self, *args, **kwargs) 163 | 164 | return __init__ 165 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/svd.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """Ways to make the model stronger.""" 7 | import random 8 | import torch 9 | 10 | 11 | def power_iteration(m, niters=1, bs=1): 12 | """This is the power method. The batch size is used to try multiple starting points in parallel.""" 13 | assert m.dim() == 2 14 | assert m.shape[0] == m.shape[1] 15 | dim = m.shape[0] 16 | b = torch.randn(dim, bs, device=m.device, dtype=m.dtype) 17 | 18 | for _ in range(niters): 19 | n = m.mm(b) 20 | norm = n.norm(dim=0, keepdim=True) 21 | b = n / (1e-10 + norm) 22 | 23 | return norm.mean() 24 | 25 | 26 | # We need a shared RNG to make sure all the distributed workers will skip the penalty together, 27 | # as otherwise we wouldn't get any speed up. 28 | penalty_rng = random.Random(1234) 29 | 30 | 31 | def svd_penalty(model, min_size=0.1, dim=1, niters=2, powm=False, convtr=True, 32 | proba=1, conv_only=False, exact=False, bs=1): 33 | """ 34 | Penalty on the largest singular value for a layer. 35 | Args: 36 | - model: model to penalize 37 | - min_size: minimum size in MB of a layer to penalize. 38 | - dim: projection dimension for the svd_lowrank. Higher is better but slower. 39 | - niters: number of iterations in the algorithm used by svd_lowrank. 40 | - powm: use power method instead of lowrank SVD, my own experience 41 | is that it is both slower and less stable. 42 | - convtr: when True, differentiate between Conv and Transposed Conv. 43 | this is kept for compatibility with older experiments. 44 | - proba: probability to apply the penalty. 45 | - conv_only: only apply to conv and conv transposed, not LSTM 46 | (might not be reliable for other models than Demucs).
47 | - exact: use exact SVD (slow but useful at validation). 48 | - bs: batch_size for power method. 49 | """ 50 | total = 0 51 | if penalty_rng.random() > proba: 52 | return 0. 53 | 54 | for m in model.modules(): 55 | for name, p in m.named_parameters(recurse=False): 56 | if p.numel() / 2**18 < min_size: 57 | continue 58 | if convtr: 59 | if isinstance(m, (torch.nn.ConvTranspose1d, torch.nn.ConvTranspose2d)): 60 | if p.dim() in [3, 4]: 61 | p = p.transpose(0, 1).contiguous() 62 | if p.dim() == 3: 63 | p = p.view(len(p), -1) 64 | elif p.dim() == 4: 65 | p = p.view(len(p), -1) 66 | elif p.dim() == 1: 67 | continue 68 | elif conv_only: 69 | continue 70 | assert p.dim() == 2, (name, p.shape) 71 | if exact: 72 | estimate = torch.svd(p, compute_uv=False)[1].pow(2).max() 73 | elif powm: 74 | a, b = p.shape 75 | if a < b: 76 | n = p.mm(p.t()) 77 | else: 78 | n = p.t().mm(p) 79 | estimate = power_iteration(n, niters, bs) 80 | else: 81 | estimate = torch.svd_lowrank(p, dim, niters)[1][0].pow(2) 82 | total += estimate 83 | return total / proba 84 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from collections import defaultdict 8 | from concurrent.futures import CancelledError 9 | from contextlib import contextmanager 10 | import math 11 | import os 12 | import tempfile 13 | import typing as tp 14 | 15 | import torch 16 | from torch.nn import functional as F 17 | from torch.utils.data import Subset 18 | 19 | 20 | def unfold(a, kernel_size, stride): 21 | """Given input of size [*OT, T], output Tensor of size [*OT, F, K] 22 | with K the kernel size, by extracting frames with the given stride. 23 | 24 | This will pad the input so that `F = ceil(T / stride)`. 25 | 26 | see https://github.com/pytorch/pytorch/issues/60466 27 | """ 28 | *shape, length = a.shape 29 | n_frames = math.ceil(length / stride) 30 | tgt_length = (n_frames - 1) * stride + kernel_size 31 | a = F.pad(a, (0, tgt_length - length)) 32 | strides = list(a.stride()) 33 | assert strides[-1] == 1, 'data should be contiguous' 34 | strides = strides[:-1] + [stride, 1] 35 | return a.as_strided([*shape, n_frames, kernel_size], strides) 36 | 37 | 38 | def center_trim(tensor: torch.Tensor, reference: tp.Union[torch.Tensor, int]): 39 | """ 40 | Center trim `tensor` with respect to `reference`, along the last dimension. 41 | `reference` can also be a number, representing the length to trim to. 42 | If the size difference != 0 mod 2, the extra sample is removed on the right side. 43 | """ 44 | ref_size: int 45 | if isinstance(reference, torch.Tensor): 46 | ref_size = reference.size(-1) 47 | else: 48 | ref_size = reference 49 | delta = tensor.size(-1) - ref_size 50 | if delta < 0: 51 | raise ValueError("tensor must be larger than reference. " f"Delta is {delta}.") 52 | if delta: 53 | tensor = tensor[..., delta // 2:-(delta - delta // 2)] 54 | return tensor 55 | 56 | 57 | def pull_metric(history: tp.List[dict], name: str): 58 | out = [] 59 | for metrics in history: 60 | metric = metrics 61 | for part in name.split("."): 62 | metric = metric[part] 63 | out.append(metric) 64 | return out 65 | 66 | 67 | def EMA(beta: float = 1): 68 | """ 69 | Exponential Moving Average callback.
70 | Returns a single function that can be called to repeatedly update the EMA 71 | with a dict of metrics. The callback will return 72 | the new averaged dict of metrics. 73 | 74 | Note that for `beta=1`, this is just plain averaging. 75 | """ 76 | fix: tp.Dict[str, float] = defaultdict(float) 77 | total: tp.Dict[str, float] = defaultdict(float) 78 | 79 | def _update(metrics: dict, weight: float = 1) -> dict: 80 | nonlocal total, fix 81 | for key, value in metrics.items(): 82 | total[key] = total[key] * beta + weight * float(value) 83 | fix[key] = fix[key] * beta + weight 84 | return {key: tot / fix[key] for key, tot in total.items()} 85 | return _update 86 | 87 | 88 | def sizeof_fmt(num: float, suffix: str = 'B'): 89 | """ 90 | Given `num` bytes, return human-readable size. 91 | Taken from https://stackoverflow.com/a/1094933 92 | """ 93 | for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']: 94 | if abs(num) < 1024.0: 95 | return "%3.1f%s%s" % (num, unit, suffix) 96 | num /= 1024.0 97 | return "%.1f%s%s" % (num, 'Yi', suffix) 98 | 99 | 100 | @contextmanager 101 | def temp_filenames(count: int, delete=True): 102 | names = [] 103 | try: 104 | for _ in range(count): 105 | names.append(tempfile.NamedTemporaryFile(delete=False).name) 106 | yield names 107 | finally: 108 | if delete: 109 | for name in names: 110 | os.unlink(name) 111 | 112 | 113 | def random_subset(dataset, max_samples: int, seed: int = 42): 114 | if max_samples >= len(dataset): 115 | return dataset 116 | 117 | generator = torch.Generator().manual_seed(seed) 118 | perm = torch.randperm(len(dataset), generator=generator) 119 | return Subset(dataset, perm[:max_samples].tolist()) 120 | 121 | 122 | class DummyPoolExecutor: 123 | class DummyResult: 124 | def __init__(self, func, _dict, *args, **kwargs): 125 | self.func = func 126 | self._dict = _dict 127 | self.args = args 128 | self.kwargs = kwargs 129 | 130 | def result(self): 131 | if self._dict["run"]: 132 | return self.func(*self.args, **self.kwargs) 133 | else: 134 | raise CancelledError() 135 | 136 | def __init__(self, workers=0): 137 | self._dict = {"run": True} 138 | 139 | def submit(self, func, *args, **kwargs): 140 | return DummyPoolExecutor.DummyResult(func, self._dict, *args, **kwargs) 141 | 142 | def shutdown(self, *_, **__): 143 | self._dict["run"] = False 144 | 145 | def __enter__(self): 146 | return self 147 | 148 | def __exit__(self, exc_type, exc_value, exc_tb): 149 | return 150 | -------------------------------------------------------------------------------- /src/models_dir/demucs/demucs/wdemucs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree.
6 | # For compat 7 | from .hdemucs import HDemucs 8 | 9 | WDemucs = HDemucs 10 | -------------------------------------------------------------------------------- /src/models_dir/demucs/docs/linux.md: -------------------------------------------------------------------------------- 1 | # Linux support for Demucs 2 | 3 | If your distribution has at least Python 3.8, and you just wish to separate 4 | tracks with Demucs, not train it, you can just run 5 | 6 | ```bash 7 | pip3 install --user -U demucs 8 | # Then anytime you want to use demucs, just do 9 | python3 -m demucs -d cpu PATH_TO_AUDIO_FILE_1 10 | # If you have added the user specific pip bin/ folder to your path, you can also do 11 | demucs -d cpu PATH_TO_AUDIO_FILE_1 12 | ``` 13 | 14 | If Python is too old, or you want to be able to train, I recommend [installing Miniconda][miniconda], with Python 3.8 or more. 15 | 16 | ```bash 17 | conda activate 18 | pip3 install -U demucs 19 | # Then anytime you want to use demucs, first do conda activate, then 20 | demucs -d cpu PATH_TO_AUDIO_FILE_1 21 | ``` 22 | 23 | Of course, you can also use a specific env for Demucs. 24 | 25 | **Important, torchaudio 0.12 update:** Torchaudio no longer supports decoding mp3s without ffmpeg installed. You must have ffmpeg installed, either through Anaconda (`conda install ffmpeg -c conda-forge`) or as a distribution package (e.g. `sudo apt-get install ffmpeg`). 26 | 27 | 28 | [miniconda]: https://docs.conda.io/en/latest/miniconda.html#linux-installers 29 | -------------------------------------------------------------------------------- /src/models_dir/demucs/docs/mac.md: -------------------------------------------------------------------------------- 1 | # macOS support for Demucs 2 | 3 | If you have a sufficiently recent version of macOS, you can just run 4 | 5 | ```bash 6 | python3 -m pip install --user -U demucs 7 | # Then anytime you want to use demucs, just do 8 | python3 -m demucs -d cpu PATH_TO_AUDIO_FILE_1 9 | # If you have added the user specific pip bin/ folder to your path, you can also do 10 | demucs -d cpu PATH_TO_AUDIO_FILE_1 11 | ``` 12 | 13 | If you do not already have Anaconda installed or much experience with the terminal on macOS, here are some detailed instructions: 14 | 15 | 1. Download [Anaconda 3.8 (or more recent) 64-bit for macOS][anaconda]: 16 | 2. Open [Anaconda Prompt in macOS][prompt] 17 | 3. Follow these commands: 18 | ```bash 19 | conda activate 20 | pip3 install -U demucs 21 | # Then anytime you want to use demucs, first do conda activate, then 22 | demucs -d cpu PATH_TO_AUDIO_FILE_1 23 | ``` 24 | 25 | **Important, torchaudio 0.12 update:** Torchaudio no longer supports decoding mp3s without ffmpeg installed. You must have ffmpeg installed, either through Anaconda (`conda install ffmpeg -c conda-forge`) or with Homebrew for instance (`brew install ffmpeg`). 
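To double check that everything is wired up, the following should both run without errors (a quick sanity check; the `--help` call only prints usage and does not download any model):

```bash
ffmpeg -version
python3 -m demucs --help
```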
26 | 27 | [anaconda]: https://www.anaconda.com/download 28 | [prompt]: https://docs.anaconda.com/anaconda/user-guide/getting-started/#open-nav-mac 29 | -------------------------------------------------------------------------------- /src/models_dir/demucs/docs/mdx.md: -------------------------------------------------------------------------------- 1 | # Music DemiXing challenge (MDX) 2 | 3 | If you want to use Demucs for the [MDX challenge](https://www.aicrowd.com/challenges/music-demixing-challenge-ismir-2021), 4 | please follow the instructions hereafter. 5 | 6 | ## Installing Demucs 7 | 8 | Follow the instructions from the [main README](https://github.com/facebookresearch/demucs#requirements) 9 | in order to set up Demucs using Anaconda. You will need the full setup for training, including soundstretch. 10 | 11 | ## Getting MusDB-HQ 12 | 13 | Download [MusDB-HQ](https://zenodo.org/record/3338373) to some folder and unzip it. 14 | 15 | ## Training Demucs 16 | 17 | Train Demucs (you might need to change the batch size depending on the number of GPUs available). 18 | It seems 48 channels are enough to get the best performance on MusDB-HQ, and training will be faster 19 | and less memory demanding. In any case, the 64 channel version is timing out on the challenge. 20 | ```bash 21 | ./run.py --channels=48 --batch_size 64 --musdb=PATH_TO_MUSDB --is_wav [EXTRA_FLAGS] 22 | ``` 23 | 24 | ### Post training 25 | 26 | Once the training is completed, a new model file will be exported in `models/`. 27 | 28 | You can look at the SDR on the MusDB dataset using `python result_table.py`. 29 | 30 | 31 | ### Evaluate and export a model before training is over 32 | 33 | If you want to export a model before training is complete, use the following command: 34 | ```bash 35 | python -m demucs [ALL EXACT TRAINING FLAGS] --save_model 36 | ``` 37 | You can also pass the `--half` flag, in order to save weights in half precision. This will divide the model size by 2 and won't impact SDR. 38 | 39 | Once this is done, you can partially evaluate a model with 40 | ```bash 41 | ./run.py --test NAME_OF_MODEL.th --musdb=PATH_TO_MUSDB --is_wav 42 | ``` 43 | 44 | **Note:** `NAME_OF_MODEL.th` is given relative to the models folder (given by `--models`, defaults to `models/`), so don't include it in the name. 45 | 46 | 47 | ### Training smaller models 48 | 49 | If you want to quickly test an idea, I would recommend training a 16 kHz model, and testing if things work there or not, before training the full 44kHz model. You can train one of those with 50 | ```bash 51 | ./run.py --channels=32 --samplerate 16000 --samples 160000 --data_stride 16000 --depth=5 --batch_size 64 --repitch=0 --musdb=PATH_TO_MUSDB --is_wav [EXTRA_FLAGS] 52 | ``` 53 | (repitch must be turned off, because things will break at 16kHz). 54 | 55 | ## Submitting your model 56 | 57 | 1. Git clone [the Music Demixing Challenge - Starter Kit - Demucs Edition](https://github.com/adefossez/music-demixing-challenge-starter-kit). 58 | 2. Inside the starter kit, create a `models/` folder and copy over the trained model from the Demucs repo (renaming 59 | it, for instance, to `my_model.th`) 60 | 3. Inside the `test_demuc.py` file, change the function `prediction_setup`: comment out the loading 61 | of the pre-trained model, and uncomment the code to load your own model. 62 | 4. Edit the file `aicrowd.json` with your username. 63 | 5. Install [git-lfs](https://git-lfs.github.com/). Then run 64 | 65 | ```bash 66 | git lfs install 67 | git add models/ 68 | git add -u .
69 | git commit -m "My Demucs submission" 70 | ``` 71 | 6. Follow the [submission instructions](https://github.com/AIcrowd/music-demixing-challenge-starter-kit/blob/master/docs/SUBMISSION.md). 72 | 73 | Best of luck 🤞 74 | -------------------------------------------------------------------------------- /src/models_dir/demucs/docs/release.md: -------------------------------------------------------------------------------- 1 | # Release notes for Demucs 2 | 3 | ## V4.1.0a1, TBD 4 | 5 | Get models list 6 | 7 | Check segment of HTDemucs inside BagOfModels 8 | 9 | Added api.py to be called from another program 10 | 11 | Use api in separate.py 12 | 13 | Added `--other-method`: method to get `no_{STEM}`: add up all the other stems (add), subtract the specific stem from the original track (minus), or discard it (none) 14 | 15 | Added type `HTDemucs` to type alias `AnyModel`. 16 | 17 | ## V4.0.1, 8th of September 2023 18 | 19 | **From this version, Python 3.7 is no longer supported. This is not a problem since the latest PyTorch 2.0.0 no longer supports it either.** 20 | 21 | Various improvements by @CarlGao4. Support for `segment` param inside of HTDemucs 22 | model. 23 | 24 | Made diffq an optional dependency, with an error message if not installed. 25 | 26 | Added output format flac (Free Lossless Audio Codec) 27 | 28 | Will use CPU for complex numbers when using the MPS device (all other computations are performed by MPS). 29 | 30 | Optimized code to save memory 31 | 32 | Allow changing the MP3 preset 33 | 34 | ## V4.0.0, 7th of December 2022 35 | 36 | Added hybrid transformer Demucs model. 37 | 38 | Added support for [Torchaudio implementation of HDemucs](https://pytorch.org/audio/main/tutorials/hybrid_demucs_tutorial.html), thanks @skim0514. 39 | 40 | Added experimental 6-source model `htdemucs_6s` (`drums`, `bass`, `other`, `vocals`, `piano`, `guitar`). 41 | 42 | ## V3.0.6, 16th of November 2022 43 | 44 | Option to customize output path of stems (@CarlGao4) 45 | 46 | Fixed bug in pad1d leading to failure sometimes. 47 | 48 | ## V3.0.5, 17th of August 2022 49 | 50 | Added `--segment` flag to customize the segment length and use less memory (thanks @CarlGao4). 51 | 52 | Fix reflect padding bug on small inputs. 53 | 54 | Compatible with PyTorch 1.12 55 | 56 | ## V3.0.4, 24th of February 2022 57 | 58 | Added option to split into two stems (i.e. vocals vs. non-vocals), thanks to @CarlGao4. 59 | 60 | Added `--float32`, `--int24` and `--clip-mode` options to customize how output stems are saved. 61 | 62 | ## V3.0.3, 2nd of December 2021 63 | 64 | Fix bug in weights used for different sources. Thanks @keunwoochoi for the report and fix. 65 | 66 | Drastically improved memory usage on GPU for long files. Thanks a lot @famzah for providing this. 67 | 68 | Added multithreaded evaluation on CPU (`-j` option). 69 | 70 | (v3.0.2 had a bug with the CPU pool and is skipped.) 71 | 72 | ## V3.0.1, 12th of November 2021 73 | 74 | Release of Demucs v3, featuring hybrid domain separation and much more. 75 | This drops support for Conv-Tasnet and training on the non HQ MusDB dataset. 76 | There is no version 3.0.0 because I messed up. 77 | 78 | ## V2.0.2, 26th of May 2021 79 | 80 | - Fix in Tasnet (PR #178) 81 | - Use ffmpeg in priority when available instead of torchaudio to avoid small shift in MP3 data. 82 | - other minor fixes 83 | 84 | ## v2.0.1, 11th of May 2021 85 | 86 | MusDB HQ support added. Custom wav dataset support added.
87 | Minor changes: there was an issue with padding when reading mp3s with torchaudio; to limit that, 88 | Demucs now uses ffmpeg in priority and falls back to torchaudio. 89 | Replaced pre-trained demucs model with one trained on more recent codebase. 90 | 91 | ## v2.0.0, 28th of April 2021 92 | 93 | This is a big release, with a lot of breaking changes. You will likely 94 | need to install Demucs from scratch. 95 | 96 | 97 | 98 | - Demucs now supports on-the-fly resampling by a factor of 2. 99 | This improves SDR by almost 0.3 points. 100 | - Random scaling of each source added (from Uhlich et al. 2017). 101 | - Random pitch and tempo augmentation added, from [Cohen-Hadria et al. 2019]. 102 | - With extra augmentation, the best performing Demucs model now has only 64 channels 103 | instead of 100, so model size goes from 2.4GB to 1GB. Also SDR is up from 5.6 to 6.3 when trained only on MusDB. 104 | - Quantized model using [DiffQ](https://github.com/facebookresearch/diffq) has been added. Model size is 150MB, no loss in quality as far as I, or the metrics, 105 | can say. 106 | - Pretrained models are now using the TorchHub interface. 107 | - Overlap mode for separation, to limit inconsistencies at 108 | frame boundaries, with linear transition over the overlap. Overlap is currently 109 | at 25%. Note that this is only done for separation, not training, because 110 | I added that quite late to the code. For Conv-TasNet this can improve 111 | SDR quite a bit (+0.3 points, to 6.0). 112 | - PyPI hosting, for separation, not training! 113 | -------------------------------------------------------------------------------- /src/models_dir/demucs/docs/sdx23.md: -------------------------------------------------------------------------------- 1 | # SDX 23 challenge 2 | 3 | Check out [the challenge page](https://www.aicrowd.com/challenges/sound-demixing-challenge-2023) 4 | for more information. This page is specifically on training models for the [MDX'23 sub-challenge](https://www.aicrowd.com/challenges/sound-demixing-challenge-2023/problems/music-demixing-track-mdx-23). 5 | There are two tracks: one trained on a dataset with bleeding, and the other with label mixups. 6 | 7 | This gives instructions on training a Hybrid Demucs model on those datasets. 8 | I haven't tried the HT Demucs model, as it typically requires quite a bit of training data, but the same could be done with it. 9 | 10 | You will need to work from an up-to-date clone of this repo. See the [generic training instructions](./training.md) for more information. 11 | 12 | ## Getting the data 13 | 14 | Register on the challenge, then check out the [Resources page](https://www.aicrowd.com/challenges/sound-demixing-challenge-2023/problems/music-demixing-track-mdx-23/dataset_files) and download the dataset you are 15 | interested in. 16 | 17 | Update the `conf/dset/sdx23_bleeding.yaml` and `conf/dset/sdx23_labelnoise.yaml` files to point to the right path. 18 | 19 | **Make sure soundfile** is installed (`conda install -c conda-forge libsndfile; pip install soundfile`). 20 | 21 | ### Create proper train / valid structure 22 | 23 | Demucs requires a valid set to work properly. Go to the folder where you extracted the tracks then do: 24 | 25 | ```shell 26 | mkdir train 27 | mv * train # a warning will say that train cannot be moved into itself; that's fine, the other tracks will have moved.
28 | mkdir valid 29 | cd train 30 | mv 5640831d-7853-4d06-8166-988e2844b652 bc964128-da16-4e4c-af95-4d1211e78c70 \ 31 | cc7f7675-d3c8-4a49-a2d7-a8959b694004 f40ffd10-4e8b-41e6-bd8a-971929ca9138 \ 32 | bc1f2967-f834-43bd-aadc-95afc897cfe7 cc3e4991-6cce-40fe-a917-81a4fbb92ea6 \ 33 | ed90a89a-bf22-444d-af3d-d9ac3896ebd2 f4b735de-14b1-4091-a9ba-c8b30c0740a7 ../valid 34 | ``` 35 | 36 | ## Training 37 | 38 | See `dora grid sdx23` for a starting point. You can do `dora grid sdx23 --init --dry_run`, then `dora run -f SIG -d` with `SIG` one of the signatures, 39 | to train on a machine with GPUs if you do not have a SLURM cluster. 40 | 41 | Keep in mind that the valid tracks and train tracks are corrupted in different ways for those tasks, so do not expect 42 | the valid loss to go down as smoothly as with normal training on the clean MusDB. 43 | 44 | I only trained Hybrid Demucs baselines as Hybrid Transformer typically requires more data. 45 | 46 | 47 | ## Exporting models 48 | 49 | Run 50 | ``` 51 | python -m tools.export SIG 52 | ``` 53 | 54 | This will export the trained model into the `release_models` folder. 55 | 56 | ## Submitting a model 57 | 58 | Clone the [Demucs Starter Kit for SDX23](https://github.com/adefossez/sdx23). Follow the instructions there. 59 | 60 | You will need to copy the models under `release_models` into the `sdx23/models/` folder before you can use them. 61 | Make sure you have git-lfs properly installed and set up before adding those files to your fork of `sdx23`. 62 | -------------------------------------------------------------------------------- /src/models_dir/demucs/docs/windows.md: -------------------------------------------------------------------------------- 1 | # Windows support for Demucs 2 | 3 | ## Installation and usage 4 | 5 | If you don't have much experience with Anaconda, Python or the shell, here are more detailed instructions. Note that **Demucs is not supported on 32-bit systems** (as PyTorch is not available there). 6 | 7 | - First install Anaconda with **Python 3.8** or more recent, which you can find [here][install]. 8 | - Start the [Anaconda prompt][prompt]. 9 | 10 | Then, all commands that follow must be run from this prompt. 11 | 12 | <details>
13 | <summary>I have no coding experience and these are too difficult for me</summary> 14 | 15 | > Then a GUI is suitable for you. See [Demucs GUI](https://github.com/CarlGao4/Demucs-Gui) 16 | 17 | </details>
18 | 19 | ### If you want to use your GPU 20 | 21 | If you have a graphics card produced by NVIDIA with more than 2GiB of memory, you can separate tracks with GPU acceleration. To achieve this, you must install PyTorch with CUDA. If PyTorch was already installed (you already installed Demucs for instance), first run `python.exe -m pip uninstall torch torchaudio`. 22 | Then visit the [PyTorch home page](https://pytorch.org/get-started/locally/) and follow its guide to install with CUDA support. Please make sure that the version of torchaudio is no greater than 2.1 (the latest version when this document was written; 2.2.0 is known to be unsupported). 23 | 24 | ### Installation 25 | 26 | Start the Anaconda prompt, and run the following: 27 | 28 | ```cmd 29 | conda install -c conda-forge ffmpeg 30 | python.exe -m pip install -U demucs SoundFile 31 | ``` 32 | 33 | ### Upgrade 34 | 35 | To upgrade Demucs, simply run `python.exe -m pip install -U demucs`, from the Anaconda prompt. 36 | 37 | ### Usage 38 | 39 | Then to use Demucs, just start the **Anaconda prompt** and run: 40 | ``` 41 | demucs -d cpu "PATH_TO_AUDIO_FILE_1" ["PATH_TO_AUDIO_FILE_2" ...] 42 | ``` 43 | The quotes (`"`) around the filename are required if the path contains spaces. A simple way to input these paths is dragging a file from a folder into the terminal. 44 | 45 | To find the separated files, you can run this command and open the folders: 46 | ``` 47 | explorer separated 48 | ``` 49 | 50 | ### Separating an entire folder 51 | 52 | You can use the following command to separate an entire folder of mp3s for instance (replace the extension `.mp3` if need be for other file types): 53 | ``` 54 | cd FOLDER 55 | for %i in (*.mp3) do (demucs -d cpu "%i") 56 | ``` 57 | 58 | ## Potential errors 59 | 60 | If you have an error saying that `mkl_intel_thread.dll` cannot be found, you can try to first run 61 | `conda install -c defaults intel-openmp -f`. Then try again to run the `demucs` command. If it still doesn't work, you can try to run first `set CONDA_DLL_SEARCH_MODIFICATION_ENABLE=1`, then run the `demucs` command again, and hopefully it will work 🙏. 62 | 63 | **If you get a permission error**, please try starting the Anaconda Prompt as administrator.
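If you put the folder loop above into a batch file instead of typing it at the prompt, note that `cmd` requires doubling the percent sign of the loop variable; a minimal sketch:
```
for %%i in (*.mp3) do (demucs -d cpu "%%i")
```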
64 | 65 | 66 | [install]: https://www.anaconda.com/download 67 | [prompt]: https://docs.anaconda.com/anaconda/user-guide/getting-started/#open-prompt-win 68 | -------------------------------------------------------------------------------- /src/models_dir/demucs/environment-cpu.yml: -------------------------------------------------------------------------------- 1 | name: demucs 2 | 3 | channels: 4 | - pytorch 5 | - conda-forge 6 | 7 | dependencies: 8 | - python>=3.8,<3.10 9 | - ffmpeg>=4.2 10 | - pytorch>=1.8.1 11 | - torchaudio>=0.8 12 | - tqdm>=4.36 13 | - pip 14 | - pip: 15 | - diffq>=0.2 16 | - dora-search 17 | - einops 18 | - hydra-colorlog>=1.1 19 | - hydra-core>=1.1 20 | - julius>=0.2.3 21 | - lameenc>=1.2 22 | - openunmix 23 | - musdb>=0.4.0 24 | - museval>=0.4.0 25 | - soundfile 26 | - submitit 27 | - treetable>=0.2.3 28 | 29 | -------------------------------------------------------------------------------- /src/models_dir/demucs/environment-cuda.yml: -------------------------------------------------------------------------------- 1 | name: demucs 2 | 3 | channels: 4 | - pytorch 5 | - conda-forge 6 | 7 | dependencies: 8 | - python>=3.8,<3.10 9 | - ffmpeg>=4.2 10 | - pytorch>=1.8.1 11 | - torchaudio>=0.8 12 | - cudatoolkit>=10 13 | - tqdm>=4.36 14 | - pip 15 | - pip: 16 | - diffq>=0.2 17 | - dora-search 18 | - einops 19 | - hydra-colorlog>=1.1 20 | - hydra-core>=1.1 21 | - julius>=0.2.3 22 | - lameenc>=1.2 23 | - openunmix 24 | - musdb>=0.4.0 25 | - museval>=0.4.0 26 | - soundfile 27 | - submitit 28 | - treetable>=0.2.3 29 | -------------------------------------------------------------------------------- /src/models_dir/demucs/hubconf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | dependencies = ['dora-search', 'julius', 'lameenc', 'openunmix', 'pyyaml', 8 | 'torch', 'torchaudio', 'tqdm'] 9 | 10 | from demucs.pretrained import get_model 11 | 12 | -------------------------------------------------------------------------------- /src/models_dir/demucs/mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | 3 | [mypy-treetable,torchaudio.*,diffq,yaml,tqdm,lameenc,musdb,museval,openunmix.*,einops,xformers.*] 4 | ignore_missing_imports = True 5 | 6 | -------------------------------------------------------------------------------- /src/models_dir/demucs/outputs.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/models_dir/demucs/outputs.tar.gz -------------------------------------------------------------------------------- /src/models_dir/demucs/requirements.txt: -------------------------------------------------------------------------------- 1 | # please make sure you have already a pytorch install that is cuda enabled! 
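# for instance: pip install torch torchaudio --index-url https://download.pytorch.org/whl/cu118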
2 | dora-search>=0.1.12 3 | diffq>=0.2.1 4 | einops 5 | flake8 6 | hydra-colorlog>=1.1 7 | hydra-core>=1.1 8 | julius>=0.2.3 9 | lameenc>=1.2 10 | museval 11 | mypy 12 | openunmix 13 | pyyaml 14 | submitit 15 | torch>=1.8.1 16 | torchaudio>=0.8,<2.1 17 | tqdm 18 | treetable 19 | soundfile>=0.10.3;sys_platform=="win32" 20 | -------------------------------------------------------------------------------- /src/models_dir/demucs/requirements_minimal.txt: -------------------------------------------------------------------------------- 1 | # please make sure you have already a pytorch install that is cuda enabled! 2 | dora-search 3 | einops 4 | julius>=0.2.3 5 | lameenc>=1.2 6 | openunmix 7 | pyyaml 8 | torch>=1.8.1 9 | torchaudio>=0.8,<2.1 10 | tqdm 11 | -------------------------------------------------------------------------------- /src/models_dir/demucs/setup.cfg: -------------------------------------------------------------------------------- 1 | [pep8] 2 | max-line-length = 100 3 | 4 | [flake8] 5 | max-line-length = 100 6 | 7 | [yapf] 8 | column_limit = 100 9 | -------------------------------------------------------------------------------- /src/models_dir/demucs/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # author: adefossez 7 | # Inspired from https://github.com/kennethreitz/setup.py 8 | 9 | from pathlib import Path 10 | 11 | from setuptools import setup 12 | 13 | 14 | NAME = 'demucs' 15 | DESCRIPTION = 'Music source separation in the waveform domain.' 16 | 17 | URL = 'https://github.com/facebookresearch/demucs' 18 | EMAIL = 'defossez@fb.com' 19 | AUTHOR = 'Alexandre Défossez' 20 | REQUIRES_PYTHON = '>=3.8.0' 21 | 22 | HERE = Path(__file__).parent 23 | 24 | # Get version without explicitly loading the module.
25 | for line in open('demucs/__init__.py'): 26 | line = line.strip() 27 | if '__version__' in line: 28 | context = {} 29 | exec(line, context) 30 | VERSION = context['__version__'] 31 | 32 | 33 | def load_requirements(name): 34 | required = [i.strip() for i in open(HERE / name)] 35 | required = [i for i in required if not i.startswith('#')] 36 | return required 37 | 38 | 39 | REQUIRED = load_requirements('requirements_minimal.txt') 40 | ALL_REQUIRED = load_requirements('requirements.txt') 41 | 42 | try: 43 | with open(HERE / "README.md", encoding='utf-8') as f: 44 | long_description = '\n' + f.read() 45 | except FileNotFoundError: 46 | long_description = DESCRIPTION 47 | 48 | setup( 49 | name=NAME, 50 | version=VERSION, 51 | description=DESCRIPTION, 52 | long_description=long_description, 53 | long_description_content_type='text/markdown', 54 | author=AUTHOR, 55 | author_email=EMAIL, 56 | python_requires=REQUIRES_PYTHON, 57 | url=URL, 58 | packages=['demucs'], 59 | extras_require={ 60 | 'dev': ALL_REQUIRED, 61 | }, 62 | install_requires=REQUIRED, 63 | include_package_data=True, 64 | entry_points={ 65 | 'console_scripts': ['demucs=demucs.separate:main'], 66 | }, 67 | license='MIT License', 68 | classifiers=[ 69 | # Trove classifiers 70 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers 71 | 'License :: OSI Approved :: MIT License', 72 | 'Topic :: Multimedia :: Sound/Audio', 73 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 74 | ], 75 | ) 76 | -------------------------------------------------------------------------------- /src/models_dir/demucs/test.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/models_dir/demucs/test.mp3 -------------------------------------------------------------------------------- /src/models_dir/demucs/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /src/models_dir/demucs/tools/bench.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
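# Usage (inferred from the sys.argv handling below): run from the repository
# root as `python -m tools.bench SIG [dora overrides ...]`, where SIG is the
# signature of an already trained experiment.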
6 | 7 | """ 8 | Benchmarking script, useful to check for OOM, reasonable train time, 9 | and, for the MDX competition, to estimate whether we will match the time limit.""" 10 | from contextlib import contextmanager 11 | import logging 12 | import sys 13 | import time 14 | import torch 15 | from fractions import Fraction 16 | from demucs.train import get_solver, main 17 | from demucs.apply import apply_model 18 | 19 | logging.basicConfig(level=logging.INFO, stream=sys.stderr) 20 | 21 | 22 | class Result: 23 | pass 24 | 25 | 26 | @contextmanager 27 | def bench(): 28 | import gc 29 | gc.collect() 30 | torch.cuda.reset_max_memory_allocated() 31 | torch.cuda.empty_cache() 32 | result = Result() 33 | # before = torch.cuda.memory_allocated() 34 | before = 0 35 | begin = time.time() 36 | try: 37 | yield result 38 | finally: 39 | torch.cuda.synchronize() 40 | mem = (torch.cuda.max_memory_allocated() - before) / 2 ** 20 41 | tim = time.time() - begin 42 | result.mem = mem 43 | result.tim = tim 44 | 45 | 46 | xp = main.get_xp_from_sig(sys.argv[1]) 47 | xp = main.get_xp(xp.argv + sys.argv[2:]) 48 | with xp.enter(): 49 | solver = get_solver(xp.cfg) 50 | if getattr(solver.model, 'use_train_segment', False): 51 | batch = solver.augment(next(iter(solver.loaders['train']))) 52 | solver.model.segment = Fraction(batch.shape[-1], solver.model.samplerate) 53 | train_segment = solver.model.segment 54 | solver.model.eval() 55 | model = solver.model 56 | model.cuda() 57 | x = torch.randn(2, xp.cfg.dset.channels, int(10 * model.samplerate), device='cuda') 58 | with bench() as res: 59 | y = model(x) 60 | y.sum().backward() 61 | del y 62 | for p in model.parameters(): 63 | p.grad = None 64 | print(f"FB: {res.mem:.1f} MB, {res.tim * 1000:.1f} ms")  # forward + backward pass 65 | 66 | x = torch.randn(1, xp.cfg.dset.channels, int(model.segment * model.samplerate), device='cuda') 67 | with bench() as res: 68 | with torch.no_grad(): 69 | y = model(x) 70 | del y 71 | print(f"FV: {res.mem:.1f} MB, {res.tim * 1000:.1f} ms")  # no_grad forward pass 72 | 73 | model.cpu() 74 | torch.set_num_threads(1) 75 | test = torch.randn(1, xp.cfg.dset.channels, model.samplerate * 40) 76 | b = time.time() 77 | apply_model(model, test, split=True, shifts=1) 78 | print("CPU 40 sec:", time.time() - b) 79 | -------------------------------------------------------------------------------- /src/models_dir/demucs/tools/convert.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # Script to convert option names and model args from the dev branch to 8 | # the cleaned-up release one. There should be no reason to use it anymore.
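# Editorial sketch, not part of the original file: transform() below first drops
# retired flags (TO_REMOVE), then applies the substring renames in TO_REPLACE,
# then prepends the TO_INJECT defaults whose condition is present. A hypothetical
# dev-branch argv such as
#     ['model=wdemucs', 'wdemucs.nice=True', 'power.penalty=0']
# would come out, after remove / replace / inject, as
#     ['hdemucs.norm_starts=999', 'hdemucs.cac=False', 'model=hdemucs', 'svd.penalty=0']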
9 | 10 | import argparse 11 | import io 12 | import json 13 | from pathlib import Path 14 | import subprocess as sp 15 | 16 | import torch 17 | 18 | from demucs import train, pretrained, states 19 | 20 | DEV_REPO = Path.home() / 'tmp/release_demucs_mdx' 21 | 22 | 23 | TO_REMOVE = [ 24 | 'demucs.dconv_kw.gelu=True', 25 | 'demucs.dconv_kw.nfreqs=0', 26 | 'demucs.dconv_kw.nfreqs=0', 27 | 'demucs.dconv_kw.version=4', 28 | 'demucs.norm=gn', 29 | 'wdemucs.nice=True', 30 | 'wdemucs.good=True', 31 | 'wdemucs.freq_emb=-0.2', 32 | 'special=True', 33 | 'special=False', 34 | ] 35 | 36 | TO_REPLACE = [ 37 | ('power', 'svd'), 38 | ('wdemucs', 'hdemucs'), 39 | ('hdemucs.hybrid=True', 'hdemucs.hybrid_old=True'), 40 | ('hdemucs.hybrid=2', 'hdemucs.hybrid=True'), 41 | ] 42 | 43 | TO_INJECT = [ 44 | ('model=hdemucs', ['hdemucs.cac=False']), 45 | ('model=hdemucs', ['hdemucs.norm_starts=999']), 46 | ] 47 | 48 | 49 | def get_original_argv(sig): 50 | return json.load(open(Path(DEV_REPO) / f'outputs/xps/{sig}/.argv.json')) 51 | 52 | 53 | def transform(argv, mappings, verbose=False): 54 | for rm in TO_REMOVE: 55 | while rm in argv: 56 | argv.remove(rm) 57 | 58 | for old, new in TO_REPLACE: 59 | argv[:] = [a.replace(old, new) for a in argv] 60 | 61 | for condition, args in TO_INJECT: 62 | if condition in argv: 63 | argv[:] = args + argv 64 | 65 | for idx, arg in enumerate(argv): 66 | if 'continue_from=' in arg: 67 | dep_sig = arg.split('=')[1] 68 | if dep_sig.startswith('"'): 69 | dep_sig = eval(dep_sig) 70 | if verbose: 71 | print("Need to recursively convert dependency XP", dep_sig) 72 | new_sig = convert(dep_sig, mappings, verbose).sig 73 | argv[idx] = f'continue_from="{new_sig}"' 74 | 75 | 76 | def convert(sig, mappings, verbose=False): 77 | argv = get_original_argv(sig) 78 | if verbose: 79 | print("Original argv", argv) 80 | transform(argv, mappings, verbose) 81 | if verbose: 82 | print("New argv", argv) 83 | xp = train.main.get_xp(argv) 84 | train.main.init_xp(xp) 85 | if verbose: 86 | print("Mapping", sig, "->", xp.sig) 87 | mappings[sig] = xp.sig 88 | return xp 89 | 90 | 91 | def _eval_old(old_sig, x): 92 | script = ( 93 | 'from demucs import pretrained; import torch; import sys; import io; ' 94 | 'buf = io.BytesIO(sys.stdin.buffer.read()); ' 95 | 'x = torch.load(buf); m = pretrained.load_pretrained_model(' 96 | f'"{old_sig}"); torch.save(m(x), sys.stdout.buffer)') 97 | 98 | buf = io.BytesIO() 99 | torch.save(x, buf) 100 | proc = sp.run( 101 | ['python3', '-c', script], input=buf.getvalue(), capture_output=True, cwd=DEV_REPO) 102 | if proc.returncode != 0: 103 | print("Error", proc.stderr.decode()) 104 | assert False 105 | 106 | buf = io.BytesIO(proc.stdout) 107 | return torch.load(buf) 108 | 109 | 110 | def compare(old_sig, model): 111 | test = torch.randn(1, 2, 44100 * 10) 112 | old_out = _eval_old(old_sig, test) 113 | out = model(test) 114 | 115 | delta = 20 * torch.log10((out - old_out).norm() / out.norm()).item() 116 | return delta 117 | 118 | 119 | def main(): 120 | torch.manual_seed(1234) 121 | parser = argparse.ArgumentParser('convert') 122 | parser.add_argument('sigs', nargs='*') 123 | parser.add_argument('-o', '--output', type=Path, default=Path('release_models')) 124 | parser.add_argument('-d', '--dump', action='store_true') 125 | parser.add_argument('-c', '--compare', action='store_true') 126 | parser.add_argument('-v', '--verbose', action='store_true') 127 | args = parser.parse_args() 128 | 129 | args.output.mkdir(exist_ok=True, parents=True) 130 | mappings = {} 131 | for sig in args.sigs: 132 
| xp = convert(sig, mappings, args.verbose) 133 | if args.dump or args.compare: 134 | old_pkg = pretrained._load_package(sig, old=True) 135 | model = train.get_model(xp.cfg) 136 | model.load_state_dict(old_pkg['state']) 137 | if args.dump: 138 | pkg = states.serialize_model(model, xp.cfg) 139 | states.save_with_checksum(pkg, args.output / f'{xp.sig}.th') 140 | if args.compare: 141 | delta = compare(sig, model) 142 | print("Delta for", sig, xp.sig, delta) 143 | 144 | mappings[sig] = xp.sig 145 | 146 | print("FINAL MAPPINGS") 147 | for old, new in mappings.items(): 148 | print(old, " ", new) 149 | 150 | 151 | if __name__ == '__main__': 152 | main() 153 | -------------------------------------------------------------------------------- /src/models_dir/demucs/tools/export.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | """Export a trained model from the full checkpoint (with optimizer etc.) to 8 | a final checkpoint, with only the model itself. The model is always stored in 9 | half precision to save space, and because this has zero impact on the final loss. 10 | When DiffQ was used for training, the model will actually be quantized and bitpacked.""" 11 | from argparse import ArgumentParser 12 | from fractions import Fraction 13 | import logging 14 | from pathlib import Path 15 | import sys 16 | import torch 17 | 18 | from demucs import train 19 | from demucs.states import serialize_model, save_with_checksum 20 | 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | def main(): 26 | logging.basicConfig(level=logging.INFO, stream=sys.stderr) 27 | 28 | parser = ArgumentParser("tools.export", description="Export trained models from XP sigs.") 29 | parser.add_argument('signatures', nargs='*', help='XP signatures.') 30 | parser.add_argument('-o', '--out', type=Path, default=Path("release_models"), 31 | help="Folder where the release models are stored (default: release_models).") 32 | parser.add_argument('-s', '--sign', action='store_true', 33 | help='Add a sha256 prefix checksum to the filename.') 34 | 35 | args = parser.parse_args() 36 | args.out.mkdir(exist_ok=True, parents=True) 37 | 38 | for sig in args.signatures: 39 | xp = train.main.get_xp_from_sig(sig) 40 | name = train.main.get_name(xp) 41 | logger.info('Handling %s/%s', sig, name) 42 | 43 | out_path = args.out / (sig + ".th") 44 | 45 | solver = train.get_solver_from_sig(sig) 46 | if len(solver.history) < solver.args.epochs: 47 | logger.warning( 48 | 'Model %s has fewer epochs than expected (%d / %d)', 49 | sig, len(solver.history), solver.args.epochs) 50 | 51 | solver.model.load_state_dict(solver.best_state) 52 | pkg = serialize_model(solver.model, solver.args, solver.quantizer, half=True) 53 | if getattr(solver.model, 'use_train_segment', False): 54 | batch = solver.augment(next(iter(solver.loaders['train']))) 55 | pkg['kwargs']['segment'] = Fraction(batch.shape[-1], solver.model.samplerate) 56 | print("Override", pkg['kwargs']['segment']) 57 | valid, test = None, None 58 | for m in solver.history: 59 | if 'valid' in m: 60 | valid = m['valid'] 61 | if 'test' in m: 62 | test = m['test'] 63 | pkg['metrics'] = (valid, test) 64 | if args.sign: 65 | save_with_checksum(pkg, out_path) 66 | else: 67 | torch.save(pkg, out_path) 68 | 69 | 70 | if __name__ == '__main__': 71 | main() 72 |
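A minimal sketch (an editorial addition, using a hypothetical path) of reading back a package written by this exporter; 'kwargs' and 'metrics' are the keys populated in main() above, and torch.load mirrors the torch.save call used when --sign is not passed:

import torch

# Hypothetical output of tools.export for signature SIG, saved without --sign.
pkg = torch.load('release_models/SIG.th', map_location='cpu')

valid, test = pkg['metrics']  # the (valid, test) tuple packed by main()
print('segment override:', pkg['kwargs'].get('segment'))
print('last valid metrics:', valid)
print('last test metrics:', test)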
-------------------------------------------------------------------------------- /src/models_dir/demucs/tools/notpytest_test_pretrained.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # Script to evaluate pretrained models. 8 | 9 | from argparse import ArgumentParser 10 | import logging 11 | import sys 12 | 13 | import torch 14 | 15 | from demucs import train, pretrained, evaluate 16 | 17 | 18 | def main(): 19 | torch.set_num_threads(1) 20 | logging.basicConfig(stream=sys.stderr, level=logging.INFO) 21 | parser = ArgumentParser("tools.test_pretrained", 22 | description="Evaluate pre-trained models or bags of models " 23 | "on MusDB.") 24 | pretrained.add_model_flags(parser) 25 | parser.add_argument('overrides', nargs='*', 26 | help='Extra overrides, e.g. test.shifts=2.') 27 | args = parser.parse_args() 28 | 29 | xp = train.main.get_xp(args.overrides) 30 | with xp.enter(): 31 | solver = train.get_solver(xp.cfg) 32 | 33 | model = pretrained.get_model_from_args(args) 34 | solver.model = model.to(solver.device) 35 | solver.model.eval() 36 | 37 | with torch.no_grad(): 38 | results = evaluate.evaluate(solver, xp.cfg.test.sdr) 39 | print(results) 40 | 41 | 42 | if __name__ == '__main__': 43 | main() 44 | -------------------------------------------------------------------------------- /src/models_dir/mdx/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/models_dir/mdx/__init__.py -------------------------------------------------------------------------------- /src/models_dir/mdx/mdxnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .modules import TFC_TDF 4 | from pytorch_lightning import LightningModule 5 | 6 | dim_s = 4 7 | 8 | class AbstractMDXNet(LightningModule): 9 | def __init__(self, target_name, lr, optimizer, dim_c, dim_f, dim_t, n_fft, hop_length, overlap): 10 | super().__init__() 11 | self.target_name = target_name 12 | self.lr = lr 13 | self.optimizer = optimizer 14 | self.dim_c = dim_c 15 | self.dim_f = dim_f 16 | self.dim_t = dim_t 17 | self.n_fft = n_fft 18 | self.n_bins = n_fft // 2 + 1 19 | self.hop_length = hop_length 20 | self.window = nn.Parameter(torch.hann_window(window_length=self.n_fft, periodic=True), requires_grad=False) 21 | self.freq_pad = nn.Parameter(torch.zeros([1, dim_c, self.n_bins - self.dim_f, self.dim_t]), requires_grad=False) 22 | 23 | def get_optimizer(self): 24 | if self.optimizer == 'rmsprop': 25 | return torch.optim.RMSprop(self.parameters(), self.lr) 26 | 27 | if self.optimizer == 'adamw': 28 | return torch.optim.AdamW(self.parameters(), self.lr) 29 | 30 | class ConvTDFNet(AbstractMDXNet): 31 | def __init__(self, target_name, lr, optimizer, dim_c, dim_f, dim_t, n_fft, hop_length, 32 | num_blocks, l, g, k, bn, bias, overlap): 33 | 34 | super(ConvTDFNet, self).__init__( 35 | target_name, lr, optimizer, dim_c, dim_f, dim_t, n_fft, hop_length, overlap) 36 | #self.save_hyperparameters() 37 | 38 | self.num_blocks = num_blocks 39 | self.l = l 40 | self.g = g 41 | self.k = k 42 | self.bn = bn 43 | self.bias = bias 44 | 45 | if optimizer == 'rmsprop': 46 | norm 
= nn.BatchNorm2d 47 | 48 | if optimizer == 'adamw': 49 | norm = lambda input:nn.GroupNorm(2, input) 50 | 51 | self.n = num_blocks // 2 52 | scale = (2, 2) 53 | 54 | self.first_conv = nn.Sequential( 55 | nn.Conv2d(in_channels=self.dim_c, out_channels=g, kernel_size=(1, 1)), 56 | norm(g), 57 | nn.ReLU(), 58 | ) 59 | 60 | f = self.dim_f 61 | c = g 62 | self.encoding_blocks = nn.ModuleList() 63 | self.ds = nn.ModuleList() 64 | for i in range(self.n): 65 | self.encoding_blocks.append(TFC_TDF(c, l, f, k, bn, bias=bias, norm=norm)) 66 | self.ds.append( 67 | nn.Sequential( 68 | nn.Conv2d(in_channels=c, out_channels=c + g, kernel_size=scale, stride=scale), 69 | norm(c + g), 70 | nn.ReLU() 71 | ) 72 | ) 73 | f = f // 2 74 | c += g 75 | 76 | self.bottleneck_block = TFC_TDF(c, l, f, k, bn, bias=bias, norm=norm) 77 | 78 | self.decoding_blocks = nn.ModuleList() 79 | self.us = nn.ModuleList() 80 | for i in range(self.n): 81 | self.us.append( 82 | nn.Sequential( 83 | nn.ConvTranspose2d(in_channels=c, out_channels=c - g, kernel_size=scale, stride=scale), 84 | norm(c - g), 85 | nn.ReLU() 86 | ) 87 | ) 88 | f = f * 2 89 | c -= g 90 | 91 | self.decoding_blocks.append(TFC_TDF(c, l, f, k, bn, bias=bias, norm=norm)) 92 | 93 | self.final_conv = nn.Sequential( 94 | nn.Conv2d(in_channels=c, out_channels=self.dim_c, kernel_size=(1, 1)), 95 | ) 96 | 97 | def forward(self, x): 98 | 99 | x = self.first_conv(x) 100 | 101 | x = x.transpose(-1, -2) 102 | 103 | ds_outputs = [] 104 | for i in range(self.n): 105 | x = self.encoding_blocks[i](x) 106 | ds_outputs.append(x) 107 | x = self.ds[i](x) 108 | 109 | x = self.bottleneck_block(x) 110 | 111 | for i in range(self.n): 112 | x = self.us[i](x) 113 | x *= ds_outputs[-i - 1] 114 | x = self.decoding_blocks[i](x) 115 | 116 | x = x.transpose(-1, -2) 117 | 118 | x = self.final_conv(x) 119 | 120 | return x 121 | 122 | class Mixer(nn.Module): 123 | def __init__(self, device, mixer_path): 124 | 125 | super(Mixer, self).__init__() 126 | 127 | self.linear = nn.Linear((dim_s+1)*2, dim_s*2, bias=False) 128 | 129 | self.load_state_dict( 130 | torch.load(mixer_path, map_location=device) 131 | ) 132 | 133 | def forward(self, x): 134 | x = x.reshape(1,(dim_s+1)*2,-1).transpose(-1,-2) 135 | x = self.linear(x) 136 | return x.transpose(-1,-2).reshape(dim_s,2,-1) -------------------------------------------------------------------------------- /src/models_dir/mdx/modelparams/model_name_mapper.json: -------------------------------------------------------------------------------- 1 | { 2 | "UVR_MDXNET_1_9703": "UVR-MDX-NET 1", 3 | "UVR_MDXNET_2_9682": "UVR-MDX-NET 2", 4 | "UVR_MDXNET_3_9662": "UVR-MDX-NET 3", 5 | "UVR_MDXNET_KARA": "UVR-MDX-NET Karaoke", 6 | "UVR_MDXNET_Main": "UVR-MDX-NET Main", 7 | "UVR-MDX-NET-Inst_1": "UVR-MDX-NET Inst 1", 8 | "UVR-MDX-NET-Inst_2": "UVR-MDX-NET Inst 2", 9 | "UVR-MDX-NET-Inst_3": "UVR-MDX-NET Inst 3", 10 | "UVR-MDX-NET-Inst_4": "UVR-MDX-NET Inst 4", 11 | "UVR-MDX-NET-Inst_Main": "UVR-MDX-NET Inst Main", 12 | "UVR-MDX-NET-Inst_Main_2": "UVR-MDX-NET Inst Main 2", 13 | "UVR-MDX-NET-Inst_HQ_1": "UVR-MDX-NET Inst HQ 1", 14 | "UVR-MDX-NET-Inst_HQ_2": "UVR-MDX-NET Inst HQ 2", 15 | "UVR-MDX-NET-Inst_HQ_3": "UVR-MDX-NET Inst HQ 3", 16 | "UVR_MDXNET_KARA_2": "UVR-MDX-NET Karaoke 2", 17 | "Kim_Vocal_1": "Kim Vocal 1", 18 | "Kim_Vocal_2": "Kim Vocal 2", 19 | "Kim_Inst": "Kim Inst", 20 | "MDX23C-8KFFT-InstVoc_HQ.ckpt": "MDX23C-InstVoc HQ", 21 | "MDX23C-8KFFT-InstVoc_HQ_2.ckpt": "MDX23C-InstVoc HQ 2", 22 | "MDX23C_D1581.ckpt": "MDX23C-InstVoc D1581", 23 | 
"Reverb_HQ_By_FoxJoy": "Reverb HQ" 24 | } -------------------------------------------------------------------------------- /src/models_dir/mdx/modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class TFC(nn.Module): 6 | def __init__(self, c, l, k, norm): 7 | super(TFC, self).__init__() 8 | 9 | self.H = nn.ModuleList() 10 | for i in range(l): 11 | self.H.append( 12 | nn.Sequential( 13 | nn.Conv2d(in_channels=c, out_channels=c, kernel_size=k, stride=1, padding=k // 2), 14 | norm(c), 15 | nn.ReLU(), 16 | ) 17 | ) 18 | 19 | def forward(self, x): 20 | for h in self.H: 21 | x = h(x) 22 | return x 23 | 24 | 25 | class DenseTFC(nn.Module): 26 | def __init__(self, c, l, k, norm): 27 | super(DenseTFC, self).__init__() 28 | 29 | self.conv = nn.ModuleList() 30 | for i in range(l): 31 | self.conv.append( 32 | nn.Sequential( 33 | nn.Conv2d(in_channels=c, out_channels=c, kernel_size=k, stride=1, padding=k // 2), 34 | norm(c), 35 | nn.ReLU(), 36 | ) 37 | ) 38 | 39 | def forward(self, x): 40 | for layer in self.conv[:-1]: 41 | x = torch.cat([layer(x), x], 1) 42 | return self.conv[-1](x) 43 | 44 | 45 | class TFC_TDF(nn.Module): 46 | def __init__(self, c, l, f, k, bn, dense=False, bias=True, norm=nn.BatchNorm2d): 47 | 48 | super(TFC_TDF, self).__init__() 49 | 50 | self.use_tdf = bn is not None 51 | 52 | self.tfc = DenseTFC(c, l, k, norm) if dense else TFC(c, l, k, norm) 53 | 54 | if self.use_tdf: 55 | if bn == 0: 56 | self.tdf = nn.Sequential( 57 | nn.Linear(f, f, bias=bias), 58 | norm(c), 59 | nn.ReLU() 60 | ) 61 | else: 62 | self.tdf = nn.Sequential( 63 | nn.Linear(f, f // bn, bias=bias), 64 | norm(c), 65 | nn.ReLU(), 66 | nn.Linear(f // bn, f, bias=bias), 67 | norm(c), 68 | nn.ReLU() 69 | ) 70 | 71 | def forward(self, x): 72 | x = self.tfc(x) 73 | return x + self.tdf(x) if self.use_tdf else x 74 | 75 | -------------------------------------------------------------------------------- /src/models_dir/mdx/pyrb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import tempfile 4 | import six 5 | import numpy as np 6 | import soundfile as sf 7 | import sys 8 | 9 | if getattr(sys, 'frozen', False): 10 | BASE_PATH_RUB = sys._MEIPASS 11 | else: 12 | BASE_PATH_RUB = os.path.dirname(os.path.abspath(__file__)) 13 | 14 | __all__ = ['time_stretch', 'pitch_shift'] 15 | 16 | __RUBBERBAND_UTIL = os.path.join(BASE_PATH_RUB, 'rubberband') 17 | 18 | if six.PY2: 19 | DEVNULL = open(os.devnull, 'w') 20 | else: 21 | DEVNULL = subprocess.DEVNULL 22 | 23 | def __rubberband(y, sr, **kwargs): 24 | 25 | assert sr > 0 26 | 27 | # Get the input and output tempfile 28 | fd, infile = tempfile.mkstemp(suffix='.wav') 29 | os.close(fd) 30 | fd, outfile = tempfile.mkstemp(suffix='.wav') 31 | os.close(fd) 32 | 33 | # dump the audio 34 | sf.write(infile, y, sr) 35 | 36 | try: 37 | # Execute rubberband 38 | arguments = [__RUBBERBAND_UTIL, '-q'] 39 | 40 | for key, value in six.iteritems(kwargs): 41 | arguments.append(str(key)) 42 | arguments.append(str(value)) 43 | 44 | arguments.extend([infile, outfile]) 45 | 46 | subprocess.check_call(arguments, stdout=DEVNULL, stderr=DEVNULL) 47 | 48 | # Load the processed audio. 49 | y_out, _ = sf.read(outfile, always_2d=True) 50 | 51 | # make sure that output dimensions matches input 52 | if y.ndim == 1: 53 | y_out = np.squeeze(y_out) 54 | 55 | except OSError as exc: 56 | six.raise_from(RuntimeError('Failed to execute rubberband. 
' 57 | 'Please verify that rubberband-cli ' 58 | 'is installed.'), 59 | exc) 60 | 61 | finally: 62 | # Remove temp files 63 | os.unlink(infile) 64 | os.unlink(outfile) 65 | 66 | return y_out 67 | 68 | def time_stretch(y, sr, rate, rbargs=None): 69 | if rate <= 0: 70 | raise ValueError('rate must be strictly positive') 71 | 72 | if rate == 1.0: 73 | return y 74 | 75 | if rbargs is None: 76 | rbargs = dict() 77 | 78 | rbargs.setdefault('--tempo', rate) 79 | 80 | return __rubberband(y, sr, **rbargs) 81 | 82 | def pitch_shift(y, sr, n_steps, rbargs=None): 83 | 84 | if n_steps == 0: 85 | return y 86 | 87 | if rbargs is None: 88 | rbargs = dict() 89 | 90 | rbargs.setdefault('--pitch', n_steps) 91 | 92 | return __rubberband(y, sr, **rbargs) 93 | -------------------------------------------------------------------------------- /src/models_dir/mdxc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/models_dir/mdxc/__init__.py -------------------------------------------------------------------------------- /src/models_dir/mdxc/mdxnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .modules import TFC_TDF 4 | from pytorch_lightning import LightningModule 5 | 6 | dim_s = 4 7 | 8 | class AbstractMDXNet(LightningModule): 9 | def __init__(self, target_name, lr, optimizer, dim_c, dim_f, dim_t, n_fft, hop_length, overlap): 10 | super().__init__() 11 | self.target_name = target_name 12 | self.lr = lr 13 | self.optimizer = optimizer 14 | self.dim_c = dim_c 15 | self.dim_f = dim_f 16 | self.dim_t = dim_t 17 | self.n_fft = n_fft 18 | self.n_bins = n_fft // 2 + 1 19 | self.hop_length = hop_length 20 | self.window = nn.Parameter(torch.hann_window(window_length=self.n_fft, periodic=True), requires_grad=False) 21 | self.freq_pad = nn.Parameter(torch.zeros([1, dim_c, self.n_bins - self.dim_f, self.dim_t]), requires_grad=False) 22 | 23 | def get_optimizer(self): 24 | if self.optimizer == 'rmsprop': 25 | return torch.optim.RMSprop(self.parameters(), self.lr) 26 | 27 | if self.optimizer == 'adamw': 28 | return torch.optim.AdamW(self.parameters(), self.lr) 29 | 30 | class ConvTDFNet(AbstractMDXNet): 31 | def __init__(self, target_name, lr, optimizer, dim_c, dim_f, dim_t, n_fft, hop_length, 32 | num_blocks, l, g, k, bn, bias, overlap): 33 | 34 | super(ConvTDFNet, self).__init__( 35 | target_name, lr, optimizer, dim_c, dim_f, dim_t, n_fft, hop_length, overlap) 36 | #self.save_hyperparameters() 37 | 38 | self.num_blocks = num_blocks 39 | self.l = l 40 | self.g = g 41 | self.k = k 42 | self.bn = bn 43 | self.bias = bias 44 | 45 | if optimizer == 'rmsprop': 46 | norm = nn.BatchNorm2d 47 | 48 | if optimizer == 'adamw': 49 | norm = lambda input:nn.GroupNorm(2, input) 50 | 51 | self.n = num_blocks // 2 52 | scale = (2, 2) 53 | 54 | self.first_conv = nn.Sequential( 55 | nn.Conv2d(in_channels=self.dim_c, out_channels=g, kernel_size=(1, 1)), 56 | norm(g), 57 | nn.ReLU(), 58 | ) 59 | 60 | f = self.dim_f 61 | c = g 62 | self.encoding_blocks = nn.ModuleList() 63 | self.ds = nn.ModuleList() 64 | for i in range(self.n): 65 | self.encoding_blocks.append(TFC_TDF(c, l, f, k, bn, bias=bias, norm=norm)) 66 | self.ds.append( 67 | nn.Sequential( 68 | nn.Conv2d(in_channels=c, out_channels=c + g, kernel_size=scale, stride=scale), 69 | norm(c + g), 70 | nn.ReLU() 71 | ) 72 | ) 73 | f = f // 2 74 | c 
+= g 75 | 76 | self.bottleneck_block = TFC_TDF(c, l, f, k, bn, bias=bias, norm=norm) 77 | 78 | self.decoding_blocks = nn.ModuleList() 79 | self.us = nn.ModuleList() 80 | for i in range(self.n): 81 | self.us.append( 82 | nn.Sequential( 83 | nn.ConvTranspose2d(in_channels=c, out_channels=c - g, kernel_size=scale, stride=scale), 84 | norm(c - g), 85 | nn.ReLU() 86 | ) 87 | ) 88 | f = f * 2 89 | c -= g 90 | 91 | self.decoding_blocks.append(TFC_TDF(c, l, f, k, bn, bias=bias, norm=norm)) 92 | 93 | self.final_conv = nn.Sequential( 94 | nn.Conv2d(in_channels=c, out_channels=self.dim_c, kernel_size=(1, 1)), 95 | ) 96 | 97 | def forward(self, x): 98 | 99 | x = self.first_conv(x) 100 | 101 | x = x.transpose(-1, -2) 102 | 103 | ds_outputs = [] 104 | for i in range(self.n): 105 | x = self.encoding_blocks[i](x) 106 | ds_outputs.append(x) 107 | x = self.ds[i](x) 108 | 109 | x = self.bottleneck_block(x) 110 | 111 | for i in range(self.n): 112 | x = self.us[i](x) 113 | x *= ds_outputs[-i - 1] 114 | x = self.decoding_blocks[i](x) 115 | 116 | x = x.transpose(-1, -2) 117 | 118 | x = self.final_conv(x) 119 | 120 | return x 121 | 122 | class Mixer(nn.Module): 123 | def __init__(self, device, mixer_path): 124 | 125 | super(Mixer, self).__init__() 126 | 127 | self.linear = nn.Linear((dim_s+1)*2, dim_s*2, bias=False) 128 | 129 | self.load_state_dict( 130 | torch.load(mixer_path, map_location=device) 131 | ) 132 | 133 | def forward(self, x): 134 | x = x.reshape(1,(dim_s+1)*2,-1).transpose(-1,-2) 135 | x = self.linear(x) 136 | return x.transpose(-1,-2).reshape(dim_s,2,-1) -------------------------------------------------------------------------------- /src/models_dir/mdxc/modelparams/mdx_c_configs/model1.yaml: -------------------------------------------------------------------------------- 1 | audio: 2 | chunk_size: 260096 3 | dim_f: 4096 4 | dim_t: 128 5 | hop_length: 2048 6 | n_fft: 8192 7 | num_channels: 2 8 | sample_rate: 44100 9 | model: 10 | act: gelu 11 | bottleneck_factor: 4 12 | growth: 64 13 | norm: InstanceNorm 14 | num_blocks_per_scale: 2 15 | num_channels: 128 16 | num_scales: 5 17 | num_subbands: 4 18 | scale: 19 | - 2 20 | - 2 21 | training: 22 | batch_size: 8 23 | grad_clip: 0 24 | instruments: 25 | - Vocals 26 | - Drums 27 | - Bass 28 | - Other 29 | lr: 5.0e-05 30 | target_instrument: null 31 | inference: 32 | batch_size: 1 33 | dim_t: 256 34 | num_overlap: 8 -------------------------------------------------------------------------------- /src/models_dir/mdxc/modelparams/mdx_c_configs/model2.yaml: -------------------------------------------------------------------------------- 1 | audio: 2 | chunk_size: 260096 3 | dim_f: 4096 4 | dim_t: 128 5 | hop_length: 2048 6 | n_fft: 8192 7 | num_channels: 2 8 | sample_rate: 44100 9 | model: 10 | act: gelu 11 | bottleneck_factor: 4 12 | growth: 64 13 | norm: InstanceNorm 14 | num_blocks_per_scale: 2 15 | num_channels: 256 16 | num_scales: 5 17 | num_subbands: 4 18 | scale: 19 | - 2 20 | - 2 21 | training: 22 | batch_size: 8 23 | grad_clip: 0 24 | instruments: 25 | - Vocals 26 | - Drums 27 | - Bass 28 | - Other 29 | lr: 3.0e-05 30 | target_instrument: null 31 | inference: 32 | batch_size: 1 33 | dim_t: 256 34 | num_overlap: 8 -------------------------------------------------------------------------------- /src/models_dir/mdxc/modelparams/mdx_c_configs/model3.yaml: -------------------------------------------------------------------------------- 1 | audio: 2 | chunk_size: 260096 3 | dim_f: 4096 4 | dim_t: 128 5 | hop_length: 2048 6 | n_fft: 12288 7 | num_channels: 
2 8 | sample_rate: 44100 9 | model: 10 | act: gelu 11 | bottleneck_factor: 4 12 | growth: 64 13 | norm: InstanceNorm 14 | num_blocks_per_scale: 2 15 | num_channels: 128 16 | num_scales: 5 17 | num_subbands: 4 18 | scale: 19 | - 2 20 | - 2 21 | training: 22 | batch_size: 8 23 | grad_clip: 0 24 | instruments: 25 | - Vocals 26 | - Drums 27 | - Bass 28 | - Other 29 | lr: 5.0e-05 30 | target_instrument: Vocals 31 | inference: 32 | batch_size: 1 33 | dim_t: 256 34 | num_overlap: 8 -------------------------------------------------------------------------------- /src/models_dir/mdxc/modelparams/mdx_c_configs/modelA.yaml: -------------------------------------------------------------------------------- 1 | audio: 2 | chunk_size: 261120 3 | dim_f: 4096 4 | dim_t: 256 5 | hop_length: 1024 6 | min_mean_abs: 0.01 7 | n_fft: 8192 8 | num_channels: 2 9 | sample_rate: 44100 10 | model: 11 | act: gelu 12 | bottleneck_factor: 4 13 | growth: 64 14 | norm: InstanceNorm 15 | num_blocks_per_scale: 2 16 | num_channels: 64 17 | num_scales: 5 18 | num_subbands: 4 19 | scale: 20 | - 2 21 | - 2 22 | training: 23 | batch_size: 6 24 | coarse_loss_clip: true 25 | ema_momentum: 0.999 26 | grad_clip: null 27 | instruments: 28 | - Vocals 29 | - Drums 30 | - Bass 31 | - Other 32 | lr: 0.0001 33 | num_steps: 100000 34 | q: 0.4 35 | target_instrument: null 36 | inference: 37 | batch_size: 2 38 | dim_t: 256 39 | num_overlap: 8 40 | -------------------------------------------------------------------------------- /src/models_dir/mdxc/modelparams/mdx_c_configs/modelB.yaml: -------------------------------------------------------------------------------- 1 | audio: 2 | chunk_size: 261120 3 | dim_f: 4096 4 | dim_t: 256 5 | hop_length: 1024 6 | min_mean_abs: 0.01 7 | n_fft: 8192 8 | num_channels: 2 9 | sample_rate: 44100 10 | model: 11 | act: gelu 12 | bottleneck_factor: 4 13 | growth: 64 14 | norm: InstanceNorm 15 | num_blocks_per_scale: 2 16 | num_channels: 64 17 | num_scales: 5 18 | num_subbands: 4 19 | scale: 20 | - 2 21 | - 2 22 | training: 23 | batch_size: 6 24 | coarse_loss_clip: false 25 | datasets: 26 | - ../data/moises/bleeding 27 | ema_momentum: 0.999 28 | grad_clip: null 29 | instruments: 30 | - Vocals 31 | - Drums 32 | - Bass 33 | - Other 34 | lr: 0.0001 35 | num_steps: 150000 36 | q: 0.93 37 | target_instrument: null 38 | inference: 39 | batch_size: 2 40 | dim_t: 256 41 | num_overlap: 8 -------------------------------------------------------------------------------- /src/models_dir/mdxc/modelparams/mdx_c_configs/model_2_stem_061321.yaml: -------------------------------------------------------------------------------- 1 | audio: 2 | chunk_size: 260096 3 | dim_f: 4096 4 | dim_t: 256 5 | hop_length: 2048 6 | n_fft: 12288 7 | num_channels: 2 8 | sample_rate: 44100 9 | min_mean_abs: 0.001 10 | model: 11 | act: gelu 12 | bottleneck_factor: 4 13 | growth: 64 14 | norm: InstanceNorm 15 | num_blocks_per_scale: 2 16 | num_channels: 128 17 | num_scales: 5 18 | num_subbands: 4 19 | scale: 20 | - 2 21 | - 2 22 | name: epoch_10.ckpt 23 | training: 24 | batch_size: 16 25 | grad_clip: 0 26 | instruments: 27 | - Vocals 28 | - Instrumental 29 | lr: 5.0e-05 30 | target_instrument: null 31 | num_epochs: 100 32 | num_steps: 1000 33 | inference: 34 | batch_size: 1 35 | dim_t: 256 36 | num_overlap: 8 37 | -------------------------------------------------------------------------------- /src/models_dir/mdxc/modelparams/mdx_c_configs/model_2_stem_full_band.yaml: -------------------------------------------------------------------------------- 1 | 
audio: 2 | chunk_size: 260096 3 | dim_f: 6144 4 | dim_t: 128 5 | hop_length: 2048 6 | n_fft: 12288 7 | num_channels: 2 8 | sample_rate: 44100 9 | min_mean_abs: 0.001 10 | model: 11 | act: gelu 12 | bottleneck_factor: 4 13 | growth: 64 14 | norm: InstanceNorm 15 | num_blocks_per_scale: 2 16 | num_channels: 128 17 | num_scales: 5 18 | num_subbands: 6 19 | scale: 20 | - 2 21 | - 2 22 | training: 23 | batch_size: 14 24 | grad_clip: 0 25 | instruments: 26 | - Vocals 27 | - Instrumental 28 | lr: 3.0e-05 29 | target_instrument: null 30 | num_epochs: 1000 31 | num_steps: 1000 32 | augmentation: 1 33 | inference: 34 | batch_size: 1 35 | dim_t: 256 36 | num_overlap: 8 -------------------------------------------------------------------------------- /src/models_dir/mdxc/modelparams/mdx_c_configs/model_2_stem_full_band_2.yaml: -------------------------------------------------------------------------------- 1 | audio: 2 | chunk_size: 260096 3 | dim_f: 6144 4 | dim_t: 128 5 | hop_length: 2048 6 | n_fft: 12288 7 | num_channels: 2 8 | sample_rate: 44100 9 | min_mean_abs: 0.001 10 | model: 11 | act: gelu 12 | bottleneck_factor: 4 13 | growth: 128 14 | norm: InstanceNorm 15 | num_blocks_per_scale: 2 16 | num_channels: 128 17 | num_scales: 5 18 | num_subbands: 6 19 | scale: 20 | - 2 21 | - 2 22 | training: 23 | batch_size: 14 24 | grad_clip: 0 25 | instruments: 26 | - Vocals 27 | - Instrumental 28 | lr: 2.0e-05 29 | target_instrument: null 30 | num_epochs: 1000 31 | num_steps: 1000 32 | augmentation: 1 33 | inference: 34 | batch_size: 1 35 | dim_t: 256 36 | num_overlap: 8 -------------------------------------------------------------------------------- /src/models_dir/mdxc/modelparams/mdx_c_configs/model_2_stem_full_band_3.yaml: -------------------------------------------------------------------------------- 1 | audio: 2 | chunk_size: 261120 3 | dim_f: 6144 4 | dim_t: 256 5 | hop_length: 1024 6 | n_fft: 12288 7 | num_channels: 2 8 | sample_rate: 44100 9 | min_mean_abs: 0.001 10 | model: 11 | act: gelu 12 | bottleneck_factor: 4 13 | growth: 128 14 | norm: InstanceNorm 15 | num_blocks_per_scale: 2 16 | num_channels: 128 17 | num_scales: 5 18 | num_subbands: 6 19 | scale: 20 | - 2 21 | - 2 22 | training: 23 | batch_size: 6 24 | grad_clip: 0 25 | instruments: 26 | - Vocals 27 | - Instrumental 28 | lr: 1.0e-05 29 | target_instrument: null 30 | num_epochs: 1000 31 | num_steps: 1000 32 | augmentation: 1 33 | q: 0.95 34 | coarse_loss_clip: true 35 | ema_momentum: 0.999 36 | inference: 37 | batch_size: 1 38 | dim_t: 256 39 | num_overlap: 8 -------------------------------------------------------------------------------- /src/models_dir/mdxc/modelparams/mdx_c_configs/model_2_stem_full_band_4.yaml: -------------------------------------------------------------------------------- 1 | audio: 2 | chunk_size: 261120 3 | dim_f: 6144 4 | dim_t: 256 5 | hop_length: 1024 6 | n_fft: 12288 7 | num_channels: 2 8 | sample_rate: 44100 9 | min_mean_abs: 0.001 10 | model: 11 | act: gelu 12 | bottleneck_factor: 4 13 | growth: 128 14 | norm: InstanceNorm 15 | num_blocks_per_scale: 2 16 | num_channels: 128 17 | num_scales: 5 18 | num_subbands: 6 19 | scale: 20 | - 2 21 | - 2 22 | training: 23 | batch_size: 6 24 | grad_clip: 0 25 | instruments: 26 | - Vocals 27 | - Instrumental 28 | lr: 0.7e-05 29 | patience: 2 30 | target_instrument: null 31 | num_epochs: 1000 32 | num_steps: 1000 33 | augmentation: 1 34 | q: 0.95 35 | coarse_loss_clip: true 36 | ema_momentum: 0.999 37 | inference: 38 | batch_size: 1 39 | dim_t: 256 40 | num_overlap: 8 
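An editorial aside, not part of the repo: the STFT geometry implied by a config such as model_2_stem_full_band_4.yaml above can be sanity-checked directly from its audio section (pyyaml is already listed in the demucs requirements):

import yaml

# Hypothetical local path to the config shown above.
with open('model_2_stem_full_band_4.yaml') as f:
    cfg = yaml.safe_load(f)

audio = cfg['audio']
# 261120 samples at 44.1 kHz is roughly a 5.9 s analysis chunk.
print('chunk seconds:', audio['chunk_size'] / audio['sample_rate'])
# n_fft = 12288 gives 12288 // 2 + 1 = 6145 STFT bins; dim_f = 6144 keeps all but the last.
print('stft bins:', audio['n_fft'] // 2 + 1, 'dim_f:', audio['dim_f'])
# hop_length = 1024 over the chunk yields 255 hops, i.e. 256 frames, matching dim_t = 256.
print('frames:', audio['chunk_size'] // audio['hop_length'] + 1)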
-------------------------------------------------------------------------------- /src/models_dir/mdxc/modelparams/mdx_c_configs/model_2_stem_full_band_8k.yaml: -------------------------------------------------------------------------------- 1 | audio: 2 | chunk_size: 261120 3 | dim_f: 4096 4 | dim_t: 256 5 | hop_length: 1024 6 | n_fft: 8192 7 | num_channels: 2 8 | sample_rate: 44100 9 | min_mean_abs: 0.001 10 | model: 11 | act: gelu 12 | bottleneck_factor: 4 13 | growth: 128 14 | norm: InstanceNorm 15 | num_blocks_per_scale: 2 16 | num_channels: 128 17 | num_scales: 5 18 | num_subbands: 4 19 | scale: 20 | - 2 21 | - 2 22 | training: 23 | batch_size: 6 24 | grad_clip: 0 25 | instruments: 26 | - Vocals 27 | - Instrumental 28 | lr: 1.0e-05 29 | patience: 2 30 | reduce_factor: 0.95 31 | target_instrument: null 32 | num_epochs: 1000 33 | num_steps: 1000 34 | augmentation: 1 35 | augmentation_type: simple1 36 | augmentation_mix: true 37 | q: 0.95 38 | coarse_loss_clip: true 39 | ema_momentum: 0.999 40 | inference: 41 | batch_size: 1 42 | dim_t: 256 43 | num_overlap: 8 -------------------------------------------------------------------------------- /src/models_dir/mdxc/modelparams/mdx_c_configs/sndfx.yaml: -------------------------------------------------------------------------------- 1 | audio: 2 | chunk_size: 261120 3 | dim_f: 1024 4 | dim_t: 256 5 | hop_length: 1024 6 | min_mean_abs: 0.01 7 | n_fft: 2048 8 | num_channels: 2 9 | sample_rate: 44100 10 | stereo_prob: 0.7 11 | model: 12 | act: gelu 13 | bottleneck_factor: 4 14 | growth: 64 15 | norm: InstanceNorm 16 | num_blocks_per_scale: 2 17 | num_channels: 64 18 | num_scales: 5 19 | num_subbands: 4 20 | scale: 21 | - 2 22 | - 2 23 | training: 24 | batch_size: 8 25 | ema_momentum: 0.999 26 | grad_clip: null 27 | instruments: 28 | - Music 29 | - Speech 30 | - SFX 31 | lr: 0.0001 32 | num_steps: 30000 33 | target_instrument: null 34 | inference: 35 | batch_size: 8 36 | dim_t: 256 37 | instruments: 38 | - Music 39 | - Dialog 40 | - Effect 41 | num_overlap: 8 42 | -------------------------------------------------------------------------------- /src/models_dir/mdxc/modelparams/model_name_mapper.json: -------------------------------------------------------------------------------- 1 | { 2 | "UVR_MDXNET_1_9703": "UVR-MDX-NET 1", 3 | "UVR_MDXNET_2_9682": "UVR-MDX-NET 2", 4 | "UVR_MDXNET_3_9662": "UVR-MDX-NET 3", 5 | "UVR_MDXNET_KARA": "UVR-MDX-NET Karaoke", 6 | "UVR_MDXNET_Main": "UVR-MDX-NET Main", 7 | "UVR-MDX-NET-Inst_1": "UVR-MDX-NET Inst 1", 8 | "UVR-MDX-NET-Inst_2": "UVR-MDX-NET Inst 2", 9 | "UVR-MDX-NET-Inst_3": "UVR-MDX-NET Inst 3", 10 | "UVR-MDX-NET-Inst_4": "UVR-MDX-NET Inst 4", 11 | "UVR-MDX-NET-Inst_Main": "UVR-MDX-NET Inst Main", 12 | "UVR-MDX-NET-Inst_Main_2": "UVR-MDX-NET Inst Main 2", 13 | "UVR-MDX-NET-Inst_HQ_1": "UVR-MDX-NET Inst HQ 1", 14 | "UVR-MDX-NET-Inst_HQ_2": "UVR-MDX-NET Inst HQ 2", 15 | "UVR-MDX-NET-Inst_HQ_3": "UVR-MDX-NET Inst HQ 3", 16 | "UVR_MDXNET_KARA_2": "UVR-MDX-NET Karaoke 2", 17 | "Kim_Vocal_1": "Kim Vocal 1", 18 | "Kim_Vocal_2": "Kim Vocal 2", 19 | "Kim_Inst": "Kim Inst", 20 | "MDX23C-8KFFT-InstVoc_HQ.ckpt": "MDX23C-InstVoc HQ", 21 | "MDX23C-8KFFT-InstVoc_HQ_2.ckpt": "MDX23C-InstVoc HQ 2", 22 | "MDX23C_D1581.ckpt": "MDX23C-InstVoc D1581", 23 | "Reverb_HQ_By_FoxJoy": "Reverb HQ" 24 | } -------------------------------------------------------------------------------- /src/models_dir/mdxc/modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn 
as nn 3 | 4 | 5 | class TFC(nn.Module): 6 | def __init__(self, c, l, k, norm): 7 | super(TFC, self).__init__() 8 | 9 | self.H = nn.ModuleList() 10 | for i in range(l): 11 | self.H.append( 12 | nn.Sequential( 13 | nn.Conv2d(in_channels=c, out_channels=c, kernel_size=k, stride=1, padding=k // 2), 14 | norm(c), 15 | nn.ReLU(), 16 | ) 17 | ) 18 | 19 | def forward(self, x): 20 | for h in self.H: 21 | x = h(x) 22 | return x 23 | 24 | 25 | class DenseTFC(nn.Module): 26 | def __init__(self, c, l, k, norm): 27 | super(DenseTFC, self).__init__() 28 | 29 | self.conv = nn.ModuleList() 30 | for i in range(l): 31 | self.conv.append( 32 | nn.Sequential( 33 | nn.Conv2d(in_channels=c, out_channels=c, kernel_size=k, stride=1, padding=k // 2), 34 | norm(c), 35 | nn.ReLU(), 36 | ) 37 | ) 38 | 39 | def forward(self, x): 40 | for layer in self.conv[:-1]: 41 | x = torch.cat([layer(x), x], 1) 42 | return self.conv[-1](x) 43 | 44 | 45 | class TFC_TDF(nn.Module): 46 | def __init__(self, c, l, f, k, bn, dense=False, bias=True, norm=nn.BatchNorm2d): 47 | 48 | super(TFC_TDF, self).__init__() 49 | 50 | self.use_tdf = bn is not None 51 | 52 | self.tfc = DenseTFC(c, l, k, norm) if dense else TFC(c, l, k, norm) 53 | 54 | if self.use_tdf: 55 | if bn == 0: 56 | self.tdf = nn.Sequential( 57 | nn.Linear(f, f, bias=bias), 58 | norm(c), 59 | nn.ReLU() 60 | ) 61 | else: 62 | self.tdf = nn.Sequential( 63 | nn.Linear(f, f // bn, bias=bias), 64 | norm(c), 65 | nn.ReLU(), 66 | nn.Linear(f // bn, f, bias=bias), 67 | norm(c), 68 | nn.ReLU() 69 | ) 70 | 71 | def forward(self, x): 72 | x = self.tfc(x) 73 | return x + self.tdf(x) if self.use_tdf else x 74 | 75 | -------------------------------------------------------------------------------- /src/models_dir/mdxc/pyrb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import tempfile 4 | import six 5 | import numpy as np 6 | import soundfile as sf 7 | import sys 8 | 9 | if getattr(sys, 'frozen', False): 10 | BASE_PATH_RUB = sys._MEIPASS 11 | else: 12 | BASE_PATH_RUB = os.path.dirname(os.path.abspath(__file__)) 13 | 14 | __all__ = ['time_stretch', 'pitch_shift'] 15 | 16 | __RUBBERBAND_UTIL = os.path.join(BASE_PATH_RUB, 'rubberband') 17 | 18 | if six.PY2: 19 | DEVNULL = open(os.devnull, 'w') 20 | else: 21 | DEVNULL = subprocess.DEVNULL 22 | 23 | def __rubberband(y, sr, **kwargs): 24 | 25 | assert sr > 0 26 | 27 | # Get the input and output tempfile 28 | fd, infile = tempfile.mkstemp(suffix='.wav') 29 | os.close(fd) 30 | fd, outfile = tempfile.mkstemp(suffix='.wav') 31 | os.close(fd) 32 | 33 | # dump the audio 34 | sf.write(infile, y, sr) 35 | 36 | try: 37 | # Execute rubberband 38 | arguments = [__RUBBERBAND_UTIL, '-q'] 39 | 40 | for key, value in six.iteritems(kwargs): 41 | arguments.append(str(key)) 42 | arguments.append(str(value)) 43 | 44 | arguments.extend([infile, outfile]) 45 | 46 | subprocess.check_call(arguments, stdout=DEVNULL, stderr=DEVNULL) 47 | 48 | # Load the processed audio. 49 | y_out, _ = sf.read(outfile, always_2d=True) 50 | 51 | # make sure that output dimensions matches input 52 | if y.ndim == 1: 53 | y_out = np.squeeze(y_out) 54 | 55 | except OSError as exc: 56 | six.raise_from(RuntimeError('Failed to execute rubberband. 
' 57 | 'Please verify that rubberband-cli ' 58 | 'is installed.'), 59 | exc) 60 | 61 | finally: 62 | # Remove temp files 63 | os.unlink(infile) 64 | os.unlink(outfile) 65 | 66 | return y_out 67 | 68 | def time_stretch(y, sr, rate, rbargs=None): 69 | if rate <= 0: 70 | raise ValueError('rate must be strictly positive') 71 | 72 | if rate == 1.0: 73 | return y 74 | 75 | if rbargs is None: 76 | rbargs = dict() 77 | 78 | rbargs.setdefault('--tempo', rate) 79 | 80 | return __rubberband(y, sr, **rbargs) 81 | 82 | def pitch_shift(y, sr, n_steps, rbargs=None): 83 | 84 | if n_steps == 0: 85 | return y 86 | 87 | if rbargs is None: 88 | rbargs = dict() 89 | 90 | rbargs.setdefault('--pitch', n_steps) 91 | 92 | return __rubberband(y, sr, **rbargs) 93 | -------------------------------------------------------------------------------- /src/models_dir/models.json: -------------------------------------------------------------------------------- 1 | { 2 | "demucs":{ 3 | "hdemucs_mmi":{ 4 | "model_path":[ 5 | "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/75fc33f5-1941ce65.th", 6 | "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/hdemucs_mmi.yaml" 7 | ] 8 | } 9 | }, 10 | "vr_network":{ 11 | "1_HP-UVR":{ 12 | "model_path":[ 13 | "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/1_HP-UVR.pth" 14 | ] 15 | } 16 | }, 17 | "mdx":{ 18 | "UVR-MDX-NET-Inst_1":{ 19 | "model_path":[ 20 | "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/UVR-MDX-NET-Inst_1.onnx" 21 | ] 22 | } 23 | }, 24 | "mdxc":{ 25 | "MDX23C-8KFFT-InstVoc_HQ":{ 26 | "model_path":[ 27 | "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/MDX23C-8KFFT-InstVoc_HQ.ckpt" 28 | ] 29 | } 30 | } 31 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/__init__.py: -------------------------------------------------------------------------------- 1 | # VR init. 
2 | -------------------------------------------------------------------------------- /src/models_dir/vr_network/constants.py: -------------------------------------------------------------------------------- 1 | import platform 2 | 3 | #Platform Details 4 | OPERATING_SYSTEM = platform.system() 5 | SYSTEM_ARCH = platform.platform() 6 | SYSTEM_PROC = platform.processor() 7 | ARM = 'arm' 8 | 9 | # Network Constants 10 | N_BINS = 'n_bins' 11 | 12 | 13 | ALL_STEMS = 'All Stems' 14 | VOCAL_STEM = 'Vocals' 15 | INST_STEM = 'Instrumental' 16 | OTHER_STEM = 'Other' 17 | BASS_STEM = 'Bass' 18 | DRUM_STEM = 'Drums' 19 | GUITAR_STEM = 'Guitar' 20 | PIANO_STEM = 'Piano' 21 | SYNTH_STEM = 'Synthesizer' 22 | STRINGS_STEM = 'Strings' 23 | WOODWINDS_STEM = 'Woodwinds' 24 | BRASS_STEM = 'Brass' 25 | WIND_INST_STEM = 'Wind Inst' 26 | NO_OTHER_STEM = 'No Other' 27 | NO_BASS_STEM = 'No Bass' 28 | NO_DRUM_STEM = 'No Drums' 29 | NO_GUITAR_STEM = 'No Guitar' 30 | NO_PIANO_STEM = 'No Piano' 31 | NO_SYNTH_STEM = 'No Synthesizer' 32 | NO_STRINGS_STEM = 'No Strings' 33 | NO_WOODWINDS_STEM = 'No Woodwinds' 34 | NO_WIND_INST_STEM = 'No Wind Inst' 35 | NO_BRASS_STEM = 'No Brass' 36 | PRIMARY_STEM = 'Primary Stem' 37 | SECONDARY_STEM = 'Secondary Stem' 38 | 39 | 40 | NO_STEM = "No " 41 | 42 | NON_ACCOM_STEMS = ( 43 | VOCAL_STEM, 44 | OTHER_STEM, 45 | BASS_STEM, 46 | DRUM_STEM, 47 | GUITAR_STEM, 48 | PIANO_STEM, 49 | SYNTH_STEM, 50 | STRINGS_STEM, 51 | WOODWINDS_STEM, 52 | BRASS_STEM, 53 | WIND_INST_STEM) -------------------------------------------------------------------------------- /src/models_dir/vr_network/layers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | 5 | from . 
import spec_utils 6 | 7 | class Conv2DBNActiv(nn.Module): 8 | 9 | def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU): 10 | super(Conv2DBNActiv, self).__init__() 11 | self.conv = nn.Sequential( 12 | nn.Conv2d( 13 | nin, nout, 14 | kernel_size=ksize, 15 | stride=stride, 16 | padding=pad, 17 | dilation=dilation, 18 | bias=False), 19 | nn.BatchNorm2d(nout), 20 | activ() 21 | ) 22 | 23 | def __call__(self, x): 24 | return self.conv(x) 25 | 26 | class SeperableConv2DBNActiv(nn.Module): 27 | 28 | def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU): 29 | super(SeperableConv2DBNActiv, self).__init__() 30 | self.conv = nn.Sequential( 31 | nn.Conv2d( 32 | nin, nin, 33 | kernel_size=ksize, 34 | stride=stride, 35 | padding=pad, 36 | dilation=dilation, 37 | groups=nin, 38 | bias=False), 39 | nn.Conv2d( 40 | nin, nout, 41 | kernel_size=1, 42 | bias=False), 43 | nn.BatchNorm2d(nout), 44 | activ() 45 | ) 46 | 47 | def __call__(self, x): 48 | return self.conv(x) 49 | 50 | 51 | class Encoder(nn.Module): 52 | 53 | def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU): 54 | super(Encoder, self).__init__() 55 | self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ) 56 | self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ) 57 | 58 | def __call__(self, x): 59 | skip = self.conv1(x) 60 | h = self.conv2(skip) 61 | 62 | return h, skip 63 | 64 | 65 | class Decoder(nn.Module): 66 | 67 | def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False): 68 | super(Decoder, self).__init__() 69 | self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ) 70 | self.dropout = nn.Dropout2d(0.1) if dropout else None 71 | 72 | def __call__(self, x, skip=None): 73 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True) 74 | if skip is not None: 75 | skip = spec_utils.crop_center(skip, x) 76 | x = torch.cat([x, skip], dim=1) 77 | h = self.conv(x) 78 | 79 | if self.dropout is not None: 80 | h = self.dropout(h) 81 | 82 | return h 83 | 84 | 85 | class ASPPModule(nn.Module): 86 | 87 | def __init__(self, nn_architecture, nin, nout, dilations=(4, 8, 16), activ=nn.ReLU): 88 | super(ASPPModule, self).__init__() 89 | self.conv1 = nn.Sequential( 90 | nn.AdaptiveAvgPool2d((1, None)), 91 | Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ) 92 | ) 93 | 94 | self.nn_architecture = nn_architecture 95 | self.six_layer = [129605] 96 | self.seven_layer = [537238, 537227, 33966] 97 | 98 | extra_conv = SeperableConv2DBNActiv( 99 | nin, nin, 3, 1, dilations[2], dilations[2], activ=activ) 100 | 101 | self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ) 102 | self.conv3 = SeperableConv2DBNActiv( 103 | nin, nin, 3, 1, dilations[0], dilations[0], activ=activ) 104 | self.conv4 = SeperableConv2DBNActiv( 105 | nin, nin, 3, 1, dilations[1], dilations[1], activ=activ) 106 | self.conv5 = SeperableConv2DBNActiv( 107 | nin, nin, 3, 1, dilations[2], dilations[2], activ=activ) 108 | 109 | if self.nn_architecture in self.six_layer: 110 | self.conv6 = extra_conv 111 | nin_x = 6 112 | elif self.nn_architecture in self.seven_layer: 113 | self.conv6 = extra_conv 114 | self.conv7 = extra_conv 115 | nin_x = 7 116 | else: 117 | nin_x = 5 118 | 119 | self.bottleneck = nn.Sequential( 120 | Conv2DBNActiv(nin * nin_x, nout, 1, 1, 0, activ=activ), 121 | nn.Dropout2d(0.1) 122 | ) 123 | 124 | def forward(self, x): 125 | _, _, h, w = x.size() 126 | feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', 
align_corners=True) 127 | feat2 = self.conv2(x) 128 | feat3 = self.conv3(x) 129 | feat4 = self.conv4(x) 130 | feat5 = self.conv5(x) 131 | 132 | if self.nn_architecture in self.six_layer: 133 | feat6 = self.conv6(x) 134 | out = torch.cat((feat1, feat2, feat3, feat4, feat5, feat6), dim=1) 135 | elif self.nn_architecture in self.seven_layer: 136 | feat6 = self.conv6(x) 137 | feat7 = self.conv7(x) 138 | out = torch.cat((feat1, feat2, feat3, feat4, feat5, feat6, feat7), dim=1) 139 | else: 140 | out = torch.cat((feat1, feat2, feat3, feat4, feat5), dim=1) 141 | 142 | bottle = self.bottleneck(out) 143 | return bottle 144 | -------------------------------------------------------------------------------- /src/models_dir/vr_network/layers_new.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | 5 | from . import spec_utils 6 | 7 | class Conv2DBNActiv(nn.Module): 8 | 9 | def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU): 10 | super(Conv2DBNActiv, self).__init__() 11 | self.conv = nn.Sequential( 12 | nn.Conv2d( 13 | nin, nout, 14 | kernel_size=ksize, 15 | stride=stride, 16 | padding=pad, 17 | dilation=dilation, 18 | bias=False), 19 | nn.BatchNorm2d(nout), 20 | activ() 21 | ) 22 | 23 | def __call__(self, x): 24 | return self.conv(x) 25 | 26 | class Encoder(nn.Module): 27 | 28 | def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU): 29 | super(Encoder, self).__init__() 30 | self.conv1 = Conv2DBNActiv(nin, nout, ksize, stride, pad, activ=activ) 31 | self.conv2 = Conv2DBNActiv(nout, nout, ksize, 1, pad, activ=activ) 32 | 33 | def __call__(self, x): 34 | h = self.conv1(x) 35 | h = self.conv2(h) 36 | 37 | return h 38 | 39 | 40 | class Decoder(nn.Module): 41 | 42 | def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False): 43 | super(Decoder, self).__init__() 44 | self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ) 45 | # self.conv2 = Conv2DBNActiv(nout, nout, ksize, 1, pad, activ=activ) 46 | self.dropout = nn.Dropout2d(0.1) if dropout else None 47 | 48 | def __call__(self, x, skip=None): 49 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True) 50 | 51 | if skip is not None: 52 | skip = spec_utils.crop_center(skip, x) 53 | x = torch.cat([x, skip], dim=1) 54 | 55 | h = self.conv1(x) 56 | # h = self.conv2(h) 57 | 58 | if self.dropout is not None: 59 | h = self.dropout(h) 60 | 61 | return h 62 | 63 | 64 | class ASPPModule(nn.Module): 65 | 66 | def __init__(self, nin, nout, dilations=(4, 8, 12), activ=nn.ReLU, dropout=False): 67 | super(ASPPModule, self).__init__() 68 | self.conv1 = nn.Sequential( 69 | nn.AdaptiveAvgPool2d((1, None)), 70 | Conv2DBNActiv(nin, nout, 1, 1, 0, activ=activ) 71 | ) 72 | self.conv2 = Conv2DBNActiv(nin, nout, 1, 1, 0, activ=activ) 73 | self.conv3 = Conv2DBNActiv( 74 | nin, nout, 3, 1, dilations[0], dilations[0], activ=activ 75 | ) 76 | self.conv4 = Conv2DBNActiv( 77 | nin, nout, 3, 1, dilations[1], dilations[1], activ=activ 78 | ) 79 | self.conv5 = Conv2DBNActiv( 80 | nin, nout, 3, 1, dilations[2], dilations[2], activ=activ 81 | ) 82 | self.bottleneck = Conv2DBNActiv(nout * 5, nout, 1, 1, 0, activ=activ) 83 | self.dropout = nn.Dropout2d(0.1) if dropout else None 84 | 85 | def forward(self, x): 86 | _, _, h, w = x.size() 87 | feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True) 88 | feat2 = self.conv2(x) 89 | feat3 = 
self.conv3(x) 90 | feat4 = self.conv4(x) 91 | feat5 = self.conv5(x) 92 | out = torch.cat((feat1, feat2, feat3, feat4, feat5), dim=1) 93 | out = self.bottleneck(out) 94 | 95 | if self.dropout is not None: 96 | out = self.dropout(out) 97 | 98 | return out 99 | 100 | 101 | class LSTMModule(nn.Module): 102 | 103 | def __init__(self, nin_conv, nin_lstm, nout_lstm): 104 | super(LSTMModule, self).__init__() 105 | self.conv = Conv2DBNActiv(nin_conv, 1, 1, 1, 0) 106 | self.lstm = nn.LSTM( 107 | input_size=nin_lstm, 108 | hidden_size=nout_lstm // 2, 109 | bidirectional=True 110 | ) 111 | self.dense = nn.Sequential( 112 | nn.Linear(nout_lstm, nin_lstm), 113 | nn.BatchNorm1d(nin_lstm), 114 | nn.ReLU() 115 | ) 116 | 117 | def forward(self, x): 118 | N, _, nbins, nframes = x.size() 119 | h = self.conv(x)[:, 0] # N, nbins, nframes 120 | h = h.permute(2, 0, 1) # nframes, N, nbins 121 | h, _ = self.lstm(h) 122 | h = self.dense(h.reshape(-1, h.size()[-1])) # nframes * N, nbins 123 | h = h.reshape(nframes, N, 1, nbins) 124 | h = h.permute(1, 2, 3, 0) 125 | 126 | return h 127 | -------------------------------------------------------------------------------- /src/models_dir/vr_network/model_param_init.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | default_param = {} 4 | default_param['bins'] = -1 5 | default_param['unstable_bins'] = -1 # training only 6 | default_param['stable_bins'] = -1 # training only 7 | default_param['sr'] = 44100 8 | default_param['pre_filter_start'] = -1 9 | default_param['pre_filter_stop'] = -1 10 | default_param['band'] = {} 11 | 12 | N_BINS = 'n_bins' 13 | 14 | def int_keys(d): 15 | r = {} 16 | for k, v in d: 17 | if k.isdigit(): 18 | k = int(k) 19 | r[k] = v 20 | return r 21 | 22 | class ModelParameters(object): 23 | def __init__(self, config_path=''): 24 | with open(config_path, 'r') as f: 25 | self.param = json.loads(f.read(), object_pairs_hook=int_keys) 26 | 27 | for k in ['mid_side', 'mid_side_b', 'mid_side_b2', 'stereo_w', 'stereo_n', 'reverse']: 28 | if not k in self.param: 29 | self.param[k] = False 30 | 31 | if N_BINS in self.param: 32 | self.param['bins'] = self.param[N_BINS] -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/1band_sr16000_hl512.json: -------------------------------------------------------------------------------- 1 | { 2 | "bins": 1024, 3 | "unstable_bins": 0, 4 | "reduction_bins": 0, 5 | "band": { 6 | "1": { 7 | "sr": 16000, 8 | "hl": 512, 9 | "n_fft": 2048, 10 | "crop_start": 0, 11 | "crop_stop": 1024, 12 | "hpf_start": -1, 13 | "res_type": "sinc_best" 14 | } 15 | }, 16 | "sr": 16000, 17 | "pre_filter_start": 1023, 18 | "pre_filter_stop": 1024 19 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/1band_sr32000_hl512.json: -------------------------------------------------------------------------------- 1 | { 2 | "bins": 1024, 3 | "unstable_bins": 0, 4 | "reduction_bins": 0, 5 | "band": { 6 | "1": { 7 | "sr": 32000, 8 | "hl": 512, 9 | "n_fft": 2048, 10 | "crop_start": 0, 11 | "crop_stop": 1024, 12 | "hpf_start": -1, 13 | "res_type": "kaiser_fast" 14 | } 15 | }, 16 | "sr": 32000, 17 | "pre_filter_start": 1000, 18 | "pre_filter_stop": 1021 19 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/1band_sr33075_hl384.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "bins": 1024, 3 | "unstable_bins": 0, 4 | "reduction_bins": 0, 5 | "band": { 6 | "1": { 7 | "sr": 33075, 8 | "hl": 384, 9 | "n_fft": 2048, 10 | "crop_start": 0, 11 | "crop_stop": 1024, 12 | "hpf_start": -1, 13 | "res_type": "sinc_best" 14 | } 15 | }, 16 | "sr": 33075, 17 | "pre_filter_start": 1000, 18 | "pre_filter_stop": 1021 19 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/1band_sr44100_hl1024.json: -------------------------------------------------------------------------------- 1 | { 2 | "bins": 1024, 3 | "unstable_bins": 0, 4 | "reduction_bins": 0, 5 | "band": { 6 | "1": { 7 | "sr": 44100, 8 | "hl": 1024, 9 | "n_fft": 2048, 10 | "crop_start": 0, 11 | "crop_stop": 1024, 12 | "hpf_start": -1, 13 | "res_type": "sinc_best" 14 | } 15 | }, 16 | "sr": 44100, 17 | "pre_filter_start": 1023, 18 | "pre_filter_stop": 1024 19 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/1band_sr44100_hl256.json: -------------------------------------------------------------------------------- 1 | { 2 | "bins": 256, 3 | "unstable_bins": 0, 4 | "reduction_bins": 0, 5 | "band": { 6 | "1": { 7 | "sr": 44100, 8 | "hl": 256, 9 | "n_fft": 512, 10 | "crop_start": 0, 11 | "crop_stop": 256, 12 | "hpf_start": -1, 13 | "res_type": "sinc_best" 14 | } 15 | }, 16 | "sr": 44100, 17 | "pre_filter_start": 256, 18 | "pre_filter_stop": 256 19 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/1band_sr44100_hl512.json: -------------------------------------------------------------------------------- 1 | { 2 | "bins": 1024, 3 | "unstable_bins": 0, 4 | "reduction_bins": 0, 5 | "band": { 6 | "1": { 7 | "sr": 44100, 8 | "hl": 512, 9 | "n_fft": 2048, 10 | "crop_start": 0, 11 | "crop_stop": 1024, 12 | "hpf_start": -1, 13 | "res_type": "sinc_best" 14 | } 15 | }, 16 | "sr": 44100, 17 | "pre_filter_start": 1023, 18 | "pre_filter_stop": 1024 19 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/1band_sr44100_hl512_cut.json: -------------------------------------------------------------------------------- 1 | { 2 | "bins": 1024, 3 | "unstable_bins": 0, 4 | "reduction_bins": 0, 5 | "band": { 6 | "1": { 7 | "sr": 44100, 8 | "hl": 512, 9 | "n_fft": 2048, 10 | "crop_start": 0, 11 | "crop_stop": 700, 12 | "hpf_start": -1, 13 | "res_type": "sinc_best" 14 | } 15 | }, 16 | "sr": 44100, 17 | "pre_filter_start": 1023, 18 | "pre_filter_stop": 700 19 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/1band_sr44100_hl512_nf1024.json: -------------------------------------------------------------------------------- 1 | { 2 | "bins": 512, 3 | "unstable_bins": 0, 4 | "reduction_bins": 0, 5 | "band": { 6 | "1": { 7 | "sr": 44100, 8 | "hl": 512, 9 | "n_fft": 1024, 10 | "crop_start": 0, 11 | "crop_stop": 512, 12 | "hpf_start": -1, 13 | "res_type": "sinc_best" 14 | } 15 | }, 16 | "sr": 44100, 17 | "pre_filter_start": 511, 18 | "pre_filter_stop": 512 19 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/2band_32000.json: -------------------------------------------------------------------------------- 1 | { 2 | "bins": 768, 3 | 
"unstable_bins": 7, 4 | "reduction_bins": 705, 5 | "band": { 6 | "1": { 7 | "sr": 6000, 8 | "hl": 66, 9 | "n_fft": 512, 10 | "crop_start": 0, 11 | "crop_stop": 240, 12 | "lpf_start": 60, 13 | "lpf_stop": 118, 14 | "res_type": "sinc_fastest" 15 | }, 16 | "2": { 17 | "sr": 32000, 18 | "hl": 352, 19 | "n_fft": 1024, 20 | "crop_start": 22, 21 | "crop_stop": 505, 22 | "hpf_start": 44, 23 | "hpf_stop": 23, 24 | "res_type": "sinc_medium" 25 | } 26 | }, 27 | "sr": 32000, 28 | "pre_filter_start": 710, 29 | "pre_filter_stop": 731 30 | } 31 | -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/2band_44100_lofi.json: -------------------------------------------------------------------------------- 1 | { 2 | "bins": 512, 3 | "unstable_bins": 7, 4 | "reduction_bins": 510, 5 | "band": { 6 | "1": { 7 | "sr": 11025, 8 | "hl": 160, 9 | "n_fft": 768, 10 | "crop_start": 0, 11 | "crop_stop": 192, 12 | "lpf_start": 41, 13 | "lpf_stop": 139, 14 | "res_type": "sinc_fastest" 15 | }, 16 | "2": { 17 | "sr": 44100, 18 | "hl": 640, 19 | "n_fft": 1024, 20 | "crop_start": 10, 21 | "crop_stop": 320, 22 | "hpf_start": 47, 23 | "hpf_stop": 15, 24 | "res_type": "sinc_medium" 25 | } 26 | }, 27 | "sr": 44100, 28 | "pre_filter_start": 510, 29 | "pre_filter_stop": 512 30 | } 31 | -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/2band_48000.json: -------------------------------------------------------------------------------- 1 | { 2 | "bins": 768, 3 | "unstable_bins": 7, 4 | "reduction_bins": 705, 5 | "band": { 6 | "1": { 7 | "sr": 6000, 8 | "hl": 66, 9 | "n_fft": 512, 10 | "crop_start": 0, 11 | "crop_stop": 240, 12 | "lpf_start": 60, 13 | "lpf_stop": 240, 14 | "res_type": "sinc_fastest" 15 | }, 16 | "2": { 17 | "sr": 48000, 18 | "hl": 528, 19 | "n_fft": 1536, 20 | "crop_start": 22, 21 | "crop_stop": 505, 22 | "hpf_start": 82, 23 | "hpf_stop": 22, 24 | "res_type": "sinc_medium" 25 | } 26 | }, 27 | "sr": 48000, 28 | "pre_filter_start": 710, 29 | "pre_filter_stop": 731 30 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/3band_44100.json: -------------------------------------------------------------------------------- 1 | { 2 | "bins": 768, 3 | "unstable_bins": 5, 4 | "reduction_bins": 733, 5 | "band": { 6 | "1": { 7 | "sr": 11025, 8 | "hl": 128, 9 | "n_fft": 768, 10 | "crop_start": 0, 11 | "crop_stop": 278, 12 | "lpf_start": 28, 13 | "lpf_stop": 140, 14 | "res_type": "polyphase" 15 | }, 16 | "2": { 17 | "sr": 22050, 18 | "hl": 256, 19 | "n_fft": 768, 20 | "crop_start": 14, 21 | "crop_stop": 322, 22 | "hpf_start": 70, 23 | "hpf_stop": 14, 24 | "lpf_start": 283, 25 | "lpf_stop": 314, 26 | "res_type": "polyphase" 27 | }, 28 | "3": { 29 | "sr": 44100, 30 | "hl": 512, 31 | "n_fft": 768, 32 | "crop_start": 131, 33 | "crop_stop": 313, 34 | "hpf_start": 154, 35 | "hpf_stop": 141, 36 | "res_type": "sinc_medium" 37 | } 38 | }, 39 | "sr": 44100, 40 | "pre_filter_start": 757, 41 | "pre_filter_stop": 768 42 | } 43 | -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/3band_44100_mid.json: -------------------------------------------------------------------------------- 1 | { 2 | "mid_side": true, 3 | "bins": 768, 4 | "unstable_bins": 5, 5 | "reduction_bins": 733, 6 | "band": { 7 | "1": { 8 | "sr": 11025, 9 | "hl": 128, 10 | "n_fft": 768, 11 | "crop_start": 0, 12 | 
"crop_stop": 278, 13 | "lpf_start": 28, 14 | "lpf_stop": 140, 15 | "res_type": "polyphase" 16 | }, 17 | "2": { 18 | "sr": 22050, 19 | "hl": 256, 20 | "n_fft": 768, 21 | "crop_start": 14, 22 | "crop_stop": 322, 23 | "hpf_start": 70, 24 | "hpf_stop": 14, 25 | "lpf_start": 283, 26 | "lpf_stop": 314, 27 | "res_type": "polyphase" 28 | }, 29 | "3": { 30 | "sr": 44100, 31 | "hl": 512, 32 | "n_fft": 768, 33 | "crop_start": 131, 34 | "crop_stop": 313, 35 | "hpf_start": 154, 36 | "hpf_stop": 141, 37 | "res_type": "sinc_medium" 38 | } 39 | }, 40 | "sr": 44100, 41 | "pre_filter_start": 757, 42 | "pre_filter_stop": 768 43 | } 44 | -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/3band_44100_msb2.json: -------------------------------------------------------------------------------- 1 | { 2 | "mid_side_b2": true, 3 | "bins": 640, 4 | "unstable_bins": 7, 5 | "reduction_bins": 565, 6 | "band": { 7 | "1": { 8 | "sr": 11025, 9 | "hl": 108, 10 | "n_fft": 1024, 11 | "crop_start": 0, 12 | "crop_stop": 187, 13 | "lpf_start": 92, 14 | "lpf_stop": 186, 15 | "res_type": "polyphase" 16 | }, 17 | "2": { 18 | "sr": 22050, 19 | "hl": 216, 20 | "n_fft": 768, 21 | "crop_start": 0, 22 | "crop_stop": 212, 23 | "hpf_start": 68, 24 | "hpf_stop": 34, 25 | "lpf_start": 174, 26 | "lpf_stop": 209, 27 | "res_type": "polyphase" 28 | }, 29 | "3": { 30 | "sr": 44100, 31 | "hl": 432, 32 | "n_fft": 640, 33 | "crop_start": 66, 34 | "crop_stop": 307, 35 | "hpf_start": 86, 36 | "hpf_stop": 72, 37 | "res_type": "kaiser_fast" 38 | } 39 | }, 40 | "sr": 44100, 41 | "pre_filter_start": 639, 42 | "pre_filter_stop": 640 43 | } 44 | -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/4band_44100.json: -------------------------------------------------------------------------------- 1 | { 2 | "bins": 768, 3 | "unstable_bins": 7, 4 | "reduction_bins": 668, 5 | "band": { 6 | "1": { 7 | "sr": 11025, 8 | "hl": 128, 9 | "n_fft": 1024, 10 | "crop_start": 0, 11 | "crop_stop": 186, 12 | "lpf_start": 37, 13 | "lpf_stop": 73, 14 | "res_type": "polyphase" 15 | }, 16 | "2": { 17 | "sr": 11025, 18 | "hl": 128, 19 | "n_fft": 512, 20 | "crop_start": 4, 21 | "crop_stop": 185, 22 | "hpf_start": 36, 23 | "hpf_stop": 18, 24 | "lpf_start": 93, 25 | "lpf_stop": 185, 26 | "res_type": "polyphase" 27 | }, 28 | "3": { 29 | "sr": 22050, 30 | "hl": 256, 31 | "n_fft": 512, 32 | "crop_start": 46, 33 | "crop_stop": 186, 34 | "hpf_start": 93, 35 | "hpf_stop": 46, 36 | "lpf_start": 164, 37 | "lpf_stop": 186, 38 | "res_type": "polyphase" 39 | }, 40 | "4": { 41 | "sr": 44100, 42 | "hl": 512, 43 | "n_fft": 768, 44 | "crop_start": 121, 45 | "crop_stop": 382, 46 | "hpf_start": 138, 47 | "hpf_stop": 123, 48 | "res_type": "sinc_medium" 49 | } 50 | }, 51 | "sr": 44100, 52 | "pre_filter_start": 740, 53 | "pre_filter_stop": 768 54 | } 55 | -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/4band_44100_mid.json: -------------------------------------------------------------------------------- 1 | { 2 | "bins": 768, 3 | "unstable_bins": 7, 4 | "mid_side": true, 5 | "reduction_bins": 668, 6 | "band": { 7 | "1": { 8 | "sr": 11025, 9 | "hl": 128, 10 | "n_fft": 1024, 11 | "crop_start": 0, 12 | "crop_stop": 186, 13 | "lpf_start": 37, 14 | "lpf_stop": 73, 15 | "res_type": "polyphase" 16 | }, 17 | "2": { 18 | "sr": 11025, 19 | "hl": 128, 20 | "n_fft": 512, 21 | "crop_start": 4, 22 | 
"crop_stop": 185, 23 | "hpf_start": 36, 24 | "hpf_stop": 18, 25 | "lpf_start": 93, 26 | "lpf_stop": 185, 27 | "res_type": "polyphase" 28 | }, 29 | "3": { 30 | "sr": 22050, 31 | "hl": 256, 32 | "n_fft": 512, 33 | "crop_start": 46, 34 | "crop_stop": 186, 35 | "hpf_start": 93, 36 | "hpf_stop": 46, 37 | "lpf_start": 164, 38 | "lpf_stop": 186, 39 | "res_type": "polyphase" 40 | }, 41 | "4": { 42 | "sr": 44100, 43 | "hl": 512, 44 | "n_fft": 768, 45 | "crop_start": 121, 46 | "crop_stop": 382, 47 | "hpf_start": 138, 48 | "hpf_stop": 123, 49 | "res_type": "sinc_medium" 50 | } 51 | }, 52 | "sr": 44100, 53 | "pre_filter_start": 740, 54 | "pre_filter_stop": 768 55 | } 56 | -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/4band_44100_msb.json: -------------------------------------------------------------------------------- 1 | { 2 | "mid_side_b": true, 3 | "bins": 768, 4 | "unstable_bins": 7, 5 | "reduction_bins": 668, 6 | "band": { 7 | "1": { 8 | "sr": 11025, 9 | "hl": 128, 10 | "n_fft": 1024, 11 | "crop_start": 0, 12 | "crop_stop": 186, 13 | "lpf_start": 37, 14 | "lpf_stop": 73, 15 | "res_type": "polyphase" 16 | }, 17 | "2": { 18 | "sr": 11025, 19 | "hl": 128, 20 | "n_fft": 512, 21 | "crop_start": 4, 22 | "crop_stop": 185, 23 | "hpf_start": 36, 24 | "hpf_stop": 18, 25 | "lpf_start": 93, 26 | "lpf_stop": 185, 27 | "res_type": "polyphase" 28 | }, 29 | "3": { 30 | "sr": 22050, 31 | "hl": 256, 32 | "n_fft": 512, 33 | "crop_start": 46, 34 | "crop_stop": 186, 35 | "hpf_start": 93, 36 | "hpf_stop": 46, 37 | "lpf_start": 164, 38 | "lpf_stop": 186, 39 | "res_type": "polyphase" 40 | }, 41 | "4": { 42 | "sr": 44100, 43 | "hl": 512, 44 | "n_fft": 768, 45 | "crop_start": 121, 46 | "crop_stop": 382, 47 | "hpf_start": 138, 48 | "hpf_stop": 123, 49 | "res_type": "sinc_medium" 50 | } 51 | }, 52 | "sr": 44100, 53 | "pre_filter_start": 740, 54 | "pre_filter_stop": 768 55 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/4band_44100_msb2.json: -------------------------------------------------------------------------------- 1 | { 2 | "mid_side_b": true, 3 | "bins": 768, 4 | "unstable_bins": 7, 5 | "reduction_bins": 668, 6 | "band": { 7 | "1": { 8 | "sr": 11025, 9 | "hl": 128, 10 | "n_fft": 1024, 11 | "crop_start": 0, 12 | "crop_stop": 186, 13 | "lpf_start": 37, 14 | "lpf_stop": 73, 15 | "res_type": "polyphase" 16 | }, 17 | "2": { 18 | "sr": 11025, 19 | "hl": 128, 20 | "n_fft": 512, 21 | "crop_start": 4, 22 | "crop_stop": 185, 23 | "hpf_start": 36, 24 | "hpf_stop": 18, 25 | "lpf_start": 93, 26 | "lpf_stop": 185, 27 | "res_type": "polyphase" 28 | }, 29 | "3": { 30 | "sr": 22050, 31 | "hl": 256, 32 | "n_fft": 512, 33 | "crop_start": 46, 34 | "crop_stop": 186, 35 | "hpf_start": 93, 36 | "hpf_stop": 46, 37 | "lpf_start": 164, 38 | "lpf_stop": 186, 39 | "res_type": "polyphase" 40 | }, 41 | "4": { 42 | "sr": 44100, 43 | "hl": 512, 44 | "n_fft": 768, 45 | "crop_start": 121, 46 | "crop_stop": 382, 47 | "hpf_start": 138, 48 | "hpf_stop": 123, 49 | "res_type": "sinc_medium" 50 | } 51 | }, 52 | "sr": 44100, 53 | "pre_filter_start": 740, 54 | "pre_filter_stop": 768 55 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/4band_44100_reverse.json: -------------------------------------------------------------------------------- 1 | { 2 | "reverse": true, 3 | "bins": 768, 4 | "unstable_bins": 7, 5 | "reduction_bins": 668, 6 | 
"band": { 7 | "1": { 8 | "sr": 11025, 9 | "hl": 128, 10 | "n_fft": 1024, 11 | "crop_start": 0, 12 | "crop_stop": 186, 13 | "lpf_start": 37, 14 | "lpf_stop": 73, 15 | "res_type": "polyphase" 16 | }, 17 | "2": { 18 | "sr": 11025, 19 | "hl": 128, 20 | "n_fft": 512, 21 | "crop_start": 4, 22 | "crop_stop": 185, 23 | "hpf_start": 36, 24 | "hpf_stop": 18, 25 | "lpf_start": 93, 26 | "lpf_stop": 185, 27 | "res_type": "polyphase" 28 | }, 29 | "3": { 30 | "sr": 22050, 31 | "hl": 256, 32 | "n_fft": 512, 33 | "crop_start": 46, 34 | "crop_stop": 186, 35 | "hpf_start": 93, 36 | "hpf_stop": 46, 37 | "lpf_start": 164, 38 | "lpf_stop": 186, 39 | "res_type": "polyphase" 40 | }, 41 | "4": { 42 | "sr": 44100, 43 | "hl": 512, 44 | "n_fft": 768, 45 | "crop_start": 121, 46 | "crop_stop": 382, 47 | "hpf_start": 138, 48 | "hpf_stop": 123, 49 | "res_type": "sinc_medium" 50 | } 51 | }, 52 | "sr": 44100, 53 | "pre_filter_start": 740, 54 | "pre_filter_stop": 768 55 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/4band_44100_sw.json: -------------------------------------------------------------------------------- 1 | { 2 | "stereo_w": true, 3 | "bins": 768, 4 | "unstable_bins": 7, 5 | "reduction_bins": 668, 6 | "band": { 7 | "1": { 8 | "sr": 11025, 9 | "hl": 128, 10 | "n_fft": 1024, 11 | "crop_start": 0, 12 | "crop_stop": 186, 13 | "lpf_start": 37, 14 | "lpf_stop": 73, 15 | "res_type": "polyphase" 16 | }, 17 | "2": { 18 | "sr": 11025, 19 | "hl": 128, 20 | "n_fft": 512, 21 | "crop_start": 4, 22 | "crop_stop": 185, 23 | "hpf_start": 36, 24 | "hpf_stop": 18, 25 | "lpf_start": 93, 26 | "lpf_stop": 185, 27 | "res_type": "polyphase" 28 | }, 29 | "3": { 30 | "sr": 22050, 31 | "hl": 256, 32 | "n_fft": 512, 33 | "crop_start": 46, 34 | "crop_stop": 186, 35 | "hpf_start": 93, 36 | "hpf_stop": 46, 37 | "lpf_start": 164, 38 | "lpf_stop": 186, 39 | "res_type": "polyphase" 40 | }, 41 | "4": { 42 | "sr": 44100, 43 | "hl": 512, 44 | "n_fft": 768, 45 | "crop_start": 121, 46 | "crop_stop": 382, 47 | "hpf_start": 138, 48 | "hpf_stop": 123, 49 | "res_type": "sinc_medium" 50 | } 51 | }, 52 | "sr": 44100, 53 | "pre_filter_start": 740, 54 | "pre_filter_stop": 768 55 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/4band_v2.json: -------------------------------------------------------------------------------- 1 | { 2 | "bins": 672, 3 | "unstable_bins": 8, 4 | "reduction_bins": 637, 5 | "band": { 6 | "1": { 7 | "sr": 7350, 8 | "hl": 80, 9 | "n_fft": 640, 10 | "crop_start": 0, 11 | "crop_stop": 85, 12 | "lpf_start": 25, 13 | "lpf_stop": 53, 14 | "res_type": "polyphase" 15 | }, 16 | "2": { 17 | "sr": 7350, 18 | "hl": 80, 19 | "n_fft": 320, 20 | "crop_start": 4, 21 | "crop_stop": 87, 22 | "hpf_start": 25, 23 | "hpf_stop": 12, 24 | "lpf_start": 31, 25 | "lpf_stop": 62, 26 | "res_type": "polyphase" 27 | }, 28 | "3": { 29 | "sr": 14700, 30 | "hl": 160, 31 | "n_fft": 512, 32 | "crop_start": 17, 33 | "crop_stop": 216, 34 | "hpf_start": 48, 35 | "hpf_stop": 24, 36 | "lpf_start": 139, 37 | "lpf_stop": 210, 38 | "res_type": "polyphase" 39 | }, 40 | "4": { 41 | "sr": 44100, 42 | "hl": 480, 43 | "n_fft": 960, 44 | "crop_start": 78, 45 | "crop_stop": 383, 46 | "hpf_start": 130, 47 | "hpf_stop": 86, 48 | "res_type": "kaiser_fast" 49 | } 50 | }, 51 | "sr": 44100, 52 | "pre_filter_start": 668, 53 | "pre_filter_stop": 672 54 | } -------------------------------------------------------------------------------- 
/src/models_dir/vr_network/modelparams/4band_v2_sn.json: -------------------------------------------------------------------------------- 1 | { 2 | "bins": 672, 3 | "unstable_bins": 8, 4 | "reduction_bins": 637, 5 | "band": { 6 | "1": { 7 | "sr": 7350, 8 | "hl": 80, 9 | "n_fft": 640, 10 | "crop_start": 0, 11 | "crop_stop": 85, 12 | "lpf_start": 25, 13 | "lpf_stop": 53, 14 | "res_type": "polyphase" 15 | }, 16 | "2": { 17 | "sr": 7350, 18 | "hl": 80, 19 | "n_fft": 320, 20 | "crop_start": 4, 21 | "crop_stop": 87, 22 | "hpf_start": 25, 23 | "hpf_stop": 12, 24 | "lpf_start": 31, 25 | "lpf_stop": 62, 26 | "res_type": "polyphase" 27 | }, 28 | "3": { 29 | "sr": 14700, 30 | "hl": 160, 31 | "n_fft": 512, 32 | "crop_start": 17, 33 | "crop_stop": 216, 34 | "hpf_start": 48, 35 | "hpf_stop": 24, 36 | "lpf_start": 139, 37 | "lpf_stop": 210, 38 | "res_type": "polyphase" 39 | }, 40 | "4": { 41 | "sr": 44100, 42 | "hl": 480, 43 | "n_fft": 960, 44 | "crop_start": 78, 45 | "crop_stop": 383, 46 | "hpf_start": 130, 47 | "hpf_stop": 86, 48 | "convert_channels": "stereo_n", 49 | "res_type": "kaiser_fast" 50 | } 51 | }, 52 | "sr": 44100, 53 | "pre_filter_start": 668, 54 | "pre_filter_stop": 672 55 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/4band_v3.json: -------------------------------------------------------------------------------- 1 | { 2 | "bins": 672, 3 | "unstable_bins": 8, 4 | "reduction_bins": 530, 5 | "band": { 6 | "1": { 7 | "sr": 7350, 8 | "hl": 80, 9 | "n_fft": 640, 10 | "crop_start": 0, 11 | "crop_stop": 85, 12 | "lpf_start": 25, 13 | "lpf_stop": 53, 14 | "res_type": "polyphase" 15 | }, 16 | "2": { 17 | "sr": 7350, 18 | "hl": 80, 19 | "n_fft": 320, 20 | "crop_start": 4, 21 | "crop_stop": 87, 22 | "hpf_start": 25, 23 | "hpf_stop": 12, 24 | "lpf_start": 31, 25 | "lpf_stop": 62, 26 | "res_type": "polyphase" 27 | }, 28 | "3": { 29 | "sr": 14700, 30 | "hl": 160, 31 | "n_fft": 512, 32 | "crop_start": 17, 33 | "crop_stop": 216, 34 | "hpf_start": 48, 35 | "hpf_stop": 24, 36 | "lpf_start": 139, 37 | "lpf_stop": 210, 38 | "res_type": "polyphase" 39 | }, 40 | "4": { 41 | "sr": 44100, 42 | "hl": 480, 43 | "n_fft": 960, 44 | "crop_start": 78, 45 | "crop_stop": 383, 46 | "hpf_start": 130, 47 | "hpf_stop": 86, 48 | "res_type": "kaiser_fast" 49 | } 50 | }, 51 | "sr": 44100, 52 | "pre_filter_start": 668, 53 | "pre_filter_stop": 672 54 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/4band_v3_sn.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_bins": 672, 3 | "unstable_bins": 8, 4 | "stable_bins": 530, 5 | "band": { 6 | "1": { 7 | "sr": 7350, 8 | "hl": 80, 9 | "n_fft": 640, 10 | "crop_start": 0, 11 | "crop_stop": 85, 12 | "lpf_start": 25, 13 | "lpf_stop": 53, 14 | "res_type": "polyphase" 15 | }, 16 | "2": { 17 | "sr": 7350, 18 | "hl": 80, 19 | "n_fft": 320, 20 | "crop_start": 4, 21 | "crop_stop": 87, 22 | "hpf_start": 25, 23 | "hpf_stop": 12, 24 | "lpf_start": 31, 25 | "lpf_stop": 62, 26 | "res_type": "polyphase" 27 | }, 28 | "3": { 29 | "sr": 14700, 30 | "hl": 160, 31 | "n_fft": 512, 32 | "crop_start": 17, 33 | "crop_stop": 216, 34 | "hpf_start": 48, 35 | "hpf_stop": 24, 36 | "lpf_start": 139, 37 | "lpf_stop": 210, 38 | "res_type": "polyphase" 39 | }, 40 | "4": { 41 | "sr": 44100, 42 | "hl": 480, 43 | "n_fft": 960, 44 | "crop_start": 78, 45 | "crop_stop": 383, 46 | "hpf_start": 130, 47 | "hpf_stop": 86, 48 | 
"convert_channels": "stereo_n", 49 | "res_type": "kaiser_fast" 50 | } 51 | }, 52 | "sr": 44100, 53 | "pre_filter_start": 668, 54 | "pre_filter_stop": 672 55 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "mid_side_b2": true, 3 | "bins": 1280, 4 | "unstable_bins": 7, 5 | "reduction_bins": 565, 6 | "band": { 7 | "1": { 8 | "sr": 11025, 9 | "hl": 108, 10 | "n_fft": 2048, 11 | "crop_start": 0, 12 | "crop_stop": 374, 13 | "lpf_start": 92, 14 | "lpf_stop": 186, 15 | "res_type": "polyphase" 16 | }, 17 | "2": { 18 | "sr": 22050, 19 | "hl": 216, 20 | "n_fft": 1536, 21 | "crop_start": 0, 22 | "crop_stop": 424, 23 | "hpf_start": 68, 24 | "hpf_stop": 34, 25 | "lpf_start": 348, 26 | "lpf_stop": 418, 27 | "res_type": "polyphase" 28 | }, 29 | "3": { 30 | "sr": 44100, 31 | "hl": 432, 32 | "n_fft": 1280, 33 | "crop_start": 132, 34 | "crop_stop": 614, 35 | "hpf_start": 172, 36 | "hpf_stop": 144, 37 | "res_type": "polyphase" 38 | } 39 | }, 40 | "sr": 44100, 41 | "pre_filter_start": 1280, 42 | "pre_filter_stop": 1280 43 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/modelparams/model_data.json: -------------------------------------------------------------------------------- 1 | { 2 | "0d0e6d143046b0eecc41a22e60224582": { 3 | "vr_model_param": "3band_44100_mid", 4 | "primary_stem": "Instrumental" 5 | }, 6 | "18b52f873021a0af556fb4ecd552bb8e": { 7 | "vr_model_param": "2band_32000", 8 | "primary_stem": "Instrumental" 9 | }, 10 | "1fc66027c82b499c7d8f55f79e64cadc": { 11 | "vr_model_param": "2band_32000", 12 | "primary_stem": "Instrumental" 13 | }, 14 | "2aa34fbc01f8e6d2bf509726481e7142": { 15 | "vr_model_param": "4band_44100", 16 | "primary_stem": "No Piano" 17 | }, 18 | "3e18f639b11abea7361db1a4a91c2559": { 19 | "vr_model_param": "4band_44100", 20 | "primary_stem": "Instrumental" 21 | }, 22 | "570b5f50054609a17741369a35007ddd": { 23 | "vr_model_param": "4band_v3", 24 | "primary_stem": "Instrumental" 25 | }, 26 | "5a6e24c1b530f2dab045a522ef89b751": { 27 | "vr_model_param": "1band_sr44100_hl512", 28 | "primary_stem": "Instrumental" 29 | }, 30 | "6b5916069a49be3fe29d4397ecfd73fa": { 31 | "vr_model_param": "3band_44100_msb2", 32 | "primary_stem": "Instrumental", 33 | "is_karaoke": true 34 | }, 35 | "74b3bc5fa2b69f29baf7839b858bc679": { 36 | "vr_model_param": "4band_44100", 37 | "primary_stem": "Instrumental" 38 | }, 39 | "827213b316df36b52a1f3d04fec89369": { 40 | "vr_model_param": "4band_44100", 41 | "primary_stem": "Instrumental" 42 | }, 43 | "911d4048eee7223eca4ee0efb7d29256": { 44 | "vr_model_param": "4band_44100", 45 | "primary_stem": "Vocals" 46 | }, 47 | "941f3f7f0b0341f12087aacdfef644b1": { 48 | "vr_model_param": "4band_v2", 49 | "primary_stem": "Instrumental" 50 | }, 51 | "a02827cf69d75781a35c0e8a327f3195": { 52 | "vr_model_param": "1band_sr33075_hl384", 53 | "primary_stem": "Instrumental" 54 | }, 55 | "b165fbff113c959dba5303b74c6484bc": { 56 | "vr_model_param": "3band_44100", 57 | "primary_stem": "Instrumental" 58 | }, 59 | "b5f988cd3e891dca7253bf5f0f3427c7": { 60 | "vr_model_param": "4band_44100", 61 | "primary_stem": "Instrumental" 62 | }, 63 | "b99c35723bc35cb11ed14a4780006a80": { 64 | "vr_model_param": "1band_sr44100_hl1024", 65 | "primary_stem": "Instrumental" 66 | }, 67 | "ba02fd25b71d620eebbdb49e18e4c336": { 68 | "vr_model_param": "3band_44100_mid", 69 | 
"primary_stem": "Instrumental" 70 | }, 71 | "c4476ef424d8cba65f38d8d04e8514e2": { 72 | "vr_model_param": "3band_44100_msb2", 73 | "primary_stem": "Instrumental" 74 | }, 75 | "da2d37b8be2972e550a409bae08335aa": { 76 | "vr_model_param": "4band_44100", 77 | "primary_stem": "Vocals" 78 | }, 79 | "db57205d3133e39df8e050b435a78c80": { 80 | "vr_model_param": "4band_44100", 81 | "primary_stem": "Instrumental" 82 | }, 83 | "ea83b08e32ec2303456fe50659035f69": { 84 | "vr_model_param": "4band_v3", 85 | "primary_stem": "Instrumental" 86 | }, 87 | "f6ea8473ff86017b5ebd586ccacf156b": { 88 | "vr_model_param": "4band_v2_sn", 89 | "primary_stem": "Instrumental", 90 | "is_karaoke": true 91 | }, 92 | "fd297a61eafc9d829033f8b987c39a3d": { 93 | "vr_model_param": "1band_sr32000_hl512", 94 | "primary_stem": "Instrumental" 95 | }, 96 | "0ec76fd9e65f81d8b4fbd13af4826ed8": { 97 | "vr_model_param": "4band_v3", 98 | "primary_stem": "No Woodwinds" 99 | }, 100 | "0fb9249ffe4ffc38d7b16243f394c0ff": { 101 | "vr_model_param": "4band_v3", 102 | "primary_stem": "No Reverb" 103 | }, 104 | "6857b2972e1754913aad0c9a1678c753": { 105 | "vr_model_param": "4band_v3", 106 | "primary_stem": "No Echo", 107 | "nout": 48, 108 | "nout_lstm": 128 109 | }, 110 | "f200a145434efc7dcf0cd093f517ed52": { 111 | "vr_model_param": "4band_v3", 112 | "primary_stem": "No Echo", 113 | "nout": 48, 114 | "nout_lstm": 128 115 | }, 116 | "44c55d8b5d2e3edea98c2b2bf93071c7": { 117 | "vr_model_param": "4band_v3", 118 | "primary_stem": "Noise", 119 | "nout": 48, 120 | "nout_lstm": 128 121 | }, 122 | "51ea8c43a6928ed3c10ef5cb2707d57b": { 123 | "vr_model_param": "1band_sr44100_hl1024", 124 | "primary_stem": "Noise", 125 | "nout": 16, 126 | "nout_lstm": 128 127 | }, 128 | "944950a9c5963a5eb70b445d67b7068a": { 129 | "vr_model_param": "4band_v3_sn", 130 | "primary_stem": "Vocals", 131 | "nout": 64, 132 | "nout_lstm": 128, 133 | "is_karaoke": false, 134 | "is_bv_model": true, 135 | "is_bv_model_rebalanced": 0.9 136 | } 137 | } -------------------------------------------------------------------------------- /src/models_dir/vr_network/nets_new.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from . 
import layers_new as layers 5 | 6 | class BaseNet(nn.Module): 7 | 8 | def __init__(self, nin, nout, nin_lstm, nout_lstm, dilations=((4, 2), (8, 4), (12, 6))): 9 | super(BaseNet, self).__init__() 10 | self.enc1 = layers.Conv2DBNActiv(nin, nout, 3, 1, 1) 11 | self.enc2 = layers.Encoder(nout, nout * 2, 3, 2, 1) 12 | self.enc3 = layers.Encoder(nout * 2, nout * 4, 3, 2, 1) 13 | self.enc4 = layers.Encoder(nout * 4, nout * 6, 3, 2, 1) 14 | self.enc5 = layers.Encoder(nout * 6, nout * 8, 3, 2, 1) 15 | 16 | self.aspp = layers.ASPPModule(nout * 8, nout * 8, dilations, dropout=True) 17 | 18 | self.dec4 = layers.Decoder(nout * (6 + 8), nout * 6, 3, 1, 1) 19 | self.dec3 = layers.Decoder(nout * (4 + 6), nout * 4, 3, 1, 1) 20 | self.dec2 = layers.Decoder(nout * (2 + 4), nout * 2, 3, 1, 1) 21 | self.lstm_dec2 = layers.LSTMModule(nout * 2, nin_lstm, nout_lstm) 22 | self.dec1 = layers.Decoder(nout * (1 + 2) + 1, nout * 1, 3, 1, 1) 23 | 24 | def __call__(self, x): 25 | e1 = self.enc1(x) 26 | e2 = self.enc2(e1) 27 | e3 = self.enc3(e2) 28 | e4 = self.enc4(e3) 29 | e5 = self.enc5(e4) 30 | 31 | h = self.aspp(e5) 32 | 33 | h = self.dec4(h, e4) 34 | h = self.dec3(h, e3) 35 | h = self.dec2(h, e2) 36 | h = torch.cat([h, self.lstm_dec2(h)], dim=1) 37 | h = self.dec1(h, e1) 38 | 39 | return h 40 | 41 | class CascadedNet(nn.Module): 42 | 43 | def __init__(self, n_fft, nn_arch_size=51000, nout=32, nout_lstm=128): 44 | super(CascadedNet, self).__init__() 45 | self.max_bin = n_fft // 2 46 | self.output_bin = n_fft // 2 + 1 47 | self.nin_lstm = self.max_bin // 2 48 | self.offset = 64 49 | nout = 64 if nn_arch_size == 218409 else nout 50 | 51 | #print(nout, nout_lstm, n_fft) 52 | 53 | self.stg1_low_band_net = nn.Sequential( 54 | BaseNet(2, nout // 2, self.nin_lstm // 2, nout_lstm), 55 | layers.Conv2DBNActiv(nout // 2, nout // 4, 1, 1, 0) 56 | ) 57 | self.stg1_high_band_net = BaseNet(2, nout // 4, self.nin_lstm // 2, nout_lstm // 2) 58 | 59 | self.stg2_low_band_net = nn.Sequential( 60 | BaseNet(nout // 4 + 2, nout, self.nin_lstm // 2, nout_lstm), 61 | layers.Conv2DBNActiv(nout, nout // 2, 1, 1, 0) 62 | ) 63 | self.stg2_high_band_net = BaseNet(nout // 4 + 2, nout // 2, self.nin_lstm // 2, nout_lstm // 2) 64 | 65 | self.stg3_full_band_net = BaseNet(3 * nout // 4 + 2, nout, self.nin_lstm, nout_lstm) 66 | 67 | self.out = nn.Conv2d(nout, 2, 1, bias=False) 68 | self.aux_out = nn.Conv2d(3 * nout // 4, 2, 1, bias=False) 69 | 70 | def forward(self, x): 71 | x = x[:, :, :self.max_bin] 72 | 73 | bandw = x.size()[2] // 2 74 | l1_in = x[:, :, :bandw] 75 | h1_in = x[:, :, bandw:] 76 | l1 = self.stg1_low_band_net(l1_in) 77 | h1 = self.stg1_high_band_net(h1_in) 78 | aux1 = torch.cat([l1, h1], dim=2) 79 | 80 | l2_in = torch.cat([l1_in, l1], dim=1) 81 | h2_in = torch.cat([h1_in, h1], dim=1) 82 | l2 = self.stg2_low_band_net(l2_in) 83 | h2 = self.stg2_high_band_net(h2_in) 84 | aux2 = torch.cat([l2, h2], dim=2) 85 | 86 | f3_in = torch.cat([x, aux1, aux2], dim=1) 87 | f3 = self.stg3_full_band_net(f3_in) 88 | 89 | mask = torch.sigmoid(self.out(f3)) 90 | mask = F.pad( 91 | input=mask, 92 | pad=(0, 0, 0, self.output_bin - mask.size()[2]), 93 | mode='replicate' 94 | ) 95 | 96 | if self.training: 97 | aux = torch.cat([aux1, aux2], dim=1) 98 | aux = torch.sigmoid(self.aux_out(aux)) 99 | aux = F.pad( 100 | input=aux, 101 | pad=(0, 0, 0, self.output_bin - aux.size()[2]), 102 | mode='replicate' 103 | ) 104 | return mask, aux 105 | else: 106 | return mask 107 | 108 | def predict_mask(self, x): 109 | mask = self.forward(x) 110 | 111 | if self.offset > 0: 112 
| mask = mask[:, :, :, self.offset:-self.offset]
113 |             assert mask.size()[3] > 0
114 | 
115 |         return mask
116 | 
117 |     def predict(self, x):
118 |         mask = self.forward(x)
119 |         pred_mag = x * mask
120 | 
121 |         # trim `self.offset` frames from both ends of the time axis
122 |         if self.offset > 0:
123 |             pred_mag = pred_mag[:, :, :, self.offset:-self.offset]
124 |             assert pred_mag.size()[3] > 0
125 | 
126 |         return pred_mag
127 | 
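128 | 
129 | # Minimal inference sketch (illustrative assumptions: n_fft=2048 and a random
130 | # magnitude input; this block is a usage note, not part of the network code):
131 | if __name__ == "__main__":
132 |     model = CascadedNet(n_fft=2048)
133 |     model.eval()
134 |     # fake 2-channel magnitude spectrogram: (batch, channels, bins, frames)
135 |     spec = torch.rand(1, 2, 1025, 512)
136 |     with torch.no_grad():
137 |         mask = model(spec)         # sigmoid mask in [0, 1], same bins/frames
138 |     separated_mag = spec * mask    # masked magnitude, cf. CascadedNet.predict
139 |     print(mask.shape)              # torch.Size([1, 2, 1025, 512])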
--------------------------------------------------------------------------------
/src/models_dir/vr_network/pyrb.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import tempfile
4 | import six
5 | import numpy as np
6 | import soundfile as sf
7 | import sys
8 | 
9 | if getattr(sys, 'frozen', False):
10 |     BASE_PATH_RUB = sys._MEIPASS
11 | else:
12 |     BASE_PATH_RUB = os.path.dirname(os.path.abspath(__file__))
13 | 
14 | __all__ = ['time_stretch', 'pitch_shift']
15 | 
16 | __RUBBERBAND_UTIL = os.path.join(BASE_PATH_RUB, 'rubberband')
17 | 
18 | if six.PY2:
19 |     DEVNULL = open(os.devnull, 'w')
20 | else:
21 |     DEVNULL = subprocess.DEVNULL
22 | 
23 | def __rubberband(y, sr, **kwargs):
24 | 
25 |     assert sr > 0
26 | 
27 |     # Get the input and output tempfile
28 |     fd, infile = tempfile.mkstemp(suffix='.wav')
29 |     os.close(fd)
30 |     fd, outfile = tempfile.mkstemp(suffix='.wav')
31 |     os.close(fd)
32 | 
33 |     # dump the audio
34 |     sf.write(infile, y, sr)
35 | 
36 |     try:
37 |         # Execute rubberband
38 |         arguments = [__RUBBERBAND_UTIL, '-q']
39 | 
40 |         for key, value in six.iteritems(kwargs):
41 |             arguments.append(str(key))
42 |             arguments.append(str(value))
43 | 
44 |         arguments.extend([infile, outfile])
45 | 
46 |         subprocess.check_call(arguments, stdout=DEVNULL, stderr=DEVNULL)
47 | 
48 |         # Load the processed audio.
49 |         y_out, _ = sf.read(outfile, always_2d=True)
50 | 
51 |         # make sure that the output dimensions match the input
52 |         if y.ndim == 1:
53 |             y_out = np.squeeze(y_out)
54 | 
55 |     except OSError as exc:
56 |         six.raise_from(RuntimeError('Failed to execute rubberband. '
57 |                                     'Please verify that rubberband-cli '
58 |                                     'is installed.'),
59 |                        exc)
60 | 
61 |     finally:
62 |         # Remove temp files
63 |         os.unlink(infile)
64 |         os.unlink(outfile)
65 | 
66 |     return y_out
67 | 
68 | def time_stretch(y, sr, rate, rbargs=None):
69 |     if rate <= 0:
70 |         raise ValueError('rate must be strictly positive')
71 | 
72 |     if rate == 1.0:
73 |         return y
74 | 
75 |     if rbargs is None:
76 |         rbargs = dict()
77 | 
78 |     rbargs.setdefault('--tempo', rate)
79 | 
80 |     return __rubberband(y, sr, **rbargs)
81 | 
82 | def pitch_shift(y, sr, n_steps, rbargs=None):
83 | 
84 |     if n_steps == 0:
85 |         return y
86 | 
87 |     if rbargs is None:
88 |         rbargs = dict()
89 | 
90 |     rbargs.setdefault('--pitch', n_steps)
91 | 
92 |     return __rubberband(y, sr, **rbargs)
93 | 
--------------------------------------------------------------------------------
/src/pipelines.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/pipelines.py
--------------------------------------------------------------------------------
/src/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/utils/__init__.py
--------------------------------------------------------------------------------
/src/utils/fastio.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import soundfile as sf
3 | import librosa
4 | import audiofile as af
5 | import types
6 | from typing import Union, List, Tuple
7 | import numpy.typing as npt
8 | 
9 | def read(path:str, insure_2d:bool=True, target_sampling_rate=None, logger=None)->Tuple[npt.NDArray, int]:
10 |     """Read an audio file, trying audiofile first, then soundfile, and finally librosa.
11 | 
12 |     Args:
13 |         path (str): path to the audio file
14 |         insure_2d (bool, optional): insure that the audio data is 2D.
15 |             If the audio doesn't have 2 channels it will be converted to 2D by repeating the channel.
16 |             Defaults to True.
17 |         target_sampling_rate (int, optional): if set, the audio is resampled to this rate. Defaults to None.
18 |         logger (logging.Logger, optional): logger. Defaults to None.
19 | 
20 |     Returns:
21 |         tuple: audio data and samplerate
22 | 
23 |     Raises:
24 |         ValueError: Failed to read the audio file with any of the available libraries
25 | 
26 |     """
27 | 
28 |     ext = path.split('.')[-1]
29 |     signal, sampling_rate = None, None
30 | 
31 |     if ext in ['wav', 'flac', 'ogg', 'mp3']:
32 |         try:
33 |             signal, sampling_rate = af.read(path)
34 |         except Exception as e:
35 |             if logger:
36 |                 logger.warning(f"audiofile failed to read {path} with error {e}")
37 | 
38 |     if signal is None:
39 |         try:
40 |             signal, sampling_rate = sf.read(path)
41 |         except Exception as e:
42 |             if logger:
43 |                 logger.warning(f"soundfile failed to read {path} with error {e}")
44 | 
45 |     if signal is None:
46 |         try:
47 |             signal, sampling_rate = librosa.load(path, sr=None, mono=False)
48 |         except Exception as e:
49 |             if logger:
50 |                 logger.error(f"librosa failed to read {path} with error {e}")
51 | 
52 |     # Resample only if a reader succeeded; keyword arguments keep this call
53 |     # compatible with librosa >= 0.10, which removed the positional form.
54 |     if signal is not None and target_sampling_rate is not None:
55 |         signal = librosa.resample(signal, orig_sr=sampling_rate, target_sr=target_sampling_rate)
56 |         sampling_rate = target_sampling_rate
57 | 
58 |     if signal is not None:
59 |         signal = insure_2d_signal(signal, insure_2d, logger)
60 |         return signal, sampling_rate
61 | 
62 |     raise ValueError(f"Failed to read {path} with any of the available libraries")
63 | 
64 | 
65 | def insure_2d_signal(signal:npt.NDArray, insure_2d:bool, logger=None)->npt.NDArray:
66 |     """Insure that the audio data is 2D.
67 |     If the audio doesn't have 2 channels it will be converted to 2D by repeating the channel.
68 |     If the audio has more than 2 channels the extra channels will be removed.
69 | 
70 |     Args:
71 |         signal (np.array): audio data
72 |         insure_2d (bool): insure that the audio data is 2D.
73 |         logger (logging.Logger, optional): logger. Defaults to None.
74 | 
75 |     Returns:
76 |         np.array: 2D audio data
77 | 
78 |     """
79 |     original_shape = signal.shape  # capture before any modification, for logging
80 |     if insure_2d and signal.ndim == 1:
81 |         signal = np.stack([signal, signal])
82 |         if logger:
83 |             logger.warning(f"Insured 2D signal for audio data. Original shape was {original_shape}")
84 |     elif insure_2d and signal.ndim > 2:
85 |         if logger:
86 |             logger.warning(f"Insured 2D signal for audio data. Original shape was {original_shape}")
87 |         signal = signal[:2]
88 |     return signal
89 | 
90 | 
91 | def write(path:str, signal:Union[npt.NDArray, List], sampling_rate:int, ext:str=None, logger=None):
92 |     """Write an audio file, trying audiofile first, then soundfile, and finally librosa.
93 | 
94 |     Args:
95 |         path (str): path to the audio file
96 |         signal (np.array|list): audio data
97 |         sampling_rate (int): samplerate
98 |         ext (str, optional): file extension; overrides the file extension from the path. Defaults to None. Example: 'wav', 'flac', 'ogg', 'mp3' (don't add the dot).
99 |         logger (logging.Logger, optional): logger. Defaults to None.
100 | 
101 |     Raises:
102 |         ValueError: Failed to write the audio file with any of the available libraries
103 |     """
104 |     if ext is not None:
105 |         path = path+'.'+ext
106 | 
107 |     signal = np.asarray(signal)  # accept plain lists as well as arrays
108 | 
109 |     if ext in ['wav', 'flac', 'ogg', 'mp3']:
110 |         try:
111 |             af.write(path, signal, sampling_rate)
112 |             return
113 |         except Exception as e:
114 |             if logger:
115 |                 logger.warning(f"audiofile failed to write {path} with error {e}")
116 | 
117 |     try:
118 |         sf.write(path, signal.T, sampling_rate)
119 |         return
120 |     except Exception as e:
121 |         if logger:
122 |             logger.warning(f"soundfile failed to write {path} with error {e}")
123 | 
124 |     try:
125 |         # Last resort: librosa.output.write_wav only exists in librosa < 0.8.
126 |         librosa.output.write_wav(path, signal.T, sampling_rate)
127 |         return
128 |     except Exception as e:
129 |         if logger:
130 |             logger.error(f"librosa failed to write {path} with error {e}")
131 | 
132 |     raise ValueError(f"Failed to write {path} with any of the available libraries")
133 | 
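134 | 
135 | # Minimal round-trip sketch ("song.wav" is a placeholder path, not a file
136 | # shipped with the repo):
137 | if __name__ == "__main__":
138 |     signal, sr = read("song.wav", insure_2d=True, target_sampling_rate=44100)
139 |     print(signal.shape, sr)            # e.g. (2, n_samples) 44100
140 |     write("song_copy", signal, sr, ext="wav")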
--------------------------------------------------------------------------------
/src/utils/get_models.py:
--------------------------------------------------------------------------------
1 | import os
2 | import glob
3 | import urllib.request
4 | from typing import List
5 | from pathlib import Path
6 | import json
7 | 
8 | uvr_path = Path(__file__).parent.parent
9 | 
10 | def download_model(model_name:str, model_arch:str, model_path:List[str]=None, logger=None)->str:
11 |     """Download a model from the Hugging Face model hub
12 | 
13 |     Args:
14 |         model_name (str): model name.
15 |         model_path (list[str]): model paths (URLs) to download the model from. Defaults to None (loads paths from uvr/models_dir/models.json file)
16 |         model_arch (str): model architecture. Weights are stored under ../models_dir/{model_arch}/weights/{model_name}.
17 |             The path is created if it does not exist, and an already-downloaded model is not downloaded again.
18 |         logger (logging.Logger, optional): logger. Defaults to None.
19 | 
20 |     Returns:
21 |         str: path to the downloaded model
22 |     """
23 |     if model_path is None:
24 |         if logger:
25 |             logger.warning(f"Model path is not provided for {model_name}, auto loading from models.json file")
26 |         models_json_path = os.path.join(uvr_path, "models_dir", "models.json")
27 |         models = json.load(open(models_json_path, "r"))
28 |         model_path = models[model_arch][model_name]["model_path"]
29 | 
30 |     save_path = os.path.join(uvr_path, "models_dir", model_arch, "weights", model_name)
31 | 
32 |     if not os.path.exists(save_path):
33 |         os.makedirs(save_path)
34 | 
35 |     files = [path.split("/")[-1] for path in model_path]
36 |     if model_exists(model_name=model_name, model_arch=model_arch, files=files):
37 |         if logger:
38 |             logger.info(f"Model {model_name} already exists in {save_path}")
39 |         return save_path
40 | 
41 |     try:
42 |         # os.system(f"wget {model_path} -P {local_model_path}")
43 |         for file_name, path in zip(files, model_path):
44 |             local_file_path = os.path.join(save_path, file_name)
45 |             urllib.request.urlretrieve(path, local_file_path)
46 |             if logger:
47 |                 logger.info(f"Downloaded {model_name} from {model_path}")
48 | 
49 | 
50 |         return save_path
51 | 
52 |     except Exception as e:
53 |         if logger:
54 |             logger.error(f"Failed to download {model_name} from {model_path} with error {e}")
55 | 
56 |         return None
57 | 
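58 | # Hypothetical usage sketch (the model and architecture names below mirror
59 | # tests/models_status.json; they are illustrative, not guaranteed defaults):
60 | #
61 | #     path = download_model(model_name="hdemucs_mmi", model_arch="demucs")
62 | #     # -> <uvr_path>/models_dir/demucs/weights/hdemucs_mmi, or None on failure
63 | #
64 | 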
65 | def model_exists(model_name:str, model_arch:str, files:List=None)->bool:
66 |     """Check if the model exists in ../models_dir/{model_arch}/weights/{model_name}
67 | 
68 |     Args:
69 |         model_name (str): model name.
70 |         model_arch (str): model architecture.
71 |         files (list[str], optional): list of files to check for. Defaults to None. If not provided, only the model directory is checked.
72 | 
73 |     Returns:
74 |         bool: True if the model exists, False otherwise
75 |     """
76 |     # remove extension from the model name
77 |     if len(model_name.split('.')) > 1:
78 |         model_name = model_name.split('.')[0]
79 | 
80 |     save_path = os.path.join(uvr_path, "models_dir", model_arch, "weights", model_name)
81 |     if files is not None:
82 |         for file in files:
83 |             local_model_path = os.path.join(save_path, file)
84 |             if not os.path.isfile(local_model_path):
85 |                 return False
86 | 
87 |     if os.path.exists(save_path):
88 |         return True
89 |     return False
90 | 
91 | """
92 | Example of the model json file:
93 | models_json = {
94 | 
95 |     "demucs":{
96 |         "name1":{
97 |             "model_path":"https://abc/bcd/model.pt",
98 |             "other_metadata":1,
99 |         },
100 |     }
101 | }
102 | """
103 | 
104 | def download_all_models(models_json:dict=None, logger=None)->dict:
105 |     """Download all models from the models_json
106 | 
107 |     Args:
108 |         models_json (dict): dictionary of models to download. Defaults to None (loads paths from uvr/models_dir/models.json file)
109 |         logger (logging.Logger, optional): logger. Defaults to None.
110 | 
111 |     Returns:
112 |         dict: dictionary of downloaded models, with the same structure as the input models_json:
113 |             architectures -> model_name -> model_path, where model_path is the local path to the downloaded model.
114 |             Models that are already downloaded are not downloaded again, and a model that failed to download maps to None.
115 |     """
116 |     paths = {}
117 |     if models_json is None:
118 |         if logger:
119 |             logger.warning("models_json is not provided, auto loading from models.json file")
120 |         models_json_path = os.path.join(uvr_path, "models_dir", "models.json")
121 |         models_json = json.load(open(models_json_path, "r"))
122 | 
123 |     for model_arch, models in models_json.items():
124 |         paths[model_arch] = {}
125 |         for model_name, model_data in models.items():
126 |             model_path = model_data["model_path"]
127 |             model_path = download_model(model_name=model_name, model_path=model_path, model_arch=model_arch, logger=logger)
128 |             paths[model_arch][model_name] = model_path
129 | 
130 |     return paths
131 | 
132 | 
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/tests/__init__.py
--------------------------------------------------------------------------------
/tests/models_status.json:
--------------------------------------------------------------------------------
1 | {
2 |     "demucs": {
3 |         "hdemucs_mmi": {
4 |             "audio": {
5 |                 "download": true,
6 |                 "load": true,
7 |                 "run": true,
8 |                 "time": 28.517844915390015
9 |             },
10 |             "wav": {
11 |                 "download": true,
12 |                 "load": true,
13 |                 "run": true,
14 |                 "time": 19.17659068107605
15 |             },
16 |             "mp3": {
17 |                 "download": true,
18 |                 "load": true,
19 |                 "run": true,
20 |                 "time": 20.161988019943237
21 |             },
22 |             "flac": {
23 |                 "download": true,
24 |                 "load": true,
25 |                 "run": true,
26 |                 "time": 23.07073998451233
27 |             }
28 |         }
29 |     },
30 |     "vr_network": {
31 |         "1_HP-UVR": {
32 |             "audio": {
33 |                 "download": true,
34 |                 "load": true,
35 |                 "run": true,
36 |                 "time": 11.637473106384277
37 |             },
38 |             "wav": {
39 |                 "download": true,
40 |                 "load": true,
41 |                 "run": true,
42 | 
"time": 12.053731918334961 43 | }, 44 | "mp3": { 45 | "download": true, 46 | "load": true, 47 | "run": true, 48 | "time": 17.92900800704956 49 | }, 50 | "flac": { 51 | "download": true, 52 | "load": true, 53 | "run": true, 54 | "time": 10.97541093826294 55 | } 56 | } 57 | }, 58 | "mdx": { 59 | "UVR-MDX-NET-Inst_1": { 60 | "audio": { 61 | "download": true, 62 | "load": true, 63 | "run": true, 64 | "time": 6.675442218780518 65 | }, 66 | "wav": { 67 | "download": true, 68 | "load": true, 69 | "run": true, 70 | "time": 4.095139265060425 71 | }, 72 | "mp3": { 73 | "download": true, 74 | "load": true, 75 | "run": true, 76 | "time": 4.160974740982056 77 | }, 78 | "flac": { 79 | "download": true, 80 | "load": true, 81 | "run": true, 82 | "time": 4.121398687362671 83 | } 84 | } 85 | }, 86 | "mdxc": { 87 | "MDX23C-8KFFT-InstVoc_HQ": { 88 | "audio": { 89 | "download": true, 90 | "load": true, 91 | "run": true, 92 | "time": 71.7358889579773 93 | }, 94 | "wav": { 95 | "download": true, 96 | "load": true, 97 | "run": true, 98 | "time": 74.15330004692078 99 | }, 100 | "mp3": { 101 | "download": true, 102 | "load": true, 103 | "run": true, 104 | "time": 76.09534502029419 105 | }, 106 | "flac": { 107 | "download": true, 108 | "load": true, 109 | "run": true, 110 | "time": 72.53368592262268 111 | } 112 | } 113 | } 114 | } -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/tests/utils/__init__.py -------------------------------------------------------------------------------- /tests/utils/test_fastio.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /tests/utils/test_get_models.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | from ...src.utils import get_models 4 | 5 | def is_samepath(path1, path2): 6 | return os.path.abspath(path1) == os.path.abspath(path2) 7 | 8 | def rm_models_dir(model_arch): 9 | current_path = os.getcwd() 10 | rm_path = os.path.join(current_path, "src", "models_dir", model_arch) 11 | rm_path = os.path.abspath(rm_path) 12 | # print("rm_path", rm_path) 13 | # os.remove(rm_path) 14 | shutil.rmtree(rm_path) 15 | 16 | def test_model_dont_exists(): 17 | model_name = "model_name" 18 | model_arch = "model_arch" 19 | assert get_models.model_exists(model_name=model_name, model_arch=model_arch) == False 20 | 21 | def test_model_exists(): 22 | model_name = "model_name" 23 | model_arch = "model_arch" 24 | files = ["model_name.txt"] 25 | current_path = os.getcwd() 26 | save_path = os.path.join(current_path, "src", "models_dir", model_arch, "weights", model_name) 27 | if not os.path.exists(save_path): 28 | os.makedirs(save_path) 29 | 30 | for file_ in files: 31 | local_model_path = os.path.join(save_path, file_) 32 | local_model_path = os.path.abspath(local_model_path) 33 | 34 | with open(local_model_path, 'w') as f: 35 | f.write("test") 36 | 37 | assert get_models.model_exists(model_name=model_name, model_arch=model_arch, files=files) == True 38 | rm_models_dir(model_arch) 39 | 40 | def test_download_model(): 41 | model_arch = "model_arch" 42 | model_name = "model_name" 43 | model_path = ["https://www.google.com"] 44 | model_file = 
model_path[0].split("/")[-1] 45 | 46 | path = get_models.download_model(model_name=model_name, model_path=model_path, model_arch=model_arch) 47 | current_path = os.getcwd() 48 | save_path = os.path.join(current_path, "src", "models_dir", model_arch, "weights", model_name) 49 | local_file_path = os.path.join(save_path, model_file) 50 | assert is_samepath(path, save_path) == True 51 | assert os.path.isfile(local_file_path) == True 52 | rm_models_dir(model_arch) 53 | 54 | test_models_json = { 55 | "arch1":{ 56 | "model1":{ 57 | "model_path":["https://www.google.com"] 58 | } 59 | }, 60 | "arch2":{ 61 | "model2":{ 62 | "model_path":["https://www.apple.com"] 63 | } 64 | } 65 | } 66 | 67 | def test_get_all_models(): 68 | test_models_json_res = { 69 | "arch1":{ 70 | "model1": "www.google.com" 71 | }, 72 | "arch2":{ 73 | "model2": "www.apple.com" 74 | } 75 | } 76 | 77 | models = get_models.download_all_models(test_models_json) 78 | for arch in test_models_json_res: 79 | assert arch in models 80 | for model in test_models_json_res[arch]: 81 | assert model in models[arch] 82 | # print(models[arch][model]) 83 | current_path = os.getcwd() 84 | # ref_model_path = test_models_json_res[arch][model] 85 | ref_model_path = os.path.join(current_path, "src", "models_dir", arch, "weights", model) 86 | assert is_samepath(models[arch][model], ref_model_path) == True 87 | assert get_models.model_exists(model_name=model, model_arch=arch) == True 88 | 89 | rm_models_dir(arch) 90 | 91 | 92 | 93 | --------------------------------------------------------------------------------