├── .gitattributes
├── .gitignore
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── __init__.py
├── requirements.txt
├── setup.py
├── src
│   ├── __init__.py
│   ├── audiotools.py
│   ├── ensembles.py
│   ├── models.py
│   ├── models_dir
│   │   ├── __init__.py
│   │   ├── demucs
│   │   │   ├── .gitignore
│   │   │   ├── CODE_OF_CONDUCT.md
│   │   │   ├── CONTRIBUTING.md
│   │   │   ├── LICENSE
│   │   │   ├── MANIFEST.in
│   │   │   ├── Makefile
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── conf
│   │   │   │   ├── config.yaml
│   │   │   │   ├── dset
│   │   │   │   │   ├── aetl.yaml
│   │   │   │   │   ├── auto_extra_test.yaml
│   │   │   │   │   ├── auto_mus.yaml
│   │   │   │   │   ├── extra44.yaml
│   │   │   │   │   ├── extra_mmi_goodclean.yaml
│   │   │   │   │   ├── extra_test.yaml
│   │   │   │   │   ├── musdb44.yaml
│   │   │   │   │   ├── sdx23_bleeding.yaml
│   │   │   │   │   └── sdx23_labelnoise.yaml
│   │   │   │   ├── svd
│   │   │   │   │   ├── base.yaml
│   │   │   │   │   ├── base2.yaml
│   │   │   │   │   └── default.yaml
│   │   │   │   └── variant
│   │   │   │       ├── default.yaml
│   │   │   │       ├── example.yaml
│   │   │   │       └── finetune.yaml
│   │   │   ├── demucs.png
│   │   │   ├── demucs
│   │   │   │   ├── __init__.py
│   │   │   │   ├── api.py
│   │   │   │   ├── apply.py
│   │   │   │   ├── audio.py
│   │   │   │   ├── augment.py
│   │   │   │   ├── demucs.py
│   │   │   │   ├── distrib.py
│   │   │   │   ├── ema.py
│   │   │   │   ├── evaluate.py
│   │   │   │   ├── grids
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── _explorers.py
│   │   │   │   │   ├── mdx.py
│   │   │   │   │   ├── mdx_extra.py
│   │   │   │   │   ├── mdx_refine.py
│   │   │   │   │   ├── mmi.py
│   │   │   │   │   ├── mmi_ft.py
│   │   │   │   │   ├── repro.py
│   │   │   │   │   ├── repro_ft.py
│   │   │   │   │   └── sdx23.py
│   │   │   │   ├── hdemucs.py
│   │   │   │   ├── htdemucs.py
│   │   │   │   ├── pretrained.py
│   │   │   │   ├── py.typed
│   │   │   │   ├── remote
│   │   │   │   │   ├── files.txt
│   │   │   │   │   ├── hdemucs_mmi.yaml
│   │   │   │   │   ├── htdemucs.yaml
│   │   │   │   │   ├── htdemucs_6s.yaml
│   │   │   │   │   ├── htdemucs_ft.yaml
│   │   │   │   │   ├── mdx.yaml
│   │   │   │   │   ├── mdx_extra.yaml
│   │   │   │   │   ├── mdx_extra_q.yaml
│   │   │   │   │   ├── mdx_q.yaml
│   │   │   │   │   ├── repro_mdx_a.yaml
│   │   │   │   │   ├── repro_mdx_a_hybrid_only.yaml
│   │   │   │   │   └── repro_mdx_a_time_only.yaml
│   │   │   │   ├── repitch.py
│   │   │   │   ├── repo.py
│   │   │   │   ├── separate.py
│   │   │   │   ├── solver.py
│   │   │   │   ├── spec.py
│   │   │   │   ├── states.py
│   │   │   │   ├── svd.py
│   │   │   │   ├── train.py
│   │   │   │   ├── transformer.py
│   │   │   │   ├── utils.py
│   │   │   │   ├── wav.py
│   │   │   │   └── wdemucs.py
│   │   │   ├── docs
│   │   │   │   ├── api.md
│   │   │   │   ├── linux.md
│   │   │   │   ├── mac.md
│   │   │   │   ├── mdx.md
│   │   │   │   ├── release.md
│   │   │   │   ├── sdx23.md
│   │   │   │   ├── training.md
│   │   │   │   └── windows.md
│   │   │   ├── environment-cpu.yml
│   │   │   ├── environment-cuda.yml
│   │   │   ├── hdemucs.py
│   │   │   ├── hubconf.py
│   │   │   ├── mypy.ini
│   │   │   ├── outputs.tar.gz
│   │   │   ├── requirements.txt
│   │   │   ├── requirements_minimal.txt
│   │   │   ├── setup.cfg
│   │   │   ├── setup.py
│   │   │   ├── test.mp3
│   │   │   └── tools
│   │   │       ├── __init__.py
│   │   │       ├── automix.py
│   │   │       ├── bench.py
│   │   │       ├── convert.py
│   │   │       ├── export.py
│   │   │       └── notpytest_test_pretrained.py
│   │   ├── mdx
│   │   │   ├── __init__.py
│   │   │   ├── constants.py
│   │   │   ├── mdx_interface.py
│   │   │   ├── mdxnet.py
│   │   │   ├── modelparams
│   │   │   │   ├── model_data.json
│   │   │   │   └── model_name_mapper.json
│   │   │   ├── modules.py
│   │   │   ├── pyrb.py
│   │   │   ├── spec_utils.py
│   │   │   └── tfc_tdf_v3.py
│   │   ├── mdxc
│   │   │   ├── __init__.py
│   │   │   ├── constants.py
│   │   │   ├── mdxc_interface.py
│   │   │   ├── mdxnet.py
│   │   │   ├── modelparams
│   │   │   │   ├── mdx_c_configs
│   │   │   │   │   ├── model1.yaml
│   │   │   │   │   ├── model2.yaml
│   │   │   │   │   ├── model3.yaml
│   │   │   │   │   ├── modelA.yaml
│   │   │   │   │   ├── modelB.yaml
│   │   │   │   │   ├── model_2_stem_061321.yaml
│   │   │   │   │   ├── model_2_stem_full_band.yaml
│   │   │   │   │   ├── model_2_stem_full_band_2.yaml
│   │   │   │   │   ├── model_2_stem_full_band_3.yaml
│   │   │   │   │   ├── model_2_stem_full_band_4.yaml
│   │   │   │   │   ├── model_2_stem_full_band_8k.yaml
│   │   │   │   │   └── sndfx.yaml
│   │   │   │   ├── model_data.json
│   │   │   │   └── model_name_mapper.json
│   │   │   ├── modules.py
│   │   │   ├── pyrb.py
│   │   │   ├── spec_utils.py
│   │   │   └── tfc_tdf_v3.py
│   │   ├── models.json
│   │   └── vr_network
│   │       ├── __init__.py
│   │       ├── constants.py
│   │       ├── layers.py
│   │       ├── layers_new.py
│   │       ├── model_param_init.py
│   │       ├── modelparams
│   │       │   ├── 1band_sr16000_hl512.json
│   │       │   ├── 1band_sr32000_hl512.json
│   │       │   ├── 1band_sr33075_hl384.json
│   │       │   ├── 1band_sr44100_hl1024.json
│   │       │   ├── 1band_sr44100_hl256.json
│   │       │   ├── 1band_sr44100_hl512.json
│   │       │   ├── 1band_sr44100_hl512_cut.json
│   │       │   ├── 1band_sr44100_hl512_nf1024.json
│   │       │   ├── 2band_32000.json
│   │       │   ├── 2band_44100_lofi.json
│   │       │   ├── 2band_48000.json
│   │       │   ├── 3band_44100.json
│   │       │   ├── 3band_44100_mid.json
│   │       │   ├── 3band_44100_msb2.json
│   │       │   ├── 4band_44100.json
│   │       │   ├── 4band_44100_mid.json
│   │       │   ├── 4band_44100_msb.json
│   │       │   ├── 4band_44100_msb2.json
│   │       │   ├── 4band_44100_reverse.json
│   │       │   ├── 4band_44100_sw.json
│   │       │   ├── 4band_v2.json
│   │       │   ├── 4band_v2_sn.json
│   │       │   ├── 4band_v3.json
│   │       │   ├── 4band_v3_sn.json
│   │       │   ├── ensemble.json
│   │       │   └── model_data.json
│   │       ├── nets.py
│   │       ├── nets_new.py
│   │       ├── pyrb.py
│   │       ├── spec_utils.py
│   │       └── vr_interface.py
│   ├── pipelines.py
│   └── utils
│       ├── __init__.py
│       ├── fastio.py
│       └── get_models.py
└── tests
    ├── __init__.py
    ├── models_status.json
    ├── test_models.py
    └── utils
        ├── __init__.py
        ├── test_fastio.py
        └── test_get_models.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | GUI/lib_v5/demo.ipynb filter=lfs diff=lfs merge=lfs -text
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore virtual environment
2 | venv/
3 |
4 | # Ignore compiled Python files
5 | *.pyc
6 |
7 | # Ignore logs
8 | *.log
9 |
10 | # Ignore database files
11 | *.db
12 |
13 | # Ignore cache files
14 | __pycache__/
15 |
16 | # Ignore environment variables file
17 | .env
18 |
19 | # Ignore local development settings
20 | settings_local.py
21 |
22 | # Ignore IDE files
23 | .vscode/
24 | .idea/
25 |
26 | # Ignore package dependencies
27 | venv/
28 |
29 | *.pkl
30 | ffmpeg*
31 |
32 | **/weights/
33 |
34 | base.mp3
35 | drums.mp3
36 | vocals.mp3
37 | other.mp3
38 |
39 | **/*.wav
40 | **/*.mp3
41 | **/*.flac
42 | **/.pytest_cache
43 |
44 | build/
45 | dependency_links.txt
46 | PKG-INFO
47 | requires.txt
48 | SOURCES.txt
49 | top_level.txt
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, religion, or sexual identity
10 | and orientation.
11 |
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 |
15 | ## Our Standards
16 |
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 |
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 | and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the
26 | overall community
27 |
28 | Examples of unacceptable behavior include:
29 |
30 | * The use of sexualized language or imagery, and sexual attention or
31 | advances of any kind
32 | * Trolling, insulting or derogatory comments, and personal or political attacks
33 | * Public or private harassment
34 | * Publishing others' private information, such as a physical or email
35 | address, without their explicit permission
36 | * Other conduct which could reasonably be considered inappropriate in a
37 | professional setting
38 |
39 | ## Enforcement Responsibilities
40 |
41 | Community leaders are responsible for clarifying and enforcing our standards of
42 | acceptable behavior and will take appropriate and fair corrective action in
43 | response to any behavior that they deem inappropriate, threatening, offensive,
44 | or harmful.
45 |
46 | Community leaders have the right and responsibility to remove, edit, or reject
47 | comments, commits, code, wiki edits, issues, and other contributions that are
48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
49 | decisions when appropriate.
50 |
51 | ## Scope
52 |
53 | This Code of Conduct applies within all community spaces, and also applies when
54 | an individual is officially representing the community in public spaces.
55 | Examples of representing our community include using an official e-mail address,
56 | posting via an official social media account, or acting as an appointed
57 | representative at an online or offline event.
58 |
59 | ## Enforcement
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported to the community leaders responsible for enforcement at
63 | Mohannad.Barakat@fau.de.
64 | All complaints will be reviewed and investigated promptly and fairly.
65 |
66 | All community leaders are obligated to respect the privacy and security of the
67 | reporter of any incident.
68 |
69 | ## Enforcement Guidelines
70 |
71 | Community leaders will follow these Community Impact Guidelines in determining
72 | the consequences for any action they deem in violation of this Code of Conduct:
73 |
74 | ### 1. Correction
75 |
76 | **Community Impact**: Use of inappropriate language or other behavior deemed
77 | unprofessional or unwelcome in the community.
78 |
79 | **Consequence**: A private, written warning from community leaders, providing
80 | clarity around the nature of the violation and an explanation of why the
81 | behavior was inappropriate. A public apology may be requested.
82 |
83 | ### 2. Warning
84 |
85 | **Community Impact**: A violation through a single incident or series
86 | of actions.
87 |
88 | **Consequence**: A warning with consequences for continued behavior. No
89 | interaction with the people involved, including unsolicited interaction with
90 | those enforcing the Code of Conduct, for a specified period of time. This
91 | includes avoiding interactions in community spaces as well as external channels
92 | like social media. Violating these terms may lead to a temporary or
93 | permanent ban.
94 |
95 | ### 3. Temporary Ban
96 |
97 | **Community Impact**: A serious violation of community standards, including
98 | sustained inappropriate behavior.
99 |
100 | **Consequence**: A temporary ban from any sort of interaction or public
101 | communication with the community for a specified period of time. No public or
102 | private interaction with the people involved, including unsolicited interaction
103 | with those enforcing the Code of Conduct, is allowed during this period.
104 | Violating these terms may lead to a permanent ban.
105 |
106 | ### 4. Permanent Ban
107 |
108 | **Community Impact**: Demonstrating a pattern of violation of community
109 | standards, including sustained inappropriate behavior, harassment of an
110 | individual, or aggression toward or disparagement of classes of individuals.
111 |
112 | **Consequence**: A permanent ban from any sort of public interaction within
113 | the community.
114 |
115 | ## Attribution
116 |
117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118 | version 2.0, available at
119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120 |
121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
122 | enforcement ladder](https://github.com/mozilla/diversity).
123 |
124 | [homepage]: https://www.contributor-covenant.org
125 |
126 | For answers to common questions about this code of conduct, see the FAQ at
127 | https://www.contributor-covenant.org/faq. Translations are available at
128 | https://www.contributor-covenant.org/translations.
129 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 NextAudioGen
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 |
4 |
5 |
6 | # Ultimate Vocal Remover API v0.1
7 |
  8 | This is an API for ultimate vocal removal. It is designed to be expandable with new models/algorithms while maintaining a simple interface.
9 | [Colab demo](https://colab.research.google.com/drive/1qf17AV5KU_8v0f29zUnPHQBbr3iX8bu6?usp=sharing)
10 |
11 |
12 | # Install
13 | If you intend to edit the code
14 | ```bash
15 | git clone https://github.com/NextAudioGen/ultimatevocalremover_api.git
16 | cd ultimatevocalremover_api
17 | pip install .
18 | ```
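If you only want to use the API without editing it, installing straight from GitHub with `pip install git+https://github.com/NextAudioGen/ultimatevocalremover_api.git` should also work, though that path is untested here.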
19 | # Usage
20 | ```python
21 | import uvr
22 | from uvr import models
23 | from uvr.utils.get_models import download_all_models
24 | import torch
25 | import audiofile
26 | import json
27 |
 28 | models_json = json.load(open("/content/ultimatevocalremover_api/src/models_dir/models.json", "r"))  # adjust this path to where you cloned the repo
29 | download_all_models(models_json)
 30 | name = "path/to/your/audio.wav"  # path to the audio file you want to separate
31 | device = "cuda"
32 |
33 | demucs = models.Demucs(name="hdemucs_mmi", other_metadata={"segment":2, "split":True}, device=device, logger=None)
34 |
35 | # Separating an audio file
36 | res = demucs(name)
 37 | separated_audio = res["separated"]
 38 | vocals = separated_audio["vocals"]
 39 | bass = separated_audio["bass"]
 40 | drums = separated_audio["drums"]
 41 | other = separated_audio["other"]
42 | ```
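Each stem can then be written back to disk. A minimal sketch, assuming each stem is a NumPy array shaped `(channels, samples)` and the model operated at 44.1 kHz (check your model's actual sample rate):

```python
import audiofile

# Hypothetical helper, not part of the API: write every stem to a wav file.
def save_stems(stems: dict, sampling_rate: int = 44100):
    for stem_name, stem in stems.items():
        # audiofile expects the signal as (channels, samples)
        audiofile.write(f"{stem_name}.wav", stem, sampling_rate)

save_stems(separated_audio)
```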
 43 | # Architecture
44 | ```text
45 | Ultimate Vocal Remover API
46 | ├── src
47 | │ ├── audiotools.py
48 | │ ├── models.py
49 | │ ├── ensembles.py
50 | │ ├── pipelines.py
51 | │ ├── utils/
52 | │ ├── audio_tools/
53 | │ └── models_dir
54 | │ ├── Each implementation of a model is added here as a single directory.
55 | │ └── models.json (this is used to download the models)
56 | ├── docs
57 | │ ├── models/
58 | │ │ └── Here goes all models docs each in a single directory.
59 | │ ├── ensembles/
60 | │ │ └── Here goes all ensembles docs each in a single directory.
61 | │ ├── pipelines/
62 | │ │ └── Here goes all pipelines docs each in a single directory.
63 | │ ├── audio_tools/
64 | │ └── utils/
65 | └── tests/
66 | ├── test_models.py
67 | ├── test_ensembles.py
68 | ├── test_pipelines.py
69 | ├── test_audiotools.py
70 | └── utils/
71 | ```
 72 | **audiotools.py:** Interface for all audio tools \
 73 | **models.py:** Interface for all models, each following a consistent API \
 74 | **utils/:** Read/write utilities for audio, models, etc.
75 |
76 | ## All models, pipelines and ensembles follow this interface:
77 | ```python
78 | class BaseModel:
79 | def __init__(self, name:str, architecture:str, other_metadata:dict, device=None, logger=None)
80 | def __call__(self, audio:Union[npt.NDArray, str], sampling_rate:int=None, **kwargs)->dict
81 | # @singledispatch
82 | def predict(self, audio:npt.NDArray, sampling_rate:int, **kwargs)->dict
83 | def predict_path(self, audio:str, **kwargs)->dict
84 | def separate(self, audio:npt.NDArray, sampling_rate:int=None)->dict
85 | def __repr__(self)
86 | def to(self, device:str)
87 | def update_metadata(self, metadata:dict)
88 | @staticmethod
89 | def list_models()->list
90 |
91 | ```
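Because every implementation shares this interface, swapping architectures only changes the constructor line. A sketch (assuming the `Demucs` wrapper from the usage example above; model names and metadata keys differ per architecture):

```python
from uvr import models

print(models.Demucs.list_models())  # discover the available checkpoints

model = models.Demucs(name="hdemucs_mmi",
                      other_metadata={"segment": 2, "split": True},
                      device="cpu", logger=None)
res = model.predict_path("song.wav")  # same dict layout as __call__
model.to("cuda")                      # move to GPU later if available
```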
92 |
93 | # Contribution
 94 | If you like this project, leave a star, fork it, and you are of course welcome to [buy me a coffee](https://www.buymeacoffee.com/mohannadbarakat).
95 |
 96 | Also, please open issues and make pull requests, but remember to follow the existing structure and interfaces. We are building out automated testing; we know the current tests are naive and are improving them, so please add tests for any new code you contribute.
97 |
 98 | # References
 99 | ## Code
100 | Code and weights from these sources were used in developing this library:
101 | - [MDX-Net](https://github.com/kuielab/mdx-net/tree/main) This is the original MDX architecture implementation.
102 | - [MDXC and demucs](https://github.com/ZFTurbo/MVSEP-MDX23-music-separation-model/tree/main) This repo has clever ensembling methods for MDX, Demucs 3, and Demucs 4. Moreover, they have made the weights of their fine-tuned MDX openly available (under the MDXC implementation [here](/src/models_dir/mdxc/)).
103 | - [Demucs](https://github.com/facebookresearch/demucs/tree/e976d93ecc3865e5757426930257e200846a520a) This is the original implementation of the model.
104 | - [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui/tree/master) This is one of the best vocal removers; a lot of ideas in this repo were borrowed from it.
105 | - [weights](https://github.com/TRvlvr/model_repo/releases/tag/all_public_uvr_models) Most of the current models come from this repo.
106 |
107 | ## Papers
108 | - [Benchmarks and leaderboards for sound demixing tasks](https://arxiv.org/pdf/2305.07489.pdf)
109 | - [Multi-scale Multi-band DenseNets for Audio Source Separation](https://arxiv.org/pdf/1706.09588.pdf)
110 | - [Hybrid Transformers for Music Source Separation](https://arxiv.org/pdf/2211.08553.pdf)
111 | - [KUIELab-MDX-Net: A Two-Stream Neural Network for Music Demixing](https://arxiv.org/abs/2111.12203)
112 |
113 | # Core Developers
114 |
115 | - [Mohannad Barakat](https://github.com/mohannadEhabBarakat/)
116 | - [Noha Magdy](https://github.com/Noha-Magdy)
117 | - [Mohtady Ehab](https://github.com/Mohtady-Ehab)
118 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/__init__.py
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # please make sure you have already a pytorch install that is cuda enabled!
2 | dora-search>=0.1.12
3 | diffq>=0.2.1
4 | einops
5 | flake8
6 | hydra-colorlog>=1.1
7 | hydra-core>=1.1
8 | julius>=0.2.3
9 | lameenc>=1.2
10 | museval
11 | mypy
12 | openunmix
13 | pyyaml
14 | submitit
15 | torch>=1.8.1
16 | torchaudio>=0.8,<2.1
17 | tqdm
18 | treetable
19 | soundfile>=0.10.3
20 | pytest
21 | librosa
22 | audiofile
23 | pytorch_lightning
24 | onnxruntime
25 | onnx
26 | onnx2pytorch
27 | ml_collections
28 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | # setuptools supersedes the deprecated distutils setup(); import it once.
 3 | from setuptools import setup
4 |
5 | with open('requirements.txt') as f:
6 | required = f.read().splitlines()
7 | # install_requires=required,
8 |
9 | setup(
10 | name='uvr',
11 | version='0.1',
12 |     description='Ultimate Vocal Remover API',
13 |     author='Mohannad Barakat',
14 | author_email="Mohannad.Barakat@fau.de",
15 | license='MIT',
16 | package_dir={'uvr':'src'},
17 | long_description=open('README.md').read(),
18 | install_requires=required,
19 | url="https://github.com/NextAudioGen/ultimatevocalremover_api.git",
20 | package_data={
21 | 'uvr': ['**/*.txt', '**/*.t7', '**/*.pth', '**/*.json', '**/*.yaml', '**/*.yml']
22 | }
23 | )
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/__init__.py
--------------------------------------------------------------------------------
/src/audiotools.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/audiotools.py
--------------------------------------------------------------------------------
/src/ensembles.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/ensembles.py
--------------------------------------------------------------------------------
/src/models_dir/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/models_dir/__init__.py
--------------------------------------------------------------------------------
/src/models_dir/demucs/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg-info
2 | __pycache__
3 | Session.vim
4 | /build
5 | /dist
6 | /lab
7 | /metadata
8 | /notebooks
9 | /outputs
10 | /release
11 | /release_models
12 | /separated
13 | /tests
14 | /trash
15 | /misc
16 | /mdx
17 | .mypy_cache
18 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to make participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies within all project spaces, and it also applies when
49 | an individual is representing the project or its community in public spaces.
50 | Examples of representing a project or community include using an official
51 | project e-mail address, posting via an official social media account, or acting
52 | as an appointed representative at an online or offline event. Representation of
53 | a project may be further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at <opensource-conduct@fb.com>. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 |
73 | [homepage]: https://www.contributor-covenant.org
74 |
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to Demucs
2 |
3 | ## Pull Requests
4 |
5 | In order to accept your pull request, we need you to submit a CLA. You only need
6 | to do this once to work on any of Facebook's open source projects.
7 |
 8 | Complete your CLA here: <https://code.facebook.com/cla>
9 |
10 | Demucs is the implementation of a research paper.
11 | Therefore, we do not plan on accepting many pull requests for new features.
12 | We certainly welcome them for bug fixes.
13 |
14 |
15 | ## Issues
16 |
17 | We use GitHub issues to track public bugs. Please ensure your description is
18 | clear and has sufficient instructions to be able to reproduce the issue.
19 |
20 |
21 | ## License
22 | By contributing to this repository, you agree that your contributions will be licensed
23 | under the LICENSE file in the root directory of this source tree.
24 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) Meta Platforms, Inc. and affiliates.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/src/models_dir/demucs/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-exclude env *
2 | recursive-include conf *.yaml
3 | include Makefile
4 | include LICENSE
5 | include demucs.png
6 | include outputs.tar.gz
7 | include test.mp3
8 | include requirements.txt
9 | include requirements_minimal.txt
10 | include mypy.ini
11 | include demucs/py.typed
12 | include demucs/remote/*.txt
13 | include demucs/remote/*.yaml
14 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/Makefile:
--------------------------------------------------------------------------------
1 | all: linter tests
2 |
3 | linter:
4 | flake8 demucs
5 | mypy demucs
6 |
7 | tests: test_train test_eval
8 |
9 | test_train: tests/musdb
10 | _DORA_TEST_PATH=/tmp/demucs python3 -m dora run --clear \
11 | dset.musdb=./tests/musdb dset.segment=4 dset.shift=2 epochs=2 model=demucs \
12 | demucs.depth=2 demucs.channels=4 test.sdr=false misc.num_workers=0 test.workers=0 \
13 | test.shifts=0
14 |
15 | test_eval:
16 | python3 -m demucs -n demucs_unittest test.mp3
17 | python3 -m demucs -n demucs_unittest --two-stems=vocals test.mp3
18 | python3 -m demucs -n demucs_unittest --mp3 test.mp3
19 | python3 -m demucs -n demucs_unittest --flac --int24 test.mp3
20 | python3 -m demucs -n demucs_unittest --int24 --clip-mode clamp test.mp3
21 | python3 -m demucs -n demucs_unittest --segment 8 test.mp3
22 | python3 -m demucs.api -n demucs_unittest --segment 8 test.mp3
23 | python3 -m demucs --list-models
24 |
25 | tests/musdb:
26 | test -e tests || mkdir tests
27 | python3 -c 'import musdb; musdb.DB("tests/tmp", download=True)'
28 | musdbconvert tests/tmp tests/musdb
29 |
30 | dist:
31 | python3 setup.py sdist
32 |
33 | clean:
34 | rm -r dist build *.egg-info
35 |
36 | .PHONY: linter dist test_train test_eval
37 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/models_dir/demucs/__init__.py
--------------------------------------------------------------------------------
/src/models_dir/demucs/conf/dset/aetl.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
3 | # automix dataset with Musdb, extra training data and the test set of Musdb.
4 | # This used even more remixes than auto_extra_test.
5 | dset:
6 | wav: /checkpoint/defossez/datasets/aetl
7 | samplerate: 44100
8 | channels: 2
9 | epochs: 320
10 | max_batches: 500
11 |
12 | augment:
13 | shift_same: true
14 | scale:
15 | proba: 0.
16 | remix:
17 | proba: 0
18 | repitch:
19 | proba: 0
20 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/conf/dset/auto_extra_test.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
3 | # automix dataset with Musdb, extra training data and the test set of Musdb.
4 | dset:
5 | wav: /checkpoint/defossez/datasets/automix_extra_test2
6 | samplerate: 44100
7 | channels: 2
8 | epochs: 320
9 | max_batches: 500
10 |
11 | augment:
12 | shift_same: true
13 | scale:
14 | proba: 0.
15 | remix:
16 | proba: 0
17 | repitch:
18 | proba: 0
19 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/conf/dset/auto_mus.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
3 | # Automix dataset based on musdb train set.
4 | dset:
5 | wav: /checkpoint/defossez/datasets/automix_musdb
6 | samplerate: 44100
7 | channels: 2
8 | epochs: 360
9 | max_batches: 300
10 | test:
11 | every: 4
12 |
13 | augment:
14 | shift_same: true
15 | scale:
16 | proba: 0.5
17 | remix:
18 | proba: 0
19 | repitch:
20 | proba: 0
21 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/conf/dset/extra44.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
3 | # Musdb + extra tracks
4 | dset:
5 | wav: /checkpoint/defossez/datasets/allstems_44/
6 | samplerate: 44100
7 | channels: 2
8 | epochs: 320
9 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/conf/dset/extra_mmi_goodclean.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
3 | # Musdb + extra tracks
4 | dset:
5 | wav: /checkpoint/defossez/datasets/allstems_44/
6 | wav2: /checkpoint/defossez/datasets/mmi44_goodclean
7 | samplerate: 44100
8 | channels: 2
9 | wav2_weight: null
10 | wav2_valid: false
11 | valid_samples: 100
12 | epochs: 1200
13 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/conf/dset/extra_test.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
3 | # Musdb + extra tracks + test set from musdb.
4 | dset:
5 | wav: /checkpoint/defossez/datasets/allstems_test_44/
6 | samplerate: 44100
7 | channels: 2
8 | epochs: 320
9 | max_batches: 700
10 | test:
11 | sdr: false
12 | every: 500
13 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/conf/dset/musdb44.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
3 | dset:
4 | samplerate: 44100
5 | channels: 2
--------------------------------------------------------------------------------
/src/models_dir/demucs/conf/dset/sdx23_bleeding.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
3 | # Musdb + extra tracks
4 | dset:
5 | wav: /shared/home/defossez/data/datasets/moisesdb23_bleeding_v1.0/
6 | use_musdb: false
7 | samplerate: 44100
8 | channels: 2
 9 |   backend: soundfile # must use soundfile, as some mixtures would clip with sox.
10 | epochs: 320
11 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/conf/dset/sdx23_labelnoise.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
3 | # Musdb + extra tracks
4 | dset:
5 | wav: /shared/home/defossez/data/datasets/moisesdb23_labelnoise_v1.0
6 | use_musdb: false
7 | samplerate: 44100
8 | channels: 2
 9 |   backend: soundfile # must use soundfile, as some mixtures would clip with sox.
10 | epochs: 320
11 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/conf/svd/base.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
3 | svd:
4 | penalty: 0
5 | min_size: 1
6 | dim: 50
7 | niters: 4
8 | powm: false
9 | proba: 1
10 | conv_only: false
11 |   convtr: false # ideally this should be true, but some models were trained with this set to false.
12 |
13 | optim:
14 | beta2: 0.9998
--------------------------------------------------------------------------------
/src/models_dir/demucs/conf/svd/base2.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
3 | svd:
4 | penalty: 0
5 | min_size: 1
6 | dim: 100
7 | niters: 4
8 | powm: false
9 | proba: 1
10 | conv_only: false
11 | convtr: true
12 |
13 | optim:
14 | beta2: 0.9998
--------------------------------------------------------------------------------
/src/models_dir/demucs/conf/svd/default.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/conf/variant/default.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/conf/variant/example.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
3 | model: hdemucs
4 | hdemucs:
5 | channels: 32
--------------------------------------------------------------------------------
/src/models_dir/demucs/conf/variant/finetune.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
3 | epochs: 4
4 | batch_size: 16
5 | optim:
6 | lr: 0.0006
7 | test:
8 | every: 1
9 | sdr: false
10 | dset:
11 | segment: 28
12 | shift: 2
13 |
14 | augment:
15 | scale:
16 | proba: 0
17 | shift_same: true
18 | remix:
19 | proba: 0
20 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/models_dir/demucs/demucs.png
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | __version__ = "4.1.0a2"
8 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/augment.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | """Data augmentations.
7 | """
8 |
9 | import random
10 | import torch as th
11 | from torch import nn
12 |
13 |
14 | class Shift(nn.Module):
15 | """
16 | Randomly shift audio in time by up to `shift` samples.
17 | """
18 | def __init__(self, shift=8192, same=False):
19 | super().__init__()
20 | self.shift = shift
21 | self.same = same
22 |
23 | def forward(self, wav):
24 | batch, sources, channels, time = wav.size()
25 | length = time - self.shift
26 | if self.shift > 0:
27 | if not self.training:
28 | wav = wav[..., :length]
29 | else:
30 | srcs = 1 if self.same else sources
31 | offsets = th.randint(self.shift, [batch, srcs, 1, 1], device=wav.device)
32 | offsets = offsets.expand(-1, sources, channels, -1)
33 | indexes = th.arange(length, device=wav.device)
34 | wav = wav.gather(3, indexes + offsets)
35 | return wav
36 |
37 |
38 | class FlipChannels(nn.Module):
39 | """
40 | Flip left-right channels.
41 | """
42 | def forward(self, wav):
43 | batch, sources, channels, time = wav.size()
44 | if self.training and wav.size(2) == 2:
45 | left = th.randint(2, (batch, sources, 1, 1), device=wav.device)
46 | left = left.expand(-1, -1, -1, time)
47 | right = 1 - left
48 | wav = th.cat([wav.gather(2, left), wav.gather(2, right)], dim=2)
49 | return wav
50 |
51 |
52 | class FlipSign(nn.Module):
53 | """
54 | Random sign flip.
55 | """
56 | def forward(self, wav):
57 | batch, sources, channels, time = wav.size()
58 | if self.training:
59 | signs = th.randint(2, (batch, sources, 1, 1), device=wav.device, dtype=th.float32)
60 | wav = wav * (2 * signs - 1)
61 | return wav
62 |
63 |
64 | class Remix(nn.Module):
65 | """
66 | Shuffle sources to make new mixes.
67 | """
68 | def __init__(self, proba=1, group_size=4):
69 | """
70 | Shuffle sources within one batch.
71 | Each batch is divided into groups of size `group_size` and shuffling is done within
 72 |         each group separately. This allows keeping the same probability distribution no matter
73 | the number of GPUs. Without this grouping, using more GPUs would lead to a higher
74 | probability of keeping two sources from the same track together which can impact
75 | performance.
76 | """
77 | super().__init__()
78 | self.proba = proba
79 | self.group_size = group_size
80 |
81 | def forward(self, wav):
82 | batch, streams, channels, time = wav.size()
83 | device = wav.device
84 |
85 | if self.training and random.random() < self.proba:
86 | group_size = self.group_size or batch
87 | if batch % group_size != 0:
88 | raise ValueError(f"Batch size {batch} must be divisible by group size {group_size}")
89 | groups = batch // group_size
90 | wav = wav.view(groups, group_size, streams, channels, time)
91 | permutations = th.argsort(th.rand(groups, group_size, streams, 1, 1, device=device),
92 | dim=1)
93 | wav = wav.gather(1, permutations.expand(-1, -1, -1, channels, time))
94 | wav = wav.view(batch, streams, channels, time)
95 | return wav
96 |
97 |
98 | class Scale(nn.Module):
99 | def __init__(self, proba=1., min=0.25, max=1.25):
100 | super().__init__()
101 | self.proba = proba
102 | self.min = min
103 | self.max = max
104 |
105 | def forward(self, wav):
106 | batch, streams, channels, time = wav.size()
107 | device = wav.device
108 | if self.training and random.random() < self.proba:
109 | scales = th.empty(batch, streams, 1, 1, device=device).uniform_(self.min, self.max)
110 | wav *= scales
111 | return wav
112 |
--------------------------------------------------------------------------------
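As a quick illustration of how these modules compose during training (not part of the file above; the import path is assumed, and tensors follow the `(batch, sources, channels, time)` layout the classes expect):

```python
import torch
from torch import nn
from demucs.augment import Shift, FlipChannels, FlipSign, Remix, Scale

augment = nn.Sequential(
    Shift(shift=8192, same=False),        # random time shift; crops 8192 samples
    FlipChannels(),                       # random left/right swap per source
    FlipSign(),                           # random polarity flip per source
    Remix(proba=1, group_size=4),         # shuffle sources within groups of 4
    Scale(proba=1., min=0.25, max=1.25),  # random per-source gain
)
augment.train()  # most modules are no-ops (or crop-only) in eval mode

wav = torch.randn(8, 4, 2, 44100)  # 8 mixes, 4 sources, stereo, 1 s at 44.1 kHz
out = augment(wav)                 # shape: (8, 4, 2, 44100 - 8192)
```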
/src/models_dir/demucs/demucs/distrib.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | """Distributed training utilities.
7 | """
8 | import logging
9 | import pickle
10 |
11 | import numpy as np
12 | import torch
13 | from torch.utils.data.distributed import DistributedSampler
14 | from torch.utils.data import DataLoader, Subset
15 | from torch.nn.parallel.distributed import DistributedDataParallel
16 |
17 | from dora import distrib as dora_distrib
18 |
19 | logger = logging.getLogger(__name__)
20 | rank = 0
21 | world_size = 1
22 |
23 |
24 | def init():
25 | global rank, world_size
26 | if not torch.distributed.is_initialized():
27 | dora_distrib.init()
28 | rank = dora_distrib.rank()
29 | world_size = dora_distrib.world_size()
30 |
31 |
32 | def average(metrics, count=1.):
33 | if isinstance(metrics, dict):
34 | keys, values = zip(*sorted(metrics.items()))
35 | values = average(values, count)
36 | return dict(zip(keys, values))
37 | if world_size == 1:
38 | return metrics
39 | tensor = torch.tensor(list(metrics) + [1], device='cuda', dtype=torch.float32)
40 | tensor *= count
41 | torch.distributed.all_reduce(tensor, op=torch.distributed.ReduceOp.SUM)
42 | return (tensor[:-1] / tensor[-1]).cpu().numpy().tolist()
43 |
44 |
45 | def wrap(model):
46 | if world_size == 1:
47 | return model
48 | else:
49 | return DistributedDataParallel(
50 | model,
51 | # find_unused_parameters=True,
52 | device_ids=[torch.cuda.current_device()],
53 | output_device=torch.cuda.current_device())
54 |
55 |
56 | def barrier():
57 | if world_size > 1:
58 | torch.distributed.barrier()
59 |
60 |
61 | def share(obj=None, src=0):
62 | if world_size == 1:
63 | return obj
64 | size = torch.empty(1, device='cuda', dtype=torch.long)
65 | if rank == src:
66 | dump = pickle.dumps(obj)
67 | size[0] = len(dump)
68 | torch.distributed.broadcast(size, src=src)
69 | # size variable is now set to the length of pickled obj in all processes
70 |
71 | if rank == src:
72 | buffer = torch.from_numpy(np.frombuffer(dump, dtype=np.uint8).copy()).cuda()
73 | else:
74 | buffer = torch.empty(size[0].item(), device='cuda', dtype=torch.uint8)
75 | torch.distributed.broadcast(buffer, src=src)
76 | # buffer variable is now set to pickled obj in all processes
77 |
78 | if rank != src:
79 | obj = pickle.loads(buffer.cpu().numpy().tobytes())
80 | logger.debug(f"Shared object of size {len(buffer)}")
81 | return obj
82 |
83 |
84 | def loader(dataset, *args, shuffle=False, klass=DataLoader, **kwargs):
85 | """
86 | Create a dataloader properly in case of distributed training.
87 | If a gradient is going to be computed you must set `shuffle=True`.
88 | """
89 | if world_size == 1:
90 | return klass(dataset, *args, shuffle=shuffle, **kwargs)
91 |
92 | if shuffle:
93 | # train means we will compute backward, we use DistributedSampler
94 | sampler = DistributedSampler(dataset)
95 | # We ignore shuffle, DistributedSampler already shuffles
96 | return klass(dataset, *args, **kwargs, sampler=sampler)
97 | else:
 98 |         # We make a manual shard, as DistributedSampler would otherwise replicate some examples
99 | dataset = Subset(dataset, list(range(rank, len(dataset), world_size)))
100 | return klass(dataset, *args, shuffle=shuffle, **kwargs)
101 |
--------------------------------------------------------------------------------
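A usage sketch for `loader` (illustrative only; the import path and the toy dataset are assumptions):

```python
import torch
from torch.utils.data import TensorDataset
from demucs import distrib  # adjust to this repo's vendored module path

dataset = TensorDataset(torch.randn(128, 2, 44100))

# With world_size == 1 (the default) both calls behave like a plain DataLoader.
# Under torch.distributed, shuffle=True switches to a DistributedSampler for
# training, while shuffle=False shards the dataset manually so validation
# examples are not replicated across ranks.
train_loader = distrib.loader(dataset, batch_size=16, shuffle=True)
valid_loader = distrib.loader(dataset, batch_size=16, shuffle=False)
```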
/src/models_dir/demucs/demucs/ema.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | # Inspired from https://github.com/rwightman/pytorch-image-models
8 | from contextlib import contextmanager
9 |
10 | import torch
11 |
12 | from .states import swap_state
13 |
14 |
15 | class ModelEMA:
16 | """
17 | Perform EMA on a model. You can switch to the EMA weights temporarily
18 | with the `swap` method.
19 |
20 | ema = ModelEMA(model)
21 | with ema.swap():
22 | # compute valid metrics with averaged model.
23 | """
24 | def __init__(self, model, decay=0.9999, unbias=True, device='cpu'):
25 | self.decay = decay
26 | self.model = model
27 | self.state = {}
28 | self.count = 0
29 | self.device = device
30 | self.unbias = unbias
31 |
32 | self._init()
33 |
34 | def _init(self):
35 | for key, val in self.model.state_dict().items():
36 | if val.dtype != torch.float32:
37 | continue
38 | device = self.device or val.device
39 | if key not in self.state:
40 | self.state[key] = val.detach().to(device, copy=True)
41 |
42 | def update(self):
43 | if self.unbias:
44 | self.count = self.count * self.decay + 1
45 | w = 1 / self.count
46 | else:
47 | w = 1 - self.decay
48 | for key, val in self.model.state_dict().items():
49 | if val.dtype != torch.float32:
50 | continue
51 | device = self.device or val.device
52 | self.state[key].mul_(1 - w)
53 | self.state[key].add_(val.detach().to(device), alpha=w)
54 |
55 | @contextmanager
56 | def swap(self):
57 | with swap_state(self.model, self.state):
58 | yield
59 |
60 | def state_dict(self):
61 | return {'state': self.state, 'count': self.count}
62 |
63 | def load_state_dict(self, state):
64 | self.count = state['count']
65 | for k, v in state['state'].items():
66 | self.state[k].copy_(v)
67 |
--------------------------------------------------------------------------------
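A minimal training-loop sketch showing where `update` and `swap` fit (illustrative; the linear model, data, and import path are stand-ins):

```python
import torch
from torch import nn
from demucs.ema import ModelEMA  # adjust to this repo's vendored module path

model = nn.Linear(10, 10)  # stand-in for a real separation model
opt = torch.optim.Adam(model.parameters())
ema = ModelEMA(model, decay=0.9999)

for step in range(100):
    x = torch.randn(32, 10)
    loss = (model(x) - x).pow(2).mean()
    opt.zero_grad()
    loss.backward()
    opt.step()
    ema.update()  # fold the freshly optimized weights into the running average

with ema.swap():  # temporarily load the EMA weights for validation
    valid_loss = (model(torch.randn(32, 10))).pow(2).mean()
# the non-averaged training weights are restored when the context exits
```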
/src/models_dir/demucs/demucs/grids/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/models_dir/demucs/demucs/grids/__init__.py
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/grids/_explorers.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | from dora import Explorer
7 | import treetable as tt
8 |
9 |
10 | class MyExplorer(Explorer):
11 | test_metrics = ['nsdr', 'sdr_med']
12 |
13 | def get_grid_metrics(self):
14 | """Return the metrics that should be displayed in the tracking table.
15 | """
16 | return [
17 | tt.group("train", [
18 | tt.leaf("epoch"),
19 | tt.leaf("reco", ".3f"),
20 | ], align=">"),
21 | tt.group("valid", [
22 | tt.leaf("penalty", ".1f"),
23 | tt.leaf("ms", ".1f"),
24 | tt.leaf("reco", ".2%"),
25 | tt.leaf("breco", ".2%"),
26 | tt.leaf("b_nsdr", ".2f"),
27 | # tt.leaf("b_nsdr_drums", ".2f"),
28 | # tt.leaf("b_nsdr_bass", ".2f"),
29 | # tt.leaf("b_nsdr_other", ".2f"),
30 | # tt.leaf("b_nsdr_vocals", ".2f"),
31 | ], align=">"),
32 | tt.group("test", [
33 | tt.leaf(name, ".2f")
34 | for name in self.test_metrics
35 | ], align=">")
36 | ]
37 |
38 | def process_history(self, history):
39 | train = {
40 | 'epoch': len(history),
41 | }
42 | valid = {}
43 | test = {}
44 | best_v_main = float('inf')
45 | breco = float('inf')
46 | for metrics in history:
47 | train.update(metrics['train'])
48 | valid.update(metrics['valid'])
49 | if 'main' in metrics['valid']:
50 | best_v_main = min(best_v_main, metrics['valid']['main']['loss'])
51 | valid['bmain'] = best_v_main
52 | valid['breco'] = min(breco, metrics['valid']['reco'])
53 | breco = valid['breco']
54 | if (metrics['valid']['loss'] == metrics['valid']['best'] or
55 | metrics['valid'].get('nsdr') == metrics['valid']['best']):
56 | for k, v in metrics['valid'].items():
57 | if k.startswith('reco_'):
58 | valid['b_' + k[len('reco_'):]] = v
59 | if k.startswith('nsdr'):
60 | valid[f'b_{k}'] = v
61 | if 'test' in metrics:
62 | test.update(metrics['test'])
63 | metrics = history[-1]
64 | return {"train": train, "valid": valid, "test": test}
65 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/grids/mdx.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | """
7 | Main training for the Track A MDX models.
8 | """
9 |
10 | from ._explorers import MyExplorer
11 | from ..train import main
12 |
13 |
14 | TRACK_A = ['0d19c1c6', '7ecf8ec1', 'c511e2ab', '7d865c68']
15 |
16 |
17 | @MyExplorer
18 | def explorer(launcher):
19 | launcher.slurm_(
20 | gpus=8,
21 | time=3 * 24 * 60,
22 | partition='learnlab')
23 |
24 | # Reproduce results from MDX competition Track A
25 | # This trains the first round of models. Once this is trained,
26 | # you will need to schedule `mdx_refine`.
27 | for sig in TRACK_A:
28 | xp = main.get_xp_from_sig(sig)
29 | parent = xp.cfg.continue_from
30 | xp = main.get_xp_from_sig(parent)
31 | launcher(xp.argv)
32 | launcher(xp.argv, {'quant.diffq': 1e-4})
33 | launcher(xp.argv, {'quant.diffq': 3e-4})
34 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/grids/mdx_extra.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | """
 7 | Main training for the Track B MDX models.
8 | """
9 |
10 | from ._explorers import MyExplorer
11 | from ..train import main
12 |
13 | TRACK_B = ['e51eebcc', 'a1d90b5c', '5d2d6c55', 'cfa93e08']
14 |
15 |
16 | @MyExplorer
17 | def explorer(launcher):
18 | launcher.slurm_(
19 | gpus=8,
20 | time=3 * 24 * 60,
21 | partition='learnlab')
22 |
23 |     # Reproduce results from MDX competition Track B
24 | # This trains the first round of models. Once this is trained,
25 | # you will need to schedule `mdx_refine`.
26 | for sig in TRACK_B:
27 | while sig is not None:
28 | xp = main.get_xp_from_sig(sig)
29 | sig = xp.cfg.continue_from
30 |
31 | for dset in ['extra44', 'extra_test']:
32 | sub = launcher.bind(xp.argv, dset=dset)
33 | sub()
34 | if dset == 'extra_test':
35 | sub({'quant.diffq': 1e-4})
36 | sub({'quant.diffq': 3e-4})
37 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/grids/mdx_refine.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | """
7 | Main training for the Track A MDX models.
8 | """
9 |
10 | from ._explorers import MyExplorer
11 | from .mdx import TRACK_A
12 | from ..train import main
13 |
14 |
15 | @MyExplorer
16 | def explorer(launcher):
17 | launcher.slurm_(
18 | gpus=8,
19 | time=3 * 24 * 60,
20 | partition='learnlab')
21 |
22 | # Reproduce results from MDX competition Track A
23 | # WARNING: all the experiments in the `mdx` grid must have completed.
24 | for sig in TRACK_A:
25 | xp = main.get_xp_from_sig(sig)
26 | launcher(xp.argv)
27 | for diffq in [1e-4, 3e-4]:
28 | xp_src = main.get_xp_from_sig(xp.cfg.continue_from)
29 | q_argv = [f'quant.diffq={diffq}']
30 | actual_src = main.get_xp(xp_src.argv + q_argv)
31 | actual_src.link.load()
32 | assert len(actual_src.link.history) == actual_src.cfg.epochs
33 | argv = xp.argv + q_argv + [f'continue_from="{actual_src.sig}"']
34 | launcher(argv)
35 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/grids/mmi.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | from ._explorers import MyExplorer
8 | from dora import Launcher
9 |
10 |
11 | @MyExplorer
12 | def explorer(launcher: Launcher):
13 | launcher.slurm_(gpus=8, time=3 * 24 * 60, partition="devlab,learnlab,learnfair") # 3 days
14 |
15 | sub = launcher.bind_(
16 | {
17 | "dset": "extra_mmi_goodclean",
18 | "test.shifts": 0,
19 | "model": "htdemucs",
20 | "htdemucs.dconv_mode": 3,
21 | "htdemucs.depth": 4,
22 | "htdemucs.t_dropout": 0.02,
23 | "htdemucs.t_layers": 5,
24 | "max_batches": 800,
25 | "ema.epoch": [0.9, 0.95],
26 | "ema.batch": [0.9995, 0.9999],
27 | "dset.segment": 10,
28 | "batch_size": 32,
29 | }
30 | )
31 | sub({"model": "hdemucs"})
32 | sub({"model": "hdemucs", "dset": "extra44"})
33 | sub({"model": "hdemucs", "dset": "musdb44"})
34 |
35 | sparse = {
36 | 'batch_size': 3 * 8,
37 | 'augment.remix.group_size': 3,
38 | 'htdemucs.t_auto_sparsity': True,
39 | 'htdemucs.t_sparse_self_attn': True,
40 | 'htdemucs.t_sparse_cross_attn': True,
41 | 'htdemucs.t_sparsity': 0.9,
42 | "htdemucs.t_layers": 7
43 | }
44 |
45 | with launcher.job_array():
46 | for transf_layers in [5, 7]:
47 | for bottom_channels in [0, 512]:
48 | sub = launcher.bind({
49 | "htdemucs.t_layers": transf_layers,
50 | "htdemucs.bottom_channels": bottom_channels,
51 | })
52 | if bottom_channels == 0 and transf_layers == 5:
53 | sub({"augment.remix.proba": 0.0})
54 | sub({
55 | "augment.repitch.proba": 0.0,
56 |                 # when doing repitching, we trim the output to align on the
57 | # highest change of BPM. When removing repitching,
58 | # we simulate it here to ensure the training context is the same.
59 | # Another second is lost for all experiments due to the random
60 | # shift augmentation.
61 | "dset.segment": 10 * 0.88})
62 | elif bottom_channels == 512 and transf_layers == 5:
63 | sub(dset="musdb44")
64 | sub(dset="extra44")
65 | # Sparse kernel XP, currently not released as kernels are still experimental.
66 | sub(sparse, {'dset.segment': 15, "htdemucs.t_layers": 7})
67 |
68 | for duration in [5, 10, 15]:
69 | sub({"dset.segment": duration})
70 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/grids/mmi_ft.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | from ._explorers import MyExplorer
8 | from dora import Launcher
9 | from demucs import train
10 |
11 |
12 | def get_sub(launcher, sig):
13 | xp = train.main.get_xp_from_sig(sig)
14 | sub = launcher.bind(xp.argv)
15 | sub()
16 | sub.bind_({
17 | 'continue_from': sig,
18 | 'continue_best': True})
19 | return sub
20 |
21 |
22 | @MyExplorer
23 | def explorer(launcher: Launcher):
24 | launcher.slurm_(gpus=4, time=3 * 24 * 60, partition="devlab,learnlab,learnfair") # 3 days
25 | ft = {
26 | 'optim.lr': 1e-4,
27 | 'augment.remix.proba': 0,
28 | 'augment.scale.proba': 0,
29 | 'augment.shift_same': True,
30 | 'htdemucs.t_weight_decay': 0.05,
31 | 'batch_size': 8,
32 | 'optim.clip_grad': 5,
33 | 'optim.optim': 'adamw',
34 | 'epochs': 50,
35 | 'dset.wav2_valid': True,
36 | 'ema.epoch': [], # let's make valid a bit faster
37 | }
38 | with launcher.job_array():
39 | for sig in ['2899e11a']:
40 | sub = get_sub(launcher, sig)
41 | sub.bind_(ft)
42 | for segment in [15, 18]:
43 | for source in range(4):
44 | w = [0] * 4
45 | w[source] = 1
46 | sub({'weights': w, 'dset.segment': segment})
47 |
48 | for sig in ['955717e8']:
49 | sub = get_sub(launcher, sig)
50 | sub.bind_(ft)
51 | for segment in [10, 15]:
52 | for source in range(4):
53 | w = [0] * 4
54 | w[source] = 1
55 | sub({'weights': w, 'dset.segment': segment})
56 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/grids/repro.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | """
7 | Easier training for reproducibility
8 | """
9 |
10 | from ._explorers import MyExplorer
11 |
12 |
13 | @MyExplorer
14 | def explorer(launcher):
15 | launcher.slurm_(
16 | gpus=8,
17 | time=3 * 24 * 60,
18 | partition='devlab,learnlab')
19 |
20 | launcher.bind_({'ema.epoch': [0.9, 0.95]})
21 | launcher.bind_({'ema.batch': [0.9995, 0.9999]})
22 | launcher.bind_({'epochs': 600})
23 |
24 | base = {'model': 'demucs', 'demucs.dconv_mode': 0, 'demucs.gelu': False,
25 | 'demucs.lstm_layers': 2}
26 | newt = {'model': 'demucs', 'demucs.normalize': True}
27 | hdem = {'model': 'hdemucs'}
28 | svd = {'svd.penalty': 1e-5, 'svd': 'base2'}
29 |
30 | with launcher.job_array():
31 | for model in [base, newt, hdem]:
32 | sub = launcher.bind(model)
33 | if model is base:
34 | # Training the v2 Demucs on MusDB HQ
35 | sub(epochs=360)
36 | continue
37 |
38 | # those two will be used in the repro_mdx_a bag of models.
39 | sub(svd)
40 | sub(svd, seed=43)
41 | if model == newt:
42 | # Ablation study
43 | sub()
44 | abl = sub.bind(svd)
45 | abl({'ema.epoch': [], 'ema.batch': []})
46 | abl({'demucs.dconv_lstm': 10})
47 | abl({'demucs.dconv_attn': 10})
48 | abl({'demucs.dconv_attn': 10, 'demucs.dconv_lstm': 10, 'demucs.lstm_layers': 2})
49 | abl({'demucs.dconv_mode': 0})
50 | abl({'demucs.gelu': False})
51 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/grids/repro_ft.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | """
7 | Fine tuning experiments
8 | """
9 |
10 | from ._explorers import MyExplorer
11 | from ..train import main
12 |
13 |
14 | @MyExplorer
15 | def explorer(launcher):
16 | launcher.slurm_(
17 | gpus=8,
18 | time=300,
19 | partition='devlab,learnlab')
20 |
21 | # Mus
22 | launcher.slurm_(constraint='volta32gb')
23 |
24 | grid = "repro"
25 | folder = main.dora.dir / "grids" / grid
26 |
27 | for sig in folder.iterdir():
28 | if not sig.is_symlink():
29 | continue
30 | xp = main.get_xp_from_sig(sig)
31 | xp.link.load()
32 | if len(xp.link.history) != xp.cfg.epochs:
33 | continue
34 | sub = launcher.bind(xp.argv, [f'continue_from="{xp.sig}"'])
35 | sub.bind_({'ema.epoch': [0.9, 0.95], 'ema.batch': [0.9995, 0.9999]})
36 | sub.bind_({'test.every': 1, 'test.sdr': True, 'epochs': 4})
37 | sub.bind_({'dset.segment': 28, 'dset.shift': 2})
38 | sub.bind_({'batch_size': 32})
39 | auto = {'dset': 'auto_mus'}
40 | auto.update({'augment.remix.proba': 0, 'augment.scale.proba': 0,
41 | 'augment.shift_same': True})
42 | sub.bind_(auto)
43 | sub.bind_({'batch_size': 16})
44 | sub.bind_({'optim.lr': 1e-4})
45 | sub.bind_({'model_segment': 44})
46 | sub()
47 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/grids/sdx23.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | from ._explorers import MyExplorer
8 | from dora import Launcher
9 |
10 |
11 | @MyExplorer
12 | def explorer(launcher: Launcher):
13 | launcher.slurm_(gpus=8, time=3 * 24 * 60, partition="speechgpt,learnfair",
14 | mem_per_gpu=None, constraint='')
15 | launcher.bind_({"dset.use_musdb": False})
16 |
17 | with launcher.job_array():
18 | launcher(dset='sdx23_bleeding')
19 | launcher(dset='sdx23_labelnoise')
20 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/pretrained.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | """Loading pretrained models.
7 | """
8 |
9 | import logging
10 | from pathlib import Path
11 | import typing as tp
12 |
13 | # from dora.log import fatal, bold
14 |
15 | from .hdemucs import HDemucs
16 | from .repo import RemoteRepo, LocalRepo, ModelOnlyRepo, BagOnlyRepo, AnyModelRepo, ModelLoadingError # noqa
17 | from .states import _check_diffq
18 |
19 | logger = logging.getLogger(__name__)
20 | ROOT_URL = "https://dl.fbaipublicfiles.com/demucs/"
21 | REMOTE_ROOT = Path(__file__).parent / 'remote'
22 |
23 | SOURCES = ["drums", "bass", "other", "vocals"]
24 | DEFAULT_MODEL = 'htdemucs'
25 |
26 |
27 | def demucs_unittest():
28 | model = HDemucs(channels=4, sources=SOURCES)
29 | return model
30 |
31 |
32 | def add_model_flags(parser):
33 | group = parser.add_mutually_exclusive_group(required=False)
34 | group.add_argument("-s", "--sig", help="Locally trained XP signature.")
35 | group.add_argument("-n", "--name", default="htdemucs",
36 | help="Pretrained model name or signature. Default is htdemucs.")
37 | parser.add_argument("--repo", type=Path,
38 | help="Folder containing all pre-trained models for use with -n.")
39 |
40 |
41 | def _parse_remote_files(remote_file_list) -> tp.Dict[str, str]:
42 | root: str = ''
43 | models: tp.Dict[str, str] = {}
44 | for line in remote_file_list.read_text().split('\n'):
45 | line = line.strip()
46 | if line.startswith('#'):
47 | continue
48 | elif len(line) == 0:
49 | continue
50 | elif line.startswith('root:'):
51 | root = line.split(':', 1)[1].strip()
52 | else:
53 | sig = line.split('-', 1)[0]
54 | assert sig not in models
55 | models[sig] = ROOT_URL + root + line
56 | return models
57 |
58 |
59 | def get_model(name: str,
60 | repo: tp.Optional[Path] = None):
61 | """`name` must be a bag of models name or a pretrained signature
62 | from the remote AWS model repo or the specified local repo if `repo` is not None.
63 | """
64 | if name == 'demucs_unittest':
65 | return demucs_unittest()
66 | model_repo: ModelOnlyRepo
67 | if repo is None:
68 | models = _parse_remote_files(REMOTE_ROOT / 'files.txt')
69 | model_repo = RemoteRepo(models)
70 | bag_repo = BagOnlyRepo(REMOTE_ROOT, model_repo)
71 | else:
72 | if not repo.is_dir():
73 | raise RuntimeError(f"{repo} must exist and be a directory.")
75 | model_repo = LocalRepo(repo)
76 | bag_repo = BagOnlyRepo(repo, model_repo)
77 | any_repo = AnyModelRepo(model_repo, bag_repo)
78 | try:
79 | model = any_repo.get_model(name)
80 | except ImportError as exc:
81 | if 'diffq' in exc.args[0]:
82 | _check_diffq()
83 | raise
84 |
85 | model.eval()
86 | return model
87 |
88 |
89 | def get_model_from_args(args):
90 | """
91 | Load local model package or pre-trained model.
92 | """
93 | if args.name is None:
94 | args.name = DEFAULT_MODEL
99 | print("Important: the default model was recently changed to `htdemucs`",
100 | "the latest Hybrid Transformer Demucs model. In some cases, this model can "
101 | "actually perform worse than previous models. To get back the old default model "
102 | "use `-n mdx_extra_q`.")
103 | return get_model(name=args.name, repo=args.repo)
104 |
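A minimal usage sketch for the loader above, assuming the `demucs` package is importable and the remote weights are reachable:

```python
from demucs.pretrained import get_model

# Fetch (or load from cache) the default Hybrid Transformer bag of models.
model = get_model('htdemucs')
print(model.sources)  # expected: ['drums', 'bass', 'other', 'vocals']

# A single model can also be loaded by its training signature:
# model = get_model('955717e8')
```

`get_model` returns the model in eval mode; pass `repo=Path(...)` to resolve names against a local folder of exported models instead of the remote repo.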
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/models_dir/demucs/demucs/py.typed
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/remote/files.txt:
--------------------------------------------------------------------------------
1 | # MDX Models
2 | root: mdx_final/
3 | 0d19c1c6-0f06f20e.th
4 | 5d2d6c55-db83574e.th
5 | 7d865c68-3d5dd56b.th
6 | 7ecf8ec1-70f50cc9.th
7 | a1d90b5c-ae9d2452.th
8 | c511e2ab-fe698775.th
9 | cfa93e08-61801ae1.th
10 | e51eebcc-c1b80bdd.th
11 | 6b9c2ca1-3fd82607.th
12 | b72baf4e-8778635e.th
13 | 42e558d4-196e0e1b.th
14 | 305bc58f-18378783.th
15 | 14fc6a69-a89dd0ee.th
16 | 464b36d7-e5a9386e.th
17 | 7fd6ef75-a905dd85.th
18 | 83fc094f-4a16d450.th
19 | 1ef250f1-592467ce.th
20 | 902315c2-b39ce9c9.th
21 | 9a6b4851-03af0aa6.th
22 | fa0cb7f9-100d8bf4.th
23 | # Hybrid Transformer models
24 | root: hybrid_transformer/
25 | 955717e8-8726e21a.th
26 | f7e0c4bc-ba3fe64a.th
27 | d12395a8-e57c48e6.th
28 | 92cfc3b6-ef3bcb9c.th
29 | 04573f0d-f3cf25b2.th
30 | 75fc33f5-1941ce65.th
31 | # Experimental 6 sources model
32 | 5c90dfd2-34c22ccb.th
33 |
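For reference, `_parse_remote_files` in `pretrained.py` above resolves each entry against the most recent `root:` line; a sketch of the mapping for one entry:

```python
# '955717e8-8726e21a.th' under 'root: hybrid_transformer/' resolves to
# https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/955717e8-8726e21a.th
# and the model is indexed by the signature before the first dash:
sig = '955717e8-8726e21a.th'.split('-', 1)[0]
assert sig == '955717e8'
```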
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/remote/hdemucs_mmi.yaml:
--------------------------------------------------------------------------------
1 | models: ['75fc33f5']
2 | segment: 44
3 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/remote/htdemucs.yaml:
--------------------------------------------------------------------------------
1 | models: ['955717e8']
2 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/remote/htdemucs_6s.yaml:
--------------------------------------------------------------------------------
1 | models: ['5c90dfd2']
2 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/remote/htdemucs_ft.yaml:
--------------------------------------------------------------------------------
1 | models: ['f7e0c4bc', 'd12395a8', '92cfc3b6', '04573f0d']
2 | weights: [
3 | [1., 0., 0., 0.],
4 | [0., 1., 0., 0.],
5 | [0., 0., 1., 0.],
6 | [0., 0., 0., 1.],
7 | ]
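Each fine-tuned model in this bag specializes in a single source (cf. the per-source `weights` used in `grids/mmi_ft.py` above), so the matrix is the identity: row `i` holds the per-source weights applied to model `i`'s estimates, with columns in the `drums, bass, other, vocals` order. A sketch of the combination rule, under the assumption that a bag averages its models' per-source estimates with these weights:

```python
import torch

def combine(outs, weights):
    """outs[i]: model i's estimate, shape [sources=4, channels, time]."""
    mixed = torch.zeros_like(outs[0])
    total = torch.zeros(len(weights[0]))
    for out, w in zip(outs, weights):
        w = torch.tensor(w)
        mixed += w.view(-1, 1, 1) * out  # weight each source of this model
        total += w
    return mixed / total.view(-1, 1, 1)  # normalize by the summed weights
```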
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/remote/mdx.yaml:
--------------------------------------------------------------------------------
1 | models: ['0d19c1c6', '7ecf8ec1', 'c511e2ab', '7d865c68']
2 | weights: [
3 | [1., 1., 0., 0.],
4 | [0., 1., 0., 0.],
5 | [1., 0., 1., 1.],
6 | [1., 0., 1., 1.],
7 | ]
8 | segment: 44
9 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/remote/mdx_extra.yaml:
--------------------------------------------------------------------------------
1 | models: ['e51eebcc', 'a1d90b5c', '5d2d6c55', 'cfa93e08']
2 | segment: 44
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/remote/mdx_extra_q.yaml:
--------------------------------------------------------------------------------
1 | models: ['83fc094f', '464b36d7', '14fc6a69', '7fd6ef75']
2 | segment: 44
3 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/remote/mdx_q.yaml:
--------------------------------------------------------------------------------
1 | models: ['6b9c2ca1', 'b72baf4e', '42e558d4', '305bc58f']
2 | weights: [
3 | [1., 1., 0., 0.],
4 | [0., 1., 0., 0.],
5 | [1., 0., 1., 1.],
6 | [1., 0., 1., 1.],
7 | ]
8 | segment: 44
9 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/remote/repro_mdx_a.yaml:
--------------------------------------------------------------------------------
1 | models: ['9a6b4851', '1ef250f1', 'fa0cb7f9', '902315c2']
2 | segment: 44
3 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/remote/repro_mdx_a_hybrid_only.yaml:
--------------------------------------------------------------------------------
1 | models: ['fa0cb7f9', '902315c2', 'fa0cb7f9', '902315c2']
2 | segment: 44
3 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/remote/repro_mdx_a_time_only.yaml:
--------------------------------------------------------------------------------
1 | models: ['9a6b4851', '9a6b4851', '1ef250f1', '1ef250f1']
2 | segment: 44
3 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/repitch.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | """Utility for on the fly pitch/tempo change for data augmentation."""
7 |
8 | import random
9 | import subprocess as sp
10 | import tempfile
11 |
12 | import torch
13 | import torchaudio as ta
14 |
15 | from .audio import save_audio
16 |
17 |
18 | class RepitchedWrapper:
19 | """
20 | Wrap a dataset to apply online change of pitch / tempo.
21 | """
22 | def __init__(self, dataset, proba=0.2, max_pitch=2, max_tempo=12,
23 | tempo_std=5, vocals=[3], same=True):
24 | self.dataset = dataset
25 | self.proba = proba
26 | self.max_pitch = max_pitch
27 | self.max_tempo = max_tempo
28 | self.tempo_std = tempo_std
29 | self.same = same
30 | self.vocals = vocals
31 |
32 | def __len__(self):
33 | return len(self.dataset)
34 |
35 | def __getitem__(self, index):
36 | streams = self.dataset[index]
37 | in_length = streams.shape[-1]
38 | out_length = int((1 - 0.01 * self.max_tempo) * in_length)
39 |
40 | if random.random() < self.proba:
41 | outs = []
42 | for idx, stream in enumerate(streams):
43 | if idx == 0 or not self.same:
44 | delta_pitch = random.randint(-self.max_pitch, self.max_pitch)
45 | delta_tempo = random.gauss(0, self.tempo_std)
46 | delta_tempo = min(max(-self.max_tempo, delta_tempo), self.max_tempo)
47 | stream = repitch(
48 | stream,
49 | delta_pitch,
50 | delta_tempo,
51 | voice=idx in self.vocals)
52 | outs.append(stream[:, :out_length])
53 | streams = torch.stack(outs)
54 | else:
55 | streams = streams[..., :out_length]
56 | return streams
57 |
58 |
59 | def repitch(wav, pitch, tempo, voice=False, quick=False, samplerate=44100):
60 | """
61 | tempo is a relative delta in percentage, so tempo=10 means tempo at 110%!
62 | pitch is in semitones.
63 | Requires `soundstretch` to be installed, see
64 | https://www.surina.net/soundtouch/soundstretch.html
65 | """
66 | infile = tempfile.NamedTemporaryFile(suffix=".wav")
67 | outfile = tempfile.NamedTemporaryFile(suffix=".wav")
68 | save_audio(wav, infile.name, samplerate, clip='clamp')
69 | command = [
70 | "soundstretch",
71 | infile.name,
72 | outfile.name,
73 | f"-pitch={pitch}",
74 | f"-tempo={tempo:.6f}",
75 | ]
76 | if quick:
77 | command += ["-quick"]
78 | if voice:
79 | command += ["-speech"]
80 | try:
81 | sp.run(command, capture_output=True, check=True)
82 | except sp.CalledProcessError as error:
83 | raise RuntimeError(f"Could not change bpm because {error.stderr.decode('utf-8')}")
84 | wav, sr = ta.load(outfile.name)
85 | assert sr == samplerate
86 | return wav
87 |
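A usage sketch for `repitch`; it requires the `soundstretch` binary on the PATH, and the input here is synthetic:

```python
import torch
from demucs.repitch import repitch

wav = torch.randn(2, 44100)            # one second of stereo audio
out = repitch(wav, pitch=1, tempo=-5)  # up one semitone, tempo at 95%
print(wav.shape, out.shape)            # slowing down lengthens the output
```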
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/spec.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | """Conveniance wrapper to perform STFT and iSTFT"""
7 |
8 | import torch as th
9 |
10 |
11 | def spectro(x, n_fft=512, hop_length=None, pad=0):
12 | *other, length = x.shape
13 | x = x.reshape(-1, length)
14 | is_mps = x.device.type == 'mps'
15 | if is_mps:
16 | x = x.cpu()
17 | z = th.stft(x,
18 | n_fft * (1 + pad),
19 | hop_length or n_fft // 4,
20 | window=th.hann_window(n_fft).to(x),
21 | win_length=n_fft,
22 | normalized=True,
23 | center=True,
24 | return_complex=True,
25 | pad_mode='reflect')
26 | _, freqs, frame = z.shape
27 | return z.view(*other, freqs, frame)
28 |
29 |
30 | def ispectro(z, hop_length=None, length=None, pad=0):
31 | *other, freqs, frames = z.shape
32 | n_fft = 2 * freqs - 2
33 | z = z.view(-1, freqs, frames)
34 | win_length = n_fft // (1 + pad)
35 | is_mps = z.device.type == 'mps'
36 | if is_mps:
37 | z = z.cpu()
38 | x = th.istft(z,
39 | n_fft,
40 | hop_length,
41 | window=th.hann_window(win_length).to(z.real),
42 | win_length=win_length,
43 | normalized=True,
44 | length=length,
45 | center=True)
46 | _, length = x.shape
47 | return x.view(*other, length)
48 |
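A quick round-trip sketch for the two helpers, assuming the module is importable as `demucs.spec` (the normalized, centered Hann window makes the reconstruction near-exact):

```python
import torch as th
from demucs.spec import spectro, ispectro

x = th.randn(2, 44100)               # [batch, time]
z = spectro(x, n_fft=512)            # [batch, freqs=257, frames]
y = ispectro(z, length=x.shape[-1])  # back to [batch, time]
print(z.shape, y.shape, (x - y).abs().max())
```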
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/states.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | """
7 | Utilities to save and load models.
8 | """
9 | from contextlib import contextmanager
10 |
11 | import functools
12 | import hashlib
13 | import inspect
14 | import io
15 | from pathlib import Path
16 | import warnings
17 |
18 | from omegaconf import OmegaConf
19 | # from dora.log import fatal
20 | import torch
21 |
22 |
23 | def _check_diffq():
24 | try:
25 | import diffq # noqa
26 | except ImportError:
27 | raise ImportError('Trying to use DiffQ, but diffq is not installed.\n'
28 | 'On Windows run: python.exe -m pip install diffq \n'
29 | 'On Linux/Mac, run: python3 -m pip install diffq')
31 |
32 |
33 | def get_quantizer(model, args, optimizer=None):
34 | """Return the quantizer given the XP quantization args."""
35 | quantizer = None
36 | if args.diffq:
37 | _check_diffq()
38 | from diffq import DiffQuantizer
39 | quantizer = DiffQuantizer(
40 | model, min_size=args.min_size, group_size=args.group_size)
41 | if optimizer is not None:
42 | quantizer.setup_optimizer(optimizer)
43 | elif args.qat:
44 | _check_diffq()
45 | from diffq import UniformQuantizer
46 | quantizer = UniformQuantizer(
47 | model, bits=args.qat, min_size=args.min_size)
48 | return quantizer
49 |
50 |
51 | def load_model(path_or_package, strict=False):
52 | """Load a model from the given serialized model, either given as a dict (already loaded)
53 | or a path to a file on disk."""
54 | if isinstance(path_or_package, dict):
55 | package = path_or_package
56 | elif isinstance(path_or_package, (str, Path)):
57 | with warnings.catch_warnings():
58 | warnings.simplefilter("ignore")
59 | path = path_or_package
60 | package = torch.load(path, 'cpu')
61 | else:
62 | raise ValueError(f"Invalid type for {path_or_package}.")
63 |
64 | klass = package["klass"]
65 | args = package["args"]
66 | kwargs = package["kwargs"]
67 |
68 | if strict:
69 | model = klass(*args, **kwargs)
70 | else:
71 | sig = inspect.signature(klass)
72 | for key in list(kwargs):
73 | if key not in sig.parameters:
74 | warnings.warn("Dropping nonexistent parameter " + key)
75 | del kwargs[key]
76 | model = klass(*args, **kwargs)
77 |
78 | state = package["state"]
79 |
80 | set_state(model, state)
81 | return model
82 |
83 |
84 | def get_state(model, quantizer, half=False):
85 | """Get the state from a model, potentially with quantization applied.
86 | If `half` is True, the state is stored in half precision, which shouldn't impact performance
87 | but halves the state size."""
88 | if quantizer is None:
89 | dtype = torch.half if half else None
90 | state = {k: p.data.to(device='cpu', dtype=dtype) for k, p in model.state_dict().items()}
91 | else:
92 | state = quantizer.get_quantized_state()
93 | state['__quantized'] = True
94 | return state
95 |
96 |
97 | def set_state(model, state, quantizer=None):
98 | """Set the state on a given model."""
99 | if state.get('__quantized'):
100 | if quantizer is not None:
101 | quantizer.restore_quantized_state(model, state['quantized'])
102 | else:
103 | _check_diffq()
104 | from diffq import restore_quantized_state
105 | restore_quantized_state(model, state)
106 | else:
107 | model.load_state_dict(state)
108 | return state
109 |
110 |
111 | def save_with_checksum(content, path):
112 | """Save the given value on disk, along with a sha256 hash.
113 | Should be used with the output of either `serialize_model` or `get_state`."""
114 | buf = io.BytesIO()
115 | torch.save(content, buf)
116 | sig = hashlib.sha256(buf.getvalue()).hexdigest()[:8]
117 |
118 | path = path.parent / (path.stem + "-" + sig + path.suffix)
119 | path.write_bytes(buf.getvalue())
120 |
121 |
122 | def serialize_model(model, training_args, quantizer=None, half=True):
123 | args, kwargs = model._init_args_kwargs
124 | klass = model.__class__
125 |
126 | state = get_state(model, quantizer, half)
127 | return {
128 | 'klass': klass,
129 | 'args': args,
130 | 'kwargs': kwargs,
131 | 'state': state,
132 | 'training_args': OmegaConf.to_container(training_args, resolve=True),
133 | }
134 |
135 |
136 | def copy_state(state):
137 | return {k: v.cpu().clone() for k, v in state.items()}
138 |
139 |
140 | @contextmanager
141 | def swap_state(model, state):
142 | """
143 | Context manager that swaps the state of a model, e.g:
144 |
145 | # model is in old state
146 | with swap_state(model, new_state):
147 | # model in new state
148 | # model back to old state
149 | """
150 | old_state = copy_state(model.state_dict())
151 | model.load_state_dict(state, strict=False)
152 | try:
153 | yield
154 | finally:
155 | model.load_state_dict(old_state)
156 |
157 |
158 | def capture_init(init):
159 | @functools.wraps(init)
160 | def __init__(self, *args, **kwargs):
161 | self._init_args_kwargs = (args, kwargs)
162 | init(self, *args, **kwargs)
163 |
164 | return __init__
165 |
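A minimal save/load sketch with the helpers above. It assumes the model class wraps its `__init__` with `capture_init` (as the Demucs models do) so that `_init_args_kwargs` is populated, and uses an empty config as the `training_args`:

```python
import torch
from omegaconf import OmegaConf
from demucs.hdemucs import HDemucs
from demucs.states import serialize_model, load_model

model = HDemucs(channels=4, sources=['drums', 'bass', 'other', 'vocals'])
pkg = serialize_model(model, OmegaConf.create({}))  # weights stored in half precision
torch.save(pkg, 'model.th')
clone = load_model('model.th')  # rebuilds the class and restores the state
```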
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/svd.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | """Ways to make the model stronger."""
7 | import random
8 | import torch
9 |
10 |
11 | def power_iteration(m, niters=1, bs=1):
12 | """This is the power method. batch size is used to try multiple starting point in parallel."""
13 | assert m.dim() == 2
14 | assert m.shape[0] == m.shape[1]
15 | dim = m.shape[0]
16 | b = torch.randn(dim, bs, device=m.device, dtype=m.dtype)
17 |
18 | for _ in range(niters):
19 | n = m.mm(b)
20 | norm = n.norm(dim=0, keepdim=True)
21 | b = n / (1e-10 + norm)
22 |
23 | return norm.mean()
24 |
25 |
26 | # We need a shared RNG to make sure all the distributed workers will skip the penalty together,
27 | # as otherwise we wouldn't get any speed up.
28 | penalty_rng = random.Random(1234)
29 |
30 |
31 | def svd_penalty(model, min_size=0.1, dim=1, niters=2, powm=False, convtr=True,
32 | proba=1, conv_only=False, exact=False, bs=1):
33 | """
34 | Penalty on the largest singular value for a layer.
35 | Args:
36 | - model: model to penalize
37 | - min_size: minimum size in MB of a layer to penalize.
38 | - dim: projection dimension for the svd_lowrank. Higher is better but slower.
39 | - niters: number of iterations in the algorithm used by svd_lowrank.
40 | - powm: use power method instead of lowrank SVD, my own experience
41 | is that it is both slower and less stable.
42 | - convtr: when True, differentiate between Conv and Transposed Conv.
43 | this is kept for compatibility with older experiments.
44 | - proba: probability to apply the penalty.
45 | - conv_only: only apply to conv and conv transposed, not LSTM
46 | (might not be reliable for other models than Demucs).
47 | - exact: use exact SVD (slow but useful at validation).
48 | - bs: batch_size for power method.
49 | """
50 | total = 0
51 | if penalty_rng.random() > proba:
52 | return 0.
53 |
54 | for m in model.modules():
55 | for name, p in m.named_parameters(recurse=False):
56 | if p.numel() / 2**18 < min_size:
57 | continue
58 | if convtr:
59 | if isinstance(m, (torch.nn.ConvTranspose1d, torch.nn.ConvTranspose2d)):
60 | if p.dim() in [3, 4]:
61 | p = p.transpose(0, 1).contiguous()
62 | if p.dim() == 3:
63 | p = p.view(len(p), -1)
64 | elif p.dim() == 4:
65 | p = p.view(len(p), -1)
66 | elif p.dim() == 1:
67 | continue
68 | elif conv_only:
69 | continue
70 | assert p.dim() == 2, (name, p.shape)
71 | if exact:
72 | estimate = torch.svd(p, compute_uv=False)[1].pow(2).max()
73 | elif powm:
74 | a, b = p.shape
75 | if a < b:
76 | n = p.mm(p.t())
77 | else:
78 | n = p.t().mm(p)
79 | estimate = power_iteration(n, niters, bs)
80 | else:
81 | estimate = torch.svd_lowrank(p, dim, niters)[1][0].pow(2)
82 | total += estimate
83 | return total / proba
84 |
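A sketch of how the penalty plugs into a training step; the model and scale here are illustrative (the grids earlier in this repo use `svd.penalty: 1e-5`):

```python
import torch
from demucs.svd import svd_penalty

model = torch.nn.Sequential(torch.nn.Conv1d(2, 64, 8), torch.nn.Conv1d(64, 2, 8))
x = torch.randn(1, 2, 4096)
base_loss = model(x).abs().mean()
penalty = svd_penalty(model, min_size=0)  # low-rank estimate of the top singular value
loss = base_loss + 1e-5 * penalty
loss.backward()
```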
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | from collections import defaultdict
8 | from concurrent.futures import CancelledError
9 | from contextlib import contextmanager
10 | import math
11 | import os
12 | import tempfile
13 | import typing as tp
14 |
15 | import torch
16 | from torch.nn import functional as F
17 | from torch.utils.data import Subset
18 |
19 |
20 | def unfold(a, kernel_size, stride):
21 | """Given input of size [*OT, T], output Tensor of size [*OT, F, K]
22 | with K the kernel size, by extracting frames with the given stride.
23 |
24 | This will pad the input so that `F = ceil(T / stride)`.
25 |
26 | see https://github.com/pytorch/pytorch/issues/60466
27 | """
28 | *shape, length = a.shape
29 | n_frames = math.ceil(length / stride)
30 | tgt_length = (n_frames - 1) * stride + kernel_size
31 | a = F.pad(a, (0, tgt_length - length))
32 | strides = list(a.stride())
33 | assert strides[-1] == 1, 'data should be contiguous'
34 | strides = strides[:-1] + [stride, 1]
35 | return a.as_strided([*shape, n_frames, kernel_size], strides)
36 |
37 |
38 | def center_trim(tensor: torch.Tensor, reference: tp.Union[torch.Tensor, int]):
39 | """
40 | Center trim `tensor` with respect to `reference`, along the last dimension.
41 | `reference` can also be a number, representing the length to trim to.
42 | If the size difference != 0 mod 2, the extra sample is removed on the right side.
43 | """
44 | ref_size: int
45 | if isinstance(reference, torch.Tensor):
46 | ref_size = reference.size(-1)
47 | else:
48 | ref_size = reference
49 | delta = tensor.size(-1) - ref_size
50 | if delta < 0:
51 | raise ValueError("tensor must be larger than reference. " f"Delta is {delta}.")
52 | if delta:
53 | tensor = tensor[..., delta // 2:-(delta - delta // 2)]
54 | return tensor
55 |
56 |
57 | def pull_metric(history: tp.List[dict], name: str):
58 | out = []
59 | for metrics in history:
60 | metric = metrics
61 | for part in name.split("."):
62 | metric = metric[part]
63 | out.append(metric)
64 | return out
65 |
66 |
67 | def EMA(beta: float = 1):
68 | """
69 | Exponential Moving Average callback.
70 | Returns a single function that can be called to repeatedly update the EMA
71 | with a dict of metrics. The callback will return
72 | the new averaged dict of metrics.
73 |
74 | Note that for `beta=1`, this is just plain averaging.
75 | """
76 | fix: tp.Dict[str, float] = defaultdict(float)
77 | total: tp.Dict[str, float] = defaultdict(float)
78 |
79 | def _update(metrics: dict, weight: float = 1) -> dict:
80 | nonlocal total, fix
81 | for key, value in metrics.items():
82 | total[key] = total[key] * beta + weight * float(value)
83 | fix[key] = fix[key] * beta + weight
84 | return {key: tot / fix[key] for key, tot in total.items()}
85 | return _update
86 |
87 |
88 | def sizeof_fmt(num: float, suffix: str = 'B'):
89 | """
90 | Given `num` bytes, return human readable size.
91 | Taken from https://stackoverflow.com/a/1094933
92 | """
93 | for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
94 | if abs(num) < 1024.0:
95 | return "%3.1f%s%s" % (num, unit, suffix)
96 | num /= 1024.0
97 | return "%.1f%s%s" % (num, 'Yi', suffix)
98 |
99 |
100 | @contextmanager
101 | def temp_filenames(count: int, delete=True):
102 | names = []
103 | try:
104 | for _ in range(count):
105 | names.append(tempfile.NamedTemporaryFile(delete=False).name)
106 | yield names
107 | finally:
108 | if delete:
109 | for name in names:
110 | os.unlink(name)
111 |
112 |
113 | def random_subset(dataset, max_samples: int, seed: int = 42):
114 | if max_samples >= len(dataset):
115 | return dataset
116 |
117 | generator = torch.Generator().manual_seed(seed)
118 | perm = torch.randperm(len(dataset), generator=generator)
119 | return Subset(dataset, perm[:max_samples].tolist())
120 |
121 |
122 | class DummyPoolExecutor:
123 | class DummyResult:
124 | def __init__(self, func, _dict, *args, **kwargs):
125 | self.func = func
126 | self._dict = _dict
127 | self.args = args
128 | self.kwargs = kwargs
129 |
130 | def result(self):
131 | if self._dict["run"]:
132 | return self.func(*self.args, **self.kwargs)
133 | else:
134 | raise CancelledError()
135 |
136 | def __init__(self, workers=0):
137 | self._dict = {"run": True}
138 |
139 | def submit(self, func, *args, **kwargs):
140 | return DummyPoolExecutor.DummyResult(func, self._dict, *args, **kwargs)
141 |
142 | def shutdown(self, *_, **__):
143 | self._dict["run"] = False
144 |
145 | def __enter__(self):
146 | return self
147 |
148 | def __exit__(self, exc_type, exc_value, exc_tb):
149 | return
150 |
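A shape sketch for `unfold`, with values chosen so the zero padding is visible:

```python
import torch
from demucs.utils import unfold

a = torch.arange(10.).view(1, 10)            # [*OT=1, T=10]
frames = unfold(a, kernel_size=4, stride=2)
print(frames.shape)   # torch.Size([1, 5, 4]): ceil(10 / 2) frames of length 4
print(frames[0, -1])  # tensor([8., 9., 0., 0.]) -- the last frame is zero padded
```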
--------------------------------------------------------------------------------
/src/models_dir/demucs/demucs/wdemucs.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | # For compat
7 | from .hdemucs import HDemucs
8 |
9 | WDemucs = HDemucs
10 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/docs/linux.md:
--------------------------------------------------------------------------------
1 | # Linux support for Demucs
2 |
3 | If your distribution has at least Python 3.8, and you just wish to separate
4 | tracks with Demucs, not train it, you can just run
5 |
6 | ```bash
7 | pip3 install --user -U demucs
8 | # Then anytime you want to use demucs, just do
9 | python3 -m demucs -d cpu PATH_TO_AUDIO_FILE_1
10 | # If you have added the user specific pip bin/ folder to your path, you can also do
11 | demucs -d cpu PATH_TO_AUDIO_FILE_1
12 | ```
13 |
14 | If Python is too old, or you want to be able to train, I recommend [installing Miniconda][miniconda], with Python 3.8 or newer.
15 |
16 | ```bash
17 | conda activate
18 | pip3 install -U demucs
19 | # Then anytime you want to use demucs, first do conda activate, then
20 | demucs -d cpu PATH_TO_AUDIO_FILE_1
21 | ```
22 |
23 | Of course, you can also use a specific env for Demucs.
24 |
25 | **Important, torchaudio 0.12 update:** Torchaudio no longer supports decoding mp3s without ffmpeg installed. You must have ffmpeg installed, either through Anaconda (`conda install ffmpeg -c conda-forge`) or as a distribution package (e.g. `sudo apt-get install ffmpeg`).
26 |
27 |
28 | [miniconda]: https://docs.conda.io/en/latest/miniconda.html#linux-installers
29 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/docs/mac.md:
--------------------------------------------------------------------------------
1 | # macOS support for Demucs
2 |
3 | If you have a sufficiently recent version of macOS, you can just run
4 |
5 | ```bash
6 | python3 -m pip install --user -U demucs
7 | # Then anytime you want to use demucs, just do
8 | python3 -m demucs -d cpu PATH_TO_AUDIO_FILE_1
9 | # If you have added the user specific pip bin/ folder to your path, you can also do
10 | demucs -d cpu PATH_TO_AUDIO_FILE_1
11 | ```
12 |
13 | If you do not already have Anaconda installed or much experience with the terminal on macOS, here are some detailed instructions:
14 |
15 | 1. Download [Anaconda 3.8 (or more recent) 64-bit for macOS][anaconda]:
16 | 2. Open [Anaconda Prompt in macOS][prompt]
17 | 3. Follow these commands:
18 | ```bash
19 | conda activate
20 | pip3 install -U demucs
21 | # Then anytime you want to use demucs, first do conda activate, then
22 | demucs -d cpu PATH_TO_AUDIO_FILE_1
23 | ```
24 |
25 | **Important, torchaudio 0.12 update:** Torchaudio no longer supports decoding mp3s without ffmpeg installed. You must have ffmpeg installed, either through Anaconda (`conda install ffmpeg -c conda-forge`) or with Homebrew for instance (`brew install ffmpeg`).
26 |
27 | [anaconda]: https://www.anaconda.com/download
28 | [prompt]: https://docs.anaconda.com/anaconda/user-guide/getting-started/#open-nav-mac
29 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/docs/mdx.md:
--------------------------------------------------------------------------------
1 | # Music DemiXing challenge (MDX)
2 |
3 | If you want to use Demucs for the [MDX challenge](https://www.aicrowd.com/challenges/music-demixing-challenge-ismir-2021),
4 | please follow the instructions hereafter
5 |
6 | ## Installing Demucs
7 |
8 | Follow the instructions from the [main README](https://github.com/facebookresearch/demucs#requirements)
9 | in order to set up Demucs using Anaconda. You will need the full setup for training, including soundstretch.
10 |
11 | ## Getting MusDB-HQ
12 |
13 | Download [MusDB-HQ](https://zenodo.org/record/3338373) to some folder and unzip it.
14 |
15 | ## Training Demucs
16 |
17 | Train Demucs (you might need to change the batch size depending on the number of GPUs available).
18 | It seems 48 channels is enough to get the best performance on MusDB-HQ, and training will be faster
19 | and less memory demanding. In any case, the 64 channels version times out on the challenge.
20 | ```bash
21 | ./run.py --channels=48 --batch_size 64 --musdb=PATH_TO_MUSDB --is_wav [EXTRA_FLAGS]
22 | ```
23 |
24 | ### Post training
25 |
26 | Once the training is completed, a new model file will be exported in `models/`.
27 |
28 | You can look at the SDR on the MusDB dataset using `python result_table.py`.
29 |
30 |
31 | ### Evaluate and export a model before training is over
32 |
33 | If you want to export a model before training is complete, use the following command:
34 | ```bash
35 | python -m demucs [ALL EXACT TRAINING FLAGS] --save_model
36 | ```
37 | You can also pass the `--half` flag, in order to save weights in half precision. This will divide the model size by 2 and won't impact SDR.
38 |
39 | Once this is done, you can partially evaluate a model with
40 | ```bash
41 | ./run.py --test NAME_OF_MODEL.th --musdb=PATH_TO_MUSDB --is_wav
42 | ```
43 |
44 | **Note:** `NAME_OF_MODEL.th` is given relative to the models folder (given by `--models`, defaults to `models/`), so don't include the folder in the name.
45 |
46 |
47 | ### Training smaller models
48 |
49 | If you want to quickly test an idea, I would recommend training a 16 kHz model and checking whether things work there, before training the full 44 kHz model. You can train one of those with
50 | ```bash
51 | ./run.py --channels=32 --samplerate 16000 --samples 160000 --data_stride 16000 --depth=5 --batch_size 64 --repitch=0 --musdb=PATH_TO_MUSDB --is_wav [EXTRA_FLAGS]
52 | ```
53 | (repitch must be turned off, because things will break at 16kHz).
54 |
55 | ## Submitting your model
56 |
57 | 1. Git clone [the Music Demixing Challenge - Starter Kit - Demucs Edition](https://github.com/adefossez/music-demixing-challenge-starter-kit).
58 | 2. Inside the starter kit, create a `models/` folder and copy over the trained model from the Demucs repo (renaming
59 | it for instance `my_model.th`)
60 | 3. Inside the `test_demuc.py` file, change the function `prediction_setup`: comment the loading
61 | of the pre-trained model, and uncomment the code to load your own model.
62 | 4. Edit the file `aicrowd.json` with your username.
63 | 5. Install [git-lfs](https://git-lfs.github.com/). Then run
64 |
65 | ```bash
66 | git lfs install
67 | git add models/
68 | git add -u .
69 | git commit -m "My Demucs submission"
70 | ```
71 | 6. Follow the [submission instructions](https://github.com/AIcrowd/music-demixing-challenge-starter-kit/blob/master/docs/SUBMISSION.md).
72 |
73 | Best of luck 🤞
74 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/docs/release.md:
--------------------------------------------------------------------------------
1 | # Release notes for Demucs
2 |
3 | ## V4.1.0a1, TBD
4 |
5 | Get models list
6 |
7 | Check segment of HTDemucs inside BagOfModels
8 |
9 | Added api.py to be called from another program
10 |
11 | Use api in separate.py
12 |
13 | Added `--other-method`: how to compute the `no_{STEM}` output; add up all the other stems (`add`), subtract the specific stem from the original track (`minus`), or discard it (`none`)
14 |
15 | Added type `HTDemucs` to type alias `AnyModel`.
16 |
17 | ## V4.0.1, 8th of September 2023
18 |
19 | **From this version, Python 3.7 is no longer supported. This is not a problem since the latest PyTorch 2.0.0 no longer supports it either.**
20 |
21 | Various improvements by @CarlGao4. Support for `segment` param inside of HTDemucs
22 | model.
23 |
24 | Made diffq an optional dependency, with an error message if not installed.
25 |
26 | Added output format flac (Free Lossless Audio Codec)
27 |
28 | Will use the CPU for complex-number operations when using the MPS device (all other computations are still performed on MPS).
29 |
30 | Optimized code to save memory
31 |
32 | Allow changing the MP3 encoding preset
33 |
34 | ## V4.0.0, 7th of December 2022
35 |
36 | Adding hybrid transformer Demucs model.
37 |
38 | Added support for [Torchaudio implementation of HDemucs](https://pytorch.org/audio/main/tutorials/hybrid_demucs_tutorial.html), thanks @skim0514.
39 |
40 | Added experimental 6 sources model `htdemucs_6s` (`drums`, `bass`, `other`, `vocals`, `piano`, `guitar`).
41 |
42 | ## V3.0.6, 16th of November 2022
43 |
44 | Option to customize output path of stems (@CarlGao4)
45 |
46 | Fixed bug in pad1d leading to failure sometimes.
47 |
48 | ## V3.0.5, 17th of August 2022
49 |
50 | Added `--segment` flag to customize the segment length and use less memory (thanks @CarlGao4).
51 |
52 | Fix reflect padding bug on small inputs.
53 |
54 | Compatible with pyTorch 1.12
55 |
56 | ## V3.0.4, 24th of February 2022
57 |
58 | Added option to split into two stems (i.e. vocals, vs. non vocals), thanks to @CarlGao4.
59 |
60 | Added `--float32`, `--int24` and `--clip-mode` options to customize how output stems are saved.
61 |
62 | ## V3.0.3, 2nd of December 2021
63 |
64 | Fix bug in weights used for different sources. Thanks @keunwoochoi for the report and fix.
65 |
66 | Improving drastically memory usage on GPU for long files. Thanks a lot @famzah for providing this.
67 |
68 | Adding multithread evaluation on CPU (`-j` option).
69 |
70 | (v3.0.2 had a bug with the CPU pool and is skipped.)
71 |
72 | ## V3.0.1, 12th of November 2021
73 |
74 | Release of Demucs v3, featuring hybrid domain separation and much more.
75 | This drops support for Conv-Tasnet and training on the non HQ MusDB dataset.
76 | There is no version 3.0.0 because I messed up.
77 |
78 | ## V2.0.2, 26th of May 2021
79 |
80 | - Fix in Tasnet (PR #178)
81 | - Use ffmpeg in priority when available instead of torchaudio to avoid a small shift in MP3 data.
82 | - other minor fixes
83 |
84 | ## v2.0.1, 11th of May 2021
85 |
86 | MusDB HQ support added. Custom wav dataset support added.
87 | Minor changes: there was an issue with padding when reading mp3s with torchaudio; to limit that,
88 | Demucs now uses ffmpeg in priority and falls back to torchaudio.
89 | Replaced pre-trained demucs model with one trained on more recent codebase.
90 |
91 | ## v2.0.0, 28th of April 2021
92 |
93 | This is a big release, with a lot of breaking changes. You will likely
94 | need to install Demucs from scratch.
95 |
96 |
97 |
98 | - Demucs now supports on the fly resampling by a factor of 2.
99 | This improves SDR by almost 0.3 points.
100 | - Random scaling of each source added (From Uhlich et al. 2017).
101 | - Random pitch and tempo augmentation added, from [Cohen-Hadria et al. 2019].
102 | - With extra augmentation, the best performing Demucs model now has only 64 channels
103 | instead of 100, so model size goes from 2.4GB to 1GB. Also SDR is up from 5.6 to 6.3 when trained only on MusDB.
104 | - Quantized model using [DiffQ](https://github.com/facebookresearch/diffq) has been added. Model size is 150MB, no loss in quality as far as I, or the metrics,
105 | can say.
106 | - Pretrained models are now using the TorchHub interface.
107 | - Overlap mode for separation, to limit inconsistencies at
108 | frame boundaries, with linear transition over the overlap. Overlap is currently
109 | at 25%. Note that this is only done for separation, not training, because
110 | I added that quite late to the code. For Conv-TasNet this can improve
111 | SDR quite a bit (+0.3 points, to 6.0).
112 | - PyPI hosting, for separation, not training!
113 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/docs/sdx23.md:
--------------------------------------------------------------------------------
1 | # SDX 23 challenge
2 |
3 | Checkout [the challenge page](https://www.aicrowd.com/challenges/sound-demixing-challenge-2023)
4 | for more information. This page is specifically on training models for the [MDX'23 sub-challenge](https://www.aicrowd.com/challenges/sound-demixing-challenge-2023/problems/music-demixing-track-mdx-23).
5 | There are two tracks: one trained on a dataset with bleeding, and the other with label mixups.
6 |
7 | This page gives instructions on training a Hybrid Demucs model on those datasets.
8 | I haven't tried the HT Demucs model, as it typically requires quite a bit of training data, but the same could be done with it.
9 |
10 | You will need to work from an up to date clone of this repo. See the [generic training instructions](./training.md) for more information.
11 |
12 | ## Getting the data
13 |
14 | Register on the challenge, then checkout the [Resources page](https://www.aicrowd.com/challenges/sound-demixing-challenge-2023/problems/music-demixing-track-mdx-23/dataset_files) and download the dataset you are
15 | interested in.
16 |
17 | Update the `conf/dset/sdx23_bleeding.yaml` and `conf/dset/sdx23_labelnoise.yaml` files to point to the right path.
18 |
19 | **Make sure soundfile** is installed (`conda install -c conda-forge libsndfile; pip install soundfile`).
20 |
21 | ### Create proper train / valid structure
22 |
23 | Demucs requires a valid set to work properly. Go to the folder where you extracted the tracks then do:
24 |
25 | ```shell
26 | mkdir train
27 | mv * train # there will be a warning that train cannot be moved into itself; that's fine, the other tracks will have moved.
28 | mkdir valid
29 | cd train
30 | mv 5640831d-7853-4d06-8166-988e2844b652 bc964128-da16-4e4c-af95-4d1211e78c70 \
31 | cc7f7675-d3c8-4a49-a2d7-a8959b694004 f40ffd10-4e8b-41e6-bd8a-971929ca9138 \
32 | bc1f2967-f834-43bd-aadc-95afc897cfe7 cc3e4991-6cce-40fe-a917-81a4fbb92ea6 \
33 | ed90a89a-bf22-444d-af3d-d9ac3896ebd2 f4b735de-14b1-4091-a9ba-c8b30c0740a7 ../valid
34 | ```
35 |
36 | ## Training
37 |
38 | See `dora grid sdx23` for a starting point. You can do `dora grid sdx23 --init --dry_run` then `dora run -f SIG -d` with `SIG` one of the signatures,
39 | to train on a machine with GPUs if you do not have a SLURM cluster.
40 |
41 | Keep in mind that the valid tracks and train tracks are corrupted in different ways for those tasks, so do not expect
42 | the valid loss to go down as smoothly as with normal training on the clean MusDB.
43 |
44 | I only trained Hybrid Demucs baselines as Hybrid Transformer typically requires more data.
45 |
46 |
47 | ## Exporting models
48 |
49 | Run
50 | ```
51 | python -m tools.export SIG
52 | ```
53 |
54 | This will export the trained model into the `release_models` folder.
55 |
56 | ## Submitting a model
57 |
58 | Clone the [Demucs Starter Kit for SDX23](https://github.com/adefossez/sdx23). Follow the instructions there.
59 |
60 | You will need to copy the models under `release_models` into the `sdx23/models/` folder before you can use them.
61 | Make sure you have git-lfs properly installed and setup before adding those files to your fork of `sdx23`.
62 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/docs/windows.md:
--------------------------------------------------------------------------------
1 | # Windows support for Demucs
2 |
3 | ## Installation and usage
4 |
5 | If you don't have much experience with Anaconda, python or the shell, here are more detailed instructions. Note that **Demucs is not supported on 32bits systems** (as Pytorch is not available there).
6 |
7 | - First install Anaconda with **Python 3.8** or more recent, which you can find [here][install].
8 | - Start the [Anaconda prompt][prompt].
9 |
10 | Then, all commands that follow must be run from this prompt.
11 |
12 |
13 | **I have no coding experience and these are too difficult for me**
14 |
15 | > Then a GUI is suitable for you. See [Demucs GUI](https://github.com/CarlGao4/Demucs-Gui)
16 |
17 |
18 |
19 | ### If you want to use your GPU
20 |
21 | If you have a graphics card produced by NVIDIA with more than 2GiB of memory, you can separate tracks with GPU acceleration. To achieve this, you must install Pytorch with CUDA. If Pytorch was already installed (you already installed Demucs for instance), first run `python.exe -m pip uninstall torch torchaudio`.
22 | Then visit the [Pytorch Home Page](https://pytorch.org/get-started/locally/) and follow the guide on it to install with CUDA support. Please make sure that the version of torchaudio is no greater than 2.1 (the latest version at the time of writing; 2.2.0 is known to be unsupported).
23 |
24 | ### Installation
25 |
26 | Start the Anaconda prompt, and run the following
27 |
28 | ```cmd
29 | conda install -c conda-forge ffmpeg
30 | python.exe -m pip install -U demucs SoundFile
31 | ```
32 |
33 | ### Upgrade
34 |
35 | To upgrade Demucs, simply run `python.exe -m pip install -U demucs`, from the Anaconda prompt.
36 |
37 | ### Usage
38 |
39 | Then to use Demucs, just start the **Anaconda prompt** and run:
40 | ```
41 | demucs -d cpu "PATH_TO_AUDIO_FILE_1" ["PATH_TO_AUDIO_FILE_2" ...]
42 | ```
43 | The `"` around the filename are required if the path contains spaces. A simple way to input these paths is draging a file from a folder into the terminal.
44 |
45 | To find the separated files, you can run this command and open the folders:
46 | ```
47 | explorer separated
48 | ```
49 |
50 | ### Separating an entire folder
51 |
52 | You can use the following command to separate an entire folder of mp3s for instance (replace the extension `.mp3` if need be for other file types)
53 | ```
54 | cd FOLDER
55 | for %i in (*.mp3) do (demucs -d cpu "%i")
56 | ```
57 |
58 | ## Potential errors
59 |
60 | If you have an error saying that `mkl_intel_thread.dll` cannot be found, you can try to first run
61 | `conda install -c defaults intel-openmp -f`. Then try again to run the `demucs` command. If it still doesn't work, you can try to run first `set CONDA_DLL_SEARCH_MODIFICATION_ENABLE=1`, then again the `demucs` command and hopefully it will work 🙏.
62 |
63 | **If you get a permission error**, please try starting the Anaconda Prompt as administrator.
64 |
65 |
66 | [install]: https://www.anaconda.com/download
67 | [prompt]: https://docs.anaconda.com/anaconda/user-guide/getting-started/#open-prompt-win
68 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/environment-cpu.yml:
--------------------------------------------------------------------------------
1 | name: demucs
2 |
3 | channels:
4 | - pytorch
5 | - conda-forge
6 |
7 | dependencies:
8 | - python>=3.8,<3.10
9 | - ffmpeg>=4.2
10 | - pytorch>=1.8.1
11 | - torchaudio>=0.8
12 | - tqdm>=4.36
13 | - pip
14 | - pip:
15 | - diffq>=0.2
16 | - dora-search
17 | - einops
18 | - hydra-colorlog>=1.1
19 | - hydra-core>=1.1
20 | - julius>=0.2.3
21 | - lameenc>=1.2
22 | - openunmix
23 | - musdb>=0.4.0
24 | - museval>=0.4.0
25 | - soundfile
26 | - submitit
27 | - treetable>=0.2.3
28 |
29 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/environment-cuda.yml:
--------------------------------------------------------------------------------
1 | name: demucs
2 |
3 | channels:
4 | - pytorch
5 | - conda-forge
6 |
7 | dependencies:
8 | - python>=3.8,<3.10
9 | - ffmpeg>=4.2
10 | - pytorch>=1.8.1
11 | - torchaudio>=0.8
12 | - cudatoolkit>=10
13 | - tqdm>=4.36
14 | - pip
15 | - pip:
16 | - diffq>=0.2
17 | - dora-search
18 | - einops
19 | - hydra-colorlog>=1.1
20 | - hydra-core>=1.1
21 | - julius>=0.2.3
22 | - lameenc>=1.2
23 | - openunmix
24 | - musdb>=0.4.0
25 | - museval>=0.4.0
26 | - soundfile
27 | - submitit
28 | - treetable>=0.2.3
29 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/hubconf.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | dependencies = ['dora-search', 'julius', 'lameenc', 'openunmix', 'pyyaml',
8 | 'torch', 'torchaudio', 'tqdm']
9 |
10 | from demucs.pretrained import get_model
11 |
12 |
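With this `hubconf.py`, the pretrained loader is also reachable through `torch.hub`; a sketch, assuming the GitHub repo name from `setup.py` and network access:

```python
import torch

model = torch.hub.load('facebookresearch/demucs', 'get_model', name='htdemucs')
```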
--------------------------------------------------------------------------------
/src/models_dir/demucs/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 |
3 | [mypy-treetable,torchaudio.*,diffq,yaml,tqdm,lameenc,musdb,museval,openunmix.*,einops,xformers.*]
4 | ignore_missing_imports = True
5 |
6 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/outputs.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/models_dir/demucs/outputs.tar.gz
--------------------------------------------------------------------------------
/src/models_dir/demucs/requirements.txt:
--------------------------------------------------------------------------------
1 | # please make sure you have already a pytorch install that is cuda enabled!
2 | dora-search>=0.1.12
3 | diffq>=0.2.1
4 | einops
5 | flake8
6 | hydra-colorlog>=1.1
7 | hydra-core>=1.1
8 | julius>=0.2.3
9 | lameenc>=1.2
10 | museval
11 | mypy
12 | openunmix
13 | pyyaml
14 | submitit
15 | torch>=1.8.1
16 | torchaudio>=0.8,<2.1
17 | tqdm
18 | treetable
19 | soundfile>=0.10.3;sys_platform=="win32"
20 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/requirements_minimal.txt:
--------------------------------------------------------------------------------
1 | # please make sure you have already a pytorch install that is cuda enabled!
2 | dora-search
3 | einops
4 | julius>=0.2.3
5 | lameenc>=1.2
6 | openunmix
7 | pyyaml
8 | torch>=1.8.1
9 | torchaudio>=0.8,<2.1
10 | tqdm
11 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/setup.cfg:
--------------------------------------------------------------------------------
1 | [pep8]
2 | max-line-length = 100
3 |
4 | [flake8]
5 | max-line-length = 100
6 |
7 | [yapf]
8 | column_limit = 100
9 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | # author: adefossez
7 | # Inspired from https://github.com/kennethreitz/setup.py
8 |
9 | from pathlib import Path
10 |
11 | from setuptools import setup
12 |
13 |
14 | NAME = 'demucs'
15 | DESCRIPTION = 'Music source separation in the waveform domain.'
16 |
17 | URL = 'https://github.com/facebookresearch/demucs'
18 | EMAIL = 'defossez@fb.com'
19 | AUTHOR = 'Alexandre Défossez'
20 | REQUIRES_PYTHON = '>=3.8.0'
21 |
22 | HERE = Path(__file__).parent
23 |
24 | # Get version without explicitly loading the module.
25 | for line in open('demucs/__init__.py'):
26 | line = line.strip()
27 | if '__version__' in line:
28 | context = {}
29 | exec(line, context)
30 | VERSION = context['__version__']
31 |
32 |
33 | def load_requirements(name):
34 | required = [i.strip() for i in open(HERE / name)]
35 | required = [i for i in required if not i.startswith('#')]
36 | return required
37 |
38 |
39 | REQUIRED = load_requirements('requirements_minimal.txt')
40 | ALL_REQUIRED = load_requirements('requirements.txt')
41 |
42 | try:
43 | with open(HERE / "README.md", encoding='utf-8') as f:
44 | long_description = '\n' + f.read()
45 | except FileNotFoundError:
46 | long_description = DESCRIPTION
47 |
48 | setup(
49 | name=NAME,
50 | version=VERSION,
51 | description=DESCRIPTION,
52 | long_description=long_description,
53 | long_description_content_type='text/markdown',
54 | author=AUTHOR,
55 | author_email=EMAIL,
56 | python_requires=REQUIRES_PYTHON,
57 | url=URL,
58 | packages=['demucs'],
59 | extras_require={
60 | 'dev': ALL_REQUIRED,
61 | },
62 | install_requires=REQUIRED,
63 | include_package_data=True,
64 | entry_points={
65 | 'console_scripts': ['demucs=demucs.separate:main'],
66 | },
67 | license='MIT License',
68 | classifiers=[
69 | # Trove classifiers
70 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
71 | 'License :: OSI Approved :: MIT License',
72 | 'Topic :: Multimedia :: Sound/Audio',
73 | 'Topic :: Scientific/Engineering :: Artificial Intelligence',
74 | ],
75 | )
76 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/test.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/models_dir/demucs/test.mp3
--------------------------------------------------------------------------------
/src/models_dir/demucs/tools/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
--------------------------------------------------------------------------------
/src/models_dir/demucs/tools/bench.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | """
8 | Benchmarking script, useful to check for OOM and reasonable train time,
9 | and, for the MDX competition, to estimate whether we will meet the time limit."""
10 | from contextlib import contextmanager
11 | from fractions import Fraction  # required by the use_train_segment branch below
12 | import logging
13 | import sys
14 | import time
15 | import torch
15 |
16 | from demucs.train import get_solver, main
17 | from demucs.apply import apply_model
18 |
19 | logging.basicConfig(level=logging.INFO, stream=sys.stderr)
20 |
21 |
22 | class Result:
23 | pass
24 |
25 |
26 | @contextmanager
27 | def bench():
28 | import gc
29 | gc.collect()
30 | torch.cuda.reset_max_memory_allocated()
31 | torch.cuda.empty_cache()
32 | result = Result()
33 | # before = torch.cuda.memory_allocated()
34 | before = 0
35 | begin = time.time()
36 | try:
37 | yield result
38 | finally:
39 | torch.cuda.synchronize()
40 | mem = (torch.cuda.max_memory_allocated() - before) / 2 ** 20
41 | tim = time.time() - begin
42 | result.mem = mem
43 | result.tim = tim
44 |
45 |
46 | xp = main.get_xp_from_sig(sys.argv[1])
47 | xp = main.get_xp(xp.argv + sys.argv[2:])
48 | with xp.enter():
49 | solver = get_solver(xp.cfg)
50 | if getattr(solver.model, 'use_train_segment', False):
51 | batch = solver.augment(next(iter(solver.loaders['train'])))
52 | solver.model.segment = Fraction(batch.shape[-1], solver.model.samplerate)
53 | train_segment = solver.model.segment
54 | solver.model.eval()
55 | model = solver.model
56 | model.cuda()
57 | x = torch.randn(2, xp.cfg.dset.channels, int(10 * model.samplerate), device='cuda')
58 | with bench() as res:
59 | y = model(x)
60 | y.sum().backward()
61 | del y
62 | for p in model.parameters():
63 | p.grad = None
64 | print(f"FB: {res.mem:.1f} MB, {res.tim * 1000:.1f} ms")
65 |
66 | x = torch.randn(1, xp.cfg.dset.channels, int(model.segment * model.samplerate), device='cuda')
67 | with bench() as res:
68 | with torch.no_grad():
69 | y = model(x)
70 | del y
71 | print(f"FV: {res.mem:.1f} MB, {res.tim * 1000:.1f} ms")
72 |
73 | model.cpu()
74 | torch.set_num_threads(1)
75 | test = torch.randn(1, xp.cfg.dset.channels, model.samplerate * 40)
76 | b = time.time()
77 | apply_model(model, test, split=True, shifts=1)
78 | print("CPU 40 sec:", time.time() - b)
79 |
--------------------------------------------------------------------------------
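Note on bench.py above: the script is launched as python3 -m tools.bench SIG [overrides...], where SIG is a dora XP signature (sys.argv[1]). The bench() context manager is a reusable pattern for measuring peak CUDA memory and wall time; a self-contained sketch, assuming a CUDA device is available:

    import time
    from contextlib import contextmanager

    import torch


    @contextmanager
    def cuda_bench():
        torch.cuda.reset_peak_memory_stats()  # track peak memory from zero
        torch.cuda.empty_cache()
        result = {}
        begin = time.time()
        try:
            yield result
        finally:
            torch.cuda.synchronize()  # flush queued kernels before reading the clock
            result["mem_mb"] = torch.cuda.max_memory_allocated() / 2 ** 20
            result["time_s"] = time.time() - begin


    with cuda_bench() as res:
        a = torch.randn(2048, 2048, device="cuda")
        (a @ a).sum().item()
    print(f"{res['mem_mb']:.1f} MB, {res['time_s'] * 1000:.1f} ms")
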
/src/models_dir/demucs/tools/convert.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | # Script to convert option names and model args from the dev branch to
8 | # the cleaned-up release one. There should be no reason to use it anymore.
9 |
10 | import argparse
11 | import io
12 | import json
13 | from pathlib import Path
14 | import subprocess as sp
15 |
16 | import torch
17 |
18 | from demucs import train, pretrained, states
19 |
20 | DEV_REPO = Path.home() / 'tmp/release_demucs_mdx'
21 |
22 |
23 | TO_REMOVE = [
24 | 'demucs.dconv_kw.gelu=True',
25 | 'demucs.dconv_kw.nfreqs=0',
26 | 'demucs.dconv_kw.nfreqs=0',
27 | 'demucs.dconv_kw.version=4',
28 | 'demucs.norm=gn',
29 | 'wdemucs.nice=True',
30 | 'wdemucs.good=True',
31 | 'wdemucs.freq_emb=-0.2',
32 | 'special=True',
33 | 'special=False',
34 | ]
35 |
36 | TO_REPLACE = [
37 | ('power', 'svd'),
38 | ('wdemucs', 'hdemucs'),
39 | ('hdemucs.hybrid=True', 'hdemucs.hybrid_old=True'),
40 | ('hdemucs.hybrid=2', 'hdemucs.hybrid=True'),
41 | ]
42 |
43 | TO_INJECT = [
44 | ('model=hdemucs', ['hdemucs.cac=False']),
45 | ('model=hdemucs', ['hdemucs.norm_starts=999']),
46 | ]
47 |
48 |
49 | def get_original_argv(sig):
50 | return json.load(open(Path(DEV_REPO) / f'outputs/xps/{sig}/.argv.json'))
51 |
52 |
53 | def transform(argv, mappings, verbose=False):
54 | for rm in TO_REMOVE:
55 | while rm in argv:
56 | argv.remove(rm)
57 |
58 | for old, new in TO_REPLACE:
59 | argv[:] = [a.replace(old, new) for a in argv]
60 |
61 | for condition, args in TO_INJECT:
62 | if condition in argv:
63 | argv[:] = args + argv
64 |
65 | for idx, arg in enumerate(argv):
66 | if 'continue_from=' in arg:
67 | dep_sig = arg.split('=')[1]
68 | if dep_sig.startswith('"'):
69 | dep_sig = eval(dep_sig)
70 | if verbose:
71 | print("Need to recursively convert dependency XP", dep_sig)
72 | new_sig = convert(dep_sig, mappings, verbose).sig
73 | argv[idx] = f'continue_from="{new_sig}"'
74 |
75 |
76 | def convert(sig, mappings, verbose=False):
77 | argv = get_original_argv(sig)
78 | if verbose:
79 | print("Original argv", argv)
80 | transform(argv, mappings, verbose)
81 | if verbose:
82 | print("New argv", argv)
83 | xp = train.main.get_xp(argv)
84 | train.main.init_xp(xp)
85 | if verbose:
86 | print("Mapping", sig, "->", xp.sig)
87 | mappings[sig] = xp.sig
88 | return xp
89 |
90 |
91 | def _eval_old(old_sig, x):
92 | script = (
93 | 'from demucs import pretrained; import torch; import sys; import io; '
94 | 'buf = io.BytesIO(sys.stdin.buffer.read()); '
95 | 'x = torch.load(buf); m = pretrained.load_pretrained_model('
96 | f'"{old_sig}"); torch.save(m(x), sys.stdout.buffer)')
97 |
98 | buf = io.BytesIO()
99 | torch.save(x, buf)
100 | proc = sp.run(
101 | ['python3', '-c', script], input=buf.getvalue(), capture_output=True, cwd=DEV_REPO)
102 | if proc.returncode != 0:
103 | print("Error", proc.stderr.decode())
104 | assert False
105 |
106 | buf = io.BytesIO(proc.stdout)
107 | return torch.load(buf)
108 |
109 |
110 | def compare(old_sig, model):
111 | test = torch.randn(1, 2, 44100 * 10)
112 | old_out = _eval_old(old_sig, test)
113 | out = model(test)
114 |
115 | delta = 20 * torch.log10((out - old_out).norm() / out.norm()).item()
116 | return delta
117 |
118 |
119 | def main():
120 | torch.manual_seed(1234)
121 | parser = argparse.ArgumentParser('convert')
122 | parser.add_argument('sigs', nargs='*')
123 | parser.add_argument('-o', '--output', type=Path, default=Path('release_models'))
124 | parser.add_argument('-d', '--dump', action='store_true')
125 | parser.add_argument('-c', '--compare', action='store_true')
126 | parser.add_argument('-v', '--verbose', action='store_true')
127 | args = parser.parse_args()
128 |
129 | args.output.mkdir(exist_ok=True, parents=True)
130 | mappings = {}
131 | for sig in args.sigs:
132 | xp = convert(sig, mappings, args.verbose)
133 | if args.dump or args.compare:
134 | old_pkg = pretrained._load_package(sig, old=True)
135 | model = train.get_model(xp.cfg)
136 | model.load_state_dict(old_pkg['state'])
137 | if args.dump:
138 | pkg = states.serialize_model(model, xp.cfg)
139 | states.save_with_checksum(pkg, args.output / f'{xp.sig}.th')
140 | if args.compare:
141 | delta = compare(sig, model)
142 | print("Delta for", sig, xp.sig, delta)
143 |
144 | mappings[sig] = xp.sig
145 |
146 | print("FINAL MAPPINGS")
147 | for old, new in mappings.items():
148 | print(old, " ", new)
149 |
150 |
151 | if __name__ == '__main__':
152 | main()
153 |
--------------------------------------------------------------------------------
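Note on convert.py above: to make the rewriting rules concrete, a small illustration of transform() on a made-up argv (mappings is only consulted when a continue_from dependency must be converted recursively, so it can stay empty here):

    argv = ["model=wdemucs", "wdemucs.nice=True", "power.penalty=0"]
    mappings = {}
    transform(argv, mappings)
    print(argv)
    # TO_REMOVE drops 'wdemucs.nice=True', TO_REPLACE rewrites the remaining
    # names, and both TO_INJECT rules fire once 'model=hdemucs' is present:
    # ['hdemucs.norm_starts=999', 'hdemucs.cac=False', 'model=hdemucs', 'svd.penalty=0']
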
/src/models_dir/demucs/tools/export.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | """Export a trained model from the full checkpoint (with optimizer etc.) to
8 | a final checkpoint, with only the model itself. The model is always stored as
9 | half float to save space, and because this has zero impact on the final loss.
10 | When DiffQ was used for training, the model will actually be quantized and bitpacked."""
11 | from argparse import ArgumentParser
12 | from fractions import Fraction
13 | import logging
14 | from pathlib import Path
15 | import sys
16 | import torch
17 |
18 | from demucs import train
19 | from demucs.states import serialize_model, save_with_checksum
20 |
21 |
22 | logger = logging.getLogger(__name__)
23 |
24 |
25 | def main():
26 | logging.basicConfig(level=logging.INFO, stream=sys.stderr)
27 |
28 | parser = ArgumentParser("tools.export", description="Export trained models from XP sigs.")
29 | parser.add_argument('signatures', nargs='*', help='XP signatures.')
30 | parser.add_argument('-o', '--out', type=Path, default=Path("release_models"),
31 | help="Path where to store release models (default release_models)")
32 | parser.add_argument('-s', '--sign', action='store_true',
33 | help='Add sha256 prefix checksum to the filename.')
34 |
35 | args = parser.parse_args()
36 | args.out.mkdir(exist_ok=True, parents=True)
37 |
38 | for sig in args.signatures:
39 | xp = train.main.get_xp_from_sig(sig)
40 | name = train.main.get_name(xp)
41 | logger.info('Handling %s/%s', sig, name)
42 |
43 | out_path = args.out / (sig + ".th")
44 |
45 | solver = train.get_solver_from_sig(sig)
46 | if len(solver.history) < solver.args.epochs:
47 | logger.warning(
48 |                 'Model %s has fewer epochs than expected (%d / %d)',
49 | sig, len(solver.history), solver.args.epochs)
50 |
51 | solver.model.load_state_dict(solver.best_state)
52 | pkg = serialize_model(solver.model, solver.args, solver.quantizer, half=True)
53 | if getattr(solver.model, 'use_train_segment', False):
54 | batch = solver.augment(next(iter(solver.loaders['train'])))
55 | pkg['kwargs']['segment'] = Fraction(batch.shape[-1], solver.model.samplerate)
56 | print("Override", pkg['kwargs']['segment'])
57 | valid, test = None, None
58 | for m in solver.history:
59 | if 'valid' in m:
60 | valid = m['valid']
61 | if 'test' in m:
62 | test = m['test']
63 | pkg['metrics'] = (valid, test)
64 | if args.sign:
65 | save_with_checksum(pkg, out_path)
66 | else:
67 | torch.save(pkg, out_path)
68 |
69 |
70 | if __name__ == '__main__':
71 | main()
72 |
--------------------------------------------------------------------------------
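Note on export.py above: a sketch of consuming an exported package (the file name is a placeholder; the 'kwargs' and 'metrics' keys are set in the loop above, and 'state' is the assumed weights key from serialize_model, as also used in convert.py):

    import torch

    pkg = torch.load("release_models/SIG.th", map_location="cpu")
    valid, test = pkg["metrics"]   # last 'valid'/'test' entries from the history
    state = pkg["state"]           # half-precision weights per the docstring
    print(pkg["kwargs"].get("segment"))
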
/src/models_dir/demucs/tools/notpytest_test_pretrained.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | # All rights reserved.
3 | #
4 | # This source code is licensed under the license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | # Script to evaluate pretrained models.
8 |
9 | from argparse import ArgumentParser
10 | import logging
11 | import sys
12 |
13 | import torch
14 |
15 | from demucs import train, pretrained, evaluate
16 |
17 |
18 | def main():
19 | torch.set_num_threads(1)
20 | logging.basicConfig(stream=sys.stderr, level=logging.INFO)
21 | parser = ArgumentParser("tools.test_pretrained",
22 | description="Evaluate pre-trained models or bags of models "
23 | "on MusDB.")
24 | pretrained.add_model_flags(parser)
25 | parser.add_argument('overrides', nargs='*',
26 | help='Extra overrides, e.g. test.shifts=2.')
27 | args = parser.parse_args()
28 |
29 | xp = train.main.get_xp(args.overrides)
30 | with xp.enter():
31 | solver = train.get_solver(xp.cfg)
32 |
33 | model = pretrained.get_model_from_args(args)
34 | solver.model = model.to(solver.device)
35 | solver.model.eval()
36 |
37 | with torch.no_grad():
38 | results = evaluate.evaluate(solver, xp.cfg.test.sdr)
39 | print(results)
40 |
41 |
42 | if __name__ == '__main__':
43 | main()
44 |
--------------------------------------------------------------------------------
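Note on notpytest_test_pretrained.py above: an illustrative invocation from the repo root (assumes demucs and MusDB are set up; the -n flag is an assumption based on pretrained.add_model_flags, and test.shifts=2 is the example override from the help text above):

    import subprocess

    subprocess.run(
        ["python3", "-m", "tools.notpytest_test_pretrained", "-n", "htdemucs",
         "test.shifts=2"],
        check=True,
    )
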
/src/models_dir/mdx/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/models_dir/mdx/__init__.py
--------------------------------------------------------------------------------
/src/models_dir/mdx/mdxnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from .modules import TFC_TDF
4 | from pytorch_lightning import LightningModule
5 |
6 | dim_s = 4
7 |
8 | class AbstractMDXNet(LightningModule):
9 | def __init__(self, target_name, lr, optimizer, dim_c, dim_f, dim_t, n_fft, hop_length, overlap):
10 | super().__init__()
11 | self.target_name = target_name
12 | self.lr = lr
13 | self.optimizer = optimizer
14 | self.dim_c = dim_c
15 | self.dim_f = dim_f
16 | self.dim_t = dim_t
17 | self.n_fft = n_fft
18 | self.n_bins = n_fft // 2 + 1
19 | self.hop_length = hop_length
20 | self.window = nn.Parameter(torch.hann_window(window_length=self.n_fft, periodic=True), requires_grad=False)
21 | self.freq_pad = nn.Parameter(torch.zeros([1, dim_c, self.n_bins - self.dim_f, self.dim_t]), requires_grad=False)
22 |
23 | def get_optimizer(self):
24 | if self.optimizer == 'rmsprop':
25 | return torch.optim.RMSprop(self.parameters(), self.lr)
26 |
27 | if self.optimizer == 'adamw':
28 | return torch.optim.AdamW(self.parameters(), self.lr)
29 |
30 | class ConvTDFNet(AbstractMDXNet):
31 | def __init__(self, target_name, lr, optimizer, dim_c, dim_f, dim_t, n_fft, hop_length,
32 | num_blocks, l, g, k, bn, bias, overlap):
33 |
34 | super(ConvTDFNet, self).__init__(
35 | target_name, lr, optimizer, dim_c, dim_f, dim_t, n_fft, hop_length, overlap)
36 | #self.save_hyperparameters()
37 |
38 | self.num_blocks = num_blocks
39 | self.l = l
40 | self.g = g
41 | self.k = k
42 | self.bn = bn
43 | self.bias = bias
44 |
45 | if optimizer == 'rmsprop':
46 | norm = nn.BatchNorm2d
47 |
48 | if optimizer == 'adamw':
49 |             norm = lambda ch: nn.GroupNorm(2, ch)
50 |
51 | self.n = num_blocks // 2
52 | scale = (2, 2)
53 |
54 | self.first_conv = nn.Sequential(
55 | nn.Conv2d(in_channels=self.dim_c, out_channels=g, kernel_size=(1, 1)),
56 | norm(g),
57 | nn.ReLU(),
58 | )
59 |
60 | f = self.dim_f
61 | c = g
62 | self.encoding_blocks = nn.ModuleList()
63 | self.ds = nn.ModuleList()
64 | for i in range(self.n):
65 | self.encoding_blocks.append(TFC_TDF(c, l, f, k, bn, bias=bias, norm=norm))
66 | self.ds.append(
67 | nn.Sequential(
68 | nn.Conv2d(in_channels=c, out_channels=c + g, kernel_size=scale, stride=scale),
69 | norm(c + g),
70 | nn.ReLU()
71 | )
72 | )
73 | f = f // 2
74 | c += g
75 |
76 | self.bottleneck_block = TFC_TDF(c, l, f, k, bn, bias=bias, norm=norm)
77 |
78 | self.decoding_blocks = nn.ModuleList()
79 | self.us = nn.ModuleList()
80 | for i in range(self.n):
81 | self.us.append(
82 | nn.Sequential(
83 | nn.ConvTranspose2d(in_channels=c, out_channels=c - g, kernel_size=scale, stride=scale),
84 | norm(c - g),
85 | nn.ReLU()
86 | )
87 | )
88 | f = f * 2
89 | c -= g
90 |
91 | self.decoding_blocks.append(TFC_TDF(c, l, f, k, bn, bias=bias, norm=norm))
92 |
93 | self.final_conv = nn.Sequential(
94 | nn.Conv2d(in_channels=c, out_channels=self.dim_c, kernel_size=(1, 1)),
95 | )
96 |
97 | def forward(self, x):
98 |
99 | x = self.first_conv(x)
100 |
101 | x = x.transpose(-1, -2)
102 |
103 | ds_outputs = []
104 | for i in range(self.n):
105 | x = self.encoding_blocks[i](x)
106 | ds_outputs.append(x)
107 | x = self.ds[i](x)
108 |
109 | x = self.bottleneck_block(x)
110 |
111 | for i in range(self.n):
112 | x = self.us[i](x)
113 | x *= ds_outputs[-i - 1]
114 | x = self.decoding_blocks[i](x)
115 |
116 | x = x.transpose(-1, -2)
117 |
118 | x = self.final_conv(x)
119 |
120 | return x
121 |
122 | class Mixer(nn.Module):
123 | def __init__(self, device, mixer_path):
124 |
125 | super(Mixer, self).__init__()
126 |
127 | self.linear = nn.Linear((dim_s+1)*2, dim_s*2, bias=False)
128 |
129 | self.load_state_dict(
130 | torch.load(mixer_path, map_location=device)
131 | )
132 |
133 | def forward(self, x):
134 | x = x.reshape(1,(dim_s+1)*2,-1).transpose(-1,-2)
135 | x = self.linear(x)
136 | return x.transpose(-1,-2).reshape(dim_s,2,-1)
--------------------------------------------------------------------------------
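Note on mdxnet.py above: a shape sanity-check for ConvTDFNet (hyperparameters are illustrative, not taken from a shipped model). Each of the num_blocks // 2 downsampling stages halves both the time and frequency axes, so dim_f and dim_t must be divisible by 2 ** (num_blocks // 2); the decoder mirrors the encoder, so the output shape matches the input:

    import torch

    net = ConvTDFNet(
        target_name="vocals", lr=1e-4, optimizer="adamw",
        dim_c=4, dim_f=2048, dim_t=256, n_fft=4096, hop_length=1024,
        num_blocks=9, l=3, g=32, k=3, bn=8, bias=False, overlap=0,
    )
    x = torch.randn(1, 4, 2048, 256)  # (batch, dim_c, freq, time) spectrogram
    with torch.no_grad():
        y = net(x)
    print(y.shape)  # torch.Size([1, 4, 2048, 256]) -- same shape in, same out
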
/src/models_dir/mdx/modelparams/model_name_mapper.json:
--------------------------------------------------------------------------------
1 | {
2 | "UVR_MDXNET_1_9703": "UVR-MDX-NET 1",
3 | "UVR_MDXNET_2_9682": "UVR-MDX-NET 2",
4 | "UVR_MDXNET_3_9662": "UVR-MDX-NET 3",
5 | "UVR_MDXNET_KARA": "UVR-MDX-NET Karaoke",
6 | "UVR_MDXNET_Main": "UVR-MDX-NET Main",
7 | "UVR-MDX-NET-Inst_1": "UVR-MDX-NET Inst 1",
8 | "UVR-MDX-NET-Inst_2": "UVR-MDX-NET Inst 2",
9 | "UVR-MDX-NET-Inst_3": "UVR-MDX-NET Inst 3",
10 | "UVR-MDX-NET-Inst_4": "UVR-MDX-NET Inst 4",
11 | "UVR-MDX-NET-Inst_Main": "UVR-MDX-NET Inst Main",
12 | "UVR-MDX-NET-Inst_Main_2": "UVR-MDX-NET Inst Main 2",
13 | "UVR-MDX-NET-Inst_HQ_1": "UVR-MDX-NET Inst HQ 1",
14 | "UVR-MDX-NET-Inst_HQ_2": "UVR-MDX-NET Inst HQ 2",
15 | "UVR-MDX-NET-Inst_HQ_3": "UVR-MDX-NET Inst HQ 3",
16 | "UVR_MDXNET_KARA_2": "UVR-MDX-NET Karaoke 2",
17 | "Kim_Vocal_1": "Kim Vocal 1",
18 | "Kim_Vocal_2": "Kim Vocal 2",
19 | "Kim_Inst": "Kim Inst",
20 | "MDX23C-8KFFT-InstVoc_HQ.ckpt": "MDX23C-InstVoc HQ",
21 | "MDX23C-8KFFT-InstVoc_HQ_2.ckpt": "MDX23C-InstVoc HQ 2",
22 | "MDX23C_D1581.ckpt": "MDX23C-InstVoc D1581",
23 | "Reverb_HQ_By_FoxJoy": "Reverb HQ"
24 | }
--------------------------------------------------------------------------------
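Note on model_name_mapper.json above: the mapper is a flat JSON object from on-disk model names to display names; a minimal lookup sketch (the path is relative to the mdx package and illustrative):

    import json
    from pathlib import Path

    mapper = json.loads(Path("modelparams/model_name_mapper.json").read_text())
    print(mapper["UVR_MDXNET_KARA_2"])  # -> "UVR-MDX-NET Karaoke 2"
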
/src/models_dir/mdx/modules.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class TFC(nn.Module):
6 | def __init__(self, c, l, k, norm):
7 | super(TFC, self).__init__()
8 |
9 | self.H = nn.ModuleList()
10 | for i in range(l):
11 | self.H.append(
12 | nn.Sequential(
13 | nn.Conv2d(in_channels=c, out_channels=c, kernel_size=k, stride=1, padding=k // 2),
14 | norm(c),
15 | nn.ReLU(),
16 | )
17 | )
18 |
19 | def forward(self, x):
20 | for h in self.H:
21 | x = h(x)
22 | return x
23 |
24 |
25 | class DenseTFC(nn.Module):
26 | def __init__(self, c, l, k, norm):
27 | super(DenseTFC, self).__init__()
28 |
29 | self.conv = nn.ModuleList()
30 | for i in range(l):
31 | self.conv.append(
32 | nn.Sequential(
33 | nn.Conv2d(in_channels=c, out_channels=c, kernel_size=k, stride=1, padding=k // 2),
34 | norm(c),
35 | nn.ReLU(),
36 | )
37 | )
38 |
39 | def forward(self, x):
40 | for layer in self.conv[:-1]:
41 | x = torch.cat([layer(x), x], 1)
42 | return self.conv[-1](x)
43 |
44 |
45 | class TFC_TDF(nn.Module):
46 | def __init__(self, c, l, f, k, bn, dense=False, bias=True, norm=nn.BatchNorm2d):
47 |
48 | super(TFC_TDF, self).__init__()
49 |
50 | self.use_tdf = bn is not None
51 |
52 | self.tfc = DenseTFC(c, l, k, norm) if dense else TFC(c, l, k, norm)
53 |
54 | if self.use_tdf:
55 | if bn == 0:
56 | self.tdf = nn.Sequential(
57 | nn.Linear(f, f, bias=bias),
58 | norm(c),
59 | nn.ReLU()
60 | )
61 | else:
62 | self.tdf = nn.Sequential(
63 | nn.Linear(f, f // bn, bias=bias),
64 | norm(c),
65 | nn.ReLU(),
66 | nn.Linear(f // bn, f, bias=bias),
67 | norm(c),
68 | nn.ReLU()
69 | )
70 |
71 | def forward(self, x):
72 | x = self.tfc(x)
73 | return x + self.tdf(x) if self.use_tdf else x
74 |
75 |
--------------------------------------------------------------------------------
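Note on modules.py above: a small shape check for TFC_TDF (numbers are illustrative). When bn > 0, the TDF branch is a frequency-axis bottleneck, Linear(f -> f // bn -> f), whose output is added residually to the TFC result:

    import torch
    import torch.nn as nn

    block = TFC_TDF(c=24, l=2, f=64, k=3, bn=4, norm=nn.BatchNorm2d)
    x = torch.randn(2, 24, 32, 64)  # (batch, channels, time, freq)
    print(block(x).shape)           # torch.Size([2, 24, 32, 64])
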
/src/models_dir/mdx/pyrb.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import tempfile
4 | import six
5 | import numpy as np
6 | import soundfile as sf
7 | import sys
8 |
9 | if getattr(sys, 'frozen', False):
10 | BASE_PATH_RUB = sys._MEIPASS
11 | else:
12 | BASE_PATH_RUB = os.path.dirname(os.path.abspath(__file__))
13 |
14 | __all__ = ['time_stretch', 'pitch_shift']
15 |
16 | __RUBBERBAND_UTIL = os.path.join(BASE_PATH_RUB, 'rubberband')
17 |
18 | if six.PY2:
19 | DEVNULL = open(os.devnull, 'w')
20 | else:
21 | DEVNULL = subprocess.DEVNULL
22 |
23 | def __rubberband(y, sr, **kwargs):
24 |
25 | assert sr > 0
26 |
27 | # Get the input and output tempfile
28 | fd, infile = tempfile.mkstemp(suffix='.wav')
29 | os.close(fd)
30 | fd, outfile = tempfile.mkstemp(suffix='.wav')
31 | os.close(fd)
32 |
33 | # dump the audio
34 | sf.write(infile, y, sr)
35 |
36 | try:
37 | # Execute rubberband
38 | arguments = [__RUBBERBAND_UTIL, '-q']
39 |
40 | for key, value in six.iteritems(kwargs):
41 | arguments.append(str(key))
42 | arguments.append(str(value))
43 |
44 | arguments.extend([infile, outfile])
45 |
46 | subprocess.check_call(arguments, stdout=DEVNULL, stderr=DEVNULL)
47 |
48 | # Load the processed audio.
49 | y_out, _ = sf.read(outfile, always_2d=True)
50 |
51 |         # make sure that the output dimensions match the input
52 | if y.ndim == 1:
53 | y_out = np.squeeze(y_out)
54 |
55 | except OSError as exc:
56 | six.raise_from(RuntimeError('Failed to execute rubberband. '
57 | 'Please verify that rubberband-cli '
58 | 'is installed.'),
59 | exc)
60 |
61 | finally:
62 | # Remove temp files
63 | os.unlink(infile)
64 | os.unlink(outfile)
65 |
66 | return y_out
67 |
68 | def time_stretch(y, sr, rate, rbargs=None):
69 | if rate <= 0:
70 | raise ValueError('rate must be strictly positive')
71 |
72 | if rate == 1.0:
73 | return y
74 |
75 | if rbargs is None:
76 | rbargs = dict()
77 |
78 | rbargs.setdefault('--tempo', rate)
79 |
80 | return __rubberband(y, sr, **rbargs)
81 |
82 | def pitch_shift(y, sr, n_steps, rbargs=None):
83 |
84 | if n_steps == 0:
85 | return y
86 |
87 | if rbargs is None:
88 | rbargs = dict()
89 |
90 | rbargs.setdefault('--pitch', n_steps)
91 |
92 | return __rubberband(y, sr, **rbargs)
93 |
--------------------------------------------------------------------------------
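Note on pyrb.py above: a usage sketch (requires the bundled or system rubberband binary, as the error path above notes; file names are placeholders):

    import soundfile as sf

    y, sr = sf.read("input.wav")
    faster = time_stretch(y, sr, 1.25)  # tempo scaled by 1.25x via rubberband --tempo
    shifted = pitch_shift(y, sr, 2)     # up two semitones via rubberband --pitch
    sf.write("shifted.wav", shifted, sr)
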
/src/models_dir/mdxc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/models_dir/mdxc/__init__.py
--------------------------------------------------------------------------------
/src/models_dir/mdxc/mdxnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from .modules import TFC_TDF
4 | from pytorch_lightning import LightningModule
5 |
6 | dim_s = 4
7 |
8 | class AbstractMDXNet(LightningModule):
9 | def __init__(self, target_name, lr, optimizer, dim_c, dim_f, dim_t, n_fft, hop_length, overlap):
10 | super().__init__()
11 | self.target_name = target_name
12 | self.lr = lr
13 | self.optimizer = optimizer
14 | self.dim_c = dim_c
15 | self.dim_f = dim_f
16 | self.dim_t = dim_t
17 | self.n_fft = n_fft
18 | self.n_bins = n_fft // 2 + 1
19 | self.hop_length = hop_length
20 | self.window = nn.Parameter(torch.hann_window(window_length=self.n_fft, periodic=True), requires_grad=False)
21 | self.freq_pad = nn.Parameter(torch.zeros([1, dim_c, self.n_bins - self.dim_f, self.dim_t]), requires_grad=False)
22 |
23 | def get_optimizer(self):
24 | if self.optimizer == 'rmsprop':
25 | return torch.optim.RMSprop(self.parameters(), self.lr)
26 |
27 | if self.optimizer == 'adamw':
28 | return torch.optim.AdamW(self.parameters(), self.lr)
29 |
30 | class ConvTDFNet(AbstractMDXNet):
31 | def __init__(self, target_name, lr, optimizer, dim_c, dim_f, dim_t, n_fft, hop_length,
32 | num_blocks, l, g, k, bn, bias, overlap):
33 |
34 | super(ConvTDFNet, self).__init__(
35 | target_name, lr, optimizer, dim_c, dim_f, dim_t, n_fft, hop_length, overlap)
36 | #self.save_hyperparameters()
37 |
38 | self.num_blocks = num_blocks
39 | self.l = l
40 | self.g = g
41 | self.k = k
42 | self.bn = bn
43 | self.bias = bias
44 |
45 | if optimizer == 'rmsprop':
46 | norm = nn.BatchNorm2d
47 |
48 | if optimizer == 'adamw':
49 |             norm = lambda ch: nn.GroupNorm(2, ch)
50 |
51 | self.n = num_blocks // 2
52 | scale = (2, 2)
53 |
54 | self.first_conv = nn.Sequential(
55 | nn.Conv2d(in_channels=self.dim_c, out_channels=g, kernel_size=(1, 1)),
56 | norm(g),
57 | nn.ReLU(),
58 | )
59 |
60 | f = self.dim_f
61 | c = g
62 | self.encoding_blocks = nn.ModuleList()
63 | self.ds = nn.ModuleList()
64 | for i in range(self.n):
65 | self.encoding_blocks.append(TFC_TDF(c, l, f, k, bn, bias=bias, norm=norm))
66 | self.ds.append(
67 | nn.Sequential(
68 | nn.Conv2d(in_channels=c, out_channels=c + g, kernel_size=scale, stride=scale),
69 | norm(c + g),
70 | nn.ReLU()
71 | )
72 | )
73 | f = f // 2
74 | c += g
75 |
76 | self.bottleneck_block = TFC_TDF(c, l, f, k, bn, bias=bias, norm=norm)
77 |
78 | self.decoding_blocks = nn.ModuleList()
79 | self.us = nn.ModuleList()
80 | for i in range(self.n):
81 | self.us.append(
82 | nn.Sequential(
83 | nn.ConvTranspose2d(in_channels=c, out_channels=c - g, kernel_size=scale, stride=scale),
84 | norm(c - g),
85 | nn.ReLU()
86 | )
87 | )
88 | f = f * 2
89 | c -= g
90 |
91 | self.decoding_blocks.append(TFC_TDF(c, l, f, k, bn, bias=bias, norm=norm))
92 |
93 | self.final_conv = nn.Sequential(
94 | nn.Conv2d(in_channels=c, out_channels=self.dim_c, kernel_size=(1, 1)),
95 | )
96 |
97 | def forward(self, x):
98 |
99 | x = self.first_conv(x)
100 |
101 | x = x.transpose(-1, -2)
102 |
103 | ds_outputs = []
104 | for i in range(self.n):
105 | x = self.encoding_blocks[i](x)
106 | ds_outputs.append(x)
107 | x = self.ds[i](x)
108 |
109 | x = self.bottleneck_block(x)
110 |
111 | for i in range(self.n):
112 | x = self.us[i](x)
113 | x *= ds_outputs[-i - 1]
114 | x = self.decoding_blocks[i](x)
115 |
116 | x = x.transpose(-1, -2)
117 |
118 | x = self.final_conv(x)
119 |
120 | return x
121 |
122 | class Mixer(nn.Module):
123 | def __init__(self, device, mixer_path):
124 |
125 | super(Mixer, self).__init__()
126 |
127 | self.linear = nn.Linear((dim_s+1)*2, dim_s*2, bias=False)
128 |
129 | self.load_state_dict(
130 | torch.load(mixer_path, map_location=device)
131 | )
132 |
133 | def forward(self, x):
134 | x = x.reshape(1,(dim_s+1)*2,-1).transpose(-1,-2)
135 | x = self.linear(x)
136 | return x.transpose(-1,-2).reshape(dim_s,2,-1)
--------------------------------------------------------------------------------
/src/models_dir/mdxc/modelparams/mdx_c_configs/model1.yaml:
--------------------------------------------------------------------------------
1 | audio:
2 | chunk_size: 260096
3 | dim_f: 4096
4 | dim_t: 128
5 | hop_length: 2048
6 | n_fft: 8192
7 | num_channels: 2
8 | sample_rate: 44100
9 | model:
10 | act: gelu
11 | bottleneck_factor: 4
12 | growth: 64
13 | norm: InstanceNorm
14 | num_blocks_per_scale: 2
15 | num_channels: 128
16 | num_scales: 5
17 | num_subbands: 4
18 | scale:
19 | - 2
20 | - 2
21 | training:
22 | batch_size: 8
23 | grad_clip: 0
24 | instruments:
25 | - Vocals
26 | - Drums
27 | - Bass
28 | - Other
29 | lr: 5.0e-05
30 | target_instrument: null
31 | inference:
32 | batch_size: 1
33 | dim_t: 256
34 | num_overlap: 8
--------------------------------------------------------------------------------
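Note on the MDX23C configs: these are plain YAML files with audio/model/training/inference sections; a loading sketch (pyyaml is already a dependency of the surrounding packages; the path is illustrative):

    import yaml

    with open("modelparams/mdx_c_configs/model1.yaml") as f:
        cfg = yaml.safe_load(f)
    print(cfg["audio"]["n_fft"], cfg["model"]["num_scales"])  # 8192 5
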
/src/models_dir/mdxc/modelparams/mdx_c_configs/model2.yaml:
--------------------------------------------------------------------------------
1 | audio:
2 | chunk_size: 260096
3 | dim_f: 4096
4 | dim_t: 128
5 | hop_length: 2048
6 | n_fft: 8192
7 | num_channels: 2
8 | sample_rate: 44100
9 | model:
10 | act: gelu
11 | bottleneck_factor: 4
12 | growth: 64
13 | norm: InstanceNorm
14 | num_blocks_per_scale: 2
15 | num_channels: 256
16 | num_scales: 5
17 | num_subbands: 4
18 | scale:
19 | - 2
20 | - 2
21 | training:
22 | batch_size: 8
23 | grad_clip: 0
24 | instruments:
25 | - Vocals
26 | - Drums
27 | - Bass
28 | - Other
29 | lr: 3.0e-05
30 | target_instrument: null
31 | inference:
32 | batch_size: 1
33 | dim_t: 256
34 | num_overlap: 8
--------------------------------------------------------------------------------
/src/models_dir/mdxc/modelparams/mdx_c_configs/model3.yaml:
--------------------------------------------------------------------------------
1 | audio:
2 | chunk_size: 260096
3 | dim_f: 4096
4 | dim_t: 128
5 | hop_length: 2048
6 | n_fft: 12288
7 | num_channels: 2
8 | sample_rate: 44100
9 | model:
10 | act: gelu
11 | bottleneck_factor: 4
12 | growth: 64
13 | norm: InstanceNorm
14 | num_blocks_per_scale: 2
15 | num_channels: 128
16 | num_scales: 5
17 | num_subbands: 4
18 | scale:
19 | - 2
20 | - 2
21 | training:
22 | batch_size: 8
23 | grad_clip: 0
24 | instruments:
25 | - Vocals
26 | - Drums
27 | - Bass
28 | - Other
29 | lr: 5.0e-05
30 | target_instrument: Vocals
31 | inference:
32 | batch_size: 1
33 | dim_t: 256
34 | num_overlap: 8
--------------------------------------------------------------------------------
/src/models_dir/mdxc/modelparams/mdx_c_configs/modelA.yaml:
--------------------------------------------------------------------------------
1 | audio:
2 | chunk_size: 261120
3 | dim_f: 4096
4 | dim_t: 256
5 | hop_length: 1024
6 | min_mean_abs: 0.01
7 | n_fft: 8192
8 | num_channels: 2
9 | sample_rate: 44100
10 | model:
11 | act: gelu
12 | bottleneck_factor: 4
13 | growth: 64
14 | norm: InstanceNorm
15 | num_blocks_per_scale: 2
16 | num_channels: 64
17 | num_scales: 5
18 | num_subbands: 4
19 | scale:
20 | - 2
21 | - 2
22 | training:
23 | batch_size: 6
24 | coarse_loss_clip: true
25 | ema_momentum: 0.999
26 | grad_clip: null
27 | instruments:
28 | - Vocals
29 | - Drums
30 | - Bass
31 | - Other
32 | lr: 0.0001
33 | num_steps: 100000
34 | q: 0.4
35 | target_instrument: null
36 | inference:
37 | batch_size: 2
38 | dim_t: 256
39 | num_overlap: 8
40 |
--------------------------------------------------------------------------------
/src/models_dir/mdxc/modelparams/mdx_c_configs/modelB.yaml:
--------------------------------------------------------------------------------
1 | audio:
2 | chunk_size: 261120
3 | dim_f: 4096
4 | dim_t: 256
5 | hop_length: 1024
6 | min_mean_abs: 0.01
7 | n_fft: 8192
8 | num_channels: 2
9 | sample_rate: 44100
10 | model:
11 | act: gelu
12 | bottleneck_factor: 4
13 | growth: 64
14 | norm: InstanceNorm
15 | num_blocks_per_scale: 2
16 | num_channels: 64
17 | num_scales: 5
18 | num_subbands: 4
19 | scale:
20 | - 2
21 | - 2
22 | training:
23 | batch_size: 6
24 | coarse_loss_clip: false
25 | datasets:
26 | - ../data/moises/bleeding
27 | ema_momentum: 0.999
28 | grad_clip: null
29 | instruments:
30 | - Vocals
31 | - Drums
32 | - Bass
33 | - Other
34 | lr: 0.0001
35 | num_steps: 150000
36 | q: 0.93
37 | target_instrument: null
38 | inference:
39 | batch_size: 2
40 | dim_t: 256
41 | num_overlap: 8
--------------------------------------------------------------------------------
/src/models_dir/mdxc/modelparams/mdx_c_configs/model_2_stem_061321.yaml:
--------------------------------------------------------------------------------
1 | audio:
2 | chunk_size: 260096
3 | dim_f: 4096
4 | dim_t: 256
5 | hop_length: 2048
6 | n_fft: 12288
7 | num_channels: 2
8 | sample_rate: 44100
9 | min_mean_abs: 0.001
10 | model:
11 | act: gelu
12 | bottleneck_factor: 4
13 | growth: 64
14 | norm: InstanceNorm
15 | num_blocks_per_scale: 2
16 | num_channels: 128
17 | num_scales: 5
18 | num_subbands: 4
19 | scale:
20 | - 2
21 | - 2
22 | name: epoch_10.ckpt
23 | training:
24 | batch_size: 16
25 | grad_clip: 0
26 | instruments:
27 | - Vocals
28 | - Instrumental
29 | lr: 5.0e-05
30 | target_instrument: null
31 | num_epochs: 100
32 | num_steps: 1000
33 | inference:
34 | batch_size: 1
35 | dim_t: 256
36 | num_overlap: 8
37 |
--------------------------------------------------------------------------------
/src/models_dir/mdxc/modelparams/mdx_c_configs/model_2_stem_full_band.yaml:
--------------------------------------------------------------------------------
1 | audio:
2 | chunk_size: 260096
3 | dim_f: 6144
4 | dim_t: 128
5 | hop_length: 2048
6 | n_fft: 12288
7 | num_channels: 2
8 | sample_rate: 44100
9 | min_mean_abs: 0.001
10 | model:
11 | act: gelu
12 | bottleneck_factor: 4
13 | growth: 64
14 | norm: InstanceNorm
15 | num_blocks_per_scale: 2
16 | num_channels: 128
17 | num_scales: 5
18 | num_subbands: 6
19 | scale:
20 | - 2
21 | - 2
22 | training:
23 | batch_size: 14
24 | grad_clip: 0
25 | instruments:
26 | - Vocals
27 | - Instrumental
28 | lr: 3.0e-05
29 | target_instrument: null
30 | num_epochs: 1000
31 | num_steps: 1000
32 | augmentation: 1
33 | inference:
34 | batch_size: 1
35 | dim_t: 256
36 | num_overlap: 8
--------------------------------------------------------------------------------
/src/models_dir/mdxc/modelparams/mdx_c_configs/model_2_stem_full_band_2.yaml:
--------------------------------------------------------------------------------
1 | audio:
2 | chunk_size: 260096
3 | dim_f: 6144
4 | dim_t: 128
5 | hop_length: 2048
6 | n_fft: 12288
7 | num_channels: 2
8 | sample_rate: 44100
9 | min_mean_abs: 0.001
10 | model:
11 | act: gelu
12 | bottleneck_factor: 4
13 | growth: 128
14 | norm: InstanceNorm
15 | num_blocks_per_scale: 2
16 | num_channels: 128
17 | num_scales: 5
18 | num_subbands: 6
19 | scale:
20 | - 2
21 | - 2
22 | training:
23 | batch_size: 14
24 | grad_clip: 0
25 | instruments:
26 | - Vocals
27 | - Instrumental
28 | lr: 2.0e-05
29 | target_instrument: null
30 | num_epochs: 1000
31 | num_steps: 1000
32 | augmentation: 1
33 | inference:
34 | batch_size: 1
35 | dim_t: 256
36 | num_overlap: 8
--------------------------------------------------------------------------------
/src/models_dir/mdxc/modelparams/mdx_c_configs/model_2_stem_full_band_3.yaml:
--------------------------------------------------------------------------------
1 | audio:
2 | chunk_size: 261120
3 | dim_f: 6144
4 | dim_t: 256
5 | hop_length: 1024
6 | n_fft: 12288
7 | num_channels: 2
8 | sample_rate: 44100
9 | min_mean_abs: 0.001
10 | model:
11 | act: gelu
12 | bottleneck_factor: 4
13 | growth: 128
14 | norm: InstanceNorm
15 | num_blocks_per_scale: 2
16 | num_channels: 128
17 | num_scales: 5
18 | num_subbands: 6
19 | scale:
20 | - 2
21 | - 2
22 | training:
23 | batch_size: 6
24 | grad_clip: 0
25 | instruments:
26 | - Vocals
27 | - Instrumental
28 | lr: 1.0e-05
29 | target_instrument: null
30 | num_epochs: 1000
31 | num_steps: 1000
32 | augmentation: 1
33 | q: 0.95
34 | coarse_loss_clip: true
35 | ema_momentum: 0.999
36 | inference:
37 | batch_size: 1
38 | dim_t: 256
39 | num_overlap: 8
--------------------------------------------------------------------------------
/src/models_dir/mdxc/modelparams/mdx_c_configs/model_2_stem_full_band_4.yaml:
--------------------------------------------------------------------------------
1 | audio:
2 | chunk_size: 261120
3 | dim_f: 6144
4 | dim_t: 256
5 | hop_length: 1024
6 | n_fft: 12288
7 | num_channels: 2
8 | sample_rate: 44100
9 | min_mean_abs: 0.001
10 | model:
11 | act: gelu
12 | bottleneck_factor: 4
13 | growth: 128
14 | norm: InstanceNorm
15 | num_blocks_per_scale: 2
16 | num_channels: 128
17 | num_scales: 5
18 | num_subbands: 6
19 | scale:
20 | - 2
21 | - 2
22 | training:
23 | batch_size: 6
24 | grad_clip: 0
25 | instruments:
26 | - Vocals
27 | - Instrumental
28 | lr: 0.7e-05
29 | patience: 2
30 | target_instrument: null
31 | num_epochs: 1000
32 | num_steps: 1000
33 | augmentation: 1
34 | q: 0.95
35 | coarse_loss_clip: true
36 | ema_momentum: 0.999
37 | inference:
38 | batch_size: 1
39 | dim_t: 256
40 | num_overlap: 8
--------------------------------------------------------------------------------
/src/models_dir/mdxc/modelparams/mdx_c_configs/model_2_stem_full_band_8k.yaml:
--------------------------------------------------------------------------------
1 | audio:
2 | chunk_size: 261120
3 | dim_f: 4096
4 | dim_t: 256
5 | hop_length: 1024
6 | n_fft: 8192
7 | num_channels: 2
8 | sample_rate: 44100
9 | min_mean_abs: 0.001
10 | model:
11 | act: gelu
12 | bottleneck_factor: 4
13 | growth: 128
14 | norm: InstanceNorm
15 | num_blocks_per_scale: 2
16 | num_channels: 128
17 | num_scales: 5
18 | num_subbands: 4
19 | scale:
20 | - 2
21 | - 2
22 | training:
23 | batch_size: 6
24 | grad_clip: 0
25 | instruments:
26 | - Vocals
27 | - Instrumental
28 | lr: 1.0e-05
29 | patience: 2
30 | reduce_factor: 0.95
31 | target_instrument: null
32 | num_epochs: 1000
33 | num_steps: 1000
34 | augmentation: 1
35 | augmentation_type: simple1
36 | augmentation_mix: true
37 | q: 0.95
38 | coarse_loss_clip: true
39 | ema_momentum: 0.999
40 | inference:
41 | batch_size: 1
42 | dim_t: 256
43 | num_overlap: 8
--------------------------------------------------------------------------------
/src/models_dir/mdxc/modelparams/mdx_c_configs/sndfx.yaml:
--------------------------------------------------------------------------------
1 | audio:
2 | chunk_size: 261120
3 | dim_f: 1024
4 | dim_t: 256
5 | hop_length: 1024
6 | min_mean_abs: 0.01
7 | n_fft: 2048
8 | num_channels: 2
9 | sample_rate: 44100
10 | stereo_prob: 0.7
11 | model:
12 | act: gelu
13 | bottleneck_factor: 4
14 | growth: 64
15 | norm: InstanceNorm
16 | num_blocks_per_scale: 2
17 | num_channels: 64
18 | num_scales: 5
19 | num_subbands: 4
20 | scale:
21 | - 2
22 | - 2
23 | training:
24 | batch_size: 8
25 | ema_momentum: 0.999
26 | grad_clip: null
27 | instruments:
28 | - Music
29 | - Speech
30 | - SFX
31 | lr: 0.0001
32 | num_steps: 30000
33 | target_instrument: null
34 | inference:
35 | batch_size: 8
36 | dim_t: 256
37 | instruments:
38 | - Music
39 | - Dialog
40 | - Effect
41 | num_overlap: 8
42 |
--------------------------------------------------------------------------------
/src/models_dir/mdxc/modelparams/model_name_mapper.json:
--------------------------------------------------------------------------------
1 | {
2 | "UVR_MDXNET_1_9703": "UVR-MDX-NET 1",
3 | "UVR_MDXNET_2_9682": "UVR-MDX-NET 2",
4 | "UVR_MDXNET_3_9662": "UVR-MDX-NET 3",
5 | "UVR_MDXNET_KARA": "UVR-MDX-NET Karaoke",
6 | "UVR_MDXNET_Main": "UVR-MDX-NET Main",
7 | "UVR-MDX-NET-Inst_1": "UVR-MDX-NET Inst 1",
8 | "UVR-MDX-NET-Inst_2": "UVR-MDX-NET Inst 2",
9 | "UVR-MDX-NET-Inst_3": "UVR-MDX-NET Inst 3",
10 | "UVR-MDX-NET-Inst_4": "UVR-MDX-NET Inst 4",
11 | "UVR-MDX-NET-Inst_Main": "UVR-MDX-NET Inst Main",
12 | "UVR-MDX-NET-Inst_Main_2": "UVR-MDX-NET Inst Main 2",
13 | "UVR-MDX-NET-Inst_HQ_1": "UVR-MDX-NET Inst HQ 1",
14 | "UVR-MDX-NET-Inst_HQ_2": "UVR-MDX-NET Inst HQ 2",
15 | "UVR-MDX-NET-Inst_HQ_3": "UVR-MDX-NET Inst HQ 3",
16 | "UVR_MDXNET_KARA_2": "UVR-MDX-NET Karaoke 2",
17 | "Kim_Vocal_1": "Kim Vocal 1",
18 | "Kim_Vocal_2": "Kim Vocal 2",
19 | "Kim_Inst": "Kim Inst",
20 | "MDX23C-8KFFT-InstVoc_HQ.ckpt": "MDX23C-InstVoc HQ",
21 | "MDX23C-8KFFT-InstVoc_HQ_2.ckpt": "MDX23C-InstVoc HQ 2",
22 | "MDX23C_D1581.ckpt": "MDX23C-InstVoc D1581",
23 | "Reverb_HQ_By_FoxJoy": "Reverb HQ"
24 | }
--------------------------------------------------------------------------------
/src/models_dir/mdxc/modules.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class TFC(nn.Module):
6 | def __init__(self, c, l, k, norm):
7 | super(TFC, self).__init__()
8 |
9 | self.H = nn.ModuleList()
10 | for i in range(l):
11 | self.H.append(
12 | nn.Sequential(
13 | nn.Conv2d(in_channels=c, out_channels=c, kernel_size=k, stride=1, padding=k // 2),
14 | norm(c),
15 | nn.ReLU(),
16 | )
17 | )
18 |
19 | def forward(self, x):
20 | for h in self.H:
21 | x = h(x)
22 | return x
23 |
24 |
25 | class DenseTFC(nn.Module):
26 | def __init__(self, c, l, k, norm):
27 | super(DenseTFC, self).__init__()
28 |
29 | self.conv = nn.ModuleList()
30 | for i in range(l):
31 | self.conv.append(
32 | nn.Sequential(
33 | nn.Conv2d(in_channels=c, out_channels=c, kernel_size=k, stride=1, padding=k // 2),
34 | norm(c),
35 | nn.ReLU(),
36 | )
37 | )
38 |
39 | def forward(self, x):
40 | for layer in self.conv[:-1]:
41 | x = torch.cat([layer(x), x], 1)
42 | return self.conv[-1](x)
43 |
44 |
45 | class TFC_TDF(nn.Module):
46 | def __init__(self, c, l, f, k, bn, dense=False, bias=True, norm=nn.BatchNorm2d):
47 |
48 | super(TFC_TDF, self).__init__()
49 |
50 | self.use_tdf = bn is not None
51 |
52 | self.tfc = DenseTFC(c, l, k, norm) if dense else TFC(c, l, k, norm)
53 |
54 | if self.use_tdf:
55 | if bn == 0:
56 | self.tdf = nn.Sequential(
57 | nn.Linear(f, f, bias=bias),
58 | norm(c),
59 | nn.ReLU()
60 | )
61 | else:
62 | self.tdf = nn.Sequential(
63 | nn.Linear(f, f // bn, bias=bias),
64 | norm(c),
65 | nn.ReLU(),
66 | nn.Linear(f // bn, f, bias=bias),
67 | norm(c),
68 | nn.ReLU()
69 | )
70 |
71 | def forward(self, x):
72 | x = self.tfc(x)
73 | return x + self.tdf(x) if self.use_tdf else x
74 |
75 |
--------------------------------------------------------------------------------
/src/models_dir/mdxc/pyrb.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import tempfile
4 | import six
5 | import numpy as np
6 | import soundfile as sf
7 | import sys
8 |
9 | if getattr(sys, 'frozen', False):
10 | BASE_PATH_RUB = sys._MEIPASS
11 | else:
12 | BASE_PATH_RUB = os.path.dirname(os.path.abspath(__file__))
13 |
14 | __all__ = ['time_stretch', 'pitch_shift']
15 |
16 | __RUBBERBAND_UTIL = os.path.join(BASE_PATH_RUB, 'rubberband')
17 |
18 | if six.PY2:
19 | DEVNULL = open(os.devnull, 'w')
20 | else:
21 | DEVNULL = subprocess.DEVNULL
22 |
23 | def __rubberband(y, sr, **kwargs):
24 |
25 | assert sr > 0
26 |
27 | # Get the input and output tempfile
28 | fd, infile = tempfile.mkstemp(suffix='.wav')
29 | os.close(fd)
30 | fd, outfile = tempfile.mkstemp(suffix='.wav')
31 | os.close(fd)
32 |
33 | # dump the audio
34 | sf.write(infile, y, sr)
35 |
36 | try:
37 | # Execute rubberband
38 | arguments = [__RUBBERBAND_UTIL, '-q']
39 |
40 | for key, value in six.iteritems(kwargs):
41 | arguments.append(str(key))
42 | arguments.append(str(value))
43 |
44 | arguments.extend([infile, outfile])
45 |
46 | subprocess.check_call(arguments, stdout=DEVNULL, stderr=DEVNULL)
47 |
48 | # Load the processed audio.
49 | y_out, _ = sf.read(outfile, always_2d=True)
50 |
51 |         # make sure that the output dimensions match the input
52 | if y.ndim == 1:
53 | y_out = np.squeeze(y_out)
54 |
55 | except OSError as exc:
56 | six.raise_from(RuntimeError('Failed to execute rubberband. '
57 | 'Please verify that rubberband-cli '
58 | 'is installed.'),
59 | exc)
60 |
61 | finally:
62 | # Remove temp files
63 | os.unlink(infile)
64 | os.unlink(outfile)
65 |
66 | return y_out
67 |
68 | def time_stretch(y, sr, rate, rbargs=None):
69 | if rate <= 0:
70 | raise ValueError('rate must be strictly positive')
71 |
72 | if rate == 1.0:
73 | return y
74 |
75 | if rbargs is None:
76 | rbargs = dict()
77 |
78 | rbargs.setdefault('--tempo', rate)
79 |
80 | return __rubberband(y, sr, **rbargs)
81 |
82 | def pitch_shift(y, sr, n_steps, rbargs=None):
83 |
84 | if n_steps == 0:
85 | return y
86 |
87 | if rbargs is None:
88 | rbargs = dict()
89 |
90 | rbargs.setdefault('--pitch', n_steps)
91 |
92 | return __rubberband(y, sr, **rbargs)
93 |
--------------------------------------------------------------------------------
/src/models_dir/models.json:
--------------------------------------------------------------------------------
1 | {
2 | "demucs":{
3 | "hdemucs_mmi":{
4 | "model_path":[
5 | "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/75fc33f5-1941ce65.th",
6 | "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/hdemucs_mmi.yaml"
7 | ]
8 | }
9 | },
10 | "vr_network":{
11 | "1_HP-UVR":{
12 | "model_path":[
13 | "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/1_HP-UVR.pth"
14 | ]
15 | }
16 | },
17 | "mdx":{
18 | "UVR-MDX-NET-Inst_1":{
19 | "model_path":[
20 | "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/UVR-MDX-NET-Inst_1.onnx"
21 | ]
22 | }
23 | },
24 | "mdxc":{
25 | "MDX23C-8KFFT-InstVoc_HQ":{
26 | "model_path":[
27 | "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/MDX23C-8KFFT-InstVoc_HQ.ckpt"
28 | ]
29 | }
30 | }
31 | }
--------------------------------------------------------------------------------
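Note on models.json above: the registry maps each architecture family to model names and their artifact URLs; a minimal download sketch using only the standard library (the destination directory is illustrative):

    import json
    import os
    import urllib.request

    with open("models.json") as f:
        registry = json.load(f)

    os.makedirs("downloads", exist_ok=True)
    for url in registry["demucs"]["hdemucs_mmi"]["model_path"]:
        dest = os.path.join("downloads", url.rsplit("/", 1)[-1])
        urllib.request.urlretrieve(url, dest)  # fetch weights and config side by side
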
/src/models_dir/vr_network/__init__.py:
--------------------------------------------------------------------------------
1 | # VR init.
2 |
--------------------------------------------------------------------------------
/src/models_dir/vr_network/constants.py:
--------------------------------------------------------------------------------
1 | import platform
2 |
3 | #Platform Details
4 | OPERATING_SYSTEM = platform.system()
5 | SYSTEM_ARCH = platform.platform()
6 | SYSTEM_PROC = platform.processor()
7 | ARM = 'arm'
8 |
9 | # Network Constants
10 | N_BINS = 'n_bins'
11 |
12 |
13 | ALL_STEMS = 'All Stems'
14 | VOCAL_STEM = 'Vocals'
15 | INST_STEM = 'Instrumental'
16 | OTHER_STEM = 'Other'
17 | BASS_STEM = 'Bass'
18 | DRUM_STEM = 'Drums'
19 | GUITAR_STEM = 'Guitar'
20 | PIANO_STEM = 'Piano'
21 | SYNTH_STEM = 'Synthesizer'
22 | STRINGS_STEM = 'Strings'
23 | WOODWINDS_STEM = 'Woodwinds'
24 | BRASS_STEM = 'Brass'
25 | WIND_INST_STEM = 'Wind Inst'
26 | NO_OTHER_STEM = 'No Other'
27 | NO_BASS_STEM = 'No Bass'
28 | NO_DRUM_STEM = 'No Drums'
29 | NO_GUITAR_STEM = 'No Guitar'
30 | NO_PIANO_STEM = 'No Piano'
31 | NO_SYNTH_STEM = 'No Synthesizer'
32 | NO_STRINGS_STEM = 'No Strings'
33 | NO_WOODWINDS_STEM = 'No Woodwinds'
34 | NO_WIND_INST_STEM = 'No Wind Inst'
35 | NO_BRASS_STEM = 'No Brass'
36 | PRIMARY_STEM = 'Primary Stem'
37 | SECONDARY_STEM = 'Secondary Stem'
38 |
39 |
40 | NO_STEM = "No "
41 |
42 | NON_ACCOM_STEMS = (
43 | VOCAL_STEM,
44 | OTHER_STEM,
45 | BASS_STEM,
46 | DRUM_STEM,
47 | GUITAR_STEM,
48 | PIANO_STEM,
49 | SYNTH_STEM,
50 | STRINGS_STEM,
51 | WOODWINDS_STEM,
52 | BRASS_STEM,
53 | WIND_INST_STEM)
--------------------------------------------------------------------------------
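Note on constants.py above: the NO_STEM prefix pairs each stem with its complement name, which is how the NO_* constants are formed:

    print(NO_STEM + GUITAR_STEM)  # -> "No Guitar" (== NO_GUITAR_STEM)
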
/src/models_dir/vr_network/layers.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | import torch.nn.functional as F
4 |
5 | from . import spec_utils
6 |
7 | class Conv2DBNActiv(nn.Module):
8 |
9 | def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
10 | super(Conv2DBNActiv, self).__init__()
11 | self.conv = nn.Sequential(
12 | nn.Conv2d(
13 | nin, nout,
14 | kernel_size=ksize,
15 | stride=stride,
16 | padding=pad,
17 | dilation=dilation,
18 | bias=False),
19 | nn.BatchNorm2d(nout),
20 | activ()
21 | )
22 |
23 | def __call__(self, x):
24 | return self.conv(x)
25 |
26 | class SeperableConv2DBNActiv(nn.Module):
27 |
28 | def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
29 | super(SeperableConv2DBNActiv, self).__init__()
30 | self.conv = nn.Sequential(
31 | nn.Conv2d(
32 | nin, nin,
33 | kernel_size=ksize,
34 | stride=stride,
35 | padding=pad,
36 | dilation=dilation,
37 | groups=nin,
38 | bias=False),
39 | nn.Conv2d(
40 | nin, nout,
41 | kernel_size=1,
42 | bias=False),
43 | nn.BatchNorm2d(nout),
44 | activ()
45 | )
46 |
47 | def __call__(self, x):
48 | return self.conv(x)
49 |
50 |
51 | class Encoder(nn.Module):
52 |
53 | def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
54 | super(Encoder, self).__init__()
55 | self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
56 | self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)
57 |
58 | def __call__(self, x):
59 | skip = self.conv1(x)
60 | h = self.conv2(skip)
61 |
62 | return h, skip
63 |
64 |
65 | class Decoder(nn.Module):
66 |
67 | def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
68 | super(Decoder, self).__init__()
69 | self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
70 | self.dropout = nn.Dropout2d(0.1) if dropout else None
71 |
72 | def __call__(self, x, skip=None):
73 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
74 | if skip is not None:
75 | skip = spec_utils.crop_center(skip, x)
76 | x = torch.cat([x, skip], dim=1)
77 | h = self.conv(x)
78 |
79 | if self.dropout is not None:
80 | h = self.dropout(h)
81 |
82 | return h
83 |
84 |
85 | class ASPPModule(nn.Module):
86 |
87 | def __init__(self, nn_architecture, nin, nout, dilations=(4, 8, 16), activ=nn.ReLU):
88 | super(ASPPModule, self).__init__()
89 | self.conv1 = nn.Sequential(
90 | nn.AdaptiveAvgPool2d((1, None)),
91 | Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
92 | )
93 |
94 | self.nn_architecture = nn_architecture
95 | self.six_layer = [129605]
96 | self.seven_layer = [537238, 537227, 33966]
97 |
98 | extra_conv = SeperableConv2DBNActiv(
99 | nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
100 |
101 | self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
102 | self.conv3 = SeperableConv2DBNActiv(
103 | nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
104 | self.conv4 = SeperableConv2DBNActiv(
105 | nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
106 | self.conv5 = SeperableConv2DBNActiv(
107 | nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
108 |
109 | if self.nn_architecture in self.six_layer:
110 | self.conv6 = extra_conv
111 | nin_x = 6
112 | elif self.nn_architecture in self.seven_layer:
113 | self.conv6 = extra_conv
114 |             self.conv7 = extra_conv  # same module instance as conv6: both branches share weights
115 | nin_x = 7
116 | else:
117 | nin_x = 5
118 |
119 | self.bottleneck = nn.Sequential(
120 | Conv2DBNActiv(nin * nin_x, nout, 1, 1, 0, activ=activ),
121 | nn.Dropout2d(0.1)
122 | )
123 |
124 | def forward(self, x):
125 | _, _, h, w = x.size()
126 | feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
127 | feat2 = self.conv2(x)
128 | feat3 = self.conv3(x)
129 | feat4 = self.conv4(x)
130 | feat5 = self.conv5(x)
131 |
132 | if self.nn_architecture in self.six_layer:
133 | feat6 = self.conv6(x)
134 | out = torch.cat((feat1, feat2, feat3, feat4, feat5, feat6), dim=1)
135 | elif self.nn_architecture in self.seven_layer:
136 | feat6 = self.conv6(x)
137 | feat7 = self.conv7(x)
138 | out = torch.cat((feat1, feat2, feat3, feat4, feat5, feat6, feat7), dim=1)
139 | else:
140 | out = torch.cat((feat1, feat2, feat3, feat4, feat5), dim=1)
141 |
142 | bottle = self.bottleneck(out)
143 | return bottle
144 |
--------------------------------------------------------------------------------
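Note on layers.py above: a shape sketch for ASPPModule. nn_architecture is matched against the parameter-count keys above to choose between five, six, or seven parallel branches; any other value (the one below is made up) takes the default five-branch path:

    import torch

    aspp = ASPPModule(nn_architecture=0, nin=16, nout=32)  # 0: five-branch default
    x = torch.randn(1, 16, 64, 128)
    print(aspp(x).shape)  # torch.Size([1, 32, 64, 128])
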
/src/models_dir/vr_network/layers_new.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | import torch.nn.functional as F
4 |
5 | from . import spec_utils
6 |
7 | class Conv2DBNActiv(nn.Module):
8 |
9 | def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
10 | super(Conv2DBNActiv, self).__init__()
11 | self.conv = nn.Sequential(
12 | nn.Conv2d(
13 | nin, nout,
14 | kernel_size=ksize,
15 | stride=stride,
16 | padding=pad,
17 | dilation=dilation,
18 | bias=False),
19 | nn.BatchNorm2d(nout),
20 | activ()
21 | )
22 |
23 | def __call__(self, x):
24 | return self.conv(x)
25 |
26 | class Encoder(nn.Module):
27 |
28 | def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
29 | super(Encoder, self).__init__()
30 | self.conv1 = Conv2DBNActiv(nin, nout, ksize, stride, pad, activ=activ)
31 | self.conv2 = Conv2DBNActiv(nout, nout, ksize, 1, pad, activ=activ)
32 |
33 | def __call__(self, x):
34 | h = self.conv1(x)
35 | h = self.conv2(h)
36 |
37 | return h
38 |
39 |
40 | class Decoder(nn.Module):
41 |
42 | def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
43 | super(Decoder, self).__init__()
44 | self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
45 | # self.conv2 = Conv2DBNActiv(nout, nout, ksize, 1, pad, activ=activ)
46 | self.dropout = nn.Dropout2d(0.1) if dropout else None
47 |
48 | def __call__(self, x, skip=None):
49 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
50 |
51 | if skip is not None:
52 | skip = spec_utils.crop_center(skip, x)
53 | x = torch.cat([x, skip], dim=1)
54 |
55 | h = self.conv1(x)
56 | # h = self.conv2(h)
57 |
58 | if self.dropout is not None:
59 | h = self.dropout(h)
60 |
61 | return h
62 |
63 |
64 | class ASPPModule(nn.Module):
65 |
66 | def __init__(self, nin, nout, dilations=(4, 8, 12), activ=nn.ReLU, dropout=False):
67 | super(ASPPModule, self).__init__()
68 | self.conv1 = nn.Sequential(
69 | nn.AdaptiveAvgPool2d((1, None)),
70 | Conv2DBNActiv(nin, nout, 1, 1, 0, activ=activ)
71 | )
72 | self.conv2 = Conv2DBNActiv(nin, nout, 1, 1, 0, activ=activ)
73 | self.conv3 = Conv2DBNActiv(
74 | nin, nout, 3, 1, dilations[0], dilations[0], activ=activ
75 | )
76 | self.conv4 = Conv2DBNActiv(
77 | nin, nout, 3, 1, dilations[1], dilations[1], activ=activ
78 | )
79 | self.conv5 = Conv2DBNActiv(
80 | nin, nout, 3, 1, dilations[2], dilations[2], activ=activ
81 | )
82 | self.bottleneck = Conv2DBNActiv(nout * 5, nout, 1, 1, 0, activ=activ)
83 | self.dropout = nn.Dropout2d(0.1) if dropout else None
84 |
85 | def forward(self, x):
86 | _, _, h, w = x.size()
87 | feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
88 | feat2 = self.conv2(x)
89 | feat3 = self.conv3(x)
90 | feat4 = self.conv4(x)
91 | feat5 = self.conv5(x)
92 | out = torch.cat((feat1, feat2, feat3, feat4, feat5), dim=1)
93 | out = self.bottleneck(out)
94 |
95 | if self.dropout is not None:
96 | out = self.dropout(out)
97 |
98 | return out
99 |
100 |
101 | class LSTMModule(nn.Module):
102 |
103 | def __init__(self, nin_conv, nin_lstm, nout_lstm):
104 | super(LSTMModule, self).__init__()
105 | self.conv = Conv2DBNActiv(nin_conv, 1, 1, 1, 0)
106 | self.lstm = nn.LSTM(
107 | input_size=nin_lstm,
108 | hidden_size=nout_lstm // 2,
109 | bidirectional=True
110 | )
111 | self.dense = nn.Sequential(
112 | nn.Linear(nout_lstm, nin_lstm),
113 | nn.BatchNorm1d(nin_lstm),
114 | nn.ReLU()
115 | )
116 |
117 | def forward(self, x):
118 | N, _, nbins, nframes = x.size()
119 | h = self.conv(x)[:, 0] # N, nbins, nframes
120 | h = h.permute(2, 0, 1) # nframes, N, nbins
121 | h, _ = self.lstm(h)
122 | h = self.dense(h.reshape(-1, h.size()[-1])) # nframes * N, nbins
123 | h = h.reshape(nframes, N, 1, nbins)
124 | h = h.permute(1, 2, 3, 0)
125 |
126 | return h
127 |
--------------------------------------------------------------------------------
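Note on layers_new.py above: a shape sketch for LSTMModule. The 1x1 conv collapses the channel axis, the BiLSTM runs along the frame axis, and the dense head maps each frame back to nbins (values are illustrative; nin_lstm must equal nbins here):

    import torch

    m = LSTMModule(nin_conv=8, nin_lstm=128, nout_lstm=64)
    x = torch.randn(2, 8, 128, 100)  # (N, channels, nbins, nframes)
    print(m(x).shape)                # torch.Size([2, 1, 128, 100])
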
/src/models_dir/vr_network/model_param_init.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | default_param = {}
4 | default_param['bins'] = -1
5 | default_param['unstable_bins'] = -1 # training only
6 | default_param['stable_bins'] = -1 # training only
7 | default_param['sr'] = 44100
8 | default_param['pre_filter_start'] = -1
9 | default_param['pre_filter_stop'] = -1
10 | default_param['band'] = {}
11 |
12 | N_BINS = 'n_bins'
13 |
14 | def int_keys(d):
15 | r = {}
16 | for k, v in d:
17 | if k.isdigit():
18 | k = int(k)
19 | r[k] = v
20 | return r
21 |
22 | class ModelParameters(object):
23 | def __init__(self, config_path=''):
24 | with open(config_path, 'r') as f:
25 | self.param = json.loads(f.read(), object_pairs_hook=int_keys)
26 |
27 | for k in ['mid_side', 'mid_side_b', 'mid_side_b2', 'stereo_w', 'stereo_n', 'reverse']:
28 |             if k not in self.param:
29 | self.param[k] = False
30 |
31 | if N_BINS in self.param:
32 | self.param['bins'] = self.param[N_BINS]
--------------------------------------------------------------------------------
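Note on model_param_init.py above: ModelParameters consumes the band-config JSONs that follow; int_keys converts the digit band keys to ints, so band indices are integers after loading (the path is illustrative):

    params = ModelParameters("modelparams/1band_sr44100_hl512.json")
    print(params.param["sr"])        # 44100
    print(params.param["band"][1])   # band keys are ints thanks to int_keys
    print(params.param["mid_side"])  # defaults to False when absent
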
/src/models_dir/vr_network/modelparams/1band_sr16000_hl512.json:
--------------------------------------------------------------------------------
1 | {
2 | "bins": 1024,
3 | "unstable_bins": 0,
4 | "reduction_bins": 0,
5 | "band": {
6 | "1": {
7 | "sr": 16000,
8 | "hl": 512,
9 | "n_fft": 2048,
10 | "crop_start": 0,
11 | "crop_stop": 1024,
12 | "hpf_start": -1,
13 | "res_type": "sinc_best"
14 | }
15 | },
16 | "sr": 16000,
17 | "pre_filter_start": 1023,
18 | "pre_filter_stop": 1024
19 | }
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/1band_sr32000_hl512.json:
--------------------------------------------------------------------------------
1 | {
2 | "bins": 1024,
3 | "unstable_bins": 0,
4 | "reduction_bins": 0,
5 | "band": {
6 | "1": {
7 | "sr": 32000,
8 | "hl": 512,
9 | "n_fft": 2048,
10 | "crop_start": 0,
11 | "crop_stop": 1024,
12 | "hpf_start": -1,
13 | "res_type": "kaiser_fast"
14 | }
15 | },
16 | "sr": 32000,
17 | "pre_filter_start": 1000,
18 | "pre_filter_stop": 1021
19 | }
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/1band_sr33075_hl384.json:
--------------------------------------------------------------------------------
1 | {
2 | "bins": 1024,
3 | "unstable_bins": 0,
4 | "reduction_bins": 0,
5 | "band": {
6 | "1": {
7 | "sr": 33075,
8 | "hl": 384,
9 | "n_fft": 2048,
10 | "crop_start": 0,
11 | "crop_stop": 1024,
12 | "hpf_start": -1,
13 | "res_type": "sinc_best"
14 | }
15 | },
16 | "sr": 33075,
17 | "pre_filter_start": 1000,
18 | "pre_filter_stop": 1021
19 | }
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/1band_sr44100_hl1024.json:
--------------------------------------------------------------------------------
1 | {
2 | "bins": 1024,
3 | "unstable_bins": 0,
4 | "reduction_bins": 0,
5 | "band": {
6 | "1": {
7 | "sr": 44100,
8 | "hl": 1024,
9 | "n_fft": 2048,
10 | "crop_start": 0,
11 | "crop_stop": 1024,
12 | "hpf_start": -1,
13 | "res_type": "sinc_best"
14 | }
15 | },
16 | "sr": 44100,
17 | "pre_filter_start": 1023,
18 | "pre_filter_stop": 1024
19 | }
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/1band_sr44100_hl256.json:
--------------------------------------------------------------------------------
1 | {
2 | "bins": 256,
3 | "unstable_bins": 0,
4 | "reduction_bins": 0,
5 | "band": {
6 | "1": {
7 | "sr": 44100,
8 | "hl": 256,
9 | "n_fft": 512,
10 | "crop_start": 0,
11 | "crop_stop": 256,
12 | "hpf_start": -1,
13 | "res_type": "sinc_best"
14 | }
15 | },
16 | "sr": 44100,
17 | "pre_filter_start": 256,
18 | "pre_filter_stop": 256
19 | }
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/1band_sr44100_hl512.json:
--------------------------------------------------------------------------------
1 | {
2 | "bins": 1024,
3 | "unstable_bins": 0,
4 | "reduction_bins": 0,
5 | "band": {
6 | "1": {
7 | "sr": 44100,
8 | "hl": 512,
9 | "n_fft": 2048,
10 | "crop_start": 0,
11 | "crop_stop": 1024,
12 | "hpf_start": -1,
13 | "res_type": "sinc_best"
14 | }
15 | },
16 | "sr": 44100,
17 | "pre_filter_start": 1023,
18 | "pre_filter_stop": 1024
19 | }
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/1band_sr44100_hl512_cut.json:
--------------------------------------------------------------------------------
1 | {
2 | "bins": 1024,
3 | "unstable_bins": 0,
4 | "reduction_bins": 0,
5 | "band": {
6 | "1": {
7 | "sr": 44100,
8 | "hl": 512,
9 | "n_fft": 2048,
10 | "crop_start": 0,
11 | "crop_stop": 700,
12 | "hpf_start": -1,
13 | "res_type": "sinc_best"
14 | }
15 | },
16 | "sr": 44100,
17 | "pre_filter_start": 1023,
18 | "pre_filter_stop": 700
19 | }
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/1band_sr44100_hl512_nf1024.json:
--------------------------------------------------------------------------------
1 | {
2 | "bins": 512,
3 | "unstable_bins": 0,
4 | "reduction_bins": 0,
5 | "band": {
6 | "1": {
7 | "sr": 44100,
8 | "hl": 512,
9 | "n_fft": 1024,
10 | "crop_start": 0,
11 | "crop_stop": 512,
12 | "hpf_start": -1,
13 | "res_type": "sinc_best"
14 | }
15 | },
16 | "sr": 44100,
17 | "pre_filter_start": 511,
18 | "pre_filter_stop": 512
19 | }
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/2band_32000.json:
--------------------------------------------------------------------------------
1 | {
2 | "bins": 768,
3 | "unstable_bins": 7,
4 | "reduction_bins": 705,
5 | "band": {
6 | "1": {
7 | "sr": 6000,
8 | "hl": 66,
9 | "n_fft": 512,
10 | "crop_start": 0,
11 | "crop_stop": 240,
12 | "lpf_start": 60,
13 | "lpf_stop": 118,
14 | "res_type": "sinc_fastest"
15 | },
16 | "2": {
17 | "sr": 32000,
18 | "hl": 352,
19 | "n_fft": 1024,
20 | "crop_start": 22,
21 | "crop_stop": 505,
22 | "hpf_start": 44,
23 | "hpf_stop": 23,
24 | "res_type": "sinc_medium"
25 | }
26 | },
27 | "sr": 32000,
28 | "pre_filter_start": 710,
29 | "pre_filter_stop": 731
30 | }
31 |
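Each band entry describes a per-band STFT: the source is resampled to the band's sr with the given librosa res_type, transformed with window n_fft and hop length hl, and only bins crop_start:crop_stop are kept; the lpf_*/hpf_* fields appear to mark the crossfade regions where adjacent bands overlap. A rough sketch of how one band could be consumed (an assumption-level illustration; the repository's spec_utils implements the actual pipeline):

    import json
    import librosa

    with open('modelparams/2band_32000.json') as f:
        params = json.load(f)

    def band_magnitude(y, src_sr, cfg):
        # resample mono audio to the band's rate, STFT with its window/hop, then crop bins
        y_band = librosa.resample(y, orig_sr=src_sr, target_sr=cfg['sr'], res_type=cfg['res_type'])
        spec = librosa.stft(y_band, n_fft=cfg['n_fft'], hop_length=cfg['hl'])
        return abs(spec[cfg['crop_start']:cfg['crop_stop']])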
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/2band_44100_lofi.json:
--------------------------------------------------------------------------------
1 | {
2 | "bins": 512,
3 | "unstable_bins": 7,
4 | "reduction_bins": 510,
5 | "band": {
6 | "1": {
7 | "sr": 11025,
8 | "hl": 160,
9 | "n_fft": 768,
10 | "crop_start": 0,
11 | "crop_stop": 192,
12 | "lpf_start": 41,
13 | "lpf_stop": 139,
14 | "res_type": "sinc_fastest"
15 | },
16 | "2": {
17 | "sr": 44100,
18 | "hl": 640,
19 | "n_fft": 1024,
20 | "crop_start": 10,
21 | "crop_stop": 320,
22 | "hpf_start": 47,
23 | "hpf_stop": 15,
24 | "res_type": "sinc_medium"
25 | }
26 | },
27 | "sr": 44100,
28 | "pre_filter_start": 510,
29 | "pre_filter_stop": 512
30 | }
31 |
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/2band_48000.json:
--------------------------------------------------------------------------------
1 | {
2 | "bins": 768,
3 | "unstable_bins": 7,
4 | "reduction_bins": 705,
5 | "band": {
6 | "1": {
7 | "sr": 6000,
8 | "hl": 66,
9 | "n_fft": 512,
10 | "crop_start": 0,
11 | "crop_stop": 240,
12 | "lpf_start": 60,
13 | "lpf_stop": 240,
14 | "res_type": "sinc_fastest"
15 | },
16 | "2": {
17 | "sr": 48000,
18 | "hl": 528,
19 | "n_fft": 1536,
20 | "crop_start": 22,
21 | "crop_stop": 505,
22 | "hpf_start": 82,
23 | "hpf_stop": 22,
24 | "res_type": "sinc_medium"
25 | }
26 | },
27 | "sr": 48000,
28 | "pre_filter_start": 710,
29 | "pre_filter_stop": 731
30 | }
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/3band_44100.json:
--------------------------------------------------------------------------------
1 | {
2 | "bins": 768,
3 | "unstable_bins": 5,
4 | "reduction_bins": 733,
5 | "band": {
6 | "1": {
7 | "sr": 11025,
8 | "hl": 128,
9 | "n_fft": 768,
10 | "crop_start": 0,
11 | "crop_stop": 278,
12 | "lpf_start": 28,
13 | "lpf_stop": 140,
14 | "res_type": "polyphase"
15 | },
16 | "2": {
17 | "sr": 22050,
18 | "hl": 256,
19 | "n_fft": 768,
20 | "crop_start": 14,
21 | "crop_stop": 322,
22 | "hpf_start": 70,
23 | "hpf_stop": 14,
24 | "lpf_start": 283,
25 | "lpf_stop": 314,
26 | "res_type": "polyphase"
27 | },
28 | "3": {
29 | "sr": 44100,
30 | "hl": 512,
31 | "n_fft": 768,
32 | "crop_start": 131,
33 | "crop_stop": 313,
34 | "hpf_start": 154,
35 | "hpf_stop": 141,
36 | "res_type": "sinc_medium"
37 | }
38 | },
39 | "sr": 44100,
40 | "pre_filter_start": 757,
41 | "pre_filter_stop": 768
42 | }
43 |
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/3band_44100_mid.json:
--------------------------------------------------------------------------------
1 | {
2 | "mid_side": true,
3 | "bins": 768,
4 | "unstable_bins": 5,
5 | "reduction_bins": 733,
6 | "band": {
7 | "1": {
8 | "sr": 11025,
9 | "hl": 128,
10 | "n_fft": 768,
11 | "crop_start": 0,
12 | "crop_stop": 278,
13 | "lpf_start": 28,
14 | "lpf_stop": 140,
15 | "res_type": "polyphase"
16 | },
17 | "2": {
18 | "sr": 22050,
19 | "hl": 256,
20 | "n_fft": 768,
21 | "crop_start": 14,
22 | "crop_stop": 322,
23 | "hpf_start": 70,
24 | "hpf_stop": 14,
25 | "lpf_start": 283,
26 | "lpf_stop": 314,
27 | "res_type": "polyphase"
28 | },
29 | "3": {
30 | "sr": 44100,
31 | "hl": 512,
32 | "n_fft": 768,
33 | "crop_start": 131,
34 | "crop_stop": 313,
35 | "hpf_start": 154,
36 | "hpf_stop": 141,
37 | "res_type": "sinc_medium"
38 | }
39 | },
40 | "sr": 44100,
41 | "pre_filter_start": 757,
42 | "pre_filter_stop": 768
43 | }
44 |
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/3band_44100_msb2.json:
--------------------------------------------------------------------------------
1 | {
2 | "mid_side_b2": true,
3 | "bins": 640,
4 | "unstable_bins": 7,
5 | "reduction_bins": 565,
6 | "band": {
7 | "1": {
8 | "sr": 11025,
9 | "hl": 108,
10 | "n_fft": 1024,
11 | "crop_start": 0,
12 | "crop_stop": 187,
13 | "lpf_start": 92,
14 | "lpf_stop": 186,
15 | "res_type": "polyphase"
16 | },
17 | "2": {
18 | "sr": 22050,
19 | "hl": 216,
20 | "n_fft": 768,
21 | "crop_start": 0,
22 | "crop_stop": 212,
23 | "hpf_start": 68,
24 | "hpf_stop": 34,
25 | "lpf_start": 174,
26 | "lpf_stop": 209,
27 | "res_type": "polyphase"
28 | },
29 | "3": {
30 | "sr": 44100,
31 | "hl": 432,
32 | "n_fft": 640,
33 | "crop_start": 66,
34 | "crop_stop": 307,
35 | "hpf_start": 86,
36 | "hpf_stop": 72,
37 | "res_type": "kaiser_fast"
38 | }
39 | },
40 | "sr": 44100,
41 | "pre_filter_start": 639,
42 | "pre_filter_stop": 640
43 | }
44 |
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/4band_44100.json:
--------------------------------------------------------------------------------
1 | {
2 | "bins": 768,
3 | "unstable_bins": 7,
4 | "reduction_bins": 668,
5 | "band": {
6 | "1": {
7 | "sr": 11025,
8 | "hl": 128,
9 | "n_fft": 1024,
10 | "crop_start": 0,
11 | "crop_stop": 186,
12 | "lpf_start": 37,
13 | "lpf_stop": 73,
14 | "res_type": "polyphase"
15 | },
16 | "2": {
17 | "sr": 11025,
18 | "hl": 128,
19 | "n_fft": 512,
20 | "crop_start": 4,
21 | "crop_stop": 185,
22 | "hpf_start": 36,
23 | "hpf_stop": 18,
24 | "lpf_start": 93,
25 | "lpf_stop": 185,
26 | "res_type": "polyphase"
27 | },
28 | "3": {
29 | "sr": 22050,
30 | "hl": 256,
31 | "n_fft": 512,
32 | "crop_start": 46,
33 | "crop_stop": 186,
34 | "hpf_start": 93,
35 | "hpf_stop": 46,
36 | "lpf_start": 164,
37 | "lpf_stop": 186,
38 | "res_type": "polyphase"
39 | },
40 | "4": {
41 | "sr": 44100,
42 | "hl": 512,
43 | "n_fft": 768,
44 | "crop_start": 121,
45 | "crop_stop": 382,
46 | "hpf_start": 138,
47 | "hpf_stop": 123,
48 | "res_type": "sinc_medium"
49 | }
50 | },
51 | "sr": 44100,
52 | "pre_filter_start": 740,
53 | "pre_filter_stop": 768
54 | }
55 |
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/4band_44100_mid.json:
--------------------------------------------------------------------------------
1 | {
2 | "bins": 768,
3 | "unstable_bins": 7,
4 | "mid_side": true,
5 | "reduction_bins": 668,
6 | "band": {
7 | "1": {
8 | "sr": 11025,
9 | "hl": 128,
10 | "n_fft": 1024,
11 | "crop_start": 0,
12 | "crop_stop": 186,
13 | "lpf_start": 37,
14 | "lpf_stop": 73,
15 | "res_type": "polyphase"
16 | },
17 | "2": {
18 | "sr": 11025,
19 | "hl": 128,
20 | "n_fft": 512,
21 | "crop_start": 4,
22 | "crop_stop": 185,
23 | "hpf_start": 36,
24 | "hpf_stop": 18,
25 | "lpf_start": 93,
26 | "lpf_stop": 185,
27 | "res_type": "polyphase"
28 | },
29 | "3": {
30 | "sr": 22050,
31 | "hl": 256,
32 | "n_fft": 512,
33 | "crop_start": 46,
34 | "crop_stop": 186,
35 | "hpf_start": 93,
36 | "hpf_stop": 46,
37 | "lpf_start": 164,
38 | "lpf_stop": 186,
39 | "res_type": "polyphase"
40 | },
41 | "4": {
42 | "sr": 44100,
43 | "hl": 512,
44 | "n_fft": 768,
45 | "crop_start": 121,
46 | "crop_stop": 382,
47 | "hpf_start": 138,
48 | "hpf_stop": 123,
49 | "res_type": "sinc_medium"
50 | }
51 | },
52 | "sr": 44100,
53 | "pre_filter_start": 740,
54 | "pre_filter_stop": 768
55 | }
56 |
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/4band_44100_msb.json:
--------------------------------------------------------------------------------
1 | {
2 | "mid_side_b": true,
3 | "bins": 768,
4 | "unstable_bins": 7,
5 | "reduction_bins": 668,
6 | "band": {
7 | "1": {
8 | "sr": 11025,
9 | "hl": 128,
10 | "n_fft": 1024,
11 | "crop_start": 0,
12 | "crop_stop": 186,
13 | "lpf_start": 37,
14 | "lpf_stop": 73,
15 | "res_type": "polyphase"
16 | },
17 | "2": {
18 | "sr": 11025,
19 | "hl": 128,
20 | "n_fft": 512,
21 | "crop_start": 4,
22 | "crop_stop": 185,
23 | "hpf_start": 36,
24 | "hpf_stop": 18,
25 | "lpf_start": 93,
26 | "lpf_stop": 185,
27 | "res_type": "polyphase"
28 | },
29 | "3": {
30 | "sr": 22050,
31 | "hl": 256,
32 | "n_fft": 512,
33 | "crop_start": 46,
34 | "crop_stop": 186,
35 | "hpf_start": 93,
36 | "hpf_stop": 46,
37 | "lpf_start": 164,
38 | "lpf_stop": 186,
39 | "res_type": "polyphase"
40 | },
41 | "4": {
42 | "sr": 44100,
43 | "hl": 512,
44 | "n_fft": 768,
45 | "crop_start": 121,
46 | "crop_stop": 382,
47 | "hpf_start": 138,
48 | "hpf_stop": 123,
49 | "res_type": "sinc_medium"
50 | }
51 | },
52 | "sr": 44100,
53 | "pre_filter_start": 740,
54 | "pre_filter_stop": 768
55 | }
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/4band_44100_msb2.json:
--------------------------------------------------------------------------------
1 | {
2 | "mid_side_b": true,
3 | "bins": 768,
4 | "unstable_bins": 7,
5 | "reduction_bins": 668,
6 | "band": {
7 | "1": {
8 | "sr": 11025,
9 | "hl": 128,
10 | "n_fft": 1024,
11 | "crop_start": 0,
12 | "crop_stop": 186,
13 | "lpf_start": 37,
14 | "lpf_stop": 73,
15 | "res_type": "polyphase"
16 | },
17 | "2": {
18 | "sr": 11025,
19 | "hl": 128,
20 | "n_fft": 512,
21 | "crop_start": 4,
22 | "crop_stop": 185,
23 | "hpf_start": 36,
24 | "hpf_stop": 18,
25 | "lpf_start": 93,
26 | "lpf_stop": 185,
27 | "res_type": "polyphase"
28 | },
29 | "3": {
30 | "sr": 22050,
31 | "hl": 256,
32 | "n_fft": 512,
33 | "crop_start": 46,
34 | "crop_stop": 186,
35 | "hpf_start": 93,
36 | "hpf_stop": 46,
37 | "lpf_start": 164,
38 | "lpf_stop": 186,
39 | "res_type": "polyphase"
40 | },
41 | "4": {
42 | "sr": 44100,
43 | "hl": 512,
44 | "n_fft": 768,
45 | "crop_start": 121,
46 | "crop_stop": 382,
47 | "hpf_start": 138,
48 | "hpf_stop": 123,
49 | "res_type": "sinc_medium"
50 | }
51 | },
52 | "sr": 44100,
53 | "pre_filter_start": 740,
54 | "pre_filter_stop": 768
55 | }
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/4band_44100_reverse.json:
--------------------------------------------------------------------------------
1 | {
2 | "reverse": true,
3 | "bins": 768,
4 | "unstable_bins": 7,
5 | "reduction_bins": 668,
6 | "band": {
7 | "1": {
8 | "sr": 11025,
9 | "hl": 128,
10 | "n_fft": 1024,
11 | "crop_start": 0,
12 | "crop_stop": 186,
13 | "lpf_start": 37,
14 | "lpf_stop": 73,
15 | "res_type": "polyphase"
16 | },
17 | "2": {
18 | "sr": 11025,
19 | "hl": 128,
20 | "n_fft": 512,
21 | "crop_start": 4,
22 | "crop_stop": 185,
23 | "hpf_start": 36,
24 | "hpf_stop": 18,
25 | "lpf_start": 93,
26 | "lpf_stop": 185,
27 | "res_type": "polyphase"
28 | },
29 | "3": {
30 | "sr": 22050,
31 | "hl": 256,
32 | "n_fft": 512,
33 | "crop_start": 46,
34 | "crop_stop": 186,
35 | "hpf_start": 93,
36 | "hpf_stop": 46,
37 | "lpf_start": 164,
38 | "lpf_stop": 186,
39 | "res_type": "polyphase"
40 | },
41 | "4": {
42 | "sr": 44100,
43 | "hl": 512,
44 | "n_fft": 768,
45 | "crop_start": 121,
46 | "crop_stop": 382,
47 | "hpf_start": 138,
48 | "hpf_stop": 123,
49 | "res_type": "sinc_medium"
50 | }
51 | },
52 | "sr": 44100,
53 | "pre_filter_start": 740,
54 | "pre_filter_stop": 768
55 | }
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/4band_44100_sw.json:
--------------------------------------------------------------------------------
1 | {
2 | "stereo_w": true,
3 | "bins": 768,
4 | "unstable_bins": 7,
5 | "reduction_bins": 668,
6 | "band": {
7 | "1": {
8 | "sr": 11025,
9 | "hl": 128,
10 | "n_fft": 1024,
11 | "crop_start": 0,
12 | "crop_stop": 186,
13 | "lpf_start": 37,
14 | "lpf_stop": 73,
15 | "res_type": "polyphase"
16 | },
17 | "2": {
18 | "sr": 11025,
19 | "hl": 128,
20 | "n_fft": 512,
21 | "crop_start": 4,
22 | "crop_stop": 185,
23 | "hpf_start": 36,
24 | "hpf_stop": 18,
25 | "lpf_start": 93,
26 | "lpf_stop": 185,
27 | "res_type": "polyphase"
28 | },
29 | "3": {
30 | "sr": 22050,
31 | "hl": 256,
32 | "n_fft": 512,
33 | "crop_start": 46,
34 | "crop_stop": 186,
35 | "hpf_start": 93,
36 | "hpf_stop": 46,
37 | "lpf_start": 164,
38 | "lpf_stop": 186,
39 | "res_type": "polyphase"
40 | },
41 | "4": {
42 | "sr": 44100,
43 | "hl": 512,
44 | "n_fft": 768,
45 | "crop_start": 121,
46 | "crop_stop": 382,
47 | "hpf_start": 138,
48 | "hpf_stop": 123,
49 | "res_type": "sinc_medium"
50 | }
51 | },
52 | "sr": 44100,
53 | "pre_filter_start": 740,
54 | "pre_filter_stop": 768
55 | }
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/4band_v2.json:
--------------------------------------------------------------------------------
1 | {
2 | "bins": 672,
3 | "unstable_bins": 8,
4 | "reduction_bins": 637,
5 | "band": {
6 | "1": {
7 | "sr": 7350,
8 | "hl": 80,
9 | "n_fft": 640,
10 | "crop_start": 0,
11 | "crop_stop": 85,
12 | "lpf_start": 25,
13 | "lpf_stop": 53,
14 | "res_type": "polyphase"
15 | },
16 | "2": {
17 | "sr": 7350,
18 | "hl": 80,
19 | "n_fft": 320,
20 | "crop_start": 4,
21 | "crop_stop": 87,
22 | "hpf_start": 25,
23 | "hpf_stop": 12,
24 | "lpf_start": 31,
25 | "lpf_stop": 62,
26 | "res_type": "polyphase"
27 | },
28 | "3": {
29 | "sr": 14700,
30 | "hl": 160,
31 | "n_fft": 512,
32 | "crop_start": 17,
33 | "crop_stop": 216,
34 | "hpf_start": 48,
35 | "hpf_stop": 24,
36 | "lpf_start": 139,
37 | "lpf_stop": 210,
38 | "res_type": "polyphase"
39 | },
40 | "4": {
41 | "sr": 44100,
42 | "hl": 480,
43 | "n_fft": 960,
44 | "crop_start": 78,
45 | "crop_stop": 383,
46 | "hpf_start": 130,
47 | "hpf_stop": 86,
48 | "res_type": "kaiser_fast"
49 | }
50 | },
51 | "sr": 44100,
52 | "pre_filter_start": 668,
53 | "pre_filter_stop": 672
54 | }
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/4band_v2_sn.json:
--------------------------------------------------------------------------------
1 | {
2 | "bins": 672,
3 | "unstable_bins": 8,
4 | "reduction_bins": 637,
5 | "band": {
6 | "1": {
7 | "sr": 7350,
8 | "hl": 80,
9 | "n_fft": 640,
10 | "crop_start": 0,
11 | "crop_stop": 85,
12 | "lpf_start": 25,
13 | "lpf_stop": 53,
14 | "res_type": "polyphase"
15 | },
16 | "2": {
17 | "sr": 7350,
18 | "hl": 80,
19 | "n_fft": 320,
20 | "crop_start": 4,
21 | "crop_stop": 87,
22 | "hpf_start": 25,
23 | "hpf_stop": 12,
24 | "lpf_start": 31,
25 | "lpf_stop": 62,
26 | "res_type": "polyphase"
27 | },
28 | "3": {
29 | "sr": 14700,
30 | "hl": 160,
31 | "n_fft": 512,
32 | "crop_start": 17,
33 | "crop_stop": 216,
34 | "hpf_start": 48,
35 | "hpf_stop": 24,
36 | "lpf_start": 139,
37 | "lpf_stop": 210,
38 | "res_type": "polyphase"
39 | },
40 | "4": {
41 | "sr": 44100,
42 | "hl": 480,
43 | "n_fft": 960,
44 | "crop_start": 78,
45 | "crop_stop": 383,
46 | "hpf_start": 130,
47 | "hpf_stop": 86,
48 | "convert_channels": "stereo_n",
49 | "res_type": "kaiser_fast"
50 | }
51 | },
52 | "sr": 44100,
53 | "pre_filter_start": 668,
54 | "pre_filter_stop": 672
55 | }
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/4band_v3.json:
--------------------------------------------------------------------------------
1 | {
2 | "bins": 672,
3 | "unstable_bins": 8,
4 | "reduction_bins": 530,
5 | "band": {
6 | "1": {
7 | "sr": 7350,
8 | "hl": 80,
9 | "n_fft": 640,
10 | "crop_start": 0,
11 | "crop_stop": 85,
12 | "lpf_start": 25,
13 | "lpf_stop": 53,
14 | "res_type": "polyphase"
15 | },
16 | "2": {
17 | "sr": 7350,
18 | "hl": 80,
19 | "n_fft": 320,
20 | "crop_start": 4,
21 | "crop_stop": 87,
22 | "hpf_start": 25,
23 | "hpf_stop": 12,
24 | "lpf_start": 31,
25 | "lpf_stop": 62,
26 | "res_type": "polyphase"
27 | },
28 | "3": {
29 | "sr": 14700,
30 | "hl": 160,
31 | "n_fft": 512,
32 | "crop_start": 17,
33 | "crop_stop": 216,
34 | "hpf_start": 48,
35 | "hpf_stop": 24,
36 | "lpf_start": 139,
37 | "lpf_stop": 210,
38 | "res_type": "polyphase"
39 | },
40 | "4": {
41 | "sr": 44100,
42 | "hl": 480,
43 | "n_fft": 960,
44 | "crop_start": 78,
45 | "crop_stop": 383,
46 | "hpf_start": 130,
47 | "hpf_stop": 86,
48 | "res_type": "kaiser_fast"
49 | }
50 | },
51 | "sr": 44100,
52 | "pre_filter_start": 668,
53 | "pre_filter_stop": 672
54 | }
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/4band_v3_sn.json:
--------------------------------------------------------------------------------
1 | {
2 | "n_bins": 672,
3 | "unstable_bins": 8,
4 | "stable_bins": 530,
5 | "band": {
6 | "1": {
7 | "sr": 7350,
8 | "hl": 80,
9 | "n_fft": 640,
10 | "crop_start": 0,
11 | "crop_stop": 85,
12 | "lpf_start": 25,
13 | "lpf_stop": 53,
14 | "res_type": "polyphase"
15 | },
16 | "2": {
17 | "sr": 7350,
18 | "hl": 80,
19 | "n_fft": 320,
20 | "crop_start": 4,
21 | "crop_stop": 87,
22 | "hpf_start": 25,
23 | "hpf_stop": 12,
24 | "lpf_start": 31,
25 | "lpf_stop": 62,
26 | "res_type": "polyphase"
27 | },
28 | "3": {
29 | "sr": 14700,
30 | "hl": 160,
31 | "n_fft": 512,
32 | "crop_start": 17,
33 | "crop_stop": 216,
34 | "hpf_start": 48,
35 | "hpf_stop": 24,
36 | "lpf_start": 139,
37 | "lpf_stop": 210,
38 | "res_type": "polyphase"
39 | },
40 | "4": {
41 | "sr": 44100,
42 | "hl": 480,
43 | "n_fft": 960,
44 | "crop_start": 78,
45 | "crop_stop": 383,
46 | "hpf_start": 130,
47 | "hpf_stop": 86,
48 | "convert_channels": "stereo_n",
49 | "res_type": "kaiser_fast"
50 | }
51 | },
52 | "sr": 44100,
53 | "pre_filter_start": 668,
54 | "pre_filter_stop": 672
55 | }
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/ensemble.json:
--------------------------------------------------------------------------------
1 | {
2 | "mid_side_b2": true,
3 | "bins": 1280,
4 | "unstable_bins": 7,
5 | "reduction_bins": 565,
6 | "band": {
7 | "1": {
8 | "sr": 11025,
9 | "hl": 108,
10 | "n_fft": 2048,
11 | "crop_start": 0,
12 | "crop_stop": 374,
13 | "lpf_start": 92,
14 | "lpf_stop": 186,
15 | "res_type": "polyphase"
16 | },
17 | "2": {
18 | "sr": 22050,
19 | "hl": 216,
20 | "n_fft": 1536,
21 | "crop_start": 0,
22 | "crop_stop": 424,
23 | "hpf_start": 68,
24 | "hpf_stop": 34,
25 | "lpf_start": 348,
26 | "lpf_stop": 418,
27 | "res_type": "polyphase"
28 | },
29 | "3": {
30 | "sr": 44100,
31 | "hl": 432,
32 | "n_fft": 1280,
33 | "crop_start": 132,
34 | "crop_stop": 614,
35 | "hpf_start": 172,
36 | "hpf_stop": 144,
37 | "res_type": "polyphase"
38 | }
39 | },
40 | "sr": 44100,
41 | "pre_filter_start": 1280,
42 | "pre_filter_stop": 1280
43 | }
--------------------------------------------------------------------------------
/src/models_dir/vr_network/modelparams/model_data.json:
--------------------------------------------------------------------------------
1 | {
2 | "0d0e6d143046b0eecc41a22e60224582": {
3 | "vr_model_param": "3band_44100_mid",
4 | "primary_stem": "Instrumental"
5 | },
6 | "18b52f873021a0af556fb4ecd552bb8e": {
7 | "vr_model_param": "2band_32000",
8 | "primary_stem": "Instrumental"
9 | },
10 | "1fc66027c82b499c7d8f55f79e64cadc": {
11 | "vr_model_param": "2band_32000",
12 | "primary_stem": "Instrumental"
13 | },
14 | "2aa34fbc01f8e6d2bf509726481e7142": {
15 | "vr_model_param": "4band_44100",
16 | "primary_stem": "No Piano"
17 | },
18 | "3e18f639b11abea7361db1a4a91c2559": {
19 | "vr_model_param": "4band_44100",
20 | "primary_stem": "Instrumental"
21 | },
22 | "570b5f50054609a17741369a35007ddd": {
23 | "vr_model_param": "4band_v3",
24 | "primary_stem": "Instrumental"
25 | },
26 | "5a6e24c1b530f2dab045a522ef89b751": {
27 | "vr_model_param": "1band_sr44100_hl512",
28 | "primary_stem": "Instrumental"
29 | },
30 | "6b5916069a49be3fe29d4397ecfd73fa": {
31 | "vr_model_param": "3band_44100_msb2",
32 | "primary_stem": "Instrumental",
33 | "is_karaoke": true
34 | },
35 | "74b3bc5fa2b69f29baf7839b858bc679": {
36 | "vr_model_param": "4band_44100",
37 | "primary_stem": "Instrumental"
38 | },
39 | "827213b316df36b52a1f3d04fec89369": {
40 | "vr_model_param": "4band_44100",
41 | "primary_stem": "Instrumental"
42 | },
43 | "911d4048eee7223eca4ee0efb7d29256": {
44 | "vr_model_param": "4band_44100",
45 | "primary_stem": "Vocals"
46 | },
47 | "941f3f7f0b0341f12087aacdfef644b1": {
48 | "vr_model_param": "4band_v2",
49 | "primary_stem": "Instrumental"
50 | },
51 | "a02827cf69d75781a35c0e8a327f3195": {
52 | "vr_model_param": "1band_sr33075_hl384",
53 | "primary_stem": "Instrumental"
54 | },
55 | "b165fbff113c959dba5303b74c6484bc": {
56 | "vr_model_param": "3band_44100",
57 | "primary_stem": "Instrumental"
58 | },
59 | "b5f988cd3e891dca7253bf5f0f3427c7": {
60 | "vr_model_param": "4band_44100",
61 | "primary_stem": "Instrumental"
62 | },
63 | "b99c35723bc35cb11ed14a4780006a80": {
64 | "vr_model_param": "1band_sr44100_hl1024",
65 | "primary_stem": "Instrumental"
66 | },
67 | "ba02fd25b71d620eebbdb49e18e4c336": {
68 | "vr_model_param": "3band_44100_mid",
69 | "primary_stem": "Instrumental"
70 | },
71 | "c4476ef424d8cba65f38d8d04e8514e2": {
72 | "vr_model_param": "3band_44100_msb2",
73 | "primary_stem": "Instrumental"
74 | },
75 | "da2d37b8be2972e550a409bae08335aa": {
76 | "vr_model_param": "4band_44100",
77 | "primary_stem": "Vocals"
78 | },
79 | "db57205d3133e39df8e050b435a78c80": {
80 | "vr_model_param": "4band_44100",
81 | "primary_stem": "Instrumental"
82 | },
83 | "ea83b08e32ec2303456fe50659035f69": {
84 | "vr_model_param": "4band_v3",
85 | "primary_stem": "Instrumental"
86 | },
87 | "f6ea8473ff86017b5ebd586ccacf156b": {
88 | "vr_model_param": "4band_v2_sn",
89 | "primary_stem": "Instrumental",
90 | "is_karaoke": true
91 | },
92 | "fd297a61eafc9d829033f8b987c39a3d": {
93 | "vr_model_param": "1band_sr32000_hl512",
94 | "primary_stem": "Instrumental"
95 | },
96 | "0ec76fd9e65f81d8b4fbd13af4826ed8": {
97 | "vr_model_param": "4band_v3",
98 | "primary_stem": "No Woodwinds"
99 | },
100 | "0fb9249ffe4ffc38d7b16243f394c0ff": {
101 | "vr_model_param": "4band_v3",
102 | "primary_stem": "No Reverb"
103 | },
104 | "6857b2972e1754913aad0c9a1678c753": {
105 | "vr_model_param": "4band_v3",
106 | "primary_stem": "No Echo",
107 | "nout": 48,
108 | "nout_lstm": 128
109 | },
110 | "f200a145434efc7dcf0cd093f517ed52": {
111 | "vr_model_param": "4band_v3",
112 | "primary_stem": "No Echo",
113 | "nout": 48,
114 | "nout_lstm": 128
115 | },
116 | "44c55d8b5d2e3edea98c2b2bf93071c7": {
117 | "vr_model_param": "4band_v3",
118 | "primary_stem": "Noise",
119 | "nout": 48,
120 | "nout_lstm": 128
121 | },
122 | "51ea8c43a6928ed3c10ef5cb2707d57b": {
123 | "vr_model_param": "1band_sr44100_hl1024",
124 | "primary_stem": "Noise",
125 | "nout": 16,
126 | "nout_lstm": 128
127 | },
128 | "944950a9c5963a5eb70b445d67b7068a": {
129 | "vr_model_param": "4band_v3_sn",
130 | "primary_stem": "Vocals",
131 | "nout": 64,
132 | "nout_lstm": 128,
133 | "is_karaoke": false,
134 | "is_bv_model": true,
135 | "is_bv_model_rebalanced": 0.9
136 | }
137 | }
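The top-level keys are 32-character hex digests identifying downloaded VR weight files; each entry names the modelparams JSON to load plus the stem the network extracts, with optional nout/nout_lstm overrides matching the CascadedNet constructor in nets_new.py. A lookup sketch, under the assumption that the key is a plain MD5 of the weights file (the loader defines the real hashing scheme):

    import hashlib
    import json

    def weights_key(path):
        # assumption: keys are MD5 hex digests of the weight file
        with open(path, 'rb') as f:
            return hashlib.md5(f.read()).hexdigest()

    with open('modelparams/model_data.json') as f:
        model_data = json.load(f)

    entry = model_data.get(weights_key('weights/1_HP-UVR/1_HP-UVR.pth'))  # hypothetical path
    if entry:
        print(entry['vr_model_param'], entry['primary_stem'])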
--------------------------------------------------------------------------------
/src/models_dir/vr_network/nets_new.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | import torch.nn.functional as F
4 | from . import layers_new as layers
5 |
6 | class BaseNet(nn.Module):
7 |
8 | def __init__(self, nin, nout, nin_lstm, nout_lstm, dilations=((4, 2), (8, 4), (12, 6))):
9 | super(BaseNet, self).__init__()
10 | self.enc1 = layers.Conv2DBNActiv(nin, nout, 3, 1, 1)
11 | self.enc2 = layers.Encoder(nout, nout * 2, 3, 2, 1)
12 | self.enc3 = layers.Encoder(nout * 2, nout * 4, 3, 2, 1)
13 | self.enc4 = layers.Encoder(nout * 4, nout * 6, 3, 2, 1)
14 | self.enc5 = layers.Encoder(nout * 6, nout * 8, 3, 2, 1)
15 |
16 | self.aspp = layers.ASPPModule(nout * 8, nout * 8, dilations, dropout=True)
17 |
18 | self.dec4 = layers.Decoder(nout * (6 + 8), nout * 6, 3, 1, 1)
19 | self.dec3 = layers.Decoder(nout * (4 + 6), nout * 4, 3, 1, 1)
20 | self.dec2 = layers.Decoder(nout * (2 + 4), nout * 2, 3, 1, 1)
21 | self.lstm_dec2 = layers.LSTMModule(nout * 2, nin_lstm, nout_lstm)
22 | self.dec1 = layers.Decoder(nout * (1 + 2) + 1, nout * 1, 3, 1, 1)
23 |
24 |     def forward(self, x):
25 | e1 = self.enc1(x)
26 | e2 = self.enc2(e1)
27 | e3 = self.enc3(e2)
28 | e4 = self.enc4(e3)
29 | e5 = self.enc5(e4)
30 |
31 | h = self.aspp(e5)
32 |
33 | h = self.dec4(h, e4)
34 | h = self.dec3(h, e3)
35 | h = self.dec2(h, e2)
36 | h = torch.cat([h, self.lstm_dec2(h)], dim=1)
37 | h = self.dec1(h, e1)
38 |
39 | return h
40 |
41 | class CascadedNet(nn.Module):
42 |
43 | def __init__(self, n_fft, nn_arch_size=51000, nout=32, nout_lstm=128):
44 | super(CascadedNet, self).__init__()
45 | self.max_bin = n_fft // 2
46 | self.output_bin = n_fft // 2 + 1
47 | self.nin_lstm = self.max_bin // 2
48 | self.offset = 64
49 | nout = 64 if nn_arch_size == 218409 else nout
50 |
51 | #print(nout, nout_lstm, n_fft)
52 |
53 | self.stg1_low_band_net = nn.Sequential(
54 | BaseNet(2, nout // 2, self.nin_lstm // 2, nout_lstm),
55 | layers.Conv2DBNActiv(nout // 2, nout // 4, 1, 1, 0)
56 | )
57 | self.stg1_high_band_net = BaseNet(2, nout // 4, self.nin_lstm // 2, nout_lstm // 2)
58 |
59 | self.stg2_low_band_net = nn.Sequential(
60 | BaseNet(nout // 4 + 2, nout, self.nin_lstm // 2, nout_lstm),
61 | layers.Conv2DBNActiv(nout, nout // 2, 1, 1, 0)
62 | )
63 | self.stg2_high_band_net = BaseNet(nout // 4 + 2, nout // 2, self.nin_lstm // 2, nout_lstm // 2)
64 |
65 | self.stg3_full_band_net = BaseNet(3 * nout // 4 + 2, nout, self.nin_lstm, nout_lstm)
66 |
67 | self.out = nn.Conv2d(nout, 2, 1, bias=False)
68 | self.aux_out = nn.Conv2d(3 * nout // 4, 2, 1, bias=False)
69 |
70 | def forward(self, x):
71 | x = x[:, :, :self.max_bin]
72 |
73 | bandw = x.size()[2] // 2
74 | l1_in = x[:, :, :bandw]
75 | h1_in = x[:, :, bandw:]
76 | l1 = self.stg1_low_band_net(l1_in)
77 | h1 = self.stg1_high_band_net(h1_in)
78 | aux1 = torch.cat([l1, h1], dim=2)
79 |
80 | l2_in = torch.cat([l1_in, l1], dim=1)
81 | h2_in = torch.cat([h1_in, h1], dim=1)
82 | l2 = self.stg2_low_band_net(l2_in)
83 | h2 = self.stg2_high_band_net(h2_in)
84 | aux2 = torch.cat([l2, h2], dim=2)
85 |
86 | f3_in = torch.cat([x, aux1, aux2], dim=1)
87 | f3 = self.stg3_full_band_net(f3_in)
88 |
89 | mask = torch.sigmoid(self.out(f3))
90 | mask = F.pad(
91 | input=mask,
92 | pad=(0, 0, 0, self.output_bin - mask.size()[2]),
93 | mode='replicate'
94 | )
95 |
96 | if self.training:
97 | aux = torch.cat([aux1, aux2], dim=1)
98 | aux = torch.sigmoid(self.aux_out(aux))
99 | aux = F.pad(
100 | input=aux,
101 | pad=(0, 0, 0, self.output_bin - aux.size()[2]),
102 | mode='replicate'
103 | )
104 | return mask, aux
105 | else:
106 | return mask
107 |
108 | def predict_mask(self, x):
109 | mask = self.forward(x)
110 |
111 | if self.offset > 0:
112 | mask = mask[:, :, :, self.offset:-self.offset]
113 | assert mask.size()[3] > 0
114 |
115 | return mask
116 |
117 | def predict(self, x):
118 | mask = self.forward(x)
119 | pred_mag = x * mask
120 |
121 | if self.offset > 0:
122 | pred_mag = pred_mag[:, :, :, self.offset:-self.offset]
123 | assert pred_mag.size()[3] > 0
124 |
125 | return pred_mag
126 |
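A smoke-test sketch for CascadedNet (assuming the vr_network package imports cleanly): the input is a magnitude spectrogram shaped (batch, 2, n_fft // 2 + 1, frames); in eval mode forward returns only the sigmoid mask, padded back to output_bin rows, and predict applies the mask and crops offset frames from both edges.

    import torch
    # assuming: from src.models_dir.vr_network.nets_new import CascadedNet

    n_fft = 2048
    net = CascadedNet(n_fft=n_fft, nout=32, nout_lstm=128).eval()
    mag = torch.rand(1, 2, n_fft // 2 + 1, 256)   # (batch, channels, bins, frames)
    with torch.no_grad():
        mask = net(mag)                            # torch.Size([1, 2, 1025, 256])
        pred = net.predict(mag)                    # masked magnitude, edges cropped by self.offset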
--------------------------------------------------------------------------------
/src/models_dir/vr_network/pyrb.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import tempfile
4 | import six
5 | import numpy as np
6 | import soundfile as sf
7 | import sys
8 |
9 | if getattr(sys, 'frozen', False):
10 | BASE_PATH_RUB = sys._MEIPASS
11 | else:
12 | BASE_PATH_RUB = os.path.dirname(os.path.abspath(__file__))
13 |
14 | __all__ = ['time_stretch', 'pitch_shift']
15 |
16 | __RUBBERBAND_UTIL = os.path.join(BASE_PATH_RUB, 'rubberband')
17 |
18 | if six.PY2:
19 | DEVNULL = open(os.devnull, 'w')
20 | else:
21 | DEVNULL = subprocess.DEVNULL
22 |
23 | def __rubberband(y, sr, **kwargs):
24 |
25 | assert sr > 0
26 |
27 | # Get the input and output tempfile
28 | fd, infile = tempfile.mkstemp(suffix='.wav')
29 | os.close(fd)
30 | fd, outfile = tempfile.mkstemp(suffix='.wav')
31 | os.close(fd)
32 |
33 | # dump the audio
34 | sf.write(infile, y, sr)
35 |
36 | try:
37 | # Execute rubberband
38 | arguments = [__RUBBERBAND_UTIL, '-q']
39 |
40 | for key, value in six.iteritems(kwargs):
41 | arguments.append(str(key))
42 | arguments.append(str(value))
43 |
44 | arguments.extend([infile, outfile])
45 |
46 | subprocess.check_call(arguments, stdout=DEVNULL, stderr=DEVNULL)
47 |
48 | # Load the processed audio.
49 | y_out, _ = sf.read(outfile, always_2d=True)
50 |
51 | # make sure that output dimensions matches input
52 | if y.ndim == 1:
53 | y_out = np.squeeze(y_out)
54 |
55 | except OSError as exc:
56 | six.raise_from(RuntimeError('Failed to execute rubberband. '
57 | 'Please verify that rubberband-cli '
58 | 'is installed.'),
59 | exc)
60 |
61 | finally:
62 | # Remove temp files
63 | os.unlink(infile)
64 | os.unlink(outfile)
65 |
66 | return y_out
67 |
68 | def time_stretch(y, sr, rate, rbargs=None):
69 | if rate <= 0:
70 | raise ValueError('rate must be strictly positive')
71 |
72 | if rate == 1.0:
73 | return y
74 |
75 | if rbargs is None:
76 | rbargs = dict()
77 |
78 | rbargs.setdefault('--tempo', rate)
79 |
80 | return __rubberband(y, sr, **rbargs)
81 |
82 | def pitch_shift(y, sr, n_steps, rbargs=None):
83 |
84 | if n_steps == 0:
85 | return y
86 |
87 | if rbargs is None:
88 | rbargs = dict()
89 |
90 | rbargs.setdefault('--pitch', n_steps)
91 |
92 | return __rubberband(y, sr, **rbargs)
93 |
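A usage sketch, assuming the rubberband executable is present at the path the module computes (or bundled by PyInstaller); rate and n_steps are passed straight through as rubberband's --tempo and --pitch arguments.

    import numpy as np

    sr = 44100
    y = np.random.randn(2 * sr)         # two seconds of mono noise
    slower = time_stretch(y, sr, 0.8)   # tempo factor 0.8
    higher = pitch_shift(y, sr, 2)      # up two semitones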
--------------------------------------------------------------------------------
/src/pipelines.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/pipelines.py
--------------------------------------------------------------------------------
/src/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/src/utils/__init__.py
--------------------------------------------------------------------------------
/src/utils/fastio.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import soundfile as sf
3 | import librosa
4 | import audiofile as af
5 | import types
6 | from typing import Union, List, Tuple
7 | import numpy.typing as npt
8 |
9 | def read(path:str, insure_2d:bool=True, target_sampling_rate=None, logger=None)->Tuple[npt.NDArray, int]:
10 |     """Read an audio file, trying audiofile first, then soundfile, and finally librosa.
11 |
12 | Args:
13 | path (str): path to the audio file
14 |         insure_2d (bool, optional): ensure that the audio data is 2D. If the audio
15 |             doesn't have 2 channels, it is converted to 2D by repeating the channel. Defaults to True.
16 |         target_sampling_rate (int, optional): resample to this rate. Defaults to None (keep the native rate).
17 |         logger (logging.Logger, optional): logger. Defaults to None.
18 |
19 | Returns:
20 | tuple: audio data and samplerate
21 |
22 | Raises:
23 | ValueError: Failed to read the audio file with any of the available libraries
24 |
25 | """
26 |
27 | ext = path.split('.')[-1]
28 | signal, sampling_rate = None, None
29 |
30 | if ext in ['wav', 'flac', 'ogg', 'mp3']:
31 | try:
32 | signal, sampling_rate = af.read(path)
33 | except Exception as e:
34 | if logger:
35 | logger.warning(f"audiofile failed to read {path} with error {e}")
36 |
37 | if signal is None:
38 | try:
39 | signal, sampling_rate = sf.read(path)
40 | except Exception as e:
41 | if logger:
42 | logger.warning(f"soundfile failed to read {path} with error {e}")
43 |
44 | if signal is None:
45 | try:
46 | signal, sampling_rate = librosa.load(path, sr=None, mono=False)
47 | except Exception as e:
48 | if logger:
49 | logger.error(f"librosa failed to read {path} with error {e}")
50 |
51 |     if signal is not None and target_sampling_rate is not None:
52 |         signal = librosa.resample(signal, orig_sr=sampling_rate, target_sr=target_sampling_rate)
53 | sampling_rate = target_sampling_rate
54 |
55 | if signal is not None:
56 | signal = insure_2d_signal(signal, insure_2d, logger)
57 | return signal, sampling_rate
58 |
59 | raise ValueError(f"Failed to read {path} with any of the available libraries")
60 |
61 |
62 | def insure_2d_signal(signal:npt.NDArray, insure_2d:bool, logger=None)->npt.NDArray:
63 |     """Ensure that the audio data is 2D.
64 |     If the audio doesn't have 2 channels, it is converted to 2D by repeating the channel.
65 |     If the audio has more than 2 channels, the extra channels are dropped.
66 |
67 | Args:
68 | signal (np.array): audio data
69 | insure_2d (bool): insure that the audio data is 2D.
70 | logger (logging.Logger, optional): logger. Defaults to None.
71 |
72 | Returns:
73 | np.array: 2D audio data
74 |
75 | """
76 |     if insure_2d and signal.ndim == 1:
77 |         if logger:
78 |             logger.warning(f"Insured 2D signal for audio data. Original shape was {signal.shape}")
79 |         signal = np.stack([signal, signal])
80 | elif insure_2d and signal.ndim > 2:
81 | if logger:
82 | logger.warning(f"Insured 2D signal for audio data. Original shape was {signal.shape}")
83 | signal = signal[:2]
84 | return signal
85 |
86 |
87 | def write(path:str, signal:Union[npt.NDArray, List], sampling_rate:int, ext:str=None, logger=None):
88 |     """Write an audio file, trying audiofile first, then soundfile, and finally librosa.
89 |
90 | Args:
91 | path (str): path to the audio file
92 | signal (np.array|list): audio data
93 | sampling_rate (int): samplerate
94 |         ext (str, optional): file extension; overrides the extension from the path. Defaults to None. Example: 'wav', 'flac', 'ogg', 'mp3' (don't add the dot).
95 | logger (logging.Logger, optional): logger. Defaults to None.
96 |
97 | Raises:
98 | ValueError: Failed to write the audio file with any of the available libraries
99 | """
100 |     if ext is not None:
101 |         path = path + '.' + ext
102 |     else:
103 |         ext = path.split('.')[-1]  # fall back to the extension already in the path
104 |     if ext in ['wav', 'flac', 'ogg', 'mp3']:
105 | try:
106 | af.write(path, signal, sampling_rate)
107 | return
108 | except Exception as e:
109 | if logger:
110 | logger.warning(f"audiofile failed to write {path} with error {e}")
111 |
112 | try:
113 | sf.write(path, signal.T, sampling_rate)
114 | return
115 | except Exception as e:
116 | if logger:
117 | logger.warning(f"soundfile failed to write {path} with error {e}")
118 |
119 | try:
120 |         librosa.output.write_wav(path, signal.T, sampling_rate)  # note: librosa.output was removed in librosa >= 0.8
121 | return
122 | except Exception as e:
123 | if logger:
124 | logger.error(f"librosa failed to write {path} with error {e}")
125 |
126 | raise ValueError(f"Failed to write {path} with any of the available libraries")
127 |
128 |
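A round-trip sketch, assuming a readable mix.wav exists. One caveat worth knowing: the channel orientation of the returned array depends on which backend succeeded, since audiofile and librosa return (channels, samples) while soundfile returns (frames, channels).

    signal, sr = read('mix.wav', insure_2d=True, target_sampling_rate=44100)
    print(signal.shape, sr)
    write('mix_copy', signal, sr, ext='wav')   # writes mix_copy.wav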
--------------------------------------------------------------------------------
/src/utils/get_models.py:
--------------------------------------------------------------------------------
1 | import os
2 | import glob
3 | import urllib.request
4 | from typing import List
5 | from pathlib import Path
6 | import json
7 |
8 | uvr_path = Path(__file__).parent.parent
9 |
10 | def download_model(model_name:str, model_arch:str, model_path:List[str]=None, logger=None)->str:
11 | """Download model from Hugging Face model hub
12 |
13 | Args:
14 | model_name (str): model name.
15 |         model_path (list[str]): URLs to download the model files from. Defaults to None (loads paths from uvr/models_dir/models.json).
16 |         model_arch (str): model architecture. Weights are saved under ../models_dir/{model_arch}/weights/{model_name};
17 |             the directory is created if missing, and an already-downloaded model is not downloaded again.
18 | logger (logging.Logger, optional): logger. Defaults to None.
19 |
20 | Returns:
21 | str: path to the downloaded model
22 | """
23 | if model_path is None:
24 | if logger:
25 |             logger.error(f"Model path not provided for {model_name}; loading paths from models.json")
26 |         models_json_path = os.path.join(uvr_path, "models_dir", "models.json")
27 |         with open(models_json_path, "r") as f:
28 |             models = json.load(f)
29 |         model_path = models[model_arch][model_name]["model_path"]
30 | save_path = os.path.join(uvr_path, "models_dir", model_arch, "weights", model_name)
31 |
32 | if not os.path.exists(save_path):
33 | os.makedirs(save_path)
34 |
35 | files = [path.split("/")[-1] for path in model_path]
36 | if model_exists(model_name=model_name, model_arch=model_arch, files=files):
37 | if logger:
38 |             logger.info(f"Model {model_name} already exists in {save_path}")
39 | return save_path
40 |
41 | try:
42 | # os.system(f"wget {model_path} -P {local_model_path}")
43 | for file_name, path in zip(files, model_path):
44 | local_file_path = os.path.join(save_path, file_name)
45 | urllib.request.urlretrieve(path, local_file_path)
46 | if logger:
47 | logger.info(f"Downloaded {model_name} from {model_path}")
48 |
49 |
50 | return save_path
51 |
52 | except Exception as e:
53 | if logger:
54 | logger.error(f"Failed to download {model_name} from {model_path} with error {e}")
55 |
56 | return None
57 |
58 | def model_exists(model_name:str, model_arch:str, files:List=None)->bool:
59 | """Check if the model exists in ../models_dir/{model_arch}/weights/{model_name}
60 |
61 | Args:
62 | model_name (str): model name.
63 | model_arch (str): model architecture.
64 | files (list[str], optional): list of files to check if they exist. Defaults to None. If not provided it will check if the model directory exists.
65 |
66 | Returns:
67 | bool: True if the model exists, False otherwise
68 | """
69 | # remove extension from the model name
70 | if len(model_name.split('.')) > 1:
71 | model_name = model_name.split('.')[0]
72 |
73 | save_path = os.path.join(uvr_path, "models_dir", model_arch, "weights", model_name)
74 | if files is not None:
75 | for file in files:
76 | local_model_path = os.path.join(save_path, file)
77 | if not os.path.isfile(local_model_path):
78 | return False
79 |
80 | if os.path.exists(save_path):
81 | return True
82 | return False
83 |
84 | """
85 | Example of the model json file:
86 | models_json = {
87 |
88 | "demucs":{
89 | "name1":{
90 | "model_path":"https://abc/bcd/model.pt",
91 | "other_metadata":1,
92 | },
93 | }
94 | }
95 | """
96 |
97 | def download_all_models(models_json:dict=None, logger=None)->dict:
98 | """Download all models from the models_json
99 |
100 | Args:
101 | models_json (dict): dictionary of models to download. Defaults to None (loads paths from uvr/models_dir/models.json file)
102 | logger (logging.Logger, optional): logger. Defaults to None.
103 |
104 | Returns:
105 | dict: dictionary of downloaded models. with the same structure as the input models_json.
106 | architectures -> model_name -> model_path. Also the model_path will be the local path to the downloaded model.
107 | If the model is already downloaded it will not be downloaded again. And if the model failed to download it will be None.
108 | """
109 | paths = {}
110 | if models_json is None:
111 | if logger:
112 |             logger.error("models_json not provided; loading models.json from models_dir")
113 |         models_json_path = os.path.join(uvr_path, "models_dir", "models.json")
114 |         with open(models_json_path, "r") as f:
115 |             models_json = json.load(f)
116 | for model_arch, models in models_json.items():
117 | paths[model_arch] = {}
118 | for model_name, model_data in models.items():
119 | model_path = model_data["model_path"]
120 | model_path = download_model(model_name=model_name, model_path=model_path, model_arch=model_arch, logger=logger)
121 | paths[model_arch][model_name] = model_path
122 |
123 | return paths
124 |
125 |
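A usage sketch, assuming models_dir/models.json lists the architecture and model (hdemucs_mmi appears under demucs in tests/models_status.json). With model_path=None the URLs are resolved from models.json, and an already-downloaded model is returned without re-fetching.

    path = download_model(model_name='hdemucs_mmi', model_arch='demucs')
    print(path)   # .../src/models_dir/demucs/weights/hdemucs_mmi, or None on failure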
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/tests/__init__.py
--------------------------------------------------------------------------------
/tests/models_status.json:
--------------------------------------------------------------------------------
1 | {
2 | "demucs": {
3 | "hdemucs_mmi": {
4 | "audio": {
5 | "download": true,
6 | "load": true,
7 | "run": true,
8 | "time": 28.517844915390015
9 | },
10 | "wav": {
11 | "download": true,
12 | "load": true,
13 | "run": true,
14 | "time": 19.17659068107605
15 | },
16 | "mp3": {
17 | "download": true,
18 | "load": true,
19 | "run": true,
20 | "time": 20.161988019943237
21 | },
22 | "flac": {
23 | "download": true,
24 | "load": true,
25 | "run": true,
26 | "time": 23.07073998451233
27 | }
28 | }
29 | },
30 | "vr_network": {
31 | "1_HP-UVR": {
32 | "audio": {
33 | "download": true,
34 | "load": true,
35 | "run": true,
36 | "time": 11.637473106384277
37 | },
38 | "wav": {
39 | "download": true,
40 | "load": true,
41 | "run": true,
42 | "time": 12.053731918334961
43 | },
44 | "mp3": {
45 | "download": true,
46 | "load": true,
47 | "run": true,
48 | "time": 17.92900800704956
49 | },
50 | "flac": {
51 | "download": true,
52 | "load": true,
53 | "run": true,
54 | "time": 10.97541093826294
55 | }
56 | }
57 | },
58 | "mdx": {
59 | "UVR-MDX-NET-Inst_1": {
60 | "audio": {
61 | "download": true,
62 | "load": true,
63 | "run": true,
64 | "time": 6.675442218780518
65 | },
66 | "wav": {
67 | "download": true,
68 | "load": true,
69 | "run": true,
70 | "time": 4.095139265060425
71 | },
72 | "mp3": {
73 | "download": true,
74 | "load": true,
75 | "run": true,
76 | "time": 4.160974740982056
77 | },
78 | "flac": {
79 | "download": true,
80 | "load": true,
81 | "run": true,
82 | "time": 4.121398687362671
83 | }
84 | }
85 | },
86 | "mdxc": {
87 | "MDX23C-8KFFT-InstVoc_HQ": {
88 | "audio": {
89 | "download": true,
90 | "load": true,
91 | "run": true,
92 | "time": 71.7358889579773
93 | },
94 | "wav": {
95 | "download": true,
96 | "load": true,
97 | "run": true,
98 | "time": 74.15330004692078
99 | },
100 | "mp3": {
101 | "download": true,
102 | "load": true,
103 | "run": true,
104 | "time": 76.09534502029419
105 | },
106 | "flac": {
107 | "download": true,
108 | "load": true,
109 | "run": true,
110 | "time": 72.53368592262268
111 | }
112 | }
113 | }
114 | }
--------------------------------------------------------------------------------
/tests/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NextAudioGen/ultimatevocalremover_api/344e8550b3213928628f6ad97404cf539ef38e33/tests/utils/__init__.py
--------------------------------------------------------------------------------
/tests/utils/test_fastio.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/tests/utils/test_get_models.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | from ...src.utils import get_models
4 |
5 | def is_samepath(path1, path2):
6 | return os.path.abspath(path1) == os.path.abspath(path2)
7 |
8 | def rm_models_dir(model_arch):
9 | current_path = os.getcwd()
10 | rm_path = os.path.join(current_path, "src", "models_dir", model_arch)
11 | rm_path = os.path.abspath(rm_path)
12 | # print("rm_path", rm_path)
13 | # os.remove(rm_path)
14 | shutil.rmtree(rm_path)
15 |
16 | def test_model_dont_exists():
17 | model_name = "model_name"
18 | model_arch = "model_arch"
19 |     assert not get_models.model_exists(model_name=model_name, model_arch=model_arch)
20 |
21 | def test_model_exists():
22 | model_name = "model_name"
23 | model_arch = "model_arch"
24 | files = ["model_name.txt"]
25 | current_path = os.getcwd()
26 | save_path = os.path.join(current_path, "src", "models_dir", model_arch, "weights", model_name)
27 | if not os.path.exists(save_path):
28 | os.makedirs(save_path)
29 |
30 | for file_ in files:
31 | local_model_path = os.path.join(save_path, file_)
32 | local_model_path = os.path.abspath(local_model_path)
33 |
34 | with open(local_model_path, 'w') as f:
35 | f.write("test")
36 |
37 |     assert get_models.model_exists(model_name=model_name, model_arch=model_arch, files=files)
38 | rm_models_dir(model_arch)
39 |
40 | def test_download_model():
41 | model_arch = "model_arch"
42 | model_name = "model_name"
43 | model_path = ["https://www.google.com"]
44 | model_file = model_path[0].split("/")[-1]
45 |
46 | path = get_models.download_model(model_name=model_name, model_path=model_path, model_arch=model_arch)
47 | current_path = os.getcwd()
48 | save_path = os.path.join(current_path, "src", "models_dir", model_arch, "weights", model_name)
49 | local_file_path = os.path.join(save_path, model_file)
50 |     assert is_samepath(path, save_path)
51 |     assert os.path.isfile(local_file_path)
52 | rm_models_dir(model_arch)
53 |
54 | test_models_json = {
55 | "arch1":{
56 | "model1":{
57 | "model_path":["https://www.google.com"]
58 | }
59 | },
60 | "arch2":{
61 | "model2":{
62 | "model_path":["https://www.apple.com"]
63 | }
64 | }
65 | }
66 |
67 | def test_get_all_models():
68 | test_models_json_res = {
69 | "arch1":{
70 | "model1": "www.google.com"
71 | },
72 | "arch2":{
73 | "model2": "www.apple.com"
74 | }
75 | }
76 |
77 | models = get_models.download_all_models(test_models_json)
78 | for arch in test_models_json_res:
79 | assert arch in models
80 | for model in test_models_json_res[arch]:
81 | assert model in models[arch]
82 | # print(models[arch][model])
83 | current_path = os.getcwd()
84 | # ref_model_path = test_models_json_res[arch][model]
85 | ref_model_path = os.path.join(current_path, "src", "models_dir", arch, "weights", model)
86 |         assert is_samepath(models[arch][model], ref_model_path)
87 |         assert get_models.model_exists(model_name=model, model_arch=arch)
88 |
89 | rm_models_dir(arch)
90 |
91 |
92 |
93 |
--------------------------------------------------------------------------------