├── .gitignore ├── CREDITS.md ├── LICENSE-APACHE-2 ├── LICENSE-CCA-ANY ├── LICENSE-GPL-V3 ├── README.md ├── __init__.py ├── core ├── compression.py ├── conversions.py ├── filters.py ├── harmonics.py ├── io.py ├── kernels │ ├── compressor │ │ └── compressor.cu │ └── limiter │ │ ├── limiter_dev.cu │ │ ├── limiter_down_parallel.cu │ │ ├── limiter_hard_clipper.cu │ │ ├── limiter_soft_clipper.cu │ │ ├── limiter_updown_parallel.cu │ │ └── limiter_updown_weighted_parallel.cu ├── limiting.py ├── loudness.py ├── mixing.py ├── plotting.py ├── sampling.py ├── saturation.py ├── spectral.py ├── tests.py ├── utilities.py ├── utilitiescuda.py └── widening.py ├── data └── widener │ ├── init_vn_filters.txt │ └── opt_vn_filters.txt ├── effects ├── SignalProcessingConvolutionReverb.py ├── SignalProcessingPaulStretch.py ├── SignalProcessingPitchShifter.py ├── SignalProcessingStereoWidening.py └── __init__.py ├── generators ├── SignalProcessingPadSynth.py ├── SignalProcessingPadSynthChoir.py └── __init__.py ├── nodes.py ├── noxfile.py ├── processors ├── SignalProcessingBaxandallEQ.py ├── SignalProcessingCompressor.py ├── SignalProcessingFilter.py ├── SignalProcessingHarmonicsEnhancer.py ├── SignalProcessingLimiter.py ├── SignalProcessingLoadAudio.py ├── SignalProcessingLoudness.py ├── SignalProcessingMixdown.py ├── SignalProcessingNormalizer.py ├── SignalProcessingSaturation.py └── __init__.py ├── pyproject.toml ├── pytest.ini ├── requirements.txt ├── tests ├── __init__.py ├── conftest.py ├── test_baxandall.py ├── test_compressor.py ├── test_convolution_reverb.py ├── test_filter.py ├── test_harmonics.py ├── test_limiting.py ├── test_normalizer.py ├── test_padsynthchoir.py ├── test_paulstretch.py ├── test_pitchshift.py ├── test_plotting.py ├── test_saturation.py └── test_widening.py └── visuals ├── SignalProcessingSpectrogram.py ├── SignalProcessingWaveform.py └── __init__.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # development directories 10 | audio/ 11 | .nox/ 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # poetry 102 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 103 | # This is especially recommended for binary packages to ensure reproducibility, and is more 104 | # commonly ignored for libraries. 105 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 106 | #poetry.lock 107 | 108 | # pdm 109 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 110 | #pdm.lock 111 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 112 | # in version control. 113 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 114 | .pdm.toml 115 | .pdm-python 116 | .pdm-build/ 117 | 118 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 119 | __pypackages__/ 120 | 121 | # Celery stuff 122 | celerybeat-schedule 123 | celerybeat.pid 124 | 125 | # SageMath parsed files 126 | *.sage.py 127 | 128 | # Environments 129 | .env 130 | .venv 131 | env/ 132 | venv/ 133 | ENV/ 134 | env.bak/ 135 | venv.bak/ 136 | 137 | # Spyder project settings 138 | .spyderproject 139 | .spyproject 140 | 141 | # Rope project settings 142 | .ropeproject 143 | 144 | # mkdocs documentation 145 | /site 146 | 147 | # mypy 148 | .mypy_cache/ 149 | .dmypy.json 150 | dmypy.json 151 | 152 | # Pyre type checker 153 | .pyre/ 154 | 155 | # pytype static type analyzer 156 | .pytype/ 157 | 158 | # Cython debug symbols 159 | cython_debug/ 160 | 161 | # PyCharm 162 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 163 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 164 | # and can be added to the global gitignore or merged into this file. For a more nuclear 165 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
166 | #.idea/ 167 | -------------------------------------------------------------------------------- /CREDITS.md: -------------------------------------------------------------------------------- 1 | ### Credits 2 | - **`Nasca Octavian Paul`** 3 | - **`Greg Hopkins`** 4 | - **`Orchisama Das`** -------------------------------------------------------------------------------- /LICENSE-CCA-ANY: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. 
database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. 
Affirmer offers the Work as-is and makes no representations or
warranties of any kind concerning the Work, express, implied,
statutory or otherwise, including without limitation warranties of
title, merchantability, fitness for a particular purpose, non
infringement, or the absence of latent or other defects, accuracy, or
the presence or absence of errors, whether or not discoverable, all to
the greatest extent permissible under applicable law.
c. Affirmer disclaims responsibility for clearing rights of other persons
that may apply to the Work or any use thereof, including without
limitation any person's Copyright and Related Rights in the Work.
Further, Affirmer disclaims responsibility for obtaining any necessary
consents, permissions or other rights required for any use of the
Work.
d. Affirmer understands and acknowledges that Creative Commons is not a
party to this document and has no duty or obligation with respect to
this CC0 or use of the Work.
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS

__all__ = [
    "NODE_CLASS_MAPPINGS",
    "NODE_DISPLAY_NAME_MAPPINGS",
]
--------------------------------------------------------------------------------
/core/compression.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Author: C0ffymachyne
License: GPLv3
Version: 1.0.0

Description:
    various compression methods
"""


import cupy as cp
import torch
import numpy as np

from typing import Tuple, Any
from ..core.utilitiescuda import read_kernel_by_name

compressor_kernel = read_kernel_by_name(
    "compressor", kernel_class="compressor", kernel_identifier="compexp_kernel"
)


def compressor(
    audio_in: torch.Tensor | np.ndarray,
    sample_rate: int,
    comp: float = -0.3,  # Compression/expansion factor
    attack: float = 0.1,  # Attack time in ms
    release: float = 60.0,  # Release time in ms
    a: float = 0.3,  # Filter parameter < 1
    device: str = "cuda",
) -> Tuple[torch.Tensor, Any]:
    """
    Compresses or expands stereo audio using an optimized CUDA kernel.

    Parameters:
        audio_in (torch.Tensor or np.ndarray): Input stereo audio signal with shape (2, n_samples), i.e. [channels, samples].
        sample_rate (int): Sampling rate in Hz.
        comp (float): Compression/expansion factor.
        attack (float): Attack time in milliseconds.
        release (float): Release time in milliseconds.
        a (float): Filter parameter (< 1) for envelope smoothing.
        device (str): Device to place the output tensor ('cuda' or 'cpu').

    Returns:
        torch.Tensor: Compressed stereo audio with shape (2, n_samples).
    """
    # Convert input to NumPy array if necessary
    audio_in = audio_in.T
    if isinstance(audio_in, torch.Tensor):
        audio_in = audio_in.detach().cpu().numpy()
    else:
        audio_in = np.asarray(audio_in, dtype=np.float64)

    # Ensure the audio is in shape (n_samples, 2)
    if audio_in.ndim != 2 or audio_in.shape[1] != 2:
        raise ValueError(
            f"Input audio must have shape (n_samples, 2), but got {audio_in.shape}"
        )

    n_samples, n_channels = audio_in.shape

    # Flatten the audio for kernel processing
    wav_in_flat = audio_in.flatten()

    # Move data to GPU
    wav_in_gpu = cp.asarray(wav_in_flat, dtype=cp.float64)
    wav_out_gpu = cp.zeros_like(wav_in_gpu)

    # Define grid and block dimensions
    block_size = 256  # Number of threads per block
    grid_size = n_channels  # One block per channel

    # Launch the CUDA kernel
    compressor_kernel(
        (grid_size,),  # Grid dimensions
        (block_size,),  # Block dimensions
        (
            wav_in_gpu,
            wav_out_gpu,
            np.int32(n_channels),
            np.int32(n_samples),
            np.float64(comp),
            np.float64(release),
            np.float64(attack),
            np.float64(a),
            np.float64(sample_rate),
        ),
    )

    # Retrieve results from GPU
    wav_out_host = wav_out_gpu.get().astype(np.float64)

    # Reshape the output
    wav_out_stereo = wav_out_host.reshape((n_samples, n_channels))

    # Convert back to a Torch tensor on the requested device
    if device == "cuda":
        out_tensor = torch.from_numpy(wav_out_stereo).to("cuda")
    else:
        out_tensor = torch.from_numpy(wav_out_stereo).to("cpu")

    return out_tensor.T, None
--------------------------------------------------------------------------------
/core/conversions.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Author: C0ffymachyne
License: GPLv3
Version: 1.0.0

Description:
    various conversion methods
"""

import torch


def db_to_lin(value: float) -> float:
    return 10 ** (value / 20)


def lin_to_tb(value: torch.Tensor) -> torch.Tensor:
    # Linear amplitude to dB; the epsilon guards against log10(0)
    return 20 * torch.log10(torch.abs(value) + 1.0e-24)


def get_sign(value: torch.Tensor) -> torch.Tensor:
    sign = torch.sign(value)
    return sign
--------------------------------------------------------------------------------
/core/filters.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Author: C0ffymachyne
License: GPLv3
Version: 1.0.0

Description:
    utility functions for various filtering tasks
"""

import torch
from scipy.signal import butter
import torchaudio.functional as AF


def anti_aliasing_iir_filter(
    audio: torch.Tensor, sample_rate: int, cutoff: float = 0.0, order: int = 4
) -> torch.Tensor:
    """
    Apply an anti-aliasing IIR low-pass filter to the audio.

    Parameters:
        audio (Tensor): [channels, samples] audio signal.
        sample_rate (int): Sample rate.
        cutoff (float): Cutoff frequency for the filter. Defaults to the Nyquist limit (sample_rate / 2).
        order (int): Order of the IIR filter.

    Returns:
        Tensor: Filtered audio signal.
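    Example:
        A minimal usage sketch; the stereo tensor and 44.1 kHz rate below are
        illustrative assumptions, not values taken from this repository::

            >>> import torch
            >>> audio = torch.randn(2, 44100)  # hypothetical 1-second stereo clip
            >>> filtered = anti_aliasing_iir_filter(audio, sample_rate=44100, cutoff=16000.0)
            >>> filtered.shape
            torch.Size([2, 44100])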
    """
    if cutoff == 0:
        cutoff = sample_rate / 2  # Default to Nyquist frequency
    nyquist = sample_rate / 2
    # Small offset keeps the normalized cutoff strictly below 1.0
    normalized_cutoff = cutoff / nyquist - 0.01

    # Design the Butterworth filter
    b, a = butter(order, normalized_cutoff, btype="low", output="ba")
    b = torch.tensor(b, dtype=audio.dtype, device=audio.device)
    a = torch.tensor(a, dtype=audio.dtype, device=audio.device)

    # Apply the filter
    filtered_audio = AF.lfilter(audio, b_coeffs=b, a_coeffs=a)

    return filtered_audio


def band_stop_filter(
    audio: torch.Tensor,
    sample_rate: int,
    low_cut: float,
    high_cut: float,
    filter_order: int = 2,
) -> torch.Tensor:
    """
    Apply a band-stop filter to attenuate lower mid frequencies.

    Parameters:
        audio (Tensor): [channels, samples] input audio signal.
        sample_rate (int): Sample rate in Hz.
        low_cut (float): Lower cutoff frequency of the band in Hz.
        high_cut (float): Upper cutoff frequency of the band in Hz.
        filter_order (int): Order of the Butterworth filter.

    Returns:
        Tensor: Audio signal after band-stop filtering.
    """
    nyquist = sample_rate / 2
    normalized_band = [low_cut / nyquist, high_cut / nyquist]

    # Design band-stop Butterworth filter
    b, a = butter(filter_order, normalized_band, btype="bandstop", analog=False)

    # Convert coefficients to Torch tensors
    b = torch.tensor(b, dtype=audio.dtype, device=audio.device)
    a = torch.tensor(a, dtype=audio.dtype, device=audio.device)

    # Ensure [channels, samples] format
    if audio.dim() == 1:
        audio = audio.unsqueeze(0)

    # Apply the filter using lfilter
    filtered_audio = AF.lfilter(audio, a_coeffs=a, b_coeffs=b, clamp=False)

    return filtered_audio.squeeze(0) if filtered_audio.size(0) == 1 else filtered_audio


def low_pass_filter(
    audio: torch.Tensor, sample_rate: int, cutoff_freq: float, filter_order: int = 4
) -> torch.Tensor:
    """
    Apply a low-pass filter using a Butterworth filter.
    """
    # Design Butterworth filter using SciPy
    nyquist = sample_rate / 2
    normalized_cutoff = cutoff_freq / nyquist
    b, a = butter(filter_order, normalized_cutoff, btype="low", analog=False)

    # Convert coefficients to Torch tensors
    b = torch.tensor(b, dtype=audio.dtype, device=audio.device)
    a = torch.tensor(a, dtype=audio.dtype, device=audio.device)

    # Ensure [channels, samples] format
    if audio.dim() == 1:
        audio = audio.unsqueeze(0)

    # Apply the filter using torchaudio
    filtered_audio = AF.lfilter(audio, a_coeffs=a, b_coeffs=b, clamp=False)

    return filtered_audio.squeeze(0) if filtered_audio.size(0) == 1 else filtered_audio


def butter_filter(
    audio: torch.Tensor,
    sample_rate: int,
    cutoff_freq: float,
    filter_type: str = "low",
    order: int = 4,
) -> torch.Tensor:
    """
    Create and apply a Butterworth filter (low-pass or high-pass).

    Parameters:
        audio (Tensor): [channels, samples] input audio signal.
        sample_rate (int): Sample rate in Hz.
        cutoff_freq (float): Cutoff frequency in Hz.
        filter_type (str): "low" for low-pass, "high" for high-pass.
        order (int): Filter order.

    Returns:
        Tensor: Filtered audio signal.
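    Example:
        A minimal sketch; the 48 kHz rate and 120 Hz cutoff are illustrative
        assumptions::

            >>> import torch
            >>> audio = torch.randn(2, 48000)
            >>> high_passed = butter_filter(
            ...     audio, sample_rate=48000, cutoff_freq=120.0, filter_type="high"
            ... )
            >>> high_passed.shape
            torch.Size([2, 48000])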
    """
    nyquist = sample_rate / 2
    normalized_cutoff = cutoff_freq / nyquist
    b, a = butter(order, normalized_cutoff, btype=filter_type, analog=False)

    b = torch.tensor(b, dtype=audio.dtype, device=audio.device)
    a = torch.tensor(a, dtype=audio.dtype, device=audio.device)

    # Ensure [channels, samples] format
    if audio.dim() == 1:
        audio = audio.unsqueeze(0)

    filtered_audio = AF.lfilter(audio, a_coeffs=a, b_coeffs=b, clamp=False)
    return filtered_audio.squeeze(0) if filtered_audio.size(0) == 1 else filtered_audio


def butter_low_pass(
    audio: torch.Tensor, sample_rate: int, cutoff_freq: float, order: int = 4
) -> torch.Tensor:
    return butter_filter(
        audio, sample_rate, cutoff_freq, filter_type="low", order=order
    )


def butter_high_pass(
    audio: torch.Tensor, sample_rate: int, cutoff_freq: float, order: int = 4
) -> torch.Tensor:
    return butter_filter(
        audio, sample_rate, cutoff_freq, filter_type="high", order=order
    )
--------------------------------------------------------------------------------
/core/harmonics.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Author: C0ffymachyne
License: GPLv3
Version: 1.0.0

Description:
    various harmonic enhancement methods
"""


import torch
import torchaudio
import torchaudio.functional as F
from typing import List


def enhance_harmonics(
    audio: torch.Tensor,
    sample_rate: int,
    harmonics: List[int] = [1, 3, 5, 7, 9, 11],
    gain_db: float = 5,
    base_frequency: float = 0,
    Q: float = 0.707,
) -> torch.Tensor:
    """Boost the given harmonics of the detected (or supplied) fundamental with biquad EQ bands."""

    if base_frequency == 0:
        pitch = F.detect_pitch_frequency(audio, sample_rate)
        base_frequency = pitch.mean().item()
        if base_frequency <= 0:  # Fallback if pitch detection fails
            base_frequency = 440  # Use a default base frequency

    # Apply EQ boosts to specific harmonic frequencies
    for harmonic in harmonics:
        freq = base_frequency * harmonic
        if freq < sample_rate / 2:  # Ensure it's within the Nyquist frequency
            audio = F.equalizer_biquad(
                audio, sample_rate, center_freq=freq, gain=gain_db, Q=Q
            )

    return audio


def enhance_harmonics2(
    audio: torch.Tensor,
    sample_rate: int,
    harmonics: list[int] = [1, 2, 3, 4, 5],
    gain_db: float = 5,
    base_frequency: float = 0,
    Q: float = 0.707,
) -> torch.Tensor:
    """
    Enhance specified harmonics in an audio signal, emulating Distressor-like harmonic enhancement.

    Parameters:
        audio (Tensor): Input audio signal (1D or 2D [channels, samples]).
        sample_rate (int): Sampling rate of the audio.
        harmonics (list): List of harmonic multipliers to enhance.
        gain_db (float): Gain to apply to each harmonic.
        base_frequency (float, optional): Fundamental frequency. If 0, it will be estimated.
        Q (float): Quality factor for the EQ bands.

    Returns:
        Tensor: Audio signal with enhanced harmonics.
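    Example:
        A minimal sketch; the explicit 110 Hz fundamental is an illustrative
        assumption that bypasses pitch detection::

            >>> import torch
            >>> audio = torch.randn(1, 44100)
            >>> out = enhance_harmonics2(
            ...     audio, 44100, harmonics=[1, 2, 3], gain_db=4.0, base_frequency=110.0
            ... )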
    """

    if base_frequency == 0:
        # Detect the pitch frequency using torchaudio's pitch detection
        pitch = F.detect_pitch_frequency(audio, sample_rate)
        base_frequency = pitch.mean().item()
        if base_frequency <= 0:  # Fallback if pitch detection fails
            base_frequency = 440  # Default base frequency (A4)

    # Create a copy of the input signal for processing
    processed_audio: torch.Tensor = audio.clone()

    # Enhance harmonics using biquad EQ for precision
    for harmonic in harmonics:
        freq = base_frequency * harmonic
        if freq < sample_rate / 2:  # Ensure frequency is within Nyquist limit
            processed_audio = F.equalizer_biquad(
                processed_audio, sample_rate, center_freq=freq, gain=gain_db, Q=Q
            )

    # Apply a non-linear saturation for warmth and further harmonic enhancement
    def non_linear_saturation(audio: torch.Tensor, drive: float = 1.0) -> torch.Tensor:
        k = torch.tensor(1.0 + drive, dtype=audio.dtype, device=audio.device)
        return torch.tanh(k * audio) / torch.tanh(k)

    processed_audio = non_linear_saturation(processed_audio, drive=gain_db / 10.0)

    # Blend processed harmonics with the original signal
    output_audio = audio + processed_audio * (gain_db / 20.0)  # Scale blend by gain
    return output_audio / torch.max(torch.abs(output_audio))  # Normalize output


def batch_equalizer_biquad(
    audio: torch.Tensor, sample_rate: int, freqs: torch.Tensor, gain_db: float, Q: float
) -> torch.Tensor:
    """
    Apply biquad filters to enhance multiple harmonics in a batch.

    Parameters:
        audio (Tensor): Input audio signal (1D or 2D [channels, samples]).
        sample_rate (int): Sampling rate of the audio.
        freqs (Tensor): Frequencies for biquad filters.
        gain_db (float): Gain to apply to each harmonic.
        Q (float): Quality factor for all filters.

    Returns:
        Tensor: Audio signal with harmonics enhanced.
    """
    audio = audio.unsqueeze(0) if audio.dim() == 1 else audio

    # Filter the signal once per harmonic frequency and collect the results
    # (each entry is a filtered copy of the audio, not raw filter coefficients)
    filtered = [
        torchaudio.functional.equalizer_biquad(
            audio, sample_rate, center_freq=f, gain=gain_db, Q=Q
        )
        for f in freqs
    ]

    # Sum the filtered outputs for all harmonics
    filtered_audio = sum(filtered)

    return filtered_audio


def enhance_harmonics3(
    audio: torch.Tensor,
    sample_rate: int,
    harmonics: List[int] = [1, 2, 3, 4, 5],
    gain_db: float = 5.0,
    base_frequency: float = 0,
    Q: float = 0.707,
) -> torch.Tensor:
    """
    Enhance specified harmonics in an audio signal, efficiently processing harmonics in a batch.

    Parameters:
        audio (Tensor): Input audio signal (1D or 2D [channels, samples]).
        sample_rate (int): Sampling rate of the audio.
        harmonics (list): List of harmonic multipliers to enhance.
        gain_db (float): Gain to apply to each harmonic.
        base_frequency (float, optional): Fundamental frequency. If 0, it will be estimated.
        Q (float): Quality factor for the EQ bands.

    Returns:
        Tensor: Audio signal with enhanced harmonics.
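    Example:
        A minimal sketch; the 220 Hz fundamental is an illustrative assumption
        that skips pitch detection::

            >>> import torch
            >>> audio = torch.randn(2, 44100)
            >>> out = enhance_harmonics3(audio, 44100, harmonics=[1, 2, 3], base_frequency=220.0)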
    """
    if base_frequency == 0:
        # Detect the pitch frequency using torchaudio's pitch detection
        pitch = torchaudio.functional.detect_pitch_frequency(audio, sample_rate)
        base_frequency = pitch.mean().item()
        if base_frequency <= 0:  # Fallback if pitch detection fails
            base_frequency = 440.0  # Default base frequency (A4)

    # Calculate harmonic frequencies
    harmonic_freqs = torch.tensor(
        [base_frequency * h for h in harmonics], device=audio.device
    )

    # Ensure frequencies are within Nyquist limit
    harmonic_freqs = harmonic_freqs[harmonic_freqs < sample_rate / 2]

    # Apply batched harmonic enhancement
    processed_audio = batch_equalizer_biquad(
        audio, sample_rate, harmonic_freqs, gain_db, Q
    )

    # Normalize the output
    return processed_audio / torch.max(torch.abs(processed_audio))


def enahnce_harmonics_23(
    audio: torch.Tensor, sample_rate: int, gain_db_base: int = 0, Q: float = 0.707
) -> torch.Tensor:
    """Boost the 2nd and 3rd harmonics with fixed-Q EQ bands."""

    audio = enhance_harmonics3(
        audio, sample_rate, harmonics=[2], gain_db=gain_db_base + 1, Q=0.303
    )
    audio = enhance_harmonics3(
        audio, sample_rate, harmonics=[3], gain_db=gain_db_base + 3, Q=0.303
    )

    return audio
--------------------------------------------------------------------------------
/core/kernels/compressor/compressor.cu:
--------------------------------------------------------------------------------
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

extern "C" __global__
void compexp_kernel(
    const double* wav_in,    // Input audio signal [L0, R0, L1, R1, ..., LN-1, RN-1]
    double* wav_out,         // Output audio signal
    const int n_channels,    // Number of channels (e.g., 2 for stereo)
    const int n_samples,     // Number of samples per channel
    const double comp,       // Compression/expansion factor
    const double release,    // Release time in ms
    const double attack,     // Attack time in ms
    const double a,          // Filter parameter < 1
    const double Fs          // Sampling rate in Hz
) {
    int ch = blockIdx.x;          // Each block processes one channel
    int thread_id = threadIdx.x;  // Thread within the block
    int stride = blockDim.x;      // Number of threads in the block

    if (ch >= n_channels) return;

    double attack_coeff = exp(-1.0 / (Fs * (attack * 1e-3)));
    double release_coeff = exp(-1.0 / (Fs * (release * 1e-3)));

    double h = 0.0;  // Initialize filter state for envelope detection

    // Divide samples across threads in parallel
    for (int i = thread_id; i < n_samples; i += stride) {
        int sample_idx = i * n_channels + ch;
        double sample = wav_in[sample_idx];

        // Envelope detection using attack/release dynamics
        double abs_sample = fabs(sample);
        if (abs_sample > h) {
            h = attack_coeff * (h - abs_sample) + abs_sample;
        } else {
            h = release_coeff * (h - abs_sample) + abs_sample;
        }

        // Apply compression/expansion: the same power law covers both cases
        // (comp > 0 attenuates high envelope values, comp < 0 boosts them)
        double gain = pow(h + 1e-8, -comp);

        // Scale output
        wav_out[sample_idx] = sample * gain;
    }
}
--------------------------------------------------------------------------------
/core/kernels/limiter/limiter_dev.cu:
-------------------------------------------------------------------------------- 1 | #ifndef M_PI 2 | #define M_PI 3.14159265358979323846 3 | #endif 4 | 5 | extern "C" __global__ 6 | void limiter_kernel( 7 | const double*__restrict__ wav_in, // Input audio signal [L0, R0, L1, R1, ..., LN-1, RN-1] 8 | double* __restrict__ wav_out, // Output audio signal 9 | double* __restrict__ debug_out, // Debug buffer [envelope, gain] 10 | const int n_channels, // Number of channels (e.g., 2 for stereo) 11 | const int n_samples, // Number of samples per channel 12 | double threshold, // Threshold in percents (0-100) 13 | double slope, // Slope angle in percents (0-100) 14 | const double sr, // Sample rate (samples/sec) 15 | double twnd, // Window time for RMS in ms 16 | double tatt, // Attack time in ms 17 | double trel // Release time in ms 18 | ) { 19 | // Only one thread handles the entire stereo pair 20 | int ch = blockIdx.x * blockDim.x + threadIdx.x; // Thread processes a single channel 21 | 22 | if (ch >= n_channels) return; 23 | 24 | double attack_coeff = exp(-1.0 / (sr * (tatt * 1e-3))); 25 | double release_coeff = exp(-1.0 / (sr * (trel * 1e-3))); 26 | double envelope = 0.00; 27 | //threshold = .55; 28 | //slope = 1.0; 29 | 30 | for (int i = 0; i < n_samples; ++i) { 31 | 32 | double sample = wav_in[i * n_channels + ch]; 33 | 34 | // Envelope tracking 35 | double abs_sample = fabs(sample); 36 | if (abs_sample > envelope) { 37 | envelope = attack_coeff * (envelope - abs_sample) + abs_sample; 38 | } else { 39 | envelope = release_coeff * (envelope - abs_sample) + abs_sample; 40 | } 41 | 42 | // Gain calculation 43 | double gain = 1.0; 44 | if (envelope > threshold) { 45 | gain = pow(10.0, -slope * (log10(envelope) - log10(threshold))); 46 | } 47 | // Upward compression below threshold 48 | double upward_compression_gain = 1.0; 49 | if (envelope < threshold && envelope > 0.0) { 50 | upward_compression_gain = pow(10.0, slope * (log10(threshold) - log10(envelope))); 51 | } 52 | // Apply gain 53 | wav_out[i * n_channels + ch] = sample * gain * upward_compression_gain; 54 | 55 | // Debugging output (envelope and gain) 56 | if (debug_out) { 57 | debug_out[i * n_channels + ch] = gain; 58 | } 59 | } 60 | } -------------------------------------------------------------------------------- /core/kernels/limiter/limiter_down_parallel.cu: -------------------------------------------------------------------------------- 1 | #ifndef M_PI 2 | #define M_PI 3.14159265358979323846 3 | #endif 4 | 5 | extern "C" __global__ 6 | void limiter_kernel( 7 | const double* __restrict__ wav_in, // Input audio signal [L0, R0, L1, R1, ..., LN-1, RN-1] 8 | double* __restrict__ wav_out, // Output audio signal 9 | const int n_channels, // Number of channels (e.g., 2 for stereo) 10 | const int n_samples, // Number of samples per channel 11 | const double threshold, // Threshold in linear scale (e.g., 0.5 for 50%) 12 | const double slope, // Slope parameter for gain calculation 13 | const double sr, // Sample rate in Hz 14 | const double tatt, // Attack time in ms 15 | const double trel // Release time in ms 16 | ) { 17 | int ch = blockIdx.x; // Each block processes one channel 18 | int thread_id = threadIdx.x; // Thread within the block 19 | int stride = blockDim.x; // Number of threads in the block 20 | 21 | if (ch >= n_channels) return; 22 | 23 | double attack_coeff = exp(-1.0 / (sr * (tatt * 1e-3))); 24 | double release_coeff = exp(-1.0 / (sr * (trel * 1e-3))); 25 | 26 | // Use release time to determine precompute samples 27 | int 
precompute_samples = int((trel * 1e-3) * sr); // Convert release time to samples 28 | 29 | // Each thread computes its own range of samples 30 | int start_idx = thread_id * (n_samples / stride); 31 | int end_idx = (thread_id + 1) * (n_samples / stride); 32 | 33 | // Extend range backward for precomputing 34 | int precompute_start_idx = max(0, start_idx - precompute_samples); 35 | 36 | double envelope = 0.0; // Envelope tracking state 37 | 38 | // Precompute envelope for the extra range 39 | for (int i = precompute_start_idx; i < start_idx; ++i) { 40 | int sample_idx = i * n_channels + ch; 41 | double sample = wav_in[sample_idx]; 42 | double abs_sample = fabs(sample); 43 | 44 | if (abs_sample > envelope) { 45 | envelope = attack_coeff * (envelope - abs_sample) + abs_sample; 46 | } else { 47 | envelope = release_coeff * (envelope - abs_sample) + abs_sample; 48 | } 49 | } 50 | 51 | // Process assigned range of samples 52 | for (int i = start_idx; i < end_idx; ++i) { 53 | int sample_idx = i * n_channels + ch; 54 | double sample = wav_in[sample_idx]; 55 | double abs_sample = fabs(sample); 56 | 57 | // Envelope tracking 58 | if (abs_sample > envelope) { 59 | envelope = attack_coeff * (envelope - abs_sample) + abs_sample; 60 | } else { 61 | envelope = release_coeff * (envelope - abs_sample) + abs_sample; 62 | } 63 | 64 | // Gain calculation for downward limiting 65 | double gain = 1.0; 66 | if (envelope > threshold) { 67 | gain = pow(10.0, -slope * (log10(envelope) - log10(threshold))); 68 | } 69 | 70 | // Apply both gains 71 | wav_out[sample_idx] = sample * gain; 72 | } 73 | } -------------------------------------------------------------------------------- /core/kernels/limiter/limiter_hard_clipper.cu: -------------------------------------------------------------------------------- 1 | #ifndef M_PI 2 | #define M_PI 3.14159265358979323846 3 | #endif 4 | 5 | extern "C" __global__ 6 | void limiter_kernel( 7 | const double* __restrict__ wav_in, // Input audio signal [L0, R0, L1, R1, ..., LN-1, RN-1] 8 | double* __restrict__ wav_out, // Output audio signal 9 | const int n_channels, // Number of channels (e.g., 2 for stereo) 10 | const int n_samples, // Number of samples per channel 11 | const double threshold, // Threshold in linear scale (e.g., 0.5 for 50%) 12 | const double slope, // Slope parameter for gain calculation 13 | const double sr, // Sample rate in Hz 14 | const double tatt, // Attack time in ms 15 | const double trel // Release time in ms 16 | ) 17 | { 18 | int ch = blockIdx.x; // Each block processes one channel 19 | int thread_id = threadIdx.x; // Thread within the block 20 | int stride = blockDim.x; // Number of threads in the block 21 | 22 | if (ch >= n_channels) return; 23 | 24 | int start_idx = thread_id * (n_samples / stride); 25 | int end_idx = (thread_id + 1) * (n_samples / stride); 26 | 27 | double clip_limit = threshold; 28 | double clip_limit_inv = 1.0 / clip_limit; 29 | 30 | for (int i = start_idx; i < end_idx; ++i) 31 | { 32 | int sample_idx = i * n_channels + ch; 33 | double y = wav_in[sample_idx]; 34 | 35 | double abs = fabs(y); 36 | if(abs >= clip_limit){ 37 | if (y < 0){ 38 | y = -clip_limit; 39 | }else{ 40 | y = clip_limit; 41 | } 42 | } 43 | wav_out[sample_idx] = y; 44 | } 45 | } -------------------------------------------------------------------------------- /core/kernels/limiter/limiter_soft_clipper.cu: -------------------------------------------------------------------------------- 1 | #ifndef M_PI 2 | #define M_PI 3.14159265358979323846 3 | #endif 4 | 5 | extern 
"C" __global__ 6 | void limiter_kernel( 7 | const double* __restrict__ wav_in, // Input audio signal [L0, R0, L1, R1, ..., LN-1, RN-1] 8 | double* __restrict__ wav_out, // Output audio signal 9 | const int n_channels, // Number of channels (e.g., 2 for stereo) 10 | const int n_samples, // Number of samples per channel 11 | const double threshold, // Threshold in linear scale (e.g., 0.5 for 50%) 12 | const double slope, // Slope parameter for gain calculation 13 | const double sr, // Sample rate in Hz 14 | const double tatt, // Attack time in ms 15 | const double trel // Release time in ms 16 | ) 17 | { 18 | int ch = blockIdx.x; // Each block processes one channel 19 | int thread_id = threadIdx.x; // Thread within the block 20 | int stride = blockDim.x; // Number of threads in the block 21 | 22 | if (ch >= n_channels) return; 23 | 24 | int start_idx = thread_id * (n_samples / stride); 25 | int end_idx = (thread_id + 1) * (n_samples / stride); 26 | 27 | double clip_limit = threshold; 28 | 29 | for (int i = start_idx; i < end_idx; ++i){ 30 | int sample_idx = i * n_channels + ch; 31 | double y = wav_in[sample_idx]; 32 | 33 | // cubic soft clipping 34 | if (y <= -1.0) { 35 | y = -2.0 / 3.0; 36 | } else if (y >= 1.0) { 37 | y = 2.0 / 3.0; 38 | } else { 39 | y = y - (1.0 / 3.0) * y * y * y; 40 | } 41 | wav_out[sample_idx] = y; 42 | } 43 | } -------------------------------------------------------------------------------- /core/kernels/limiter/limiter_updown_parallel.cu: -------------------------------------------------------------------------------- 1 | #ifndef M_PI 2 | #define M_PI 3.14159265358979323846 3 | #endif 4 | 5 | extern "C" __global__ 6 | void limiter_kernel( 7 | const double* __restrict__ wav_in, // Input audio signal [L0, R0, L1, R1, ..., LN-1, RN-1] 8 | double* __restrict__ wav_out, // Output audio signal 9 | const int n_channels, // Number of channels (e.g., 2 for stereo) 10 | const int n_samples, // Number of samples per channel 11 | const double threshold, // Threshold in linear scale (e.g., 0.5 for 50%) 12 | const double slope, // Slope parameter for gain calculation 13 | const double sr, // Sample rate in Hz 14 | const double tatt, // Attack time in ms 15 | const double trel // Release time in ms 16 | ) { 17 | int ch = blockIdx.x; // Each block processes one channel 18 | int thread_id = threadIdx.x; // Thread within the block 19 | int stride = blockDim.x; // Number of threads in the block 20 | 21 | if (ch >= n_channels) return; 22 | 23 | double attack_coeff = exp(-1.0 / (sr * (tatt * 1e-3))); 24 | double release_coeff = exp(-1.0 / (sr * (trel * 1e-3))); 25 | 26 | // Use release time to determine precompute samples 27 | int precompute_samples = int((trel * 1e-3) * sr); // Convert release time to samples 28 | 29 | // Each thread computes its own range of samples 30 | int start_idx = thread_id * (n_samples / stride); 31 | int end_idx = (thread_id + 1) * (n_samples / stride); 32 | 33 | // Extend range backward for precomputing 34 | int precompute_start_idx = max(0, start_idx - precompute_samples); 35 | 36 | double envelope = 0.0; // Envelope tracking state 37 | 38 | // Precompute envelope for the extra range 39 | for (int i = precompute_start_idx; i < start_idx; ++i) { 40 | int sample_idx = i * n_channels + ch; 41 | double sample = wav_in[sample_idx]; 42 | double abs_sample = fabs(sample); 43 | 44 | if (abs_sample > envelope) { 45 | envelope = attack_coeff * (envelope - abs_sample) + abs_sample; 46 | } else { 47 | envelope = release_coeff * (envelope - abs_sample) + abs_sample; 48 | 
} 49 | } 50 | 51 | // Process assigned range of samples 52 | for (int i = start_idx; i < end_idx; ++i) { 53 | int sample_idx = i * n_channels + ch; 54 | double sample = wav_in[sample_idx]; 55 | double abs_sample = fabs(sample); 56 | 57 | // Envelope tracking 58 | if (abs_sample > envelope) { 59 | envelope = attack_coeff * (envelope - abs_sample) + abs_sample; 60 | } else { 61 | envelope = release_coeff * (envelope - abs_sample) + abs_sample; 62 | } 63 | 64 | // Gain calculation for downward limiting 65 | double gain = 1.0; 66 | if (envelope > threshold) { 67 | gain = pow(10.0, -slope * (log10(envelope) - log10(threshold))); 68 | } 69 | 70 | // Upward compression below threshold 71 | double upward_compression_gain = 1.0; 72 | if (envelope < threshold && envelope > 0.0) { 73 | upward_compression_gain = pow(10.0, slope * (log10(threshold) - log10(envelope))); 74 | } 75 | 76 | // Apply both gains 77 | wav_out[sample_idx] = sample * gain * upward_compression_gain; 78 | } 79 | } -------------------------------------------------------------------------------- /core/kernels/limiter/limiter_updown_weighted_parallel.cu: -------------------------------------------------------------------------------- 1 | #ifndef M_PI 2 | #define M_PI 3.14159265358979323846 3 | #endif 4 | 5 | extern "C" __global__ 6 | void limiter_kernel( 7 | const double* __restrict__ wav_in, // Input audio signal [L0, R0, L1, R1, ..., LN-1, RN-1] 8 | double* __restrict__ wav_out, // Output audio signal 9 | const int n_channels, // Number of channels (e.g., 2 for stereo) 10 | const int n_samples, // Number of samples per channel 11 | const double threshold, // Threshold in linear scale (e.g., 0.5 for 50%) 12 | const double slope, // Slope parameter for gain calculation 13 | const double sr, // Sample rate in Hz 14 | const double tatt, // Attack time in ms 15 | const double trel // Release time in ms 16 | ) 17 | { 18 | int ch = blockIdx.x; // Each block processes one channel 19 | int thread_id = threadIdx.x; // Thread within the block 20 | int stride = blockDim.x; // Number of threads in the block 21 | 22 | if (ch >= n_channels) return; 23 | 24 | double attack_coeff = exp(-1.0 / (sr * (tatt * 1e-3))); 25 | double release_coeff = exp(-1.0 / (sr * (trel * 1e-3))); 26 | 27 | // Use release time to determine precompute samples 28 | int precompute_samples = int((trel * 1e-3) * sr); // Convert release time to samples 29 | 30 | // Each thread computes its own range of samples 31 | int start_idx = thread_id * (n_samples / stride); 32 | int end_idx = (thread_id + 1) * (n_samples / stride); 33 | 34 | // Extend range backward for precomputing 35 | int precompute_start_idx = max(0, start_idx - precompute_samples); 36 | 37 | double envelope = 0.0; // Envelope tracking state 38 | 39 | // Precompute envelope for the extra range 40 | for (int i = precompute_start_idx; i < start_idx; ++i) { 41 | int sample_idx = i * n_channels + ch; 42 | double sample = wav_in[sample_idx]; 43 | double abs_sample = fabs(sample); 44 | 45 | if (abs_sample > envelope) { 46 | envelope = attack_coeff * (envelope - abs_sample) + abs_sample; 47 | } else { 48 | envelope = release_coeff * (envelope - abs_sample) + abs_sample; 49 | } 50 | } 51 | 52 | // Process assigned range of samples 53 | for (int i = start_idx; i < end_idx; ++i) { 54 | int sample_idx = i * n_channels + ch; 55 | double sample = wav_in[sample_idx]; 56 | double abs_sample = fabs(sample); 57 | 58 | // Envelope tracking 59 | if (abs_sample > envelope) { 60 | envelope = attack_coeff * (envelope - abs_sample) + 
abs_sample;
        } else {
            envelope = release_coeff * (envelope - abs_sample) + abs_sample;
        }

        // Gain calculation for downward limiting
        double gain = 1.0;
        if (envelope > threshold) {
            gain = pow(10.0, -slope * (log10(envelope) - log10(threshold)));
        }

        // Upward compression below threshold with gradual application
        double upward_compression_gain = 1.0;
        if (envelope < threshold && envelope > 0.0) {
            double t = 1.0 - threshold;  // Scaling factor based on threshold
            // Dynamic factor increases as envelope decreases
            double dynamic_factor = (threshold - envelope) / threshold;
            // Clamp to [0, 1]
            dynamic_factor = fmin(fmax(dynamic_factor, 0.0), 1.0);
            upward_compression_gain = 1.0 + t * dynamic_factor * pow(10.0, slope * (log10(threshold) - log10(envelope)));
        }

        // Apply both gains
        wav_out[sample_idx] = sample * gain * upward_compression_gain;
    }
}
--------------------------------------------------------------------------------
/core/limiting.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Author: C0ffymachyne
License: GPLv3
Version: 1.0.0

Description:
    various limiting methods
"""

import cupy as cp
import torch
import numpy as np
from typing import List

from ..core.utilitiescuda import read_kernel_by_name

limiter_updown_parallel = read_kernel_by_name(
    "limiter_updown_parallel",
    kernel_class="limiter",
    kernel_identifier="limiter_kernel",
)
limiter_updown_weighted_parallel = read_kernel_by_name(
    "limiter_updown_weighted_parallel",
    kernel_class="limiter",
    kernel_identifier="limiter_kernel",
)
limiter_down_parallel = read_kernel_by_name(
    "limiter_down_parallel", kernel_class="limiter", kernel_identifier="limiter_kernel"
)
limiter_soft_clipper = read_kernel_by_name(
    "limiter_soft_clipper", kernel_class="limiter", kernel_identifier="limiter_kernel"
)
limiter_hard_clipper = read_kernel_by_name(
    "limiter_hard_clipper", kernel_class="limiter", kernel_identifier="limiter_kernel"
)
limiter_dev = read_kernel_by_name(
    "limiter_dev", kernel_class="limiter", kernel_identifier="limiter_kernel"
)

_limiter_kernel_map = {
    "downward-upward": limiter_updown_weighted_parallel,
    "downward": limiter_down_parallel,
    "soft-clipper": limiter_soft_clipper,
    "hard-clipper": limiter_hard_clipper,
}


def limiter_get_modes() -> List[str]:
    return list(_limiter_kernel_map.keys())


def limiter(
    audio_in: torch.Tensor | np.ndarray,
    mode: str = "downward",
    sample_rate: int = 44100,
    threshold: float = 0.5,  # Threshold in linear scale (0-1)
    slope: float = 1.0,  # Slope of the gain reduction curve
    attack_ms: float = 0.008,  # Attack time in ms
    release_ms: float = 100.0,  # Release time in ms
) -> torch.Tensor:
    """
    Limits stereo audio using an optimized CUDA kernel with envelope-follower gain control.

    Parameters:
        audio_in (torch.Tensor or np.ndarray): Input stereo audio signal with shape (2, n_samples), i.e. [channels, samples].
        mode (str): Limiter kernel to use; one of limiter_get_modes().
        sample_rate (int): Sampling rate in Hz.
        threshold (float): Threshold in linear scale (e.g., 0.5 for 50%).
        slope (float): Slope of the gain reduction curve.
        attack_ms (float): Attack time in milliseconds.
        release_ms (float): Release time in milliseconds.

    Returns:
        torch.Tensor: Limited stereo audio with shape (2, n_samples).
    """
    # Convert input to CPU double-precision NumPy array if necessary

    device = audio_in.device

    audio_in = audio_in.T
    if isinstance(audio_in, torch.Tensor):
        audio_in = audio_in.detach().cpu()
        audio_in = audio_in.numpy()
    else:
        audio_in = np.asarray(audio_in, dtype=np.float64)

    # Ensure the audio is in shape (n_samples, 2)
    if audio_in.ndim != 2 or audio_in.shape[1] != 2:
        raise ValueError(
            f"Input audio must have shape (n_samples, 2), but got {audio_in.shape}"
        )

    n_samples = audio_in.shape[0]
    n_channels = audio_in.shape[1]

    wav_in_flat = audio_in.flatten()

    wav_in_gpu = cp.asarray(wav_in_flat, dtype=cp.float64)
    wav_out_gpu = cp.zeros_like(wav_in_gpu)

    # Define grid and block dimensions
    block_size = 64
    grid_size = n_channels  # (n_channels + block_size - 1) // block_size
    shared_mem_size = n_channels * cp.float64().nbytes  # Shared memory for envelopes

    if mode not in limiter_get_modes():
        raise Exception(f"Limiter Kernel '{mode}' Not Found")

    kernel = _limiter_kernel_map[mode]

    kernel(
        (grid_size,),
        (block_size,),
        (
            wav_in_gpu,
            wav_out_gpu,
            np.int32(n_channels),
            np.int32(n_samples),
            np.float64(threshold),
            np.float64(slope),
            np.float64(sample_rate),
            np.float64(attack_ms),
            np.float64(release_ms),
        ),
        shared_mem=shared_mem_size,
    )

    # Retrieve results
    wav_out_host = wav_out_gpu.get().astype(np.float64)
    # Reshape the output
    wav_out_stereo = wav_out_host.reshape((n_samples, n_channels))

    # Move the result back to the device the input came from
    out_tensor = torch.from_numpy(wav_out_stereo).to(device)

    return out_tensor.T
--------------------------------------------------------------------------------
/core/loudness.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Author: C0ffymachyne
License: GPLv3
Version: 1.0.0

Description:
    various normalization methods
"""

import torch
import pyloudnorm as pyln  # pip install pyloudnorm
import numpy as np


def rms_normalization(audio: torch.Tensor, target_rms: float = 0.1) -> torch.Tensor:
    rms = torch.sqrt(torch.mean(audio**2))
    scaling_factor = target_rms / rms
    normalized_audio = audio * scaling_factor
    return normalized_audio


def lufs_normalization(
    audio: torch.Tensor, sample_rate: int, target_lufs: float = -14.0
) -> torch.Tensor:
    meter = pyln.Meter(sample_rate)  # Create a loudness meter

    __audio = audio.T.cpu().numpy()
    loudness
= meter.integrated_loudness(__audio) # Current LUFS 31 | 32 | loudness_offset = target_lufs - loudness 33 | normalized_audio = __audio * (10 ** (loudness_offset / 20.0)) 34 | 35 | result = torch.from_numpy(normalized_audio) 36 | result = result.T 37 | 38 | result = result.to(device=audio.device, dtype=result.dtype) 39 | 40 | return result 41 | 42 | 43 | def peak_normalization(audio: torch.Tensor, target_peak: float = 0.9) -> torch.Tensor: 44 | peak = torch.max(torch.abs(audio)) 45 | scaling_factor = target_peak / peak 46 | normalized_audio = audio * scaling_factor 47 | return normalized_audio 48 | 49 | 50 | def get_loudness(audio: torch.Tensor, sample_rate: int) -> float: 51 | meter = pyln.Meter(sample_rate) # Create a loudness meter 52 | audio = audio.T 53 | audio = audio.cpu() 54 | loudness: float = float(meter.integrated_loudness(audio.numpy())) # Current LUFS 55 | return loudness 56 | 57 | 58 | def set_loudness2( 59 | audio_signal: torch.Tensor, sample_rate: int, target_loudness_db: float = -20.0 60 | ) -> torch.Tensor: 61 | """ 62 | Adjusts the loudness of the audio signal to a target level in decibels. 63 | 64 | Args: 65 | audio_signal (torch.Tensor): Input audio signal (channels, samples). 66 | sample_rate (int): Sample rate of the audio signal. 67 | target_loudness_db (float): Desired loudness in dB (e.g., -20.0 dB). 68 | 69 | Returns: 70 | torch.Tensor: Audio signal adjusted to the target loudness. 71 | """ 72 | # Convert PyTorch tensor to NumPy array for loudness calculation 73 | audio_np = audio_signal.cpu().numpy().T # Convert to [samples, channels] 74 | 75 | # Use pyloudnorm Meter to calculate and normalize loudness 76 | meter = pyln.Meter(sample_rate) # Create loudness meter 77 | current_loudness = meter.integrated_loudness(audio_np) # Measure LUFS 78 | 79 | # Compute loudness adjustment gain 80 | loudness_offset = target_loudness_db - current_loudness 81 | gain_factor = 10 ** (loudness_offset / 20.0) 82 | 83 | # Apply gain to adjust loudness 84 | adjusted_audio_np = audio_np * gain_factor 85 | 86 | # Convert back to PyTorch tensor 87 | adjusted_audio = torch.from_numpy(adjusted_audio_np.T).to( 88 | audio_signal.device, dtype=torch.float32 89 | ) 90 | 91 | return adjusted_audio 92 | 93 | 94 | def set_loudness( 95 | audio_signal: torch.Tensor, sample_rate: int, target_loudness_db: float = -20.0 96 | ) -> torch.Tensor: 97 | """ 98 | Adjusts the loudness of the audio signal to a target level in decibels, 99 | ensuring no clipping occurs. 100 | 101 | Args: 102 | audio_signal (torch.Tensor): Input audio signal (channels, samples). 103 | sample_rate (int): Sample rate of the audio signal. 104 | target_loudness_db (float): Desired loudness in dB (e.g., -20.0 dB). 105 | 106 | Returns: 107 | torch.Tensor: Audio signal adjusted to the target loudness. 
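Example:
    A minimal sketch with illustrative values (a quiet random stereo signal pushed to -23 LUFS)::

        >>> import torch
        >>> audio = torch.randn(2, 44100) * 0.25
        >>> adjusted = set_loudness(audio, 44100, target_loudness_db=-23.0)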
108 | """ 109 | # Convert PyTorch tensor to NumPy array for loudness calculation 110 | audio_np = audio_signal.cpu().numpy().T # Convert to [samples, channels] 111 | 112 | # Use pyloudnorm Meter to calculate and normalize loudness 113 | meter = pyln.Meter(sample_rate) # Create loudness meter 114 | current_loudness = meter.integrated_loudness(audio_np) # Measure LUFS 115 | 116 | # Compute loudness adjustment gain 117 | loudness_offset = target_loudness_db - current_loudness 118 | gain_factor = 10 ** (loudness_offset / 20.0) 119 | 120 | # Apply gain to adjust loudness 121 | adjusted_audio_np = audio_np * gain_factor 122 | 123 | # Prevent clipping by normalizing the peak 124 | peak_amplitude = np.max(np.abs(adjusted_audio_np)) 125 | if peak_amplitude > 1.0: 126 | adjusted_audio_np = adjusted_audio_np / peak_amplitude 127 | 128 | # Convert back to PyTorch tensor 129 | adjusted_audio = torch.from_numpy(adjusted_audio_np.T).to( 130 | audio_signal.device, dtype=torch.float32 131 | ) 132 | 133 | return adjusted_audio 134 | 135 | 136 | def automatic_gain_control( 137 | audio: torch.Tensor, target_level: float = 0.7, alpha: float = 0.1 138 | ) -> torch.Tensor: 139 | current_level = torch.mean(torch.abs(audio)) 140 | gain = target_level / (current_level + 1e-6) 141 | smoothed_gain = alpha * gain + (1 - alpha) * 1.0 142 | agc_audio = audio * smoothed_gain 143 | return agc_audio 144 | -------------------------------------------------------------------------------- /core/mixing.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | audio mixing and combining methods 10 | """ 11 | 12 | import torch 13 | 14 | 15 | def combine_audio_files( 16 | waveform_a: torch.Tensor, 17 | waveform_b: torch.Tensor, 18 | sample_rate: int, 19 | chunk_duration: float = 2.0, 20 | ) -> torch.Tensor: 21 | """ 22 | Combine two audio files by alternating 2-second chunks, cropping to the shorter audio. 23 | 24 | Args: 25 | waveform_a (torch.Tensor): Tensor of the first audio file (channels x samples). 26 | waveform_b (torch.Tensor): Tensor of the second audio file (channels x samples). 27 | sample_rate (int): Sample rate of the audio files. 28 | chunk_duration (float): Duration of each chunk in seconds (default is 2 seconds). 29 | 30 | Returns: 31 | torch.Tensor: Combined waveform. 
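Example (illustrative; two six-second stereo buffers):
    >>> import torch
    >>> sr = 44100
    >>> a, b = torch.randn(2, sr * 6), torch.randn(2, sr * 6)
    >>> combined = combine_audio_files(a, b, sr, chunk_duration=2.0)

Note that both inputs contribute a chunk in every loop iteration, so the
combined waveform is roughly twice the length of the cropped inputs.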
32 | """ 33 | # Crop to the shorter length 34 | min_length = min(waveform_a.shape[1], waveform_b.shape[1]) 35 | waveform_a = waveform_a[:, :min_length] 36 | waveform_b = waveform_b[:, :min_length] 37 | 38 | # Calculate chunk size in samples 39 | chunk_size = int(chunk_duration * sample_rate) 40 | 41 | # Determine the total number of samples 42 | total_samples = waveform_a.shape[1] 43 | 44 | # Initialize the output waveform 45 | combined_waveform = [] 46 | 47 | # Alternate chunks between the two audio files 48 | for start in range(0, total_samples, chunk_size): 49 | end = min(start + chunk_size, total_samples) 50 | combined_waveform.append(waveform_a[:, start:end]) 51 | combined_waveform.append(waveform_b[:, start:end]) 52 | 53 | # Concatenate the combined waveform 54 | combined_waveform = torch.cat(combined_waveform, dim=1) 55 | 56 | return combined_waveform 57 | -------------------------------------------------------------------------------- /core/plotting.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | various plotting methods for debugging and visualization 10 | """ 11 | import numpy as np 12 | import torch 13 | import torchaudio 14 | from PIL import Image 15 | import matplotlib.pyplot as plt 16 | from typing import Tuple, List 17 | from matplotlib.figure import Figure 18 | 19 | 20 | def _figure_to_image(figure: Figure, dpi: int = 96) -> Image.Image: 21 | """Convert a Matplotlib figure to a high-resolution RGB PIL Image.""" 22 | figure.set_dpi(dpi) 23 | figure.canvas.draw() 24 | data = np.frombuffer(figure.canvas.tostring_argb(), dtype=np.uint8) 25 | width, height = figure.canvas.get_width_height() 26 | image = data.reshape((height, width, 4)) # ARGB format 27 | 28 | # Convert ARGB to RGB by dropping the alpha channel 29 | rgb_image = np.zeros((height, width, 3), dtype=np.uint8) 30 | rgb_image[:, :, 0] = image[:, :, 1] # Red 31 | rgb_image[:, :, 1] = image[:, :, 2] # Green 32 | rgb_image[:, :, 2] = image[:, :, 3] # Blue 33 | 34 | return Image.fromarray(rgb_image) 35 | 36 | 37 | def get_wave( 38 | waveform: torch.Tensor, 39 | sample_rate: int, 40 | title: str = "Waveform", 41 | xlim: int = 1000, 42 | ylim: int = 1000, 43 | ) -> Image.Image: 44 | waveform = waveform.cpu().numpy() 45 | 46 | num_channels, num_frames = waveform.shape 47 | time_axis = np.arange(0, num_frames) / sample_rate 48 | 49 | figure, axes = plt.subplots(num_channels, 1) 50 | if num_channels == 1: 51 | axes = [axes] 52 | for c in range(num_channels): 53 | axes[c].plot(time_axis, waveform[c], linewidth=1) 54 | axes[c].grid(True) 55 | if num_channels > 1: 56 | axes[c].set_ylabel(f"Channel {c+1}") 57 | if xlim: 58 | axes[c].set_xlim(0, xlim) 59 | if ylim: 60 | axes[c].set_ylim(-ylim, ylim) 61 | 62 | 63 | figure.suptitle(title) 64 | 65 | waveform_image = _figure_to_image(figure) 66 | 67 | return waveform_image 68 | 69 | 70 | def get_spectogram( 71 | waveform: torch.Tensor, 72 | sample_rate: int, 73 | n_fft: int = 4096, 74 | n_mels: int = 512, 75 | title: str = "Spectrogram", 76 | xlim: int = 8192, 77 | dpi: int = 96, # Set a high DPI for better image resolution 78 | ) -> Image.Image: 79 | """Generate and plot a high-resolution mel spectrogram from a waveform.""" 80 | 81 | # Parameters for Mel Spectrogram 82 | win_length = n_fft // 2 83 | hop_length = n_fft // 4 # Smaller hop for better time resolution 84 | 85 | spectrogram_transform = torchaudio.transforms.MelSpectrogram( 86 |
sample_rate=sample_rate, 87 | n_fft=n_fft, 88 | win_length=win_length, 89 | hop_length=hop_length, 90 | center=True, 91 | pad_mode="reflect", 92 | normalized=True, 93 | power=2.0, # Using power spectrogram 94 | norm="slaney", 95 | n_mels=n_mels, 96 | mel_scale="slaney", 97 | ).to(waveform.device, dtype=waveform.dtype) 98 | 99 | # Compute spectrogram 100 | mel_spectrogram = spectrogram_transform(waveform).cpu() 101 | 102 | # Convert to decibel scale for better visualization 103 | spectrogram = torchaudio.transforms.AmplitudeToDB(top_db=80)( 104 | mel_spectrogram 105 | ).numpy() 106 | 107 | # Plot the spectrogram 108 | num_channels, _ = waveform.shape 109 | figure, axes = plt.subplots( 110 | num_channels, 1, figsize=(20, 10 * num_channels), squeeze=False, dpi=dpi 111 | ) 112 | figure.suptitle(title, fontsize=16) 113 | 114 | for i, ax in enumerate(axes[:, 0]): # Unpack axes 115 | ax.imshow( 116 | spectrogram[i], 117 | origin="lower", 118 | aspect="auto", 119 | extent=[0, xlim, 0, sample_rate / 2], 120 | cmap="magma", 121 | ) 122 | ax.set_title(f"Channel {i + 1}", fontsize=14) 123 | ax.set_xlabel("Time (frames)", fontsize=12) 124 | ax.set_ylabel("Frequency (Hz)", fontsize=12) 125 | ax.tick_params(axis="both", which="major", labelsize=10) 126 | 127 | plt.tight_layout(rect=[0, 0.03, 1, 0.95]) 128 | 129 | # Convert the figure to a high-resolution image 130 | waveform_image = _figure_to_image(figure, dpi=dpi) 131 | plt.close(figure) # Close the figure to free up memory 132 | return waveform_image 133 | 134 | 135 | def save_image(filepath: str, image: Image.Image) -> None: 136 | image.save(filepath, format="PNG") # PNG is lossless; no quality setting applies 137 | 138 | 139 | def save_harmonic_spectrum( 140 | waveform: torch.Tensor, 141 | sample_rate: int, 142 | output_image: str, 143 | figsize: Tuple[int, int] = (12, 6), 144 | num_harmonics: int = 10, 145 | ) -> None: 146 | """ 147 | Generate the harmonic spectrum of a waveform and save it as an image. 148 | 149 | Parameters: 150 | waveform (torch.Tensor): Audio waveform tensor of shape (channels, samples). 151 | sample_rate (int): Sample rate of the audio. 152 | output_image (str): Path to save the output image. 153 | figsize (tuple): Size of the output figure in inches. 154 | num_harmonics (int): Number of harmonics to calculate.
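Example (illustrative; a pure 440 Hz tone keeps the expected harmonic locations obvious):
    >>> import torch
    >>> sr = 44100
    >>> t = torch.arange(sr) / sr
    >>> tone = torch.sin(2 * torch.pi * 440.0 * t).unsqueeze(0)
    >>> save_harmonic_spectrum(tone, sr, "harmonics.png", num_harmonics=8)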
155 | 156 | Returns: 157 | None 158 | """ 159 | # Ensure mono audio (combine channels if necessary) 160 | if waveform.size(0) > 1: 161 | waveform = torch.mean(waveform, dim=0, keepdim=True) 162 | 163 | # Perform FFT to get the frequency domain 164 | fft = torch.fft.fft(waveform) 165 | magnitude = torch.abs(fft[0]) # Magnitude of the FFT 166 | frequencies = torch.fft.fftfreq(waveform.size(1), d=1 / sample_rate).to( 167 | device=waveform.device 168 | ) 169 | 170 | # Extract the fundamental frequency 171 | fundamental_idx = torch.argmax(magnitude[: len(magnitude) // 2]) 172 | fundamental_freq = frequencies[fundamental_idx] 173 | 174 | # Calculate harmonic frequencies (clamp indices so high harmonics cannot run past the FFT) 175 | harmonic_frequencies = [fundamental_freq * (i + 1) for i in range(num_harmonics)] 176 | harmonic_amplitudes = [ 177 | magnitude[min(int(harmonic / sample_rate * len(magnitude)), len(magnitude) - 1)] 178 | for harmonic in harmonic_frequencies 179 | ] 180 | harmonic_frequencies = torch.tensor(harmonic_frequencies).cpu().numpy() 181 | harmonic_amplitudes = torch.tensor(harmonic_amplitudes).cpu().numpy() 182 | 183 | # Plot the harmonic spectrum 184 | plt.figure(figsize=figsize) 185 | plt.plot(harmonic_frequencies, harmonic_amplitudes, color="blue", linewidth=2) 186 | plt.title("Harmonic Spectrum") 187 | plt.xlabel("Frequency (Hz)") 188 | plt.ylabel("Amplitude") 189 | plt.grid(True) 190 | 191 | # Save the image 192 | plt.tight_layout() 193 | plt.savefig(output_image, dpi=300) 194 | plt.close() 195 | 196 | 197 | PREDEFINED_COLORS = [ 198 | "#1f77b4", 199 | "#ff7f0e", 200 | "#2ca02c", 201 | "#d62728", 202 | "#9467bd", 203 | "#8c564b", 204 | "#e377c2", 205 | "#7f7f7f", 206 | "#bcbd22", 207 | "#17becf", 208 | ] 209 | 210 | 211 | def plot_multiple_harmonic_spectra( 212 | audio_data: List[Tuple[torch.Tensor, int, str]], 213 | output_image: str, 214 | figsize: Tuple[int, int] = (12, 6), 215 | num_harmonics: int = 16, 216 | upper_bound: int = 140000, 217 | title: str = "Harmonic Spectrum", 218 | ) -> None: 219 | """ 220 | Plot high-resolution harmonic spectra for multiple audio waveforms. 221 | 222 | Parameters: 223 | audio_data (list): List of (waveform, sample_rate, label) tuples; each waveform is 1-D mono or (channels, samples). 224 | output_image (str): Path to save the combined output image. 225 | figsize (tuple): Size of the figure. 226 | num_harmonics (int): Number of harmonics to calculate. 227 | upper_bound (int): Maximum frequency to display. 228 | title (str): Title of the plot.
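Example (illustrative; compares a pure tone against a saturated copy):
    >>> import torch
    >>> sr = 44100
    >>> t = torch.arange(sr) / sr
    >>> dry = torch.sin(2 * torch.pi * 220.0 * t)  # 1-D mono signal
    >>> wet = torch.tanh(3.0 * dry)
    >>> plot_multiple_harmonic_spectra(
    ...     [(dry, sr, "dry"), (wet, sr, "tanh")],
    ...     "spectra.png",
    ...     num_harmonics=12,
    ...     upper_bound=20000,
    ... )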
229 | """ 230 | plt.figure(figsize=figsize) 231 | 232 | for idx, (waveform, sample_rate, label) in enumerate(audio_data): 233 | # Ensure mono 1-D audio (average multi-channel input down to one channel) 234 | if waveform.dim() > 1: 235 | waveform = torch.mean(waveform, dim=0) 236 | 237 | # Move waveform to GPU for efficient computation 238 | device = ( 239 | waveform.device 240 | if waveform.is_cuda 241 | else "cuda" if torch.cuda.is_available() else "cpu" 242 | ) 243 | waveform = waveform.to(device) 244 | 245 | # FFT at the signal length (no zero-padding; raise the multiplier for finer frequency resolution) 246 | n_fft = 1 * waveform.size(0) 247 | fft = torch.fft.fft(waveform, n=n_fft) 248 | magnitude = torch.abs(fft[: n_fft // 2]) 249 | frequencies = torch.fft.fftfreq(n_fft, d=1 / sample_rate, device=device)[: n_fft // 2] 250 | 251 | # Find fundamental frequency 252 | fundamental_idx = torch.argmax( 253 | magnitude[: len(magnitude) // 4] 254 | ) # Search in the first quarter 255 | fundamental_freq = frequencies[fundamental_idx] 256 | 257 | # Precompute harmonic frequencies (indices clamped to the available bins) 258 | harmonic_freqs = fundamental_freq * torch.arange( 259 | 1, num_harmonics + 1, device=device 260 | ) 261 | harmonic_indices = (harmonic_freqs / (sample_rate / n_fft)).long().clamp(max=len(magnitude) - 1) 262 | harmonic_amplitudes = magnitude[harmonic_indices].cpu().numpy() 263 | 264 | # Limit harmonic frequencies to upper bound 265 | harmonic_freqs = harmonic_freqs[harmonic_freqs <= upper_bound].cpu().numpy() 266 | harmonic_amplitudes = harmonic_amplitudes[: len(harmonic_freqs)] 267 | 268 | # Convert data to CPU for plotting 269 | frequencies_cpu = frequencies.cpu().numpy() 270 | magnitude_cpu = magnitude.cpu().numpy() 271 | 272 | # Plot the full spectrum 273 | color = PREDEFINED_COLORS[idx % len(PREDEFINED_COLORS)] 274 | plt.plot( 275 | frequencies_cpu, 276 | magnitude_cpu, 277 | color=color, 278 | alpha=0.4, 279 | label=f"Full Spectrum ({label})", 280 | ) 281 | 282 | # Overlay harmonic peaks 283 | plt.vlines( 284 | harmonic_freqs, 285 | ymin=0, 286 | ymax=harmonic_amplitudes, 287 | color=color, 288 | linewidth=1.5, 289 | linestyle="--", 290 | label=f"Harmonics ({label})", 291 | ) 292 | 293 | plt.title(title) 294 | plt.xscale("log") 295 | plt.xlim(20, upper_bound) 296 | plt.xlabel("Frequency (Hz)") 297 | plt.ylabel("Amplitude") 298 | plt.legend() 299 | plt.grid(True, which="both", linestyle="--", linewidth=0.5) 300 | 301 | # Save and close the figure 302 | plt.tight_layout() 303 | plt.savefig(output_image, dpi=150) 304 | plt.close() 305 | -------------------------------------------------------------------------------- /core/sampling.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | utility functions for sampling 10 | """ 11 | 12 | import torch 13 | import torchaudio 14 | from typing import Tuple 15 | 16 | def oversample( 17 | audio_signal: torch.Tensor, sample_rate: int, factor: int = 8 18 | ) -> Tuple[torch.Tensor, int]: 19 | 20 | resampler = torchaudio.transforms.Resample(sample_rate, sample_rate * factor).to( 21 | device=audio_signal.device, dtype=audio_signal.dtype 22 | ) 23 | return resampler(audio_signal), sample_rate * factor 24 | 25 | 26 | def downsample( 27 | audio_signal: torch.Tensor, sample_rate: int, factor: int = 8 28 | ) -> torch.Tensor: 29 | 30 | resampler = torchaudio.transforms.Resample(sample_rate, sample_rate // factor).to( 31 | device=audio_signal.device, dtype=audio_signal.dtype 32 | ) 33 | return
resampler(audio_signal) 34 | -------------------------------------------------------------------------------- /core/saturation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | various saturation and waveshaping methods 10 | """ 11 | 12 | import torch 13 | import torch.nn as nn 14 | from torch.nn import Module 15 | from ..core.sampling import oversample, downsample 16 | from ..core.harmonics import enahnce_harmonics_23 17 | 18 | from typing import List 19 | 20 | 21 | def sigmoid_saturation(audio: torch.Tensor, drive: float = 50.0) -> torch.Tensor: 22 | """ 23 | Apply sigmoid saturation with drive control. 24 | Parameters: 25 | audio: [channels, samples] input audio signal in the range [-1, 1]. 26 | drive: Controls the steepness of the sigmoid in range [0, 100.0]. 27 | Returns: 28 | Saturated audio signal. 29 | """ 30 | k = 0.1 + (drive / 100.0) * 10 # Map drive to steepness control 31 | normalized = (audio + 1) / 2 32 | saturated = 1 / (1 + torch.exp(-k * (normalized - 0.5))) 33 | return 2 * saturated - 1 34 | 35 | 36 | def tanh_saturation(audio: torch.Tensor, drive: float = 50.0) -> torch.Tensor: 37 | """ 38 | Apply tanh saturation with drive control. 39 | Parameters: 40 | audio: Input audio signal (Tensor). 41 | drive: Controls the strength of the tanh effect in range [0, 100.0]. 42 | Returns: 43 | Saturated audio signal. 44 | """ 45 | 46 | audio = audio / torch.max(torch.abs(audio)).clamp_min(1e-12) # peak-normalize; guard against all-zero input 47 | k = 0.1 + (drive / 100.0) * 10 # Map drive to scaling factor 48 | k_tensor = torch.tensor( 49 | k, dtype=audio.dtype, device=audio.device 50 | ) # Convert k to Tensor 51 | return torch.tanh( 52 | k_tensor * audio 53 | ) # / torch.tanh(k_tensor) # Normalize output range 54 | 55 | 56 | def poly_saturation(audio: torch.Tensor, drive: float = 50.0) -> torch.Tensor: 57 | """ 58 | Apply cubic polynomial saturation with drive control. 59 | Parameters: 60 | audio: Input audio signal. 61 | drive: Controls the strength of the cubic term in range [0, 100.0]. 62 | Returns: 63 | Saturated audio signal. 64 | """ 65 | c3 = 0.01 + (drive / 100.0) * 0.3 # Map drive to nonlinearity strength 66 | return audio - c3 * audio**3 67 | 68 | 69 | def logarithmic_mapping(audio: torch.Tensor, drive: float = 50.0) -> torch.Tensor: 70 | """ 71 | Apply logarithmic mapping with drive control. 72 | Parameters: 73 | audio: Input audio signal. 74 | drive: Controls the scaling of the logarithmic mapping. 75 | Returns: 76 | Saturated audio signal.
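Example (illustrative; drive=50 maps to max_value = 0.1 + 0.5 * 10 = 5.1):
    >>> import torch
    >>> x = torch.linspace(-1.0, 1.0, 5)
    >>> y = logarithmic_mapping(x, drive=50.0)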
77 | """ 78 | max_value = 0.1 + (drive / 100.0) * 10 # Map drive to maximum scaling 79 | return ( 80 | torch.sign(audio) 81 | * torch.log1p(torch.abs(audio * max_value)) 82 | / torch.log1p(torch.tensor(max_value)) 83 | ) 84 | 85 | 86 | class Saturator(Module): 87 | @staticmethod 88 | def get_modes() -> List[str]: 89 | return ["poly", "tanh", "sig", "log"] # modes implemented in forward() 90 | 91 | def __init__( 92 | self, 93 | drive: float = 0.5, 94 | order: int = 3, 95 | sample_rate: int = 48000, 96 | mode: str = "poly", 97 | oversample_factor: int = 4, 98 | ): 99 | super(Saturator, self).__init__() 100 | self.order: int = order # third-order polynomial approximation by default 101 | self.sample_rate: int = sample_rate 102 | self.mode: str = mode 103 | self.drive: float = drive 104 | self.oversample_factor: int = oversample_factor 105 | self.harmonics_level: float = 0.0 106 | 107 | 108 | 109 | # Input scaling and output gain (adjust as needed) 110 | self.input_scale = nn.Parameter(torch.tensor(1.0, dtype=torch.float32)) 111 | self.output_gain = nn.Parameter(torch.tensor(1.0, dtype=torch.float32)) 112 | 113 | def forward(self, x: torch.Tensor) -> torch.Tensor: 114 | 115 | y, sample_rate = oversample(x, self.sample_rate, factor=self.oversample_factor) 116 | 117 | y = enahnce_harmonics_23(y, sample_rate, gain_db_base=self.harmonics_level) 118 | 119 | if self.mode == "poly": 120 | y = poly_saturation(y, drive=self.drive) 121 | elif self.mode == "tanh": 122 | y = tanh_saturation(y, drive=self.drive) 123 | elif self.mode == "sig": 124 | y = sigmoid_saturation(y, drive=self.drive) 125 | elif self.mode == "log": 126 | y = logarithmic_mapping(y, drive=self.drive) 127 | 128 | y = downsample(y, sample_rate, factor=self.oversample_factor) # sample_rate here is the oversampled rate 129 | 130 | return y 131 | 132 | 133 | def saturator_get_modes() -> List[str]: 134 | return ["poly", "tanh", "sig", "log"] 135 | 136 | 137 | def saturator( 138 | audio_in: torch.Tensor, 139 | mode: str = "poly", 140 | sample_rate: int = 44100, 141 | drive: float = 1.5, 142 | oversample_factor: int = 4, 143 | harmonics_level: float = 1.2, 144 | ) -> torch.Tensor: 145 | y = audio_in.clone() 146 | # loudness = get_loudness(audio_in, sample_rate) 147 | # y = automatic_gain_control(audio_in) 148 | # y = y*drive_pre 149 | y, _sample_rate = oversample(y, sample_rate, factor=oversample_factor) 150 | 151 | # y = enahnce_harmonics_23(y, _sample_rate, gain_db_base=harmonics_level) 152 | 153 | if mode == "poly": 154 | drive = drive * 2 155 | y = poly_saturation(y, drive=drive) 156 | elif mode == "tanh": 157 | drive = drive / 3 158 | y = tanh_saturation(y, drive=drive) 159 | elif mode == "sig": 160 | y = sigmoid_saturation(y, drive=drive) 161 | elif mode == "log": 162 | y = logarithmic_mapping(y, drive=drive) 163 | 164 | y = downsample(y, sample_rate, factor=oversample_factor) # same resample ratio as oversample(), so this returns y to the original rate 165 | # y = lufs_normalization(y, sample_rate, loudness) 166 | 167 | return y 168 | -------------------------------------------------------------------------------- /core/tests.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | # Reusable constants 4 | DATA_ROOT = Path("ComfyUI_SignalProcessing/audio") 5 | 6 | 7 | def get_output_file_path(output_root: Path, test_name: str, mode: str) -> Path: 8 | """Generate output file path for a given test.""" 9 | return output_root / f"{test_name}-{mode}.wav" 10 | -------------------------------------------------------------------------------- /core/utilities.py:
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | various development utilities 10 | """ 11 | 12 | import os 13 | import sys 14 | import re 15 | 16 | # Regex matching a versioned ComfyUI install directory (e.g. ComfyUI-0.3.10) 17 | pattern = r"^ComfyUI-\d+\.\d+\.\d+$" 18 | 19 | this_file_directory = os.path.dirname(os.path.realpath(__file__)) 20 | 21 | 22 | def find_comfy_root() -> str: 23 | path = os.fspath(this_file_directory) 24 | 25 | # realpath() yields '/'-separated paths on POSIX; walk upwards until a 26 | # directory name matches the ComfyUI version pattern 27 | tokens = path.split("/") 28 | while not re.match(pattern, tokens[-1]): 29 | tokens.pop(-1) 30 | 31 | path = "/".join(tokens) 32 | return path 33 | 34 | 35 | 36 | 37 | # Add ComfyUI to sys.path for local development only. 38 | # Finds the ComfyUI root by walking upwards until a directory matches the regex. 39 | # Enable with: export coffy_local_dev=1 40 | def comfy_root_to_syspath() -> None: 41 | if os.environ.get("coffy_local_dev") != "1": 42 | return 43 | 44 | path = find_comfy_root() 45 | if path not in sys.path: 46 | sys.path.insert(0, path) 47 | 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /core/utilitiescuda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | utilities for loading and compiling CUDA kernels with CuPy 10 | """ 11 | 12 | import os 13 | import cupy as cp 14 | 15 | 16 | def read_kernel_by_name( 17 | kernel_name: str, 18 | kernel_class: str = "limiter", 19 | kernel_identifier: str = "limiter_kernel", 20 | ) -> cp.RawKernel: 21 | this_directory = os.path.dirname(os.path.abspath(__file__)) 22 | kernel_relativepath = f"kernels/{kernel_class}/{kernel_name}.cu" 23 | kernel_filepath = os.path.join(this_directory, kernel_relativepath) 24 | print(f"Loading CUDA kernel...
{kernel_relativepath}") 25 | 26 | if not os.path.exists(kernel_filepath): 27 | raise FileNotFoundError(f"Kernel file not found: {kernel_filepath}") 28 | 29 | with open(kernel_filepath, "r", encoding="utf-8") as file: # Open as text 30 | code = file.read() # Read kernel source code as string 31 | # Pass code to RawKernel 32 | return cp.RawKernel(code=code, name=kernel_identifier, backend="nvrtc") 33 | -------------------------------------------------------------------------------- /data/widener/init_vn_filters.txt: -------------------------------------------------------------------------------- 1 | 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0580 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.5971 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.4181 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.2201 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0441 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0358 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0774 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0320 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 
0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0071 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0039 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0097 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0019 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0012 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0032 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 2 | -1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.9928 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.5530 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.4268 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0525 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 
0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.1981 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.1238 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0155 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0339 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0053 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0039 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0042 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0014 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0036 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0026 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 
0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 3 | -------------------------------------------------------------------------------- /data/widener/opt_vn_filters.txt: -------------------------------------------------------------------------------- 1 | 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0580 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.5971 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.4181 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.2201 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0441 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0358 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0774 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0320 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0071 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 
0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0039 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0097 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0019 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0012 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0032 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 2 | -1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.9928 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.5530 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.4268 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0525 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.1981 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 
0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.1238 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0155 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0339 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0053 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0039 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0042 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0014 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0036 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0026 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 3 | -------------------------------------------------------------------------------- /effects/SignalProcessingConvolutionReverb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 
5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Convolution Reverb 10 | """ 11 | 12 | import os 13 | import torch 14 | import torchaudio 15 | import torch.nn.functional as F 16 | 17 | from typing import Any, Dict, Tuple, Union 18 | 19 | from ..core.utilities import comfy_root_to_syspath 20 | from ..core.io import audio_from_comfy_2d, audio_to_comfy_3d, from_disk_as_raw_2d 21 | from ..core.loudness import lufs_normalization, get_loudness 22 | import folder_paths 23 | 24 | comfy_root_to_syspath() # add comfy to sys path for dev 25 | 26 | 27 | class SignalProcessingConvolutionReverb: 28 | supported_formats = [".wav", ".mp3", ".ogg", ".m4a", ".flac", ".mp4"] 29 | this_directory = os.path.dirname(os.path.realpath(__file__)) 30 | ir_directory = os.path.join(os.path.split(this_directory)[0], "audio", "ir") 31 | 32 | @classmethod 33 | def INPUT_TYPES(cls) -> Dict[str, Any]: 34 | 35 | files, _ = folder_paths.recursive_search( 36 | SignalProcessingConvolutionReverb.ir_directory 37 | ) 38 | 39 | ir_files = [] 40 | for file in files: 41 | try: 42 | _, ext = os.path.splitext(file) 43 | if ext in SignalProcessingConvolutionReverb.supported_formats: 44 | ir_files.append(file) 45 | except Exception: 46 | pass 47 | 48 | return { 49 | "required": { 50 | "impulse_response": (sorted(ir_files),), 51 | "audio_input": ("AUDIO",), 52 | "wet_dry": ( 53 | "FLOAT", 54 | { 55 | "default": 0.5, 56 | "min": 0.0, 57 | "max": 1.0, 58 | "step": 0.01, 59 | }, 60 | ), 61 | }, 62 | } 63 | 64 | RETURN_TYPES = ("AUDIO",) 65 | RETURN_NAMES = ("audio",) 66 | CATEGORY = "Signal Processing" 67 | FUNCTION = "process" 68 | 69 | def process( 70 | self, 71 | impulse_response: str, 72 | audio_input: Dict[str, Union[torch.Tensor, int]], 73 | wet_dry: float, 74 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 75 | 76 | try_gpu: bool = True 77 | repeat: bool = True 78 | 79 | waveform, sample_rate = audio_from_comfy_2d( 80 | audio_input, repeat=repeat, try_gpu=try_gpu 81 | ) 82 | 83 | loudness = get_loudness(waveform, sample_rate) 84 | 85 | ir_filepath = os.path.join( 86 | SignalProcessingConvolutionReverb.ir_directory, impulse_response 87 | ) 88 | 89 | ir, ir_sr = from_disk_as_raw_2d(ir_filepath, repeat=repeat, try_gpu=try_gpu) 90 | 91 | # Resample IR if sampling rates do not match 92 | if ir_sr != sample_rate: 93 | resampler = torchaudio.transforms.Resample( 94 | orig_freq=ir_sr, new_freq=sample_rate 95 | ).to(ir.device, dtype=waveform.dtype) 96 | ir = resampler(ir) 97 | ir_sr = sample_rate 98 | 99 | # Match IR channel count to the input: downmix stereo IRs for mono audio, duplicate mono IRs for stereo audio 100 | if waveform.shape[0] == 1 and ir.shape[0] == 2: 101 | ir = ir.mean(dim=0, keepdim=True) 102 | if waveform.shape[0] == 2 and ir.shape[0] == 1: 103 | ir = ir.repeat(2, 1) 104 | 105 | processed_audio = self.apply_reverb(waveform, sample_rate, ir, wet_dry=wet_dry) 106 | processed_audio = lufs_normalization(processed_audio, sample_rate, loudness) 107 | 108 | return audio_to_comfy_3d(processed_audio, sample_rate, cpu=True) 109 | 110 | def apply_reverb( 111 | self, audio: torch.Tensor, sr: int, ir: torch.Tensor, wet_dry: float = 0.5 112 | ) -> torch.Tensor: 113 | 114 | num_audio_channels, audio_length = audio.shape 115 | num_ir_channels, ir_length = ir.shape 116 | 117 | # Normalize IR to prevent amplification 118 | ir = ir / torch.max(torch.abs(ir)) if torch.max(torch.abs(ir)) > 0 else ir 119 | 120 | # Initialize list to hold processed channels 121 | processed_channels = [] 122 | 123 | # Apply convolution per channel 124 | for channel in range(num_audio_channels): 125 | # Get the current
audio and IR channel 126 | audio_channel = audio[channel].unsqueeze(0).unsqueeze(0) # Shape: [1, 1, N] 127 | ir_channel = ( 128 | ir[channel].flip(0).unsqueeze(0).unsqueeze(0) 129 | ) # Reverse IR, Shape: [1, 1, M] 130 | 131 | # Perform convolution 132 | convolved = F.conv1d( 133 | audio_channel, ir_channel, padding=ir_length - 1 134 | ) # Shape: [1, 1, N + M -1] 135 | 136 | # Remove batch and channel dimensions 137 | convolved = convolved.squeeze(0).squeeze(0) # Shape: [N + M -1] 138 | 139 | # Trim convolved signal to original audio length 140 | convolved = convolved[:audio_length] 141 | 142 | # Normalize convolved signal to prevent clipping 143 | max_val = torch.max(torch.abs(convolved)) 144 | if max_val > 0: 145 | convolved = convolved / max_val 146 | 147 | # Apply wet/dry mix 148 | dry = 1 - wet_dry 149 | wet = wet_dry 150 | processed = dry * audio[channel] + wet * convolved 151 | 152 | # Prevent clipping by normalizing if necessary 153 | processed_max = torch.max(torch.abs(processed)) 154 | if processed_max > 1.0: 155 | processed = processed / processed_max 156 | 157 | # Append processed channel 158 | processed_channels.append(processed) 159 | 160 | # Stack channels back into a tensor 161 | processed_audio = torch.stack(processed_channels) # Shape: [2, N] 162 | 163 | return processed_audio 164 | -------------------------------------------------------------------------------- /effects/SignalProcessingPaulStretch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | This is a port of Paul's Extreme Sound Stretch (Paulstretch) - by Nasca Octavian PAUL 10 | http://www.paulnasca.com/ 11 | http://hypermammut.sourceforge.net/paulstretch/ 12 | https://github.com/paulnasca/paulstretch_python 13 | https://github.com/paulnasca/paulstretch_python/blob/master/paulstretch_stereo.py 14 | """ 15 | 16 | import torch 17 | 18 | import math 19 | from typing import Tuple, Dict, Any, Union 20 | 21 | from ..core.utilities import comfy_root_to_syspath 22 | from ..core.io import audio_from_comfy_2d, audio_to_comfy_3d 23 | from ..core.loudness import lufs_normalization, get_loudness 24 | 25 | comfy_root_to_syspath() # add comfy to sys path for dev 26 | 27 | 28 | class SignalProcessingPaulStretch: 29 | @classmethod 30 | def INPUT_TYPES(cls) -> Dict[str, Any]: 31 | return { 32 | "required": { 33 | "audio_input": ("AUDIO", {"forceInput": True}), 34 | "stretch_factor": ( 35 | "FLOAT", 36 | {"default": 8.0, "min": 1.0, "max": 100.0, "step": 0.1}, 37 | ), 38 | "window_size_seconds": ( 39 | "FLOAT", 40 | {"default": 0.25, "min": 0.05, "max": 10.0, "step": 0.05}, 41 | ), 42 | } 43 | } 44 | 45 | RETURN_TYPES = ("AUDIO",) 46 | RETURN_NAMES = ("audio",) 47 | CATEGORY = "Signal Processing" 48 | FUNCTION = "process" 49 | 50 | def process( 51 | self, 52 | audio_input: Dict[str, Union[torch.Tensor, int]], 53 | stretch_factor: float, 54 | window_size_seconds: float, 55 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 56 | 57 | # Conditional processing: If stretch_factor is 1.0, return original audio 58 | if stretch_factor == 1.0: 59 | return audio_to_comfy_3d( 60 | audio_input["waveform"], audio_input["sample_rate"] 61 | ) 62 | 63 | # Extract waveform and sample_rate 64 | waveform, sample_rate = audio_from_comfy_2d( 65 | audio_input, repeat=True, try_gpu=True 66 | ) 67 | loudness = get_loudness(waveform, sample_rate) 68 | 69 | nchannels, 
nsamples = waveform.shape 70 | 71 | # Optimize window size to be divisible by 2, 3, and 5 72 | window_size = int(window_size_seconds * sample_rate) 73 | if window_size < 16: 74 | window_size = 16 75 | window_size = self.optimize_windowsize(window_size) 76 | window_size = int(window_size / 2) * 2 # Ensure even window size 77 | half_window_size = int(window_size / 2) 78 | 79 | # Correct the end of the waveform by applying a fade-out 80 | end_size = int(sample_rate * 0.05) 81 | if end_size < 16: 82 | end_size = 16 83 | fade_out = torch.linspace( 84 | 1.0, 0.0, end_size, device=waveform.device, dtype=waveform.dtype 85 | ) 86 | waveform[:, -end_size:] = waveform[:, -end_size:] * fade_out 87 | 88 | # Compute displacement 89 | start_pos = 0.0 90 | displace_pos = (window_size * 0.5) / stretch_factor 91 | 92 | # Create custom window function as in original code 93 | window = torch.pow( 94 | 1.0 95 | - torch.pow( 96 | torch.linspace( 97 | -1.0, 1.0, window_size, device=waveform.device, dtype=waveform.dtype 98 | ), 99 | 2.0, 100 | ), 101 | 1.25, 102 | ) 103 | 104 | # Initialize old windowed buffer 105 | old_windowed_buf = torch.zeros( 106 | (nchannels, window_size), device=waveform.device, dtype=waveform.dtype 107 | ) 108 | 109 | # Initialize list to store output frames 110 | output_frames = [] 111 | 112 | # Processing loop 113 | frame_count = 0 114 | while True: 115 | # Get the windowed buffer 116 | istart_pos = int(math.floor(start_pos)) 117 | buf = waveform[:, istart_pos : istart_pos + window_size] 118 | if buf.shape[1] < window_size: 119 | padding = window_size - buf.shape[1] 120 | buf = torch.nn.functional.pad(buf, (0, padding), "constant", 0.0) 121 | buf = buf * window 122 | 123 | # FFT: Real FFT since the input is real 124 | freqs = torch.fft.rfft(buf, dim=1) 125 | 126 | # Get amplitudes and randomize phases 127 | amplitudes = freqs.abs() 128 | phases = ( 129 | torch.rand(freqs.shape, device=waveform.device, dtype=waveform.dtype) 130 | * 2 131 | * math.pi 132 | ) 133 | freqs = amplitudes * torch.exp(1j * phases) 134 | 135 | # Inverse FFT 136 | buf_ifft = torch.fft.irfft(freqs, n=window_size, dim=1) 137 | 138 | # Window again the output buffer 139 | buf_ifft = buf_ifft * window 140 | 141 | # Overlap-add the output 142 | output = ( 143 | buf_ifft[:, :half_window_size] + old_windowed_buf[:, half_window_size:] 144 | ) 145 | old_windowed_buf = buf_ifft 146 | 147 | # Append to output_frames 148 | output_frames.append(output) 149 | 150 | # Increment start_pos 151 | start_pos += displace_pos 152 | frame_count += 1 153 | 154 | # Check if we have reached the end of the input 155 | if start_pos >= nsamples: 156 | break 157 | 158 | # Concatenate all output frames horizontally 159 | output_array = torch.cat(output_frames, dim=1) 160 | 161 | # LUFS Normalization 162 | output_tensor = lufs_normalization(output_array, sample_rate, loudness) 163 | 164 | # Return as audio dictionary 165 | return audio_to_comfy_3d(output_tensor, sample_rate) 166 | 167 | @staticmethod 168 | def optimize_windowsize(n: int) -> int: 169 | 170 | orig_n = n 171 | while True: 172 | n = orig_n 173 | while (n % 2) == 0: 174 | n //= 2 175 | while (n % 3) == 0: 176 | n //= 3 177 | while (n % 5) == 0: 178 | n //= 5 179 | 180 | if n < 2: 181 | break 182 | orig_n += 1 183 | return orig_n 184 | -------------------------------------------------------------------------------- /effects/SignalProcessingPitchShifter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: 
utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Pitch shifting node 10 | """ 11 | 12 | import torch 13 | from typing import Tuple, Dict, Any, Union 14 | 15 | import torchaudio.functional as F 16 | 17 | from ..core.utilities import comfy_root_to_syspath 18 | from ..core.io import audio_from_comfy_3d, audio_to_comfy_3d 19 | from ..core.loudness import lufs_normalization, get_loudness 20 | 21 | comfy_root_to_syspath() # add comfy to sys path for dev 22 | 23 | 24 | class SignalProcessingPitchShifter: 25 | @classmethod 26 | def INPUT_TYPES(cls) -> Dict[str, Any]: 27 | return { 28 | "required": { 29 | "audio_input": ("AUDIO",), # Input audio 30 | "pitch_shift_factor": ( 31 | "INT", 32 | {"default": 2, "min": -12 * 4, "max": 12 * 4, "step": 1}, 33 | ), 34 | }, 35 | "optional": {}, 36 | } 37 | 38 | RETURN_TYPES = ("AUDIO",) 39 | RETURN_NAMES = ("output_audio",) 40 | CATEGORY = "Signal Processing" 41 | FUNCTION = "process" 42 | 43 | def process( 44 | self, 45 | audio_input: Dict[str, Union[torch.Tensor, int]], 46 | pitch_shift_factor: int = 2, 47 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 48 | 49 | try_gpu: bool = True 50 | waveform, sample_rate = audio_from_comfy_3d(audio_input, try_gpu=try_gpu) 51 | 52 | loudness = get_loudness(waveform, sample_rate) 53 | 54 | pitch_shifted_waveform = F.pitch_shift( 55 | waveform, sample_rate, pitch_shift_factor 56 | ) 57 | pitch_shifted_waveform = lufs_normalization( 58 | pitch_shifted_waveform, sample_rate, loudness 59 | ) 60 | 61 | return audio_to_comfy_3d(pitch_shifted_waveform, sample_rate) 62 | -------------------------------------------------------------------------------- /effects/SignalProcessingStereoWidening.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Audio widening node 10 | """ 11 | 12 | import os 13 | import sys 14 | import math 15 | import torch 16 | 17 | from typing import Dict, Any, Tuple, Union 18 | from ..core.io import audio_from_comfy_2d, audio_to_comfy_3d 19 | from ..core.loudness import lufs_normalization, get_loudness 20 | from ..core.widening import ( 21 | StereoWidenerFrequencyBased, 22 | DecorrelationType, 23 | FilterbankType, 24 | ) 25 | 26 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy")) 27 | 28 | 29 | def interpolate(t: float, a: float, b: float) -> float: 30 | if not 0.0 <= t <= 1.0: 31 | raise ValueError("t must be in the range [0.0, 1.0]") 32 | return a + t * (b - a) 33 | 34 | 35 | class SignalProcessingStereoWidening: 36 | @classmethod 37 | def INPUT_TYPES(cls) -> Dict[str, Any]: 38 | return { 39 | "required": { 40 | "mode": (["decorrelation", "simple"],), 41 | "audio_input": ("AUDIO",), 42 | }, 43 | "optional": { 44 | "width": ( 45 | "FLOAT", 46 | {"default": 6.0, "min": 1.0, "max": 8.0, "step": 0.1}, 47 | ), 48 | }, 49 | } 50 | 51 | RETURN_TYPES = ("AUDIO",) 52 | RETURN_NAMES = ("widened_audio",) 53 | CATEGORY = "Signal Processing" 54 | FUNCTION = "process" 55 | 56 | def process( 57 | self, 58 | mode: str, 59 | audio_input: Dict[str, Union[torch.Tensor, int]], 60 | width: float = 1.2, 61 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 62 | """ 63 | Widen stereo audio or convert mono audio to wide stereo 64 | using the provided widening algorithm. 
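In 'simple' mode the widening works in mid/side space: with c = 1 / max(1 + width, 2), mid = (L + R) * c and sides = (R - L) * width * c, and the outputs are L' = mid - sides and R' = mid + sides. For example, width = 6.0 gives mid = (L + R) / 7 and sides = 6 * (R - L) / 7. In 'decorrelation' mode, width is clamped to [0, 1] and mapped to an interpolation angle beta in [0, pi/2] for the frequency-based widener.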
65 | 66 | Parameters: 67 | audio_input (Dict): Dictionary containing 'waveform' and 'sample_rate'. 68 | width (float): Width factor (>1.0). 69 | 70 | Returns: 71 | Tuple[Dict[str, torch.Tensor]]: Dictionary with widened 'waveform' and 'sample_rate'. 72 | """ 73 | 74 | waveform, sample_rate = audio_from_comfy_2d( 75 | audio_input, repeat=False, try_gpu=True 76 | ) 77 | channels, num_samples = waveform.shape 78 | 79 | loudness = get_loudness(waveform, sample_rate) 80 | 81 | if mode == "simple": 82 | 83 | if channels not in [1, 2]: 84 | raise ValueError( 85 | f"Unsupported number of channels: {channels}. \ 86 | Only mono and stereo are supported." 87 | ) 88 | 89 | # Calculate coefficients based on the provided width parameter 90 | width_coeff = 1.0 / max(1.0 + width, 2.0) # Scalar 91 | 92 | coef_mid = 1.0 * width_coeff # Coefficient for mid 93 | coef_sides = width * width_coeff # Coefficient for sides 94 | 95 | if channels == 2: 96 | # Stereo to Widened Stereo 97 | L = waveform[0, :] # Left channel 98 | R = waveform[1, :] # Right channel 99 | 100 | # Apply the widening algorithm 101 | mid = (L + R) * coef_mid # Mid signal 102 | sides = (R - L) * coef_sides # Side signal 103 | 104 | widened_L = mid - sides # New Left channel 105 | widened_R = mid + sides # New Right channel 106 | 107 | # Stack the widened channels back into a stereo waveform 108 | widened_waveform = torch.stack( 109 | (widened_L, widened_R), dim=0 110 | ) # [2, samples] 111 | 112 | elif channels == 1: 113 | # Mono to Wide Stereo 114 | L = waveform[0, :].clone() # Duplicate mono channel to Left 115 | R = waveform[0, :].clone() # Duplicate mono channel to Right 116 | 117 | # Apply the widening algorithm 118 | mid = (L + R) * coef_mid # Mid signal 119 | sides = (R - L) * coef_sides # Side signal 120 | 121 | widened_L = mid - sides # New Left channel 122 | widened_R = mid + sides # New Right channel 123 | 124 | # Stack the widened channels into a stereo waveform 125 | widened_waveform = torch.stack( 126 | (widened_L, widened_R), dim=0 127 | ) # [2, samples] 128 | 129 | widened_waveform = lufs_normalization( 130 | widened_waveform, sample_rate, loudness 131 | ) 132 | 133 | return audio_to_comfy_3d(widened_waveform, sample_rate) 134 | 135 | if mode == "decorrelation": 136 | 137 | waveform = waveform.cpu() 138 | 139 | decorellation_type = DecorrelationType.VELVET 140 | filterbank_type = FilterbankType.ENERGY_PRESERVE 141 | start_value = 0.0 142 | end_value = math.pi / 2 143 | 144 | if width > 1.0: 145 | width = 1.0 146 | 147 | beta = interpolate(width, start_value, end_value) 148 | cutoff_frequency_hz = 22000 # sample_rate//2 # max possible 149 | cutoff_frequency_hz = (sample_rate // 2) - 10 # max possible 150 | 151 | stereoWidener = StereoWidenerFrequencyBased( 152 | waveform, 153 | sample_rate, 154 | filterbank_type, 155 | decorellation_type, 156 | [beta, beta], 157 | cutoff_frequency_hz, 158 | ) 159 | widener_result = stereoWidener.process() 160 | widened_waveform = torch.from_numpy(widener_result) 161 | widened_waveform = widened_waveform.T 162 | 163 | widened_waveform = lufs_normalization( 164 | widened_waveform, sample_rate, loudness 165 | ) 166 | 167 | return audio_to_comfy_3d(widened_waveform, sample_rate) 168 | 169 | return audio_to_comfy_3d(waveform, sample_rate) 170 | -------------------------------------------------------------------------------- /effects/__init__.py: -------------------------------------------------------------------------------- 
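A quick sanity check on the simple-mode mid/side algebra above: at `width = 1.0` both coefficients collapse to 0.5 and the transform is an identity, so all of the perceived widening comes from scaling the side signal relative to the mid. Standalone sketch:

```python
# Verify that the "simple" widening math is an identity at width = 1.0.
import torch

width = 1.0
width_coeff = 1.0 / max(1.0 + width, 2.0)          # 0.5 here
coef_mid, coef_sides = 1.0 * width_coeff, width * width_coeff

L, R = torch.randn(8), torch.randn(8)
mid, sides = (L + R) * coef_mid, (R - L) * coef_sides
widened_L, widened_R = mid - sides, mid + sides

assert torch.allclose(widened_L, L) and torch.allclose(widened_R, R)
```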
https://raw.githubusercontent.com/c0ffymachyne/ComfyUI_SignalProcessing/a50ab911eee901646fdb810f18ae8aed2ca1960b/effects/__init__.py -------------------------------------------------------------------------------- /generators/SignalProcessingPadSynth.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Pad Synthesiser port of code from this article : 10 | https://zynaddsubfx.sourceforge.io/doc/PADsynth/PADsynth.htm#c_implementation 11 | """ 12 | 13 | 14 | import torch 15 | import math 16 | from typing import Tuple, Dict, Any, Union 17 | 18 | from ..core.io import audio_to_comfy_3d 19 | 20 | 21 | class SignalProcessingPadSynth: 22 | @classmethod 23 | def INPUT_TYPES(cls) -> Dict[str, Any]: 24 | return { 25 | "required": { 26 | "sample_rate": ( 27 | "INT", 28 | {"default": 44100, "min": 8000, "max": 96000, "step": 1}, 29 | ), 30 | "fundamental_freq": ( 31 | "FLOAT", 32 | {"default": 261.0, "min": 20.0, "max": 2000.0, "step": 1.0}, 33 | ), 34 | "bandwidth_cents": ( 35 | "FLOAT", 36 | {"default": 40.0, "min": 10.0, "max": 100.0, "step": 1.0}, 37 | ), 38 | "number_harmonics": ( 39 | "INT", 40 | {"default": 64, "min": 1, "max": 128, "step": 1}, 41 | ), 42 | } 43 | } 44 | 45 | RETURN_TYPES = ("AUDIO",) 46 | RETURN_NAMES = ("audio",) 47 | CATEGORY = "Signal Processing" 48 | FUNCTION = "process" 49 | 50 | def process( 51 | self, 52 | sample_rate: int, 53 | fundamental_freq: float, 54 | bandwidth_cents: float, 55 | number_harmonics: int, 56 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 57 | """ 58 | Apply PADsynth algorithm to generate audio. 59 | 60 | Parameters: 61 | samplerate (int): Sampling rate in Hz. 62 | fundamental_freq (float): Fundamental frequency in Hz. 63 | bandwidth_cents (float): Bandwidth in cents for Gaussian profile. 64 | number_harmonics (int): Number of harmonics to generate. 65 | 66 | Returns: 67 | Tuple[Dict[str, torch.Tensor]]: Generated audio with waveform and sample rate. 
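Before the implementation that follows, the bandwidth mapping mentioned in the docstring is worth seeing in isolation: a fixed bandwidth in cents becomes an absolute bandwidth in Hz that grows linearly with each harmonic's frequency, which is what gives PADsynth its characteristic "wider highs". A sketch using the node's default values:

```python
# Worked example of the cents -> Hz bandwidth mapping used by PADsynth below.
bandwidth_cents = 40.0
fundamental_freq = 261.0

bw_multiplier = 2.0 ** (bandwidth_cents / 1200.0) - 1.0  # ~0.0234
for nh in (1, 2, 4):
    f_nh = fundamental_freq * nh
    bw_hz = bw_multiplier * f_nh   # wider (in Hz) for higher harmonics
    print(nh, round(bw_hz, 2))     # ~6.1, ~12.2, ~24.4 Hz
```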
68 | """ 69 | 70 | # Define FFT size 71 | N = 262144 # As per C++ code 72 | 73 | # Use default amplitude distribution 74 | A = torch.zeros(number_harmonics, dtype=torch.double) 75 | A[0] = 0.0 # A[0] is not used 76 | for i in range(1, number_harmonics): 77 | A[i] = 1.0 / i 78 | if (i % 2) == 0: 79 | A[i] *= 2.0 80 | 81 | # Initialize frequency amplitude and phase arrays 82 | freq_amp = torch.zeros(N // 2, dtype=torch.double) 83 | freq_phase = ( 84 | torch.rand(N // 2, dtype=torch.double) * 2.0 * math.pi 85 | ) # Random phases between 0 and 2pi 86 | 87 | # Define Gaussian profile function 88 | def profile(fi: torch.Tensor, bwi: torch.Tensor) -> torch.Tensor: 89 | x = fi / bwi 90 | x_sq = x**2 91 | # Avoid computing exp(-x^2) for x_sq > 14.71280603 92 | mask = x_sq <= 14.71280603 93 | result = torch.zeros_like(x_sq) 94 | result[mask] = torch.exp(-x_sq[mask]) / bwi[mask] 95 | return result 96 | 97 | # Convert bandwidth from cents to Hz 98 | # bw_Hz = (2^(bw/1200) -1) * f * nh 99 | # Convert bandwidth_cents to multiplier 100 | bw_multiplier = 2.0 ** (bandwidth_cents / 1200.0) - 1.0 101 | 102 | # Populate frequency amplitude array 103 | for nh in range(1, number_harmonics): 104 | f_nh = fundamental_freq * nh 105 | bw_Hz = bw_multiplier * f_nh 106 | bwi = bw_Hz / (2.0 * sample_rate) 107 | fi = f_nh / sample_rate # Normalized frequency 108 | 109 | # Create tensors for frequency bins 110 | i = torch.arange(N // 2, dtype=torch.double) 111 | # Normalized frequency for each bin 112 | normalized_freq = ( 113 | i / N 114 | ) # Equivalent to i * (sample_rate / N) / sample_rate = i / N 115 | 116 | # Compute profile 117 | fi_tensor = torch.full_like(i, fi) 118 | bwi_tensor = torch.full_like(i, bwi) 119 | profile_values = profile(normalized_freq - fi_tensor, bwi_tensor) 120 | 121 | # Update frequency amplitude 122 | freq_amp += profile_values * A[nh] 123 | 124 | # Construct complex frequency domain tensor 125 | real = freq_amp * torch.cos(freq_phase) 126 | imag = freq_amp * torch.sin(freq_phase) 127 | freq_complex = torch.complex(real, imag) # Shape: (N//2,) 128 | 129 | # Perform IFFT using torch.fft.irfft 130 | smp = torch.fft.irfft(freq_complex, n=N) # Shape: (N,) 131 | 132 | # Normalize the signal to prevent clipping 133 | max_val = torch.max(torch.abs(smp)) 134 | if max_val < 1e-5: 135 | max_val = 1e-5 # Prevent division by zero 136 | smp = smp / (max_val * math.sqrt(2)) # Normalize to 1/sqrt(2) as in C++ code 137 | 138 | # Convert to float32 for saving 139 | smp = smp.float() 140 | 141 | # Prepare waveform tensor: (C, N) 142 | waveform_out = smp.unsqueeze(0) # Mono audio 143 | 144 | return audio_to_comfy_3d(waveform_out, sample_rate) 145 | -------------------------------------------------------------------------------- /generators/SignalProcessingPadSynthChoir.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Pad Synth Choir Synthesizer port of code from this article 10 | https://zynaddsubfx.sourceforge.io/doc/PADsynth/PADsynth.htm#c_implementation 11 | """ 12 | 13 | import torch 14 | import math 15 | from typing import Tuple, List, Dict, Any, Union 16 | 17 | 18 | class SignalProcessingPadSynthChoir: 19 | @classmethod 20 | def INPUT_TYPES(cls) -> Dict[str, Any]: 21 | return { 22 | "required": { 23 | "samplerate": ( 24 | "INT", 25 | {"default": 44100, "min": 8000, "max": 96000, "step": 1}, 26 | ), 27 | "base_freq": ( 28 
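Stripped of the harmonic amplitude profile, the PADsynth construction above reduces to a few lines: build a magnitude spectrum, attach uniformly random phases, take one inverse real FFT. A condensed sketch with a toy one-bin spectrum standing in for the node's Gaussian profile:

```python
# Condensed sketch of the PADsynth idea: magnitude spectrum + random phases -> irfft.
import math
import torch

N = 8192                                     # small FFT size for the sketch
freq_amp = torch.zeros(N // 2, dtype=torch.double)
freq_amp[100] = 1.0                          # toy spectral "blob" stand-in

freq_phase = torch.rand(N // 2, dtype=torch.double) * 2.0 * math.pi
spectrum = torch.complex(freq_amp * torch.cos(freq_phase),
                         freq_amp * torch.sin(freq_phase))

smp = torch.fft.irfft(spectrum, n=N)         # seamlessly loopable waveform
smp = smp / smp.abs().max().clamp_min(1e-5)  # guard against division by zero
```

The random phases are what make the output loop seamlessly: only the magnitude spectrum carries the timbre.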
| "FLOAT", 29 | {"default": 130.81, "min": 20.0, "max": 2000.0, "step": 1.0}, 30 | ), 31 | "step_size": ("INT", {"default": 4, "min": 1, "max": 24, "step": 1}), 32 | "num_notes": ("INT", {"default": 7, "min": 1, "max": 24, "step": 1}), 33 | "bandwidth_cents": ( 34 | "FLOAT", 35 | {"default": 60.0, "min": 10.0, "max": 100.0, "step": 1.0}, 36 | ), 37 | "number_harmonics": ( 38 | "INT", 39 | {"default": 64, "min": 1, "max": 128, "step": 1}, 40 | ), 41 | } 42 | } 43 | 44 | RETURN_TYPES = ("AUDIO_LIST", "INT") 45 | RETURN_NAMES = ("audios", "sample_rate") 46 | CATEGORY = "Signal Processing" 47 | FUNCTION = "process" 48 | 49 | def process( 50 | self, 51 | samplerate: int, 52 | base_freq: float, 53 | step_size: int, 54 | num_notes: int, 55 | bandwidth_cents: float, 56 | number_harmonics: int, 57 | ) -> Tuple[List[Dict[str, Union[torch.Tensor, int]]], int]: 58 | """ 59 | Apply PADsynth choir algorithm to generate multiple audio files. 60 | 61 | Parameters: 62 | samplerate (int): Sampling rate in Hz. 63 | base_freq (float): Base frequency in Hz. 64 | step_size (int): Step size in semitones between notes. 65 | num_notes (int): Number of notes to generate. 66 | bandwidth_cents (float): Bandwidth in cents for Gaussian profile. 67 | number_harmonics (int): Number of harmonics to generate. 68 | 69 | Returns: 70 | Tuple[List[Dict[str, torch.Tensor]]]: 71 | List of generated audios with waveform and sample rate. 72 | """ 73 | 74 | # Define FFT size 75 | N = 262144 # As per C++ code 76 | 77 | audios = [] 78 | 79 | for note_index in range(num_notes): 80 | note_semitones = step_size * note_index 81 | f1 = base_freq * (2.0 ** (note_semitones / 12.0)) 82 | 83 | # Compute amplitude_per_harmonic with formants 84 | A = torch.zeros(number_harmonics, dtype=torch.double) 85 | A[0] = 0.0 # A[0] is not used 86 | 87 | for i in range(1, number_harmonics): 88 | # Calculate formants based on the C++ choir implementation 89 | formants = ( 90 | math.exp(-(((i * f1 - 600.0) / 150.0) ** 2)) 91 | + math.exp(-(((i * f1 - 900.0) / 250.0) ** 2)) 92 | + math.exp(-(((i * f1 - 2200.0) / 200.0) ** 2)) 93 | + math.exp(-(((i * f1 - 2600.0) / 250.0) ** 2)) 94 | + math.exp(-(((i * f1) / 3000.0) ** 2)) * 0.1 95 | ) 96 | A[i] = (1.0 / i) * formants 97 | # Optionally, you can debug amplitude values 98 | # logger.debug(f"Harmonic {i}: A[{i}]={A[i]:.4f}") 99 | 100 | # Initialize frequency amplitude and phase arrays 101 | freq_amp = torch.zeros(N // 2, dtype=torch.double) 102 | freq_phase = ( 103 | torch.rand(N // 2, dtype=torch.double) * 2.0 * math.pi 104 | ) # Random phases between 0 and 2pi 105 | 106 | # Define Gaussian profile function 107 | def profile(fi: torch.Tensor, bwi: torch.Tensor) -> torch.Tensor: 108 | x = fi / bwi 109 | x_sq = x**2 110 | # Avoid computing exp(-x^2) for x_sq > 14.71280603 111 | mask = x_sq <= 14.71280603 112 | result = torch.zeros_like(x_sq) 113 | result[mask] = torch.exp(-x_sq[mask]) / bwi[mask] 114 | return result 115 | 116 | # Convert bandwidth from cents to Hz 117 | # bw_Hz = (2^(bw/1200) -1) * f * nh 118 | bw_multiplier = 2.0 ** (bandwidth_cents / 1200.0) - 1.0 119 | 120 | # Create tensors for frequency bins 121 | i = torch.arange(N // 2, dtype=torch.double) 122 | normalized_freq = i / N # Equivalent to i / N 123 | 124 | # Compute and accumulate frequency amplitudes for each harmonic 125 | for nh in range(1, number_harmonics): 126 | f_nh = f1 * nh 127 | bw_Hz = bw_multiplier * f_nh 128 | bwi = bw_Hz / (2.0 * samplerate) 129 | fi = f_nh / samplerate # Normalized frequency 130 | 131 | fi_tensor = 
torch.full_like(i, fi) 132 | bwi_tensor = torch.full_like(i, bwi) 133 | profile_values = profile(normalized_freq - fi_tensor, bwi_tensor) 134 | 135 | # Update frequency amplitude 136 | freq_amp += profile_values * A[nh] 137 | 138 | # Construct complex frequency domain tensor 139 | real = freq_amp * torch.cos(freq_phase) 140 | imag = freq_amp * torch.sin(freq_phase) 141 | freq_complex = torch.complex(real, imag) # Shape: (N//2,) 142 | 143 | # Perform IFFT using torch.fft.irfft 144 | smp = torch.fft.irfft(freq_complex, n=N) # Shape: (N,) 145 | 146 | # Normalize the signal to prevent clipping 147 | max_val = torch.max(torch.abs(smp)) 148 | if max_val < 1e-5: 149 | max_val = 1e-5 # Prevent division by zero 150 | smp = smp / ( 151 | max_val * math.sqrt(2) 152 | ) # Normalize to 1/sqrt(2) as in C++ code 153 | 154 | # Convert to float32 for saving 155 | smp = smp.float() 156 | 157 | # Prepare waveform tensor: (C, N) 158 | waveform_out = smp.unsqueeze(0) # Mono audio 159 | 160 | # Reshape waveform_out to include batch dimension: (1, C, N) 161 | waveform_out = waveform_out.unsqueeze(0) # Shape: (1, C, N) 162 | 163 | # Append to audios list 164 | audios.append({"waveform": waveform_out, "sample_rate": samplerate}) 165 | 166 | # Return the list of generated audios 167 | 168 | return audios, samplerate 169 | -------------------------------------------------------------------------------- /generators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c0ffymachyne/ComfyUI_SignalProcessing/a50ab911eee901646fdb810f18ae8aed2ca1960b/generators/__init__.py -------------------------------------------------------------------------------- /nodes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | node definitions 10 | """ 11 | 12 | # generators 13 | from .generators.SignalProcessingPadSynth import SignalProcessingPadSynth 14 | from .generators.SignalProcessingPadSynthChoir import SignalProcessingPadSynthChoir 15 | 16 | # effects 17 | from .effects.SignalProcessingStereoWidening import SignalProcessingStereoWidening 18 | from .effects.SignalProcessingPaulStretch import SignalProcessingPaulStretch 19 | from .effects.SignalProcessingPitchShifter import SignalProcessingPitchShifter 20 | from .effects.SignalProcessingConvolutionReverb import SignalProcessingConvolutionReverb 21 | 22 | # processors 23 | from .processors.SignalProcessingFilter import SignalProcessingFilter 24 | from .processors.SignalProcessingMixdown import SignalProcessingMixdown 25 | from .processors.SignalProcessingLoadAudio import SignalProcessingLoadAudio 26 | from .processors.SignalProcessingNormalizer import SignalProcessingNormalizer 27 | from .processors.SignalProcessingLoudness import SignalProcessingLoudness 28 | from .processors.SignalProcessingBaxandallEQ import ( 29 | SignalProcessingBaxandallEQ, 30 | SignalProcessingBaxandall3BandEQ, 31 | ) 32 | from .processors.SignalProcessingHarmonicsEnhancer import ( 33 | SignalProcessingHarmonicsEnhancer, 34 | ) 35 | from .processors.SignalProcessingSaturation import SignalProcessingSaturation 36 | from .processors.SignalProcessingLimiter import SignalProcessingLimiter 37 | 38 | # from .processors.SignalProcessingCompressor import SignalProcessingCompressor 39 | 40 | # visuals 41 | from .visuals.SignalProcessingSpectrogram import 
SignalProcessingSpectrogram 42 | from .visuals.SignalProcessingWaveform import SignalProcessingWaveform 43 | 44 | NODE_CLASS_MAPPINGS = { 45 | "SignalProcessingLoadAudio": SignalProcessingLoadAudio, 46 | "SignalProcessingFilter": SignalProcessingFilter, 47 | "SignalProcessingPaulStretch": SignalProcessingPaulStretch, 48 | "SignalProcessingPadSynth": SignalProcessingPadSynth, 49 | "SignalProcessingPadSynthChoir": SignalProcessingPadSynthChoir, 50 | "SignalProcessingMixdown": SignalProcessingMixdown, 51 | "SignalProcessingSpectrogram": SignalProcessingSpectrogram, 52 | "SignalProcessingWaveform": SignalProcessingWaveform, 53 | "SignalProcessingStereoWidening": SignalProcessingStereoWidening, 54 | "SignalProcessingPitchShifter": SignalProcessingPitchShifter, 55 | "SignalProcessingConvolutionReverb": SignalProcessingConvolutionReverb, 56 | "SignalProcessingNormalizer": SignalProcessingNormalizer, 57 | "SignalProcessingLoudness": SignalProcessingLoudness, 58 | "SignalProcessingBaxandallEQ": SignalProcessingBaxandallEQ, 59 | "SignalProcessingBaxandall3BandEQ": SignalProcessingBaxandall3BandEQ, 60 | "SignalProcessingHarmonicsEnhancer": SignalProcessingHarmonicsEnhancer, 61 | "SignalProcessingSaturation": SignalProcessingSaturation, 62 | "SignalProcessingLimiter": SignalProcessingLimiter, 63 | # "SignalProcessingCompressor": SignalProcessingCompressor, 64 | } 65 | 66 | NODE_DISPLAY_NAME_MAPPINGS = { 67 | "SignalProcessingLoadAudio": "(SP) Load Audio", 68 | "SignalProcessingFilter": "(SP) Filter", 69 | "SignalProcessingPaulStretch": "(SP) PaulStretch", 70 | "SignalProcessingPadSynth": "(SP) PadSynth", 71 | "SignalProcessingPadSynthChoir": "(SP) PadSynth Choir", 72 | "SignalProcessingMixdown": "(SP) Mix Down", 73 | "SignalProcessingSpectrogram": "(SP) Spectrogram", 74 | "SignalProcessingWaveform": "(SP) Waveform", 75 | "SignalProcessingStereoWidening": "(SP) Stereo Width", 76 | "SignalProcessingPitchShifter": "(SP) PitchShift", 77 | "SignalProcessingConvolutionReverb": "(SP) Convolution Reverb", 78 | "SignalProcessingNormalizer": "(SP) Normalizer", 79 | "SignalProcessingLoudness": "(SP) Loudness", 80 | "SignalProcessingBaxandallEQ": "(SP) Baxandall EQ", 81 | "SignalProcessingBaxandall3BandEQ": "(SP) Baxandall 3 Band EQ", 82 | "SignalProcessingHarmonicsEnhancer": "(SP) Enhance Harmonics", 83 | "SignalProcessingSaturation": "(SP) Saturation", 84 | "SignalProcessingLimiter": "(SP) Limiter", 85 | # "SignalProcessingCompressor": "(SP) Compressor", 86 | } 87 | -------------------------------------------------------------------------------- /noxfile.py: -------------------------------------------------------------------------------- 1 | import nox 2 | from nox import Session 3 | 4 | PYTHON_VERSIONS = ["3.10.12"] 5 | REUSE_ENV = True 6 | 7 | 8 | @nox.session(python=PYTHON_VERSIONS, tags=["style"], reuse_venv=REUSE_ENV) 9 | def lint(session: Session) -> None: 10 | 11 | session.install("black") 12 | session.install("flake8") 13 | session.install("mypy") 14 | 15 | session.run("black", ".") 16 | session.run("flake8", "--max-line-length=100", "--ignore=E501,E203,W503", ".") 17 | session.run( 18 | "mypy", 19 | ".", 20 | "--ignore-missing-imports", 21 | "--strict", 22 | "--show-error-codes", 23 | ) 24 | 25 | 26 | @nox.session(python=PYTHON_VERSIONS, tags=["tests"], reuse_venv=REUSE_ENV) 27 | def tests(session: Session) -> None: 28 | """Run pytest tests with Scalene profiling.""" 29 | 30 | session.install("scalene", "pytest", "pytest-cov", "pytest-xdist") 31 | requirements =

nox.project.load_toml("pyproject.toml")["project"]["dependencies"] 32 | for _, v in requirements.items(): 33 | session.install(*v) 34 | 35 | project_name = nox.project.load_toml("pyproject.toml")["project"]["name"] 36 | 37 | pytest_path = session.run("which", "pytest", external=True, silent=True).strip() 38 | 39 | if not session.posargs: 40 | with session.cd(".."): 41 | session.run( 42 | "scalene", 43 | # "--profile-all", 44 | pytest_path, 45 | f"--cov={project_name}", 46 | "--cov-report=term", 47 | "--cov-report=html", 48 | f"{project_name}/tests/", 49 | external=True, 50 | ) 51 | else: 52 | with session.cd(".."): 53 | session.run( 54 | "pytest", 55 | "--rootdir=.", 56 | f"--cov={project_name}", 57 | "--cov-report=term", 58 | "--cov-report=html", 59 | f"{project_name}/tests/", 60 | "-k", 61 | session.posargs[0], 62 | external=True, 63 | ) 64 | -------------------------------------------------------------------------------- /processors/SignalProcessingBaxandallEQ.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Baxandall shelf EQ 10 | references used : # reference https://webaudio.github.io/Audio-EQ-Cookbook/Audio-EQ-Cookbook.txt 11 | """ 12 | 13 | import torch 14 | import torchaudio 15 | import math 16 | from typing import Dict, Any, Tuple, Union 17 | 18 | from ..core.io import audio_to_comfy_3d, audio_from_comfy_3d 19 | from ..core.loudness import lufs_normalization, get_loudness 20 | 21 | 22 | class SignalProcessingBaxandallEQ: 23 | @classmethod 24 | def INPUT_TYPES(cls) -> Dict[str, Any]: 25 | return { 26 | "required": { 27 | "audio_input": ("AUDIO",), 28 | "bass_gain_db": ( 29 | "FLOAT", 30 | {"default": 1.0, "min": -20.0, "max": 20.0, "step": 0.1}, 31 | ), 32 | "treble_gain_db": ( 33 | "FLOAT", 34 | {"default": 1.0, "min": -20.0, "max": 20.0, "step": 0.1}, 35 | ), 36 | }, 37 | } 38 | 39 | RETURN_TYPES = ("AUDIO",) 40 | RETURN_NAMES = ("processed_audio",) 41 | CATEGORY = "Signal Processing" 42 | FUNCTION = "process" 43 | 44 | def process( 45 | self, 46 | audio_input: Dict[str, Union[torch.Tensor, int]], 47 | bass_gain_db: float = 0.0, 48 | treble_gain_db: float = 0.0, 49 | ) -> Tuple[Dict[str, torch.Tensor]]: 50 | waveform, sample_rate = audio_from_comfy_3d(audio_input, try_gpu=True) 51 | loudness = get_loudness(waveform, sample_rate) 52 | 53 | # Apply Bass Shelf (low shelf) using RBJ formula 54 | b_bass, a_bass = self.design_rbj_shelf( 55 | sample_rate, freq=100.0, gain_db=bass_gain_db, shelf_type="low" 56 | ) 57 | waveform = torchaudio.functional.lfilter( 58 | waveform, 59 | a_bass.to(waveform.device), 60 | b_bass.to(waveform.device), 61 | clamp=False, 62 | ) 63 | 64 | # Apply Treble Shelf (high shelf) using RBJ formula 65 | b_treble, a_treble = self.design_rbj_shelf( 66 | sample_rate, freq=10000.0, gain_db=treble_gain_db, shelf_type="high" 67 | ) 68 | waveform = torchaudio.functional.lfilter( 69 | waveform, 70 | a_treble.to(waveform.device), 71 | b_treble.to(waveform.device), 72 | clamp=False, 73 | ) 74 | 75 | waveform = lufs_normalization(waveform, sample_rate, loudness) 76 | return audio_to_comfy_3d(waveform, sample_rate) 77 | 78 | def design_rbj_shelf( 79 | self, sr: int, freq: float, gain_db: float, shelf_type: str = "low" 80 | ) -> Tuple[torch.Tensor, torch.Tensor]: 81 | # RBJ Audio EQ Cookbook shelf filters 82 | A = 10.0 ** (gain_db / 40.0) 83 | w0 = 2 * math.pi * freq / sr 84 | alpha = ( 85 
| math.sin(w0) / 2.0 * math.sqrt((A + 1 / A) * (1.0 / 1.0 - 1) + 2.0) 86 | ) # S=1.0 87 | 88 | cosw0 = math.cos(w0) 89 | if shelf_type == "low": 90 | b0 = A * ((A + 1) - (A - 1) * cosw0 + 2 * math.sqrt(A) * alpha) 91 | b1 = 2 * A * ((A - 1) - (A + 1) * cosw0) 92 | b2 = A * ((A + 1) - (A - 1) * cosw0 - 2 * math.sqrt(A) * alpha) 93 | a0 = (A + 1) + (A - 1) * cosw0 + 2 * math.sqrt(A) * alpha 94 | a1 = -2 * ((A - 1) + (A + 1) * cosw0) 95 | a2 = (A + 1) + (A - 1) * cosw0 - 2 * math.sqrt(A) * alpha 96 | else: # high shelf 97 | b0 = A * ((A + 1) + (A - 1) * cosw0 + 2 * math.sqrt(A) * alpha) 98 | b1 = -2 * A * ((A - 1) + (A + 1) * cosw0) 99 | b2 = A * ((A + 1) + (A - 1) * cosw0 - 2 * math.sqrt(A) * alpha) 100 | a0 = (A + 1) - (A - 1) * cosw0 + 2 * math.sqrt(A) * alpha 101 | a1 = 2 * ((A - 1) - (A + 1) * cosw0) 102 | a2 = (A + 1) - (A - 1) * cosw0 - 2 * math.sqrt(A) * alpha 103 | 104 | b = torch.tensor([b0 / a0, b1 / a0, b2 / a0], dtype=torch.float64) 105 | a = torch.tensor([1.0, a1 / a0, a2 / a0], dtype=torch.float64) 106 | return b, a 107 | 108 | 109 | class SignalProcessingBaxandall3BandEQ: 110 | @classmethod 111 | def INPUT_TYPES(cls) -> Dict[str, Any]: 112 | return { 113 | "required": { 114 | "audio_input": ("AUDIO",), 115 | "bass_gain_db": ( 116 | "FLOAT", 117 | {"default": 1.0, "min": -20.0, "max": 20.0, "step": 0.1}, 118 | ), 119 | "mid_gain_db": ( 120 | "FLOAT", 121 | {"default": 0.0, "min": -20.0, "max": 20.0, "step": 0.1}, 122 | ), 123 | "treble_gain_db": ( 124 | "FLOAT", 125 | {"default": 1.0, "min": -20.0, "max": 20.0, "step": 0.1}, 126 | ), 127 | "low_freq": ( 128 | "FLOAT", 129 | {"default": 100.0, "min": 20.0, "max": 500.0, "step": 1.0}, 130 | ), 131 | "mid_freq": ( 132 | "FLOAT", 133 | {"default": 1000.0, "min": 200.0, "max": 5000.0, "step": 10.0}, 134 | ), 135 | "high_freq": ( 136 | "FLOAT", 137 | {"default": 10000.0, "min": 2000.0, "max": 20000.0, "step": 100.0}, 138 | ), 139 | "mid_q": ( 140 | "FLOAT", 141 | {"default": 0.7, "min": 0.1, "max": 10.0, "step": 0.1}, 142 | ), 143 | }, 144 | } 145 | 146 | RETURN_TYPES = ("AUDIO",) 147 | RETURN_NAMES = ("processed_audio",) 148 | CATEGORY = "Signal Processing" 149 | FUNCTION = "process" 150 | 151 | def process( 152 | self, 153 | audio_input: Dict[str, Union[torch.Tensor, int]], 154 | bass_gain_db: float = 0.0, 155 | mid_gain_db: float = 0.0, 156 | treble_gain_db: float = 0.0, 157 | low_freq: float = 100.0, 158 | mid_freq: float = 1000.0, 159 | high_freq: float = 10000.0, 160 | mid_q: float = 0.7, 161 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 162 | 163 | waveform, sample_rate = audio_from_comfy_3d(audio_input, try_gpu=True) 164 | device = waveform.device 165 | dtype = waveform.dtype 166 | loudness = get_loudness(waveform, sample_rate) 167 | 168 | # Low shelf filter 169 | b_low, a_low = self.design_rbj_shelf( 170 | sample_rate, low_freq, bass_gain_db, shelf_type="low" 171 | ) 172 | b_low = b_low.to(device=device, dtype=dtype) 173 | a_low = a_low.to(device=device, dtype=dtype) 174 | waveform = torchaudio.functional.lfilter(waveform, a_low, b_low, clamp=False) 175 | 176 | # Mid peaking filter 177 | b_mid, a_mid = self.design_rbj_peak(sample_rate, mid_freq, mid_gain_db, Q=mid_q) 178 | b_mid = b_mid.to(device=device, dtype=dtype) 179 | a_mid = a_mid.to(device=device, dtype=dtype) 180 | waveform = torchaudio.functional.lfilter(waveform, a_mid, b_mid, clamp=False) 181 | 182 | # High shelf filter 183 | b_high, a_high = self.design_rbj_shelf( 184 | sample_rate, high_freq, treble_gain_db, shelf_type="high" 185 | ) 186 | b_high = 
b_high.to(device=device, dtype=dtype) 187 | a_high = a_high.to(device=device, dtype=dtype) 188 | waveform = torchaudio.functional.lfilter(waveform, a_high, b_high, clamp=False) 189 | 190 | # Normalize loudness after EQ 191 | waveform = lufs_normalization(waveform, sample_rate, loudness) 192 | 193 | return audio_to_comfy_3d(waveform, sample_rate) 194 | 195 | def design_rbj_shelf( 196 | self, sr: int, freq: float, gain_db: float, shelf_type: str = "low" 197 | ) -> Tuple[torch.Tensor, torch.Tensor]: 198 | # RBJ audio EQ cookbook formula for shelving filters 199 | A = 10.0 ** (gain_db / 40.0) 200 | w0 = 2.0 * math.pi * freq / sr 201 | # Slope S=1.0 (Baxandall-like gentle slope) 202 | S = 1.0 203 | alpha = math.sin(w0) / 2.0 * math.sqrt((A + 1.0 / A) * (1.0 / S - 1.0) + 2.0) 204 | cosw0 = math.cos(w0) 205 | 206 | if shelf_type == "low": 207 | b0 = A * ((A + 1.0) - (A - 1.0) * cosw0 + 2.0 * math.sqrt(A) * alpha) 208 | b1 = 2.0 * A * ((A - 1.0) - (A + 1.0) * cosw0) 209 | b2 = A * ((A + 1.0) - (A - 1.0) * cosw0 - 2.0 * math.sqrt(A) * alpha) 210 | a0 = (A + 1.0) + (A - 1.0) * cosw0 + 2.0 * math.sqrt(A) * alpha 211 | a1 = -2.0 * ((A - 1.0) + (A + 1.0) * cosw0) 212 | a2 = (A + 1.0) + (A - 1.0) * cosw0 - 2.0 * math.sqrt(A) * alpha 213 | else: 214 | # high shelf 215 | b0 = A * ((A + 1.0) + (A - 1.0) * cosw0 + 2.0 * math.sqrt(A) * alpha) 216 | b1 = -2.0 * A * ((A - 1.0) + (A + 1.0) * cosw0) 217 | b2 = A * ((A + 1.0) + (A - 1.0) * cosw0 - 2.0 * math.sqrt(A) * alpha) 218 | a0 = (A + 1.0) - (A - 1.0) * cosw0 + 2.0 * math.sqrt(A) * alpha 219 | a1 = 2.0 * ((A - 1.0) - (A + 1.0) * cosw0) 220 | a2 = (A + 1.0) - (A - 1.0) * cosw0 - 2.0 * math.sqrt(A) * alpha 221 | 222 | b = torch.tensor([b0 / a0, b1 / a0, b2 / a0], dtype=torch.float64) 223 | a = torch.tensor([1.0, a1 / a0, a2 / a0], dtype=torch.float64) 224 | return b, a 225 | 226 | def design_rbj_peak( 227 | self, sr: int, freq: float, gain_db: float, Q: float = 0.7 228 | ) -> Tuple[torch.Tensor, torch.Tensor]: 229 | # RBJ audio EQ cookbook peak filter 230 | A = 10.0 ** (gain_db / 40.0) 231 | w0 = 2.0 * math.pi * freq / sr 232 | alpha = math.sin(w0) / (2.0 * Q) 233 | cosw0 = math.cos(w0) 234 | 235 | b0 = 1.0 + alpha * A 236 | b1 = -2.0 * cosw0 237 | b2 = 1.0 - alpha * A 238 | a0 = 1.0 + alpha / A 239 | a1 = -2.0 * cosw0 240 | a2 = 1.0 - alpha / A 241 | 242 | b = torch.tensor([b0 / a0, b1 / a0, b2 / a0], dtype=torch.float64) 243 | a = torch.tensor([1.0, a1 / a0, a2 / a0], dtype=torch.float64) 244 | return b, a 245 | -------------------------------------------------------------------------------- /processors/SignalProcessingCompressor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | This file contains a description of a compressor node that utilizes a CUDA-optimized kernel. 
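A property worth knowing about the cookbook shelves used in both EQ classes above: at DC the low shelf's transfer function evaluates to A^2 = 10^(gain_db/20), so the requested boost comes out exactly. The `(1.0 / 1.0 - 1)` term in the first class is just the slope formula with S = 1 written inline. A standalone check using the same formulas:

```python
# Sanity check for the RBJ low shelf: DC gain of the normalized (b, a)
# pair should equal 10**(gain_db / 20), i.e. A**2 with A = 10**(g / 40).
import math

def rbj_low_shelf(sr, freq, gain_db, S=1.0):
    A = 10.0 ** (gain_db / 40.0)
    w0 = 2.0 * math.pi * freq / sr
    alpha = math.sin(w0) / 2.0 * math.sqrt((A + 1.0 / A) * (1.0 / S - 1.0) + 2.0)
    c = math.cos(w0)
    b = [A * ((A + 1) - (A - 1) * c + 2 * math.sqrt(A) * alpha),
         2 * A * ((A - 1) - (A + 1) * c),
         A * ((A + 1) - (A - 1) * c - 2 * math.sqrt(A) * alpha)]
    a0 = (A + 1) + (A - 1) * c + 2 * math.sqrt(A) * alpha
    a1 = -2 * ((A - 1) + (A + 1) * c)
    a2 = (A + 1) + (A - 1) * c - 2 * math.sqrt(A) * alpha
    return [x / a0 for x in b], [1.0, a1 / a0, a2 / a0]

b, a = rbj_low_shelf(44100, 100.0, 6.0)
print(round(20 * math.log10(sum(b) / sum(a)), 3))  # ~6.0 dB at DC
```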
10 | """ 11 | import torch 12 | from typing import Dict, Any, Tuple, Union 13 | 14 | from ..core.io import audio_to_comfy_3d, audio_from_comfy_2d 15 | from ..core.compression import compressor 16 | from ..core.loudness import get_loudness, lufs_normalization 17 | 18 | 19 | class SignalProcessingCompressor: 20 | @classmethod 21 | def INPUT_TYPES(cls) -> Dict[str, Any]: 22 | return { 23 | "required": { 24 | "audio_input": ("AUDIO",), 25 | "comp": ( 26 | "FLOAT", 27 | {"default": -0.3, "min": -2.0, "max": 2.0, "step": 0.01}, 28 | ), 29 | "attack": ( 30 | "FLOAT", 31 | {"default": 0.1, "min": 0.01, "max": 100.0, "step": 0.01}, 32 | ), 33 | "release": ( 34 | "FLOAT", 35 | {"default": 60.0, "min": 0.01, "max": 1000.0, "step": 0.1}, 36 | ), 37 | "filter_param": ( 38 | "FLOAT", 39 | {"default": 0.3, "min": 0.0, "max": 1.0, "step": 0.01}, 40 | ), 41 | } 42 | } 43 | 44 | RETURN_TYPES = ("AUDIO",) 45 | RETURN_NAMES = ("audio",) 46 | CATEGORY = "Signal Processing" 47 | FUNCTION = "process" 48 | 49 | def process( 50 | self, 51 | audio_input: Dict[str, Union[torch.Tensor, int]], 52 | comp: float = -0.3, # Compression/expansion factor 53 | attack: float = 0.1, # Attack time in ms 54 | release: float = 60.0, # Release time in ms 55 | filter_param: float = 0.3, # Filter parameter < 1 56 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 57 | """ 58 | Apply compression or expansion to the audio input using CUDA. 59 | 60 | Parameters: 61 | audio_input (Dict[str, Union[torch.Tensor, int]]): Input audio waveform and sample rate. 62 | comp (float): Compression/expansion factor. 63 | attack (float): Attack time in milliseconds. 64 | release (float): Release time in milliseconds. 65 | filter_param (float): Filter parameter for envelope smoothing. 66 | 67 | Returns: 68 | Tuple[Dict[str, Union[torch.Tensor, int]]]: Compressed audio and sample rate. 
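The kernel in kernels/compressor/compressor.cu is not reproduced in this listing, so the block below is only a rough pure-PyTorch mental model of a power-law envelope compressor. The attack/release smoothing constants and the gain law here are assumptions for illustration, not the kernel's actual math:

```python
# Hedged reference sketch of an envelope-follower compressor (mono, 1-D input).
# Not the repo's CUDA kernel; details are assumed.
import torch

def reference_compressor(x: torch.Tensor, sr: int, comp: float,
                         attack_ms: float, release_ms: float) -> torch.Tensor:
    att = torch.exp(torch.tensor(-1.0 / (sr * attack_ms / 1000.0)))
    rel = torch.exp(torch.tensor(-1.0 / (sr * release_ms / 1000.0)))
    env = torch.zeros(())
    out = torch.empty_like(x)
    for i, s in enumerate(x):                  # per-sample loop: slow, clarity only
        level = s.abs()
        coeff = att if level > env else rel    # fast attack, slow release
        env = coeff * env + (1.0 - coeff) * level
        gain = env.clamp_min(1e-6) ** comp     # comp < 0 compresses, > 0 expands
        out[i] = s * gain
    return out

y = reference_compressor(torch.randn(1000), 44100, comp=-0.3,
                         attack_ms=0.1, release_ms=60.0)
```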
69 | """ 70 | # Extract waveform and sample rate 71 | waveform, sample_rate = audio_from_comfy_2d(audio_input, try_gpu=True) 72 | 73 | loudness = get_loudness(waveform, sample_rate=sample_rate) 74 | 75 | # Apply the compressor kernel 76 | filtered_waveform, _ = compressor( 77 | waveform, 78 | sample_rate, 79 | comp=comp, 80 | attack=attack, 81 | release=release, 82 | a=filter_param, 83 | device="cuda" if torch.cuda.is_available() else "cpu", 84 | ) 85 | 86 | filtered_waveform = lufs_normalization( 87 | filtered_waveform, sample_rate=sample_rate, target_lufs=loudness 88 | ) 89 | 90 | # Return the processed audio 91 | return audio_to_comfy_3d(filtered_waveform, sample_rate) 92 | -------------------------------------------------------------------------------- /processors/SignalProcessingFilter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | The code defines a classic audio filter set for performing various audio processing tasks such as filtering out unwanted frequencies 10 | """ 11 | 12 | import torch 13 | import torchaudio 14 | from typing import Dict, Any, Tuple, Union 15 | 16 | from ..core.io import audio_to_comfy_3d, audio_from_comfy_3d 17 | from ..core.loudness import lufs_normalization, get_loudness 18 | 19 | 20 | class SignalProcessingFilter: 21 | @classmethod 22 | def INPUT_TYPES(cls) -> Dict[str, Any]: 23 | return { 24 | "required": { 25 | "audio_input": ("AUDIO", {"forceInput": True}), 26 | "cutoff": ( 27 | "FLOAT", 28 | {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01}, 29 | ), 30 | "filter_type": ( 31 | ["lowpass", "highpass", "bandpass", "bandstop"], 32 | {"default": "lowpass"}, 33 | ), 34 | "q_factor": ( 35 | "FLOAT", 36 | {"default": 0.707, "min": 0.1, "max": 5.0, "step": 0.01}, 37 | ), # For resonance/bandwidth 38 | } 39 | } 40 | 41 | RETURN_TYPES = ("AUDIO", "INT") 42 | RETURN_NAMES = ("audio", "sample_rate") 43 | CATEGORY = "Signal Processing" 44 | FUNCTION = "process" 45 | 46 | def process( 47 | self, 48 | audio_input: Dict[str, Union[torch.Tensor, int]], 49 | cutoff: float, 50 | filter_type: str, 51 | q_factor: float, 52 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 53 | """ 54 | Apply a specified filter to the input audio. 55 | 56 | Parameters: 57 | audio (Dict[str, torch.Tensor]): Input audio with 'waveform' and 'sample_rate'. 58 | cutoff (float): Normalized cutoff frequency (0.0 to 1.0). 59 | filter_type (str): Type of filter ('lowpass', 'highpass', 'bandpass', 'bandstop'). 60 | q_factor (float): Quality factor determining the filter's bandwidth. 61 | 62 | Returns: 63 | Tuple[Dict[str, torch.Tensor]]: Filtered audio. 
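The cutoff knob is mapped logarithmically onto (20 Hz, Nyquist - 100 Hz) by the body just below, so equal knob increments correspond to roughly equal musical intervals. The same mapping in isolation, with example values at 44.1 kHz:

```python
# Standalone sketch of the log-frequency cutoff mapping implemented below.
import math

def knob_to_hz(cutoff: float, sample_rate: int = 44100) -> float:
    nyquist = sample_rate / 2.0
    lo, hi = math.log(20.0), math.log(nyquist - 100.0)
    cutoff = min(max(cutoff, 1e-6), 1.0 - 1e-6)  # avoid log-domain endpoints
    return math.exp(lo + cutoff * (hi - lo))

for knob in (0.0, 0.25, 0.5, 0.75, 1.0):
    print(knob, round(knob_to_hz(knob)))  # ~20, ~115, ~663, ~3815, ~21950 Hz
```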
64 | """ 65 | 66 | waveform, sample_rate = audio_from_comfy_3d(audio_input) 67 | 68 | loudness = get_loudness(waveform, sample_rate) 69 | 70 | nyquist = sample_rate / 2.0 71 | 72 | # Define minimum and maximum frequencies for mapping 73 | log_min = 20.0 # 20 Hz, typical lower bound of human hearing 74 | log_max = nyquist - 100.0 # Slightly below Nyquist to prevent instability 75 | 76 | # Avoid log(0) by ensuring cutoff is within (0,1) 77 | cutoff = min(max(cutoff, 1e-6), 1.0 - 1e-6) 78 | 79 | # Logarithmic mapping 80 | log_min = torch.log(torch.tensor(log_min)) 81 | log_max = torch.log(torch.tensor(log_max)) 82 | log_cutoff = log_min + cutoff * (log_max - log_min) 83 | cutoff_freq = torch.exp(log_cutoff).item() 84 | 85 | # Choose filter type 86 | if filter_type == "lowpass": 87 | filtered_waveform = torchaudio.functional.lowpass_biquad( 88 | waveform, sample_rate, cutoff_freq, Q=q_factor 89 | ) 90 | elif filter_type == "highpass": 91 | filtered_waveform = torchaudio.functional.highpass_biquad( 92 | waveform, sample_rate, cutoff_freq, Q=q_factor 93 | ) 94 | elif filter_type in ["bandpass", "bandstop"]: 95 | center_freq = cutoff_freq 96 | # Ensure that the bandwidth does not exceed the Nyquist frequency 97 | 98 | if filter_type == "bandpass": 99 | filtered_waveform = torchaudio.functional.bandpass_biquad( 100 | waveform, sample_rate, center_freq, Q=q_factor 101 | ) 102 | else: # bandstop 103 | filtered_waveform = torchaudio.functional.band_biquad( 104 | waveform, sample_rate, center_freq, Q=q_factor 105 | ) 106 | else: 107 | raise ValueError(f"Unsupported filter type: {filter_type}") 108 | 109 | filtered_waveform = lufs_normalization(filtered_waveform, sample_rate, loudness) 110 | 111 | return audio_to_comfy_3d(filtered_waveform, sample_rate) 112 | -------------------------------------------------------------------------------- /processors/SignalProcessingHarmonicsEnhancer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Classic Audio filter set 10 | """ 11 | from ast import literal_eval 12 | import torch 13 | import torchaudio 14 | from typing import Dict, Any, List, Tuple, Union 15 | 16 | from ..core.utilities import comfy_root_to_syspath 17 | from ..core.io import audio_to_comfy_3d, audio_from_comfy_2d 18 | from ..core.loudness import lufs_normalization, get_loudness 19 | 20 | comfy_root_to_syspath() # add comfy to sys path for dev 21 | 22 | 23 | class SignalProcessingHarmonicsEnhancer: 24 | @classmethod 25 | def INPUT_TYPES(cls) -> Dict[str, Any]: 26 | return { 27 | "required": { 28 | "audio_input": ("AUDIO",), 29 | "harmonics": ("STRING", {"default": "1,3,5,7,9"}), 30 | "mode": (["detect base frequency", "use base frequency"],), 31 | "base_frequency": ("FLOAT", {"default": 440, "min": 0, "max": 20000}), 32 | "gain_db": ("INT", {"default": 5, "min": 0, "max": 500, "step": 1}), 33 | "Q": ("FLOAT", {"default": 0.707, "min": 0, "max": 1.0, "step": 0.01}), 34 | } 35 | } 36 | 37 | RETURN_TYPES = ("AUDIO",) 38 | RETURN_NAMES = ("audio",) 39 | CATEGORY = "Signal Processing" 40 | FUNCTION = "process" 41 | 42 | def process( 43 | self, 44 | audio_input: Dict[str, Union[torch.Tensor, int]], 45 | harmonics: str = "1,3,5,7,9", 46 | mode: str = "detect base frequency", 47 | base_frequency: int = 440, 48 | gain_db: int = 5, 49 | Q: float = 0.707, 50 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 51 | waveform, 
sample_rate = audio_from_comfy_2d(audio_input, try_gpu=True) 52 | loudness = get_loudness(waveform, sample_rate) 53 | 54 | try: 55 | harmonics_list: List[int] = [literal_eval(x) for x in harmonics.split(",")] 56 | except Exception: 57 | raise RuntimeWarning( 58 | "Invalid Harmonics Format. Please delimit integers by a comma \ 59 | ',' like this: 1,3,5,7,9" 60 | ) 61 | if mode == "detect base frequency": 62 | filtered_waveform = self.enhance_harmonics( 63 | waveform, sample_rate, harmonics=harmonics_list, gain_db=gain_db, Q=Q 64 | ) 65 | elif mode == "use base frequency": 66 | filtered_waveform = self.enhance_harmonics( 67 | waveform, 68 | sample_rate, 69 | harmonics=harmonics_list, 70 | gain_db=gain_db, 71 | base_frequency=base_frequency, 72 | Q=Q, 73 | ) 74 | 75 | filtered_waveform = lufs_normalization(filtered_waveform, sample_rate, loudness) 76 | return audio_to_comfy_3d(filtered_waveform, sample_rate) 77 | 78 | def add_harmonics(self, audio: torch.Tensor, gain: float = 1.2) -> torch.Tensor: 79 | # Apply saturation using a tanh curve 80 | harmonic_audio = torch.tanh(audio * gain) 81 | return harmonic_audio 82 | 83 | def detect_fundamental(self, audio: torch.Tensor, sample_rate: int) -> torch.Tensor: 84 | # Estimate the fundamental frequency using a pitch detection method 85 | pitch = torchaudio.functional.detect_pitch_frequency(audio, sample_rate) 86 | 87 | return pitch 88 | 89 | def detect_fundamental_mean(self, audio: torch.Tensor, sample_rate: int) -> int: 90 | # Estimate the fundamental frequency using a pitch detection method 91 | pitch = torchaudio.functional.detect_pitch_frequency(audio, sample_rate) 92 | 93 | return int(pitch.mean().item()) 94 | 95 | def enhance_harmonics( 96 | self, 97 | audio: torch.Tensor, 98 | sample_rate: int, 99 | harmonics: List[int] = [1, 3, 5, 7, 9, 11], 100 | gain_db: float = 5, 101 | base_frequency: float = 0, 102 | Q: float = 0.707, 103 | ) -> torch.Tensor: 104 | # Detect the base frequency 105 | if base_frequency == 0: 106 | base_frequency = self.detect_fundamental_mean(audio, sample_rate) 107 | if base_frequency <= 0: # Fallback if pitch detection fails 108 | base_frequency = 440 # Use a default base frequency 109 | 110 | # Apply EQ boosts to specific harmonic frequencies 111 | for harmonic in harmonics: 112 | freq = base_frequency * harmonic 113 | if freq < sample_rate / 2: # Ensure it's within the Nyquist frequency 114 | audio = torchaudio.functional.equalizer_biquad( 115 | audio, sample_rate, center_freq=freq, gain=gain_db, Q=Q 116 | ) 117 | 118 | return audio 119 | -------------------------------------------------------------------------------- /processors/SignalProcessingLimiter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Classic Limiter CUDA Optimized 10 | 11 | Reference: 12 | https://ccrma.stanford.edu/~jatin/ComplexNonlinearities/Hysteresis.html 13 | https://viennatalk.mdw.ac.at/papers/Pap_01_79_Tronchin.pdf 14 | https://jatinchowdhury18.medium.com/complex-nonlinearities-episode-3-hysteresis-fdeb2cd3e3f6 15 | https://ccrma.stanford.edu/~dtyeh/papers/yeh07_dafx_clipode.pdf 16 | """ 17 | import torch 18 | from typing import Dict, Any, Tuple, Union 19 | 20 | from ..core.io import audio_to_comfy_3d, audio_from_comfy_2d 21 | from ..core.limiting import limiter, limiter_get_modes 22 | from ..core.loudness import get_loudness, lufs_normalization 23 | 24 | 25
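The enhancer above is essentially a chain of peaking EQs pinned to integer multiples of the detected (or supplied) base frequency. A toy standalone version of the same loop on a 220 Hz tone; the tone and gain values are example inputs only:

```python
# Boost the 1st/3rd/5th harmonics of a 220 Hz tone with torchaudio's peaking EQ.
import torch
import torchaudio.functional as F

sr = 44100
t = torch.arange(sr) / sr
audio = torch.sin(2 * torch.pi * 220.0 * t).unsqueeze(0)  # shape (1, N)

base = 220.0
for h in (1, 3, 5):
    freq = base * h
    if freq < sr / 2:  # stay under Nyquist, as the node does
        audio = F.equalizer_biquad(audio, sr, center_freq=freq, gain=5.0, Q=0.707)
```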
| class SignalProcessingLimiter: 26 | @classmethod 27 | def INPUT_TYPES(cls) -> Dict[str, Any]: 28 | return { 29 | "required": { 30 | "audio_input": ("AUDIO",), 31 | "mode": (limiter_get_modes(),), 32 | "threshold": ( 33 | "FLOAT", 34 | {"default": 100.0, "min": 0.0, "max": 100.0, "step": 0.1}, 35 | ), 36 | "slope": ( 37 | "FLOAT", 38 | {"default": 100.0, "min": 0.0, "max": 100.0, "step": 0.1}, 39 | ), 40 | "release_ms": ( 41 | "FLOAT", 42 | {"default": 100.0, "min": 0.0, "max": 1000.0, "step": 0.1}, 43 | ), 44 | } 45 | } 46 | 47 | RETURN_TYPES = ("AUDIO",) 48 | RETURN_NAMES = ("audio",) 49 | CATEGORY = "Signal Processing" 50 | FUNCTION = "process" 51 | 52 | def process( 53 | self, 54 | audio_input: Dict[str, Union[torch.Tensor, int]], 55 | mode: str = "downward", 56 | threshold: float = 50.0, # Threshold in percents 57 | slope: float = 100.0, # Slope in percents 58 | release_ms: float = 100.0, # Release time in ms 59 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 60 | waveform, sample_rate = audio_from_comfy_2d(audio_input, try_gpu=True) 61 | 62 | loudness = get_loudness(waveform, sample_rate) 63 | 64 | filtered_waveform = limiter( 65 | waveform, 66 | mode=mode, 67 | sample_rate=sample_rate, 68 | threshold=threshold / 100.0, 69 | slope=slope / 100, 70 | release_ms=release_ms, 71 | ) 72 | 73 | filtered_waveform = lufs_normalization(filtered_waveform, sample_rate, loudness) 74 | 75 | return audio_to_comfy_3d(filtered_waveform, sample_rate) 76 | -------------------------------------------------------------------------------- /processors/SignalProcessingLoadAudio.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Audio loading node 10 | """ 11 | 12 | import sys 13 | import os 14 | import torch 15 | from typing import Dict, Tuple, Any, Union 16 | 17 | from ..core.io import from_disk_as_dict_3d 18 | import folder_paths 19 | 20 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy")) 21 | 22 | 23 | class SignalProcessingLoadAudio: 24 | supported_formats = ["wav", "mp3", "ogg", "m4a", "flac", "mp4"] 25 | input_dir = os.path.join(folder_paths.get_input_directory(), "samples") 26 | 27 | @classmethod 28 | def INPUT_TYPES(s) -> Dict[str, Any]: 29 | supported_extensions = tuple( 30 | f".{fmt.lower()}" for fmt in SignalProcessingLoadAudio.supported_formats 31 | ) 32 | 33 | files, _ = folder_paths.recursive_search(SignalProcessingLoadAudio.input_dir) 34 | filtered_files = [x for x in files if x.lower().endswith(supported_extensions)] 35 | files = [ 36 | os.path.join(SignalProcessingLoadAudio.input_dir, x) for x in filtered_files 37 | ] 38 | 39 | return { 40 | "required": { 41 | "audio_file": (sorted(files), {"image_upload": True}), 42 | "gain": ( 43 | "FLOAT", 44 | {"default": 1.0, "min": 0.0, "max": 8.0, "step": 0.01}, 45 | ), 46 | }, 47 | } 48 | 49 | RETURN_TYPES = ("AUDIO",) 50 | RETURN_NAMES = ("audio",) 51 | CATEGORY = "Signal Processing" 52 | FUNCTION = "process" 53 | 54 | def process( 55 | self, audio_file: str, gain: float 56 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 57 | return from_disk_as_dict_3d(audio_file=audio_file, gain=gain) 58 | -------------------------------------------------------------------------------- /processors/SignalProcessingLoudness.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 
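As with the compressor, the limiter's CUDA kernels (kernels/limiter/*.cu) are not shown in this listing. Purely as a naive reference for what a downward mode does with the 0-1 threshold/slope values the node passes in, the gain computation might look like the sketch below; this is an assumption-laden stand-in, not core.limiting:

```python
# Naive downward limiter: attenuate only where the level exceeds the threshold.
import torch

def naive_downward_limiter(x: torch.Tensor, threshold: float, slope: float) -> torch.Tensor:
    env = x.abs()
    over = env > threshold
    gain = torch.ones_like(env)
    # slope = 1.0 pins peaks exactly to the threshold; smaller slopes limit less
    gain[over] = (threshold / env[over]) ** slope
    return x * gain

x = torch.linspace(-1.5, 1.5, 7)
print(naive_downward_limiter(x, threshold=0.5, slope=1.0))  # peaks capped at +/-0.5
```

A real limiter would also smooth the gain with the release time rather than switching it per sample.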
-*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Loudness node 10 | """ 11 | 12 | import torch 13 | from typing import Tuple, Dict, Any, Union 14 | 15 | from ..core.io import audio_from_comfy_2d 16 | from ..core.loudness import get_loudness 17 | 18 | 19 | class SignalProcessingLoudness: 20 | @classmethod 21 | def INPUT_TYPES(cls) -> Dict[str, Any]: 22 | return { 23 | "required": { 24 | "audio_input": ("AUDIO",), 25 | }, 26 | } 27 | 28 | RETURN_TYPES = ("FLOAT",) 29 | RETURN_NAMES = ("loudness",) 30 | CATEGORY = "Signal Processing" 31 | FUNCTION = "process" 32 | 33 | def process(self, audio_input: Dict[str, Union[torch.Tensor, int]]) -> Tuple[float]: 34 | waveform, sample_rate = audio_from_comfy_2d(audio_input, try_gpu=True) 35 | 36 | loudness: float = get_loudness(waveform, sample_rate) 37 | 38 | return (loudness,) 39 | -------------------------------------------------------------------------------- /processors/SignalProcessingMixdown.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Mixdown node for pad synths 10 | """ 11 | 12 | import torch 13 | 14 | from typing import Tuple, List, Dict, Union, Any 15 | import torchaudio 16 | 17 | from ..core.io import audio_to_comfy_3d 18 | from ..core.loudness import lufs_normalization 19 | 20 | 21 | class SignalProcessingMixdown: 22 | @classmethod 23 | def INPUT_TYPES(cls) -> Dict[str, Any]: 24 | return { 25 | "required": { 26 | "audio_inputs": ("AUDIO_LIST", {"default": []}), 27 | }, 28 | "optional": { 29 | "gain_factors": ( 30 | "FLOAT_LIST", 31 | {"default": [], "min": 0.0, "max": 2.0, "step": 0.1}, 32 | ), 33 | # If empty, default to [1.0] * num_audios 34 | }, 35 | } 36 | 37 | RETURN_TYPES = ("AUDIO",) 38 | RETURN_NAMES = ("mixed_audio",) 39 | CATEGORY = "Signal Processing" 40 | FUNCTION = "process" 41 | 42 | def process( 43 | self, 44 | audio_inputs: List[Dict[str, Union[torch.Tensor, int]]], 45 | gain_factors: List[float] = [], 46 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 47 | """ 48 | Mix down multiple audio inputs into a single audio output 49 | with optional individual volume controls. 50 | 51 | Parameters: 52 | audio_inputs (List[Dict]): List of audio inputs, 53 | each containing 'waveform' and 'sample_rate'. 54 | output_normalization (float): Normalization factor for the mixed audio (0.0 to 1.0). 55 | gain_factors (List[float], optional): List of gain factors for each audio input. 56 | 57 | Returns: 58 | Tuple[Dict[str, torch.Tensor], int]: Mixed audio with waveform and sample rate. 59 | """ 60 | 61 | if not audio_inputs: 62 | raise ValueError("No audio inputs provided for mixing.") 63 | 64 | num_audios = len(audio_inputs) 65 | 66 | # Handle gain_factors 67 | if not gain_factors: 68 | gain_factors = [1.0] * num_audios 69 | elif len(gain_factors) != num_audios: 70 | raise ValueError( 71 | f"Number of gain factors ({len(gain_factors)}) \ 72 | does not match number of audio inputs ({num_audios})." 
73 | ) 74 | 75 | # Extract sample rates and verify consistency 76 | sample_rates: List[int] = [audio["sample_rate"] for audio in audio_inputs] 77 | target_sample_rate = sample_rates[0] 78 | 79 | for idx, sr in enumerate(sample_rates): 80 | if sr != target_sample_rate: 81 | resampler = torchaudio.transforms.Resample( 82 | orig_freq=sr, new_freq=target_sample_rate 83 | ) 84 | 85 | _waveform: torch.Tensor = audio_inputs[idx]["waveform"] 86 | resampler.to( 87 | device=_waveform.device, 88 | dtype=_waveform.dtype, 89 | ) 90 | audio_inputs[idx]["waveform"] = resampler(_waveform) 91 | audio_inputs[idx]["sample_rate"] = target_sample_rate 92 | 93 | # Determine the maximum length among all audio inputs 94 | lengthsw: List[torch.Tensor] = [audio["waveform"] for audio in audio_inputs] 95 | lengths: List[int] = [wave.shape[-1] for wave in lengthsw] 96 | max_length = max(lengths) 97 | 98 | # Pad or truncate each audio to match the maximum length and apply gain 99 | for idx, audio in enumerate(audio_inputs): 100 | waveform: torch.Tensor = audio["waveform"] 101 | current_length = waveform.shape[-1] 102 | gain = gain_factors[idx] 103 | 104 | if current_length < max_length: 105 | padding = max_length - current_length 106 | # Pad with zeros (silence) at the end 107 | waveform = torch.nn.functional.pad(waveform, (0, padding)) 108 | elif current_length > max_length: 109 | # Truncate the waveform to max_length 110 | waveform = waveform[:, :, :max_length] 111 | 112 | # Apply gain 113 | waveform = waveform * gain 114 | 115 | audio["waveform"] = waveform 116 | 117 | # Sum all waveforms to create the mix 118 | mixed_waveform: torch.Tensor = torch.zeros_like(audio_inputs[0]["waveform"]) 119 | for idx, audio in enumerate(audio_inputs): 120 | mixed_waveform += audio["waveform"] 121 | 122 | mixed_waveform = lufs_normalization(mixed_waveform, target_sample_rate) 123 | 124 | return audio_to_comfy_3d(mixed_waveform, target_sample_rate) 125 | -------------------------------------------------------------------------------- /processors/SignalProcessingNormalizer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Various normalization techniques node 10 | """ 11 | 12 | import torch 13 | from typing import Dict, Any, Tuple, Union 14 | from ..core.io import audio_to_comfy_3d, audio_from_comfy_2d 15 | from ..core.loudness import ( 16 | rms_normalization, 17 | lufs_normalization, 18 | peak_normalization, 19 | automatic_gain_control, 20 | ) 21 | 22 | 23 | class SignalProcessingNormalizer: 24 | @classmethod 25 | def INPUT_TYPES(cls) -> Dict[str, Any]: 26 | return { 27 | "required": { 28 | "audio_input": ("AUDIO",), 29 | "mode": (["lufs", "rms", "peak", "auto"],), 30 | "target_rms": ( 31 | "FLOAT", 32 | {"default": 0.1, "min": 0, "max": 10.0, "step": 0.1}, 33 | ), 34 | "target_lufs_db": ( 35 | "FLOAT", 36 | {"default": -14.0, "min": -100, "max": 100.0, "step": 0.1}, 37 | ), 38 | "target_peak": ( 39 | "FLOAT", 40 | {"default": 0.9, "min": 0.0, "max": 1.0, "step": 0.1}, 41 | ), 42 | "target_auto": ( 43 | "FLOAT", 44 | {"default": 0.7, "min": 0.0, "max": 1.0, "step": 0.1}, 45 | ), 46 | "target_auto_alpha": ( 47 | "FLOAT", 48 | {"default": 0.1, "min": 0.0, "max": 10.0, "step": 0.1}, 49 | ), 50 | }, 51 | } 52 | 53 | RETURN_TYPES = ("AUDIO",) 54 | RETURN_NAMES = ("processed_audio",) 55 | CATEGORY = "Signal Processing" 56 | FUNCTION = "process" 57 | 58 | def
process( 59 | self, 60 | audio_input: Dict[str, Union[torch.Tensor, int]], 61 | mode: str, 62 | target_rms: float, 63 | target_lufs_db: float, 64 | target_peak: float, 65 | target_auto: float, 66 | target_auto_alpha: float, 67 | ) -> Tuple[Dict[str, torch.Tensor]]: 68 | 69 | waveform, sample_rate = audio_from_comfy_2d(audio_input, try_gpu=True) 70 | 71 | if mode == "rms": 72 | processed_waveform = rms_normalization(waveform, target_rms) 73 | elif mode == "lufs": 74 | processed_waveform = lufs_normalization( 75 | waveform, sample_rate, target_lufs_db 76 | ) 77 | elif mode == "peak": 78 | processed_waveform = peak_normalization(waveform, target_peak) 79 | elif mode == "auto": 80 | processed_waveform = automatic_gain_control( 81 | waveform, target_auto, target_auto_alpha 82 | ) 83 | else: 84 | processed_waveform = waveform 85 | 86 | return audio_to_comfy_3d(processed_waveform, sample_rate) 87 | -------------------------------------------------------------------------------- /processors/SignalProcessingSaturation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Classic Audio filter set 10 | 11 | Reference: 12 | https://ccrma.stanford.edu/~jatin/ComplexNonlinearities/Hysteresis.html 13 | https://viennatalk.mdw.ac.at/papers/Pap_01_79_Tronchin.pdf 14 | https://jatinchowdhury18.medium.com/complex-nonlinearities-episode-3-hysteresis-fdeb2cd3e3f6 15 | https://ccrma.stanford.edu/~dtyeh/papers/yeh07_dafx_clipode.pdf 16 | """ 17 | 18 | import torch 19 | from typing import Dict, Any, Tuple, Union 20 | 21 | from ..core.io import audio_to_comfy_3d, audio_from_comfy_2d 22 | from ..core.saturation import saturator, saturator_get_modes 23 | from ..core.loudness import get_loudness, lufs_normalization 24 | 25 | 26 | class SignalProcessingSaturation: 27 | @classmethod 28 | def INPUT_TYPES(cls) -> Dict[str, Any]: 29 | return { 30 | "required": { 31 | "audio_input": ("AUDIO",), 32 | "mode": (saturator_get_modes(),), 33 | "drive": ( 34 | "FLOAT", 35 | {"default": 50.0, "min": 0.0, "max": 200.0, "step": 0.1}, 36 | ), 37 | } 38 | } 39 | 40 | RETURN_TYPES = ("AUDIO",) 41 | RETURN_NAMES = ("audio",) 42 | CATEGORY = "Signal Processing" 43 | FUNCTION = "process" 44 | 45 | def process( 46 | self, 47 | audio_input: Dict[str, Union[torch.Tensor, int]], 48 | mode: str = "poly", 49 | drive: float = 50.0, 50 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 51 | waveform, sample_rate = audio_from_comfy_2d(audio_input, try_gpu=True) 52 | 53 | loudness = get_loudness(waveform, sample_rate) 54 | 55 | filtered_waveform = saturator( 56 | waveform, mode=mode, sample_rate=sample_rate, drive=drive 57 | ) 58 | 59 | filtered_waveform = lufs_normalization(filtered_waveform, sample_rate, loudness) 60 | 61 | return audio_to_comfy_3d(filtered_waveform, sample_rate) 62 | -------------------------------------------------------------------------------- /processors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c0ffymachyne/ComfyUI_SignalProcessing/a50ab911eee901646fdb810f18ae8aed2ca1960b/processors/__init__.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "ComfyUI_SignalProcessing" 3 | description = "Audio processing nodes 
for comfyui." 4 | version = "0.0.2" 5 | license = { file = "LICENSE-GPL-V3" } 6 | 7 | [project.urls] 8 | Repository = "https://github.com/c0ffymachyne/ComfyUI_SignalProcessing" 9 | 10 | [tool.comfy] 11 | PublisherId = "c0ffymachyne" 12 | DisplayName = "ComfyUI_SignalProcessing" 13 | Icon = "images/icon.jpg" 14 | Models = [] 15 | 16 | [project.dependencies] 17 | torch = [ 18 | "--index-url", 19 | "https://download.pytorch.org/whl/cu118", 20 | "torch==2.4.1", 21 | "torchaudio==2.4.1", 22 | "torchvision==0.19.1" 23 | ] 24 | other = [ 25 | "numpy>=1.23.0", 26 | "scipy>=1.5.0", 27 | "pyfar", 28 | "scipy", 29 | "pyloudnorm", 30 | "cupy-cuda11x" 31 | ] 32 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | inference: mark as inference test (deselect with '-m "not inference"') 4 | execution: mark as execution test (deselect with '-m "not execution"') 5 | testpaths = 6 | tests 7 | tests-unit 8 | addopts = -s 9 | pythonpath = . 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | pyfar 3 | numpy 4 | torch 5 | torchaudio 6 | pyloudnorm 7 | cupy-cuda11x 8 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c0ffymachyne/ComfyUI_SignalProcessing/a50ab911eee901646fdb810f18ae8aed2ca1960b/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from typing import Dict, Union 3 | from pathlib import Path 4 | import torch 5 | from ..core.io import from_disk_as_raw_3d 6 | 7 | # Test data roots 8 | DATA_ROOT = Path("ComfyUI_SignalProcessing/audio") 9 | INPUT_FILE = DATA_ROOT / "inputs/002-orig.mp4" 10 | INPUT_FILE = DATA_ROOT / "inputs/pf-01.mp3" 11 | INPUT_IR_FILE = DATA_ROOT / "inputs/ir.wav" 12 | INPUTS_ROOT = DATA_ROOT / "inputs" 13 | INPUT_FILES = { 14 | file.name: file.resolve() for file in INPUTS_ROOT.rglob("*") if file.is_file() 15 | } 16 | 17 | TestData = Dict[str, Union[Dict[str, Union[torch.Tensor, int]], Path]] 18 | 19 | 20 | @pytest.fixture 21 | def test_data(request) -> TestData: 22 | 23 | test_name = request.node.name # Automatically get the current test function name 24 | test_name = str(request.node.function.__name__) 25 | 26 | OUTPUT_ROOT = DATA_ROOT / f"outputs/{test_name}" 27 | INPUTS_ROOT = DATA_ROOT / "inputs" 28 | 29 | param_values = ( 30 | request.node.callspec.params if hasattr(request.node, "callspec") else {} 31 | ) 32 | pest_param_str = "_".join(f"{key}-{value}" for key, value in param_values.items()) 33 | 34 | audio_slice_begin_seconds: float = 60.0 35 | audio_slice_duration_seconds: float = 120.0 36 | 37 | # Prepare audio data 38 | audio, sample_rate = from_disk_as_raw_3d( 39 | str(INPUT_FILE.absolute()), 40 | try_gpu=True, 41 | start_seconds=audio_slice_begin_seconds, 42 | duration_seconds=audio_slice_duration_seconds, 43 | ) 44 | OUTPUT_ROOT.mkdir(parents=True, exist_ok=True) # Ensure the output directory exists 45 | 46 | audio_to_comfy: Dict[torch.Tensor, int] = { 47 | "waveform": audio, 48 | "sample_rate": sample_rate, 49 | } 50 | 51 | return { 52 | 
"audio": audio_to_comfy, 53 | "output_root": OUTPUT_ROOT, 54 | "inputs_root": INPUTS_ROOT, 55 | "test_name": test_name, 56 | "pest_param_str": pest_param_str, 57 | } 58 | -------------------------------------------------------------------------------- /tests/test_baxandall.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ..tests.conftest import TestData 3 | from typing import Tuple 4 | from ..core.io import audio_from_comfy_3d_to_disk 5 | from ..processors.SignalProcessingBaxandallEQ import ( 6 | SignalProcessingBaxandallEQ, 7 | SignalProcessingBaxandall3BandEQ, 8 | ) 9 | 10 | 11 | TEST_NAME = "baxandall" 12 | 13 | params: list[Tuple[float, float]] = [ 14 | (9.0, 0.0), 15 | (6.0, 0.0), 16 | (3.0, 0.0), 17 | (1.0, 0.0), 18 | (0.0, 9.0), 19 | (0.0, 6.0), 20 | (0.0, 3.0), 21 | (0.0, 1.0), 22 | (0.0, 0.0), 23 | ] 24 | 25 | 26 | @pytest.mark.parametrize( 27 | "bass_gain_db, treble_gain_db", 28 | params, 29 | ) 30 | def test_baxandalleq_general( 31 | test_data: TestData, bass_gain_db: float, treble_gain_db: float 32 | ) -> None: 33 | 34 | node = SignalProcessingBaxandallEQ() 35 | 36 | output = node.process( 37 | audio_input=test_data["audio"], 38 | bass_gain_db=bass_gain_db, 39 | treble_gain_db=treble_gain_db, 40 | )[0] 41 | 42 | pest_param_str = test_data["pest_param_str"] 43 | output_filepath = test_data["output_root"] / f"{pest_param_str}.wav" 44 | audio_from_comfy_3d_to_disk(output, output_filepath) 45 | 46 | assert output_filepath.exists(), f"Output file {output_filepath} was not created." 47 | assert output is not None, "Processed audio output is None." 48 | 49 | print(f"test_baxandalleq_general {test_data['output_root']}") 50 | 51 | 52 | @pytest.mark.parametrize( 53 | "bass_gain_db,mid_gain_db,treble_gain_db,low_freq,mid_freq,high_freq,mid_q", 54 | [ 55 | (9.0, 9.0, 9.0, 100.0, 1000.0, 10000.0, 0.707), 56 | (6.0, 6.0, 6.0, 100.0, 1000.0, 10000.0, 0.707), 57 | (3.0, 3.0, 3.0, 100.0, 1000.0, 10000.0, 0.707), 58 | (1.0, 1.0, 1.0, 100.0, 1000.0, 10000.0, 0.707), 59 | (0.0, 0.0, 0.0, 100.0, 1000.0, 10000.0, 0.707), 60 | ], 61 | ) 62 | def test_baxandalleq3band_general( 63 | test_data: TestData, 64 | bass_gain_db: float, 65 | mid_gain_db: float, 66 | treble_gain_db: float, 67 | low_freq: float, 68 | mid_freq: float, 69 | high_freq: float, 70 | mid_q: float, 71 | ) -> None: 72 | 73 | node = SignalProcessingBaxandall3BandEQ() 74 | 75 | output = node.process( 76 | audio_input=test_data["audio"], 77 | bass_gain_db=bass_gain_db, 78 | mid_gain_db=mid_gain_db, 79 | treble_gain_db=treble_gain_db, 80 | low_freq=low_freq, 81 | mid_freq=mid_freq, 82 | high_freq=high_freq, 83 | mid_q=mid_q, 84 | )[0] 85 | 86 | pest_param_str = test_data["pest_param_str"] 87 | output_filepath = test_data["output_root"] / f"{pest_param_str}.wav" 88 | audio_from_comfy_3d_to_disk(output, output_filepath) 89 | 90 | assert output_filepath.exists(), f"Output file {output_filepath} was not created." 91 | assert output is not None, "Processed audio output is None." 
--------------------------------------------------------------------------------
/tests/test_convolution_reverb.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from .conftest import TestData
3 | from ..core.io import audio_from_comfy_3d_to_disk
4 | from ..effects.SignalProcessingConvolutionReverb import (
5 |     SignalProcessingConvolutionReverb,
6 | )
7 | 
8 | TEST_NAME = "convolution_reverb"
9 | 
10 | 
11 | @pytest.mark.parametrize(
12 |     "impulse_response, wet_dry",
13 |     [("ir.wav", 1.0), ("ir.wav", 0.6), ("ir.wav", 0.3), ("ir.wav", 0.0)],
14 | )
15 | def test_convolution_reverb_general(
16 |     test_data: TestData, impulse_response: str, wet_dry: float
17 | ) -> None:
18 |     SignalProcessingConvolutionReverb.ir_directory = test_data["inputs_root"] / "ir"
19 |     node = SignalProcessingConvolutionReverb()
20 | 
21 |     node.INPUT_TYPES()  # exercise INPUT_TYPES with the overridden ir_directory
22 | 
23 |     output = node.process(
24 |         impulse_response=impulse_response,
25 |         audio_input=test_data["audio"],
26 |         wet_dry=wet_dry,
27 |     )[0]
28 | 
29 |     pest_param_str = test_data["pest_param_str"]
30 |     output_filepath = test_data["output_root"] / f"{pest_param_str}.wav"
31 |     audio_from_comfy_3d_to_disk(output, output_filepath)
32 | 
33 |     assert output_filepath.exists(), f"Output file {output_filepath} was not created."
34 |     assert output is not None, "Processed audio output is None."
35 | 
36 |     print(f"test_convolution_reverb_general {test_data['output_root']}")
37 | 
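Under the hood, convolution reverb reduces to multiplying spectra. A hedged sketch of the wet/dry core with torch.fft; how the node actually pads, normalizes, or loads its impulse responses is not visible in this file:

import torch

def convolve_reverb(x: torch.Tensor, ir: torch.Tensor, wet_dry: float) -> torch.Tensor:
    # Full linear convolution via FFT: output length is len(x) + len(ir) - 1.
    n = x.shape[-1] + ir.shape[-1] - 1
    wet = torch.fft.irfft(torch.fft.rfft(x, n=n) * torch.fft.rfft(ir, n=n), n=n)
    wet = wet[..., : x.shape[-1]]  # trim the reverb tail to the dry length
    wet = wet / wet.abs().max().clamp_min(1e-9)  # guard against clipping
    return wet_dry * wet + (1.0 - wet_dry) * x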
--------------------------------------------------------------------------------
/tests/test_filter.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from ..tests.conftest import TestData
3 | from ..core.io import audio_from_comfy_3d_to_disk
4 | from ..processors.SignalProcessingFilter import (
5 |     SignalProcessingFilter,
6 | )
7 | 
8 | TEST_NAME = "filter"
9 | 
10 | 
11 | @pytest.mark.parametrize(
12 |     "cutoff, q_factor",
13 |     [
14 |         (0.9, 0.707),
15 |         (0.6, 0.707),
16 |         (0.3, 0.707),
17 |         (0.1, 0.707),
18 |         (0.0, 0.707),
19 |     ],
20 | )
21 | def test_filter_general(test_data: TestData, cutoff: float, q_factor: float) -> None:
22 | 
23 |     node: SignalProcessingFilter = SignalProcessingFilter()
24 |     modes = node.INPUT_TYPES()["required"]["filter_type"][0]  # list of filter modes
25 | 
26 |     for mode in modes:
27 |         output = node.process(
28 |             audio_input=test_data["audio"],
29 |             cutoff=cutoff,
30 |             filter_type=mode,
31 |             q_factor=q_factor,
32 |         )[0]
33 | 
34 |         pest_param_str = test_data["pest_param_str"]
35 |         output_filepath = test_data["output_root"] / f"{mode}-{pest_param_str}.wav"
36 |         audio_from_comfy_3d_to_disk(output, output_filepath)
37 | 
38 |         assert (
39 |             output_filepath.exists()
40 |         ), f"Output file {output_filepath} was not created."
41 |         assert output is not None, f"Processed audio output is None for mode {mode}."
42 | 
43 |     print(f"test_filter_general {test_data['output_root']}")
44 | 
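The modes iterated here come from SignalProcessingFilter's INPUT_TYPES, and the classic filter types map onto standard biquads that torchaudio ships directly. A sketch of one such mapping; the assumption that the node's normalized `cutoff` is a fraction of Nyquist is mine, not confirmed by this file:

import torch
import torchaudio.functional as F

def lowpass(waveform: torch.Tensor, sample_rate: int,
            cutoff: float, q_factor: float = 0.707) -> torch.Tensor:
    # Interpret the normalized cutoff (0..1) as a fraction of Nyquist.
    cutoff_hz = max(cutoff, 1e-3) * sample_rate / 2.0
    return F.lowpass_biquad(waveform, sample_rate, cutoff_freq=cutoff_hz, Q=q_factor)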
--------------------------------------------------------------------------------
/tests/test_harmonics.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from ..tests.conftest import TestData
3 | from typing import Tuple
4 | from ..core.io import audio_from_comfy_3d_to_disk
5 | from ..processors.SignalProcessingHarmonicsEnhancer import (
6 |     SignalProcessingHarmonicsEnhancer,
7 | )
8 | 
9 | TEST_NAME = "harmonics"
10 | 
11 | params: list[Tuple[str, int, float, float]] = [
12 |     ("2,3", 440, 6.0, 0.707),
13 |     ("2,3", 440, 3.0, 0.707),
14 |     ("2,3", 440, 1.0, 0.707),
15 |     ("2,3", 440, 0.0, 0.707),
16 | ]
17 | 
18 | 
19 | @pytest.mark.parametrize(
20 |     "harmonics, base_frequency, gain_db, Q",
21 |     params,
22 | )
23 | def test_harmonics_general(
24 |     test_data: TestData, harmonics: str, base_frequency: int, gain_db: float, Q: float
25 | ) -> None:
26 | 
27 |     node = SignalProcessingHarmonicsEnhancer()
28 |     modes = node.INPUT_TYPES()["required"]["mode"][0]  # list of enhancer modes
29 | 
30 |     for mode in modes:
31 |         output = node.process(
32 |             audio_input=test_data["audio"],
33 |             harmonics=harmonics,
34 |             mode=mode,
35 |             base_frequency=base_frequency,
36 |             gain_db=gain_db,
37 |             Q=Q,
38 |         )[0]
39 | 
40 |         pest_param_str = test_data["pest_param_str"]
41 |         output_filepath = test_data["output_root"] / f"{mode}-{pest_param_str}.wav"
42 |         audio_from_comfy_3d_to_disk(output, output_filepath)
43 | 
44 |         assert (
45 |             output_filepath.exists()
46 |         ), f"Output file {output_filepath} was not created."
47 |         assert output is not None, f"Processed audio output is None for mode {mode}."
48 | 
49 |     print(f"test_harmonics_general {test_data['output_root']}")
50 | 
--------------------------------------------------------------------------------
/tests/test_limiting.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from ..tests.conftest import TestData
3 | from typing import Tuple
4 | from ..core.io import audio_from_comfy_3d_to_disk
5 | from ..processors.SignalProcessingLimiter import SignalProcessingLimiter
6 | 
7 | TEST_NAME = "limiting"
8 | 
9 | params: list[Tuple[float, float, float]] = [
10 |     (0.0, 100.0, 600.0),
11 |     (10.0, 100.0, 600.0),
12 |     (30.0, 100.0, 600.0),
13 |     (90.0, 100.0, 600.0),
14 | ]
15 | 
16 | 
17 | @pytest.mark.parametrize(
18 |     "threshold, slope, release_ms",
19 |     params,
20 | )
21 | def test_limiting_general(
22 |     test_data: TestData, threshold: float, slope: float, release_ms: float
23 | ) -> None:
24 | 
25 |     node = SignalProcessingLimiter()
26 |     modes = node.INPUT_TYPES()["required"]["mode"][0]  # Extract modes
27 | 
28 |     for mode in modes:
29 | 
30 |         output = node.process(
31 |             audio_input=test_data["audio"],
32 |             mode=mode,
33 |             threshold=threshold,
34 |             slope=slope,
35 |             release_ms=release_ms,
36 |         )[0]
37 | 
38 |         pest_param_str = test_data["pest_param_str"]
39 |         output_filepath = test_data["output_root"] / f"{mode}-{pest_param_str}.wav"
40 |         audio_from_comfy_3d_to_disk(output, output_filepath)
41 | 
42 |         assert (
43 |             output_filepath.exists()
44 |         ), f"Output file {output_filepath} was not created."
45 |         assert output is not None, f"Processed audio output is None for mode {mode}."
46 | 
47 |     print(f"test_limiting_general {test_data['output_root']}")
48 | 
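The limiter modes exercised here are implemented as CUDA kernels under core/kernels/limiter; the simplest member of that family is a soft clipper, sketched below for reference (illustrative, not the kernels' exact math):

import torch

def soft_clip_limiter(x: torch.Tensor, ceiling: float = 0.9) -> torch.Tensor:
    # Scale into the ceiling, soft-clip with tanh, and scale back:
    # values far below the ceiling pass nearly unchanged, peaks are rounded off.
    return ceiling * torch.tanh(x / ceiling)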
--------------------------------------------------------------------------------
/tests/test_normalizer.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from .conftest import TestData
3 | from ..core.io import audio_from_comfy_3d_to_disk
4 | from ..processors.SignalProcessingNormalizer import SignalProcessingNormalizer
5 | 
6 | TEST_NAME = "normalizer"
7 | 
8 | 
9 | @pytest.mark.parametrize(
10 |     "target_rms, target_lufs_db, target_peak, target_auto, target_auto_alpha",
11 |     [
12 |         (-0.0, -14.0, -0.0, -0.0, -0.0),
13 |         (0.3, -14.0, 0.3, 0.3, 0.3),
14 |         (0.6, -14.0, 0.6, 0.6, 0.6),
15 |         (0.9, -14.0, 0.9, 0.9, 0.9),
16 |     ],
17 | )
18 | def test_normalizer_general(
19 |     test_data: TestData,
20 |     target_rms: float,
21 |     target_lufs_db: float,
22 |     target_peak: float,
23 |     target_auto: float,
24 |     target_auto_alpha: float,
25 | ) -> None:
26 | 
27 |     node = SignalProcessingNormalizer()
28 |     modes = node.INPUT_TYPES()["required"]["mode"][0]
29 | 
30 |     for mode in modes:
31 | 
32 |         output = node.process(
33 |             audio_input=test_data["audio"],
34 |             mode=mode,
35 |             target_rms=target_rms,
36 |             target_lufs_db=target_lufs_db,
37 |             target_peak=target_peak,
38 |             target_auto=target_auto,
39 |             target_auto_alpha=target_auto_alpha,
40 |         )[0]
41 | 
42 |         pest_param_str = test_data["pest_param_str"]
43 |         output_filepath = test_data["output_root"] / f"{mode}-{pest_param_str}.wav"
44 |         audio_from_comfy_3d_to_disk(output, output_filepath)
45 | 
46 |         assert (
47 |             output_filepath.exists()
48 |         ), f"Output file {output_filepath} was not created."
49 |         assert output is not None, f"Processed audio output is None for mode {mode}."
50 | 
51 |     print(f"test_normalizer_general {test_data['output_root']}")
52 | 
--------------------------------------------------------------------------------
/tests/test_padsynthchoir.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from ..tests.conftest import TestData
3 | from ..generators.SignalProcessingPadSynthChoir import SignalProcessingPadSynthChoir
4 | 
5 | TEST_NAME = "synth_and_mixdown"
6 | OUTPUT_ROOT = Path(f"ComfyUI_SignalProcessing/audio/outputs/{TEST_NAME}")
7 | 
8 | 
9 | def test_synth_and_mixdown(test_data: TestData) -> None:
10 | 
11 |     OUTPUT_ROOT.mkdir(parents=True, exist_ok=True)
12 | 
13 |     samplerate = 44100
14 |     base_freq = 440.0
15 |     step_size = 4
16 |     num_notes = 5
17 |     bandwidth_cents = 60.0
18 |     number_harmonics = 32
19 | 
20 |     synth_node = SignalProcessingPadSynthChoir()
21 |     synth_output, sample_rate = synth_node.process(
22 |         samplerate=samplerate,
23 |         base_freq=base_freq,
24 |         step_size=step_size,
25 |         num_notes=num_notes,
26 |         bandwidth_cents=bandwidth_cents,
27 |         number_harmonics=number_harmonics,
28 |     )
29 | 
30 |     assert synth_output is not None, "Synthesized audio output is None."
31 |     assert sample_rate > 0, "Returned sample rate should be positive."
32 | 
33 |     print(f"test_synth_and_mixdown {test_data['output_root']}")
34 | 
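The generator is named after Paul Nasca's PADsynth algorithm: place one Gaussian per harmonic in a magnitude spectrum, with a bandwidth that is constant in cents (so it grows in Hz with frequency), attach random phases, and inverse-FFT. A compact sketch of that textbook core, not necessarily the repo's exact implementation:

import torch

def padsynth(samplerate: int, base_freq: float, amplitudes: list[float],
             bandwidth_cents: float = 60.0, n: int = 1 << 18) -> torch.Tensor:
    bins = torch.arange(n // 2 + 1) * samplerate / n  # frequency of each rfft bin
    spectrum = torch.zeros(n // 2 + 1)
    for k, amp in enumerate(amplitudes, start=1):
        f = base_freq * k
        # Constant bandwidth in cents grows linearly in Hz with the harmonic.
        bw_hz = (2.0 ** (bandwidth_cents / 1200.0) - 1.0) * f
        spectrum += amp * torch.exp(-(((bins - f) / bw_hz) ** 2))
    # Random phases are the heart of PADsynth: they turn the harmonic
    # "blobs" into an evolving, chorus-like pad.
    phases = torch.rand(n // 2 + 1) * 2.0 * torch.pi
    wave = torch.fft.irfft(spectrum * torch.exp(1j * phases), n=n)
    return wave / wave.abs().max()

A harmonic profile such as [1.0 / k for k in range(1, 33)] gives a usable sawtooth-like pad bed.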
--------------------------------------------------------------------------------
/tests/test_paulstretch.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from .conftest import TestData
3 | from ..core.io import audio_from_comfy_3d_to_disk
4 | from ..effects.SignalProcessingPaulStretch import SignalProcessingPaulStretch
5 | 
6 | TEST_NAME = "paulstretch"
7 | 
8 | 
9 | @pytest.mark.parametrize(
10 |     "stretch_factor, window_size_seconds",
11 |     [
12 |         (9.0, 0.25),
13 |         (6.0, 0.25),
14 |         (3.0, 0.25),
15 |         (1.0, 0.25),
16 |         (2.0, 0.25),
17 |         (2.0, 0.5),
18 |         (2.0, 0.75),
19 |         (2.0, 1.0),
20 |         (2.0, 3.0),
21 |         (2.0, 6.0),
22 |     ],
23 | )
24 | def test_paul_stretch_general(
25 |     test_data: TestData, stretch_factor: float, window_size_seconds: float
26 | ) -> None:
27 | 
28 |     node = SignalProcessingPaulStretch()
29 |     output = node.process(
30 |         audio_input=test_data["audio"],
31 |         stretch_factor=stretch_factor,
32 |         window_size_seconds=window_size_seconds,
33 |     )[0]
34 | 
35 |     pest_param_str = test_data["pest_param_str"]
36 |     output_filepath = test_data["output_root"] / f"{pest_param_str}.wav"
37 |     audio_from_comfy_3d_to_disk(output, output_filepath)
38 | 
39 |     assert output_filepath.exists(), f"Output file {output_filepath} was not created."
40 |     assert output is not None, "Processed audio output is None."
41 | 
42 |     print(f"test_paul_stretch_general {test_data['output_root']}")
43 | 
--------------------------------------------------------------------------------
/tests/test_pitchshift.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from .conftest import TestData
3 | from ..core.io import audio_from_comfy_3d_to_disk
4 | from ..effects.SignalProcessingPitchShifter import SignalProcessingPitchShifter
5 | 
6 | TEST_NAME = "pitchshift"
7 | 
8 | 
9 | @pytest.mark.parametrize(
10 |     "pitch_shift_factor",
11 |     [(3), (1), (0), (-1), (-3)],
12 | )
13 | def test_pitch_shift_general(test_data: TestData, pitch_shift_factor: int) -> None:
14 | 
15 |     node = SignalProcessingPitchShifter()
16 | 
17 |     output = node.process(
18 |         audio_input=test_data["audio"], pitch_shift_factor=pitch_shift_factor
19 |     )[0]
20 | 
21 |     pest_param_str = test_data["pest_param_str"]
22 |     output_filepath = test_data["output_root"] / f"{pest_param_str}.wav"
23 |     audio_from_comfy_3d_to_disk(output, output_filepath)
24 | 
25 |     assert output_filepath.exists(), f"Output file {output_filepath} was not created."
26 |     assert output is not None, "Processed audio output is None."
27 | 
28 |     print(f"test_pitch_shift_general {test_data['output_root']}")
29 | 
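pitch_shift_factor is in signed semitone steps. The crudest way to realize such a shift is resampling, sketched below; duration changes along with pitch, which is the trade-off phase-vocoder-style shifters avoid, so this should be read as an illustration of the math, not as the node's method:

import torch
import torchaudio.functional as F

def pitch_shift_resample(waveform: torch.Tensor, sample_rate: int,
                         semitones: int) -> torch.Tensor:
    # Resample to sample_rate / factor samples, then play back at the original
    # rate: pitch rises by `factor`, duration shrinks by the same amount.
    factor = 2.0 ** (semitones / 12.0)
    return F.resample(waveform, orig_freq=sample_rate,
                      new_freq=int(sample_rate / factor))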
--------------------------------------------------------------------------------
/tests/test_plotting.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from torchvision.transforms import ToPILImage
3 | from .conftest import TestData
4 | from ..core.plotting import get_spectogram, get_wave, save_image
5 | from ..visuals.SignalProcessingSpectrogram import SignalProcessingSpectrogram
6 | 
7 | TEST_NAME = "plotting"
8 | 
9 | 
10 | @pytest.mark.parametrize(
11 |     "n_fft, n_mels, xlim",
12 |     [
13 |         (4096, 128 * 1, 8192),
14 |         (4096, 128 * 2, 8192),
15 |         (4096, 128 * 4, 8192),
16 |         (4096, 128 * 6, 8192),
17 |     ],
18 | )
19 | def test_plotting_spectogram_general(
20 |     test_data: TestData, n_fft: int, n_mels: int, xlim: int
21 | ) -> None:
22 | 
23 |     waveform = test_data["audio"]["waveform"].squeeze(0)
24 |     sample_rate = test_data["audio"]["sample_rate"]
25 | 
26 |     print("waveform", waveform.shape)
27 | 
28 |     spectogram = get_spectogram(
29 |         waveform, sample_rate=sample_rate, n_fft=n_fft, n_mels=n_mels, xlim=xlim
30 |     )
31 | 
32 |     pest_param_str = test_data["pest_param_str"]
33 |     output_filepath = test_data["output_root"] / f"{pest_param_str}.png"
34 | 
35 |     save_image(output_filepath, spectogram)
36 | 
37 |     assert output_filepath.exists(), f"Output file {output_filepath} was not created."
38 |     assert spectogram is not None, "Spectrogram image is None."
39 | 
40 |     print(f"test_plotting_spectogram_general {test_data['output_root']}")
41 | 
42 | 
43 | @pytest.mark.parametrize(
44 |     "stretch_factor, window_size_seconds",
45 |     [(8.0, 0.25)],
46 | )
47 | def test_plotting_waveform_general(
48 |     test_data: TestData, stretch_factor: float, window_size_seconds: float
49 | ) -> None:
50 | 
51 |     waveform = test_data["audio"]["waveform"].squeeze(0)
52 |     sample_rate = test_data["audio"]["sample_rate"]
53 | 
54 |     print("waveform", waveform.shape)
55 | 
56 |     spectogram = get_wave(waveform, sample_rate=sample_rate, xlim=4096)
57 | 
58 |     pest_param_str = test_data["pest_param_str"]
59 |     output_filepath = test_data["output_root"] / f"{pest_param_str}.png"
60 | 
61 |     save_image(output_filepath, spectogram)
62 | 
63 |     assert output_filepath.exists(), f"Output file {output_filepath} was not created."
64 |     assert spectogram is not None, "Waveform image is None."
65 | 
66 |     print(f"test_plotting_waveform_general {test_data['output_root']}")
67 | 
68 | 
69 | @pytest.mark.parametrize(
70 |     "stretch_factor, window_size_seconds",
71 |     [(8.0, 0.25)],
72 | )
73 | def test_plotting_waveform_node_general(
74 |     test_data: TestData, stretch_factor: float, window_size_seconds: float
75 | ) -> None:
76 | 
77 |     node = SignalProcessingSpectrogram()
78 | 
79 |     output = node.process(audio_input=test_data["audio"])[0]
80 | 
81 |     pest_param_str = test_data["pest_param_str"]
82 |     output_filepath = test_data["output_root"] / f"{pest_param_str}.png"
83 | 
84 |     to_pil = ToPILImage()
85 |     print("rgb_image[0] -----------------------------", output[0].shape)
86 |     rgb_image = output[0][..., :3]
87 |     rgb_image = rgb_image.permute(2, 0, 1)
88 |     print("rgb_image -----------------------------", rgb_image.shape)
89 |     spectogram = to_pil(rgb_image)
90 |     print("spectogram -----------------------------", spectogram)
91 |     save_image(output_filepath, spectogram)
92 | 
93 |     assert output_filepath.exists(), f"Output file {output_filepath} was not created."
94 |     assert output is not None, "Spectrogram node output is None."
95 | 
96 |     print(f"test_plotting_waveform_node_general {test_data['output_root']}")
97 | 
--------------------------------------------------------------------------------
/tests/test_saturation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from ..tests.conftest import TestData
3 | from ..core.io import audio_from_comfy_3d_to_disk
4 | from ..processors.SignalProcessingSaturation import SignalProcessingSaturation
5 | 
6 | TEST_NAME = "saturation"
7 | 
8 | 
9 | @pytest.mark.parametrize(
10 |     "drive",
11 |     [
12 |         (90.0),
13 |         (60.0),
14 |         (30.0),
15 |         (10.0),
16 |         (0.0),
17 |     ],
18 | )
19 | def test_saturation_general(test_data: TestData, drive: float) -> None:
20 | 
21 |     node = SignalProcessingSaturation()
22 |     modes = node.INPUT_TYPES()["required"]["mode"][0]  # Extract modes
23 | 
24 |     for mode in modes:
25 | 
26 |         output = node.process(audio_input=test_data["audio"], mode=mode, drive=drive)[0]
27 | 
28 |         pest_param_str = test_data["pest_param_str"]
29 |         output_filepath = test_data["output_root"] / f"{mode}-{pest_param_str}.wav"
30 |         audio_from_comfy_3d_to_disk(output, output_filepath)
31 | 
32 |         assert (
33 |             output_filepath.exists()
34 |         ), f"Output file {output_filepath} was not created."
35 |         assert output is not None, f"Processed audio output is None for mode {mode}."
36 | 37 | print(f"test_saturation_general {test_data['output_root']}") 38 | -------------------------------------------------------------------------------- /tests/test_widening.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ..tests.conftest import TestData 3 | from ..core.io import audio_from_comfy_3d_to_disk 4 | from ..effects.SignalProcessingStereoWidening import SignalProcessingStereoWidening 5 | 6 | TEST_NAME = "widening" 7 | 8 | 9 | @pytest.mark.parametrize( 10 | "width", 11 | [ 12 | (3.0), 13 | (2.0), 14 | (1.0), 15 | (0.5), 16 | (0.25), 17 | (0.0), 18 | ], 19 | ) 20 | def test_widening_general(test_data: TestData, width: float) -> None: 21 | 22 | node = SignalProcessingStereoWidening() 23 | modes = node.INPUT_TYPES()["required"]["mode"][0] 24 | 25 | for mode in modes: 26 | 27 | output = node.process(audio_input=test_data["audio"], mode=mode, width=width)[0] 28 | 29 | pest_param_str = test_data["pest_param_str"] 30 | output_filepath = test_data["output_root"] / f"{mode}-{pest_param_str}.wav" 31 | audio_from_comfy_3d_to_disk(output, output_filepath) 32 | 33 | assert ( 34 | output_filepath.exists() 35 | ), f"Output file {output_filepath} was not created." 36 | assert output is not None, f"Processed audio output is None for mode {mode}." 37 | 38 | print(f"test_widening_general {test_data['output_root']}") 39 | -------------------------------------------------------------------------------- /visuals/SignalProcessingSpectrogram.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Spectogram image node 10 | """ 11 | 12 | import torch 13 | 14 | from PIL import Image 15 | 16 | from typing import Dict, Tuple, Any, Type, List 17 | 18 | import numpy as np 19 | import torchaudio 20 | 21 | import matplotlib.pyplot as plt 22 | 23 | 24 | class SignalProcessingSpectrogram: 25 | @classmethod 26 | def INPUT_TYPES(cls: Type["SignalProcessingSpectrogram"]) -> Dict[str, Any]: 27 | cmaps: List[str] = ["viridis", "plasma", "inferno", "magma", "cividis"] 28 | return { 29 | "required": { 30 | "audio_input": ("AUDIO",), 31 | "color_map": (cmaps,), 32 | }, 33 | "optional": { 34 | "n_fft": ( 35 | "INT", 36 | {"default": 4096, "min": 512, "max": 8192, "step": 256}, 37 | ), 38 | "hop_length": ( 39 | "INT", 40 | {"default": 128, "min": 64, "max": 4096, "step": 128}, 41 | ), 42 | "n_mels": ("INT", {"default": 512, "min": 32, "max": 2048, "step": 32}), 43 | "top_db": ( 44 | "FLOAT", 45 | {"default": 80.0, "min": 10.0, "max": 100.0, "step": 5.0}, 46 | ), 47 | }, 48 | } 49 | 50 | RETURN_TYPES = ("IMAGE",) 51 | RETURN_NAMES = ("spectrogram_image",) 52 | CATEGORY = "Signal Processing" 53 | FUNCTION = "process" 54 | 55 | def process( 56 | self, 57 | audio_input: Dict[str, torch.Tensor], 58 | color_map: str = "viridis", 59 | n_fft: int = 2048, 60 | hop_length: int = 512, 61 | n_mels: int = 128, 62 | top_db: float = 80.0, 63 | ) -> Tuple[torch.Tensor]: 64 | waveform = audio_input.get("waveform") 65 | sample_rate = audio_input.get("sample_rate") 66 | 67 | # Validate that waveform and sample_rate are not None 68 | if waveform is None: 69 | raise ValueError("The 'waveform' key is missing or None in 'audio_input'.") 70 | if not isinstance(waveform, torch.Tensor): 71 | raise TypeError( 72 | f"Expected 'waveform' to be a torch.Tensor, got {type(waveform)}." 
73 | ) 74 | if sample_rate is None: 75 | raise ValueError( 76 | "The 'sample_rate' key is missing or None in 'audio_input'." 77 | ) 78 | if not isinstance(sample_rate, int): 79 | raise TypeError( 80 | f"Expected 'sample_rate' to be an int, got {type(sample_rate)}." 81 | ) 82 | 83 | # waveform, sample_rate = audio_from_comfy_2d(audio_input) 84 | 85 | # Convert to mono by averaging channels 86 | if waveform.ndim == 3: 87 | # [batch, channels, samples] 88 | waveform = waveform.mean(dim=1, keepdim=True) # [batch, 1, samples] 89 | waveform = waveform.squeeze(0) # [1, samples] 90 | elif waveform.ndim == 2: 91 | # [channels, samples] 92 | if waveform.shape[0] > 1: 93 | waveform = waveform.mean(dim=0, keepdim=True) # [1, samples] 94 | else: 95 | waveform = waveform.unsqueeze(0) # [1, samples] 96 | elif waveform.ndim == 1: 97 | # [samples] 98 | waveform = waveform.unsqueeze(0) # [1, samples] 99 | else: 100 | raise ValueError(f"Unsupported waveform shape: {waveform.shape}") 101 | 102 | # Generate Mel Spectrogram 103 | spectrogram_transform = torchaudio.transforms.MelSpectrogram( 104 | sample_rate=sample_rate, 105 | n_fft=n_fft, 106 | hop_length=hop_length, 107 | n_mels=n_mels, 108 | power=2.0, 109 | norm="slaney", 110 | mel_scale="htk", 111 | ).to(waveform.device, dtype=waveform.dtype) 112 | spectrogram = spectrogram_transform(waveform) # [1, n_mels, time_frames] 113 | 114 | # Convert to decibel scale 115 | amplitude_to_db = torchaudio.transforms.AmplitudeToDB(top_db=top_db) 116 | spectrogram_db = amplitude_to_db(spectrogram) # [1, n_mels, time_frames] 117 | 118 | # Convert to numpy 119 | spectrogram_db = ( 120 | spectrogram_db.squeeze().detach().cpu().numpy() 121 | ) # [n_mels, time_frames] 122 | 123 | # Clip spectrogram to a range for better contrast 124 | spectrogram_db = np.clip(spectrogram_db, -top_db, 0.0) 125 | 126 | # Normalize spectrogram to [0,1] 127 | spectrogram_normalized = (spectrogram_db + top_db) / top_db # [0,1] 128 | 129 | # Apply a colormap (e.g., 'inferno') using matplotlib 130 | cmap = plt.get_cmap(color_map) 131 | spectrogram_colored = cmap( 132 | spectrogram_normalized 133 | ) # [n_mels, time_frames, 4] RGBA 134 | 135 | # Convert to RGB by removing alpha channel 136 | spectrogram_rgb = (spectrogram_colored[:, :, :3] * 255).astype( 137 | np.uint8 138 | ) # [n_mels, time_frames, 3] 139 | spectrogram_rgb = np.squeeze(spectrogram_rgb) 140 | 141 | # Check the shape and adjust if necessary 142 | if len(spectrogram_rgb.shape) == 3 and spectrogram_rgb.shape[-1] == 3: 143 | # Ensure the array is in uint8 format (0-255 range) 144 | spectrogram_rgb = np.clip(spectrogram_rgb, 0, 255).astype(np.uint8) 145 | else: 146 | raise ValueError(f"Unexpected spectrogram shape: {spectrogram_rgb.shape}") 147 | 148 | # Convert to RGB image 149 | spectrogram_image = Image.fromarray(spectrogram_rgb).convert("RGB") 150 | 151 | # Optionally resize for better resolution 152 | spectrogram_image = spectrogram_image.resize( 153 | (spectrogram_image.width * 2, spectrogram_image.height * 2), Image.BILINEAR 154 | ) 155 | 156 | # Convert to numpy array and normalize to [0,1] 157 | image_np = np.array(spectrogram_image).astype(np.float32) / 255.0 # [H, W, 3] 158 | 159 | # Convert to torch tensor and add batch dimension 160 | # image_tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0) # [1, 3, H, W] 161 | 162 | image = torch.from_numpy(image_np)[None,] 163 | 164 | return (image,) 165 | -------------------------------------------------------------------------------- /visuals/SignalProcessingWaveform.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Waveform image rendering node 10 | """ 11 | 12 | import torch 13 | from io import BytesIO 14 | from PIL import Image 15 | import numpy as np 16 | import matplotlib.pyplot as plt 17 | 18 | from typing import Any, Dict, Tuple 19 | 20 | from ..core.utilities import comfy_root_to_syspath 21 | 22 | comfy_root_to_syspath() # add comfy to sys path for dev 23 | 24 | 25 | class SignalProcessingWaveform: 26 | @classmethod 27 | def INPUT_TYPES(cls) -> Dict[str, Any]: 28 | return { 29 | "required": { 30 | "audio_input": ("AUDIO",), 31 | }, 32 | "optional": { 33 | "color": ("STRING", {"default": "black"}), 34 | "background_color": ("STRING", {"default": "white"}), 35 | "width": ( 36 | "INT", 37 | {"default": 800, "min": 100, "max": 4000, "step": 100}, 38 | ), 39 | "height": ("INT", {"default": 200, "min": 50, "max": 1000, "step": 50}), 40 | "line_width": ( 41 | "FLOAT", 42 | {"default": 1.0, "min": 0.1, "max": 10.0, "step": 0.1}, 43 | ), 44 | }, 45 | } 46 | 47 | RETURN_TYPES = ("IMAGE",) 48 | RETURN_NAMES = ("waveform_image",) 49 | CATEGORY = "Signal Processing" 50 | FUNCTION = "process" 51 | 52 | def process( 53 | self, 54 | audio_input: torch.Tensor, 55 | color: str = "white", 56 | background_color: str = "black", 57 | width: int = 800, 58 | height: int = 200, 59 | line_width: float = 1.0, 60 | ) -> Tuple[torch.Tensor]: 61 | waveform = audio_input.get( 62 | "waveform" 63 | ) # [channels, samples] or [batch, channels, samples] 64 | 65 | # Convert to mono by averaging channels 66 | if waveform.ndim == 3: 67 | # [batch, channels, samples] 68 | waveform = waveform.mean(dim=1, keepdim=True) # [batch, 1, samples] 69 | waveform = waveform.squeeze(0) # [1, samples] 70 | elif waveform.ndim == 2: 71 | # [channels, samples] 72 | if waveform.shape[0] > 1: 73 | waveform = waveform.mean(dim=0, keepdim=True) # [1, samples] 74 | else: 75 | waveform = waveform.unsqueeze(0) # [1, samples] 76 | elif waveform.ndim == 1: 77 | # [samples] 78 | waveform = waveform.unsqueeze(0) # [1, samples] 79 | else: 80 | raise ValueError(f"Unsupported waveform shape: {waveform.shape}") 81 | 82 | # Convert waveform to numpy 83 | waveform = waveform.to(dtype=torch.float32) 84 | waveform_np = waveform.squeeze().detach().cpu().numpy() # [samples] 85 | 86 | # Create a matplotlib figure without axes 87 | plt.figure(figsize=(width / 100, height / 100), dpi=96) 88 | plt.axis("off") 89 | plt.margins(0, 0) 90 | plt.gca().set_facecolor(background_color) 91 | plt.gca().set_position([0, 0, 1, 1]) 92 | 93 | # Plot the waveform 94 | plt.plot(waveform_np, color=color, linewidth=line_width) 95 | plt.ylim(-1.3, 1.3) # Set y-axis limits to -1 and 1 96 | plt.tight_layout(pad=0) 97 | 98 | # Save the plot to a buffer 99 | buf = BytesIO() 100 | plt.savefig(buf, format="png", bbox_inches="tight", pad_inches=0) 101 | plt.close() 102 | 103 | # Load the image from the buffer 104 | buf.seek(0) 105 | waveform_image = Image.open(buf).convert("RGB") 106 | 107 | # Resize if necessary 108 | waveform_image = waveform_image.resize((width, height), Image.BILINEAR) 109 | 110 | # Convert to numpy array and normalize to [0,1] 111 | image_np = np.array(waveform_image).astype(np.float32) / 255.0 # [H, W, 3] 112 | 113 | # Convert to torch tensor and add batch dimension 114 | image = torch.from_numpy(image_np)[None,] 115 | 116 | return (image,) 117 | 
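# NOTE: SignalProcessingWaveform2 below duplicates the class above except for
# three details: its process() defaults swap the color scheme back to black on
# white (matching its INPUT_TYPES defaults), it plots with a fixed ylim of
# (-1, 1) instead of (-1.3, 1.3), and it returns the image as [1, 3, H, W]
# (channels-first) rather than [1, H, W, 3]. ComfyUI's IMAGE convention is
# channels-last, so the first class's layout is the one the UI expects.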
118 | 119 | class SignalProcessingWaveform2: 120 | @classmethod 121 | def INPUT_TYPES(cls) -> Dict[str, Any]: 122 | return { 123 | "required": { 124 | "audio_input": ("AUDIO",), 125 | }, 126 | "optional": { 127 | "color": ("STRING", {"default": "black"}), 128 | "background_color": ("STRING", {"default": "white"}), 129 | "width": ( 130 | "INT", 131 | {"default": 800, "min": 100, "max": 4000, "step": 100}, 132 | ), 133 | "height": ("INT", {"default": 200, "min": 50, "max": 1000, "step": 50}), 134 | "line_width": ( 135 | "FLOAT", 136 | {"default": 1.0, "min": 0.1, "max": 10.0, "step": 0.1}, 137 | ), 138 | }, 139 | } 140 | 141 | RETURN_TYPES = ("IMAGE",) 142 | RETURN_NAMES = ("waveform_image",) 143 | CATEGORY = "Signal Processing" 144 | FUNCTION = "process" 145 | 146 | def process( 147 | self, 148 | audio_input: torch.Tensor, 149 | color: str = "black", 150 | background_color: str = "white", 151 | width: int = 800, 152 | height: int = 200, 153 | line_width: float = 1.0, 154 | ) -> Tuple[torch.Tensor]: 155 | waveform = audio_input.get( 156 | "waveform" 157 | ) # [channels, samples] or [batch, channels, samples] 158 | 159 | # Convert to mono by averaging channels 160 | if waveform.ndim == 3: 161 | # [batch, channels, samples] 162 | waveform = waveform.mean(dim=1, keepdim=True) # [batch, 1, samples] 163 | waveform = waveform.squeeze(0) # [1, samples] 164 | elif waveform.ndim == 2: 165 | # [channels, samples] 166 | if waveform.shape[0] > 1: 167 | waveform = waveform.mean(dim=0, keepdim=True) # [1, samples] 168 | else: 169 | waveform = waveform.unsqueeze(0) # [1, samples] 170 | elif waveform.ndim == 1: 171 | # [samples] 172 | waveform = waveform.unsqueeze(0) # [1, samples] 173 | else: 174 | raise ValueError(f"Unsupported waveform shape: {waveform.shape}") 175 | 176 | # Ensure waveform is in float32 177 | waveform = waveform.to(dtype=torch.float32) 178 | waveform_np = waveform.squeeze().detach().cpu().numpy() # [samples] 179 | 180 | # Create a matplotlib figure without axes 181 | plt.figure(figsize=(width / 100, height / 100), dpi=96) 182 | plt.axis("off") 183 | plt.margins(0, 0) 184 | ax = plt.gca() 185 | ax.set_facecolor(background_color) 186 | ax.set_position([0, 0, 1, 1]) 187 | 188 | # Plot the waveform with fixed y-axis limits 189 | plt.plot(waveform_np, color=color, linewidth=line_width) 190 | plt.ylim(-1, 1) # Set y-axis limits to -1 and 1 191 | plt.tight_layout(pad=0) 192 | 193 | # Save the plot to a buffer 194 | buf = BytesIO() 195 | plt.savefig(buf, format="png", bbox_inches="tight", pad_inches=0) 196 | plt.close() 197 | 198 | # Load the image from the buffer 199 | buf.seek(0) 200 | waveform_image = Image.open(buf).convert("RGB") 201 | 202 | # Resize if necessary 203 | waveform_image = waveform_image.resize((width, height), Image.BILINEAR) 204 | 205 | # Convert to numpy array and normalize to [0,1] 206 | image_np = np.array(waveform_image).astype(np.float32) / 255.0 # [H, W, 3] 207 | 208 | # Convert to torch tensor and add batch dimension 209 | image = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0) # [1, 3, H, W] 210 | 211 | return (image,) 212 | -------------------------------------------------------------------------------- /visuals/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c0ffymachyne/ComfyUI_SignalProcessing/a50ab911eee901646fdb810f18ae8aed2ca1960b/visuals/__init__.py --------------------------------------------------------------------------------
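For completeness: ComfyUI discovers a custom-node package through module-level mapping dicts, which this repo's nodes.py presumably provides for every node. A hypothetical registration sketch for the two visual nodes above, following that standard convention:

# Hypothetical registration module; ComfyUI reads these two dicts at import time.
from .visuals.SignalProcessingWaveform import SignalProcessingWaveform
from .visuals.SignalProcessingSpectrogram import SignalProcessingSpectrogram

NODE_CLASS_MAPPINGS = {
    "SignalProcessingWaveform": SignalProcessingWaveform,
    "SignalProcessingSpectrogram": SignalProcessingSpectrogram,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "SignalProcessingWaveform": "Signal Processing: Waveform",
    "SignalProcessingSpectrogram": "Signal Processing: Spectrogram",
}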