├── .gitignore ├── CREDITS.md ├── LICENSE-APACHE-2 ├── LICENSE-CCA-ANY ├── LICENSE-GPL-V3 ├── README.md ├── __init__.py ├── core ├── compression.py ├── conversions.py ├── filters.py ├── harmonics.py ├── io.py ├── kernels │ ├── compressor │ │ └── compressor.cu │ └── limiter │ │ ├── limiter_dev.cu │ │ ├── limiter_down_parallel.cu │ │ ├── limiter_hard_clipper.cu │ │ ├── limiter_soft_clipper.cu │ │ ├── limiter_updown_parallel.cu │ │ └── limiter_updown_weighted_parallel.cu ├── limiting.py ├── loudness.py ├── mixing.py ├── plotting.py ├── sampling.py ├── saturation.py ├── spectral.py ├── tests.py ├── utilities.py ├── utilitiescuda.py └── widening.py ├── data └── widener │ ├── init_vn_filters.txt │ └── opt_vn_filters.txt ├── effects ├── SignalProcessingConvolutionReverb.py ├── SignalProcessingPaulStretch.py ├── SignalProcessingPitchShifter.py ├── SignalProcessingStereoWidening.py └── __init__.py ├── generators ├── SignalProcessingPadSynth.py ├── SignalProcessingPadSynthChoir.py └── __init__.py ├── nodes.py ├── noxfile.py ├── processors ├── SignalProcessingBaxandallEQ.py ├── SignalProcessingCompressor.py ├── SignalProcessingFilter.py ├── SignalProcessingHarmonicsEnhancer.py ├── SignalProcessingLimiter.py ├── SignalProcessingLoadAudio.py ├── SignalProcessingLoudness.py ├── SignalProcessingMixdown.py ├── SignalProcessingNormalizer.py ├── SignalProcessingSaturation.py └── __init__.py ├── pyproject.toml ├── pytest.ini ├── requirements.txt ├── tests ├── __init__.py ├── conftest.py ├── test_baxandall.py ├── test_compressor.py ├── test_convolution_reverb.py ├── test_filter.py ├── test_harmonics.py ├── test_limiting.py ├── test_normalizer.py ├── test_padsynthchoir.py ├── test_paulstretch.py ├── test_pitchshift.py ├── test_plotting.py ├── test_saturation.py └── test_widening.py └── visuals ├── SignalProcessingSpectrogram.py ├── SignalProcessingWaveform.py └── __init__.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # development directories 10 | audio/ 11 | .nox/ 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # poetry 102 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 103 | # This is especially recommended for binary packages to ensure reproducibility, and is more 104 | # commonly ignored for libraries. 105 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 106 | #poetry.lock 107 | 108 | # pdm 109 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 110 | #pdm.lock 111 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 112 | # in version control. 113 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 114 | .pdm.toml 115 | .pdm-python 116 | .pdm-build/ 117 | 118 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 119 | __pypackages__/ 120 | 121 | # Celery stuff 122 | celerybeat-schedule 123 | celerybeat.pid 124 | 125 | # SageMath parsed files 126 | *.sage.py 127 | 128 | # Environments 129 | .env 130 | .venv 131 | env/ 132 | venv/ 133 | ENV/ 134 | env.bak/ 135 | venv.bak/ 136 | 137 | # Spyder project settings 138 | .spyderproject 139 | .spyproject 140 | 141 | # Rope project settings 142 | .ropeproject 143 | 144 | # mkdocs documentation 145 | /site 146 | 147 | # mypy 148 | .mypy_cache/ 149 | .dmypy.json 150 | dmypy.json 151 | 152 | # Pyre type checker 153 | .pyre/ 154 | 155 | # pytype static type analyzer 156 | .pytype/ 157 | 158 | # Cython debug symbols 159 | cython_debug/ 160 | 161 | # PyCharm 162 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 163 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 164 | # and can be added to the global gitignore or merged into this file. For a more nuclear 165 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
166 | #.idea/ 167 | -------------------------------------------------------------------------------- /CREDITS.md: -------------------------------------------------------------------------------- 1 | ### Credits 2 | - **`Nasca Octavian Paul`** 3 | - **`Greg Hopkins`** 4 | - **`Orchisama Das`** -------------------------------------------------------------------------------- /LICENSE-CCA-ANY: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. 
database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. 
Affirmer offers the Work as-is and makes no representations or
warranties of any kind concerning the Work, express, implied,
statutory or otherwise, including without limitation warranties of
title, merchantability, fitness for a particular purpose, non
infringement, or the absence of latent or other defects, accuracy, or
the presence or absence of errors, whether or not discoverable, all to
the greatest extent permissible under applicable law.
c. Affirmer disclaims responsibility for clearing rights of other persons
that may apply to the Work or any use thereof, including without
limitation any person's Copyright and Related Rights in the Work.
Further, Affirmer disclaims responsibility for obtaining any necessary
consents, permissions or other rights required for any use of the
Work.
d. Affirmer understands and acknowledges that Creative Commons is not a
party to this document and has no duty or obligation with respect to
this CC0 or use of the Work.
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS

__all__ = [
    "NODE_CLASS_MAPPINGS",
    "NODE_DISPLAY_NAME_MAPPINGS",
]
--------------------------------------------------------------------------------
/core/compression.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Author: C0ffymachyne
License: GPLv3
Version: 1.0.0

Description:
    various compression methods
"""


import cupy as cp
import torch
import numpy as np

from typing import Tuple, Any
from ..core.utilitiescuda import read_kernel_by_name

compressor_kernel = read_kernel_by_name(
    "compressor", kernel_class="compressor", kernel_identifier="compexp_kernel"
)


def compressor(
    audio_in: torch.Tensor | np.ndarray,
    sample_rate: int,
    comp: float = -0.3,  # Compression/expansion factor
    attack: float = 0.1,  # Attack time in ms
    release: float = 60.0,  # Release time in ms
    a: float = 0.3,  # Filter parameter < 1
    device: str = "cuda",
) -> Tuple[torch.Tensor, Any]:
    """
    Compresses or expands stereo audio using an optimized CUDA kernel.

    Parameters:
        audio_in (torch.Tensor or np.ndarray): Input stereo audio signal with shape (2, n_samples), i.e. [channels, samples].
        sample_rate (int): Sampling rate in Hz.
        comp (float): Compression/expansion factor.
        attack (float): Attack time in milliseconds.
        release (float): Release time in milliseconds.
        a (float): Filter parameter (< 1) for envelope smoothing.
        device (str): Device to place the output tensor ('cuda' or 'cpu').

    Returns:
        torch.Tensor: Compressed stereo audio with shape (2, n_samples).
    """
    # Convert input to NumPy array if necessary
    audio_in = audio_in.T
    if isinstance(audio_in, torch.Tensor):
        audio_in = audio_in.detach().cpu().numpy()
    else:
        audio_in = np.asarray(audio_in, dtype=np.float64)

    # Ensure the audio is in shape (n_samples, 2)
    if audio_in.ndim != 2 or audio_in.shape[1] != 2:
        raise ValueError(
            f"Input audio must have shape (n_samples, 2), but got {audio_in.shape}"
        )

    n_samples, n_channels = audio_in.shape

    # Flatten the audio for kernel processing
    wav_in_flat = audio_in.flatten()

    # Move data to GPU
    wav_in_gpu = cp.asarray(wav_in_flat, dtype=cp.float64)
    wav_out_gpu = cp.zeros_like(wav_in_gpu)

    # Define grid and block dimensions
    block_size = 256  # Number of threads per block
    grid_size = n_channels  # One block per channel

    # Launch the CUDA kernel
    compressor_kernel(
        (grid_size,),  # Grid dimensions
        (block_size,),  # Block dimensions
        (
            wav_in_gpu,
            wav_out_gpu,
            np.int32(n_channels),
            np.int32(n_samples),
            np.float64(comp),
            np.float64(release),
            np.float64(attack),
            np.float64(a),
            np.float64(sample_rate),
        ),
    )

    # Retrieve results from GPU
    wav_out_host = wav_out_gpu.get().astype(np.float64)

    # Reshape the output
    wav_out_stereo = wav_out_host.reshape((n_samples, n_channels))

    # Convert back to a Torch tensor on the requested device
    if device == "cuda":
        out_tensor = torch.from_numpy(wav_out_stereo).to("cuda")
    else:
        out_tensor = torch.from_numpy(wav_out_stereo).to("cpu")

    return out_tensor.T, None
--------------------------------------------------------------------------------
/core/conversions.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Author: C0ffymachyne
License: GPLv3
Version: 1.0.0

Description:
    various conversion methods
"""

import torch


def db_to_lin(value: float) -> float:
    return 10 ** (value / 20)


def lin_to_tb(value: torch.Tensor) -> torch.Tensor:
    # Linear amplitude to dB; the epsilon guards against log10(0)
    return 20 * torch.log10(torch.abs(value) + 1.0e-24)


def get_sign(value: torch.Tensor) -> torch.Tensor:
    sign = torch.sign(value)
    return sign
--------------------------------------------------------------------------------
/core/filters.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Author: C0ffymachyne
License: GPLv3
Version: 1.0.0

Description:
    utility functions for various filtering tasks
"""

import torch
from scipy.signal import butter
import torchaudio.functional as AF


def anti_aliasing_iir_filter(
    audio: torch.Tensor, sample_rate: int, cutoff: float = 0.0, order: int = 4
) -> torch.Tensor:
    """
    Apply an anti-aliasing IIR low-pass filter to the audio.

    Parameters:
        audio (Tensor): [channels, samples] audio signal.
        sample_rate (int): Sample rate.
        cutoff (float): Cutoff frequency for the filter. Defaults to the Nyquist limit (sample_rate / 2).
        order (int): Order of the IIR filter.

    Returns:
        Tensor: Filtered audio signal.
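    Example:
        A minimal usage sketch; the stereo tensor and 44.1 kHz rate below are
        illustrative assumptions, not values taken from this repository::

            >>> import torch
            >>> audio = torch.randn(2, 44100)  # hypothetical 1-second stereo clip
            >>> filtered = anti_aliasing_iir_filter(audio, sample_rate=44100, cutoff=16000.0)
            >>> filtered.shape
            torch.Size([2, 44100])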
    """
    if cutoff == 0:
        cutoff = sample_rate / 2  # Default to Nyquist frequency
    nyquist = sample_rate / 2
    # Small offset keeps the normalized cutoff strictly below 1.0
    normalized_cutoff = cutoff / nyquist - 0.01

    # Design the Butterworth filter
    b, a = butter(order, normalized_cutoff, btype="low", output="ba")
    b = torch.tensor(b, dtype=audio.dtype, device=audio.device)
    a = torch.tensor(a, dtype=audio.dtype, device=audio.device)

    # Apply the filter
    filtered_audio = AF.lfilter(audio, b_coeffs=b, a_coeffs=a)

    return filtered_audio


def band_stop_filter(
    audio: torch.Tensor,
    sample_rate: int,
    low_cut: float,
    high_cut: float,
    filter_order: int = 2,
) -> torch.Tensor:
    """
    Apply a band-stop filter to attenuate lower mid frequencies.

    Parameters:
        audio (Tensor): [channels, samples] input audio signal.
        sample_rate (int): Sample rate in Hz.
        low_cut (float): Lower cutoff frequency of the band in Hz.
        high_cut (float): Upper cutoff frequency of the band in Hz.
        filter_order (int): Order of the Butterworth filter.

    Returns:
        Tensor: Audio signal after band-stop filtering.
    """
    nyquist = sample_rate / 2
    normalized_band = [low_cut / nyquist, high_cut / nyquist]

    # Design band-stop Butterworth filter
    b, a = butter(filter_order, normalized_band, btype="bandstop", analog=False)

    # Convert coefficients to Torch tensors
    b = torch.tensor(b, dtype=audio.dtype, device=audio.device)
    a = torch.tensor(a, dtype=audio.dtype, device=audio.device)

    # Ensure [channels, samples] format
    if audio.dim() == 1:
        audio = audio.unsqueeze(0)

    # Apply the filter using lfilter
    filtered_audio = AF.lfilter(audio, a_coeffs=a, b_coeffs=b, clamp=False)

    return filtered_audio.squeeze(0) if filtered_audio.size(0) == 1 else filtered_audio


def low_pass_filter(
    audio: torch.Tensor, sample_rate: int, cutoff_freq: float, filter_order: int = 4
) -> torch.Tensor:
    """
    Apply a low-pass filter using a Butterworth filter.
    """
    # Design Butterworth filter using SciPy
    nyquist = sample_rate / 2
    normalized_cutoff = cutoff_freq / nyquist
    b, a = butter(filter_order, normalized_cutoff, btype="low", analog=False)

    # Convert coefficients to Torch tensors
    b = torch.tensor(b, dtype=audio.dtype, device=audio.device)
    a = torch.tensor(a, dtype=audio.dtype, device=audio.device)

    # Ensure [channels, samples] format
    if audio.dim() == 1:
        audio = audio.unsqueeze(0)

    # Apply the filter using torchaudio
    filtered_audio = AF.lfilter(audio, a_coeffs=a, b_coeffs=b, clamp=False)

    return filtered_audio.squeeze(0) if filtered_audio.size(0) == 1 else filtered_audio


def butter_filter(
    audio: torch.Tensor,
    sample_rate: int,
    cutoff_freq: float,
    filter_type: str = "low",
    order: int = 4,
) -> torch.Tensor:
    """
    Create and apply a Butterworth filter (low-pass or high-pass).

    Parameters:
        audio (Tensor): [channels, samples] input audio signal.
        sample_rate (int): Sample rate in Hz.
        cutoff_freq (float): Cutoff frequency in Hz.
        filter_type (str): "low" for low-pass, "high" for high-pass.
        order (int): Filter order.

    Returns:
        Tensor: Filtered audio signal.
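    Example:
        A minimal sketch; the 48 kHz rate and 120 Hz cutoff are illustrative
        assumptions::

            >>> import torch
            >>> audio = torch.randn(2, 48000)
            >>> high_passed = butter_filter(
            ...     audio, sample_rate=48000, cutoff_freq=120.0, filter_type="high"
            ... )
            >>> high_passed.shape
            torch.Size([2, 48000])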
    """
    nyquist = sample_rate / 2
    normalized_cutoff = cutoff_freq / nyquist
    b, a = butter(order, normalized_cutoff, btype=filter_type, analog=False)

    b = torch.tensor(b, dtype=audio.dtype, device=audio.device)
    a = torch.tensor(a, dtype=audio.dtype, device=audio.device)

    # Ensure [channels, samples] format
    if audio.dim() == 1:
        audio = audio.unsqueeze(0)

    filtered_audio = AF.lfilter(audio, a_coeffs=a, b_coeffs=b, clamp=False)
    return filtered_audio.squeeze(0) if filtered_audio.size(0) == 1 else filtered_audio


def butter_low_pass(
    audio: torch.Tensor, sample_rate: int, cutoff_freq: float, order: int = 4
) -> torch.Tensor:
    return butter_filter(
        audio, sample_rate, cutoff_freq, filter_type="low", order=order
    )


def butter_high_pass(
    audio: torch.Tensor, sample_rate: int, cutoff_freq: float, order: int = 4
) -> torch.Tensor:
    return butter_filter(
        audio, sample_rate, cutoff_freq, filter_type="high", order=order
    )
--------------------------------------------------------------------------------
/core/harmonics.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Author: C0ffymachyne
License: GPLv3
Version: 1.0.0

Description:
    various harmonic enhancement methods
"""


import torch
import torchaudio
import torchaudio.functional as F
from typing import List


def enhance_harmonics(
    audio: torch.Tensor,
    sample_rate: int,
    harmonics: List[int] = [1, 3, 5, 7, 9, 11],
    gain_db: float = 5,
    base_frequency: float = 0,
    Q: float = 0.707,
) -> torch.Tensor:
    """Boost the given harmonics of the detected (or supplied) fundamental with biquad EQ bands."""

    if base_frequency == 0:
        pitch = F.detect_pitch_frequency(audio, sample_rate)
        base_frequency = pitch.mean().item()
        if base_frequency <= 0:  # Fallback if pitch detection fails
            base_frequency = 440  # Use a default base frequency

    # Apply EQ boosts to specific harmonic frequencies
    for harmonic in harmonics:
        freq = base_frequency * harmonic
        if freq < sample_rate / 2:  # Ensure it's within the Nyquist frequency
            audio = F.equalizer_biquad(
                audio, sample_rate, center_freq=freq, gain=gain_db, Q=Q
            )

    return audio


def enhance_harmonics2(
    audio: torch.Tensor,
    sample_rate: int,
    harmonics: list[int] = [1, 2, 3, 4, 5],
    gain_db: float = 5,
    base_frequency: float = 0,
    Q: float = 0.707,
) -> torch.Tensor:
    """
    Enhance specified harmonics in an audio signal, emulating Distressor-like harmonic enhancement.

    Parameters:
        audio (Tensor): Input audio signal (1D or 2D [channels, samples]).
        sample_rate (int): Sampling rate of the audio.
        harmonics (list): List of harmonic multipliers to enhance.
        gain_db (float): Gain to apply to each harmonic.
        base_frequency (float, optional): Fundamental frequency. If 0, it will be estimated.
        Q (float): Quality factor for the EQ bands.

    Returns:
        Tensor: Audio signal with enhanced harmonics.
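    Example:
        A minimal sketch; the explicit 110 Hz fundamental is an illustrative
        assumption that bypasses pitch detection::

            >>> import torch
            >>> audio = torch.randn(1, 44100)
            >>> out = enhance_harmonics2(
            ...     audio, 44100, harmonics=[1, 2, 3], gain_db=4.0, base_frequency=110.0
            ... )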
    """

    if base_frequency == 0:
        # Detect the pitch frequency using torchaudio's pitch detection
        pitch = F.detect_pitch_frequency(audio, sample_rate)
        base_frequency = pitch.mean().item()
        if base_frequency <= 0:  # Fallback if pitch detection fails
            base_frequency = 440  # Default base frequency (A4)

    # Create a copy of the input signal for processing
    processed_audio: torch.Tensor = audio.clone()

    # Enhance harmonics using biquad EQ for precision
    for harmonic in harmonics:
        freq = base_frequency * harmonic
        if freq < sample_rate / 2:  # Ensure frequency is within Nyquist limit
            processed_audio = F.equalizer_biquad(
                processed_audio, sample_rate, center_freq=freq, gain=gain_db, Q=Q
            )

    # Apply a non-linear saturation for warmth and further harmonic enhancement
    def non_linear_saturation(audio: torch.Tensor, drive: float = 1.0) -> torch.Tensor:
        k = torch.tensor(1.0 + drive, dtype=audio.dtype, device=audio.device)
        return torch.tanh(k * audio) / torch.tanh(k)

    processed_audio = non_linear_saturation(processed_audio, drive=gain_db / 10.0)

    # Blend processed harmonics with the original signal
    output_audio = audio + processed_audio * (gain_db / 20.0)  # Scale blend by gain
    return output_audio / torch.max(torch.abs(output_audio))  # Normalize output


def batch_equalizer_biquad(
    audio: torch.Tensor, sample_rate: int, freqs: torch.Tensor, gain_db: float, Q: float
) -> torch.Tensor:
    """
    Apply biquad filters to enhance multiple harmonics in a batch.

    Parameters:
        audio (Tensor): Input audio signal (1D or 2D [channels, samples]).
        sample_rate (int): Sampling rate of the audio.
        freqs (Tensor): Frequencies for biquad filters.
        gain_db (float): Gain to apply to each harmonic.
        Q (float): Quality factor for all filters.

    Returns:
        Tensor: Audio signal with harmonics enhanced.
    """
    audio = audio.unsqueeze(0) if audio.dim() == 1 else audio

    # Filter the signal once per harmonic frequency and collect the results
    # (each entry is a filtered copy of the audio, not raw filter coefficients)
    filtered = [
        torchaudio.functional.equalizer_biquad(
            audio, sample_rate, center_freq=f, gain=gain_db, Q=Q
        )
        for f in freqs
    ]

    # Sum the filtered outputs for all harmonics
    filtered_audio = sum(filtered)

    return filtered_audio


def enhance_harmonics3(
    audio: torch.Tensor,
    sample_rate: int,
    harmonics: List[int] = [1, 2, 3, 4, 5],
    gain_db: float = 5.0,
    base_frequency: float = 0,
    Q: float = 0.707,
) -> torch.Tensor:
    """
    Enhance specified harmonics in an audio signal, efficiently processing harmonics in a batch.

    Parameters:
        audio (Tensor): Input audio signal (1D or 2D [channels, samples]).
        sample_rate (int): Sampling rate of the audio.
        harmonics (list): List of harmonic multipliers to enhance.
        gain_db (float): Gain to apply to each harmonic.
        base_frequency (float, optional): Fundamental frequency. If 0, it will be estimated.
        Q (float): Quality factor for the EQ bands.

    Returns:
        Tensor: Audio signal with enhanced harmonics.
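    Example:
        A minimal sketch; the 220 Hz fundamental is an illustrative assumption
        that skips pitch detection::

            >>> import torch
            >>> audio = torch.randn(2, 44100)
            >>> out = enhance_harmonics3(audio, 44100, harmonics=[1, 2, 3], base_frequency=220.0)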
    """
    if base_frequency == 0:
        # Detect the pitch frequency using torchaudio's pitch detection
        pitch = torchaudio.functional.detect_pitch_frequency(audio, sample_rate)
        base_frequency = pitch.mean().item()
        if base_frequency <= 0:  # Fallback if pitch detection fails
            base_frequency = 440.0  # Default base frequency (A4)

    # Calculate harmonic frequencies
    harmonic_freqs = torch.tensor(
        [base_frequency * h for h in harmonics], device=audio.device
    )

    # Ensure frequencies are within Nyquist limit
    harmonic_freqs = harmonic_freqs[harmonic_freqs < sample_rate / 2]

    # Apply batched harmonic enhancement
    processed_audio = batch_equalizer_biquad(
        audio, sample_rate, harmonic_freqs, gain_db, Q
    )

    # Normalize the output
    return processed_audio / torch.max(torch.abs(processed_audio))


def enahnce_harmonics_23(
    audio: torch.Tensor, sample_rate: int, gain_db_base: int = 0, Q: float = 0.707
) -> torch.Tensor:
    """Boost the 2nd and 3rd harmonics with fixed-Q EQ bands."""

    audio = enhance_harmonics3(
        audio, sample_rate, harmonics=[2], gain_db=gain_db_base + 1, Q=0.303
    )
    audio = enhance_harmonics3(
        audio, sample_rate, harmonics=[3], gain_db=gain_db_base + 3, Q=0.303
    )

    return audio
--------------------------------------------------------------------------------
/core/kernels/compressor/compressor.cu:
--------------------------------------------------------------------------------
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

extern "C" __global__
void compexp_kernel(
    const double* wav_in,    // Input audio signal [L0, R0, L1, R1, ..., LN-1, RN-1]
    double* wav_out,         // Output audio signal
    const int n_channels,    // Number of channels (e.g., 2 for stereo)
    const int n_samples,     // Number of samples per channel
    const double comp,       // Compression/expansion factor
    const double release,    // Release time in ms
    const double attack,     // Attack time in ms
    const double a,          // Filter parameter < 1
    const double Fs          // Sampling rate in Hz
) {
    int ch = blockIdx.x;          // Each block processes one channel
    int thread_id = threadIdx.x;  // Thread within the block
    int stride = blockDim.x;      // Number of threads in the block

    if (ch >= n_channels) return;

    double attack_coeff = exp(-1.0 / (Fs * (attack * 1e-3)));
    double release_coeff = exp(-1.0 / (Fs * (release * 1e-3)));

    double h = 0.0;  // Initialize filter state for envelope detection

    // Divide samples across threads in parallel
    for (int i = thread_id; i < n_samples; i += stride) {
        int sample_idx = i * n_channels + ch;
        double sample = wav_in[sample_idx];

        // Envelope detection using attack/release dynamics
        double abs_sample = fabs(sample);
        if (abs_sample > h) {
            h = attack_coeff * (h - abs_sample) + abs_sample;
        } else {
            h = release_coeff * (h - abs_sample) + abs_sample;
        }

        // Apply compression/expansion: the same power law covers both cases
        // (comp > 0 attenuates high envelope values, comp < 0 boosts them)
        double gain = pow(h + 1e-8, -comp);

        // Scale output
        wav_out[sample_idx] = sample * gain;
    }
}
--------------------------------------------------------------------------------
/core/kernels/limiter/limiter_dev.cu:
-------------------------------------------------------------------------------- 1 | #ifndef M_PI 2 | #define M_PI 3.14159265358979323846 3 | #endif 4 | 5 | extern "C" __global__ 6 | void limiter_kernel( 7 | const double*__restrict__ wav_in, // Input audio signal [L0, R0, L1, R1, ..., LN-1, RN-1] 8 | double* __restrict__ wav_out, // Output audio signal 9 | double* __restrict__ debug_out, // Debug buffer [envelope, gain] 10 | const int n_channels, // Number of channels (e.g., 2 for stereo) 11 | const int n_samples, // Number of samples per channel 12 | double threshold, // Threshold in percents (0-100) 13 | double slope, // Slope angle in percents (0-100) 14 | const double sr, // Sample rate (samples/sec) 15 | double twnd, // Window time for RMS in ms 16 | double tatt, // Attack time in ms 17 | double trel // Release time in ms 18 | ) { 19 | // Only one thread handles the entire stereo pair 20 | int ch = blockIdx.x * blockDim.x + threadIdx.x; // Thread processes a single channel 21 | 22 | if (ch >= n_channels) return; 23 | 24 | double attack_coeff = exp(-1.0 / (sr * (tatt * 1e-3))); 25 | double release_coeff = exp(-1.0 / (sr * (trel * 1e-3))); 26 | double envelope = 0.00; 27 | //threshold = .55; 28 | //slope = 1.0; 29 | 30 | for (int i = 0; i < n_samples; ++i) { 31 | 32 | double sample = wav_in[i * n_channels + ch]; 33 | 34 | // Envelope tracking 35 | double abs_sample = fabs(sample); 36 | if (abs_sample > envelope) { 37 | envelope = attack_coeff * (envelope - abs_sample) + abs_sample; 38 | } else { 39 | envelope = release_coeff * (envelope - abs_sample) + abs_sample; 40 | } 41 | 42 | // Gain calculation 43 | double gain = 1.0; 44 | if (envelope > threshold) { 45 | gain = pow(10.0, -slope * (log10(envelope) - log10(threshold))); 46 | } 47 | // Upward compression below threshold 48 | double upward_compression_gain = 1.0; 49 | if (envelope < threshold && envelope > 0.0) { 50 | upward_compression_gain = pow(10.0, slope * (log10(threshold) - log10(envelope))); 51 | } 52 | // Apply gain 53 | wav_out[i * n_channels + ch] = sample * gain * upward_compression_gain; 54 | 55 | // Debugging output (envelope and gain) 56 | if (debug_out) { 57 | debug_out[i * n_channels + ch] = gain; 58 | } 59 | } 60 | } -------------------------------------------------------------------------------- /core/kernels/limiter/limiter_down_parallel.cu: -------------------------------------------------------------------------------- 1 | #ifndef M_PI 2 | #define M_PI 3.14159265358979323846 3 | #endif 4 | 5 | extern "C" __global__ 6 | void limiter_kernel( 7 | const double* __restrict__ wav_in, // Input audio signal [L0, R0, L1, R1, ..., LN-1, RN-1] 8 | double* __restrict__ wav_out, // Output audio signal 9 | const int n_channels, // Number of channels (e.g., 2 for stereo) 10 | const int n_samples, // Number of samples per channel 11 | const double threshold, // Threshold in linear scale (e.g., 0.5 for 50%) 12 | const double slope, // Slope parameter for gain calculation 13 | const double sr, // Sample rate in Hz 14 | const double tatt, // Attack time in ms 15 | const double trel // Release time in ms 16 | ) { 17 | int ch = blockIdx.x; // Each block processes one channel 18 | int thread_id = threadIdx.x; // Thread within the block 19 | int stride = blockDim.x; // Number of threads in the block 20 | 21 | if (ch >= n_channels) return; 22 | 23 | double attack_coeff = exp(-1.0 / (sr * (tatt * 1e-3))); 24 | double release_coeff = exp(-1.0 / (sr * (trel * 1e-3))); 25 | 26 | // Use release time to determine precompute samples 27 | int 
precompute_samples = int((trel * 1e-3) * sr); // Convert release time to samples 28 | 29 | // Each thread computes its own range of samples 30 | int start_idx = thread_id * (n_samples / stride); 31 | int end_idx = (thread_id + 1) * (n_samples / stride); 32 | 33 | // Extend range backward for precomputing 34 | int precompute_start_idx = max(0, start_idx - precompute_samples); 35 | 36 | double envelope = 0.0; // Envelope tracking state 37 | 38 | // Precompute envelope for the extra range 39 | for (int i = precompute_start_idx; i < start_idx; ++i) { 40 | int sample_idx = i * n_channels + ch; 41 | double sample = wav_in[sample_idx]; 42 | double abs_sample = fabs(sample); 43 | 44 | if (abs_sample > envelope) { 45 | envelope = attack_coeff * (envelope - abs_sample) + abs_sample; 46 | } else { 47 | envelope = release_coeff * (envelope - abs_sample) + abs_sample; 48 | } 49 | } 50 | 51 | // Process assigned range of samples 52 | for (int i = start_idx; i < end_idx; ++i) { 53 | int sample_idx = i * n_channels + ch; 54 | double sample = wav_in[sample_idx]; 55 | double abs_sample = fabs(sample); 56 | 57 | // Envelope tracking 58 | if (abs_sample > envelope) { 59 | envelope = attack_coeff * (envelope - abs_sample) + abs_sample; 60 | } else { 61 | envelope = release_coeff * (envelope - abs_sample) + abs_sample; 62 | } 63 | 64 | // Gain calculation for downward limiting 65 | double gain = 1.0; 66 | if (envelope > threshold) { 67 | gain = pow(10.0, -slope * (log10(envelope) - log10(threshold))); 68 | } 69 | 70 | // Apply both gains 71 | wav_out[sample_idx] = sample * gain; 72 | } 73 | } -------------------------------------------------------------------------------- /core/kernels/limiter/limiter_hard_clipper.cu: -------------------------------------------------------------------------------- 1 | #ifndef M_PI 2 | #define M_PI 3.14159265358979323846 3 | #endif 4 | 5 | extern "C" __global__ 6 | void limiter_kernel( 7 | const double* __restrict__ wav_in, // Input audio signal [L0, R0, L1, R1, ..., LN-1, RN-1] 8 | double* __restrict__ wav_out, // Output audio signal 9 | const int n_channels, // Number of channels (e.g., 2 for stereo) 10 | const int n_samples, // Number of samples per channel 11 | const double threshold, // Threshold in linear scale (e.g., 0.5 for 50%) 12 | const double slope, // Slope parameter for gain calculation 13 | const double sr, // Sample rate in Hz 14 | const double tatt, // Attack time in ms 15 | const double trel // Release time in ms 16 | ) 17 | { 18 | int ch = blockIdx.x; // Each block processes one channel 19 | int thread_id = threadIdx.x; // Thread within the block 20 | int stride = blockDim.x; // Number of threads in the block 21 | 22 | if (ch >= n_channels) return; 23 | 24 | int start_idx = thread_id * (n_samples / stride); 25 | int end_idx = (thread_id + 1) * (n_samples / stride); 26 | 27 | double clip_limit = threshold; 28 | double clip_limit_inv = 1.0 / clip_limit; 29 | 30 | for (int i = start_idx; i < end_idx; ++i) 31 | { 32 | int sample_idx = i * n_channels + ch; 33 | double y = wav_in[sample_idx]; 34 | 35 | double abs = fabs(y); 36 | if(abs >= clip_limit){ 37 | if (y < 0){ 38 | y = -clip_limit; 39 | }else{ 40 | y = clip_limit; 41 | } 42 | } 43 | wav_out[sample_idx] = y; 44 | } 45 | } -------------------------------------------------------------------------------- /core/kernels/limiter/limiter_soft_clipper.cu: -------------------------------------------------------------------------------- 1 | #ifndef M_PI 2 | #define M_PI 3.14159265358979323846 3 | #endif 4 | 5 | extern 
"C" __global__ 6 | void limiter_kernel( 7 | const double* __restrict__ wav_in, // Input audio signal [L0, R0, L1, R1, ..., LN-1, RN-1] 8 | double* __restrict__ wav_out, // Output audio signal 9 | const int n_channels, // Number of channels (e.g., 2 for stereo) 10 | const int n_samples, // Number of samples per channel 11 | const double threshold, // Threshold in linear scale (e.g., 0.5 for 50%) 12 | const double slope, // Slope parameter for gain calculation 13 | const double sr, // Sample rate in Hz 14 | const double tatt, // Attack time in ms 15 | const double trel // Release time in ms 16 | ) 17 | { 18 | int ch = blockIdx.x; // Each block processes one channel 19 | int thread_id = threadIdx.x; // Thread within the block 20 | int stride = blockDim.x; // Number of threads in the block 21 | 22 | if (ch >= n_channels) return; 23 | 24 | int start_idx = thread_id * (n_samples / stride); 25 | int end_idx = (thread_id + 1) * (n_samples / stride); 26 | 27 | double clip_limit = threshold; 28 | 29 | for (int i = start_idx; i < end_idx; ++i){ 30 | int sample_idx = i * n_channels + ch; 31 | double y = wav_in[sample_idx]; 32 | 33 | // cubic soft clipping 34 | if (y <= -1.0) { 35 | y = -2.0 / 3.0; 36 | } else if (y >= 1.0) { 37 | y = 2.0 / 3.0; 38 | } else { 39 | y = y - (1.0 / 3.0) * y * y * y; 40 | } 41 | wav_out[sample_idx] = y; 42 | } 43 | } -------------------------------------------------------------------------------- /core/kernels/limiter/limiter_updown_parallel.cu: -------------------------------------------------------------------------------- 1 | #ifndef M_PI 2 | #define M_PI 3.14159265358979323846 3 | #endif 4 | 5 | extern "C" __global__ 6 | void limiter_kernel( 7 | const double* __restrict__ wav_in, // Input audio signal [L0, R0, L1, R1, ..., LN-1, RN-1] 8 | double* __restrict__ wav_out, // Output audio signal 9 | const int n_channels, // Number of channels (e.g., 2 for stereo) 10 | const int n_samples, // Number of samples per channel 11 | const double threshold, // Threshold in linear scale (e.g., 0.5 for 50%) 12 | const double slope, // Slope parameter for gain calculation 13 | const double sr, // Sample rate in Hz 14 | const double tatt, // Attack time in ms 15 | const double trel // Release time in ms 16 | ) { 17 | int ch = blockIdx.x; // Each block processes one channel 18 | int thread_id = threadIdx.x; // Thread within the block 19 | int stride = blockDim.x; // Number of threads in the block 20 | 21 | if (ch >= n_channels) return; 22 | 23 | double attack_coeff = exp(-1.0 / (sr * (tatt * 1e-3))); 24 | double release_coeff = exp(-1.0 / (sr * (trel * 1e-3))); 25 | 26 | // Use release time to determine precompute samples 27 | int precompute_samples = int((trel * 1e-3) * sr); // Convert release time to samples 28 | 29 | // Each thread computes its own range of samples 30 | int start_idx = thread_id * (n_samples / stride); 31 | int end_idx = (thread_id + 1) * (n_samples / stride); 32 | 33 | // Extend range backward for precomputing 34 | int precompute_start_idx = max(0, start_idx - precompute_samples); 35 | 36 | double envelope = 0.0; // Envelope tracking state 37 | 38 | // Precompute envelope for the extra range 39 | for (int i = precompute_start_idx; i < start_idx; ++i) { 40 | int sample_idx = i * n_channels + ch; 41 | double sample = wav_in[sample_idx]; 42 | double abs_sample = fabs(sample); 43 | 44 | if (abs_sample > envelope) { 45 | envelope = attack_coeff * (envelope - abs_sample) + abs_sample; 46 | } else { 47 | envelope = release_coeff * (envelope - abs_sample) + abs_sample; 48 | 
} 49 | } 50 | 51 | // Process assigned range of samples 52 | for (int i = start_idx; i < end_idx; ++i) { 53 | int sample_idx = i * n_channels + ch; 54 | double sample = wav_in[sample_idx]; 55 | double abs_sample = fabs(sample); 56 | 57 | // Envelope tracking 58 | if (abs_sample > envelope) { 59 | envelope = attack_coeff * (envelope - abs_sample) + abs_sample; 60 | } else { 61 | envelope = release_coeff * (envelope - abs_sample) + abs_sample; 62 | } 63 | 64 | // Gain calculation for downward limiting 65 | double gain = 1.0; 66 | if (envelope > threshold) { 67 | gain = pow(10.0, -slope * (log10(envelope) - log10(threshold))); 68 | } 69 | 70 | // Upward compression below threshold 71 | double upward_compression_gain = 1.0; 72 | if (envelope < threshold && envelope > 0.0) { 73 | upward_compression_gain = pow(10.0, slope * (log10(threshold) - log10(envelope))); 74 | } 75 | 76 | // Apply both gains 77 | wav_out[sample_idx] = sample * gain * upward_compression_gain; 78 | } 79 | } -------------------------------------------------------------------------------- /core/kernels/limiter/limiter_updown_weighted_parallel.cu: -------------------------------------------------------------------------------- 1 | #ifndef M_PI 2 | #define M_PI 3.14159265358979323846 3 | #endif 4 | 5 | extern "C" __global__ 6 | void limiter_kernel( 7 | const double* __restrict__ wav_in, // Input audio signal [L0, R0, L1, R1, ..., LN-1, RN-1] 8 | double* __restrict__ wav_out, // Output audio signal 9 | const int n_channels, // Number of channels (e.g., 2 for stereo) 10 | const int n_samples, // Number of samples per channel 11 | const double threshold, // Threshold in linear scale (e.g., 0.5 for 50%) 12 | const double slope, // Slope parameter for gain calculation 13 | const double sr, // Sample rate in Hz 14 | const double tatt, // Attack time in ms 15 | const double trel // Release time in ms 16 | ) 17 | { 18 | int ch = blockIdx.x; // Each block processes one channel 19 | int thread_id = threadIdx.x; // Thread within the block 20 | int stride = blockDim.x; // Number of threads in the block 21 | 22 | if (ch >= n_channels) return; 23 | 24 | double attack_coeff = exp(-1.0 / (sr * (tatt * 1e-3))); 25 | double release_coeff = exp(-1.0 / (sr * (trel * 1e-3))); 26 | 27 | // Use release time to determine precompute samples 28 | int precompute_samples = int((trel * 1e-3) * sr); // Convert release time to samples 29 | 30 | // Each thread computes its own range of samples 31 | int start_idx = thread_id * (n_samples / stride); 32 | int end_idx = (thread_id + 1) * (n_samples / stride); 33 | 34 | // Extend range backward for precomputing 35 | int precompute_start_idx = max(0, start_idx - precompute_samples); 36 | 37 | double envelope = 0.0; // Envelope tracking state 38 | 39 | // Precompute envelope for the extra range 40 | for (int i = precompute_start_idx; i < start_idx; ++i) { 41 | int sample_idx = i * n_channels + ch; 42 | double sample = wav_in[sample_idx]; 43 | double abs_sample = fabs(sample); 44 | 45 | if (abs_sample > envelope) { 46 | envelope = attack_coeff * (envelope - abs_sample) + abs_sample; 47 | } else { 48 | envelope = release_coeff * (envelope - abs_sample) + abs_sample; 49 | } 50 | } 51 | 52 | // Process assigned range of samples 53 | for (int i = start_idx; i < end_idx; ++i) { 54 | int sample_idx = i * n_channels + ch; 55 | double sample = wav_in[sample_idx]; 56 | double abs_sample = fabs(sample); 57 | 58 | // Envelope tracking 59 | if (abs_sample > envelope) { 60 | envelope = attack_coeff * (envelope - abs_sample) + 
abs_sample;
        } else {
            envelope = release_coeff * (envelope - abs_sample) + abs_sample;
        }

        // Gain calculation for downward limiting
        double gain = 1.0;
        if (envelope > threshold) {
            gain = pow(10.0, -slope * (log10(envelope) - log10(threshold)));
        }

        // Upward compression below threshold with gradual application
        double upward_compression_gain = 1.0;
        if (envelope < threshold && envelope > 0.0) {
            double t = 1.0 - threshold;  // Scaling factor based on threshold
            // Dynamic factor increases as envelope decreases
            double dynamic_factor = (threshold - envelope) / threshold;
            // Clamp to [0, 1]
            dynamic_factor = fmin(fmax(dynamic_factor, 0.0), 1.0);
            upward_compression_gain = 1.0 + t * dynamic_factor * pow(10.0, slope * (log10(threshold) - log10(envelope)));
        }

        // Apply both gains
        wav_out[sample_idx] = sample * gain * upward_compression_gain;
    }
}
--------------------------------------------------------------------------------
/core/limiting.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Author: C0ffymachyne
License: GPLv3
Version: 1.0.0

Description:
    various limiting methods
"""

import cupy as cp
import torch
import numpy as np
from typing import List

from ..core.utilitiescuda import read_kernel_by_name

limiter_updown_parallel = read_kernel_by_name(
    "limiter_updown_parallel",
    kernel_class="limiter",
    kernel_identifier="limiter_kernel",
)
limiter_updown_weighted_parallel = read_kernel_by_name(
    "limiter_updown_weighted_parallel",
    kernel_class="limiter",
    kernel_identifier="limiter_kernel",
)
limiter_down_parallel = read_kernel_by_name(
    "limiter_down_parallel", kernel_class="limiter", kernel_identifier="limiter_kernel"
)
limiter_soft_clipper = read_kernel_by_name(
    "limiter_soft_clipper", kernel_class="limiter", kernel_identifier="limiter_kernel"
)
limiter_hard_clipper = read_kernel_by_name(
    "limiter_hard_clipper", kernel_class="limiter", kernel_identifier="limiter_kernel"
)
limiter_dev = read_kernel_by_name(
    "limiter_dev", kernel_class="limiter", kernel_identifier="limiter_kernel"
)

_limiter_kernel_map = {
    "downward-upward": limiter_updown_weighted_parallel,
    "downward": limiter_down_parallel,
    "soft-clipper": limiter_soft_clipper,
    "hard-clipper": limiter_hard_clipper,
}


def limiter_get_modes() -> List[str]:
    return list(_limiter_kernel_map.keys())


def limiter(
    audio_in: torch.Tensor | np.ndarray,
    mode: str = "downward",
    sample_rate: int = 44100,
    threshold: float = 0.5,  # Threshold in linear scale (0-1)
    slope: float = 1.0,  # Slope of the gain reduction curve
    attack_ms: float = 0.008,  # Attack time in ms
    release_ms: float = 100.0,  # Release time in ms
) -> torch.Tensor:
    """
    Limits stereo audio using an optimized CUDA kernel with envelope-follower gain control.

    Parameters:
        audio_in (torch.Tensor or np.ndarray): Input stereo audio signal with shape (2, n_samples), i.e. [channels, samples].
        mode (str): Limiter kernel to use; one of limiter_get_modes().
        sample_rate (int): Sampling rate in Hz.
        threshold (float): Threshold in linear scale (e.g., 0.5 for 50%).
        slope (float): Slope of the gain reduction curve.
        attack_ms (float): Attack time in milliseconds.
        release_ms (float): Release time in milliseconds.

    Returns:
        torch.Tensor: Limited stereo audio with shape (2, n_samples).
    """
    # Convert input to CPU double-precision NumPy array if necessary

    device = audio_in.device

    audio_in = audio_in.T
    if isinstance(audio_in, torch.Tensor):
        audio_in = audio_in.detach().cpu()
        audio_in = audio_in.numpy()
    else:
        audio_in = np.asarray(audio_in, dtype=np.float64)

    # Ensure the audio is in shape (n_samples, 2)
    if audio_in.ndim != 2 or audio_in.shape[1] != 2:
        raise ValueError(
            f"Input audio must have shape (n_samples, 2), but got {audio_in.shape}"
        )

    n_samples = audio_in.shape[0]
    n_channels = audio_in.shape[1]

    wav_in_flat = audio_in.flatten()

    wav_in_gpu = cp.asarray(wav_in_flat, dtype=cp.float64)
    wav_out_gpu = cp.zeros_like(wav_in_gpu)

    # Define grid and block dimensions
    block_size = 64
    grid_size = n_channels  # (n_channels + block_size - 1) // block_size
    shared_mem_size = n_channels * cp.float64().nbytes  # Shared memory for envelopes

    if mode not in limiter_get_modes():
        raise Exception(f"Limiter Kernel '{mode}' Not Found")

    kernel = _limiter_kernel_map[mode]

    kernel(
        (grid_size,),
        (block_size,),
        (
            wav_in_gpu,
            wav_out_gpu,
            np.int32(n_channels),
            np.int32(n_samples),
            np.float64(threshold),
            np.float64(slope),
            np.float64(sample_rate),
            np.float64(attack_ms),
            np.float64(release_ms),
        ),
        shared_mem=shared_mem_size,
    )

    # Retrieve results
    wav_out_host = wav_out_gpu.get().astype(np.float64)
    # Reshape the output
    wav_out_stereo = wav_out_host.reshape((n_samples, n_channels))

    # Move the result back to the device the input came from
    out_tensor = torch.from_numpy(wav_out_stereo).to(device)

    return out_tensor.T
--------------------------------------------------------------------------------
/core/loudness.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Author: C0ffymachyne
License: GPLv3
Version: 1.0.0

Description:
    various normalization methods
"""

import torch
import pyloudnorm as pyln  # pip install pyloudnorm
import numpy as np


def rms_normalization(audio: torch.Tensor, target_rms: float = 0.1) -> torch.Tensor:
    rms = torch.sqrt(torch.mean(audio**2))
    scaling_factor = target_rms / rms
    normalized_audio = audio * scaling_factor
    return normalized_audio


def lufs_normalization(
    audio: torch.Tensor, sample_rate: int, target_lufs: float = -14.0
) -> torch.Tensor:
    meter = pyln.Meter(sample_rate)  # Create a loudness meter

    __audio = audio.T.cpu().numpy()
    loudness
= meter.integrated_loudness(__audio) # Current LUFS 31 | 32 | loudness_offset = target_lufs - loudness 33 | normalized_audio = __audio * (10 ** (loudness_offset / 20.0)) 34 | 35 | result = torch.from_numpy(normalized_audio) 36 | result = result.T 37 | 38 | result = result.to(device=audio.device, dtype=result.dtype) 39 | 40 | return result 41 | 42 | 43 | def peak_normalization(audio: torch.Tensor, target_peak: float = 0.9) -> torch.Tensor: 44 | peak = torch.max(torch.abs(audio)) 45 | scaling_factor = target_peak / peak 46 | normalized_audio = audio * scaling_factor 47 | return normalized_audio 48 | 49 | 50 | def get_loudness(audio: torch.Tensor, sample_rate: int) -> float: 51 | meter = pyln.Meter(sample_rate) # Create a loudness meter 52 | audio = audio.T 53 | audio = audio.cpu() 54 | loudness: float = float(meter.integrated_loudness(audio.numpy())) # Current LUFS 55 | return loudness 56 | 57 | 58 | def set_loudness2( 59 | audio_signal: torch.Tensor, sample_rate: int, target_loudness_db: float = -20.0 60 | ) -> torch.Tensor: 61 | """ 62 | Adjusts the loudness of the audio signal to a target level in decibels. 63 | 64 | Args: 65 | audio_signal (torch.Tensor): Input audio signal (channels, samples). 66 | sample_rate (int): Sample rate of the audio signal. 67 | target_loudness_db (float): Desired loudness in dB (e.g., -20.0 dB). 68 | 69 | Returns: 70 | torch.Tensor: Audio signal adjusted to the target loudness. 71 | """ 72 | # Convert PyTorch tensor to NumPy array for loudness calculation 73 | audio_np = audio_signal.cpu().numpy().T # Convert to [samples, channels] 74 | 75 | # Use pyloudnorm Meter to calculate and normalize loudness 76 | meter = pyln.Meter(sample_rate) # Create loudness meter 77 | current_loudness = meter.integrated_loudness(audio_np) # Measure LUFS 78 | 79 | # Compute loudness adjustment gain 80 | loudness_offset = target_loudness_db - current_loudness 81 | gain_factor = 10 ** (loudness_offset / 20.0) 82 | 83 | # Apply gain to adjust loudness 84 | adjusted_audio_np = audio_np * gain_factor 85 | 86 | # Convert back to PyTorch tensor 87 | adjusted_audio = torch.from_numpy(adjusted_audio_np.T).to( 88 | audio_signal.device, dtype=torch.float32 89 | ) 90 | 91 | return adjusted_audio 92 | 93 | 94 | def set_loudness( 95 | audio_signal: torch.Tensor, sample_rate: int, target_loudness_db: float = -20.0 96 | ) -> torch.Tensor: 97 | """ 98 | Adjusts the loudness of the audio signal to a target level in decibels, 99 | ensuring no clipping occurs. 100 | 101 | Args: 102 | audio_signal (torch.Tensor): Input audio signal (channels, samples). 103 | sample_rate (int): Sample rate of the audio signal. 104 | target_loudness_db (float): Desired loudness in dB (e.g., -20.0 dB). 105 | 106 | Returns: 107 | torch.Tensor: Audio signal adjusted to the target loudness. 
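Example:
    A minimal sketch with illustrative values (a quiet random stereo signal pushed to -23 LUFS)::

        >>> import torch
        >>> audio = torch.randn(2, 44100) * 0.25
        >>> adjusted = set_loudness(audio, 44100, target_loudness_db=-23.0)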
108 | """ 109 | # Convert PyTorch tensor to NumPy array for loudness calculation 110 | audio_np = audio_signal.cpu().numpy().T # Convert to [samples, channels] 111 | 112 | # Use pyloudnorm Meter to calculate and normalize loudness 113 | meter = pyln.Meter(sample_rate) # Create loudness meter 114 | current_loudness = meter.integrated_loudness(audio_np) # Measure LUFS 115 | 116 | # Compute loudness adjustment gain 117 | loudness_offset = target_loudness_db - current_loudness 118 | gain_factor = 10 ** (loudness_offset / 20.0) 119 | 120 | # Apply gain to adjust loudness 121 | adjusted_audio_np = audio_np * gain_factor 122 | 123 | # Prevent clipping by normalizing the peak 124 | peak_amplitude = np.max(np.abs(adjusted_audio_np)) 125 | if peak_amplitude > 1.0: 126 | adjusted_audio_np = adjusted_audio_np / peak_amplitude 127 | 128 | # Convert back to PyTorch tensor 129 | adjusted_audio = torch.from_numpy(adjusted_audio_np.T).to( 130 | audio_signal.device, dtype=torch.float32 131 | ) 132 | 133 | return adjusted_audio 134 | 135 | 136 | def automatic_gain_control( 137 | audio: torch.Tensor, target_level: float = 0.7, alpha: float = 0.1 138 | ) -> torch.Tensor: 139 | current_level = torch.mean(torch.abs(audio)) 140 | gain = target_level / (current_level + 1e-6) 141 | smoothed_gain = alpha * gain + (1 - alpha) * 1.0 142 | agc_audio = audio * smoothed_gain 143 | return agc_audio 144 | -------------------------------------------------------------------------------- /core/mixing.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | audio mixing and combining methods 10 | """ 11 | 12 | import torch 13 | 14 | 15 | def combine_audio_files( 16 | waveform_a: torch.Tensor, 17 | waveform_b: torch.Tensor, 18 | sample_rate: int, 19 | chunk_duration: float = 2.0, 20 | ) -> torch.Tensor: 21 | """ 22 | Combine two audio files by alternating 2-second chunks, cropping to the shorter audio. 23 | 24 | Args: 25 | waveform_a (torch.Tensor): Tensor of the first audio file (channels x samples). 26 | waveform_b (torch.Tensor): Tensor of the second audio file (channels x samples). 27 | sample_rate (int): Sample rate of the audio files. 28 | chunk_duration (float): Duration of each chunk in seconds (default is 2 seconds). 29 | 30 | Returns: 31 | torch.Tensor: Combined waveform. 
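Example (illustrative; two six-second stereo buffers):
    >>> import torch
    >>> sr = 44100
    >>> a, b = torch.randn(2, sr * 6), torch.randn(2, sr * 6)
    >>> combined = combine_audio_files(a, b, sr, chunk_duration=2.0)

Note that both inputs contribute a chunk in every loop iteration, so the
combined waveform is roughly twice the length of the cropped inputs.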
32 | """ 33 | # Crop to the shorter length 34 | min_length = min(waveform_a.shape[1], waveform_b.shape[1]) 35 | waveform_a = waveform_a[:, :min_length] 36 | waveform_b = waveform_b[:, :min_length] 37 | 38 | # Calculate chunk size in samples 39 | chunk_size = int(chunk_duration * sample_rate) 40 | 41 | # Determine the total number of samples 42 | total_samples = waveform_a.shape[1] 43 | 44 | # Initialize the output waveform 45 | combined_waveform = [] 46 | 47 | # Alternate chunks between the two audio files 48 | for start in range(0, total_samples, chunk_size): 49 | end = min(start + chunk_size, total_samples) 50 | combined_waveform.append(waveform_a[:, start:end]) 51 | combined_waveform.append(waveform_b[:, start:end]) 52 | 53 | # Concatenate the combined waveform 54 | combined_waveform = torch.cat(combined_waveform, dim=1) 55 | 56 | return combined_waveform 57 | -------------------------------------------------------------------------------- /core/plotting.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | various plotting methods for debugging and visualization 10 | """ 11 | import numpy as np 12 | import torch 13 | import torchaudio 14 | from PIL import Image 15 | import matplotlib.pyplot as plt 16 | from typing import Tuple, List 17 | from matplotlib.figure import Figure 18 | 19 | 20 | def _figure_to_image(figure: Figure, dpi: int = 96) -> Image.Image: 21 | """Convert a Matplotlib figure to a high-resolution RGB PIL Image.""" 22 | figure.set_dpi(dpi) 23 | figure.canvas.draw() 24 | data = np.frombuffer(figure.canvas.tostring_argb(), dtype=np.uint8) 25 | width, height = figure.canvas.get_width_height() 26 | image = data.reshape((height, width, 4)) # ARGB format 27 | 28 | # Convert ARGB to RGB by dropping the alpha channel 29 | rgb_image = np.zeros((height, width, 3), dtype=np.uint8) 30 | rgb_image[:, :, 0] = image[:, :, 1] # Red 31 | rgb_image[:, :, 1] = image[:, :, 2] # Green 32 | rgb_image[:, :, 2] = image[:, :, 3] # Blue 33 | 34 | return Image.fromarray(rgb_image) 35 | 36 | 37 | def get_wave( 38 | waveform: torch.Tensor, 39 | sample_rate: int, 40 | title: str = "Waveform", 41 | xlim: int = 1000, 42 | ylim: int = 1000, 43 | ) -> Image.Image: 44 | waveform = waveform.cpu().numpy() 45 | 46 | num_channels, num_frames = waveform.shape 47 | time_axis = np.arange(0, num_frames) / sample_rate 48 | 49 | figure, axes = plt.subplots(num_channels, 1) 50 | if num_channels == 1: 51 | axes = [axes] 52 | for c in range(num_channels): 53 | axes[c].plot(time_axis, waveform[c], linewidth=1) 54 | axes[c].grid(True) 55 | if num_channels > 1: 56 | axes[c].set_ylabel(f"Channel {c+1}") 57 | if xlim: 58 | axes[c].set_xlim(0, xlim) 59 | if ylim: 60 | axes[c].set_ylim(-ylim, ylim) 61 | 62 | 63 | figure.suptitle(title) 64 | 65 | waveform_image = _figure_to_image(figure) 66 | 67 | return waveform_image 68 | 69 | 70 | def get_spectogram( 71 | waveform: torch.Tensor, 72 | sample_rate: int, 73 | n_fft: int = 4096, 74 | n_mels: int = 512, 75 | title: str = "Spectrogram", 76 | xlim: int = 8192, 77 | dpi: int = 96, # Set a high DPI for better image resolution 78 | ) -> Image.Image: 79 | """Generate and plot a high-resolution mel spectrogram from a waveform.""" 80 | 81 | # Parameters for Mel Spectrogram 82 | win_length = n_fft // 2 83 | hop_length = n_fft // 4 # Smaller hop for better time resolution 84 | 85 | spectrogram_transform = torchaudio.transforms.MelSpectrogram( 86 |
sample_rate=sample_rate, 87 | n_fft=n_fft, 88 | win_length=win_length, 89 | hop_length=hop_length, 90 | center=True, 91 | pad_mode="reflect", 92 | normalized=True, 93 | power=2.0, # Using power spectrogram 94 | norm="slaney", 95 | n_mels=n_mels, 96 | mel_scale="slaney", 97 | ).to(waveform.device, dtype=waveform.dtype) 98 | 99 | # Compute spectrogram 100 | mel_spectrogram = spectrogram_transform(waveform).cpu() 101 | 102 | # Convert to decibel scale for better visualization 103 | spectrogram = torchaudio.transforms.AmplitudeToDB(top_db=80)( 104 | mel_spectrogram 105 | ).numpy() 106 | 107 | # Plot the spectrogram 108 | num_channels, _ = waveform.shape 109 | figure, axes = plt.subplots( 110 | num_channels, 1, figsize=(20, 10 * num_channels), squeeze=False, dpi=dpi 111 | ) 112 | figure.suptitle(title, fontsize=16) 113 | 114 | for i, ax in enumerate(axes[:, 0]): # Unpack axes 115 | ax.imshow( 116 | spectrogram[i], 117 | origin="lower", 118 | aspect="auto", 119 | extent=[0, xlim, 0, sample_rate / 2], 120 | cmap="magma", 121 | ) 122 | ax.set_title(f"Channel {i + 1}", fontsize=14) 123 | ax.set_xlabel("Time (frames)", fontsize=12) 124 | ax.set_ylabel("Frequency (Hz)", fontsize=12) 125 | ax.tick_params(axis="both", which="major", labelsize=10) 126 | 127 | plt.tight_layout(rect=[0, 0.03, 1, 0.95]) 128 | 129 | # Convert the figure to a high-resolution image 130 | waveform_image = _figure_to_image(figure, dpi=dpi) 131 | plt.close(figure) # Close the figure to free up memory 132 | return waveform_image 133 | 134 | 135 | def save_image(filepath: str, image: Image.Image) -> None: 136 | image.save(filepath, format="PNG") # PNG is lossless; no quality setting applies 137 | 138 | 139 | def save_harmonic_spectrum( 140 | waveform: torch.Tensor, 141 | sample_rate: int, 142 | output_image: str, 143 | figsize: Tuple[int, int] = (12, 6), 144 | num_harmonics: int = 10, 145 | ) -> None: 146 | """ 147 | Generate the harmonic spectrum of a waveform and save it as an image. 148 | 149 | Parameters: 150 | waveform (torch.Tensor): Audio waveform tensor of shape (channels, samples). 151 | sample_rate (int): Sample rate of the audio. 152 | output_image (str): Path to save the output image. 153 | figsize (tuple): Size of the output figure in inches. 154 | num_harmonics (int): Number of harmonics to calculate.
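Example (illustrative; a pure 440 Hz tone keeps the expected harmonic locations obvious):
    >>> import torch
    >>> sr = 44100
    >>> t = torch.arange(sr) / sr
    >>> tone = torch.sin(2 * torch.pi * 440.0 * t).unsqueeze(0)
    >>> save_harmonic_spectrum(tone, sr, "harmonics.png", num_harmonics=8)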
155 | 156 | Returns: 157 | None 158 | """ 159 | # Ensure mono audio (combine channels if necessary) 160 | if waveform.size(0) > 1: 161 | waveform = torch.mean(waveform, dim=0, keepdim=True) 162 | 163 | # Perform FFT to get the frequency domain 164 | fft = torch.fft.fft(waveform) 165 | magnitude = torch.abs(fft[0]) # Magnitude of the FFT 166 | frequencies = torch.fft.fftfreq(waveform.size(1), d=1 / sample_rate).to( 167 | device=waveform.device 168 | ) 169 | 170 | # Extract the fundamental frequency 171 | fundamental_idx = torch.argmax(magnitude[: len(magnitude) // 2]) 172 | fundamental_freq = frequencies[fundamental_idx] 173 | 174 | # Calculate harmonic frequencies (clamp indices so high harmonics cannot run past the FFT) 175 | harmonic_frequencies = [fundamental_freq * (i + 1) for i in range(num_harmonics)] 176 | harmonic_amplitudes = [ 177 | magnitude[min(int(harmonic / sample_rate * len(magnitude)), len(magnitude) - 1)] 178 | for harmonic in harmonic_frequencies 179 | ] 180 | harmonic_frequencies = torch.tensor(harmonic_frequencies).cpu().numpy() 181 | harmonic_amplitudes = torch.tensor(harmonic_amplitudes).cpu().numpy() 182 | 183 | # Plot the harmonic spectrum 184 | plt.figure(figsize=figsize) 185 | plt.plot(harmonic_frequencies, harmonic_amplitudes, color="blue", linewidth=2) 186 | plt.title("Harmonic Spectrum") 187 | plt.xlabel("Frequency (Hz)") 188 | plt.ylabel("Amplitude") 189 | plt.grid(True) 190 | 191 | # Save the image 192 | plt.tight_layout() 193 | plt.savefig(output_image, dpi=300) 194 | plt.close() 195 | 196 | 197 | PREDEFINED_COLORS = [ 198 | "#1f77b4", 199 | "#ff7f0e", 200 | "#2ca02c", 201 | "#d62728", 202 | "#9467bd", 203 | "#8c564b", 204 | "#e377c2", 205 | "#7f7f7f", 206 | "#bcbd22", 207 | "#17becf", 208 | ] 209 | 210 | 211 | def plot_multiple_harmonic_spectra( 212 | audio_data: List[Tuple[torch.Tensor, int, str]], 213 | output_image: str, 214 | figsize: Tuple[int, int] = (12, 6), 215 | num_harmonics: int = 16, 216 | upper_bound: int = 140000, 217 | title: str = "Harmonic Spectrum", 218 | ) -> None: 219 | """ 220 | Plot high-resolution harmonic spectra for multiple audio waveforms. 221 | 222 | Parameters: 223 | audio_data (list): List of (waveform, sample_rate, label) tuples; each waveform is 1-D mono or (channels, samples). 224 | output_image (str): Path to save the combined output image. 225 | figsize (tuple): Size of the figure. 226 | num_harmonics (int): Number of harmonics to calculate. 227 | upper_bound (int): Maximum frequency to display. 228 | title (str): Title of the plot.
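Example (illustrative; compares a pure tone against a saturated copy):
    >>> import torch
    >>> sr = 44100
    >>> t = torch.arange(sr) / sr
    >>> dry = torch.sin(2 * torch.pi * 220.0 * t)  # 1-D mono signal
    >>> wet = torch.tanh(3.0 * dry)
    >>> plot_multiple_harmonic_spectra(
    ...     [(dry, sr, "dry"), (wet, sr, "tanh")],
    ...     "spectra.png",
    ...     num_harmonics=12,
    ...     upper_bound=20000,
    ... )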
229 | """ 230 | plt.figure(figsize=figsize) 231 | 232 | for idx, (waveform, sample_rate, label) in enumerate(audio_data): 233 | # Ensure mono 1-D audio (average multi-channel input down to one channel) 234 | if waveform.dim() > 1: 235 | waveform = torch.mean(waveform, dim=0) 236 | 237 | # Move waveform to GPU for efficient computation 238 | device = ( 239 | waveform.device 240 | if waveform.is_cuda 241 | else "cuda" if torch.cuda.is_available() else "cpu" 242 | ) 243 | waveform = waveform.to(device) 244 | 245 | # FFT at the signal length (no zero-padding; raise the multiplier for finer frequency resolution) 246 | n_fft = 1 * waveform.size(0) 247 | fft = torch.fft.fft(waveform, n=n_fft) 248 | magnitude = torch.abs(fft[: n_fft // 2]) 249 | frequencies = torch.fft.fftfreq(n_fft, d=1 / sample_rate, device=device)[: n_fft // 2] 250 | 251 | # Find fundamental frequency 252 | fundamental_idx = torch.argmax( 253 | magnitude[: len(magnitude) // 4] 254 | ) # Search in the first quarter 255 | fundamental_freq = frequencies[fundamental_idx] 256 | 257 | # Precompute harmonic frequencies (indices clamped to the available bins) 258 | harmonic_freqs = fundamental_freq * torch.arange( 259 | 1, num_harmonics + 1, device=device 260 | ) 261 | harmonic_indices = (harmonic_freqs / (sample_rate / n_fft)).long().clamp(max=len(magnitude) - 1) 262 | harmonic_amplitudes = magnitude[harmonic_indices].cpu().numpy() 263 | 264 | # Limit harmonic frequencies to upper bound 265 | harmonic_freqs = harmonic_freqs[harmonic_freqs <= upper_bound].cpu().numpy() 266 | harmonic_amplitudes = harmonic_amplitudes[: len(harmonic_freqs)] 267 | 268 | # Convert data to CPU for plotting 269 | frequencies_cpu = frequencies.cpu().numpy() 270 | magnitude_cpu = magnitude.cpu().numpy() 271 | 272 | # Plot the full spectrum 273 | color = PREDEFINED_COLORS[idx % len(PREDEFINED_COLORS)] 274 | plt.plot( 275 | frequencies_cpu, 276 | magnitude_cpu, 277 | color=color, 278 | alpha=0.4, 279 | label=f"Full Spectrum ({label})", 280 | ) 281 | 282 | # Overlay harmonic peaks 283 | plt.vlines( 284 | harmonic_freqs, 285 | ymin=0, 286 | ymax=harmonic_amplitudes, 287 | color=color, 288 | linewidth=1.5, 289 | linestyle="--", 290 | label=f"Harmonics ({label})", 291 | ) 292 | 293 | plt.title(title) 294 | plt.xscale("log") 295 | plt.xlim(20, upper_bound) 296 | plt.xlabel("Frequency (Hz)") 297 | plt.ylabel("Amplitude") 298 | plt.legend() 299 | plt.grid(True, which="both", linestyle="--", linewidth=0.5) 300 | 301 | # Save and close the figure 302 | plt.tight_layout() 303 | plt.savefig(output_image, dpi=150) 304 | plt.close() 305 | -------------------------------------------------------------------------------- /core/sampling.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | utility functions for sampling 10 | """ 11 | 12 | import torch 13 | import torchaudio 14 | from typing import Tuple 15 | 16 | def oversample( 17 | audio_signal: torch.Tensor, sample_rate: int, factor: int = 8 18 | ) -> Tuple[torch.Tensor, int]: 19 | 20 | resampler = torchaudio.transforms.Resample(sample_rate, sample_rate * factor).to( 21 | device=audio_signal.device, dtype=audio_signal.dtype 22 | ) 23 | return resampler(audio_signal), sample_rate * factor 24 | 25 | 26 | def downsample( 27 | audio_signal: torch.Tensor, sample_rate: int, factor: int = 8 28 | ) -> torch.Tensor: 29 | 30 | resampler = torchaudio.transforms.Resample(sample_rate, sample_rate // factor).to( 31 | device=audio_signal.device, dtype=audio_signal.dtype 32 | ) 33 | return
resampler(audio_signal) 34 | -------------------------------------------------------------------------------- /core/saturation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | various saturation and waveshaping methods 10 | """ 11 | 12 | import torch 13 | import torch.nn as nn 14 | from torch.nn import Module 15 | from ..core.sampling import oversample, downsample 16 | from ..core.harmonics import enahnce_harmonics_23 17 | 18 | from typing import List 19 | 20 | 21 | def sigmoid_saturation(audio: torch.Tensor, drive: float = 50.0) -> torch.Tensor: 22 | """ 23 | Apply sigmoid saturation with drive control. 24 | Parameters: 25 | audio: [channels, samples] input audio signal in the range [-1, 1]. 26 | drive: Controls the steepness of the sigmoid in range [0, 100.0]. 27 | Returns: 28 | Saturated audio signal. 29 | """ 30 | k = 0.1 + (drive / 100.0) * 10 # Map drive to steepness control 31 | normalized = (audio + 1) / 2 32 | saturated = 1 / (1 + torch.exp(-k * (normalized - 0.5))) 33 | return 2 * saturated - 1 34 | 35 | 36 | def tanh_saturation(audio: torch.Tensor, drive: float = 50.0) -> torch.Tensor: 37 | """ 38 | Apply tanh saturation with drive control. 39 | Parameters: 40 | audio: Input audio signal (Tensor). 41 | drive: Controls the strength of the tanh effect in range [0, 100.0]. 42 | Returns: 43 | Saturated audio signal. 44 | """ 45 | 46 | audio = audio / torch.max(torch.abs(audio)).clamp_min(1e-12) # peak-normalize; guard against all-zero input 47 | k = 0.1 + (drive / 100.0) * 10 # Map drive to scaling factor 48 | k_tensor = torch.tensor( 49 | k, dtype=audio.dtype, device=audio.device 50 | ) # Convert k to Tensor 51 | return torch.tanh( 52 | k_tensor * audio 53 | ) # / torch.tanh(k_tensor) # Normalize output range 54 | 55 | 56 | def poly_saturation(audio: torch.Tensor, drive: float = 50.0) -> torch.Tensor: 57 | """ 58 | Apply cubic polynomial saturation with drive control. 59 | Parameters: 60 | audio: Input audio signal. 61 | drive: Controls the strength of the cubic term in range [0, 100.0]. 62 | Returns: 63 | Saturated audio signal. 64 | """ 65 | c3 = 0.01 + (drive / 100.0) * 0.3 # Map drive to nonlinearity strength 66 | return audio - c3 * audio**3 67 | 68 | 69 | def logarithmic_mapping(audio: torch.Tensor, drive: float = 50.0) -> torch.Tensor: 70 | """ 71 | Apply logarithmic mapping with drive control. 72 | Parameters: 73 | audio: Input audio signal. 74 | drive: Controls the scaling of the logarithmic mapping. 75 | Returns: 76 | Saturated audio signal.
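Example (illustrative; drive=50 maps to max_value = 0.1 + 0.5 * 10 = 5.1):
    >>> import torch
    >>> x = torch.linspace(-1.0, 1.0, 5)
    >>> y = logarithmic_mapping(x, drive=50.0)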
77 | """ 78 | max_value = 0.1 + (drive / 100.0) * 10 # Map drive to maximum scaling 79 | return ( 80 | torch.sign(audio) 81 | * torch.log1p(torch.abs(audio * max_value)) 82 | / torch.log1p(torch.tensor(max_value)) 83 | ) 84 | 85 | 86 | class Saturator(Module): 87 | @staticmethod 88 | def get_modes() -> List[str]: 89 | return ["poly", "tanh", "sig", "log"] # modes implemented in forward() 90 | 91 | def __init__( 92 | self, 93 | drive: float = 0.5, 94 | order: int = 3, 95 | sample_rate: int = 48000, 96 | mode: str = "poly", 97 | oversample_factor: int = 4, 98 | ): 99 | super(Saturator, self).__init__() 100 | self.order: int = order # third-order polynomial approximation by default 101 | self.sample_rate: int = sample_rate 102 | self.mode: str = mode 103 | self.drive: float = drive 104 | self.oversample_factor: int = oversample_factor 105 | self.harmonics_level: float = 0.0 106 | 107 | 108 | 109 | # Input scaling and output gain (adjust as needed) 110 | self.input_scale = nn.Parameter(torch.tensor(1.0, dtype=torch.float32)) 111 | self.output_gain = nn.Parameter(torch.tensor(1.0, dtype=torch.float32)) 112 | 113 | def forward(self, x: torch.Tensor) -> torch.Tensor: 114 | 115 | y, sample_rate = oversample(x, self.sample_rate, factor=self.oversample_factor) 116 | 117 | y = enahnce_harmonics_23(y, sample_rate, gain_db_base=self.harmonics_level) 118 | 119 | if self.mode == "poly": 120 | y = poly_saturation(y, drive=self.drive) 121 | elif self.mode == "tanh": 122 | y = tanh_saturation(y, drive=self.drive) 123 | elif self.mode == "sig": 124 | y = sigmoid_saturation(y, drive=self.drive) 125 | elif self.mode == "log": 126 | y = logarithmic_mapping(y, drive=self.drive) 127 | 128 | y = downsample(y, sample_rate, factor=self.oversample_factor) # sample_rate here is the oversampled rate 129 | 130 | return y 131 | 132 | 133 | def saturator_get_modes() -> List[str]: 134 | return ["poly", "tanh", "sig", "log"] 135 | 136 | 137 | def saturator( 138 | audio_in: torch.Tensor, 139 | mode: str = "poly", 140 | sample_rate: int = 44100, 141 | drive: float = 1.5, 142 | oversample_factor: int = 4, 143 | harmonics_level: float = 1.2, 144 | ) -> torch.Tensor: 145 | y = audio_in.clone() 146 | # loudness = get_loudness(audio_in, sample_rate) 147 | # y = automatic_gain_control(audio_in) 148 | # y = y*drive_pre 149 | y, _sample_rate = oversample(y, sample_rate, factor=oversample_factor) 150 | 151 | # y = enahnce_harmonics_23(y, _sample_rate, gain_db_base=harmonics_level) 152 | 153 | if mode == "poly": 154 | drive = drive * 2 155 | y = poly_saturation(y, drive=drive) 156 | elif mode == "tanh": 157 | drive = drive / 3 158 | y = tanh_saturation(y, drive=drive) 159 | elif mode == "sig": 160 | y = sigmoid_saturation(y, drive=drive) 161 | elif mode == "log": 162 | y = logarithmic_mapping(y, drive=drive) 163 | 164 | y = downsample(y, sample_rate, factor=oversample_factor) # same resample ratio as oversample(), so this returns y to the original rate 165 | # y = lufs_normalization(y, sample_rate, loudness) 166 | 167 | return y 168 | -------------------------------------------------------------------------------- /core/tests.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | # Reusable constants 4 | DATA_ROOT = Path("ComfyUI_SignalProcessing/audio") 5 | 6 | 7 | def get_output_file_path(output_root: Path, test_name: str, mode: str) -> Path: 8 | """Generate output file path for a given test.""" 9 | return output_root / f"{test_name}-{mode}.wav" 10 | -------------------------------------------------------------------------------- /core/utilities.py:
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | various development utilities 10 | """ 11 | 12 | import os 13 | import sys 14 | import re 15 | 16 | # Regex matching a versioned ComfyUI install directory (e.g. ComfyUI-0.3.10) 17 | pattern = r"^ComfyUI-\d+\.\d+\.\d+$" 18 | 19 | this_file_directory = os.path.dirname(os.path.realpath(__file__)) 20 | 21 | 22 | def find_comfy_root() -> str: 23 | path = os.fspath(this_file_directory) 24 | 25 | # realpath() yields '/'-separated paths on POSIX; walk upwards until a 26 | # directory name matches the ComfyUI version pattern 27 | tokens = path.split("/") 28 | while not re.match(pattern, tokens[-1]): 29 | tokens.pop(-1) 30 | 31 | path = "/".join(tokens) 32 | return path 33 | 34 | 35 | 36 | 37 | # Add ComfyUI to sys.path for local development only. 38 | # Finds the ComfyUI root by walking upwards until a directory matches the regex. 39 | # Enable with: export coffy_local_dev=1 40 | def comfy_root_to_syspath() -> None: 41 | if os.environ.get("coffy_local_dev") != "1": 42 | return 43 | 44 | path = find_comfy_root() 45 | if path not in sys.path: 46 | sys.path.insert(0, path) 47 | 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /core/utilitiescuda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | utilities for loading and compiling CUDA kernels with CuPy 10 | """ 11 | 12 | import os 13 | import cupy as cp 14 | 15 | 16 | def read_kernel_by_name( 17 | kernel_name: str, 18 | kernel_class: str = "limiter", 19 | kernel_identifier: str = "limiter_kernel", 20 | ) -> cp.RawKernel: 21 | this_directory = os.path.dirname(os.path.abspath(__file__)) 22 | kernel_relativepath = f"kernels/{kernel_class}/{kernel_name}.cu" 23 | kernel_filepath = os.path.join(this_directory, kernel_relativepath) 24 | print(f"Loading CUDA kernel...
{kernel_relativepath}") 25 | 26 | if not os.path.exists(kernel_filepath): 27 | raise FileNotFoundError(f"Kernel file not found: {kernel_filepath}") 28 | 29 | with open(kernel_filepath, "r", encoding="utf-8") as file: # Open as text 30 | code = file.read() # Read kernel source code as string 31 | # Pass code to RawKernel 32 | return cp.RawKernel(code=code, name=kernel_identifier, backend="nvrtc") 33 | -------------------------------------------------------------------------------- /data/widener/init_vn_filters.txt: -------------------------------------------------------------------------------- 1 | 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0580 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.5971 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.4181 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.2201 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0441 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0358 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0774 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0320 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 
0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0071 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0039 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0097 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0019 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0012 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0032 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 2 | -1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.9928 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.5530 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.4268 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0525 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 
0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.1981 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.1238 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0155 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0339 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0053 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0039 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0042 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0014 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0036 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0026 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 
0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 3 | -------------------------------------------------------------------------------- /data/widener/opt_vn_filters.txt: -------------------------------------------------------------------------------- 1 | 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0580 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.5971 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.4181 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.2201 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0441 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0358 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0774 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0320 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0071 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 
0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0039 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0097 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0019 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0012 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0032 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 2 | -1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.9928 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.5530 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.4268 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0525 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.1981 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 
0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.1238 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0155 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0339 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0053 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0039 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0042 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0014 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0036 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0026 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 3 | -------------------------------------------------------------------------------- /effects/SignalProcessingConvolutionReverb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 
5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Convolution Reverb 10 | """ 11 | 12 | import os 13 | import torch 14 | import torchaudio 15 | import torch.nn.functional as F 16 | 17 | from typing import Any, Dict, Tuple, Union 18 | 19 | from ..core.utilities import comfy_root_to_syspath 20 | from ..core.io import audio_from_comfy_2d, audio_to_comfy_3d, from_disk_as_raw_2d 21 | from ..core.loudness import lufs_normalization, get_loudness 22 | import folder_paths 23 | 24 | comfy_root_to_syspath() # add comfy to sys path for dev 25 | 26 | 27 | class SignalProcessingConvolutionReverb: 28 | supported_formats = [".wav", ".mp3", ".ogg", ".m4a", ".flac", ".mp4"] 29 | this_directory = os.path.dirname(os.path.realpath(__file__)) 30 | ir_directory = os.path.join(os.path.split(this_directory)[0], "audio", "ir") 31 | 32 | @classmethod 33 | def INPUT_TYPES(cls) -> Dict[str, Any]: 34 | 35 | files, _ = folder_paths.recursive_search( 36 | SignalProcessingConvolutionReverb.ir_directory 37 | ) 38 | 39 | ir_files = [] 40 | for file in files: 41 | try: 42 | _, ext = os.path.splitext(file) 43 | if ext in SignalProcessingConvolutionReverb.supported_formats: 44 | ir_files.append(file) 45 | except Exception: 46 | pass 47 | 48 | return { 49 | "required": { 50 | "impulse_response": (sorted(ir_files),), 51 | "audio_input": ("AUDIO",), 52 | "wet_dry": ( 53 | "FLOAT", 54 | { 55 | "default": 0.5, 56 | "min": 0.0, 57 | "max": 1.0, 58 | "step": 0.01, 59 | }, 60 | ), 61 | }, 62 | } 63 | 64 | RETURN_TYPES = ("AUDIO",) 65 | RETURN_NAMES = ("audio",) 66 | CATEGORY = "Signal Processing" 67 | FUNCTION = "process" 68 | 69 | def process( 70 | self, 71 | impulse_response: str, 72 | audio_input: Dict[str, Union[torch.Tensor, int]], 73 | wet_dry: float, 74 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 75 | 76 | try_gpu: bool = True 77 | repeat: bool = True 78 | 79 | waveform, sample_rate = audio_from_comfy_2d( 80 | audio_input, repeat=repeat, try_gpu=try_gpu 81 | ) 82 | 83 | loudness = get_loudness(waveform, sample_rate) 84 | 85 | ir_filepath = os.path.join( 86 | SignalProcessingConvolutionReverb.ir_directory, impulse_response 87 | ) 88 | 89 | ir, ir_sr = from_disk_as_raw_2d(ir_filepath, repeat=repeat, try_gpu=try_gpu) 90 | 91 | # Resample IR if sampling rates do not match 92 | if ir_sr != sample_rate: 93 | resampler = torchaudio.transforms.Resample( 94 | orig_freq=ir_sr, new_freq=sample_rate 95 | ).to(ir.device, dtype=waveform.dtype) 96 | ir = resampler(ir) 97 | ir_sr = sample_rate 98 | 99 | # Match IR channel count to the input: downmix stereo IRs for mono audio, duplicate mono IRs for stereo audio 100 | if waveform.shape[0] == 1 and ir.shape[0] == 2: 101 | ir = ir.mean(dim=0, keepdim=True) 102 | if waveform.shape[0] == 2 and ir.shape[0] == 1: 103 | ir = ir.repeat(2, 1) 104 | 105 | processed_audio = self.apply_reverb(waveform, sample_rate, ir, wet_dry=wet_dry) 106 | processed_audio = lufs_normalization(processed_audio, sample_rate, loudness) 107 | 108 | return audio_to_comfy_3d(processed_audio, sample_rate, cpu=True) 109 | 110 | def apply_reverb( 111 | self, audio: torch.Tensor, sr: int, ir: torch.Tensor, wet_dry: float = 0.5 112 | ) -> torch.Tensor: 113 | 114 | num_audio_channels, audio_length = audio.shape 115 | num_ir_channels, ir_length = ir.shape 116 | 117 | # Normalize IR to prevent amplification 118 | ir = ir / torch.max(torch.abs(ir)) if torch.max(torch.abs(ir)) > 0 else ir 119 | 120 | # Initialize list to hold processed channels 121 | processed_channels = [] 122 | 123 | # Apply convolution per channel 124 | for channel in range(num_audio_channels): 125 | # Get the current
audio and IR channel 126 | audio_channel = audio[channel].unsqueeze(0).unsqueeze(0) # Shape: [1, 1, N] 127 | ir_channel = ( 128 | ir[channel].flip(0).unsqueeze(0).unsqueeze(0) 129 | ) # Reverse IR, Shape: [1, 1, M] 130 | 131 | # Perform convolution 132 | convolved = F.conv1d( 133 | audio_channel, ir_channel, padding=ir_length - 1 134 | ) # Shape: [1, 1, N + M -1] 135 | 136 | # Remove batch and channel dimensions 137 | convolved = convolved.squeeze(0).squeeze(0) # Shape: [N + M -1] 138 | 139 | # Trim convolved signal to original audio length 140 | convolved = convolved[:audio_length] 141 | 142 | # Normalize convolved signal to prevent clipping 143 | max_val = torch.max(torch.abs(convolved)) 144 | if max_val > 0: 145 | convolved = convolved / max_val 146 | 147 | # Apply wet/dry mix 148 | dry = 1 - wet_dry 149 | wet = wet_dry 150 | processed = dry * audio[channel] + wet * convolved 151 | 152 | # Prevent clipping by normalizing if necessary 153 | processed_max = torch.max(torch.abs(processed)) 154 | if processed_max > 1.0: 155 | processed = processed / processed_max 156 | 157 | # Append processed channel 158 | processed_channels.append(processed) 159 | 160 | # Stack channels back into a tensor 161 | processed_audio = torch.stack(processed_channels) # Shape: [2, N] 162 | 163 | return processed_audio 164 | -------------------------------------------------------------------------------- /effects/SignalProcessingPaulStretch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | This is a port of Paul's Extreme Sound Stretch (Paulstretch) - by Nasca Octavian PAUL 10 | http://www.paulnasca.com/ 11 | http://hypermammut.sourceforge.net/paulstretch/ 12 | https://github.com/paulnasca/paulstretch_python 13 | https://github.com/paulnasca/paulstretch_python/blob/master/paulstretch_stereo.py 14 | """ 15 | 16 | import torch 17 | 18 | import math 19 | from typing import Tuple, Dict, Any, Union 20 | 21 | from ..core.utilities import comfy_root_to_syspath 22 | from ..core.io import audio_from_comfy_2d, audio_to_comfy_3d 23 | from ..core.loudness import lufs_normalization, get_loudness 24 | 25 | comfy_root_to_syspath() # add comfy to sys path for dev 26 | 27 | 28 | class SignalProcessingPaulStretch: 29 | @classmethod 30 | def INPUT_TYPES(cls) -> Dict[str, Any]: 31 | return { 32 | "required": { 33 | "audio_input": ("AUDIO", {"forceInput": True}), 34 | "stretch_factor": ( 35 | "FLOAT", 36 | {"default": 8.0, "min": 1.0, "max": 100.0, "step": 0.1}, 37 | ), 38 | "window_size_seconds": ( 39 | "FLOAT", 40 | {"default": 0.25, "min": 0.05, "max": 10.0, "step": 0.05}, 41 | ), 42 | } 43 | } 44 | 45 | RETURN_TYPES = ("AUDIO",) 46 | RETURN_NAMES = ("audio",) 47 | CATEGORY = "Signal Processing" 48 | FUNCTION = "process" 49 | 50 | def process( 51 | self, 52 | audio_input: Dict[str, Union[torch.Tensor, int]], 53 | stretch_factor: float, 54 | window_size_seconds: float, 55 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 56 | 57 | # Conditional processing: If stretch_factor is 1.0, return original audio 58 | if stretch_factor == 1.0: 59 | return audio_to_comfy_3d( 60 | audio_input["waveform"], audio_input["sample_rate"] 61 | ) 62 | 63 | # Extract waveform and sample_rate 64 | waveform, sample_rate = audio_from_comfy_2d( 65 | audio_input, repeat=True, try_gpu=True 66 | ) 67 | loudness = get_loudness(waveform, sample_rate) 68 | 69 | nchannels, 
nsamples = waveform.shape 70 | 71 | # Optimize window size to be divisible by 2, 3, and 5 72 | window_size = int(window_size_seconds * sample_rate) 73 | if window_size < 16: 74 | window_size = 16 75 | window_size = self.optimize_windowsize(window_size) 76 | window_size = int(window_size / 2) * 2 # Ensure even window size 77 | half_window_size = int(window_size / 2) 78 | 79 | # Correct the end of the waveform by applying a fade-out 80 | end_size = int(sample_rate * 0.05) 81 | if end_size < 16: 82 | end_size = 16 83 | fade_out = torch.linspace( 84 | 1.0, 0.0, end_size, device=waveform.device, dtype=waveform.dtype 85 | ) 86 | waveform[:, -end_size:] = waveform[:, -end_size:] * fade_out 87 | 88 | # Compute displacement 89 | start_pos = 0.0 90 | displace_pos = (window_size * 0.5) / stretch_factor 91 | 92 | # Create custom window function as in original code 93 | window = torch.pow( 94 | 1.0 95 | - torch.pow( 96 | torch.linspace( 97 | -1.0, 1.0, window_size, device=waveform.device, dtype=waveform.dtype 98 | ), 99 | 2.0, 100 | ), 101 | 1.25, 102 | ) 103 | 104 | # Initialize old windowed buffer 105 | old_windowed_buf = torch.zeros( 106 | (nchannels, window_size), device=waveform.device, dtype=waveform.dtype 107 | ) 108 | 109 | # Initialize list to store output frames 110 | output_frames = [] 111 | 112 | # Processing loop 113 | frame_count = 0 114 | while True: 115 | # Get the windowed buffer 116 | istart_pos = int(math.floor(start_pos)) 117 | buf = waveform[:, istart_pos : istart_pos + window_size] 118 | if buf.shape[1] < window_size: 119 | padding = window_size - buf.shape[1] 120 | buf = torch.nn.functional.pad(buf, (0, padding), "constant", 0.0) 121 | buf = buf * window 122 | 123 | # FFT: Real FFT since the input is real 124 | freqs = torch.fft.rfft(buf, dim=1) 125 | 126 | # Get amplitudes and randomize phases 127 | amplitudes = freqs.abs() 128 | phases = ( 129 | torch.rand(freqs.shape, device=waveform.device, dtype=waveform.dtype) 130 | * 2 131 | * math.pi 132 | ) 133 | freqs = amplitudes * torch.exp(1j * phases) 134 | 135 | # Inverse FFT 136 | buf_ifft = torch.fft.irfft(freqs, n=window_size, dim=1) 137 | 138 | # Window again the output buffer 139 | buf_ifft = buf_ifft * window 140 | 141 | # Overlap-add the output 142 | output = ( 143 | buf_ifft[:, :half_window_size] + old_windowed_buf[:, half_window_size:] 144 | ) 145 | old_windowed_buf = buf_ifft 146 | 147 | # Append to output_frames 148 | output_frames.append(output) 149 | 150 | # Increment start_pos 151 | start_pos += displace_pos 152 | frame_count += 1 153 | 154 | # Check if we have reached the end of the input 155 | if start_pos >= nsamples: 156 | break 157 | 158 | # Concatenate all output frames horizontally 159 | output_array = torch.cat(output_frames, dim=1) 160 | 161 | # LUFS Normalization 162 | output_tensor = lufs_normalization(output_array, sample_rate, loudness) 163 | 164 | # Return as audio dictionary 165 | return audio_to_comfy_3d(output_tensor, sample_rate) 166 | 167 | @staticmethod 168 | def optimize_windowsize(n: int) -> int: 169 | 170 | orig_n = n 171 | while True: 172 | n = orig_n 173 | while (n % 2) == 0: 174 | n //= 2 175 | while (n % 3) == 0: 176 | n //= 3 177 | while (n % 5) == 0: 178 | n //= 5 179 | 180 | if n < 2: 181 | break 182 | orig_n += 1 183 | return orig_n 184 | -------------------------------------------------------------------------------- /effects/SignalProcessingPitchShifter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: 
utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Pitch shifting node 10 | """ 11 | 12 | import torch 13 | from typing import Tuple, Dict, Any, Union 14 | 15 | import torchaudio.functional as F 16 | 17 | from ..core.utilities import comfy_root_to_syspath 18 | from ..core.io import audio_from_comfy_3d, audio_to_comfy_3d 19 | from ..core.loudness import lufs_normalization, get_loudness 20 | 21 | comfy_root_to_syspath() # add comfy to sys path for dev 22 | 23 | 24 | class SignalProcessingPitchShifter: 25 | @classmethod 26 | def INPUT_TYPES(cls) -> Dict[str, Any]: 27 | return { 28 | "required": { 29 | "audio_input": ("AUDIO",), # Input audio 30 | "pitch_shift_factor": ( 31 | "INT", 32 | {"default": 2, "min": -12 * 4, "max": 12 * 4, "step": 1}, 33 | ), 34 | }, 35 | "optional": {}, 36 | } 37 | 38 | RETURN_TYPES = ("AUDIO",) 39 | RETURN_NAMES = ("output_audio",) 40 | CATEGORY = "Signal Processing" 41 | FUNCTION = "process" 42 | 43 | def process( 44 | self, 45 | audio_input: Dict[str, Union[torch.Tensor, int]], 46 | pitch_shift_factor: int = 2, 47 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 48 | 49 | try_gpu: bool = True 50 | waveform, sample_rate = audio_from_comfy_3d(audio_input, try_gpu=try_gpu) 51 | 52 | loudness = get_loudness(waveform, sample_rate) 53 | 54 | pitch_shifted_waveform = F.pitch_shift( 55 | waveform, sample_rate, pitch_shift_factor 56 | ) 57 | pitch_shifted_waveform = lufs_normalization( 58 | pitch_shifted_waveform, sample_rate, loudness 59 | ) 60 | 61 | return audio_to_comfy_3d(pitch_shifted_waveform, sample_rate) 62 | -------------------------------------------------------------------------------- /effects/SignalProcessingStereoWidening.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Audio widening node 10 | """ 11 | 12 | import os 13 | import sys 14 | import math 15 | import torch 16 | 17 | from typing import Dict, Any, Tuple, Union 18 | from ..core.io import audio_from_comfy_2d, audio_to_comfy_3d 19 | from ..core.loudness import lufs_normalization, get_loudness 20 | from ..core.widening import ( 21 | StereoWidenerFrequencyBased, 22 | DecorrelationType, 23 | FilterbankType, 24 | ) 25 | 26 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy")) 27 | 28 | 29 | def interpolate(t: float, a: float, b: float) -> float: 30 | if not 0.0 <= t <= 1.0: 31 | raise ValueError("t must be in the range [0.0, 1.0]") 32 | return a + t * (b - a) 33 | 34 | 35 | class SignalProcessingStereoWidening: 36 | @classmethod 37 | def INPUT_TYPES(cls) -> Dict[str, Any]: 38 | return { 39 | "required": { 40 | "mode": (["decorrelation", "simple"],), 41 | "audio_input": ("AUDIO",), 42 | }, 43 | "optional": { 44 | "width": ( 45 | "FLOAT", 46 | {"default": 6.0, "min": 1.0, "max": 8.0, "step": 0.1}, 47 | ), 48 | }, 49 | } 50 | 51 | RETURN_TYPES = ("AUDIO",) 52 | RETURN_NAMES = ("widened_audio",) 53 | CATEGORY = "Signal Processing" 54 | FUNCTION = "process" 55 | 56 | def process( 57 | self, 58 | mode: str, 59 | audio_input: Dict[str, Union[torch.Tensor, int]], 60 | width: float = 1.2, 61 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 62 | """ 63 | Widen stereo audio or convert mono audio to wide stereo 64 | using the provided widening algorithm. 
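In 'simple' mode the widening works in mid/side space: with c = 1 / max(1 + width, 2), mid = (L + R) * c and sides = (R - L) * width * c, and the outputs are L' = mid - sides and R' = mid + sides. For example, width = 6.0 gives mid = (L + R) / 7 and sides = 6 * (R - L) / 7. In 'decorrelation' mode, width is clamped to [0, 1] and mapped to an interpolation angle beta in [0, pi/2] for the frequency-based widener.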
65 | 66 | Parameters: 67 | audio_input (Dict): Dictionary containing 'waveform' and 'sample_rate'. 68 | width (float): Width factor (>1.0). 69 | 70 | Returns: 71 | Tuple[Dict[str, torch.Tensor]]: Dictionary with widened 'waveform' and 'sample_rate'. 72 | """ 73 | 74 | waveform, sample_rate = audio_from_comfy_2d( 75 | audio_input, repeat=False, try_gpu=True 76 | ) 77 | channels, num_samples = waveform.shape 78 | 79 | loudness = get_loudness(waveform, sample_rate) 80 | 81 | if mode == "simple": 82 | 83 | if channels not in [1, 2]: 84 | raise ValueError( 85 | f"Unsupported number of channels: {channels}. \ 86 | Only mono and stereo are supported." 87 | ) 88 | 89 | # Calculate coefficients based on the provided width parameter 90 | width_coeff = 1.0 / max(1.0 + width, 2.0) # Scalar 91 | 92 | coef_mid = 1.0 * width_coeff # Coefficient for mid 93 | coef_sides = width * width_coeff # Coefficient for sides 94 | 95 | if channels == 2: 96 | # Stereo to Widened Stereo 97 | L = waveform[0, :] # Left channel 98 | R = waveform[1, :] # Right channel 99 | 100 | # Apply the widening algorithm 101 | mid = (L + R) * coef_mid # Mid signal 102 | sides = (R - L) * coef_sides # Side signal 103 | 104 | widened_L = mid - sides # New Left channel 105 | widened_R = mid + sides # New Right channel 106 | 107 | # Stack the widened channels back into a stereo waveform 108 | widened_waveform = torch.stack( 109 | (widened_L, widened_R), dim=0 110 | ) # [2, samples] 111 | 112 | elif channels == 1: 113 | # Mono to Wide Stereo 114 | L = waveform[0, :].clone() # Duplicate mono channel to Left 115 | R = waveform[0, :].clone() # Duplicate mono channel to Right 116 | 117 | # Apply the widening algorithm 118 | mid = (L + R) * coef_mid # Mid signal 119 | sides = (R - L) * coef_sides # Side signal 120 | 121 | widened_L = mid - sides # New Left channel 122 | widened_R = mid + sides # New Right channel 123 | 124 | # Stack the widened channels into a stereo waveform 125 | widened_waveform = torch.stack( 126 | (widened_L, widened_R), dim=0 127 | ) # [2, samples] 128 | 129 | widened_waveform = lufs_normalization( 130 | widened_waveform, sample_rate, loudness 131 | ) 132 | 133 | return audio_to_comfy_3d(widened_waveform, sample_rate) 134 | 135 | if mode == "decorrelation": 136 | 137 | waveform = waveform.cpu() 138 | 139 | decorellation_type = DecorrelationType.VELVET 140 | filterbank_type = FilterbankType.ENERGY_PRESERVE 141 | start_value = 0.0 142 | end_value = math.pi / 2 143 | 144 | if width > 1.0: 145 | width = 1.0 146 | 147 | beta = interpolate(width, start_value, end_value) 148 | cutoff_frequency_hz = 22000 # sample_rate//2 # max possible 149 | cutoff_frequency_hz = (sample_rate // 2) - 10 # max possible 150 | 151 | stereoWidener = StereoWidenerFrequencyBased( 152 | waveform, 153 | sample_rate, 154 | filterbank_type, 155 | decorellation_type, 156 | [beta, beta], 157 | cutoff_frequency_hz, 158 | ) 159 | widener_result = stereoWidener.process() 160 | widened_waveform = torch.from_numpy(widener_result) 161 | widened_waveform = widened_waveform.T 162 | 163 | widened_waveform = lufs_normalization( 164 | widened_waveform, sample_rate, loudness 165 | ) 166 | 167 | return audio_to_comfy_3d(widened_waveform, sample_rate) 168 | 169 | return audio_to_comfy_3d(waveform, sample_rate) 170 | -------------------------------------------------------------------------------- /effects/__init__.py: -------------------------------------------------------------------------------- 
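A quick sanity check on the simple-mode mid/side algebra above: at `width = 1.0` both coefficients collapse to 0.5 and the transform is an identity, so all of the perceived widening comes from scaling the side signal relative to the mid. Standalone sketch:

```python
# Verify that the "simple" widening math is an identity at width = 1.0.
import torch

width = 1.0
width_coeff = 1.0 / max(1.0 + width, 2.0)          # 0.5 here
coef_mid, coef_sides = 1.0 * width_coeff, width * width_coeff

L, R = torch.randn(8), torch.randn(8)
mid, sides = (L + R) * coef_mid, (R - L) * coef_sides
widened_L, widened_R = mid - sides, mid + sides

assert torch.allclose(widened_L, L) and torch.allclose(widened_R, R)
```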
https://raw.githubusercontent.com/c0ffymachyne/ComfyUI_SignalProcessing/a50ab911eee901646fdb810f18ae8aed2ca1960b/effects/__init__.py -------------------------------------------------------------------------------- /generators/SignalProcessingPadSynth.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Pad Synthesiser port of code from this article : 10 | https://zynaddsubfx.sourceforge.io/doc/PADsynth/PADsynth.htm#c_implementation 11 | """ 12 | 13 | 14 | import torch 15 | import math 16 | from typing import Tuple, Dict, Any, Union 17 | 18 | from ..core.io import audio_to_comfy_3d 19 | 20 | 21 | class SignalProcessingPadSynth: 22 | @classmethod 23 | def INPUT_TYPES(cls) -> Dict[str, Any]: 24 | return { 25 | "required": { 26 | "sample_rate": ( 27 | "INT", 28 | {"default": 44100, "min": 8000, "max": 96000, "step": 1}, 29 | ), 30 | "fundamental_freq": ( 31 | "FLOAT", 32 | {"default": 261.0, "min": 20.0, "max": 2000.0, "step": 1.0}, 33 | ), 34 | "bandwidth_cents": ( 35 | "FLOAT", 36 | {"default": 40.0, "min": 10.0, "max": 100.0, "step": 1.0}, 37 | ), 38 | "number_harmonics": ( 39 | "INT", 40 | {"default": 64, "min": 1, "max": 128, "step": 1}, 41 | ), 42 | } 43 | } 44 | 45 | RETURN_TYPES = ("AUDIO",) 46 | RETURN_NAMES = ("audio",) 47 | CATEGORY = "Signal Processing" 48 | FUNCTION = "process" 49 | 50 | def process( 51 | self, 52 | sample_rate: int, 53 | fundamental_freq: float, 54 | bandwidth_cents: float, 55 | number_harmonics: int, 56 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 57 | """ 58 | Apply PADsynth algorithm to generate audio. 59 | 60 | Parameters: 61 | samplerate (int): Sampling rate in Hz. 62 | fundamental_freq (float): Fundamental frequency in Hz. 63 | bandwidth_cents (float): Bandwidth in cents for Gaussian profile. 64 | number_harmonics (int): Number of harmonics to generate. 65 | 66 | Returns: 67 | Tuple[Dict[str, torch.Tensor]]: Generated audio with waveform and sample rate. 
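Before the implementation that follows, the bandwidth mapping mentioned in the docstring is worth seeing in isolation: a fixed bandwidth in cents becomes an absolute bandwidth in Hz that grows linearly with each harmonic's frequency, which is what gives PADsynth its characteristic "wider highs". A sketch using the node's default values:

```python
# Worked example of the cents -> Hz bandwidth mapping used by PADsynth below.
bandwidth_cents = 40.0
fundamental_freq = 261.0

bw_multiplier = 2.0 ** (bandwidth_cents / 1200.0) - 1.0  # ~0.0234
for nh in (1, 2, 4):
    f_nh = fundamental_freq * nh
    bw_hz = bw_multiplier * f_nh   # wider (in Hz) for higher harmonics
    print(nh, round(bw_hz, 2))     # ~6.1, ~12.2, ~24.4 Hz
```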
68 | """ 69 | 70 | # Define FFT size 71 | N = 262144 # As per C++ code 72 | 73 | # Use default amplitude distribution 74 | A = torch.zeros(number_harmonics, dtype=torch.double) 75 | A[0] = 0.0 # A[0] is not used 76 | for i in range(1, number_harmonics): 77 | A[i] = 1.0 / i 78 | if (i % 2) == 0: 79 | A[i] *= 2.0 80 | 81 | # Initialize frequency amplitude and phase arrays 82 | freq_amp = torch.zeros(N // 2, dtype=torch.double) 83 | freq_phase = ( 84 | torch.rand(N // 2, dtype=torch.double) * 2.0 * math.pi 85 | ) # Random phases between 0 and 2pi 86 | 87 | # Define Gaussian profile function 88 | def profile(fi: torch.Tensor, bwi: torch.Tensor) -> torch.Tensor: 89 | x = fi / bwi 90 | x_sq = x**2 91 | # Avoid computing exp(-x^2) for x_sq > 14.71280603 92 | mask = x_sq <= 14.71280603 93 | result = torch.zeros_like(x_sq) 94 | result[mask] = torch.exp(-x_sq[mask]) / bwi[mask] 95 | return result 96 | 97 | # Convert bandwidth from cents to Hz 98 | # bw_Hz = (2^(bw/1200) -1) * f * nh 99 | # Convert bandwidth_cents to multiplier 100 | bw_multiplier = 2.0 ** (bandwidth_cents / 1200.0) - 1.0 101 | 102 | # Populate frequency amplitude array 103 | for nh in range(1, number_harmonics): 104 | f_nh = fundamental_freq * nh 105 | bw_Hz = bw_multiplier * f_nh 106 | bwi = bw_Hz / (2.0 * sample_rate) 107 | fi = f_nh / sample_rate # Normalized frequency 108 | 109 | # Create tensors for frequency bins 110 | i = torch.arange(N // 2, dtype=torch.double) 111 | # Normalized frequency for each bin 112 | normalized_freq = ( 113 | i / N 114 | ) # Equivalent to i * (sample_rate / N) / sample_rate = i / N 115 | 116 | # Compute profile 117 | fi_tensor = torch.full_like(i, fi) 118 | bwi_tensor = torch.full_like(i, bwi) 119 | profile_values = profile(normalized_freq - fi_tensor, bwi_tensor) 120 | 121 | # Update frequency amplitude 122 | freq_amp += profile_values * A[nh] 123 | 124 | # Construct complex frequency domain tensor 125 | real = freq_amp * torch.cos(freq_phase) 126 | imag = freq_amp * torch.sin(freq_phase) 127 | freq_complex = torch.complex(real, imag) # Shape: (N//2,) 128 | 129 | # Perform IFFT using torch.fft.irfft 130 | smp = torch.fft.irfft(freq_complex, n=N) # Shape: (N,) 131 | 132 | # Normalize the signal to prevent clipping 133 | max_val = torch.max(torch.abs(smp)) 134 | if max_val < 1e-5: 135 | max_val = 1e-5 # Prevent division by zero 136 | smp = smp / (max_val * math.sqrt(2)) # Normalize to 1/sqrt(2) as in C++ code 137 | 138 | # Convert to float32 for saving 139 | smp = smp.float() 140 | 141 | # Prepare waveform tensor: (C, N) 142 | waveform_out = smp.unsqueeze(0) # Mono audio 143 | 144 | return audio_to_comfy_3d(waveform_out, sample_rate) 145 | -------------------------------------------------------------------------------- /generators/SignalProcessingPadSynthChoir.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Pad Synth Choir Synthesizer port of code from this article 10 | https://zynaddsubfx.sourceforge.io/doc/PADsynth/PADsynth.htm#c_implementation 11 | """ 12 | 13 | import torch 14 | import math 15 | from typing import Tuple, List, Dict, Any, Union 16 | 17 | 18 | class SignalProcessingPadSynthChoir: 19 | @classmethod 20 | def INPUT_TYPES(cls) -> Dict[str, Any]: 21 | return { 22 | "required": { 23 | "samplerate": ( 24 | "INT", 25 | {"default": 44100, "min": 8000, "max": 96000, "step": 1}, 26 | ), 27 | "base_freq": ( 28 
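Stripped of the harmonic amplitude profile, the PADsynth construction above reduces to a few lines: build a magnitude spectrum, attach uniformly random phases, take one inverse real FFT. A condensed sketch with a toy one-bin spectrum standing in for the node's Gaussian profile:

```python
# Condensed sketch of the PADsynth idea: magnitude spectrum + random phases -> irfft.
import math
import torch

N = 8192                                     # small FFT size for the sketch
freq_amp = torch.zeros(N // 2, dtype=torch.double)
freq_amp[100] = 1.0                          # toy spectral "blob" stand-in

freq_phase = torch.rand(N // 2, dtype=torch.double) * 2.0 * math.pi
spectrum = torch.complex(freq_amp * torch.cos(freq_phase),
                         freq_amp * torch.sin(freq_phase))

smp = torch.fft.irfft(spectrum, n=N)         # seamlessly loopable waveform
smp = smp / smp.abs().max().clamp_min(1e-5)  # guard against division by zero
```

The random phases are what make the output loop seamlessly: only the magnitude spectrum carries the timbre.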
| "FLOAT", 29 | {"default": 130.81, "min": 20.0, "max": 2000.0, "step": 1.0}, 30 | ), 31 | "step_size": ("INT", {"default": 4, "min": 1, "max": 24, "step": 1}), 32 | "num_notes": ("INT", {"default": 7, "min": 1, "max": 24, "step": 1}), 33 | "bandwidth_cents": ( 34 | "FLOAT", 35 | {"default": 60.0, "min": 10.0, "max": 100.0, "step": 1.0}, 36 | ), 37 | "number_harmonics": ( 38 | "INT", 39 | {"default": 64, "min": 1, "max": 128, "step": 1}, 40 | ), 41 | } 42 | } 43 | 44 | RETURN_TYPES = ("AUDIO_LIST", "INT") 45 | RETURN_NAMES = ("audios", "sample_rate") 46 | CATEGORY = "Signal Processing" 47 | FUNCTION = "process" 48 | 49 | def process( 50 | self, 51 | samplerate: int, 52 | base_freq: float, 53 | step_size: int, 54 | num_notes: int, 55 | bandwidth_cents: float, 56 | number_harmonics: int, 57 | ) -> Tuple[List[Dict[str, Union[torch.Tensor, int]]], int]: 58 | """ 59 | Apply PADsynth choir algorithm to generate multiple audio files. 60 | 61 | Parameters: 62 | samplerate (int): Sampling rate in Hz. 63 | base_freq (float): Base frequency in Hz. 64 | step_size (int): Step size in semitones between notes. 65 | num_notes (int): Number of notes to generate. 66 | bandwidth_cents (float): Bandwidth in cents for Gaussian profile. 67 | number_harmonics (int): Number of harmonics to generate. 68 | 69 | Returns: 70 | Tuple[List[Dict[str, torch.Tensor]]]: 71 | List of generated audios with waveform and sample rate. 72 | """ 73 | 74 | # Define FFT size 75 | N = 262144 # As per C++ code 76 | 77 | audios = [] 78 | 79 | for note_index in range(num_notes): 80 | note_semitones = step_size * note_index 81 | f1 = base_freq * (2.0 ** (note_semitones / 12.0)) 82 | 83 | # Compute amplitude_per_harmonic with formants 84 | A = torch.zeros(number_harmonics, dtype=torch.double) 85 | A[0] = 0.0 # A[0] is not used 86 | 87 | for i in range(1, number_harmonics): 88 | # Calculate formants based on the C++ choir implementation 89 | formants = ( 90 | math.exp(-(((i * f1 - 600.0) / 150.0) ** 2)) 91 | + math.exp(-(((i * f1 - 900.0) / 250.0) ** 2)) 92 | + math.exp(-(((i * f1 - 2200.0) / 200.0) ** 2)) 93 | + math.exp(-(((i * f1 - 2600.0) / 250.0) ** 2)) 94 | + math.exp(-(((i * f1) / 3000.0) ** 2)) * 0.1 95 | ) 96 | A[i] = (1.0 / i) * formants 97 | # Optionally, you can debug amplitude values 98 | # logger.debug(f"Harmonic {i}: A[{i}]={A[i]:.4f}") 99 | 100 | # Initialize frequency amplitude and phase arrays 101 | freq_amp = torch.zeros(N // 2, dtype=torch.double) 102 | freq_phase = ( 103 | torch.rand(N // 2, dtype=torch.double) * 2.0 * math.pi 104 | ) # Random phases between 0 and 2pi 105 | 106 | # Define Gaussian profile function 107 | def profile(fi: torch.Tensor, bwi: torch.Tensor) -> torch.Tensor: 108 | x = fi / bwi 109 | x_sq = x**2 110 | # Avoid computing exp(-x^2) for x_sq > 14.71280603 111 | mask = x_sq <= 14.71280603 112 | result = torch.zeros_like(x_sq) 113 | result[mask] = torch.exp(-x_sq[mask]) / bwi[mask] 114 | return result 115 | 116 | # Convert bandwidth from cents to Hz 117 | # bw_Hz = (2^(bw/1200) -1) * f * nh 118 | bw_multiplier = 2.0 ** (bandwidth_cents / 1200.0) - 1.0 119 | 120 | # Create tensors for frequency bins 121 | i = torch.arange(N // 2, dtype=torch.double) 122 | normalized_freq = i / N # Equivalent to i / N 123 | 124 | # Compute and accumulate frequency amplitudes for each harmonic 125 | for nh in range(1, number_harmonics): 126 | f_nh = f1 * nh 127 | bw_Hz = bw_multiplier * f_nh 128 | bwi = bw_Hz / (2.0 * samplerate) 129 | fi = f_nh / samplerate # Normalized frequency 130 | 131 | fi_tensor = 
torch.full_like(i, fi) 132 | bwi_tensor = torch.full_like(i, bwi) 133 | profile_values = profile(normalized_freq - fi_tensor, bwi_tensor) 134 | 135 | # Update frequency amplitude 136 | freq_amp += profile_values * A[nh] 137 | 138 | # Construct complex frequency domain tensor 139 | real = freq_amp * torch.cos(freq_phase) 140 | imag = freq_amp * torch.sin(freq_phase) 141 | freq_complex = torch.complex(real, imag) # Shape: (N//2,) 142 | 143 | # Perform IFFT using torch.fft.irfft 144 | smp = torch.fft.irfft(freq_complex, n=N) # Shape: (N,) 145 | 146 | # Normalize the signal to prevent clipping 147 | max_val = torch.max(torch.abs(smp)) 148 | if max_val < 1e-5: 149 | max_val = 1e-5 # Prevent division by zero 150 | smp = smp / ( 151 | max_val * math.sqrt(2) 152 | ) # Normalize to 1/sqrt(2) as in C++ code 153 | 154 | # Convert to float32 for saving 155 | smp = smp.float() 156 | 157 | # Prepare waveform tensor: (C, N) 158 | waveform_out = smp.unsqueeze(0) # Mono audio 159 | 160 | # Reshape waveform_out to include batch dimension: (1, C, N) 161 | waveform_out = waveform_out.unsqueeze(0) # Shape: (1, C, N) 162 | 163 | # Append to audios list 164 | audios.append({"waveform": waveform_out, "sample_rate": samplerate}) 165 | 166 | # Return the list of generated audios 167 | 168 | return audios, samplerate 169 | -------------------------------------------------------------------------------- /generators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c0ffymachyne/ComfyUI_SignalProcessing/a50ab911eee901646fdb810f18ae8aed2ca1960b/generators/__init__.py -------------------------------------------------------------------------------- /nodes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | node definitions 10 | """ 11 | 12 | # generators 13 | from .generators.SignalProcessingPadSynth import SignalProcessingPadSynth 14 | from .generators.SignalProcessingPadSynthChoir import SignalProcessingPadSynthChoir 15 | 16 | # effects 17 | from .effects.SignalProcessingStereoWidening import SignalProcessingStereoWidening 18 | from .effects.SignalProcessingPaulStretch import SignalProcessingPaulStretch 19 | from .effects.SignalProcessingPitchShifter import SignalProcessingPitchShifter 20 | from .effects.SignalProcessingConvolutionReverb import SignalProcessingConvolutionReverb 21 | 22 | # processors 23 | from .processors.SignalProcessingFilter import SignalProcessingFilter 24 | from .processors.SignalProcessingMixdown import SignalProcessingMixdown 25 | from .processors.SignalProcessingLoadAudio import SignalProcessingLoadAudio 26 | from .processors.SignalProcessingNormalizer import SignalProcessingNormalizer 27 | from .processors.SignalProcessingLoudness import SignalProcessingLoudness 28 | from .processors.SignalProcessingBaxandallEQ import ( 29 | SignalProcessingBaxandallEQ, 30 | SignalProcessingBaxandall3BandEQ, 31 | ) 32 | from .processors.SignalProcessingHarmonicsEnhancer import ( 33 | SignalProcessingHarmonicsEnhancer, 34 | ) 35 | from .processors.SignalProcessingSaturation import SignalProcessingSaturation 36 | from .processors.SignalProcessingLimiter import SignalProcessingLimiter 37 | 38 | # from .processors.SignalProcessingCompressor import SignalProcessingCompressor 39 | 40 | # visuals 41 | from .visuals.SignalProcessingSpectrogram import 
SignalProcessingSpectrogram 42 | from .visuals.SignalProcessingWaveform import SignalProcessingWaveform 43 | 44 | NODE_CLASS_MAPPINGS = { 45 | "SignalProcessingLoadAudio": SignalProcessingLoadAudio, 46 | "SignalProcessingFilter": SignalProcessingFilter, 47 | "SignalProcessingPaulStretch": SignalProcessingPaulStretch, 48 | "SignalProcessingPadSynth": SignalProcessingPadSynth, 49 | "SignalProcessingPadSynthChoir": SignalProcessingPadSynthChoir, 50 | "SignalProcessingMixdown": SignalProcessingMixdown, 51 | "SignalProcessingSpectrogram": SignalProcessingSpectrogram, 52 | "SignalProcessingWaveform": SignalProcessingWaveform, 53 | "SignalProcessingStereoWidening": SignalProcessingStereoWidening, 54 | "SignalProcessingPitchShifter": SignalProcessingPitchShifter, 55 | "SignalProcessingConvolutionReverb": SignalProcessingConvolutionReverb, 56 | "SignalProcessingNormalizer": SignalProcessingNormalizer, 57 | "SignalProcessingLoudness": SignalProcessingLoudness, 58 | "SignalProcessingBaxandallEQ": SignalProcessingBaxandallEQ, 59 | "SignalProcessingBaxandall3BandEQ": SignalProcessingBaxandall3BandEQ, 60 | "SignalProcessingHarmonicsEnhancer": SignalProcessingHarmonicsEnhancer, 61 | "SignalProcessingSaturation": SignalProcessingSaturation, 62 | "SignalProcessingLimiter": SignalProcessingLimiter, 63 | # "SignalProcessingCompressor": SignalProcessingCompressor, 64 | } 65 | 66 | NODE_DISPLAY_NAME_MAPPINGS = { 67 | "SignalProcessingLoadAudio": "(SP) Load Audio", 68 | "SignalProcessingFilter": "(SP) Filter", 69 | "SignalProcessingPaulStretch": "(SP) PaulStretch", 70 | "SignalProcessingPadSynth": "(SP) PadSynth", 71 | "SignalProcessingPadSynthChoir": "(SP) PadSynth Choir", 72 | "SignalProcessingMixdown": "(SP) Mix Down", 73 | "SignalProcessingSpectrogram": "(SP) Spectrogram", 74 | "SignalProcessingWaveform": "(SP) Waveform", 75 | "SignalProcessingStereoWidening": "(SP) Stereo Width", 76 | "SignalProcessingPitchShifter": "(SP) PitchShift", 77 | "SignalProcessingConvolutionReverb": "(SP) Convolution Reverb", 78 | "SignalProcessingNormalizer": "(SP) Normalizer", 79 | "SignalProcessingLoudness": "(SP) Loudness", 80 | "SignalProcessingBaxandallEQ": "(SP) Baxandall EQ", 81 | "SignalProcessingBaxandall3BandEQ": "(SP) Baxandall 3 Band EQ", 82 | "SignalProcessingHarmonicsEnhancer": "(SP) Enhance Harmonics", 83 | "SignalProcessingSaturation": "(SP) Saturation", 84 | "SignalProcessingLimiter": "(SP) Limiter", 85 | # "SignalProcessingCompressor": "(SP) Compressor", 86 | } 87 | -------------------------------------------------------------------------------- /noxfile.py: -------------------------------------------------------------------------------- 1 | import nox 2 | from nox import Session 3 | 4 | PYTHON_VERSIONS = ["3.10.12"] 5 | REUSE_ENV = True 6 | 7 | 8 | @nox.session(python=PYTHON_VERSIONS, tags=["style"], reuse_venv=REUSE_ENV) 9 | def lint(session: Session) -> None: 10 | 11 | session.install("black") 12 | session.install("flake8") 13 | session.install("mypy") 14 | 15 | session.run("black", ".") 16 | session.run("flake8", "--max-line-length=100", "--ignore=E501,E203,W503", ".") 17 | session.run( 18 | "mypy", 19 | ".", 20 | "--ignore-missing-imports", 21 | "--strict", 22 | "--show-error-codes", 23 | ) 24 | 25 | 26 | @nox.session(python=PYTHON_VERSIONS, tags=["tests"], reuse_venv=REUSE_ENV) 27 | def tests(session: Session) -> None: 28 | """Run pytest tests with Scalene profiling.""" 29 | 30 | session.install("scalene", "pytest", "pytest-cov", "pytest-xdist") 31 | requirements =

nox.project.load_toml("pyproject.toml")["project"]["dependencies"] 32 | for _, v in requirements.items(): 33 | session.install(*v) 34 | 35 | project_name = nox.project.load_toml("pyproject.toml")["project"]["name"] 36 | 37 | pytest_path = session.run("which", "pytest", external=True, silent=True).strip() 38 | 39 | if not session.posargs: 40 | with session.cd(".."): 41 | session.run( 42 | "scalene", 43 | # "--profile-all", 44 | pytest_path, 45 | f"--cov={project_name}", 46 | "--cov-report=term", 47 | "--cov-report=html", 48 | f"{project_name}/tests/", 49 | external=True, 50 | ) 51 | else: 52 | with session.cd(".."): 53 | session.run( 54 | "pytest", 55 | "--rootdir=.", 56 | f"--cov={project_name}", 57 | "--cov-report=term", 58 | "--cov-report=html", 59 | f"{project_name}/tests/", 60 | "-k", 61 | session.posargs[0], 62 | external=True, 63 | ) 64 | -------------------------------------------------------------------------------- /processors/SignalProcessingBaxandallEQ.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Baxandall shelf EQ 10 | references used : # reference https://webaudio.github.io/Audio-EQ-Cookbook/Audio-EQ-Cookbook.txt 11 | """ 12 | 13 | import torch 14 | import torchaudio 15 | import math 16 | from typing import Dict, Any, Tuple, Union 17 | 18 | from ..core.io import audio_to_comfy_3d, audio_from_comfy_3d 19 | from ..core.loudness import lufs_normalization, get_loudness 20 | 21 | 22 | class SignalProcessingBaxandallEQ: 23 | @classmethod 24 | def INPUT_TYPES(cls) -> Dict[str, Any]: 25 | return { 26 | "required": { 27 | "audio_input": ("AUDIO",), 28 | "bass_gain_db": ( 29 | "FLOAT", 30 | {"default": 1.0, "min": -20.0, "max": 20.0, "step": 0.1}, 31 | ), 32 | "treble_gain_db": ( 33 | "FLOAT", 34 | {"default": 1.0, "min": -20.0, "max": 20.0, "step": 0.1}, 35 | ), 36 | }, 37 | } 38 | 39 | RETURN_TYPES = ("AUDIO",) 40 | RETURN_NAMES = ("processed_audio",) 41 | CATEGORY = "Signal Processing" 42 | FUNCTION = "process" 43 | 44 | def process( 45 | self, 46 | audio_input: Dict[str, Union[torch.Tensor, int]], 47 | bass_gain_db: float = 0.0, 48 | treble_gain_db: float = 0.0, 49 | ) -> Tuple[Dict[str, torch.Tensor]]: 50 | waveform, sample_rate = audio_from_comfy_3d(audio_input, try_gpu=True) 51 | loudness = get_loudness(waveform, sample_rate) 52 | 53 | # Apply Bass Shelf (low shelf) using RBJ formula 54 | b_bass, a_bass = self.design_rbj_shelf( 55 | sample_rate, freq=100.0, gain_db=bass_gain_db, shelf_type="low" 56 | ) 57 | waveform = torchaudio.functional.lfilter( 58 | waveform, 59 | a_bass.to(waveform.device), 60 | b_bass.to(waveform.device), 61 | clamp=False, 62 | ) 63 | 64 | # Apply Treble Shelf (high shelf) using RBJ formula 65 | b_treble, a_treble = self.design_rbj_shelf( 66 | sample_rate, freq=10000.0, gain_db=treble_gain_db, shelf_type="high" 67 | ) 68 | waveform = torchaudio.functional.lfilter( 69 | waveform, 70 | a_treble.to(waveform.device), 71 | b_treble.to(waveform.device), 72 | clamp=False, 73 | ) 74 | 75 | waveform = lufs_normalization(waveform, sample_rate, loudness) 76 | return audio_to_comfy_3d(waveform, sample_rate) 77 | 78 | def design_rbj_shelf( 79 | self, sr: int, freq: float, gain_db: float, shelf_type: str = "low" 80 | ) -> Tuple[torch.Tensor, torch.Tensor]: 81 | # RBJ Audio EQ Cookbook shelf filters 82 | A = 10.0 ** (gain_db / 40.0) 83 | w0 = 2 * math.pi * freq / sr 84 | alpha = ( 85 
| math.sin(w0) / 2.0 * math.sqrt((A + 1 / A) * (1.0 / 1.0 - 1) + 2.0) 86 | ) # S=1.0 87 | 88 | cosw0 = math.cos(w0) 89 | if shelf_type == "low": 90 | b0 = A * ((A + 1) - (A - 1) * cosw0 + 2 * math.sqrt(A) * alpha) 91 | b1 = 2 * A * ((A - 1) - (A + 1) * cosw0) 92 | b2 = A * ((A + 1) - (A - 1) * cosw0 - 2 * math.sqrt(A) * alpha) 93 | a0 = (A + 1) + (A - 1) * cosw0 + 2 * math.sqrt(A) * alpha 94 | a1 = -2 * ((A - 1) + (A + 1) * cosw0) 95 | a2 = (A + 1) + (A - 1) * cosw0 - 2 * math.sqrt(A) * alpha 96 | else: # high shelf 97 | b0 = A * ((A + 1) + (A - 1) * cosw0 + 2 * math.sqrt(A) * alpha) 98 | b1 = -2 * A * ((A - 1) + (A + 1) * cosw0) 99 | b2 = A * ((A + 1) + (A - 1) * cosw0 - 2 * math.sqrt(A) * alpha) 100 | a0 = (A + 1) - (A - 1) * cosw0 + 2 * math.sqrt(A) * alpha 101 | a1 = 2 * ((A - 1) - (A + 1) * cosw0) 102 | a2 = (A + 1) - (A - 1) * cosw0 - 2 * math.sqrt(A) * alpha 103 | 104 | b = torch.tensor([b0 / a0, b1 / a0, b2 / a0], dtype=torch.float64) 105 | a = torch.tensor([1.0, a1 / a0, a2 / a0], dtype=torch.float64) 106 | return b, a 107 | 108 | 109 | class SignalProcessingBaxandall3BandEQ: 110 | @classmethod 111 | def INPUT_TYPES(cls) -> Dict[str, Any]: 112 | return { 113 | "required": { 114 | "audio_input": ("AUDIO",), 115 | "bass_gain_db": ( 116 | "FLOAT", 117 | {"default": 1.0, "min": -20.0, "max": 20.0, "step": 0.1}, 118 | ), 119 | "mid_gain_db": ( 120 | "FLOAT", 121 | {"default": 0.0, "min": -20.0, "max": 20.0, "step": 0.1}, 122 | ), 123 | "treble_gain_db": ( 124 | "FLOAT", 125 | {"default": 1.0, "min": -20.0, "max": 20.0, "step": 0.1}, 126 | ), 127 | "low_freq": ( 128 | "FLOAT", 129 | {"default": 100.0, "min": 20.0, "max": 500.0, "step": 1.0}, 130 | ), 131 | "mid_freq": ( 132 | "FLOAT", 133 | {"default": 1000.0, "min": 200.0, "max": 5000.0, "step": 10.0}, 134 | ), 135 | "high_freq": ( 136 | "FLOAT", 137 | {"default": 10000.0, "min": 2000.0, "max": 20000.0, "step": 100.0}, 138 | ), 139 | "mid_q": ( 140 | "FLOAT", 141 | {"default": 0.7, "min": 0.1, "max": 10.0, "step": 0.1}, 142 | ), 143 | }, 144 | } 145 | 146 | RETURN_TYPES = ("AUDIO",) 147 | RETURN_NAMES = ("processed_audio",) 148 | CATEGORY = "Signal Processing" 149 | FUNCTION = "process" 150 | 151 | def process( 152 | self, 153 | audio_input: Dict[str, Union[torch.Tensor, int]], 154 | bass_gain_db: float = 0.0, 155 | mid_gain_db: float = 0.0, 156 | treble_gain_db: float = 0.0, 157 | low_freq: float = 100.0, 158 | mid_freq: float = 1000.0, 159 | high_freq: float = 10000.0, 160 | mid_q: float = 0.7, 161 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 162 | 163 | waveform, sample_rate = audio_from_comfy_3d(audio_input, try_gpu=True) 164 | device = waveform.device 165 | dtype = waveform.dtype 166 | loudness = get_loudness(waveform, sample_rate) 167 | 168 | # Low shelf filter 169 | b_low, a_low = self.design_rbj_shelf( 170 | sample_rate, low_freq, bass_gain_db, shelf_type="low" 171 | ) 172 | b_low = b_low.to(device=device, dtype=dtype) 173 | a_low = a_low.to(device=device, dtype=dtype) 174 | waveform = torchaudio.functional.lfilter(waveform, a_low, b_low, clamp=False) 175 | 176 | # Mid peaking filter 177 | b_mid, a_mid = self.design_rbj_peak(sample_rate, mid_freq, mid_gain_db, Q=mid_q) 178 | b_mid = b_mid.to(device=device, dtype=dtype) 179 | a_mid = a_mid.to(device=device, dtype=dtype) 180 | waveform = torchaudio.functional.lfilter(waveform, a_mid, b_mid, clamp=False) 181 | 182 | # High shelf filter 183 | b_high, a_high = self.design_rbj_shelf( 184 | sample_rate, high_freq, treble_gain_db, shelf_type="high" 185 | ) 186 | b_high = 
b_high.to(device=device, dtype=dtype) 187 | a_high = a_high.to(device=device, dtype=dtype) 188 | waveform = torchaudio.functional.lfilter(waveform, a_high, b_high, clamp=False) 189 | 190 | # Normalize loudness after EQ 191 | waveform = lufs_normalization(waveform, sample_rate, loudness) 192 | 193 | return audio_to_comfy_3d(waveform, sample_rate) 194 | 195 | def design_rbj_shelf( 196 | self, sr: int, freq: float, gain_db: float, shelf_type: str = "low" 197 | ) -> Tuple[torch.Tensor, torch.Tensor]: 198 | # RBJ audio EQ cookbook formula for shelving filters 199 | A = 10.0 ** (gain_db / 40.0) 200 | w0 = 2.0 * math.pi * freq / sr 201 | # Slope S=1.0 (Baxandall-like gentle slope) 202 | S = 1.0 203 | alpha = math.sin(w0) / 2.0 * math.sqrt((A + 1.0 / A) * (1.0 / S - 1.0) + 2.0) 204 | cosw0 = math.cos(w0) 205 | 206 | if shelf_type == "low": 207 | b0 = A * ((A + 1.0) - (A - 1.0) * cosw0 + 2.0 * math.sqrt(A) * alpha) 208 | b1 = 2.0 * A * ((A - 1.0) - (A + 1.0) * cosw0) 209 | b2 = A * ((A + 1.0) - (A - 1.0) * cosw0 - 2.0 * math.sqrt(A) * alpha) 210 | a0 = (A + 1.0) + (A - 1.0) * cosw0 + 2.0 * math.sqrt(A) * alpha 211 | a1 = -2.0 * ((A - 1.0) + (A + 1.0) * cosw0) 212 | a2 = (A + 1.0) + (A - 1.0) * cosw0 - 2.0 * math.sqrt(A) * alpha 213 | else: 214 | # high shelf 215 | b0 = A * ((A + 1.0) + (A - 1.0) * cosw0 + 2.0 * math.sqrt(A) * alpha) 216 | b1 = -2.0 * A * ((A - 1.0) + (A + 1.0) * cosw0) 217 | b2 = A * ((A + 1.0) + (A - 1.0) * cosw0 - 2.0 * math.sqrt(A) * alpha) 218 | a0 = (A + 1.0) - (A - 1.0) * cosw0 + 2.0 * math.sqrt(A) * alpha 219 | a1 = 2.0 * ((A - 1.0) - (A + 1.0) * cosw0) 220 | a2 = (A + 1.0) - (A - 1.0) * cosw0 - 2.0 * math.sqrt(A) * alpha 221 | 222 | b = torch.tensor([b0 / a0, b1 / a0, b2 / a0], dtype=torch.float64) 223 | a = torch.tensor([1.0, a1 / a0, a2 / a0], dtype=torch.float64) 224 | return b, a 225 | 226 | def design_rbj_peak( 227 | self, sr: int, freq: float, gain_db: float, Q: float = 0.7 228 | ) -> Tuple[torch.Tensor, torch.Tensor]: 229 | # RBJ audio EQ cookbook peak filter 230 | A = 10.0 ** (gain_db / 40.0) 231 | w0 = 2.0 * math.pi * freq / sr 232 | alpha = math.sin(w0) / (2.0 * Q) 233 | cosw0 = math.cos(w0) 234 | 235 | b0 = 1.0 + alpha * A 236 | b1 = -2.0 * cosw0 237 | b2 = 1.0 - alpha * A 238 | a0 = 1.0 + alpha / A 239 | a1 = -2.0 * cosw0 240 | a2 = 1.0 - alpha / A 241 | 242 | b = torch.tensor([b0 / a0, b1 / a0, b2 / a0], dtype=torch.float64) 243 | a = torch.tensor([1.0, a1 / a0, a2 / a0], dtype=torch.float64) 244 | return b, a 245 | -------------------------------------------------------------------------------- /processors/SignalProcessingCompressor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | This file contains a description of a compressor node that utilizes a CUDA-optimized kernel. 
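A property worth knowing about the cookbook shelves used in both EQ classes above: at DC the low shelf's transfer function evaluates to A^2 = 10^(gain_db/20), so the requested boost comes out exactly. The `(1.0 / 1.0 - 1)` term in the first class is just the slope formula with S = 1 written inline. A standalone check using the same formulas:

```python
# Sanity check for the RBJ low shelf: DC gain of the normalized (b, a)
# pair should equal 10**(gain_db / 20), i.e. A**2 with A = 10**(g / 40).
import math

def rbj_low_shelf(sr, freq, gain_db, S=1.0):
    A = 10.0 ** (gain_db / 40.0)
    w0 = 2.0 * math.pi * freq / sr
    alpha = math.sin(w0) / 2.0 * math.sqrt((A + 1.0 / A) * (1.0 / S - 1.0) + 2.0)
    c = math.cos(w0)
    b = [A * ((A + 1) - (A - 1) * c + 2 * math.sqrt(A) * alpha),
         2 * A * ((A - 1) - (A + 1) * c),
         A * ((A + 1) - (A - 1) * c - 2 * math.sqrt(A) * alpha)]
    a0 = (A + 1) + (A - 1) * c + 2 * math.sqrt(A) * alpha
    a1 = -2 * ((A - 1) + (A + 1) * c)
    a2 = (A + 1) + (A - 1) * c - 2 * math.sqrt(A) * alpha
    return [x / a0 for x in b], [1.0, a1 / a0, a2 / a0]

b, a = rbj_low_shelf(44100, 100.0, 6.0)
print(round(20 * math.log10(sum(b) / sum(a)), 3))  # ~6.0 dB at DC
```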
10 | """ 11 | import torch 12 | from typing import Dict, Any, Tuple, Union 13 | 14 | from ..core.io import audio_to_comfy_3d, audio_from_comfy_2d 15 | from ..core.compression import compressor 16 | from ..core.loudness import get_loudness, lufs_normalization 17 | 18 | 19 | class SignalProcessingCompressor: 20 | @classmethod 21 | def INPUT_TYPES(cls) -> Dict[str, Any]: 22 | return { 23 | "required": { 24 | "audio_input": ("AUDIO",), 25 | "comp": ( 26 | "FLOAT", 27 | {"default": -0.3, "min": -2.0, "max": 2.0, "step": 0.01}, 28 | ), 29 | "attack": ( 30 | "FLOAT", 31 | {"default": 0.1, "min": 0.01, "max": 100.0, "step": 0.01}, 32 | ), 33 | "release": ( 34 | "FLOAT", 35 | {"default": 60.0, "min": 0.01, "max": 1000.0, "step": 0.1}, 36 | ), 37 | "filter_param": ( 38 | "FLOAT", 39 | {"default": 0.3, "min": 0.0, "max": 1.0, "step": 0.01}, 40 | ), 41 | } 42 | } 43 | 44 | RETURN_TYPES = ("AUDIO",) 45 | RETURN_NAMES = ("audio",) 46 | CATEGORY = "Signal Processing" 47 | FUNCTION = "process" 48 | 49 | def process( 50 | self, 51 | audio_input: Dict[str, Union[torch.Tensor, int]], 52 | comp: float = -0.3, # Compression/expansion factor 53 | attack: float = 0.1, # Attack time in ms 54 | release: float = 60.0, # Release time in ms 55 | filter_param: float = 0.3, # Filter parameter < 1 56 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 57 | """ 58 | Apply compression or expansion to the audio input using CUDA. 59 | 60 | Parameters: 61 | audio_input (Dict[str, Union[torch.Tensor, int]]): Input audio waveform and sample rate. 62 | comp (float): Compression/expansion factor. 63 | attack (float): Attack time in milliseconds. 64 | release (float): Release time in milliseconds. 65 | filter_param (float): Filter parameter for envelope smoothing. 66 | 67 | Returns: 68 | Tuple[Dict[str, Union[torch.Tensor, int]]]: Compressed audio and sample rate. 
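The kernel in kernels/compressor/compressor.cu is not reproduced in this listing, so the block below is only a rough pure-PyTorch mental model of a power-law envelope compressor. The attack/release smoothing constants and the gain law here are assumptions for illustration, not the kernel's actual math:

```python
# Hedged reference sketch of an envelope-follower compressor (mono, 1-D input).
# Not the repo's CUDA kernel; details are assumed.
import torch

def reference_compressor(x: torch.Tensor, sr: int, comp: float,
                         attack_ms: float, release_ms: float) -> torch.Tensor:
    att = torch.exp(torch.tensor(-1.0 / (sr * attack_ms / 1000.0)))
    rel = torch.exp(torch.tensor(-1.0 / (sr * release_ms / 1000.0)))
    env = torch.zeros(())
    out = torch.empty_like(x)
    for i, s in enumerate(x):                  # per-sample loop: slow, clarity only
        level = s.abs()
        coeff = att if level > env else rel    # fast attack, slow release
        env = coeff * env + (1.0 - coeff) * level
        gain = env.clamp_min(1e-6) ** comp     # comp < 0 compresses, > 0 expands
        out[i] = s * gain
    return out

y = reference_compressor(torch.randn(1000), 44100, comp=-0.3,
                         attack_ms=0.1, release_ms=60.0)
```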
69 | """ 70 | # Extract waveform and sample rate 71 | waveform, sample_rate = audio_from_comfy_2d(audio_input, try_gpu=True) 72 | 73 | loudness = get_loudness(waveform, sample_rate=sample_rate) 74 | 75 | # Apply the compressor kernel 76 | filtered_waveform, _ = compressor( 77 | waveform, 78 | sample_rate, 79 | comp=comp, 80 | attack=attack, 81 | release=release, 82 | a=filter_param, 83 | device="cuda" if torch.cuda.is_available() else "cpu", 84 | ) 85 | 86 | filtered_waveform = lufs_normalization( 87 | filtered_waveform, sample_rate=sample_rate, target_lufs=loudness 88 | ) 89 | 90 | # Return the processed audio 91 | return audio_to_comfy_3d(filtered_waveform, sample_rate) 92 | -------------------------------------------------------------------------------- /processors/SignalProcessingFilter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | The code defines a classic audio filter set for performing various audio processing tasks such as filtering out unwanted frequencies 10 | """ 11 | 12 | import torch 13 | import torchaudio 14 | from typing import Dict, Any, Tuple, Union 15 | 16 | from ..core.io import audio_to_comfy_3d, audio_from_comfy_3d 17 | from ..core.loudness import lufs_normalization, get_loudness 18 | 19 | 20 | class SignalProcessingFilter: 21 | @classmethod 22 | def INPUT_TYPES(cls) -> Dict[str, Any]: 23 | return { 24 | "required": { 25 | "audio_input": ("AUDIO", {"forceInput": True}), 26 | "cutoff": ( 27 | "FLOAT", 28 | {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01}, 29 | ), 30 | "filter_type": ( 31 | ["lowpass", "highpass", "bandpass", "bandstop"], 32 | {"default": "lowpass"}, 33 | ), 34 | "q_factor": ( 35 | "FLOAT", 36 | {"default": 0.707, "min": 0.1, "max": 5.0, "step": 0.01}, 37 | ), # For resonance/bandwidth 38 | } 39 | } 40 | 41 | RETURN_TYPES = ("AUDIO", "INT") 42 | RETURN_NAMES = ("audio", "sample_rate") 43 | CATEGORY = "Signal Processing" 44 | FUNCTION = "process" 45 | 46 | def process( 47 | self, 48 | audio_input: Dict[str, Union[torch.Tensor, int]], 49 | cutoff: float, 50 | filter_type: str, 51 | q_factor: float, 52 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 53 | """ 54 | Apply a specified filter to the input audio. 55 | 56 | Parameters: 57 | audio (Dict[str, torch.Tensor]): Input audio with 'waveform' and 'sample_rate'. 58 | cutoff (float): Normalized cutoff frequency (0.0 to 1.0). 59 | filter_type (str): Type of filter ('lowpass', 'highpass', 'bandpass', 'bandstop'). 60 | q_factor (float): Quality factor determining the filter's bandwidth. 61 | 62 | Returns: 63 | Tuple[Dict[str, torch.Tensor]]: Filtered audio. 
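The cutoff knob is mapped logarithmically onto (20 Hz, Nyquist - 100 Hz) by the body just below, so equal knob increments correspond to roughly equal musical intervals. The same mapping in isolation, with example values at 44.1 kHz:

```python
# Standalone sketch of the log-frequency cutoff mapping implemented below.
import math

def knob_to_hz(cutoff: float, sample_rate: int = 44100) -> float:
    nyquist = sample_rate / 2.0
    lo, hi = math.log(20.0), math.log(nyquist - 100.0)
    cutoff = min(max(cutoff, 1e-6), 1.0 - 1e-6)  # avoid log-domain endpoints
    return math.exp(lo + cutoff * (hi - lo))

for knob in (0.0, 0.25, 0.5, 0.75, 1.0):
    print(knob, round(knob_to_hz(knob)))  # ~20, ~115, ~663, ~3815, ~21950 Hz
```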
64 | """ 65 | 66 | waveform, sample_rate = audio_from_comfy_3d(audio_input) 67 | 68 | loudness = get_loudness(waveform, sample_rate) 69 | 70 | nyquist = sample_rate / 2.0 71 | 72 | # Define minimum and maximum frequencies for mapping 73 | log_min = 20.0 # 20 Hz, typical lower bound of human hearing 74 | log_max = nyquist - 100.0 # Slightly below Nyquist to prevent instability 75 | 76 | # Avoid log(0) by ensuring cutoff is within (0,1) 77 | cutoff = min(max(cutoff, 1e-6), 1.0 - 1e-6) 78 | 79 | # Logarithmic mapping 80 | log_min = torch.log(torch.tensor(log_min)) 81 | log_max = torch.log(torch.tensor(log_max)) 82 | log_cutoff = log_min + cutoff * (log_max - log_min) 83 | cutoff_freq = torch.exp(log_cutoff).item() 84 | 85 | # Choose filter type 86 | if filter_type == "lowpass": 87 | filtered_waveform = torchaudio.functional.lowpass_biquad( 88 | waveform, sample_rate, cutoff_freq, Q=q_factor 89 | ) 90 | elif filter_type == "highpass": 91 | filtered_waveform = torchaudio.functional.highpass_biquad( 92 | waveform, sample_rate, cutoff_freq, Q=q_factor 93 | ) 94 | elif filter_type in ["bandpass", "bandstop"]: 95 | center_freq = cutoff_freq 96 | # Ensure that the bandwidth does not exceed the Nyquist frequency 97 | 98 | if filter_type == "bandpass": 99 | filtered_waveform = torchaudio.functional.bandpass_biquad( 100 | waveform, sample_rate, center_freq, Q=q_factor 101 | ) 102 | else: # bandstop 103 | filtered_waveform = torchaudio.functional.band_biquad( 104 | waveform, sample_rate, center_freq, Q=q_factor 105 | ) 106 | else: 107 | raise ValueError(f"Unsupported filter type: {filter_type}") 108 | 109 | filtered_waveform = lufs_normalization(filtered_waveform, sample_rate, loudness) 110 | 111 | return audio_to_comfy_3d(filtered_waveform, sample_rate) 112 | -------------------------------------------------------------------------------- /processors/SignalProcessingHarmonicsEnhancer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Classic Audio filter set 10 | """ 11 | from ast import literal_eval 12 | import torch 13 | import torchaudio 14 | from typing import Dict, Any, List, Tuple, Union 15 | 16 | from ..core.utilities import comfy_root_to_syspath 17 | from ..core.io import audio_to_comfy_3d, audio_from_comfy_2d 18 | from ..core.loudness import lufs_normalization, get_loudness 19 | 20 | comfy_root_to_syspath() # add comfy to sys path for dev 21 | 22 | 23 | class SignalProcessingHarmonicsEnhancer: 24 | @classmethod 25 | def INPUT_TYPES(cls) -> Dict[str, Any]: 26 | return { 27 | "required": { 28 | "audio_input": ("AUDIO",), 29 | "harmonics": ("STRING", {"default": "1,3,5,7,9"}), 30 | "mode": (["detect base frequency", "use base frequency"],), 31 | "base_frequency": ("FLOAT", {"default": 440, "min": 0, "max": 20000}), 32 | "gain_db": ("INT", {"default": 5, "min": 0, "max": 500, "step": 1}), 33 | "Q": ("FLOAT", {"default": 0.707, "min": 0, "max": 1.0, "step": 0.01}), 34 | } 35 | } 36 | 37 | RETURN_TYPES = ("AUDIO",) 38 | RETURN_NAMES = ("audio",) 39 | CATEGORY = "Signal Processing" 40 | FUNCTION = "process" 41 | 42 | def process( 43 | self, 44 | audio_input: Dict[str, Union[torch.Tensor, int]], 45 | harmonics: str = "1,3,5,7,9", 46 | mode: str = "detect base frequency", 47 | base_frequency: int = 440, 48 | gain_db: int = 5, 49 | Q: float = 0.707, 50 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 51 | waveform, 
sample_rate = audio_from_comfy_2d(audio_input, try_gpu=True) 52 | loudness = get_loudness(waveform, sample_rate) 53 | 54 | try: 55 | harmonics_list: List[int] = [literal_eval(x) for x in harmonics.split(",")] 56 | except Exception: 57 | raise RuntimeWarning( 58 | "Invalid Harmonics Format. Please delimit integers by a comma \ 59 | ',' like this: 1,3,5,7,9" 60 | ) 61 | if mode == "detect base frequency": 62 | filtered_waveform = self.enhance_harmonics( 63 | waveform, sample_rate, harmonics=harmonics_list, gain_db=gain_db, Q=Q 64 | ) 65 | elif mode == "use base frequency": 66 | filtered_waveform = self.enhance_harmonics( 67 | waveform, 68 | sample_rate, 69 | harmonics=harmonics_list, 70 | gain_db=gain_db, 71 | base_frequency=base_frequency, 72 | Q=Q, 73 | ) 74 | 75 | filtered_waveform = lufs_normalization(filtered_waveform, sample_rate, loudness) 76 | return audio_to_comfy_3d(filtered_waveform, sample_rate) 77 | 78 | def add_harmonics(self, audio: torch.Tensor, gain: float = 1.2) -> torch.Tensor: 79 | # Apply saturation using a tanh curve 80 | harmonic_audio = torch.tanh(audio * gain) 81 | return harmonic_audio 82 | 83 | def detect_fundamental(self, audio: torch.Tensor, sample_rate: int) -> torch.Tensor: 84 | # Estimate the fundamental frequency using a pitch detection method 85 | pitch = torchaudio.functional.detect_pitch_frequency(audio, sample_rate) 86 | 87 | return pitch 88 | 89 | def detect_fundamental_mean(self, audio: torch.Tensor, sample_rate: int) -> int: 90 | # Estimate the fundamental frequency using a pitch detection method 91 | pitch = torchaudio.functional.detect_pitch_frequency(audio, sample_rate) 92 | 93 | return int(pitch.mean().item()) 94 | 95 | def enhance_harmonics( 96 | self, 97 | audio: torch.Tensor, 98 | sample_rate: int, 99 | harmonics: List[int] = [1, 3, 5, 7, 9, 11], 100 | gain_db: float = 5, 101 | base_frequency: float = 0, 102 | Q: float = 0.707, 103 | ) -> torch.Tensor: 104 | # Detect the base frequency 105 | if base_frequency == 0: 106 | base_frequency = self.detect_fundamental_mean(audio, sample_rate) 107 | if base_frequency <= 0: # Fallback if pitch detection fails 108 | base_frequency = 440 # Use a default base frequency 109 | 110 | # Apply EQ boosts to specific harmonic frequencies 111 | for harmonic in harmonics: 112 | freq = base_frequency * harmonic 113 | if freq < sample_rate / 2: # Ensure it's within the Nyquist frequency 114 | audio = torchaudio.functional.equalizer_biquad( 115 | audio, sample_rate, center_freq=freq, gain=gain_db, Q=Q 116 | ) 117 | 118 | return audio 119 | -------------------------------------------------------------------------------- /processors/SignalProcessingLimiter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Classic Limiter CUDA Optimized 10 | 11 | Reference: 12 | https://ccrma.stanford.edu/~jatin/ComplexNonlinearities/Hysteresis.html 13 | https://viennatalk.mdw.ac.at/papers/Pap_01_79_Tronchin.pdf 14 | https://jatinchowdhury18.medium.com/complex-nonlinearities-episode-3-hysteresis-fdeb2cd3e3f6 15 | https://ccrma.stanford.edu/~dtyeh/papers/yeh07_dafx_clipode.pdf 16 | """ 17 | import torch 18 | from typing import Dict, Any, Tuple, Union 19 | 20 | from ..core.io import audio_to_comfy_3d, audio_from_comfy_2d 21 | from ..core.limiting import limiter, limiter_get_modes 22 | from ..core.loudness import get_loudness, lufs_normalization 23 | 24 | 25
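The enhancer above is essentially a chain of peaking EQs pinned to integer multiples of the detected (or supplied) base frequency. A toy standalone version of the same loop on a 220 Hz tone; the tone and gain values are example inputs only:

```python
# Boost the 1st/3rd/5th harmonics of a 220 Hz tone with torchaudio's peaking EQ.
import torch
import torchaudio.functional as F

sr = 44100
t = torch.arange(sr) / sr
audio = torch.sin(2 * torch.pi * 220.0 * t).unsqueeze(0)  # shape (1, N)

base = 220.0
for h in (1, 3, 5):
    freq = base * h
    if freq < sr / 2:  # stay under Nyquist, as the node does
        audio = F.equalizer_biquad(audio, sr, center_freq=freq, gain=5.0, Q=0.707)
```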
| class SignalProcessingLimiter: 26 | @classmethod 27 | def INPUT_TYPES(cls) -> Dict[str, Any]: 28 | return { 29 | "required": { 30 | "audio_input": ("AUDIO",), 31 | "mode": (limiter_get_modes(),), 32 | "threshold": ( 33 | "FLOAT", 34 | {"default": 100.0, "min": 0.0, "max": 100.0, "step": 0.1}, 35 | ), 36 | "slope": ( 37 | "FLOAT", 38 | {"default": 100.0, "min": 0.0, "max": 100.0, "step": 0.1}, 39 | ), 40 | "release_ms": ( 41 | "FLOAT", 42 | {"default": 100.0, "min": 0.0, "max": 1000.0, "step": 0.1}, 43 | ), 44 | } 45 | } 46 | 47 | RETURN_TYPES = ("AUDIO",) 48 | RETURN_NAMES = ("audio",) 49 | CATEGORY = "Signal Processing" 50 | FUNCTION = "process" 51 | 52 | def process( 53 | self, 54 | audio_input: Dict[str, Union[torch.Tensor, int]], 55 | mode: str = "downward", 56 | threshold: float = 50.0, # Threshold in percents 57 | slope: float = 100.0, # Slope in percents 58 | release_ms: float = 100.0, # Release time in ms 59 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 60 | waveform, sample_rate = audio_from_comfy_2d(audio_input, try_gpu=True) 61 | 62 | loudness = get_loudness(waveform, sample_rate) 63 | 64 | filtered_waveform = limiter( 65 | waveform, 66 | mode=mode, 67 | sample_rate=sample_rate, 68 | threshold=threshold / 100.0, 69 | slope=slope / 100, 70 | release_ms=release_ms, 71 | ) 72 | 73 | filtered_waveform = lufs_normalization(filtered_waveform, sample_rate, loudness) 74 | 75 | return audio_to_comfy_3d(filtered_waveform, sample_rate) 76 | -------------------------------------------------------------------------------- /processors/SignalProcessingLoadAudio.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Audio loading node 10 | """ 11 | 12 | import sys 13 | import os 14 | import torch 15 | from typing import Dict, Tuple, Any, Union 16 | 17 | from ..core.io import from_disk_as_dict_3d 18 | import folder_paths 19 | 20 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy")) 21 | 22 | 23 | class SignalProcessingLoadAudio: 24 | supported_formats = ["wav", "mp3", "ogg", "m4a", "flac", "mp4"] 25 | input_dir = os.path.join(folder_paths.get_input_directory(), "samples") 26 | 27 | @classmethod 28 | def INPUT_TYPES(s) -> Dict[str, Any]: 29 | supported_extensions = tuple( 30 | f".{fmt.lower()}" for fmt in SignalProcessingLoadAudio.supported_formats 31 | ) 32 | 33 | files, _ = folder_paths.recursive_search(SignalProcessingLoadAudio.input_dir) 34 | filtered_files = [x for x in files if x.lower().endswith(supported_extensions)] 35 | files = [ 36 | os.path.join(SignalProcessingLoadAudio.input_dir, x) for x in filtered_files 37 | ] 38 | 39 | return { 40 | "required": { 41 | "audio_file": (sorted(files), {"image_upload": True}), 42 | "gain": ( 43 | "FLOAT", 44 | {"default": 1.0, "min": 0.0, "max": 8.0, "step": 0.01}, 45 | ), 46 | }, 47 | } 48 | 49 | RETURN_TYPES = ("AUDIO",) 50 | RETURN_NAMES = ("audio",) 51 | CATEGORY = "Signal Processing" 52 | FUNCTION = "process" 53 | 54 | def process( 55 | self, audio_file: str, gain: float 56 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 57 | return from_disk_as_dict_3d(audio_file=audio_file, gain=gain) 58 | -------------------------------------------------------------------------------- /processors/SignalProcessingLoudness.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 
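As with the compressor, the limiter's CUDA kernels (kernels/limiter/*.cu) are not shown in this listing. Purely as a naive reference for what a downward mode does with the 0-1 threshold/slope values the node passes in, the gain computation might look like the sketch below; this is an assumption-laden stand-in, not core.limiting:

```python
# Naive downward limiter: attenuate only where the level exceeds the threshold.
import torch

def naive_downward_limiter(x: torch.Tensor, threshold: float, slope: float) -> torch.Tensor:
    env = x.abs()
    over = env > threshold
    gain = torch.ones_like(env)
    # slope = 1.0 pins peaks exactly to the threshold; smaller slopes limit less
    gain[over] = (threshold / env[over]) ** slope
    return x * gain

x = torch.linspace(-1.5, 1.5, 7)
print(naive_downward_limiter(x, threshold=0.5, slope=1.0))  # peaks capped at +/-0.5
```

A real limiter would also smooth the gain with the release time rather than switching it per sample.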
-*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Loudness node 10 | """ 11 | 12 | import torch 13 | from typing import Tuple, Dict, Any, Union 14 | 15 | from ..core.io import audio_from_comfy_2d 16 | from ..core.loudness import get_loudness 17 | 18 | 19 | class SignalProcessingLoudness: 20 | @classmethod 21 | def INPUT_TYPES(cls) -> Dict[str, Any]: 22 | return { 23 | "required": { 24 | "audio_input": ("AUDIO",), 25 | }, 26 | } 27 | 28 | RETURN_TYPES = ("FLOAT",) 29 | RETURN_NAMES = ("loudness",) 30 | CATEGORY = "Signal Processing" 31 | FUNCTION = "process" 32 | 33 | def process(self, audio_input: Dict[str, Union[torch.Tensor, int]]) -> Tuple[float]: 34 | waveform, sample_rate = audio_from_comfy_2d(audio_input, try_gpu=True) 35 | 36 | loudness: float = get_loudness(waveform, sample_rate) 37 | 38 | return (loudness,) 39 | -------------------------------------------------------------------------------- /processors/SignalProcessingMixdown.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Mixdown node for pad synths 10 | """ 11 | 12 | import torch 13 | 14 | from typing import Tuple, List, Dict, Union, Any 15 | import torchaudio 16 | 17 | from ..core.io import audio_to_comfy_3d 18 | from ..core.loudness import lufs_normalization 19 | 20 | 21 | class SignalProcessingMixdown: 22 | @classmethod 23 | def INPUT_TYPES(cls) -> Dict[str, Any]: 24 | return { 25 | "required": { 26 | "audio_inputs": ("AUDIO_LIST", {"default": []}), 27 | }, 28 | "optional": { 29 | "gain_factors": ( 30 | "FLOAT_LIST", 31 | {"default": [], "min": 0.0, "max": 2.0, "step": 0.1}, 32 | ), 33 | # If empty, default to [1.0] * num_audios 34 | }, 35 | } 36 | 37 | RETURN_TYPES = ("AUDIO",) 38 | RETURN_NAMES = ("mixed_audio",) 39 | CATEGORY = "Signal Processing" 40 | FUNCTION = "process" 41 | 42 | def process( 43 | self, 44 | audio_inputs: List[Dict[str, Union[torch.Tensor, int]]], 45 | gain_factors: List[float] = [], 46 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 47 | """ 48 | Mix down multiple audio inputs into a single audio output 49 | with optional individual volume controls. 50 | 51 | Parameters: 52 | audio_inputs (List[Dict]): List of audio inputs, 53 | each containing 'waveform' and 'sample_rate'. 54 | output_normalization (float): Normalization factor for the mixed audio (0.0 to 1.0). 55 | gain_factors (List[float], optional): List of gain factors for each audio input. 56 | 57 | Returns: 58 | Tuple[Dict[str, torch.Tensor], int]: Mixed audio with waveform and sample rate. 59 | """ 60 | 61 | if not audio_inputs: 62 | raise ValueError("No audio inputs provided for mixing.") 63 | 64 | num_audios = len(audio_inputs) 65 | 66 | # Handle gain_factors 67 | if not gain_factors: 68 | gain_factors = [1.0] * num_audios 69 | elif len(gain_factors) != num_audios: 70 | raise ValueError( 71 | f"Number of gain factors ({len(gain_factors)}) \ 72 | does not match number of audio inputs ({num_audios})." 
73 | ) 74 | 75 | # Extract sample rates and verify consistency 76 | sample_rates: List[int] = [audio["sample_rate"] for audio in audio_inputs] 77 | target_sample_rate = sample_rates[0] 78 | 79 | for idx, sr in enumerate(sample_rates): 80 | if sr != target_sample_rate: 81 | resampler = torchaudio.transforms.Resample( 82 | orig_freq=sr, new_freq=target_sample_rate 83 | ) 84 | 85 | _waveform: torch.Tensor = audio_inputs[idx]["waveform"] 86 | resampler.to( 87 | device=_waveform.device, 88 | dtype=_waveform.dtype, 89 | ) 90 | audio_inputs[idx]["waveform"] = resampler(_waveform) 91 | audio_inputs[idx]["sample_rate"] = target_sample_rate 92 | 93 | # Determine the maximum length among all audio inputs 94 | lengthsw: List[torch.Tensor] = [audio["waveform"] for audio in audio_inputs] 95 | lengths: List[int] = [wave.shape[-1] for wave in lengthsw] 96 | max_length = max(lengths) 97 | 98 | # Pad or truncate each audio to match the maximum length and apply gain 99 | for idx, audio in enumerate(audio_inputs): 100 | waveform: torch.Tensor = audio["waveform"] 101 | current_length = waveform.shape[-1] 102 | gain = gain_factors[idx] 103 | 104 | if current_length < max_length: 105 | padding = max_length - current_length 106 | # Pad with zeros (silence) at the end 107 | waveform = torch.nn.functional.pad(waveform, (0, padding)) 108 | elif current_length > max_length: 109 | # Truncate the waveform to max_length 110 | waveform = waveform[:, :, :max_length] 111 | 112 | # Apply gain 113 | waveform = waveform * gain 114 | 115 | audio["waveform"] = waveform 116 | 117 | # Sum all waveforms to create the mix 118 | mixed_waveform: torch.Tensor = torch.zeros_like(audio_inputs[0]["waveform"]) 119 | for idx, audio in enumerate(audio_inputs): 120 | mixed_waveform += audio["waveform"] 121 | 122 | mixed_waveform = lufs_normalization(mixed_waveform, target_sample_rate) 123 | 124 | return audio_to_comfy_3d(mixed_waveform, target_sample_rate) 125 | -------------------------------------------------------------------------------- /processors/SignalProcessingNormalizer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Various normalization techniques node 10 | """ 11 | 12 | import torch 13 | from typing import Dict, Any, Tuple, Union 14 | from ..core.io import audio_to_comfy_3d, audio_from_comfy_2d 15 | from ..core.loudness import ( 16 | rms_normalization, 17 | lufs_normalization, 18 | peak_normalization, 19 | automatic_gain_control, 20 | ) 21 | 22 | 23 | class SignalProcessingNormalizer: 24 | @classmethod 25 | def INPUT_TYPES(cls) -> Dict[str, Any]: 26 | return { 27 | "required": { 28 | "audio_input": ("AUDIO",), 29 | "mode": (["lufs", "rms", "peak", "auto"],), 30 | "target_rms": ( 31 | "FLOAT", 32 | {"default": 0.1, "min": 0, "max": 10.0, "step": 0.1}, 33 | ), 34 | "target_lufs_db": ( 35 | "FLOAT", 36 | {"default": -14.0, "min": -100, "max": 100.0, "step": 0.1}, 37 | ), 38 | "target_peak": ( 39 | "FLOAT", 40 | {"default": 0.9, "min": 0.0, "max": 1.0, "step": 0.1}, 41 | ), 42 | "target_auto": ( 43 | "FLOAT", 44 | {"default": 0.7, "min": 0.0, "max": 1.0, "step": 0.1}, 45 | ), 46 | "target_auto_alpha": ( 47 | "FLOAT", 48 | {"default": 0.1, "min": 0.0, "max": 10.0, "step": 0.1}, 49 | ), 50 | }, 51 | } 52 | 53 | RETURN_TYPES = ("AUDIO",) 54 | RETURN_NAMES = ("processed_audio",) 55 | CATEGORY = "Signal Processing" 56 | FUNCTION = "process" 57 | 58 | def
process( 59 | self, 60 | audio_input: Dict[str, Union[torch.Tensor, int]], 61 | mode: str, 62 | target_rms: float, 63 | target_lufs_db: float, 64 | target_peak: float, 65 | target_auto: float, 66 | target_auto_alpha: float, 67 | ) -> Tuple[Dict[str, torch.Tensor]]: 68 | 69 | waveform, sample_rate = audio_from_comfy_2d(audio_input, try_gpu=True) 70 | 71 | if mode == "rms": 72 | processed_waveform = rms_normalization(waveform, target_rms) 73 | elif mode == "lufs": 74 | processed_waveform = lufs_normalization( 75 | waveform, sample_rate, target_lufs_db 76 | ) 77 | elif mode == "peak": 78 | processed_waveform = peak_normalization(waveform, target_peak) 79 | elif mode == "auto": 80 | processed_waveform = automatic_gain_control( 81 | waveform, target_auto, target_auto_alpha 82 | ) 83 | else: 84 | processed_waveform = waveform 85 | 86 | return audio_to_comfy_3d(processed_waveform, sample_rate) 87 | -------------------------------------------------------------------------------- /processors/SignalProcessingSaturation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Classic Audio filter set 10 | 11 | Reference: 12 | https://ccrma.stanford.edu/~jatin/ComplexNonlinearities/Hysteresis.html 13 | https://viennatalk.mdw.ac.at/papers/Pap_01_79_Tronchin.pdf 14 | https://jatinchowdhury18.medium.com/complex-nonlinearities-episode-3-hysteresis-fdeb2cd3e3f6 15 | https://ccrma.stanford.edu/~dtyeh/papers/yeh07_dafx_clipode.pdf 16 | """ 17 | 18 | import torch 19 | from typing import Dict, Any, Tuple, Union 20 | 21 | from ..core.io import audio_to_comfy_3d, audio_from_comfy_2d 22 | from ..core.saturation import saturator, saturator_get_modes 23 | from ..core.loudness import get_loudness, lufs_normalization 24 | 25 | 26 | class SignalProcessingSaturation: 27 | @classmethod 28 | def INPUT_TYPES(cls) -> Dict[str, Any]: 29 | return { 30 | "required": { 31 | "audio_input": ("AUDIO",), 32 | "mode": (saturator_get_modes(),), 33 | "drive": ( 34 | "FLOAT", 35 | {"default": 50.0, "min": 0.0, "max": 200.0, "step": 0.1}, 36 | ), 37 | } 38 | } 39 | 40 | RETURN_TYPES = ("AUDIO",) 41 | RETURN_NAMES = ("audio",) 42 | CATEGORY = "Signal Processing" 43 | FUNCTION = "process" 44 | 45 | def process( 46 | self, 47 | audio_input: Dict[str, Union[torch.Tensor, int]], 48 | mode: str = "poly", 49 | drive: float = 50.0, 50 | ) -> Tuple[Dict[str, Union[torch.Tensor, int]]]: 51 | waveform, sample_rate = audio_from_comfy_2d(audio_input, try_gpu=True) 52 | 53 | loudness = get_loudness(waveform, sample_rate) 54 | 55 | filtered_waveform = saturator( 56 | waveform, mode=mode, sample_rate=sample_rate, drive=drive 57 | ) 58 | 59 | filtered_waveform = lufs_normalization(filtered_waveform, sample_rate, loudness) 60 | 61 | return audio_to_comfy_3d(filtered_waveform, sample_rate) 62 | -------------------------------------------------------------------------------- /processors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c0ffymachyne/ComfyUI_SignalProcessing/a50ab911eee901646fdb810f18ae8aed2ca1960b/processors/__init__.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "ComfyUI_SignalProcessing" 3 | description = "Audio processing nodes 
for comfyui." 4 | version = "0.0.2" 5 | license = { file = "LICENSE-GPL-V3" } 6 | 7 | [project.urls] 8 | Repository = "https://github.com/c0ffymachyne/ComfyUI_SignalProcessing" 9 | 10 | [tool.comfy] 11 | PublisherId = "c0ffymachyne" 12 | DisplayName = "ComfyUI_SignalProcessing" 13 | Icon = "images/icon.jpg" 14 | Models = [] 15 | 16 | [project.dependencies] 17 | torch = [ 18 | "--index-url", 19 | "https://download.pytorch.org/whl/cu118", 20 | "torch==2.4.1", 21 | "torchaudio==2.4.1", 22 | "torchvision==0.19.1" 23 | ] 24 | other = [ 25 | "numpy>=1.23.0", 26 | "scipy>=1.5.0", 27 | "pyfar", 28 | "scipy", 29 | "pyloudnorm", 30 | "cupy-cuda11x" 31 | ] 32 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | inference: mark as inference test (deselect with '-m "not inference"') 4 | execution: mark as execution test (deselect with '-m "not execution"') 5 | testpaths = 6 | tests 7 | tests-unit 8 | addopts = -s 9 | pythonpath = . 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | pyfar 3 | numpy 4 | torch 5 | torchaudio 6 | pyloudnorm 7 | cupy-cuda11x 8 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c0ffymachyne/ComfyUI_SignalProcessing/a50ab911eee901646fdb810f18ae8aed2ca1960b/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from typing import Dict, Union 3 | from pathlib import Path 4 | import torch 5 | from ..core.io import from_disk_as_raw_3d 6 | 7 | # Test data roots 8 | DATA_ROOT = Path("ComfyUI_SignalProcessing/audio") 9 | INPUT_FILE = DATA_ROOT / "inputs/002-orig.mp4" 10 | INPUT_FILE = DATA_ROOT / "inputs/pf-01.mp3" 11 | INPUT_IR_FILE = DATA_ROOT / "inputs/ir.wav" 12 | INPUTS_ROOT = DATA_ROOT / "inputs" 13 | INPUT_FILES = { 14 | file.name: file.resolve() for file in INPUTS_ROOT.rglob("*") if file.is_file() 15 | } 16 | 17 | TestData = Dict[str, Union[Dict[str, Union[torch.Tensor, int]], Path]] 18 | 19 | 20 | @pytest.fixture 21 | def test_data(request) -> TestData: 22 | 23 | test_name = request.node.name # Automatically get the current test function name 24 | test_name = str(request.node.function.__name__) 25 | 26 | OUTPUT_ROOT = DATA_ROOT / f"outputs/{test_name}" 27 | INPUTS_ROOT = DATA_ROOT / "inputs" 28 | 29 | param_values = ( 30 | request.node.callspec.params if hasattr(request.node, "callspec") else {} 31 | ) 32 | pest_param_str = "_".join(f"{key}-{value}" for key, value in param_values.items()) 33 | 34 | audio_slice_begin_seconds: float = 60.0 35 | audio_slice_duration_seconds: float = 120.0 36 | 37 | # Prepare audio data 38 | audio, sample_rate = from_disk_as_raw_3d( 39 | str(INPUT_FILE.absolute()), 40 | try_gpu=True, 41 | start_seconds=audio_slice_begin_seconds, 42 | duration_seconds=audio_slice_duration_seconds, 43 | ) 44 | OUTPUT_ROOT.mkdir(parents=True, exist_ok=True) # Ensure the output directory exists 45 | 46 | audio_to_comfy: Dict[torch.Tensor, int] = { 47 | "waveform": audio, 48 | "sample_rate": sample_rate, 49 | } 50 | 51 | return { 52 | 
"audio": audio_to_comfy, 53 | "output_root": OUTPUT_ROOT, 54 | "inputs_root": INPUTS_ROOT, 55 | "test_name": test_name, 56 | "pest_param_str": pest_param_str, 57 | } 58 | -------------------------------------------------------------------------------- /tests/test_baxandall.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ..tests.conftest import TestData 3 | from typing import Tuple 4 | from ..core.io import audio_from_comfy_3d_to_disk 5 | from ..processors.SignalProcessingBaxandallEQ import ( 6 | SignalProcessingBaxandallEQ, 7 | SignalProcessingBaxandall3BandEQ, 8 | ) 9 | 10 | 11 | TEST_NAME = "baxandall" 12 | 13 | params: list[Tuple[float, float]] = [ 14 | (9.0, 0.0), 15 | (6.0, 0.0), 16 | (3.0, 0.0), 17 | (1.0, 0.0), 18 | (0.0, 9.0), 19 | (0.0, 6.0), 20 | (0.0, 3.0), 21 | (0.0, 1.0), 22 | (0.0, 0.0), 23 | ] 24 | 25 | 26 | @pytest.mark.parametrize( 27 | "bass_gain_db, treble_gain_db", 28 | params, 29 | ) 30 | def test_baxandalleq_general( 31 | test_data: TestData, bass_gain_db: float, treble_gain_db: float 32 | ) -> None: 33 | 34 | node = SignalProcessingBaxandallEQ() 35 | 36 | output = node.process( 37 | audio_input=test_data["audio"], 38 | bass_gain_db=bass_gain_db, 39 | treble_gain_db=treble_gain_db, 40 | )[0] 41 | 42 | pest_param_str = test_data["pest_param_str"] 43 | output_filepath = test_data["output_root"] / f"{pest_param_str}.wav" 44 | audio_from_comfy_3d_to_disk(output, output_filepath) 45 | 46 | assert output_filepath.exists(), f"Output file {output_filepath} was not created." 47 | assert output is not None, "Processed audio output is None." 48 | 49 | print(f"test_baxandalleq_general {test_data['output_root']}") 50 | 51 | 52 | @pytest.mark.parametrize( 53 | "bass_gain_db,mid_gain_db,treble_gain_db,low_freq,mid_freq,high_freq,mid_q", 54 | [ 55 | (9.0, 9.0, 9.0, 100.0, 1000.0, 10000.0, 0.707), 56 | (6.0, 6.0, 6.0, 100.0, 1000.0, 10000.0, 0.707), 57 | (3.0, 3.0, 3.0, 100.0, 1000.0, 10000.0, 0.707), 58 | (1.0, 1.0, 1.0, 100.0, 1000.0, 10000.0, 0.707), 59 | (0.0, 0.0, 0.0, 100.0, 1000.0, 10000.0, 0.707), 60 | ], 61 | ) 62 | def test_baxandalleq3band_general( 63 | test_data: TestData, 64 | bass_gain_db: float, 65 | mid_gain_db: float, 66 | treble_gain_db: float, 67 | low_freq: float, 68 | mid_freq: float, 69 | high_freq: float, 70 | mid_q: float, 71 | ) -> None: 72 | 73 | node = SignalProcessingBaxandall3BandEQ() 74 | 75 | output = node.process( 76 | audio_input=test_data["audio"], 77 | bass_gain_db=bass_gain_db, 78 | mid_gain_db=mid_gain_db, 79 | treble_gain_db=treble_gain_db, 80 | low_freq=low_freq, 81 | mid_freq=mid_freq, 82 | high_freq=high_freq, 83 | mid_q=mid_q, 84 | )[0] 85 | 86 | pest_param_str = test_data["pest_param_str"] 87 | output_filepath = test_data["output_root"] / f"{pest_param_str}.wav" 88 | audio_from_comfy_3d_to_disk(output, output_filepath) 89 | 90 | assert output_filepath.exists(), f"Output file {output_filepath} was not created." 91 | assert output is not None, "Processed audio output is None." 
--------------------------------------------------------------------------------
/tests/test_convolution_reverb.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from .conftest import TestData
3 | from ..core.io import audio_from_comfy_3d_to_disk
4 | from ..effects.SignalProcessingConvolutionReverb import (
5 |     SignalProcessingConvolutionReverb,
6 | )
7 | 
8 | TEST_NAME = "convolution_reverb"
9 | 
10 | 
11 | @pytest.mark.parametrize(
12 |     "impulse_response, wet_dry",
13 |     [("ir.wav", 1.0), ("ir.wav", 0.6), ("ir.wav", 0.3), ("ir.wav", 0.0)],
14 | )
15 | def test_convolution_reverb_general(
16 |     test_data: TestData, impulse_response: str, wet_dry: float
17 | ) -> None:
18 |     SignalProcessingConvolutionReverb.ir_directory = test_data["inputs_root"] / "ir"
19 |     node = SignalProcessingConvolutionReverb()
20 | 
21 |     node.INPUT_TYPES()  # exercise INPUT_TYPES with the overridden ir_directory
22 | 
23 |     output = node.process(
24 |         impulse_response=impulse_response,
25 |         audio_input=test_data["audio"],
26 |         wet_dry=wet_dry,
27 |     )[0]
28 | 
29 |     pest_param_str = test_data["pest_param_str"]
30 |     output_filepath = test_data["output_root"] / f"{pest_param_str}.wav"
31 |     audio_from_comfy_3d_to_disk(output, output_filepath)
32 | 
33 |     assert output_filepath.exists(), f"Output file {output_filepath} was not created."
34 |     assert output is not None, "Processed audio output is None."
35 | 
36 |     print(f"test_convolution_reverb_general {test_data['output_root']}")
37 | 
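Under the hood, convolution reverb reduces to multiplying spectra. A hedged sketch of the wet/dry core with torch.fft; how the node actually pads, normalizes, or loads its impulse responses is not visible in this file:

import torch

def convolve_reverb(x: torch.Tensor, ir: torch.Tensor, wet_dry: float) -> torch.Tensor:
    # Full linear convolution via FFT: output length is len(x) + len(ir) - 1.
    n = x.shape[-1] + ir.shape[-1] - 1
    wet = torch.fft.irfft(torch.fft.rfft(x, n=n) * torch.fft.rfft(ir, n=n), n=n)
    wet = wet[..., : x.shape[-1]]  # trim the reverb tail to the dry length
    wet = wet / wet.abs().max().clamp_min(1e-9)  # guard against clipping
    return wet_dry * wet + (1.0 - wet_dry) * x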
--------------------------------------------------------------------------------
/tests/test_filter.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from ..tests.conftest import TestData
3 | from ..core.io import audio_from_comfy_3d_to_disk
4 | from ..processors.SignalProcessingFilter import (
5 |     SignalProcessingFilter,
6 | )
7 | 
8 | TEST_NAME = "filter"
9 | 
10 | 
11 | @pytest.mark.parametrize(
12 |     "cutoff, q_factor",
13 |     [
14 |         (0.9, 0.707),
15 |         (0.6, 0.707),
16 |         (0.3, 0.707),
17 |         (0.1, 0.707),
18 |         (0.0, 0.707),
19 |     ],
20 | )
21 | def test_filter_general(test_data: TestData, cutoff: float, q_factor: float) -> None:
22 | 
23 |     node: SignalProcessingFilter = SignalProcessingFilter()
24 |     modes = node.INPUT_TYPES()["required"]["filter_type"][0]  # list of filter modes
25 | 
26 |     for mode in modes:
27 |         output = node.process(
28 |             audio_input=test_data["audio"],
29 |             cutoff=cutoff,
30 |             filter_type=mode,
31 |             q_factor=q_factor,
32 |         )[0]
33 | 
34 |         pest_param_str = test_data["pest_param_str"]
35 |         output_filepath = test_data["output_root"] / f"{mode}-{pest_param_str}.wav"
36 |         audio_from_comfy_3d_to_disk(output, output_filepath)
37 | 
38 |         assert (
39 |             output_filepath.exists()
40 |         ), f"Output file {output_filepath} was not created."
41 |         assert output is not None, f"Processed audio output is None for mode {mode}."
42 | 
43 |     print(f"test_filter_general {test_data['output_root']}")
44 | 
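The modes iterated here come from SignalProcessingFilter's INPUT_TYPES, and the classic filter types map onto standard biquads that torchaudio ships directly. A sketch of one such mapping; the assumption that the node's normalized `cutoff` is a fraction of Nyquist is mine, not confirmed by this file:

import torch
import torchaudio.functional as F

def lowpass(waveform: torch.Tensor, sample_rate: int,
            cutoff: float, q_factor: float = 0.707) -> torch.Tensor:
    # Interpret the normalized cutoff (0..1) as a fraction of Nyquist.
    cutoff_hz = max(cutoff, 1e-3) * sample_rate / 2.0
    return F.lowpass_biquad(waveform, sample_rate, cutoff_freq=cutoff_hz, Q=q_factor)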
--------------------------------------------------------------------------------
/tests/test_harmonics.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from ..tests.conftest import TestData
3 | from typing import Tuple
4 | from ..core.io import audio_from_comfy_3d_to_disk
5 | from ..processors.SignalProcessingHarmonicsEnhancer import (
6 |     SignalProcessingHarmonicsEnhancer,
7 | )
8 | 
9 | TEST_NAME = "harmonics"
10 | 
11 | params: list[Tuple[str, int, float, float]] = [
12 |     ("2,3", 440, 6.0, 0.707),
13 |     ("2,3", 440, 3.0, 0.707),
14 |     ("2,3", 440, 1.0, 0.707),
15 |     ("2,3", 440, 0.0, 0.707),
16 | ]
17 | 
18 | 
19 | @pytest.mark.parametrize(
20 |     "harmonics, base_frequency, gain_db, Q",
21 |     params,
22 | )
23 | def test_harmonics_general(
24 |     test_data: TestData, harmonics: str, base_frequency: int, gain_db: float, Q: float
25 | ) -> None:
26 | 
27 |     node = SignalProcessingHarmonicsEnhancer()
28 |     modes = node.INPUT_TYPES()["required"]["mode"][0]  # list of enhancer modes
29 | 
30 |     for mode in modes:
31 |         output = node.process(
32 |             audio_input=test_data["audio"],
33 |             harmonics=harmonics,
34 |             mode=mode,
35 |             base_frequency=base_frequency,
36 |             gain_db=gain_db,
37 |             Q=Q,
38 |         )[0]
39 | 
40 |         pest_param_str = test_data["pest_param_str"]
41 |         output_filepath = test_data["output_root"] / f"{mode}-{pest_param_str}.wav"
42 |         audio_from_comfy_3d_to_disk(output, output_filepath)
43 | 
44 |         assert (
45 |             output_filepath.exists()
46 |         ), f"Output file {output_filepath} was not created."
47 |         assert output is not None, f"Processed audio output is None for mode {mode}."
48 | 
49 |     print(f"test_harmonics_general {test_data['output_root']}")
50 | 
--------------------------------------------------------------------------------
/tests/test_limiting.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from ..tests.conftest import TestData
3 | from typing import Tuple
4 | from ..core.io import audio_from_comfy_3d_to_disk
5 | from ..processors.SignalProcessingLimiter import SignalProcessingLimiter
6 | 
7 | TEST_NAME = "limiting"
8 | 
9 | params: list[Tuple[float, float, float]] = [
10 |     (0.0, 100.0, 600.0),
11 |     (10.0, 100.0, 600.0),
12 |     (30.0, 100.0, 600.0),
13 |     (90.0, 100.0, 600.0),
14 | ]
15 | 
16 | 
17 | @pytest.mark.parametrize(
18 |     "threshold, slope, release_ms",
19 |     params,
20 | )
21 | def test_limiting_general(
22 |     test_data: TestData, threshold: float, slope: float, release_ms: float
23 | ) -> None:
24 | 
25 |     node = SignalProcessingLimiter()
26 |     modes = node.INPUT_TYPES()["required"]["mode"][0]  # Extract modes
27 | 
28 |     for mode in modes:
29 | 
30 |         output = node.process(
31 |             audio_input=test_data["audio"],
32 |             mode=mode,
33 |             threshold=threshold,
34 |             slope=slope,
35 |             release_ms=release_ms,
36 |         )[0]
37 | 
38 |         pest_param_str = test_data["pest_param_str"]
39 |         output_filepath = test_data["output_root"] / f"{mode}-{pest_param_str}.wav"
40 |         audio_from_comfy_3d_to_disk(output, output_filepath)
41 | 
42 |         assert (
43 |             output_filepath.exists()
44 |         ), f"Output file {output_filepath} was not created."
45 |         assert output is not None, f"Processed audio output is None for mode {mode}."
46 | 
47 |     print(f"test_limiting_general {test_data['output_root']}")
48 | 
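The limiter modes exercised here are implemented as CUDA kernels under core/kernels/limiter; the simplest member of that family is a soft clipper, sketched below for reference (illustrative, not the kernels' exact math):

import torch

def soft_clip_limiter(x: torch.Tensor, ceiling: float = 0.9) -> torch.Tensor:
    # Scale into the ceiling, soft-clip with tanh, and scale back:
    # values far below the ceiling pass nearly unchanged, peaks are rounded off.
    return ceiling * torch.tanh(x / ceiling)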
--------------------------------------------------------------------------------
/tests/test_normalizer.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from .conftest import TestData
3 | from ..core.io import audio_from_comfy_3d_to_disk
4 | from ..processors.SignalProcessingNormalizer import SignalProcessingNormalizer
5 | 
6 | TEST_NAME = "normalizer"
7 | 
8 | 
9 | @pytest.mark.parametrize(
10 |     "target_rms, target_lufs_db, target_peak, target_auto, target_auto_alpha",
11 |     [
12 |         (-0.0, -14.0, -0.0, -0.0, -0.0),
13 |         (0.3, -14.0, 0.3, 0.3, 0.3),
14 |         (0.6, -14.0, 0.6, 0.6, 0.6),
15 |         (0.9, -14.0, 0.9, 0.9, 0.9),
16 |     ],
17 | )
18 | def test_normalizer_general(
19 |     test_data: TestData,
20 |     target_rms: float,
21 |     target_lufs_db: float,
22 |     target_peak: float,
23 |     target_auto: float,
24 |     target_auto_alpha: float,
25 | ) -> None:
26 | 
27 |     node = SignalProcessingNormalizer()
28 |     modes = node.INPUT_TYPES()["required"]["mode"][0]
29 | 
30 |     for mode in modes:
31 | 
32 |         output = node.process(
33 |             audio_input=test_data["audio"],
34 |             mode=mode,
35 |             target_rms=target_rms,
36 |             target_lufs_db=target_lufs_db,
37 |             target_peak=target_peak,
38 |             target_auto=target_auto,
39 |             target_auto_alpha=target_auto_alpha,
40 |         )[0]
41 | 
42 |         pest_param_str = test_data["pest_param_str"]
43 |         output_filepath = test_data["output_root"] / f"{mode}-{pest_param_str}.wav"
44 |         audio_from_comfy_3d_to_disk(output, output_filepath)
45 | 
46 |         assert (
47 |             output_filepath.exists()
48 |         ), f"Output file {output_filepath} was not created."
49 |         assert output is not None, f"Processed audio output is None for mode {mode}."
50 | 
51 |     print(f"test_normalizer_general {test_data['output_root']}")
52 | 
--------------------------------------------------------------------------------
/tests/test_padsynthchoir.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from ..tests.conftest import TestData
3 | from ..generators.SignalProcessingPadSynthChoir import SignalProcessingPadSynthChoir
4 | 
5 | TEST_NAME = "synth_and_mixdown"
6 | OUTPUT_ROOT = Path(f"ComfyUI_SignalProcessing/audio/outputs/{TEST_NAME}")
7 | 
8 | 
9 | def test_synth_and_mixdown(test_data: TestData) -> None:
10 | 
11 |     OUTPUT_ROOT.mkdir(parents=True, exist_ok=True)
12 | 
13 |     samplerate = 44100
14 |     base_freq = 440.0
15 |     step_size = 4
16 |     num_notes = 5
17 |     bandwidth_cents = 60.0
18 |     number_harmonics = 32
19 | 
20 |     synth_node = SignalProcessingPadSynthChoir()
21 |     synth_output, sample_rate = synth_node.process(
22 |         samplerate=samplerate,
23 |         base_freq=base_freq,
24 |         step_size=step_size,
25 |         num_notes=num_notes,
26 |         bandwidth_cents=bandwidth_cents,
27 |         number_harmonics=number_harmonics,
28 |     )
29 | 
30 |     assert synth_output is not None, "Synthesized audio output is None."
31 |     assert sample_rate > 0, "Returned sample rate should be positive."
32 | 
33 |     print(f"test_synth_and_mixdown {test_data['output_root']}")
34 | 
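The generator is named after Paul Nasca's PADsynth algorithm: place one Gaussian per harmonic in a magnitude spectrum, with a bandwidth that is constant in cents (so it grows in Hz with frequency), attach random phases, and inverse-FFT. A compact sketch of that textbook core, not necessarily the repo's exact implementation:

import torch

def padsynth(samplerate: int, base_freq: float, amplitudes: list[float],
             bandwidth_cents: float = 60.0, n: int = 1 << 18) -> torch.Tensor:
    bins = torch.arange(n // 2 + 1) * samplerate / n  # frequency of each rfft bin
    spectrum = torch.zeros(n // 2 + 1)
    for k, amp in enumerate(amplitudes, start=1):
        f = base_freq * k
        # Constant bandwidth in cents grows linearly in Hz with the harmonic.
        bw_hz = (2.0 ** (bandwidth_cents / 1200.0) - 1.0) * f
        spectrum += amp * torch.exp(-(((bins - f) / bw_hz) ** 2))
    # Random phases are the heart of PADsynth: they turn the harmonic
    # "blobs" into an evolving, chorus-like pad.
    phases = torch.rand(n // 2 + 1) * 2.0 * torch.pi
    wave = torch.fft.irfft(spectrum * torch.exp(1j * phases), n=n)
    return wave / wave.abs().max()

A harmonic profile such as [1.0 / k for k in range(1, 33)] gives a usable sawtooth-like pad bed.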
--------------------------------------------------------------------------------
/tests/test_paulstretch.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from .conftest import TestData
3 | from ..core.io import audio_from_comfy_3d_to_disk
4 | from ..effects.SignalProcessingPaulStretch import SignalProcessingPaulStretch
5 | 
6 | TEST_NAME = "paulstretch"
7 | 
8 | 
9 | @pytest.mark.parametrize(
10 |     "stretch_factor, window_size_seconds",
11 |     [
12 |         (9.0, 0.25),
13 |         (6.0, 0.25),
14 |         (3.0, 0.25),
15 |         (1.0, 0.25),
16 |         (2.0, 0.25),
17 |         (2.0, 0.5),
18 |         (2.0, 0.75),
19 |         (2.0, 1.0),
20 |         (2.0, 3.0),
21 |         (2.0, 6.0),
22 |     ],
23 | )
24 | def test_paul_stretch_general(
25 |     test_data: TestData, stretch_factor: float, window_size_seconds: float
26 | ) -> None:
27 | 
28 |     node = SignalProcessingPaulStretch()
29 |     output = node.process(
30 |         audio_input=test_data["audio"],
31 |         stretch_factor=stretch_factor,
32 |         window_size_seconds=window_size_seconds,
33 |     )[0]
34 | 
35 |     pest_param_str = test_data["pest_param_str"]
36 |     output_filepath = test_data["output_root"] / f"{pest_param_str}.wav"
37 |     audio_from_comfy_3d_to_disk(output, output_filepath)
38 | 
39 |     assert output_filepath.exists(), f"Output file {output_filepath} was not created."
40 |     assert output is not None, "Processed audio output is None."
41 | 
42 |     print(f"test_paul_stretch_general {test_data['output_root']}")
43 | 
--------------------------------------------------------------------------------
/tests/test_pitchshift.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from .conftest import TestData
3 | from ..core.io import audio_from_comfy_3d_to_disk
4 | from ..effects.SignalProcessingPitchShifter import SignalProcessingPitchShifter
5 | 
6 | TEST_NAME = "pitchshift"
7 | 
8 | 
9 | @pytest.mark.parametrize(
10 |     "pitch_shift_factor",
11 |     [(3), (1), (0), (-1), (-3)],
12 | )
13 | def test_pitch_shift_general(test_data: TestData, pitch_shift_factor: int) -> None:
14 | 
15 |     node = SignalProcessingPitchShifter()
16 | 
17 |     output = node.process(
18 |         audio_input=test_data["audio"], pitch_shift_factor=pitch_shift_factor
19 |     )[0]
20 | 
21 |     pest_param_str = test_data["pest_param_str"]
22 |     output_filepath = test_data["output_root"] / f"{pest_param_str}.wav"
23 |     audio_from_comfy_3d_to_disk(output, output_filepath)
24 | 
25 |     assert output_filepath.exists(), f"Output file {output_filepath} was not created."
26 |     assert output is not None, "Processed audio output is None."
27 | 
28 |     print(f"test_pitch_shift_general {test_data['output_root']}")
29 | 
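pitch_shift_factor is in signed semitone steps. The crudest way to realize such a shift is resampling, sketched below; duration changes along with pitch, which is the trade-off phase-vocoder-style shifters avoid, so this should be read as an illustration of the math, not as the node's method:

import torch
import torchaudio.functional as F

def pitch_shift_resample(waveform: torch.Tensor, sample_rate: int,
                         semitones: int) -> torch.Tensor:
    # Resample to sample_rate / factor samples, then play back at the original
    # rate: pitch rises by `factor`, duration shrinks by the same amount.
    factor = 2.0 ** (semitones / 12.0)
    return F.resample(waveform, orig_freq=sample_rate,
                      new_freq=int(sample_rate / factor))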
--------------------------------------------------------------------------------
/tests/test_plotting.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from torchvision.transforms import ToPILImage
3 | from .conftest import TestData
4 | from ..core.plotting import get_spectogram, get_wave, save_image
5 | from ..visuals.SignalProcessingSpectrogram import SignalProcessingSpectrogram
6 | 
7 | TEST_NAME = "plotting"
8 | 
9 | 
10 | @pytest.mark.parametrize(
11 |     "n_fft, n_mels, xlim",
12 |     [
13 |         (4096, 128 * 1, 8192),
14 |         (4096, 128 * 2, 8192),
15 |         (4096, 128 * 4, 8192),
16 |         (4096, 128 * 6, 8192),
17 |     ],
18 | )
19 | def test_plotting_spectogram_general(
20 |     test_data: TestData, n_fft: int, n_mels: int, xlim: int
21 | ) -> None:
22 | 
23 |     waveform = test_data["audio"]["waveform"].squeeze(0)
24 |     sample_rate = test_data["audio"]["sample_rate"]
25 | 
26 |     print("waveform", waveform.shape)
27 | 
28 |     spectogram = get_spectogram(
29 |         waveform, sample_rate=sample_rate, n_fft=n_fft, n_mels=n_mels, xlim=xlim
30 |     )
31 | 
32 |     pest_param_str = test_data["pest_param_str"]
33 |     output_filepath = test_data["output_root"] / f"{pest_param_str}.png"
34 | 
35 |     save_image(output_filepath, spectogram)
36 | 
37 |     assert output_filepath.exists(), f"Output file {output_filepath} was not created."
38 |     assert spectogram is not None, "Spectrogram image is None."
39 | 
40 |     print(f"test_plotting_spectogram_general {test_data['output_root']}")
41 | 
42 | 
43 | @pytest.mark.parametrize(
44 |     "stretch_factor, window_size_seconds",
45 |     [(8.0, 0.25)],
46 | )
47 | def test_plotting_waveform_general(
48 |     test_data: TestData, stretch_factor: float, window_size_seconds: float
49 | ) -> None:
50 | 
51 |     waveform = test_data["audio"]["waveform"].squeeze(0)
52 |     sample_rate = test_data["audio"]["sample_rate"]
53 | 
54 |     print("waveform", waveform.shape)
55 | 
56 |     spectogram = get_wave(waveform, sample_rate=sample_rate, xlim=4096)
57 | 
58 |     pest_param_str = test_data["pest_param_str"]
59 |     output_filepath = test_data["output_root"] / f"{pest_param_str}.png"
60 | 
61 |     save_image(output_filepath, spectogram)
62 | 
63 |     assert output_filepath.exists(), f"Output file {output_filepath} was not created."
64 |     assert spectogram is not None, "Waveform image is None."
65 | 
66 |     print(f"test_plotting_waveform_general {test_data['output_root']}")
67 | 
68 | 
69 | @pytest.mark.parametrize(
70 |     "stretch_factor, window_size_seconds",
71 |     [(8.0, 0.25)],
72 | )
73 | def test_plotting_waveform_node_general(
74 |     test_data: TestData, stretch_factor: float, window_size_seconds: float
75 | ) -> None:
76 | 
77 |     node = SignalProcessingSpectrogram()
78 | 
79 |     output = node.process(audio_input=test_data["audio"])[0]
80 | 
81 |     pest_param_str = test_data["pest_param_str"]
82 |     output_filepath = test_data["output_root"] / f"{pest_param_str}.png"
83 | 
84 |     to_pil = ToPILImage()
85 |     print("rgb_image[0] -----------------------------", output[0].shape)
86 |     rgb_image = output[0][..., :3]
87 |     rgb_image = rgb_image.permute(2, 0, 1)
88 |     print("rgb_image -----------------------------", rgb_image.shape)
89 |     spectogram = to_pil(rgb_image)
90 |     print("spectogram -----------------------------", spectogram)
91 |     save_image(output_filepath, spectogram)
92 | 
93 |     assert output_filepath.exists(), f"Output file {output_filepath} was not created."
94 |     assert output is not None, "Spectrogram node output is None."
95 | 
96 |     print(f"test_plotting_waveform_node_general {test_data['output_root']}")
97 | 
--------------------------------------------------------------------------------
/tests/test_saturation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from ..tests.conftest import TestData
3 | from ..core.io import audio_from_comfy_3d_to_disk
4 | from ..processors.SignalProcessingSaturation import SignalProcessingSaturation
5 | 
6 | TEST_NAME = "saturation"
7 | 
8 | 
9 | @pytest.mark.parametrize(
10 |     "drive",
11 |     [
12 |         (90.0),
13 |         (60.0),
14 |         (30.0),
15 |         (10.0),
16 |         (0.0),
17 |     ],
18 | )
19 | def test_saturation_general(test_data: TestData, drive: float) -> None:
20 | 
21 |     node = SignalProcessingSaturation()
22 |     modes = node.INPUT_TYPES()["required"]["mode"][0]  # Extract modes
23 | 
24 |     for mode in modes:
25 | 
26 |         output = node.process(audio_input=test_data["audio"], mode=mode, drive=drive)[0]
27 | 
28 |         pest_param_str = test_data["pest_param_str"]
29 |         output_filepath = test_data["output_root"] / f"{mode}-{pest_param_str}.wav"
30 |         audio_from_comfy_3d_to_disk(output, output_filepath)
31 | 
32 |         assert (
33 |             output_filepath.exists()
34 |         ), f"Output file {output_filepath} was not created."
35 |         assert output is not None, f"Processed audio output is None for mode {mode}."
36 | 37 | print(f"test_saturation_general {test_data['output_root']}") 38 | -------------------------------------------------------------------------------- /tests/test_widening.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ..tests.conftest import TestData 3 | from ..core.io import audio_from_comfy_3d_to_disk 4 | from ..effects.SignalProcessingStereoWidening import SignalProcessingStereoWidening 5 | 6 | TEST_NAME = "widening" 7 | 8 | 9 | @pytest.mark.parametrize( 10 | "width", 11 | [ 12 | (3.0), 13 | (2.0), 14 | (1.0), 15 | (0.5), 16 | (0.25), 17 | (0.0), 18 | ], 19 | ) 20 | def test_widening_general(test_data: TestData, width: float) -> None: 21 | 22 | node = SignalProcessingStereoWidening() 23 | modes = node.INPUT_TYPES()["required"]["mode"][0] 24 | 25 | for mode in modes: 26 | 27 | output = node.process(audio_input=test_data["audio"], mode=mode, width=width)[0] 28 | 29 | pest_param_str = test_data["pest_param_str"] 30 | output_filepath = test_data["output_root"] / f"{mode}-{pest_param_str}.wav" 31 | audio_from_comfy_3d_to_disk(output, output_filepath) 32 | 33 | assert ( 34 | output_filepath.exists() 35 | ), f"Output file {output_filepath} was not created." 36 | assert output is not None, f"Processed audio output is None for mode {mode}." 37 | 38 | print(f"test_widening_general {test_data['output_root']}") 39 | -------------------------------------------------------------------------------- /visuals/SignalProcessingSpectrogram.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Spectogram image node 10 | """ 11 | 12 | import torch 13 | 14 | from PIL import Image 15 | 16 | from typing import Dict, Tuple, Any, Type, List 17 | 18 | import numpy as np 19 | import torchaudio 20 | 21 | import matplotlib.pyplot as plt 22 | 23 | 24 | class SignalProcessingSpectrogram: 25 | @classmethod 26 | def INPUT_TYPES(cls: Type["SignalProcessingSpectrogram"]) -> Dict[str, Any]: 27 | cmaps: List[str] = ["viridis", "plasma", "inferno", "magma", "cividis"] 28 | return { 29 | "required": { 30 | "audio_input": ("AUDIO",), 31 | "color_map": (cmaps,), 32 | }, 33 | "optional": { 34 | "n_fft": ( 35 | "INT", 36 | {"default": 4096, "min": 512, "max": 8192, "step": 256}, 37 | ), 38 | "hop_length": ( 39 | "INT", 40 | {"default": 128, "min": 64, "max": 4096, "step": 128}, 41 | ), 42 | "n_mels": ("INT", {"default": 512, "min": 32, "max": 2048, "step": 32}), 43 | "top_db": ( 44 | "FLOAT", 45 | {"default": 80.0, "min": 10.0, "max": 100.0, "step": 5.0}, 46 | ), 47 | }, 48 | } 49 | 50 | RETURN_TYPES = ("IMAGE",) 51 | RETURN_NAMES = ("spectrogram_image",) 52 | CATEGORY = "Signal Processing" 53 | FUNCTION = "process" 54 | 55 | def process( 56 | self, 57 | audio_input: Dict[str, torch.Tensor], 58 | color_map: str = "viridis", 59 | n_fft: int = 2048, 60 | hop_length: int = 512, 61 | n_mels: int = 128, 62 | top_db: float = 80.0, 63 | ) -> Tuple[torch.Tensor]: 64 | waveform = audio_input.get("waveform") 65 | sample_rate = audio_input.get("sample_rate") 66 | 67 | # Validate that waveform and sample_rate are not None 68 | if waveform is None: 69 | raise ValueError("The 'waveform' key is missing or None in 'audio_input'.") 70 | if not isinstance(waveform, torch.Tensor): 71 | raise TypeError( 72 | f"Expected 'waveform' to be a torch.Tensor, got {type(waveform)}." 
73 | ) 74 | if sample_rate is None: 75 | raise ValueError( 76 | "The 'sample_rate' key is missing or None in 'audio_input'." 77 | ) 78 | if not isinstance(sample_rate, int): 79 | raise TypeError( 80 | f"Expected 'sample_rate' to be an int, got {type(sample_rate)}." 81 | ) 82 | 83 | # waveform, sample_rate = audio_from_comfy_2d(audio_input) 84 | 85 | # Convert to mono by averaging channels 86 | if waveform.ndim == 3: 87 | # [batch, channels, samples] 88 | waveform = waveform.mean(dim=1, keepdim=True) # [batch, 1, samples] 89 | waveform = waveform.squeeze(0) # [1, samples] 90 | elif waveform.ndim == 2: 91 | # [channels, samples] 92 | if waveform.shape[0] > 1: 93 | waveform = waveform.mean(dim=0, keepdim=True) # [1, samples] 94 | else: 95 | waveform = waveform.unsqueeze(0) # [1, samples] 96 | elif waveform.ndim == 1: 97 | # [samples] 98 | waveform = waveform.unsqueeze(0) # [1, samples] 99 | else: 100 | raise ValueError(f"Unsupported waveform shape: {waveform.shape}") 101 | 102 | # Generate Mel Spectrogram 103 | spectrogram_transform = torchaudio.transforms.MelSpectrogram( 104 | sample_rate=sample_rate, 105 | n_fft=n_fft, 106 | hop_length=hop_length, 107 | n_mels=n_mels, 108 | power=2.0, 109 | norm="slaney", 110 | mel_scale="htk", 111 | ).to(waveform.device, dtype=waveform.dtype) 112 | spectrogram = spectrogram_transform(waveform) # [1, n_mels, time_frames] 113 | 114 | # Convert to decibel scale 115 | amplitude_to_db = torchaudio.transforms.AmplitudeToDB(top_db=top_db) 116 | spectrogram_db = amplitude_to_db(spectrogram) # [1, n_mels, time_frames] 117 | 118 | # Convert to numpy 119 | spectrogram_db = ( 120 | spectrogram_db.squeeze().detach().cpu().numpy() 121 | ) # [n_mels, time_frames] 122 | 123 | # Clip spectrogram to a range for better contrast 124 | spectrogram_db = np.clip(spectrogram_db, -top_db, 0.0) 125 | 126 | # Normalize spectrogram to [0,1] 127 | spectrogram_normalized = (spectrogram_db + top_db) / top_db # [0,1] 128 | 129 | # Apply a colormap (e.g., 'inferno') using matplotlib 130 | cmap = plt.get_cmap(color_map) 131 | spectrogram_colored = cmap( 132 | spectrogram_normalized 133 | ) # [n_mels, time_frames, 4] RGBA 134 | 135 | # Convert to RGB by removing alpha channel 136 | spectrogram_rgb = (spectrogram_colored[:, :, :3] * 255).astype( 137 | np.uint8 138 | ) # [n_mels, time_frames, 3] 139 | spectrogram_rgb = np.squeeze(spectrogram_rgb) 140 | 141 | # Check the shape and adjust if necessary 142 | if len(spectrogram_rgb.shape) == 3 and spectrogram_rgb.shape[-1] == 3: 143 | # Ensure the array is in uint8 format (0-255 range) 144 | spectrogram_rgb = np.clip(spectrogram_rgb, 0, 255).astype(np.uint8) 145 | else: 146 | raise ValueError(f"Unexpected spectrogram shape: {spectrogram_rgb.shape}") 147 | 148 | # Convert to RGB image 149 | spectrogram_image = Image.fromarray(spectrogram_rgb).convert("RGB") 150 | 151 | # Optionally resize for better resolution 152 | spectrogram_image = spectrogram_image.resize( 153 | (spectrogram_image.width * 2, spectrogram_image.height * 2), Image.BILINEAR 154 | ) 155 | 156 | # Convert to numpy array and normalize to [0,1] 157 | image_np = np.array(spectrogram_image).astype(np.float32) / 255.0 # [H, W, 3] 158 | 159 | # Convert to torch tensor and add batch dimension 160 | # image_tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0) # [1, 3, H, W] 161 | 162 | image = torch.from_numpy(image_np)[None,] 163 | 164 | return (image,) 165 | -------------------------------------------------------------------------------- /visuals/SignalProcessingWaveform.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Author: C0ffymachyne 5 | License: GPLv3 6 | Version: 1.0.0 7 | 8 | Description: 9 | Waveform image rendering node 10 | """ 11 | 12 | import torch 13 | from io import BytesIO 14 | from PIL import Image 15 | import numpy as np 16 | import matplotlib.pyplot as plt 17 | 18 | from typing import Any, Dict, Tuple 19 | 20 | from ..core.utilities import comfy_root_to_syspath 21 | 22 | comfy_root_to_syspath() # add comfy to sys path for dev 23 | 24 | 25 | class SignalProcessingWaveform: 26 | @classmethod 27 | def INPUT_TYPES(cls) -> Dict[str, Any]: 28 | return { 29 | "required": { 30 | "audio_input": ("AUDIO",), 31 | }, 32 | "optional": { 33 | "color": ("STRING", {"default": "black"}), 34 | "background_color": ("STRING", {"default": "white"}), 35 | "width": ( 36 | "INT", 37 | {"default": 800, "min": 100, "max": 4000, "step": 100}, 38 | ), 39 | "height": ("INT", {"default": 200, "min": 50, "max": 1000, "step": 50}), 40 | "line_width": ( 41 | "FLOAT", 42 | {"default": 1.0, "min": 0.1, "max": 10.0, "step": 0.1}, 43 | ), 44 | }, 45 | } 46 | 47 | RETURN_TYPES = ("IMAGE",) 48 | RETURN_NAMES = ("waveform_image",) 49 | CATEGORY = "Signal Processing" 50 | FUNCTION = "process" 51 | 52 | def process( 53 | self, 54 | audio_input: torch.Tensor, 55 | color: str = "white", 56 | background_color: str = "black", 57 | width: int = 800, 58 | height: int = 200, 59 | line_width: float = 1.0, 60 | ) -> Tuple[torch.Tensor]: 61 | waveform = audio_input.get( 62 | "waveform" 63 | ) # [channels, samples] or [batch, channels, samples] 64 | 65 | # Convert to mono by averaging channels 66 | if waveform.ndim == 3: 67 | # [batch, channels, samples] 68 | waveform = waveform.mean(dim=1, keepdim=True) # [batch, 1, samples] 69 | waveform = waveform.squeeze(0) # [1, samples] 70 | elif waveform.ndim == 2: 71 | # [channels, samples] 72 | if waveform.shape[0] > 1: 73 | waveform = waveform.mean(dim=0, keepdim=True) # [1, samples] 74 | else: 75 | waveform = waveform.unsqueeze(0) # [1, samples] 76 | elif waveform.ndim == 1: 77 | # [samples] 78 | waveform = waveform.unsqueeze(0) # [1, samples] 79 | else: 80 | raise ValueError(f"Unsupported waveform shape: {waveform.shape}") 81 | 82 | # Convert waveform to numpy 83 | waveform = waveform.to(dtype=torch.float32) 84 | waveform_np = waveform.squeeze().detach().cpu().numpy() # [samples] 85 | 86 | # Create a matplotlib figure without axes 87 | plt.figure(figsize=(width / 100, height / 100), dpi=96) 88 | plt.axis("off") 89 | plt.margins(0, 0) 90 | plt.gca().set_facecolor(background_color) 91 | plt.gca().set_position([0, 0, 1, 1]) 92 | 93 | # Plot the waveform 94 | plt.plot(waveform_np, color=color, linewidth=line_width) 95 | plt.ylim(-1.3, 1.3) # Set y-axis limits to -1 and 1 96 | plt.tight_layout(pad=0) 97 | 98 | # Save the plot to a buffer 99 | buf = BytesIO() 100 | plt.savefig(buf, format="png", bbox_inches="tight", pad_inches=0) 101 | plt.close() 102 | 103 | # Load the image from the buffer 104 | buf.seek(0) 105 | waveform_image = Image.open(buf).convert("RGB") 106 | 107 | # Resize if necessary 108 | waveform_image = waveform_image.resize((width, height), Image.BILINEAR) 109 | 110 | # Convert to numpy array and normalize to [0,1] 111 | image_np = np.array(waveform_image).astype(np.float32) / 255.0 # [H, W, 3] 112 | 113 | # Convert to torch tensor and add batch dimension 114 | image = torch.from_numpy(image_np)[None,] 115 | 116 | return (image,) 117 | 
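# NOTE: SignalProcessingWaveform2 below duplicates the class above except for
# three details: its process() defaults swap the color scheme back to black on
# white (matching its INPUT_TYPES defaults), it plots with a fixed ylim of
# (-1, 1) instead of (-1.3, 1.3), and it returns the image as [1, 3, H, W]
# (channels-first) rather than [1, H, W, 3]. ComfyUI's IMAGE convention is
# channels-last, so the first class's layout is the one the UI expects.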
118 | 119 | class SignalProcessingWaveform2: 120 | @classmethod 121 | def INPUT_TYPES(cls) -> Dict[str, Any]: 122 | return { 123 | "required": { 124 | "audio_input": ("AUDIO",), 125 | }, 126 | "optional": { 127 | "color": ("STRING", {"default": "black"}), 128 | "background_color": ("STRING", {"default": "white"}), 129 | "width": ( 130 | "INT", 131 | {"default": 800, "min": 100, "max": 4000, "step": 100}, 132 | ), 133 | "height": ("INT", {"default": 200, "min": 50, "max": 1000, "step": 50}), 134 | "line_width": ( 135 | "FLOAT", 136 | {"default": 1.0, "min": 0.1, "max": 10.0, "step": 0.1}, 137 | ), 138 | }, 139 | } 140 | 141 | RETURN_TYPES = ("IMAGE",) 142 | RETURN_NAMES = ("waveform_image",) 143 | CATEGORY = "Signal Processing" 144 | FUNCTION = "process" 145 | 146 | def process( 147 | self, 148 | audio_input: torch.Tensor, 149 | color: str = "black", 150 | background_color: str = "white", 151 | width: int = 800, 152 | height: int = 200, 153 | line_width: float = 1.0, 154 | ) -> Tuple[torch.Tensor]: 155 | waveform = audio_input.get( 156 | "waveform" 157 | ) # [channels, samples] or [batch, channels, samples] 158 | 159 | # Convert to mono by averaging channels 160 | if waveform.ndim == 3: 161 | # [batch, channels, samples] 162 | waveform = waveform.mean(dim=1, keepdim=True) # [batch, 1, samples] 163 | waveform = waveform.squeeze(0) # [1, samples] 164 | elif waveform.ndim == 2: 165 | # [channels, samples] 166 | if waveform.shape[0] > 1: 167 | waveform = waveform.mean(dim=0, keepdim=True) # [1, samples] 168 | else: 169 | waveform = waveform.unsqueeze(0) # [1, samples] 170 | elif waveform.ndim == 1: 171 | # [samples] 172 | waveform = waveform.unsqueeze(0) # [1, samples] 173 | else: 174 | raise ValueError(f"Unsupported waveform shape: {waveform.shape}") 175 | 176 | # Ensure waveform is in float32 177 | waveform = waveform.to(dtype=torch.float32) 178 | waveform_np = waveform.squeeze().detach().cpu().numpy() # [samples] 179 | 180 | # Create a matplotlib figure without axes 181 | plt.figure(figsize=(width / 100, height / 100), dpi=96) 182 | plt.axis("off") 183 | plt.margins(0, 0) 184 | ax = plt.gca() 185 | ax.set_facecolor(background_color) 186 | ax.set_position([0, 0, 1, 1]) 187 | 188 | # Plot the waveform with fixed y-axis limits 189 | plt.plot(waveform_np, color=color, linewidth=line_width) 190 | plt.ylim(-1, 1) # Set y-axis limits to -1 and 1 191 | plt.tight_layout(pad=0) 192 | 193 | # Save the plot to a buffer 194 | buf = BytesIO() 195 | plt.savefig(buf, format="png", bbox_inches="tight", pad_inches=0) 196 | plt.close() 197 | 198 | # Load the image from the buffer 199 | buf.seek(0) 200 | waveform_image = Image.open(buf).convert("RGB") 201 | 202 | # Resize if necessary 203 | waveform_image = waveform_image.resize((width, height), Image.BILINEAR) 204 | 205 | # Convert to numpy array and normalize to [0,1] 206 | image_np = np.array(waveform_image).astype(np.float32) / 255.0 # [H, W, 3] 207 | 208 | # Convert to torch tensor and add batch dimension 209 | image = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0) # [1, 3, H, W] 210 | 211 | return (image,) 212 | -------------------------------------------------------------------------------- /visuals/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c0ffymachyne/ComfyUI_SignalProcessing/a50ab911eee901646fdb810f18ae8aed2ca1960b/visuals/__init__.py --------------------------------------------------------------------------------
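For completeness: ComfyUI discovers a custom-node package through module-level mapping dicts, which this repo's nodes.py presumably provides for every node. A hypothetical registration sketch for the two visual nodes above, following that standard convention:

# Hypothetical registration module; ComfyUI reads these two dicts at import time.
from .visuals.SignalProcessingWaveform import SignalProcessingWaveform
from .visuals.SignalProcessingSpectrogram import SignalProcessingSpectrogram

NODE_CLASS_MAPPINGS = {
    "SignalProcessingWaveform": SignalProcessingWaveform,
    "SignalProcessingSpectrogram": SignalProcessingSpectrogram,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "SignalProcessingWaveform": "Signal Processing: Waveform",
    "SignalProcessingSpectrogram": "Signal Processing: Spectrogram",
}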