85 |
86 |
87 | .. include:: installation.rst
88 | .. include:: start.rst
89 |
90 |
91 | LICENCE
92 | -------
93 |
94 | Sudio is released under the GNU AFFERO GENERAL PUBLIC LICENSE Version 3. See the ``LICENSE`` file for details.
95 |
96 |
97 |
98 |
99 |
100 | .. toctree::
101 | :maxdepth: 10
102 | :caption: Contents
103 |
104 | installation
105 | start
106 | core
107 | pipeline
108 | audiosys
109 | api/io_root
110 | io
111 | metadata
112 | stream
113 | types
114 | utils
115 | rateshift
116 | process
117 | Source code <https://github.com/MrZahaki/sudio>
118 |
119 |
120 | Indices and tables
121 | ==================
122 |
123 | * :ref:`genindex`
124 | * :ref:`modindex`
125 | * :ref:`search`
126 |
--------------------------------------------------------------------------------
/sudio/utils/typeconversion.py:
--------------------------------------------------------------------------------
1 |
2 | # SUDIO - Audio Processing Platform
3 | # Copyright (C) 2024 Hossein Zahaki
4 |
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU Lesser General Public License as published
7 | # by the Free Software Foundation, either version 3 of the License, or
8 | # any later version.
9 |
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU Lesser General Public License for more details.
14 |
15 | # You should have received a copy of the GNU Lesser General Public License
16 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 |
18 | # - GitHub: https://github.com/MrZahaki/sudio
19 |
20 |
21 | # This program is distributed in the hope that it will be useful,
22 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
23 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 | # GNU Lesser General Public License for more details.
25 |
26 | # You should have received a copy of the GNU Lesser General Public License
27 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
28 |
29 | # - GitHub: https://github.com/MrZahaki/sudio
30 |
31 |
32 |
33 | from sudio.io import SampleFormat
34 | import numpy as np
35 |
36 |
37 |
def convert_array_type(arr:np.ndarray, target_format:SampleFormat, source_format:SampleFormat=SampleFormat.UNKNOWN):
    """
    Convert the data type of a NumPy array based on the given SampleFormat.

    When ``source_format`` is known and differs from ``target_format`` the
    samples are first normalised to the floating-point range [-1.0, 1.0) and
    then rescaled (with clipping) to the full-scale range of the target
    format.  When ``source_format`` is ``SampleFormat.UNKNOWN`` or equals
    ``target_format`` no rescaling happens and the array is only cast to the
    NumPy dtype associated with ``target_format``.

    Args:
        arr (np.ndarray): Input NumPy array
        target_format (SampleFormat): Desired output format
        source_format (SampleFormat): Format the samples in ``arr`` are
            currently encoded in. Defaults to ``SampleFormat.UNKNOWN``
            (cast only, no rescaling).

    Returns:
        np.ndarray: Converted NumPy array (float32 for FLOAT32, int32 for
        SIGNED32 and SIGNED24, int16 for SIGNED16, uint8 for UNSIGNED8, and
        the array unchanged for UNKNOWN).

    Raises:
        ValueError: If ``target_format`` is not one of the handled formats.
    """
    if not (source_format == SampleFormat.UNKNOWN) and not (source_format == target_format):
        # Work in float64: float32 cannot represent 2**31 - 1, so the upper
        # clip bound for SIGNED32 would round up to 2**31 and overflow when
        # cast to int32.
        arr = arr.astype(np.float64)

        # Normalise the source samples to [-1.0, 1.0).
        if source_format == SampleFormat.SIGNED32:
            arr = arr / 2**31
        elif source_format == SampleFormat.SIGNED24:
            arr = arr / 2**23
        elif source_format == SampleFormat.SIGNED16:
            arr = arr / 2**15
        elif source_format == SampleFormat.UNSIGNED8:
            # Inverse of the encoder below (arr * 128 + 128): unsigned 8-bit
            # samples are centred on 128, not on 0.
            arr = (arr - 128) / 128

        # Rescale the normalised samples to the target full-scale range.
        # FLOAT32 needs no scaling.
        if target_format == SampleFormat.SIGNED32:
            arr = np.clip(arr * (2 ** 31), -(2 ** 31), (2 ** 31) - 1)
        elif target_format == SampleFormat.SIGNED24:
            arr = np.clip(arr * (2 ** 23), -(2 ** 23), (2 ** 23) - 1)
        elif target_format == SampleFormat.SIGNED16:
            arr = np.clip(arr * (2 ** 15), -(2 ** 15), (2 ** 15) - 1)
        elif target_format == SampleFormat.UNSIGNED8:
            arr = np.clip(arr * 128 + 128, 0, 255)

    # Final cast to the storage dtype of the target format.
    if target_format == SampleFormat.FLOAT32:
        return arr.astype(np.float32)
    elif target_format == SampleFormat.SIGNED32 or target_format == SampleFormat.SIGNED24:
        # 24-bit samples are carried in a 32-bit container.
        return arr.astype(np.int32)
    elif target_format == SampleFormat.SIGNED16:
        return arr.astype(np.int16)
    elif target_format == SampleFormat.UNSIGNED8:
        return arr.astype(np.uint8)
    elif target_format == SampleFormat.UNKNOWN:
        return arr
    else:
        raise ValueError("Unsupported sample format")
86 |
--------------------------------------------------------------------------------
/sudio/utils/math.pyx:
--------------------------------------------------------------------------------
1 | # cython: language_level=3
2 | # distutils: extra_compile_args = -O3
3 |
4 |
5 | # SUDIO - Audio Processing Platform
6 | # Copyright (C) 2024 Hossein Zahaki
7 |
8 | # This program is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU Lesser General Public License as published
10 | # by the Free Software Foundation, either version 3 of the License, or
11 | # any later version.
12 |
13 | # This program is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU Lesser General Public License for more details.
17 |
18 | # You should have received a copy of the GNU Lesser General Public License
19 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
20 |
21 | # - GitHub: https://github.com/MrZahaki/sudio
22 |
23 |
24 | # This program is distributed in the hope that it will be useful,
25 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
26 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 | # GNU Lesser General Public License for more details.
28 |
29 | # You should have received a copy of the GNU Lesser General Public License
30 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
31 |
32 | # - GitHub: https://github.com/MrZahaki/sudio
33 |
34 |
35 | import numpy as np
36 | cimport numpy as np
37 | cimport cython
38 |
39 |
cpdef int find_nearest_divisible(int reference_number, int divisor):
    """
    Finds the number closest to 'reference_number' that is divisible by 'divisor'.

    Two candidates are considered: 'reference_number' minus its remainder and
    that value advanced by one 'divisor'.  When both are equally far from
    'reference_number' the first candidate is returned.  If 'reference_number'
    is already divisible, it is returned unchanged.

    Args:
        reference_number (int): The reference number.
        divisor (int): The divisor.

    Returns:
        int: The multiple of 'divisor' nearest to 'reference_number'.
    """
    cdef int modulo = reference_number % divisor
    cdef int option1 = reference_number - modulo
    cdef int option2 = reference_number + (divisor - modulo)

    # '<=' makes ties resolve to option1.
    return option1 if abs(option1 - reference_number) <= abs(option2 - reference_number) else option2
54 |
cpdef int find_nearest_divisor(int num, int divisor) except? -1:
    """
    Finds the nearest divisor with zero remainder for 'num'.

    The search starts at round(num / divisor) and walks outwards one step at
    a time, testing the lower candidate before the upper one, until a value
    that divides 'num' exactly is found.

    Args:
        num (int): The dividend.
        divisor (int): The candidate divisor.

    Returns:
        int: The nearest divisor.

    Raises:
        ValueError: If the search range is exhausted without finding a divisor.
    """
    cdef int div = int(round(num / divisor))
    cdef int lower
    cdef int upper

    # For positive inputs a very large 'divisor' rounds the starting point
    # down to 0, which would make the first test a modulo by zero.  The
    # smallest meaningful candidate is 1.
    if num > 0 and divisor > 0 and div < 1:
        div = 1

    lower = div
    upper = div

    # '<=' (not '<') so that 'num' itself is tested and so that 'lower' is
    # still examined on the iteration where 'upper' reaches 'num'; otherwise
    # e.g. (7, 2) or (8, 1) would fail although 1, 7 and 8 are valid results.
    while upper <= num:
        if num % lower == 0:
            return lower
        if num % upper == 0:
            return upper

        lower -= 1
        upper += 1

    raise ValueError("No divisor with a zero remainder found.")
80 |
81 |
82 |
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef db2amp(db):
    """
    Convert decibels to a linear amplitude factor.

    Computes 10 ** (db / 20) in double precision.

    Args:
        db (int, float, ndarray): Decibel value(s)

    Returns:
        The corresponding amplitude value(s) as float64.
    """
    exponent = db / 20.0
    return np.power(10.0, exponent, dtype=np.float64)
94 |
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef amp2db(amp):
    """
    Convert a linear amplitude to decibels.

    Computes 20 * log10(amp) in double precision.

    Args:
        amp (int, float, ndarray): Amplitude value(s)

    Returns:
        The corresponding decibel value(s) as float64.
    """
    log_amplitude = np.log10(amp, dtype=np.float64)
    return 20.0 * log_amplitude
106 |
107 |
--------------------------------------------------------------------------------
/docs/process.rst:
--------------------------------------------------------------------------------
1 | Process Module
2 | ==============
3 |
4 |
5 | .. raw:: html
6 |
7 |
8 |
16 |
17 |
18 |
19 | .. automodule:: sudio.process.audio_wrap
20 | :members:
21 | :undoc-members:
22 | :show-inheritance:
23 |
24 |
25 | .. automodule:: sudio.process.fx.tempo
26 | :members:
27 | :undoc-members:
28 | :show-inheritance:
29 |
30 | .. automodule:: sudio.process.fx.gain
31 | :members:
32 | :undoc-members:
33 | :show-inheritance:
34 |
35 | .. automodule:: sudio.process.fx.fx
36 | :members:
37 | :undoc-members:
38 | :show-inheritance:
39 |
40 |
41 | .. automodule:: sudio.process.fx.channel_mixer
42 | :members:
43 | :undoc-members:
44 | :show-inheritance:
45 |
46 |
47 | .. automodule:: sudio.process.fx.pitch_shifter
48 | :members:
49 | :undoc-members:
50 | :show-inheritance:
51 |
52 | .. automodule:: sudio.process.fx.fade_envelope
53 | :members:
54 | :undoc-members:
55 | :show-inheritance:
56 |
57 | The Fade Envelope module offers a rich set of predefined envelopes to shape audio dynamics. Each preset provides a unique way to modify the amplitude characteristics of an audio signal.
58 |
59 | Preset Catalog
60 | --------------
61 |
62 | .. image:: /_static/fade_envelope_presets.png
63 | :alt: Fade Envelope Presets Visualization
64 | :align: center
65 |
66 | Available Presets
67 | ^^^^^^^^^^^^^^^^^
68 |
69 | 1. **Smooth Ends**
70 |
71 | 2. **Bell Curve**
72 |
73 | 3. **Keep Attack Only**
74 |
75 | 4. **Linear Fade In**
76 |
77 | 5. **Linear Fade Out**
78 |
79 | 6. **Pulse**
80 |
81 | 7. **Remove Attack**
82 |
83 | 8. **Smooth Attack**
84 |
85 | 9. **Smooth Fade In**
86 |
87 | 10. **Smooth Fade Out**
88 |
89 | 11. **Smooth Release**
90 |
91 | 12. **Tremors**
92 |
93 | 13. **Zigzag Cut**
94 |
95 | Usage Example
96 | -------------
97 |
98 | .. code-block:: python
99 |
100 | from sudio.process.fx import FadeEnvelope, FadePreset
101 |
102 | # Apply a smooth fade in to an audio signal
103 | fx = FadeEnvelope()
104 | processed_audio = fx.process(
105 | audio_data,
106 | preset=FadePreset.SMOOTH_FADE_IN
107 | )
108 |
109 |
110 | Making Custom Presets
111 | ---------------------
112 |
113 | Custom presets in Sudio let you shape your audio's dynamics in unique ways. Pass a NumPy array to the FadeEnvelope effect,
114 | and Sudio's processing transforms it into a smooth, musically coherent envelope using interpolation and a Gaussian filter.
115 | You can create precise sound manipulations, control the wet/dry mix, and adjust the output gain. In this mode, the ``sawtooth_freq``, ``fade_release``, and ``fade_attack`` parameters are unavailable.
116 |
117 | .. code-block:: python
118 |
119 | from sudio.process.fx import FadeEnvelope
120 |
121 | s = song[10:30]
122 | custom_preset = np.array([0.0, 0.0, 0.1, 0.2, 0.3, 0.7, 0.1, 0.0])
123 | s.afx(
124 | FadeEnvelope,
125 | preset=custom_preset,
126 | enable_spline=True,
127 | start=10.5,
128 | stop=25,
129 | output_gain_db=-5,
130 | wet_mix=.9
131 | )
132 | su.echo(s)
133 |
134 | .. image:: /_static/fade_envelope_custom_preset.png
135 | :alt: Fade Envelope Custom Preset Visualization
136 | :align: center
137 |
--------------------------------------------------------------------------------
/sudio/metadata/metadata.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | # SUDIO - Audio Processing Platform
4 | # Copyright (C) 2024 Hossein Zahaki
5 |
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU Lesser General Public License as published
8 | # by the Free Software Foundation, either version 3 of the License, or
9 | # any later version.
10 |
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU Lesser General Public License for more details.
15 |
16 | # You should have received a copy of the GNU Lesser General Public License
17 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
18 |
19 | # - GitHub: https://github.com/MrZahaki/sudio
20 |
21 |
22 | # This program is distributed in the hope that it will be useful,
23 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
24 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 | # GNU Lesser General Public License for more details.
26 |
27 | # You should have received a copy of the GNU Lesser General Public License
28 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
29 |
30 | # - GitHub: https://github.com/MrZahaki/sudio
31 |
32 |
33 |
class AudioMetadata:
    """
    Flexible metadata container for audio records.

    Every metadata field is stored as a plain instance attribute, so fields
    can be created dynamically.  The same fields are also reachable with
    dictionary-style ``obj[key]`` syntax.
    """

    def __init__(self, name, **kwargs):
        """
        Initialize an audio metadata object.

        Args:
        -----

        - name (str): Primary identifier for the audio record.
        - **kwargs: Additional metadata attributes to set.
        """
        self.name = name
        self.__dict__.update(kwargs)

    def __getitem__(self, key):
        """
        Allow dictionary-style attribute access.

        Args:
        -----

        - key (str): Attribute name to retrieve.

        Returns:
        --------

        Value of the specified attribute.

        Raises:
        -------

        AttributeError: If no attribute named ``key`` exists (note: not
        ``KeyError``, because the lookup is delegated to ``getattr``).
        """
        return getattr(self, key)

    def __setitem__(self, key, value):
        """
        Allow dictionary-style attribute setting.

        Args:
        -----

        - key (str): Attribute name to set.
        - value: Value to assign to the attribute.
        """
        setattr(self, key, value)

    def keys(self):
        """
        Get a list of all non-private attribute names.

        Returns:
        --------

        list: Names of the instance attributes that do not start with an
        underscore, in insertion order.
        """
        public_names = []
        for attr in self.__dict__:
            if not attr.startswith('_'):
                public_names.append(attr)
        return public_names

    def copy(self):
        """
        Create a shallow copy of the metadata object.

        Only the public attributes reported by :meth:`keys` are carried over;
        attributes whose name starts with an underscore are dropped.  The
        attribute values themselves are not copied, so mutable values are
        shared between the original and the copy.

        Returns:
        --------

        AudioMetadata: A new instance with the same public attributes.
        """
        public_fields = {field: getattr(self, field) for field in self.keys()}
        return AudioMetadata(**public_fields)

    def get_data(self):
        """
        Return the metadata object itself.

        Useful for maintaining a consistent interface with other data retrieval methods.

        Returns:
        --------

        AudioMetadata: The current metadata instance.
        """
        return self
--------------------------------------------------------------------------------
/sudio/process/fx/gain.py:
--------------------------------------------------------------------------------
1 | # SUDIO - Audio Processing Platform
2 | # Copyright (C) 2024 Hossein Zahaki
3 |
4 | # This program is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU Lesser General Public License as published
6 | # by the Free Software Foundation, either version 3 of the License, or
7 | # any later version.
8 |
9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU Lesser General Public License for more details.
13 |
14 | # You should have received a copy of the GNU Lesser General Public License
15 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | # - GitHub: https://github.com/MrZahaki/sudio
18 |
19 |
20 | # This program is distributed in the hope that it will be useful,
21 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 | # GNU Lesser General Public License for more details.
24 |
25 | # You should have received a copy of the GNU Lesser General Public License
26 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
27 |
28 | # - GitHub: https://github.com/MrZahaki/sudio
29 |
30 |
31 | from typing import Union
32 | from sudio.process.fx import FX
33 | from sudio.io import SampleFormat
34 | from sudio.utils.math import db2amp
35 | import numpy as np
36 |
class Gain(FX):
    """Decibel-based gain effect with tanh soft clipping."""

    def __init__(self, *args, **kwargs) -> None:
        """
        Initialize the Gain audio effect processor.

        Configures gain processing with streaming and offline capabilities,
        optimized for 32-bit floating-point audio processing.

        Parameters:
        -----------
        *args : Variable positional arguments
            Arguments for parent FX class initialization.

        **kwargs : Variable keyword arguments
            Additional configuration parameters.
        """
        # Feature flags forwarded to the parent FX initializer.
        features = {
            'streaming_feature': True,
            'offline_feature': True,
            'preferred_datatype': SampleFormat.FLOAT32
        }
        super().__init__(*args, **kwargs, **features)

    def process(
            self,
            data: np.ndarray,
            gain_db: Union[float, int] = 0.0,
            channel: Union[int, None] = None,
            **kwargs
    ) -> np.ndarray:
        """
        Apply dynamic gain adjustment to audio signals with soft clipping.

        The decibel value is converted to a linear factor, the input is
        multiplied by it, and the result is passed through ``tanh`` so that
        the output always stays within (-1, 1).  Because ``tanh`` is applied
        unconditionally, the signal is shaped even when ``gain_db`` is 0.

        Parameters:
        -----------
        data : numpy.ndarray
            Input audio data to be gain-processed. Supports single and multi-channel inputs.

        gain_db : float or int, optional
            Gain adjustment in decibels:
            - 0.0 (default): No volume change
            - Negative values: Reduce volume
            - Positive values: Increase volume

        channel : int, optional
            Accepted for interface compatibility; not used by this implementation.

        Additional keyword arguments are ignored in this implementation.

        Returns:
        --------
        numpy.ndarray
            Gain-adjusted, soft-clipped audio data.

        Examples:
        ---------
        >>> from sudio.process.fx import Gain
        >>> su = sudio.Master()
        >>> rec = su.add('file.mp3')
        >>> rec.afx(Gain, gain_db=-30, start=2.7, stop=7)
        """
        gain = db2amp(gain_db)
        return np.tanh(data * gain)
102 |
103 |
--------------------------------------------------------------------------------
/sudio/metadata/audio_record_database.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | # SUDIO - Audio Processing Platform
4 | # Copyright (C) 2024 Hossein Zahaki
5 |
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU Lesser General Public License as published
8 | # by the Free Software Foundation, either version 3 of the License, or
9 | # any later version.
10 |
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU Lesser General Public License for more details.
15 |
16 | # You should have received a copy of the GNU Lesser General Public License
17 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
18 |
19 | # - GitHub: https://github.com/MrZahaki/sudio
20 |
21 |
22 | # This program is distributed in the hope that it will be useful,
23 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
24 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 | # GNU Lesser General Public License for more details.
26 |
27 | # You should have received a copy of the GNU Lesser General Public License
28 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
29 |
30 | # - GitHub: https://github.com/MrZahaki/sudio
31 |
32 |
33 |
class AudioRecordDatabase:
    """
    Minimal in-memory store for audio records, keyed by record name.

    Records live in the ``records`` dictionary and can be reached through
    explicit methods or through ``db[name]`` syntax.
    """

    def __init__(self):
        """
        Create a database that holds no records yet.
        """
        self.records = {}

    def add_record(self, record):
        """
        Store ``record`` under its own ``name`` attribute.

        Args:
        -----

        record: Object exposing a ``name`` attribute; an existing entry with
        the same name is replaced.
        """
        key = record.name
        self.records[key] = record

    def get_record(self, name):
        """
        Look up a record by name.

        Args:
        -----

        name (str): Name of the record to fetch.

        Returns:
        --------

        The stored record, or None when no record has that name.
        """
        try:
            return self.records[name]
        except KeyError:
            return None

    def remove_record(self, name):
        """
        Delete the record stored under ``name``; unknown names are ignored.

        Args:
        -----

        name (str): Name of the record to delete.
        """
        self.records.pop(name, None)

    def index(self):
        """
        List the names of all stored records.

        Returns:
        --------

        list: Record names in insertion order.
        """
        return list(self.records)

    def __getitem__(self, name):
        """
        Dictionary-style lookup; delegates to :meth:`get_record`.

        Args:
        -----

        name (str): Name of the record to fetch.

        Returns:
        --------

        The stored record, or None when no record has that name.
        """
        return self.get_record(name)

    def __setitem__(self, name, record):
        """
        Dictionary-style insertion; delegates to :meth:`add_record`.

        Args:
        -----

        - name (str): Currently ignored; the record is stored under
          ``record.name``.
        - record: The audio record to store.
        """
        self.add_record(record)
--------------------------------------------------------------------------------
/sudio/utils/window.py:
--------------------------------------------------------------------------------
1 |
2 | # SUDIO - Audio Processing Platform
3 | # Copyright (C) 2024 Hossein Zahaki
4 |
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU Lesser General Public License as published
7 | # by the Free Software Foundation, either version 3 of the License, or
8 | # any later version.
9 |
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU Lesser General Public License for more details.
14 |
15 | # You should have received a copy of the GNU Lesser General Public License
16 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
17 |
18 | # - GitHub: https://github.com/MrZahaki/sudio
19 |
20 |
21 | # This program is distributed in the hope that it will be useful,
22 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
23 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 | # GNU Lesser General Public License for more details.
25 |
26 | # You should have received a copy of the GNU Lesser General Public License
27 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
28 |
29 | # - GitHub: https://github.com/MrZahaki/sudio
30 |
31 |
32 |
33 | import numpy as np
34 |
35 |
def win_parser_mono(window, win_num):
    """Return entry ``win_num`` of a single-channel ``window`` container."""
    selected = window[win_num]
    return selected
38 |
39 |
def win_parser(window, win_num, nchannel):
    """Return entry ``win_num`` of channel ``nchannel`` in ``window``.

    ``window`` is indexed channel first, then window number.
    """
    channel_windows = window[nchannel]
    return channel_windows[win_num]
42 |
43 |
def single_channel_windowing(
        data:np.ndarray,
        windowing_buffer:list,
        window:np.ndarray,
        nhop:int,
        ):
    """
    Build two windowed frames from the buffered chunks of one channel.

    The first frame is ``windowing_buffer[1]``; the second is the tail of
    ``windowing_buffer[1]`` (from ``nhop`` on) followed by the first ``nhop``
    samples of ``windowing_buffer[0]``.  Both frames are multiplied by
    ``window``.  Afterwards the buffer is rotated in place: its last entry is
    discarded and ``data`` is inserted at the front.

    Returns:
        np.ndarray: The two windowed frames stacked row-wise.
    """
    front_chunk = windowing_buffer[0]
    back_chunk = windowing_buffer[1]

    hopped_frame = np.hstack((back_chunk[nhop:], front_chunk[:nhop]))
    windowed = np.vstack((back_chunk, hopped_frame)) * window

    # Rotate the history so that `data` is used on the following calls.
    windowing_buffer.pop()
    windowing_buffer.insert(0, data)

    return windowed
57 |
58 |
def multi_channel_windowing(
        data:np.ndarray,
        windowing_buffer:list,
        window:np.ndarray,
        nhop:int,
        nchannels:int,
        ):
    """
    Build two windowed frames per channel from the buffered chunks.

    For every channel ``ch`` in ``range(nchannels)`` the first frame is
    ``windowing_buffer[ch][1]`` and the second is its tail (from ``nhop`` on)
    followed by the first ``nhop`` samples of ``windowing_buffer[ch][0]``;
    both are multiplied by ``window``.  Each per-channel buffer is then
    rotated in place: its last entry is discarded and ``data[ch]`` is
    inserted at the front.

    Returns:
        np.ndarray: Array built from the per-channel frame pairs.
    """
    per_channel_frames = []
    for ch in range(nchannels):
        history = windowing_buffer[ch]
        back_chunk = history[1]
        front_chunk = history[0]

        hopped_frame = np.hstack((back_chunk[nhop:], front_chunk[:nhop]))
        per_channel_frames.append(np.vstack((back_chunk, hopped_frame)) * window)

        # Rotate this channel's history.
        history.pop()
        history.insert(0, data[ch])

    return np.array(per_channel_frames)
74 |
75 |
def single_channel_overlap(
        data:np.ndarray,
        overlap_buffer:list,
        nhop:int,
        ):
    """
    Overlap-add the two windows in ``data`` for a single channel.

    The tail of the previously stored window (``overlap_buffer[0]`` from
    ``nhop`` on) is joined with the first ``nhop`` samples of ``data[1]`` and
    added to ``data[0]``.  ``overlap_buffer[0]`` is then replaced by
    ``data[1]`` for the next call.

    Returns:
        np.ndarray: The overlap-added output chunk.
    """
    first_window = data[0]
    second_window = data[1]

    carried = np.hstack((overlap_buffer[0][nhop:], second_window[:nhop]))
    combined = carried + first_window

    overlap_buffer[0] = second_window
    return combined
85 |
86 |
def multi_channel_overlap(
        data:np.ndarray,
        overlap_buffer:list,
        nhop:int,
        nchannels:int,
        ):
    """
    Overlap-add the two windows of every channel in ``data``.

    ``data`` is indexed as (channel, window, sample) with two windows per
    channel; ``overlap_buffer`` holds one previously stored window per
    channel.  For each channel the tail of the stored window (from ``nhop``
    on) is joined with the first ``nhop`` samples of the channel's second
    window and added to its first window; the second window then replaces
    the stored one.

    Channel 0 is always processed, even when ``nchannels`` is less than 1.

    Returns:
        np.ndarray: A 1-D array when only channel 0 is processed, otherwise
        the per-channel results stacked row-wise.
    """
    channel_outputs = []
    for ch in [0, *range(1, nchannels)]:
        first_window = win_parser(data, 0, ch)
        second_window = win_parser(data, 1, ch)

        carried = np.hstack((overlap_buffer[ch][nhop:], second_window[:nhop]))
        channel_outputs.append(carried + first_window)

        # Keep the second window for the next call.
        overlap_buffer[ch] = second_window

    if len(channel_outputs) == 1:
        return channel_outputs[0]
    return np.vstack(channel_outputs)
112 |
113 |
114 |
115 |
116 |
--------------------------------------------------------------------------------
/sudio/utils/channel.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | # SUDIO - Audio Processing Platform
4 | # Copyright (C) 2024 Hossein Zahaki
5 |
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU Lesser General Public License as published
8 | # by the Free Software Foundation, either version 3 of the License, or
9 | # any later version.
10 |
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU Lesser General Public License for more details.
15 |
16 | # You should have received a copy of the GNU Lesser General Public License
17 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
18 |
19 | # - GitHub: https://github.com/MrZahaki/sudio
20 |
21 |
22 | # This program is distributed in the hope that it will be useful,
23 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
24 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 | # GNU Lesser General Public License for more details.
26 |
27 | # You should have received a copy of the GNU Lesser General Public License
28 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
29 |
30 | # - GitHub: https://github.com/MrZahaki/sudio
31 |
32 |
33 |
34 | import numpy as np
35 |
36 |
def shuffle3d_channels(arr):
    """
    Interleave the channels of a 3D array and return the result flattened.

    Parameters:
    - arr (numpy.ndarray): Input 3D array of shape (frames, channels, samples_per_frame)

    Returns:
    - numpy.ndarray: 1D array in which, frame by frame, the samples of all
      channels alternate (sample 0 of every channel, then sample 1, ...).
    """
    # Unpacking also rejects inputs that are not exactly three-dimensional.
    n_frames, n_channels, n_samples = arr.shape
    samples_major = np.transpose(arr, (0, 2, 1))
    per_sample_rows = samples_major.reshape(-1, n_channels)
    return per_sample_rows.flatten()
52 |
53 |
54 | # @Mem.master.add
def shuffle2d_channels(arr):
    """
    Interleave the rows of a 2D array and return the result flattened.

    Parameters:
    - arr (numpy.ndarray): Input 2D array of shape (m, n).

    Returns:
    - numpy.ndarray: 1D array obtained by reading ``arr`` column by column,
      i.e. element 0 of every row, then element 1 of every row, and so on.
    """
    transposed = np.transpose(arr)
    return transposed.reshape(-1)
66 |
67 |
def get_mute_mode_data(nchannel, nperseg):
    """
    Return an all-zero float32 buffer.

    The buffer is 1D with ``nperseg`` samples when ``nchannel`` is below 2,
    otherwise 2D with shape ``(nchannel, nperseg)``.
    """
    shape = nperseg if nchannel < 2 else (nchannel, nperseg)
    return np.zeros(shape, 'f')
73 |
74 |
def map_channels(in_data:np.ndarray, in_channels, out_channels):
    """
    Map input audio channels to desired output channels.

    The input is first arranged as one row per channel.  If more output
    channels are requested than are available, the last input channel is
    repeated; if fewer are requested, all input channels are averaged and the
    average is used for every output channel.

    Args:
        in_data (np.ndarray): Input audio data. For multi-channel input the
            samples are expected to be interleaved so that reshaping to
            ``(-1, in_channels)`` yields one row per sample frame.
        in_channels (int): Number of input channels.
        out_channels (int): Number of desired output channels.

    Returns:
        np.ndarray: Processed audio data with ``out_channels`` rows.
    """
    if in_channels == 1:
        output = np.expand_dims(in_data, 0)
    else:
        # De-interleave: one row per channel.
        output = in_data.reshape(-1, in_channels).T

    # Upmixing
    if in_channels < out_channels:
        # Duplicate last channel for additional output channels
        output = np.vstack((output, np.tile(output[-1], (out_channels - in_channels, 1))))

    # Downmixing
    elif in_channels > out_channels:
        # Average all input channels and emit that mix once per requested
        # output channel (the row count must be out_channels, not the
        # number of channels being dropped).
        mixed = np.mean(output, axis=0)
        output = np.tile(mixed, (out_channels, 1))

    return output
--------------------------------------------------------------------------------
/docs/rateshift.rst:
--------------------------------------------------------------------------------
1 | RateShift Module
2 | ================
3 |
4 |
5 | .. raw:: html
6 |
7 |
8 |
16 |
17 |
18 |
19 | The `sudio.rateshift` module provides Python bindings for high-quality audio resampling using various interpolation techniques. This module allows users to resample audio data, either through simple function calls or using classes that handle more complex use cases.
20 |
21 | Module Contents
22 | ---------------
23 |
24 | Enums
25 | -----
26 |
27 | >>> sudio.rateshift.ConverterType
28 |
29 | An enum representing different types of audio resampling converters.
30 |
31 | - ``sinc_best``: Highest quality sinc-based resampler.
32 | - ``sinc_medium``: Medium quality sinc-based resampler.
33 | - ``sinc_fastest``: Fastest sinc-based resampler.
34 | - ``zero_order_hold``: Zero-order hold (nearest-neighbor interpolation).
35 | - ``linear``: Linear interpolation resampler.
36 |
37 | Classes
38 | -------
39 |
40 | >>> sudio.rateshift.Resampler
41 |
42 | A class that provides functionality for resampling audio data.
43 |
44 | **Methods**:
45 | - ``__init__(converter_type, channels=1)``: Initializes the `Resampler` object.
46 |
47 | **Arguments**:
48 | - ``converter_type`` (ConverterType): Type of resampler to use.
49 | - ``channels`` (int, optional): Number of channels. Defaults to 1.
50 |
51 | - ``process(input, sr_ratio, end_of_input=False)``: Processes audio data through the resampler.
52 |
53 | **Arguments**:
54 | - ``input`` (numpy.ndarray): Input audio data as a NumPy array.
55 | - ``sr_ratio`` (float): Ratio of output sample rate to input sample rate.
56 | - ``end_of_input`` (bool, optional): Whether this is the final batch of input data. Defaults to False.
57 |
58 | **Returns**:
59 | - numpy.ndarray: Resampled audio data.
60 |
61 | - ``set_ratio(ratio)``: Sets a new resampling ratio.
62 |
63 | **Arguments**:
64 | - ``ratio`` (float): New resampling ratio.
65 |
66 | - ``reset()``: Resets the internal state of the resampler.
67 |
68 | >>> sudio.rateshift.CallbackResampler
69 |
70 | A class that resamples audio data using a callback function to provide input data.
71 |
72 | **Methods**:
73 | - ``__init__(callback, ratio, converter_type, channels)``: Initializes the `CallbackResampler` object.
74 |
75 | **Arguments**:
76 | - ``callback`` (callable): Function that supplies input audio data.
77 | - ``ratio`` (float): Initial resampling ratio.
78 | - ``converter_type`` (ConverterType): Type of resampler to use.
79 | - ``channels`` (int): Number of channels in the audio.
80 |
81 | - ``read(frames)``: Reads and resamples the specified number of audio frames.
82 |
83 | **Arguments**:
84 | - ``frames`` (int): Number of frames to read.
85 |
86 | **Returns**:
87 | - numpy.ndarray: Resampled audio data.
88 |
89 | - ``set_starting_ratio(ratio)``: Sets a new starting ratio for resampling.
90 |
91 | **Arguments**:
92 | - ``ratio`` (float): New starting ratio for resampling.
93 |
94 | - ``reset()``: Resets the internal state of the resampler.
95 |
96 | Functions
97 | ---------
98 |
99 | >>> sudio.rateshift.resample(input, sr_ratio, converter_type)
100 |
101 | Resamples audio data.
102 |
103 | This function provides a simplified interface for resampling audio data with specified parameters.
104 |
105 | **Arguments**:
106 | - ``input`` (numpy.ndarray): Input audio data as a NumPy array. A 1-D array is treated as mono; a 2-D array is treated as ``(channels, samples)``.
107 | - ``sr_ratio`` (float): Ratio of output sample rate to input sample rate.
108 | - ``converter_type`` (ConverterType): Type of resampling converter to use.
109 | - The number of channels is not passed as an argument; it is inferred from the shape of ``input`` (1 for a 1-D array, otherwise the size of the first axis).
110 |
111 | **Returns**:
112 | - numpy.ndarray: Resampled audio data.
113 |
114 |
--------------------------------------------------------------------------------
/sudio/audiosys/sync.py:
--------------------------------------------------------------------------------
1 |
2 | # SUDIO - Audio Processing Platform
3 | # Copyright (C) 2024 Hossein Zahaki
4 |
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU Lesser General Public License as published
7 | # by the Free Software Foundation, either version 3 of the License, or
8 | # any later version.
9 |
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU Lesser General Public License for more details.
14 |
15 | # You should have received a copy of the GNU Lesser General Public License
16 | # along with this program. If not, see .
17 |
18 | # - GitHub: https://github.com/MrZahaki/sudio
19 |
20 |
21 | # This program is distributed in the hope that it will be useful,
22 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
23 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 | # GNU Lesser General Public License for more details.
25 |
26 | # You should have received a copy of the GNU Lesser General Public License
27 | # along with this program. If not, see .
28 |
29 | # - GitHub: https://github.com/MrZahaki/sudio
30 |
31 |
32 |
33 | import numpy as np
34 | from sudio.rateshift import ConverterType, resample
35 | from sudio.io import SampleFormat
36 | from sudio.io import get_sample_size
37 | from sudio.utils.channel import shuffle2d_channels
38 | from sudio.metadata import AudioMetadata
39 |
40 |
41 | def synchronize_audio(rec: AudioMetadata,
42 | nchannels: int,
43 | sample_rate: int,
44 | sample_format_id: int,
45 | output_data='byte') -> dict:
46 | """
47 | Synchronizes and transforms audio recording parameters.
48 |
49 | Modifies audio recording by adjusting channels, resampling,
50 | and converting sample format.
51 |
52 | Args:
53 | -----
54 |
55 | - rec: Audio recording metadata
56 | - nchannels: Desired number of audio channels
57 | - sample_rate: Target sample rate
58 | - sample_format_id: Desired audio sample format
59 | - output_data: Output data format ('byte' or 'ndarray')
60 |
61 | Returns:
62 | --------
63 |
64 | - Modified audio recording metadata
65 | """
66 |
67 | form = get_sample_size(rec.sampleFormat)
68 | if rec.sampleFormat == SampleFormat.FLOAT32:
69 | form = ' rec.nchannels:
75 | data = np.vstack([data for i in range(nchannels)])
76 | rec.nchannels = nchannels
77 |
78 | else:
79 | # Safety update: Ensure all arrays have the same size
80 | channel_data = [data[i::rec.nchannels] for i in range(nchannels)]
81 | min_length = min(len(channel) for channel in channel_data)
82 | channel_data = [channel[:min_length] for channel in channel_data]
83 | data = np.array(channel_data)
84 |
85 | if not sample_rate == rec.frameRate:
86 | scale = sample_rate / rec.frameRate
87 | dtype = data.dtype
88 | data = data.astype(np.float32)
89 | data = resample(data, scale, ConverterType.sinc_fastest)
90 | data.astype(dtype)
91 |
92 | if output_data.startswith('b') and rec.nchannels > 1:
93 | data = shuffle2d_channels(data)
94 |
95 | rec.nchannels = nchannels
96 | rec.sampleFormat = sample_format_id
97 |
98 | form = get_sample_size(sample_format_id)
99 | if sample_format_id == SampleFormat.FLOAT32:
100 | form = '.
16 |
17 | # - GitHub: https://github.com/MrZahaki/sudio
18 |
19 |
20 | # This program is distributed in the hope that it will be useful,
21 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 | # GNU Lesser General Public License for more details.
24 |
25 | # You should have received a copy of the GNU Lesser General Public License
26 | # along with this program. If not, see .
27 |
28 | # - GitHub: https://github.com/MrZahaki/sudio
29 |
30 | from typing import Union, List, Optional
31 | import numpy as np
32 | from sudio.process.fx import FX
33 | from sudio.io import SampleFormat
34 | from sudio.process.fx._channel_mixer import channel_mixer
35 |
36 |
class ChannelMixer(FX):
    """Audio effect that remixes channels through a user-supplied correlation matrix."""

    def __init__(self, *args, **kwargs) -> None:
        """
        Initialize the ChannelMixer audio effect processor.

        Parameters:
        -----------
        *args : Variable positional arguments
            Forwarded unchanged to the parent FX initializer.
        **kwargs : dict, optional
            Forwarded unchanged to the parent FX initializer, together with
            the feature flags declared below.
        """
        features = {
            'streaming_feature': True,
            'offline_feature': True,
            'preferred_datatype': SampleFormat.FLOAT32
        }
        super().__init__(*args, **kwargs, **features)

    def process(
            self,
            data: np.ndarray,
            correlation: Optional[Union[List[List[float]], np.ndarray]] = None,
            **kwargs
    ) -> np.ndarray:
        """
        Apply channel mixing to the input audio signal based on a correlation matrix.

        Parameters:
        -----------
        data : numpy.ndarray
            Input multi-channel audio data. Must have at least 2 dimensions
            (documented upstream as (num_channels, num_samples)).

        correlation : Union[List[List[float]], numpy.ndarray], optional
            Matrix defining inter-channel relationships.
            - If None, the input data is returned unchanged
            - Converted to a float32 array before validation
            - Shape must be (num_channels, num_channels), where num_channels
              is the channel count this effect was configured with
            - Every value must lie within [-1, 1]

        **kwargs : dict, optional
            Accepted for interface compatibility; not used here.

        Returns:
        --------
        numpy.ndarray
            The input itself when ``correlation`` is None, otherwise the
            result of ``channel_mixer(data, correlation)``.

        Raises:
        -------
        ValueError
            - If input data has fewer than 2 dimensions
            - If the correlation matrix is incorrectly shaped
            - If the correlation matrix contains values outside [-1, 1]

        Examples:
        ---------
        >>> from sudio.process.fx import ChannelMixer
        >>> su = sudio.Master()
        >>> rec = su.add('file.mp3')
        >>> newrec = rec.afx(ChannelMixer, correlation=[[.4,-.6], [0,1]]) #for two channel
        """
        if data.ndim < 2:
            raise ValueError("Input data must be multichannel")

        if correlation is None:
            return data

        nchannels = self._nchannels
        correlation = np.asarray(correlation, dtype=np.float32)

        # Shape and value range are validated separately so that the error
        # message names the constraint that was actually violated.
        if correlation.shape != (nchannels, nchannels):
            raise ValueError(
                f"Invalid correlation matrix: shape {correlation.shape}, expected {(nchannels, nchannels)}"
            )
        # NaN entries fail both comparisons and are therefore rejected as well.
        if not np.all((-1 <= correlation) & (correlation <= 1)):
            raise ValueError("Invalid correlation matrix: all values must be within [-1, 1]")

        return channel_mixer(data, correlation)
122 |
123 |
--------------------------------------------------------------------------------
/sudio/utils/timed_indexed_string.py:
--------------------------------------------------------------------------------
1 |
2 | # SUDIO - Audio Processing Platform
3 | # Copyright (C) 2024 Hossein Zahaki
4 |
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU Lesser General Public License as published
7 | # by the Free Software Foundation, either version 3 of the License, or
8 | # any later version.
9 |
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU Lesser General Public License for more details.
14 |
15 | # You should have received a copy of the GNU Lesser General Public License
16 | # along with this program. If not, see .
17 |
18 | # - GitHub: https://github.com/MrZahaki/sudio
19 |
20 |
21 | # This program is distributed in the hope that it will be useful,
22 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
23 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 | # GNU Lesser General Public License for more details.
25 |
26 | # You should have received a copy of the GNU Lesser General Public License
27 | # along with this program. If not, see .
28 |
29 | # - GitHub: https://github.com/MrZahaki/sudio
30 |
31 |
32 | from sudio.utils.strtool import generate_timestamp_name
33 |
34 |
class TimedIndexedString:
    """Generate indexed names by splicing a running counter into a base string."""

    def __init__(self,
                 string: str,
                 start_from: int = None,
                 first_index: int = 0,
                 start_before: str = None,
                 max_index: int = None,
                 seed: str = ''):
        """
        Split ``string`` at an insertion point and prepare the index counter.

        Args:
            string (str): The base string the index is inserted into.
            start_from (int, optional): Position in ``string`` at which the index is
                inserted. Takes precedence over ``start_before``. Defaults to None.
            first_index (int, optional): First counter value. Defaults to 0.
            start_before (str, optional): Substring before whose first occurrence the
                index is inserted (used when ``start_from`` is None). Defaults to None.
            max_index (int, optional): Largest counter value that may be issued;
                None means unlimited. Defaults to None.
            seed (str, optional): Extra token added after the prefix, unless it
                already occurs somewhere in ``string``. Defaults to ''.

        Raises:
            ValueError: If neither ``start_from`` nor ``start_before`` is given, or
                if ``start_before`` does not occur in ``string``.
        """
        if start_from is not None:
            index = start_from
        elif start_before is not None:
            index = string.index(start_before)
        else:
            raise ValueError("Either 'start_from' or 'start_before' must be provided.")

        self._count = first_index
        self._first_index = first_index
        self._max_index = max_index

        prefix = string[:index] + '_'
        if seed and (seed not in string):
            prefix += seed + '_'
        # [prefix, most recent "index + extras" part, suffix]
        self._str: list = [prefix, '', string[index:]]
        self._current_str = ''

    def __call__(self, *args, timed_random: bool = False, timed_regular: bool = False, seed: str = ''):
        """
        Generate the next indexed string and advance the counter.

        Args:
            *args: Additional string elements appended directly after the index.
            timed_random (bool, optional): Append ``generate_timestamp_name('_')``
                after the index part. Defaults to False.
            timed_regular (bool, optional): Append ``'_' + generate_timestamp_name()``
                after the index part (ignored when ``timed_random`` is set).
                Defaults to False.
            seed (str, optional): Token appended to the prefix for this call only;
                it does not accumulate across calls. Defaults to ''.

        Returns:
            str: The generated indexed string.

        Raises:
            OverflowError: If the counter has passed ``max_index``.

        Example:
            >>> indexed_name = TimedIndexedString("example.wav", start_before=".", max_index=3)
            >>> indexed_name("suffix")  # 'example_0suffix.wav'
            >>> indexed_name("extra", timed_regular=True)  # 'example_1extra_<timestamp>.wav'
        """
        # Compare against None so that a limit of 0 is honoured too.
        if self._max_index is not None and self._count > self._max_index:
            raise OverflowError("Maximum index reached.")

        middle = str(self._count) + ''.join(args)
        self._count += 1

        # Build the prefix locally: mutating the stored prefix would make the
        # seed pile up ('_seed_seed_seed...') on every subsequent call.
        prefix = self._str[0]
        if seed:
            prefix += '_' + seed

        if timed_random:
            middle += generate_timestamp_name('_')
        elif timed_regular:
            middle += '_' + generate_timestamp_name()

        self._str[1] = middle
        self._current_str = prefix + middle + self._str[2]
        return self._current_str

    def reset(self):
        """
        Resets the index counter to its initial value.
        """
        self._count = self._first_index

    def get(self):
        """
        Gets the most recently generated string ('' before the first call).

        Returns:
            str: The current indexed string.
        """
        return self._current_str
121 |
--------------------------------------------------------------------------------
/sudio/rateshift/src/python_bindings.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * SUDIO - Audio Processing Platform
3 | * Copyright (C) 2024 Hossein Zahaki
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published
7 | * by the Free Software Foundation, either version 3 of the License, or
8 | * any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Lesser General Public License
16 | * along with this program. If not, see .
17 | *
18 | * - GitHub: https://github.com/MrZahaki/sudio
19 | */
20 |
21 | #include
22 | #include
23 | #include
24 | #include
25 | #include "rateshift.hpp"
26 |
27 | namespace py = pybind11;
28 |
29 |
30 |
31 | PYBIND11_MODULE(_rateshift, m) {
32 |
33 | py::enum_(m, "ConverterType")
34 | .value("sinc_best", rateshift::ConverterType::sinc_best)
35 | .value("sinc_medium", rateshift::ConverterType::sinc_medium)
36 | .value("sinc_fastest", rateshift::ConverterType::sinc_fastest)
37 | .value("zero_order_hold", rateshift::ConverterType::zero_order_hold)
38 | .value("linear", rateshift::ConverterType::linear)
39 | .export_values();
40 |
41 | py::class_(m, "Resampler")
42 | .def(py::init(),
43 | py::arg("converter_type"), py::arg("channels") = 1
44 | )
45 | .def("process", [](rateshift::Resampler& self, py::array_t input, double sr_ratio, bool end_of_input = false) {
46 | py::buffer_info buf = input.request();
47 | std::vector input_vec(static_cast(buf.ptr), static_cast(buf.ptr) + buf.size);
48 | std::vector output = self.process(input_vec, sr_ratio, end_of_input);
49 | return py::array_t(output.size(), output.data());
50 | }, py::arg("input"), py::arg("sr_ratio"), py::arg("end_of_input") = false
51 | )
52 | .def("set_ratio", &rateshift::Resampler::set_ratio, py::arg("ratio"),
53 | "Set a new resampling ratio.")
54 | .def("reset", &rateshift::Resampler::reset,
55 | "Reset the internal state of the resampler.");
56 |
57 | py::class_(m, "CallbackResampler")
58 | .def(py::init(),
59 | py::arg("callback"), py::arg("ratio"), py::arg("converter_type"), py::arg("channels")
60 | )
61 | .def("read", [](rateshift::CallbackResampler& self, size_t frames) {
62 | std::vector output = self.read(frames);
63 | return py::array_t(output.size(), output.data());
64 | }, py::arg("frames"))
65 | .def("set_starting_ratio", &rateshift::CallbackResampler::set_starting_ratio, py::arg("ratio"),
66 | "Set a new starting ratio for the resampler.")
67 | .def("reset", &rateshift::CallbackResampler::reset,
68 | "Reset the internal state of the resampler.");
69 |
70 | m.def("resample", [](py::array_t input, double sr_ratio, rateshift::ConverterType converter_type) {
71 | py::buffer_info buf = input.request();
72 | size_t channels = (buf.ndim < 2) ? 1 : buf.shape[0];
73 | size_t samples = (buf.ndim < 2) ? buf.shape[0] : buf.shape[1];
74 |
75 | std::vector input_flat(buf.size);
76 |
77 | if(channels < 2){
78 | std::memcpy(input_flat.data(), buf.ptr, buf.size * sizeof(float));
79 | }
80 | else{
81 | for (size_t i = 0; i < channels; ++i) {
82 | for (size_t j = 0; j < samples; ++j) {
83 | input_flat[j * channels + i] = static_cast(buf.ptr)[i * samples + j];
84 | }
85 | }
86 | }
87 |
88 | std::vector output = rateshift::resample(input_flat, sr_ratio, converter_type, channels);
89 |
90 | if(channels > 1){
91 | size_t new_samples = output.size() / channels;
92 | std::vector output_reshaped(output.size());
93 |
94 | // Transpose back
95 | for (size_t i = 0; i < new_samples; ++i) {
96 | for (size_t j = 0; j < channels; ++j) {
97 | output_reshaped[j * new_samples + i] = output[i * channels + j];
98 | }
99 | }
100 |
101 | return py::array_t({channels, new_samples}, output_reshaped.data());
102 | }
103 | else{
104 | return py::array_t(output.size(), output.data());
105 | }
106 | },
107 | py::arg("input"), py::arg("sr_ratio"), py::arg("converter_type")
108 | );
109 | }
110 |
111 |
--------------------------------------------------------------------------------
/sudio/io/inc/stdstream.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * SUDIO - Audio Processing Platform
3 | * Copyright (C) 2024 Hossein Zahaki
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published
7 | * by the Free Software Foundation, either version 3 of the License, or
8 | * any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Lesser General Public License
16 | * along with this program. If not, see .
17 | *
18 | * - GitHub: https://github.com/MrZahaki/sudio
19 | */
20 |
21 | #pragma once
22 |
23 | #include
24 | #include
25 | #include
26 | #include
27 | #include
28 | #include
29 | #include
30 |
31 |
32 | # ifdef PA_ENABLE_DEBUG_OUTPUT
33 | # undef PA_ENABLE_DEBUG_OUTPUT
34 | # endif
35 |
// Declarations for the stdstream audio I/O layer: an exception hierarchy, a
// device description record, the AudioStream class and a one-shot output helper.
//
// NOTE(review): several template argument lists appear to be missing in this
// copy of the header (`std::function;`, `std::vector getInputDevices()`,
// `std::atomic continueStreaming`, `const std::vector& data`). They cannot be
// recovered from this file alone — restore them from the upstream source.
namespace stdstream {

    // Base exception class for all audio-related errors
    class AudioException : public std::runtime_error {
    public:
        explicit AudioException(const std::string& message) : std::runtime_error(message) {}
    };

    // Initialization and setup errors
    class AudioInitException : public AudioException {
    public:
        explicit AudioInitException(const std::string& message) : AudioException(message) {}
    };

    // Device-related errors
    class DeviceException : public AudioException {
    public:
        explicit DeviceException(const std::string& message) : AudioException(message) {}
    };

    // Invalid parameter errors
    class InvalidParameterException : public AudioException {
    public:
        explicit InvalidParameterException(const std::string& message) : AudioException(message) {}
    };

    // Stream operation errors
    class StreamException : public AudioException {
    public:
        explicit StreamException(const std::string& message) : AudioException(message) {}
    };

    // Resource unavailable errors
    class ResourceException : public AudioException {
    public:
        explicit ResourceException(const std::string& message) : AudioException(message) {}
    };


    // Plain description of one audio device, as returned by the
    // AudioStream device-query methods below.
    struct AudioDeviceInfo {
        int index;
        std::string name;
        int maxInputChannels;
        int maxOutputChannels;
        double defaultSampleRate;
        bool isDefaultInput;
        bool isDefaultOutput;
    };

    // Wrapper around a PortAudio stream (PaStream) with device enumeration,
    // lifecycle control and read/write access.
    class AudioStream {
    public:
        AudioStream();
        ~AudioStream();

        // User-supplied callbacks for the input and output side.
        // NOTE(review): the std::function signatures are missing here — see the
        // note at the top of the namespace.
        using InputCallback = std::function;
        using OutputCallback = std::function;

        // Opens the stream. Every parameter has a default; presumably a device
        // index of -1 selects the default device and sampleRate/channel counts
        // of 0 mean "use the device default" — TODO confirm in the implementation.
        void open(int inputDeviceIndex = -1, int outputDeviceIndex = -1,
                  double sampleRate = 0, PaSampleFormat format = paFloat32,
                  int inputChannels = 0, int outputChannels = 0,
                  unsigned long framesPerBuffer = paFramesPerBufferUnspecified,
                  bool enableInput = true, bool enableOutput = true,
                  PaStreamFlags streamFlags = paNoFlag,
                  InputCallback inputCallback = nullptr,
                  OutputCallback outputCallback = nullptr);
        void start();
        void stop();
        void close();

        // Device enumeration and lookup.
        std::vector getInputDevices();
        std::vector getOutputDevices();
        AudioDeviceInfo getDefaultInputDevice();
        AudioDeviceInfo getDefaultOutputDevice();
        int getDeviceCount();
        AudioDeviceInfo getDeviceInfoByIndex(int index);

        // Raw byte-buffer stream access; sizes are expressed in frames.
        // NOTE(review): the meaning of the returned long is not visible here —
        // check the implementation before relying on it.
        long readStream(uint8_t* buffer, unsigned long frames);
        long writeStream(const uint8_t* buffer, unsigned long frames);
        long getStreamReadAvailable();
        long getStreamWriteAvailable();

        // Publicly readable stream configuration.
        int outputChannels;
        int inputChannels;
        PaSampleFormat streamFormat;

    private:
        PaStream* stream;
        std::atomic continueStreaming;
        InputCallback userInputCallback;
        OutputCallback userOutputCallback;

        // Static entry point with the PortAudio stream-callback parameter list;
        // presumably forwards to handleCallback via userData — TODO confirm.
        static int paCallback(const void* inputBuffer, void* outputBuffer,
                              unsigned long framesPerBuffer,
                              const PaStreamCallbackTimeInfo* timeInfo,
                              PaStreamCallbackFlags statusFlags,
                              void* userData);
        // Per-instance counterpart of paCallback (same parameters minus userData).
        int handleCallback(const void* inputBuffer, void* outputBuffer,
                           unsigned long framesPerBuffer,
                           const PaStreamCallbackTimeInfo* timeInfo,
                           PaStreamCallbackFlags statusFlags);
        bool isBlockingMode;
        bool inputEnabled;
        bool outputEnabled;
    };

    // Free helper that writes `data` to an output device;
    // outputDeviceIndex defaults to -1.
    void writeToDefaultOutput(
        const std::vector& data,
        PaSampleFormat sampleFormat,
        int channels,
        double sampleRate,
        int outputDeviceIndex = -1
    );

} // namespace stdstream
--------------------------------------------------------------------------------
/sudio/rateshift/src/rateshift.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * SUDIO - Audio Processing Platform
3 | * Copyright (C) 2024 Hossein Zahaki
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published
7 | * by the Free Software Foundation, either version 3 of the License, or
8 | * any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Lesser General Public License
16 | * along with this program. If not, see .
17 | *
18 | * - GitHub: https://github.com/MrZahaki/sudio
19 | */
20 |
21 | #include "rateshift.hpp"
22 | #include
23 | #include
24 |
25 | namespace rateshift {
26 |
27 | namespace {
28 | void error_handler(int errnum) {
29 | if (errnum != 0) {
30 | throw std::runtime_error(src_strerror(errnum));
31 | }
32 | }
33 |
34 | int convert_type(ConverterType type) {
35 | switch (type) {
36 | case ConverterType::sinc_best: return SRC_SINC_BEST_QUALITY;
37 | case ConverterType::sinc_medium: return SRC_SINC_MEDIUM_QUALITY;
38 | case ConverterType::sinc_fastest: return SRC_SINC_FASTEST;
39 | case ConverterType::zero_order_hold: return SRC_ZERO_ORDER_HOLD;
40 | case ConverterType::linear: return SRC_LINEAR;
41 | default: throw std::invalid_argument("Invalid converter type");
42 | }
43 | }
44 | }
45 |
46 | Resampler::Resampler(ConverterType converter_type, int channels)
47 | : _converter_type(convert_type(converter_type)), _channels(channels) {
48 | int error;
49 | _state = src_new(_converter_type, _channels, &error);
50 | error_handler(error);
51 | }
52 |
53 | Resampler::~Resampler() {
54 | if (_state) {
55 | src_delete(_state);
56 | }
57 | }
58 |
59 | std::vector Resampler::process(const std::vector& input, double sr_ratio, bool end_of_input) {
60 | size_t input_frames = input.size() / _channels;
61 | size_t output_frames = static_cast(std::ceil(input_frames * sr_ratio));
62 | std::vector output(output_frames * _channels);
63 |
64 | SRC_DATA src_data = {
65 | const_cast(input.data()),
66 | const_cast(output.data()),
67 | static_cast(input_frames),
68 | static_cast(output_frames),
69 | 0, 0,
70 | end_of_input ? 1 : 0,
71 | sr_ratio
72 | };
73 |
74 | error_handler(src_process(_state, &src_data));
75 |
76 | output.resize(src_data.output_frames_gen * _channels);
77 | return output;
78 | }
79 |
80 | void Resampler::set_ratio(double new_ratio) {
81 | error_handler(src_set_ratio(_state, new_ratio));
82 | }
83 |
84 | void Resampler::reset() {
85 | error_handler(src_reset(_state));
86 | }
87 |
88 | CallbackResampler::CallbackResampler(callback_t callback_func, double ratio, ConverterType converter_type, size_t channels)
89 | : _callback(std::move(callback_func)), _ratio(ratio), _converter_type(convert_type(converter_type)), _channels(channels) {
90 | int error;
91 | _state = src_callback_new(
92 | [](void* cb_data, float** data) -> long {
93 | auto* self = static_cast(cb_data);
94 | auto input = self->_callback();
95 | if (input.empty()) return 0;
96 | *data = input.data();
97 | return static_cast(input.size() / self->_channels);
98 | },
99 | _converter_type,
100 | static_cast(_channels),
101 | &error,
102 | this
103 | );
104 | error_handler(error);
105 | }
106 |
107 | CallbackResampler::~CallbackResampler() {
108 | if (_state) {
109 | src_delete(_state);
110 | }
111 | }
112 |
113 | std::vector CallbackResampler::read(size_t frames) {
114 | std::vector output(frames * _channels);
115 | long frames_read = src_callback_read(_state, _ratio, static_cast(frames), output.data());
116 |
117 | if (frames_read == 0) {
118 | error_handler(src_error(_state));
119 | }
120 |
121 | output.resize(frames_read * _channels);
122 | return output;
123 | }
124 |
125 | void CallbackResampler::set_starting_ratio(double new_ratio) {
126 | error_handler(src_set_ratio(_state, new_ratio));
127 | _ratio = new_ratio;
128 | }
129 |
130 | void CallbackResampler::reset() {
131 | error_handler(src_reset(_state));
132 | }
133 |
134 | std::vector resample(const std::vector& input, double sr_ratio, ConverterType converter_type, int channels) {
135 | if (input.size() % channels != 0) {
136 | throw std::invalid_argument("Input size must be divisible by number of channels");
137 | }
138 |
139 | size_t input_frames = input.size() / channels;
140 | size_t output_frames = static_cast(std::ceil(input_frames * sr_ratio));
141 | std::vector output(output_frames * channels);
142 |
143 | SRC_DATA src_data = {
144 | const_cast(input.data()), // Assuming ALREADY INTERLEAVED input
145 | static_cast(output.data()),
146 | static_cast(input_frames),
147 | static_cast(output_frames),
148 | 0, // Input frames used
149 | 0, // Output frames generated
150 | 0, // Input sample rate (0 = same as output)
151 | sr_ratio
152 | };
153 |
154 | error_handler(src_simple(&src_data, convert_type(converter_type), channels));
155 |
156 | output.resize(src_data.output_frames_gen * channels);
157 | return output;
158 | }
159 | }
--------------------------------------------------------------------------------
/sudio/process/fx/tempo.py:
--------------------------------------------------------------------------------
1 | # SUDIO - Audio Processing Platform
2 | # Copyright (C) 2024 Hossein Zahaki
3 |
4 | # This program is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU Lesser General Public License as published
6 | # by the Free Software Foundation, either version 3 of the License, or
7 | # any later version.
8 |
9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU Lesser General Public License for more details.
13 |
14 | # You should have received a copy of the GNU Lesser General Public License
15 | # along with this program. If not, see .
16 |
17 | # - GitHub: https://github.com/MrZahaki/sudio
18 |
19 |
20 | # This program is distributed in the hope that it will be useful,
21 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 | # GNU Lesser General Public License for more details.
24 |
25 | # You should have received a copy of the GNU Lesser General Public License
26 | # along with this program. If not, see .
27 |
28 | # - GitHub: https://github.com/MrZahaki/sudio
29 |
30 |
31 | from sudio.process.fx import FX
32 | from sudio.io import SampleFormat
33 | import numpy as np
34 | from typing import TYPE_CHECKING
35 | from sudio.process.fx._tempo import tempo_cy
36 |
class Tempo(FX):
    def __init__(self, *args, **kwargs) -> None:
        """
        Initialize the Tempo audio effect processor for time stretching.

        Declares support for both streaming and offline processing and a
        preferred sample format of 32-bit float, then forwards everything to
        the parent FX initializer.

        Notes:
        ------
        The time stretching itself is delegated to ``tempo_cy`` (documented
        upstream as a WSOLA, Waveform Similarity Overlap-Add, implementation).
        """
        features = {
            'streaming_feature': True,
            'offline_feature': True,
            'preferred_datatype': SampleFormat.FLOAT32
        }
        super().__init__(*args, **kwargs, **features)

    def process(self, data: np.ndarray, tempo: float = 1.0, envelope: np.ndarray = None, **kwargs):
        """
        Perform time stretching on the input audio data without altering pitch.

        Parameters:
        -----------
        data : np.ndarray
            Input audio data as a NumPy array.

        tempo : float, optional
            Tempo scaling factor, passed to the algorithm as ``default_tempo``.
            - 1.0 means no change in tempo/duration
            - < 1.0 slows down audio (increases duration), e.g. 0.5 doubles it
            - > 1.0 speeds up audio (decreases duration), e.g. 2.0 halves it
            Default is 1.0.

        envelope : array-like, optional
            Dynamic tempo envelope for time-varying tempo modifications; it is
            converted to a float64 array before use. ``None`` (the default) or
            an empty sequence means a uniform tempo change.

        **kwargs : dict
            Additional keyword arguments passed unchanged to the underlying
            tempo algorithm. Upstream documents these tuning parameters:
            - sequence_ms: Sequence length for time-stretching window
            - seekwindow_ms: Search window for finding similar waveforms
            - overlap_ms: Crossfade overlap between segments
            - enable_spline: Enable spline interpolation for envelope
            - spline_sigma: Gaussian smoothing parameter for envelope

        Returns:
        --------
        np.ndarray
            Time-stretched audio data, cast back to the dtype of the input.

        Examples:
        ---------
        >>> slow_audio = tempo_processor.process(audio_data, tempo=0.5)  # Slow down audio
        >>> fast_audio = tempo_processor.process(audio_data, tempo=1.5)  # Speed up audio
        >>> dynamic_tempo = tempo_processor.process(audio_data, envelope=[0.5, 1.0, 2.0])  # Dynamic tempo

        Warnings:
        ---------
        - Extreme tempo modifications (very low or high values) may introduce
          audible artifacts or sound distortions
        """
        # None is the "no envelope" sentinel; this avoids a shared mutable
        # default argument while keeping envelope=[] callers working unchanged.
        if envelope is None:
            envelope = []

        dtype = data.dtype
        data = tempo_cy(
            data,
            np.asarray(envelope, dtype=np.double),
            self._sample_rate,
            default_tempo=tempo,
            **kwargs
        )
        # Return samples in the caller's original dtype.
        return data.astype(dtype)
135 |
136 |
--------------------------------------------------------------------------------
/sudio/process/fx/pitch_shifter.py:
--------------------------------------------------------------------------------
1 | # SUDIO - Audio Processing Platform
2 | # Copyright (C) 2024 Hossein Zahaki
3 |
4 | # This program is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU Lesser General Public License as published
6 | # by the Free Software Foundation, either version 3 of the License, or
7 | # any later version.
8 |
9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU Lesser General Public License for more details.
13 |
14 | # You should have received a copy of the GNU Lesser General Public License
15 | # along with this program. If not, see .
16 |
17 | # - GitHub: https://github.com/MrZahaki/sudio
18 |
19 |
20 | # This program is distributed in the hope that it will be useful,
21 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 | # GNU Lesser General Public License for more details.
24 |
25 | # You should have received a copy of the GNU Lesser General Public License
26 | # along with this program. If not, see .
27 |
28 | # - GitHub: https://github.com/MrZahaki/sudio
29 |
30 | from sudio.process.fx import FX
31 | from sudio.process.fx._pitch_shifter import pitch_shifter_cy
32 | from sudio.io import SampleFormat
33 | import numpy as np
34 |
class PitchShifter(FX):
    """Pitch-shifting effect that delegates the DSP work to ``pitch_shifter_cy``."""

    def __init__(self, *args, **kwargs) -> None:
        """
        Initialize the PitchShifter audio effect processor.

        Declares the effect as usable for both streaming and offline
        processing, with FLOAT32 as the preferred sample format, and forwards
        every other argument unchanged to ``FX.__init__``.
        """

        features = {
            'streaming_feature': True,
            'offline_feature': True,
            'preferred_datatype': SampleFormat.FLOAT32
        }
        super().__init__(*args, **kwargs, **features)

    def process(
            self,
            data: np.ndarray,
            semitones: np.float32 = 0.0,
            cent: np.float32 = 0.0,
            ratio: np.float32 = 1.0,
            envelope: np.ndarray = None,
            **kwargs
    ):
        """
        Perform pitch shifting on the input audio data.

        The pitch change can be expressed in three ways:
        1. Semitone and cent-based pitch shifting
        2. Direct ratio-based pitch shifting
        3. Envelope-based dynamic pitch shifting

        Parameters:
        -----------
        data : np.ndarray
            Input audio data, passed unchanged to the underlying routine.
            Recommended data type is float32.

        semitones : np.float32, optional
            Number of semitones to shift the pitch.
            Positive values increase pitch, negative values decrease pitch.
            Default is 0.0 (no change).

            Example:
            - 12.0 shifts up one octave
            - -12.0 shifts down one octave

        cent : np.float32, optional
            Fine-tuning pitch adjustment in cents (1/100th of a semitone).
            Default is 0.0.

            Example:
            - 50.0 shifts up half a semitone
            - -25.0 shifts down a quarter semitone

        ratio : np.float32, optional
            Direct pitch ratio modifier.
            - 1.0 means no change
            - > 1.0 increases pitch
            - < 1.0 decreases pitch
            Default is 1.0.

            Note: when semitones or cents are given, the resulting factor
            ``2 ** ((semitones + cent / 100) / 12)`` is multiplied by this ratio.

        envelope : array-like, optional
            Dynamic pitch envelope for time-varying pitch shifting. It is
            converted to a float64 NumPy array before being handed to the
            underlying routine. Default is ``None``, which is treated as an
            empty envelope (uniform pitch shifting).

        **kwargs : dict
            Additional keyword arguments forwarded to ``pitch_shifter_cy``
            (per the original documentation, e.g. ``frame_length`` and
            ``converter_type``). ``sample_rate`` and ``ratio`` must NOT be
            supplied here: they are already passed explicitly, so repeating
            them raises ``TypeError``.

        Returns:
        --------
        The value returned by ``pitch_shifter_cy`` (pitch-shifted audio data).

        Examples:
        ---------
        >>> record = record.afx(PitchShifter, start=30, envelope=[1, 3, 1, 1])  # Dynamic pitch shift
        >>> record = record.afx(PitchShifter, semitones=4)  # Shift up 4 semitones
        """

        # A None sentinel replaces the former mutable ``[]`` default; the
        # effective default is still an empty envelope.
        if envelope is None:
            envelope = []

        # Pitch ratio based on semitones and cents; the plain ratio is used
        # untouched when neither is given.
        if semitones != 0.0 or cent != 0.0:
            pitch_ratio = pow(2.0, (semitones + cent / 100) / 12.0) * ratio
        else:
            pitch_ratio = ratio

        res = pitch_shifter_cy(
            data,
            np.asarray(envelope, dtype=np.double),
            sample_rate=self._sample_rate,
            ratio=pitch_ratio,
            **kwargs
        )

        return res
149 |
150 |
151 |
--------------------------------------------------------------------------------
/sudio/io/inc/codec.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | * SUDIO - Audio Processing Platform
3 | * Copyright (C) 2024 Hossein Zahaki
4 | *
5 | * This program is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published
7 | * by the Free Software Foundation, either version 3 of the License, or
8 | * any later version.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Lesser General Public License
16 | * along with this program. If not, see .
17 | *
18 | * - GitHub: https://github.com/MrZahaki/sudio
19 | */
20 |
21 | #pragma once
22 |
23 | #include
24 | #include
25 | #include
26 | #include
27 | #include
28 |
29 |
30 |
31 | extern "C" {
32 | #include
33 | }
34 |
namespace suio {

    // NOTE(review): in this listing the template arguments of every
    // std::vector / std::unique_ptr declaration below are not visible
    // (they appear to have been lost in extraction). Confirm the element
    // and pointee types against the real header before relying on them.

    // Audio file formats distinguished by this module. UNKNOWN is the first
    // enumerator.
    enum class FileFormat {
        UNKNOWN,
        WAV,
        FLAC,
        VORBIS,
        MP3
    };

    // Plain record describing an audio file (the return type of
    // AudioCodec::getFileInfo).
    struct AudioFileInfo {
        std::string name;        // presumably the file name/path - TODO confirm
        FileFormat fileFormat;   // one of the FileFormat enumerators
        uint32_t nchannels;      // channel count
        uint32_t sampleRate;     // presumably in Hz - TODO confirm
        ma_format sampleFormat;  // miniaudio sample format enum
        uint64_t numFrames;      // frame count
        float duration;          // presumably in seconds - TODO confirm
    };

    // Collection of static encode/decode entry points plus a nested
    // streaming reader. Only declarations live here; behavior is defined in
    // the implementation file.
    class AudioCodec {
    public:
        // Decode `filename`. Defaults request ma_format_s16, 2 channels,
        // 44100 sample rate and no dithering.
        static std::vector decodeAudioFile(const std::string& filename,
                                            ma_format outputFormat = ma_format_s16,
                                            uint32_t nchannels = 2,
                                            uint32_t sampleRate = 44100,
                                            ma_dither_mode dither = ma_dither_mode_none);

        // Vorbis-specific decode entry point; all parameters are required.
        static std::vector decodeVorbisFile(const std::string& filename,
                                            ma_format format,
                                            uint32_t nchannels,
                                            uint32_t sampleRate);

        // Encode `data` to a WAV file at `filename`.
        // NOTE(review): meaning of the uint64_t return value (frames vs.
        // bytes written) is not visible here - confirm in the implementation.
        static uint64_t encodeWavFile(const std::string& filename,
                                    const std::vector& data,
                                    ma_format format,
                                    uint32_t nchannels,
                                    uint32_t sampleRate);


        // Encode `data` to an MP3 file; `bitrate` and `quality` are encoder
        // settings whose units/ranges are not visible in this header.
        static uint64_t encodeMP3File(  const std::string& filename,
                                        const std::vector& data,
                                        ma_format format,
                                        uint32_t nchannels,
                                        uint32_t sampleRate,
                                        int bitrate,
                                        int quality
                                        );

        // In-memory counterpart of encodeMP3File: takes no filename and
        // returns a vector instead of a count.
        static std::vector encodeToMP3(
                                        const std::vector& data,
                                        ma_format format,
                                        uint32_t nchannels,
                                        uint32_t sampleRate,
                                        int bitrate,
                                        int quality
                                    );

        // Encode `data` to a FLAC file with the given compression level.
        static uint64_t encodeFlacFile(const std::string& filename,
                                        const std::vector& data,
                                        ma_format format,
                                        uint32_t nchannels,
                                        uint32_t sampleRate,
                                        int compressionLevel
                                    );

        // Encode `data` to a Vorbis file; note `quality` is a float here,
        // unlike the int `quality` of the MP3 encoders.
        static uint64_t encodeVorbisFile(const std::string& filename,
                                        const std::vector& data,
                                        ma_format format,
                                        uint32_t nchannels,
                                        uint32_t sampleRate,
                                        float quality
                                    );

        // Return an AudioFileInfo record for `filename`.
        static AudioFileInfo getFileInfo(const std::string& filename);


        // Create a decoder for `filename` configured with the requested
        // output format; ownership is returned through a std::unique_ptr.
        static std::unique_ptr initializeDecoder(const std::string& filename,
                                                ma_format outputFormat,
                                                uint32_t nchannels,
                                                uint32_t sampleRate,
                                                ma_dither_mode dither);

        // Read up to `framesToRead` frames from an existing decoder.
        // presumably returns fewer frames at end of stream - TODO confirm
        static std::vector readDecoderFrames(ma_decoder* decoder,
                                            uint64_t framesToRead);

        // Stateful reader that keeps a decoder open between reads.
        class AudioFileStream {
        public:
            // Defaults: ma_format_s16, 2 channels, 44100 sample rate,
            // 1024 frames per read, no dithering, start at frame 0.
            AudioFileStream(const std::string& filename,
                            ma_format outputFormat = ma_format_s16,
                            uint32_t nchannels = 2,
                            uint32_t sampleRate = 44100,
                            uint64_t framesToRead = 1024,
                            ma_dither_mode dither = ma_dither_mode_none,
                            uint64_t seekFrame = 0);

            ~AudioFileStream();

            // NOTE(review): a `framesToRead` of 0 presumably means "use the
            // count given at construction" (m_framesToRead) - confirm.
            std::vector readFrames(uint64_t framesToRead = 0);

        private:
            std::unique_ptr m_decoder;   // owned decoder handle
            uint64_t m_framesToRead;     // per-read frame count stored by the stream
            uint32_t m_nchannels;        // channel count stored by the stream
            ma_format m_outputFormat;    // sample format stored by the stream
        };

        // In-memory counterpart of encodeWavFile: takes no filename and
        // returns a vector instead of a count.
        static std::vector encodeToWav(
            const std::vector& data,
            ma_format format,
            uint32_t nchannels,
            uint32_t sampleRate);

    };

} // namespace suio
--------------------------------------------------------------------------------
/sudio/utils/strtool.py:
--------------------------------------------------------------------------------
1 |
2 | # SUDIO - Audio Processing Platform
3 | # Copyright (C) 2024 Hossein Zahaki
4 |
5 | # This program is free software: you can redistribute it and/or modify
6 | # it under the terms of the GNU Lesser General Public License as published
7 | # by the Free Software Foundation, either version 3 of the License, or
8 | # any later version.
9 |
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | # GNU Lesser General Public License for more details.
14 |
15 | # You should have received a copy of the GNU Lesser General Public License
16 | # along with this program. If not, see .
17 |
18 | # - GitHub: https://github.com/MrZahaki/sudio
19 |
20 |
21 | # This program is distributed in the hope that it will be useful,
22 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
23 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 | # GNU Lesser General Public License for more details.
25 |
26 | # You should have received a copy of the GNU Lesser General Public License
27 | # along with this program. If not, see .
28 |
29 | # - GitHub: https://github.com/MrZahaki/sudio
30 |
31 |
32 | import time
33 |
def convert_string_to_python_type(st):
    """
    Convert the string 'st' to a more specific Python value based on its content.

    Args:
        st (str): The input string to be converted.

    Returns:
        Union[int, float, dict, list, str, None]: An int if 'st' consists only of
        decimal digits, a float if it is decimal digits with a single '.',
        the result of `parse_dictionary_string` if it contains both '{' and '}',
        the result of `parse_list_string` if it contains both '[' and ']',
        otherwise the original string. Empty braces/brackets yield None because
        the parsers return None for empty input.

    Note:
        Signs and exponents are not recognised ("-5" and "1e3" stay strings),
        and quotes inside braces/brackets are kept as part of the text.

    Example:
        >>> convert_string_to_python_type("42")
        42
        >>> convert_string_to_python_type("3.14")
        3.14
        >>> convert_string_to_python_type("{key: value}")
        {'key': 'value'}
        >>> convert_string_to_python_type("[1, 2, 3]")
        [1, 2, 3]
        >>> convert_string_to_python_type("Hello World")
        'Hello World'
    """
    # isdecimal() (not isnumeric()) so that characters such as '²' or '½',
    # which int()/float() reject, fall through instead of raising ValueError.
    if st.isdecimal():
        return int(st)
    if st.replace('.', '', 1).isdecimal():
        return float(st)
    if '{' in st and '}' in st:
        return parse_dictionary_string(st.strip('{').strip('}'))
    if '[' in st and ']' in st:
        return parse_list_string(st.strip('[').strip(']'))
    return st


def parse_dictionary_string(st, dict_eq=':', item_sep=','):
    """
    Convert a string of separated key/value items to a Python dictionary.

    Args:
        st (str): The input string, without enclosing braces.
        dict_eq (str, optional): The string used as the key-value separator. Defaults to ':'.
        item_sep (str, optional): The string used as the item separator. Defaults to ','.

    Returns:
        Optional[dict]: The converted dictionary, or None if 'st' is empty.
        Keys and values are stripped of surrounding whitespace; values are
        passed through `convert_string_to_python_type`. An item without
        'dict_eq' maps its key to None; items with an empty key are skipped.

    Note:
        Each item is split on the first 'dict_eq' only, so values may contain
        the separator themselves (e.g. "url:http://host"). Items are split on
        every 'item_sep', so nested structures containing it are not supported.

    Example:
        >>> parse_dictionary_string("key1:value1, key2:value2")
        {'key1': 'value1', 'key2': 'value2'}
        >>> parse_dictionary_string("name:John, age:30, city:New York", dict_eq=':', item_sep=', ')
        {'name': 'John', 'age': 30, 'city': 'New York'}
        >>> parse_dictionary_string("a=1; b=2; c=3", dict_eq='=', item_sep='; ')
        {'a': 1, 'b': 2, 'c': 3}
    """
    if not st:
        return None
    parsed = {}
    for item in st.split(item_sep):
        key, found, raw_value = item.strip().partition(dict_eq)
        key = key.strip()
        if not key:
            # Empty item (e.g. a trailing separator): nothing to record.
            continue
        if found:
            value = raw_value.strip()
            if value:
                value = convert_string_to_python_type(value)
        else:
            value = None
        parsed[key] = value
    return parsed


def parse_list_string(st):
    """
    Convert a comma-separated string to a Python list.

    Args:
        st (str): The input string, without enclosing brackets.

    Returns:
        Optional[list]: The converted list, or None if 'st' is empty. Each
        item is stripped and passed through `convert_string_to_python_type`;
        empty items are kept as empty strings.

    Example:
        >>> parse_list_string("1, 2, 3, 4, 5")
        [1, 2, 3, 4, 5]
        >>> parse_list_string("apple, orange, banana, mango")
        ['apple', 'orange', 'banana', 'mango']
        >>> parse_list_string("3.14, 2.71, 1.618")
        [3.14, 2.71, 1.618]
    """
    if not st:
        return None
    items = []
    for value in (piece.strip() for piece in st.split(',')):
        if value:
            value = convert_string_to_python_type(value)
        items.append(value)
    return items
131 |
132 |
133 |
def generate_timestamp_name(seed=None):
    """
    Generate a name from the current UTC time.

    Args:
        seed (optional): Prefix for the name; converted with str(). When it is
            falsy (None, '', 0) the date-based prefix is used instead.
            Defaults to None.

    Returns:
        str: The generated timestamp-based name.

    Example:
        >>> generate_timestamp_name()
        'YYYYMMDD_SSMMM'
        >>> generate_timestamp_name("custom_seed")
        'custom_seedDDSSMMM'

    Note:
        - 'YYYY' represents the year.
        - 'MM' represents the month (zero-padded).
        - 'DD' represents the day (zero-padded).
        - 'SS' represents the second (zero-padded).
        - 'MMM' represents the millisecond (zero-padded).
        - The seed, if provided, replaces 'YYYYMM' and the underscore.
        - Minutes and hours are not part of the name, so names repeat over
          time; they are not globally unique.
    """
    # Sample the clock once so the date, second and millisecond fields all
    # describe the same instant.
    now = time.time()
    utc = time.gmtime(now)
    # Integer milliseconds, always three digits; slicing str(fraction) broke
    # for short fractions ("0.5") and scientific notation ("1e-05").
    millis = int((now % 1) * 1000)
    tail = '{:02d}{:03d}'.format(utc.tm_sec, millis)
    if seed:
        return '{}{:02d}{}'.format(seed, utc.tm_mday, tail)
    return '{:04d}{:02d}{:02d}_{}'.format(utc.tm_year, utc.tm_mon, utc.tm_mday, tail)
165 |
166 |
167 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | # Sudio 🎵
9 |
10 | [](https://badge.fury.io/py/sudio)
11 | [](https://www.pepy.tech/projects/sudio)
12 | [](https://mrzahaki.github.io/sudio/)
13 | [](https://github.com/mrzahaki/sudio/actions/workflows/python-package.yml)
14 | [](https://pypi.org/project/sudio/)
15 | [](https://shields.io/)
16 | [](https://www.gnu.org/licenses/lgpl-3.0)
17 | [](https://colab.research.google.com/github/mrzahaki/sudio/blob/Master/docs/_static/sudio.ipynb)
18 |
19 |
20 | Sudio is a Python library for audio processing and manipulation, providing a set of tools for working with digital audio files. It supports operations such as time-domain slicing, frequency filtering, audio mixing, streaming, and effect application across various audio formats, making complex audio engineering tasks accessible through a Pythonic interface.
21 |
22 |
23 | ## 🚀 Quick Start
24 |
25 | ### Installation
26 |
27 | install Sudio using pip:
28 |
29 | ```bash
30 | pip install sudio==1.0.10
31 | ```
32 |
33 | ### Basic Usage
34 |
35 | an example to get you started with sudio:
36 |
37 | ```python
38 | import sudio
39 | from sudio.process.fx import (
40 | PitchShifter,
41 | Tempo,
42 | ChannelMixer,
43 | FadeEnvelope,
44 | FadePreset
45 | )
46 | su = sudio.Master()
47 |
48 | song = su.add('./Farhad Jahangiri - 5 Sobh (320).mp3')
49 |
50 | cool_remix = (
51 | song[:40]
52 | .afx(
53 | PitchShifter,
54 | semitones=-3
55 | ).afx(
56 | PitchShifter,
57 | start=2,
58 | duration=0.8,
59 | envelope=[0.8, 2, 1]
60 | ).afx(
61 | PitchShifter,
62 | start=10,
63 | duration=0.8,
64 | envelope=[0.65, 3, 1]
65 | ).afx(
66 | PitchShifter,
67 | start=20,
68 | duration=0.8,
69 | envelope=[2, 0.7, 1]
70 | ).afx(
71 | PitchShifter,
72 | start=30,
73 | duration=4,
74 | envelope=[1, 3, 1, 1]
75 | ).afx(
76 | Tempo,
77 | envelope=[1, 0.95, 1.2, 1]
78 | ).afx(
79 | FadeEnvelope,
80 | start=0,
81 | stop=10,
82 | preset=FadePreset.SMOOTH_FADE_IN
83 | )
84 | )
85 |
86 | side_slide = (
87 | song[:10].afx(
88 | ChannelMixer,
89 | correlation=[[0.4, -0.6], [0, 1]]
90 | ).afx(
91 | FadeEnvelope,
92 | preset=FadePreset.SMOOTH_FADE_OUT
93 | )
94 | )
95 |
96 | cool_remix = side_slide + cool_remix
97 |
98 | # simple 4 band EQ
99 | cool_remix = cool_remix[
100 | : '200': 'order=6, scale=0.7',
101 | '200':'800':'scale=0.5',
102 | '1000':'4000':'scale=0.4',
103 | '4000'::'scale=0.6'
104 | ]
105 |
106 | su.export(
107 | cool_remix,
108 | 'remix.mp3',
109 | quality=.8,
110 | bitrate=256
111 | )
112 |
113 | su.echo(cool_remix)
114 | ```
115 |
116 | #### Remix
117 | [Listen to the remix](https://raw.githubusercontent.com/mrzahaki/sudio/Master/docs/_static/remix.mp3)
118 |
119 | #### Original
120 | [Listen to the main track](https://raw.githubusercontent.com/mrzahaki/sudio/Master/docs/_static/main.mp3)
121 |
122 |
123 |
124 |
125 | The example above uses effects like PitchShifter, which allows pitch alterations through static semitone shifts and dynamic pitch envelopes; Tempo, for seamless time-stretching without pitch distortion; ChannelMixer, to rebalance and spatialize audio channels; and FadeEnvelope, for nuanced amplitude shaping. The remix workflow shows the library's flexibility by applying multiple pitch-shifting effects with varying start times and envelopes, dynamically adjusting tempo, adding a smooth fade-in, creating a side-slide effect through channel mixing, and scaling different frequency bands with a simple 4-band EQ. By chaining these effects together, you can craft complex audio transformations, enabling audio remixing with just a few lines of code.
126 |
127 |
128 |
129 |
130 | ### Explore Sudio
131 |
132 | Get started with `Sudio` processing in minutes using [Google Colab](https://colab.research.google.com/github/mrzahaki/sudio/blob/Master/docs/_static/sudio.ipynb)!
133 |
134 |
135 | ## 🎹 Key Features
136 |
137 | Handles both real-time streaming and offline processing, allowing for dynamic applications like live audio effects as well as batch processing of audio files.
138 |
139 | Allows integration of custom processing modules.
140 |
141 | Flexible audio playback, precise time-domain slicing, and comprehensive filtering options
142 |
143 | Advanced audio manipulation (joining, mixing, shifting)
144 |
145 | Real-time audio streaming with dynamic control (pause, resume, jump)
146 |
147 | Custom audio processing pipelines for complex effects
148 |
149 | Multi-format support with quality-controlled encoding/decoding
150 |
151 |
152 | ## 📚 Documentation
153 |
154 | for detailed documentation and examples, visit the [Sudio Documentation](https://mrzahaki.github.io/sudio/).
155 |
156 |
157 | ## 💖 [Support Sudio](https://donate.webmoney.com/w/OTmd0tU8H4gRUG4eXokeQb)
158 |
159 | **Audio software shouldn’t lock you in.** Sudio offers free, open-source tools for developers and creators—built to replace proprietary apps without the fees or restrictions. To stay viable, it relies on community support: code contributions, testing, donations, or simply spreading the word. If open tools matter to you, let’s keep improving them.
160 |
161 | ## 📄 License
162 |
163 | Sudio is released under the GNU Lesser General Public License, Version 3. See the [LICENSE](https://github.com/mrzahaki/sudio/blob/Master/LICENSE) file for details.
164 |
--------------------------------------------------------------------------------
/sudio/rateshift/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # SUDIO - Audio Processing Platform
2 | # Copyright (C) 2024 Hossein Zahaki
3 |
4 | # This program is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU Lesser General Public License as published
6 | # by the Free Software Foundation, either version 3 of the License, or
7 | # any later version.
8 |
9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU Lesser General Public License for more details.
13 |
14 | # You should have received a copy of the GNU Lesser General Public License
15 | # along with this program. If not, see .
16 |
17 | # - GitHub: https://github.com/MrZahaki/sudio
18 |
# Build script for the `python_rateshift` pybind11 extension module, which
# is emitted as `_rateshift` and links against libsamplerate.
cmake_minimum_required(VERSION 3.20)
project(python_rateshift LANGUAGES CXX)

# C++14 without compiler extensions; the remaining settings silence
# build-time chatter (rule messages, install messages, verbose makefiles).
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_EXPORT_COMPILE_COMMANDS OFF)
set(CMAKE_VERBOSE_MAKEFILE OFF)
set(CMAKE_SUPPRESS_REGENERATION ON)
set_property(GLOBAL PROPERTY RULE_MESSAGES OFF)
set(CMAKE_RULE_MESSAGES OFF)
set(CMAKE_TEST_OUTPUT OFF)
set(CMAKE_INSTALL_MESSAGE NEVER)


# C is enabled in addition to the CXX declared in project(); presumably
# needed by the libsamplerate sub-build - TODO confirm.
enable_language(C)
enable_language(CXX)


# Opt in to the NEW behavior of CMP0094 (FindPython lookup strategy) when
# the running CMake knows that policy.
if (POLICY CMP0094)
    cmake_policy(SET CMP0094 NEW)
endif()

# Consult the Windows registry / macOS frameworks last when locating Python,
# unless the caller already chose a preference.
if (NOT DEFINED Python_FIND_REGISTRY)
    set(Python_FIND_REGISTRY "LAST")
endif()
if (NOT DEFINED Python_FIND_FRAMEWORK)
    set(Python_FIND_FRAMEWORK "LAST")
endif()

# Apply the per-target compiler flags used for the extension module:
# warnings off / exception model / parallel build / big objects on MSVC,
# optimization and position-independent code elsewhere.
function(apply_target_compile_options target)
    if(MSVC)
        target_compile_options(${target} PRIVATE
            /W0
            /EHsc
            /MP
            /bigobj
        )
    else()
        target_compile_options(${target} PRIVATE
            -O3
            -fPIC
        )
    endif()
endfunction()

find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)

message(STATUS "Python_EXECUTABLE: ${Python_EXECUTABLE}")
message(STATUS "Python_INCLUDE_DIRS: ${Python_INCLUDE_DIRS}")
message(STATUS "Python_LIBRARIES: ${Python_LIBRARIES}")

# Let later find_package() calls search the Python prefix first.
list(PREPEND CMAKE_PREFIX_PATH ${Python_PREFIX})


# pybind11: use a local checkout under .cache/pybind11 when present,
# otherwise fetch the pinned tag.
if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.cache/pybind11")
    include(FetchContent)
    message(STATUS "Fetching pybind11...")
    FetchContent_Declare(
        pybind11
        GIT_REPOSITORY https://github.com/pybind/pybind11
        GIT_TAG v2.13.6
    )
    FetchContent_MakeAvailable(pybind11)
else()
    message(STATUS "Using existing pybind11 in ${CMAKE_CURRENT_SOURCE_DIR}/.cache/pybind11")
    add_subdirectory(".cache/pybind11")
endif()


# Version string exported to the C++ code via LIBSAMPLERATE_VERSION below;
# keep it in sync with the pinned commit.
set(LIBSAMPLERATE_VERSION 0.2.2)
set(BUILD_TESTING OFF CACHE BOOL "Disable libsamplerate test build")


# libsamplerate: use an in-tree ./libsamplerate directory when present,
# otherwise fetch the pinned commit.
if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/libsamplerate")
    include(FetchContent)
    message(STATUS "Fetching libsamplerate...")
    FetchContent_Declare(
        libsamplerate
        GIT_REPOSITORY https://github.com/libsndfile/libsamplerate
        GIT_TAG c96f5e3de9c4488f4e6c97f59f5245f22fda22f7 # 0.2.2
    )
    FetchContent_MakeAvailable(libsamplerate)
else()
    message(STATUS "Using existing libsamplerate in ${CMAKE_CURRENT_SOURCE_DIR}/libsamplerate")
    add_subdirectory(libsamplerate)
endif()

# The static library is linked into a shared module, so it must be PIC.
set_target_properties(samplerate PROPERTIES POSITION_INDEPENDENT_CODE ON)

pybind11_add_module(python_rateshift
    src/python_bindings.cpp
    src/rateshift.cpp
)

# NOTE(review): the include path below points at external/libsamplerate,
# while the in-tree check above looks for ./libsamplerate. Confirm the
# path exists, or rely on the include dirs propagated by linking `samplerate`.
target_include_directories(python_rateshift
    PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/external/libsamplerate/include
    ${CMAKE_CURRENT_SOURCE_DIR}/inc
    ${Python_INCLUDE_DIRS}
)


# NOTE(review): add_compile_options() sets a directory-level property that
# only initializes targets created AFTER the call. python_rateshift already
# exists here, so these flags likely do not reach it - confirm whether this
# block should move above pybind11_add_module().
if(MSVC)
    add_compile_options(
        /W0
        /experimental:external
        /external:anglebrackets
        /external:W0
        /EHsc
        /MP
        /bigobj
    )

    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /MANIFEST:NO")

else()
    if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
        add_compile_options(
            -w
            -Wno-everything
            -O3
            -fPIC
        )
    elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
        add_compile_options(
            -w
            -Wno-all
            -O3
            -fPIC
        )
    endif()
endif()

set(CMAKE_WARN_DEPRECATED OFF CACHE BOOL "Suppress deprecation warnings" FORCE)
set(CMAKE_SUPPRESS_DEPRECATION_MESSAGES ON CACHE BOOL "Suppress deprecated declarations" FORCE)

apply_target_compile_options(python_rateshift)

# Forward the package version to the compiler when the caller provides it.
if(DEFINED PACKAGE_VERSION_INFO)
    add_definitions(-DPACKAGE_VERSION_INFO="${PACKAGE_VERSION_INFO}")
endif()

# NOTE(review): the generator expression on the PRIVATE line looks truncated
# in this listing (its condition between `$<` and `>` is missing) - restore
# it from the original file before building.
target_compile_definitions(python_rateshift
    PUBLIC
    LIBSAMPLERATE_VERSION="${LIBSAMPLERATE_VERSION}"
    PRIVATE
    $<$:VERSION_INFO="${PACKAGE_VERSION_INFO}">
)

# Emit the module as `_rateshift` with no `lib` prefix; both library and
# runtime outputs go to CMAKE_LIBRARY_OUTPUT_DIRECTORY.
set_target_properties(python_rateshift
    PROPERTIES
    PREFIX ""
    OUTPUT_NAME "_rateshift"
    LIBRARY_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}
    RUNTIME_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}
    LINKER_LANGUAGE CXX
)

target_link_libraries(python_rateshift
    PRIVATE
    samplerate
    ${Python_LIBRARIES}
)

# Install the built module under the `python` component using the standard
# GNU install directories.
include(GNUInstallDirs)
install(TARGETS python_rateshift
    COMPONENT python
    RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
    LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
)
191 |
192 |
--------------------------------------------------------------------------------
/sudio/process/fx/fx.py:
--------------------------------------------------------------------------------
1 | # SUDIO - Audio Processing Platform
2 | # Copyright (C) 2024 Hossein Zahaki
3 |
4 | # This program is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU Lesser General Public License as published
6 | # by the Free Software Foundation, either version 3 of the License, or
7 | # any later version.
8 |
9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU Lesser General Public License for more details.
13 |
14 | # You should have received a copy of the GNU Lesser General Public License
15 | # along with this program. If not, see .
16 |
17 | # - GitHub: https://github.com/MrZahaki/sudio
18 |
19 |
20 | # This program is distributed in the hope that it will be useful,
21 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 | # GNU Lesser General Public License for more details.
24 |
25 | # You should have received a copy of the GNU Lesser General Public License
26 | # along with this program. If not, see .
27 |
28 | # - GitHub: https://github.com/MrZahaki/sudio
29 |
30 |
31 | from sudio.io import SampleFormat
32 |
33 | class FX:
34 | def __init__(
35 | self,
36 | *args,
37 | data_size: int = None,
38 | sample_rate: int = None,
39 | nchannels: int = None,
40 | sample_format: SampleFormat = SampleFormat.UNKNOWN,
41 | data_nperseg:int=None,
42 | sample_type: str = '',
43 | sample_width: int = None,
44 | streaming_feature: bool = True,
45 | offline_feature: bool = True,
46 | preferred_datatype: SampleFormat = SampleFormat.UNKNOWN,
47 | **kwargs,
48 | ) -> None:
49 |
50 | """
51 | Initialize the base Effects (FX) processor with audio configuration and processing features.
52 |
53 | This method sets up the fundamental parameters and capabilities for audio signal processing,
54 | providing a flexible foundation for various audio effects and transformations.
55 |
56 | Parameters:
57 | -----------
58 | data_size : int, optional
59 | Total size of the audio data in samples. Helps in memory allocation and processing planning.
60 |
61 | sample_rate : int, optional
62 | Number of audio samples processed per second. Critical for time-based effects and analysis.
63 |
64 | nchannels : int, optional
65 | Number of audio channels (mono, stereo, etc.). Determines multi-channel processing strategies.
66 |
67 | sample_format : SampleFormat, optional
68 | Represents the audio data's numeric representation and precision.
69 | Defaults to UNKNOWN if not specified.
70 |
71 | data_nperseg : int, optional
72 | Number of samples per segment, useful for segmented audio processing techniques.
73 |
74 | sample_type : str, optional
75 | Additional type information about the audio samples.
76 |
77 | sample_width : int, optional
78 | Bit depth or bytes per sample, influencing audio resolution and dynamic range.
79 |
80 | streaming_feature : bool, default True
81 | Indicates if the effect supports real-time, streaming audio processing.
82 |
83 | offline_feature : bool, default True
84 | Determines if the effect can process entire audio files or large datasets.
85 |
86 | preferred_datatype : SampleFormat, optional
87 | Suggested sample format for optimal processing. Defaults to UNKNOWN.
88 |
89 | Notes:
90 | ------
91 | This base class provides a standardized interface for audio effect processors,
92 | enabling consistent configuration and feature detection across different effects.
93 | """
94 | self._streaming_feature = streaming_feature
95 | self._offline_feature = offline_feature
96 | self._preferred_datatype = preferred_datatype
97 | self._data_size = data_size
98 | self._sample_rate = sample_rate
99 | self._nchannels = nchannels
100 | self._sample_format = sample_format
101 | self._data_nperseg = data_nperseg
102 | self._sample_type = sample_type
103 | self._sample_width = sample_width
104 |
105 | def is_streaming_supported(self) -> bool:
106 | """
107 | Determine if audio streaming is supported for this effect.
108 | """
109 | return self._streaming_feature
110 |
111 | def is_offline_supported(self) -> bool:
112 | """
113 | Check if file/batch audio processing is supported.
114 | """
115 | return self._offline_feature
116 |
117 | def get_preferred_datatype(self)->SampleFormat:
118 | """
119 | Retrieve the recommended sample format for optimal processing.
120 | """
121 | return self._preferred_datatype
122 |
123 |
124 | def get_data_size(self)->int:
125 | """
126 | Get the total size of audio data in samples.
127 | """
128 | return self._data_size
129 |
130 | def get_sample_rate(self)->int:
131 | """
132 | Retrieve the audio sampling rate.
133 | """
134 | return self._sample_rate
135 |
136 | def get_nchannels(self)->int:
137 | """
138 | Get the number of audio channels.
139 | """
140 | return self._nchannels
141 |
142 | def get_sample_format(self)->SampleFormat:
143 | """
144 | Retrieve the audio sample format.
145 | """
146 | return self._sample_format
147 |
148 | def get_sample_type(self)->str:
149 | """
150 | Get additional sample type information.
151 | """
152 | return self._sample_type
153 |
def get_sample_width(self):
    """
    Return the sample width stored on this effect instance.
    """
    width = self._sample_width
    return width
159 |
def process(*args, **kwargs):
    """
    Placeholder for the effect's audio transformation.

    The base implementation accepts any arguments, does nothing and returns
    ``None``; concrete effect classes override it with their own processing.
    """
    return None
168 |
169 |
--------------------------------------------------------------------------------
/sudio/process/fx/_pitch_shifter.pyx:
--------------------------------------------------------------------------------
1 | # cython: boundscheck=False, wraparound=False, nonecheck=False, cdivision=True
2 |
3 | # SUDIO - Audio Processing Platform
4 | # Copyright (C) 2024 Hossein Zahaki
5 |
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU Lesser General Public License as published
8 | # by the Free Software Foundation, either version 3 of the License, or
9 | # any later version.
10 |
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU Lesser General Public License for more details.
15 |
16 | # You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
18 |
19 | # - GitHub: https://github.com/MrZahaki/sudio
20 |
21 |
22 | # This program is distributed in the hope that it will be useful,
23 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
24 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 | # GNU Lesser General Public License for more details.
26 |
27 | # You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
29 |
30 | # - GitHub: https://github.com/MrZahaki/sudio
31 |
32 |
33 | import numpy as np
34 | cimport numpy as np
35 | cimport cython
36 | from sudio.process.fx._tempo cimport _tempo_cy
37 | from sudio.process.fx._fade_envelope cimport prepare_envelope
38 | from scipy.interpolate import interp1d
39 | from sudio.rateshift import ConverterType, resample, Resampler
40 |
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef np.ndarray pitch_shifter_cy(
    np.ndarray input_audio,
    np.ndarray[double, ndim=1] envelope,
    float ratio=1.0,
    int sample_rate=44100,
    bint enable_spline=True,
    float spline_sigma = 0.1,
    float fade_max_db = 0.0,
    float fade_min_db = -60.0,
    int envbuffer = 400,
    int envlen = 600,
    int frame_length = 40,
    int sequence_ms = 82,
    int seekwindow_ms = 28,
    int overlap_ms = 12,
    object converter_type=ConverterType.sinc_fastest
    ):
    """
    Pitch-shift audio by time-stretching it and then resampling the result.

    The ratio curve is built first: when ``envelope`` has more than one point
    its reciprocal is expanded to ``envlen`` points by ``prepare_envelope``;
    otherwise a constant curve holding ``1 / ratio`` is used. The curve is
    interpolated over the normalised position [0, 1], handed to ``_tempo_cy``
    and then used as the per-chunk resampling ratio.

    Parameters
    ----------
    input_audio : np.ndarray
        Audio passed unchanged to ``_tempo_cy``.
    envelope : np.ndarray[double, ndim=1]
        Pitch envelope; used only when it has more than one element.
    ratio : float
        Constant pitch ratio used when ``envelope`` has at most one element.
    sample_rate : int
        Sampling rate, used to convert millisecond lengths to samples.
    enable_spline, spline_sigma, fade_max_db, fade_min_db, envbuffer, envlen
        Forwarded positionally to ``prepare_envelope``.
    frame_length : int
        Resampling chunk length in milliseconds.
    sequence_ms, seekwindow_ms, overlap_ms : int
        Forwarded to ``_tempo_cy``.
    converter_type : object
        Converter passed to ``Resampler`` / ``resample``.

    Returns
    -------
    np.ndarray
        Two-dimensional (channels, samples) float32 array, or the single row
        when there is exactly one channel.

    Raises
    ------
    ValueError
        If ``sample_rate * frame_length`` yields a chunk of zero samples.
    """
    if len(envelope) > 1:
        envelope = 1.0 / envelope
        envelope = prepare_envelope(
            envlen,
            envelope,
            enable_spline,
            spline_sigma,
            fade_max_db,
            fade_min_db,
            envbuffer
        )
    else:
        ratio = 1.0 / ratio
        envelope = np.full(envlen, ratio, dtype=np.float64)

    intp = interp1d(
        np.linspace(0, 1, len(envelope)),
        envelope
    )

    cdef np.ndarray tempo_res = _tempo_cy(
        input_audio,
        intp,
        sample_rate=sample_rate,
        sequence_ms=sequence_ms,
        seekwindow_ms=seekwindow_ms,
        overlap_ms=overlap_ms,
    )

    if tempo_res.ndim == 1:
        tempo_res = tempo_res[np.newaxis, :]

    cdef np.ndarray[np.float32_t, ndim=2] result
    cdef int nchannels = tempo_res.shape[0]
    cdef int samples = tempo_res.shape[1]
    cdef int data_chunk = (sample_rate * frame_length) // 1000
    cdef int total_steps
    cdef int current_pos
    cdef float current_ratio
    cdef np.ndarray[np.float32_t, ndim=1] frame
    cdef np.ndarray[np.float32_t, ndim=2] resampled

    # With cdivision enabled a zero chunk length would be a raw C division by
    # zero, so reject it explicitly.
    if data_chunk <= 0:
        raise ValueError(
            "sample_rate * frame_length must cover at least one sample"
        )

    # Round up so the trailing partial chunk is processed too; flooring here
    # dropped the tail and left ``is_last_chunk`` almost never set.
    total_steps = (samples + data_chunk - 1) // data_chunk

    # NOTE(review): ``envelope`` was replaced above by an ``envlen``-long array
    # in both cases, so this test is true whenever envlen > 1 and the ``else``
    # branch is only reached for envlen <= 1 — confirm whether the one-shot
    # ``resample`` path was meant for the constant-ratio case.
    if len(envelope) > 1:
        # Seed with an empty block so zero chunks still yields (nchannels, 0).
        pieces = [np.zeros((nchannels, 0), dtype=np.float32)]
        resampler = Resampler(converter_type, nchannels)

        for i in range(total_steps):
            current_pos = i * data_chunk
            is_last_chunk = current_pos + data_chunk >= samples
            current_ratio = float(intp(float(current_pos) / samples))
            resampler.set_ratio(current_ratio)
            # Interleave channels into one flat frame for the resampler.
            frame = tempo_res[:, current_pos: current_pos + data_chunk].T.flatten()
            frame = resampler.process(frame, current_ratio, is_last_chunk)
            # De-interleave back to (channels, samples).
            resampled = frame.reshape(len(frame) // nchannels, nchannels).T
            pieces.append(resampled)

        # One concatenation instead of re-copying the output on every chunk.
        result = np.concatenate(pieces, axis=1)

    else:
        result = resample(
            tempo_res,
            ratio,
            converter_type
        )

    if nchannels == 1:
        # Return the row directly: ``result`` is declared as a 2-D buffer and
        # must not be rebound to a 1-D array.
        return result[0]

    return result
131 |
132 |
133 |
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef np.ndarray _pitch_shifter_cy(
    np.ndarray input_audio,
    object intp,
    int sample_rate=44100,
    int frame_length = 85,
    int sequence_ms = 82,
    int seekwindow_ms = 28,
    int overlap_ms = 12,
    object converter_type=ConverterType.sinc_fastest
    ):
    """
    Pitch-shift audio using a ready-made ratio interpolator.

    ``intp`` is called with the normalised position (0..1) of each chunk and
    must return the ratio for that position. It is handed to ``_tempo_cy``
    and then used as the per-chunk resampling ratio.

    Parameters
    ----------
    input_audio : np.ndarray
        Audio passed unchanged to ``_tempo_cy``.
    intp : callable
        Maps a normalised position to a ratio.
    sample_rate : int
        Sampling rate, used to convert millisecond lengths to samples.
    frame_length : int
        Resampling chunk length in milliseconds.
    sequence_ms, seekwindow_ms, overlap_ms : int
        Forwarded to ``_tempo_cy``.
    converter_type : object
        Converter passed to ``Resampler``.

    Returns
    -------
    np.ndarray
        Two-dimensional (channels, samples) float32 array, or the single row
        when there is exactly one channel.

    Raises
    ------
    ValueError
        If ``sample_rate * frame_length`` yields a chunk of zero samples.
    """
    cdef np.ndarray tempo_res = _tempo_cy(
        input_audio,
        intp,
        sample_rate=sample_rate,
        sequence_ms=sequence_ms,
        seekwindow_ms=seekwindow_ms,
        overlap_ms=overlap_ms,
    )

    if tempo_res.ndim == 1:
        tempo_res = tempo_res[np.newaxis, :]

    cdef np.ndarray[np.float32_t, ndim=2] result
    cdef int nchannels = tempo_res.shape[0]
    cdef int samples = tempo_res.shape[1]
    cdef int data_chunk = (sample_rate * frame_length) // 1000
    cdef int total_steps
    cdef int current_pos
    cdef float current_ratio
    cdef np.ndarray[np.float32_t, ndim=1] frame
    cdef np.ndarray[np.float32_t, ndim=2] resampled

    # With cdivision enabled a zero chunk length would be a raw C division by
    # zero, so reject it explicitly.
    if data_chunk <= 0:
        raise ValueError(
            "sample_rate * frame_length must cover at least one sample"
        )

    # Round up so the trailing partial chunk is processed too; flooring here
    # dropped the tail and left ``is_last_chunk`` almost never set.
    total_steps = (samples + data_chunk - 1) // data_chunk

    # Seed with an empty block so zero chunks still yields (nchannels, 0).
    pieces = [np.zeros((nchannels, 0), dtype=np.float32)]
    resampler = Resampler(converter_type, nchannels)

    for i in range(total_steps):
        current_pos = i * data_chunk
        is_last_chunk = current_pos + data_chunk >= samples
        current_ratio = float(intp(float(current_pos) / samples))
        # Interleave channels into one flat frame for the resampler.
        frame = tempo_res[:, current_pos: current_pos + data_chunk].T.flatten()
        frame = resampler.process(frame, current_ratio, is_last_chunk)
        # De-interleave back to (channels, samples).
        resampled = frame.reshape(len(frame) // nchannels, nchannels).T
        pieces.append(resampled)

    # One concatenation instead of re-copying the output on every chunk.
    result = np.concatenate(pieces, axis=1)

    if nchannels == 1:
        # Return the row directly: ``result`` is declared as a 2-D buffer and
        # must not be rebound to a 1-D array.
        return result[0]

    return result
188 |
189 |
--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
# Manually triggered build: wheels via cibuildwheel, an sdist, and optional
# PyPI upload and GitHub release.
name: Build
on:
  workflow_dispatch:
    inputs:
      build_type:
        description: 'Type of build to run'
        required: true
        default: 'all'
        type: choice
        options:
          - all
          - wheels
          - sdist
      create_release:
        description: 'Create a new release'
        required: true
        type: boolean
        default: false
      upload_to_pypi:
        description: 'Upload to PyPI'
        required: true
        type: boolean
        default: false

permissions:
  contents: read

jobs:
  extract_version:
    runs-on: ubuntu-latest
    outputs:
      version: ${{ steps.get_version.outputs.VERSION }}
    permissions:
      contents: read
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
      # tomllib ships with Python 3.11+, so no third-party TOML parser is needed.
      - name: Get version from pyproject.toml
        id: get_version
        run: |
          VERSION=$(python -c "
          import sys
          import tomllib
          try:
              with open('pyproject.toml', 'rb') as fh:
                  data = tomllib.load(fh)
              if 'tool' in data and 'poetry' in data['tool']:
                  version = data['tool']['poetry']['version']
              elif 'project' in data:
                  version = data['project']['version']
              else:
                  raise KeyError('Unable to find version in pyproject.toml')
              print(version)
          except Exception as e:
              print(f'Error: {str(e)}', file=sys.stderr)
              sys.exit(1)
          ")
          echo "VERSION=$VERSION" >> "$GITHUB_OUTPUT"

  build_wheels:
    name: Build wheel ${{ matrix.python }}-${{ matrix.buildplat[1] }}-${{ matrix.buildplat[2] }}
    runs-on: ${{ matrix.buildplat[0] }}
    permissions:
      contents: read
    if: github.event.inputs.build_type == 'all' || github.event.inputs.build_type == 'wheels'
    strategy:
      fail-fast: false
      matrix:
        # NOTE(review): confirm the windows-2019 runner image is still offered.
        buildplat:
          - [ubuntu-22.04, manylinux_x86_64, ""]
          - [ubuntu-22.04, musllinux_x86_64, ""]
          - [macos-13, macosx_x86_64, openblas]
          - [macos-13, macosx_x86_64, accelerate]
          - [macos-14, macosx_arm64, accelerate]
          - [windows-2019, win_amd64, ""]
          - [windows-2019, win32, ""]
        python: ["cp310", "cp311", "cp312", "pp310", "cp313", "cp313t"]
        exclude:
          - buildplat: [windows-2019, win32, ""]
            python: "pp310"
          - buildplat: [ ubuntu-22.04, musllinux_x86_64, "" ]
            python: "pp310"
          - buildplat: [ macos-14, macosx_arm64, accelerate ]
            python: "pp310"
          - buildplat: [ windows-2019, win_amd64, "" ]
            python: "cp313t"
          - buildplat: [ windows-2019, win32, "" ]
            python: "cp313t"
          - buildplat: [ macos-13, macosx_x86_64, openblas]
            python: "cp313t"

    steps:
      - uses: actions/checkout@v4

      - name: Setup MSVC (32-bit)
        if: ${{ matrix.buildplat[1] == 'win32' }}
        uses: bus1/cabuild/action/msdevshell@e22aba57d6e74891d059d66501b6b5aed8123c4d  # v1
        with:
          architecture: 'x86'

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install cibuildwheel
        run: |
          python -m pip install --upgrade pip
          python -m pip install cibuildwheel==2.22.0

      - name: Build wheel
        uses: pypa/cibuildwheel@ee63bf16da6cddfb925f542f2c7b59ad50e93969  # 2.22
        env:
          CIBW_PRERELEASE_PYTHONS: True
          CIBW_FREE_THREADED_SUPPORT: True
          CIBW_BUILD: ${{ matrix.python }}-${{ matrix.buildplat[1] }}

      # NOTE(review): this glob ends in "-<buildplat[2]>.whl"; verify it matches
      # the wheel file names cibuildwheel actually writes to ./wheelhouse.
      - name: install wheel
        shell: bash
        run: |
          python -m pip install ./wheelhouse/*-${{ matrix.python }}-${{ matrix.buildplat[1] }}-${{ matrix.buildplat[2] }}.whl

      - name: Test package import and basic functionality
        shell: python
        run: |
          import sudio
          from sudio import Master

          # Example basic tests
          print("Sudio version:", sudio.__version__)

          # Attempt to create a Master instance
          master = Master()
          print("Master instance created successfully")

      # upload-artifact v4 requires a unique artifact name per job; the matrix
      # triple below is unique for every matrix entry.
      - uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.python }}-${{ matrix.buildplat[1] }}-${{ matrix.buildplat[2] }}
          path: ./wheelhouse/*.whl

  build_sdist:
    name: Build sdist
    runs-on: ubuntu-latest
    permissions:
      contents: read
    if: github.event.inputs.build_type == 'all' || github.event.inputs.build_type == 'sdist'

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Build sdist
        run: |
          python -m pip install -U pip build
          python -m build --sdist -Csetup-args=-Dallow-noblas=true

      - name: Check README rendering for PyPI
        run: |
          python -m pip install twine
          twine check dist/*

      - uses: actions/upload-artifact@v4
        with:
          name: sdist
          path: ./dist/*

  upload_pypi:
    needs: [build_wheels, build_sdist]
    runs-on: ubuntu-latest
    permissions:
      id-token: write
    # Run even when one build job was skipped by build_type, but never after a
    # failed or cancelled build.
    if: ${{ !failure() && !cancelled() && github.event.inputs.upload_to_pypi == 'true' }}
    steps:
      # No artifact name: fetch every wheel artifact plus the sdist and flatten
      # them into dist/ (a single named artifact would only contain the sdist).
      - uses: actions/download-artifact@v4
        with:
          path: dist
          merge-multiple: true

      - name: List Build contents
        run: ls -l dist

      - uses: pypa/gh-action-pypi-publish@release/v1
        with:
          user: __token__
          password: ${{ secrets.PYPI_TOKEN }}
          skip-existing: true

  create_release:
    needs: [extract_version, build_wheels, build_sdist, upload_pypi]
    runs-on: ubuntu-latest
    permissions:
      contents: write
    # Allow a release without a PyPI upload (upload_pypi skipped), but not
    # after any failed or cancelled dependency.
    if: ${{ !failure() && !cancelled() && github.event.inputs.create_release == 'true' }}
    steps:
      - uses: actions/checkout@v4
      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          path: dist
          merge-multiple: true
      - name: Create and push tag
        run: |
          git config user.name github-actions
          git config user.email github-actions@github.com
          git tag v${{ needs.extract_version.outputs.version }}
          git push origin v${{ needs.extract_version.outputs.version }}
      - name: Create Release
        id: create_release
        uses: softprops/action-gh-release@v1
        with:
          tag_name: v${{ needs.extract_version.outputs.version }}
          name: Release v${{ needs.extract_version.outputs.version }}
          draft: false
          prerelease: false
          generate_release_notes: true
          files: |
            dist/*.whl
            dist/*.tar.gz
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
230 |
231 |
--------------------------------------------------------------------------------
/sudio/process/fx/fade_envelope.py:
--------------------------------------------------------------------------------
1 | # SUDIO - Audio Processing Platform
2 | # Copyright (C) 2024 Hossein Zahaki
3 |
4 | # This program is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU Lesser General Public License as published
6 | # by the Free Software Foundation, either version 3 of the License, or
7 | # any later version.
8 |
9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU Lesser General Public License for more details.
13 |
14 | # You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | # - GitHub: https://github.com/MrZahaki/sudio
18 |
19 |
20 | # This program is distributed in the hope that it will be useful,
21 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 | # GNU Lesser General Public License for more details.
24 |
25 | # You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
27 |
28 | # - GitHub: https://github.com/MrZahaki/sudio
29 |
30 |
31 | import numpy as np
32 | from sudio.process.fx import FX
33 | from sudio.io import SampleFormat
34 | from sudio.process.fx._fade_envelope import generate_envelope, prepare_envelope
35 | from sudio.process.fx._fade_envelope import FadePreset as FP
36 | from enum import Enum
37 | from typing import Union
38 |
class FadePreset(Enum):
    """
    Python-level enumeration of the built-in fade envelope presets.

    Each member's value is the member of the same name from the compiled
    ``_fade_envelope.FadePreset`` (imported here as ``FP``); that value is what
    ``FadeEnvelope.process`` passes on to ``generate_envelope``.
    """
    SMOOTH_ENDS = FP.SMOOTH_ENDS
    BELL_CURVE = FP.BELL_CURVE
    KEEP_ATTACK_ONLY = FP.KEEP_ATTACK_ONLY
    LINEAR_FADE_IN = FP.LINEAR_FADE_IN
    LINEAR_FADE_OUT = FP.LINEAR_FADE_OUT
    PULSE = FP.PULSE
    REMOVE_ATTACK = FP.REMOVE_ATTACK
    SMOOTH_ATTACK = FP.SMOOTH_ATTACK
    SMOOTH_FADE_IN = FP.SMOOTH_FADE_IN
    SMOOTH_FADE_OUT = FP.SMOOTH_FADE_OUT
    SMOOTH_RELEASE = FP.SMOOTH_RELEASE
    TREMORS = FP.TREMORS
    ZIGZAG_CUT = FP.ZIGZAG_CUT
53 |
54 |
class FadeEnvelope(FX):
    """Audio effect that multiplies a signal by an amplitude envelope."""

    def __init__(self, *args, **kwargs) -> None:
        """
        Initialize the FadeEnvelope audio effect processor.

        This method configures the FadeEnvelope effect with specific processing features,
        setting up support for both streaming and offline audio processing.

        Parameters:
        -----------
        *args : Variable positional arguments
            Allows passing additional arguments to the parent FX class.

        **kwargs : Variable keyword arguments
            Additional configuration parameters for the effect.

        Features:
        ---------
        - Supports streaming audio processing
        - Supports offline audio processing
        - Prefers 32-bit floating-point audio format for high-precision dynamics manipulation

        Notes:
        ------
        The FadeEnvelope effect provides versatile amplitude shaping capabilities,
        enabling complex audio envelope transformations with minimal computational overhead.
        """
        # The docstring must be the first statement of the function to be
        # picked up as ``__init__.__doc__``; the feature table follows it.
        features = {
            'streaming_feature': True,
            'offline_feature': True,
            'preferred_datatype': SampleFormat.FLOAT32
        }
        super().__init__(*args, **kwargs, **features)

    def process(
            self,
            data: np.ndarray,
            preset: Union[FadePreset, np.ndarray] = FadePreset.SMOOTH_ENDS,
            **kwargs
    ) -> np.ndarray:
        """
        Shape your audio's dynamics with customizable envelope effects!

        This method allows you to apply various envelope shapes to your audio signal,
        transforming its amplitude characteristics with precision and creativity.
        Whether you want to smooth out transitions, create pulsing effects,
        or craft unique fade patterns, this method has you covered.

        Parameters:
        -----------
        data : numpy.ndarray
            Your input audio data. Can be a single channel or multi-channel array.
            The envelope will be applied across the last dimension of the array.

        preset : FadePreset or numpy.ndarray, optional
            Define how you want to shape your audio's amplitude:

            - If you choose a FadePreset (default: SMOOTH_ENDS):
              Select from predefined envelope shapes like smooth fades,
              bell curves, pulse effects, tremors, and more. Each preset
              offers a unique way to sculpt your sound.

            - If you provide a custom numpy array (a list or tuple is accepted
              as well and converted to a double-precision array):
              Create your own bespoke envelope by passing in a custom amplitude array.
              This gives you ultimate flexibility in sound design.

        Additional keyword arguments (optional):
        ----------------------------------------
        Customize envelope generation with these powerful parameters:

        Envelope Generation Parameters:
        - enable_spline : bool
            Smoothen your envelope with spline interpolation. Great for creating
            more organic, natural-feeling transitions.

        - spline_sigma : float, default varies
            Control the smoothness of spline interpolation. Lower values create
            sharper transitions, higher values create more gradual blends.

        - fade_max_db : float, default 0.0
            Set the maximum amplitude in decibels. Useful for controlling peak loudness.

        - fade_max_min_db : float, default -60.0
            Define the minimum amplitude in decibels. Helps create subtle or dramatic fades.

        - fade_attack : float, optional
            Specify the proportion of the audio dedicated to the attack phase.
            Influences how quickly the sound reaches its peak volume.

        - fade_release : float, optional
            Set the proportion of the audio dedicated to the release phase.
            Controls how the sound tapers off.

        - buffer_size : int, default 400
            Adjust the internal buffer size for envelope generation.

        - sawtooth_freq : float, default 37.7
            For presets involving sawtooth wave modulation, control the frequency
            of the underlying oscillation.

        Returns:
        --------
        numpy.ndarray
            Your processed audio data with the envelope applied
            (the element-wise product ``data * envelope``).

        Raises:
        -------
        TypeError
            If ``preset`` is neither a FadePreset nor an array-like
            (numpy array, list or tuple).

        Examples:
        ---------
        >>> import sudio
        >>> from sudio.process.fx import FadeEnvelope, FadePreset
        >>> su = sudio.Master()
        >>> rec = su.add('file.mp3')
        >>> rec.afx(FadeEnvelope, preset=FadePreset.PULSE, start=0, stop=10)
        """
        # The envelope is generated with one value per sample of the last axis.
        if isinstance(preset, (np.ndarray, list, tuple)):
            envelope = prepare_envelope(
                data.shape[-1],
                np.array(preset, dtype=np.double),
                **kwargs
            )
        elif isinstance(preset, FadePreset):
            envelope = generate_envelope(
                data.shape[-1],
                preset.value,
                **kwargs
            )
        else:
            raise TypeError('Invalid preset')

        processed_data = data * envelope
        return processed_data
187 |
188 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # SUDIO - Audio Processing Platform
2 | # Copyright (C) 2024 Hossein Zahaki
3 |
4 | # This program is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU Lesser General Public License as published
6 | # by the Free Software Foundation, either version 3 of the License, or
7 | # any later version.
8 |
9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU Lesser General Public License for more details.
13 |
14 | # You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | # - GitHub: https://github.com/MrZahaki/sudio
18 |
19 |
20 | import os
21 | import subprocess
22 | import sys
23 | from pathlib import Path
24 |
25 | from setuptools import setup, find_packages, Extension
26 | from setuptools.command.build_ext import build_ext
27 | from Cython.Build import cythonize
28 | import numpy
29 | import shutil
30 |
31 |
# Translates the build command's ``plat_name`` into the architecture name
# handed to CMake through its ``-A`` option on Windows generators.
CMAKE_PLATFORMS = dict(
    [
        ("win32", "Win32"),
        ("win-amd64", "x64"),
        ("win-arm32", "ARM"),
        ("win-arm64", "ARM64"),
    ]
)
38 |
class CMakeExtension(Extension):
    """Extension placeholder whose sources are built by CMake, not setuptools."""

    def __init__(self, name: str, sourcedir: str = "") -> None:
        # No sources are listed: the CMake project in ``sourcedir`` owns them.
        super().__init__(name, sources=[])
        absolute_dir = Path(sourcedir).resolve()
        self.sourcedir = os.fspath(absolute_dir)
43 |
class CythonExtension(Extension):
    """Marker subclass of ``Extension``; adds no behaviour of its own."""
46 |
class CustomBuildExt(build_ext):
    """``build_ext`` command that can also configure and build CMake projects."""

    def build_extension(self, ext):
        """Build *ext* with CMake if it is a ``CMakeExtension``, else with setuptools."""
        if isinstance(ext, CMakeExtension):
            return self.build_cmake_extension(ext)
        # CythonExtension derives from Extension, so a single check covers both.
        if isinstance(ext, Extension):
            return super().build_extension(ext)
        raise ValueError(f"Unknown extension type: {type(ext)}")

    def build_cmake_extension(self, ext):
        """Assemble the CMake configure/build arguments for *ext* and run the build."""
        ext_fullpath = Path.cwd() / self.get_ext_fullpath(ext.name)
        extdir = ext_fullpath.parent.resolve()

        # The DEBUG environment variable is only consulted when the command's
        # own ``debug`` option is unset.
        debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug
        cfg = "Debug" if debug else "Release"

        cmake_generator = os.environ.get("CMAKE_GENERATOR", "")

        cmake_args = [
            f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}{os.sep}",
            f"-DPYTHON_EXECUTABLE={sys.executable}",
            f"-DCMAKE_BUILD_TYPE={cfg}",
            f"-DPACKAGE_VERSION_INFO={self.distribution.get_version()}",
        ]
        build_args = []

        # Extra user-supplied configure arguments, split on single spaces.
        if "CMAKE_ARGS" in os.environ:
            cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item]

        if self.compiler.compiler_type != "msvc":
            self._setup_unix_build(cmake_generator, cmake_args)
        else:
            self._setup_windows_build(cmake_generator, cmake_args, build_args, cfg, extdir)

        self._setup_cross_platform_args(build_args, cmake_args)

        build_temp = Path(self.build_temp) / ext.name
        build_temp.mkdir(parents=True, exist_ok=True)

        self._run_cmake_build(ext, cmake_args, build_args, build_temp)

    def _setup_unix_build(self, cmake_generator, cmake_args):
        """Add non-MSVC configure arguments: Ninja when importable, plus PIC."""
        if not cmake_generator or cmake_generator == "Ninja":
            try:
                import ninja
                ninja_executable_path = Path(ninja.BIN_DIR) / "ninja"
                cmake_args.extend([
                    "-GNinja",
                    f"-DCMAKE_MAKE_PROGRAM:FILEPATH={ninja_executable_path}",
                ])
            except ImportError:
                # Ninja is optional; fall back to CMake's default generator.
                pass
        # CMAKE_BUILD_TYPE is already set from ``cfg`` by the caller. Appending
        # a hard-coded "Release" here used to override it and silently ignore
        # debug builds.
        cmake_args.append("-DCMAKE_POSITION_INDEPENDENT_CODE=ON")

    def _setup_windows_build(self, cmake_generator, cmake_args, build_args, cfg, extdir):
        """Add MSVC configure/build arguments (architecture and per-config output dir)."""
        single_config = any(x in cmake_generator for x in {"NMake", "Ninja"})
        contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"})

        # Multi-config generators without an architecture in their name take
        # the target architecture through ``-A``.
        if not single_config and not contains_arch:
            cmake_args.append(f"-A{CMAKE_PLATFORMS[self.plat_name]}")

        if not single_config:
            cmake_args.append(f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}")
            build_args.extend(["--config", cfg])

    def _setup_cross_platform_args(self, build_args, cmake_args):
        """Add macOS, parallelism and output-directory arguments shared by all platforms."""
        if "universal2" in self.plat_name:
            cmake_args.append("-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64")

        # Derive the deployment target from plat_name unless already exported.
        if self.plat_name.startswith("macosx-") and "MACOSX_DEPLOYMENT_TARGET" not in os.environ:
            os.environ["MACOSX_DEPLOYMENT_TARGET"] = self.plat_name.split("-")[1]

        if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ:
            if hasattr(self, "parallel") and self.parallel:
                build_args.append(f"-j{self.parallel}")

        output_dir = Path(self.build_lib) / 'sudio'
        cmake_args.extend([
            f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={output_dir}",
            f"-DCMAKE_INSTALL_LIBDIR={output_dir}",
            f"-DCMAKE_RUNTIME_OUTPUT_DIRECTORY={output_dir}"
        ])

    def _run_cmake_build(self, ext, cmake_args, build_args, build_temp):
        """Configure and build in *build_temp*, then copy built ``.so`` files into the package."""
        subprocess.run(
            ["cmake", ext.sourcedir, *cmake_args, "-Wno-dev", "--log-level", "NOTICE" ], cwd=build_temp, check=True
        )
        subprocess.run(
            ["cmake", "--build", ".", *build_args], cwd=build_temp, check=True
        )

        output_dir = Path(self.build_lib) / 'sudio'
        output_dir.mkdir(parents=True, exist_ok=True)

        # Collect any shared objects left in the build tree.
        for so_file in build_temp.glob('**/*.so'):
            dest = output_dir / so_file.name
            shutil.copy2(so_file, dest)
            print(f"Copied {so_file} to {dest}")
146 |
147 |
# libm is linked explicitly everywhere except on Windows.
extra_link_args = [] if sys.platform.startswith('win') else ['-lm']


numpy_include = numpy.get_include()


def _fx_extension(stem):
    """Describe the Cython module ``sudio/process/fx/<stem>.pyx``."""
    return Extension(
        f"sudio.process.fx.{stem}",
        [f"sudio/process/fx/{stem}.pyx"],
        include_dirs=[numpy_include],
        extra_link_args=extra_link_args,
        extra_compile_args=["-O3"],
    )


cython_extensions = [
    *(
        _fx_extension(stem)
        for stem in ("_tempo", "_fade_envelope", "_channel_mixer", "_pitch_shifter")
    ),
    Extension(
        "sudio.utils.math",
        ["sudio/utils/math.pyx"],
        extra_link_args=extra_link_args,
        include_dirs=[numpy_include],
        extra_compile_args=['-O3'],
        language='c'
    ),
]

# Native libraries whose build is delegated to CMake.
cmake_extensions = [
    CMakeExtension(module, sourcedir=source)
    for module, source in (
        ('sudio._rateshift', 'sudio/rateshift'),
        ('sudio._suio', 'sudio/io'),
    )
]

_directives = {
    'language_level': '3',
    'boundscheck': False,
    'wraparound': False,
    'cdivision': True,
    'nonecheck': False,
}
cythonized_extensions = cythonize(cython_extensions, compiler_directives=_directives)

setup(
    packages=find_packages(),
    package_dir={'': '.'},
    ext_modules=[*cmake_extensions, *cythonized_extensions],
    cmdclass={'build_ext': CustomBuildExt},
    zip_safe=False,
    package_data={"": ["*.pxd", "*.pyx"]},
)
225 |
226 |
--------------------------------------------------------------------------------
/sudio/process/fx/_tempo.pyx:
--------------------------------------------------------------------------------
1 | # distutils: language=c++
2 | # cython: language_level=3
3 |
4 |
5 | # SUDIO - Audio Processing Platform
6 | # Copyright (C) 2024 Hossein Zahaki
7 |
8 | # This program is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU Lesser General Public License as published
10 | # by the Free Software Foundation, either version 3 of the License, or
11 | # any later version.
12 |
13 | # This program is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU Lesser General Public License for more details.
17 |
18 | # You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
20 |
21 | # - GitHub: https://github.com/MrZahaki/sudio
22 |
23 |
24 | # This program is distributed in the hope that it will be useful,
25 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
26 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 | # GNU Lesser General Public License for more details.
28 |
29 | # You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
31 |
32 | # - GitHub: https://github.com/MrZahaki/sudio
33 |
34 | # distutils: language=c++
35 | # cython: language_level=3
36 |
37 | import numpy as np
38 | cimport numpy as np
39 | cimport cython
40 | cimport sudio.process.fx._fade_envelope as fade_envelope
41 |
42 | from libc.math cimport sqrt
43 | from scipy.interpolate import interp1d
44 | from scipy.ndimage import gaussian_filter1d
45 | from sudio.utils.math cimport db2amp
46 |
# Compile-time defaults for the time-stretch window lengths, in milliseconds.
# Where they are used they are converted to sample counts with
# ``(sample_rate * <value>) // 1000``.
DEF DEFAULT_SEQUENCE_MS = 82
DEF DEFAULT_SEEKWINDOW_MS = 28
DEF DEFAULT_OVERLAP_MS = 12
50 |
51 |
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef np.ndarray tempo_cy(
    np.ndarray input_audio,
    np.ndarray[double, ndim=1] envelope,
    int sample_rate=44100,
    int sequence_ms=DEFAULT_SEQUENCE_MS,
    int seekwindow_ms=DEFAULT_SEEKWINDOW_MS,
    int overlap_ms=DEFAULT_OVERLAP_MS,
    bint enable_spline = False,
    double spline_sigma = 0.1,
    double fade_max_db = 0.0,
    double fade_min_db = -60.0,
    int envbuffer = 400,
    int envlen = 600,
    double default_tempo = 1.0
    ):
    """
    Build a tempo curve and hand the audio to ``_tempo_cy`` for processing.

    The audio is converted to float32 and made two-dimensional (a 1-D input
    gets a leading axis). When ``envelope`` holds more than one point it is
    expanded to ``envlen`` points by ``prepare_envelope``; otherwise the curve
    is the constant ``default_tempo``. The curve is wrapped in an ``interp1d``
    over the range [0, 1] and passed on with the window parameters.

    Returns whatever ``_tempo_cy`` returns for these arguments.
    """
    cdef np.ndarray[double, ndim=1] tempo_values
    cdef np.ndarray result

    input_audio = np.asarray(input_audio, dtype=np.float32)
    if input_audio.ndim == 1:
        input_audio = input_audio[np.newaxis, :]

    if len(envelope) <= 1:
        # No usable envelope: hold the tempo constant.
        tempo_values = np.full(envlen, default_tempo, dtype=np.float64)
    else:
        tempo_values = fade_envelope.prepare_envelope(
            envlen,
            envelope,
            enable_spline,
            spline_sigma,
            fade_max_db,
            fade_min_db,
            envbuffer
        )

    # Tempo as a function of normalised position in the input.
    positions = np.linspace(0, 1, len(tempo_values))
    intp = interp1d(positions, tempo_values)

    result = _tempo_cy(
        input_audio,
        intp,
        sample_rate,
        sequence_ms,
        seekwindow_ms,
        overlap_ms
    )
    return result
107 |
108 |
109 |
110 |
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef np.ndarray _tempo_cy(
    np.ndarray input_audio,
    object intp,
    int sample_rate=44100,
    int sequence_ms=DEFAULT_SEQUENCE_MS,
    int seekwindow_ms=DEFAULT_SEEKWINDOW_MS,
    int overlap_ms=DEFAULT_OVERLAP_MS,
):
    """
    Overlap-add time stretch driven by a position-dependent tempo.

    For every step the function
      1. evaluates `intp(input_pos / frames)` to get the current tempo,
      2. searches the seek window for the offset whose first `overlap`
         samples correlate best with the tail of the previous segment
         (channel 0 only),
      3. cross-fades that tail into the chosen segment and copies the rest
         of the sequence to the output,
      4. advances the input by `tempo * (sequence - overlap)` samples,
         carrying the rounding error over to the next step.

    Args:
        input_audio: Audio samples, 1-D or (channels, frames). Converted to
            float32.
        intp: Callable mapping a normalised position in [0, 1) to a tempo
            factor; its result is converted with `float()`.
        sample_rate: Sample rate used to convert the `*_ms` arguments to
            sample counts.
        sequence_ms: Length of one processing sequence in milliseconds.
        seekwindow_ms: Length of the correlation search window in
            milliseconds.
        overlap_ms: Length of the cross-fade in milliseconds.

    Returns:
        float32 array holding the stretched audio: 1-D when the input has a
        single channel, otherwise (channels, output_frames).

    Raises:
        ValueError: If the sequence is not longer than the overlap, or if
            `intp` yields a tempo that is not strictly positive (including
            NaN). Both situations previously made the loop spin forever.
    """
    input_audio = np.asarray(input_audio, dtype=np.float32)
    if input_audio.ndim == 1:
        input_audio = input_audio[np.newaxis, :]

    cdef:
        int channels = input_audio.shape[0]
        int frames = input_audio.shape[1]
        int overlap_length = (sample_rate * overlap_ms) // 1000
        int sequence_length = (sample_rate * sequence_ms) // 1000
        int seekwindow_length = (sample_rate * seekwindow_ms) // 1000
        int output_frames = frames * 2  # Initial guess; grown on demand below
        int required_frames
        int input_pos = 0
        int output_pos = 0
        int best_offset
        int sequence_offset
        int sequence_length_current
        double corr, best_corr
        int i, ch
        float scale1, scale2
        float[:] signal1_view
        float[:] signal2_view
        np.ndarray[np.float32_t, ndim=2] output_buffer = np.zeros((channels, output_frames), dtype=np.float32)
        np.ndarray[np.float32_t, ndim=2] mid_buffer = np.zeros((channels, overlap_length), dtype=np.float32)

        double current_tempo
        double skip_fract = 0.0
        int skip
        double nominal_skip
        double input_progress_ratio

    # With sequence <= overlap no samples are produced and the input position
    # never advances, so the loop below could not terminate. Only reject the
    # configuration when the loop would actually run.
    if frames > seekwindow_length and sequence_length <= overlap_length:
        raise ValueError(
            "sequence_ms must be greater than overlap_ms "
            f"(got sequence_ms={sequence_ms}, overlap_ms={overlap_ms})"
        )

    while input_pos + seekwindow_length < frames:
        input_progress_ratio = float(input_pos) / frames
        current_tempo = float(intp(input_progress_ratio))

        # `not (x > 0)` also rejects NaN, which `x <= 0` would let through.
        if not (current_tempo > 0.0):
            raise ValueError(
                f"tempo must be strictly positive, got {current_tempo}"
            )

        # Round to whole samples and carry the remainder to the next step.
        nominal_skip = current_tempo * (sequence_length - overlap_length)
        skip = int(skip_fract + nominal_skip + 0.5)
        skip_fract += nominal_skip - skip

        # One step writes at most `sequence_length` samples starting at
        # output_pos. Bounds checking is disabled, so make sure the buffer is
        # large enough before writing (tempos below ~0.5 outgrow the initial
        # `frames * 2` allocation).
        required_frames = output_pos + sequence_length
        if required_frames > output_frames:
            output_frames = max(required_frames, 2 * output_frames)
            grown = np.zeros((channels, output_frames), dtype=np.float32)
            grown[:, :output_buffer.shape[1]] = output_buffer
            output_buffer = grown

        # Find the offset in the seek window that best matches the previous
        # tail. mid_buffer is not modified during the search, so its view is
        # taken once per step.
        best_offset = 0
        best_corr = -1.0
        signal2_view = mid_buffer[0, :overlap_length]
        for i in range(seekwindow_length - overlap_length):
            signal1_view = input_audio[0, input_pos + i:input_pos + i + overlap_length]
            corr = calc_correlation(signal1_view, signal2_view)
            if corr > best_corr:
                best_corr = corr
                best_offset = i

        # Linear cross-fade from the previous tail into the new segment.
        for ch in range(channels):
            for i in range(overlap_length):
                scale1 = float(i) / overlap_length
                scale2 = 1.0 - scale1
                output_buffer[ch, output_pos + i] = (
                    input_audio[ch, input_pos + best_offset + i] * scale1 +
                    mid_buffer[ch, i] * scale2
                )

        sequence_offset = input_pos + best_offset + overlap_length
        sequence_length_current = min(sequence_length - overlap_length,
                                      frames - sequence_offset)

        # Copy the non-overlapping remainder of the sequence verbatim.
        if sequence_length_current > 0:
            for ch in range(channels):
                output_buffer[ch, output_pos + overlap_length:
                              output_pos + overlap_length + sequence_length_current] = \
                    input_audio[ch, sequence_offset:
                                sequence_offset + sequence_length_current]

        # Remember the tail of this sequence for the next cross-fade.
        if sequence_offset + sequence_length_current - overlap_length < frames:
            for ch in range(channels):
                mid_buffer[ch, :] = input_audio[ch,
                    sequence_offset + sequence_length_current - overlap_length:
                    sequence_offset + sequence_length_current]

        input_pos += skip
        output_pos += sequence_length_current

    # Trim the output buffer; return 1-D for mono input.
    result = output_buffer[:, :output_pos]
    if input_audio.shape[0] == 1:
        result = result[0]

    return result
205 |
206 |
@cython.boundscheck(False)
@cython.wraparound(False)
cdef double calc_correlation(float[:] signal1, float[:] signal2) nogil:
    """
    Return the normalized cross-correlation of two signals.

    The dot product of the signals is divided by the square root of the
    product of their energies. The length of `signal1` determines how many
    samples of both signals are read. 0.0 is returned when either signal has
    zero energy.
    """
    cdef:
        int n = signal1.shape[0]
        int k
        float a
        float b
        double dot = 0.0
        double energy1 = 0.0
        double energy2 = 0.0

    for k in range(n):
        a = signal1[k]
        b = signal2[k]
        dot += a * b
        energy1 += a * a
        energy2 += b * b

    if energy1 != 0.0 and energy2 != 0.0:
        return dot / sqrt(energy1 * energy2)

    return 0.0
227 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
--------------------------------------------------------------------------------
/sudio/stream/streamcontrol.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | # SUDIO - Audio Processing Platform
4 | # Copyright (C) 2024 Hossein Zahaki
5 |
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU Lesser General Public License as published
8 | # by the Free Software Foundation, either version 3 of the License, or
9 | # any later version.
10 |
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU Lesser General Public License for more details.
15 |
16 | # You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
18 |
19 | # - GitHub: https://github.com/MrZahaki/sudio
20 |
21 |
22 | # This program is distributed in the hope that it will be useful,
23 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
24 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 | # GNU Lesser General Public License for more details.
26 |
27 | # You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
29 |
30 | # - GitHub: https://github.com/MrZahaki/sudio
31 |
32 |
33 | from sudio.stream.streamingtimecontroller import StreamingTimeController
34 |
35 |
class _StreamStateError(PermissionError, AssertionError):
    """
    Raised when a StreamControl operation is not allowed in the current state.

    It derives from ``PermissionError`` (the documented error type) and from
    ``AssertionError`` so that callers written against the former
    ``assert``-based checks keep catching it.
    """


class StreamControl:
    """
    The StreamControl class is defined to control the mainstream audio playback for a special 'record'.
    """

    time = StreamingTimeController()

    def __init__(self, master, record, on_stop, loop_mode, stream_mode):
        '''
        Initialize the StreamControl class to manage the mainstream audio playback for a specific 'record'.

        Args:

        - master: An instance of the main audio processing class.
        - record: Object describing the audio to be streamed; its ``o``
          (file object), ``size`` and ``duration`` attributes are read.
        - on_stop: Callback function to execute when audio playback stops.
        - loop_mode: Boolean indicating whether to enable loop mode.
        - stream_mode: Type of streaming operation (e.g., 'read', 'write').

        Attributes:

        - self._master: Reference to the main audio processing instance.
        - self._stream_type: Type of streaming operation.
        - self._stream_file: File object representing the audio file.
        - self._stream_file_size: Size of the audio file.
        - self._size_calculator: Lambda function to calculate the size of a data chunk.
        - self._stream_data_size: Size of the audio data to be streamed.
        - self._stream_data_pointer: Position of the file object at construction time.
        - self._stream_on_stop: Callback function for when audio playback stops.
        - self._stream_loop_mode: Boolean indicating whether loop mode is enabled.
        - self.duration: Duration of the audio file.
        - self._time_calculator: Lambda function to convert time to byte offset.
        - self._itime_calculator: Lambda function to convert byte offset to time.

        Note:

        - Construction itself performs no state checks. The control methods
          (start, resume, stop, pause, enable_loop, disable_loop) raise
          PermissionError when the master stream is in an incompatible state.
        '''

        self._master = master
        self._stream_type = stream_mode
        self._stream_file = record.o
        self._stream_file_size = record.size
        self._size_calculator = lambda: (self._master._data_chunk *
                                         self._master._nchannels *
                                         self._master._sample_width)
        self._stream_data_size = self._size_calculator()
        self._stream_data_pointer = self._stream_file.tell()
        self._stream_on_stop = on_stop
        self._stream_loop_mode = loop_mode

        self.duration = record.duration
        self._time_calculator = lambda t: int(self._master._sample_rate *
                                              self._master._nchannels *
                                              self._master._sample_width *
                                              t)

        self._itime_calculator = lambda byte: byte / (self._master._sample_rate *
                                                      self._master._nchannels *
                                                      self._master._sample_width)

        self._stream_busy_error = "Another file is currently streaming"
        self._stream_empty_error = "The stream is currently empty"

    def _require_ready(self):
        """
        Raise PermissionError unless the master is configured for this stream.
        """
        if not self.isready():
            raise _StreamStateError(self._stream_busy_error)

    def _require_streaming(self):
        """
        Raise PermissionError unless the master is currently streaming.
        """
        if not self._is_streaming():
            raise _StreamStateError(self._stream_empty_error)

    def _ready(self):
        """
        Ensure the stream is ready for playback.

        If the master is not yet configured for this stream, the streaming
        flag is cleared and this control's stream settings are installed on
        the master.

        Raises:
        - PermissionError: If the master is currently streaming.
        """
        # Explicit raise instead of `assert`: assertions are removed when
        # Python runs with -O, which would silently disable this guard.
        if self._is_streaming():
            raise _StreamStateError(self._stream_busy_error)

        if not self.isready():
            self._master._exstream_mode.clear()
            if self._master._stream_file is not None:
                # NOTE(review): this repositions the file currently held by
                # the master (not necessarily this control's file) to this
                # control's saved pointer - confirm that this is intended.
                self._master._stream_file.seek(self._stream_data_pointer, 0)

            self._master._stream_type = self._stream_type
            self._master._stream_file = self._stream_file
            self._master._stream_data_size = self._stream_data_size
            self._master._stream_data_pointer = self._stream_data_pointer
            self._master._stream_on_stop = self._stream_on_stop
            self._master._stream_loop_mode = self._stream_loop_mode

    def isready(self):
        """
        Check if the stream is ready for playback.

        The stream is ready when the master's stream type, file, data size,
        stop callback and loop mode all equal the values held by this control.

        Returns:
        - bool: True if ready, False otherwise
        """
        return (self._master._stream_type == self._stream_type and
                self._stream_file == self._master._stream_file and
                self._stream_data_size == self._master._stream_data_size and
                self._stream_on_stop == self._master._stream_on_stop and
                self._stream_loop_mode == self._master._stream_loop_mode)

    def is_streaming(self):
        """
        Check if the stream is currently in the streaming state.

        Returns:
        - bool: True if this stream is ready and the master is streaming, False otherwise
        """
        if self.isready():
            return self._master._exstream_mode.is_set()
        else:
            return False

    def _is_streaming(self):
        """
        Check if the master is currently in the streaming state (internal use).

        Unlike `is_streaming`, this does not check whether the master is
        configured for this particular stream.

        Returns:
        - bool: True if streaming, False otherwise
        """
        return self._master._exstream_mode.is_set()

    def start(self):
        """
        Start the audio playback stream.

        Raises:
        - PermissionError: If the master is currently streaming.
        """
        self._ready()
        self._master._exstream_mode.set()
        self._master._main_stream_safe_release()
        self._master._main_stream.clear()

    def resume(self):
        """
        Resume the audio playback stream.

        Raises:
        - PermissionError: If the master is not configured for this stream.
        """
        self._require_ready()
        self._master._exstream_mode.set()
        self._master._main_stream.clear()

    def stop(self):
        """
        Stop the audio playback stream and rewind the file to the saved data pointer.

        Raises:
        - PermissionError: If the master is not configured for this stream,
          or if nothing is currently streaming.
        """
        self._require_ready()
        self._require_streaming()
        self._master._stream_file.seek(self._stream_data_pointer, 0)
        self._master._exstream_mode.clear()
        # The master's main stream is left untouched here (its clear() call
        # was disabled in the original code).

    def pause(self):
        """
        Pause the audio playback stream without changing the file position.

        Raises:
        - PermissionError: If the master is not configured for this stream,
          or if nothing is currently streaming.
        """
        self._require_ready()
        self._require_streaming()
        self._master._exstream_mode.clear()
        # The master's main stream is left untouched here (its clear() call
        # was disabled in the original code).

    def enable_loop(self):
        """
        Enable looping for the audio playback stream.

        Raises:
        - PermissionError: If the master is not configured for this stream,
          or if nothing is currently streaming.
        """
        self._require_ready()
        self._require_streaming()
        self._master._stream_loop_mode = True
        self._stream_loop_mode = True

    def disable_loop(self):
        """
        Disable looping for the audio playback stream.

        Raises:
        - PermissionError: If the master is not configured for this stream,
          or if nothing is currently streaming.
        """
        self._require_ready()
        self._require_streaming()
        self._master._stream_loop_mode = False
        self._stream_loop_mode = False
212 |
--------------------------------------------------------------------------------