├── .gitignore
├── LICENSE
├── README.md
├── requirements.txt
├── slicer.py
└── slicer2.py

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# PyCharm
/.idea/

# Tests
/test*.py

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2022 Team OpenVPI

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Audio Slicer

A Python script that slices audio with silence detection.

---

This is the 2.0 version of Audio Slicer, which provides:

- Great improvements in speed (400x, compared to the previous 15x)
- Enhanced slicing logic with fewer errors

The 1.0 version can be found [here](https://github.com/openvpi/audio-slicer/tree/old).

A GUI version can be found [here](https://github.com/flutydeer/audio-slicer).

## Algorithm

### Silence detection

This script uses RMS (root mean square) to measure the quietness of the audio and detect silent parts. The RMS value of each frame (with the frame length set as the **hop size**) is calculated, and every frame with an RMS below the **threshold** is regarded as a silent frame.

### Audio slicing

Once a valid (sounding) part has reached **min length** since the last slice and a silent part longer than **min interval** is detected, the audio is sliced at the frame(s) with the lowest RMS value within the silent area. Long silent parts may be deleted.
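The detection step can be sketched in a few lines of NumPy. This is a minimal illustration rather than the actual implementation; `silence_mask` is a hypothetical helper, and the real script computes frame RMS with overlapping windows (see `get_rms` in `slicer2.py`):

```python
import numpy as np

def silence_mask(samples, sr, threshold_db=-40.0, hop_size_ms=10):
    """Mark frames whose RMS falls below the dB threshold (hypothetical helper)."""
    hop = round(sr * hop_size_ms / 1000)         # frame length in samples
    n_frames = len(samples) // hop
    frames = samples[:n_frames * hop].reshape(n_frames, hop)
    rms = np.sqrt(np.mean(frames ** 2, axis=1))  # per-frame RMS
    return rms < 10 ** (threshold_db / 20.0)     # True marks a silent frame
```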
## Requirements

### If you are using the Python API

```bash
pip install numpy
```

### If you are using the CLI

```shell
pip install librosa
pip install soundfile
```

or

```shell
pip install -r requirements.txt
```

## Usage

### Using the Python API

```python
import librosa  # Optional. Use any library you like to read audio files.
import soundfile  # Optional. Use any library you like to write audio files.

from slicer2 import Slicer

audio, sr = librosa.load('example.wav', sr=None, mono=False)  # Load an audio file with librosa.
slicer = Slicer(
    sr=sr,
    threshold=-40,
    min_length=5000,
    min_interval=300,
    hop_size=10,
    max_sil_kept=500
)
chunks = slicer.slice(audio)
for i, chunk in enumerate(chunks):
    if len(chunk.shape) > 1:
        chunk = chunk.T  # Swap axes if the audio is stereo.
    soundfile.write(f'clips/example_{i}.wav', chunk, sr)  # Save sliced audio files with soundfile.
```

### Using the CLI

The script can be run from the CLI as follows:

```bash
python slicer2.py audio [--out OUT] [--db_thresh DB_THRESH] [--min_length MIN_LENGTH] [--min_interval MIN_INTERVAL] [--hop_size HOP_SIZE] [--max_sil_kept MAX_SIL_KEPT]
```

where `audio` refers to the audio to be sliced, `--out` defaults to the same directory as the audio, and the other options have default values as listed [here](#parameters).
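For example, to slice `example.wav` into a `clips` directory while spelling out every default value (the file and directory names are placeholders):

```bash
python slicer2.py example.wav --out clips --db_thresh -40 --min_length 5000 --min_interval 300 --hop_size 10 --max_sil_kept 500
```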
## Parameters

### sr

The sampling rate of the input audio.

### threshold

The RMS threshold, presented in dB. Areas where all RMS values are below this threshold will be regarded as silence. Increase this value if your audio is noisy. Defaults to -40. (The corresponding CLI option is `--db_thresh`.)

### min_length

The minimum length required for each sliced audio clip, presented in milliseconds. Defaults to 5000.

### min_interval

The minimum length of a silent part that can be sliced, presented in milliseconds. Set this value smaller if your audio contains only short breaks. The smaller this value is, the more sliced audio clips this script is likely to generate. Note that this value must be no larger than min_length and no smaller than hop_size. Defaults to 300.

### hop_size

Length of each RMS frame, presented in milliseconds. Decreasing this value will increase the precision of slicing, but will slow down the process. Defaults to 20 in the Python API and 10 in the CLI.

### max_sil_kept

The maximum silence length kept around the sliced clips, presented in milliseconds. Adjust this value according to your needs. Note that setting this value does not mean that silent parts in the sliced audio have exactly the given length. The algorithm will search for the best position to slice, as described above. Defaults to 5000 in the Python API and 500 in the CLI.
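Internally, all millisecond parameters are converted into sample and frame counts using the sampling rate. A worked example at sr = 44100, mirroring the arithmetic in `Slicer.__init__` of `slicer2.py`:

```python
sr = 44100                                        # samples per second
hop_size = round(sr * 10 / 1000)                  # 10 ms   -> 441 samples per frame
min_length = round(sr * 5000 / 1000 / hop_size)   # 5000 ms -> 500 frames
min_interval = round(sr * 300 / 1000 / hop_size)  # 300 ms  -> 30 frames
max_sil_kept = round(sr * 500 / 1000 / hop_size)  # 500 ms  -> 50 frames
```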
## Performance

This script runs over 400x faster than real-time on an Intel i7 8750H CPU. Speed may vary according to your CPU and your disk. Though `Slicer` is thread-safe, multi-threading does not seem necessary due to the I/O bottleneck.

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvpi/audio-slicer/9958eede8f38fb6ce26914b1673e202ecfce70f3/requirements.txt
--------------------------------------------------------------------------------
/slicer.py:
--------------------------------------------------------------------------------
import os.path
from argparse import ArgumentParser
import time

import librosa
import numpy as np
import soundfile
from scipy.ndimage import maximum_filter1d, uniform_filter1d


def timeit(func):
    def run(*args, **kwargs):
        t = time.time()
        res = func(*args, **kwargs)
        print('executing \'%s\' cost %.3fs' % (func.__name__, time.time() - t))
        return res
    return run


# @timeit
def _window_maximum(arr, win_sz):
    return maximum_filter1d(arr, size=win_sz)[win_sz // 2: win_sz // 2 + arr.shape[0] - win_sz + 1]


# @timeit
def _window_rms(arr, win_sz):
    filtered = np.sqrt(uniform_filter1d(np.power(arr, 2), win_sz) - np.power(uniform_filter1d(arr, win_sz), 2))
    return filtered[win_sz // 2: win_sz // 2 + arr.shape[0] - win_sz + 1]
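

# Strictly speaking, _window_rms computes sqrt(E[x^2] - E[x]^2), i.e. a windowed
# standard deviation rather than an RMS; for zero-mean audio the two coincide.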


def level2db(levels, eps=1e-12):
    return 20 * np.log10(np.clip(levels, a_min=eps, a_max=1))


def _apply_slice(audio, begin, end):
    if len(audio.shape) > 1:
        return audio[:, begin: end]
    else:
        return audio[begin: end]


class Slicer:
    def __init__(self,
                 sr: int,
                 db_threshold: float = -40,
                 min_length: int = 5000,
                 win_l: int = 300,
                 win_s: int = 20,
                 max_silence_kept: int = 500):
        self.db_threshold = db_threshold
        self.min_samples = round(sr * min_length / 1000)
        self.win_ln = round(sr * win_l / 1000)
        self.win_sn = round(sr * win_s / 1000)
        self.max_silence = round(sr * max_silence_kept / 1000)
        if not self.min_samples >= self.win_ln >= self.win_sn:
            raise ValueError('The following condition must be satisfied: min_length >= win_l >= win_s')
        if not self.max_silence >= self.win_sn:
            raise ValueError('The following condition must be satisfied: max_silence_kept >= win_s')

    @timeit
    def slice(self, audio):
        if len(audio.shape) > 1:
            samples = librosa.to_mono(audio)
        else:
            samples = audio
        if samples.shape[0] <= self.min_samples:
            return [audio]
        # get absolute amplitudes
        abs_amp = np.abs(samples - np.mean(samples))
        # calculate local maximum with large window
        win_max_db = level2db(_window_maximum(abs_amp, win_sz=self.win_ln))
        sil_tags = []
        left = right = 0
        while right < win_max_db.shape[0]:
            if win_max_db[right] < self.db_threshold:
                right += 1
            elif left == right:
                left += 1
                right += 1
            else:
                if left == 0:
                    split_loc_l = left
                else:
                    sil_left_n = min(self.max_silence, (right + self.win_ln - left) // 2)
                    rms_db_left = level2db(_window_rms(samples[left: left + sil_left_n], win_sz=self.win_sn))
                    split_win_l = left + np.argmin(rms_db_left)
                    split_loc_l = split_win_l + np.argmin(abs_amp[split_win_l: split_win_l + self.win_sn])
                if len(sil_tags) != 0 and split_loc_l - sil_tags[-1][1] < self.min_samples and right < win_max_db.shape[0] - 1:
                    right += 1
                    left = right
                    continue
                if right == win_max_db.shape[0] - 1:
                    split_loc_r = right + self.win_ln
                else:
                    sil_right_n = min(self.max_silence, (right + self.win_ln - left) // 2)
                    rms_db_right = level2db(_window_rms(samples[right + self.win_ln - sil_right_n: right + self.win_ln], win_sz=self.win_sn))
                    split_win_r = right + self.win_ln - sil_right_n + np.argmin(rms_db_right)
                    split_loc_r = split_win_r + np.argmin(abs_amp[split_win_r: split_win_r + self.win_sn])
                sil_tags.append((split_loc_l, split_loc_r))
                right += 1
                left = right
        if left != right:
            sil_left_n = min(self.max_silence, (right + self.win_ln - left) // 2)
            rms_db_left = level2db(_window_rms(samples[left: left + sil_left_n], win_sz=self.win_sn))
            split_win_l = left + np.argmin(rms_db_left)
            split_loc_l = split_win_l + np.argmin(abs_amp[split_win_l: split_win_l + self.win_sn])
            sil_tags.append((split_loc_l, samples.shape[0]))
        if len(sil_tags) == 0:
            return [audio]
        else:
            chunks = []
            if sil_tags[0][0] > 0:
                chunks.append(_apply_slice(audio, 0, sil_tags[0][0]))
            for i in range(0, len(sil_tags) - 1):
                chunks.append(_apply_slice(audio, sil_tags[i][1], sil_tags[i + 1][0]))
            if sil_tags[-1][1] < samples.shape[0] - 1:
                chunks.append(_apply_slice(audio, sil_tags[-1][1], samples.shape[0]))
            return chunks


def main():
    parser = ArgumentParser()
    parser.add_argument('audio', type=str, help='The audio to be sliced')
    parser.add_argument('--out', type=str, help='Output directory of the sliced audio clips')
    parser.add_argument('--db_thresh', type=float, required=False, default=-40, help='The dB threshold for silence detection')
    parser.add_argument('--min_len', type=int, required=False, default=5000, help='The minimum milliseconds required for each sliced audio clip')
    parser.add_argument('--win_l', type=int, required=False, default=300, help='Size of the large sliding window, presented in milliseconds')
    parser.add_argument('--win_s', type=int, required=False, default=20, help='Size of the small sliding window, presented in milliseconds')
    parser.add_argument('--max_sil_kept', type=int, required=False, default=500, help='The maximum silence length kept around the sliced audio, presented in milliseconds')
    args = parser.parse_args()
    out = args.out
    if out is None:
        out = os.path.dirname(os.path.abspath(args.audio))
    audio, sr = librosa.load(args.audio, sr=None)
    slicer = Slicer(
        sr=sr,
        db_threshold=args.db_thresh,
        min_length=args.min_len,
        win_l=args.win_l,
        win_s=args.win_s,
        max_silence_kept=args.max_sil_kept
    )
    chunks = slicer.slice(audio)
    if not os.path.exists(out):
        os.makedirs(out)
    for i, chunk in enumerate(chunks):
        soundfile.write(os.path.join(out, '%s_%d.wav' % (os.path.basename(args.audio).rsplit('.', maxsplit=1)[0], i)), chunk, sr)


if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/slicer2.py:
--------------------------------------------------------------------------------
import numpy as np


# This function is obtained from librosa.
def get_rms(
    y,
    *,
    frame_length=2048,
    hop_length=512,
    pad_mode="constant",
):
    padding = (int(frame_length // 2), int(frame_length // 2))
    y = np.pad(y, padding, mode=pad_mode)

    axis = -1
    # put our new within-frame axis at the end for now
    out_strides = y.strides + tuple([y.strides[axis]])
    # Reduce the shape on the framing axis
    x_shape_trimmed = list(y.shape)
    x_shape_trimmed[axis] -= frame_length - 1
    out_shape = tuple(x_shape_trimmed) + tuple([frame_length])
    xw = np.lib.stride_tricks.as_strided(
        y, shape=out_shape, strides=out_strides
    )
    if axis < 0:
        target_axis = axis - 1
    else:
        target_axis = axis + 1
    xw = np.moveaxis(xw, -1, target_axis)
    # Downsample along the target axis
    slices = [slice(None)] * xw.ndim
    slices[axis] = slice(0, None, hop_length)
    x = xw[tuple(slices)]

    # Calculate power
    power = np.mean(np.abs(x) ** 2, axis=-2, keepdims=True)

    return np.sqrt(power)
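

# Note: for mono input, get_rms should match librosa.feature.rms(y=y,
# frame_length=..., hop_length=..., center=True, pad_mode="constant") up to
# floating-point error, since the constant padding of frame_length // 2 on
# both sides reproduces librosa's centering behavior.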


class Slicer:
    def __init__(self,
                 sr: int,
                 threshold: float = -40.,
                 min_length: int = 5000,
                 min_interval: int = 300,
                 hop_size: int = 20,
                 max_sil_kept: int = 5000):
        if not min_length >= min_interval >= hop_size:
            raise ValueError('The following condition must be satisfied: min_length >= min_interval >= hop_size')
        if not max_sil_kept >= hop_size:
            raise ValueError('The following condition must be satisfied: max_sil_kept >= hop_size')
        min_interval = sr * min_interval / 1000
        self.threshold = 10 ** (threshold / 20.)
        self.hop_size = round(sr * hop_size / 1000)
        self.win_size = min(round(min_interval), 4 * self.hop_size)
        self.min_length = round(sr * min_length / 1000 / self.hop_size)
        self.min_interval = round(min_interval / self.hop_size)
        self.max_sil_kept = round(sr * max_sil_kept / 1000 / self.hop_size)

    def _apply_slice(self, waveform, begin, end):
        if len(waveform.shape) > 1:
            return waveform[:, begin * self.hop_size: min(waveform.shape[1], end * self.hop_size)]
        else:
            return waveform[begin * self.hop_size: min(waveform.shape[0], end * self.hop_size)]

    # @timeit
    def slice(self, waveform):
        if len(waveform.shape) > 1:
            samples = waveform.mean(axis=0)
        else:
            samples = waveform
        if (samples.shape[0] + self.hop_size - 1) // self.hop_size <= self.min_length:
            return [waveform]
        rms_list = get_rms(y=samples, frame_length=self.win_size, hop_length=self.hop_size).squeeze(0)
        sil_tags = []
        silence_start = None
        clip_start = 0
        for i, rms in enumerate(rms_list):
            # Keep looping while frame is silent.
            if rms < self.threshold:
                # Record start of silent frames.
                if silence_start is None:
                    silence_start = i
                continue
            # Keep looping while frame is not silent and silence start has not been recorded.
            if silence_start is None:
                continue
            # Clear recorded silence start if interval is not enough or clip is too short.
            is_leading_silence = silence_start == 0 and i > self.max_sil_kept
            need_slice_middle = i - silence_start >= self.min_interval and i - clip_start >= self.min_length
            if not is_leading_silence and not need_slice_middle:
                silence_start = None
                continue
            # Need slicing. Record the range of silent frames to be removed.
            if i - silence_start <= self.max_sil_kept:
                pos = rms_list[silence_start: i + 1].argmin() + silence_start
                if silence_start == 0:
                    sil_tags.append((0, pos))
                else:
                    sil_tags.append((pos, pos))
                clip_start = pos
            elif i - silence_start <= self.max_sil_kept * 2:
                pos = rms_list[i - self.max_sil_kept: silence_start + self.max_sil_kept + 1].argmin()
                pos += i - self.max_sil_kept
                pos_l = rms_list[silence_start: silence_start + self.max_sil_kept + 1].argmin() + silence_start
                pos_r = rms_list[i - self.max_sil_kept: i + 1].argmin() + i - self.max_sil_kept
                if silence_start == 0:
                    sil_tags.append((0, pos_r))
                    clip_start = pos_r
                else:
                    sil_tags.append((min(pos_l, pos), max(pos_r, pos)))
                    clip_start = max(pos_r, pos)
            else:
                pos_l = rms_list[silence_start: silence_start + self.max_sil_kept + 1].argmin() + silence_start
                pos_r = rms_list[i - self.max_sil_kept: i + 1].argmin() + i - self.max_sil_kept
                if silence_start == 0:
                    sil_tags.append((0, pos_r))
                else:
                    sil_tags.append((pos_l, pos_r))
                clip_start = pos_r
            silence_start = None
        # Deal with trailing silence.
        total_frames = rms_list.shape[0]
        if silence_start is not None and total_frames - silence_start >= self.min_interval:
            silence_end = min(total_frames, silence_start + self.max_sil_kept)
            pos = rms_list[silence_start: silence_end + 1].argmin() + silence_start
            sil_tags.append((pos, total_frames + 1))
        # Apply and return slices.
        if len(sil_tags) == 0:
            return [waveform]
        else:
            chunks = []
            if sil_tags[0][0] > 0:
                chunks.append(self._apply_slice(waveform, 0, sil_tags[0][0]))
            for i in range(len(sil_tags) - 1):
                chunks.append(self._apply_slice(waveform, sil_tags[i][1], sil_tags[i + 1][0]))
            if sil_tags[-1][1] < total_frames:
                chunks.append(self._apply_slice(waveform, sil_tags[-1][1], total_frames))
            return chunks
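

# The chunks returned by Slicer.slice are views into the input array;
# copy them (np.copy) if the original buffer will be modified or reused.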


def main():
    import os.path
    from argparse import ArgumentParser

    import librosa
    import soundfile

    parser = ArgumentParser()
    parser.add_argument('audio', type=str, help='The audio to be sliced')
    parser.add_argument('--out', type=str, help='Output directory of the sliced audio clips')
    parser.add_argument('--db_thresh', type=float, required=False, default=-40,
                        help='The dB threshold for silence detection')
    parser.add_argument('--min_length', type=int, required=False, default=5000,
                        help='The minimum milliseconds required for each sliced audio clip')
    parser.add_argument('--min_interval', type=int, required=False, default=300,
                        help='The minimum milliseconds for a silence part to be sliced')
    parser.add_argument('--hop_size', type=int, required=False, default=10,
                        help='Frame length in milliseconds')
    parser.add_argument('--max_sil_kept', type=int, required=False, default=500,
                        help='The maximum silence length kept around the sliced clip, presented in milliseconds')
    args = parser.parse_args()
    out = args.out
    if out is None:
        out = os.path.dirname(os.path.abspath(args.audio))
    audio, sr = librosa.load(args.audio, sr=None, mono=False)
    slicer = Slicer(
        sr=sr,
        threshold=args.db_thresh,
        min_length=args.min_length,
        min_interval=args.min_interval,
        hop_size=args.hop_size,
        max_sil_kept=args.max_sil_kept
    )
    chunks = slicer.slice(audio)
    if not os.path.exists(out):
        os.makedirs(out)
    for i, chunk in enumerate(chunks):
        if len(chunk.shape) > 1:
            chunk = chunk.T
        soundfile.write(os.path.join(out, '%s_%d.wav' % (os.path.basename(args.audio).rsplit('.', maxsplit=1)[0], i)), chunk, sr)


if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------