├── .gitignore
├── LICENSE
├── README.md
├── requirements.txt
├── slicer.py
└── slicer2.py

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# PyCharm
/.idea/

# Tests
/test*.py

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2022 Team OpenVPI

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Audio Slicer

A Python script that slices audio with silence detection.

---

This is the 2.0 version of Audio Slicer, which provides:

- Great improvements in speed (400x, compared to the previous 15x)
- Enhanced slicing logic with fewer errors

The 1.0 version can be found [here](https://github.com/openvpi/audio-slicer/tree/old).

A GUI version can be found [here](https://github.com/flutydeer/audio-slicer).

## Algorithm

### Silence detection

This script uses RMS (root mean square) to measure the quietness of the audio and detect silent parts. The RMS value of each frame (with the frame length set as the **hop size**) is calculated, and every frame with an RMS below the **threshold** is regarded as a silent frame.

### Audio slicing

Once a valid (sounding) part has reached **min length** since the last slice and a silent part longer than **min interval** is detected, the audio is sliced at the frame(s) with the lowest RMS value within the silent area. Long silent parts may be deleted.
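The detection step can be sketched in a few lines of NumPy. This is a minimal illustration rather than the actual implementation; `silence_mask` is a hypothetical helper, and the real script computes frame RMS with overlapping windows (see `get_rms` in `slicer2.py`):

```python
import numpy as np

def silence_mask(samples, sr, threshold_db=-40.0, hop_size_ms=10):
    """Mark frames whose RMS falls below the dB threshold (hypothetical helper)."""
    hop = round(sr * hop_size_ms / 1000)         # frame length in samples
    n_frames = len(samples) // hop
    frames = samples[:n_frames * hop].reshape(n_frames, hop)
    rms = np.sqrt(np.mean(frames ** 2, axis=1))  # per-frame RMS
    return rms < 10 ** (threshold_db / 20.0)     # True marks a silent frame
```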
## Requirements

### If you are using the Python API

```bash
pip install numpy
```

### If you are using the CLI

```shell
pip install librosa
pip install soundfile
```

or

```shell
pip install -r requirements.txt
```

## Usage

### Using the Python API

```python
import librosa  # Optional. Use any library you like to read audio files.
import soundfile  # Optional. Use any library you like to write audio files.

from slicer2 import Slicer

audio, sr = librosa.load('example.wav', sr=None, mono=False)  # Load an audio file with librosa.
slicer = Slicer(
    sr=sr,
    threshold=-40,
    min_length=5000,
    min_interval=300,
    hop_size=10,
    max_sil_kept=500
)
chunks = slicer.slice(audio)
for i, chunk in enumerate(chunks):
    if len(chunk.shape) > 1:
        chunk = chunk.T  # Swap axes if the audio is stereo.
    soundfile.write(f'clips/example_{i}.wav', chunk, sr)  # Save sliced audio files with soundfile.
```

### Using the CLI

The script can be run from the CLI as follows:

```bash
python slicer2.py audio [--out OUT] [--db_thresh DB_THRESH] [--min_length MIN_LENGTH] [--min_interval MIN_INTERVAL] [--hop_size HOP_SIZE] [--max_sil_kept MAX_SIL_KEPT]
```

where `audio` refers to the audio to be sliced, `--out` defaults to the same directory as the audio, and the other options have default values as listed [here](#parameters).
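For example, to slice `example.wav` into a `clips` directory while spelling out every default value (the file and directory names are placeholders):

```bash
python slicer2.py example.wav --out clips --db_thresh -40 --min_length 5000 --min_interval 300 --hop_size 10 --max_sil_kept 500
```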
## Parameters

### sr

The sampling rate of the input audio.

### threshold

The RMS threshold, presented in dB. Areas where all RMS values are below this threshold will be regarded as silence. Increase this value if your audio is noisy. Defaults to -40. (The corresponding CLI option is `--db_thresh`.)

### min_length

The minimum length required for each sliced audio clip, presented in milliseconds. Defaults to 5000.

### min_interval

The minimum length of a silent part that can be sliced, presented in milliseconds. Set this value smaller if your audio contains only short breaks. The smaller this value is, the more sliced audio clips this script is likely to generate. Note that this value must be no larger than min_length and no smaller than hop_size. Defaults to 300.

### hop_size

Length of each RMS frame, presented in milliseconds. Decreasing this value will increase the precision of slicing, but will slow down the process. Defaults to 20 in the Python API and 10 in the CLI.

### max_sil_kept

The maximum silence length kept around the sliced clips, presented in milliseconds. Adjust this value according to your needs. Note that setting this value does not mean that silent parts in the sliced audio have exactly the given length. The algorithm will search for the best position to slice, as described above. Defaults to 5000 in the Python API and 500 in the CLI.
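Internally, all millisecond parameters are converted into sample and frame counts using the sampling rate. A worked example at sr = 44100, mirroring the arithmetic in `Slicer.__init__` of `slicer2.py`:

```python
sr = 44100                                        # samples per second
hop_size = round(sr * 10 / 1000)                  # 10 ms   -> 441 samples per frame
min_length = round(sr * 5000 / 1000 / hop_size)   # 5000 ms -> 500 frames
min_interval = round(sr * 300 / 1000 / hop_size)  # 300 ms  -> 30 frames
max_sil_kept = round(sr * 500 / 1000 / hop_size)  # 500 ms  -> 50 frames
```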
## Performance

This script runs over 400x faster than real-time on an Intel i7 8750H CPU. Speed may vary according to your CPU and your disk. Though `Slicer` is thread-safe, multi-threading does not seem necessary due to the I/O bottleneck.

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvpi/audio-slicer/9958eede8f38fb6ce26914b1673e202ecfce70f3/requirements.txt
--------------------------------------------------------------------------------
/slicer.py:
--------------------------------------------------------------------------------
import os.path
from argparse import ArgumentParser
import time

import librosa
import numpy as np
import soundfile
from scipy.ndimage import maximum_filter1d, uniform_filter1d


def timeit(func):
    def run(*args, **kwargs):
        t = time.time()
        res = func(*args, **kwargs)
        print('executing \'%s\' cost %.3fs' % (func.__name__, time.time() - t))
        return res
    return run


# @timeit
def _window_maximum(arr, win_sz):
    return maximum_filter1d(arr, size=win_sz)[win_sz // 2: win_sz // 2 + arr.shape[0] - win_sz + 1]


# @timeit
def _window_rms(arr, win_sz):
    filtered = np.sqrt(uniform_filter1d(np.power(arr, 2), win_sz) - np.power(uniform_filter1d(arr, win_sz), 2))
    return filtered[win_sz // 2: win_sz // 2 + arr.shape[0] - win_sz + 1]
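

# Strictly speaking, _window_rms computes sqrt(E[x^2] - E[x]^2), i.e. a windowed
# standard deviation rather than an RMS; for zero-mean audio the two coincide.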


def level2db(levels, eps=1e-12):
    return 20 * np.log10(np.clip(levels, a_min=eps, a_max=1))


def _apply_slice(audio, begin, end):
    if len(audio.shape) > 1:
        return audio[:, begin: end]
    else:
        return audio[begin: end]


class Slicer:
    def __init__(self,
                 sr: int,
                 db_threshold: float = -40,
                 min_length: int = 5000,
                 win_l: int = 300,
                 win_s: int = 20,
                 max_silence_kept: int = 500):
        self.db_threshold = db_threshold
        self.min_samples = round(sr * min_length / 1000)
        self.win_ln = round(sr * win_l / 1000)
        self.win_sn = round(sr * win_s / 1000)
        self.max_silence = round(sr * max_silence_kept / 1000)
        if not self.min_samples >= self.win_ln >= self.win_sn:
            raise ValueError('The following condition must be satisfied: min_length >= win_l >= win_s')
        if not self.max_silence >= self.win_sn:
            raise ValueError('The following condition must be satisfied: max_silence_kept >= win_s')

    @timeit
    def slice(self, audio):
        if len(audio.shape) > 1:
            samples = librosa.to_mono(audio)
        else:
            samples = audio
        if samples.shape[0] <= self.min_samples:
            return [audio]
        # get absolute amplitudes
        abs_amp = np.abs(samples - np.mean(samples))
        # calculate local maximum with large window
        win_max_db = level2db(_window_maximum(abs_amp, win_sz=self.win_ln))
        sil_tags = []
        left = right = 0
        while right < win_max_db.shape[0]:
            if win_max_db[right] < self.db_threshold:
                right += 1
            elif left == right:
                left += 1
                right += 1
            else:
                if left == 0:
                    split_loc_l = left
                else:
                    sil_left_n = min(self.max_silence, (right + self.win_ln - left) // 2)
                    rms_db_left = level2db(_window_rms(samples[left: left + sil_left_n], win_sz=self.win_sn))
                    split_win_l = left + np.argmin(rms_db_left)
                    split_loc_l = split_win_l + np.argmin(abs_amp[split_win_l: split_win_l + self.win_sn])
                if len(sil_tags) != 0 and split_loc_l - sil_tags[-1][1] < self.min_samples and right < win_max_db.shape[0] - 1:
                    right += 1
                    left = right
                    continue
                if right == win_max_db.shape[0] - 1:
                    split_loc_r = right + self.win_ln
                else:
                    sil_right_n = min(self.max_silence, (right + self.win_ln - left) // 2)
                    rms_db_right = level2db(_window_rms(samples[right + self.win_ln - sil_right_n: right + self.win_ln], win_sz=self.win_sn))
                    split_win_r = right + self.win_ln - sil_right_n + np.argmin(rms_db_right)
                    split_loc_r = split_win_r + np.argmin(abs_amp[split_win_r: split_win_r + self.win_sn])
                sil_tags.append((split_loc_l, split_loc_r))
                right += 1
                left = right
        if left != right:
            sil_left_n = min(self.max_silence, (right + self.win_ln - left) // 2)
            rms_db_left = level2db(_window_rms(samples[left: left + sil_left_n], win_sz=self.win_sn))
            split_win_l = left + np.argmin(rms_db_left)
            split_loc_l = split_win_l + np.argmin(abs_amp[split_win_l: split_win_l + self.win_sn])
            sil_tags.append((split_loc_l, samples.shape[0]))
        if len(sil_tags) == 0:
            return [audio]
        else:
            chunks = []
            if sil_tags[0][0] > 0:
                chunks.append(_apply_slice(audio, 0, sil_tags[0][0]))
            for i in range(0, len(sil_tags) - 1):
                chunks.append(_apply_slice(audio, sil_tags[i][1], sil_tags[i + 1][0]))
            if sil_tags[-1][1] < samples.shape[0] - 1:
                chunks.append(_apply_slice(audio, sil_tags[-1][1], samples.shape[0]))
            return chunks


def main():
    parser = ArgumentParser()
    parser.add_argument('audio', type=str, help='The audio to be sliced')
    parser.add_argument('--out', type=str, help='Output directory of the sliced audio clips')
    parser.add_argument('--db_thresh', type=float, required=False, default=-40, help='The dB threshold for silence detection')
    parser.add_argument('--min_len', type=int, required=False, default=5000, help='The minimum milliseconds required for each sliced audio clip')
    parser.add_argument('--win_l', type=int, required=False, default=300, help='Size of the large sliding window, presented in milliseconds')
    parser.add_argument('--win_s', type=int, required=False, default=20, help='Size of the small sliding window, presented in milliseconds')
    parser.add_argument('--max_sil_kept', type=int, required=False, default=500, help='The maximum silence length kept around the sliced audio, presented in milliseconds')
    args = parser.parse_args()
    out = args.out
    if out is None:
        out = os.path.dirname(os.path.abspath(args.audio))
    audio, sr = librosa.load(args.audio, sr=None)
    slicer = Slicer(
        sr=sr,
        db_threshold=args.db_thresh,
        min_length=args.min_len,
        win_l=args.win_l,
        win_s=args.win_s,
        max_silence_kept=args.max_sil_kept
    )
    chunks = slicer.slice(audio)
    if not os.path.exists(out):
        os.makedirs(out)
    for i, chunk in enumerate(chunks):
        soundfile.write(os.path.join(out, '%s_%d.wav' % (os.path.basename(args.audio).rsplit('.', maxsplit=1)[0], i)), chunk, sr)


if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/slicer2.py:
--------------------------------------------------------------------------------
import numpy as np


# This function is obtained from librosa.
def get_rms(
    y,
    *,
    frame_length=2048,
    hop_length=512,
    pad_mode="constant",
):
    padding = (int(frame_length // 2), int(frame_length // 2))
    y = np.pad(y, padding, mode=pad_mode)

    axis = -1
    # put our new within-frame axis at the end for now
    out_strides = y.strides + tuple([y.strides[axis]])
    # Reduce the shape on the framing axis
    x_shape_trimmed = list(y.shape)
    x_shape_trimmed[axis] -= frame_length - 1
    out_shape = tuple(x_shape_trimmed) + tuple([frame_length])
    xw = np.lib.stride_tricks.as_strided(
        y, shape=out_shape, strides=out_strides
    )
    if axis < 0:
        target_axis = axis - 1
    else:
        target_axis = axis + 1
    xw = np.moveaxis(xw, -1, target_axis)
    # Downsample along the target axis
    slices = [slice(None)] * xw.ndim
    slices[axis] = slice(0, None, hop_length)
    x = xw[tuple(slices)]

    # Calculate power
    power = np.mean(np.abs(x) ** 2, axis=-2, keepdims=True)

    return np.sqrt(power)
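

# Note: for mono input, get_rms should match librosa.feature.rms(y=y,
# frame_length=..., hop_length=..., center=True, pad_mode="constant") up to
# floating-point error, since the constant padding of frame_length // 2 on
# both sides reproduces librosa's centering behavior.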


class Slicer:
    def __init__(self,
                 sr: int,
                 threshold: float = -40.,
                 min_length: int = 5000,
                 min_interval: int = 300,
                 hop_size: int = 20,
                 max_sil_kept: int = 5000):
        if not min_length >= min_interval >= hop_size:
            raise ValueError('The following condition must be satisfied: min_length >= min_interval >= hop_size')
        if not max_sil_kept >= hop_size:
            raise ValueError('The following condition must be satisfied: max_sil_kept >= hop_size')
        min_interval = sr * min_interval / 1000
        self.threshold = 10 ** (threshold / 20.)
        self.hop_size = round(sr * hop_size / 1000)
        self.win_size = min(round(min_interval), 4 * self.hop_size)
        self.min_length = round(sr * min_length / 1000 / self.hop_size)
        self.min_interval = round(min_interval / self.hop_size)
        self.max_sil_kept = round(sr * max_sil_kept / 1000 / self.hop_size)

    def _apply_slice(self, waveform, begin, end):
        if len(waveform.shape) > 1:
            return waveform[:, begin * self.hop_size: min(waveform.shape[1], end * self.hop_size)]
        else:
            return waveform[begin * self.hop_size: min(waveform.shape[0], end * self.hop_size)]

    # @timeit
    def slice(self, waveform):
        if len(waveform.shape) > 1:
            samples = waveform.mean(axis=0)
        else:
            samples = waveform
        if (samples.shape[0] + self.hop_size - 1) // self.hop_size <= self.min_length:
            return [waveform]
        rms_list = get_rms(y=samples, frame_length=self.win_size, hop_length=self.hop_size).squeeze(0)
        sil_tags = []
        silence_start = None
        clip_start = 0
        for i, rms in enumerate(rms_list):
            # Keep looping while frame is silent.
            if rms < self.threshold:
                # Record start of silent frames.
                if silence_start is None:
                    silence_start = i
                continue
            # Keep looping while frame is not silent and silence start has not been recorded.
            if silence_start is None:
                continue
            # Clear recorded silence start if interval is not enough or clip is too short.
            is_leading_silence = silence_start == 0 and i > self.max_sil_kept
            need_slice_middle = i - silence_start >= self.min_interval and i - clip_start >= self.min_length
            if not is_leading_silence and not need_slice_middle:
                silence_start = None
                continue
            # Need slicing. Record the range of silent frames to be removed.
            if i - silence_start <= self.max_sil_kept:
                pos = rms_list[silence_start: i + 1].argmin() + silence_start
                if silence_start == 0:
                    sil_tags.append((0, pos))
                else:
                    sil_tags.append((pos, pos))
                clip_start = pos
            elif i - silence_start <= self.max_sil_kept * 2:
                pos = rms_list[i - self.max_sil_kept: silence_start + self.max_sil_kept + 1].argmin()
                pos += i - self.max_sil_kept
                pos_l = rms_list[silence_start: silence_start + self.max_sil_kept + 1].argmin() + silence_start
                pos_r = rms_list[i - self.max_sil_kept: i + 1].argmin() + i - self.max_sil_kept
                if silence_start == 0:
                    sil_tags.append((0, pos_r))
                    clip_start = pos_r
                else:
                    sil_tags.append((min(pos_l, pos), max(pos_r, pos)))
                    clip_start = max(pos_r, pos)
            else:
                pos_l = rms_list[silence_start: silence_start + self.max_sil_kept + 1].argmin() + silence_start
                pos_r = rms_list[i - self.max_sil_kept: i + 1].argmin() + i - self.max_sil_kept
                if silence_start == 0:
                    sil_tags.append((0, pos_r))
                else:
                    sil_tags.append((pos_l, pos_r))
                clip_start = pos_r
            silence_start = None
        # Deal with trailing silence.
        total_frames = rms_list.shape[0]
        if silence_start is not None and total_frames - silence_start >= self.min_interval:
            silence_end = min(total_frames, silence_start + self.max_sil_kept)
            pos = rms_list[silence_start: silence_end + 1].argmin() + silence_start
            sil_tags.append((pos, total_frames + 1))
        # Apply and return slices.
        if len(sil_tags) == 0:
            return [waveform]
        else:
            chunks = []
            if sil_tags[0][0] > 0:
                chunks.append(self._apply_slice(waveform, 0, sil_tags[0][0]))
            for i in range(len(sil_tags) - 1):
                chunks.append(self._apply_slice(waveform, sil_tags[i][1], sil_tags[i + 1][0]))
            if sil_tags[-1][1] < total_frames:
                chunks.append(self._apply_slice(waveform, sil_tags[-1][1], total_frames))
            return chunks
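

# The chunks returned by Slicer.slice are views into the input array;
# copy them (np.copy) if the original buffer will be modified or reused.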


def main():
    import os.path
    from argparse import ArgumentParser

    import librosa
    import soundfile

    parser = ArgumentParser()
    parser.add_argument('audio', type=str, help='The audio to be sliced')
    parser.add_argument('--out', type=str, help='Output directory of the sliced audio clips')
    parser.add_argument('--db_thresh', type=float, required=False, default=-40,
                        help='The dB threshold for silence detection')
    parser.add_argument('--min_length', type=int, required=False, default=5000,
                        help='The minimum milliseconds required for each sliced audio clip')
    parser.add_argument('--min_interval', type=int, required=False, default=300,
                        help='The minimum milliseconds for a silence part to be sliced')
    parser.add_argument('--hop_size', type=int, required=False, default=10,
                        help='Frame length in milliseconds')
    parser.add_argument('--max_sil_kept', type=int, required=False, default=500,
                        help='The maximum silence length kept around the sliced clip, presented in milliseconds')
    args = parser.parse_args()
    out = args.out
    if out is None:
        out = os.path.dirname(os.path.abspath(args.audio))
    audio, sr = librosa.load(args.audio, sr=None, mono=False)
    slicer = Slicer(
        sr=sr,
        threshold=args.db_thresh,
        min_length=args.min_length,
        min_interval=args.min_interval,
        hop_size=args.hop_size,
        max_sil_kept=args.max_sil_kept
    )
    chunks = slicer.slice(audio)
    if not os.path.exists(out):
        os.makedirs(out)
    for i, chunk in enumerate(chunks):
        if len(chunk.shape) > 1:
            chunk = chunk.T
        soundfile.write(os.path.join(out, '%s_%d.wav' % (os.path.basename(args.audio).rsplit('.', maxsplit=1)[0], i)), chunk, sr)


if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------