├── .gitignore ├── LICENSE ├── README.md ├── README.zh-CN.md ├── file_version_info.txt ├── gui ├── Ui_MainWindow.py ├── mainwindow.py └── ui_mainwindow.ui ├── pack-gui.ps1 ├── requirements.txt ├── screenshots ├── screenshot_dark.jpg └── screenshot_light.jpg ├── slicer-gui.py ├── slicer.py └── slicer2.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | venv-linux/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Logs 115 | log/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | # PyCharm 136 | /.idea/ 137 | 138 | # Tests 139 | /test*.py 140 | 141 | # VSCode 142 | /.vscode/ 143 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Team OpenVPI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Audio Slicer 2 | 3 | A simple GUI application that slices audio with silence detection. 4 | 5 | [中文文档](./README.zh-CN.md) 6 | 7 | ## Screenshots 8 | 9 | ![image](./screenshot/../screenshots/screenshot_dark.jpg) 10 | 11 | The app also has a light theme. 12 | 13 | ## Usage 14 | 15 | ### Windows 16 | 17 | - Download and extract the latest release [here](https://github.com/flutydeer/audio-slicer/releases). 18 | 19 | - Run "slicer-gui.exe". 20 | 21 | ### MacOS & Linux 22 | 23 | - Clone the repository. 24 | 25 | - Run the following command to install requirements: 26 | 27 | ```shell 28 | pip install -r requirements.txt 29 | ``` 30 | 31 | - Run the following command to launch GUI: 32 | 33 | ```Shell 34 | python slicer-gui.py 35 | ``` 36 | 37 | Just simply add your audio files to the task list by clicking the "Add Audio Files..." button or dragging and drop them to the window, click the "Start" button and wait for it to finish. The progress bar cannot indicate the progress of individual tasks, so it keeps 0% until finished when there is only 1 task in the task list. 38 | ## Algorithm 39 | 40 | ### Silence detection 41 | 42 | This application uses RMS (root mean score) to measure the quiteness of the audio and detect silent parts. RMS values of each frame (frame length set as **hop size**) are calculated and all frames with an RMS below the **threshold** will be regarded as silent frames. 43 | 44 | ### Audio slicing 45 | 46 | Once the valid (sound) part reached **min length** since last slice and a silent part longer than **min interval** are detected, the audio will be sliced apart from the frame(s) with the lowest RMS value within the silent area. Long silence parts may be deleted. 47 | 48 | 49 | 50 | ## Parameters 51 | 52 | ### Threshold 53 | 54 | The RMS threshold presented in dB. Areas where all RMS values are below this threshold will be regarded as silence. Increase this value if your audio is noisy. Defaults to -40. 55 | 56 | ### Minimum Length 57 | 58 | The minimum length required for each sliced audio clip, presented in milliseconds. Defaults to 5000. 59 | 60 | ### Minimum Interval 61 | 62 | The minimum length for a silence part to be sliced, presented in milliseconds. Set this value smaller if your audio contains only short breaks. The smaller this value is, the more sliced audio clips this application is likely to generate. Note that this value must be smaller than min_length and larger than hop_size. Defaults to 300. 63 | 64 | ### Hop Size 65 | 66 | Length of each RMS frame, presented in milliseconds. Increasing this value will increase the precision of slicing, but will slow down the process. Defaults to 10. 67 | 68 | ### Maximum Silence Length 69 | 70 | The maximum silence length kept around the sliced audio, presented in milliseconds. Adjust this value according to your needs. Note that setting this value does not mean that silence parts in the sliced audio have exactly the given length. The algorithm will search for the best position to slice, as described above. Defaults to 1000. 71 | 72 | ## Performance 73 | 74 | This application runs over 400x faster than real-time on an Intel i7 8750H CPU. Speed may vary according to your CPU and your disk. 75 | -------------------------------------------------------------------------------- /README.zh-CN.md: -------------------------------------------------------------------------------- 1 | # 音频切片机 2 | 一个简约的 GUI 应用程序,通过静音检测对音频进行切片。 3 | 4 | ## 屏幕截图 5 | 6 | ![image](./screenshot/../screenshots/screenshot_dark.jpg) 7 | 8 | 应用还有一个浅色主题。 9 | 10 | ## 用法 11 | 12 | ### Windows 13 | 14 | - 在[这里](https://github.com/flutydeer/audio-slicer/releases)下载并解压最新版本。 15 | 16 | - 运行“slicer-gui.exe”。 17 | 18 | ### MacOS & Linux 19 | 20 | - 克隆此仓库。 21 | 22 | - 运行以下命令安装环境: 23 | 24 | ```shell 25 | pip install -r requirements.txt 26 | ``` 27 | 28 | - 运行以下命令启动 GUI: 29 | 30 | ```Shell 31 | python slicer-gui.py 32 | ``` 33 | 34 | 只需点击“Add Audio Files...”按钮来添加音频文件,或将它们拖放到窗口中,单击“Start”按钮并等待任务完成。进度条无法指示单个任务的进度,因此当任务列表中只有1个任务时,它会保持0%直到完成。 35 | ## 算法 36 | 37 | ### 静音检测 38 | 39 | 本应用根据 RMS(均方根)来测量音频的安静度并检测静音部分,计算每个帧的 RMS 值(帧长度设为 **hop size**(跳跃步长)),RMS 低于 **threshold**(阈值)的所有帧都将被视为静默帧。 40 | 41 | ### 音频切片 42 | 43 | 一旦检测到自上次切片以来的有效(声音)部分达到 **min length** (最小长度),且长度超过 **min interval**(最小间距)的静音部分,该音频将从静音区域内 RMS 值最低的帧脱离出来。长时间静音的部分可能会被删除。 44 | 45 | 46 | ## 参数 47 | 48 | ### Threshold(阈值) 49 | 50 | 以 dB 表示的 RMS 阈值。所有 RMS 值都低于此阈值的区域将被视为静音。如果音频有噪音,请增加此值。默认值为 -40。 51 | 52 | ### Minimum Length(最小长度) 53 | 54 | 每个切片音频剪辑所需的最小长度,以毫秒为单位。默认值为 5000。 55 | 56 | ### Minimum Interval(最小间距) 57 | 58 | 要切片的静音部分的最小长度,以毫秒为单位。如果音频仅包含短暂的中断,请将此值设置得更小。此值越小,此应用程序可能生成的切片音频剪辑就越多。请注意,此值必须小于 min length 且大于 hop size。默认值为 300。 59 | 60 | ### Hop Size(跳跃步长) 61 | 62 | 每个 RMS 帧的长度,以毫秒为单位。增加此值将提高切片的精度,但会降低处理速度。默认值为 10。 63 | 64 | ### Maximum Silence Length(最大静音长度) 65 | 66 | 在切片音频周围保持的最大静音长度,以毫秒为单位。根据需要调整此值。请注意,设置此值并不意味着切片音频中的静音部分具有完全给定的长度。如上所述,该算法将搜索要切片的最佳位置。默认值为 1000。 67 | 68 | ## 性能 69 | 70 | 此应用程序在 Intel i7 8750H CPU 上的运行速度超过 400 倍于实时。速度可能因 CPU 和磁盘而异。 71 | -------------------------------------------------------------------------------- /file_version_info.txt: -------------------------------------------------------------------------------- 1 | # UTF-8 2 | # 3 | # For more details about fixed file info 'ffi' see: 4 | # http://msdn.microsoft.com/en-us/library/ms646997.aspx 5 | VSVersionInfo( 6 | ffi=FixedFileInfo( 7 | # filevers and prodvers should be always a tuple with four items: (1, 2, 3, 4) 8 | # Set not needed items to zero 0. 9 | filevers=(1, 3, 0, 0), 10 | prodvers=(1, 3, 0, 0), 11 | # Contains a bitmask that specifies the valid bits 'flags'r 12 | mask=0x0, 13 | # Contains a bitmask that specifies the Boolean attributes of the file. 14 | flags=0x0, 15 | # The operating system for which this file was designed. 16 | # 0x4 - NT and there is no need to change it. 17 | OS=0x4, 18 | # The general type of file. 19 | # 0x1 - the file is an application. 20 | fileType=0x2, 21 | # The function of the file. 22 | # 0x0 - the function is not defined for this fileType 23 | subtype=0x0, 24 | # Creation date and time stamp. 25 | date=(0, 0) 26 | ), 27 | kids=[ 28 | StringFileInfo( 29 | [ 30 | StringTable( 31 | '000904b0', 32 | [StringStruct('CompanyName', 'OpenVPI Team'), 33 | StringStruct('FileDescription', 'Audio Slicer'), 34 | StringStruct('FileVersion', '1.3.0.0'), 35 | StringStruct('InternalName', 'slicer-gui.exe'), 36 | StringStruct('LegalCopyright', 'Copyright 2020-2024 OpenVPI Team'), 37 | StringStruct('OriginalFilename', 'slicer-gui.exe'), 38 | StringStruct('ProductName', 'Audio Slicer'), 39 | StringStruct('ProductVersion', '1.3.0.0')]) 40 | ]), 41 | VarFileInfo([VarStruct('Translation', [9, 1200])]) 42 | ] 43 | ) -------------------------------------------------------------------------------- /gui/Ui_MainWindow.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ################################################################################ 4 | ## Form generated from reading UI file 'ui_mainwindow.ui' 5 | ## 6 | ## Created by: Qt User Interface Compiler version 6.7.2 7 | ## 8 | ## WARNING! All changes made in this file will be lost when recompiling UI file! 9 | ################################################################################ 10 | 11 | from PySide6.QtCore import (QCoreApplication, QDate, QDateTime, QLocale, 12 | QMetaObject, QObject, QPoint, QRect, 13 | QSize, QTime, QUrl, Qt) 14 | from PySide6.QtGui import (QBrush, QColor, QConicalGradient, QCursor, 15 | QFont, QFontDatabase, QGradient, QIcon, 16 | QImage, QKeySequence, QLinearGradient, QPainter, 17 | QPalette, QPixmap, QRadialGradient, QTransform) 18 | from PySide6.QtWidgets import (QApplication, QButtonGroup, QFormLayout, QFrame, 19 | QGroupBox, QHBoxLayout, QLabel, QLineEdit, 20 | QListWidget, QListWidgetItem, QMainWindow, QProgressBar, 21 | QPushButton, QRadioButton, QSizePolicy, QSpacerItem, 22 | QVBoxLayout, QWidget) 23 | 24 | class Ui_MainWindow(object): 25 | def setupUi(self, MainWindow): 26 | if not MainWindow.objectName(): 27 | MainWindow.setObjectName(u"MainWindow") 28 | MainWindow.resize(768, 480) 29 | font = QFont() 30 | font.setFamilies([u"Microsoft YaHei UI"]) 31 | MainWindow.setFont(font) 32 | self.centralwidget = QWidget(MainWindow) 33 | self.centralwidget.setObjectName(u"centralwidget") 34 | self.verticalLayout = QVBoxLayout(self.centralwidget) 35 | self.verticalLayout.setObjectName(u"verticalLayout") 36 | self.horizontalLayout_2 = QHBoxLayout() 37 | self.horizontalLayout_2.setObjectName(u"horizontalLayout_2") 38 | self.pushButtonAddFiles = QPushButton(self.centralwidget) 39 | self.pushButtonAddFiles.setObjectName(u"pushButtonAddFiles") 40 | self.pushButtonAddFiles.setEnabled(True) 41 | sizePolicy = QSizePolicy(QSizePolicy.Policy.Fixed, QSizePolicy.Policy.Fixed) 42 | sizePolicy.setHorizontalStretch(0) 43 | sizePolicy.setVerticalStretch(0) 44 | sizePolicy.setHeightForWidth(self.pushButtonAddFiles.sizePolicy().hasHeightForWidth()) 45 | self.pushButtonAddFiles.setSizePolicy(sizePolicy) 46 | 47 | self.horizontalLayout_2.addWidget(self.pushButtonAddFiles) 48 | 49 | self.horizontalSpacer = QSpacerItem(40, 20, QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Minimum) 50 | 51 | self.horizontalLayout_2.addItem(self.horizontalSpacer) 52 | 53 | 54 | self.verticalLayout.addLayout(self.horizontalLayout_2) 55 | 56 | self.horizontalLayout = QHBoxLayout() 57 | self.horizontalLayout.setObjectName(u"horizontalLayout") 58 | self.groupBox = QGroupBox(self.centralwidget) 59 | self.groupBox.setObjectName(u"groupBox") 60 | sizePolicy1 = QSizePolicy(QSizePolicy.Policy.Preferred, QSizePolicy.Policy.Minimum) 61 | sizePolicy1.setHorizontalStretch(0) 62 | sizePolicy1.setVerticalStretch(0) 63 | sizePolicy1.setHeightForWidth(self.groupBox.sizePolicy().hasHeightForWidth()) 64 | self.groupBox.setSizePolicy(sizePolicy1) 65 | self.verticalLayout_2 = QVBoxLayout(self.groupBox) 66 | self.verticalLayout_2.setObjectName(u"verticalLayout_2") 67 | self.listWidgetTaskList = QListWidget(self.groupBox) 68 | self.listWidgetTaskList.setObjectName(u"listWidgetTaskList") 69 | self.listWidgetTaskList.setFrameShadow(QFrame.Shadow.Plain) 70 | 71 | self.verticalLayout_2.addWidget(self.listWidgetTaskList) 72 | 73 | self.pushButtonClearList = QPushButton(self.groupBox) 74 | self.pushButtonClearList.setObjectName(u"pushButtonClearList") 75 | 76 | self.verticalLayout_2.addWidget(self.pushButtonClearList) 77 | 78 | 79 | self.horizontalLayout.addWidget(self.groupBox) 80 | 81 | self.groupBox_2 = QGroupBox(self.centralwidget) 82 | self.groupBox_2.setObjectName(u"groupBox_2") 83 | sizePolicy1.setHeightForWidth(self.groupBox_2.sizePolicy().hasHeightForWidth()) 84 | self.groupBox_2.setSizePolicy(sizePolicy1) 85 | self.verticalLayout_3 = QVBoxLayout(self.groupBox_2) 86 | self.verticalLayout_3.setObjectName(u"verticalLayout_3") 87 | self.formLayout = QFormLayout() 88 | self.formLayout.setObjectName(u"formLayout") 89 | self.label_2 = QLabel(self.groupBox_2) 90 | self.label_2.setObjectName(u"label_2") 91 | 92 | self.formLayout.setWidget(0, QFormLayout.LabelRole, self.label_2) 93 | 94 | self.lineEditThreshold = QLineEdit(self.groupBox_2) 95 | self.lineEditThreshold.setObjectName(u"lineEditThreshold") 96 | self.lineEditThreshold.setAlignment(Qt.AlignmentFlag.AlignRight|Qt.AlignmentFlag.AlignTrailing|Qt.AlignmentFlag.AlignVCenter) 97 | 98 | self.formLayout.setWidget(0, QFormLayout.FieldRole, self.lineEditThreshold) 99 | 100 | self.label_3 = QLabel(self.groupBox_2) 101 | self.label_3.setObjectName(u"label_3") 102 | 103 | self.formLayout.setWidget(1, QFormLayout.LabelRole, self.label_3) 104 | 105 | self.lineEditMinLen = QLineEdit(self.groupBox_2) 106 | self.lineEditMinLen.setObjectName(u"lineEditMinLen") 107 | self.lineEditMinLen.setAlignment(Qt.AlignmentFlag.AlignRight|Qt.AlignmentFlag.AlignTrailing|Qt.AlignmentFlag.AlignVCenter) 108 | 109 | self.formLayout.setWidget(1, QFormLayout.FieldRole, self.lineEditMinLen) 110 | 111 | self.label_4 = QLabel(self.groupBox_2) 112 | self.label_4.setObjectName(u"label_4") 113 | 114 | self.formLayout.setWidget(2, QFormLayout.LabelRole, self.label_4) 115 | 116 | self.lineEditMinInterval = QLineEdit(self.groupBox_2) 117 | self.lineEditMinInterval.setObjectName(u"lineEditMinInterval") 118 | self.lineEditMinInterval.setAlignment(Qt.AlignmentFlag.AlignRight|Qt.AlignmentFlag.AlignTrailing|Qt.AlignmentFlag.AlignVCenter) 119 | 120 | self.formLayout.setWidget(2, QFormLayout.FieldRole, self.lineEditMinInterval) 121 | 122 | self.label_5 = QLabel(self.groupBox_2) 123 | self.label_5.setObjectName(u"label_5") 124 | 125 | self.formLayout.setWidget(3, QFormLayout.LabelRole, self.label_5) 126 | 127 | self.lineEditHopSize = QLineEdit(self.groupBox_2) 128 | self.lineEditHopSize.setObjectName(u"lineEditHopSize") 129 | self.lineEditHopSize.setAlignment(Qt.AlignmentFlag.AlignRight|Qt.AlignmentFlag.AlignTrailing|Qt.AlignmentFlag.AlignVCenter) 130 | 131 | self.formLayout.setWidget(3, QFormLayout.FieldRole, self.lineEditHopSize) 132 | 133 | self.label_6 = QLabel(self.groupBox_2) 134 | self.label_6.setObjectName(u"label_6") 135 | 136 | self.formLayout.setWidget(4, QFormLayout.LabelRole, self.label_6) 137 | 138 | self.lineEditMaxSilence = QLineEdit(self.groupBox_2) 139 | self.lineEditMaxSilence.setObjectName(u"lineEditMaxSilence") 140 | self.lineEditMaxSilence.setAlignment(Qt.AlignmentFlag.AlignRight|Qt.AlignmentFlag.AlignTrailing|Qt.AlignmentFlag.AlignVCenter) 141 | 142 | self.formLayout.setWidget(4, QFormLayout.FieldRole, self.lineEditMaxSilence) 143 | 144 | 145 | self.verticalLayout_3.addLayout(self.formLayout) 146 | 147 | self.label_7 = QLabel(self.groupBox_2) 148 | self.label_7.setObjectName(u"label_7") 149 | 150 | self.verticalLayout_3.addWidget(self.label_7) 151 | 152 | self.horizontalLayout_4 = QHBoxLayout() 153 | self.horizontalLayout_4.setObjectName(u"horizontalLayout_4") 154 | self.lineEditOutputDir = QLineEdit(self.groupBox_2) 155 | self.lineEditOutputDir.setObjectName(u"lineEditOutputDir") 156 | self.lineEditOutputDir.setAlignment(Qt.AlignmentFlag.AlignLeading|Qt.AlignmentFlag.AlignLeft|Qt.AlignmentFlag.AlignVCenter) 157 | 158 | self.horizontalLayout_4.addWidget(self.lineEditOutputDir) 159 | 160 | self.pushButtonBrowse = QPushButton(self.groupBox_2) 161 | self.pushButtonBrowse.setObjectName(u"pushButtonBrowse") 162 | 163 | self.horizontalLayout_4.addWidget(self.pushButtonBrowse) 164 | 165 | 166 | self.verticalLayout_3.addLayout(self.horizontalLayout_4) 167 | 168 | self.horizontalLayout_5 = QHBoxLayout() 169 | self.horizontalLayout_5.setObjectName(u"horizontalLayout_5") 170 | self.label = QLabel(self.groupBox_2) 171 | self.label.setObjectName(u"label") 172 | 173 | self.horizontalLayout_5.addWidget(self.label) 174 | 175 | self.radioButtonWav = QRadioButton(self.groupBox_2) 176 | self.buttonGroup = QButtonGroup(MainWindow) 177 | self.buttonGroup.setObjectName(u"buttonGroup") 178 | self.buttonGroup.addButton(self.radioButtonWav) 179 | self.radioButtonWav.setObjectName(u"radioButtonWav") 180 | self.radioButtonWav.setText(u"wav") 181 | self.radioButtonWav.setChecked(True) 182 | 183 | self.horizontalLayout_5.addWidget(self.radioButtonWav, 0, Qt.AlignmentFlag.AlignHCenter) 184 | 185 | self.radioButtonFlac = QRadioButton(self.groupBox_2) 186 | self.buttonGroup.addButton(self.radioButtonFlac) 187 | self.radioButtonFlac.setObjectName(u"radioButtonFlac") 188 | self.radioButtonFlac.setText(u"flac") 189 | 190 | self.horizontalLayout_5.addWidget(self.radioButtonFlac, 0, Qt.AlignmentFlag.AlignHCenter) 191 | 192 | self.radioButtonMp3 = QRadioButton(self.groupBox_2) 193 | self.buttonGroup.addButton(self.radioButtonMp3) 194 | self.radioButtonMp3.setObjectName(u"radioButtonMp3") 195 | self.radioButtonMp3.setEnabled(True) 196 | self.radioButtonMp3.setText(u"mp3") 197 | 198 | self.horizontalLayout_5.addWidget(self.radioButtonMp3, 0, Qt.AlignmentFlag.AlignHCenter) 199 | 200 | 201 | self.verticalLayout_3.addLayout(self.horizontalLayout_5) 202 | 203 | self.verticalSpacer = QSpacerItem(20, 40, QSizePolicy.Policy.Minimum, QSizePolicy.Policy.Expanding) 204 | 205 | self.verticalLayout_3.addItem(self.verticalSpacer) 206 | 207 | 208 | self.horizontalLayout.addWidget(self.groupBox_2) 209 | 210 | 211 | self.verticalLayout.addLayout(self.horizontalLayout) 212 | 213 | self.horizontalLayout_3 = QHBoxLayout() 214 | self.horizontalLayout_3.setObjectName(u"horizontalLayout_3") 215 | self.pushButtonAbout = QPushButton(self.centralwidget) 216 | self.pushButtonAbout.setObjectName(u"pushButtonAbout") 217 | 218 | self.horizontalLayout_3.addWidget(self.pushButtonAbout) 219 | 220 | self.progressBar = QProgressBar(self.centralwidget) 221 | self.progressBar.setObjectName(u"progressBar") 222 | self.progressBar.setValue(0) 223 | 224 | self.horizontalLayout_3.addWidget(self.progressBar) 225 | 226 | self.pushButtonStart = QPushButton(self.centralwidget) 227 | self.pushButtonStart.setObjectName(u"pushButtonStart") 228 | 229 | self.horizontalLayout_3.addWidget(self.pushButtonStart) 230 | 231 | 232 | self.verticalLayout.addLayout(self.horizontalLayout_3) 233 | 234 | MainWindow.setCentralWidget(self.centralwidget) 235 | 236 | self.retranslateUi(MainWindow) 237 | 238 | QMetaObject.connectSlotsByName(MainWindow) 239 | # setupUi 240 | 241 | def retranslateUi(self, MainWindow): 242 | MainWindow.setWindowTitle(QCoreApplication.translate("MainWindow", u"MainWindow", None)) 243 | self.pushButtonAddFiles.setText(QCoreApplication.translate("MainWindow", u"Add Audio Files...", None)) 244 | self.groupBox.setTitle(QCoreApplication.translate("MainWindow", u"Task List", None)) 245 | self.pushButtonClearList.setText(QCoreApplication.translate("MainWindow", u"Clear List", None)) 246 | self.groupBox_2.setTitle(QCoreApplication.translate("MainWindow", u"Settings", None)) 247 | self.label_2.setText(QCoreApplication.translate("MainWindow", u"Threshold (dB)", None)) 248 | self.lineEditThreshold.setText(QCoreApplication.translate("MainWindow", u"-40", None)) 249 | self.label_3.setText(QCoreApplication.translate("MainWindow", u"Minimum Length (ms)", None)) 250 | self.lineEditMinLen.setText(QCoreApplication.translate("MainWindow", u"5000", None)) 251 | self.label_4.setText(QCoreApplication.translate("MainWindow", u"Minimum Interval (ms)", None)) 252 | self.lineEditMinInterval.setText(QCoreApplication.translate("MainWindow", u"300", None)) 253 | self.label_5.setText(QCoreApplication.translate("MainWindow", u"Hop Size (ms)", None)) 254 | self.lineEditHopSize.setText(QCoreApplication.translate("MainWindow", u"10", None)) 255 | self.label_6.setText(QCoreApplication.translate("MainWindow", u"Maximum Silence Length (ms)", None)) 256 | self.lineEditMaxSilence.setText(QCoreApplication.translate("MainWindow", u"1000", None)) 257 | self.label_7.setText(QCoreApplication.translate("MainWindow", u"Output Directory (default to the same as the audio)", None)) 258 | self.lineEditOutputDir.setText("") 259 | self.pushButtonBrowse.setText(QCoreApplication.translate("MainWindow", u"Browse...", None)) 260 | self.label.setText(QCoreApplication.translate("MainWindow", u"Output Format", None)) 261 | self.pushButtonAbout.setText(QCoreApplication.translate("MainWindow", u"About", None)) 262 | self.pushButtonStart.setText(QCoreApplication.translate("MainWindow", u"Start", None)) 263 | # retranslateUi 264 | 265 | -------------------------------------------------------------------------------- /gui/mainwindow.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import soundfile 4 | import numpy as np 5 | import urllib 6 | 7 | from typing import List 8 | from PySide6.QtCore import * 9 | from PySide6.QtWidgets import * 10 | from PySide6.QtGui import * 11 | from slicer2 import Slicer 12 | 13 | from gui.Ui_MainWindow import Ui_MainWindow 14 | 15 | 16 | class MainWindow(QMainWindow): 17 | def __init__(self): 18 | super(MainWindow, self).__init__() 19 | 20 | self.ui = Ui_MainWindow() 21 | self.ui.setupUi(self) 22 | 23 | self.ui.pushButtonAddFiles.clicked.connect(self._q_add_audio_files) 24 | self.ui.pushButtonBrowse.clicked.connect(self._q_browse_output_dir) 25 | self.ui.pushButtonClearList.clicked.connect(self._q_clear_audio_list) 26 | self.ui.pushButtonAbout.clicked.connect(self._q_about) 27 | self.ui.pushButtonStart.clicked.connect(self._q_start) 28 | 29 | self.ui.progressBar.setMinimum(0) 30 | self.ui.progressBar.setMaximum(100) 31 | self.ui.progressBar.setValue(0) 32 | self.ui.pushButtonStart.setDefault(True) 33 | 34 | validator = QRegularExpressionValidator(QRegularExpression(r"\d+")) 35 | self.ui.lineEditThreshold.setValidator(QDoubleValidator()) 36 | self.ui.lineEditMinLen.setValidator(validator) 37 | self.ui.lineEditMinInterval.setValidator(validator) 38 | self.ui.lineEditHopSize.setValidator(validator) 39 | self.ui.lineEditMaxSilence.setValidator(validator) 40 | 41 | self.ui.listWidgetTaskList.setAlternatingRowColors(True) 42 | 43 | # State variables 44 | self.workers: list[QThread] = [] 45 | self.workCount = 0 46 | self.workFinished = 0 47 | self.processing = False 48 | 49 | self.setWindowTitle(QApplication.applicationName()) 50 | 51 | # Must set to accept drag and drop events 52 | self.setAcceptDrops(True) 53 | 54 | # Get available formats/extensions supported 55 | self.availableFormats = [str(formatExt).lower( 56 | ) for formatExt in soundfile.available_formats().keys()] 57 | # libsndfile supports Opus in Ogg container 58 | # .opus is a valid extension and recommended for Ogg Opus (see RFC 7845, Section 9) 59 | # append opus for convenience as tools like youtube-dl(p) extract to .opus by default 60 | self.availableFormats.append("opus") 61 | 62 | self.formatAllFilter = " ".join( 63 | [f"*.{formatExt}" for formatExt in self.availableFormats]) 64 | self.formatIndividualFilter = ";;".join( 65 | [f"{formatExt} (*.{formatExt})" for formatExt in sorted(self.availableFormats)]) 66 | 67 | def _q_browse_output_dir(self): 68 | path = QFileDialog.getExistingDirectory( 69 | self, "Browse Output Directory", ".") 70 | if path != "": 71 | self.ui.lineEditOutputDir.setText(QDir.toNativeSeparators(path)) 72 | 73 | def _q_add_audio_files(self): 74 | if self.processing: 75 | self.warningProcessNotFinished() 76 | return 77 | 78 | paths, _ = QFileDialog.getOpenFileNames( 79 | self, 'Select Audio Files', ".", f'Audio ({self.formatAllFilter});;{self.formatIndividualFilter}') 80 | for path in paths: 81 | item = QListWidgetItem() 82 | item.setSizeHint(QSize(200, 24)) 83 | item.setText(QFileInfo(path).fileName()) 84 | # Save full path at custom role 85 | item.setData(Qt.ItemDataRole.UserRole + 1, path) 86 | self.ui.listWidgetTaskList.addItem(item) 87 | 88 | def _q_clear_audio_list(self): 89 | if self.processing: 90 | self.warningProcessNotFinished() 91 | return 92 | 93 | self.ui.listWidgetTaskList.clear() 94 | 95 | def _q_about(self): 96 | QMessageBox.information( 97 | self, "About", "Audio Slicer v1.3.0\nCopyright 2020-2024 OpenVPI Team") 98 | 99 | def _q_start(self): 100 | if self.processing: 101 | self.warningProcessNotFinished() 102 | return 103 | 104 | item_count = self.ui.listWidgetTaskList.count() 105 | if item_count == 0: 106 | return 107 | 108 | output_format = self.ui.buttonGroup.checkedButton().text() 109 | if output_format == "mp3": 110 | ret = QMessageBox.warning(self, "Warning", 111 | "MP3 is not recommended for saving vocals as it is lossy. " 112 | "If you want to save disk space, consider using FLAC instead. " 113 | "Do you want to continue?", 114 | QMessageBox.Ok | QMessageBox.Cancel, QMessageBox.Cancel) 115 | if ret == QMessageBox.Cancel: 116 | return 117 | 118 | class WorkThread(QThread): 119 | oneFinished = Signal() 120 | 121 | def __init__(self, filenames: List[str], window: MainWindow): 122 | super().__init__() 123 | 124 | self.filenames = filenames 125 | self.win = window 126 | 127 | def run(self): 128 | for filename in self.filenames: 129 | audio, sr = soundfile.read(filename, dtype=np.float32) 130 | is_mono = True 131 | if len(audio.shape) > 1: 132 | is_mono = False 133 | audio = audio.T 134 | slicer = Slicer( 135 | sr=sr, 136 | threshold=float(self.win.ui.lineEditThreshold.text()), 137 | min_length=int(self.win.ui.lineEditMinLen.text()), 138 | min_interval=int( 139 | self.win.ui.lineEditMinInterval.text()), 140 | hop_size=int(self.win.ui.lineEditHopSize.text()), 141 | max_sil_kept=int(self.win.ui.lineEditMaxSilence.text()) 142 | ) 143 | chunks = slicer.slice(audio) 144 | out_dir = self.win.ui.lineEditOutputDir.text() 145 | if out_dir == '': 146 | out_dir = os.path.dirname(os.path.abspath(filename)) 147 | else: 148 | # Make dir if not exists 149 | info = QDir(out_dir) 150 | if not info.exists(): 151 | info.mkpath(out_dir) 152 | 153 | ext = self.win.ui.buttonGroup.checkedButton().text() 154 | for i, chunk in enumerate(chunks): 155 | path = os.path.join(out_dir, f'%s_%d.{ext}' % (os.path.basename(filename) 156 | .rsplit('.', maxsplit=1)[0], i)) 157 | if not is_mono: 158 | chunk = chunk.T 159 | soundfile.write(path, chunk, sr) 160 | 161 | self.oneFinished.emit() 162 | 163 | # Collect paths 164 | paths: list[str] = [] 165 | for i in range(0, item_count): 166 | item = self.ui.listWidgetTaskList.item(i) 167 | path = item.data(Qt.ItemDataRole.UserRole + 1) # Get full path 168 | paths.append(path) 169 | 170 | self.ui.progressBar.setMaximum(item_count) 171 | self.ui.progressBar.setValue(0) 172 | 173 | self.workCount = item_count 174 | self.workFinished = 0 175 | self.setProcessing(True) 176 | 177 | # Start work thread 178 | worker = WorkThread(paths, self) 179 | worker.oneFinished.connect(self._q_oneFinished) 180 | worker.finished.connect(self._q_threadFinished) 181 | worker.start() 182 | 183 | self.workers.append(worker) # Collect in case of auto deletion 184 | 185 | def _q_oneFinished(self): 186 | self.workFinished += 1 187 | self.ui.progressBar.setValue(self.workFinished) 188 | 189 | def _q_threadFinished(self): 190 | # Join all workers 191 | for worker in self.workers: 192 | worker.wait() 193 | self.workers.clear() 194 | self.setProcessing(False) 195 | 196 | QMessageBox.information( 197 | self, QApplication.applicationName(), "Slicing complete!") 198 | 199 | def warningProcessNotFinished(self): 200 | QMessageBox.warning(self, QApplication.applicationName(), 201 | "Please wait for slicing to complete!") 202 | 203 | def setProcessing(self, processing: bool): 204 | enabled = not processing 205 | self.ui.pushButtonStart.setText( 206 | "Slicing..." if processing else "Start") 207 | self.ui.pushButtonStart.setEnabled(enabled) 208 | self.ui.pushButtonAddFiles.setEnabled(enabled) 209 | self.ui.listWidgetTaskList.setEnabled(enabled) 210 | self.ui.pushButtonClearList.setEnabled(enabled) 211 | self.ui.lineEditThreshold.setEnabled(enabled) 212 | self.ui.lineEditMinLen.setEnabled(enabled) 213 | self.ui.lineEditMinInterval.setEnabled(enabled) 214 | self.ui.lineEditHopSize.setEnabled(enabled) 215 | self.ui.lineEditMaxSilence.setEnabled(enabled) 216 | self.ui.lineEditOutputDir.setEnabled(enabled) 217 | self.ui.pushButtonBrowse.setEnabled(enabled) 218 | self.processing = processing 219 | 220 | # Event Handlers 221 | def closeEvent(self, event): 222 | if self.processing: 223 | self.warningProcessNotFinished() 224 | event.ignore() 225 | 226 | def dragEnterEvent(self, event): 227 | urls = event.mimeData().urls() 228 | valid = False 229 | for url in urls: 230 | if not url.isLocalFile(): 231 | continue 232 | path = url.toLocalFile() 233 | ext = os.path.splitext(path)[1] 234 | if ext[1:].lower() in self.availableFormats: 235 | valid = True 236 | break 237 | if valid: 238 | event.accept() 239 | 240 | def dropEvent(self, event): 241 | urls = event.mimeData().urls() 242 | for url in urls: 243 | if not url.isLocalFile(): 244 | continue 245 | path = url.toLocalFile() 246 | ext = os.path.splitext(path)[1] 247 | if ext[1:].lower() not in self.availableFormats: 248 | continue 249 | item = QListWidgetItem() 250 | item.setSizeHint(QSize(200, 24)) 251 | item.setText(QFileInfo(path).fileName()) 252 | item.setData(Qt.ItemDataRole.UserRole + 1, 253 | path) 254 | self.ui.listWidgetTaskList.addItem(item) 255 | -------------------------------------------------------------------------------- /gui/ui_mainwindow.ui: -------------------------------------------------------------------------------- 1 | 2 | 3 | MainWindow 4 | 5 | 6 | 7 | 0 8 | 0 9 | 768 10 | 480 11 | 12 | 13 | 14 | 15 | Microsoft YaHei UI 16 | 17 | 18 | 19 | MainWindow 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | true 29 | 30 | 31 | 32 | 0 33 | 0 34 | 35 | 36 | 37 | Add Audio Files... 38 | 39 | 40 | 41 | 42 | 43 | 44 | Qt::Orientation::Horizontal 45 | 46 | 47 | 48 | 40 49 | 20 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 0 63 | 0 64 | 65 | 66 | 67 | Task List 68 | 69 | 70 | 71 | 72 | 73 | QFrame::Shadow::Plain 74 | 75 | 76 | 77 | 78 | 79 | 80 | Clear List 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 0 92 | 0 93 | 94 | 95 | 96 | Settings 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | Threshold (dB) 105 | 106 | 107 | 108 | 109 | 110 | 111 | -40 112 | 113 | 114 | Qt::AlignmentFlag::AlignRight|Qt::AlignmentFlag::AlignTrailing|Qt::AlignmentFlag::AlignVCenter 115 | 116 | 117 | 118 | 119 | 120 | 121 | Minimum Length (ms) 122 | 123 | 124 | 125 | 126 | 127 | 128 | 5000 129 | 130 | 131 | Qt::AlignmentFlag::AlignRight|Qt::AlignmentFlag::AlignTrailing|Qt::AlignmentFlag::AlignVCenter 132 | 133 | 134 | 135 | 136 | 137 | 138 | Minimum Interval (ms) 139 | 140 | 141 | 142 | 143 | 144 | 145 | 300 146 | 147 | 148 | Qt::AlignmentFlag::AlignRight|Qt::AlignmentFlag::AlignTrailing|Qt::AlignmentFlag::AlignVCenter 149 | 150 | 151 | 152 | 153 | 154 | 155 | Hop Size (ms) 156 | 157 | 158 | 159 | 160 | 161 | 162 | 10 163 | 164 | 165 | Qt::AlignmentFlag::AlignRight|Qt::AlignmentFlag::AlignTrailing|Qt::AlignmentFlag::AlignVCenter 166 | 167 | 168 | 169 | 170 | 171 | 172 | Maximum Silence Length (ms) 173 | 174 | 175 | 176 | 177 | 178 | 179 | 1000 180 | 181 | 182 | Qt::AlignmentFlag::AlignRight|Qt::AlignmentFlag::AlignTrailing|Qt::AlignmentFlag::AlignVCenter 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | Output Directory (default to the same as the audio) 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | Qt::AlignmentFlag::AlignLeading|Qt::AlignmentFlag::AlignLeft|Qt::AlignmentFlag::AlignVCenter 204 | 205 | 206 | 207 | 208 | 209 | 210 | Browse... 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | Output Format 222 | 223 | 224 | 225 | 226 | 227 | 228 | wav 229 | 230 | 231 | true 232 | 233 | 234 | buttonGroup 235 | 236 | 237 | 238 | 239 | 240 | 241 | flac 242 | 243 | 244 | buttonGroup 245 | 246 | 247 | 248 | 249 | 250 | 251 | true 252 | 253 | 254 | mp3 255 | 256 | 257 | buttonGroup 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | Qt::Orientation::Vertical 267 | 268 | 269 | 270 | 20 271 | 40 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | About 287 | 288 | 289 | 290 | 291 | 292 | 293 | 0 294 | 295 | 296 | 297 | 298 | 299 | 300 | Start 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | -------------------------------------------------------------------------------- /pack-gui.ps1: -------------------------------------------------------------------------------- 1 | # Activate virtual environment 2 | & ./venv/Scripts/Activate.ps1 3 | 4 | # Run PyInstaller 5 | pyinstaller --onedir --noconsole --version-file file_version_info.txt slicer-gui.py --noconfirm 6 | 7 | # Remove useless components 8 | Remove-Item ./dist/slicer-gui/PySide6/opengl32sw.dll 9 | Remove-Item ./dist/slicer-gui/PySide6/Qt6Quick.dll 10 | Remove-Item ./dist/slicer-gui/PySide6/Qt6Pdf.dll 11 | Remove-Item ./dist/slicer-gui/PySide6/Qt6Qml.dll 12 | Remove-Item ./dist/slicer-gui/PySide6/Qt6OpenGL.dll 13 | Remove-Item ./dist/slicer-gui/PySide6/Qt6Network.dll 14 | Remove-Item ./dist/slicer-gui/PySide6/QtNetwork.pyd 15 | Remove-Item ./dist/slicer-gui/PySide6/Qt6QmlModels.dll 16 | Remove-Item ./dist/slicer-gui/PySide6/Qt6VirtualKeyboard.dll 17 | Remove-Item -Path ./dist/slicer-gui/PySide6/translations -Recurse 18 | 19 | # Compress files 20 | Compress-Archive -Path .\dist\slicer-gui -DestinationPath .\dist\slicer-gui-windows.zip -Force -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flutydeer/audio-slicer/a455f1a263d1e2278dd6e05d2f78ce0fcb50b6b6/requirements.txt -------------------------------------------------------------------------------- /screenshots/screenshot_dark.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flutydeer/audio-slicer/a455f1a263d1e2278dd6e05d2f78ce0fcb50b6b6/screenshots/screenshot_dark.jpg -------------------------------------------------------------------------------- /screenshots/screenshot_light.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flutydeer/audio-slicer/a455f1a263d1e2278dd6e05d2f78ce0fcb50b6b6/screenshots/screenshot_light.jpg -------------------------------------------------------------------------------- /slicer-gui.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import datetime 4 | import qdarktheme 5 | from PySide6.QtWidgets import QApplication, QStyleFactory 6 | from PySide6.QtGui import QFont 7 | 8 | import gui.mainwindow 9 | 10 | if __name__ == '__main__': 11 | # Write console outputs to log file. 12 | __stderr__ = sys.stderr 13 | date_time = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S') 14 | folder = os.path.exists('log') 15 | if not folder: 16 | os.makedirs('log') 17 | sys.stderr = open(f'log/log {date_time}.txt', 'w') 18 | 19 | app = QApplication(sys.argv) 20 | app.setApplicationName("Audio Slicer") 21 | app.setApplicationDisplayName("Audio Slicer") 22 | 23 | # Apply auto dark theme 24 | qdarktheme.setup_theme( 25 | theme="auto", 26 | # custom_colors={ 27 | # "[dark]": { 28 | # "primary": "#8dc8d1", 29 | # }, 30 | # "[light]": { 31 | # "primary": "#3b7d92", 32 | # } 33 | # } 34 | ) 35 | 36 | # Auto dark title bar on Windows 10/11 37 | style = QStyleFactory.create("fusion") 38 | app.setStyle(style) 39 | 40 | font = QFont() 41 | # font.setPixelSize(12) 42 | font.setHintingPreference(QFont.PreferNoHinting) 43 | app.setFont(font) 44 | 45 | window = gui.mainwindow.MainWindow() 46 | window.show() 47 | 48 | sys.exit(app.exec()) 49 | -------------------------------------------------------------------------------- /slicer.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from argparse import ArgumentParser 3 | import time 4 | 5 | import librosa 6 | import numpy as np 7 | import soundfile 8 | from scipy.ndimage import maximum_filter1d, uniform_filter1d 9 | 10 | 11 | def timeit(func): 12 | def run(*args, **kwargs): 13 | t = time.time() 14 | res = func(*args, **kwargs) 15 | print('executing \'%s\' cost %.3fs' % (func.__name__, time.time() - t)) 16 | return res 17 | return run 18 | 19 | 20 | # @timeit 21 | def _window_maximum(arr, win_sz): 22 | return maximum_filter1d(arr, size=win_sz)[win_sz // 2: win_sz // 2 + arr.shape[0] - win_sz + 1] 23 | 24 | 25 | # @timeit 26 | def _window_rms(arr, win_sz): 27 | filtered = np.sqrt(uniform_filter1d(np.power(arr, 2), win_sz) - np.power(uniform_filter1d(arr, win_sz), 2)) 28 | return filtered[win_sz // 2: win_sz // 2 + arr.shape[0] - win_sz + 1] 29 | 30 | 31 | def level2db(levels, eps=1e-12): 32 | return 20 * np.log10(np.clip(levels, a_min=eps, a_max=1)) 33 | 34 | 35 | def _apply_slice(audio, begin, end): 36 | if len(audio.shape) > 1: 37 | return audio[:, begin: end] 38 | else: 39 | return audio[begin: end] 40 | 41 | 42 | class Slicer: 43 | def __init__(self, 44 | sr: int, 45 | db_threshold: float = -40, 46 | min_length: int = 5000, 47 | win_l: int = 300, 48 | win_s: int = 20, 49 | max_silence_kept: int = 500): 50 | self.db_threshold = db_threshold 51 | self.min_samples = round(sr * min_length / 1000) 52 | self.win_ln = round(sr * win_l / 1000) 53 | self.win_sn = round(sr * win_s / 1000) 54 | self.max_silence = round(sr * max_silence_kept / 1000) 55 | if not self.min_samples >= self.win_ln >= self.win_sn: 56 | raise ValueError('The following condition must be satisfied: min_length >= win_l >= win_s') 57 | if not self.max_silence >= self.win_sn: 58 | raise ValueError('The following condition must be satisfied: max_silence_kept >= win_s') 59 | 60 | @timeit 61 | def slice(self, audio): 62 | if len(audio.shape) > 1: 63 | samples = librosa.to_mono(audio) 64 | else: 65 | samples = audio 66 | if samples.shape[0] <= self.min_samples: 67 | return [audio] 68 | # get absolute amplitudes 69 | abs_amp = np.abs(samples - np.mean(samples)) 70 | # calculate local maximum with large window 71 | win_max_db = level2db(_window_maximum(abs_amp, win_sz=self.win_ln)) 72 | sil_tags = [] 73 | left = right = 0 74 | while right < win_max_db.shape[0]: 75 | if win_max_db[right] < self.db_threshold: 76 | right += 1 77 | elif left == right: 78 | left += 1 79 | right += 1 80 | else: 81 | if left == 0: 82 | split_loc_l = left 83 | else: 84 | sil_left_n = min(self.max_silence, (right + self.win_ln - left) // 2) 85 | rms_db_left = level2db(_window_rms(samples[left: left + sil_left_n], win_sz=self.win_sn)) 86 | split_win_l = left + np.argmin(rms_db_left) 87 | split_loc_l = split_win_l + np.argmin(abs_amp[split_win_l: split_win_l + self.win_sn]) 88 | if len(sil_tags) != 0 and split_loc_l - sil_tags[-1][1] < self.min_samples and right < win_max_db.shape[0] - 1: 89 | right += 1 90 | left = right 91 | continue 92 | if right == win_max_db.shape[0] - 1: 93 | split_loc_r = right + self.win_ln 94 | else: 95 | sil_right_n = min(self.max_silence, (right + self.win_ln - left) // 2) 96 | rms_db_right = level2db(_window_rms(samples[right + self.win_ln - sil_right_n: right + self.win_ln], win_sz=self.win_sn)) 97 | split_win_r = right + self.win_ln - sil_right_n + np.argmin(rms_db_right) 98 | split_loc_r = split_win_r + np.argmin(abs_amp[split_win_r: split_win_r + self.win_sn]) 99 | sil_tags.append((split_loc_l, split_loc_r)) 100 | right += 1 101 | left = right 102 | if left != right: 103 | sil_left_n = min(self.max_silence, (right + self.win_ln - left) // 2) 104 | rms_db_left = level2db(_window_rms(samples[left: left + sil_left_n], win_sz=self.win_sn)) 105 | split_win_l = left + np.argmin(rms_db_left) 106 | split_loc_l = split_win_l + np.argmin(abs_amp[split_win_l: split_win_l + self.win_sn]) 107 | sil_tags.append((split_loc_l, samples.shape[0])) 108 | if len(sil_tags) == 0: 109 | return [audio] 110 | else: 111 | chunks = [] 112 | if sil_tags[0][0] > 0: 113 | chunks.append(_apply_slice(audio, 0, sil_tags[0][0])) 114 | for i in range(0, len(sil_tags) - 1): 115 | chunks.append(_apply_slice(audio, sil_tags[i][1], sil_tags[i + 1][0])) 116 | if sil_tags[-1][1] < samples.shape[0] - 1: 117 | chunks.append(_apply_slice(audio, sil_tags[-1][1], samples.shape[0])) 118 | return chunks 119 | 120 | 121 | def main(): 122 | parser = ArgumentParser() 123 | parser.add_argument('audio', type=str, help='The audio to be sliced') 124 | parser.add_argument('--out', type=str, help='Output directory of the sliced audio clips') 125 | parser.add_argument('--db_thresh', type=float, required=False, default=-40, help='The dB threshold for silence detection') 126 | parser.add_argument('--min_len', type=int, required=False, default=5000, help='The minimum milliseconds required for each sliced audio clip') 127 | parser.add_argument('--win_l', type=int, required=False, default=300, help='Size of the large sliding window, presented in milliseconds') 128 | parser.add_argument('--win_s', type=int, required=False, default=20, help='Size of the small sliding window, presented in milliseconds') 129 | parser.add_argument('--max_sil_kept', type=int, required=False, default=500, help='The maximum silence length kept around the sliced audio, presented in milliseconds') 130 | args = parser.parse_args() 131 | out = args.out 132 | if out is None: 133 | out = os.path.dirname(os.path.abspath(args.audio)) 134 | audio, sr = librosa.load(args.audio, sr=None) 135 | slicer = Slicer( 136 | sr=sr, 137 | db_threshold=args.db_thresh, 138 | min_length=args.min_len, 139 | win_l=args.win_l, 140 | win_s=args.win_s, 141 | max_silence_kept=args.max_sil_kept 142 | ) 143 | chunks = slicer.slice(audio) 144 | if not os.path.exists(out): 145 | os.makedirs(out) 146 | for i, chunk in enumerate(chunks): 147 | soundfile.write(os.path.join(out, f'%s_%d.wav' % (os.path.basename(args.audio).rsplit('.', maxsplit=1)[0], i)), chunk, sr) 148 | 149 | 150 | if __name__ == '__main__': 151 | main() 152 | -------------------------------------------------------------------------------- /slicer2.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from argparse import ArgumentParser 3 | 4 | import numpy as np 5 | import soundfile 6 | 7 | 8 | # This function is obtained from librosa. 9 | def get_rms( 10 | y, 11 | *, 12 | frame_length=2048, 13 | hop_length=512, 14 | pad_mode="constant", 15 | ): 16 | padding = (int(frame_length // 2), int(frame_length // 2)) 17 | y = np.pad(y, padding, mode=pad_mode) 18 | 19 | axis = -1 20 | # put our new within-frame axis at the end for now 21 | out_strides = y.strides + tuple([y.strides[axis]]) 22 | # Reduce the shape on the framing axis 23 | x_shape_trimmed = list(y.shape) 24 | x_shape_trimmed[axis] -= frame_length - 1 25 | out_shape = tuple(x_shape_trimmed) + tuple([frame_length]) 26 | xw = np.lib.stride_tricks.as_strided( 27 | y, shape=out_shape, strides=out_strides 28 | ) 29 | if axis < 0: 30 | target_axis = axis - 1 31 | else: 32 | target_axis = axis + 1 33 | xw = np.moveaxis(xw, -1, target_axis) 34 | # Downsample along the target axis 35 | slices = [slice(None)] * xw.ndim 36 | slices[axis] = slice(0, None, hop_length) 37 | x = xw[tuple(slices)] 38 | 39 | # Calculate power 40 | power = np.mean(np.abs(x) ** 2, axis=-2, keepdims=True) 41 | 42 | return np.sqrt(power) 43 | 44 | 45 | class Slicer: 46 | def __init__(self, 47 | sr: int, 48 | threshold: float = -40., 49 | min_length: int = 5000, 50 | min_interval: int = 300, 51 | hop_size: int = 20, 52 | max_sil_kept: int = 5000): 53 | if not min_length >= min_interval >= hop_size: 54 | raise ValueError('The following condition must be satisfied: min_length >= min_interval >= hop_size') 55 | if not max_sil_kept >= hop_size: 56 | raise ValueError('The following condition must be satisfied: max_sil_kept >= hop_size') 57 | min_interval = sr * min_interval / 1000 58 | self.threshold = 10 ** (threshold / 20.) 59 | self.hop_size = round(sr * hop_size / 1000) 60 | self.win_size = min(round(min_interval), 4 * self.hop_size) 61 | self.min_length = round(sr * min_length / 1000 / self.hop_size) 62 | self.min_interval = round(min_interval / self.hop_size) 63 | self.max_sil_kept = round(sr * max_sil_kept / 1000 / self.hop_size) 64 | 65 | def _apply_slice(self, waveform, begin, end): 66 | if len(waveform.shape) > 1: 67 | return waveform[:, begin * self.hop_size: min(waveform.shape[1], end * self.hop_size)] 68 | else: 69 | return waveform[begin * self.hop_size: min(waveform.shape[0], end * self.hop_size)] 70 | 71 | # @timeit 72 | def slice(self, waveform): 73 | if len(waveform.shape) > 1: 74 | samples = waveform.mean(axis=0) 75 | else: 76 | samples = waveform 77 | if (samples.shape[0] + self.hop_size - 1) // self.hop_size <= self.min_length: 78 | return [waveform] 79 | rms_list = get_rms(y=samples, frame_length=self.win_size, hop_length=self.hop_size).squeeze(0) 80 | sil_tags = [] 81 | silence_start = None 82 | clip_start = 0 83 | for i, rms in enumerate(rms_list): 84 | # Keep looping while frame is silent. 85 | if rms < self.threshold: 86 | # Record start of silent frames. 87 | if silence_start is None: 88 | silence_start = i 89 | continue 90 | # Keep looping while frame is not silent and silence start has not been recorded. 91 | if silence_start is None: 92 | continue 93 | # Clear recorded silence start if interval is not enough or clip is too short 94 | is_leading_silence = silence_start == 0 and i > self.max_sil_kept 95 | need_slice_middle = i - silence_start >= self.min_interval and i - clip_start >= self.min_length 96 | if not is_leading_silence and not need_slice_middle: 97 | silence_start = None 98 | continue 99 | # Need slicing. Record the range of silent frames to be removed. 100 | if i - silence_start <= self.max_sil_kept: 101 | pos = rms_list[silence_start: i + 1].argmin() + silence_start 102 | if silence_start == 0: 103 | sil_tags.append((0, pos)) 104 | else: 105 | sil_tags.append((pos, pos)) 106 | clip_start = pos 107 | elif i - silence_start <= self.max_sil_kept * 2: 108 | pos = rms_list[i - self.max_sil_kept: silence_start + self.max_sil_kept + 1].argmin() 109 | pos += i - self.max_sil_kept 110 | pos_l = rms_list[silence_start: silence_start + self.max_sil_kept + 1].argmin() + silence_start 111 | pos_r = rms_list[i - self.max_sil_kept: i + 1].argmin() + i - self.max_sil_kept 112 | if silence_start == 0: 113 | sil_tags.append((0, pos_r)) 114 | clip_start = pos_r 115 | else: 116 | sil_tags.append((min(pos_l, pos), max(pos_r, pos))) 117 | clip_start = max(pos_r, pos) 118 | else: 119 | pos_l = rms_list[silence_start: silence_start + self.max_sil_kept + 1].argmin() + silence_start 120 | pos_r = rms_list[i - self.max_sil_kept: i + 1].argmin() + i - self.max_sil_kept 121 | if silence_start == 0: 122 | sil_tags.append((0, pos_r)) 123 | else: 124 | sil_tags.append((pos_l, pos_r)) 125 | clip_start = pos_r 126 | silence_start = None 127 | # Deal with trailing silence. 128 | total_frames = rms_list.shape[0] 129 | if silence_start is not None and total_frames - silence_start >= self.min_interval: 130 | silence_end = min(total_frames, silence_start + self.max_sil_kept) 131 | pos = rms_list[silence_start: silence_end + 1].argmin() + silence_start 132 | sil_tags.append((pos, total_frames + 1)) 133 | # Apply and return slices. 134 | if len(sil_tags) == 0: 135 | return [waveform] 136 | else: 137 | chunks = [] 138 | if sil_tags[0][0] > 0: 139 | chunks.append(self._apply_slice(waveform, 0, sil_tags[0][0])) 140 | for i in range(len(sil_tags) - 1): 141 | chunks.append(self._apply_slice(waveform, sil_tags[i][1], sil_tags[i + 1][0])) 142 | if sil_tags[-1][1] < total_frames: 143 | chunks.append(self._apply_slice(waveform, sil_tags[-1][1], total_frames)) 144 | return chunks 145 | 146 | 147 | def main(): 148 | parser = ArgumentParser() 149 | parser.add_argument('audio', type=str, help='The audio to be sliced') 150 | parser.add_argument('--out', type=str, help='Output directory of the sliced audio clips') 151 | parser.add_argument('--db_thresh', type=float, required=False, default=-40, 152 | help='The dB threshold for silence detection') 153 | parser.add_argument('--min_length', type=int, required=False, default=5000, 154 | help='The minimum milliseconds required for each sliced audio clip') 155 | parser.add_argument('--min_interval', type=int, required=False, default=300, 156 | help='The minimum milliseconds for a silence part to be sliced') 157 | parser.add_argument('--hop_size', type=int, required=False, default=10, 158 | help='Frame length in milliseconds') 159 | parser.add_argument('--max_sil_kept', type=int, required=False, default=500, 160 | help='The maximum silence length kept around the sliced clip, presented in milliseconds') 161 | args = parser.parse_args() 162 | out = args.out 163 | if out is None: 164 | out = os.path.dirname(os.path.abspath(args.audio)) 165 | import librosa 166 | audio, sr = librosa.load(args.audio, sr=None) 167 | slicer = Slicer( 168 | sr=sr, 169 | threshold=args.db_thresh, 170 | min_length=args.min_length, 171 | min_interval=args.min_interval, 172 | hop_size=args.hop_size, 173 | max_sil_kept=args.max_sil_kept 174 | ) 175 | chunks = slicer.slice(audio) 176 | if not os.path.exists(out): 177 | os.makedirs(out) 178 | for i, chunk in enumerate(chunks): 179 | soundfile.write(os.path.join(out, f'%s_%d.wav' % (os.path.basename(args.audio).rsplit('.', maxsplit=1)[0], i)), chunk, sr) 180 | 181 | 182 | if __name__ == '__main__': 183 | main() 184 | --------------------------------------------------------------------------------