├── .gitignore ├── README.md ├── doc ├── audio │ ├── L.png │ ├── R.png │ ├── SF.png │ ├── input.png │ ├── lab_S.png │ ├── lab_S_final.png │ ├── lab_S_trans.png │ └── nc.png └── midi │ ├── L.png │ ├── R.png │ ├── SF.png │ ├── input.png │ ├── lab_S.png │ ├── lab_S_final.png │ ├── lab_S_trans.png │ └── nc.png ├── setup.py ├── sf_segmenter ├── __init__.py ├── feature.py ├── segmenter.py └── vis.py ├── test_IO.py ├── test_process.py └── testcases └── 1430.mid /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .vscode/ 3 | .ipynb_checkpoints/ 4 | .DS_Store 5 | miditoolkit.egg-info/ 6 | @eaDir 7 | *.pyc 8 | *.pypirc 9 | Thumbs.db 10 | *.gz -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sf_segmenter 2 | --- 3 | Music Segmentation/Labeling Algorithm. Based on SF (Structural feature) method. 4 | Modified from [MSAF](https://github.com/urinieto/msaf). I simplified the IO for arbitrary input features and quick experiments. 5 | 6 | ## Installation 7 | version: 0.0.1 8 | ``` 9 | pip install sf-segmenter 10 | ``` 11 | 12 | ## Reference 13 | * Serrà, J., Müller, M., Grosche, P., & Arcos, J. L. (2012). Unsupervised Detection of Music Boundaries by Time Series Structure Features. In Proc. of the 26th AAAI Conference on Artificial Intelligence (pp. 1613–1619).Toronto, Canada. 14 | 15 | * J. Serrà, M. Müller, P. Grosche and J. L. Arcos, "Unsupervised Music Structure Annotation by Time Series Structure Features and Segment Similarity," in IEEE Transactions on Multimedia, vol. 16, no. 5, pp. 1229-1240, Aug. 2014, doi: 10.1109/TMM.2014.2310701. 16 | 17 | ## Resources 18 | 19 | * audiolabs/FMP course/Chapter 4: Music Structure Analysis: https://www.audiolabs-erlangen.de/resources/MIR/FMP/C4/C4.html 20 | 21 | -------------------------------------------------------------------------------- /doc/audio/L.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/audio/L.png -------------------------------------------------------------------------------- /doc/audio/R.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/audio/R.png -------------------------------------------------------------------------------- /doc/audio/SF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/audio/SF.png -------------------------------------------------------------------------------- /doc/audio/input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/audio/input.png -------------------------------------------------------------------------------- /doc/audio/lab_S.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/audio/lab_S.png -------------------------------------------------------------------------------- /doc/audio/lab_S_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/audio/lab_S_final.png -------------------------------------------------------------------------------- /doc/audio/lab_S_trans.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/audio/lab_S_trans.png -------------------------------------------------------------------------------- /doc/audio/nc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/audio/nc.png -------------------------------------------------------------------------------- /doc/midi/L.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/midi/L.png -------------------------------------------------------------------------------- /doc/midi/R.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/midi/R.png -------------------------------------------------------------------------------- /doc/midi/SF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/midi/SF.png -------------------------------------------------------------------------------- /doc/midi/input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/midi/input.png -------------------------------------------------------------------------------- /doc/midi/lab_S.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/midi/lab_S.png -------------------------------------------------------------------------------- /doc/midi/lab_S_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/midi/lab_S_final.png -------------------------------------------------------------------------------- /doc/midi/lab_S_trans.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/midi/lab_S_trans.png -------------------------------------------------------------------------------- /doc/midi/nc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/midi/nc.png -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='sf_segmenter', 5 | version='0.0.2', 6 | description='', 7 | author='wayne391', 8 | author_email='s101062219@gmail.com', 9 | url='https://github.com/wayne391/sf_segmenter', 10 | packages=find_packages(), 11 | classifiers=[ 12 | "License :: OSI Approved :: MIT License", 13 | "Programming Language :: Python", 14 | ], 15 | keywords='music audio midi mir', 16 | license='MIT', 17 | install_requires=[ 18 | 'miditoolkit >= 0.1.14', 19 | ] 20 | ) 21 | 22 | 23 | """ 24 | python setup.py sdist 25 | twine upload dist/* 26 | """ -------------------------------------------------------------------------------- /sf_segmenter/__init__.py: -------------------------------------------------------------------------------- 1 | from .feature import * 2 | from .segmenter import * 3 | from .vis import * 4 | 5 | __version__ = '0.0.2' -------------------------------------------------------------------------------- /sf_segmenter/feature.py: -------------------------------------------------------------------------------- 1 | import librosa 2 | import numpy as np 3 | from miditoolkit.pianoroll import utils as mt_utils 4 | 5 | 6 | def audio_extract_pcp( 7 | audio, 8 | sr, 9 | n_fft=4096, 10 | hop_len=int(4096 * 0.75), 11 | pcp_bins=84, 12 | pcp_norm=np.inf, 13 | pcp_f_min=27.5, 14 | pcp_n_octaves=6): 15 | 16 | audio_harmonic, _ = librosa.effects.hpss(audio) 17 | pcp_cqt = np.abs(librosa.hybrid_cqt( 18 | audio_harmonic, 19 | sr=sr, 20 | hop_length=hop_len, 21 | n_bins=pcp_bins, 22 | norm=pcp_norm, 23 | fmin=pcp_f_min)) ** 2 24 | 25 | pcp = librosa.feature.chroma_cqt( 26 | C=pcp_cqt, 27 | sr=sr, 28 | hop_length=hop_len, 29 | n_octaves=pcp_n_octaves, 30 | fmin=pcp_f_min).T 31 | return pcp 32 | 33 | 34 | def midi_extract_beat_sync_pianoroll( 35 | pianoroll, 36 | beat_resol, 37 | is_tochroma=False): 38 | 39 | # sync to beat 40 | beat_sync_pr = np.zeros( 41 | (int(np.ceil(pianoroll.shape[0] / beat_resol)), 42 | pianoroll.shape[1])) 43 | 44 | for beat in range(beat_sync_pr.shape[0]): 45 | st = beat * beat_resol 46 | ed = (beat + 1) * beat_resol 47 | beat_sync_pr[beat] = np.sum(pianoroll[st:ed, :], axis=0) 48 | 49 | # normalize 50 | beat_sync_pr = ( 51 | beat_sync_pr - beat_sync_pr.mean()) / beat_sync_pr.std() 52 | beat_sync_pr = ( 53 | beat_sync_pr - beat_sync_pr.min()) / (beat_sync_pr.max() - beat_sync_pr.min()) 54 | 55 | # to chroma 56 | if is_tochroma: 57 | beat_sync_pr = mt_utils.tochroma(beat_sync_pr) 58 | return beat_sync_pr 59 | -------------------------------------------------------------------------------- /sf_segmenter/segmenter.py: -------------------------------------------------------------------------------- 1 | import librosa 2 | import soundfile as sf 3 | import numpy as np 4 | from scipy import signal 5 | from scipy.spatial import distance 6 | from scipy.ndimage import filters 7 | 8 | from .vis import plot_feats 9 | from .feature import midi_extract_beat_sync_pianoroll, audio_extract_pcp 10 | 11 | 12 | import miditoolkit 13 | from miditoolkit.midi import parser as mid_parser 14 | from miditoolkit.pianoroll import parser as pr_parser 15 | 16 | CONFIG = { 17 | "M_gaussian": 27, 18 | "m_embedded": 3, 19 | "k_nearest": 0.04, 20 | "Mp_adaptive": 28, 21 | "offset_thres": 0.05, 22 | "bound_norm_feats": np.inf # min_max, log, np.inf, 23 | # -np.inf, float >= 0, None 24 | # For framesync features 25 | # "M_gaussian" : 100, 26 | # "m_embedded" : 3, 27 | # "k_nearest" : 0.06, 28 | # "Mp_adaptive" : 100, 29 | # "offset_thres" : 0.01 30 | } 31 | 32 | 33 | def cummulative_sum_Q(R): 34 | len_x, len_y = R.shape 35 | Q = np.zeros((len_x + 2, len_y + 2)) 36 | for i in range(len_x): 37 | for j in range(len_y): 38 | Q[i+2, j+2] = max( 39 | Q[i+1, j+1], 40 | Q[i, j+1], 41 | Q[i+1, j]) + R[i, j] 42 | return np.max(Q) 43 | 44 | 45 | def normalize(X, norm_type, floor=0.0, min_db=-80): 46 | """Normalizes the given matrix of features. 47 | Parameters 48 | ---------- 49 | X: np.array 50 | Each row represents a feature vector. 51 | norm_type: {"min_max", "log", np.inf, -np.inf, 0, float > 0, None} 52 | - `"min_max"`: Min/max scaling is performed 53 | - `"log"`: Logarithmic scaling is performed 54 | - `np.inf`: Maximum absolute value 55 | - `-np.inf`: Mininum absolute value 56 | - `0`: Number of non-zeros 57 | - float: Corresponding l_p norm. 58 | - None : No normalization is performed 59 | Returns 60 | ------- 61 | norm_X: np.array 62 | Normalized `X` according the the input parameters. 63 | """ 64 | if isinstance(norm_type, str): 65 | if norm_type == "min_max": 66 | return min_max_normalize(X, floor=floor) 67 | if norm_type == "log": 68 | return lognormalize(X, floor=floor, min_db=min_db) 69 | return librosa.util.normalize(X, norm=norm_type, axis=1) 70 | 71 | 72 | def median_filter(X, M=8): 73 | """Median filter along the first axis of the feature matrix X.""" 74 | for i in range(X.shape[1]): 75 | X[:, i] = filters.median_filter(X[:, i], size=M) 76 | return X 77 | 78 | 79 | def gaussian_filter(X, M=8, axis=0): 80 | """Gaussian filter along the first axis of the feature matrix X.""" 81 | for i in range(X.shape[axis]): 82 | if axis == 1: 83 | X[:, i] = filters.gaussian_filter(X[:, i], sigma=M / 2.) 84 | elif axis == 0: 85 | X[i, :] = filters.gaussian_filter(X[i, :], sigma=M / 2.) 86 | return X 87 | 88 | 89 | def compute_gaussian_krnl(M): 90 | """Creates a gaussian kernel following Serra's paper.""" 91 | g = signal.gaussian(M, M / 3., sym=True) 92 | G = np.dot(g.reshape(-1, 1), g.reshape(1, -1)) 93 | G[M // 2:, :M // 2] = -G[M // 2:, :M // 2] 94 | G[:M // 2, M // 1:] = -G[:M // 2, M // 1:] 95 | return G 96 | 97 | 98 | def compute_nc(X): 99 | """Computes the novelty curve from the structural features.""" 100 | N = X.shape[0] 101 | # nc = np.sum(np.diff(X, axis=0), axis=1) # Difference between SF's 102 | 103 | nc = np.zeros(N) 104 | for i in range(N - 1): 105 | nc[i] = distance.euclidean(X[i, :], X[i + 1, :]) 106 | 107 | # Normalize 108 | nc += np.abs(nc.min()) 109 | nc /= float(nc.max()) 110 | return nc 111 | 112 | 113 | def pick_peaks(nc, L=16, offset_denom=0.1): 114 | """Obtain peaks from a novelty curve using an adaptive threshold.""" 115 | offset = nc.mean() * float(offset_denom) 116 | th = filters.median_filter(nc, size=L) + offset 117 | #th = filters.gaussian_filter(nc, sigma=L/2., mode="nearest") + offset 118 | #import pylab as plt 119 | #plt.plot(nc) 120 | #plt.plot(th) 121 | #plt.show() 122 | # th = np.ones(nc.shape[0]) * nc.mean() - 0.08 123 | peaks = [] 124 | for i in range(1, nc.shape[0] - 1): 125 | # is it a peak? 126 | if nc[i - 1] < nc[i] and nc[i] > nc[i + 1]: 127 | # is it above the threshold? 128 | if nc[i] > th[i]: 129 | peaks.append(i) 130 | return peaks 131 | 132 | 133 | def circular_shift(X): 134 | """Shifts circularly the X squre matrix in order to get a 135 | time-lag matrix.""" 136 | N = X.shape[0] 137 | L = np.zeros(X.shape) 138 | for i in range(N): 139 | L[i, :] = np.asarray([X[(i + j) % N, j] for j in range(N)]) 140 | return L 141 | 142 | 143 | def embedded_space(X, m, tau=1): 144 | """Time-delay embedding with m dimensions and tau delays.""" 145 | N = X.shape[0] - int(np.ceil(m)) 146 | Y = np.zeros((N, int(np.ceil(X.shape[1] * m)))) 147 | for i in range(N): 148 | # print X[i:i+m,:].flatten().shape, w, X.shape 149 | # print Y[i,:].shape 150 | rem = int((m % 1) * X.shape[1]) # Reminder for float m 151 | Y[i, :] = np.concatenate((X[i:i + int(m), :].flatten(), 152 | X[i + int(m), :rem])) 153 | return Y 154 | 155 | 156 | def run_label( 157 | boundaries, 158 | R, 159 | max_iter=100, 160 | return_feat=False): 161 | 162 | """Labeling algorithm.""" 163 | n_boundaries = len(boundaries) 164 | 165 | # compute S 166 | S = np.zeros((n_boundaries, n_boundaries)) 167 | for i in range(n_boundaries - 1): 168 | for j in range(n_boundaries - 1): 169 | i_st, i_ed = boundaries[i], boundaries[i+1] 170 | j_st, j_ed = boundaries[j], boundaries[j+1] 171 | 172 | len_i = i_ed - i_st 173 | len_j = j_ed - j_st 174 | score = cummulative_sum_Q(R[i_st:i_ed, j_st:j_ed]) 175 | S[i, j] = score / min(len_i, len_j) 176 | 177 | # threshold 178 | thr = np.std(S) + np.mean(S) 179 | S[S <= thr] = 0 180 | 181 | # iteration 182 | S_trans = S.copy() 183 | 184 | for i in range(max_iter): 185 | S_trans = np.matmul(S_trans, S) 186 | 187 | S_final = S_trans > 1 188 | 189 | # proc output 190 | n_seg = len(S_trans) - 1 191 | labs = np.ones(n_seg) * -1 192 | cur_tag = int(-1) 193 | for i in range(n_seg): 194 | print(' >', i) 195 | if labs[i] == -1: 196 | cur_tag += 1 197 | labs[i] = cur_tag 198 | for j in range(n_seg): 199 | if S_final[i, j]: 200 | labs[j] = cur_tag 201 | 202 | if return_feat: 203 | return labs, (S, S_trans, S_final) 204 | else: 205 | return labs 206 | 207 | 208 | class Segmenter(object): 209 | def __init__(self, config=CONFIG): 210 | self.config = config 211 | self.refresh() 212 | 213 | def refresh(self): 214 | # collect feats 215 | # - segmentation 216 | self.F = None 217 | self.E = None 218 | self.R = None 219 | self.L = None 220 | self.SF = None 221 | self.nc = None 222 | 223 | # - labeling 224 | self.S = None 225 | self.S_trans = None 226 | self.S_final = None 227 | 228 | # - res 229 | self.boundaries = None 230 | self.labs = None 231 | 232 | 233 | def proc_midi(self, path_midi, is_label=True): 234 | # parse midi to pianoroll 235 | midi_obj = mid_parser.MidiFile(path_midi) 236 | notes = midi_obj.instruments[0].notes 237 | pianoroll = pr_parser.notes2pianoroll( 238 | notes) 239 | 240 | # pianoroll to beat sync pianoroll 241 | pianoroll_sync = midi_extract_beat_sync_pianoroll( 242 | pianoroll, 243 | midi_obj.ticks_per_beat) 244 | 245 | return self.process(pianoroll_sync, is_label=is_label) 246 | 247 | def proc_audio(self, path_audio, sr=22050, is_label=True): 248 | y, sr = librosa.load(path_audio, sr=sr) 249 | pcp = audio_extract_pcp(y, sr) 250 | return self.process(pcp, is_label=is_label) 251 | 252 | 253 | def process( 254 | self, 255 | F, 256 | is_label=False): 257 | """Main process. 258 | Returns 259 | 260 | F: feature. T x D 261 | """ 262 | self.refresh() 263 | 264 | # Structural Features params 265 | Mp = self.config["Mp_adaptive"] # Size of the adaptive threshold for 266 | # peak picking 267 | od = self.config["offset_thres"] # Offset coefficient for adaptive 268 | # thresholding 269 | M = self.config["M_gaussian"] # Size of gaussian kernel in beats 270 | m = self.config["m_embedded"] # Number of embedded dimensions 271 | k = self.config["k_nearest"] # k*N-nearest neighbors for the 272 | # recurrence plot 273 | 274 | # Normalize 275 | F = normalize(F, norm_type=self.config["bound_norm_feats"]) 276 | 277 | # Check size in case the track is too short 278 | if F.shape[0] > 20: 279 | # Emedding the feature space (i.e. shingle) 280 | E = embedded_space(F, m) 281 | 282 | # Recurrence matrix 283 | R = librosa.segment.recurrence_matrix( 284 | E.T, 285 | k=k * int(F.shape[0]), 286 | width=1, # zeros from the diagonal 287 | metric="euclidean", 288 | sym=True).astype(np.float32) 289 | 290 | # Circular shift 291 | L = circular_shift(R) 292 | 293 | # Obtain structural features by filtering the lag matrix 294 | SF = gaussian_filter(L.T, M=M, axis=1) 295 | SF = gaussian_filter(SF, M=1, axis=0) 296 | 297 | # Compute the novelty curve 298 | nc = compute_nc(SF) 299 | 300 | # Find peaks in the novelty curve 301 | est_bounds = pick_peaks(nc, L=Mp, offset_denom=od) 302 | 303 | # Re-align embedded space 304 | est_bounds = np.asarray(est_bounds) + int(np.ceil(m / 2.)) 305 | else: 306 | est_bounds = [] 307 | 308 | # Add first and last frames 309 | est_idxs = np.concatenate(([0], est_bounds, [F.shape[0] - 1])) 310 | est_idxs = np.unique(est_idxs) 311 | 312 | assert est_idxs[0] == 0 and est_idxs[-1] == F.shape[0] - 1 313 | 314 | # collect feature 315 | self.F = F 316 | self.E = E 317 | self.R = R 318 | self.L = L 319 | self.SF = SF 320 | self.nc = nc 321 | 322 | if is_label: 323 | labs, (S, S_trans, S_final) = run_label( 324 | est_idxs, 325 | R, 326 | return_feat=True) 327 | 328 | self.S = S 329 | self.S_trans = S_trans 330 | self.S_final = S_final 331 | 332 | self.boundaries = est_idxs 333 | self.labs = labs 334 | return est_idxs, labs 335 | else: 336 | self.boundaries = est_idxs 337 | return est_idxs 338 | 339 | def plot( 340 | self, 341 | outdir=None, 342 | vis_bounds=True): 343 | 344 | plot_feats( 345 | F=self.F, 346 | R=self.R, 347 | L=self.L, 348 | SF=self.SF, 349 | nc=self.nc, 350 | S=self.S, 351 | S_trans=self.S_trans, 352 | S_final=self.S_final, 353 | boundaries=self.boundaries, 354 | outdir=outdir, 355 | vis_bounds=vis_bounds) 356 | -------------------------------------------------------------------------------- /sf_segmenter/vis.py: -------------------------------------------------------------------------------- 1 | import os 2 | import matplotlib.pyplot as plt 3 | 4 | """ 5 | data = { 6 | 'input': segmenter.F, 7 | 'R': segmenter.R, 8 | 'L': segmenter.L, 9 | 'SF': segmenter.SF, 10 | 'nc': segmenter.nc, 11 | } 12 | 13 | """ 14 | def plot_feats( 15 | F=None, 16 | R=None, 17 | L=None, 18 | SF=None, 19 | nc=None, 20 | S=None, 21 | S_trans=None, 22 | S_final=None, 23 | boundaries=None, 24 | outdir=None, 25 | vis_bounds=False): 26 | 27 | if outdir: 28 | os.makedirs(outdir, exist_ok=True) 29 | print(' [o] save to ...', outdir) 30 | 31 | # plot input feature 32 | if F is not None: 33 | plt.figure() 34 | plt.imshow(F.T, interpolation="nearest", aspect="auto") 35 | plt.title('input feature') 36 | plt.savefig(os.path.join(outdir, 'input.png')) 37 | # plt.show() 38 | plt.close() 39 | 40 | # plot recurrence plot (R) 41 | if R is not None: 42 | plt.figure(figsize=(5, 5)) 43 | plt.imshow(R, interpolation="nearest", cmap=plt.get_cmap("binary")) 44 | if vis_bounds and boundaries is not None: 45 | [plt.axvline(p, color="red", linestyle=':') for p in boundaries] 46 | [plt.axhline(p, color="red", linestyle=':') for p in boundaries] 47 | plt.title('recurrence plot') 48 | plt.savefig(os.path.join(outdir, 'R.png')) 49 | # plt.show() 50 | plt.close() 51 | 52 | # plot time-lag (L) 53 | if L is not None: 54 | plt.figure(figsize=(5, 5)) 55 | plt.imshow(L, interpolation="nearest", cmap=plt.get_cmap("binary")) 56 | if vis_bounds and boundaries is not None: 57 | [plt.axvline(p, color="red", linestyle=':') for p in boundaries] 58 | [plt.axhline(p, color="red", linestyle=':') for p in boundaries] 59 | plt.savefig(os.path.join(outdir, 'L.png')) 60 | # plt.show() 61 | plt.close() 62 | 63 | # plot smoothed time-lag (SF) 64 | if SF is not None: 65 | plt.figure(figsize=(5, 5)) 66 | plt.imshow(SF.T, interpolation="nearest", cmap=plt.get_cmap("binary")) 67 | plt.title('SF (after filtering)') 68 | if vis_bounds and boundaries is not None: 69 | [plt.axvline(p, color="red", linestyle=':') for p in boundaries] 70 | [plt.axhline(p, color="red", linestyle=':') for p in boundaries] 71 | plt.savefig(os.path.join(outdir, 'SF.png')) 72 | # plt.show(block=False)) 73 | plt.close() 74 | 75 | # plot novelty cureve (nc) 76 | if nc is not None: 77 | plt.figure() 78 | plt.plot(nc) 79 | if boundaries is not None: 80 | [plt.axvline(p, color="green", linestyle=':') for p in boundaries] 81 | plt.title('novelty curve') 82 | plt.savefig(os.path.join(outdir, 'nc.png')) 83 | # plt.show() 84 | plt.close() 85 | 86 | # labeling features 87 | if S is not None: 88 | plt.figure(figsize=(5, 5)) 89 | plt.imshow(S, interpolation="nearest", cmap=plt.get_cmap("binary")) 90 | plt.title('labeling: S') 91 | plt.savefig(os.path.join(outdir, 'lab_S.png')) 92 | plt.close() 93 | 94 | if S_trans is not None: 95 | plt.figure(figsize=(5, 5)) 96 | plt.imshow(S_trans, interpolation="nearest", cmap=plt.get_cmap("binary")) 97 | plt.title('labeling: S trans') 98 | plt.savefig(os.path.join(outdir, 'lab_S_trans.png')) 99 | plt.close() 100 | 101 | if S_final is not None: 102 | plt.figure(figsize=(5, 5)) 103 | plt.imshow(S_final, interpolation="nearest", cmap=plt.get_cmap("binary")) 104 | plt.title('labeling: S final') 105 | plt.savefig(os.path.join(outdir, 'lab_S_final.png')) 106 | plt.close() 107 | -------------------------------------------------------------------------------- /test_IO.py: -------------------------------------------------------------------------------- 1 | from sf_segmenter.segmenter import Segmenter 2 | import librosa 3 | import miditoolkit 4 | 5 | # init 6 | segmenter = Segmenter() 7 | 8 | # audio 9 | path_audio = librosa.util.example_audio_file() 10 | boundaries, labs = segmenter.proc_audio(path_audio) 11 | segmenter.plot(outdir='doc/audio') 12 | print('boundaries:', boundaries) 13 | print('labs:', labs) 14 | 15 | # midi 16 | # path_midi = miditoolkit.midi.utils.example_midi_file() 17 | path_midi = 'testcases/1430.mid' 18 | boundaries, labs = segmenter.proc_midi(path_midi) 19 | segmenter.plot(outdir='doc/midi') 20 | print('boundaries:', boundaries) 21 | print('labs:', labs) -------------------------------------------------------------------------------- /test_process.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sf_segmenter.vis import plot_feats 3 | from sf_segmenter.segmenter import Segmenter 4 | 5 | 6 | # --- audio --- # 7 | print(' [*] runnung audio...') 8 | 9 | import librosa 10 | import soundfile as sf 11 | from sf_segmenter.feature import audio_extract_pcp 12 | 13 | 14 | # dummy input 15 | # input_feat = np.zeros((442, 12)) # T x F 16 | 17 | # real input 18 | path_audio = librosa.util.example_audio_file() 19 | # y, sr = sf.read(path_audio) 20 | y, sr = librosa.load(path_audio) 21 | pcp = audio_extract_pcp(y, sr) 22 | 23 | # init 24 | segmenter = Segmenter() 25 | 26 | # run 27 | boundaries, labs = segmenter.process(pcp, is_label=True) 28 | print('boundaries:', boundaries) 29 | print('labs:', labs) 30 | 31 | # vis 32 | segmenter.plot(outdir='doc/audio') 33 | 34 | 35 | # --- midi --- # 36 | print(' [*] runnung midi...') 37 | import miditoolkit 38 | from miditoolkit.midi import parser as mid_parser 39 | from miditoolkit.pianoroll import parser as pr_parser 40 | from sf_segmenter.feature import midi_extract_beat_sync_pianoroll 41 | 42 | # parse midi to pianoroll 43 | path_midi = miditoolkit.midi.utils.example_midi_file() 44 | midi_obj = mid_parser.MidiFile(path_midi) 45 | 46 | notes = midi_obj.instruments[0].notes 47 | pianoroll = pr_parser.notes2pianoroll( 48 | notes) 49 | 50 | # pianoroll to beat sync pianoroll 51 | pianoroll_sync = midi_extract_beat_sync_pianoroll( 52 | pianoroll, 53 | midi_obj.ticks_per_beat) 54 | 55 | print('pianoroll_sync :', pianoroll_sync.shape) 56 | # init 57 | segmenter = Segmenter() 58 | 59 | # run 60 | boundaries, labs = segmenter.process(pianoroll_sync, is_label=True) 61 | print('boundaries:', boundaries) 62 | print('labs:', labs) 63 | 64 | # vis 65 | segmenter.plot(outdir='doc/midi') -------------------------------------------------------------------------------- /testcases/1430.mid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/testcases/1430.mid --------------------------------------------------------------------------------