├── .gitignore
├── README.md
├── doc
    ├── audio
    │   ├── L.png
    │   ├── R.png
    │   ├── SF.png
    │   ├── input.png
    │   ├── lab_S.png
    │   ├── lab_S_final.png
    │   ├── lab_S_trans.png
    │   └── nc.png
    └── midi
    │   ├── L.png
    │   ├── R.png
    │   ├── SF.png
    │   ├── input.png
    │   ├── lab_S.png
    │   ├── lab_S_final.png
    │   ├── lab_S_trans.png
    │   └── nc.png
├── setup.py
├── sf_segmenter
    ├── __init__.py
    ├── feature.py
    ├── segmenter.py
    └── vis.py
├── test_IO.py
├── test_process.py
└── testcases
    └── 1430.mid


/.gitignore:
--------------------------------------------------------------------------------
 1 | __pycache__/
 2 | .vscode/
 3 | .ipynb_checkpoints/
 4 | .DS_Store
 5 | miditoolkit.egg-info/
 6 | @eaDir
 7 | *.pyc
 8 | *.pypirc
 9 | Thumbs.db
10 | *.gz


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # sf_segmenter
 2 | ---
 3 | Music segmentation/labeling algorithm based on the SF (structural features) method.   
 4 | Modified from [MSAF](https://github.com/urinieto/msaf). I simplified the IO for arbitrary input features and quick experiments. 
 5 | 
 6 | ## Installation 
 7 | version: 0.0.2
 8 | ```
 9 | pip install sf-segmenter
10 | ```
11 | 
12 | ## Reference
13 | * Serrà, J., Müller, M., Grosche, P., & Arcos, J. L. (2012). Unsupervised Detection of Music Boundaries by Time Series Structure Features. In Proc. of the 26th AAAI Conference on Artificial Intelligence (pp. 1613–1619). Toronto, Canada.
14 | 
15 | * J. Serrà, M. Müller, P. Grosche and J. L. Arcos, "Unsupervised Music Structure Annotation by Time Series Structure Features and Segment Similarity," in IEEE Transactions on Multimedia, vol. 16, no. 5, pp. 1229-1240, Aug. 2014, doi: 10.1109/TMM.2014.2310701.
16 | 
17 | ## Resources
18 | 
19 | * audiolabs/FMP course/Chapter 4: Music Structure Analysis: https://www.audiolabs-erlangen.de/resources/MIR/FMP/C4/C4.html
20 | 
21 | 


--------------------------------------------------------------------------------
/doc/audio/L.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/audio/L.png


--------------------------------------------------------------------------------
/doc/audio/R.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/audio/R.png


--------------------------------------------------------------------------------
/doc/audio/SF.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/audio/SF.png


--------------------------------------------------------------------------------
/doc/audio/input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/audio/input.png


--------------------------------------------------------------------------------
/doc/audio/lab_S.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/audio/lab_S.png


--------------------------------------------------------------------------------
/doc/audio/lab_S_final.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/audio/lab_S_final.png


--------------------------------------------------------------------------------
/doc/audio/lab_S_trans.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/audio/lab_S_trans.png


--------------------------------------------------------------------------------
/doc/audio/nc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/audio/nc.png


--------------------------------------------------------------------------------
/doc/midi/L.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/midi/L.png


--------------------------------------------------------------------------------
/doc/midi/R.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/midi/R.png


--------------------------------------------------------------------------------
/doc/midi/SF.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/midi/SF.png


--------------------------------------------------------------------------------
/doc/midi/input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/midi/input.png


--------------------------------------------------------------------------------
/doc/midi/lab_S.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/midi/lab_S.png


--------------------------------------------------------------------------------
/doc/midi/lab_S_final.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/midi/lab_S_final.png


--------------------------------------------------------------------------------
/doc/midi/lab_S_trans.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/midi/lab_S_trans.png


--------------------------------------------------------------------------------
/doc/midi/nc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/doc/midi/nc.png


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | setup(
 4 |     name='sf_segmenter',
 5 |     version='0.0.2',
 6 |     description='',
 7 |     author='wayne391',
 8 |     author_email='s101062219@gmail.com',
 9 |     url='https://github.com/wayne391/sf_segmenter',
10 |     packages=find_packages(),
11 |     classifiers=[
12 |         "License :: OSI Approved :: MIT License",
13 |         "Programming Language :: Python",
14 |     ],
15 |     keywords='music audio midi mir',
16 |     license='MIT',
17 |     install_requires=[
18 |         'miditoolkit >= 0.1.14',
19 |     ]
20 | )
21 | 
22 | 
23 | """
24 | python setup.py sdist
25 | twine upload dist/*
26 | """


--------------------------------------------------------------------------------
/sf_segmenter/__init__.py:
--------------------------------------------------------------------------------
1 | from .feature import *
2 | from .segmenter import *
3 | from .vis import *
4 | 
5 | __version__ = '0.0.2'


--------------------------------------------------------------------------------
/sf_segmenter/feature.py:
--------------------------------------------------------------------------------
 1 | import librosa
 2 | import numpy as np
 3 | from miditoolkit.pianoroll import utils as mt_utils
 4 | 
 5 | 
 6 | def audio_extract_pcp(
 7 |         audio, 
 8 |         sr,
 9 |         n_fft=4096,
10 |         hop_len=int(4096 * 0.75),
11 |         pcp_bins=84,
12 |         pcp_norm=np.inf,
13 |         pcp_f_min=27.5,
14 |         pcp_n_octaves=6):
15 | 
16 |     audio_harmonic, _ = librosa.effects.hpss(audio)
17 |     pcp_cqt = np.abs(librosa.hybrid_cqt(
18 |                 audio_harmonic,
19 |                 sr=sr,
20 |                 hop_length=hop_len,
21 |                 n_bins=pcp_bins,
22 |                 norm=pcp_norm,
23 |                 fmin=pcp_f_min)) ** 2
24 | 
25 |     pcp = librosa.feature.chroma_cqt(
26 |                 C=pcp_cqt,
27 |                 sr=sr,
28 |                 hop_length=hop_len,
29 |                 n_octaves=pcp_n_octaves,
30 |                 fmin=pcp_f_min).T
31 |     return pcp
32 | 
33 | 
def midi_extract_beat_sync_pianoroll(
        pianoroll,
        beat_resol,
        is_tochroma=False):
    """Aggregate a tick-level pianoroll into one row per beat and rescale it.

    Parameters
    ----------
    pianoroll: np.array
        2-D array indexed as ``[tick, pitch]``.
    beat_resol: int
        Number of ticks per beat; every ``beat_resol`` consecutive rows are
        summed into one output row (the last beat may be shorter).
    is_tochroma: bool
        When True the result is passed through ``mt_utils.tochroma``.

    Returns
    -------
    np.array
        Beat-synchronous pianoroll scaled to ``[0, 1]``.  A constant (e.g.
        silent) or empty pianoroll yields all zeros instead of NaN.
    """
    # sync to beat
    n_beats = int(np.ceil(pianoroll.shape[0] / beat_resol))
    beat_sync_pr = np.zeros((n_beats, pianoroll.shape[1]))
    for beat in range(n_beats):
        st = beat * beat_resol
        ed = st + beat_resol
        beat_sync_pr[beat] = np.sum(pianoroll[st:ed, :], axis=0)

    # normalize
    # A zero value range means std == 0 and max == min, so both rescaling
    # steps below would divide by zero; fall back to an all-zero matrix.
    if beat_sync_pr.size and beat_sync_pr.max() > beat_sync_pr.min():
        beat_sync_pr = (
            beat_sync_pr - beat_sync_pr.mean()) / beat_sync_pr.std()
        lowest = beat_sync_pr.min()
        beat_sync_pr = (
            beat_sync_pr - lowest) / (beat_sync_pr.max() - lowest)
    else:
        beat_sync_pr = np.zeros_like(beat_sync_pr)

    # to chroma
    if is_tochroma:
        beat_sync_pr = mt_utils.tochroma(beat_sync_pr)
    return beat_sync_pr
59 | 


--------------------------------------------------------------------------------
/sf_segmenter/segmenter.py:
--------------------------------------------------------------------------------
  1 | import librosa
  2 | import soundfile as sf
  3 | import numpy as np
  4 | from scipy import signal
  5 | from scipy.spatial import distance
  6 | from scipy.ndimage import filters
  7 | 
  8 | from .vis import plot_feats
  9 | from .feature import midi_extract_beat_sync_pianoroll, audio_extract_pcp
 10 | 
 11 | 
 12 | import miditoolkit
 13 | from miditoolkit.midi import parser as mid_parser
 14 | from miditoolkit.pianoroll import parser as pr_parser
 15 | 
# Default parameter set read by Segmenter.process(); also used as the default
# value of Segmenter.__init__'s ``config`` argument.
CONFIG = {
    "M_gaussian": 27,      # size of the gaussian kernel (in beats)
    "m_embedded": 3,       # number of embedded dimensions
    "k_nearest": 0.04,     # k*N-nearest neighbors for the recurrence plot
    "Mp_adaptive": 28,     # size of the adaptive threshold for peak picking
    "offset_thres": 0.05,  # offset coefficient for adaptive thresholding
    # norm_type handed to normalize() before the boundary computation
    "bound_norm_feats": np.inf  # min_max, log, np.inf,
                                # -np.inf, float >= 0, None
    # Alternative values for framesync features
    # "M_gaussian"    : 100,
    # "m_embedded"    : 3,
    # "k_nearest"     : 0.06,
    # "Mp_adaptive"   : 100,
    # "offset_thres"  : 0.01
}
 31 | 
 32 | 
 33 | def cummulative_sum_Q(R):
 34 |     len_x, len_y = R.shape
 35 |     Q = np.zeros((len_x + 2, len_y + 2))
 36 |     for i in range(len_x):
 37 |         for j in range(len_y):
 38 |             Q[i+2, j+2] = max(
 39 |                     Q[i+1, j+1],
 40 |                     Q[i, j+1],
 41 |                     Q[i+1, j]) + R[i, j]
 42 |     return np.max(Q)
 43 |     
 44 | 
 45 | def normalize(X, norm_type, floor=0.0, min_db=-80):
 46 |     """Normalizes the given matrix of features.
 47 |     Parameters
 48 |     ----------
 49 |     X: np.array
 50 |         Each row represents a feature vector.
 51 |     norm_type: {"min_max", "log", np.inf, -np.inf, 0, float > 0, None}
 52 |         - `"min_max"`: Min/max scaling is performed
 53 |         - `"log"`: Logarithmic scaling is performed
 54 |         - `np.inf`: Maximum absolute value
 55 |         - `-np.inf`: Mininum absolute value
 56 |         - `0`: Number of non-zeros
 57 |         - float: Corresponding l_p norm.
 58 |         - None : No normalization is performed
 59 |     Returns
 60 |     -------
 61 |     norm_X: np.array
 62 |         Normalized `X` according the the input parameters.
 63 |     """
 64 |     if isinstance(norm_type, str):
 65 |         if norm_type == "min_max":
 66 |             return min_max_normalize(X, floor=floor)
 67 |         if norm_type == "log":
 68 |             return lognormalize(X, floor=floor, min_db=min_db)
 69 |     return librosa.util.normalize(X, norm=norm_type, axis=1)
 70 | 
 71 | 
 72 | def median_filter(X, M=8):
 73 |     """Median filter along the first axis of the feature matrix X."""
 74 |     for i in range(X.shape[1]):
 75 |         X[:, i] = filters.median_filter(X[:, i], size=M)
 76 |     return X
 77 | 
 78 | 
 79 | def gaussian_filter(X, M=8, axis=0):
 80 |     """Gaussian filter along the first axis of the feature matrix X."""
 81 |     for i in range(X.shape[axis]):
 82 |         if axis == 1:
 83 |             X[:, i] = filters.gaussian_filter(X[:, i], sigma=M / 2.)
 84 |         elif axis == 0:
 85 |             X[i, :] = filters.gaussian_filter(X[i, :], sigma=M / 2.)
 86 |     return X
 87 | 
 88 | 
 89 | def compute_gaussian_krnl(M):
 90 |     """Creates a gaussian kernel following Serra's paper."""
 91 |     g = signal.gaussian(M, M / 3., sym=True)
 92 |     G = np.dot(g.reshape(-1, 1), g.reshape(1, -1))
 93 |     G[M // 2:, :M // 2] = -G[M // 2:, :M // 2]
 94 |     G[:M // 2, M // 1:] = -G[:M // 2, M // 1:]
 95 |     return G
 96 | 
 97 | 
 98 | def compute_nc(X):
 99 |     """Computes the novelty curve from the structural features."""
100 |     N = X.shape[0]
101 |     # nc = np.sum(np.diff(X, axis=0), axis=1) # Difference between SF's
102 | 
103 |     nc = np.zeros(N)
104 |     for i in range(N - 1):
105 |         nc[i] = distance.euclidean(X[i, :], X[i + 1, :])
106 | 
107 |     # Normalize
108 |     nc += np.abs(nc.min())
109 |     nc /= float(nc.max())
110 |     return nc
111 | 
112 | 
def pick_peaks(nc, L=16, offset_denom=0.1):
    """Return the indices of novelty-curve peaks above an adaptive threshold.

    The threshold is a median filter of ``nc`` (window ``L``) raised by
    ``offset_denom`` times the mean of ``nc``.  An index qualifies when it
    is a strict local maximum and its value exceeds the threshold there.
    The first and last samples are never reported.
    """
    threshold = (
        filters.median_filter(nc, size=L) + nc.mean() * float(offset_denom))
    return [
        idx
        for idx in range(1, nc.shape[0] - 1)
        if nc[idx - 1] < nc[idx] > nc[idx + 1] and nc[idx] > threshold[idx]
    ]
131 | 
132 | 
def circular_shift(X):
    """Build the time-lag matrix of the square matrix ``X``.

    Entry ``[i, j]`` of the result is ``X[(i + j) % N, j]``, i.e. column
    ``j`` is rotated upwards by ``j`` positions.  The result is a new
    float array; ``X`` is left untouched.
    """
    size = X.shape[0]
    cols = np.arange(size)
    # rows[i, j] == (i + j) % size
    rows = (cols[:, np.newaxis] + cols[np.newaxis, :]) % size
    lagged = np.zeros(X.shape)
    lagged[:, :] = X[rows, cols]
    return lagged
141 | 
142 | 
def embedded_space(X, m, tau=1):
    """Stack consecutive feature vectors into a time-delay embedding.

    Output row ``i`` is the concatenation of rows ``i .. i + int(m) - 1``
    of ``X``; for a fractional ``m`` the leading part of row
    ``i + int(m)`` is appended as well.  The result has
    ``X.shape[0] - ceil(m)`` rows.  ``tau`` is accepted but not used.
    """
    whole = int(m)
    n_feats = X.shape[1]
    n_rows = X.shape[0] - int(np.ceil(m))
    # Number of extra columns contributed by the fractional part of m.
    extra = int((m % 1) * n_feats)

    Y = np.zeros((n_rows, int(np.ceil(n_feats * m))))
    for start in range(n_rows):
        stop = start + whole
        stacked = X[start:stop, :].flatten()
        Y[start, :] = np.concatenate((stacked, X[stop, :extra]))
    return Y
154 | 
155 | 
def run_label(
        boundaries,
        R,
        max_iter=100,
        return_feat=False):
    """Labeling algorithm: group the segments delimited by `boundaries`.

    Parameters
    ----------
    boundaries:
        Sorted sequence of integer frame indices; consecutive entries
        delimit one segment each.
    R: np.array
        Recurrence matrix, sliced per segment pair to score similarity.
    max_iter: int
        Number of times the thresholded similarity matrix is multiplied
        onto itself.
    return_feat: bool
        When True the intermediate matrices are returned as well.

    Returns
    -------
    labs: np.array
        Float array with one label per segment (``len(boundaries) - 1``
        entries).
    (S, S_trans, S_final): tuple of np.array
        Only when `return_feat` is True: the thresholded segment similarity
        matrix, its iterated product, and the boolean matrix
        ``S_trans > 1``.
    """
    n_boundaries = len(boundaries)

    # compute S: best path score between every pair of segments, divided by
    # the length of the shorter segment.  The last row/column stays 0
    # because there is one segment fewer than boundaries.
    S = np.zeros((n_boundaries, n_boundaries))
    for i in range(n_boundaries - 1):
        i_st, i_ed = boundaries[i], boundaries[i + 1]
        len_i = i_ed - i_st
        for j in range(n_boundaries - 1):
            j_st, j_ed = boundaries[j], boundaries[j + 1]
            len_j = j_ed - j_st
            score = cummulative_sum_Q(R[i_st:i_ed, j_st:j_ed])
            S[i, j] = score / min(len_i, len_j)

    # threshold: keep only similarities above mean + std
    thr = np.std(S) + np.mean(S)
    S[S <= thr] = 0

    # iteration: repeated multiplication propagates similarity transitively
    S_trans = S.copy()
    for _ in range(max_iter):
        S_trans = np.matmul(S_trans, S)

    S_final = S_trans > 1

    # proc output: each still-unlabelled segment opens a new label, which is
    # then assigned to every segment flagged in its row of S_final (this may
    # overwrite a label given earlier).  The per-segment debug print that
    # used to live in this loop has been removed.
    n_seg = len(S_trans) - 1
    labs = np.ones(n_seg) * -1
    cur_tag = -1
    for i in range(n_seg):
        if labs[i] == -1:
            cur_tag += 1
            labs[i] = cur_tag
            labs[S_final[i, :n_seg]] = cur_tag

    if return_feat:
        return labs, (S, S_trans, S_final)
    return labs
206 | 
207 | 
class Segmenter(object):
    """Structural-features (SF) boundary detector and segment labeler.

    After a call to `process` (or `proc_midi` / `proc_audio`) the
    intermediate matrices and the results are available as attributes;
    see `refresh` for the full list.  Attributes that were not computed
    (e.g. for very short inputs) are left as None.
    """

    def __init__(self, config=CONFIG):
        """Store the parameter dict `config` (see `CONFIG` for the keys)."""
        self.config = config
        self.refresh()

    def refresh(self):
        """Reset every cached feature and result to None."""
        # collect feats
        # - segmentation
        self.F = None    # normalized input features
        self.E = None    # time-delay embedding of F
        self.R = None    # recurrence matrix
        self.L = None    # time-lag (circularly shifted) matrix
        self.SF = None   # gaussian-filtered lag matrix
        self.nc = None   # novelty curve

        # - labeling
        self.S = None
        self.S_trans = None
        self.S_final = None

        # - res
        self.boundaries = None
        self.labs = None

    def proc_midi(self, path_midi, is_label=True):
        """Segment a MIDI file.

        Only the notes of the first instrument are used.  They are rendered
        to a pianoroll, synchronized to beats and passed to `process`.
        Returns whatever `process` returns.
        """
        # parse midi to pianoroll
        midi_obj = mid_parser.MidiFile(path_midi)
        notes = midi_obj.instruments[0].notes
        pianoroll = pr_parser.notes2pianoroll(
                            notes)

        # pianoroll to beat sync pianoroll
        pianoroll_sync = midi_extract_beat_sync_pianoroll(
                pianoroll,
                midi_obj.ticks_per_beat)

        return self.process(pianoroll_sync, is_label=is_label)

    def proc_audio(self, path_audio, sr=22050, is_label=True):
        """Segment an audio file loaded at sampling rate `sr`.

        The chroma features from `audio_extract_pcp` are passed to
        `process`.  Returns whatever `process` returns.
        """
        y, sr = librosa.load(path_audio, sr=sr)
        pcp = audio_extract_pcp(y, sr)
        return self.process(pcp, is_label=is_label)

    def process(
            self,
            F,
            is_label=False):
        """Main process.

        Parameters
        ----------
        F: np.array
            Feature matrix, T x D (one feature vector per frame).
        is_label: bool
            Whether to run the labeling step after boundary detection.

        Returns
        -------
        est_idxs: np.array
            Integer boundary indices, always including the first (0) and
            last (T - 1) frame.
        labs: np.array
            Only when `is_label` is True: one label per segment.
        """
        self.refresh()

        # Structural Features params
        Mp = self.config["Mp_adaptive"]   # Size of the adaptive threshold for
                                          # peak picking
        od = self.config["offset_thres"]  # Offset coefficient for adaptive
                                          # thresholding
        M = self.config["M_gaussian"]     # Size of gaussian kernel in beats
        m = self.config["m_embedded"]     # Number of embedded dimensions
        k = self.config["k_nearest"]      # k*N-nearest neighbors for the
                                          # recurrence plot

        # Normalize
        F = normalize(F, norm_type=self.config["bound_norm_feats"])

        # Defaults for tracks that are too short to analyse; previously
        # these names were simply unbound on that path.
        E = R = L = SF = nc = None
        est_bounds = []

        # Check size in case the track is too short
        if F.shape[0] > 20:
            # Emedding the feature space (i.e. shingle)
            E = embedded_space(F, m)

            # Recurrence matrix
            R = librosa.segment.recurrence_matrix(
                E.T,
                k=k * int(F.shape[0]),
                width=1,  # zeros from the diagonal
                metric="euclidean",
                sym=True).astype(np.float32)

            # Circular shift
            L = circular_shift(R)

            # Obtain structural features by filtering the lag matrix.
            # gaussian_filter works in place, so filter a copy: passing the
            # view `L.T` would overwrite L itself.
            SF = gaussian_filter(L.T.copy(), M=M, axis=1)
            SF = gaussian_filter(SF, M=1, axis=0)

            # Compute the novelty curve
            nc = compute_nc(SF)

            # Find peaks in the novelty curve
            est_bounds = pick_peaks(nc, L=Mp, offset_denom=od)

            # Re-align embedded space
            est_bounds = (
                np.asarray(est_bounds, dtype=int) + int(np.ceil(m / 2.)))

        # Add first and last frames.  The explicit integer cast matters when
        # no peak was found: concatenating with an empty list would yield
        # floats, which cannot be used as slice indices for labeling.
        est_idxs = np.concatenate(([0], est_bounds, [F.shape[0] - 1]))
        est_idxs = np.unique(est_idxs).astype(int)

        assert est_idxs[0] == 0 and est_idxs[-1] == F.shape[0] - 1

        # collect  feature
        self.F = F
        self.E = E
        self.R = R
        self.L = L
        self.SF = SF
        self.nc = nc
        self.boundaries = est_idxs

        if not is_label:
            return est_idxs

        if R is None:
            # Track too short for a recurrence matrix: every segment
            # (normally just one) gets label 0.
            labs = np.zeros(len(est_idxs) - 1)
        else:
            labs, (S, S_trans, S_final) = run_label(
                est_idxs,
                R,
                return_feat=True)

            self.S = S
            self.S_trans = S_trans
            self.S_final = S_final

        self.labs = labs
        return est_idxs, labs

    def plot(
            self,
            outdir=None,
            vis_bounds=True):
        """Save plots of all cached features via `plot_feats`.

        `outdir` is the target directory and `vis_bounds` toggles the
        boundary overlay; both are forwarded unchanged.
        """
        plot_feats(
            F=self.F,
            R=self.R,
            L=self.L,
            SF=self.SF,
            nc=self.nc,
            S=self.S,
            S_trans=self.S_trans,
            S_final=self.S_final,
            boundaries=self.boundaries,
            outdir=outdir,
            vis_bounds=vis_bounds)
356 | 


--------------------------------------------------------------------------------
/sf_segmenter/vis.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import matplotlib.pyplot as plt
  3 | 
  4 | """
  5 |     data = {
  6 |         'input': segmenter.F,
  7 |         'R': segmenter.R,
  8 |         'L': segmenter.L,
  9 |         'SF': segmenter.SF,
 10 |         'nc': segmenter.nc,
 11 |     }
 12 | 
 13 | """
 14 | def plot_feats(
 15 |         F=None,
 16 |         R=None,
 17 |         L=None,
 18 |         SF=None,
 19 |         nc=None,
 20 |         S=None,
 21 |         S_trans=None,
 22 |         S_final=None,
 23 |         boundaries=None,
 24 |         outdir=None,
 25 |         vis_bounds=False):
 26 | 
 27 |     if outdir:
 28 |         os.makedirs(outdir, exist_ok=True)
 29 |     print(' [o] save to ...', outdir)
 30 |         
 31 |     # plot input feature
 32 |     if F is not None:
 33 |         plt.figure()
 34 |         plt.imshow(F.T, interpolation="nearest", aspect="auto")
 35 |         plt.title('input feature')
 36 |         plt.savefig(os.path.join(outdir, 'input.png'))
 37 |         # plt.show()
 38 |         plt.close()
 39 | 
 40 |     # plot recurrence plot (R)
 41 |     if R is not None:
 42 |         plt.figure(figsize=(5, 5))
 43 |         plt.imshow(R, interpolation="nearest", cmap=plt.get_cmap("binary"))
 44 |         if vis_bounds and boundaries is not None:
 45 |             [plt.axvline(p, color="red", linestyle=':') for p in boundaries]
 46 |             [plt.axhline(p, color="red", linestyle=':') for p in boundaries]
 47 |         plt.title('recurrence plot')
 48 |         plt.savefig(os.path.join(outdir, 'R.png'))
 49 |         # plt.show()
 50 |         plt.close()
 51 | 
 52 |     # plot time-lag (L)
 53 |     if L is not None:
 54 |         plt.figure(figsize=(5, 5))
 55 |         plt.imshow(L, interpolation="nearest", cmap=plt.get_cmap("binary"))
 56 |         if vis_bounds and boundaries is not None:
 57 |             [plt.axvline(p, color="red", linestyle=':') for p in boundaries]
 58 |             [plt.axhline(p, color="red", linestyle=':') for p in boundaries]
 59 |         plt.savefig(os.path.join(outdir, 'L.png'))
 60 |         # plt.show()
 61 |         plt.close()
 62 | 
 63 |     #  plot smoothed time-lag (SF)
 64 |     if SF is not None:
 65 |         plt.figure(figsize=(5, 5))
 66 |         plt.imshow(SF.T, interpolation="nearest", cmap=plt.get_cmap("binary"))
 67 |         plt.title('SF (after filtering)')
 68 |         if vis_bounds and boundaries is not None:
 69 |             [plt.axvline(p, color="red", linestyle=':') for p in boundaries]
 70 |             [plt.axhline(p, color="red", linestyle=':') for p in boundaries]
 71 |         plt.savefig(os.path.join(outdir, 'SF.png'))
 72 |         # plt.show(block=False))
 73 |         plt.close()
 74 | 
 75 |     # plot novelty cureve (nc)
 76 |     if nc is not None:
 77 |         plt.figure()
 78 |         plt.plot(nc)
 79 |         if boundaries is not None:
 80 |             [plt.axvline(p, color="green", linestyle=':') for p in boundaries]
 81 |         plt.title('novelty curve')
 82 |         plt.savefig(os.path.join(outdir, 'nc.png'))
 83 |         # plt.show()
 84 |         plt.close()
 85 | 
 86 |     # labeling features
 87 |     if S is not None:
 88 |         plt.figure(figsize=(5, 5))
 89 |         plt.imshow(S, interpolation="nearest", cmap=plt.get_cmap("binary"))
 90 |         plt.title('labeling: S')
 91 |         plt.savefig(os.path.join(outdir, 'lab_S.png'))
 92 |         plt.close()
 93 | 
 94 |     if S_trans is not None:
 95 |         plt.figure(figsize=(5, 5))
 96 |         plt.imshow(S_trans, interpolation="nearest", cmap=plt.get_cmap("binary"))
 97 |         plt.title('labeling: S trans')
 98 |         plt.savefig(os.path.join(outdir, 'lab_S_trans.png'))
 99 |         plt.close()
100 |        
101 |     if S_final is not None:
102 |         plt.figure(figsize=(5, 5))
103 |         plt.imshow(S_final, interpolation="nearest", cmap=plt.get_cmap("binary"))
104 |         plt.title('labeling: S final')
105 |         plt.savefig(os.path.join(outdir, 'lab_S_final.png'))
106 |         plt.close()
107 | 


--------------------------------------------------------------------------------
/test_IO.py:
--------------------------------------------------------------------------------
 1 | from sf_segmenter.segmenter import Segmenter
 2 | import librosa
 3 | import miditoolkit
 4 | 
# Smoke-test script for the file-based entry points of Segmenter
# (proc_audio / proc_midi).  It runs at import time and writes the resulting
# plots into doc/audio and doc/midi.

# init
segmenter = Segmenter()

# audio: segment and label librosa's bundled example recording
# NOTE(review): librosa.util.example_audio_file may not exist in newer
# librosa releases -- confirm against the installed version.
path_audio = librosa.util.example_audio_file()
boundaries, labs = segmenter.proc_audio(path_audio)
segmenter.plot(outdir='doc/audio')
print('boundaries:', boundaries)
print('labs:', labs)

# midi: segment and label the MIDI test case shipped with the repository
# (the same Segmenter instance is reused; process() resets its cached state)
# path_midi = miditoolkit.midi.utils.example_midi_file()
path_midi = 'testcases/1430.mid'
boundaries, labs = segmenter.proc_midi(path_midi)
segmenter.plot(outdir='doc/midi')
print('boundaries:', boundaries)
print('labs:', labs)


--------------------------------------------------------------------------------
/test_process.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sf_segmenter.vis import plot_feats
 3 | from sf_segmenter.segmenter import Segmenter
 4 | 
 5 | 
# Smoke-test script for Segmenter.process() with features that are extracted
# by hand (instead of going through proc_audio / proc_midi).  It runs at
# import time and writes the resulting plots into doc/audio and doc/midi.

# --- audio --- #
print(' [*] runnung audio...')

import librosa
import soundfile as sf
from sf_segmenter.feature import audio_extract_pcp


# dummy input
# input_feat = np.zeros((442, 12)) # T x F

# real input: chroma features of librosa's bundled example recording
# NOTE(review): librosa.util.example_audio_file may not exist in newer
# librosa releases -- confirm against the installed version.
path_audio = librosa.util.example_audio_file()
# y, sr = sf.read(path_audio)
y, sr = librosa.load(path_audio)
pcp = audio_extract_pcp(y, sr)

#  init
segmenter = Segmenter()

# run boundary detection and labeling
boundaries, labs = segmenter.process(pcp, is_label=True)
print('boundaries:', boundaries)
print('labs:', labs)

# vis
segmenter.plot(outdir='doc/audio')


# --- midi --- #
print(' [*] runnung midi...')
import miditoolkit
from miditoolkit.midi import parser as mid_parser
from miditoolkit.pianoroll import parser as pr_parser
from sf_segmenter.feature import midi_extract_beat_sync_pianoroll

# parse midi to pianoroll (only the first instrument's notes are used)
path_midi = miditoolkit.midi.utils.example_midi_file()
midi_obj = mid_parser.MidiFile(path_midi)

notes = midi_obj.instruments[0].notes
pianoroll = pr_parser.notes2pianoroll(
                    notes)

# pianoroll to beat sync pianoroll
pianoroll_sync = midi_extract_beat_sync_pianoroll(
        pianoroll,
        midi_obj.ticks_per_beat)

print('pianoroll_sync :', pianoroll_sync.shape)
#  init (a fresh Segmenter for the MIDI run)
segmenter = Segmenter()

# run boundary detection and labeling
boundaries, labs = segmenter.process(pianoroll_sync, is_label=True)
print('boundaries:', boundaries)
print('labs:', labs)

# vis
segmenter.plot(outdir='doc/midi')


--------------------------------------------------------------------------------
/testcases/1430.mid:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wayne391/sf_segmenter/a02eecf66ad73839999399478dfdb2ba8a5716be/testcases/1430.mid


--------------------------------------------------------------------------------