├── README.md ├── metric ├── README.md ├── ScoreSimilarity.py └── ScoreSimilarity_orig.py └── tokenization_tools ├── README.md ├── detokenizer ├── README.md ├── sample │ ├── generated_score.musicxml │ ├── input_tokens.txt │ └── sample_usage.ipynb └── tokens_to_score.py ├── requirements.txt └── tokenizer ├── README.md ├── sample ├── generated_tokens.txt ├── input_score.musicxml └── sample_usage.ipynb └── score_to_tokens.py /README.md: -------------------------------------------------------------------------------- 1 | # Score Transformer 2 | 3 | This is the official repository for "Score Transformer" (ACM Multimedia Asia 2021 / ISMIR2021 LBD). 4 | 5 | [Paper](https://arxiv.org/abs/2112.00355) | [Short paper](https://archives.ismir.net/ismir2021/latebreaking/000032.pdf) | [Project page](https://score-transformer.github.io/) 6 | 7 | 13 | 14 | ## Overview 15 | 16 | This repository provides: 17 | - [**Tokenization tools**](tokenization_tools) between MusicXML scores and score tokens 18 | - **note: updated version is available [here](https://github.com/suzuqn/ScoreRearrangement)!** 19 | - A [**metric**](metric) used in the papers 20 | 21 | ## Citation 22 | If you find this repository helpful, please consider citing our paper: 23 | ``` 24 | @inproceedings{suzuki2021, 25 | author = {Suzuki, Masahiro}, 26 | title = {Score Transformer: Generating Musical Score from Note-level Representation}, 27 | booktitle = {Proceedings of the 3rd ACM International Conference on Multimedia in Asia}, 28 | year = {2021}, 29 | pages = {31:1--31:7}, 30 | doi = {10.1145/3469877.3490612} 31 | } 32 | ``` 33 | -------------------------------------------------------------------------------- /metric/README.md: -------------------------------------------------------------------------------- 1 | ### MetricForScoreSimilarity 2 | The original implementation is from https://github.com/AndreaCogliati/MetricForScoreSimilarity, which is the official implementation for the paper "A metric for Music Notation 
Transcription Accuracy." 3 | 4 | We partially modified this implementation as described in our paper (see Section 5.4): 5 | 6 | 1. added three musical aspects (*voice*, *beam*, and *tie*) to evaluate our model thoroughly 7 | 2. excluded two aspects (*barline* and *note grouping*) that were also measured using other aspects (*time signature* vs. *barline*, and *voice* vs. *note grouping*) 8 | 3. separated *insertion* and *deletion* errors 9 | 10 | and post-process the result using Pandas in a following way: 11 | 12 | 4. integrated *note* and *rest* metrics 13 | 5. calculated error rates note-wisely -------------------------------------------------------------------------------- /metric/ScoreSimilarity.py: -------------------------------------------------------------------------------- 1 | import music21 2 | import numpy as np 3 | from enum import IntEnum 4 | import copy 5 | import itertools 6 | 7 | 8 | class ScoreErrors(IntEnum): 9 | Clef = 0 10 | KeySignature = 1 11 | TimeSignature = 2 12 | NoteDeletion = 3 13 | NoteInsertion = 4 14 | NoteSpelling = 5 15 | NoteDuration = 6 16 | StemDirection = 7 17 | Beams = 8 # added 18 | Tie = 9 # added 19 | RestInsertion = 10 20 | RestDeletion = 11 21 | RestDuration = 12 22 | StaffAssignment = 13 23 | Voice = 14 # added 24 | 25 | def scoreAlignment(aScore, bScore): 26 | """Compare two musical scores. 
27 | 28 | Parameters: 29 | 30 | aScore/bScore: music21.stream.Score objects 31 | 32 | Return value: 33 | 34 | (path, d): 35 | path is a list of tuples containing pairs of matching offsets 36 | d is the alignment matrix 37 | """ 38 | 39 | def convertScoreToListOfPitches(aScore): 40 | """Convert a piano score into a list of tuples containing pitches 41 | 42 | Parameter: 43 | aScore a music21.Stream containing two music21.stream.PartStaff 44 | 45 | Return value: 46 | list of tuples (offset, pitches) 47 | offset is a real number indicating the offset of an object in music21 terms 48 | pitches is a list of pitches in MIDI numbers 49 | """ 50 | 51 | def getPitches(el): 52 | if isinstance(el, music21.note.Note): 53 | return [el.pitch.midi] 54 | elif isinstance(el, music21.chord.Chord): 55 | currentList = [] 56 | for pitch in el.pitches: 57 | currentList.append(pitch.midi) 58 | return currentList 59 | 60 | def convertStreamToList(aStream): 61 | aList = [] 62 | currentOffset = 0.0 63 | currentList = [] 64 | for el in aStream: 65 | if el.offset == currentOffset: 66 | currentList += getPitches(el) 67 | else: 68 | aList.append((currentOffset, currentList)) 69 | currentOffset = el.offset 70 | currentList = getPitches(el) 71 | return aList 72 | 73 | def flattenStream(aStream): 74 | newStream = music21.stream.Stream() 75 | for el in aStream.recurse(): 76 | if isinstance(el, music21.note.Note) or isinstance(el, music21.chord.Chord): 77 | newStream.insert(el.getOffsetInHierarchy(aStream), el) 78 | return newStream 79 | 80 | # aList = convertStreamToList(aScore.flat.notes) 81 | 82 | # added 83 | parts = aScore.getElementsByClass([music21.stream.PartStaff, music21.stream.Part]) 84 | flat_notes = sorted(itertools.chain.from_iterable([flattenStream(part).elements for part in parts]), key=lambda x:x.offset) 85 | aList = convertStreamToList(flat_notes) 86 | 87 | return aList 88 | 89 | def compareSets(aSet, bSet): 90 | """Compare two sets of pitches. 
91 | 92 | Parameters: 93 | 94 | aSet/bSet: list of pitches 95 | 96 | Return value: 97 | 98 | the number of mismatching objects in the two sets 99 | """ 100 | 101 | a = aSet.copy() 102 | b = bSet.copy() 103 | 104 | # Remove matching pitches from both sets 105 | aTemp = [] 106 | for obj in a: 107 | if obj in b: 108 | b.remove(obj) 109 | else: 110 | aTemp.append(obj) 111 | a = aTemp 112 | 113 | return len(a) + len(b) 114 | 115 | def costMatrix(s, t): 116 | m = len(s) 117 | n = len(t) 118 | d = np.zeros((m + 1, n + 1)) 119 | 120 | for i in range(1, m + 1): 121 | d[i, 0] = np.inf 122 | 123 | for j in range(1, n + 1): 124 | d[0, j] = np.inf 125 | 126 | for j in range(1, n + 1): 127 | for i in range(1, m + 1): 128 | cost = compareSets(s[i - 1][1], t[j - 1][1]) 129 | idx = np.argmin([d[i - 1, j], d[i, j - 1], d[i - 1, j - 1]]) 130 | if idx == 0: 131 | d[i, j] = d[i - 1, j] + cost 132 | elif idx == 1: 133 | d[i, j] = d[i, j - 1] + cost 134 | else: 135 | d[i, j] = d[i - 1, j - 1] + cost 136 | 137 | return d 138 | 139 | # scoreAlignment 140 | aList = convertScoreToListOfPitches(aScore) 141 | bList = convertScoreToListOfPitches(bScore) 142 | d = costMatrix(aList, bList) 143 | 144 | (i,j) = (d.shape[0] - 1, d.shape[1] - 1) 145 | path = [] 146 | while not (i == 0 and j == 0): 147 | aOff = aList[i-1][0] 148 | bOff = bList[j-1][0] 149 | path = [(aOff,bOff)] + path 150 | 151 | idx = np.argmin([d[i - 1, j], d[i, j - 1], d[i - 1, j - 1]]) 152 | if idx == 0: 153 | i = i - 1 154 | elif idx == 1: 155 | j = j - 1 156 | else: 157 | i, j = i - 1, j - 1 158 | 159 | return path, d 160 | 161 | 162 | 163 | def scoreSimilarity(estScore, gtScore): 164 | """Compare two musical scores. 165 | 166 | Parameters: 167 | 168 | estScore/gtScore: music21.stream.Score objects of piano scores. 
The scores must contain two 169 | music21.stream.PartStaff substreams (top and bottom staves) 170 | 171 | estScore is the estimated transcription 172 | gtScore is the ground truth 173 | 174 | Return value: 175 | 176 | a NumPy array containing the differences between the two scores: 177 | 178 | barlines, clefs, key signatures, time signatures, note, note spelling, 179 | note duration, staff assignment, rest, rest duration 180 | 181 | The differences for notes, rests and barlines are normalized with the number of symbols 182 | in the ground truth 183 | """ 184 | 185 | def isInstanceOfClasses(obj, classes): 186 | """Helper function to determine if an item is an instance of several classes""" 187 | for cls in classes: 188 | if isinstance(obj, cls): 189 | return True 190 | return False 191 | 192 | def countSymbols(aScore): 193 | """Count the number of symbols in a score 194 | 195 | Parameter: 196 | aScore a music21.Stream 197 | 198 | Return value: 199 | the number of music symbols (notes, rests, chords, barlines) in the score 200 | """ 201 | 202 | # Classes to consider 203 | CLASSES = [music21.note.Note, music21.chord.Chord, music21.note.Rest] 204 | 205 | nSymbols = {'n_' + cls.__name__: sum([len(el.notes) if cls == music21.chord.Chord else 1 206 | for el in aScore.recurse() if isinstance(el, cls)]) 207 | for cls in CLASSES} 208 | 209 | return nSymbols 210 | 211 | def convertScoreToList(aScore): 212 | """Convert a piano score into a list of tuples 213 | 214 | Parameter: 215 | aScore a music21.Stream containing two music21.stream.PartStaff 216 | 217 | Return value: 218 | list of tuples (offset, staff, object) 219 | offset is a real number indicating the offset of an object in music21 terms 220 | staff is an integer indicating the staff (0 = top, 1 = bottom) 221 | object is a music21 object 222 | """ 223 | 224 | # Classes to consider 225 | CLASSES = [music21.bar.Barline, music21.note.Note, music21.note.Rest, music21.chord.Chord] 226 | 227 | def 
convertStreamToList(aStream, staff): 228 | aList = [] 229 | currentOffset = 0.0 230 | currentList = [] 231 | for el in aStream.recurse(): 232 | if isInstanceOfClasses(el, CLASSES): 233 | if el.getOffsetInHierarchy(aStream) == currentOffset: 234 | currentList.append((staff, el)) 235 | else: 236 | aList.append((currentOffset, currentList)) 237 | currentOffset = el.getOffsetInHierarchy(aStream) 238 | currentList = [(staff, el)] 239 | return aList 240 | 241 | def flattenStream(aStream): 242 | newStream = music21.stream.Stream() 243 | for el in aStream.recurse(): 244 | if isInstanceOfClasses(el, CLASSES): 245 | newStream.insert(el.getOffsetInHierarchy(aStream), el) 246 | elif isinstance(el, music21.stream.Measure): 247 | newStream.insert(el.getOffsetInHierarchy(aStream), music21.bar.Barline()) 248 | return newStream 249 | 250 | def getNext(iterator): 251 | try: 252 | return next(iterator) 253 | except StopIteration: 254 | return None 255 | 256 | parts = aScore.getElementsByClass([music21.stream.PartStaff, music21.stream.Part]) # get staves 257 | topStaffList = convertStreamToList(flattenStream(parts[0]), 0) 258 | bottomStaffList = convertStreamToList(flattenStream(parts[1]), 1) if len(parts) == 2 else [] 259 | 260 | aList = [] 261 | tIterator = iter(topStaffList) 262 | bIterator = iter(bottomStaffList) 263 | tEl = getNext(tIterator) 264 | bEl = getNext(bIterator) 265 | 266 | while tEl or bEl: 267 | if not tEl: 268 | aList.append((bEl[0], bEl[1])) 269 | bEl = getNext(bIterator) 270 | elif not bEl: 271 | aList.append((tEl[0], tEl[1])) 272 | tEl = getNext(tIterator) 273 | else: 274 | if tEl[0] < bEl[0]: 275 | aList.append((tEl[0], tEl[1])) 276 | tEl = getNext(tIterator) 277 | elif tEl[0] > bEl[0]: 278 | aList.append((bEl[0], bEl[1])) 279 | bEl = getNext(bIterator) 280 | else: 281 | aList.append((tEl[0], tEl[1] + bEl[1])) 282 | tEl = getNext(tIterator) 283 | bEl = getNext(bIterator) 284 | 285 | return aList 286 | 287 | def countObjects(aSet): 288 | """Count objects in a set 
289 | 290 | Parameters: 291 | 292 | aSet: list of tuples (staff, object) 293 | staff is an integer indicating the staff (1 = top, 2 = bottom) 294 | object is a music21 object 295 | 296 | Return value: 297 | 298 | a tuple with the numbers of objects in the set (see definition of errors below) 299 | """ 300 | 301 | errors = np.zeros((len(ScoreErrors.__members__)), int) 302 | 303 | for obj in aSet: 304 | if isinstance(obj[1], (music21.stream.Measure, music21.bar.Barline, music21.clef.Clef, \ 305 | music21.key.Key, music21.key.KeySignature, music21.meter.TimeSignature)): 306 | pass 307 | elif isinstance(obj[1], music21.note.Note): 308 | errors[ScoreErrors.NoteDeletion] += 1 309 | elif isinstance(obj[1], music21.chord.Chord): 310 | errors[ScoreErrors.NoteDeletion] += len(obj[1].pitches) 311 | elif isinstance(obj[1], music21.note.Rest): 312 | errors[ScoreErrors.RestDeletion] += 1 313 | else: 314 | print('Class not found:', type(obj[1])) 315 | 316 | return errors 317 | 318 | def compareSets(aSet, bSet): 319 | """Compare two sets of concurrent musical objects. 
320 | 321 | Parameters: 322 | 323 | aSet/bSet: list of tuples (staff, object) 324 | staff is an integer indicating the staff (1 = top, 2 = bottom) 325 | object is a music21 object 326 | 327 | Return value: 328 | 329 | a tuple with the differences between the two sets (see definition of errors below) 330 | """ 331 | 332 | def findEnharmonicEquivalent(note, aSet): 333 | """Find the first enharmonic equivalent in a set 334 | 335 | Parameters: 336 | 337 | note: a music21.note.Note object 338 | aSet: list of tuples (staff, object) 339 | staff is an integer indicating the staff (0 = top, 1 = bottom) 340 | object is a music21 object 341 | 342 | Return value: 343 | 344 | index of the first enharmonic equivalent of note in aSet 345 | -1 otherwise 346 | """ 347 | for i, obj in enumerate(aSet): 348 | if isinstance(obj[1], music21.note.Note) and obj[1].pitch.ps == note.pitch.ps: 349 | return i 350 | return -1 351 | 352 | def splitChords(aSet): 353 | """Split chords into seperate notes 354 | 355 | Parameters: 356 | 357 | aSet: list of tuples (staff, object) 358 | staff is an integer indicating the staff (0 = top, 1 = bottom) 359 | object is a music21 object 360 | 361 | Return value: 362 | a tuple (newSet, chords) 363 | newSet: aSet with split chords 364 | chords: the number of chords in aSet 365 | 366 | """ 367 | newSet = [] 368 | chordSet = [] # added 369 | numChords = 0 370 | for obj in aSet: 371 | if isinstance(obj[1], music21.chord.Chord): 372 | numChords += 1 373 | for note in obj[1]: # added 374 | if not note.containerHierarchy: 375 | note.containerHierarchy = obj[1].containerHierarchy 376 | if not note.contextSites: 377 | note.contextSites = obj[1].contextSites 378 | if note.stemDirection == 'unspecified': 379 | note.stemDirection = obj[1].stemDirection 380 | 381 | # newNote = copy.deepcopy(note) 382 | newSet.append((obj[0], note)) 383 | chordSet.append(obj) # added 384 | else: 385 | newSet.append(obj) 386 | 387 | return newSet, chordSet, numChords # modified 388 | 389 | 
def compareObj(aObj, bObj): 390 | # Compare Music 21 objects 391 | if isinstance(aObj, music21.note.Note) or isinstance(aObj, music21.chord.Chord): 392 | return False 393 | if aObj == bObj: 394 | return True 395 | if type(aObj) != type(bObj): 396 | if not isinstance(aObj, music21.key.Key) and not isinstance(aObj, music21.key.KeySignature): # added 397 | return False 398 | if isinstance(aObj, music21.stream.Measure): 399 | return True 400 | if isinstance(aObj, music21.bar.Barline): 401 | return True 402 | if isinstance(aObj, music21.clef.Clef): 403 | if type(aObj) == type(bObj): 404 | return True 405 | if isinstance(aObj, music21.key.Key) or isinstance(aObj, music21.key.KeySignature): # mod 406 | if aObj.sharps == bObj.sharps: 407 | return True 408 | if isinstance(aObj, music21.meter.TimeSignature): 409 | if aObj.numerator / aObj.beatCount == bObj.numerator / bObj.beatCount: # mod 410 | return True 411 | if isinstance(aObj, music21.note.Note): 412 | if aObj.pitch == bObj.pitch and aObj.duration == bObj.duration and aObj.stemDirection == bObj.stemDirection: 413 | return True 414 | if isinstance(aObj, music21.note.Rest): 415 | if aObj.duration == bObj.duration: 416 | return True 417 | if isinstance(aObj, music21.chord.Chord): 418 | if aObj.duration == bObj.duration and aObj.pitches == bObj.pitches: 419 | return True 420 | return False 421 | 422 | def findObj(aPair, aSet): 423 | # Find 424 | for bPair in aSet: 425 | if aPair[0] == bPair[0]: 426 | if compareObj(aPair[1], bPair[1]): 427 | return bPair 428 | return None 429 | 430 | def comparePitch(aObj, bObj): # added 431 | if isinstance(aObj, music21.note.Note): 432 | return aObj.pitch == bObj.pitch 433 | elif isinstance(aObj, music21.chord.Chord): 434 | return set(aObj.pitches) == set(bObj.pitches) 435 | 436 | def getBeams(noteObj): # added 437 | return '_'.join(['-'.join([b.type, b.direction]) if b.direction else b.type for b in noteObj.beams]) 438 | 439 | def getTie(noteObj): # added 440 | return noteObj.tie.type if 
noteObj.tie is not None else '' 441 | 442 | def referClef(noteObj): # added 443 | return noteObj.getContextByClass('Clef').name if noteObj.getContextByClass('Clef') is not None else '' 444 | 445 | def referTimeSig(noteObj): # added 446 | return noteObj.getContextByClass('TimeSignature').numerator / noteObj.getContextByClass('TimeSignature').denominator \ 447 | if noteObj.getContextByClass('TimeSignature') is not None else '' 448 | 449 | def referKeySig(noteObj): # added 450 | keyObj = (noteObj.getContextByClass('Key') or noteObj.getContextByClass('KeySignature')) 451 | return keyObj.sharps if keyObj else 0 452 | 453 | def referVoice(noteObj): # added 454 | return noteObj.getContextByClass('Voice').id if noteObj.getContextByClass('Voice') is not None else '1' 455 | 456 | errors = np.zeros((len(ScoreErrors.__members__)), int) 457 | 458 | a = aSet.copy() 459 | b = bSet.copy() 460 | 461 | # Remove matching pairs from both sets 462 | aTemp = [] 463 | for pair in a: 464 | bPair = findObj(pair, b) 465 | if bPair: 466 | b.remove(bPair) 467 | else: 468 | aTemp.append(pair) 469 | a = aTemp 470 | 471 | # Find mismatched staff placement 472 | aTemp = [] 473 | for obj in a: 474 | bTemp = [o[1] for o in b if o[0] != obj[0]] 475 | if obj[1] in bTemp: 476 | idx = b.index((1 - obj[0], obj[1])) 477 | del b[idx] 478 | errors[ScoreErrors.StaffAssignment] += 1 479 | else: 480 | aTemp.append(obj) 481 | a = aTemp 482 | 483 | a, aChords, aNumChords = splitChords(a) 484 | b, bChords, bNumChords = splitChords(b) 485 | 486 | # Find mismatches in notes 487 | aTemp = [] 488 | for obj in a: 489 | if isinstance(obj[1], music21.note.Note): 490 | found = False 491 | for bObj in b: 492 | if isinstance(bObj[1], music21.note.Note) and bObj[1].pitch == obj[1].pitch: 493 | if bObj[0] != obj[0]: 494 | errors[ScoreErrors.StaffAssignment] += 1 495 | else: # added 496 | if bObj[1].duration != obj[1].duration: 497 | errors[ScoreErrors.NoteDuration] += 1 498 | if bObj[1].stemDirection != 
obj[1].stemDirection: 499 | errors[ScoreErrors.StemDirection] += 1 500 | 501 | if getBeams(bObj[1]) != getBeams(obj[1]): # added 502 | errors[ScoreErrors.Beams] += 1 503 | if getTie(bObj[1]) != getTie(obj[1]): # added 504 | errors[ScoreErrors.Tie] += 1 505 | if referClef(bObj[1]) != referClef(obj[1]): # added 506 | errors[ScoreErrors.Clef] += 1 507 | if referTimeSig(bObj[1]) != referTimeSig(obj[1]): # added 508 | errors[ScoreErrors.TimeSignature] += 1 509 | if referKeySig(bObj[1]) != referKeySig(obj[1]): # added 510 | errors[ScoreErrors.KeySignature] += 1 511 | if referVoice(bObj[1]) != referVoice(obj[1]): # added 512 | errors[ScoreErrors.Voice] += 1 513 | 514 | b.remove(bObj) 515 | found = True 516 | break 517 | if not found: 518 | aTemp.append(obj) 519 | else: 520 | aTemp.append(obj) 521 | a = aTemp 522 | 523 | # Find mismatched duration of rests 524 | aTemp = [] 525 | for obj in a: 526 | if isinstance(obj[1], music21.note.Rest): 527 | for bObj in b: 528 | if isinstance(bObj[1], music21.note.Rest) and bObj[1].duration != obj[1].duration: 529 | b.remove(bObj) 530 | errors[ScoreErrors.RestDuration] += 1 531 | break 532 | aTemp.append(obj) 533 | else: 534 | aTemp.append(obj) 535 | a = aTemp 536 | 537 | # Find enharmonic equivalents and report spelling mistakes and duration mistakes 538 | aTemp = [] 539 | for obj in a: 540 | if isinstance(obj[1], music21.note.Note): 541 | idx = findEnharmonicEquivalent(obj[1], b) 542 | if idx != -1: 543 | if b[idx][0] != obj[0]: 544 | errors[ScoreErrors.StaffAssignment] += 1 545 | if b[idx][1].duration != obj[1].duration: 546 | errors[ScoreErrors.NoteDuration] += 1 547 | if b[idx][1].stemDirection != obj[1].stemDirection: 548 | errors[ScoreErrors.StemDirection] += 1 549 | 550 | if getBeams(b[idx][1]) != getBeams(obj[1]): # added 551 | errors[ScoreErrors.Beams] += 1 552 | if getTie(b[idx][1]) != getTie(obj[1]): # added 553 | errors[ScoreErrors.Tie] += 1 554 | if referClef(b[idx][1]) != referClef(obj[1]): # added 555 | 
errors[ScoreErrors.Clef] += 1 556 | if referTimeSig(b[idx][1]) != referTimeSig(obj[1]): # added 557 | errors[ScoreErrors.TimeSignature] += 1 558 | if referKeySig(b[idx][1]) != referKeySig(obj[1]): # added 559 | errors[ScoreErrors.KeySignature] += 1 560 | if referVoice(b[idx][1]) != referVoice(obj[1]): # added 561 | errors[ScoreErrors.Voice] += 1 562 | 563 | del b[idx] 564 | errors[ScoreErrors.NoteSpelling] += 1 565 | else: 566 | aTemp.append(obj) 567 | else: 568 | aTemp.append(obj) 569 | a = aTemp 570 | 571 | aErrors = countObjects(a) 572 | bErrors = countObjects(b) 573 | 574 | errors += bErrors 575 | errors[ScoreErrors.NoteInsertion] = aErrors[ScoreErrors.NoteDeletion] 576 | errors[ScoreErrors.RestInsertion] = aErrors[ScoreErrors.RestDeletion] 577 | 578 | # print() 579 | # print('aSet =', aSet) 580 | # print('bSet =', bSet) 581 | # print('errors =', errors) 582 | # print() 583 | 584 | return errors 585 | 586 | def getSet(aList, start, end): 587 | set = [] 588 | for aTuple in aList: 589 | if aTuple[0] >= end: 590 | return set 591 | if aTuple[0] >= start: 592 | set += aTuple[1] 593 | return set 594 | 595 | # scoreSimilarity 596 | path, _ = scoreAlignment(estScore, gtScore) 597 | 598 | aList = convertScoreToList(estScore) 599 | bList = convertScoreToList(gtScore) 600 | 601 | nSymbols = countSymbols(gtScore) 602 | 603 | errors = np.zeros((len(ScoreErrors.__members__)), float) 604 | 605 | aStart, aEnd = 0.0, 0.0 606 | bStart, bEnd = 0.0, 0.0 607 | for pair in path: 608 | if pair[0] != aEnd and pair[1] != bEnd: 609 | aEnd, bEnd = pair[0], pair[1] 610 | errors += compareSets(getSet(aList, aStart, aEnd), getSet(bList, bStart, bEnd)) 611 | 612 | aStart, aEnd = aEnd, aEnd 613 | bStart, bEnd = bEnd, bEnd 614 | elif pair[0] == aEnd: 615 | bEnd = pair[1] 616 | else: 617 | aEnd = pair[0] 618 | 619 | errors += compareSets(getSet(aList, aStart, float('inf')), getSet(bList, bStart, float('inf'))) 620 | 621 | results = {k: int(v) for k, v in zip(ScoreErrors.__members__.keys(), 
errors)} 622 | results.update(nSymbols) 623 | 624 | return results 625 | -------------------------------------------------------------------------------- /metric/ScoreSimilarity_orig.py: -------------------------------------------------------------------------------- 1 | import music21 2 | import numpy as np 3 | from enum import IntEnum 4 | 5 | 6 | class ScoreErrors(IntEnum): 7 | Barline = 0 8 | Clef = 1 9 | KeySignature = 2 10 | TimeSignature = 3 11 | Note = 4 12 | NoteSpelling = 5 13 | NoteDuration = 6 14 | StemDirection = 7 15 | Grouping = 8 16 | Rest = 9 17 | RestDuration = 10 18 | StaffAssignment = 11 19 | 20 | 21 | def scoreAlignment(aScore, bScore): 22 | """Compare two musical scores. 23 | 24 | Parameters: 25 | 26 | aScore/bScore: music21.stream.Score objects 27 | 28 | Return value: 29 | 30 | (path, d): 31 | path is a list of tuples containing pairs of matching offsets 32 | d is the alignment matrix 33 | """ 34 | 35 | def convertScoreToListOfPitches(aScore): 36 | """Convert a piano score into a list of tuples containing pitches 37 | 38 | Parameter: 39 | aScore a music21.Stream containing two music21.stream.PartStaff 40 | 41 | Return value: 42 | list of tuples (offset, pitches) 43 | offset is a real number indicating the offset of an object in music21 terms 44 | pitches is a list of pitches in MIDI numbers 45 | """ 46 | 47 | def getPitches(el): 48 | if isinstance(el, music21.note.Note): 49 | return [el.pitch.midi] 50 | elif isinstance(el, music21.chord.Chord): 51 | currentList = [] 52 | for pitch in el.pitches: 53 | currentList.append(pitch.midi) 54 | return currentList 55 | 56 | def convertStreamToList(aStream): 57 | aList = [] 58 | currentOffset = 0.0 59 | currentList = [] 60 | for el in aStream: 61 | if el.offset == currentOffset: 62 | currentList += getPitches(el) 63 | else: 64 | aList.append((currentOffset, currentList)) 65 | currentOffset = el.offset 66 | currentList = getPitches(el) 67 | return aList 68 | 69 | aList = 
convertStreamToList(aScore.flat.notes) 70 | return aList 71 | 72 | def compareSets(aSet, bSet): 73 | """Compare two sets of pitches. 74 | 75 | Parameters: 76 | 77 | aSet/bSet: list of pitches 78 | 79 | Return value: 80 | 81 | the number of mismatching objects in the two sets 82 | """ 83 | 84 | a = aSet.copy() 85 | b = bSet.copy() 86 | 87 | # Remove matching pitches from both sets 88 | aTemp = [] 89 | for obj in a: 90 | if obj in b: 91 | b.remove(obj) 92 | else: 93 | aTemp.append(obj) 94 | a = aTemp 95 | 96 | return len(a) + len(b) 97 | 98 | def costMatrix(s, t): 99 | m = len(s) 100 | n = len(t) 101 | d = np.zeros((m + 1, n + 1)) 102 | 103 | for i in range(1, m + 1): 104 | d[i, 0] = np.inf 105 | 106 | for j in range(1, n + 1): 107 | d[0, j] = np.inf 108 | 109 | for j in range(1, n + 1): 110 | for i in range(1, m + 1): 111 | cost = compareSets(s[i - 1][1], t[j - 1][1]) 112 | idx = np.argmin([d[i - 1, j], d[i, j - 1], d[i - 1, j - 1]]) 113 | if idx == 0: 114 | d[i, j] = d[i - 1, j] + cost 115 | elif idx == 1: 116 | d[i, j] = d[i, j - 1] + cost 117 | else: 118 | d[i, j] = d[i - 1, j - 1] + cost 119 | 120 | return d 121 | 122 | # scoreAlignment 123 | aList = convertScoreToListOfPitches(aScore) 124 | bList = convertScoreToListOfPitches(bScore) 125 | d = costMatrix(aList, bList) 126 | 127 | (i,j) = (d.shape[0] - 1, d.shape[1] - 1) 128 | path = [] 129 | while not (i == 0 and j == 0): 130 | aOff = aList[i-1][0] 131 | bOff = bList[j-1][0] 132 | path = [(aOff,bOff)] + path 133 | 134 | idx = np.argmin([d[i - 1, j], d[i, j - 1], d[i - 1, j - 1]]) 135 | if idx == 0: 136 | i = i - 1 137 | elif idx == 1: 138 | j = j - 1 139 | else: 140 | i, j = i - 1, j - 1 141 | 142 | return path, d 143 | 144 | 145 | 146 | def scoreSimilarity(estScore, gtScore): 147 | """Compare two musical scores. 148 | 149 | Parameters: 150 | 151 | estScore/gtScore: music21.stream.Score objects of piano scores. 
The scores must contain two 152 | music21.stream.PartStaff substreams (top and bottom staves) 153 | 154 | estScore is the estimated transcription 155 | gtScore is the ground truth 156 | 157 | Return value: 158 | 159 | a NumPy array containing the differences between the two scores: 160 | 161 | barlines, clefs, key signatures, time signatures, note, note spelling, 162 | note duration, staff assignment, rest, rest duration 163 | 164 | The differences for notes, rests and barlines are normalized with the number of symbols 165 | in the ground truth 166 | """ 167 | 168 | def isInstanceOfClasses(obj, classes): 169 | """Helper function to determine if an item is an instance of several classes""" 170 | for cls in classes: 171 | if isinstance(obj, cls): 172 | return True 173 | return False 174 | 175 | def countSymbols(aScore): 176 | """Count the number of symbols in a score 177 | 178 | Parameter: 179 | aScore a music21.Stream 180 | 181 | Return value: 182 | the number of music symbols (notes, rests, chords, barlines) in the score 183 | """ 184 | 185 | # Classes to consider 186 | CLASSES = [music21.bar.Barline, music21.note.Note, music21.note.Rest, 187 | music21.chord.Chord] 188 | 189 | nSymbols = 0 190 | for el in aScore.recurse(): 191 | if isInstanceOfClasses(el, CLASSES): 192 | nSymbols += 1 193 | 194 | return nSymbols 195 | 196 | def convertScoreToList(aScore): 197 | """Convert a piano score into a list of tuples 198 | 199 | Parameter: 200 | aScore a music21.Stream containing two music21.stream.PartStaff 201 | 202 | Return value: 203 | list of tuples (offset, staff, object) 204 | offset is a real number indicating the offset of an object in music21 terms 205 | staff is an integer indicating the staff (0 = top, 1 = bottom) 206 | object is a music21 object 207 | """ 208 | 209 | # Classes to consider 210 | CLASSES = [music21.bar.Barline, music21.clef.Clef, 211 | music21.key.Key, music21.meter.TimeSignature, music21.note.Note, music21.note.Rest, 212 | music21.chord.Chord] 
213 | 214 | def convertStreamToList(aStream, staff): 215 | aList = [] 216 | currentOffset = 0.0 217 | currentList = [] 218 | for el in aStream.recurse(): 219 | if isInstanceOfClasses(el, CLASSES): 220 | if el.getOffsetInHierarchy(aStream) == currentOffset: 221 | currentList.append((staff, el)) 222 | else: 223 | aList.append((currentOffset, currentList)) 224 | currentOffset = el.getOffsetInHierarchy(aStream) 225 | currentList = [(staff, el)] 226 | return aList 227 | 228 | def flattenStream(aStream): 229 | newStream = music21.stream.Stream() 230 | for el in aStream.recurse(): 231 | if isInstanceOfClasses(el, CLASSES): 232 | newStream.insert(el.getOffsetInHierarchy(aStream), el) 233 | elif isinstance(el, music21.stream.Measure): 234 | newStream.insert(el.getOffsetInHierarchy(aStream), music21.bar.Barline()) 235 | return newStream 236 | 237 | def getNext(iterator): 238 | try: 239 | return next(iterator) 240 | except StopIteration: 241 | return None 242 | 243 | parts = aScore.getElementsByClass([music21.stream.PartStaff, music21.stream.Part]) # get staves 244 | topStaffList = convertStreamToList(flattenStream(parts[0]), 0) 245 | bottomStaffList = convertStreamToList(flattenStream(parts[1]), 1) 246 | 247 | aList = [] 248 | tIterator = iter(topStaffList) 249 | bIterator = iter(bottomStaffList) 250 | tEl = getNext(tIterator) 251 | bEl = getNext(bIterator) 252 | 253 | while tEl or bEl: 254 | if not tEl: 255 | aList.append((bEl[0], bEl[1])) 256 | bEl = getNext(bIterator) 257 | elif not bEl: 258 | aList.append((tEl[0], tEl[1])) 259 | tEl = getNext(tIterator) 260 | else: 261 | if tEl[0] < bEl[0]: 262 | aList.append((tEl[0], tEl[1])) 263 | tEl = getNext(tIterator) 264 | elif tEl[0] > bEl[0]: 265 | aList.append((bEl[0], bEl[1])) 266 | bEl = getNext(bIterator) 267 | else: 268 | aList.append((tEl[0], tEl[1] + bEl[1])) 269 | tEl = getNext(tIterator) 270 | bEl = getNext(bIterator) 271 | 272 | return aList 273 | 274 | def countObjects(aSet): 275 | """Count objects in a set 276 | 277 
        Parameters:

        aSet: list of tuples (staff, object)
            staff is an integer indicating the staff (1 = top, 2 = bottom)
            object is a music21 object

        Return value:

        an array (indexed by ScoreErrors) with the numbers of objects in the
        set; callers add it to their totals so that every object surviving the
        matching passes is charged as an insertion/deletion error
        """

        errors = np.zeros((len(ScoreErrors.__members__)), int)

        # NOTE(review): this fragment references ScoreErrors.Barline, .Note,
        # .Rest and .Grouping, i.e. the ORIGINAL metric's enum members -- it
        # appears to belong to the unmodified (orig) variant of the metric;
        # confirm the enum in scope here actually defines these members.
        for obj in aSet:
            if isinstance(obj[1], music21.stream.Measure) or isinstance(obj[1], music21.bar.Barline):
                errors[ScoreErrors.Barline] += 1
            elif isinstance(obj[1], music21.clef.Clef):
                errors[ScoreErrors.Clef] += 1
            elif isinstance(obj[1], music21.key.Key):
                errors[ScoreErrors.KeySignature] += 1
            elif isinstance(obj[1], music21.meter.TimeSignature):
                errors[ScoreErrors.TimeSignature] += 1
            elif isinstance(obj[1], music21.note.Note):
                errors[ScoreErrors.Note] += 1
            elif isinstance(obj[1], music21.chord.Chord):
                # a chord is charged once per constituent pitch
                errors[ScoreErrors.Note] += len(obj[1].pitches)
            elif isinstance(obj[1], music21.note.Rest):
                errors[ScoreErrors.Rest] += 1
            else:
                print('Class not found:', type(obj[1]))

        return errors

    def compareSets(aSet, bSet):
        """Compare two sets of concurrent musical objects.

        Parameters:

        aSet/bSet: list of tuples (staff, object)
            staff is an integer indicating the staff (1 = top, 2 = bottom)
            object is a music21 object

        Return value:

        an array (indexed by ScoreErrors) with the differences between the
        two sets
        """

        def findEnharmonicEquivalent(note, aSet):
            """Find the first enharmonic equivalent in a set

            Parameters:

            note: a music21.note.Note object
            aSet: list of tuples (staff, object)
                staff is an integer indicating the staff (0 = top, 1 = bottom)
                object is a music21 object

            Return value:

            index of the first enharmonic equivalent of note in aSet
            -1 otherwise
            """
            # Equality on .pitch.ps (pitch space, i.e. MIDI number) ignores
            # spelling, so e.g. F#4 and Gb4 compare equal here.
            for i, obj in enumerate(aSet):
                if isinstance(obj[1], music21.note.Note) and obj[1].pitch.ps == note.pitch.ps:
                    return i
            return -1

        def splitChords(aSet):
            """Split chords into separate notes

            Parameters:

            aSet: list of tuples (staff, object)
                staff is an integer indicating the staff (0 = top, 1 = bottom)
                object is a music21 object

            Return value:
            a tuple (newSet, chords)
                newSet: aSet with split chords
                chords: the number of chords in aSet
            """
            newSet = []
            chords = 0
            for obj in aSet:
                if isinstance(obj[1], music21.chord.Chord):
                    chords += 1
                    # each chord pitch becomes a stand-alone Note carrying the
                    # chord's offset, duration and per-pitch stem direction
                    for pitch in obj[1].pitches:
                        newNote = music21.note.Note()
                        newNote.offset = obj[1].offset
                        newNote.pitch = pitch
                        newNote.duration = obj[1].duration
                        newNote.stemDirection = obj[1].getStemDirection(pitch)
                        newSet.append((obj[0], newNote))
                else:
                    newSet.append(obj)

            return newSet, chords

        def compareObj(aObj, bObj):
            # Compare music21 objects for "same notation symbol" equality;
            # each symbol type only checks the attributes the metric cares
            # about (Key -> sharps, Note -> pitch/duration/stem, ...).
            if aObj == bObj:
                return True
            if type(aObj) != type(bObj):
                return False
            if isinstance(aObj, music21.stream.Measure):
                return True
            if isinstance(aObj, music21.bar.Barline):
                return True
            if isinstance(aObj, music21.clef.Clef):
                # same concrete Clef subclass counts as equal
                if type(aObj) == type(bObj):
                    return True
            if isinstance(aObj, music21.key.Key):
                if aObj.sharps == bObj.sharps:
                    return True
            if isinstance(aObj, music21.meter.TimeSignature):
                # NOTE(review): the denominator is not compared; beatCount is
                # used instead -- confirm this is intended.
                if aObj.numerator == bObj.numerator and aObj.beatCount == bObj.beatCount:
                    return True
            if isinstance(aObj, music21.note.Note):
                if aObj.pitch == bObj.pitch and aObj.duration == bObj.duration and aObj.stemDirection == bObj.stemDirection:
                    return True
            if isinstance(aObj, music21.note.Rest):
                if aObj.duration == bObj.duration:
                    return True
            if isinstance(aObj, music21.chord.Chord):
                if aObj.duration == bObj.duration and aObj.pitches == bObj.pitches and aObj.stemDirection == bObj.stemDirection:
                    return True
            return False

        def findObj(aPair, aSet):
            # Find a pair in aSet on the same staff whose object matches
            # aPair's object (per compareObj); returns None when absent.
            for bPair in aSet:
                if aPair[0] == bPair[0]:
                    if compareObj(aPair[1], bPair[1]):
                        return bPair
            return None

        errors = np.zeros((len(ScoreErrors.__members__)), int)

        a = aSet.copy()
        b = bSet.copy()

        # Remove matching pairs from both sets
        # aTemp = []
        # for obj in a:
        #     if obj in b:
        #         b.remove(obj)
        #     else:
        #         aTemp.append(obj)
        # a = aTemp
        aTemp = []
        for pair in a:
            bPair = findObj(pair, b)
            if bPair:
                b.remove(bPair)
            else:
                aTemp.append(pair)
        a = aTemp

        # Find mismatched staff placement: an identical object found on the
        # other staff is consumed and counted as a staff-assignment error.
        aTemp = []
        for obj in a:
            bTemp = [o[1] for o in b if o[0] != obj[0]]
            if obj[1] in bTemp:
                # NOTE(review): 1 - obj[0] implies staves are numbered 0/1
                # here, while the docstrings above say 1/2 -- confirm.
                idx = b.index((1 - obj[0], obj[1]))
                del b[idx]
                errors[ScoreErrors.StaffAssignment] += 1
            else:
                aTemp.append(obj)
        a = aTemp

        # Split chords and report grouping errors
        a, aChords = splitChords(a)
        b, bChords = splitChords(b)
        errors[ScoreErrors.Grouping] += abs(aChords - bChords)

        # Find mismatches in notes: same spelled pitch but differing staff,
        # duration or stem direction; the counterpart is consumed from b.
        aTemp = []
        for obj in a:
            if isinstance(obj[1], music21.note.Note):
                found = False
                for bObj in b:
                    if isinstance(bObj[1], music21.note.Note) and bObj[1].pitch == obj[1].pitch:
                        if bObj[0] != obj[0]:
                            errors[ScoreErrors.StaffAssignment] += 1
                        if bObj[1].duration != obj[1].duration:
                            errors[ScoreErrors.NoteDuration] += 1
                        if bObj[1].stemDirection != obj[1].stemDirection:
                            errors[ScoreErrors.StemDirection] += 1
                        b.remove(bObj)
                        found = True
                        break
                if not found:
                    aTemp.append(obj)
            else:
                aTemp.append(obj)
        a = aTemp

        # Find mismatched duration of rests
        aTemp = []
        for obj in a:
            if isinstance(obj[1], music21.note.Rest):
                for bObj in b:
                    if isinstance(bObj[1], music21.note.Rest) and bObj[1].duration != obj[1].duration:
                        b.remove(bObj)
                        errors[ScoreErrors.RestDuration] += 1
                        break
                # NOTE(review): the rest stays in `a` even when a
                # duration-mismatched counterpart was consumed from `b`, so
                # countObjects below charges it again -- confirm intended.
                aTemp.append(obj)
            else:
                aTemp.append(obj)
        a = aTemp

        # Find enharmonic equivalents and report spelling mistakes and duration mistakes
        aTemp = []
        for obj in a:
            if isinstance(obj[1], music21.note.Note):
                idx = findEnharmonicEquivalent(obj[1], b)
                if idx != -1:
                    if b[idx][0] != obj[0]:
                        errors[ScoreErrors.StaffAssignment] += 1
                    if b[idx][1].duration != obj[1].duration:
                        errors[ScoreErrors.NoteDuration] += 1
                    if b[idx][1].stemDirection != obj[1].stemDirection:
                        errors[ScoreErrors.StemDirection] += 1
                    del b[idx]
                    errors[ScoreErrors.NoteSpelling] += 1
                else:
                    aTemp.append(obj)
            else:
                aTemp.append(obj)
        a = aTemp

        # whatever is still unmatched counts as plain insertions/deletions
        errors += countObjects(a)
        errors += countObjects(b)

        # print()
        # print('aSet =', aSet)
        # print('bSet =', bSet)
        # print('errors =', errors)
        # print()

        return errors

    def errorsToCost(errors):
        # Collapse an error vector into the scalar alignment cost; partial
        # mistakes (spelling, duration, stem, staff) are weighted below 1.
        cost = errors[ScoreErrors.Barline]
        cost += errors[ScoreErrors.Clef]
        cost += errors[ScoreErrors.KeySignature]
        cost += errors[ScoreErrors.TimeSignature]
        cost += errors[ScoreErrors.Note]
        cost += errors[ScoreErrors.NoteSpelling] * 1 / 4
        cost += errors[ScoreErrors.NoteDuration] * 1 / 4
        cost += errors[ScoreErrors.StemDirection] * 1 / 4
        cost += errors[ScoreErrors.StaffAssignment] * 1 / 2
        cost += errors[ScoreErrors.Grouping]
        cost += errors[ScoreErrors.Rest]
        cost += errors[ScoreErrors.RestDuration] * 1 / 2
        return cost

    def getSet(aList, start, end):
        # Collect the objects of every entry whose offset lies in
        # [start, end); entries are presumably (offset, [objects]) sorted by
        # offset -- TODO confirm against convertScoreToList.
        # NOTE(review): `set` shadows the builtin name.
        set = []
        for aTuple in aList:
            if aTuple[0] >= end:
                return set
            if aTuple[0] >= start:
                set += aTuple[1]
        return set

    # scoreSimilarity
    # Align the two scores, then compare the sets of concurrent objects
    # between consecutive alignment points and normalize note-level errors.
    path, _ = scoreAlignment(estScore, gtScore)

    aList = convertScoreToList(estScore)
    bList = convertScoreToList(gtScore)

    nSymbols = countSymbols(gtScore)

    errors = np.zeros((len(ScoreErrors.__members__)), float)

    aStart, aEnd = 0.0, 0.0
    bStart, bEnd = 0.0, 0.0
    for pair in path:
        if pair[0] != aEnd and pair[1] != bEnd:
            # both scores advanced: close the previous window and compare it
            aEnd, bEnd = pair[0], pair[1]
            errors += compareSets(getSet(aList, aStart, aEnd), getSet(bList, bStart, bEnd))
            aStart, aEnd = aEnd, aEnd
            bStart, bEnd = bEnd, bEnd
        elif pair[0] == aEnd:
            bEnd = pair[1]
        else:
            aEnd = pair[0]
    # trailing window up to the end of both scores
    errors += compareSets(getSet(aList, aStart, float('inf')), getSet(bList, bStart, float('inf')))
    # error rates: normalize note-wise aspects by the ground-truth symbol count
    for aspect in [ScoreErrors.Note, ScoreErrors.NoteSpelling, ScoreErrors.NoteDuration, ScoreErrors.StemDirection,
                   ScoreErrors.StaffAssignment, ScoreErrors.Grouping, ScoreErrors.Rest, ScoreErrors.RestDuration]:
        errors[aspect] /= nSymbols

    return errors

#
# Evaluate dataset
#

from music21 import converter
import os
import numpy as np
import scipy.io as sio

# Evaluation driver: compare each method's transcription ('F','G','C','M') of
# pieces K-1..K-19 against the ground truth and store the error tensor.
METHODS = ['F', 'G', 'C', 'M']
METHODS_ORD = [2, 3, 0, 1]  # column order used when storing results
BASEDIR = 'dataset'
N = 19
pieces = list(range(1,N+1))

# load ground-truth scores; a slot stays None when parsing fails
gt = [None] * N
for piece in pieces:
    filename = os.path.join(BASEDIR, 'K-' + str(piece) + '.mxl')
    try:
        gt[piece - 1] = converter.parse(filename)
    except:  # NOTE(review): bare except silently hides all parse errors
        print("Can't load", filename)
        pass

# results[method, piece, error-category]; -1 marks "not evaluated"
results = -np.ones((len(METHODS), N, len(ScoreErrors.__members__)))
for piece in pieces:
    if gt[piece - 1] == None:
        continue
    for method in METHODS:
        filename = os.path.join(BASEDIR, method + '-' + str(piece) + '.mxl')
        try:
            comparisonPiece = converter.parse(filename)
            print(filename, end = ' ')
            score = scoreSimilarity(gt[piece - 1], comparisonPiece)
            print(score)
            results[METHODS_ORD[METHODS.index(method)], piece - 1, :] = score
        except music21.converter.ConverterException:
            # missing/unparsable transcription: leave -1 in results
            pass
        except Exception as err:
            print(type(err), err)

print('Saving results to MAT file')
mat_results = {'results' : results}
sio.savemat('resultsWithAlignment', mat_results)
print('Done')
--------------------------------------------------------------------------------
/tokenization_tools/README.md:
--------------------------------------------------------------------------------
# Tokenization tools

This directory contains the tokenizer and de-tokenizer between **MusicXML** and proposed **score token** representation.
- [**tokenizer**](tokenizer)
  - MusicXML -> Score tokens

- [**de-tokenizer**](detokenizer)
  - Score tokens -> MusicXML

#### requirements

Python 3.6+

- **tokenizer**
  - beautifulsoup4 (4.6.3)
  - lxml (4.9.1)
  - pretty_midi (0.2.9)

- **de-tokenizer**
  - music21 (7.3.3)

Note: The library versions here are not specified ones, but **tested** ones.
--------------------------------------------------------------------------------
/tokenization_tools/detokenizer/README.md:
--------------------------------------------------------------------------------
## Overview

Detokenizer builds musical scores from token sequences, utilizing [music21](https://web.mit.edu/music21/).

## Usage

#### 1. import

```python
from tokens_to_score import tokens_to_score
```

#### 2. pass the token sequence (as a string) to the function

```Python
s = tokens_to_score(token_sequence)
```

- s : music21 Score object


#### 3. write the score into a MusicXML file with the ".write" method (of the music21 object)

```python
s.write('musicxml', 'generated_score')
```

- You'll get the "generated_score.xml" file.

## Specifications

### Supported tokens

- Score tokens (that "[score_to_tokens.py](../tokenizer/)" generates)

### Requirements

Python 3.6+

- music21 (7.3.3)

Note: The library version here is not a specified one, but a **tested** one.
43 | -------------------------------------------------------------------------------- /tokenization_tools/detokenizer/sample/generated_score.musicxml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Music21 Fragment 5 | 6 | Music21 7 | 8 | 2021-11-05 9 | music21 v.6.7.1 10 | 11 | 12 | 13 | 14 | 7 15 | 40 16 | 17 | 18 | 19 | 20 | brace 21 | yes 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 10080 34 | 38 | 2 39 | 40 | G 41 | 2 42 | 43 | 44 | F 45 | 4 46 | 47 | 48 | 49 | 50 | D 51 | 5 52 | 53 | 5040 54 | 0 55 | eighth 56 | up 57 | 1 58 | begin 59 | 60 | 61 | 62 | E 63 | 5 64 | 65 | 5040 66 | 0 67 | eighth 68 | up 69 | 1 70 | continue 71 | 72 | 73 | 74 | D 75 | 5 76 | 77 | 5040 78 | 0 79 | eighth 80 | up 81 | 1 82 | continue 83 | 84 | 85 | 86 | E 87 | 5 88 | 89 | 5040 90 | 0 91 | eighth 92 | up 93 | 1 94 | end 95 | 96 | 97 | 98 | D 99 | 5 100 | 101 | 10080 102 | 0 103 | quarter 104 | up 105 | 1 106 | 107 | 108 | 109 | E 110 | 5 111 | 112 | 10080 113 | 0 114 | quarter 115 | up 116 | 1 117 | 118 | 119 | 40320 120 | 121 | 122 | 123 | B 124 | 4 125 | 126 | 10080 127 | 1 128 | quarter 129 | down 130 | 1 131 | 132 | 133 | 134 | 135 | F 136 | 4 137 | 138 | 10080 139 | 1 140 | quarter 141 | 1 142 | 143 | 144 | 145 | B 146 | 4 147 | 148 | 10080 149 | 1 150 | quarter 151 | down 152 | 1 153 | 154 | 155 | 156 | 157 | F 158 | 4 159 | 160 | 10080 161 | 1 162 | quarter 163 | 1 164 | 165 | 166 | 167 | F 168 | 4 169 | 170 | 10080 171 | 1 172 | quarter 173 | down 174 | 1 175 | 176 | 177 | 178 | 179 | B 180 | 4 181 | 182 | 10080 183 | 1 184 | quarter 185 | 1 186 | 187 | 188 | 189 | F 190 | 4 191 | 192 | 10080 193 | 1 194 | quarter 195 | down 196 | 1 197 | 198 | 199 | 200 | 201 | B 202 | 4 203 | 204 | 10080 205 | 1 206 | quarter 207 | 1 208 | 209 | 210 | 80640 211 | 212 | 213 | 214 | G 215 | 2 216 | 217 | 10080 218 | 2 219 | quarter 220 | up 221 | 2 222 | 223 | 224 | 225 | B 226 | 3 227 | 228 | 10080 229 | 2 230 | quarter 231 
| down 232 | 2 233 | 234 | 235 | 236 | 237 | G 238 | 3 239 | 240 | 10080 241 | 2 242 | quarter 243 | 2 244 | 245 | 246 | 247 | G 248 | 2 249 | 250 | 10080 251 | 2 252 | quarter 253 | up 254 | 2 255 | 256 | 257 | 258 | B 259 | 3 260 | 261 | 10080 262 | 2 263 | quarter 264 | down 265 | 2 266 | 267 | 268 | 269 | 270 | G 271 | 3 272 | 273 | 10080 274 | 2 275 | quarter 276 | 2 277 | 278 | 279 | 280 | 281 | 282 | 283 | C 284 | 5 285 | 286 | 10080 287 | 1 288 | quarter 289 | up 290 | 1 291 | 292 | 293 | 294 | 295 | E 296 | 4 297 | 298 | 10080 299 | 1 300 | quarter 301 | 1 302 | 303 | 304 | 305 | F 306 | 5 307 | 308 | 10080 309 | 1 310 | quarter 311 | down 312 | 1 313 | 314 | 315 | 316 | 317 | C 318 | 5 319 | 320 | 10080 321 | 1 322 | quarter 323 | 1 324 | 325 | 326 | 327 | 328 | F 329 | 4 330 | 331 | 10080 332 | 1 333 | quarter 334 | 1 335 | 336 | 337 | 338 | E 339 | 5 340 | 341 | 10080 342 | 1 343 | quarter 344 | up 345 | 1 346 | 347 | 348 | 349 | 350 | C 351 | 5 352 | 353 | 10080 354 | 1 355 | quarter 356 | 1 357 | 358 | 359 | 360 | 361 | E 362 | 4 363 | 364 | 10080 365 | 1 366 | quarter 367 | 1 368 | 369 | 370 | 371 | E 372 | 5 373 | 374 | 5040 375 | 1 376 | eighth 377 | down 378 | 1 379 | begin 380 | 381 | 382 | 383 | E 384 | -1 385 | 5 386 | 387 | 5040 388 | 1 389 | eighth 390 | flat 391 | down 392 | 1 393 | end 394 | 395 | 396 | 90720 397 | 398 | 399 | 400 | G 401 | 3 402 | 403 | 10080 404 | 2 405 | quarter 406 | down 407 | 2 408 | 409 | 410 | 411 | 412 | C 413 | 3 414 | 415 | 10080 416 | 2 417 | quarter 418 | 2 419 | 420 | 421 | 422 | A 423 | 3 424 | 425 | 10080 426 | 2 427 | quarter 428 | down 429 | 2 430 | 431 | 432 | 433 | 434 | C 435 | 3 436 | 437 | 10080 438 | 2 439 | quarter 440 | 2 441 | 442 | 443 | 444 | G 445 | 3 446 | 447 | 10080 448 | 2 449 | quarter 450 | down 451 | 2 452 | 453 | 454 | 455 | 456 | C 457 | 3 458 | 459 | 10080 460 | 2 461 | quarter 462 | 2 463 | 464 | 465 | 466 | 10080 467 | 2 468 | quarter 469 | 2 470 | 471 | 472 | 473 | 474 | 475 | 476 
| D 477 | 5 478 | 479 | 5040 480 | 0 481 | eighth 482 | up 483 | 1 484 | begin 485 | 486 | 487 | 488 | E 489 | 0 490 | 5 491 | 492 | 5040 493 | 0 494 | eighth 495 | natural 496 | up 497 | 1 498 | continue 499 | 500 | 501 | 502 | D 503 | 5 504 | 505 | 5040 506 | 0 507 | eighth 508 | up 509 | 1 510 | continue 511 | 512 | 513 | 514 | E 515 | 5 516 | 517 | 5040 518 | 0 519 | eighth 520 | up 521 | 1 522 | end 523 | 524 | 525 | 526 | D 527 | 5 528 | 529 | 10080 530 | 0 531 | quarter 532 | up 533 | 1 534 | 535 | 536 | 537 | A 538 | 5 539 | 540 | 10080 541 | 0 542 | quarter 543 | up 544 | 1 545 | 546 | 547 | 40320 548 | 549 | 550 | 551 | F 552 | 1 553 | 4 554 | 555 | 10080 556 | 1 557 | quarter 558 | sharp 559 | down 560 | 1 561 | 562 | 563 | 564 | 565 | C 566 | 5 567 | 568 | 10080 569 | 1 570 | quarter 571 | 1 572 | 573 | 574 | 575 | F 576 | 1 577 | 4 578 | 579 | 10080 580 | 1 581 | quarter 582 | sharp 583 | down 584 | 1 585 | 586 | 587 | 588 | 589 | C 590 | 5 591 | 592 | 10080 593 | 1 594 | quarter 595 | 1 596 | 597 | 598 | 599 | F 600 | 1 601 | 4 602 | 603 | 10080 604 | 1 605 | quarter 606 | sharp 607 | down 608 | 1 609 | 610 | 611 | 612 | 613 | C 614 | 5 615 | 616 | 10080 617 | 1 618 | quarter 619 | 1 620 | 621 | 622 | 623 | D 624 | 5 625 | 626 | 10080 627 | 1 628 | quarter 629 | down 630 | 1 631 | 632 | 633 | 634 | 635 | C 636 | 5 637 | 638 | 10080 639 | 1 640 | quarter 641 | 1 642 | 643 | 644 | 80640 645 | 646 | 647 | 648 | A 649 | 2 650 | 651 | 10080 652 | 2 653 | quarter 654 | up 655 | 2 656 | 657 | 658 | 659 | C 660 | 4 661 | 662 | 10080 663 | 2 664 | quarter 665 | down 666 | 2 667 | 668 | 669 | 670 | 671 | F 672 | 1 673 | 3 674 | 675 | 10080 676 | 2 677 | quarter 678 | sharp 679 | 2 680 | 681 | 682 | 683 | 684 | D 685 | 3 686 | 687 | 10080 688 | 2 689 | quarter 690 | 2 691 | 692 | 693 | 694 | D 695 | 2 696 | 697 | 10080 698 | 2 699 | quarter 700 | up 701 | 2 702 | 703 | 704 | 705 | C 706 | 4 707 | 708 | 10080 709 | 2 710 | quarter 711 | down 712 | 2 713 | 714 | 
715 | 716 | 717 | F 718 | 1 719 | 3 720 | 721 | 10080 722 | 2 723 | quarter 724 | sharp 725 | 2 726 | 727 | 728 | 729 | 730 | D 731 | 3 732 | 733 | 10080 734 | 2 735 | quarter 736 | 2 737 | 738 | 739 | 740 | 741 | 742 | 743 | B 744 | 4 745 | 746 | 5040 747 | 1 748 | eighth 749 | down 750 | 1 751 | begin 752 | 753 | 754 | 755 | A 756 | 5 757 | 758 | 5040 759 | 1 760 | eighth 761 | down 762 | 1 763 | continue 764 | 765 | 766 | 767 | G 768 | 5 769 | 770 | 5040 771 | 1 772 | eighth 773 | down 774 | 1 775 | continue 776 | 777 | 778 | 779 | F 780 | 0 781 | 5 782 | 783 | 5040 784 | 1 785 | eighth 786 | natural 787 | down 788 | 1 789 | end 790 | 791 | 792 | 793 | D 794 | 5 795 | 796 | 5040 797 | 1 798 | eighth 799 | up 800 | 1 801 | begin 802 | 803 | 804 | 805 | B 806 | 4 807 | 808 | 5040 809 | 1 810 | eighth 811 | up 812 | 1 813 | continue 814 | 815 | 816 | 817 | A 818 | 4 819 | 820 | 5040 821 | 1 822 | eighth 823 | up 824 | 1 825 | continue 826 | 827 | 828 | 829 | G 830 | 4 831 | 832 | 5040 833 | 1 834 | eighth 835 | up 836 | 1 837 | end 838 | 839 | 840 | 40320 841 | 842 | 843 | 844 | G 845 | 2 846 | 847 | 10080 848 | 2 849 | quarter 850 | up 851 | 2 852 | 853 | 854 | 855 | F 856 | 0 857 | 4 858 | 859 | 10080 860 | 2 861 | quarter 862 | natural 863 | down 864 | 2 865 | 866 | 867 | 868 | 869 | B 870 | 3 871 | 872 | 10080 873 | 2 874 | quarter 875 | 2 876 | 877 | 878 | 879 | 880 | G 881 | 3 882 | 883 | 10080 884 | 2 885 | quarter 886 | 2 887 | 888 | 889 | 890 | F 891 | 4 892 | 893 | 10080 894 | 2 895 | quarter 896 | down 897 | 2 898 | 899 | 900 | 901 | 902 | B 903 | 3 904 | 905 | 10080 906 | 2 907 | quarter 908 | 2 909 | 910 | 911 | 912 | 913 | G 914 | 3 915 | 916 | 10080 917 | 2 918 | quarter 919 | 2 920 | 921 | 922 | 923 | 10080 924 | 2 925 | quarter 926 | 2 927 | 928 | 929 | regular 930 | 931 | 932 | 933 | -------------------------------------------------------------------------------- /tokenization_tools/detokenizer/sample/input_tokens.txt: 
-------------------------------------------------------------------------------- 1 | R bar clef_treble time_4/4 note_D5 len_1/2 stem_up beam_start note_E5 len_1/2 stem_up beam_continue note_D5 len_1/2 stem_up beam_continue note_E5 len_1/2 stem_up beam_stop note_D5 len_1 stem_up note_E5 len_1 stem_up note_B4 note_F4 len_1 stem_down note_B4 note_F4 len_1 stem_down note_F4 note_B4 len_1 stem_down note_F4 note_B4 len_1 stem_down bar note_C5 note_E4 len_1 stem_up note_F5 note_C5 note_F4 len_1 stem_down note_E5 note_C5 note_E4 len_1 stem_up note_E5 len_1/2 stem_down beam_start note_Eb5 len_1/2 stem_down beam_stop bar note_D5 len_1/2 stem_up beam_start note_E5 len_1/2 stem_up beam_continue note_D5 len_1/2 stem_up beam_continue note_E5 len_1/2 stem_up beam_stop note_D5 len_1 stem_up note_A5 len_1 stem_up note_F#4 note_C5 len_1 stem_down note_F#4 note_C5 len_1 stem_down note_F#4 note_C5 len_1 stem_down note_D5 note_C5 len_1 stem_down bar note_B4 len_1/2 stem_down beam_start note_A5 len_1/2 stem_down beam_continue note_G5 len_1/2 stem_down beam_continue note_F5 len_1/2 stem_down beam_stop note_D5 len_1/2 stem_up beam_start note_B4 len_1/2 stem_up beam_continue note_A4 len_1/2 stem_up beam_continue note_G4 len_1/2 stem_up beam_stop L bar clef_bass time_4/4 note_G2 len_1 stem_up note_B3 note_G3 len_1 stem_down note_G2 len_1 stem_up note_B3 note_G3 len_1 stem_down bar note_G3 note_C3 len_1 stem_down note_A3 note_C3 len_1 stem_down note_G3 note_C3 len_1 stem_down rest len_1 bar note_A2 len_1 stem_up note_C4 note_F#3 note_D3 len_1 stem_down note_D2 len_1 stem_up note_C4 note_F#3 note_D3 len_1 stem_down bar note_G2 len_1 stem_up note_F4 note_B3 note_G3 len_1 stem_down note_F4 note_B3 note_G3 len_1 stem_down rest len_1 -------------------------------------------------------------------------------- /tokenization_tools/detokenizer/sample/sample_usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": 
"code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# import \"tokens_to_score.py\" (assuming the file is in the same directory)\n", 10 | "from tokens_to_score import *" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/plain": [ 21 | "'R bar clef_treble time_4/4 note_D5 len_1/2 stem_up beam_start note_E5 len_1/2 stem_up beam_continue note_D5 len_1/2 stem_up beam_continue note_E5 len_1/2 stem_up beam_stop note_D5 len_1 stem_up note_E5 len_1 stem_up note_B4 note_F4 len_1 stem_down note_B4 note_F4 len_1 stem_down note_F4 note_B4 len_1 stem_down note_F4 note_B4 len_1 stem_down bar note_C5 note_E4 len_1 stem_up note_F5 note_C5 note_F4 len_1 stem_down note_E5 note_C5 note_E4 len_1 stem_up note_E5 len_1/2 stem_down beam_start note_Eb5 len_1/2 stem_down beam_stop bar note_D5 len_1/2 stem_up beam_start note_E5 len_1/2 stem_up beam_continue note_D5 len_1/2 stem_up beam_continue note_E5 len_1/2 stem_up beam_stop note_D5 len_1 stem_up note_A5 len_1 stem_up note_F#4 note_C5 len_1 stem_down note_F#4 note_C5 len_1 stem_down note_F#4 note_C5 len_1 stem_down note_D5 note_C5 len_1 stem_down bar note_B4 len_1/2 stem_down beam_start note_A5 len_1/2 stem_down beam_continue note_G5 len_1/2 stem_down beam_continue note_F5 len_1/2 stem_down beam_stop note_D5 len_1/2 stem_up beam_start note_B4 len_1/2 stem_up beam_continue note_A4 len_1/2 stem_up beam_continue note_G4 len_1/2 stem_up beam_stop L bar clef_bass time_4/4 note_G2 len_1 stem_up note_B3 note_G3 len_1 stem_down note_G2 len_1 stem_up note_B3 note_G3 len_1 stem_down bar note_G3 note_C3 len_1 stem_down note_A3 note_C3 len_1 stem_down note_G3 note_C3 len_1 stem_down rest len_1 bar note_A2 len_1 stem_up note_C4 note_F#3 note_D3 len_1 stem_down note_D2 len_1 stem_up note_C4 note_F#3 note_D3 len_1 stem_down bar note_G2 len_1 stem_up note_F4 note_B3 note_G3 len_1 stem_down note_F4 note_B3 note_G3 
len_1 stem_down rest len_1'" 22 | ] 23 | }, 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "output_type": "execute_result" 27 | } 28 | ], 29 | "source": [ 30 | "# load tokens\n", 31 | "token_sequence = open('input_tokens.txt').read()\n", 32 | "token_sequence" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "# convert them to music21 Score object\n", 42 | "s = tokens_to_score(token_sequence)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 4, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "text/plain": [ 53 | "'generated_score.xml'" 54 | ] 55 | }, 56 | "execution_count": 4, 57 | "metadata": {}, 58 | "output_type": "execute_result" 59 | } 60 | ], 61 | "source": [ 62 | "# write into a MusicXML file\n", 63 | "s.write('musicxml', 'generated_score')" 64 | ] 65 | } 66 | ], 67 | "metadata": { 68 | "kernelspec": { 69 | "display_name": "Python 3", 70 | "language": "python", 71 | "name": "python3" 72 | }, 73 | "language_info": { 74 | "codemirror_mode": { 75 | "name": "ipython", 76 | "version": 3 77 | }, 78 | "file_extension": ".py", 79 | "mimetype": "text/x-python", 80 | "name": "python", 81 | "nbconvert_exporter": "python", 82 | "pygments_lexer": "ipython3", 83 | "version": "3.8.8" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 4 88 | } 89 | -------------------------------------------------------------------------------- /tokenization_tools/detokenizer/tokens_to_score.py: -------------------------------------------------------------------------------- 1 | from music21 import * 2 | 3 | # dictionary to change note names 4 | sharp_to_flat = {'C#': 'D-', 'D#': 'E-', 'F#': 'G-', 'G#': 'A-', 'A#': 'B-'} 5 | flat_to_sharp = {v:k for k, v in sharp_to_flat.items()} 6 | 7 | # translate note numbers into note names considering key signature 8 | def pitch_to_name(pitch_, key=key.KeySignature(0)): 9 | if pitch_.isdecimal(): 10 | name = 
str(pitch.Pitch(int(pitch_)))
        # spell according to the key signature: flat keys prefer flat names,
        # sharp keys prefer sharp names
        if key.sharps < 0:
            for k, v in sharp_to_flat.items():
                name = name.replace(k, v)
        elif key.sharps > 0:
            for k, v in flat_to_sharp.items():
                name = name.replace(k, v)
        return name
    else:
        # already a note name: convert 'b' flats to music21's '-' notation
        return pitch_.replace('b', '-')

# aggregate note(rest)-related tokens: group each note/rest with its trailing
# len/stem/beam/tie attribute tokens into one space-joined string
def aggr_note_token(tokens):
    # NOTE(review): `others` is never used in this function.
    notes, others, out = [], [], []
    note_flag, len_flag = False, False

    for t in tokens:
        parts = t.split('_')
        if parts[0] in ('note', 'rest'):
            # a new note after a completed note+len group flushes the buffer
            # (consecutive note tokens before any len token form one chord)
            if note_flag and len_flag and len(notes):
                out.append(' '.join(notes))
                notes = []
            note_flag = True
            len_flag = False
            notes.append(t)
        elif parts[0] == 'len':
            len_flag = True
            notes.append(t)
        elif parts[0] in ('stem', 'beam', 'tie'):
            notes.append(t)
        else: # other than note-related
            if len(notes):
                out.append(' '.join(notes))
                notes = []
            out.append(t)

    # buffer flush
    if len(notes):
        out.append(' '.join(notes))

    return out

# translate clef or signature token into music21 object
# (returns None for unrecognized tokens)
def single_token_to_obj(token):
    parts = token.split('_')
    if parts[0] == 'clef':
        if parts[1] == 'treble':
            return clef.TrebleClef()
        elif parts[1] == 'bass':
            return clef.BassClef()
    elif parts[0] == 'key':
        if parts[1] == 'sharp':
            return key.KeySignature(int(parts[2]))
        elif parts[1] == 'flat':
            return key.KeySignature(-1 * int(parts[2]))
        elif parts[1] == 'natural':
            return key.KeySignature(0)
    elif parts[0] == 'time':
        if '/' in parts[1]:
            return meter.TimeSignature(parts[1])
        else:
            # bare beat count: interpret <6 as x/4, otherwise as x/8
            return meter.TimeSignature(parts[1]+'/4' if int(parts[1]) < 6 else parts[1]+'/8')

# translate note(rest)-related tokens into music21 object
# tokens: one aggregated group (e.g. ['note_C5', 'note_E4', 'len_1', 'stem_up'])
# key: current key signature, used to spell numeric pitches
# returns a Rest, a Note/Chord, or a list of tied Notes/Chords when the group
# carries several len tokens
def note_token_to_obj(tokens, key):
    if tokens[0] == 'rest': # for rests
        length = str_to_float(tokens[1])
        return note.Rest(quarterLength=length)

    # for notes
    note_names = [pitch_to_name(t.split('_')[1], key) for t in tokens if t.split('_')[0] == 'note']
    lengths = [str_to_float(t) for t in tokens if t.split('_')[0] == 'len']
    # stem direction / beams may come from dedicated tokens or be packed into
    # a concatenated len_<dur>_<stem>_<beam...> token
    direction = [t.split('_')[1] for t in tokens if t.split('_')[0] in ('stem', 'dir')] + [t.split('_')[2] for t in tokens if t.split('_')[0] == 'len' and len(t.split('_')) >= 3]
    beams = [t.split('_')[1:] for t in tokens if t.split('_')[0] == 'beam'] + [t.split('_')[3:] for t in tokens if t.split('_')[0] == 'len' and len(t.split('_')) >= 4]
    tie_ = [t.split('_')[1] for t in tokens if t.split('_')[0] == 'tie']

    if len(note_names) > 1: # chord
        if len(lengths) > 1:
            # several len tokens: emit one chord per length, tied together
            chords = []
            for i, l in enumerate(lengths):
                chord_ = chord.Chord(note_names, quarterLength=l)
                if len(direction):
                    chord_.stemDirection = direction[0]

                if len(beams):
                    append_beams(chord_, beams)

                # NOTE(review): an explicit tie token forces 'continue' on
                # every segment (tie_[0] is ignored) -- confirm intended;
                # otherwise first/last segments start/stop the tie
                if len(tie_):
                    chord_.tie = tie.Tie('continue')
                elif i == 0:
                    chord_.tie = tie.Tie('start')
                elif i == len(lengths) - 1:
                    chord_.tie = tie.Tie('stop')
                else:
                    chord_.tie = tie.Tie('continue')

                chords.append(chord_)

            return chords
        else:
            chord_ = chord.Chord(note_names, quarterLength=lengths[0])
            if len(direction):
                chord_.stemDirection = direction[0]
            if len(beams):
                append_beams(chord_, beams)
            if len(tie_):
                chord_.tie = tie.Tie(tie_[0])
            return chord_
    else: # note
        if len(lengths) > 1:
            # several len tokens: emit one note per length, tied together
            notes = []
            for i, l in enumerate(lengths):
                note_ = note.Note(note_names[0], quarterLength=l)
                if len(direction):
                    note_.stemDirection = direction[0]

                if len(beams):
                    append_beams(note_, beams)

                # same tie policy as the chord branch above
                if len(tie_):
                    note_.tie = tie.Tie('continue')
                elif i == 0:
                    note_.tie = tie.Tie('start')
                elif i == len(lengths) - 1:
                    note_.tie = tie.Tie('stop')
                else:
                    note_.tie = tie.Tie('continue')

                notes.append(note_)

            return notes
        else:
            note_ = note.Note(note_names[0], quarterLength=lengths[0])
            if len(direction):
                note_.stemDirection = direction[0]
            if len(beams):
                append_beams(note_, beams)
            if len(tie_):
                note_.tie = tie.Tie(tie_[0])
            return note_

# [aux func] translate note length into float number
# accepts either a bare length ('1/2') or a len token ('len_1/2')
def str_to_float(t):
    length = t.split('_')[1] if 'len' in t else t
    if '/' in length:
        numerator, denominator = length.split('/')
        return int(numerator) / int(denominator)
    else:
        return float(length)

# [aux func] append beams property to music21 Note or Chord object
# only the first beam group (beams[0]) is applied; a 'x-y' element encodes a
# type-direction pair (e.g. 'partial-right')
def append_beams(obj, beams):
    for b in beams[0]:
        if '-' in b:
            former, latter = b.split('-')
            obj.beams.append(former, latter)
        else:
            obj.beams.append(b)

# build one staff (music21 PartStaff) from its token sequence
# key_: initial key signature (count of sharps); start_voice: id given to the
# first Voice created within each measure
def tokens_to_PartStaff(tokens, key_=0, start_voice=1):
    tokens = concatenated_to_regular(tokens)

    p = stream.PartStaff()
    k = key.KeySignature(key_)

    voice_id = start_voice
    voice_flag = False   # currently inside a voice
    after_voice = False  # at least one voice already closed in this measure
    voice_start = None   # offset (quarterLengths) where the voices begin

    ottava_flag = False      # NOTE(review): never set True in this fragment
    ottava_elements = []

    tokens = aggr_note_token(tokens)

    # `m` is first created by the leading 'bar' token; the sequence is
    # assumed to start with 'bar' (otherwise `m` would be unbound).
    for i, t in enumerate(tokens):
        if t == 'bar':
            if i != 0:
                p.append(m)
            m = stream.Measure()
            voice_id = start_voice
            voice_start = None
            voice_flag = False
            after_voice = False
        elif t == '':
            # NOTE(review): empty-string token -- looks like a stripped
            # voice-start markup token (e.g. '<voice>'); confirm against the
            # tokenizer's vocabulary.
            v = stream.Voice(id=voice_id)
            voice_flag = True
            if voice_start is None:
                voice_start = m.duration.quarterLength # record the start point of voice
        elif t == '':
            # NOTE(review): duplicate empty-token branch (unreachable as
            # written); presumably the stripped voice-end token ('</voice>').
            if voice_flag:
                v.makeAccidentals(useKeySignature=k)
                # shift the voice's elements to the recorded start offset
                for element in v:
                    element.offset += voice_start
                m.append(v)
                voice_id += 1
                voice_flag = False
                after_voice = True
        elif t.split('_')[0] in ('clef', 'key', 'time'):
            if t[:11] == 'key_natural' and i+1 < len(tokens) and tokens[i+1].split('_')[0] == 'key':
                continue # workaround for MuseScore (which ignores consecutive key signatures): if key signatures appear in succession, skip the one with natural
            o = single_token_to_obj(t)
            if voice_flag:
                v.append(o)
            else:
                m.append(o)
            if t.split('_')[0] == 'key': # generate another key signature object to use makeAccidentals and to translate note number to name
                k = o
        elif t[:4] in ('note', 'rest'):
            n = note_token_to_obj(t.split(), k)
            if ottava_flag:
                ottava_elements.append(n)

            if voice_flag:
                v.append(n)
            else:
                m.append(n)

            if after_voice:
                # elements appended after closed voices get auto-offsets past
                # those voices; pull them back by the voices' total length.
                # NOTE(review): assumes every closed voice has v's length --
                # confirm.
                n.offset -= v.quarterLength * (voice_id - 1)
    # last measure
    p.append(m)
    p.makeAccidentals()

    return p

# expand concatenated attribute tokens (len_<dur>_<stem>_<beam...>) back into
# regular len/stem/beam tokens; other tokens pass through unchanged
def concatenated_to_regular(tokens):
    regular_tokens = []
    for t in tokens:
        if t.startswith('len') or t.startswith('attr'):
            attrs = t.split('_')
            if len(attrs) == 2:
                regular_tokens.append(f'len_{attrs[1]}')
            elif len(attrs) == 3:
                regular_tokens += [f'len_{attrs[1]}', f'stem_{attrs[2]}']
            else:
                regular_tokens += [f'len_{attrs[1]}', f'stem_{attrs[2]}', f'beam_{"_".join(attrs[3:])}']
        else:
            regular_tokens.append(t)
    return regular_tokens

# build music21 Score object from a token sequence (string)
def tokens_to_score(string, voice_numbering=False):
    # split into right-hand ('R ...') and left-hand ('L ...') staff streams
    R_str, L_str = split_R_L(string)
    R_tokens = R_str.split()
    L_tokens = L_str.split()
    if voice_numbering:
        # number left-hand voices after the largest right-hand voice count
        r = tokens_to_PartStaff(R_tokens)
        r_voices = max([len(m.voices) if m.hasVoices() else 1 for m in r])
        l = tokens_to_PartStaff(L_tokens, start_voice=r_voices+1)
    else:
        r = tokens_to_PartStaff(R_tokens, start_voice=0)
        l = tokens_to_PartStaff(L_tokens, start_voice=0)

    # add last barline
    r.elements[-1].rightBarline = bar.Barline('regular')
    l.elements[-1].rightBarline = bar.Barline('regular')

    # brace the two staves together as one grand staff
    s = stream.Score()
    g = layout.StaffGroup([r, l], symbol='brace', barTogether=True)
    s.append([g, r, l])
    return s

# split a raw token string into the right-hand and left-hand substrings
# (the parts after the 'R' and 'L' markers); L is '' when absent
def split_R_L(string):
    tokens = string.split()
    tokens = concatenated_to_regular(tokens)

    if 'L' in tokens:
        R = ' '.join(tokens[tokens.index('R')+1:tokens.index('L')])
        L = ' '.join(tokens[tokens.index('L')+1:])
    else:
        R = ' '.join(tokens[tokens.index('R')+1:])
        L = ''
    return R, L
--------------------------------------------------------------------------------
/tokenization_tools/requirements.txt:
--------------------------------------------------------------------------------
beautifulsoup4
lxml
music21
pretty_midi
--------------------------------------------------------------------------------
/tokenization_tools/tokenizer/README.md:
--------------------------------------------------------------------------------
## Overview

Tokenizer creates token sequences from musical scores, utilizing [Beautiful Soup](https://www.crummy.com/software/BeautifulSoup/).

## Usage

#### 1. import

```python
from score_to_tokens import MusicXML_to_tokens
```

#### 2. pass a score path to "MusicXML_to_tokens" function

```Python
tokens = MusicXML_to_tokens('input_score.musicxml')
```

- The list of tokens will be returned.
20 | 21 | ## Specifications 22 | 23 | ### Supported scores / formats 24 | 25 | - Piano scores (for both hands) 26 | - MusicXML format 27 | 28 | ### Supported score elements 29 | 30 | - Barline 31 | - Clef (treble / bass) 32 | - Key Signature 33 | - Time Signature 34 | - Note 35 | - note name (+ accidental) / length / stem direction / beam / tie 36 | - Rest 37 | - length 38 | 39 | ### Requirements 40 | 41 | Python 3.6+ 42 | 43 | - beautifulsoup4 (4.6.3) 44 | - lxml (4.9.1) 45 | - pretty_midi (0.2.9) 46 | 47 | Note: The library versions here are not specified ones, but **tested** ones. 48 | -------------------------------------------------------------------------------- /tokenization_tools/tokenizer/sample/generated_tokens.txt: -------------------------------------------------------------------------------- 1 | R bar clef_treble key_sharp_3 time_3/4 note_E5 len_2 stem_down tie_stop note_C#5 len_1/2 stem_down beam_start note_E5 len_1/2 stem_down beam_stop bar note_E5 len_2 stem_up note_C#5 len_1/2 stem_up beam_start note_B4 len_1/4 stem_up beam_continue_start note_A4 len_1/4 stem_up beam_stop_stop note_A4 len_2 stem_down note_E4 len_1 stem_down bar note_C#5 len_2 stem_up note_B4 len_1 stem_up note_G#4 len_3 stem_down bar note_A4 len_2 stem_up note_F#4 len_1 stem_up note_D4 len_3 stem_down L bar clef_bass key_sharp_3 time_3/4 note_G#3 note_E3 note_A2 len_2 stem_down note_G#3 note_E3 note_A2 len_1 stem_down bar note_F#2 len_1/2 stem_up beam_start note_C#3 len_1/2 stem_up beam_stop note_F#3 len_1/2 stem_down beam_start note_G#3 len_1/2 stem_down beam_stop note_A3 len_1/2 stem_down beam_start note_B3 len_1/2 stem_down beam_stop bar note_E2 len_1/2 stem_up beam_start note_C#3 len_1/2 stem_up beam_stop note_E3 len_1/2 stem_down beam_start note_F#3 len_1/2 stem_down beam_stop note_C#4 len_1/2 stem_down beam_start note_G#3 len_1/2 stem_down beam_stop bar note_D2 len_1/2 stem_up beam_start note_A2 len_1/2 stem_up beam_stop note_D3 len_1/2 stem_down beam_start note_F#3 len_1/2 
stem_down beam_stop note_G#3 len_1/2 stem_down beam_start note_A3 len_1/2 stem_down beam_stop -------------------------------------------------------------------------------- /tokenization_tools/tokenizer/sample/input_score.musicxml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Už z hor zní zvon 6 | 7 | 8 | 9 | MuseScore 3.1.0 10 | 2021-05-13 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 7.05556 21 | 40 22 | 23 | 24 | 1683.78 25 | 1190.55 26 | 27 | 56.6929 28 | 56.6929 29 | 56.6929 30 | 113.386 31 | 32 | 33 | 56.6929 34 | 56.6929 35 | 56.6929 36 | 113.386 37 | 38 | 39 | 40 | 41 | 42 | 43 | Traditional 44 | 45 | Arranged by Markéta Kapustová 46 | 47 | 48 | 49 | Amazing grace 50 | 51 | 52 | 53 | Piano 54 | Pno. 55 | 56 | Piano 57 | 58 | 59 | 60 | 1 61 | 1 62 | 78.7402 63 | 0 64 | 65 | 66 | 67 | 68 | 4 69 | F 70 | 4 71 | 72 | G 73 | 2 74 | 75 | 3 76 | 80 | 81 | 82 | E 83 | 5 84 | 85 | 8 86 | 87 | 1 88 | half 89 | down 90 | 1 91 | 92 | 93 | 94 | 95 | 96 | 97 | C 98 | 1 99 | 5 100 | 101 | 2 102 | 1 103 | eighth 104 | down 105 | 1 106 | begin 107 | 108 | 109 | 110 | E 111 | 5 112 | 113 | 2 114 | 1 115 | eighth 116 | down 117 | 1 118 | end 119 | 120 | 121 | 12 122 | 123 | 124 | 125 | 126 | 127 | 2 128 | 129 | 130 | 131 | A 132 | 2 133 | 134 | 8 135 | 5 136 | half 137 | down 138 | 2 139 | 140 | 141 | 142 | 143 | E 144 | 3 145 | 146 | 8 147 | 5 148 | half 149 | down 150 | 2 151 | 152 | 153 | 154 | 155 | G 156 | 1 157 | 3 158 | 159 | 8 160 | 5 161 | half 162 | down 163 | 2 164 | 165 | 166 | 167 | A 168 | 2 169 | 170 | 4 171 | 5 172 | quarter 173 | down 174 | 2 175 | 176 | 177 | 178 | 179 | E 180 | 3 181 | 182 | 4 183 | 5 184 | quarter 185 | down 186 | 2 187 | 188 | 189 | 190 | 191 | G 192 | 1 193 | 3 194 | 195 | 4 196 | 5 197 | quarter 198 | down 199 | 2 200 | 201 | 202 | 203 | 204 | 205 | 2 206 | 207 | 208 | 209 | 210 | 211 | E 212 | 5 213 | 214 | 8 215 | 1 216 | half 217 | up 218 | 1 219 | 220 | 221 | 222 | C 223 
| 1 224 | 5 225 | 226 | 2 227 | 1 228 | eighth 229 | up 230 | 1 231 | begin 232 | 233 | 234 | 235 | B 236 | 4 237 | 238 | 1 239 | 1 240 | 16th 241 | up 242 | 1 243 | continue 244 | begin 245 | 246 | 247 | 248 | A 249 | 4 250 | 251 | 1 252 | 1 253 | 16th 254 | up 255 | 1 256 | end 257 | end 258 | 259 | 260 | 12 261 | 262 | 263 | 264 | A 265 | 4 266 | 267 | 8 268 | 2 269 | half 270 | down 271 | 1 272 | 273 | 274 | 275 | E 276 | 4 277 | 278 | 4 279 | 2 280 | quarter 281 | down 282 | 1 283 | 284 | 285 | 12 286 | 287 | 288 | 289 | 290 | 291 | 2 292 | 293 | 294 | 295 | F 296 | 1 297 | 2 298 | 299 | 2 300 | 5 301 | eighth 302 | up 303 | 2 304 | begin 305 | 306 | 307 | 308 | C 309 | 1 310 | 3 311 | 312 | 2 313 | 5 314 | eighth 315 | up 316 | 2 317 | end 318 | 319 | 320 | 321 | F 322 | 1 323 | 3 324 | 325 | 2 326 | 5 327 | eighth 328 | down 329 | 2 330 | begin 331 | 332 | 333 | 334 | G 335 | 1 336 | 3 337 | 338 | 2 339 | 5 340 | eighth 341 | down 342 | 2 343 | end 344 | 345 | 346 | 347 | A 348 | 3 349 | 350 | 2 351 | 5 352 | eighth 353 | down 354 | 2 355 | begin 356 | 357 | 358 | 359 | B 360 | 3 361 | 362 | 2 363 | 5 364 | eighth 365 | down 366 | 2 367 | end 368 | 369 | 370 | 371 | 372 | 373 | 2 374 | 375 | 376 | 377 | 378 | 379 | 380 | 21.00 381 | 0.00 382 | 383 | 129.65 384 | 385 | 386 | 65.00 387 | 388 | 389 | 390 | 391 | C 392 | 1 393 | 5 394 | 395 | 8 396 | 1 397 | half 398 | up 399 | 1 400 | 401 | 402 | 403 | B 404 | 4 405 | 406 | 4 407 | 1 408 | quarter 409 | up 410 | 1 411 | 412 | 413 | 12 414 | 415 | 416 | 417 | G 418 | 1 419 | 4 420 | 421 | 12 422 | 2 423 | half 424 | 425 | down 426 | 1 427 | 428 | 429 | 12 430 | 431 | 432 | 433 | 434 | 435 | 2 436 | 437 | 438 | 439 | E 440 | 2 441 | 442 | 2 443 | 5 444 | eighth 445 | up 446 | 2 447 | begin 448 | 449 | 450 | 451 | C 452 | 1 453 | 3 454 | 455 | 2 456 | 5 457 | eighth 458 | up 459 | 2 460 | end 461 | 462 | 463 | 464 | E 465 | 3 466 | 467 | 2 468 | 5 469 | eighth 470 | down 471 | 2 472 | begin 473 | 474 | 475 | 476 | 
F 477 | 1 478 | 3 479 | 480 | 2 481 | 5 482 | eighth 483 | down 484 | 2 485 | end 486 | 487 | 488 | 489 | C 490 | 1 491 | 4 492 | 493 | 2 494 | 5 495 | eighth 496 | down 497 | 2 498 | begin 499 | 500 | 501 | 502 | G 503 | 1 504 | 3 505 | 506 | 2 507 | 5 508 | eighth 509 | down 510 | 2 511 | end 512 | 513 | 514 | 515 | 516 | 517 | 2 518 | 519 | 520 | 521 | 522 | 523 | A 524 | 4 525 | 526 | 8 527 | 1 528 | half 529 | up 530 | 1 531 | 532 | 533 | 534 | F 535 | 1 536 | 4 537 | 538 | 4 539 | 1 540 | quarter 541 | up 542 | 1 543 | 544 | 545 | 12 546 | 547 | 548 | 549 | D 550 | 4 551 | 552 | 12 553 | 2 554 | half 555 | 556 | down 557 | 1 558 | 559 | 560 | 12 561 | 562 | 563 | 564 | 565 | 566 | 2 567 | 568 | 569 | 570 | D 571 | 2 572 | 573 | 2 574 | 5 575 | eighth 576 | up 577 | 2 578 | begin 579 | 580 | 581 | 582 | A 583 | 2 584 | 585 | 2 586 | 5 587 | eighth 588 | up 589 | 2 590 | end 591 | 592 | 593 | 594 | D 595 | 3 596 | 597 | 2 598 | 5 599 | eighth 600 | down 601 | 2 602 | begin 603 | 604 | 605 | 606 | F 607 | 1 608 | 3 609 | 610 | 2 611 | 5 612 | eighth 613 | down 614 | 2 615 | end 616 | 617 | 618 | 619 | G 620 | 1 621 | 3 622 | 623 | 2 624 | 5 625 | eighth 626 | down 627 | 2 628 | begin 629 | 630 | 631 | 632 | A 633 | 3 634 | 635 | 2 636 | 5 637 | eighth 638 | down 639 | 2 640 | end 641 | 642 | 643 | 644 | 645 | 646 | 2 647 | 648 | 649 | 650 | -------------------------------------------------------------------------------- /tokenization_tools/tokenizer/sample/sample_usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# import \"score_to_tokens.py\" (assuming the file is in the same directory)\n", 10 | "from score_to_tokens import *" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/plain": [ 
21 | "'R bar clef_treble key_sharp_3 time_3/4 note_E5 len_2 stem_down tie_stop note_C#5 len_1/2 stem_down beam_start note_E5 len_1/2 stem_down beam_stop bar note_E5 len_2 stem_up note_C#5 len_1/2 stem_up beam_start note_B4 len_1/4 stem_up beam_continue_start note_A4 len_1/4 stem_up beam_stop_stop note_A4 len_2 stem_down note_E4 len_1 stem_down bar note_C#5 len_2 stem_up note_B4 len_1 stem_up note_G#4 len_3 stem_down bar note_A4 len_2 stem_up note_F#4 len_1 stem_up note_D4 len_3 stem_down L bar clef_bass key_sharp_3 time_3/4 note_G#3 note_E3 note_A2 len_2 stem_down note_G#3 note_E3 note_A2 len_1 stem_down bar note_F#2 len_1/2 stem_up beam_start note_C#3 len_1/2 stem_up beam_stop note_F#3 len_1/2 stem_down beam_start note_G#3 len_1/2 stem_down beam_stop note_A3 len_1/2 stem_down beam_start note_B3 len_1/2 stem_down beam_stop bar note_E2 len_1/2 stem_up beam_start note_C#3 len_1/2 stem_up beam_stop note_E3 len_1/2 stem_down beam_start note_F#3 len_1/2 stem_down beam_stop note_C#4 len_1/2 stem_down beam_start note_G#3 len_1/2 stem_down beam_stop bar note_D2 len_1/2 stem_up beam_start note_A2 len_1/2 stem_up beam_stop note_D3 len_1/2 stem_down beam_start note_F#3 len_1/2 stem_down beam_stop note_G#3 len_1/2 stem_down beam_start note_A3 len_1/2 stem_down beam_stop'" 22 | ] 23 | }, 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "output_type": "execute_result" 27 | } 28 | ], 29 | "source": [ 30 | "# load MusicXML file and convert its content to tokens\n", 31 | "tokens = MusicXML_to_tokens('input_score.musicxml')\n", 32 | "' '.join(tokens)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "# write out\n", 42 | "with open('generated_tokens.txt', 'w') as f:\n", 43 | " f.write(' '.join(tokens))" 44 | ] 45 | } 46 | ], 47 | "metadata": { 48 | "kernelspec": { 49 | "display_name": "Python 3", 50 | "language": "python", 51 | "name": "python3" 52 | }, 53 | "language_info": { 54 | 
"codemirror_mode": { 55 | "name": "ipython", 56 | "version": 3 57 | }, 58 | "file_extension": ".py", 59 | "mimetype": "text/x-python", 60 | "name": "python", 61 | "nbconvert_exporter": "python", 62 | "pygments_lexer": "ipython3", 63 | "version": "3.8.8" 64 | } 65 | }, 66 | "nbformat": 4, 67 | "nbformat_minor": 4 68 | } 69 | -------------------------------------------------------------------------------- /tokenization_tools/tokenizer/score_to_tokens.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | from bs4.element import Tag 3 | from fractions import Fraction 4 | import pretty_midi 5 | 6 | def attributes_to_tokens(attributes, staff=None): # tokenize 'attributes' section in MusicXML 7 | tokens = [] 8 | divisions = None 9 | 10 | for child in attributes.contents: 11 | type_ = child.name 12 | if type_ == 'divisions': 13 | divisions = int(child.text) 14 | elif type_ in ('clef', 'key', 'time'): 15 | if staff is not None: 16 | if 'number' in child.attrs and int(child['number']) != staff: 17 | continue 18 | tokens.append(attribute_to_token(child)) 19 | 20 | return tokens, divisions 21 | 22 | def attribute_to_token(child): # clef, key signature, and time signature 23 | type_ = child.name 24 | if type_ == 'clef': 25 | if child.sign.text == 'G': 26 | return 'clef_treble' 27 | elif child.sign.text == 'F': 28 | return 'clef_bass' 29 | elif type_ == 'key': 30 | key = int(child.fifths.text) 31 | if key < 0: 32 | return f'key_flat_{abs(key)}' 33 | elif key > 0: 34 | return f'key_sharp_{key}' 35 | else: 36 | return f'key_natural_{key}' 37 | elif type_ == 'time': 38 | times = [int(c.text) for c in child.contents if isinstance(c, Tag)] # excluding '\n' 39 | if times[1] == 2: 40 | return f'time_{times[0]*2}/{times[1]*2}' 41 | elif times[1] > 4: 42 | fraction = str(Fraction(times[0], times[1])) 43 | if int(fraction.split('/')[1]) == 2: # X/2 44 | return 
f"time_{int(fraction.split('/')[0])*2}/{int(fraction.split('/')[0])*2}" 45 | else: 46 | return 'time_' + fraction 47 | else: 48 | return f'time_{times[0]}/{times[1]}' 49 | 50 | def aggregate_notes(voice_notes): # notes to chord 51 | for note in voice_notes[1:]: 52 | if note.chord is not None: 53 | last_note = note.find_previous('note') 54 | last_note.insert(0, note.pitch) 55 | note.decompose() 56 | 57 | def note_to_tokens(note, divisions=8, note_name=True): # notes and rests 58 | beam_translations = {'begin': 'start', 'end': 'stop', 'forward hook': 'partial-right', 'backward hook': 'partial-left'} 59 | 60 | if note.duration is None: # gracenote 61 | return [] 62 | 63 | duration_in_fraction = str(Fraction(int(note.duration.text), divisions)) 64 | 65 | if note.rest: 66 | return ['rest', f'len_{duration_in_fraction}'] # for rests 67 | 68 | tokens = [] 69 | 70 | # pitches 71 | for pitch in note.find_all('pitch'): 72 | if note_name: 73 | if pitch.alter: 74 | alter_to_symbol= {'-2': 'bb', '-1': 'b', '0':'', '1': '#', '2': '##'} 75 | tokens.append(f"note_{pitch.step.text}{alter_to_symbol[pitch.alter.text]}{pitch.octave.text}") 76 | else: 77 | tokens.append(f"note_{pitch.step.text}{pitch.octave.text}") 78 | else: 79 | note_number = pretty_midi.note_name_to_number(pitch.step.text + pitch.octave.text) # 'C4' -> 60 80 | if pitch.alter: 81 | note_number += int(pitch.alter.text) 82 | tokens.append(f'note_{note_number}') 83 | 84 | # len 85 | tokens.append(f'len_{duration_in_fraction}') 86 | 87 | if note.stem: 88 | tokens.append(f'stem_{note.stem.text}') 89 | 90 | if note.beam: 91 | beams = note.find_all('beam') 92 | tokens.append('beam_' + '_'.join([beam_translations[b.text] if b.text in beam_translations else b.text for b in beams])) 93 | 94 | if note.tied: 95 | tokens.append('tie_' + note.tied.attrs['type']) 96 | 97 | return tokens 98 | 99 | def element_segmentation(measure, soup, staff=None): # divide elements into three sections 100 | voice_starts, voice_ends = {}, {} 101 | 
position = 0 102 | for element in measure.contents: 103 | if element.name == 'note': 104 | if element.duration is None: # gracenote 105 | continue 106 | 107 | voice = element.voice.text 108 | duration = int(element.duration.text) 109 | if element.chord: # rewind for concurrent notes 110 | position -= last_duration 111 | 112 | if element.staff and int(element.staff.text) == staff: 113 | voice_starts[voice] = min(voice_starts[voice], position) if voice in voice_starts else position 114 | start_tag = soup.new_tag('start') 115 | start_tag.string = str(position) 116 | element.append(start_tag) 117 | 118 | position += duration 119 | 120 | if element.staff and int(element.staff.text) == staff: 121 | voice_ends[voice] = max(voice_ends[voice], position) if voice in voice_ends else position 122 | end_tag = soup.new_tag('end') 123 | end_tag.string = str(position) 124 | element.append(end_tag) 125 | 126 | last_duration = duration 127 | elif element.name == 'backup': 128 | position -= int(element.duration.text) 129 | elif element.name == 'forward': 130 | position += int(element.duration.text) 131 | else: # other types 132 | start_tag = soup.new_tag('start') 133 | end_tag = soup.new_tag('end') 134 | 135 | start_tag.string = str(position) 136 | end_tag.string = str(position) 137 | 138 | element.append(start_tag) 139 | element.append(end_tag) 140 | 141 | # voice section 142 | voice_start = sorted(voice_starts.values())[1] if voice_starts else 0 143 | voice_end = sorted(voice_ends.values(), reverse=True)[1] if voice_ends else 0 144 | 145 | pre_voice_elements, post_voice_elements, voice_elements = [], [], [] 146 | for element in measure.contents: 147 | if element.name in ('backup', 'forward'): 148 | continue 149 | if element.name == 'note' and element.duration is None: # gracenote 150 | continue 151 | if staff is not None: 152 | if element.staff and int(element.staff.text) != staff: 153 | continue 154 | 155 | if voice_starts or voice_ends: 156 | if int(element.end.text) <= 
voice_start: 157 | pre_voice_elements.append(element) 158 | elif voice_end <= int(element.start.text): 159 | post_voice_elements.append(element) 160 | else: 161 | voice_elements.append(element) 162 | else: 163 | pre_voice_elements.append(element) 164 | 165 | return pre_voice_elements, voice_elements, post_voice_elements 166 | 167 | def measures_to_tokens(measures, soup, staff=None, note_name=True): 168 | divisions = 0 169 | tokens = [] 170 | for measure in measures: 171 | 172 | tokens.append('bar') 173 | if staff is not None: 174 | notes = [n for n in measure.find_all('note') if n.staff and int(n.staff.text) == staff] 175 | else: 176 | notes = measure.find_all('note') 177 | 178 | voices = list(set([n.voice.text for n in notes if n.voice])) 179 | for voice in voices: 180 | voice_notes = [n for n in notes if n.voice and n.voice.text == voice] 181 | aggregate_notes(voice_notes) 182 | 183 | if len(voices) > 1: 184 | pre_voice_elements, voice_elements, post_voice_elements = element_segmentation(measure, soup, staff) 185 | 186 | for element in pre_voice_elements: 187 | if element.name == 'attributes': 188 | attr_tokens, div = attributes_to_tokens(element, staff) 189 | tokens += attr_tokens 190 | divisions = div if div else divisions 191 | elif element.name == 'note': 192 | tokens += note_to_tokens(element, divisions, note_name) 193 | 194 | if voice_elements: 195 | for voice in voices: 196 | tokens.append('') 197 | for element in voice_elements: 198 | if (element.voice and element.voice.text == voice) or (not element.voice and voice == '1'): 199 | if element.name == 'attributes': 200 | attr_tokens, div = attributes_to_tokens(element, staff) 201 | tokens += attr_tokens 202 | divisions = div if div else divisions 203 | elif element.name == 'note': 204 | tokens += note_to_tokens(element, divisions, note_name) 205 | tokens.append('') 206 | 207 | for element in post_voice_elements: 208 | if element.name == 'attributes': 209 | attr_tokens, div = attributes_to_tokens(element, 
staff) 210 | tokens += attr_tokens 211 | divisions = div if div else divisions 212 | elif element.name == 'note': 213 | tokens += note_to_tokens(element, divisions, note_name) 214 | else: 215 | for element in measure.contents: 216 | if staff is not None: 217 | if element.name in ('attributes', 'note') and element.staff and int(element.staff.text) != staff: 218 | continue 219 | if element.name == 'attributes': 220 | attr_tokens, div = attributes_to_tokens(element, staff) 221 | tokens += attr_tokens 222 | divisions = div if div else divisions 223 | elif element.name == 'note': 224 | tokens += note_to_tokens(element, divisions, note_name) 225 | 226 | return tokens 227 | 228 | def load_MusicXML(mxml_path): # load MusicXML contents using BeautifulSoup 229 | soup = BeautifulSoup(open(mxml_path, encoding='utf-8'), 'lxml-xml', from_encoding='utf-8') # MusicXML 230 | for tag in soup(string='\n'): # eliminate line breaks 231 | tag.extract() 232 | 233 | parts = soup.find_all('part') 234 | 235 | return [part.find_all('measure') for part in parts], soup 236 | 237 | def MusicXML_to_tokens(soup_or_mxml_path, note_name=True): # use this method 238 | if type(soup_or_mxml_path) is str: 239 | parts, soup = load_MusicXML(soup_or_mxml_path) 240 | else: 241 | soup = soup_or_mxml_path 242 | for tag in soup(string='\n'): # eliminate line breaks 243 | tag.extract() 244 | 245 | parts = [part.find_all('measure') for part in soup.find_all('part')] 246 | 247 | if len(parts) == 1: 248 | tokens = ['R'] + measures_to_tokens(parts[0], soup, staff=1, note_name=note_name) 249 | tokens += ['L'] + measures_to_tokens(parts[0], soup, staff=2, note_name=note_name) 250 | elif len(parts) == 2: 251 | tokens = ['R'] + measures_to_tokens(parts[0], soup, note_name=note_name) 252 | tokens += ['L'] + measures_to_tokens(parts[1], soup, note_name=note_name) 253 | 254 | return tokens 255 | --------------------------------------------------------------------------------