├── README.md
├── LICENSE
├── MusicStyleTransfer.py
└── project
    ├── utils.py
    ├── midi_handler.py
    ├── test.py
    ├── model.py
    └── train.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Music Style Transfer


This repository includes the source code of the symbolic music style transfer algorithm from:

Wei-Tsung Lu and Li Su, “Transferring the Style of Homophonic Music Using Recurrent Neural Networks and Autoregressive Model,” International Society for Music Information Retrieval Conference (ISMIR), September 2018.

Listening samples are available [here].

### Dependencies

This repository requires the following packages:

- python 3.6
- numpy
- tensorflow
- keras
- mido

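### Usage

A minimal example of invoking the entry script with the defaults defined in `MusicStyleTransfer.py` (the MIDI file names and the `bach` model below are simply those default values; substitute your own paths). The testing phase writes its result to `test.mid`.

```sh
# Style transfer (testing phase) with a pre-trained model
python MusicStyleTransfer.py --phase testing --model_path bach \
    --input_file LiveAndLetDie_all.mid --input_file_melody LiveAndLetDie_main.mid \
    --subdivision 4

# Train a new model on a pickled dataset
python MusicStyleTransfer.py --phase training --dataset_path bach_dataset.pickle \
    --epoch 80 --steps 6000 --output_model_name out
```
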
### Todos

-

License
----

MIT


[//]: #

[here]: https://drive.google.com/open?id=1hohsEvbAiBmTW6DUeEaoEha13otyn8oJ
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Wei Tsung Lu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/project/utils.py:
--------------------------------------------------------------------------------
import numpy as np
from keras.models import model_from_json, model_from_yaml


def padding(x, timesteps, step):
    extended_chorale = np.array(x)
    padding_dimensions = (timesteps * step,) + extended_chorale.shape[1:]

    padding_start = np.zeros(padding_dimensions)
    padding_end = np.zeros(padding_dimensions)
    for a, b in enumerate(padding_start):
        padding_start[a][-3] = 1
        padding_end[a][-2] = 1

    extended_chorale = np.concatenate((padding_start,
                                       extended_chorale,
                                       padding_end),
                                      axis=0)
    return extended_chorale


def padding_ade(x, timesteps, subdivision, step, gap):
    # extended_chorale = np.concatenate((-1*np.ones((x.shape[0],88)),x), axis = 1)
    extended_chorale = np.concatenate((x, np.zeros((x.shape[0], 1))), axis=1)

    for i in range(len(extended_chorale)):
        extended_chorale[i][-1] = i % subdivision + 1

    padding_dimensions = (timesteps * step + gap,) + extended_chorale.shape[1:]

    padding_start = np.zeros(padding_dimensions)
    padding_end = np.zeros(padding_dimensions)
    for a, b in enumerate(padding_start):
        padding_start[a][-3] = 1
        padding_end[a][-2] = 1

    extended_chorale = np.concatenate((padding_start,
                                       extended_chorale,
                                       padding_end),
                                      axis=0)

    return extended_chorale


def add_beat(score, subdivision):
    b = np.zeros((score.shape[0], 1))
    score = np.concatenate([score, b], axis=1)

    for time in range(0, len(score)):
        score[time][-1] = time % subdivision + 1

    return score


def load_model(model_name):
    """
    Load a model architecture from '<model_name>.yaml' and its weights
    from '<model_name>_weights.h5'.
    """
    ext = '.yaml'
    model = model_from_yaml(open(model_name + ext).read())
    model.load_weights(model_name + '_weights.h5')

    print("model " + model_name + " loaded")
    return model


def save_model(model, model_name, overwrite=False):
    # SAVE MODEL

    string = model.to_yaml()
    ext = '.yaml'

    open(model_name + ext, 'w').write(string)
    model.save_weights(model_name + '_weights.h5', overwrite=overwrite)
    print("model " + model_name + " saved")
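A minimal sketch of how the two helpers above are combined in the testing pipeline; it assumes the repository root is on the Python path and follows the 88-pitch + start/end-symbol column layout used throughout the code.

```python
import numpy as np

from project.utils import add_beat, padding

# A toy piano roll: 16 time steps, 88 pitch columns plus two reserved
# columns for the start/end symbols written by padding().
score = np.zeros((16, 90))

# add_beat() appends one column holding the position inside the beat (1..subdivision).
score = add_beat(score, subdivision=4)          # -> shape (16, 91)

# padding() prepends/appends timesteps*step frames of start/end symbols
# (columns -3 and -2), matching the context length the model expects.
padded = padding(score, timesteps=32, step=4)   # -> shape (16 + 2*128, 91) = (272, 91)
print(score.shape, padded.shape)
```
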
--------------------------------------------------------------------------------
/project/midi_handler.py:
--------------------------------------------------------------------------------
import mido
from mido import MetaMessage, Message, MidiFile, MidiTrack

import numpy as np


def midi2score(song,
               subdivision):
    mid = mido.MidiFile(song)

    tempo = 0
    sec_per_tick = 0
    length = mid.length

    time = 0

    # set initial score len
    for msg in mid:
        if (msg.is_meta):
            if (msg.type == 'set_tempo'):
                tempo = msg.tempo
        else:
            if (msg.type == "note_on"):
                bpm = mido.tempo2bpm(tempo)
                sec_per_tick = 60 / bpm / subdivision
                break

    score = np.zeros((int(length / sec_per_tick) + 1, 90))

    for msg in mid:
        time += msg.time
        pos = int(np.round(time / sec_per_tick))
        if (pos + 1 > score.shape[0]):
            score = np.append(score, np.zeros((pos - score.shape[0] + 1, 90)), axis=0)
        if (msg.is_meta):
            if (msg.type == 'set_tempo'):
                tempo = mido.tempo2bpm(msg.tempo)
                sec_per_tick = 60 / tempo / subdivision

        elif (msg.type == 'note_on'):
            if (msg.velocity == 0):
                p = msg.note - 21
                score[pos:, p] = 0
            else:
                p = msg.note - 21
                score[pos:, p] = 1

        elif (msg.type == 'note_off'):
            p = msg.note - 21
            score[pos:, p] = 0

    return score


def score2midi(name,
               score,
               subdivision,
               bpm,
               melody_constraint=False,
               melody=None):

    mid = MidiFile()
    track = MidiTrack()
    mid.tracks.append(track)

    # meta messages
    track.append(MetaMessage('set_tempo', tempo=mido.bpm2tempo(bpm)))
    # note messages
    ticks_per_beat = 480
    ticks_per_division = int(ticks_per_beat // subdivision)
    pitch_table = np.zeros((88, 1))

    for index, tick in enumerate(score):
        # offsets handle
        current_notes = np.nonzero(pitch_table)[0]
        for _note in current_notes:
            if (tick[_note] == 0):
                track.append(Message('note_on', note=_note + 21, velocity=0, time=0))
                pitch_table[_note] = 0

        # onsets handle
        # melody constraint handle
        onsets = np.nonzero(tick[:])[0]
        for _note in onsets:
            if (melody_constraint == True):
                if (melody[index][_note] == 1):
                    if (pitch_table[_note] == 1):
                        track.append(Message('note_on', note=_note + 21, velocity=0, time=0))
                    if (pitch_table[_note] != 2):
                        track.append(Message('note_on', note=_note + 21, velocity=85, time=0))
                        pitch_table[_note] = 2
                elif (pitch_table[_note] == 0):
                    track.append(Message('note_on', note=_note + 21, velocity=80, time=0))
                    pitch_table[_note] = 1
            else:
                if (pitch_table[_note] == 0):
                    track.append(Message('note_on', note=_note + 21, velocity=80, time=0))
                    pitch_table[_note] = 1

        # time progress
        track.append(Message('note_on', note=0, velocity=0, time=ticks_per_division))

    mid.save(name)
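A minimal round-trip sketch for the two converters above; `example.mid` is a placeholder file name, not a file shipped with the repository.

```python
from project.midi_handler import midi2score, score2midi

subdivision = 4                                   # 16th-note grid
score = midi2score("example.mid", subdivision)    # piano roll, shape (num_ticks, 90)
score2midi("roundtrip.mid", score, subdivision, bpm=120)
```
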
--------------------------------------------------------------------------------
/MusicStyleTransfer.py:
--------------------------------------------------------------------------------
import numpy as np
import argparse

from project.midi_handler import midi2score, score2midi
from project.utils import padding, load_model, save_model, add_beat
from project.test import style_transfer
from project.model import lstm_wavenet
from project.train import train

def main():
    # Arguments
    parser = argparse.ArgumentParser()

    parser.add_argument('-p', '--phase',
                        help='phase: training or testing (default: %(default)s)',
                        type=str, default='testing')
    # arguments for training
    parser.add_argument('-d', '--dataset_path',
                        help='path to data set (default: %(default)s)',
                        type=str, default='bach_dataset.pickle')

    parser.add_argument('-e', '--epoch',
                        help='number of epochs (default: %(default)s)',
                        type=int, default=80)
    parser.add_argument('-n', '--steps',
                        help='number of steps per epoch (default: %(default)s)',
                        type=int, default=6000)
    parser.add_argument('-b', '--batch_size_train',
                        help='batch size (default: %(default)s)',
                        type=int, default=88*3)
    parser.add_argument('-o', '--output_model_name',
                        help='name of the output model (default: %(default)s)',
                        type=str, default="out")
    # arguments for testing
    parser.add_argument('-m', '--model_path',
                        help='path to existing model (default: %(default)s)',
                        type=str, default='bach')
    parser.add_argument('-i', '--input_file',
                        help='path to input file (default: %(default)s)',
                        type=str, default="LiveAndLetDie_all.mid")
    parser.add_argument('-ii', '--input_file_melody',
                        help='path to input melody file (default: %(default)s)',
                        type=str, default="LiveAndLetDie_main.mid")
    parser.add_argument('-s', '--subdivision',
                        help='subdivision within one beat (default: %(default)s)',
                        type=int, default=4)

    args = parser.parse_args()
    print(args)

    if(args.phase == "training"):
        # set arguments
        timesteps = 32
        step = 4
        subdivision = args.subdivision
        batch_size = args.batch_size_train
        dataset_path = args.dataset_path

        # create model
        model = lstm_wavenet(num_features_lr=91, timesteps=timesteps,
                             step=step, num_units_lstm=[150, 150, 150, 150],
                             num_dense=150,
                             conv_layers=5,
                             skip_layers=2)

        model.compile(optimizer="adam", loss={'prediction': 'binary_crossentropy'}, metrics=['accuracy'])

        # train
        model = train(model,
                      dataset_path,
                      subdivision,
                      epoch=args.epoch,
                      steps=args.steps,
                      timesteps=timesteps,
                      step=step,
                      batch_size=batch_size)

        # save model
        save_model(model, args.output_model_name)

    else:
        # load input file
        subdivision = args.subdivision
        path = args.input_file
        path_melody = args.input_file_melody
        score = midi2score(path, subdivision)

        if(path_melody == "none"):
            score_melody = np.zeros(score.shape)
        else:
            score_melody = midi2score(path_melody, subdivision)

        score = add_beat(score, subdivision)
        score_melody = add_beat(score_melody, subdivision)

        score = np.array(score[0:640])
        score_melody = np.array(score_melody[0:640])

        extended_score = padding(score, 32, 4)

        # load model
        model = load_model(model_name=args.model_path)

        # generation
        result = style_transfer(extended_score, score_melody, model, iter_num=25)

        # save result
        score2midi("test.mid", result, subdivision, 120, melody_constraint=True, melody=score_melody)
        print("saved")

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/project/test.py:
--------------------------------------------------------------------------------
import numpy as np
import tqdm


def time_index_chosing(_range,
                       interval,
                       random=True,
                       time_list=None):
    interval += 1
    if (random):
        time_index_base = np.random.randint(_range)
    else:
        time_index_base = time_list[np.random.randint(len(time_list))]

    c = time_index_base + np.arange(-interval * 200, interval * 200, interval)

    return c[np.where(np.logical_and(c >= 0, c < _range))]


def midi_index_chosing(_len):
    return np.random.randint(88, size=_len)


def generation_wavenet(model,
                       score,
                       time_indexes,
                       midi_indexes,
                       timesteps=32,
                       step=4
                       ):
    time_indexes = np.array(time_indexes) + timesteps * step

    left_features = np.array(score[[np.arange(t - (timesteps * step), t) for t in time_indexes], :])
    right_features = np.array(score[[np.arange(t + 1, t + 1 + (timesteps * step)) for t in time_indexes], :])
    central_features = np.reshape(np.array(score[time_indexes, :88]), (len(time_indexes), 88, 1))

    for a, b in enumerate(midi_indexes):
        central_features[a, b:] = 0.5

    p = model.predict([left_features, central_features, right_features])

    return p


def style_transfer(extended_score, score_melody,
                   model,
                   iter_num=25,
                   timesteps=32,
                   step=4,
                   threshold=0.5
                   ):

    fixed_rhythm_score = score_melody
    original_len = len(score_melody)
    new_extended_score = np.array(extended_score)
    counter = 0
    alpha_initial = 0.6
    alpha = alpha_initial
    alpha_min = 0
    annealing_fraction = 0.6
    update_count = 0

    for i in tqdm.tqdm(range(iter_num)):
        time_list = np.arange(original_len)
        print("alpha = ", alpha)
        while (time_list.size > 0):
            if(alpha != -1):
                alpha = max(0, alpha_initial - update_count * (alpha_initial - alpha_min) / (
                    iter_num * original_len * annealing_fraction))
            if(alpha == 0):
                extended_score = new_extended_score
                alpha = -1
            elif(counter / original_len > alpha and alpha != -1):
                counter = 0
                extended_score = np.array(new_extended_score)

            time_indexes = time_index_chosing(original_len, timesteps * step, random=False, time_list=time_list)
            l = len(time_indexes)
            sorter = np.argsort(time_list)
            d = sorter[np.searchsorted(time_list, time_indexes, sorter=sorter)]
            time_list = np.delete(time_list, d, 0)
            counter += l

            update_count += l

            if(alpha != -1):
                midi_indexes = np.arange(88).tolist() * len(time_indexes)
                time_indexes_repeat = np.repeat(time_indexes, 88)
                p = generation_wavenet(model, extended_score, time_indexes_repeat, midi_indexes,
                                       timesteps=timesteps, step=step)
                for i, t in enumerate(time_indexes_repeat):
                    if(fixed_rhythm_score[t, midi_indexes[i]] == 0):
                        if(p[i][1] > threshold):
                            new_extended_score[t + timesteps * step, midi_indexes[i]] = 1
                        elif(p[i][0] > threshold):
                            new_extended_score[t + timesteps * step, midi_indexes[i]] = 0

            else:
                for midi_index in range(88):
                    midi_indexes = [midi_index] * l
                    p = generation_wavenet(model, extended_score, time_indexes, midi_indexes,
                                           timesteps=timesteps, step=step)
                    for i, t in enumerate(time_indexes):
                        if(fixed_rhythm_score[t, midi_indexes[i]] == 0):
                            if(p[i][1] > threshold):
                                new_extended_score[t + timesteps * step, midi_indexes[i]] = 1
                            elif(p[i][0] > threshold):
                                new_extended_score[t + timesteps * step, midi_indexes[i]] = 0

    return new_extended_score[timesteps * step:-timesteps * step, :88]
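A small illustration of how `time_index_chosing` spaces the positions resampled in one batch; the `640` and `32 * 4` values mirror the defaults used in `MusicStyleTransfer.py`.

```python
from project.test import time_index_chosing

# With interval = timesteps * step = 128, the chosen indices are spaced
# interval + 1 = 129 frames apart, so no chosen position falls inside another
# chosen position's 128-frame context window within the same batch.
idx = time_index_chosing(640, 32 * 4)
print(idx)   # e.g. [ 37 166 295 424 553], anchored at a random base position
```
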
--------------------------------------------------------------------------------
/project/model.py:
--------------------------------------------------------------------------------
from keras.engine import Input, Model
from keras.layers import Dense, Reshape, Permute, add, TimeDistributed, LSTM, CuDNNLSTM, Dropout, Lambda, concatenate, Multiply
from keras.layers.core import Activation, Flatten, Dropout
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv1D, MaxPooling1D, UpSampling1D, Conv2D, Conv2DTranspose, MaxPooling2D, Cropping2D
from keras.layers.advanced_activations import LeakyReLU
from keras.optimizers import SGD, Adam


def lstm_wavenet(num_features_lr=91,
                 num_pitches=88,
                 num_units_lstm=[150, 150, 150, 150],
                 num_dense=90,
                 timesteps=32,
                 step=4,
                 conv_layers=5,
                 skip_layers=2,
                 using_cuda=True
                 ):

    SelectedLSTM = CuDNNLSTM if using_cuda else LSTM

    left_features = Input(shape=(timesteps * step, num_features_lr), name='left_features')
    left_features_reshape = Reshape((timesteps, num_features_lr * step))(left_features)
    right_features = Input(shape=(timesteps * step, num_features_lr), name='right_features')
    right_features_reshape = Reshape((timesteps, num_features_lr * step))(right_features)
    central_features = Input(shape=(88, 1), name='central_features')

    embedding_left = Dense(input_dim=num_features_lr * step,
                           units=num_dense, name='embedding_left')
    embedding_right = Dense(input_dim=num_features_lr * step,
                            units=num_dense, name='embedding_right')

    predictions_left = left_features_reshape
    predictions_right = right_features_reshape

    # input dropout
    predictions_left = Dropout(0.3)(predictions_left)
    predictions_right = Dropout(0.3)(predictions_right)
    # embedding
    predictions_left = TimeDistributed(embedding_left)(predictions_left)
    predictions_right = TimeDistributed(embedding_right)(predictions_right)
    # left recurrent networks
    return_sequences = True
    for k, stack_index in enumerate(range(len(num_units_lstm))):
        if k == len(num_units_lstm) - 1:
            return_sequences = False
        if k > 0:
            predictions_left_tmp = add([LeakyReLU(alpha=0.2)(predictions_left), predictions_left_old])
            predictions_right_tmp = add([LeakyReLU(alpha=0.2)(predictions_right), predictions_right_old])
        else:
            predictions_left_tmp = predictions_left
            predictions_right_tmp = predictions_right

        predictions_left_old = predictions_left
        predictions_left = predictions_left_tmp
        predictions_left = SelectedLSTM(num_units_lstm[stack_index],
                                        return_sequences=return_sequences,
                                        name='lstm_left_' + str(stack_index)
                                        )(predictions_left)

        predictions_right_old = predictions_right
        predictions_right = predictions_right_tmp
        predictions_right = SelectedLSTM(num_units_lstm[stack_index],
                                         return_sequences=return_sequences,
                                         name='lstm_right_' + str(stack_index)
                                         )(predictions_right)
        # LSTM Dropout
        predictions_left = Dropout(0.3)(predictions_left)
        predictions_right = Dropout(0.3)(predictions_right)

    # retain only last input for skip connections
    predictions_left_old = Lambda(lambda t: t[:, -1, :],
                                  output_shape=lambda input_shape: (input_shape[0], input_shape[-1])
                                  )(predictions_left_old)

    predictions_right_old = Lambda(lambda t: t[:, -1, :],
                                   output_shape=lambda input_shape: (input_shape[0], input_shape[-1])
                                   )(predictions_right_old)
    # concat or sum
    predictions_left = concatenate([LeakyReLU(alpha=0.2)(predictions_left), predictions_left_old])
    predictions_right = concatenate([LeakyReLU(alpha=0.2)(predictions_right), predictions_right_old])
    predictions_context = concatenate([predictions_left, predictions_right])
    predictions_context = LeakyReLU(alpha=0.2)(Dense(num_pitches)(predictions_context))
    predictions_context = Reshape((num_pitches, 1))(predictions_context)
    # wavenet part
    skip = central_features
    skips = []
    for i in range(conv_layers):
        conv_central_t = BatchNormalization()(Conv1D(64, 2, dilation_rate=2 ** (i), padding='causal')(skip))
        conv_central_s = BatchNormalization()(Conv1D(64, 2, dilation_rate=2 ** (i), padding='causal')(skip))
        if (i < skip_layers):
            conv_context_t = BatchNormalization()(
                Conv1D(64, 2, dilation_rate=2 ** (i), padding='causal')(predictions_context))
            conv_context_s = BatchNormalization()(
                Conv1D(64, 2, dilation_rate=2 ** (i), padding='causal')(predictions_context))
            conv_t = Activation('tanh')(add([conv_central_t, conv_context_t]))
            conv_s = Activation('sigmoid')(add([conv_central_s, conv_context_s]))
            conv = Multiply()([conv_t, conv_s])
            conv = BatchNormalization()(Conv1D(1, 1, padding='same')(conv))
            skip = add([conv, skip])
        else:
            conv_t = Activation('tanh')(conv_central_t)
            conv_s = Activation('sigmoid')(conv_central_s)
            conv = Multiply()([conv_t, conv_s])
            conv = BatchNormalization()(Conv1D(1, 1, padding='same')(conv))
            skip = add([conv, skip])
        skips.append(conv)

    out = LeakyReLU(alpha=0.2)(add(skips))
    out = LeakyReLU(alpha=0.2)(Conv1D(1, 1)(out))
    out = Flatten()(Conv1D(1, 1)(out))
    out = Dense(2, activation='softmax', name='prediction')(out)

    model = Model(inputs=[left_features, central_features, right_features],
                  outputs=out)
    model.compile(optimizer='adam',
                  loss={'prediction': 'binary_crossentropy'},
                  )

    return model
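A minimal shape check for `lstm_wavenet` (a sketch; it assumes a Keras version that still provides `CuDNNLSTM` for the default code path, and passes `using_cuda=False` to build the plain-LSTM variant on a CPU-only machine).

```python
import numpy as np

from project.model import lstm_wavenet

model = lstm_wavenet(num_features_lr=91, timesteps=32, step=4, using_cuda=False)

left = np.zeros((2, 128, 91), dtype=np.float32)      # 32*4 frames of left context
right = np.zeros((2, 128, 91), dtype=np.float32)     # 32*4 frames of right context
central = np.zeros((2, 88, 1), dtype=np.float32)     # current frame, masked from the target pitch upward
print(model.predict([left, central, right]).shape)   # (2, 2): [P(note off), P(note on)]
```
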
--------------------------------------------------------------------------------
/project/train.py:
--------------------------------------------------------------------------------
import numpy as np
import pickle

from project.utils import add_beat, padding


def generator(batch_size, subdivision, timesteps, step, dataset,
              phase='train', percentage_train=0.8,
              constraint=False
              ):

    if("jazz" in dataset):
        Y, X_metadatas = pickle.load(open(dataset, 'rb'))
    else:
        Y, X_metadatas, index2notes, note2indexs, metadatas = pickle.load(open(dataset, 'rb'))

    # Set chorale_indices
    if phase == 'train':
        chorale_indices = np.arange(int(len(Y) * percentage_train))
    if phase == 'test':
        chorale_indices = np.arange(int(len(Y) * percentage_train), len(Y))
    if phase == 'all':
        chorale_indices = np.arange(int(len(Y)))

    if("jazz" in dataset):
        for a in range(len(Y)):
            if (a in chorale_indices):
                new_y = np.zeros((Y[a].shape[:2]))
                for timeindex, beat in enumerate(Y[a]):
                    new_y[timeindex, np.nonzero(beat)[0]] = 1
                new_y = add_beat(new_y, subdivision)
                Y[a] = padding(new_y, timesteps, step)
            else:
                Y[a] = 0
    else:
        for a in range(len(Y)):
            if(a in chorale_indices):
                new_y = np.zeros(Y[a].shape[:2])
                conc_y = np.sum(Y[a], axis=2)
                for timeindex, beat in enumerate(conc_y):
                    new_y[timeindex][np.nonzero(beat)[0]] = 1
                new_y = add_beat(new_y, subdivision)
                Y[a] = padding(new_y, timesteps, step)
            else:
                Y[a] = 0

    central_features = []
    right_features = []
    left_features = []
    labels = []
    batch = 0
    midi_index = 0
    chorale_index = 0
    time_index = 0

    augmentation = 3
    non_zero = []
    non_zero_counter = 0
    while True:
        if(midi_index == 0 and augmentation == 3):
            chorale_index = np.random.choice(chorale_indices)
            chorale = np.array(Y[chorale_index])
            chorale_length = len(chorale)
            if(constraint == True):
                chorale_length = chorale_length - 2 * (timesteps * step)
                time_index = (timesteps * step) + np.random.randint(1, chorale_length // 4) * 4
            else:
                time_index = np.random.randint(timesteps * step, chorale_length - timesteps * step)
            non_zero = np.nonzero(Y[chorale_index][time_index, :-1])[0]
            if(len(non_zero) == 0):
                augmentation = 3
            else:
                augmentation = 0

        if(augmentation == 0 or augmentation == 2):
            midi_index = non_zero[non_zero_counter]
            non_zero_counter += 1

        central_feature = np.reshape(np.array(Y[chorale_index][time_index, :88]), (88, 1))
        central_feature[midi_index:] = 0.5

        left_feature = Y[chorale_index][time_index - (timesteps * step):time_index, :]
        right_feature = Y[chorale_index][(time_index + 1):(time_index + 1) + timesteps * step, :]

        label = np.zeros((2))
        label[int(Y[chorale_index][time_index, midi_index])] = 1

        central_features.append(central_feature)
        left_features.append(left_feature)
        right_features.append(right_feature)
        labels.append(label)

        batch += 1
        midi_index = (midi_index + 1) % 88

        if(augmentation == 1 and midi_index == 0):
            augmentation = 2
        if(augmentation == 0 or augmentation == 2):
            if(non_zero_counter == len(non_zero) and augmentation == 0):
                augmentation = 1
                midi_index = 0
                non_zero_counter = 0
            elif(non_zero_counter == len(non_zero) and augmentation == 2):
                augmentation = 3
                midi_index = 0
                non_zero_counter = 0

        # if there is a full batch
        if(batch == batch_size):
            next_element = (
                np.array(left_features, dtype=np.float32),
                np.array(central_features, dtype=np.float32),
                np.array(right_features, dtype=np.float32),

                np.array(labels, dtype=np.float32))

            yield next_element

            batch = 0

            central_features = []
            right_features = []
            left_features = []
            labels = []


def train(model,
          dataset_path,
          subdivision,
          epoch=80,
          steps=6000,
          timesteps=32,
          step=4,
          batch_size=88*3):
    generator_train = (({'left_features': left_features,
                         'central_features': central_features,
                         'right_features': right_features
                         },
                        {'prediction': labels})
                       for(left_features,
                           central_features,
                           right_features,
                           labels) in generator(batch_size, subdivision=subdivision,
                                                timesteps=timesteps, step=step,
                                                dataset=dataset_path, constraint=False
                                                ))

    generator_val = (({'left_features': left_features,
                       'central_features': central_features,
                       'right_features': right_features
                       },
                      {'prediction': labels})
                     for(left_features,
                         central_features,
                         right_features,
                         labels) in generator(batch_size, subdivision=subdivision,
                                              timesteps=timesteps, step=step, phase='test',
                                              dataset=dataset_path, constraint=False
                                              ))

    model.fit_generator(generator_train, steps_per_epoch=steps,
                        epochs=epoch, verbose=1, validation_data=generator_val,
                        validation_steps=200,
                        use_multiprocessing=False,
                        max_queue_size=100,
                        workers=1)

    return model
--------------------------------------------------------------------------------
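A quick way to inspect one training batch from the generator above (a sketch; it assumes the default `bach_dataset.pickle` from `MusicStyleTransfer.py` is present and stores piano rolls with 90 pitch/symbol columns, as the non-jazz branch expects).

```python
from project.train import generator

gen = generator(batch_size=88 * 3, subdivision=4, timesteps=32, step=4,
                dataset="bach_dataset.pickle")
left, central, right, labels = next(gen)
print(left.shape, central.shape, right.shape, labels.shape)
# with the layout assumed above: (264, 128, 91) (264, 88, 1) (264, 128, 91) (264, 2)
```
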