├── README.md
├── LICENSE
├── MusicStyleTransfer.py
└── project
    ├── utils.py
    ├── midi_handler.py
    ├── test.py
    ├── model.py
    └── train.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Music Style Transfer


This repository includes the source code of the symbolic music style transfer algorithm from:

Wei-Tsung Lu and Li Su, “Transferring the Style of Homophonic Music Using Recurrent Neural Networks and Autoregressive Model,” International Society for Music Information Retrieval Conference (ISMIR), September 2018.

Listening samples are available [here].

### Dependencies

This repository requires the following packages:

- python 3.6
- numpy
- tensorflow
- keras
- mido

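### Usage

A minimal example of invoking the entry script with the defaults defined in `MusicStyleTransfer.py` (the MIDI file names and the `bach` model below are simply those default values; substitute your own paths). The testing phase writes its result to `test.mid`.

```sh
# Style transfer (testing phase) with a pre-trained model
python MusicStyleTransfer.py --phase testing --model_path bach \
    --input_file LiveAndLetDie_all.mid --input_file_melody LiveAndLetDie_main.mid \
    --subdivision 4

# Train a new model on a pickled dataset
python MusicStyleTransfer.py --phase training --dataset_path bach_dataset.pickle \
    --epoch 80 --steps 6000 --output_model_name out
```
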
### Todos

-

License
----

MIT


[//]: #

[here]: https://drive.google.com/open?id=1hohsEvbAiBmTW6DUeEaoEha13otyn8oJ
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Wei Tsung Lu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/project/utils.py:
--------------------------------------------------------------------------------
import numpy as np
from keras.models import model_from_json, model_from_yaml


def padding(x, timesteps, step):
    extended_chorale = np.array(x)
    padding_dimensions = (timesteps * step,) + extended_chorale.shape[1:]

    padding_start = np.zeros(padding_dimensions)
    padding_end = np.zeros(padding_dimensions)
    for a, b in enumerate(padding_start):
        padding_start[a][-3] = 1
        padding_end[a][-2] = 1

    extended_chorale = np.concatenate((padding_start,
                                       extended_chorale,
                                       padding_end),
                                      axis=0)
    return extended_chorale


def padding_ade(x, timesteps, subdivision, step, gap):
    # extended_chorale = np.concatenate((-1*np.ones((x.shape[0],88)),x), axis = 1)
    extended_chorale = np.concatenate((x, np.zeros((x.shape[0], 1))), axis=1)

    for i in range(len(extended_chorale)):
        extended_chorale[i][-1] = i % subdivision + 1

    padding_dimensions = (timesteps * step + gap,) + extended_chorale.shape[1:]

    padding_start = np.zeros(padding_dimensions)
    padding_end = np.zeros(padding_dimensions)
    for a, b in enumerate(padding_start):
        padding_start[a][-3] = 1
        padding_end[a][-2] = 1

    extended_chorale = np.concatenate((padding_start,
                                       extended_chorale,
                                       padding_end),
                                      axis=0)

    return extended_chorale


def add_beat(score, subdivision):
    b = np.zeros((score.shape[0], 1))
    score = np.concatenate([score, b], axis=1)

    for time in range(0, len(score)):
        score[time][-1] = time % subdivision + 1

    return score


def load_model(model_name):
    """
    Load a model architecture from '<model_name>.yaml' and its weights
    from '<model_name>_weights.h5'.
    """
    ext = '.yaml'
    model = model_from_yaml(open(model_name + ext).read())
    model.load_weights(model_name + '_weights.h5')

    print("model " + model_name + " loaded")
    return model


def save_model(model, model_name, overwrite=False):
    # SAVE MODEL

    string = model.to_yaml()
    ext = '.yaml'

    open(model_name + ext, 'w').write(string)
    model.save_weights(model_name + '_weights.h5', overwrite=overwrite)
    print("model " + model_name + " saved")
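A minimal sketch of how the two helpers above are combined in the testing pipeline; it assumes the repository root is on the Python path and follows the 88-pitch + start/end-symbol column layout used throughout the code.

```python
import numpy as np

from project.utils import add_beat, padding

# A toy piano roll: 16 time steps, 88 pitch columns plus two reserved
# columns for the start/end symbols written by padding().
score = np.zeros((16, 90))

# add_beat() appends one column holding the position inside the beat (1..subdivision).
score = add_beat(score, subdivision=4)          # -> shape (16, 91)

# padding() prepends/appends timesteps*step frames of start/end symbols
# (columns -3 and -2), matching the context length the model expects.
padded = padding(score, timesteps=32, step=4)   # -> shape (16 + 2*128, 91) = (272, 91)
print(score.shape, padded.shape)
```
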
--------------------------------------------------------------------------------
/project/midi_handler.py:
--------------------------------------------------------------------------------
import mido
from mido import MetaMessage, Message, MidiFile, MidiTrack

import numpy as np


def midi2score(song,
               subdivision):
    mid = mido.MidiFile(song)

    tempo = 0
    sec_per_tick = 0
    length = mid.length

    time = 0

    # set initial score len
    for msg in mid:
        if (msg.is_meta):
            if (msg.type == 'set_tempo'):
                tempo = msg.tempo
        else:
            if (msg.type == "note_on"):
                bpm = mido.tempo2bpm(tempo)
                sec_per_tick = 60 / bpm / subdivision
                break

    score = np.zeros((int(length / sec_per_tick) + 1, 90))

    for msg in mid:
        time += msg.time
        pos = int(np.round(time / sec_per_tick))
        if (pos + 1 > score.shape[0]):
            score = np.append(score, np.zeros((pos - score.shape[0] + 1, 90)), axis=0)
        if (msg.is_meta):
            if (msg.type == 'set_tempo'):
                tempo = mido.tempo2bpm(msg.tempo)
                sec_per_tick = 60 / tempo / subdivision

        elif (msg.type == 'note_on'):
            if (msg.velocity == 0):
                p = msg.note - 21
                score[pos:, p] = 0
            else:
                p = msg.note - 21
                score[pos:, p] = 1

        elif (msg.type == 'note_off'):
            p = msg.note - 21
            score[pos:, p] = 0

    return score


def score2midi(name,
               score,
               subdivision,
               bpm,
               melody_constraint=False,
               melody=None):

    mid = MidiFile()
    track = MidiTrack()
    mid.tracks.append(track)

    # meta messages
    track.append(MetaMessage('set_tempo', tempo=mido.bpm2tempo(bpm)))
    # note messages
    ticks_per_beat = 480
    ticks_per_division = int(ticks_per_beat // subdivision)
    pitch_table = np.zeros((88, 1))

    for index, tick in enumerate(score):
        # offsets handle
        current_notes = np.nonzero(pitch_table)[0]
        for _note in current_notes:
            if (tick[_note] == 0):
                track.append(Message('note_on', note=_note + 21, velocity=0, time=0))
                pitch_table[_note] = 0

        # onsets handle
        # melody constraint handle
        onsets = np.nonzero(tick[:])[0]
        for _note in onsets:
            if (melody_constraint == True):
                if (melody[index][_note] == 1):
                    if (pitch_table[_note] == 1):
                        track.append(Message('note_on', note=_note + 21, velocity=0, time=0))
                    if (pitch_table[_note] != 2):
                        track.append(Message('note_on', note=_note + 21, velocity=85, time=0))
                        pitch_table[_note] = 2
                elif (pitch_table[_note] == 0):
                    track.append(Message('note_on', note=_note + 21, velocity=80, time=0))
                    pitch_table[_note] = 1
            else:
                if (pitch_table[_note] == 0):
                    track.append(Message('note_on', note=_note + 21, velocity=80, time=0))
                    pitch_table[_note] = 1

        # time progress
        track.append(Message('note_on', note=0, velocity=0, time=ticks_per_division))

    mid.save(name)
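A minimal round-trip sketch for the two converters above; `example.mid` is a placeholder file name, not a file shipped with the repository.

```python
from project.midi_handler import midi2score, score2midi

subdivision = 4                                   # 16th-note grid
score = midi2score("example.mid", subdivision)    # piano roll, shape (num_ticks, 90)
score2midi("roundtrip.mid", score, subdivision, bpm=120)
```
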
--------------------------------------------------------------------------------
/MusicStyleTransfer.py:
--------------------------------------------------------------------------------
import numpy as np
import argparse

from project.midi_handler import midi2score, score2midi
from project.utils import padding, load_model, save_model, add_beat
from project.test import style_transfer
from project.model import lstm_wavenet
from project.train import train

def main():
    # Arguments
    parser = argparse.ArgumentParser()

    parser.add_argument('-p', '--phase',
                        help='phase: training or testing (default: %(default)s)',
                        type=str, default='testing')
    # arguments for training
    parser.add_argument('-d', '--dataset_path',
                        help='path to data set (default: %(default)s)',
                        type=str, default='bach_dataset.pickle')

    parser.add_argument('-e', '--epoch',
                        help='number of epochs (default: %(default)s)',
                        type=int, default=80)
    parser.add_argument('-n', '--steps',
                        help='number of steps per epoch (default: %(default)s)',
                        type=int, default=6000)
    parser.add_argument('-b', '--batch_size_train',
                        help='batch size (default: %(default)s)',
                        type=int, default=88*3)
    parser.add_argument('-o', '--output_model_name',
                        help='name of the output model (default: %(default)s)',
                        type=str, default="out")
    # arguments for testing
    parser.add_argument('-m', '--model_path',
                        help='path to existing model (default: %(default)s)',
                        type=str, default='bach')
    parser.add_argument('-i', '--input_file',
                        help='path to input file (default: %(default)s)',
                        type=str, default="LiveAndLetDie_all.mid")
    parser.add_argument('-ii', '--input_file_melody',
                        help='path to input melody file (default: %(default)s)',
                        type=str, default="LiveAndLetDie_main.mid")
    parser.add_argument('-s', '--subdivision',
                        help='subdivision within one beat (default: %(default)s)',
                        type=int, default=4)

    args = parser.parse_args()
    print(args)

    if(args.phase == "training"):
        # set arguments
        timesteps = 32
        step = 4
        subdivision = args.subdivision
        batch_size = args.batch_size_train
        dataset_path = args.dataset_path

        # create model
        model = lstm_wavenet(num_features_lr=91, timesteps=timesteps,
                             step=step, num_units_lstm=[150, 150, 150, 150],
                             num_dense=150,
                             conv_layers=5,
                             skip_layers=2)

        model.compile(optimizer="adam", loss={'prediction': 'binary_crossentropy'}, metrics=['accuracy'])

        # train
        model = train(model,
                      dataset_path,
                      subdivision,
                      epoch=args.epoch,
                      steps=args.steps,
                      timesteps=timesteps,
                      step=step,
                      batch_size=batch_size)

        # save model
        save_model(model, args.output_model_name)

    else:
        # load input file
        subdivision = args.subdivision
        path = args.input_file
        path_melody = args.input_file_melody
        score = midi2score(path, subdivision)

        if(path_melody == "none"):
            score_melody = np.zeros(score.shape)
        else:
            score_melody = midi2score(path_melody, subdivision)

        score = add_beat(score, subdivision)
        score_melody = add_beat(score_melody, subdivision)

        score = np.array(score[0:640])
        score_melody = np.array(score_melody[0:640])

        extended_score = padding(score, 32, 4)

        # load model
        model = load_model(model_name=args.model_path)

        # generation
        result = style_transfer(extended_score, score_melody, model, iter_num=25)

        # save result
        score2midi("test.mid", result, subdivision, 120, melody_constraint=True, melody=score_melody)
        print("saved")

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/project/test.py:
--------------------------------------------------------------------------------
import numpy as np
import tqdm


def time_index_chosing(_range,
                       interval,
                       random=True,
                       time_list=None):
    interval += 1
    if (random):
        time_index_base = np.random.randint(_range)
    else:
        time_index_base = time_list[np.random.randint(len(time_list))]

    c = time_index_base + np.arange(-interval * 200, interval * 200, interval)

    return c[np.where(np.logical_and(c >= 0, c < _range))]


def midi_index_chosing(_len):
    return np.random.randint(88, size=_len)


def generation_wavenet(model,
                       score,
                       time_indexes,
                       midi_indexes,
                       timesteps=32,
                       step=4
                       ):
    time_indexes = np.array(time_indexes) + timesteps * step

    left_features = np.array(score[[np.arange(t - (timesteps * step), t) for t in time_indexes], :])
    right_features = np.array(score[[np.arange(t + 1, t + 1 + (timesteps * step)) for t in time_indexes], :])
    central_features = np.reshape(np.array(score[time_indexes, :88]), (len(time_indexes), 88, 1))

    for a, b in enumerate(midi_indexes):
        central_features[a, b:] = 0.5

    p = model.predict([left_features, central_features, right_features])

    return p


def style_transfer(extended_score, score_melody,
                   model,
                   iter_num=25,
                   timesteps=32,
                   step=4,
                   threshold=0.5
                   ):

    fixed_rhythm_score = score_melody
    original_len = len(score_melody)
    new_extended_score = np.array(extended_score)
    counter = 0
    alpha_initial = 0.6
    alpha = alpha_initial
    alpha_min = 0
    annealing_fraction = 0.6
    update_count = 0

    for i in tqdm.tqdm(range(iter_num)):
        time_list = np.arange(original_len)
        print("alpha = ", alpha)
        while (time_list.size > 0):
            if(alpha != -1):
                alpha = max(0, alpha_initial - update_count * (alpha_initial - alpha_min) / (
                    iter_num * original_len * annealing_fraction))
            if(alpha == 0):
                extended_score = new_extended_score
                alpha = -1
            elif(counter / original_len > alpha and alpha != -1):
                counter = 0
                extended_score = np.array(new_extended_score)

            time_indexes = time_index_chosing(original_len, timesteps * step, random=False, time_list=time_list)
            l = len(time_indexes)
            sorter = np.argsort(time_list)
            d = sorter[np.searchsorted(time_list, time_indexes, sorter=sorter)]
            time_list = np.delete(time_list, d, 0)
            counter += l

            update_count += l

            if(alpha != -1):
                midi_indexes = np.arange(88).tolist() * len(time_indexes)
                time_indexes_repeat = np.repeat(time_indexes, 88)
                p = generation_wavenet(model, extended_score, time_indexes_repeat, midi_indexes,
                                       timesteps=timesteps, step=step)
                for i, t in enumerate(time_indexes_repeat):
                    if(fixed_rhythm_score[t, midi_indexes[i]] == 0):
                        if(p[i][1] > threshold):
                            new_extended_score[t + timesteps * step, midi_indexes[i]] = 1
                        elif(p[i][0] > threshold):
                            new_extended_score[t + timesteps * step, midi_indexes[i]] = 0

            else:
                for midi_index in range(88):
                    midi_indexes = [midi_index] * l
                    p = generation_wavenet(model, extended_score, time_indexes, midi_indexes,
                                           timesteps=timesteps, step=step)
                    for i, t in enumerate(time_indexes):
                        if(fixed_rhythm_score[t, midi_indexes[i]] == 0):
                            if(p[i][1] > threshold):
                                new_extended_score[t + timesteps * step, midi_indexes[i]] = 1
                            elif(p[i][0] > threshold):
                                new_extended_score[t + timesteps * step, midi_indexes[i]] = 0

    return new_extended_score[timesteps * step:-timesteps * step, :88]
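A small illustration of how `time_index_chosing` spaces the positions resampled in one batch; the `640` and `32 * 4` values mirror the defaults used in `MusicStyleTransfer.py`.

```python
from project.test import time_index_chosing

# With interval = timesteps * step = 128, the chosen indices are spaced
# interval + 1 = 129 frames apart, so no chosen position falls inside another
# chosen position's 128-frame context window within the same batch.
idx = time_index_chosing(640, 32 * 4)
print(idx)   # e.g. [ 37 166 295 424 553], anchored at a random base position
```
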
--------------------------------------------------------------------------------
/project/model.py:
--------------------------------------------------------------------------------
from keras.engine import Input, Model
from keras.layers import Dense, Reshape, Permute, add, TimeDistributed, LSTM, CuDNNLSTM, Dropout, Lambda, concatenate, Multiply
from keras.layers.core import Activation, Flatten, Dropout
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv1D, MaxPooling1D, UpSampling1D, Conv2D, Conv2DTranspose, MaxPooling2D, Cropping2D
from keras.layers.advanced_activations import LeakyReLU
from keras.optimizers import SGD, Adam


def lstm_wavenet(num_features_lr=91,
                 num_pitches=88,
                 num_units_lstm=[150, 150, 150, 150],
                 num_dense=90,
                 timesteps=32,
                 step=4,
                 conv_layers=5,
                 skip_layers=2,
                 using_cuda=True
                 ):

    SelectedLSTM = CuDNNLSTM if using_cuda else LSTM

    left_features = Input(shape=(timesteps * step, num_features_lr), name='left_features')
    left_features_reshape = Reshape((timesteps, num_features_lr * step))(left_features)
    right_features = Input(shape=(timesteps * step, num_features_lr), name='right_features')
    right_features_reshape = Reshape((timesteps, num_features_lr * step))(right_features)
    central_features = Input(shape=(88, 1), name='central_features')

    embedding_left = Dense(input_dim=num_features_lr * step,
                           units=num_dense, name='embedding_left')
    embedding_right = Dense(input_dim=num_features_lr * step,
                            units=num_dense, name='embedding_right')

    predictions_left = left_features_reshape
    predictions_right = right_features_reshape

    # input dropout
    predictions_left = Dropout(0.3)(predictions_left)
    predictions_right = Dropout(0.3)(predictions_right)
    # embedding
    predictions_left = TimeDistributed(embedding_left)(predictions_left)
    predictions_right = TimeDistributed(embedding_right)(predictions_right)
    # left recurrent networks
    return_sequences = True
    for k, stack_index in enumerate(range(len(num_units_lstm))):
        if k == len(num_units_lstm) - 1:
            return_sequences = False
        if k > 0:
            predictions_left_tmp = add([LeakyReLU(alpha=0.2)(predictions_left), predictions_left_old])
            predictions_right_tmp = add([LeakyReLU(alpha=0.2)(predictions_right), predictions_right_old])
        else:
            predictions_left_tmp = predictions_left
            predictions_right_tmp = predictions_right

        predictions_left_old = predictions_left
        predictions_left = predictions_left_tmp
        predictions_left = SelectedLSTM(num_units_lstm[stack_index],
                                        return_sequences=return_sequences,
                                        name='lstm_left_' + str(stack_index)
                                        )(predictions_left)

        predictions_right_old = predictions_right
        predictions_right = predictions_right_tmp
        predictions_right = SelectedLSTM(num_units_lstm[stack_index],
                                         return_sequences=return_sequences,
                                         name='lstm_right_' + str(stack_index)
                                         )(predictions_right)
        # LSTM Dropout
        predictions_left = Dropout(0.3)(predictions_left)
        predictions_right = Dropout(0.3)(predictions_right)

    # retain only last input for skip connections
    predictions_left_old = Lambda(lambda t: t[:, -1, :],
                                  output_shape=lambda input_shape: (input_shape[0], input_shape[-1])
                                  )(predictions_left_old)

    predictions_right_old = Lambda(lambda t: t[:, -1, :],
                                   output_shape=lambda input_shape: (input_shape[0], input_shape[-1])
                                   )(predictions_right_old)
    # concat or sum
    predictions_left = concatenate([LeakyReLU(alpha=0.2)(predictions_left), predictions_left_old])
    predictions_right = concatenate([LeakyReLU(alpha=0.2)(predictions_right), predictions_right_old])
    predictions_context = concatenate([predictions_left, predictions_right])
    predictions_context = LeakyReLU(alpha=0.2)(Dense(num_pitches)(predictions_context))
    predictions_context = Reshape((num_pitches, 1))(predictions_context)
    # wavenet part
    skip = central_features
    skips = []
    for i in range(conv_layers):
        conv_central_t = BatchNormalization()(Conv1D(64, 2, dilation_rate=2 ** (i), padding='causal')(skip))
        conv_central_s = BatchNormalization()(Conv1D(64, 2, dilation_rate=2 ** (i), padding='causal')(skip))
        if (i < skip_layers):
            conv_context_t = BatchNormalization()(
                Conv1D(64, 2, dilation_rate=2 ** (i), padding='causal')(predictions_context))
            conv_context_s = BatchNormalization()(
                Conv1D(64, 2, dilation_rate=2 ** (i), padding='causal')(predictions_context))
            conv_t = Activation('tanh')(add([conv_central_t, conv_context_t]))
            conv_s = Activation('sigmoid')(add([conv_central_s, conv_context_s]))
            conv = Multiply()([conv_t, conv_s])
            conv = BatchNormalization()(Conv1D(1, 1, padding='same')(conv))
            skip = add([conv, skip])
        else:
            conv_t = Activation('tanh')(conv_central_t)
            conv_s = Activation('sigmoid')(conv_central_s)
            conv = Multiply()([conv_t, conv_s])
            conv = BatchNormalization()(Conv1D(1, 1, padding='same')(conv))
            skip = add([conv, skip])
        skips.append(conv)

    out = LeakyReLU(alpha=0.2)(add(skips))
    out = LeakyReLU(alpha=0.2)(Conv1D(1, 1)(out))
    out = Flatten()(Conv1D(1, 1)(out))
    out = Dense(2, activation='softmax', name='prediction')(out)

    model = Model(inputs=[left_features, central_features, right_features],
                  outputs=out)
    model.compile(optimizer='adam',
                  loss={'prediction': 'binary_crossentropy'},
                  )

    return model
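A minimal shape check for `lstm_wavenet` (a sketch; it assumes a Keras version that still provides `CuDNNLSTM` for the default code path, and passes `using_cuda=False` to build the plain-LSTM variant on a CPU-only machine).

```python
import numpy as np

from project.model import lstm_wavenet

model = lstm_wavenet(num_features_lr=91, timesteps=32, step=4, using_cuda=False)

left = np.zeros((2, 128, 91), dtype=np.float32)      # 32*4 frames of left context
right = np.zeros((2, 128, 91), dtype=np.float32)     # 32*4 frames of right context
central = np.zeros((2, 88, 1), dtype=np.float32)     # current frame, masked from the target pitch upward
print(model.predict([left, central, right]).shape)   # (2, 2): [P(note off), P(note on)]
```
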
--------------------------------------------------------------------------------
/project/train.py:
--------------------------------------------------------------------------------
import numpy as np
import pickle

from project.utils import add_beat, padding


def generator(batch_size, subdivision, timesteps, step, dataset,
              phase='train', percentage_train=0.8,
              constraint=False
              ):

    if("jazz" in dataset):
        Y, X_metadatas = pickle.load(open(dataset, 'rb'))
    else:
        Y, X_metadatas, index2notes, note2indexs, metadatas = pickle.load(open(dataset, 'rb'))

    # Set chorale_indices
    if phase == 'train':
        chorale_indices = np.arange(int(len(Y) * percentage_train))
    if phase == 'test':
        chorale_indices = np.arange(int(len(Y) * percentage_train), len(Y))
    if phase == 'all':
        chorale_indices = np.arange(int(len(Y)))

    if("jazz" in dataset):
        for a in range(len(Y)):
            if (a in chorale_indices):
                new_y = np.zeros((Y[a].shape[:2]))
                for timeindex, beat in enumerate(Y[a]):
                    new_y[timeindex, np.nonzero(beat)[0]] = 1
                new_y = add_beat(new_y, subdivision)
                Y[a] = padding(new_y, timesteps, step)
            else:
                Y[a] = 0
    else:
        for a in range(len(Y)):
            if(a in chorale_indices):
                new_y = np.zeros(Y[a].shape[:2])
                conc_y = np.sum(Y[a], axis=2)
                for timeindex, beat in enumerate(conc_y):
                    new_y[timeindex][np.nonzero(beat)[0]] = 1
                new_y = add_beat(new_y, subdivision)
                Y[a] = padding(new_y, timesteps, step)
            else:
                Y[a] = 0

    central_features = []
    right_features = []
    left_features = []
    labels = []
    batch = 0
    midi_index = 0
    chorale_index = 0
    time_index = 0

    augmentation = 3
    non_zero = []
    non_zero_counter = 0
    while True:
        if(midi_index == 0 and augmentation == 3):
            chorale_index = np.random.choice(chorale_indices)
            chorale = np.array(Y[chorale_index])
            chorale_length = len(chorale)
            if(constraint == True):
                chorale_length = chorale_length - 2 * (timesteps * step)
                time_index = (timesteps * step) + np.random.randint(1, chorale_length // 4) * 4
            else:
                time_index = np.random.randint(timesteps * step, chorale_length - timesteps * step)
            non_zero = np.nonzero(Y[chorale_index][time_index, :-1])[0]
            if(len(non_zero) == 0):
                augmentation = 3
            else:
                augmentation = 0

        if(augmentation == 0 or augmentation == 2):
            midi_index = non_zero[non_zero_counter]
            non_zero_counter += 1

        central_feature = np.reshape(np.array(Y[chorale_index][time_index, :88]), (88, 1))
        central_feature[midi_index:] = 0.5

        left_feature = Y[chorale_index][time_index - (timesteps * step):time_index, :]
        right_feature = Y[chorale_index][(time_index + 1):(time_index + 1) + timesteps * step, :]

        label = np.zeros((2))
        label[int(Y[chorale_index][time_index, midi_index])] = 1

        central_features.append(central_feature)
        left_features.append(left_feature)
        right_features.append(right_feature)
        labels.append(label)

        batch += 1
        midi_index = (midi_index + 1) % 88

        if(augmentation == 1 and midi_index == 0):
            augmentation = 2
        if(augmentation == 0 or augmentation == 2):
            if(non_zero_counter == len(non_zero) and augmentation == 0):
                augmentation = 1
                midi_index = 0
                non_zero_counter = 0
            elif(non_zero_counter == len(non_zero) and augmentation == 2):
                augmentation = 3
                midi_index = 0
                non_zero_counter = 0

        # if there is a full batch
        if(batch == batch_size):
            next_element = (
                np.array(left_features, dtype=np.float32),
                np.array(central_features, dtype=np.float32),
                np.array(right_features, dtype=np.float32),

                np.array(labels, dtype=np.float32))

            yield next_element

            batch = 0

            central_features = []
            right_features = []
            left_features = []
            labels = []


def train(model,
          dataset_path,
          subdivision,
          epoch=80,
          steps=6000,
          timesteps=32,
          step=4,
          batch_size=88*3):
    generator_train = (({'left_features': left_features,
                         'central_features': central_features,
                         'right_features': right_features
                         },
                        {'prediction': labels})
                       for(left_features,
                           central_features,
                           right_features,
                           labels) in generator(batch_size, subdivision=subdivision,
                                                timesteps=timesteps, step=step,
                                                dataset=dataset_path, constraint=False
                                                ))

    generator_val = (({'left_features': left_features,
                       'central_features': central_features,
                       'right_features': right_features
                       },
                      {'prediction': labels})
                     for(left_features,
                         central_features,
                         right_features,
                         labels) in generator(batch_size, subdivision=subdivision,
                                              timesteps=timesteps, step=step, phase='test',
                                              dataset=dataset_path, constraint=False
                                              ))

    model.fit_generator(generator_train, steps_per_epoch=steps,
                        epochs=epoch, verbose=1, validation_data=generator_val,
                        validation_steps=200,
                        use_multiprocessing=False,
                        max_queue_size=100,
                        workers=1)

    return model
--------------------------------------------------------------------------------
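A quick way to inspect one training batch from the generator above (a sketch; it assumes the default `bach_dataset.pickle` from `MusicStyleTransfer.py` is present and stores piano rolls with 90 pitch/symbol columns, as the non-jazz branch expects).

```python
from project.train import generator

gen = generator(batch_size=88 * 3, subdivision=4, timesteps=32, step=4,
                dataset="bach_dataset.pickle")
left, central, right, labels = next(gen)
print(left.shape, central.shape, right.shape, labels.shape)
# with the layout assumed above: (264, 128, 91) (264, 88, 1) (264, 128, 91) (264, 2)
```
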