├── .gitignore
├── MIRdl.py
├── README.md
├── buildArchitecture.py
├── data
│   ├── datasets
│   │   └── INFO.md
│   ├── preloaded
│   │   └── INFO.md
│   └── results
│       └── INFO.md
├── load_datasets.py
├── runMIRdl.py
└── utils.py

/.gitignore:
--------------------------------------------------------------------------------
*~
*.pyc
*.training
*.result
*.npz
*.param
*.pickle
venv/
data/datasets/Ballroom
data/datasets/GTZAN
test
output/
error/
build/
.idea/
.Rhistory
--------------------------------------------------------------------------------
/MIRdl.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

import time, random, csv
import numpy as np

import theano, lasagne
import theano.tensor as T

import load_datasets as loadData
import buildArchitecture as buildArch

def main(parameters):
    def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
        assert len(inputs) == len(targets)
        if shuffle:
            indices = np.arange(len(inputs))
            np.random.shuffle(indices)
        for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
            if shuffle:
                excerpt = indices[start_idx:start_idx + batchsize]
            else:
                excerpt = slice(start_idx, start_idx + batchsize)
            yield inputs[excerpt], targets[excerpt]

    print("Loading data..")
    parameters['numOutputNeurons'], X_train, y_train, X_val, y_val, X_test, y_test = loadData.load_dataset(parameters)

    print("Building network..")
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    network,netLayers,parameters=buildArch.buildNet(input_var,parameters)

    print("Compiling functions..")

    def computeLoss(prediction, target_var, parameters):
        if parameters['cost']=='crossentropy':
            loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
        elif parameters['cost']=='squared_error':
            loss = lasagne.objectives.squared_error(prediction, target_var)
        loss = loss.mean()
        return loss

    # define training functions
    prediction = lasagne.layers.get_output(network)
    loss=computeLoss(prediction, target_var, parameters)
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=parameters['lr'], momentum=parameters['momentum'])

    # define testing/val functions
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss=computeLoss(test_prediction, target_var, parameters)
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX)

    # compile training and test/val functions
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    print("Training..")
    hash = random.getrandbits(128)
    trainLoss_ans=np.inf
    for epoch in range(parameters['num_epochs']):
        # training set
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, parameters['batchSize'], shuffle=True):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # validation set
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(X_val, y_val, parameters['batchSize'], shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # output
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, parameters['num_epochs'], time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))

        ################# JUST STORING OUTPUTS INTO FILES FOR TRACKING ##################
        name='./data/results/'+parameters['dataset']+'_'+parameters['type']+'_'+str(hash)
        # save the best model
        if train_err/train_batches
--------------------------------------------------------------------------------
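The epoch loop above tracks `trainLoss_ans` and builds a per-experiment prefix `name`, and utils.py further down reloads a `.npz` checkpoint via `lasagne.layers.set_all_param_values()` and plots a `.training` CSV with `trainingLoss`, `validationLoss` and `validationAccuracy` columns. A minimal sketch of a matching checkpoint/tracking helper, given as an assumption rather than the repository's exact code (the function name `track_epoch` is illustrative):

```python
import csv
import numpy as np
import lasagne

def track_epoch(network, name, epoch, train_loss, val_loss, val_acc, best_loss):
    'Sketch (assumption): keep the best model so far and append this epoch to the .training log.'
    if train_loss < best_loss:
        best_loss = train_loss
        # same .npz layout that utils.py reloads (arr_0, arr_1, ... as written by np.savez)
        np.savez(name + '.npz', *lasagne.layers.get_all_param_values(network))
    with open(name + '.training', 'a') as trackFile:
        writer = csv.writer(trackFile)
        if epoch == 0:
            # header columns match what utils.trainingEvolution() expects
            writer.writerow(['trainingLoss', 'validationLoss', 'validationAccuracy'])
        writer.writerow([train_loss, val_loss, val_acc])
    return best_loss
```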
/README.md:
--------------------------------------------------------------------------------
>./data/datasets/GTZAN/blues
>
>./data/datasets/GTZAN/classical
>
> (...)
>
>./data/datasets/GTZAN/rock

- **2)** Adapt the *load_datasets.py* function to work with your dataset (see the sketch below). We recommend starting with the GTZAN dataset (already implemented) to understand how it works.

- **3)** Set the *runMIRdl.py* parameters and the deep learning architecture in *buildArchitecture.py*.

- **4)** Run *runMIRdl.py*.

- **5)** *[Optional]* Visualize what the net has learned with *utils.py*.

**Future features**
- Autoencoders support.
- AcousticBrainz.org support.
--------------------------------------------------------------------------------
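Step **2)** amounts to adding one branch to `load_dataset()` plus a folder-to-label dictionary, mirroring the GTZAN and Ballroom loaders shown further down. A minimal sketch to be placed inside *load_datasets.py*, assuming a hypothetical dataset under `./data/datasets/MyDataset/` with one sub-folder per tag (the names `myDataset_classification`, `MyDataset` and the tags are illustrative, and the pickle caching used by the GTZAN loader is omitted for brevity):

```python
def myDataset_classification(parameters):
    'Sketch (assumption): minimal loader for a custom dataset, mirroring GTZAN_classification().'
    dir = './data/datasets/MyDataset'               # one sub-folder per tag to be predicted
    dict = {'tagA': 0, 'tagB': 1, 'tagC': 2}        # folder name -> integer label
    numInputs = countNumInputs(dir, parameters)     # or hard-code it once known
    return formatAudioClassification(dir, dict, numInputs, parameters)

# ...and register it in load_dataset():
#     elif parameters['dataset'] == 'myDataset':
#         if parameters['task'] == 'classification':
#             return myDataset_classification(parameters)
```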
/buildArchitecture.py:
--------------------------------------------------------------------------------
import lasagne

def buildNet(input_var, parameters):
    'Select a deep learning architecture'
    if parameters['type']=='cnn1':
        return cnn1(input_var, parameters)
    else:
        print 'Architecture NOT supported'

def cnn1(input_var,parameters):

    # set architecture parameters
    parameters['filter_size']=(12,8)
    parameters['num_filters']=15
    parameters['nonlinearity']=lasagne.nonlinearities.rectify
    parameters['W_init']=lasagne.init.GlorotUniform()
    parameters['pool_size']=(2, 1)
    parameters['dropout_p']=.5
    parameters['num_dense_units']=200

    # set convolutional neural network
    network={}
    # input layer
    network["1"] = lasagne.layers.InputLayer(shape=(None, int(parameters['numChannels']), int(parameters['melBands']), int(parameters['inputFrames'])), input_var=input_var)
    # convolutional layer
    network["2"] = lasagne.layers.Conv2DLayer(network["1"], num_filters=parameters['num_filters'], filter_size=parameters['filter_size'], nonlinearity=parameters['nonlinearity'], W=parameters['W_init'])
    # pooling layer
    network["3"] = lasagne.layers.MaxPool2DLayer(network["2"], pool_size=parameters['pool_size'])
    # feed-forward layer
    network["4"] = lasagne.layers.DenseLayer(lasagne.layers.dropout(network["3"], p=parameters['dropout_p']), num_units=parameters['num_dense_units'], nonlinearity=parameters['nonlinearity'])
    # output layer
    network["5"] = lasagne.layers.DenseLayer(lasagne.layers.dropout(network["4"], p=parameters['dropout_p']), num_units=int(parameters['numOutputNeurons']), nonlinearity=lasagne.nonlinearities.softmax)

    # return the output layer standing for the whole net (network["5"]), each layer separately (network) and the updated parameters for tracking.
    return network["5"], network, parameters
--------------------------------------------------------------------------------
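For orientation, with the default runMIRdl.py settings cnn1 sees a (batch, 1, 40, 80) input: the (12, 8) convolution with 15 filters yields 15 feature maps of 29×73, and the (2, 1) max-pooling reduces them to 14×73 before the 200-unit dense layer and the softmax output. Adding another architecture only needs one more branch in `buildNet()`; a minimal sketch, where `cnn2` and its hyper-parameters are illustrative and not part of the repository:

```python
def cnn2(input_var, parameters):
    'Sketch (assumption): a second, smaller architecture selectable via parameters["type"].'
    parameters['num_filters'] = 30
    parameters['filter_size'] = (4, 8)
    network = {}
    network["1"] = lasagne.layers.InputLayer(
        shape=(None, int(parameters['numChannels']), int(parameters['melBands']), int(parameters['inputFrames'])),
        input_var=input_var)
    network["2"] = lasagne.layers.Conv2DLayer(
        network["1"], num_filters=parameters['num_filters'], filter_size=parameters['filter_size'],
        nonlinearity=lasagne.nonlinearities.rectify, W=lasagne.init.GlorotUniform())
    # average each feature map into a single value before classifying
    network["3"] = lasagne.layers.GlobalPoolLayer(network["2"])
    network["4"] = lasagne.layers.DenseLayer(
        network["3"], num_units=int(parameters['numOutputNeurons']),
        nonlinearity=lasagne.nonlinearities.softmax)
    return network["4"], network, parameters

# ...and in buildNet():
#     elif parameters['type'] == 'cnn2':
#         return cnn2(input_var, parameters)
```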
/data/datasets/INFO.md:
--------------------------------------------------------------------------------
In this directory (./data/datasets/) the library expects the dataset to be split into folders, one folder per tag to be predicted.

For example, for the GTZAN dataset (http://marsyasweb.appspot.com/download/data_sets/) the library expects:

./data/datasets/GTZAN/blues

./data/datasets/GTZAN/classical

./data/datasets/GTZAN/country

./data/datasets/GTZAN/disco

./data/datasets/GTZAN/hiphop

./data/datasets/GTZAN/jazz

./data/datasets/GTZAN/metal

./data/datasets/GTZAN/pop

./data/datasets/GTZAN/reggae

./data/datasets/GTZAN/rock,

where each folder contains all the songs of that class, used for training, validation and testing.
--------------------------------------------------------------------------------
/data/preloaded/INFO.md:
--------------------------------------------------------------------------------
This directory contains the pickle files that store the datasets in a format readable by the library.
--------------------------------------------------------------------------------
/data/results/INFO.md:
--------------------------------------------------------------------------------
This directory stores the following files: **.result** (training and test results), **.training** (the training evolution, readable with utils.py), **.param** (all the deep learning parameters used for each experiment) and **.npz** (the best trained deep learning model).
--------------------------------------------------------------------------------
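The cache files in ./data/preloaded/ are named by concatenating every parameter value, so changing any single setting (say, hopSize) points to a different .pickle and triggers recomputation. A rough illustration of the naming scheme used by the loaders below (an assumption for illustration: only three parameters are shown, and the order of the values depends on how the parameters dictionary is iterated):

```python
parameters = {'dataset': 'GTZAN', 'melBands': 40, 'inputFrames': 80}
pickleFile = './data/preloaded/'
for key, value in parameters.items():
    pickleFile = pickleFile + '_' + str(value)
pickleFile = pickleFile + '.pickle'
print(pickleFile)  # e.g. ./data/preloaded/_GTZAN_40_80.pickle
```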
/load_datasets.py:
--------------------------------------------------------------------------------
import sys, os, pickle
import numpy as np

import essentia
from essentia.standard import *

def load_dataset(parameters):
    'Choose which dataset you want to use and for which task - only classification is supported for now.'
    if parameters['dataset']=='ballroom':
        if parameters['task'] == 'classification':
            return ballroom_classification(parameters)

    elif parameters['dataset']=='GTZAN':
        if parameters['task'] == 'classification':
            return GTZAN_classification(parameters)

    else:
        print('Dataset NOT supported')

def normalization(spect,inputNorm):
    'Normalize the input spectrograms, choose a configuration.'
    if inputNorm=='energy':
        E=sum(sum(np.power(spect, 2)))/len(spect)
        spect=spect/E

    elif inputNorm=='energy20Log':
        E=sum(sum(np.power(spect, 2)))/len(spect)
        spect=spect/E
        spect = 20*np.log10(spect+1)

    elif inputNorm=='energyLog':
        E=sum(sum(np.power(spect, 2)))/len(spect)
        spect=spect/E
        spect = np.log10(spect+1)

    elif inputNorm=='log':
        spect = np.log10(spect+1)

    elif inputNorm=='None':
        print 'No normalization!'

    else:
        print 'This normalization does not exist!'

    ##!## remove silences ?
    ##!## log10(x+1) by itself avoids nans. No need of more stuff such as: spect[spect == -np.inf]= 0 or np.finfo(float).eps
    ##!## mean 0 variance 1 ?

    return spect

def GTZAN_classification(parameters):

    # set the name for storing the pre-processed and formatted GTZAN data.
    pickleFile='./data/preloaded/'
    for key, value in parameters.iteritems():
        pickleFile=pickleFile+'_'+str(value)
    pickleFile=pickleFile+'.pickle'

    # if it was already computed, simply load it.
    if os.path.exists(pickleFile):

        print "-- Loading pre-computed spectrograms.."

        with open(pickleFile) as f:
            numOutputNeurons, X_train, X_test, X_val, y_train, y_test, y_val = pickle.load(f)

    # otherwise, compute it!
    else:

        # define where the audio files are
        dir= './data/datasets/GTZAN'

        # map the folder names (tags to be learned) to integers with a dictionary
        dict = {'blues':0,'classical':1,'country':2,'disco':3,'hiphop':4,'jazz':5,'metal':6,'pop':7,'reggae':8,'rock':9}
        # TODO: create dict automatically!

        # number of 'sliced' spectrograms
        numInputs=16000
        # if not known: numInputs=countNumInputs(dir,parameters)

        # format the audio for classification
        numOutputNeurons, X_train, y_train, X_val, y_val, X_test, y_test = formatAudioClassification(dir,dict,numInputs,parameters)

        # saving the computed features
        with open(pickleFile, 'w') as f:
            pickle.dump([numOutputNeurons, X_train, X_test, X_val, y_train, y_test, y_val], f)

    return numOutputNeurons, X_train, y_train, X_val, y_val, X_test, y_test

def ballroom_classification(parameters):
    'Defined analogously to the GTZAN loader'
    pickleFile='./data/preloaded/'
    for key, value in parameters.iteritems():
        pickleFile=pickleFile+'_'+str(value)
    pickleFile=pickleFile+'.pickle'

    if os.path.exists(pickleFile):

        print "-- Loading pre-computed features.."

        with open(pickleFile) as f:
            numOutputNeurons, X_train, X_test, X_val, y_train, y_test, y_val = pickle.load(f)

    else:

        dir= './data/datasets/Ballroom/BallroomData'
        dict = {'ChaChaCha':0,'Samba':1,'Quickstep':2,'VienneseWaltz':3,'Tango':4,'Jive':5,'Waltz':6,'Rumba':7}

        numInputs=11629
        # if not known: numInputs=countNumInputs(dir,parameters)

        numOutputNeurons, X_train, y_train, X_val, y_val, X_test, y_test = formatAudioClassification(dir,dict,numInputs,parameters)

        # saving the objects:
        with open(pickleFile, 'w') as f:
            pickle.dump([numOutputNeurons, X_train, X_test, X_val, y_train, y_test, y_val], f)

    return numOutputNeurons, X_train, y_train, X_val, y_val, X_test, y_test

def formatAudioClassification(dir,dict,num_inputs,parameters):
    'Compute the spectrograms and format them to be fed into the network'
    # init variables
    D=np.zeros(num_inputs*parameters['melBands']*parameters['inputFrames'],dtype=np.float32).reshape(num_inputs,1,parameters['melBands'],parameters['inputFrames'])
    A=np.zeros(num_inputs,dtype=np.uint8)+parameters['errorCode']
    count=0
    # walk through the directory: compute spectrograms, normalize and annotate.
    for root, dirs, files in os.walk(dir):
        for annotation in dirs:
            for r, ds, fs in os.walk(root+'/'+annotation):
                for f in fs:
                    spect = computeMelSpectrogram(root+'/'+annotation+'/'+f,parameters['frameSize'],parameters['hopSize'],parameters['windowType'],parameters['melBands'])
                    spect=normalization(spect,parameters['inputNorm'])
                    for c in chunk(spect,parameters['inputFrames']):
                        # sliced spectrogram
                        D[count][0]=c
                        # associated annotation
                        A[count]=dict[annotation]
                        count=count+1
                    print(root+'/'+annotation+'/'+f)

    # randomize order
    D,A=shuffle_in_unison_inplace(D, A)

    # split training/test/validation data
    cut_train=int(np.floor(parameters['trainSplit']*D.shape[0]))
    cut_test=int(np.floor((parameters['trainSplit']+parameters['testSplit'])*D.shape[0]))
    X_train, X_test, X_val = D[:cut_train], D[cut_train:cut_test], D[cut_test:]
    y_train, y_test, y_val = A[:cut_train], A[cut_train:cut_test], A[cut_test:]

    # number of different classes/output neurons
    numOutputNeurons=len(set(A))

    return numOutputNeurons, X_train, y_train, X_val, y_val, X_test, y_test

def countNumInputs(dir,parameters):
    'Count the number of instances (sliced spectrograms) resulting from a given dataset'
    num_inputs=0
    for root, dirs, files in os.walk(dir):
        for annotation in dirs:
            for r, ds, fs in os.walk(root+'/'+annotation):
                for f in fs:
                    spect = computeMelSpectrogram(root+'/'+annotation+'/'+f,parameters['frameSize'],parameters['hopSize'],parameters['windowType'],parameters['melBands'])
                    num_inputs=num_inputs+len(chunk(spect,parameters['inputFrames']))
    print num_inputs
    return num_inputs

def computeMelSpectrogram(file,frameSize,hopSize,windowType,melBands):
    'Compute MEL spectrogram using Essentia python bindings'
    loader = essentia.standard.MonoLoader(filename = file)
    audio = loader()
    w = Windowing(type = windowType)
    spectrum = Spectrum()
    mel = MelBands(numberBands = melBands) # 40 bands!

    melSpec = []
    for frame in FrameGenerator(audio, frameSize = frameSize, hopSize = hopSize):
        melSpec.append(mel(spectrum(w(frame))))
    # we need to convert the list to an essentia.array first (== numpy.array of floats)
    melSpec = essentia.array(melSpec).T

    return melSpec

def chunk(l, n):
    'Return successive n-sized chunks from l.'
    out=[]
    for i in xrange(0, int(l.shape[1]/n)*n, n):
        out.append(l[:,i:i+n])

    return out

def shuffle_in_unison_inplace(a, b):
    'Shuffle the data and its annotations in unison, randomizing the order.'
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]
--------------------------------------------------------------------------------
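As a sanity check on the loader: each 30-second GTZAN clip, resampled by Essentia's MonoLoader to its default 44.1 kHz, gives roughly 1290 frames at hopSize 1024, i.e. 16 non-overlapping slices of 80 frames, which is where numInputs = 16000 (1000 clips × 16 slices) comes from. A hedged usage sketch of `load_dataset()` and the shapes it returns (the exact counts depend on the splits and the slicing above):

```python
import load_datasets as loadData

params = {'dataset': 'GTZAN', 'task': 'classification',
          'frameSize': 2048, 'hopSize': 1024, 'numChannels': 1,
          'windowType': 'blackmanharris62', 'melBands': 40, 'inputFrames': 80,
          'errorCode': 999, 'inputNorm': 'energy20Log',
          'trainSplit': 0.75, 'testSplit': 0.15}

numOutputNeurons, X_train, y_train, X_val, y_val, X_test, y_test = loadData.load_dataset(params)

print(numOutputNeurons)  # 10 genre classes
print(X_train.shape)     # roughly (12000, 1, 40, 80): 75% of ~16000 sliced spectrograms
print(y_val.dtype)       # uint8 labels in 0..9
```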
/runMIRdl.py:
--------------------------------------------------------------------------------
import MIRdl

# Input data parameters
parameters={}
parameters['dataset'] = 'GTZAN'  # 'ballroom' or 'GTZAN'
parameters['frameSize'] = 2048
parameters['hopSize'] = 1024
parameters['numChannels'] = 1
parameters['windowType'] = 'blackmanharris62'
parameters['melBands'] = 40
parameters['inputFrames'] = 80
parameters['errorCode'] = 999
parameters['inputNorm'] = 'energy20Log'  # 'energy','energy20Log','energyLog','log' or 'None'

# Deep Learning architecture
parameters['type'] = 'cnn1'  # only 'cnn1'
parameters['task'] = 'classification'  # only 'classification'

# Training parameters
parameters['trainSplit'] = 0.75
parameters['testSplit'] = 0.15
parameters['valSplit'] = 1-parameters['trainSplit']-parameters['testSplit']
parameters['num_epochs'] = 2000
parameters['batchSize'] = 500
parameters['lr'] = 0.02
parameters['momentum'] = 0.9
parameters['cost'] = 'crossentropy'  # only 'crossentropy'

MIRdl.main(parameters)
--------------------------------------------------------------------------------
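MIRdl.main() writes its outputs under ./data/results/ using the prefix `<dataset>_<type>_<hash>` (see the `name` variable in MIRdl.py and the results INFO.md). One possible way to pick up the most recent experiment and hand its prefix to the utilities below; this snippet is an illustration, not part of the repository:

```python
import glob, os

# latest GTZAN/cnn1 experiment written by MIRdl.main()
candidates = glob.glob('./data/results/GTZAN_cnn1_*.training')
latest = max(candidates, key=os.path.getmtime)
name = latest[:-len('.training')]  # strip the extension to get the shared prefix

print(name)  # pass this prefix to utils.visualizeWcnn1(name) and utils.trainingEvolution(name)
```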
/utils.py:
--------------------------------------------------------------------------------
import theano, lasagne, csv
import theano.tensor as T
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import buildArchitecture as buildArch

def visualizeWcnn1(name):
    'Visualize weights of the convolutional layer of cnn1'

    ##!!## biases not shown !
    ##!!## deterministic W ?

    # load parameters
    with open(name+'.param', 'rb') as paramFile:
        params = csv.reader(paramFile, delimiter='-')
        count=0
        for param in params:
            if count==0:
                tmp1=param
                count=count+1
            else:
                tmp2=param
    parameters = {}
    for i in range(len(tmp2)):
        parameters[tmp1[i]] = tmp2[i]

    print("Building network..")
    input_var = T.tensor4('inputs')
    network,netLayers,parameters=buildArch.buildNet(input_var,parameters)
    # load trained network
    with np.load(name+'.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(network, param_values)

    print("Compiling functions..")
    # visualize convLayers
    conv_w = theano.function([],netLayers['2'].W)
    weights=conv_w()

    ##!!## plot considering 20log?
    ##!!## set min/max to visualize always the same?

    # plot W!
    for i in range(len(weights)):
        plt.subplot(1, len(weights), i+1)
        plt.imshow(np.squeeze(weights[i]), cmap=plt.cm.Reds, interpolation='None', aspect='auto')
        plt.colorbar()
    plt.show()

def trainingEvolution(name):
    'Plot the training evolution: training loss, validation loss and validation accuracy.'

    # load data
    df = pd.read_csv(name+'.training')
    trainingLoss = df['trainingLoss']
    validationLoss = df['validationLoss']
    validationAccuracy = df['validationAccuracy']

    # plot training evolution!
    plt.subplot(1, 2, 1)
    plt.title('Loss')
    plt.plot(range(1,len(trainingLoss)+1,1),trainingLoss, color='red',linestyle='--', marker='o',label="Training Loss")
    plt.hold(True)
    plt.plot(range(1,len(trainingLoss)+1,1),validationLoss,color='blue',linestyle='--', marker='o',label="Validation Loss")
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.title('Validation Accuracy (%)')
    plt.plot(range(1,len(trainingLoss)+1,1),validationAccuracy,color='blue',linestyle='--', marker='o')

    plt.show()

name='./data/results/ballroom_cnn1_46378760430139206020064112940399742791'
visualizeWcnn1(name)
trainingEvolution(name)
--------------------------------------------------------------------------------