├── .gitignore
├── LICENSE
├── README.md
├── augment_data.py
├── choose_equal_split.py
├── concatenate_csvs.py
├── create_spectrograms.py
├── ensembling
    ├── ensemble.theano.py
    └── get_output_layers.py
├── get_score_from_probabilities.py
├── get_score_from_top3_prediction.py
├── get_sum_of_csvs.py
├── majority_vote_ensembling.py
├── make_submission.py
├── prototxt
    ├── augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024r-1024r_DLR_nolrcoef.prototxt
    ├── augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt
    ├── deploy.augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt
    ├── deploy.main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt
    ├── main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt
    ├── solver.augm.nolrcoef.prototxt
    └── solver.main.adadelta.prototxt
├── test_augm_network.py
├── test_main_network.py
└── theano
    ├── README.md
    ├── main.py
    ├── networks
        ├── __init__.py
        ├── base_network.py
        ├── rnn.py
        ├── rnn_2layers.py
        ├── rnn_2layers_5khz.py
        ├── tc_net.py
        ├── tc_net_deeprnn_shared_pad.py
        ├── tc_net_mod.py
        ├── tc_net_mod_5khz_small.py
        ├── tc_net_rnn.py
        ├── tc_net_rnn_nodense.py
        ├── tc_net_rnn_onernn.py
        ├── tc_net_rnn_onernn_notimepool.py
        ├── tc_net_rnn_shared.py
        ├── tc_net_rnn_shared_pad.py
        └── tc_net_rnn_shared_pad_augm.py
    └── plot.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2016 YerevaNN
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Spoken language identification with deep learning
 2 | 
 3 | Read more in the following blog posts:
 4 | 
 5 | * [About TopCoder contest and our CNN-based solution implemented in Caffe](http://yerevann.github.io/2015/10/11/spoken-language-identification-with-deep-convolutional-networks/) (October 2015)
 6 | * [About combining CNN and RNN using Theano/Lasagne](http://yerevann.github.io/2016/06/26/combining-cnn-and-rnn-for-spoken-language-identification/) (June 2016)
 7 | 
 8 | Theano/Lasagne models are [here](/theano). The basic steps to run them are:
 9 | 
10 | * Download the dataset from [here](https://community.topcoder.com/longcontest/?module=ViewProblemStatement&rd=16555&pm=13978) or use your own dataset.
11 | * Create spectrograms for the recordings using `create_spectrograms.py` or `augment_data.py`. The latter will also augment the data by randomly perturbing the spectrograms and cropping a random interval of length 9s from each recording. 
12 | * Create listfiles for the training set and the validation set, where each row of a listfile describes one example and has 2 values separated by a comma. The first one is the name of the example, the second one is the label (counting starts from 0). A typical listfile will look like [this](https://gist.github.com/Harhro94/aa11fe6b454c614cdedea882fd00f8d7).
13 | * Change the `png_folder` and listfile paths in [`theano/main.py`](/theano/main.py).
14 | * Run `theano/main.py`.


--------------------------------------------------------------------------------
/augment_data.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from matplotlib import pyplot as plt
  3 | import scipy.io.wavfile as wav
  4 | from numpy.lib import stride_tricks
  5 | import PIL.Image as Image
  6 | import os
  7 | 
  8 | """ short time fourier transform of audio signal """
  9 | def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
 10 |     win = window(frameSize)
 11 |     hopSize = int(frameSize - np.floor(overlapFac * frameSize))
 12 |     
 13 |     # zeros at beginning (thus center of 1st window should be for sample nr. 0)
 14 |     samples = np.append(np.zeros(np.floor(frameSize/2.0)), sig)    
 15 |     # cols for windowing
 16 |     cols = np.ceil( (len(samples) - frameSize) / float(hopSize)) + 1
 17 |     # zeros at end (thus samples can be fully covered by frames)
 18 |     samples = np.append(samples, np.zeros(frameSize))
 19 |     
 20 |     frames = stride_tricks.as_strided(samples, shape=(cols, frameSize), strides=(samples.strides[0]*hopSize, samples.strides[0])).copy()
 21 |     frames *= win
 22 |     
 23 |     return np.fft.rfft(frames)    
 24 |     
 25 | """ scale frequency axis logarithmically """    
 26 | def logscale_spec(spec, sr=44100, factor=20., alpha=1.0, f0=0.9, fmax=1):
 27 |     spec = spec[:, 0:256]
 28 |     timebins, freqbins = np.shape(spec)
 29 |     scale = np.linspace(0, 1, freqbins) #** factor
 30 |     
 31 |     # http://ieeexplore.ieee.org/xpl/login.jsp?tp=&arnumber=650310&url=http%3A%2F%2Fieeexplore.ieee.org%2Fiel4%2F89%2F14168%2F00650310
 32 |     scale = np.array(map(lambda x: x * alpha if x <= f0 else (fmax-alpha*f0)/(fmax-f0)*(x-f0)+alpha*f0, scale))
 33 |     scale *= (freqbins-1)/max(scale)
 34 | 
 35 |     newspec = np.complex128(np.zeros([timebins, freqbins]))
 36 |     allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1])
 37 |     freqs = [0.0 for i in range(freqbins)]
 38 |     totw = [0.0 for i in range(freqbins)]
 39 |     for i in range(0, freqbins):
 40 |         if (i < 1 or i + 1 >= freqbins):
 41 |             newspec[:, i] += spec[:, i]
 42 |             freqs[i] += allfreqs[i]
 43 |             totw[i] += 1.0
 44 |             continue
 45 |         else:
 46 |             # scale[15] = 17.2
 47 |             w_up = scale[i] - np.floor(scale[i])
 48 |             w_down = 1 - w_up
 49 |             j = int(np.floor(scale[i]))
 50 |            
 51 |             newspec[:, j] += w_down * spec[:, i]
 52 |             freqs[j] += w_down * allfreqs[i]
 53 |             totw[j] += w_down
 54 |             
 55 |             newspec[:, j + 1] += w_up * spec[:, i]
 56 |             freqs[j + 1] += w_up * allfreqs[i]
 57 |             totw[j + 1] += w_up
 58 |     
 59 |     for i in range(len(freqs)):
 60 |         if (totw[i] > 1e-6):
 61 |             freqs[i] /= totw[i]
 62 |     
 63 |     return newspec, freqs
 64 | 
 65 | """ plot spectrogram"""
 66 | def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="gray", channel=0, name='tmp.png', alpha=1, offset=0):
 67 |     samplerate, samples = wav.read(audiopath)
 68 |     samples = samples[:, channel]
 69 |     s = stft(samples, binsize)
 70 | 
 71 |     sshow, freq = logscale_spec(s, factor=1, sr=samplerate, alpha=alpha)
 72 |     sshow = sshow[2:, :]
 73 |     ims = 20.*np.log10(np.abs(sshow)/10e-6) # amplitude to decibel
 74 |     timebins, freqbins = np.shape(ims)
 75 |     
 76 |     ims = np.transpose(ims)
 77 |     ims = ims[0:256, offset:offset+768] # 0-11khz, ~9s interval
 78 |     #print "ims.shape", ims.shape
 79 |     
 80 |     image = Image.fromarray(ims) 
 81 |     image = image.convert('L')
 82 |     image.save(name)
 83 | 
 84 | 
 85 | file = open('trainingData.csv', 'r')
 86 | for iter, line in enumerate(file.readlines()[1:]): # first line of traininData.csv is header (only for trainingData.csv)
 87 |     filepath = line.split(',')[0]
 88 |     filename = filepath[:-4]
 89 |     wavfile = 'tmp.wav'
 90 |     os.system('mpg123 -w ' + wavfile + ' /home/brainstorm/caffe/Data/mnt/3/language/train/mp3/' + filepath)
 91 |     for augmentIdx in range(0, 20):
 92 |         alpha = np.random.uniform(0.9, 1.1)
 93 |         offset = np.random.randint(90)
 94 |         plotstft(wavfile, channel=0, name='/home/brainstorm/data/language/train/pngaugm/'+filename+'.'+str(augmentIdx)+'.png',
 95 |                  alpha=alpha, offset=offset)
 96 | 
 97 |     os.remove(wavfile)
 98 |     print "processed %d files" % (iter + 1)
 99 |     
100 | 


--------------------------------------------------------------------------------
/choose_equal_split.py:
--------------------------------------------------------------------------------
 1 | """split data into training and validation sets"""
 2 | import csv
 3 | 
 4 | with open('trainingData.csv', 'rb') as csvfile:
 5 |     next(csvfile) #skip headers
 6 |     data = list(csv.reader(csvfile, delimiter=','))
 7 | 
 8 |     #Map every language to an ID
 9 |     langs = set([language.strip() for _,language in data])
10 |     ID = {lang: i for i,lang in enumerate(sorted(langs))}
11 | 
12 |     #Write first 306 items to training set and the rest to validation set
13 |     cnt = [0 for _ in range(len(langs))]
14 |     with open('trainEqual.csv', 'w') as train:
15 |         with open('valEqaul.csv', 'w') as val:
16 |             for line in data:
17 |                 filepath, language = map(str.strip, line)
18 |                 id_lang = ID[language]
19 | 
20 |                 if (cnt[id_lang] < 306):
21 |                     train.write(filepath[:-4] + ',' + str(id_lang) + '\n')
22 |                 else:
23 |                     val.write(filepath[:-4] + ',' + str(id_lang) + '\n')
24 |                 cnt[id_lang] += 1
25 | 


--------------------------------------------------------------------------------
/concatenate_csvs.py:
--------------------------------------------------------------------------------
 1 | """ Usage: python concatenate_csvs.py csv1path csv2path ..
 2 | """
 3 | import sys
 4 | import numpy as np
 5 | 
 6 | n_csv = len(sys.argv) - 1
 7 | cnt = 12320
 8 | 
 9 | csv = []
10 | for index in range(1, len(sys.argv)):
11 |     csv.append(open(sys.argv[index], 'r'))
12 |     
13 | outfile = open('concatenated.csv', 'w')
14 | 
15 | for iter in range(12320):
16 |     out = []
17 |     for index in range(n_csv):
18 |         cur_out = csv[index].readline().split(',')
19 |         cur_out = [float(x) for x in cur_out]
20 |         out += cur_out
21 | 
22 |     out = [("%.6f" % x) for x in out]
23 |     outfile.write(','.join(out) + '\n')


--------------------------------------------------------------------------------
/create_spectrograms.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from matplotlib import pyplot as plt
  3 | import scipy.io.wavfile as wav
  4 | from numpy.lib import stride_tricks
  5 | import PIL.Image as Image
  6 | import os
  7 | 
  8 | """ short time fourier transform of audio signal """
  9 | def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
 10 |     win = window(frameSize)
 11 |     hopSize = int(frameSize - np.floor(overlapFac * frameSize))
 12 |     
 13 |     # zeros at beginning (thus center of 1st window should be for sample nr. 0)
 14 |     samples = np.append(np.zeros(np.floor(frameSize/2.0)), sig)    
 15 |     # cols for windowing
 16 |     cols = np.ceil( (len(samples) - frameSize) / float(hopSize)) + 1
 17 |     # zeros at end (thus samples can be fully covered by frames)
 18 |     samples = np.append(samples, np.zeros(frameSize))
 19 |     
 20 |     frames = stride_tricks.as_strided(samples, shape=(cols, frameSize), strides=(samples.strides[0]*hopSize, samples.strides[0])).copy()
 21 |     frames *= win
 22 |     
 23 |     return np.fft.rfft(frames)    
 24 |     
 25 | """ scale frequency axis logarithmically """    
 26 | def logscale_spec(spec, sr=44100, factor=20., alpha=1.0, f0=0.9, fmax=1):
 27 |     spec = spec[:, 0:256]
 28 |     timebins, freqbins = np.shape(spec)
 29 |     scale = np.linspace(0, 1, freqbins) #** factor
 30 |     
 31 |     # http://ieeexplore.ieee.org/xpl/login.jsp?tp=&arnumber=650310&url=http%3A%2F%2Fieeexplore.ieee.org%2Fiel4%2F89%2F14168%2F00650310
 32 |     scale = np.array(map(lambda x: x * alpha if x <= f0 else (fmax-alpha*f0)/(fmax-f0)*(x-f0)+alpha*f0, scale))
 33 |     scale *= (freqbins-1)/max(scale)
 34 | 
 35 |     newspec = np.complex128(np.zeros([timebins, freqbins]))
 36 |     allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1])
 37 |     freqs = [0.0 for i in range(freqbins)]
 38 |     totw = [0.0 for i in range(freqbins)]
 39 |     for i in range(0, freqbins):
 40 |         if (i < 1 or i + 1 >= freqbins):
 41 |             newspec[:, i] += spec[:, i]
 42 |             freqs[i] += allfreqs[i]
 43 |             totw[i] += 1.0
 44 |             continue
 45 |         else:
 46 |             # scale[15] = 17.2
 47 |             w_up = scale[i] - np.floor(scale[i])
 48 |             w_down = 1 - w_up
 49 |             j = int(np.floor(scale[i]))
 50 |            
 51 |             newspec[:, j] += w_down * spec[:, i]
 52 |             freqs[j] += w_down * allfreqs[i]
 53 |             totw[j] += w_down
 54 |             
 55 |             newspec[:, j + 1] += w_up * spec[:, i]
 56 |             freqs[j + 1] += w_up * allfreqs[i]
 57 |             totw[j + 1] += w_up
 58 |     
 59 |     for i in range(len(freqs)):
 60 |         if (totw[i] > 1e-6):
 61 |             freqs[i] /= totw[i]
 62 |     
 63 |     return newspec, freqs
 64 | 
 65 | """ plot spectrogram"""
 66 | def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="gray", channel=0, name='tmp.png', alpha=1, offset=0):
 67 |     samplerate, samples = wav.read(audiopath)
 68 |     samples = samples[:, channel]
 69 |     s = stft(samples, binsize)
 70 | 
 71 |     sshow, freq = logscale_spec(s, factor=1, sr=samplerate, alpha=alpha)
 72 |     sshow = sshow[2:, :]
 73 |     ims = 20.*np.log10(np.abs(sshow)/10e-6) # amplitude to decibel
 74 |     timebins, freqbins = np.shape(ims)
 75 |     
 76 |     ims = np.transpose(ims)
 77 |     # ims = ims[0:256, offset:offset+768] # 0-11khz, ~9s interval
 78 |     ims = ims[0:256, :] # 0-11khz, ~10s interval
 79 |     #print "ims.shape", ims.shape
 80 |     
 81 |     image = Image.fromarray(ims) 
 82 |     image = image.convert('L')
 83 |     image.save(name)
 84 | 
 85 | 
 86 | file = open('trainingData.csv', 'r')
 87 | for iter, line in enumerate(file.readlines()[1:]): # first line of traininData.csv is header (only for trainingData.csv)
 88 |     filepath = line.split(',')[0]
 89 |     filename = filepath[:-4]
 90 |     wavfile = 'tmp.wav'
 91 |     os.system('mpg123 -w ' + wavfile + ' /home/brainstorm/caffe/Data/mnt/3/language/train/mp3/' + filepath)
 92 |     """
 93 |     for augmentIdx in range(0, 20):
 94 |         alpha = np.random.uniform(0.9, 1.1)
 95 |         offset = np.random.randint(90)
 96 |         plotstft(wavfile, channel=0, name='/home/brainstorm/data/language/train/pngaugm/'+filename+'.'+str(augmentIdx)+'.png',
 97 |                  alpha=alpha, offset=offset)
 98 |     """
 99 |     # we create only one spectrogram for each speach sample
100 |     # we don't do vocal tract length perturbation (alpha=1.0)
101 |     # also we don't crop 9s part from the speech
102 |     plotstft(wavfile, channel=0, name='/home/brainstorm/data/language/train/pngaugm/'+filename+'.png', alpha=1.0)
103 |     os.remove(wavfile)
104 |     print "processed %d files" % (iter + 1)


--------------------------------------------------------------------------------
/ensembling/ensemble.theano.py:
--------------------------------------------------------------------------------
  1 | """ Usage: python ensemble.theano.py model1 [another_model]*
  2 |     
  3 | for GPU mode
  4 |     1. export PATH=$PATH:/usr/local/cuda-6.5/bin
  5 |     2. THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32,nvcc.flags='-arch=sm_30' python ensemble.theano.py model1 [another_model]*
  6 | """
  7 | 
  8 | import cPickle as pickle
  9 | import sys
 10 | import caffe
 11 | import numpy as np
 12 | 
 13 | caffe.set_mode_gpu()
 14 | 
 15 | def get_score(probs, label):
 16 |     pred = sorted([(x, it) for it, x in enumerate(probs)], reverse=True)
 17 |     if (pred[0][1] == label):
 18 |         return 1000
 19 |     if (pred[1][1] == label):
 20 |         return 400
 21 |     if (pred[2][1] == label): 
 22 |         return 160
 23 |     return 0
 24 |     
 25 | def get_full_score(preds, labels):
 26 |     topCoderScore = 0.0
 27 |     for i in range(len(labels)):
 28 |         topCoderScore += get_score(preds[i], labels[i])  
 29 |     
 30 |     return topCoderScore / len(labels) * 3520
 31 | 
 32 | ####################### COLLECTING INFO ABOUT LANGS ############################
 33 | file = open('../trainingData.csv')
 34 | data = file.readlines()[1:]
 35 | langs = set()
 36 | for line in data:
 37 |     filepath, language = line.split(',')
 38 |     language = language.strip()
 39 |     langs.add(language)
 40 | langs = sorted(langs)
 41 | file.close()
 42 | 
 43 | n_models = len(sys.argv) - 1
 44 | X = np.zeros((12320, n_models * 176), dtype=np.float32)
 45 | for iter in range(n_models):
 46 |     csvpath = 'probs/val/' + sys.argv[iter + 1]
 47 |     csv = open(csvpath, 'r')
 48 |     for row_id, line in enumerate(csv.readlines()):
 49 |         mas = line.split(',')
 50 |         mas = np.array([float(x) for x in mas], dtype=np.float32)
 51 |         X[row_id, 176*iter:176*(iter+1)] = mas
 52 |     csv.close()
 53 |     
 54 | Y = []
 55 | label_file = open('../valEqual.csv')
 56 | for line in label_file.readlines():
 57 |     Y.append(int(line.split(',')[1]))
 58 | label_file.close()
 59 | 
 60 | print "X.shape =", X.shape
 61 | print "len(Y) =", len(Y)
 62 | 
 63 | for iter in range(n_models):
 64 |     print "score of model %d = %f" % (iter+1, get_full_score(X[:, 176*iter:176*(iter+1)], Y))
 65 | 
 66 | 
 67 | ######################### TRAINING ENSEMBLING MODEL ############################
 68 | import theano
 69 | import theano.tensor as T
 70 | import lasagne
 71 | import lasagne.layers as layers
 72 | 
 73 | n_train_examples = 10000
 74 | X = X.astype(theano.config.floatX)
 75 | trainX = X[:n_train_examples]
 76 | trainY = Y[:n_train_examples]
 77 | valX = X[n_train_examples:]
 78 | valY = Y[n_train_examples:]
 79 | 
 80 | input_var = T.matrix('X')
 81 | target_var = T.ivector('y')
 82 | 
 83 | from lasagne.nonlinearities import softmax, sigmoid, rectify
 84 | network = lasagne.layers.InputLayer((None, X.shape[1]), input_var)
 85 | network = lasagne.layers.DenseLayer(network, 4000, nonlinearity=rectify)
 86 | network = lasagne.layers.DenseLayer(lasagne.layers.dropout(network, 0.5), 176, nonlinearity=softmax)
 87 | 
 88 | prediction = lasagne.layers.get_output(network)
 89 | loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
 90 | loss = loss.mean() + 0 * lasagne.regularization.regularize_network_params(
 91 |         network, lasagne.regularization.l2)
 92 | 
 93 | params = lasagne.layers.get_all_params(network, trainable=True)
 94 | learning_rate = theano.shared(np.float32(0.2))
 95 | updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=learning_rate, momentum=0.9)
 96 | train_fn = theano.function([input_var, target_var], loss, updates=updates)
 97 | validation_fn = theano.function([input_var, target_var], loss)
 98 | 
 99 | for epoch in range(1000):
100 |     train_loss = train_fn(trainX, trainY)
101 |     val_loss = validation_fn(valX, valY)
102 |     print "Epoch %d: train_loss = %f, val_loss = %f, lr = %f" % (epoch + 1, train_loss, val_loss, learning_rate.get_value())
103 |     if (epoch > 0 and epoch % 200 == 0):
104 |         learning_rate.set_value(np.float32(learning_rate.get_value() * 0.7))
105 |     
106 | test_prediction = lasagne.layers.get_output(network, deterministic=True)
107 | predict_fn = theano.function([input_var], test_prediction)
108 | all_predictions = predict_fn(valX)
109 | 
110 | score = 0.0
111 | for probs, label in zip(all_predictions, valY):
112 |     score += get_score(probs, label)
113 | print "Final score on ensembling validaion = %f" % score
114 | print "Expected score = %f" % (score / len(valY) * 3520)
115 | 
116 | 
117 | print "\n\n==> creating submission..."
118 | X = np.zeros((12320, n_models * 176), dtype=np.float32)
119 | for iter in range(n_models):
120 |     csvpath = 'probs/test/' + sys.argv[iter + 1]
121 |     csv = open(csvpath, 'r')
122 |     for row_id, line in enumerate(csv.readlines()):
123 |         mas = line.split(',')
124 |         mas = np.array([float(x) for x in mas], dtype=np.float32)
125 |         X[row_id, 176*iter:176*(iter+1)] = mas
126 |     csv.close()
127 | 
128 | prediction = predict_fn(X)
129 | print "prediction.shape =", prediction.shape
130 | ensembled = open('ensembled.csv', 'w')
131 | for probs in prediction:
132 |     out = [str(x) for x in probs]
133 |     ensembled.write(','.join(out) + '\n')
134 | 
135 | 
136 | """
137 | ######################### SAVING MODEL TO BE ABLE TO REPRODUCE #################
138 | print "==> Saving model..."
139 | with open("model.pickle", 'w') as save_file:
140 | 	pickle.dump(obj = {'params' : layers.get_all_param_values(network)}, file = save_file, protocol = -1)
141 | """
142 | 


--------------------------------------------------------------------------------
/ensembling/get_output_layers.py:
--------------------------------------------------------------------------------
 1 | """ Usage: python get_output_layers.py test|val
 2 | """
 3 | import sys
 4 | import caffe
 5 | import numpy as np
 6 | 
# Run Caffe on the GPU.
caffe.set_mode_gpu()

# Deploy prototxt describing the network, and the name of the trained
# snapshot; the weights are read from ../models/<model>.caffemodel and the
# same name is used as prefix of the output CSV files.
deploy = '../prototxt/deploy.augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt'
model = 'augm_dropout0.3_on_augm84K-lr0.01_30K_iter_75000'
model_path = '../models/' + model + '.caffemodel'
12 | 
13 | """
14 | ####################### networks with no augmentation ##########################
15 | net = caffe.Classifier(deploy, model_path)
16 | transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
17 | transformer.set_transpose('data', (2, 0, 1))
18 | net.blobs['data'].reshape(1, 1, 256, 858)
19 | 
20 | folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/png/'
21 | cnt = 12320
22 | file = open('../valEqual.csv', 'r')
23 | prob_file = open('probs/val/' + model + '.csv', 'w')
24 | 
25 | for iter in range(cnt):
26 |     name = file.readline().split(',')[0]
27 |     net.blobs['data'].data[...] = transformer.preprocess('data', 
28 |             caffe.io.load_image(folder + name + '.png', color=False))
29 |     probs = net.forward()['loss'][0]
30 |     probs = [str(x) for x in probs]
31 |     prob_file.write(','.join(probs) + '\n')
32 |     
33 |     if (iter % 100 == 0):
34 |         print "processed %d images" % (iter + 1)
35 | """
36 | 
######################### networks with augmentation ###########################
# Validate the command line explicitly: an assert disappears under -O and a
# missing argument would otherwise surface as an IndexError.
if len(sys.argv) < 2 or sys.argv[1] not in ('test', 'val'):
    sys.exit(__doc__)
dataset = sys.argv[1]
augm_cnt = 20   # augmented spectrograms per recording
cnt = 12320     # recordings to process

if (dataset == 'val'):
    folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/pngaugm/'
    listing_path = '../valEqual.csv'
else:
    folder = '../test/pngaugm/'
    listing_path = '../testingData.csv'

# sum - mean of augm_cnt versions of speech
# log - mean of logs of augm_cnt versions of speech
# dense - last dense layer, 1024 outputs
out_prefix = 'probs/' + dataset + '/' + model
out_suffix = str(augm_cnt) + '.csv'

listing = open(listing_path, 'r')
prob_file_sum = open(out_prefix + '.sum' + out_suffix, 'w')
prob_file_log = open(out_prefix + '.log' + out_suffix, 'w')
dense_file = open(out_prefix + '.dense' + out_suffix, 'w')

try:
    net = caffe.Classifier(deploy, model_path)
    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    transformer.set_transpose('data', (2, 0, 1))

    # all augmented versions of one recording are pushed through as one batch
    net.blobs['data'].reshape(augm_cnt, 1, 256, 768)
    for image_idx in range(cnt):
        entry = listing.readline()
        if (dataset == 'val'):
            # validation rows are "name,label"
            name = entry.split(',')[0]
        else:
            # test rows are a file name; drop its 4-character extension
            name = entry.strip()[:-4]

        X = np.zeros((augm_cnt, 1, 256, 768), dtype=np.float32)
        for index in range(augm_cnt):
            augm_path = folder + name + '.' + str(index) + '.png'
            X[index] = transformer.preprocess('data', caffe.io.load_image(augm_path, color=False))

        net.blobs['data'].data[...] = X
        out = net.forward()['loss']
        probs_sum = out.mean(axis=0)
        # 1e-7 keeps the logarithm finite for zero probabilities
        probs_log = np.log(out + 1e-7).mean(axis=0)
        dense = net.blobs['ip2new'].data

        prob_file_sum.write(','.join(str(x) for x in probs_sum) + '\n')
        prob_file_log.write(','.join("%f" % x for x in probs_log) + '\n')

        # one row per augmented version
        for index in range(augm_cnt):
            dense_file.write(','.join(str(x) for x in dense[index]) + '\n')

        if (image_idx % 10 == 0):
            print("processed %d images" % (image_idx + 1))
finally:
    # flush and release every file even when a forward pass fails
    for handle in (listing, prob_file_sum, prob_file_log, dense_file):
        handle.close()
90 | 


--------------------------------------------------------------------------------
/get_score_from_probabilities.py:
--------------------------------------------------------------------------------
 1 | """ USAGE: python get_score_from_probabilities.py --prediction= --answer=
 2 |     prediction file may have less lines
 3 | """
 4 | import sys
 5 | import numpy as np
 6 | import argparse
 7 | 
 8 | parser = argparse.ArgumentParser()
 9 | parser.add_argument('--prediction', type=str)
10 | parser.add_argument('--answer', type=str, default='valDataNew.csv')
11 | args = parser.parse_args()
12 | print args
13 | 
14 | 
15 | # info about classes
16 | file = open('trainingData.csv')
17 | data = file.readlines()[1:]
18 | langs = set()
19 | for line in data:
20 |     filepath, language = line.split(',')
21 |     language = language.strip()
22 |     langs.add(language)
23 | langs = sorted(langs)
24 | 
25 | 
26 | prediction_file = open(args.prediction, 'r')
27 | prediction_lines = prediction_file.readlines()
28 | answer_file = open(args.answer, 'r')
29 | answer_lines = answer_file.readlines()
30 | cnt = len(prediction_lines)
31 | top_coder_score = 0.0
32 | correct = 0
33 | 
34 | wrong_answers = open('wrong_answers.txt', 'w')
35 | 
36 | for iter in range(cnt):
37 |     st = answer_lines[iter]
38 |     (name, label) = st.split(',')
39 |     label = int(label)
40 | 
41 |     out = prediction_lines[iter].split(',')
42 |     out = [float(x) for x in out]
43 |     pred = [(x, it) for it, x in enumerate(out)]
44 |     pred = sorted(pred, reverse=True)
45 | 
46 |     if (pred[0][1] == label):
47 |         correct += 1
48 |         top_coder_score = top_coder_score + 1000
49 |     elif (pred[1][1] == label):
50 |         #correct += 1
51 |         top_coder_score = top_coder_score + 400
52 |     elif (pred[2][1] == label): 
53 |         #correct += 1
54 |         top_coder_score = top_coder_score + 160
55 | 
56 |     if (pred[0][1] != label):
57 |         print >> wrong_answers, answer_lines[iter] + prediction_lines[iter]
58 |     
59 |     if ((iter + 1) % 100 == 0):
60 |         print >> sys.stderr, "processed %d / %d images" % (iter + 1, cnt)
61 |         print >> sys.stderr, "expected score:", top_coder_score / (iter + 1) * 35200
62 | 
63 | print >> sys.stderr, "Final score: ", top_coder_score, " / ", cnt, "000"
64 | print >> sys.stderr, "expected score:", top_coder_score / cnt * 35200
65 | print >> sys.stderr, "Accuracy: ", 100.0 * correct / cnt


--------------------------------------------------------------------------------
/get_score_from_top3_prediction.py:
--------------------------------------------------------------------------------
 1 | """ USAGE: python get_score_from_top3_prediction.py --prediction= --answer=
 2 |    
 3 |     Prediction file may have less lines
 4 |     
 5 |     Each line of prediction file must contain at least 3 integers: labels of top3
 6 |     predictions, then it may have some additional information
 7 | """
 8 | import sys
 9 | import numpy as np
10 | import argparse
11 | 
12 | parser = argparse.ArgumentParser()
13 | parser.add_argument('--prediction', type=str)
14 | parser.add_argument('--answer', type=str, default='valDataNew.csv')
15 | args = parser.parse_args()
16 | print args
17 | 
18 | 
19 | # info about classes
20 | file = open('trainingData.csv')
21 | data = file.readlines()[1:]
22 | langs = set()
23 | for line in data:
24 |     filepath, language = line.split(',')
25 |     language = language.strip()
26 |     langs.add(language)
27 | langs = sorted(langs)
28 | 
29 | 
30 | prediction_file = open(args.prediction, 'r')
31 | prediction_lines = prediction_file.readlines()
32 | answer_file = open(args.answer, 'r')
33 | answer_lines = answer_file.readlines()
34 | cnt = len(prediction_lines)
35 | top_coder_score = 0.0
36 | correct = 0
37 | 
38 | wrong_answers = open('wrong_answers.txt', 'w')
39 | 
40 | for iter in range(cnt):
41 |     st = answer_lines[iter]
42 |     (name, label) = st.split(',')
43 |     label = int(label)
44 | 
45 |     pred = prediction_lines[iter].split(',')
46 |     pred = [int(x) for x in pred]
47 | 
48 |     if (pred[0] == label):
49 |         correct += 1
50 |         top_coder_score = top_coder_score + 1000
51 |     elif (pred[1] == label):
52 |         #correct += 1
53 |         top_coder_score = top_coder_score + 400
54 |     elif (pred[2] == label):
55 |         #correct += 1
56 |         top_coder_score = top_coder_score + 160
57 | 
58 |     if (pred[0] != label):
59 |         print >> wrong_answers, (answer_lines[iter] + str(pred[3 + pred[0]]) + ',' + str(pred[3 + pred[1]]) + ',' + 
60 |             str(pred[3 + pred[2]]) + ', votes for correct answer: ' + str(pred[3 + label])) 
61 | 
62 |     if ((iter + 1) % 100 == 0):
63 |         print >> sys.stderr, "processed %d / %d images" % (iter + 1, cnt)
64 |         print >> sys.stderr, "expected score:", top_coder_score / (iter + 1) * 35200
65 | 
66 | print >> sys.stderr, "Final score: ", top_coder_score, " / ", cnt, "000"
67 | print >> sys.stderr, "expected score:", top_coder_score / cnt * 35200
68 | print >> sys.stderr, "Accuracy: ", 100.0 * correct / cnt


--------------------------------------------------------------------------------
/get_sum_of_csvs.py:
--------------------------------------------------------------------------------
 1 | """ Usage: python get_sum_of_csvs.py csv1path csv2path ..
 2 | """
 3 | import sys
 4 | import numpy as np
 5 | 
 6 | n_csv = len(sys.argv) - 1
 7 | cnt = 12320
 8 | 
 9 | csv = []
10 | for index in range(1, len(sys.argv)):
11 |     csv.append(open(sys.argv[index], 'r'))
12 |     
13 | outfile = open('summed.csv', 'w')
14 | 
15 | for iter in range(12320):
16 |     out = np.zeros((176,), dtype=np.float32)
17 |     for index in range(n_csv):
18 |         cur_out = csv[index].readline().split(',')
19 |         cur_out = [float(x) for x in cur_out]
20 |         out += cur_out
21 |     
22 |     out = [("%.6f" % x) for x in out]
23 |     outfile.write(','.join(out) + '\n')


--------------------------------------------------------------------------------
/majority_vote_ensembling.py:
--------------------------------------------------------------------------------
 1 | """ Usage: python majority_vote_ensembling.py csv1path csv2path ..
 2 | """
 3 | import sys
 4 | import numpy as np
 5 | 
 6 | n_csv = len(sys.argv) - 1
 7 | train_cnt = 12320
 8 | 
 9 | csv = []
10 | for index in range(1, len(sys.argv)):
11 |     csv.append(open(sys.argv[index], 'r'))
12 |     
13 | ensembled = open('top3_prediction_ensembled.csv', 'w')
14 | 
15 | for iter in range(train_cnt):
16 |     cnt = [0 for i in range(176)]
17 |     avg_prob = np.array([0.0 for i in range(176)])
18 | 
19 |     for index in range(n_csv):
20 |         cur_prob = csv[index].readline().split(',')
21 |         cur_prob = np.array([float(x) for x in cur_prob])
22 |         
23 |         avg_prob += cur_prob
24 |         prediction = cur_prob.argmax()
25 |         cnt[prediction] += 1
26 | 
27 | 
28 |     mas = [(cnt[index], avg_prob[index], index) for index in range(176)]
29 |     mas = sorted(mas, reverse=True)
30 |     
31 |     ensembled.write(str(mas[0][2]) + ',' + str(mas[1][2]) + ',' + str(mas[2][2]) + ',')
32 |     ensembled.write(','.join([str(x) for x in cnt]) + '\n')
33 |     


--------------------------------------------------------------------------------
/make_submission.py:
--------------------------------------------------------------------------------
 1 | """ Usage: python make_submission.py csvpath model_name
 2 | csv - must contain 12320 rows, 176 coloumns: the predictions for test set
 3 | """
 4 | 
 5 | import sys
 6 | import numpy as np
 7 | 
 8 | # info about classes
 9 | file = open('trainingData.csv')
10 | data = file.readlines()[1:]
11 | langs = set()
12 | for line in data:
13 |     filepath, language = line.split(',')
14 |     language = language.strip()
15 |     langs.add(language)
16 | langs = sorted(langs)
17 | 
18 | path = sys.argv[1]
19 | name = sys.argv[2]
20 | read_file = open(path, 'r')
21 | f = open('testingData.csv')
22 | cnt = 12320
23 | print_file = open('predictions/test_' + name + '.csv', 'w')
24 | 
25 | for iter in range(cnt):
26 |     st = f.readline()
27 |     name = st.strip()[:-4]
28 |     
29 |     out = read_file.readline().split(',')
30 |     out = [float(x) for x in out]
31 |     pred = sorted([(x, it) for it, x in enumerate(out)], reverse=True)
32 | 
33 |     for i in range(3):
34 |         lang_id = pred[i][1]
35 |         lang = langs[lang_id]
36 |         print_file.write(name + '.mp3,' + lang + ',' + str(i + 1) + '\n')
37 | 
38 |     if (iter % 100 == 0):
39 |         print >> sys.stderr, "processed %d / %d images" % (iter + 1, cnt)
40 | 


--------------------------------------------------------------------------------
/prototxt/augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024r-1024r_DLR_nolrcoef.prototxt:
--------------------------------------------------------------------------------
  1 | name: "LangNet"
  2 | # DATA LAYERS
  3 | layer {
  4 |   name: "mnist"
  5 |   type: "Data"
  6 |   top: "data"
  7 |   top: "label"
  8 |   include {
  9 |     phase: TRAIN
 10 |   }
 11 |   transform_param {
 12 |     scale: 0.00390625
 13 |   }
 14 |   data_param {
 15 |     source: "train/train_augm_db"
 16 |     batch_size: 24
 17 |     backend: LEVELDB
 18 |   }
 19 | }
 20 | layer {
 21 |   name: "mnist"
 22 |   type: "Data"
 23 |   top: "data"
 24 |   top: "label"
 25 |   include {
 26 |     phase: TEST
 27 |   }
 28 |   transform_param {
 29 |     scale: 0.00390625
 30 |   }
 31 |   data_param {
 32 |     source: "train/val_augm_db"
 33 |     batch_size: 24
 34 |     backend: LEVELDB
 35 |   }
 36 | }
 37 | 
 38 | # CONV1-RELU1-POOL1
 39 | layer {
 40 |   name: "conv1"
 41 |   type: "Convolution"
 42 |   bottom: "data"
 43 |   top: "conv1"
 44 |   param {
 45 |     lr_mult: 1
 46 |   }
 47 |   param {
 48 |     lr_mult: 2
 49 |   }
 50 |   convolution_param {
 51 |     num_output: 32
 52 |     kernel_size: 7
 53 |     stride: 1
 54 |     weight_filler {
 55 |       type: "xavier"
 56 |     }
 57 |     bias_filler {
 58 |       type: "constant"
 59 |     }
 60 |   }
 61 | }
 62 | layer {
 63 |   name: "relu1"
 64 |   type: "ReLU"
 65 |   bottom: "conv1"
 66 |   top: "conv1"
 67 | }
 68 | layer {
 69 |   name: "pool1"
 70 |   type: "Pooling"
 71 |   bottom: "conv1"
 72 |   top: "pool1"
 73 |   pooling_param {
 74 |     pool: MAX
 75 |     kernel_size: 3
 76 |     stride: 2
 77 |   }
 78 | }
 79 | 
 80 | # CONV2-RELU2-POOL2
 81 | layer {
 82 |   name: "conv2"
 83 |   type: "Convolution"
 84 |   bottom: "pool1"
 85 |   top: "conv2"
 86 |   param {
 87 |     lr_mult: 1
 88 |   }
 89 |   param {
 90 |     lr_mult: 2
 91 |   }
 92 |   convolution_param {
 93 |     num_output: 64
 94 |     kernel_size: 5
 95 |     stride: 1
 96 |     weight_filler {
 97 |       type: "xavier"
 98 |     }
 99 |     bias_filler {
100 |       type: "constant"
101 |     }
102 |   }
103 | }
104 | layer {
105 |   name: "relu2"
106 |   type: "ReLU"
107 |   bottom: "conv2"
108 |   top: "conv2"
109 | }
110 | layer {
111 |   name: "pool2"
112 |   type: "Pooling"
113 |   bottom: "conv2"
114 |   top: "pool2"
115 |   pooling_param {
116 |     pool: MAX
117 |     kernel_size: 3
118 |     stride: 2
119 |   }
120 | }
121 | 
122 | # CONV3-RELU3-POOL3
123 | layer {
124 |   name: "conv3"
125 |   type: "Convolution"
126 |   bottom: "pool2"
127 |   top: "conv3"
128 |   param {
129 |     lr_mult: 1
130 |   }
131 |   param {
132 |     lr_mult: 2
133 |   }
134 |   convolution_param {
135 |     num_output: 64
136 |     kernel_size: 3
137 |     stride: 1
138 |     weight_filler {
139 |       type: "xavier"
140 |     }
141 |     bias_filler {
142 |       type: "constant"
143 |     }
144 |   }
145 | }
146 | layer {
147 |   name: "relu3"
148 |   type: "ReLU"
149 |   bottom: "conv3"
150 |   top: "conv3"
151 | }
152 | layer {
153 |   name: "pool3"
154 |   type: "Pooling"
155 |   bottom: "conv3"
156 |   top: "pool3"
157 |   pooling_param {
158 |     pool: MAX
159 |     kernel_size: 3
160 |     stride:2
161 |   }
162 | }
163 | 
164 | # CONV4-RELU4-POOL4
165 | layer {
166 |   name: "conv4"
167 |   type: "Convolution"
168 |   bottom: "pool3"
169 |   top: "conv4"
170 |   param {
171 |     lr_mult: 1
172 |   }
173 |   param {
174 |     lr_mult: 2
175 |   }
176 |   convolution_param {
177 |     num_output: 128
178 |     kernel_size: 3
179 |     stride: 1
180 |     weight_filler {
181 |       type: "xavier"
182 |     }
183 |     bias_filler {
184 |       type: "constant"
185 |     }
186 |   }
187 | }
188 | layer {
189 |   name: "relu4"
190 |   type: "ReLU"
191 |   bottom: "conv4"
192 |   top: "conv4"
193 | }
194 | layer {
195 |   name: "pool4"
196 |   type: "Pooling"
197 |   bottom: "conv4"
198 |   top: "pool4"
199 |   pooling_param {
200 |     pool: MAX
201 |     kernel_size: 3
202 |     stride:2
203 |   }
204 | }
205 | 
206 | # CONV5-RELU5-POOL5
207 | layer {
208 |   name: "conv5"
209 |   type: "Convolution"
210 |   bottom: "pool4"
211 |   top: "conv5"
212 |   param {
213 |     lr_mult: 1
214 |   }
215 |   param {
216 |     lr_mult: 2
217 |   }
218 |   convolution_param {
219 |     num_output: 128
220 |     kernel_size: 3
221 |     stride: 1
222 |     weight_filler {
223 |       type: "xavier"
224 |     }
225 |     bias_filler {
226 |       type: "constant"
227 |     }
228 |   }
229 | }
230 | layer {
231 |   name: "relu5"
232 |   type: "ReLU"
233 |   bottom: "conv5"
234 |   top: "conv5"
235 | }
236 | layer {
237 |   name: "pool5"
238 |   type: "Pooling"
239 |   bottom: "conv5"
240 |   top: "pool5"
241 |   pooling_param {
242 |     pool: MAX
243 |     kernel_size: 3
244 |     stride:2
245 |   }
246 | }
247 | 
248 | # CONV6-RELU6-POOL6
249 | layer {
250 |   name: "conv6"
251 |   type: "Convolution"
252 |   bottom: "pool5"
253 |   top: "conv6"
254 |   param {
255 |     lr_mult: 1
256 |   }
257 |   param {
258 |     lr_mult: 2
259 |   }
260 |   convolution_param {
261 |     num_output: 256
262 |     kernel_size: 3
263 |     stride: 1
264 |     weight_filler {
265 |       type: "xavier"
266 |     }
267 |     bias_filler {
268 |       type: "constant"
269 |     }
270 |   }
271 | }
272 | layer {
273 |   name: "relu6"
274 |   type: "ReLU"
275 |   bottom: "conv6"
276 |   top: "conv6"
277 | }
278 | layer {
279 |   name: "pool6"
280 |   type: "Pooling"
281 |   bottom: "conv6"
282 |   top: "pool6"
283 |   pooling_param {
284 |     pool: MAX
285 |     kernel_size: 3
286 |     stride:2
287 |   }
288 | }
289 | 
290 | # IP layers
291 | layer {
292 |   name: "ip1new"
293 |   type: "InnerProduct"
294 |   bottom: "pool6"
295 |   top: "ip1new"
296 |   param {
297 |     lr_mult: 1
298 |   }
299 |   param {
300 |     lr_mult: 2
301 |   }
302 |   inner_product_param {
303 |     num_output: 1024
304 |     weight_filler {
305 |       type: "xavier"
306 |     }
307 |     bias_filler {
308 |       type: "constant"
309 |     }
310 |   }
311 | }
312 | layer {
313 |   name: "reluOnIp1"
314 |   type: "ReLU"
315 |   bottom: "ip1new"
316 |   top: "ip1new"
317 | }
318 | layer {
319 |   name: "ip2new"
320 |   type: "InnerProduct"
321 |   bottom: "ip1new"
322 |   top: "ip2new"
323 |   param {
324 |     lr_mult: 1
325 |   }
326 |   param {
327 |     lr_mult: 2
328 |   }
329 |   inner_product_param {
330 |     num_output: 1024
331 |     weight_filler {
332 |       type: "xavier"
333 |     }
334 |     bias_filler {
335 |       type: "constant"
336 |     }
337 |   }
338 | }
339 | layer {
340 |   name: "reluOnIp2"
341 |   type: "ReLU"
342 |   bottom: "ip2new"
343 |   top: "ip2new"
344 | }
345 | layer {
346 |   name: "ip3new"
347 |   type: "InnerProduct"
348 |   bottom: "ip2new"
349 |   top: "ip3new"
350 |   param {
351 |     lr_mult: 1
352 |   }
353 |   param {
354 |     lr_mult: 2
355 |   }
356 |   inner_product_param {
357 |     num_output: 176
358 |     weight_filler {
359 |       type: "xavier"
360 |     }
361 |     bias_filler {
362 |       type: "constant"
363 |     }
364 |   }
365 | }
366 | layer {
367 |   name: "accuracy"
368 |   type: "Accuracy"
369 |   bottom: "ip3new"
370 |   bottom: "label"
371 |   top: "accuracy"
372 |   include {
373 |     phase: TEST
374 |   }
375 | }
376 | layer {
377 |   name: "loss"
378 |   type: "SoftmaxWithLoss"
379 |   bottom: "ip3new"
380 |   bottom: "label"
381 |   top: "loss"
382 | }
383 | 


--------------------------------------------------------------------------------
/prototxt/augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt:
--------------------------------------------------------------------------------
  1 | name: "LangNet"
  2 | # DATA LAYERS
  3 | layer {
  4 |   name: "mnist"
  5 |   type: "Data"
  6 |   top: "data"
  7 |   top: "label"
  8 |   include {
  9 |     phase: TRAIN
 10 |   }
 11 |   transform_param {
 12 |     scale: 0.00390625
 13 |   }
 14 |   data_param {
 15 |     source: "train/train_augm_db"
 16 |     batch_size: 23
 17 |     backend: LEVELDB
 18 |   }
 19 | }
 20 | layer {
 21 |   name: "mnist"
 22 |   type: "Data"
 23 |   top: "data"
 24 |   top: "label"
 25 |   include {
 26 |     phase: TEST
 27 |   }
 28 |   transform_param {
 29 |     scale: 0.00390625
 30 |   }
 31 |   data_param {
 32 |     source: "train/val_augm_db"
 33 |     batch_size: 24
 34 |     backend: LEVELDB
 35 |   }
 36 | }
 37 | 
 38 | # CONV1-RELU1-POOL1
 39 | layer {
 40 |   name: "conv1"
 41 |   type: "Convolution"
 42 |   bottom: "data"
 43 |   top: "conv1"
 44 |   param {
 45 |     lr_mult: 1
 46 |   }
 47 |   param {
 48 |     lr_mult: 2
 49 |   }
 50 |   convolution_param {
 51 |     num_output: 32
 52 |     kernel_size: 7
 53 |     stride: 1
 54 |     weight_filler {
 55 |       type: "xavier"
 56 |     }
 57 |     bias_filler {
 58 |       type: "constant"
 59 |     }
 60 |   }
 61 | }
 62 | layer {
 63 |   name: "relu1"
 64 |   type: "ReLU"
 65 |   bottom: "conv1"
 66 |   top: "conv1"
 67 | }
 68 | layer {
 69 |   name: "pool1"
 70 |   type: "Pooling"
 71 |   bottom: "conv1"
 72 |   top: "pool1"
 73 |   pooling_param {
 74 |     pool: MAX
 75 |     kernel_size: 3
 76 |     stride: 2
 77 |   }
 78 | }
 79 | 
 80 | # CONV2-RELU2-POOL2
 81 | layer {
 82 |   name: "conv2"
 83 |   type: "Convolution"
 84 |   bottom: "pool1"
 85 |   top: "conv2"
 86 |   param {
 87 |     lr_mult: 1
 88 |   }
 89 |   param {
 90 |     lr_mult: 2
 91 |   }
 92 |   convolution_param {
 93 |     num_output: 64
 94 |     kernel_size: 5
 95 |     stride: 1
 96 |     weight_filler {
 97 |       type: "xavier"
 98 |     }
 99 |     bias_filler {
100 |       type: "constant"
101 |     }
102 |   }
103 | }
104 | layer {
105 |   name: "relu2"
106 |   type: "ReLU"
107 |   bottom: "conv2"
108 |   top: "conv2"
109 | }
110 | layer {
111 |   name: "pool2"
112 |   type: "Pooling"
113 |   bottom: "conv2"
114 |   top: "pool2"
115 |   pooling_param {
116 |     pool: MAX
117 |     kernel_size: 3
118 |     stride: 2
119 |   }
120 | }
121 | 
122 | # CONV3-RELU3-POOL3
123 | layer {
124 |   name: "conv3"
125 |   type: "Convolution"
126 |   bottom: "pool2"
127 |   top: "conv3"
128 |   param {
129 |     lr_mult: 1
130 |   }
131 |   param {
132 |     lr_mult: 2
133 |   }
134 |   convolution_param {
135 |     num_output: 64
136 |     kernel_size: 3
137 |     stride: 1
138 |     weight_filler {
139 |       type: "xavier"
140 |     }
141 |     bias_filler {
142 |       type: "constant"
143 |     }
144 |   }
145 | }
146 | layer {
147 |   name: "relu3"
148 |   type: "ReLU"
149 |   bottom: "conv3"
150 |   top: "conv3"
151 | }
152 | layer {
153 |   name: "pool3"
154 |   type: "Pooling"
155 |   bottom: "conv3"
156 |   top: "pool3"
157 |   pooling_param {
158 |     pool: MAX
159 |     kernel_size: 3
160 |     stride:2
161 |   }
162 | }
163 | 
164 | # CONV4-RELU4-POOL4
165 | layer {
166 |   name: "conv4"
167 |   type: "Convolution"
168 |   bottom: "pool3"
169 |   top: "conv4"
170 |   param {
171 |     lr_mult: 1
172 |   }
173 |   param {
174 |     lr_mult: 2
175 |   }
176 |   convolution_param {
177 |     num_output: 128
178 |     kernel_size: 3
179 |     stride: 1
180 |     weight_filler {
181 |       type: "xavier"
182 |     }
183 |     bias_filler {
184 |       type: "constant"
185 |     }
186 |   }
187 | }
188 | layer {
189 |   name: "relu4"
190 |   type: "ReLU"
191 |   bottom: "conv4"
192 |   top: "conv4"
193 | }
194 | layer {
195 |   name: "pool4"
196 |   type: "Pooling"
197 |   bottom: "conv4"
198 |   top: "pool4"
199 |   pooling_param {
200 |     pool: MAX
201 |     kernel_size: 3
202 |     stride:2
203 |   }
204 | }
205 | 
206 | # CONV5-RELU5-POOL5
207 | layer {
208 |   name: "conv5"
209 |   type: "Convolution"
210 |   bottom: "pool4"
211 |   top: "conv5"
212 |   param {
213 |     lr_mult: 1
214 |   }
215 |   param {
216 |     lr_mult: 2
217 |   }
218 |   convolution_param {
219 |     num_output: 128
220 |     kernel_size: 3
221 |     stride: 1
222 |     weight_filler {
223 |       type: "xavier"
224 |     }
225 |     bias_filler {
226 |       type: "constant"
227 |     }
228 |   }
229 | }
230 | layer {
231 |   name: "relu5"
232 |   type: "ReLU"
233 |   bottom: "conv5"
234 |   top: "conv5"
235 | }
236 | layer {
237 |   name: "pool5"
238 |   type: "Pooling"
239 |   bottom: "conv5"
240 |   top: "pool5"
241 |   pooling_param {
242 |     pool: MAX
243 |     kernel_size: 3
244 |     stride:2
245 |   }
246 | }
247 | 
248 | # CONV6-RELU6-POOL6
249 | layer {
250 |   name: "conv6"
251 |   type: "Convolution"
252 |   bottom: "pool5"
253 |   top: "conv6"
254 |   param {
255 |     lr_mult: 1
256 |   }
257 |   param {
258 |     lr_mult: 2
259 |   }
260 |   convolution_param {
261 |     num_output: 256
262 |     kernel_size: 3
263 |     stride: 1
264 |     weight_filler {
265 |       type: "xavier"
266 |     }
267 |     bias_filler {
268 |       type: "constant"
269 |     }
270 |   }
271 | }
272 | layer {
273 |   name: "relu6"
274 |   type: "ReLU"
275 |   bottom: "conv6"
276 |   top: "conv6"
277 | }
278 | layer {
279 |   name: "pool6"
280 |   type: "Pooling"
281 |   bottom: "conv6"
282 |   top: "pool6"
283 |   pooling_param {
284 |     pool: MAX
285 |     kernel_size: 3
286 |     stride:2
287 |   }
288 | }
289 | 
290 | # IP layers
291 | layer {
292 |   name: "ip1new"
293 |   type: "InnerProduct"
294 |   bottom: "pool6"
295 |   top: "ip1new"
296 |   param {
297 |     lr_mult: 1
298 |   }
299 |   param {
300 |     lr_mult: 2
301 |   }
302 |   inner_product_param {
303 |     num_output: 1024
304 |     weight_filler {
305 |       type: "xavier"
306 |     }
307 |     bias_filler {
308 |       type: "constant"
309 |     }
310 |   }
311 | }
312 | layer {
313 |   name: "reluOnIp1"
314 |   type: "ReLU"
315 |   bottom: "ip1new"
316 |   top: "ip1new"
317 | }
318 | layer {
319 |   name: "dropOnIp1"
320 |   type: "Dropout"
321 |   dropout_param {
322 |     dropout_ratio: 0.3
323 |   }
324 |   bottom: "ip1new"
325 |   top: "ip1new"
326 | }
327 | layer {
328 |   name: "ip2new"
329 |   type: "InnerProduct"
330 |   bottom: "ip1new"
331 |   top: "ip2new"
332 |   param {
333 |     lr_mult: 1
334 |   }
335 |   param {
336 |     lr_mult: 2
337 |   }
338 |   inner_product_param {
339 |     num_output: 1024
340 |     weight_filler {
341 |       type: "xavier"
342 |     }
343 |     bias_filler {
344 |       type: "constant"
345 |     }
346 |   }
347 | }
348 | layer {
349 |   name: "reluOnIp2"
350 |   type: "ReLU"
351 |   bottom: "ip2new"
352 |   top: "ip2new"
353 | }
354 | layer {
355 |   name: "dropOnIp2"
356 |   type: "Dropout"
357 |   dropout_param {
358 |     dropout_ratio: 0.3
359 |   }
360 |   bottom: "ip2new"
361 |   top: "ip2new"
362 | }
363 | layer {
364 |   name: "ip3new"
365 |   type: "InnerProduct"
366 |   bottom: "ip2new"
367 |   top: "ip3new"
368 |   param {
369 |     lr_mult: 1
370 |   }
371 |   param {
372 |     lr_mult: 2
373 |   }
374 |   inner_product_param {
375 |     num_output: 176
376 |     weight_filler {
377 |       type: "xavier"
378 |     }
379 |     bias_filler {
380 |       type: "constant"
381 |     }
382 |   }
383 | }
384 | layer {
385 |   name: "accuracy"
386 |   type: "Accuracy"
387 |   bottom: "ip3new"
388 |   bottom: "label"
389 |   top: "accuracy"
390 |   include {
391 |     phase: TEST
392 |   }
393 | }
394 | layer {
395 |   name: "loss"
396 |   type: "SoftmaxWithLoss"
397 |   bottom: "ip3new"
398 |   bottom: "label"
399 |   top: "loss"
400 | }
401 | 


--------------------------------------------------------------------------------
/prototxt/deploy.augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt:
--------------------------------------------------------------------------------
  1 | name: "LangNet"
  2 | # DATA LAYERS
  3 | input: "data"
  4 | input_dim: 1
  5 | input_dim: 1
  6 | input_dim: 256
  7 | input_dim: 768
  8 | 
  9 | # CONV1-RELU1-POOL1
 10 | layer {
 11 |   name: "conv1"
 12 |   type: "Convolution"
 13 |   bottom: "data"
 14 |   top: "conv1"
 15 |   param {
 16 |     lr_mult: 1
 17 |   }
 18 |   param {
 19 |     lr_mult: 2
 20 |   }
 21 |   convolution_param {
 22 |     num_output: 32
 23 |     kernel_size: 7
 24 |     stride: 1
 25 |     weight_filler {
 26 |       type: "xavier"
 27 |     }
 28 |     bias_filler {
 29 |       type: "constant"
 30 |     }
 31 |   }
 32 | }
 33 | layer {
 34 |   name: "relu1"
 35 |   type: "ReLU"
 36 |   bottom: "conv1"
 37 |   top: "conv1"
 38 | }
 39 | layer {
 40 |   name: "pool1"
 41 |   type: "Pooling"
 42 |   bottom: "conv1"
 43 |   top: "pool1"
 44 |   pooling_param {
 45 |     pool: MAX
 46 |     kernel_size: 3
 47 |     stride: 2
 48 |   }
 49 | }
 50 | 
 51 | # CONV2-RELU2-POOL2
 52 | layer {
 53 |   name: "conv2"
 54 |   type: "Convolution"
 55 |   bottom: "pool1"
 56 |   top: "conv2"
 57 |   param {
 58 |     lr_mult: 1
 59 |   }
 60 |   param {
 61 |     lr_mult: 2
 62 |   }
 63 |   convolution_param {
 64 |     num_output: 64
 65 |     kernel_size: 5
 66 |     stride: 1
 67 |     weight_filler {
 68 |       type: "xavier"
 69 |     }
 70 |     bias_filler {
 71 |       type: "constant"
 72 |     }
 73 |   }
 74 | }
 75 | layer {
 76 |   name: "relu2"
 77 |   type: "ReLU"
 78 |   bottom: "conv2"
 79 |   top: "conv2"
 80 | }
 81 | layer {
 82 |   name: "pool2"
 83 |   type: "Pooling"
 84 |   bottom: "conv2"
 85 |   top: "pool2"
 86 |   pooling_param {
 87 |     pool: MAX
 88 |     kernel_size: 3
 89 |     stride: 2
 90 |   }
 91 | }
 92 | 
 93 | # CONV3-RELU3-POOL3
 94 | layer {
 95 |   name: "conv3"
 96 |   type: "Convolution"
 97 |   bottom: "pool2"
 98 |   top: "conv3"
 99 |   param {
100 |     lr_mult: 1
101 |   }
102 |   param {
103 |     lr_mult: 2
104 |   }
105 |   convolution_param {
106 |     num_output: 64
107 |     kernel_size: 3
108 |     stride: 1
109 |     weight_filler {
110 |       type: "xavier"
111 |     }
112 |     bias_filler {
113 |       type: "constant"
114 |     }
115 |   }
116 | }
117 | layer {
118 |   name: "relu3"
119 |   type: "ReLU"
120 |   bottom: "conv3"
121 |   top: "conv3"
122 | }
123 | layer {
124 |   name: "pool3"
125 |   type: "Pooling"
126 |   bottom: "conv3"
127 |   top: "pool3"
128 |   pooling_param {
129 |     pool: MAX
130 |     kernel_size: 3
131 |     stride:2
132 |   }
133 | }
134 | 
135 | # CONV4-RELU4-POOL4
136 | layer {
137 |   name: "conv4"
138 |   type: "Convolution"
139 |   bottom: "pool3"
140 |   top: "conv4"
141 |   param {
142 |     lr_mult: 1
143 |   }
144 |   param {
145 |     lr_mult: 2
146 |   }
147 |   convolution_param {
148 |     num_output: 128
149 |     kernel_size: 3
150 |     stride: 1
151 |     weight_filler {
152 |       type: "xavier"
153 |     }
154 |     bias_filler {
155 |       type: "constant"
156 |     }
157 |   }
158 | }
159 | layer {
160 |   name: "relu4"
161 |   type: "ReLU"
162 |   bottom: "conv4"
163 |   top: "conv4"
164 | }
165 | layer {
166 |   name: "pool4"
167 |   type: "Pooling"
168 |   bottom: "conv4"
169 |   top: "pool4"
170 |   pooling_param {
171 |     pool: MAX
172 |     kernel_size: 3
173 |     stride:2
174 |   }
175 | }
176 | 
177 | # CONV5-RELU5-POOL5
178 | layer {
179 |   name: "conv5"
180 |   type: "Convolution"
181 |   bottom: "pool4"
182 |   top: "conv5"
183 |   param {
184 |     lr_mult: 1
185 |   }
186 |   param {
187 |     lr_mult: 2
188 |   }
189 |   convolution_param {
190 |     num_output: 128
191 |     kernel_size: 3
192 |     stride: 1
193 |     weight_filler {
194 |       type: "xavier"
195 |     }
196 |     bias_filler {
197 |       type: "constant"
198 |     }
199 |   }
200 | }
201 | layer {
202 |   name: "relu5"
203 |   type: "ReLU"
204 |   bottom: "conv5"
205 |   top: "conv5"
206 | }
207 | layer {
208 |   name: "pool5"
209 |   type: "Pooling"
210 |   bottom: "conv5"
211 |   top: "pool5"
212 |   pooling_param {
213 |     pool: MAX
214 |     kernel_size: 3
215 |     stride:2
216 |   }
217 | }
218 | 
219 | # CONV6-RELU6-POOL6
220 | layer {
221 |   name: "conv6"
222 |   type: "Convolution"
223 |   bottom: "pool5"
224 |   top: "conv6"
225 |   param {
226 |     lr_mult: 1
227 |   }
228 |   param {
229 |     lr_mult: 2
230 |   }
231 |   convolution_param {
232 |     num_output: 256
233 |     kernel_size: 3
234 |     stride: 1
235 |     weight_filler {
236 |       type: "xavier"
237 |     }
238 |     bias_filler {
239 |       type: "constant"
240 |     }
241 |   }
242 | }
243 | layer {
244 |   name: "relu6"
245 |   type: "ReLU"
246 |   bottom: "conv6"
247 |   top: "conv6"
248 | }
249 | layer {
250 |   name: "pool6"
251 |   type: "Pooling"
252 |   bottom: "conv6"
253 |   top: "pool6"
254 |   pooling_param {
255 |     pool: MAX
256 |     kernel_size: 3
257 |     stride:2
258 |   }
259 | }
260 | 
261 | # IP layers
262 | layer {
263 |   name: "ip1new"
264 |   type: "InnerProduct"
265 |   bottom: "pool6"
266 |   top: "ip1new"
267 |   param {
268 |     lr_mult: 1
269 |   }
270 |   param {
271 |     lr_mult: 2
272 |   }
273 |   inner_product_param {
274 |     num_output: 1024
275 |     weight_filler {
276 |       type: "xavier"
277 |     }
278 |     bias_filler {
279 |       type: "constant"
280 |     }
281 |   }
282 | }
283 | layer {
284 |   name: "reluOnIp1"
285 |   type: "ReLU"
286 |   bottom: "ip1new"
287 |   top: "ip1new"
288 | }
289 | layer {
290 |   name: "dropOnIp1"
291 |   type: "Dropout"
292 |   dropout_param {
293 |     dropout_ratio: 0.3
294 |   }
295 |   bottom: "ip1new"
296 |   top: "ip1new"
297 | }
298 | layer {
299 |   name: "ip2new"
300 |   type: "InnerProduct"
301 |   bottom: "ip1new"
302 |   top: "ip2new"
303 |   param {
304 |     lr_mult: 1
305 |   }
306 |   param {
307 |     lr_mult: 2
308 |   }
309 |   inner_product_param {
310 |     num_output: 1024
311 |     weight_filler {
312 |       type: "xavier"
313 |     }
314 |     bias_filler {
315 |       type: "constant"
316 |     }
317 |   }
318 | }
319 | layer {
320 |   name: "reluOnIp2"
321 |   type: "ReLU"
322 |   bottom: "ip2new"
323 |   top: "ip2new"
324 | }
325 | layer {
326 |   name: "dropOnIp2"
327 |   type: "Dropout"
328 |   dropout_param {
329 |     dropout_ratio: 0.3
330 |   }
331 |   bottom: "ip2new"
332 |   top: "ip2new"
333 | }
334 | layer {
335 |   name: "ip3new"
336 |   type: "InnerProduct"
337 |   bottom: "ip2new"
338 |   top: "ip3new"
339 |   param {
340 |     lr_mult: 1
341 |   }
342 |   param {
343 |     lr_mult: 2
344 |   }
345 |   inner_product_param {
346 |     num_output: 176
347 |     weight_filler {
348 |       type: "xavier"
349 |     }
350 |     bias_filler {
351 |       type: "constant"
352 |     }
353 |   }
354 | }
355 | layer {
356 |   name: "loss"
357 |   type: "Softmax"
358 |   bottom: "ip3new"
359 |   top: "loss"
360 | }
361 | 


--------------------------------------------------------------------------------
/prototxt/deploy.main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt:
--------------------------------------------------------------------------------
  1 | name: "LangNet"
  2 | # DATA LAYERS
  3 | input: "data"
  4 | input_dim: 1
  5 | input_dim: 1
  6 | input_dim: 256
  7 | input_dim: 858
  8 | 
  9 | # CONV1-RELU1-POOL1
 10 | layer {
 11 |   name: "conv1"
 12 |   type: "Convolution"
 13 |   bottom: "data"
 14 |   top: "conv1"
 15 |   param {
 16 |     lr_mult: 15
 17 |   }
 18 |   param {
 19 |     lr_mult: 30
 20 |   }
 21 |   convolution_param {
 22 |     num_output: 32
 23 |     kernel_size: 7
 24 |     stride: 1
 25 |     weight_filler {
 26 |       type: "xavier"
 27 |     }
 28 |     bias_filler {
 29 |       type: "constant"
 30 |     }
 31 |   }
 32 | }
 33 | layer {
 34 |   name: "relu1"
 35 |   type: "ReLU"
 36 |   bottom: "conv1"
 37 |   top: "conv1"
 38 | }
 39 | layer {
 40 |   name: "pool1"
 41 |   type: "Pooling"
 42 |   bottom: "conv1"
 43 |   top: "pool1"
 44 |   pooling_param {
 45 |     pool: MAX
 46 |     kernel_size: 3
 47 |     stride: 2
 48 |   }
 49 | }
 50 | 
 51 | # CONV2-RELU2-POOL2
 52 | layer {
 53 |   name: "conv2"
 54 |   type: "Convolution"
 55 |   bottom: "pool1"
 56 |   top: "conv2"
 57 |   param {
 58 |     lr_mult: 12
 59 |   }
 60 |   param {
 61 |     lr_mult: 24
 62 |   }
 63 |   convolution_param {
 64 |     num_output: 64
 65 |     kernel_size: 5
 66 |     stride: 1
 67 |     weight_filler {
 68 |       type: "xavier"
 69 |     }
 70 |     bias_filler {
 71 |       type: "constant"
 72 |     }
 73 |   }
 74 | }
 75 | layer {
 76 |   name: "relu2"
 77 |   type: "ReLU"
 78 |   bottom: "conv2"
 79 |   top: "conv2"
 80 | }
 81 | layer {
 82 |   name: "pool2"
 83 |   type: "Pooling"
 84 |   bottom: "conv2"
 85 |   top: "pool2"
 86 |   pooling_param {
 87 |     pool: MAX
 88 |     kernel_size: 3
 89 |     stride: 2
 90 |   }
 91 | }
 92 | 
 93 | # CONV3-RELU3-POOL3
 94 | layer {
 95 |   name: "conv3"
 96 |   type: "Convolution"
 97 |   bottom: "pool2"
 98 |   top: "conv3"
 99 |   param {
100 |     lr_mult: 9
101 |   }
102 |   param {
103 |     lr_mult: 18
104 |   }
105 |   convolution_param {
106 |     num_output: 64
107 |     kernel_size: 3
108 |     stride: 1
109 |     weight_filler {
110 |       type: "xavier"
111 |     }
112 |     bias_filler {
113 |       type: "constant"
114 |     }
115 |   }
116 | }
117 | layer {
118 |   name: "relu3"
119 |   type: "ReLU"
120 |   bottom: "conv3"
121 |   top: "conv3"
122 | }
123 | layer {
124 |   name: "pool3"
125 |   type: "Pooling"
126 |   bottom: "conv3"
127 |   top: "pool3"
128 |   pooling_param {
129 |     pool: MAX
130 |     kernel_size: 3
131 |     stride:2
132 |   }
133 | }
134 | 
135 | # CONV4-RELU4-POOL4
136 | layer {
137 |   name: "conv4"
138 |   type: "Convolution"
139 |   bottom: "pool3"
140 |   top: "conv4"
141 |   param {
142 |     lr_mult: 4
143 |   }
144 |   param {
145 |     lr_mult: 8
146 |   }
147 |   convolution_param {
148 |     num_output: 128
149 |     kernel_size: 3
150 |     stride: 1
151 |     weight_filler {
152 |       type: "xavier"
153 |     }
154 |     bias_filler {
155 |       type: "constant"
156 |     }
157 |   }
158 | }
159 | layer {
160 |   name: "relu4"
161 |   type: "ReLU"
162 |   bottom: "conv4"
163 |   top: "conv4"
164 | }
165 | layer {
166 |   name: "pool4"
167 |   type: "Pooling"
168 |   bottom: "conv4"
169 |   top: "pool4"
170 |   pooling_param {
171 |     pool: MAX
172 |     kernel_size: 3
173 |     stride:2
174 |   }
175 | }
176 | 
177 | # CONV5-RELU5-POOL5
178 | layer {
179 |   name: "conv5"
180 |   type: "Convolution"
181 |   bottom: "pool4"
182 |   top: "conv5"
183 |   param {
184 |     lr_mult: 2
185 |   }
186 |   param {
187 |     lr_mult: 4
188 |   }
189 |   convolution_param {
190 |     num_output: 128
191 |     kernel_size: 3
192 |     stride: 1
193 |     weight_filler {
194 |       type: "xavier"
195 |     }
196 |     bias_filler {
197 |       type: "constant"
198 |     }
199 |   }
200 | }
201 | layer {
202 |   name: "relu5"
203 |   type: "ReLU"
204 |   bottom: "conv5"
205 |   top: "conv5"
206 | }
207 | layer {
208 |   name: "pool5"
209 |   type: "Pooling"
210 |   bottom: "conv5"
211 |   top: "pool5"
212 |   pooling_param {
213 |     pool: MAX
214 |     kernel_size: 3
215 |     stride:2
216 |   }
217 | }
218 | 
219 | # CONV6-RELU6-POOL6
220 | layer {
221 |   name: "conv6"
222 |   type: "Convolution"
223 |   bottom: "pool5"
224 |   top: "conv6"
225 |   param {
226 |     lr_mult: 1
227 |   }
228 |   param {
229 |     lr_mult: 2
230 |   }
231 |   convolution_param {
232 |     num_output: 256
233 |     kernel_size: 3
234 |     stride: 1
235 |     weight_filler {
236 |       type: "xavier"
237 |     }
238 |     bias_filler {
239 |       type: "constant"
240 |     }
241 |   }
242 | }
243 | layer {
244 |   name: "relu6"
245 |   type: "ReLU"
246 |   bottom: "conv6"
247 |   top: "conv6"
248 | }
249 | layer {
250 |   name: "pool6"
251 |   type: "Pooling"
252 |   bottom: "conv6"
253 |   top: "pool6"
254 |   pooling_param {
255 |     pool: MAX
256 |     kernel_size: 3
257 |     stride:2
258 |   }
259 | }
260 | 
261 | # IP layers
262 | layer {
263 |   name: "ip1"
264 |   type: "InnerProduct"
265 |   bottom: "pool6"
266 |   top: "ip1"
267 |   param {
268 |     lr_mult: 1
269 |   }
270 |   param {
271 |     lr_mult: 2
272 |   }
273 |   inner_product_param {
274 |     num_output: 1024
275 |     weight_filler {
276 |       type: "xavier"
277 |     }
278 |     bias_filler {
279 |       type: "constant"
280 |     }
281 |   }
282 | }
283 | layer {
284 |   name: "reluOnIp1"
285 |   type: "ReLU"
286 |   bottom: "ip1"
287 |   top: "ip1"
288 | }
289 | layer {
290 |   name: "dropOnIp1"
291 |   type: "Dropout"
292 |   dropout_param {
293 |     dropout_ratio: 0.5
294 |   }
295 |   bottom: "ip1"
296 |   top: "ip1"
297 | }
298 | layer {
299 |   name: "ip2"
300 |   type: "InnerProduct"
301 |   bottom: "ip1"
302 |   top: "ip2"
303 |   param {
304 |     lr_mult: 1
305 |   }
306 |   param {
307 |     lr_mult: 2
308 |   }
309 |   inner_product_param {
310 |     num_output: 1024
311 |     weight_filler {
312 |       type: "xavier"
313 |     }
314 |     bias_filler {
315 |       type: "constant"
316 |     }
317 |   }
318 | }
319 | layer {
320 |   name: "reluOnIp2"
321 |   type: "ReLU"
322 |   bottom: "ip2"
323 |   top: "ip2"
324 | }
325 | layer {
326 |   name: "dropOnIp2"
327 |   type: "Dropout"
328 |   dropout_param {
329 |     dropout_ratio: 0.5
330 |   }
331 |   bottom: "ip2"
332 |   top: "ip2"
333 | }
334 | layer {
335 |   name: "ip3"
336 |   type: "InnerProduct"
337 |   bottom: "ip2"
338 |   top: "ip3"
339 |   param {
340 |     lr_mult: 1
341 |   }
342 |   param {
343 |     lr_mult: 2
344 |   }
345 |   inner_product_param {
346 |     num_output: 176
347 |     weight_filler {
348 |       type: "xavier"
349 |     }
350 |     bias_filler {
351 |       type: "constant"
352 |     }
353 |   }
354 | }
355 | layer {
356 |   name: "loss"
357 |   type: "Softmax"
358 |   bottom: "ip3"
359 |   top: "loss"
360 | }
361 | 


--------------------------------------------------------------------------------
/prototxt/main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt:
--------------------------------------------------------------------------------
  1 | name: "LangNet"
  2 | # DATA LAYERS
  3 | layer {
  4 |   name: "mnist"
  5 |   type: "Data"
  6 |   top: "data"
  7 |   top: "label"
  8 |   include {
  9 |     phase: TRAIN
 10 |   }
 11 |   transform_param {
 12 |     scale: 0.00390625
 13 |   }
 14 |   data_param {
 15 |     source: "train/traindb"
 16 |     batch_size: 32
 17 |     backend: LEVELDB
 18 |   }
 19 | }
 20 | layer {
 21 |   name: "mnist"
 22 |   type: "Data"
 23 |   top: "data"
 24 |   top: "label"
 25 |   include {
 26 |     phase: TEST
 27 |   }
 28 |   transform_param {
 29 |     scale: 0.00390625
 30 |   }
 31 |   data_param {
 32 |     source: "train/valdb"
 33 |     batch_size: 1
 34 |     backend: LEVELDB
 35 |   }
 36 | }
 37 | 
 38 | # CONV1-RELU1-POOL1
 39 | layer {
 40 |   name: "conv1"
 41 |   type: "Convolution"
 42 |   bottom: "data"
 43 |   top: "conv1"
 44 |   param {
 45 |     lr_mult: 15
 46 |   }
 47 |   param {
 48 |     lr_mult: 30
 49 |   }
 50 |   convolution_param {
 51 |     num_output: 32
 52 |     kernel_size: 7
 53 |     stride: 1
 54 |     weight_filler {
 55 |       type: "xavier"
 56 |     }
 57 |     bias_filler {
 58 |       type: "constant"
 59 |     }
 60 |   }
 61 | }
 62 | layer {
 63 |   name: "relu1"
 64 |   type: "ReLU"
 65 |   bottom: "conv1"
 66 |   top: "conv1"
 67 | }
 68 | layer {
 69 |   name: "pool1"
 70 |   type: "Pooling"
 71 |   bottom: "conv1"
 72 |   top: "pool1"
 73 |   pooling_param {
 74 |     pool: MAX
 75 |     kernel_size: 3
 76 |     stride: 2
 77 |   }
 78 | }
 79 | 
 80 | # CONV2-RELU2-POOL2
 81 | layer {
 82 |   name: "conv2"
 83 |   type: "Convolution"
 84 |   bottom: "pool1"
 85 |   top: "conv2"
 86 |   param {
 87 |     lr_mult: 12
 88 |   }
 89 |   param {
 90 |     lr_mult: 24
 91 |   }
 92 |   convolution_param {
 93 |     num_output: 64
 94 |     kernel_size: 5
 95 |     stride: 1
 96 |     weight_filler {
 97 |       type: "xavier"
 98 |     }
 99 |     bias_filler {
100 |       type: "constant"
101 |     }
102 |   }
103 | }
104 | layer {
105 |   name: "relu2"
106 |   type: "ReLU"
107 |   bottom: "conv2"
108 |   top: "conv2"
109 | }
110 | layer {
111 |   name: "pool2"
112 |   type: "Pooling"
113 |   bottom: "conv2"
114 |   top: "pool2"
115 |   pooling_param {
116 |     pool: MAX
117 |     kernel_size: 3
118 |     stride: 2
119 |   }
120 | }
121 | 
122 | # CONV3-RELU3-POOL3
123 | layer {
124 |   name: "conv3"
125 |   type: "Convolution"
126 |   bottom: "pool2"
127 |   top: "conv3"
128 |   param {
129 |     lr_mult: 9
130 |   }
131 |   param {
132 |     lr_mult: 18
133 |   }
134 |   convolution_param {
135 |     num_output: 64
136 |     kernel_size: 3
137 |     stride: 1
138 |     weight_filler {
139 |       type: "xavier"
140 |     }
141 |     bias_filler {
142 |       type: "constant"
143 |     }
144 |   }
145 | }
146 | layer {
147 |   name: "relu3"
148 |   type: "ReLU"
149 |   bottom: "conv3"
150 |   top: "conv3"
151 | }
152 | layer {
153 |   name: "pool3"
154 |   type: "Pooling"
155 |   bottom: "conv3"
156 |   top: "pool3"
157 |   pooling_param {
158 |     pool: MAX
159 |     kernel_size: 3
160 |     stride:2
161 |   }
162 | }
163 | 
164 | # CONV4-RELU4-POOL4
165 | layer {
166 |   name: "conv4"
167 |   type: "Convolution"
168 |   bottom: "pool3"
169 |   top: "conv4"
170 |   param {
171 |     lr_mult: 4
172 |   }
173 |   param {
174 |     lr_mult: 8
175 |   }
176 |   convolution_param {
177 |     num_output: 128
178 |     kernel_size: 3
179 |     stride: 1
180 |     weight_filler {
181 |       type: "xavier"
182 |     }
183 |     bias_filler {
184 |       type: "constant"
185 |     }
186 |   }
187 | }
188 | layer {
189 |   name: "relu4"
190 |   type: "ReLU"
191 |   bottom: "conv4"
192 |   top: "conv4"
193 | }
194 | layer {
195 |   name: "pool4"
196 |   type: "Pooling"
197 |   bottom: "conv4"
198 |   top: "pool4"
199 |   pooling_param {
200 |     pool: MAX
201 |     kernel_size: 3
202 |     stride:2
203 |   }
204 | }
205 | 
206 | # CONV5-RELU5-POOL5
207 | layer {
208 |   name: "conv5"
209 |   type: "Convolution"
210 |   bottom: "pool4"
211 |   top: "conv5"
212 |   param {
213 |     lr_mult: 2
214 |   }
215 |   param {
216 |     lr_mult: 4
217 |   }
218 |   convolution_param {
219 |     num_output: 128
220 |     kernel_size: 3
221 |     stride: 1
222 |     weight_filler {
223 |       type: "xavier"
224 |     }
225 |     bias_filler {
226 |       type: "constant"
227 |     }
228 |   }
229 | }
230 | layer {
231 |   name: "relu5"
232 |   type: "ReLU"
233 |   bottom: "conv5"
234 |   top: "conv5"
235 | }
236 | layer {
237 |   name: "pool5"
238 |   type: "Pooling"
239 |   bottom: "conv5"
240 |   top: "pool5"
241 |   pooling_param {
242 |     pool: MAX
243 |     kernel_size: 3
244 |     stride:2
245 |   }
246 | }
247 | 
248 | # CONV6-RELU6-POOL6
249 | layer {
250 |   name: "conv6"
251 |   type: "Convolution"
252 |   bottom: "pool5"
253 |   top: "conv6"
254 |   param {
255 |     lr_mult: 1
256 |   }
257 |   param {
258 |     lr_mult: 2
259 |   }
260 |   convolution_param {
261 |     num_output: 256
262 |     kernel_size: 3
263 |     stride: 1
264 |     weight_filler {
265 |       type: "xavier"
266 |     }
267 |     bias_filler {
268 |       type: "constant"
269 |     }
270 |   }
271 | }
272 | layer {
273 |   name: "relu6"
274 |   type: "ReLU"
275 |   bottom: "conv6"
276 |   top: "conv6"
277 | }
278 | layer {
279 |   name: "pool6"
280 |   type: "Pooling"
281 |   bottom: "conv6"
282 |   top: "pool6"
283 |   pooling_param {
284 |     pool: MAX
285 |     kernel_size: 3
286 |     stride:2
287 |   }
288 | }
289 | 
290 | # IP layers
291 | layer {
292 |   name: "ip1"
293 |   type: "InnerProduct"
294 |   bottom: "pool6"
295 |   top: "ip1"
296 |   param {
297 |     lr_mult: 1
298 |   }
299 |   param {
300 |     lr_mult: 2
301 |   }
302 |   inner_product_param {
303 |     num_output: 1024
304 |     weight_filler {
305 |       type: "xavier"
306 |     }
307 |     bias_filler {
308 |       type: "constant"
309 |     }
310 |   }
311 | }
312 | layer {
313 |   name: "reluOnIp1"
314 |   type: "ReLU"
315 |   bottom: "ip1"
316 |   top: "ip1"
317 | }
318 | layer {
319 |   name: "dropOnIp1"
320 |   type: "Dropout"
321 |   dropout_param {
322 |     dropout_ratio: 0.5
323 |   }
324 |   bottom: "ip1"
325 |   top: "ip1"
326 | }
327 | layer {
328 |   name: "ip2"
329 |   type: "InnerProduct"
330 |   bottom: "ip1"
331 |   top: "ip2"
332 |   param {
333 |     lr_mult: 1
334 |   }
335 |   param {
336 |     lr_mult: 2
337 |   }
338 |   inner_product_param {
339 |     num_output: 1024
340 |     weight_filler {
341 |       type: "xavier"
342 |     }
343 |     bias_filler {
344 |       type: "constant"
345 |     }
346 |   }
347 | }
348 | layer {
349 |   name: "reluOnIp2"
350 |   type: "ReLU"
351 |   bottom: "ip2"
352 |   top: "ip2"
353 | }
354 | layer {
355 |   name: "dropOnIp2"
356 |   type: "Dropout"
357 |   dropout_param {
358 |     dropout_ratio: 0.5
359 |   }
360 |   bottom: "ip2"
361 |   top: "ip2"
362 | }
363 | layer {
364 |   name: "ip3"
365 |   type: "InnerProduct"
366 |   bottom: "ip2"
367 |   top: "ip3"
368 |   param {
369 |     lr_mult: 1
370 |   }
371 |   param {
372 |     lr_mult: 2
373 |   }
374 |   inner_product_param {
375 |     num_output: 176
376 |     weight_filler {
377 |       type: "xavier"
378 |     }
379 |     bias_filler {
380 |       type: "constant"
381 |     }
382 |   }
383 | }
384 | layer {
385 |   name: "accuracy"
386 |   type: "Accuracy"
387 |   bottom: "ip3"
388 |   bottom: "label"
389 |   top: "accuracy"
390 |   include {
391 |     phase: TEST
392 |   }
393 | }
394 | layer {
395 |   name: "loss"
396 |   type: "SoftmaxWithLoss"
397 |   bottom: "ip3"
398 |   bottom: "label"
399 |   top: "loss"
400 | }
401 | 


--------------------------------------------------------------------------------
/prototxt/solver.augm.nolrcoef.prototxt:
--------------------------------------------------------------------------------
 1 | net: "prototxt/augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt"
 2 | 
 3 | test_iter: 512
 4 | test_interval: 1500
 5 | 
 6 | # The base learning rate and the weight decay of the network (no momentum is set here).
 7 | base_lr: 0.01
 8 | weight_decay: 0.0000
 9 | 
10 | # The learning rate policy
11 | # lr_policy: "fixed"
12 | # solver_type: ADADELTA
13 | 
14 | lr_policy: "inv"
15 | gamma: 0.0003
16 | power: 0.9
17 | 
18 | #lr_policy: "step"
19 | #gamma: 0.9
20 | #stepsize: 6000
21 | 
22 | display: 1
23 | 
24 | max_iter: 800000
25 | 
26 | snapshot: 3000
27 | snapshot_prefix: "models/augm_dropout0.3_on_augm84K-lr0.01_30K_90K"
28 | #log: "logs/augm_dropout0.3_on_augm84K-lr0.01_30K_90K.txt"
29 | solver_mode: GPU
30 | 
31 | 


--------------------------------------------------------------------------------
/prototxt/solver.main.adadelta.prototxt:
--------------------------------------------------------------------------------
 1 | net: "prototxt/main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt"
 2 | 
 3 | test_iter: 100
 4 | test_interval: 100
 5 | 
 6 | # The weight decay of the network (the base learning rate is set with the policy below).
 7 | weight_decay: 0.0000
 8 | 
 9 | # The learning rate policy
10 | base_lr: 0.01
11 | lr_policy: "fixed"
12 | solver_type: ADADELTA
13 | 
14 | display: 1
15 | 
16 | max_iter: 800000
17 | 
18 | snapshot: 3000
19 | snapshot_prefix: "models/main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR_adadelta0.01"
20 | #log: "logs/main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR_adadelta0.01.txt"
21 | solver_mode: GPU
22 | 
23 | 


--------------------------------------------------------------------------------
/test_augm_network.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import caffe
 3 | import numpy as np
 4 | 
 5 | caffe.set_mode_gpu()
 6 | 
 7 | # info about classes
 8 | file = open('trainingData.csv')
 9 | data = file.readlines()[1:]
10 | langs = set()
11 | for line in data:
12 |     filepath, language = line.split(',')
13 |     language = language.strip()
14 |     langs.add(language)
15 | langs = sorted(langs)
16 | 
17 | 
18 | # network parameters:
19 | deploy_name = 'augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3'
20 | network_name = 'augm_dropout0.3_on_augm84K-lr0.01_30K'
21 | iterations = '90000'
22 | aveSamples = 20 # average over this many samples
23 | 
24 | net = caffe.Classifier(model_file='prototxt/deploy.' + deploy_name + '.prototxt',
25 |                        pretrained_file='models/' + network_name + '_iter_' + iterations + '.caffemodel')
26 | 
27 | net.blobs['data'].reshape(1, 1, 256, 768)
28 | predict_set = sys.argv[1]
29 | 
30 | if (predict_set == "test"):
31 |     folder = 'test/png/'
32 |     f = open('testingData.csv')
33 |     cnt = 12320
34 |     print_file = open('predictions/test_' + network_name + '_iter_' + iterations + '_' + str(aveSamples) + '.csv', 'w')
35 | elif (predict_set == "val"):
36 |     folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/pngaugm/'
37 |     f = open('valEqual.csv')
38 |     cnt = 12320
39 |     print_file = open('predictions/validation_' + network_name + '_iter_' + iterations + '_' + str(aveSamples) + '.csv', 'w')
40 | else: # train
41 |     folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/pngaugm/'
42 |     f = open('trainEqual.csv')
43 |     cnt = 10000
44 |     print_file = open('predictions/train_' + network_name + '_iter_' + iterations + '_' + str(aveSamples) + '.csv', 'w')
45 |     
46 | preds = []
47 | labels = []
48 | topcoder_score = 0.0
49 | processed = 0
50 | 
51 | for iter in range(cnt):
52 |     st = f.readline()
53 |     if (predict_set == "val" or predict_set == "train"):
54 |         (name, label) = st.split(',')
55 |         label = int(label)
56 |     else:
57 |         name = st.strip()[:-4]
58 |     processed += 1
59 |     out = np.zeros((176, ))
60 |     for randomIndex in range(aveSamples):
61 |         image = caffe.io.load_image(folder + name + '.' + str(randomIndex) + '.png', color=False)
62 |         image = np.transpose(image, (2, 0, 1))
63 |         #image = np.concatenate([image, np.zeros((1, 256, 858 - 768), dtype=np.float32)], axis=2)
64 |         net.blobs['data'].data[...] = image
65 |         out += net.forward()['loss'][0]
66 | 
67 |     pred = sorted([(x, it) for it, x in enumerate(out)], reverse=True)
68 |     
69 |     if (predict_set == "val" or predict_set == "train"):
70 |         if (pred[0][1] == label):
71 |             topcoder_score = topcoder_score + 1000
72 |         elif (pred[1][1] == label):
73 |             topcoder_score = topcoder_score + 400
74 |         elif (pred[2][1] == label): 
75 |             topcoder_score = topcoder_score + 160
76 |     
77 |     for i in range(3):
78 |         lang_id = pred[i][1]
79 |         lang = langs[lang_id]
80 |         print_file.write(name + '.mp3,' + lang + ',' + str(i + 1) + '\n')
81 | 
82 |     if (iter % 100 == 0):
83 |         print >> sys.stderr, network_name + '_iter_' + iterations + '_' + str(aveSamples)
84 |         print >> sys.stderr, "processed %d / %d images (%d samples/mp3)" % (iter, cnt, aveSamples)
85 |         print >> sys.stderr, "score: ", topcoder_score
86 |         print >> sys.stderr, "expected score:", topcoder_score / processed * 35200
87 | 
88 | print >> sys.stderr, "Final score: ", topcoder_score, " / ", cnt, "000"
89 | print >> sys.stderr, "expected score:", topcoder_score / processed * 35200
90 | 


--------------------------------------------------------------------------------
/test_main_network.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import caffe
 3 | import numpy as np
 4 | 
 5 | caffe.set_mode_gpu()
 6 | 
 7 | # info about classes
 8 | file = open('trainingData.csv')
 9 | data = file.readlines()[1:]
10 | langs = set()
11 | for line in data:
12 |     filepath, language = line.split(',')
13 |     language = language.strip()
14 |     langs.add(language)
15 | langs = sorted(langs)
16 | 
17 | 
18 | # network parameters:
19 | deploy_name = 'main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR'
20 | network_name = deploy_name + '_150K-momentum'
21 | iterations = '51000'
22 | 
23 | net = caffe.Classifier(model_file='prototxt/deploy.' + deploy_name + '.prototxt',
24 |                        pretrained_file='models/' + network_name + '_iter_' + iterations + '.caffemodel')
25 | 
26 | transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
27 | transformer.set_transpose('data', (2, 0, 1))
28 | net.blobs['data'].reshape(1, 1, 256, 858)
29 | 
30 | predict_set = sys.argv[1]
31 | 
32 | if (predict_set == "test"):
33 |     folder = 'test/png/'
34 |     f = open('testingData.csv')
35 |     cnt = 12320
36 |     print_file = open('predictions/test_' + network_name + '_iter_' + iterations + '.csv', 'w')
37 | elif (predict_set == "val"):
38 |     folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/pngaugm/' ## stegh dreci augm
39 |     f = open('valDataNew.csv')
40 |     cnt = 16176
41 |     print_file = open('predictions/validation_' + network_name + '_iter_' + iterations + '.csv', 'w')
42 | else: # train
43 |     folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/png/'
44 |     f = open('trainingDataNew.csv')
45 |     cnt = 10000
46 |     print_file = open('predictions/train_' + network_name + '_iter_' + iterations + '.csv', 'w')
47 |     
48 | preds = []
49 | labels = []
50 | topcoder_score = 0
51 | processed = 0
52 | 
53 | for iter in range(cnt):
54 |     st = f.readline()
55 |     if (predict_set == "val" or predict_set == "train"):
56 |         (name, label) = st.split(',')
57 |         label = int(label)
58 |     else:
59 |         name = st.strip()[:-4]
60 |     processed += 1
61 |     
62 |     net.blobs['data'].data[...] = transformer.preprocess('data', 
63 |         caffe.io.load_image(folder + name + '.png', color=False))
64 |     
65 |     out = net.forward()['loss'][0]
66 | 
67 |     pred = sorted([(x, it) for it, x in enumerate(out)], reverse=True)
68 |     
69 |     if (predict_set == "val" or predict_set == "train"):
70 |         if (pred[0][1] == label):
71 |             topcoder_score = topcoder_score + 1000
72 |         elif (pred[1][1] == label):
73 |             topcoder_score = topcoder_score + 400
74 |         elif (pred[2][1] == label): 
75 |             topcoder_score = topcoder_score + 160
76 |     
77 |     for i in range(3):
78 |         lang_id = pred[i][1]
79 |         lang = langs[lang_id]
80 |         print_file.write(name + '.mp3,' + lang + ',' + str(i + 1) + '\n')
81 | 
82 |     if (iter % 100 == 0):
83 |         print >> sys.stderr, "processed %d / %d images" % (iter, cnt)
84 |         print >> sys.stderr, "score: ", topcoder_score
85 |         print >> sys.stderr, "expected score:", topcoder_score / processed * 35200
86 | 
87 | print >> sys.stderr, "Final score: ", topcoder_score, " / ", cnt, "000"
88 | print >> sys.stderr, "expected score:", topcoder_score / processed * 35200
89 | 


--------------------------------------------------------------------------------
/theano/README.md:
--------------------------------------------------------------------------------
1 | # Spoken language identification
2 | 
3 | The `networks` folder contains multiple CNN and/or RNN models implemented in Theano/Lasagne.
4 | 
5 | Read more in the corresponding [blog post](http://yerevann.github.io/2016/06/26/combining-cnn-and-rnn-for-spoken-language-identification/).
6 | 


--------------------------------------------------------------------------------
/theano/main.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import numpy as np
  3 | import sklearn.metrics as metrics
  4 | import argparse
  5 | import time
  6 | import json
  7 | import importlib
  8 | 
  9 | print "==> parsing input arguments"
 10 | parser = argparse.ArgumentParser()
 11 | 
 12 | # TODO: add argument to choose training set
 13 | parser.add_argument('--network', type=str, default="network_batch", help='embeding size (50, 100, 200, 300 only)')
 14 | parser.add_argument('--epochs', type=int, default=500, help='number of epochs to train')
 15 | parser.add_argument('--load_state', type=str, default="", help='state file path')
 16 | parser.add_argument('--mode', type=str, default="train", help='mode: train/test/test_on_train')
 17 | parser.add_argument('--batch_size', type=int, default=32, help='no commment')
 18 | parser.add_argument('--l2', type=float, default=0, help='L2 regularization')
 19 | parser.add_argument('--log_every', type=int, default=100, help='print information every x iteration')
 20 | parser.add_argument('--save_every', type=int, default=50000, help='save state every x iteration')
 21 | parser.add_argument('--prefix', type=str, default="", help='optional prefix of network name')
 22 | parser.add_argument('--dropout', type=float, default=0.0, help='dropout rate (between 0 and 1)')
 23 | parser.add_argument('--no-batch_norm', dest="batch_norm", action='store_false', help='batch normalization')
 24 | parser.add_argument('--rnn_num_units', type=int, default=500, help='number of hidden units if the network is RNN')
 25 | parser.add_argument('--equal_split', type=bool, default=False, help='use trainEqual.csv and valEqual.csv')
 26 | parser.add_argument('--forward_cnt', type=int, default=1, help='if forward pass is nondeterministic, then how many forward passes are averaged')
 27 | 
 28 | parser.set_defaults(batch_norm=True)
 29 | args = parser.parse_args()
 30 | print args
 31 | 
 32 | if (args.equal_split):
 33 |     train_listfile = open("/mnt/hdd615/Hrayr/Spoken-language-identification/trainEqual.csv", "r")
 34 |     test_listfile = open("/mnt/hdd615/Hrayr/Spoken-language-identification/valEqual.csv", "r")
 35 | else:
 36 |     train_listfile = open("/mnt/hdd615/Hrayr/Spoken-language-identification/trainingDataNew.csv", "r")
 37 |     test_listfile = open("/mnt/hdd615/Hrayr/Spoken-language-identification/valDataNew.csv", "r")
 38 | 
 39 | train_list_raw = train_listfile.readlines()
 40 | test_list_raw = test_listfile.readlines()
 41 | 
 42 | print "==> %d training examples" % len(train_list_raw)
 43 | print "==> %d validation examples" % len(test_list_raw)
 44 | 
 45 | train_listfile.close()
 46 | test_listfile.close()
 47 | 
 48 | args_dict = dict(args._get_kwargs())
 49 | args_dict['train_list_raw'] = train_list_raw
 50 | args_dict['test_list_raw'] = test_list_raw
 51 | args_dict['png_folder'] = "/mnt/hdd615/Hrayr/Spoken-language-identification/train/png/"
 52 |     
 53 | 
 54 | 
 55 | print "==> using network %s" % args.network
 56 | network_module = importlib.import_module("networks." + args.network)
 57 | network = network_module.Network(**args_dict)
 58 | 
 59 | 
 60 | network_name = args.prefix + '%s.bs%d%s%s' % (
 61 |     network.say_name(),
 62 |     args.batch_size, 
 63 |     ".bn" if args.batch_norm else "", 
 64 |     (".d" + str(args.dropout)) if args.dropout>0 else "")
 65 |     
 66 | print "==> network_name:", network_name
 67 | 
 68 | 
 69 | start_epoch = 0
 70 | if args.load_state != "":
 71 |     start_epoch = network.load_state(args.load_state) + 1
 72 | 
 73 | def do_epoch(mode, epoch):
 74 |     # mode is 'train' or 'test' or 'predict'
 75 |     y_true = []
 76 |     y_pred = []
 77 |     avg_loss = 0.0
 78 |     prev_time = time.time()
 79 | 
 80 |     batches_per_epoch = network.get_batches_per_epoch(mode)
 81 |     all_prediction = []
 82 | 
 83 |     for i in range(0, batches_per_epoch):
 84 |         step_data = network.step(i, mode)
 85 |         prediction = step_data["prediction"]
 86 |         answers = step_data["answers"]
 87 |         current_loss = step_data["current_loss"]
 88 |         log = step_data["log"]
 89 |         
 90 |         avg_loss += current_loss
 91 |         if (mode == "predict" or mode == "predict_on_train"):
 92 |             all_prediction.append(prediction)
 93 |             for pass_id in range(args.forward_cnt-1):
 94 |                 step_data = network.step(i, mode)
 95 |                 prediction += step_data["prediction"]
 96 |                 current_loss += step_data["current_loss"]
 97 |             prediction /= args.forward_cnt
 98 |             current_loss /= args.forward_cnt
 99 |             
100 |         for x in answers:
101 |             y_true.append(x)
102 |         
103 |         for x in prediction.argmax(axis=1):
104 |             y_pred.append(x)
105 |         
106 |         if ((i + 1) % args.log_every == 0):
107 |             cur_time = time.time()
108 |             print ("  %sing: %d.%d / %d \t loss: %3f \t avg_loss: %.5f \t %s \t time: %.2fs" % 
109 |                 (mode, epoch, (i + 1) * args.batch_size, batches_per_epoch * args.batch_size, 
110 |                  current_loss, avg_loss / (i + 1), log, cur_time - prev_time))
111 |             prev_time = cur_time
112 |       
113 |     
114 |     #print "confusion matrix:"
115 |     #print metrics.confusion_matrix(y_true, y_pred)
116 |     accuracy = sum([1 if t == p else 0 for t, p in zip(y_true, y_pred)])
117 |     print "accuracy: %.2f percent" % (accuracy * 100.0 / batches_per_epoch / args.batch_size)
118 |     
119 |     if (mode == "predict"):
120 |         all_prediction = np.vstack(all_prediction)
121 |         pred_filename = "predictions/" + ("equal_split." if args.equal_split else "") + \
122 |                          args.load_state[args.load_state.rfind('/')+1:] + ".csv"
123 |         with open(pred_filename, 'w') as pred_csv:
124 |             for x in all_prediction:
125 |                 print >> pred_csv, ",".join([("%.6f" % prob) for prob in x])
126 |                     
127 |     return avg_loss / batches_per_epoch
128 | 
129 | 
130 | if args.mode == 'train':
131 |     print "==> training"   	
132 |     for epoch in range(start_epoch, args.epochs):
133 |         do_epoch('train', epoch)
134 |         test_loss = do_epoch('test', epoch)
135 |         state_name = 'states/%s.epoch%d.test%.5f.state' % (network_name, epoch, test_loss)
136 |         print "==> saving ... %s" % state_name
137 |         network.save_params(state_name, epoch)
138 |         
139 | elif args.mode == 'test':
140 |     do_epoch('predict', 0)
141 | elif args.mode == 'test_on_train':
142 |     do_epoch('predict_on_train', 0)
143 | else:
144 |     raise Exception("unknown mode")


--------------------------------------------------------------------------------
/theano/networks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YerevaNN/Spoken-language-identification/e947dee00f301115cba1460b655a101d5239b932/theano/networks/__init__.py


--------------------------------------------------------------------------------
/theano/networks/base_network.py:
--------------------------------------------------------------------------------
 1 | import cPickle as pickle
 2 | 
 3 | 
 4 | class BaseNetwork:
 5 | 	
 6 | 	def say_name(self):
 7 | 		return "unknown"
 8 | 	
 9 | 	
10 | 	def save_params(self, file_name, epoch, **kwargs):
11 | 		with open(file_name, 'w') as save_file:
12 | 			pickle.dump(
13 | 				obj = {
14 | 					'params' : [x.get_value() for x in self.params],
15 | 					'epoch' : epoch, 
16 | 				},
17 | 				file = save_file,
18 | 				protocol = -1
19 | 			)
20 | 	
21 | 	
22 | 	def load_state(self, file_name):
23 | 		print "==> loading state %s" % file_name
24 | 		epoch = 0
25 | 		with open(file_name, 'r') as load_file:
26 | 			dict = pickle.load(load_file)
27 | 			loaded_params = dict['params']
28 | 			for (x, y) in zip(self.params, loaded_params):
29 | 				x.set_value(y)
30 | 			epoch = dict['epoch']
31 | 		return epoch
32 | 
33 | 
34 | 	def get_batches_per_epoch(self, mode):
35 | 		if (mode == 'train' or mode == 'predict_on_train'):
36 | 			return len(self.train_list_raw) / self.batch_size
37 | 		elif (mode == 'test' or mode == 'predict'):
38 | 			return len(self.test_list_raw) / self.batch_size
39 | 		else:
40 | 			raise Exception("unknown mode")
41 | 	
42 | 	
43 | 	def step(self, batch_index, mode):
44 | 		
45 | 		if (mode == "train"):
46 | 			data, answers = self.read_batch(self.train_list_raw, batch_index)
47 | 			theano_fn = self.train_fn
48 | 		elif (mode == "test" or mode == "predict"):
49 | 			data, answers = self.read_batch(self.test_list_raw, batch_index)
50 | 			theano_fn = self.test_fn
51 | 		elif (mode == "predict_on_train"):
52 | 			data, answers = self.read_batch(self.train_list_raw, batch_index)
53 | 			theano_fn = self.test_fn
54 | 		else:
55 | 			raise Exception("unrecognized mode")
56 | 		
57 | 		ret = theano_fn(data, answers)
58 | 		return {"prediction": ret[0],
59 | 				"answers": answers,
60 | 				"current_loss": ret[1],
61 | 				"log": "",
62 | 				}


--------------------------------------------------------------------------------
/theano/networks/rnn.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import numpy as np
 3 | 
 4 | import theano
 5 | import theano.tensor as T
 6 | 
 7 | import lasagne
 8 | from lasagne import layers
 9 | from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
10 | 
11 | import PIL.Image as Image
12 | from base_network import BaseNetwork
13 | 
14 | floatX = theano.config.floatX
15 | 
16 | 
class Network(BaseNetwork):
    """Single GRU layer followed by a 176-way softmax classifier.

    The input is a 3-D tensor declared as (None, 858, 256); read_batch()
    fills it with transposed PNG images scaled by 1/256.
    """
    
    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, l2, mode, rnn_num_units, **kwargs):
        """Build the Lasagne graph and compile the Theano functions.

        train_list_raw / test_list_raw: lists of "name,label" lines (see read_batch).
        png_folder: path prefix put in front of every image name.
        batch_size: number of examples per batch.
        l2: weight of the L2 penalty; the penalty is added only when l2 > 0.
        mode: train_fn is compiled only when this equals 'train'.
        rnn_num_units: number of units of the GRU layer.
        kwargs: remaining arguments; only their names are printed.
        """
        
        print "==> not used params in DMN class:", kwargs.keys()
        self.train_list_raw = train_list_raw
        self.test_list_raw = test_list_raw
        self.png_folder = png_folder
        self.batch_size = batch_size
        self.l2 = l2
        self.mode = mode
        self.num_units = rnn_num_units
        
        self.input_var = T.tensor3('input_var')
        self.answer_var = T.ivector('answer_var')
        
        print "==> building network"
        # random dummy batch, used below only to print each layer's output shape
        example = np.random.uniform(size=(self.batch_size, 858, 256), low=0.0, high=1.0).astype(np.float32) #########
        # NOTE(review): answer is never read, but the call still draws from
        # NumPy's global RNG; removing it may change later random draws - confirm before deleting
        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########

        # InputLayer       
        network = layers.InputLayer(shape=(None, 858, 256), input_var=self.input_var)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # GRULayer
        network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
        print layers.get_output(network).eval({self.input_var:example}).shape
        
        # Last layer: classification
        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
        print layers.get_output(network).eval({self.input_var:example}).shape

        self.params = layers.get_all_params(network, trainable=True)
        self.prediction = layers.get_output(network)
        
        # total loss = mean cross-entropy + optional L2 penalty on the network parameters
        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
        if (self.l2 > 0):
            self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network, 
                                                                    lasagne.regularization.l2)
        else:
            self.loss_l2 = 0
        self.loss = self.loss_ce + self.loss_l2
        
        #updates = lasagne.updates.adadelta(self.loss, self.params)
        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.0005)
        
        # the training function (the only one applying updates) is compiled
        # only in train mode; test_fn is always compiled
        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], 
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)
        
        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                       outputs=[self.prediction, self.loss])
    
    
    def say_name(self):
        """Return a name for this network that includes the number of GRU units."""
        return "rnn.GRU.num_units%d" % self.num_units
    
    
    def read_batch(self, data_raw, batch_index):
        """Load batch number batch_index from the "name,label" lines in data_raw.

        Returns (data, answers): data is a float32 array of shape
        (batch_size, 858, 256) and answers an int32 array of the labels.
        """

        start_index = batch_index * self.batch_size
        end_index = start_index + self.batch_size
        
        data = np.zeros((self.batch_size, 858, 256), dtype=np.float32)
        answers = []
        
        for i in range(start_index, end_index):
            # each line is "name,label": the label is the second field, the image name the first
            answers.append(int(data_raw[i].split(',')[1]))
            name = data_raw[i].split(',')[0]
            path = self.png_folder + name + ".png"
            im = Image.open(path)
            # transpose the image array and divide the pixel values by 256
            data[i - start_index, :, :] = np.transpose(np.array(im).astype(np.float32) / 256.0)

        answers = np.array(answers, dtype=np.int32)
        return data, answers
96 |     
97 | 


--------------------------------------------------------------------------------
/theano/networks/rnn_2layers.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import numpy as np
  3 | 
  4 | import theano
  5 | import theano.tensor as T
  6 | 
  7 | import lasagne
  8 | from lasagne import layers
  9 | from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
 10 | 
 11 | import PIL.Image as Image
 12 | from base_network import BaseNetwork
 13 | 
 14 | floatX = theano.config.floatX  # Theano's configured float dtype name; not referenced again in this file
 15 | 
 16 | 
 17 | class Network(BaseNetwork):
 18 |     # Two stacked GRU layers over (batch, 858, 256) inputs, followed by a 176-way softmax classifier.
 19 |     def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, l2, mode, rnn_num_units, batch_norm, **kwargs):
 20 |         # Stores the settings, builds the Lasagne graph and compiles the Theano functions; extra kwargs are only printed.
 21 |         print "==> not used params in DMN class:", kwargs.keys()  # NOTE(review): "DMN" in this message looks like a leftover from other code
 22 |         self.train_list_raw = train_list_raw
 23 |         self.test_list_raw = test_list_raw
 24 |         self.png_folder = png_folder
 25 |         self.batch_size = batch_size
 26 |         self.l2 = l2
 27 |         self.mode = mode
 28 |         self.num_units = rnn_num_units
 29 |         self.batch_norm = batch_norm
 30 |         
 31 |         self.input_var = T.tensor3('input_var')  # symbolic 3-D input batch
 32 |         self.answer_var = T.ivector('answer_var')  # symbolic integer label vector, one entry per example
 33 |         
 34 |         # rescale inputs to [-1, 1] (assumes the fed values lie in [0, 1])
 35 |         input_var_norm = 2 * self.input_var - 1
 36 |         
 37 |         print "==> building network"
 38 |         example = np.random.uniform(size=(self.batch_size, 858, 256), low=0.0, high=1.0).astype(np.float32) # random dummy batch, only used to print the layer output shapes below
 39 |         answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) # random dummy labels; not used anywhere below
 40 | 
 41 |         # InputLayer: fed with the rescaled input expression
 42 |         network = layers.InputLayer(shape=(None, 858, 256), input_var=input_var_norm)
 43 |         print layers.get_output(network).eval({self.input_var:example}).shape
 44 | 
 45 |         # first GRULayer (only num_units is set explicitly)
 46 |         network = layers.GRULayer(incoming=network, num_units=self.num_units)
 47 |         print layers.get_output(network).eval({self.input_var:example}).shape
 48 |         
 49 |         # BatchNormalization Layer (optional, between the two GRU layers)
 50 |         if (self.batch_norm):
 51 |             network = layers.BatchNormLayer(incoming=network)
 52 |             print layers.get_output(network).eval({self.input_var:example}).shape
 53 |         
 54 |         # second GRULayer; only_return_final=True keeps just the output of the last step
 55 |         network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
 56 |         print layers.get_output(network).eval({self.input_var:example}).shape
 57 |         
 58 |         # Last layer: classification (176 softmax outputs)
 59 |         network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
 60 |         print layers.get_output(network).eval({self.input_var:example}).shape
 61 | 
 62 |         self.params = layers.get_all_params(network, trainable=True)
 63 |         self.prediction = layers.get_output(network)
 64 |         # NOTE(review): get_output() is called without deterministic=True and the same expression feeds test_fn -- confirm batch norm should stay in training mode there.
 65 |         self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()  # mean cross-entropy over the batch
 66 |         if (self.l2 > 0):  # optional L2 penalty scaled by self.l2
 67 |             self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network, 
 68 |                                                                     lasagne.regularization.l2)
 69 |         else:
 70 |             self.loss_l2 = 0
 71 |         self.loss = self.loss_ce + self.loss_l2
 72 |         
 73 |         #updates = lasagne.updates.adadelta(self.loss, self.params)
 74 |         updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)  # momentum SGD with a fixed learning rate
 75 |         # train_fn is compiled only in 'train' mode; test_fn is always compiled and gets no `updates` argument.
 76 |         if self.mode == 'train':
 77 |             print "==> compiling train_fn"
 78 |             self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], 
 79 |                                             outputs=[self.prediction, self.loss],
 80 |                                             updates=updates)
 81 |         
 82 |         print "==> compiling test_fn"
 83 |         self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
 84 |                                        outputs=[self.prediction, self.loss])
 85 |     
 86 |     # Name string for this architecture: a fixed prefix followed by self.num_units formatted as an integer.
 87 |     def say_name(self):
 88 |         return "rnn_2layers.GRU.num_units%d" % self.num_units
 89 |     
 90 |     # Loads batch number `batch_index` from the "name,label" entries of data_raw and returns (data, answers).
 91 |     def read_batch(self, data_raw, batch_index):
 92 |         # data: float32 array of shape (batch_size, 858, 256); answers: int32 vector with one label per example.
 93 |         start_index = batch_index * self.batch_size
 94 |         end_index = start_index + self.batch_size
 95 |         
 96 |         data = np.zeros((self.batch_size, 858, 256), dtype=np.float32)
 97 |         answers = []
 98 |         # NOTE(review): no bounds check -- data_raw must hold at least end_index entries or data_raw[i] fails.
 99 |         for i in range(start_index, end_index):
100 |             answers.append(int(data_raw[i].split(',')[1]))  # second comma-separated field: class label
101 |             name = data_raw[i].split(',')[0]  # first comma-separated field: image name without extension
102 |             path = self.png_folder + name + ".png"  # png_folder is a plain string prefix, so it needs its own trailing separator
103 |             im = Image.open(path)
104 |             data[i - start_index, :, :] = np.transpose(np.array(im).astype(np.float32) / 256.0)  # divide pixel values by 256, swap axes to fit the (858, 256) slot
105 | 
106 |         answers = np.array(answers, dtype=np.int32)
107 |         return data, answers
108 |     
109 |     


--------------------------------------------------------------------------------
/theano/networks/rnn_2layers_5khz.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import numpy as np
  3 | 
  4 | import theano
  5 | import theano.tensor as T
  6 | 
  7 | import lasagne
  8 | from lasagne import layers
  9 | from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
 10 | 
 11 | import PIL.Image as Image
 12 | from base_network import BaseNetwork
 13 | 
 14 | floatX = theano.config.floatX  # Theano's configured float dtype name; not referenced again in this file
 15 | 
 16 | 
 17 | class Network(BaseNetwork):
 18 |     # Two stacked GRU layers over (batch, 858, 128) inputs, followed by a 176-way softmax classifier.
 19 |     def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, l2, mode, rnn_num_units, batch_norm, **kwargs):
 20 |         # Stores the settings, builds the Lasagne graph and compiles the Theano functions; extra kwargs are only printed.
 21 |         print "==> not used params in network class:", kwargs.keys()
 22 |         self.train_list_raw = train_list_raw
 23 |         self.test_list_raw = test_list_raw
 24 |         self.png_folder = png_folder
 25 |         self.batch_size = batch_size
 26 |         self.l2 = l2
 27 |         self.mode = mode
 28 |         self.num_units = rnn_num_units
 29 |         self.batch_norm = batch_norm
 30 |         
 31 |         self.input_var = T.tensor3('input_var')  # symbolic 3-D input batch
 32 |         self.answer_var = T.ivector('answer_var')  # symbolic integer label vector, one entry per example
 33 |         
 34 |         # rescale inputs to [-1, 1] (assumes the fed values lie in [0, 1])
 35 |         input_var_norm = 2 * self.input_var - 1
 36 |         
 37 |         print "==> building network"
 38 |         example = np.random.uniform(size=(self.batch_size, 858, 128), low=0.0, high=1.0).astype(np.float32) # random dummy batch, only used to print the layer output shapes below
 39 |         answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) # random dummy labels; not used anywhere below
 40 | 
 41 |         # InputLayer: fed with the rescaled input expression
 42 |         network = layers.InputLayer(shape=(None, 858, 128), input_var=input_var_norm)
 43 |         print layers.get_output(network).eval({self.input_var:example}).shape
 44 | 
 45 |         # first GRULayer (only num_units is set explicitly)
 46 |         network = layers.GRULayer(incoming=network, num_units=self.num_units)
 47 |         print layers.get_output(network).eval({self.input_var:example}).shape
 48 |         
 49 |         # BatchNormalization Layer (optional, between the two GRU layers)
 50 |         if (self.batch_norm):
 51 |             network = layers.BatchNormLayer(incoming=network)
 52 |             print layers.get_output(network).eval({self.input_var:example}).shape
 53 |         
 54 |         # second GRULayer; only_return_final=True keeps just the output of the last step
 55 |         network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
 56 |         print layers.get_output(network).eval({self.input_var:example}).shape
 57 |         
 58 |         # BatchNormalization Layer (optional, after the second GRU layer)
 59 |         # NOTE: there are some states for which this layer was disabled
 60 |         if (self.batch_norm):
 61 |             network = layers.BatchNormLayer(incoming=network)
 62 |             print layers.get_output(network).eval({self.input_var:example}).shape
 63 |         
 64 |         # Last layer: classification (176 softmax outputs)
 65 |         network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
 66 |         print layers.get_output(network).eval({self.input_var:example}).shape
 67 | 
 68 |         self.params = layers.get_all_params(network, trainable=True)
 69 |         self.prediction = layers.get_output(network)
 70 |         # NOTE(review): get_output() is called without deterministic=True and the same expression feeds test_fn -- confirm batch norm should stay in training mode there.
 71 |         self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()  # mean cross-entropy over the batch
 72 |         if (self.l2 > 0):  # optional L2 penalty scaled by self.l2
 73 |             self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network, 
 74 |                                                                     lasagne.regularization.l2)
 75 |         else:
 76 |             self.loss_l2 = 0
 77 |         self.loss = self.loss_ce + self.loss_l2
 78 |         
 79 |         updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)  # momentum SGD with a fixed learning rate
 80 |         # train_fn is compiled only in 'train' mode; test_fn is always compiled and gets no `updates` argument.
 81 |         if self.mode == 'train':
 82 |             print "==> compiling train_fn"
 83 |             self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], 
 84 |                                             outputs=[self.prediction, self.loss],
 85 |                                             updates=updates)
 86 |         
 87 |         print "==> compiling test_fn"
 88 |         self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
 89 |                                        outputs=[self.prediction, self.loss])
 90 |     
 91 |     # Name string for this architecture: a fixed prefix followed by self.num_units formatted as an integer.
 92 |     def say_name(self):
 93 |         return "rnn_2layers_5khz.GRU.num_units%d" % self.num_units
 94 |     
 95 |     # Loads batch number `batch_index` from the "name,label" entries of data_raw and returns (data, answers).
 96 |     def read_batch(self, data_raw, batch_index):
 97 |         # data: float32 array of shape (batch_size, 858, 128); answers: int32 vector with one label per example.
 98 |         start_index = batch_index * self.batch_size
 99 |         end_index = start_index + self.batch_size
100 |         
101 |         data = np.zeros((self.batch_size, 858, 128), dtype=np.float32)
102 |         answers = []
103 |         # NOTE(review): no bounds check -- data_raw must hold at least end_index entries or data_raw[i] fails.
104 |         for i in range(start_index, end_index):
105 |             answers.append(int(data_raw[i].split(',')[1]))  # second comma-separated field: class label
106 |             name = data_raw[i].split(',')[0]  # first comma-separated field: image name without extension
107 |             path = self.png_folder + name + ".png"  # png_folder is a plain string prefix, so it needs its own trailing separator
108 |             im = Image.open(path)
109 |             data[i - start_index, :, :] = np.transpose(np.array(im).astype(np.float32) / 256.0)[:, :128]  # divide by 256, swap axes, keep only the first 128 columns of the result
110 | 
111 |         answers = np.array(answers, dtype=np.int32)
112 |         return data, answers
113 |     
114 |     


--------------------------------------------------------------------------------
/theano/networks/tc_net.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import numpy as np
  3 | 
  4 | import theano
  5 | import theano.tensor as T
  6 | 
  7 | import lasagne
  8 | from lasagne import layers
  9 | from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
 10 | 
 11 | import PIL.Image as Image
 12 | from base_network import BaseNetwork
 13 | 
 14 | floatX = theano.config.floatX  # Theano's configured float dtype name; not referenced again in this file
 15 | 
 16 | 
 17 | class Network(BaseNetwork):
 18 |     # Convolutional classifier: six conv + max-pool stages over (batch, 1, 256, 858) inputs, one 1024-unit dense layer, 176-way softmax.
 19 |     def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, **kwargs):
 20 |         # Stores the settings, builds the Lasagne graph and compiles the Theano functions; extra kwargs are only printed.
 21 |         print "==> not used params in DMN class:", kwargs.keys()  # NOTE(review): "DMN" in this message looks like a leftover from other code
 22 |         self.train_list_raw = train_list_raw
 23 |         self.test_list_raw = test_list_raw
 24 |         self.png_folder = png_folder
 25 |         self.batch_size = batch_size
 26 |         self.dropout = dropout
 27 |         self.l2 = l2
 28 |         self.mode = mode
 29 |         self.batch_norm = batch_norm
 30 |         
 31 |         self.input_var = T.tensor4('input_var')  # symbolic 4-D input batch
 32 |         self.answer_var = T.ivector('answer_var')  # symbolic integer label vector, one entry per example
 33 |         
 34 |         print "==> building network"
 35 |         example = np.random.uniform(size=(self.batch_size, 1, 256, 858), low=0.0, high=1.0).astype(np.float32) # random dummy batch, only used to print the layer output shapes below
 36 |         answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) # random dummy labels; not used anywhere below
 37 |        
 38 |         network = layers.InputLayer(shape=(None, 1, 256, 858), input_var=self.input_var)
 39 |         print layers.get_output(network).eval({self.input_var:example}).shape
 40 |         
 41 |         # CONV-RELU-POOL 1 (each stage is followed by an optional BatchNormLayer)
 42 |         network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), 
 43 |                                      stride=1, nonlinearity=rectify)
 44 |         print layers.get_output(network).eval({self.input_var:example}).shape
 45 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
 46 |         print layers.get_output(network).eval({self.input_var:example}).shape
 47 |         if (self.batch_norm):
 48 |             network = layers.BatchNormLayer(incoming=network)
 49 |         
 50 |         # CONV-RELU-POOL 2
 51 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), 
 52 |                                      stride=1, nonlinearity=rectify)
 53 |         print layers.get_output(network).eval({self.input_var:example}).shape
 54 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
 55 |         print layers.get_output(network).eval({self.input_var:example}).shape
 56 |         if (self.batch_norm):
 57 |             network = layers.BatchNormLayer(incoming=network)
 58 | 
 59 |         
 60 |         # CONV-RELU-POOL 3
 61 |         network = layers.Conv2DLayer(incoming=network, num_filters=64, filter_size=(3, 3), 
 62 |                                      stride=1, nonlinearity=rectify)
 63 |         print layers.get_output(network).eval({self.input_var:example}).shape
 64 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
 65 |         print layers.get_output(network).eval({self.input_var:example}).shape
 66 |         if (self.batch_norm):
 67 |             network = layers.BatchNormLayer(incoming=network)
 68 |         
 69 |         # CONV-RELU-POOL 4
 70 |         network = layers.Conv2DLayer(incoming=network, num_filters=128, filter_size=(3, 3), 
 71 |                                      stride=1, nonlinearity=rectify)
 72 |         print layers.get_output(network).eval({self.input_var:example}).shape
 73 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
 74 |         print layers.get_output(network).eval({self.input_var:example}).shape
 75 |         if (self.batch_norm):
 76 |             network = layers.BatchNormLayer(incoming=network)
 77 |         
 78 |         # CONV-RELU-POOL 5
 79 |         network = layers.Conv2DLayer(incoming=network, num_filters=128, filter_size=(3, 3), 
 80 |                                      stride=1, nonlinearity=rectify)
 81 |         print layers.get_output(network).eval({self.input_var:example}).shape
 82 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
 83 |         print layers.get_output(network).eval({self.input_var:example}).shape
 84 |         if (self.batch_norm):
 85 |             network = layers.BatchNormLayer(incoming=network)
 86 |         
 87 |         # CONV-RELU-POOL 6 (the only stage whose pooling stride differs per axis: (3, 2))
 88 |         network = layers.Conv2DLayer(incoming=network, num_filters=256, filter_size=(3, 3), 
 89 |                                      stride=1, nonlinearity=rectify)
 90 |         print layers.get_output(network).eval({self.input_var:example}).shape
 91 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(3, 2), ignore_border=False)
 92 |         print layers.get_output(network).eval({self.input_var:example}).shape
 93 |         if (self.batch_norm):
 94 |             network = layers.BatchNormLayer(incoming=network)
 95 |         
 96 |         # DENSE 1 (followed by optional batch norm and optional dropout)
 97 |         network = layers.DenseLayer(incoming=network, num_units=1024, nonlinearity=rectify)
 98 |         if (self.batch_norm):
 99 |             network = layers.BatchNormLayer(incoming=network)
100 |         if (self.dropout > 0):
101 |             network = layers.dropout(network, self.dropout)
102 |         print layers.get_output(network).eval({self.input_var:example}).shape
103 |         # The DENSE 2 stage below is disabled: it sits inside a bare string literal and is never executed.
104 |         """
105 |         # DENSE 2
106 |         network = layers.DenseLayer(incoming=network, num_units=1024, nonlinearity=rectify)
107 |         if (self.batch_norm):
108 |             network = layers.BatchNormLayer(incoming=network)
109 |         if (self.dropout > 0):
110 |             network = layers.dropout(network, self.dropout)
111 |         print layers.get_output(network).eval({self.input_var:example}).shape
112 |         """
113 |         
114 |         # Last layer: classification (176 softmax outputs)
115 |         network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
116 |         print layers.get_output(network).eval({self.input_var:example}).shape
117 |         
118 |         
119 |         self.params = layers.get_all_params(network, trainable=True)
120 |         self.prediction = layers.get_output(network)
121 |         # NOTE(review): get_output() is called without deterministic=True and the same expression feeds test_fn, so dropout/batch norm appear to stay in training mode at evaluation -- confirm.
122 |         self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()  # mean cross-entropy over the batch
123 |         if (self.l2 > 0):  # optional L2 penalty scaled by self.l2
124 |             self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network, 
125 |                                                                     lasagne.regularization.l2)
126 |         else:
127 |             self.loss_l2 = 0
128 |         self.loss = self.loss_ce + self.loss_l2
129 |         
130 |         #updates = lasagne.updates.adadelta(self.loss, self.params)
131 |         updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)  # momentum SGD with a fixed learning rate
132 |         # train_fn is compiled only in 'train' mode; test_fn is always compiled and gets no `updates` argument.
133 |         if self.mode == 'train':
134 |             print "==> compiling train_fn"
135 |             self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], 
136 |                                             outputs=[self.prediction, self.loss],
137 |                                             updates=updates)
138 |         
139 |         print "==> compiling test_fn"
140 |         self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
141 |                                        outputs=[self.prediction, self.loss])
142 |     
143 |     # Name string for this architecture (a constant).
144 |     def say_name(self):
145 |         return "tc_net"
146 |     
147 |     # Loads batch number `batch_index` from the "name,label" entries of data_raw and returns (data, answers).
148 |     def read_batch(self, data_raw, batch_index):
149 |         # data: float32 array of shape (batch_size, 1, 256, 858); answers: int32 vector with one label per example.
150 |         start_index = batch_index * self.batch_size
151 |         end_index = start_index + self.batch_size
152 |         
153 |         data = np.zeros((self.batch_size, 1, 256, 858), dtype=np.float32)
154 |         answers = []
155 |         # NOTE(review): no bounds check -- data_raw must hold at least end_index entries or data_raw[i] fails.
156 |         for i in range(start_index, end_index):
157 |             answers.append(int(data_raw[i].split(',')[1]))  # second comma-separated field: class label
158 |             name = data_raw[i].split(',')[0]  # first comma-separated field: image name without extension
159 |             path = self.png_folder + name + ".png"  # png_folder is a plain string prefix, so it needs its own trailing separator
160 |             im = Image.open(path)
161 |             data[i - start_index, 0, :, :] = np.array(im).astype(np.float32) / 256.0  # image stored as the single channel, pixel values divided by 256
162 | 
163 |         answers = np.array(answers, dtype=np.int32)
164 |         return data, answers
165 |     
166 |     


--------------------------------------------------------------------------------
/theano/networks/tc_net_deeprnn_shared_pad.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import numpy as np
  3 | 
  4 | import theano
  5 | import theano.tensor as T
  6 | 
  7 | import lasagne
  8 | from lasagne import layers
  9 | from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
 10 | 
 11 | import PIL.Image as Image
 12 | from base_network import BaseNetwork
 13 | 
 14 | floatX = theano.config.floatX  # Theano's configured float dtype name; not referenced again in this file
 15 | 
 16 | class Network(BaseNetwork):
 17 |     # Four conv + max-pool stages over (batch, 1, 128, 858) inputs; each of the 32 output channels is then run through a two-layer GRU whose weights are shared across channels, and the concatenated results feed a 176-way softmax.
 18 |     def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):
 19 |         # Stores the settings, builds the Lasagne graph and compiles the Theano functions; extra kwargs are only printed.
 20 |         print "==> not used params in DMN class:", kwargs.keys()  # NOTE(review): "DMN" in this message looks like a leftover from other code
 21 |         self.train_list_raw = train_list_raw
 22 |         self.test_list_raw = test_list_raw
 23 |         self.png_folder = png_folder
 24 |         self.batch_size = batch_size
 25 |         self.dropout = dropout
 26 |         self.l2 = l2
 27 |         self.mode = mode
 28 |         self.batch_norm = batch_norm
 29 |         self.num_units = rnn_num_units
 30 |         
 31 |         self.input_var = T.tensor4('input_var')  # symbolic 4-D input batch
 32 |         self.answer_var = T.ivector('answer_var')  # symbolic integer label vector, one entry per example
 33 |         
 34 |         print "==> building network"
 35 |         example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) # random dummy batch, only used to print output shapes below
 36 |         answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) # random dummy labels; not used anywhere below
 37 |        
 38 |         network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
 39 |         print layers.get_output(network).eval({self.input_var:example}).shape
 40 |         
 41 |         # CONV-RELU-POOL 1 (each stage is followed by an optional BatchNormLayer; pooling uses pad=2)
 42 |         network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), 
 43 |                                      stride=1, nonlinearity=rectify)
 44 |         print layers.get_output(network).eval({self.input_var:example}).shape
 45 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 46 |         print layers.get_output(network).eval({self.input_var:example}).shape
 47 |         if (self.batch_norm):
 48 |             network = layers.BatchNormLayer(incoming=network)
 49 |         
 50 |         # CONV-RELU-POOL 2
 51 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), 
 52 |                                      stride=1, nonlinearity=rectify)
 53 |         print layers.get_output(network).eval({self.input_var:example}).shape
 54 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 55 |         print layers.get_output(network).eval({self.input_var:example}).shape
 56 |         if (self.batch_norm):
 57 |             network = layers.BatchNormLayer(incoming=network)
 58 | 
 59 |         
 60 |         # CONV-RELU-POOL 3
 61 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), 
 62 |                                      stride=1, nonlinearity=rectify)
 63 |         print layers.get_output(network).eval({self.input_var:example}).shape
 64 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 65 |         print layers.get_output(network).eval({self.input_var:example}).shape
 66 |         if (self.batch_norm):
 67 |             network = layers.BatchNormLayer(incoming=network)
 68 |         
 69 |         # CONV-RELU-POOL 4
 70 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), 
 71 |                                      stride=1, nonlinearity=rectify)
 72 |         print layers.get_output(network).eval({self.input_var:example}).shape
 73 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 74 |         print layers.get_output(network).eval({self.input_var:example}).shape
 75 |         if (self.batch_norm):
 76 |             network = layers.BatchNormLayer(incoming=network)
 77 |         # Trainable parameters of the convolutional part; the recurrent and dense parts are appended to this list further down.
 78 |         self.params = layers.get_all_params(network, trainable=True)
 79 |         
 80 |         output = layers.get_output(network)
 81 |         num_channels  = 32 
 82 |         filter_W = 54
 83 |         filter_H = 8
 84 |         
 85 |         # NOTE: these constants are the shape of the last pool layer's output; they could be taken symbolically,
 86 |         # but explicit values are better for optimizations
 87 |         
 88 |         channels = []
 89 |         for channel_index in range(num_channels):
 90 |             channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))  # one 3-D slice per channel with its last two axes swapped, matching the (None, filter_W, filter_H) InputLayer below
 91 |         
 92 |         rnn_network_outputs = []  # the None values below are placeholders for the first GRU layer's parameters, filled in at channel 0
 93 |         W_in_to_updategate = None
 94 |         W_hid_to_updategate = None
 95 |         b_updategate = None
 96 |         W_in_to_resetgate = None
 97 |         W_hid_to_resetgate = None
 98 |         b_resetgate = None
 99 |         W_in_to_hidden_update = None
100 |         W_hid_to_hidden_update = None
101 |         b_hidden_update = None
102 |         # Placeholders for the second GRU layer's parameters, also filled in at channel 0.
103 |         W_in_to_updategate1 = None
104 |         W_hid_to_updategate1 = None
105 |         b_updategate1 = None
106 |         W_in_to_resetgate1 = None
107 |         W_hid_to_resetgate1 = None
108 |         b_resetgate1 = None
109 |         W_in_to_hidden_update1 = None
110 |         W_hid_to_hidden_update1 = None
111 |         b_hidden_update1 = None
112 |         # One two-layer GRU stack per channel: channel 0 creates the parameters, every other channel reuses them.
113 |         for channel_index in range(num_channels):
114 |             rnn_input_var = channels[channel_index]
115 |             
116 |             # InputLayer: starts a new Lasagne graph fed with this channel's symbolic slice
117 |             network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var)
118 | 
119 |             if (channel_index == 0):
120 |                 # GRULayer (first layer; its parameters are remembered for the other channels)
121 |                 network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=False)
122 |                 W_in_to_updategate = network.W_in_to_updategate
123 |                 W_hid_to_updategate = network.W_hid_to_updategate
124 |                 b_updategate = network.b_updategate
125 |                 W_in_to_resetgate = network.W_in_to_resetgate
126 |                 W_hid_to_resetgate = network.W_hid_to_resetgate
127 |                 b_resetgate = network.b_resetgate
128 |                 W_in_to_hidden_update = network.W_in_to_hidden_update
129 |                 W_hid_to_hidden_update = network.W_hid_to_hidden_update
130 |                 b_hidden_update = network.b_hidden_update
131 |                 
132 |                 # BatchNormalization Layer (optional)
133 |                 if (self.batch_norm):
134 |                     network = layers.BatchNormLayer(incoming=network)
135 |                 
136 |                 # GRULayer (second layer; only the final output is returned, parameters remembered as well)
137 |                 network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
138 |                 W_in_to_updategate1 = network.W_in_to_updategate
139 |                 W_hid_to_updategate1 = network.W_hid_to_updategate
140 |                 b_updategate1 = network.b_updategate
141 |                 W_in_to_resetgate1 = network.W_in_to_resetgate
142 |                 W_hid_to_resetgate1 = network.W_hid_to_resetgate
143 |                 b_resetgate1 = network.b_resetgate
144 |                 W_in_to_hidden_update1 = network.W_in_to_hidden_update
145 |                 W_hid_to_hidden_update1 = network.W_hid_to_hidden_update
146 |                 b_hidden_update1 = network.b_hidden_update
147 |                         
148 |                 # add params (collected once, from channel 0 only)
149 |                 self.params += layers.get_all_params(network, trainable=True)
150 | 
151 |             else:
152 |                 # GRULayer, but shared: the gates are built from channel 0's first-layer parameters
153 |                 network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=False,
154 |                             resetgate=layers.Gate(W_in=W_in_to_resetgate, W_hid=W_hid_to_resetgate, b=b_resetgate),
155 |                             updategate=layers.Gate(W_in=W_in_to_updategate, W_hid=W_hid_to_updategate, b=b_updategate),
156 |                             hidden_update=layers.Gate(W_in=W_in_to_hidden_update, W_hid=W_hid_to_hidden_update, b=b_hidden_update))
157 |                             
158 |                 # BatchNormalization Layer -- NOTE(review): a fresh layer is created per channel; its parameters appear to be neither tied to channel 0's nor added to self.params -- confirm this is intended
159 |                 if (self.batch_norm):
160 |                     network = layers.BatchNormLayer(incoming=network)
161 |                     
162 |                 # GRULayer, but shared: the gates are built from channel 0's second-layer parameters
163 |                 network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True,
164 |                             resetgate=layers.Gate(W_in=W_in_to_resetgate1, W_hid=W_hid_to_resetgate1, b=b_resetgate1),
165 |                             updategate=layers.Gate(W_in=W_in_to_updategate1, W_hid=W_hid_to_updategate1, b=b_updategate1),
166 |                             hidden_update=layers.Gate(W_in=W_in_to_hidden_update1, W_hid=W_hid_to_hidden_update1, b=b_hidden_update1))
167 |                 
168 |             
169 |             rnn_network_outputs.append(layers.get_output(network))
170 |         # Join the per-channel GRU outputs along axis 1; the InputLayer below expects num_units * num_channels features.
171 |         all_output_var = T.concatenate(rnn_network_outputs, axis=1)
172 |         print all_output_var.eval({self.input_var:example}).shape
173 |         
174 |         # InputLayer: starts the final classifier graph on the concatenated GRU outputs
175 |         network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var)
176 |         
177 |         # Dropout Layer (optional)
178 |         if (self.dropout > 0):
179 |             network = layers.dropout(network, self.dropout)
180 |         
181 |         # BatchNormalization Layer (optional)
182 |         if (self.batch_norm):
183 |             network = layers.BatchNormLayer(incoming=network)
184 |         
185 |         # Last layer: classification (176 softmax outputs)
186 |         network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
187 |         print layers.get_output(network).eval({self.input_var:example}).shape
188 |         
189 |         # Append the trainable parameters of the final classifier graph to those collected above.
190 |         self.params += layers.get_all_params(network, trainable=True)
191 |         self.prediction = layers.get_output(network)
192 |         # NOTE(review): every get_output() call in this method omits deterministic=True and test_fn reuses self.prediction, so dropout/batch norm appear to stay in training mode at evaluation -- confirm.
193 |         #print "==> param shapes", [x.eval().shape for x in self.params]
194 |         
195 |         self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()  # mean cross-entropy over the batch
196 |         if (self.l2 > 0):  # optional L2 penalty scaled by self.l2, applied to every entry of self.params
197 |             self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, 
198 |                                                                           lasagne.regularization.l2)
199 |         else:
200 |             self.loss_l2 = 0
201 |         self.loss = self.loss_ce + self.loss_l2
202 |         
203 |         #updates = lasagne.updates.adadelta(self.loss, self.params)
204 |         updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)  # momentum SGD with a fixed learning rate
205 |         # train_fn is compiled only in 'train' mode; test_fn is always compiled and gets no `updates` argument.
206 |         if self.mode == 'train':
207 |             print "==> compiling train_fn"
208 |             self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], 
209 |                                             outputs=[self.prediction, self.loss],
210 |                                             updates=updates)
211 |         
212 |         print "==> compiling test_fn"
213 |         self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
214 |                                        outputs=[self.prediction, self.loss])
215 |     
216 |     # Name string for this architecture: a fixed description with self.num_units formatted as an integer inside it.
217 |     def say_name(self):
218 |         return "tc_net_deeprnn.4conv.pad.GRU.shared.num_units%d.5khz" % self.num_units
219 |     
220 |     # Loads batch number `batch_index` from the "name,label" entries of data_raw and returns (data, answers).
221 |     def read_batch(self, data_raw, batch_index):
222 |         # data: float32 array of shape (batch_size, 1, 128, 858); answers: int32 vector with one label per example.
223 |         start_index = batch_index * self.batch_size
224 |         end_index = start_index + self.batch_size
225 |         
226 |         data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
227 |         answers = []
228 |         # NOTE(review): no bounds check -- data_raw must hold at least end_index entries or data_raw[i] fails.
229 |         for i in range(start_index, end_index):
230 |             answers.append(int(data_raw[i].split(',')[1]))  # second comma-separated field: class label
231 |             name = data_raw[i].split(',')[0]  # first comma-separated field: image name without extension
232 |             path = self.png_folder + name + ".png"  # png_folder is a plain string prefix, so it needs its own trailing separator
233 |             im = Image.open(path)
234 |             data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0  # keep only the first 128 rows of the image, divide pixel values by 256
235 | 
236 |         answers = np.array(answers, dtype=np.int32)
237 |         return data, answers
238 |     
239 |     


--------------------------------------------------------------------------------
/theano/networks/tc_net_mod.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import numpy as np
  3 | 
  4 | import theano
  5 | import theano.tensor as T
  6 | 
  7 | import lasagne
  8 | from lasagne import layers
  9 | from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
 10 | 
 11 | import PIL.Image as Image
 12 | from base_network import BaseNetwork
 13 | 
 14 | floatX = theano.config.floatX
 15 | 
 16 | 
 17 | class Network(BaseNetwork):
 18 |     
 19 |     def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, **kwargs):
 20 |         
 21 |         print "==> not used params in DMN class:", kwargs.keys()
 22 |         self.train_list_raw = train_list_raw
 23 |         self.test_list_raw = test_list_raw
 24 |         self.png_folder = png_folder
 25 |         self.batch_size = batch_size
 26 |         self.dropout = dropout
 27 |         self.l2 = l2
 28 |         self.mode = mode
 29 |         self.batch_norm = batch_norm
 30 |         
 31 |         self.input_var = T.tensor4('input_var')
 32 |         self.answer_var = T.ivector('answer_var')
 33 |         
 34 |         print "==> building network"
 35 |         example = np.random.uniform(size=(self.batch_size, 1, 256, 858), low=0.0, high=1.0).astype(np.float32) #########
 36 |         answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########
 37 |        
 38 |         network = layers.InputLayer(shape=(None, 1, 256, 858), input_var=self.input_var)
 39 |         print layers.get_output(network).eval({self.input_var:example}).shape
 40 |         
 41 |         
 42 |         # NOTE: replace pad=2 with ignore_border=False
 43 |         # CONV-RELU-POOL 1
 44 |         network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), 
 45 |                                      stride=1, nonlinearity=rectify)
 46 |         print layers.get_output(network).eval({self.input_var:example}).shape
 47 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 48 |         print layers.get_output(network).eval({self.input_var:example}).shape
 49 |         if (self.batch_norm):
 50 |             network = layers.BatchNormLayer(incoming=network)
 51 |         
 52 |         # CONV-RELU-POOL 2
 53 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), 
 54 |                                      stride=1, nonlinearity=rectify)
 55 |         print layers.get_output(network).eval({self.input_var:example}).shape
 56 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 57 |         print layers.get_output(network).eval({self.input_var:example}).shape
 58 |         if (self.batch_norm):
 59 |             network = layers.BatchNormLayer(incoming=network)
 60 | 
 61 |         
 62 |         # CONV-RELU-POOL 3
 63 |         network = layers.Conv2DLayer(incoming=network, num_filters=64, filter_size=(3, 3), 
 64 |                                      stride=1, nonlinearity=rectify)
 65 |         print layers.get_output(network).eval({self.input_var:example}).shape
 66 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 67 |         print layers.get_output(network).eval({self.input_var:example}).shape
 68 |         if (self.batch_norm):
 69 |             network = layers.BatchNormLayer(incoming=network)
 70 |         
 71 |         # CONV-RELU-POOL 4
 72 |         network = layers.Conv2DLayer(incoming=network, num_filters=128, filter_size=(3, 3), 
 73 |                                      stride=1, nonlinearity=rectify)
 74 |         print layers.get_output(network).eval({self.input_var:example}).shape
 75 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 76 |         print layers.get_output(network).eval({self.input_var:example}).shape
 77 |         if (self.batch_norm):
 78 |             network = layers.BatchNormLayer(incoming=network)
 79 |         
 80 |         # CONV-RELU-POOL 5
 81 |         network = layers.Conv2DLayer(incoming=network, num_filters=128, filter_size=(3, 3), 
 82 |                                      stride=1, nonlinearity=rectify)
 83 |         print layers.get_output(network).eval({self.input_var:example}).shape
 84 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 85 |         print layers.get_output(network).eval({self.input_var:example}).shape
 86 |         if (self.batch_norm):
 87 |             network = layers.BatchNormLayer(incoming=network)
 88 |         
 89 |         # CONV-RELU-POOL 6
 90 |         network = layers.Conv2DLayer(incoming=network, num_filters=256, filter_size=(3, 3), 
 91 |                                      stride=1, nonlinearity=rectify)
 92 |         print layers.get_output(network).eval({self.input_var:example}).shape
 93 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(3, 2), pad=2)
 94 |         print layers.get_output(network).eval({self.input_var:example}).shape
 95 |         if (self.batch_norm):
 96 |             network = layers.BatchNormLayer(incoming=network)
 97 |         
 98 |         # DENSE 1
 99 |         network = layers.DenseLayer(incoming=network, num_units=1024, nonlinearity=rectify)
100 |         if (self.batch_norm):
101 |             network = layers.BatchNormLayer(incoming=network)
102 |         if (self.dropout > 0):
103 |             network = layers.dropout(network, self.dropout)
104 |         print layers.get_output(network).eval({self.input_var:example}).shape
105 |         
106 |         
107 |         # Last layer: classification
108 |         network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
109 |         print layers.get_output(network).eval({self.input_var:example}).shape
110 |         
111 |     
112 |         self.params = layers.get_all_params(network, trainable=True)
113 |         self.prediction = layers.get_output(network)
114 |     
115 |         print "==> param shapes", [x.eval().shape for x in self.params]
116 |         
117 |         self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
118 |         if (self.l2 > 0):
119 |             self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network, 
120 |                                                                     lasagne.regularization.l2)
121 |         else:
122 |             self.loss_l2 = 0
123 |         self.loss = self.loss_ce + self.loss_l2
124 |         
125 |         #updates = lasagne.updates.adadelta(self.loss, self.params)
126 |         updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)
127 |         
128 |         if self.mode == 'train':
129 |             print "==> compiling train_fn"
130 |             self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], 
131 |                                             outputs=[self.prediction, self.loss],
132 |                                             updates=updates)
133 |         
134 |         print "==> compiling test_fn"
135 |         self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
136 |                                        outputs=[self.prediction, self.loss])
137 |     
138 |     
139 |     def say_name(self):
140 |         return "tc_net_mod"
141 |     
142 |     
143 |     def read_batch(self, data_raw, batch_index):
144 | 
145 |         start_index = batch_index * self.batch_size
146 |         end_index = start_index + self.batch_size
147 |         
148 |         data = np.zeros((self.batch_size, 1, 256, 858), dtype=np.float32)
149 |         answers = []
150 |         
151 |         for i in range(start_index, end_index):
152 |             answers.append(int(data_raw[i].split(',')[1]))
153 |             name = data_raw[i].split(',')[0]
154 |             path = self.png_folder + name + ".png"
155 |             im = Image.open(path)
156 |             data[i - start_index, 0, :, :] = np.array(im).astype(np.float32) / 256.0
157 | 
158 |         answers = np.array(answers, dtype=np.int32)
159 |         return data, answers
160 |     
161 |                 
162 | 


--------------------------------------------------------------------------------
/theano/networks/tc_net_mod_5khz_small.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import numpy as np
  3 | 
  4 | import theano
  5 | import theano.tensor as T
  6 | 
  7 | import lasagne
  8 | from lasagne import layers
  9 | from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
 10 | 
 11 | import PIL.Image as Image
 12 | from base_network import BaseNetwork
 13 | 
 14 | floatX = theano.config.floatX
 15 | 
 16 | 
 17 | class Network(BaseNetwork):
 18 |     
 19 |     def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, **kwargs):
 20 |         
 21 |         print "==> not used params in DMN class:", kwargs.keys()
 22 |         self.train_list_raw = train_list_raw
 23 |         self.test_list_raw = test_list_raw
 24 |         self.png_folder = png_folder
 25 |         self.batch_size = batch_size
 26 |         self.dropout = dropout
 27 |         self.l2 = l2
 28 |         self.mode = mode
 29 |         self.batch_norm = batch_norm
 30 |         
 31 |         self.input_var = T.tensor4('input_var')
 32 |         self.answer_var = T.ivector('answer_var')
 33 |         
 34 |         print "==> building network"
 35 |         example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########
 36 |         answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########
 37 |        
 38 |         network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
 39 |         print layers.get_output(network).eval({self.input_var:example}).shape
 40 |         
 41 |         # CONV-RELU-POOL 1
 42 |         network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), 
 43 |                                      stride=1, nonlinearity=rectify)
 44 |         print layers.get_output(network).eval({self.input_var:example}).shape
 45 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 46 |         print layers.get_output(network).eval({self.input_var:example}).shape
 47 |         if (self.batch_norm):
 48 |             network = layers.BatchNormLayer(incoming=network)
 49 |         
 50 |         # CONV-RELU-POOL 2
 51 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), 
 52 |                                      stride=1, nonlinearity=rectify)
 53 |         print layers.get_output(network).eval({self.input_var:example}).shape
 54 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 55 |         print layers.get_output(network).eval({self.input_var:example}).shape
 56 |         if (self.batch_norm):
 57 |             network = layers.BatchNormLayer(incoming=network)
 58 | 
 59 |         
 60 |         # CONV-RELU-POOL 3
 61 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), 
 62 |                                      stride=1, nonlinearity=rectify)
 63 |         print layers.get_output(network).eval({self.input_var:example}).shape
 64 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 65 |         print layers.get_output(network).eval({self.input_var:example}).shape
 66 |         if (self.batch_norm):
 67 |             network = layers.BatchNormLayer(incoming=network)
 68 |         
 69 |         # CONV-RELU-POOL 4
 70 |         network = layers.Conv2DLayer(incoming=network, num_filters=64, filter_size=(3, 3), 
 71 |                                      stride=1, nonlinearity=rectify)
 72 |         print layers.get_output(network).eval({self.input_var:example}).shape
 73 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 74 |         print layers.get_output(network).eval({self.input_var:example}).shape
 75 |         if (self.batch_norm):
 76 |             network = layers.BatchNormLayer(incoming=network)
 77 |         
 78 |         # CONV-RELU-POOL 5
 79 |         network = layers.Conv2DLayer(incoming=network, num_filters=64, filter_size=(3, 3), 
 80 |                                      stride=1, nonlinearity=rectify)
 81 |         print layers.get_output(network).eval({self.input_var:example}).shape
 82 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 83 |         print layers.get_output(network).eval({self.input_var:example}).shape
 84 |         if (self.batch_norm):
 85 |             network = layers.BatchNormLayer(incoming=network)
 86 | 
 87 |         
 88 |         # DENSE 1
 89 |         network = layers.DenseLayer(incoming=network, num_units=256, nonlinearity=rectify)
 90 |         if (self.batch_norm):
 91 |             network = layers.BatchNormLayer(incoming=network)
 92 |         if (self.dropout > 0):
 93 |             network = layers.dropout(network, self.dropout)
 94 |         print layers.get_output(network).eval({self.input_var:example}).shape
 95 | 
 96 |                 
 97 |         # Last layer: classification
 98 |         network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
 99 |         print layers.get_output(network).eval({self.input_var:example}).shape
100 |         
101 | 
102 |         self.params = layers.get_all_params(network, trainable=True)
103 |         self.prediction = layers.get_output(network)
104 |         self.test_prediction = layers.get_output(network, deterministic=True)
105 |         
106 |         print "==> param shapes", [x.eval().shape for x in self.params]
107 |         
108 |         def get_loss(prediction):
109 |             loss_ce = lasagne.objectives.categorical_crossentropy(prediction, self.answer_var).mean()
110 |             if (self.l2 > 0):
111 |                 loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network, 
112 |                                                                         lasagne.regularization.l2)
113 |             else:
114 |                 loss_l2 = 0
115 |             return loss_ce + loss_l2
116 |     
117 |         self.loss = get_loss(self.prediction)
118 |         self.test_loss = get_loss(self.test_prediction)
119 |         
120 |         #updates = lasagne.updates.adadelta(self.loss, self.params)
121 |         updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)
122 |         
123 |         if self.mode == 'train':
124 |             print "==> compiling train_fn"
125 |             self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], 
126 |                                             outputs=[self.prediction, self.loss],
127 |                                             updates=updates)
128 |         
129 |         print "==> compiling test_fn"
130 |         # deterministic version
131 |         #self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
132 |         #                               outputs=[self.test_prediction, self.test_loss])
133 |     
134 |         # non deterministic version, as train_fn
135 |         self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
136 |                                        outputs=[self.prediction, self.loss])
137 |     
138 |     
139 |     def say_name(self):
140 |         return "tc_net_mod_5khz_small"
141 |     
142 | 
143 |     def read_batch(self, data_raw, batch_index):
144 | 
145 |         start_index = batch_index * self.batch_size
146 |         end_index = start_index + self.batch_size
147 |         
148 |         data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
149 |         answers = []
150 |         
151 |         for i in range(start_index, end_index):
152 |             answers.append(int(data_raw[i].split(',')[1]))
153 |             name = data_raw[i].split(',')[0]
154 |             path = self.png_folder + name + ".png"
155 |             im = Image.open(path)
156 |             data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0
157 | 
158 |         answers = np.array(answers, dtype=np.int32)
159 |         return data, answers
160 |     
161 |     


--------------------------------------------------------------------------------
/theano/networks/tc_net_rnn.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import numpy as np
  3 | 
  4 | import theano
  5 | import theano.tensor as T
  6 | 
  7 | import lasagne
  8 | from lasagne import layers
  9 | from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
 10 | 
 11 | import PIL.Image as Image
 12 | from base_network import BaseNetwork
 13 | 
 14 | floatX = theano.config.floatX
 15 | 
 16 | 
 17 | class Network(BaseNetwork):
 18 |     
 19 |     def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):
 20 |         
 21 |         print "==> not used params in DMN class:", kwargs.keys()
 22 |         self.train_list_raw = train_list_raw
 23 |         self.test_list_raw = test_list_raw
 24 |         self.png_folder = png_folder
 25 |         self.batch_size = batch_size
 26 |         self.dropout = dropout
 27 |         self.l2 = l2
 28 |         self.mode = mode
 29 |         self.batch_norm = batch_norm
 30 |         self.num_units = rnn_num_units
 31 |         
 32 |         self.input_var = T.tensor4('input_var')
 33 |         self.answer_var = T.ivector('answer_var')
 34 |         
 35 |         print "==> building network"
 36 |         example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########
 37 |         answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########
 38 |        
 39 |         network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
 40 |         print layers.get_output(network).eval({self.input_var:example}).shape
 41 |         
 42 |         # CONV-RELU-POOL 1
 43 |         network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), 
 44 |                                      stride=1, nonlinearity=rectify)
 45 |         print layers.get_output(network).eval({self.input_var:example}).shape
 46 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
 47 |         print layers.get_output(network).eval({self.input_var:example}).shape
 48 |         if (self.batch_norm):
 49 |             network = layers.BatchNormLayer(incoming=network)
 50 |         
 51 |         # CONV-RELU-POOL 2
 52 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), 
 53 |                                      stride=1, nonlinearity=rectify)
 54 |         print layers.get_output(network).eval({self.input_var:example}).shape
 55 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
 56 |         print layers.get_output(network).eval({self.input_var:example}).shape
 57 |         if (self.batch_norm):
 58 |             network = layers.BatchNormLayer(incoming=network)
 59 | 
 60 |         
 61 |         # CONV-RELU-POOL 3
 62 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), 
 63 |                                      stride=1, nonlinearity=rectify)
 64 |         print layers.get_output(network).eval({self.input_var:example}).shape
 65 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
 66 |         print layers.get_output(network).eval({self.input_var:example}).shape
 67 |         if (self.batch_norm):
 68 |             network = layers.BatchNormLayer(incoming=network)
 69 |         
 70 |         
 71 |         self.params = layers.get_all_params(network, trainable=True)
 72 |         
 73 |         output = layers.get_output(network)
 74 |         num_channels  = 32 
 75 |         filter_W = 104
 76 |         filter_H = 13
 77 |         # NOTE: these constants are shapes of last pool layer, it can be symbolic 
 78 |         # explicit values are better for optimizations
 79 |         
 80 |         channels = []
 81 |         for channel_index in range(num_channels):
 82 |             channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))
 83 |         
 84 |         rnn_network_outputs = []
 85 |         for channel_index in range(num_channels):
 86 |             rnn_input_var = channels[channel_index]
 87 |             
 88 |             # InputLayer       
 89 |             network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var)
 90 | 
 91 |             # GRULayer
 92 |             network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
 93 |             
 94 |             # BatchNormalization Layer
 95 |             if (self.batch_norm):
 96 |                 network = layers.BatchNormLayer(incoming=network)
 97 |               
 98 |             # add params 
 99 |             self.params += layers.get_all_params(network, trainable=True)
100 |             
101 |             rnn_network_outputs.append(layers.get_output(network))
102 |         
103 |         all_output_var = T.concatenate(rnn_network_outputs, axis=1)
104 |         print all_output_var.eval({self.input_var:example}).shape
105 |         
106 |         # InputLayer
107 |         network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var)
108 |         
109 |         # DENSE 1
110 |         network = layers.DenseLayer(incoming=network, num_units=512, nonlinearity=rectify)
111 |         if (self.batch_norm):
112 |             network = layers.BatchNormLayer(incoming=network)
113 |         if (self.dropout > 0):
114 |             network = layers.dropout(network, self.dropout)
115 |         print layers.get_output(network).eval({self.input_var:example}).shape
116 |         
117 |         
118 |         # Last layer: classification
119 |         network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
120 |         print layers.get_output(network).eval({self.input_var:example}).shape
121 |         
122 |     
123 |         self.params += layers.get_all_params(network, trainable=True)
124 |         self.prediction = layers.get_output(network)
125 |     
126 |         #print "==> param shapes", [x.eval().shape for x in self.params]
127 |         
128 |         self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
129 |         if (self.l2 > 0):
130 |             self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, 
131 |                                                                           lasagne.regularization.l2)
132 |         else:
133 |             self.loss_l2 = 0
134 |         self.loss = self.loss_ce + self.loss_l2
135 |         
136 |         #updates = lasagne.updates.adadelta(self.loss, self.params)
137 |         updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)
138 |         
139 |         if self.mode == 'train':
140 |             print "==> compiling train_fn"
141 |             self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], 
142 |                                             outputs=[self.prediction, self.loss],
143 |                                             updates=updates)
144 |         
145 |         print "==> compiling test_fn"
146 |         self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
147 |                                        outputs=[self.prediction, self.loss])
148 |     
149 |     
150 |     def say_name(self):
151 |         return "tc_net_rnn.GRU.3conv.num_units%d.5khz" % self.num_units
152 |     
153 |     
154 |     def read_batch(self, data_raw, batch_index):
155 | 
156 |         start_index = batch_index * self.batch_size
157 |         end_index = start_index + self.batch_size
158 |         
159 |         data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
160 |         answers = []
161 |         
162 |         for i in range(start_index, end_index):
163 |             answers.append(int(data_raw[i].split(',')[1]))
164 |             name = data_raw[i].split(',')[0]
165 |             path = self.png_folder + name + ".png"
166 |             im = Image.open(path)
167 |             data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0
168 | 
169 |         answers = np.array(answers, dtype=np.int32)
170 |         return data, answers
171 |     
172 |     
173 | 


--------------------------------------------------------------------------------
/theano/networks/tc_net_rnn_nodense.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import numpy as np
  3 | 
  4 | import theano
  5 | import theano.tensor as T
  6 | 
  7 | import lasagne
  8 | from lasagne import layers
  9 | from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
 10 | 
 11 | import PIL.Image as Image
 12 | from base_network import BaseNetwork
 13 | 
 14 | floatX = theano.config.floatX
 15 | 
 16 | 
 17 | class Network(BaseNetwork):
 18 |     
 19 |     def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):
 20 |         
 21 |         print "==> not used params in DMN class:", kwargs.keys()
 22 |         self.train_list_raw = train_list_raw
 23 |         self.test_list_raw = test_list_raw
 24 |         self.png_folder = png_folder
 25 |         self.batch_size = batch_size
 26 |         self.dropout = dropout
 27 |         self.l2 = l2
 28 |         self.mode = mode
 29 |         self.batch_norm = batch_norm
 30 |         self.num_units = rnn_num_units
 31 |         
 32 |         self.input_var = T.tensor4('input_var')
 33 |         self.answer_var = T.ivector('answer_var')
 34 |         
 35 |         print "==> building network"
 36 |         example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########
 37 |         answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########
 38 |        
 39 |         network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
 40 |         print layers.get_output(network).eval({self.input_var:example}).shape
 41 |         
 42 |         # CONV-RELU-POOL 1
 43 |         network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), 
 44 |                                      stride=1, nonlinearity=rectify)
 45 |         print layers.get_output(network).eval({self.input_var:example}).shape
 46 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
 47 |         print layers.get_output(network).eval({self.input_var:example}).shape
 48 |         if (self.batch_norm):
 49 |             network = layers.BatchNormLayer(incoming=network)
 50 |         
 51 |         # CONV-RELU-POOL 2
 52 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), 
 53 |                                      stride=1, nonlinearity=rectify)
 54 |         print layers.get_output(network).eval({self.input_var:example}).shape
 55 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
 56 |         print layers.get_output(network).eval({self.input_var:example}).shape
 57 |         if (self.batch_norm):
 58 |             network = layers.BatchNormLayer(incoming=network)
 59 | 
 60 |         
 61 |         # CONV-RELU-POOL 3
 62 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), 
 63 |                                      stride=1, nonlinearity=rectify)
 64 |         print layers.get_output(network).eval({self.input_var:example}).shape
 65 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
 66 |         print layers.get_output(network).eval({self.input_var:example}).shape
 67 |         if (self.batch_norm):
 68 |             network = layers.BatchNormLayer(incoming=network)
 69 |         
 70 |         self.params = layers.get_all_params(network, trainable=True)
 71 |         
 72 |         output = layers.get_output(network)
 73 |         num_channels  = 32 
 74 |         filter_W = 104
 75 |         filter_H = 13
 76 |         # NOTE: these constants are shapes of last pool layer, it can be symbolic 
 77 |         # explicit values are better for optimizations
 78 |         
 79 |         channels = []
 80 |         for channel_index in range(num_channels):
 81 |             channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))
 82 |         
 83 |         rnn_network_outputs = []
 84 |         for channel_index in range(num_channels):
 85 |             rnn_input_var = channels[channel_index]
 86 |             
 87 |             # InputLayer       
 88 |             network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var)
 89 | 
 90 |             # GRULayer
 91 |             network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
 92 |             
 93 |             # BatchNormalization Layer
 94 |             if (self.batch_norm):
 95 |                 network = layers.BatchNormLayer(incoming=network)
 96 |               
 97 |             # add params 
 98 |             self.params += layers.get_all_params(network, trainable=True)
 99 |             
100 |             rnn_network_outputs.append(layers.get_output(network))
101 |         
102 |         all_output_var = T.concatenate(rnn_network_outputs, axis=1)
103 |         print all_output_var.eval({self.input_var:example}).shape
104 |         
105 |         # InputLayer
106 |         network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var)
107 |         
108 |         """
109 |         # DENSE 1
110 |         network = layers.DenseLayer(incoming=network, num_units=512, nonlinearity=rectify)
111 |         if (self.batch_norm):
112 |             network = layers.BatchNormLayer(incoming=network)
113 |         if (self.dropout > 0):
114 |             network = layers.dropout(network, self.dropout)
115 |         print layers.get_output(network).eval({self.input_var:example}).shape
116 |         """
117 |         
118 |         # Last layer: classification
119 |         network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
120 |         print layers.get_output(network).eval({self.input_var:example}).shape
121 |         
122 |     
123 |         self.params += layers.get_all_params(network, trainable=True)
124 |         self.prediction = layers.get_output(network)
125 |     
126 |         #print "==> param shapes", [x.eval().shape for x in self.params]
127 |         
128 |         self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
129 |         if (self.l2 > 0):
130 |             self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, 
131 |                                                                           lasagne.regularization.l2)
132 |         else:
133 |             self.loss_l2 = 0
134 |         self.loss = self.loss_ce + self.loss_l2
135 |         
136 |         #updates = lasagne.updates.adadelta(self.loss, self.params)
137 |         updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)
138 |         
139 |         if self.mode == 'train':
140 |             print "==> compiling train_fn"
141 |             self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], 
142 |                                             outputs=[self.prediction, self.loss],
143 |                                             updates=updates)
144 |         
145 |         print "==> compiling test_fn"
146 |         self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
147 |                                        outputs=[self.prediction, self.loss])
148 |     
149 |     
150 |     def say_name(self):
151 |         return "tc_net_rnn.3conv.GRU.num_units%d.nodense.5khz" % self.num_units
152 |     
153 |     
154 |     def read_batch(self, data_raw, batch_index):
155 | 
156 |         start_index = batch_index * self.batch_size
157 |         end_index = start_index + self.batch_size
158 |         
159 |         data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
160 |         answers = []
161 |         
162 |         for i in range(start_index, end_index):
163 |             answers.append(int(data_raw[i].split(',')[1]))
164 |             name = data_raw[i].split(',')[0]
165 |             path = self.png_folder + name + ".png"
166 |             im = Image.open(path)
167 |             data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0
168 | 
169 |         answers = np.array(answers, dtype=np.int32)
170 |         return data, answers
171 |     


--------------------------------------------------------------------------------
/theano/networks/tc_net_rnn_onernn.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import numpy as np
  3 | 
  4 | import theano
  5 | import theano.tensor as T
  6 | 
  7 | import lasagne
  8 | from lasagne import layers
  9 | from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
 10 | 
 11 | import PIL.Image as Image
 12 | from base_network import BaseNetwork
 13 | 
 14 | floatX = theano.config.floatX
 15 | 
 16 | 
 17 | class Network(BaseNetwork):
 18 |     
 19 |     def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):
 20 |         
 21 |         print "==> not used params in DMN class:", kwargs.keys()
 22 |         self.train_list_raw = train_list_raw
 23 |         self.test_list_raw = test_list_raw
 24 |         self.png_folder = png_folder
 25 |         self.batch_size = batch_size
 26 |         self.dropout = dropout
 27 |         self.l2 = l2
 28 |         self.mode = mode
 29 |         self.batch_norm = batch_norm
 30 |         self.num_units = rnn_num_units
 31 |         
 32 |         self.input_var = T.tensor4('input_var')
 33 |         self.answer_var = T.ivector('answer_var')
 34 |         
 35 |         print "==> building network"
 36 |         example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########
 37 |         answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########
 38 |        
 39 |         network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
 40 |         print layers.get_output(network).eval({self.input_var:example}).shape
 41 |         
 42 |         # CONV-RELU-POOL 1
 43 |         network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), 
 44 |                                      stride=1, nonlinearity=rectify)
 45 |         print layers.get_output(network).eval({self.input_var:example}).shape
 46 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 47 |         print layers.get_output(network).eval({self.input_var:example}).shape
 48 |         if (self.batch_norm):
 49 |             network = layers.BatchNormLayer(incoming=network)
 50 |         
 51 |         # CONV-RELU-POOL 2
 52 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), 
 53 |                                      stride=1, nonlinearity=rectify)
 54 |         print layers.get_output(network).eval({self.input_var:example}).shape
 55 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 56 |         print layers.get_output(network).eval({self.input_var:example}).shape
 57 |         if (self.batch_norm):
 58 |             network = layers.BatchNormLayer(incoming=network)
 59 | 
 60 |         
 61 |         # CONV-RELU-POOL 3
 62 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), 
 63 |                                      stride=1, nonlinearity=rectify)
 64 |         print layers.get_output(network).eval({self.input_var:example}).shape
 65 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 66 |         print layers.get_output(network).eval({self.input_var:example}).shape
 67 |         if (self.batch_norm):
 68 |             network = layers.BatchNormLayer(incoming=network)
 69 |         
 70 |         # CONV-RELU-POOL 4
 71 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), 
 72 |                                      stride=1, nonlinearity=rectify)
 73 |         print layers.get_output(network).eval({self.input_var:example}).shape
 74 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 75 |         print layers.get_output(network).eval({self.input_var:example}).shape
 76 |         if (self.batch_norm):
 77 |             network = layers.BatchNormLayer(incoming=network)
 78 |         
 79 |         self.params = layers.get_all_params(network, trainable=True)
 80 |         
 81 |         output = layers.get_output(network)
 82 |         output = output.transpose((0, 3, 1, 2))
 83 |         output = output.flatten(ndim=3)
 84 |         
 85 |         # NOTE: these constants are shapes of last pool layer, it can be symbolic 
 86 |         # explicit values are better for optimizations
 87 |         num_channels = 32 
 88 |         filter_W = 54
 89 |         filter_H = 8
 90 |         
 91 |             
 92 |         # InputLayer       
 93 |         network = layers.InputLayer(shape=(None, filter_W, num_channels * filter_H), input_var=output)
 94 |         print layers.get_output(network).eval({self.input_var:example}).shape
 95 |         
 96 |         # GRULayer
 97 |         network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
 98 |         print layers.get_output(network).eval({self.input_var:example}).shape
 99 |         if (self.batch_norm):
100 |             network = layers.BatchNormLayer(incoming=network)
101 |         if (self.dropout > 0):
102 |             network = layers.dropout(network, self.dropout)
103 |         
104 |         # Last layer: classification
105 |         network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
106 |         print layers.get_output(network).eval({self.input_var:example}).shape
107 |         
108 |         
109 |         self.params += layers.get_all_params(network, trainable=True)
110 |         self.prediction = layers.get_output(network)
111 |     
112 |         #print "==> param shapes", [x.eval().shape for x in self.params]
113 |         
114 |         self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
115 |         if (self.l2 > 0):
116 |             self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, 
117 |                                                                           lasagne.regularization.l2)
118 |         else:
119 |             self.loss_l2 = 0
120 |         self.loss = self.loss_ce + self.loss_l2
121 |         
122 |         #updates = lasagne.updates.adadelta(self.loss, self.params)
123 |         #updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003) # good one
124 |         updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.0003)
125 |                 
126 |         if self.mode == 'train':
127 |             print "==> compiling train_fn"
128 |             self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], 
129 |                                             outputs=[self.prediction, self.loss],
130 |                                             updates=updates)
131 |         
132 |         print "==> compiling test_fn"
133 |         self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
134 |                                        outputs=[self.prediction, self.loss])
135 |     
136 |     
137 |     def say_name(self):
138 |         return "tc_net_rnn.4conv.pad.GRU.onernn.num_units%d.5khz" % self.num_units
139 |     
140 |     
141 |     def read_batch(self, data_raw, batch_index):
142 | 
143 |         start_index = batch_index * self.batch_size
144 |         end_index = start_index + self.batch_size
145 |         
146 |         data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
147 |         answers = []
148 |         
149 |         for i in range(start_index, end_index):
150 |             answers.append(int(data_raw[i].split(',')[1]))
151 |             name = data_raw[i].split(',')[0]
152 |             path = self.png_folder + name + ".png"
153 |             im = Image.open(path)
154 |             data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0
155 | 
156 |         answers = np.array(answers, dtype=np.int32)
157 |         return data, answers
158 |     
159 |     


--------------------------------------------------------------------------------
/theano/networks/tc_net_rnn_onernn_notimepool.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import numpy as np
  3 | 
  4 | import theano
  5 | import theano.tensor as T
  6 | 
  7 | import lasagne
  8 | from lasagne import layers
  9 | from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
 10 | 
 11 | import PIL.Image as Image
 12 | from base_network import BaseNetwork
 13 | 
 14 | floatX = theano.config.floatX
 15 | 
 16 | 
 17 | class Network(BaseNetwork):
 18 |     
 19 |     def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):
 20 |         
 21 |         print "==> not used params in DMN class:", kwargs.keys()
 22 |         self.train_list_raw = train_list_raw
 23 |         self.test_list_raw = test_list_raw
 24 |         self.png_folder = png_folder
 25 |         self.batch_size = batch_size
 26 |         self.dropout = dropout
 27 |         self.l2 = l2
 28 |         self.mode = mode
 29 |         self.batch_norm = batch_norm
 30 |         self.num_units = rnn_num_units
 31 |         
 32 |         self.input_var = T.tensor4('input_var')
 33 |         self.answer_var = T.ivector('answer_var')
 34 |         
 35 |         print "==> building network"
 36 |         example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########
 37 |         answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########
 38 |        
 39 |         network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
 40 |         print layers.get_output(network).eval({self.input_var:example}).shape
 41 |         
 42 |         # CONV-RELU-POOL 1
 43 |         network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), 
 44 |                                      stride=1, nonlinearity=rectify)
 45 |         print layers.get_output(network).eval({self.input_var:example}).shape
 46 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(2,1), pad=2)
 47 |         print layers.get_output(network).eval({self.input_var:example}).shape
 48 |         if (self.batch_norm):
 49 |             network = layers.BatchNormLayer(incoming=network)
 50 |         
 51 |         # CONV-RELU-POOL 2
 52 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), 
 53 |                                      stride=1, nonlinearity=rectify)
 54 |         print layers.get_output(network).eval({self.input_var:example}).shape
 55 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(2,1), pad=2)
 56 |         print layers.get_output(network).eval({self.input_var:example}).shape
 57 |         if (self.batch_norm):
 58 |             network = layers.BatchNormLayer(incoming=network)
 59 | 
 60 |         
 61 |         # CONV-RELU-POOL 3
 62 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), 
 63 |                                      stride=1, nonlinearity=rectify)
 64 |         print layers.get_output(network).eval({self.input_var:example}).shape
 65 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(2,1), pad=2)
 66 |         print layers.get_output(network).eval({self.input_var:example}).shape
 67 |         if (self.batch_norm):
 68 |             network = layers.BatchNormLayer(incoming=network)
 69 |         
 70 |         # CONV-RELU-POOL 4
 71 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), 
 72 |                                      stride=1, nonlinearity=rectify)
 73 |         print layers.get_output(network).eval({self.input_var:example}).shape
 74 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(2,1), pad=2)
 75 |         print layers.get_output(network).eval({self.input_var:example}).shape
 76 |         if (self.batch_norm):
 77 |             network = layers.BatchNormLayer(incoming=network)
 78 |         
 79 |         self.params = layers.get_all_params(network, trainable=True)
 80 |         
 81 |         output = layers.get_output(network)
 82 |         output = output.transpose((0, 3, 1, 2))
 83 |         output = output.flatten(ndim=3)
 84 |         
 85 |         # NOTE: these constants are shapes of last pool layer, it can be symbolic 
 86 |         # explicit values are better for optimizations
 87 |         num_channels = 32 
 88 |         filter_W = 852
 89 |         filter_H = 8
 90 |         
 91 |             
 92 |         # InputLayer       
 93 |         network = layers.InputLayer(shape=(None, filter_W, num_channels * filter_H), input_var=output)
 94 |         print layers.get_output(network).eval({self.input_var:example}).shape
 95 |         
 96 |         # GRULayer
 97 |         network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
 98 |         print layers.get_output(network).eval({self.input_var:example}).shape
 99 |         if (self.batch_norm):
100 |             network = layers.BatchNormLayer(incoming=network)
101 |         if (self.dropout > 0):
102 |             network = layers.dropout(network, self.dropout)
103 |         
104 |         # Last layer: classification
105 |         network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
106 |         print layers.get_output(network).eval({self.input_var:example}).shape
107 |         
108 |         
109 |         self.params += layers.get_all_params(network, trainable=True)
110 |         self.prediction = layers.get_output(network)
111 |     
112 |         #print "==> param shapes", [x.eval().shape for x in self.params]
113 |         
114 |         self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
115 |         if (self.l2 > 0):
116 |             self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, 
117 |                                                                           lasagne.regularization.l2)
118 |         else:
119 |             self.loss_l2 = 0
120 |         self.loss = self.loss_ce + self.loss_l2
121 |         
122 |         #updates = lasagne.updates.adadelta(self.loss, self.params)
123 |         #updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003) # good one
124 |         updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.001)
125 |                 
126 |         if self.mode == 'train':
127 |             print "==> compiling train_fn"
128 |             self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], 
129 |                                             outputs=[self.prediction, self.loss],
130 |                                             updates=updates)
131 |         
132 |         print "==> compiling test_fn"
133 |         self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
134 |                                        outputs=[self.prediction, self.loss])
135 |     
136 |     
137 |     
138 |     def say_name(self):
139 |         return "tc_net_rnn.4conv.pad.GRU.onernn.notimepool.num_units%d.5khz" % self.num_units
140 | 
141 | 
142 |     def read_batch(self, data_raw, batch_index):
143 | 
144 |         start_index = batch_index * self.batch_size
145 |         end_index = start_index + self.batch_size
146 |         
147 |         data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
148 |         answers = []
149 |         
150 |         for i in range(start_index, end_index):
151 |             answers.append(int(data_raw[i].split(',')[1]))
152 |             name = data_raw[i].split(',')[0]
153 |             path = self.png_folder + name + ".png"
154 |             im = Image.open(path)
155 |             data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0
156 | 
157 |         answers = np.array(answers, dtype=np.int32)
158 |         return data, answers
159 |     
160 |                 
161 | 


--------------------------------------------------------------------------------
/theano/networks/tc_net_rnn_shared.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import numpy as np
  3 | 
  4 | import theano
  5 | import theano.tensor as T
  6 | 
  7 | import lasagne
  8 | from lasagne import layers
  9 | from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
 10 | 
 11 | import PIL.Image as Image
 12 | from base_network import BaseNetwork
 13 | 
 14 | floatX = theano.config.floatX
 15 | 
 16 | 
 17 | class Network(BaseNetwork):
 18 |     
 19 |     def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):
 20 |         
 21 |         print "==> not used params in DMN class:", kwargs.keys()
 22 |         self.train_list_raw = train_list_raw
 23 |         self.test_list_raw = test_list_raw
 24 |         self.png_folder = png_folder
 25 |         self.batch_size = batch_size
 26 |         self.dropout = dropout
 27 |         self.l2 = l2
 28 |         self.mode = mode
 29 |         self.batch_norm = batch_norm
 30 |         self.num_units = rnn_num_units
 31 |         
 32 |         self.input_var = T.tensor4('input_var')
 33 |         self.answer_var = T.ivector('answer_var')
 34 |         
 35 |         print "==> building network"
 36 |         example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########
 37 |         answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########
 38 |        
 39 |         network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
 40 |         print layers.get_output(network).eval({self.input_var:example}).shape
 41 |         
 42 |         # CONV-RELU-POOL 1
 43 |         network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), 
 44 |                                      stride=1, nonlinearity=rectify)
 45 |         print layers.get_output(network).eval({self.input_var:example}).shape
 46 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
 47 |         print layers.get_output(network).eval({self.input_var:example}).shape
 48 |         if (self.batch_norm):
 49 |             network = layers.BatchNormLayer(incoming=network)
 50 |         
 51 |         # CONV-RELU-POOL 2
 52 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), 
 53 |                                      stride=1, nonlinearity=rectify)
 54 |         print layers.get_output(network).eval({self.input_var:example}).shape
 55 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
 56 |         print layers.get_output(network).eval({self.input_var:example}).shape
 57 |         if (self.batch_norm):
 58 |             network = layers.BatchNormLayer(incoming=network)
 59 | 
 60 |         
 61 |         # CONV-RELU-POOL 3
 62 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), 
 63 |                                      stride=1, nonlinearity=rectify)
 64 |         print layers.get_output(network).eval({self.input_var:example}).shape
 65 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
 66 |         print layers.get_output(network).eval({self.input_var:example}).shape
 67 |         if (self.batch_norm):
 68 |             network = layers.BatchNormLayer(incoming=network)
 69 |         
 70 |         self.params = layers.get_all_params(network, trainable=True)
 71 |         
 72 |         output = layers.get_output(network)
 73 |         num_channels  = 32 
 74 |         filter_W = 104
 75 |         filter_H = 13
 76 |         # NOTE: these constants are shapes of last pool layer, it can be symbolic 
 77 |         # explicit values are better for optimizations
 78 |         
 79 |         channels = []
 80 |         for channel_index in range(num_channels):
 81 |             channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))
 82 |         
 83 |         rnn_network_outputs = []
 84 |         W_in_to_updategate = None
 85 |         W_hid_to_updategate = None
 86 |         b_updategate = None
 87 |         W_in_to_resetgate = None
 88 |         W_hid_to_resetgate = None
 89 |         b_resetgate = None
 90 |         W_in_to_hidden_update = None
 91 |         W_hid_to_hidden_update = None
 92 |         b_hidden_update = None
 93 |         
 94 |         for channel_index in range(num_channels):
 95 |             rnn_input_var = channels[channel_index]
 96 |             
 97 |             # InputLayer       
 98 |             network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var)
 99 | 
100 |             if (channel_index == 0):
101 |                 # GRULayer
102 |                 network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
103 |                 W_in_to_updategate = network.W_in_to_updategate
104 |                 W_hid_to_updategate = network.W_hid_to_updategate
105 |                 b_updategate = network.b_updategate
106 |                 W_in_to_resetgate = network.W_in_to_resetgate
107 |                 W_hid_to_resetgate = network.W_hid_to_resetgate
108 |                 b_resetgate = network.b_resetgate
109 |                 W_in_to_hidden_update = network.W_in_to_hidden_update
110 |                 W_hid_to_hidden_update = network.W_hid_to_hidden_update
111 |                 b_hidden_update = network.b_hidden_update
112 |                 
113 |                 # add params 
114 |                 self.params += layers.get_all_params(network, trainable=True)
115 | 
116 |             else:
117 |                 # GRULayer, but shared
118 |                 network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True,
119 |                             resetgate=layers.Gate(W_in=W_in_to_resetgate, W_hid=W_hid_to_resetgate, b=b_resetgate),
120 |                             updategate=layers.Gate(W_in=W_in_to_updategate, W_hid=W_hid_to_updategate, b=b_updategate),
121 |                             hidden_update=layers.Gate(W_in=W_in_to_hidden_update, W_hid=W_hid_to_hidden_update, b=b_hidden_update))
122 |                             
123 |                 
124 |             
125 |             rnn_network_outputs.append(layers.get_output(network))
126 |         
127 |         all_output_var = T.concatenate(rnn_network_outputs, axis=1)
128 |         print all_output_var.eval({self.input_var:example}).shape
129 |         
130 |         # InputLayer
131 |         network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var)
132 |         
133 |         # BatchNormalization Layer
134 |         if (self.batch_norm):
135 |             network = layers.BatchNormLayer(incoming=network)
136 |         
137 |         # Last layer: classification
138 |         network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
139 |         print layers.get_output(network).eval({self.input_var:example}).shape
140 |         
141 |     
142 |         self.params += layers.get_all_params(network, trainable=True)
143 |         self.prediction = layers.get_output(network)
144 |     
145 |         #print "==> param shapes", [x.eval().shape for x in self.params]
146 |         
147 |         self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
148 |         if (self.l2 > 0):
149 |             self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, 
150 |                                                                           lasagne.regularization.l2)
151 |         else:
152 |             self.loss_l2 = 0
153 |         self.loss = self.loss_ce + self.loss_l2
154 |         
155 |         #updates = lasagne.updates.adadelta(self.loss, self.params)
156 |         updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)
157 |         
158 |         if self.mode == 'train':
159 |             print "==> compiling train_fn"
160 |             self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], 
161 |                                             outputs=[self.prediction, self.loss],
162 |                                             updates=updates)
163 |         
164 |         print "==> compiling test_fn"
165 |         self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
166 |                                        outputs=[self.prediction, self.loss])
167 |     
168 |     
169 |     def say_name(self):
170 |         return "tc_net_rnn.3conv.GRU.shared.num_units%d.5khz" % self.num_units
171 |     
172 | 
173 |     def read_batch(self, data_raw, batch_index):
174 | 
175 |         start_index = batch_index * self.batch_size
176 |         end_index = start_index + self.batch_size
177 |         
178 |         data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
179 |         answers = []
180 |         
181 |         for i in range(start_index, end_index):
182 |             answers.append(int(data_raw[i].split(',')[1]))
183 |             name = data_raw[i].split(',')[0]
184 |             path = self.png_folder + name + ".png"
185 |             im = Image.open(path)
186 |             data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0
187 | 
188 |         answers = np.array(answers, dtype=np.int32)
189 |         return data, answers
190 |     


--------------------------------------------------------------------------------
/theano/networks/tc_net_rnn_shared_pad.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import numpy as np
  3 | 
  4 | import theano
  5 | import theano.tensor as T
  6 | 
  7 | import lasagne
  8 | from lasagne import layers
  9 | from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
 10 | 
 11 | import PIL.Image as Image
 12 | from base_network import BaseNetwork
 13 | 
 14 | floatX = theano.config.floatX
 15 | 
 16 | 
 17 | class Network(BaseNetwork):
 18 |     
 19 |     def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):
 20 |         
 21 |         print "==> not used params in DMN class:", kwargs.keys()
 22 |         self.train_list_raw = train_list_raw
 23 |         self.test_list_raw = test_list_raw
 24 |         self.png_folder = png_folder
 25 |         self.batch_size = batch_size
 26 |         self.dropout = dropout
 27 |         self.l2 = l2
 28 |         self.mode = mode
 29 |         self.batch_norm = batch_norm
 30 |         self.num_units = rnn_num_units
 31 |         
 32 |         self.input_var = T.tensor4('input_var')
 33 |         self.answer_var = T.ivector('answer_var')
 34 |         
 35 |         print "==> building network"
 36 |         example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########
 37 |         answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########
 38 |        
 39 |         network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
 40 |         print layers.get_output(network).eval({self.input_var:example}).shape
 41 |         
 42 |         # CONV-RELU-POOL 1
 43 |         network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), 
 44 |                                      stride=1, nonlinearity=rectify)
 45 |         print layers.get_output(network).eval({self.input_var:example}).shape
 46 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 47 |         print layers.get_output(network).eval({self.input_var:example}).shape
 48 |         if (self.batch_norm):
 49 |             network = layers.BatchNormLayer(incoming=network)
 50 |         
 51 |         # CONV-RELU-POOL 2
 52 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), 
 53 |                                      stride=1, nonlinearity=rectify)
 54 |         print layers.get_output(network).eval({self.input_var:example}).shape
 55 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 56 |         print layers.get_output(network).eval({self.input_var:example}).shape
 57 |         if (self.batch_norm):
 58 |             network = layers.BatchNormLayer(incoming=network)
 59 | 
 60 |         
 61 |         # CONV-RELU-POOL 3
 62 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), 
 63 |                                      stride=1, nonlinearity=rectify)
 64 |         print layers.get_output(network).eval({self.input_var:example}).shape
 65 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 66 |         print layers.get_output(network).eval({self.input_var:example}).shape
 67 |         if (self.batch_norm):
 68 |             network = layers.BatchNormLayer(incoming=network)
 69 |         
 70 |         # CONV-RELU-POOL 4
 71 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), 
 72 |                                      stride=1, nonlinearity=rectify)
 73 |         print layers.get_output(network).eval({self.input_var:example}).shape
 74 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 75 |         print layers.get_output(network).eval({self.input_var:example}).shape
 76 |         if (self.batch_norm):
 77 |             network = layers.BatchNormLayer(incoming=network)
 78 |                 
 79 |         self.params = layers.get_all_params(network, trainable=True)
 80 |         
 81 |         output = layers.get_output(network)
 82 |         num_channels  = 32 
 83 |         filter_W = 54
 84 |         filter_H = 8
 85 |         
 86 |         # NOTE: these constants are shapes of last pool layer, it can be symbolic 
 87 |         # explicit values are better for optimizations
 88 |         
 89 |         channels = []
 90 |         for channel_index in range(num_channels):
 91 |             channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))
 92 |         
 93 |         rnn_network_outputs = []
 94 |         W_in_to_updategate = None
 95 |         W_hid_to_updategate = None
 96 |         b_updategate = None
 97 |         W_in_to_resetgate = None
 98 |         W_hid_to_resetgate = None
 99 |         b_resetgate = None
100 |         W_in_to_hidden_update = None
101 |         W_hid_to_hidden_update = None
102 |         b_hidden_update = None
103 |         
104 |         for channel_index in range(num_channels):
105 |             rnn_input_var = channels[channel_index]
106 |             
107 |             # InputLayer       
108 |             network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var)
109 | 
110 |             if (channel_index == 0):
111 |                 # GRULayer
112 |                 network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
113 |                 W_in_to_updategate = network.W_in_to_updategate
114 |                 W_hid_to_updategate = network.W_hid_to_updategate
115 |                 b_updategate = network.b_updategate
116 |                 W_in_to_resetgate = network.W_in_to_resetgate
117 |                 W_hid_to_resetgate = network.W_hid_to_resetgate
118 |                 b_resetgate = network.b_resetgate
119 |                 W_in_to_hidden_update = network.W_in_to_hidden_update
120 |                 W_hid_to_hidden_update = network.W_hid_to_hidden_update
121 |                 b_hidden_update = network.b_hidden_update
122 |                 
123 |                 # add params 
124 |                 self.params += layers.get_all_params(network, trainable=True)
125 | 
126 |             else:
127 |                 # GRULayer, but shared
128 |                 network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True,
129 |                             resetgate=layers.Gate(W_in=W_in_to_resetgate, W_hid=W_hid_to_resetgate, b=b_resetgate),
130 |                             updategate=layers.Gate(W_in=W_in_to_updategate, W_hid=W_hid_to_updategate, b=b_updategate),
131 |                             hidden_update=layers.Gate(W_in=W_in_to_hidden_update, W_hid=W_hid_to_hidden_update, b=b_hidden_update))
132 |                             
133 |                 
134 |             
135 |             rnn_network_outputs.append(layers.get_output(network))
136 |         
137 |         all_output_var = T.concatenate(rnn_network_outputs, axis=1)
138 |         print all_output_var.eval({self.input_var:example}).shape
139 |         
140 |         # InputLayer
141 |         network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var)
142 |         
143 |         # Dropout Layer
144 |         if (self.dropout > 0):
145 |             network = layers.dropout(network, self.dropout)
146 |         
147 |         # BatchNormalization Layer
148 |         if (self.batch_norm):
149 |             network = layers.BatchNormLayer(incoming=network)
150 |         
151 |         # Last layer: classification
152 |         network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
153 |         print layers.get_output(network).eval({self.input_var:example}).shape
154 |         
155 |     
156 |         self.params += layers.get_all_params(network, trainable=True)
157 |         self.prediction = layers.get_output(network)
158 |     
159 |         #print "==> param shapes", [x.eval().shape for x in self.params]
160 |         
161 |         self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
162 |         if (self.l2 > 0):
163 |             self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, 
164 |                                                                           lasagne.regularization.l2)
165 |         else:
166 |             self.loss_l2 = 0
167 |         self.loss = self.loss_ce + self.loss_l2
168 |         
169 |         #updates = lasagne.updates.adadelta(self.loss, self.params)
170 |         updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)
171 |         
172 |         if self.mode == 'train':
173 |             print "==> compiling train_fn"
174 |             self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], 
175 |                                             outputs=[self.prediction, self.loss],
176 |                                             updates=updates)
177 |         
178 |         print "==> compiling test_fn"
179 |         self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
180 |                                        outputs=[self.prediction, self.loss])
181 |     
182 |     
183 |     def say_name(self):
184 |         return "tc_net_rnn.4conv.pad.GRU.shared.num_units%d.5khz" % self.num_units
185 |     
186 |     
187 |     def read_batch(self, data_raw, batch_index):
188 | 
189 |         start_index = batch_index * self.batch_size
190 |         end_index = start_index + self.batch_size
191 |         
192 |         data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
193 |         answers = []
194 |         
195 |         for i in range(start_index, end_index):
196 |             answers.append(int(data_raw[i].split(',')[1]))
197 |             name = data_raw[i].split(',')[0]
198 |             path = self.png_folder + name + ".png"
199 |             im = Image.open(path)
200 |             data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0
201 | 
202 |         answers = np.array(answers, dtype=np.int32)
203 |         return data, answers
204 |     
205 | 


--------------------------------------------------------------------------------
/theano/networks/tc_net_rnn_shared_pad_augm.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import numpy as np
  3 | 
  4 | import theano
  5 | import theano.tensor as T
  6 | 
  7 | import lasagne
  8 | from lasagne import layers
  9 | from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
 10 | 
 11 | import PIL.Image as Image
 12 | from base_network import BaseNetwork
 13 | 
# Module-level alias for Theano's configured float type (theano.config.floatX).
# Nothing else in this module reads it.
floatX = theano.config.floatX
 15 | 
 16 | 
 17 | class Network(BaseNetwork):
 18 |     
 19 |     def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):
 20 |         
 21 |         print "==> not used params in DMN class:", kwargs.keys()
 22 |         self.train_list_raw = train_list_raw
 23 |         self.test_list_raw = test_list_raw
 24 |         self.png_folder = png_folder
 25 |         self.batch_size = batch_size
 26 |         self.dropout = dropout
 27 |         self.l2 = l2
 28 |         self.mode = mode
 29 |         self.batch_norm = batch_norm
 30 |         self.num_units = rnn_num_units
 31 |         
 32 |         self.input_var = T.tensor4('input_var')
 33 |         self.answer_var = T.ivector('answer_var')
 34 |         
 35 |         print "==> building network"
 36 |         example = np.random.uniform(size=(self.batch_size, 1, 128, 768), low=0.0, high=1.0).astype(np.float32) #########
 37 |         answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########
 38 |        
 39 |         network = layers.InputLayer(shape=(None, 1, 128, 768), input_var=self.input_var)
 40 |         print layers.get_output(network).eval({self.input_var:example}).shape
 41 |         
 42 |         # CONV-RELU-POOL 1
 43 |         network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), 
 44 |                                      stride=1, nonlinearity=rectify)
 45 |         print layers.get_output(network).eval({self.input_var:example}).shape
 46 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 47 |         print layers.get_output(network).eval({self.input_var:example}).shape
 48 |         if (self.batch_norm):
 49 |             network = layers.BatchNormLayer(incoming=network)
 50 |         
 51 |         # CONV-RELU-POOL 2
 52 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), 
 53 |                                      stride=1, nonlinearity=rectify)
 54 |         print layers.get_output(network).eval({self.input_var:example}).shape
 55 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 56 |         print layers.get_output(network).eval({self.input_var:example}).shape
 57 |         if (self.batch_norm):
 58 |             network = layers.BatchNormLayer(incoming=network)
 59 | 
 60 |         
 61 |         # CONV-RELU-POOL 3
 62 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), 
 63 |                                      stride=1, nonlinearity=rectify)
 64 |         print layers.get_output(network).eval({self.input_var:example}).shape
 65 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 66 |         print layers.get_output(network).eval({self.input_var:example}).shape
 67 |         if (self.batch_norm):
 68 |             network = layers.BatchNormLayer(incoming=network)
 69 |         
 70 |         # CONV-RELU-POOL 4
 71 |         network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), 
 72 |                                      stride=1, nonlinearity=rectify)
 73 |         print layers.get_output(network).eval({self.input_var:example}).shape
 74 |         network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
 75 |         print layers.get_output(network).eval({self.input_var:example}).shape
 76 |         if (self.batch_norm):
 77 |             network = layers.BatchNormLayer(incoming=network)
 78 |         
 79 |         self.params = layers.get_all_params(network, trainable=True)
 80 |         
 81 |         output = layers.get_output(network)
 82 |         num_channels  = 32 
 83 |         filter_W = 48
 84 |         filter_H = 8
 85 |         
 86 |         # NOTE: these constants are shapes of last pool layer, it can be symbolic 
 87 |         # explicit values are better for optimizations
 88 |         
 89 |         channels = []
 90 |         for channel_index in range(num_channels):
 91 |             channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))
 92 |         
 93 |         rnn_network_outputs = []
 94 |         W_in_to_updategate = None
 95 |         W_hid_to_updategate = None
 96 |         b_updategate = None
 97 |         W_in_to_resetgate = None
 98 |         W_hid_to_resetgate = None
 99 |         b_resetgate = None
100 |         W_in_to_hidden_update = None
101 |         W_hid_to_hidden_update = None
102 |         b_hidden_update = None
103 |         
104 |         for channel_index in range(num_channels):
105 |             rnn_input_var = channels[channel_index]
106 |             
107 |             # InputLayer       
108 |             network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var)
109 | 
110 |             if (channel_index == 0):
111 |                 # GRULayer
112 |                 network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
113 |                 W_in_to_updategate = network.W_in_to_updategate
114 |                 W_hid_to_updategate = network.W_hid_to_updategate
115 |                 b_updategate = network.b_updategate
116 |                 W_in_to_resetgate = network.W_in_to_resetgate
117 |                 W_hid_to_resetgate = network.W_hid_to_resetgate
118 |                 b_resetgate = network.b_resetgate
119 |                 W_in_to_hidden_update = network.W_in_to_hidden_update
120 |                 W_hid_to_hidden_update = network.W_hid_to_hidden_update
121 |                 b_hidden_update = network.b_hidden_update
122 |                 
123 |                 # add params 
124 |                 self.params += layers.get_all_params(network, trainable=True)
125 | 
126 |             else:
127 |                 # GRULayer, but shared
128 |                 network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True,
129 |                             resetgate=layers.Gate(W_in=W_in_to_resetgate, W_hid=W_hid_to_resetgate, b=b_resetgate),
130 |                             updategate=layers.Gate(W_in=W_in_to_updategate, W_hid=W_hid_to_updategate, b=b_updategate),
131 |                             hidden_update=layers.Gate(W_in=W_in_to_hidden_update, W_hid=W_hid_to_hidden_update, b=b_hidden_update))
132 |                             
133 |                 
134 |             
135 |             rnn_network_outputs.append(layers.get_output(network))
136 |         
137 |         all_output_var = T.concatenate(rnn_network_outputs, axis=1)
138 |         print all_output_var.eval({self.input_var:example}).shape
139 |         
140 |         # InputLayer
141 |         network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var)
142 |         
143 |         # Dropout Layer
144 |         if (self.dropout > 0):
145 |             network = layers.dropout(network, self.dropout)
146 |         
147 |         # BatchNormalization Layer
148 |         if (self.batch_norm):
149 |             network = layers.BatchNormLayer(incoming=network)
150 |         
151 |         # Last layer: classification
152 |         network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
153 |         print layers.get_output(network).eval({self.input_var:example}).shape
154 |         
155 |     
156 |         self.params += layers.get_all_params(network, trainable=True)
157 |         self.prediction = layers.get_output(network)
158 |     
159 |         #print "==> param shapes", [x.eval().shape for x in self.params]
160 |         
161 |         self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
162 |         if (self.l2 > 0):
163 |             self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, 
164 |                                                                           lasagne.regularization.l2)
165 |         else:
166 |             self.loss_l2 = 0
167 |         self.loss = self.loss_ce + self.loss_l2
168 |         
169 |         #updates = lasagne.updates.adadelta(self.loss, self.params)
170 |         updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)
171 |         
172 |         if self.mode == 'train':
173 |             print "==> compiling train_fn"
174 |             self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], 
175 |                                             outputs=[self.prediction, self.loss],
176 |                                             updates=updates)
177 |         
178 |         print "==> compiling test_fn"
179 |         self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
180 |                                        outputs=[self.prediction, self.loss])
181 |     
182 |     
183 |     def say_name(self):
184 |         return "tc_net_rnn.4conv.pad.GRU.shared.num_units%d.5khz.augm" % self.num_units
185 |     
186 | 
187 |     def read_batch(self, data_raw, batch_index):
188 | 
189 |         start_index = batch_index * self.batch_size
190 |         end_index = start_index + self.batch_size
191 |         
192 |         data = np.zeros((self.batch_size, 1, 128, 768), dtype=np.float32)
193 |         answers = []
194 |         
195 |         for i in range(start_index, end_index):
196 |             answers.append(int(data_raw[i].split(',')[1]))
197 |             name = data_raw[i].split(',')[0]
198 |             path = self.png_folder + name + ".png"
199 |             im = Image.open(path)
200 |             offset = random.randint(0, 90)
201 |             data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, offset:offset+768] / 256.0
202 | 
203 |         answers = np.array(answers, dtype=np.int32)
204 |         return data, answers
205 |     


--------------------------------------------------------------------------------
/theano/plot.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import matplotlib
  3 | matplotlib.use('Agg')
  4 | import matplotlib.pyplot as plt
  5 | import sys
  6 | import argparse
  7 | import os
  8 | 
  9 | 
 10 | #parsing arguments
 11 | parser = argparse.ArgumentParser()
 12 | parser.add_argument('--plot', type=str, default='plot.png', help='plotfile name with .png')
 13 | parser.add_argument('--log', type=str, default='log.txt', help='log file name')
 14 | parser.add_argument('--winVal', type=int, default='200', help='window for Val')
 15 | parser.add_argument('--winTrain', type=int, default='200', help='window for Train')
 16 | parser.add_argument('--no-legend', dest='legend', action='store_false')
 17 | parser.add_argument('--no-accuracy', dest='accuracy', action='store_false')
 18 | parser.add_argument('--no-loss', dest='loss', action='store_false')
 19 | parser.add_argument('--start_epoch', type=float, default=-1.0, help='start plotting from that epoch')
 20 | parser.set_defaults(loss=True)
 21 | parser.set_defaults(legend=True)
 22 | parser.set_defaults(accuracy=True)
 23 | 
 24 | args = parser.parse_args()
 25 | 
 26 | plotname = args.plot
 27 | windowVal = args.winVal
 28 | windowTrain = args.winTrain
 29 | accuracy = []
 30 | 
 31 | 
 32 | def movingAverage(loss, window):
 33 |     mas = []
 34 |     for i in range(len(loss)):
 35 |         j = i - window + 1
 36 |         if (j < 0):
 37 |             j = 0
 38 |         sum = 0.0
 39 |         for k in range(window):
 40 |             sum += loss[j + k]
 41 |         mas.append(sum / window)
 42 |     return mas
 43 | 
 44 | 
 45 | def plotTrainVal(filename, index, plotLabel):
 46 |     valx = []
 47 |     valy = []
 48 |     trainx = []
 49 |     trainy = []
 50 |     train_accuracyx = []
 51 |     train_accuracyy = []
 52 |     val_accuracyx = []
 53 |     val_accuracyy = []
 54 |     
 55 |     with open(filename, 'r') as logfile: 
 56 |         for st in logfile.readlines():
 57 |             head = st.split('\t')[0].strip()
 58 | 
 59 |             if (head[:7] == 'testing' or head[:8] == 'training'):
 60 |                 iteration_expr = head[head.find(':')+1:]
 61 |                 divpos = iteration_expr.find('/')
 62 |                 first = iteration_expr[:divpos]
 63 |                 iterations_per_epoch = float(iteration_expr[divpos+1:])
 64 |                 dotpos = first.find('.')
 65 |                 epoch = float(first[:dotpos])
 66 |                 iteration = float(first[dotpos+1:])
 67 |                 x = epoch + iteration / iterations_per_epoch
 68 |                 
 69 |                 st_loss = st[st.find("avg_loss"):]
 70 |                 cur_loss = float(st_loss[st_loss.find(':')+1:st_loss.find('\t')])
 71 |                 
 72 |                 if (head[:7] == 'testing'):
 73 |                     valx.append(x)
 74 |                     valy.append(cur_loss)
 75 |                 else:
 76 |                     trainx.append(x)
 77 |                     trainy.append(cur_loss)
 78 |             
 79 |             if st.strip()[:8] == "accuracy":
 80 |                 cur_accuracy = float(st[st.find(':')+1:st.find("percent")]) / 100.0
 81 |                 if (len(train_accuracyx) > len(val_accuracyx)):
 82 |                     val_accuracyx.append(valx[-1])
 83 |                     val_accuracyy.append(cur_accuracy)
 84 |                 else:
 85 |                     train_accuracyx.append(trainx[-1])
 86 |                     train_accuracyy.append(cur_accuracy)
 87 | 
 88 |     while(len(valx) > 0 and valx[0] < args.start_epoch):
 89 |         valx = valx[1:]
 90 |         valy = valy[1:]
 91 | 
 92 |     while(len(trainx) > 0 and trainx[0] < args.start_epoch):
 93 |         trainx = trainx[1:]
 94 |         trainy = trainy[1:]
 95 | 
 96 | 
 97 |     #window config
 98 |     wndVal = min(windowVal, int(0.8 * len(valy)))
 99 |     wndTrain = min(windowTrain, int(0.8 * len(trainy)))
100 |     
101 |     print "Train length: ", len(trainy), " \t\t window: ", wndTrain
102 |     print "Val length: ", len(valy), " \t\t window: ", wndVal
103 |     
104 |     #movAvg and correcting length
105 |     #valy = movingAverage(valy, wndVal)
106 |     #trainy = movingAverage(trainy, wndTrain)
107 |     #valx = valx[:len(valy)]
108 |     #trainx = trainx[:len(trainy)]
109 |     
110 | 
111 |     #plotting
112 |     greenDiff = 50
113 |     redBlueDiff = 50
114 |     
115 |     if (args.loss):
116 |         plt.plot(trainx, trainy, '#00' + hex(index * greenDiff)[2:] 
117 |                 + hex(256 - index * redBlueDiff)[2:],
118 |                 label=plotLabel + " train")
119 |         plt.hold(True)
120 | 
121 |         plt.plot(valx, valy, '#' + hex(256 - index * redBlueDiff)[2:] 
122 |                 + hex(index * greenDiff)[2:] + '00',
123 |                 label=plotLabel + " validation")
124 |         plt.hold(True)
125 |     
126 |     if (args.accuracy):
127 |         plt.plot(train_accuracyx, train_accuracyy, '#000000',
128 |                 label=plotLabel + " train_accuracy")
129 |         plt.hold(True)
130 | 
131 |         plt.plot(val_accuracyx, val_accuracyy, '#00FF00',
132 |                 label=plotLabel + " val_accuracy")
133 |         plt.hold(True)
134 |                 
135 |     print "plot index =", index
136 |     for (x, y) in zip(val_accuracyx, val_accuracyy):
137 |         print "\tepoch = %.0f, accuracy = %f" % (x - 1, y)
138 |     print '\tMax: %f // Epoch: %d' % (max(val_accuracyy), val_accuracyx[val_accuracyy.index(max(val_accuracyy))])
139 | 
140 | 
# Draw the curves of the requested log; the log path doubles as legend label.
plotTrainVal(args.log, 1, args.log)

# The legend is drawn unless --no-legend was given.
if args.legend:
    plt.legend(loc='upper right', fontsize='x-small')

# Write the current figure to the file chosen with --plot.
figure = plt.gcf()
figure.savefig(plotname)
147 | 
148 | 


--------------------------------------------------------------------------------