├── NeuralSounds.py ├── convnet.py ├── convnet.py~ ├── mp3s_to_fft_features.py ├── nn_phase2.py └── vec_to_bpm.py /NeuralSounds.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Sep 3 23:04:49 2016 4 | 5 | @author: liam 6 | """ 7 | 8 | import numpy as np 9 | from wavio import readwav 10 | from scipy.fftpack import fft as scifft 11 | from id3reader import Reader 12 | 13 | class NeuralSounds(): 14 | def __init__(self, downsample, num_samples_per_file, desired_X_time_dim, 15 | fft_sample_length, fft_step_size, track_fnames, 16 | tracks_path, rng=0): 17 | 18 | self.downsample = downsample 19 | self.rng = rng 20 | self.desired_rate = 44100 21 | self.num_samples_per_file = num_samples_per_file 22 | self.track_fnames = track_fnames 23 | self.desired_X_time_dim = desired_X_time_dim 24 | self.fft_sample_length = fft_sample_length 25 | self.fft_step_size = fft_step_size 26 | self.tracks_path = tracks_path 27 | #assert self.fft_sample_length % self.fft_step_size == 0 28 | self.clip_seconds = ((self.desired_X_time_dim - 1) * \ 29 | self.fft_step_size + self.fft_sample_length) * \ 30 | (1. / self.desired_rate) 31 | print 'each clip will be', self.clip_seconds, 'seconds long' 32 | self.fps = self.desired_X_time_dim / self.clip_seconds 33 | print 'resolution is', self.fps, 'frames per second' 34 | 35 | def preprocess_wav(self, wav, cut_silence=True): 36 | 37 | # Stereo to mono 38 | wav = np.mean(wav, axis=1) 39 | 40 | # Normalise 41 | wav = wav / np.max(wav) 42 | 43 | # > 0.03 to cut off any silence at the beginning of the song 44 | if cut_silence: 45 | wav = wav[np.where(np.abs(wav) > 0.03)[0][0]:] 46 | 47 | return wav 48 | 49 | def get_spectogram(self, wav): 50 | 51 | spectra = [] 52 | 53 | # Adding some noise, mainly as an alternative to log(1 + wav) 54 | wav += np.abs(np.random.rand(len(wav)) * 0.0001) 55 | 56 | # We're going to chop the wav into sections of fft_sample_length, 57 | # possibly overlapping if the fft_step_size is smaller than this 58 | spectra = np.array( 59 | [get_fft(wav[i:(i + self.fft_sample_length)]) for i in \ 60 | range(0, len(wav) - self.fft_sample_length, self.fft_step_size)]) 61 | 62 | # Normalise and take logs 63 | spectra = np.log(spectra / np.max(spectra)) 64 | 65 | # Check the spectra is not garbage 66 | if (np.sum(np.isnan(spectra)) > 0 or np.sum(np.abs(spectra)) < 100.): 67 | return None 68 | 69 | # Downsample the frequency bins if we so desire 70 | spectra = np.mean(spectra.reshape( 71 | spectra.shape[0], -1 , self.downsample, order='F'), axis=1) 72 | 73 | # Normalise this so it's between 0 and 1 74 | return normalise(spectra).astype(np.float32) 75 | 76 | def get_wav(self, track_full_path, seconds_cutoff=0): 77 | 78 | if track_full_path.endswith('.mp3'): 79 | # Convert the mp3 to a temporary wav file 80 | wav_path = '/tmp/tmp-bpm.wav' 81 | convert_an_mp3_to_wav(track_full_path, wav_path) 82 | cut_silence = True 83 | 84 | else: 85 | wav_path = track_full_path 86 | cut_silence = False 87 | 88 | # Read in the wav 89 | rate, sampwidth, wav = readwav(wav_path) 90 | 91 | # Preprocess it (cut off silence and normalise) 92 | wav = self.preprocess_wav(wav, cut_silence=cut_silence) 93 | 94 | # Make sure it's the right sampling rate 95 | if rate != self.desired_rate: 96 | return None 97 | 98 | if seconds_cutoff: 99 | wav = wav[:seconds_cutoff * rate] 100 | 101 | return wav 102 | 103 | def append_wav_to_Xy(self, wav): 104 | 105 | # Get the spectogram for the current wav 106 | X_curr = self.get_spectogram(wav) 107 | 108 | if X_curr is not None: 109 | 110 | # Get the number of seconds of audio represented by X_curr 111 | track_seconds = ((X_curr.shape[0] - 1) * self.fft_step_size + \ 112 | self.fft_sample_length) * (1. / self.desired_rate) 113 | 114 | # Get a target vector of the same length representing the same 115 | # number of seconds 116 | y_curr = get_target_vector(self.bpm, track_seconds, 117 | resolution=X_curr.shape[0], 118 | rng=self.rng) 119 | 120 | # Append some random slices of the 121 | i = 0 122 | while self.n < self.n_batch and i < self.num_samples_per_file: 123 | start = np.random.randint(0, X_curr.shape[0] - \ 124 | self.desired_X_time_dim) 125 | end = int(start + self.desired_X_time_dim) 126 | self.X.append(X_curr[start:end, :]) 127 | self.y.append(y_curr[start:end]) 128 | self.bpms.append(self.bpm) 129 | i += 1 130 | self.n += 1 131 | print 'done with this wav' 132 | 133 | def check_bpm(self, track_full_path): 134 | 135 | # Try to extract the BPM from the ID3 tag of an mp3 136 | try: 137 | self.bpm = int(get_track_bpm_from_id3_tag(track_full_path)) 138 | 139 | # Return False if cannot do this for some reason 140 | except ValueError: 141 | print 'BPM read error in file', track_full_path 142 | return False 143 | except TypeError: 144 | print 'BPM read error in file', track_full_path 145 | return False 146 | 147 | print self.bpm, 'bpm' 148 | return True 149 | 150 | def get_spectogram_training_set(self, n_batch=10): 151 | 152 | # Read in parameter and initialise count 153 | self.n_batch = n_batch 154 | self.n = 0 155 | 156 | # Initialise training data matrices 157 | self.X, self.y, self.bpms, self.fnames = [], [], [], [] 158 | 159 | # Keep looping until we have the desired number of training samples 160 | while self.n < self.n_batch: 161 | 162 | # Pick a track 163 | fname = np.random.choice(self.track_fnames, 1)[0] 164 | track_full_path = self.tracks_path + '/' + fname 165 | 166 | # If the track has BPM information 167 | if self.check_bpm(track_full_path): 168 | # Get a normalised wav of the track 169 | wav = self.get_wav(track_full_path) 170 | 171 | # Append multiple clips of the wav the the training X matrix 172 | if wav is not None: 173 | self.append_wav_to_Xy(wav) 174 | self.fnames.append(fname) 175 | 176 | print self.n, '/', self.n_batch, 'done' 177 | 178 | # Return X, y, bpms, fnames as np.arrays in desired shape for Keras 179 | self.X = np.array(self.X) 180 | self.y = np.array(self.y) 181 | self.X = self.X.reshape(-1, 1, self.X.shape[1], self.X.shape[2]) 182 | self.bpms = np.array(self.bpms) 183 | return (self.X, self.y, self.bpms, self.fnames) 184 | 185 | def reverse_find(s, subs): 186 | return len(s) - s[::-1].find(subs) 187 | 188 | def get_track_bpm_from_id3_tag(file_path): 189 | 190 | # Read BPM from ID3 tag 191 | print file_path 192 | if file_path.endswith('.mp3'): 193 | return Reader(file_path).getValue('TBPM') 194 | 195 | # Otherwise standard .wav file format should have BPM then space 196 | else: 197 | if '/' in file_path: 198 | return file_path[reverse_find(file_path, '/'):file_path.find(' ')] 199 | else: 200 | return file_path[0:file_path.find(' ')] 201 | 202 | def get_fft(s, downsample=16): 203 | c = scifft(s) 204 | # you only need half of the fft list (real signal symmetry) 205 | d = len(c)/2 206 | return abs(c[:d]) 207 | 208 | def normalise(v): 209 | return (v - v.min()) / (v.max() - v.min()) 210 | def rmse(a, b): 211 | return np.sqrt(np.mean(np.square(a-b))) 212 | def mae(a, b): 213 | return np.mean(np.abs(a-b)) 214 | 215 | def convert_an_mp3_to_wav(mp3_path, wav_path): 216 | import subprocess 217 | command = 'mpg123 -w ' + wav_path + ' "' + mp3_path + '"' 218 | subprocess.call(command, shell=True) 219 | 220 | def get_target_vector(bpm, seconds, resolution, rng): 221 | 222 | # Initialise the output array with some small random noise 223 | target_vec = np.random.rand(resolution) * 0.001 224 | 225 | seconds_per_beat = 60. / bpm 226 | frames_per_second = resolution * 1. / seconds 227 | frames_per_beat = seconds_per_beat * frames_per_second 228 | num_complete_beats = int(np.floor(target_vec.shape[0] / frames_per_beat)) 229 | 230 | # For each complete beat contained within the time this vector represents 231 | print frames_per_beat 232 | print num_complete_beats, 'num beats' 233 | for i in xrange(num_complete_beats): 234 | # Set the entries where beats occur to 1 235 | pos = int(np.round(i * frames_per_beat)) 236 | target_vec[pos] = 1. 237 | 238 | # We can add some padding around where the beats occur if we like... 239 | for j in xrange(-rng, rng): 240 | pos_new = pos + j 241 | if j != 0 and pos_new >= 0 and pos_new <= target_vec.shape[0]: 242 | target_vec[pos_new] = 1. / np.square(np.abs(j) + 1) 243 | 244 | return target_vec -------------------------------------------------------------------------------- /convnet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Neural Network for detecting the BPM of a 4 second clip of music 3 | ''' 4 | 5 | from matplotlib import pyplot as plt 6 | import numpy as np 7 | import cPickle 8 | from vec_to_bpm import vec_to_bpm 9 | np.random.seed(1) # for reproducibility 10 | 11 | from keras.callbacks import EarlyStopping 12 | from keras.models import Sequential 13 | from keras.layers.core import Dense, Dropout, Activation, Reshape, Flatten 14 | from keras.layers.convolutional import Convolution2D, MaxPooling2D 15 | from keras.layers.wrappers import TimeDistributed 16 | from keras.optimizers import SGD, Adam, RMSprop, Adadelta 17 | 18 | print("Loading training data...") 19 | X_train, y_train, bpms_train, fnames_train = cPickle.load(open('Xy_pulse3.dump', 'rb')) 20 | print("Loading validation data...") 21 | X_val, y_val, bpms_val, fnames_val = cPickle.load(open('Xy_vali_pulse3.dump', 'rb')) 22 | 23 | input_time_dim = X_train.shape[2] 24 | input_freq_dim = X_train.shape[3] 25 | output_length = y_train.shape[1] 26 | 27 | drop_hid = 0.25 28 | num_filters = 32 29 | dense_widths = [output_length*2, output_length] 30 | 31 | early = EarlyStopping(monitor='val_loss', patience=1, verbose=1, mode='auto') 32 | 33 | model = Sequential() 34 | 35 | model.add(Convolution2D(num_filters, 3, 3, border_mode='same', 36 | input_shape=(1, input_time_dim, input_freq_dim))) 37 | model.add(Activation('relu')) 38 | 39 | model.add(Convolution2D(num_filters, 5, 5, border_mode='same')) 40 | model.add(Activation('relu')) 41 | 42 | model.add(Reshape((input_time_dim, input_freq_dim * num_filters))) 43 | 44 | model.add(TimeDistributed(Dense(256))) 45 | model.add(Activation('relu')) 46 | model.add(TimeDistributed(Dense(128))) 47 | model.add(Activation('relu')) 48 | model.add(TimeDistributed(Dense(8))) 49 | model.add(Activation('relu')) 50 | 51 | model.add(Flatten()) 52 | if drop_hid: 53 | model.add(Dropout(drop_hid)) 54 | 55 | for w in dense_widths: 56 | model.add(Dense(w)) 57 | model.add(Activation('relu')) 58 | if drop_hid: 59 | model.add(Dropout(drop_hid)) 60 | model.add(Dense(output_length)) 61 | model.add(Activation('relu')) 62 | 63 | model.summary() 64 | 65 | #opt = Adadelta() 66 | #opt = SGD(lr=0.001) 67 | opt = Adam() 68 | 69 | model.compile(loss='mse', 70 | optimizer=opt, 71 | metrics=[]) 72 | 73 | batch_size = 1536 74 | nb_epoch = 2 75 | history = model.fit(X_train, y_train, 76 | batch_size=batch_size, nb_epoch=nb_epoch, 77 | verbose=1, validation_data=(X_val, y_val), 78 | shuffle=True, callbacks=[early]) 79 | 80 | model.save('convnet_aws.kerasmodel') 81 | 82 | -------------------------------------------------------------------------------- /convnet.py~: -------------------------------------------------------------------------------- 1 | ''' 2 | Neural Network for detecting the BPM of a 4 second clip of music 3 | ''' 4 | 5 | from matplotlib import pyplot as plt 6 | import numpy as np 7 | import cPickle 8 | from vec_to_bpm import vec_to_bpm 9 | np.random.seed(1) # for reproducibility 10 | 11 | from keras.callbacks import EarlyStopping 12 | from keras.models import Sequential 13 | from keras.layers.core import Dense, Dropout, Activation, Reshape, Flatten 14 | from keras.layers.convolutional import Convolution2D, MaxPooling2D 15 | from keras.layers.wrappers import TimeDistributed 16 | from keras.optimizers import SGD, Adam, RMSprop, Adadelta 17 | 18 | print("Loading training data...") 19 | X_train, y_train, bpms_train, fnames_train = cPickle.load(open('Xy_pulse3.dump', 'rb')) 20 | print("Loading validation data...") 21 | X_val, y_val, bpms_val, fnames_val = cPickle.load(open('Xy_vali_pulse3.dump', 'rb')) 22 | 23 | input_time_dim = X_train.shape[2] 24 | input_freq_dim = X_train.shape[3] 25 | output_length = y_train.shape[1] 26 | 27 | drop_hid = 0.25 28 | num_filters = 32 29 | dense_widths = [output_length*2, output_length] 30 | 31 | early = EarlyStopping(monitor='val_loss', patience=1, verbose=1, mode='auto') 32 | 33 | model = Sequential() 34 | 35 | model.add(Convolution2D(num_filters, 3, 3, border_mode='same', 36 | input_shape=(1, input_time_dim, input_freq_dim))) 37 | model.add(Activation('relu')) 38 | 39 | model.add(Convolution2D(num_filters, 5, 5, border_mode='same')) 40 | model.add(Activation('relu')) 41 | 42 | model.add(Reshape((input_time_dim, input_freq_dim * num_filters))) 43 | 44 | model.add(TimeDistributed(Dense(256))) 45 | model.add(Activation('relu')) 46 | model.add(TimeDistributed(Dense(128))) 47 | model.add(Activation('relu')) 48 | model.add(TimeDistributed(Dense(8))) 49 | model.add(Activation('relu')) 50 | 51 | model.add(Flatten()) 52 | if drop_hid: 53 | model.add(Dropout(drop_hid)) 54 | 55 | for w in dense_widths: 56 | model.add(Dense(w)) 57 | model.add(Activation('relu')) 58 | if drop_hid: 59 | model.add(Dropout(drop_hid)) 60 | model.add(Dense(output_length)) 61 | model.add(Activation('relu')) 62 | 63 | model.summary() 64 | 65 | #opt = Adadelta() 66 | #opt = SGD(lr=0.001) 67 | opt = Adam() 68 | 69 | model.compile(loss='mse', 70 | optimizer=opt, 71 | metrics=[]) 72 | 73 | batch_size = 1536 74 | nb_epoch = 2 75 | history = model.fit(X_train, y_train, 76 | batch_size=batch_size, nb_epoch=nb_epoch, 77 | verbose=1, validation_data=(X_val, y_val), 78 | shuffle=True, callbacks=[early]) 79 | 80 | model.save('convnet_val_loss=' +\ 81 | str(np.round(history.history['val_loss'][-1], 4)) + '.kerasmodel') 82 | 83 | -------------------------------------------------------------------------------- /mp3s_to_fft_features.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Aug 25 10:22:52 2016 4 | 5 | Help from: 6 | http://stackoverflow.com/a/23378284/6167850 7 | 8 | @author: liam 9 | """ 10 | from os import listdir 11 | import numpy as np 12 | from matplotlib import pyplot as plt 13 | import cPickle 14 | from NeuralSounds import * 15 | 16 | # Audio files are stored in this relative path 17 | TRACKS_PATH = '../tracks/bpmd_wavs' 18 | 19 | # Did multiples of 96 as my GPU has 96 cores 20 | NUM_TRAIN = 96 * 300 21 | NUM_VALI = 96 * 20 22 | NUM_TEST = 96 * 50 23 | 24 | # Randomly shuffle the mp3 files 25 | np.random.seed(2) 26 | all_tracks = listdir(TRACKS_PATH) 27 | np.random.shuffle(all_tracks) 28 | 29 | # Initialise the class with a large number of samples per file - this 30 | # increases speed as we get many samples from one .wav at a time 31 | ns = NeuralSounds(downsample=32, 32 | num_samples_per_file=960, 33 | desired_X_time_dim=160, 34 | fft_sample_length=768, 35 | fft_step_size=512, 36 | track_fnames=all_tracks[0:-6], 37 | tracks_path=TRACKS_PATH) 38 | 39 | X, y, bpms, fnames = ns.get_spectogram_training_set(n_batch=NUM_TRAIN) 40 | cPickle.dump((X, y, bpms, fnames), open('Xy_pulse3.dump', 'wb')) 41 | 42 | # Change the number of samples per file for creating the test and validation 43 | # sets, as these are smaller so speed is not such an issue, and we want 44 | # variation. Also use different tracks to test/validate. 45 | ns.num_samples_per_file = 100 46 | ns.track_fnames = all_tracks[-6:] 47 | 48 | X, y, bpms, fnames = ns.get_spectogram_training_set(NUM_VALI) 49 | cPickle.dump((X, y, bpms, fnames), open('Xy_vali_pulse3.dump', 'wb')) 50 | 51 | X, y, bpms, fnames = ns.get_spectogram_training_set(NUM_TEST) 52 | cPickle.dump((X, y, bpms, fnames), open('Xy_test_pulse3.dump', 'wb')) 53 | 54 | # We can plot the beat spikes (training output) over the spectograms 55 | # (training input). The spikes should match up with the beats. 56 | for i in xrange(0, len(y), max(len(y) / 10, 1)): 57 | plt.figure(figsize=(10,8)) 58 | plt.plot(y[i] * 1000 - 900, 'black', linewidth=1) 59 | plt.imshow(X[i][0].T, aspect='auto', origin='top') 60 | plt.show() 61 | -------------------------------------------------------------------------------- /nn_phase2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Sep 3 23:09:01 2016 4 | 5 | @author: liam 6 | 7 | 1. Convert wav to a few 2 minute clips = 512 * 20 desired time dim 8 | 2. Take a 2 minute clip, divide it into the 20 sections 9 | 3. Predict each section using current NN 10 | 4. Concatenate sections 11 | 5. Predict target vector using concatenated sections 12 | 13 | """ 14 | 15 | from os import listdir 16 | import numpy as np 17 | from matplotlib import pyplot as plt 18 | import cPickle 19 | from NeuralSounds import * 20 | 21 | ''' 22 | In this relative path, you need a bunch of mp3s that have the correct BPM 23 | stored in their ID3 tag data, start on the first beat, and don't have any 24 | skipped beats, tempo changes, etc... at least not in the first minute 25 | ''' 26 | TRACKS_PATH = '../tracks/bpmd_wavs' 27 | 28 | # Did multiples of 96 as my GPU has 96 cores 29 | NUM_TRAIN = 96 30 | NUM_VALI = 96 31 | NUM_TEST = 96 32 | 33 | # Randomly shuffle the mp3 files 34 | np.random.seed(2) 35 | all_tracks = listdir(TRACKS_PATH) 36 | np.random.shuffle(all_tracks) 37 | 38 | # Initialise the class with a large number of samples per file - this 39 | # increases speed as we get many samples from one .wav at a time 40 | ns = NeuralSounds(downsample=32, 41 | num_samples_per_file=8, 42 | desired_X_time_dim=160 * 32, # about 1 minute 43 | fft_sample_length=768, 44 | fft_step_size=512, 45 | track_fnames=all_tracks[-6:], 46 | tracks_path=TRACKS_PATH) 47 | 48 | X_train, y_train, bpms_train, fnames_train = \ 49 | ns.get_spectogram_training_set(n_batch=NUM_TRAIN) 50 | 51 | ns.track_fnames=all_tracks[:-6] 52 | X_val, y_val, bpms_val, fnames_val = \ 53 | ns.get_spectogram_training_set(n_batch=NUM_VALI) 54 | #%% 55 | from keras.models import load_model 56 | model = load_model('convnet.kerasmodel') 57 | #%% 58 | def get_phase_2_train_input_vector(X, model): 59 | X_out = np.zeros((X.shape[0], X.shape[2])) 60 | for i in range(0, x.shape[2], 160): 61 | print np.round(i * 1. / x.shape[2], 2), '...' 62 | X_out[:, i:(i+160)] = model.predict(X[:, :, i:(i+160), :]) 63 | return X_out 64 | 65 | X_train = get_phase_2_train_input_vector(X, model) 66 | X_val = get_phase_2_train_input_vector(X_val, model) 67 | #%% 68 | from keras.callbacks import EarlyStopping 69 | from keras.models import Sequential 70 | from keras.layers.core import Dense, Dropout, Activation, Reshape, Flatten 71 | from keras.layers.convolutional import Convolution2D, MaxPooling2D 72 | from keras.layers.wrappers import TimeDistributed 73 | from keras.optimizers import SGD, Adam, RMSprop, Adadelta 74 | 75 | output_length = y_train.shape[1] 76 | drop_in = 0.1, 77 | drop_hid = 0.25 78 | 79 | dense_widths = [output_length, output_length] 80 | 81 | early = EarlyStopping(monitor='val_loss', patience=100, verbose=1, mode='auto') 82 | 83 | phase2_model = Sequential() 84 | 85 | #phase2_model.add(Dropout(drop_in, input_dim=X_train.shape[1])) 86 | phase2_model.add(Dense(dense_widths[0], input_dim=X_train.shape[1])) 87 | 88 | for w in dense_widths[1:]: 89 | phase2_model.add(Dense(w)) 90 | phase2_model.add(Activation('relu')) 91 | if drop_hid: 92 | phase2_model.add(Dropout(drop_hid)) 93 | phase2_model.add(Dense(output_length)) 94 | phase2_model.add(Activation('relu')) 95 | 96 | phase2_model.summary() 97 | 98 | opt = Adadelta() 99 | #opt = SGD(lr=0.01) 100 | #opt = Adam() 101 | 102 | phase2_model.compile(loss='mse', 103 | optimizer=opt, 104 | metrics=[]) 105 | 106 | batch_size = 96 107 | nb_epoch = 100 108 | history = phase2_model.fit(X_train, y_train, 109 | batch_size=batch_size, nb_epoch=nb_epoch, 110 | verbose=1, validation_data=(X_val, y_val), 111 | shuffle=True, callbacks=[early]) 112 | #%% 113 | def plot_pulses(idx, X, y, model, rng=[0,500]): 114 | i = model.predict(X[idx:(idx+1)]) 115 | a = i[0, rng[0]:rng[1]] 116 | b = y[idx, rng[0]:rng[1]] 117 | plt.plot(np.vstack((a, b * 10000 - 9990)).T) 118 | plt.ylim([0, 1.]) 119 | plt.xlim([0, a.shape[0]]) 120 | plt.xlabel('Time') 121 | plt.ylabel('Pulse') 122 | plt.show() 123 | 124 | plot_pulses(5, X_val, y_val, phase2_model) 125 | #%% 126 | for i in range(0, len(X_train), len(X_train)/20): 127 | plot_pulses(i, X_train, y_train, phase2_model) 128 | plt.show() 129 | for i in range(0, len(X_train), len(X_train)/20): 130 | plt.plot(X_train[i][3000:3500]) 131 | plt.show() 132 | #%% 133 | for i in range(0, len(X_val), len(X_val)/20): 134 | plot_pulses(i, X_val, y_val, phase2_model) 135 | plt.show() 136 | #%% 137 | rng=[3000,3500] 138 | for i in range(0, len(X_val), len(X_val)/20): 139 | plt.plot(X_val[i][rng[0]:rng[1]]) 140 | plt.plot(y[i, rng[0]:rng[1]]) 141 | plt.show() 142 | 143 | 144 | -------------------------------------------------------------------------------- /vec_to_bpm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Aug 30 22:58:18 2016 4 | 5 | Module to estimate the bpm based on a vector of pulses 6 | 7 | @author: liam 8 | """ 9 | 10 | import numpy as np 11 | 12 | def linear_interp_nans(y): 13 | # Fit a linear regression to the non-nan y values 14 | 15 | # Create X matrix for linreg with an intercept and an index 16 | X = np.vstack((np.ones(len(y)), np.arange(len(y)))) 17 | 18 | # Get the non-NaN values of X and y 19 | X_fit = X[:, ~np.isnan(y)] 20 | y_fit = y[~np.isnan(y)].reshape(-1, 1) 21 | 22 | # Estimate the coefficients of the linear regression 23 | beta = np.linalg.lstsq(X_fit.T, y_fit)[0] 24 | 25 | # Fill in all the nan values using the predicted coefficients 26 | y.flat[np.isnan(y)] = np.dot(X[:, np.isnan(y)].T, beta) 27 | return y 28 | 29 | def vec_to_bpm(vec, verbose=False, resolution=172, seconds=4): 30 | fps = resolution * 1. / seconds 31 | a = np.where(vec > 0.01) 32 | a = np.array(a[0]) 33 | curr, means = [], [] 34 | for idx, i in enumerate(a[:-1]): 35 | if a[idx] == a[idx + 1] - 1: 36 | curr.append(a[idx]) 37 | else: 38 | if verbose: 39 | print 'curr', curr 40 | if idx == 0 or idx == (len(a) - 1): 41 | means.append(a[idx]) 42 | else: 43 | means.append(np.mean(curr)) 44 | curr = [] 45 | means = linear_interp_nans(np.array(means)) 46 | gap = means[1:] - means[:-1]# - 1\ 47 | gap_in_seconds = gap / fps 48 | gap_in_seconds = np.mean(gap_in_seconds) 49 | return 60. / gap_in_seconds --------------------------------------------------------------------------------