├── README.md ├── clean.sh ├── examples └── ode_an_die_freude.wav ├── generate_data.py ├── notes ├── 60 C4.wav ├── 62 D4.wav ├── 64 E4.wav ├── 65 F4.wav ├── 67 G4.wav ├── 69 A4.wav └── 71 B4.wav ├── onset_detection.py ├── pitch_detection.py ├── prev_accuracy.txt ├── train.py └── wav.py /README.md: -------------------------------------------------------------------------------- 1 | # piano note prediction 2 | 3 | This is a program to automatically detect and classify notes from a piano sound recording. 4 | 5 | Due to github's size limit on repositories, it wasn't possible to upload the complete training data set. 6 | 7 | 8 | The scripts are meant to run on a gpu, otherwise the program won't terminate within a reasonable time frame. 9 | 10 | I recommend installing anaconda, and using conda as a package manager and to create a virtual environment. 11 | 12 | Create a virtual environment: 13 | conda create -n test_env 14 | 15 | Enter the virtual environment: 16 | conda activate test_env 17 | 18 | Install numpy and matplotlib with conda: 19 | conda install numpy 20 | conda install matplotlib 21 | 22 | Since installing cuda, keras, and tensorflow is nontrivial and differs from setup to setup, I recommend following a suitable guide or tutorial. 23 | 24 | Cuda can be downloaded from the nvidia homepage: 25 | https://developer.nvidia.com/cuda-downloads 26 | 27 | Install compatible versions of Tensorflow and Keras for gpu: 28 | https://www.tensorflow.org/install 29 | https://keras.io/ 30 | 31 | 32 | 33 | 34 | Running the software 35 | 36 | 37 | Train the model for note prediction: 38 | python3 train.py 39 | 40 | 41 | To apply the trained model to predict notes, run pitch_detection.py with the file name of a 24bit mono WAV piano recording as an argument. There are a few example files to test with in the "examples" folder. 42 | 43 | An example call to predict notes could look like this: 44 | python pitch_detection.py examples/ode_an_die_freude.wav 45 | 46 | 47 | Note that while the visual representation of the predictions works better for short sound files, all predictions are printed to the terminal. 48 | 49 | 50 | Detect note onsents and plot the results: 51 | python onset_detection.py 52 | 53 | 54 | -------------------------------------------------------------------------------- /clean.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | # run this to clean up old model data 4 | 5 | rm -r ./__pycache__ 6 | rm ./model/piano_model.h5 7 | -------------------------------------------------------------------------------- /examples/ode_an_die_freude.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miniplane/piano-note-prediction/4a65231cd6ebaad6322305f66d6066a32da2c2ce/examples/ode_an_die_freude.wav -------------------------------------------------------------------------------- /generate_data.py: -------------------------------------------------------------------------------- 1 | 2 | import wav 3 | import train 4 | import onset_detection 5 | 6 | import numpy as np 7 | import random 8 | import math 9 | 10 | 11 | class DataGenerator: 12 | 13 | numfreq = 1024 14 | overlap = 0 15 | samplerate = 44100 16 | time = 0.1 # length of the note starting from onset, used for generation 17 | # increase for better accuracy but slower training 18 | minpar = 1 19 | maxpar = 2 20 | source_pcms = [] # collected pcm data of source files 21 | onsetsall = [] # ith index is array with time of onsets in file i 22 | TrainX = [] 23 | TrainY = [] 24 | TestX = [] 25 | TestY = [] 26 | 27 | def __init__(self): 28 | self.source_pcms = [] # collected pcm data of source files 29 | self.onsetsall = [] # ith index is array with time of onsets in file i 30 | self.TrainX = [] 31 | self.TrainY = [] 32 | self.TestX = [] 33 | self.TestY = [] 34 | random.seed() 35 | 36 | def create_adddata(self, onsets, train): 37 | # create a single note or chord 38 | 39 | ndat = math.floor(self.time*self.samplerate) # number of frames in that time 40 | pcm_slice = [0]*ndat # create "silent" wav sample with length time 41 | Y = [0]*len(onsets) 42 | 43 | for pitch in range(len(onsets)): 44 | # generate a note or notes 45 | 46 | if onsets[pitch]>=0: 47 | 48 | for i in range(ndat): 49 | try: 50 | pcm_slice[i]+=self.source_pcms[pitch][onsets[pitch]+i] 51 | except: return 52 | 53 | Y[pitch] = 1 # indicates a note on this pitch 54 | 55 | for i in range(ndat): 56 | # normalize samples by dividing them through the number of occuring notes 57 | pcm_slice[i] = int(pcm_slice[i] / (len(onsets)-onsets.count(-1))) 58 | 59 | # compute fft from pcm_slice 60 | fft1 = wav.compute_fft(pcm_slice, 0, self.time, self.samplerate, 32, 0) 61 | fft2 = wav.compute_fft(pcm_slice, 0, self.time, self.samplerate, 256, 0) 62 | fft = wav.compute_fft(pcm_slice, 0, self.time, self.samplerate, self.numfreq, self.overlap) 63 | X = np.concatenate((fft1.flatten(), fft2.flatten(), fft.flatten())) 64 | 65 | if train: 66 | self.TrainX.append(X) 67 | self.TrainY.append(Y) 68 | else: 69 | self.TestX.append(X) 70 | self.TestY.append(Y) 71 | 72 | 73 | 74 | def create_samples(self, onsetlists, train, numblends): 75 | # create training or test samples from a subet of samples 76 | 77 | nlists = np.shape(onsetlists)[0] 78 | 79 | for k in range(nlists): # add all pure wavs 80 | for i in range(len(onsetlists[k])): 81 | onsets = [-1] * nlists 82 | onsets[k] = onsetlists[k][i] 83 | self.create_adddata(onsets, train) 84 | 85 | if nlists == 1: return 86 | 87 | for i in range(numblends): # number of blended samples to be created 88 | 89 | if i%1000 == 0: 90 | print(i) 91 | 92 | onsets = [-1]*nlists 93 | uplim = min((nlists+1),(self.maxpar+1)) 94 | num = random.choice([i for i in range(self.minpar,uplim)]) # random number of wavs to blend over each other 95 | wavs = random.sample(range(nlists), num) # random list of wavs to blend over 96 | 97 | for k in wavs: # loop over them 98 | onsets[k] = random.sample(onsetlists[k], 1)[0] # pick one sample from the wav 99 | 100 | self.create_adddata(onsets, train) # add the blended samples 101 | 102 | 103 | 104 | def create_data(self): 105 | # create the training and test data 106 | 107 | self.TrainX = [] 108 | self.TrainY = [] 109 | self.TestX = [] 110 | self.TestY = [] 111 | 112 | onsetstrain = [] 113 | onsetstest = [] 114 | 115 | ndat = np.shape(self.source_pcms)[0] 116 | 117 | for k in range(ndat): 118 | length = len(self.onsetsall[k]) # num of notes per file 119 | numtrain = math.floor(length*0.75) # use 75% of notes 120 | 121 | onsets = self.onsetsall[k] 122 | random.shuffle(onsets) 123 | onsetstrain.append(onsets[:numtrain]) 124 | onsetstest.append(onsets[numtrain:]) 125 | 126 | self.create_samples(onsetstrain, True, 5000) # create the training data, including blends 127 | self.create_samples(onsetstest, False, 1000) # create test data 128 | 129 | self.TrainX = np.array(self.TrainX) 130 | self.TrainY = np.array(self.TrainY) 131 | self.TestX = np.array(self.TestX) 132 | self.TestY = np.array(self.TestY) 133 | 134 | return self.TrainX, self.TrainY, self.TestX, self.TestY 135 | 136 | 137 | 138 | def load_wavs(self): 139 | # load the raw training data files 140 | 141 | trainingfiles = wav.get_training_filenames() 142 | print (trainingfiles) 143 | 144 | for pitch in trainingfiles: 145 | pcm_data, self.samplerate = wav.read_wav(pitch, 400) 146 | onsets = onset_detection.detect_onset(pcm_data, self.samplerate) # onsets in file 147 | self.source_pcms.append(pcm_data) 148 | self.onsetsall.append(onsets) 149 | 150 | return [i for i in range(0, len(trainingfiles))] 151 | 152 | -------------------------------------------------------------------------------- /notes/60 C4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miniplane/piano-note-prediction/4a65231cd6ebaad6322305f66d6066a32da2c2ce/notes/60 C4.wav -------------------------------------------------------------------------------- /notes/62 D4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miniplane/piano-note-prediction/4a65231cd6ebaad6322305f66d6066a32da2c2ce/notes/62 D4.wav -------------------------------------------------------------------------------- /notes/64 E4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miniplane/piano-note-prediction/4a65231cd6ebaad6322305f66d6066a32da2c2ce/notes/64 E4.wav -------------------------------------------------------------------------------- /notes/65 F4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miniplane/piano-note-prediction/4a65231cd6ebaad6322305f66d6066a32da2c2ce/notes/65 F4.wav -------------------------------------------------------------------------------- /notes/67 G4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miniplane/piano-note-prediction/4a65231cd6ebaad6322305f66d6066a32da2c2ce/notes/67 G4.wav -------------------------------------------------------------------------------- /notes/69 A4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miniplane/piano-note-prediction/4a65231cd6ebaad6322305f66d6066a32da2c2ce/notes/69 A4.wav -------------------------------------------------------------------------------- /notes/71 B4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miniplane/piano-note-prediction/4a65231cd6ebaad6322305f66d6066a32da2c2ce/notes/71 B4.wav -------------------------------------------------------------------------------- /onset_detection.py: -------------------------------------------------------------------------------- 1 | import wav 2 | 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import queue 6 | import sys 7 | 8 | 9 | def detect_onset(buffer, samplerate, plot=False, pitchdetection=False, silence=0.01): 10 | # detect onsents of notes in a wav file 11 | 12 | print("detecting onsets") 13 | 14 | ndat = np.shape(buffer)[0] 15 | sums = [] 16 | window_values = [] 17 | onset_times = [] 18 | windowsize = 1000 19 | skip = 0 20 | maximum = 0 21 | wsum = 0 22 | 23 | for i in range(len(buffer)): 24 | # calculate sum of values in a window for each sample, and add them to a list 25 | 26 | newval = abs(compression(buffer[i])) 27 | window_values.append(newval) 28 | wsum += newval 29 | 30 | if len(window_values) > windowsize: 31 | wsum -= window_values.pop(0) 32 | 33 | normalized_wsum = wsum / windowsize 34 | maximum = max(maximum, normalized_wsum) 35 | sums.append(normalized_wsum + 1) 36 | 37 | 38 | for i in range(0, len(sums)): 39 | # find the onsets 40 | 41 | sums[i] /= maximum # scale value of sample based on the max value 42 | 43 | if (skip > 0): 44 | skip -= 1 45 | continue 46 | if (sums[i] > silence): 47 | if (i > 5000) and (sums[i] / sums[i-5000] > 2): 48 | onset_times.append(i) 49 | skip = 10000 50 | 51 | print("onsets found:", onset_times) 52 | 53 | if plot: 54 | plot_onsets(sums, samplerate, onset_times) 55 | 56 | if pitchdetection: 57 | return(sums, onset_times) 58 | 59 | return(onset_times) 60 | 61 | 62 | def compression(sample, rate = 2): 63 | # prepare data for onset detection by making large values larger and small values smaller 64 | 65 | MAX = 2**23 # maximal possible value, 24 bit signed = 2^23 66 | 67 | sign = 1 68 | 69 | if (sample < 0): 70 | sign = -1 71 | 72 | normalized = abs(sample / MAX) 73 | normalized = 1 - ((1 - normalized)**rate) 74 | 75 | return sign * normalized * MAX 76 | 77 | 78 | def plot_onsets(sums, samplerate, onset_times): 79 | # create a visual representation of the onsets 80 | # debug function 81 | 82 | print("plot onsets") 83 | sums = np.asarray(sums) 84 | time = np.arange(len(sums)) / samplerate 85 | 86 | # draw a plot of the compressed audio samples 87 | plt.figure(1) 88 | plt.subplot(211) 89 | plt.plot(time, sums) 90 | 91 | # draw a vertical line at each detected onset 92 | for i in onset_times: 93 | xpos = i/samplerate 94 | plt.plot([xpos, xpos], [0, 1], 'k-', lw=1) 95 | 96 | plt.show() 97 | 98 | 99 | 100 | if __name__ == '__main__': 101 | 102 | if (len(sys.argv) > 1): 103 | pcm_data, samplerate = wav.read_wav(sys.argv[1], 400) 104 | detect_onset(pcm_data, samplerate, True) 105 | -------------------------------------------------------------------------------- /pitch_detection.py: -------------------------------------------------------------------------------- 1 | 2 | import onset_detection 3 | import wav 4 | 5 | import os.path 6 | import sys 7 | import math 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | 11 | from keras.models import load_model 12 | 13 | 14 | 15 | def extract_notes(pcm_data, onset_times, samplerate): 16 | # filter all notes from an audio file 17 | 18 | time = 0.1 19 | numfreq = 1024 20 | overlap = 0 21 | 22 | notes = [] 23 | 24 | for onset in onset_times: 25 | 26 | pcm_slice = pcm_data[onset:onset+math.floor(time*samplerate)] 27 | 28 | fft1 = wav.compute_fft(pcm_slice, 0, time, samplerate, 32, 0) 29 | fft2 = wav.compute_fft(pcm_slice, 0, time, samplerate, 256, 0) 30 | fft = wav.compute_fft(pcm_slice, 0, time, samplerate, numfreq, overlap) 31 | 32 | X = np.concatenate((fft1.flatten(), fft2.flatten(), fft.flatten())) # fft slice 33 | 34 | notes.append(X) 35 | 36 | return np.array(notes) 37 | 38 | 39 | 40 | def load_wav(wav_file): 41 | # load audio file 42 | 43 | pcm_data, samplerate = wav.read_wav(wav_file, 400) 44 | sums, onset_times = onset_detection.detect_onset(pcm_data, samplerate, False, True) 45 | return pcm_data, onset_times, samplerate, sums 46 | 47 | 48 | 49 | def evaluate_model(filename): 50 | # apply trained model on an audio file to predict pitches 51 | 52 | if os.path.isfile('model/piano_model.h5'): 53 | print("load model") 54 | model = load_model('model/piano_model.h5') 55 | else: 56 | print("no model available") 57 | return 58 | 59 | pcm_data, onset_times, samplerate, sums = load_wav(filename) 60 | notes = extract_notes(pcm_data, onset_times, samplerate) 61 | model.compile(loss='mean_squared_error', optimizer='adamax') 62 | 63 | trainingfiles = wav.get_training_filenames() 64 | 65 | for i in range(len(trainingfiles)): 66 | trainingfiles[i] = trainingfiles[i].split("/")[1].split(".")[0] 67 | 68 | pred = model.predict(notes) 69 | predicted_notes = [] 70 | 71 | for i in range(len(pred)): 72 | 73 | best_guess = 0 74 | num = 0 75 | nums = [] 76 | 77 | for e, f in enumerate(pred[i]): 78 | 79 | if best_guess < f: 80 | best_guess = f 81 | num = e 82 | 83 | if f > 0.85: 84 | nums.append(e) 85 | 86 | results = [trainingfiles[i] for i in nums] 87 | predicted_notes.append(results) 88 | print("Best guess:", " ".join(results)) 89 | 90 | plot_pitches(sums, samplerate, onset_times, predicted_notes) 91 | 92 | 93 | 94 | def plot_pitches(sums, samplerate, onset_times, predicted_notes): 95 | # create a visual representation of the onset_times 96 | # debug function 97 | 98 | print("plot onset_times") 99 | sums = np.asarray(sums) 100 | time = np.arange(len(sums)) / samplerate 101 | 102 | # draw a plot of the compressed audio samples 103 | plt.figure(1) 104 | plt.subplot(211) 105 | plt.plot(time, sums) 106 | 107 | # draw a vertical line at each detected onset 108 | for e, i in enumerate(onset_times): 109 | xpos = i/samplerate 110 | for u, j in enumerate(predicted_notes[e]): 111 | plt.text(xpos, 1.3-((u+1)*0.1), j) 112 | plt.plot([xpos, xpos], [0, 1], 'k-', lw=1) 113 | 114 | plt.show() 115 | 116 | 117 | 118 | if __name__ == '__main__': 119 | if (len(sys.argv) > 1): 120 | evaluate_model(sys.argv[1]) 121 | -------------------------------------------------------------------------------- /prev_accuracy.txt: -------------------------------------------------------------------------------- 1 | 1 2 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | 2 | import wav 3 | import generate_data 4 | 5 | import os.path 6 | import math 7 | 8 | import keras 9 | 10 | from keras.models import Sequential, load_model 11 | from keras.layers.core import Dense, Dropout, Activation 12 | 13 | TrainX = [] 14 | TrainY = [] 15 | TestX = [] 16 | TestY = [] 17 | 18 | categories = [] 19 | model_loaded = False 20 | 21 | 22 | 23 | def find_discriminator(pred_index, predictions, actual_values): 24 | # find cutoff value with minimal errors 25 | 26 | minsum = 9999999999999 # minimal error difference 27 | mincut = 0 # cutoff value with minimal error rate, basically weights of model 28 | 29 | for l in range(20,100): 30 | cut = l / 100 31 | sum_incorrect = 0 # counter for incorrect guesses 32 | 33 | for i in range(len(predictions)): 34 | 35 | predicted_value = 1 36 | 37 | if predictions[i][pred_index] < cut: 38 | predicted_value = 0 39 | 40 | sum_incorrect += abs(predicted_value - actual_values[i][pred_index]) 41 | 42 | if sum_incorrect <= minsum: 43 | minsum = sum_incorrect 44 | mincut = cut 45 | 46 | return len(predictions), minsum, mincut 47 | 48 | 49 | 50 | def evaluate_model(model): 51 | # evaluate and print model's performance 52 | 53 | global TrainX, TrainY, TestX, TestY 54 | 55 | pred = model.predict(TestX) 56 | for i in range(len(pred)): 57 | print("Predictions:" + str(['{:f}'.format(f) for f in pred[i]]) + " " + str(['{:1.0f}'.format(f) for f in TestY[i]]) ) 58 | 59 | sumwrong = 0 60 | sumpred = 0 61 | mincuts = [] 62 | 63 | for i in range(len(pred[0])): 64 | 65 | npred, minsum, mincut = find_discriminator(i, pred, TestY) 66 | print(str(npred) + " " + str(minsum) + " " + str(mincut)) 67 | 68 | sumwrong += minsum 69 | sumpred += npred 70 | mincuts.append(mincut) 71 | 72 | print("\ntotal trained: " + str(sumpred) + ", total wrong: " + str(sumwrong) + ", error rate: " + str(sumwrong / sumpred)) 73 | return sumwrong / sumpred 74 | 75 | 76 | 77 | def safe_model(model, accuracy): 78 | # save model if it performs better than the previously best model 79 | 80 | global model_loaded 81 | 82 | with open("prev_accuracy.txt", 'r+') as file: 83 | 84 | content = file.readlines() 85 | content = "".join([str(x.strip()) for x in content]) 86 | 87 | if not content or (float(content) > accuracy) or model_loaded==False: 88 | 89 | model.save('model/piano_model.h5') 90 | print("model saved as model/piano_model.h5") 91 | 92 | file.seek(0) 93 | file.truncate() 94 | file.write(str(accuracy)) 95 | 96 | elif float(content) < accuracy: 97 | print("accuracy worse than in the previous run. model not saved.") 98 | 99 | 100 | 101 | def train_model(categories, numepochs, model): 102 | 103 | print('training') 104 | global TrainX, TrainY, TestX, TestY, model_loaded 105 | 106 | if not model_loaded: 107 | model.add(Dense(6000, input_dim=len(TrainX[0]))) 108 | model.add(Activation('relu')) 109 | model.add(Dense(1000)) 110 | model.add(Activation('sigmoid')) 111 | model.add(Dropout(0.5)) 112 | model.add(Dense(len(categories))) 113 | model.add(Activation('sigmoid')) 114 | 115 | model.compile(loss='mean_squared_error', optimizer='adamax') 116 | model.fit(TrainX, TrainY, batch_size=256, nb_epoch=numepochs) 117 | return model 118 | 119 | 120 | 121 | def begin_training(iterations): 122 | 123 | global TrainX, TrainY, TestX, TestY, categories, model_loaded 124 | 125 | generator = generate_data.DataGenerator() 126 | categories = generator.load_wavs() 127 | print(categories) 128 | 129 | for i in range(iterations): 130 | 131 | TrainX, TrainY, TestX, TestY = generator.create_data() 132 | epoch_num = 50 133 | 134 | # load previous model if one exists 135 | if os.path.isfile('model/piano_model.h5'): 136 | print("Load previous model") 137 | model = load_model('model/piano_model.h5') 138 | model_loaded = True 139 | model = train_model(categories, epoch_num, model) 140 | else: 141 | print("Create new model") 142 | model = Sequential() 143 | model = train_model(categories, epoch_num, model) 144 | 145 | accuracy = evaluate_model(model) 146 | safe_model(model, accuracy) 147 | del model 148 | 149 | 150 | 151 | if __name__ == '__main__': 152 | begin_training(5) 153 | -------------------------------------------------------------------------------- /wav.py: -------------------------------------------------------------------------------- 1 | import train 2 | 3 | import wave 4 | import numpy as np 5 | import math 6 | import struct 7 | import random 8 | import os 9 | import glob 10 | from array import array 11 | import matplotlib.pyplot as plt 12 | 13 | 14 | 15 | def create_wav(data=None): 16 | # debug function for saving training and test pcm data as wav files 17 | 18 | file = "test.wav" 19 | framerate = train.samplerate 20 | 21 | if data == None: 22 | data, framerate = read_wav(files[0], 400) 23 | 24 | w = wave.open(file, mode="wb") 25 | 26 | w.setframerate(framerate) 27 | w.setnchannels(1) # mono 28 | w.setsampwidth(3) 29 | 30 | buf = bytes() 31 | 32 | for val in data: 33 | buf += struct.pack('i', val)[1:4] 34 | 35 | w.writeframes(buf) 36 | w.close() 37 | 38 | 39 | 40 | def get_training_filenames(): 41 | # get all .wav files from the notes dir 42 | 43 | trainingfiles = [] 44 | 45 | path = os.path.abspath("notes") 46 | os.chdir(path) 47 | 48 | for file in glob.glob("*.wav"): 49 | trainingfiles.append("notes/"+file) 50 | 51 | os.chdir("..") 52 | 53 | return sorted(trainingfiles) 54 | 55 | 56 | 57 | def read_wav(filename, sec): 58 | # parse wav and return frames and framerate 59 | 60 | print ( "reading " + filename ) 61 | wvf = wave.open(filename) 62 | nframes = wvf.getnframes() 63 | framerate = wvf.getframerate() 64 | 65 | if(sec > 0): 66 | nframes = min(nframes, sec*framerate) 67 | 68 | pcm = wvf.readframes(nframes) # list of frames 69 | data = array('i') 70 | 71 | for i in range(0,nframes*3,3): 72 | data.append(struct.unpack('= len(wvd): 91 | break 92 | 93 | data = np.asarray( wvd[i:i+N] ) 94 | freqval = np.fft.rfft(data) 95 | ffts = [] 96 | 97 | for k in range(numfreq): 98 | ffts.append(np.abs( freqval[k] )) 99 | 100 | spec.append(ffts) 101 | times.append( i / framerate) 102 | i = i + incr 103 | 104 | return np.asarray(spec), np.asarray(times), np.asarray(freqs) 105 | 106 | 107 | 108 | def compute_fft(wvd, startidx, sec, framerate, numfreq, overlap): 109 | # compute fast fourier transformation of a recording 110 | 111 | fft, t, f = compute_spectrogram( wvd, startidx, sec, framerate, numfreq, overlap ) 112 | return fft 113 | 114 | 115 | 116 | def plot_wav(x, samplerate, Sxx, t, f, time): 117 | # plot wave file to a diagram 118 | # debug function 119 | 120 | print("plot wav") 121 | 122 | plt.figure(1) 123 | plt.subplot(211) 124 | plt.plot(time, x) 125 | 126 | plt.subplot(212) 127 | plt.pcolormesh(t, f, np.transpose( Sxx )) 128 | plt.ylabel('Frequency [Hz]') 129 | plt.xlabel('Time [sec]') 130 | 131 | plt.show() 132 | 133 | print(x) 134 | 135 | 136 | 137 | if __name__ == '__main__': 138 | # prints a spectogram 139 | 140 | print("read in file") 141 | x, samplerate = read_wav("examples/summertime.wav",400) 142 | print("compute_spectrogram") 143 | Sxx, t, f = compute_spectrogram(x, 0, 400, samplerate, 2048, 0) 144 | 145 | print("test") 146 | x = np.asarray(x) 147 | time = np.arange(len(x)) / samplerate 148 | plot_wav(x, samplerate, Sxx, t, f, time) 149 | 150 | 151 | --------------------------------------------------------------------------------