├── .gitignore ├── .gitmodules ├── README.md ├── conda.recipe └── meta.yaml ├── examples ├── DemoPyPeVoc.py ├── LPC on filtered noise.ipynb ├── PVexample.png ├── ProtectMarraigeInAmerica.wav ├── SoloGuitarArpegi.wav ├── SpeechRate.py ├── WavResynth.py ├── generate_mel_and_mfcc.py ├── glottal_flow.py ├── hide.wav ├── itches.wav ├── mfcc_segmenter.py ├── pepperCl.wav ├── pepperSx.wav ├── perlmanVn.wav ├── phoneme_descriptor_plot.py ├── phoneme_descriptors.py ├── phoneme_segmenter.py ├── progress_bar.ipynb ├── progress_bar.py ├── smaller_than_star.wav └── smirnoffVn.wav ├── pypevoc ├── AMDF.py ├── AudioInterface.py ├── FFTFilters.py ├── Heterodyne.py ├── PVAnalysis.py ├── PeakFinder.py ├── Periodicity.py ├── ProgressDisplay.py ├── SoundUtils.py ├── TransferFunctions.py ├── __init__.py └── speech │ ├── DAP.py │ ├── PitchJumps.py │ ├── SpeechAnalysis.py │ ├── SpeechChunker.py │ ├── SpeechSegmenter.py │ ├── __init__.py │ └── glottal.py ├── setup.py └── tests ├── test_glottal.py ├── test_peak_finder.py ├── test_periodicity.py ├── test_pypevoc.py ├── test_speech.py └── vibrato_obj.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | .DS_Store 4 | build/ 5 | dist/ 6 | *.egg-info/ 7 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "pypevoc/Yin"] 2 | path = pypevoc/Yin 3 | url = https://github.com/goiosunsw/Yin 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyPeVoc 2 | 3 | *PyPeVoc* is a simple Phase Vocoder library intendend mostly to analysis of sounds. It aims to be flexible and easy to adapt, and is thus entirely written in Python, rathe than using some components in C, which would make it faster but harder to maintain. 4 | 5 | Unlike other Phase vocoders, *PyPeVoc* keepsthe phase information of each sine wave, so that resynthesised waveforms match the original as close as possible. Other Phase Vocoders nly keep magnitude information. This is not a problem for the sound, but it is for analysis. 6 | 7 | ## Phase Vocoder 8 | 9 | ***Phase Vocoding*** is a technique of decomposing a periodic sound into its ***quasi-sinusoidal*** components. These are sinewaves that have slowly-varying frequency and amplitude. 10 | 11 | Sine-wave decomposition allows a wide-range of analysis and resynthesis techniques, exemplified in the Wiki (soon to come) 12 | 13 | ## Example situation 14 | 15 | ### Import required libraries 16 | 17 | ```python 18 | import numpy as np 19 | from pypevoc import PV 20 | ``` 21 | 22 | ### Create a periodic sound 23 | 24 | ```python 25 | # Sample rate 26 | sr = 44100 27 | 28 | # Vibrato frequency 29 | vibfreq = 5.0 30 | 31 | # Average amplitude of harmonics 32 | hamp0 = 0.1*np.array([1, .5, .3]) 33 | 34 | # Fraction variation of harmonics in vibrato 35 | hvib = 1.0*np.array([.5,0.1,.9]) 36 | # relative phase of harmonic variation 37 | hph = np.array([0,np.pi/2,np.pi]) 38 | f0 = 500 39 | 40 | # Depth of frequency vibrato 41 | f0vib = 0.01 42 | 43 | # signal duration 44 | dur = 1.0 45 | 46 | # will contain the final sound vector 47 | sig = np.zeros(int(sr*dur)) + 0.01*(np.random.rand(int(sr*dur))-.5) 48 | 49 | # time vector 50 | t = np.arange(0, dur, 1./sr) 51 | 52 | # vectors for varying properties of the sound 53 | hvibsig = np.zeros((int(sr*dur), len(hamp0))) 54 | vibsig = np.sin(2*np.pi*vibfreq*t) 55 | 56 | f0sig = f0 * (1 + f0vib*vibsig) 57 | 58 | for n,ha in enumerate(hamp0): 59 | hno = n+1 60 | fsig = f0sig*hno 61 | phsig = np.cumsum(2*np.pi*fsig/sr) 62 | hvibsig[:,n] = ha * (1 + hvib[n]*np.sin(2*np.pi*vibfreq*t+hph[n])) 63 | sig += (hvibsig[:,n]) * np.sin(phsig) 64 | 65 | ``` 66 | 67 | ### Analyse 68 | 69 | ``` python 70 | # creates a Phase vocoder object 71 | mypv = PV(sig, sr, nfft=2048, npks=len(hamp0)) 72 | 73 | # run the analysis 74 | mypv.run_pv() 75 | 76 | # plot the results in a time-frequency graph 77 | mypv.plot_time_freq() 78 | ``` 79 | 80 | ### Result 81 | 82 | ![ScreenShot](examples/PVexample.png) 83 | -------------------------------------------------------------------------------- /conda.recipe/meta.yaml: -------------------------------------------------------------------------------- 1 | {% set version = "0.3.0" %} 2 | 3 | package: 4 | name: "pypevoc" 5 | version: {{ version }} 6 | 7 | source: 8 | path: ../ 9 | # git-url: https://github.com/goiosunsw/pypevoc.git 10 | 11 | # build: 12 | # number: 0 13 | # script: python setup.py install --single-version-externally-managed --record=record.txt 14 | 15 | requirements: 16 | host: 17 | - numpy 18 | - pip 19 | - python 20 | run: 21 | - numpy 22 | - python 23 | 24 | about: 25 | home: "https://github.com/goiosunsw/pypevoc.git" 26 | license: "GNU Lesser General Public v3 or later (LGPLv3+)" 27 | license_family: "LGPL" 28 | license_file: "" 29 | summary: "Pure python phase vocoder" 30 | doc_url: "" 31 | dev_url: "" 32 | 33 | extra: 34 | recipe-maintainers: 35 | - goiosunsw 36 | -------------------------------------------------------------------------------- /examples/DemoPyPeVoc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pylab as pl 3 | import pandas as pd 4 | import sys 5 | sys.path.append('..') 6 | import PVAnalysis as pv 7 | 8 | # Sample rate 9 | sr = 44100 10 | #sr=150000 11 | 12 | # Vibrato frequency 13 | vibfreq = 5.0 14 | 15 | # Average amplitude of harmonics 16 | hamp0 = 0.1*np.array([1, .5, .3]) 17 | #hamp0 = 0.1*np.array([0.9]) 18 | 19 | # Fraction variation of harmonics in vibrato 20 | hvib = 1.0*np.array([.5,0.1,.9]) 21 | #hvib = 1.0*np.array([-.2]) 22 | # relative phase of harmonic variation 23 | hph = np.array([0,np.pi/2,np.pi]) 24 | # Mean fundamental frequency 25 | f0 = 500 26 | 27 | # Depth of frequency vibrato 28 | f0vib = 0.01 29 | 30 | # signal duration 31 | dur = 1.0 32 | 33 | sig = np.zeros(int(sr*dur)) + 0.00*(np.random.rand(int(sr*dur))-.5) 34 | vibsig = np.zeros(int(sr*dur)) 35 | hvibsig = np.zeros((int(sr*dur),len(hamp0))) 36 | t = np.arange(0,dur,1./sr) 37 | 38 | vibsig = np.sin(2*np.pi*vibfreq*t) 39 | 40 | f0sig = f0 * (1 + f0vib*vibsig) 41 | 42 | for n,ha in enumerate(hamp0): 43 | hno = n+1 44 | fsig = f0sig*hno 45 | phsig = np.cumsum(2*np.pi*fsig/sr) 46 | hvibsig[:,n] = ha * (1+hvib[n]*np.sin(2*np.pi*vibfreq*t+hph[n])) 47 | sig += (hvibsig[:,n]) * np.sin(phsig) 48 | 49 | #pl.plot(sig) 50 | pl.figure() 51 | ss=pl.specgram(sig,NFFT=1024/2) 52 | 53 | mypv=pv.PV(sig,sr,nfft=1024,npks=len(hamp0)) 54 | mypv.run_pv() 55 | mypv.plot_time_freq() 56 | 57 | ss=mypv.toSinSum() 58 | ss.plot_time_freq_mag(minlen=5) 59 | 60 | w=ss.synth(sr,mypv.hop/1) 61 | 62 | #pl.hold(True) 63 | pl.plot(sig,label='orig') 64 | pl.hold(True) 65 | pl.plot(w,label='resynth') 66 | pl.legend() 67 | -------------------------------------------------------------------------------- /examples/PVexample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/PVexample.png -------------------------------------------------------------------------------- /examples/ProtectMarraigeInAmerica.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/ProtectMarraigeInAmerica.wav -------------------------------------------------------------------------------- /examples/SoloGuitarArpegi.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/SoloGuitarArpegi.wav -------------------------------------------------------------------------------- /examples/SpeechRate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # SpeachRate.py 5 | # 6 | # estimate the speech rate of a file, 7 | # also generating segmentation textgrids 8 | # 9 | # Copyright 2017 Andre Almeida 10 | # 11 | # This program is free software; you can redistribute it and/or modify 12 | # it under the terms of the GNU General Public License as published by 13 | # the Free Software Foundation; either version 2 of the License, or 14 | # (at your option) any later version. 15 | # 16 | # This program is distributed in the hope that it will be useful, 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | # GNU General Public License for more details. 20 | # 21 | # You should have received a copy of the GNU General Public License 22 | # along with this program; if not, write to the Free Software 23 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 24 | # MA 02110-1301, USA. 25 | # 26 | # 27 | import SpeechSegmenter as ss 28 | import SpeechChunker as sc 29 | from scipy.io import wavfile 30 | import numpy as np 31 | import sys 32 | import os 33 | 34 | def segment_recording(sound_files): 35 | w=[] 36 | for ff in sound_files: 37 | sr,wi=wavfile.read(ff) 38 | w.append(wi) 39 | 40 | w=np.vstack(w).T 41 | sys.stderr.write("Read {} files, {} channels, {} samples\n"\ 42 | .format(len(sound_files),w.shape[1],w.shape[0])) 43 | sys.stderr.write("Segmenting audio\n") 44 | seg=sc.MultiChannelSegmenter(w,sr=sr) 45 | sys.stderr.write("Found {} chunks\n".format(len(seg.label))) 46 | return (seg.tst,seg.tend,seg.label) 47 | 48 | def analyse_rec(sound_files, output_dir='.'): 49 | # segment recordings 50 | w=[] 51 | for ff in sound_files: 52 | sr,wi=wavfile.read(ff) 53 | w.append(wi) 54 | 55 | w=np.vstack(w).T 56 | sys.stderr.write("Read {} files, {} channels, {} samples\n"\ 57 | .format(len(sound_files),w.shape[1],w.shape[0])) 58 | sys.stderr.write("Segmenting audio\n") 59 | if w.shape[1]>1: 60 | seg=sc.MultiChannelSegmenter(w,sr=sr,min_len=args.min_silence) 61 | else: 62 | #w=w.squeeze() 63 | seg = sc.SilenceDetector(w.squeeze(), sr=sr, method = 'pct01', 64 | min_len=args.min_silence) 65 | seg.label = [1 for tst in seg.tst] 66 | seg.centers = np.array([[0,0],[1,0]]) 67 | 68 | 69 | seg.to_textgrid(os.path.join(output_dir,"sources.TextGrid")) 70 | sys.stderr.write("Found {} chunks\n".format(len(seg.label))) 71 | 72 | intervals = (seg.tst,seg.tend,seg.label) 73 | 74 | # segment syllables for each channel 75 | for lab in set(seg.label): 76 | vi = [(ii[0],ii[1]) for ii in zip(*intervals) if ii[2]==lab] 77 | source = int(lab) 78 | # find the best channel to segment source 79 | chan = np.argmax(seg.centers[lab,:]) 80 | 81 | syl=ss.SyllableSegmenter(w[:,chan],sr=sr,voice_intervals=vi) 82 | syl.segment_amplitude_bumps() 83 | syl.classify_voicing() 84 | syl.to_textgrid(os.path.join(output_dir,'voiced_syllables_{}.TextGrid'.format(lab))) 85 | # output spreadsheet 86 | df = syl.to_pandas() 87 | df.to_excel(os.path.join(output_dir,'syllables_{}.xls'.format(lab))) 88 | 89 | 90 | def process_file_list(batch_file): 91 | import logging 92 | file_seq=[] 93 | with open(batch_file) as f: 94 | for line in f: 95 | files = [it.strip() for it in line.split(',') if len(it.strip())>0] 96 | if len(files)>0: 97 | basedir, filename = os.path.split(files[0]) 98 | try: 99 | analyse_rec(files, output_dir=basedir) 100 | except Exception as e: 101 | message = 'ERROR while processing files:\n' 102 | for f in files: 103 | message+=f 104 | message+='/n' 105 | logging.exception(message) 106 | #~ sys.stderr.write('ERROR while processing files:\n') 107 | #~ for f in files: 108 | #~ sys.stderr.write(f+'\n') 109 | #~ sys.stderr.write(str(e)) 110 | #~ sys.stderr.write('\n') 111 | #~ sys.stderr.write(e.__doc__ ) 112 | #~ sys.stderr.write('\n') 113 | return 0 114 | 115 | def main(args): 116 | 117 | sound_files = args.infiles 118 | print sound_files 119 | 120 | if args.batch: 121 | process_file_list(args.batch) 122 | 123 | else: 124 | if sound_files: 125 | analyse_rec(sound_files) 126 | else: 127 | sys.stderr.write('Input files or batch list (-b) are required!\n') 128 | 129 | 130 | return 0 131 | 132 | if __name__ == '__main__': 133 | import sys 134 | import argparse 135 | # construct the argument parse and parse the arguments 136 | ap = argparse.ArgumentParser() 137 | ap.add_argument("-o", "--output", nargs='?', default = '', 138 | help = "output file name") 139 | ap.add_argument("-n", "--min-silence", nargs='?', default = '0.3', type=float, 140 | help = "minimum silence duration in seconds") 141 | ap.add_argument("-b", "--batch", nargs='?', 142 | help = "input file list for batch processing") 143 | 144 | ap.add_argument("-s", "--start", type=float, nargs='?', default = '0', 145 | help = "start time") 146 | ap.add_argument("-e", "--end", type=float, nargs='?', default = '-1', 147 | help = "end time") 148 | 149 | 150 | ap.add_argument('infiles', nargs='*', help='Input sound files (required if not batch)') 151 | 152 | args = ap.parse_args() 153 | 154 | 155 | 156 | sys.exit(main(args)) 157 | -------------------------------------------------------------------------------- /examples/WavResynth.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pylab as pl 3 | import pandas as pd 4 | import sys 5 | from scipy.io import wavfile as wf 6 | 7 | sys.path.append('..') 8 | import PVAnalysis as pv 9 | 10 | #sr, sig = wf.read('pepperCl.wav') 11 | sr, sig = wf.read('pepperSx.wav') 12 | #sr, sig = wf.read('perlmanVn.wav') 13 | #sr, sig = wf.read('smirnoffVn.wav') 14 | #sr, sig = wf.read('ProtectMarraigeInAmerica.wav') 15 | #sr, sig = wf.read('SoloGuitarArpegi.wav') 16 | 17 | # scale to floating point (range -1 to 1) 18 | sig = sig/ float(np.iinfo(sig.dtype).max) 19 | 20 | #pl.plot(sig) 21 | pl.figure() 22 | ss=pl.specgram(sig,NFFT=1024/2) 23 | 24 | # Build the phase vocoder object 25 | mypv=pv.PV(sig,sr,nfft=1024*4,npks=25*4,hop=256*4) 26 | # Run the PV calculation 27 | mypv.run_pv() 28 | # plot the peaks that were found 29 | mypv.plot_time_freq() 30 | 31 | # convert to sinusoidal lines 32 | ss=mypv.toSinSum() 33 | 34 | # resynthesise based on PV analysis 35 | # (reduce hop to slow down, increase to accelerate) 36 | w=ss.synth(sr,mypv.hop/1) 37 | 38 | # plot original and resynthesis 39 | pl.figure() 40 | pl.plot(sig,label='orig') 41 | pl.hold(True) 42 | pl.plot(w,label='resynth') 43 | pl.legend() 44 | pl.show() 45 | 46 | fig,ax=pl.subplots(2,1,sharex=True) 47 | ax[0].plot(np.arange(len(sig))/float(sr),sig,label='orig') 48 | ax[0].hold(True) 49 | ax[0].plot(np.arange(len(w))/float(sr),w,label='resynth') 50 | ax[0].legend() 51 | mypv.plot_time_freq(ax=ax[1]) 52 | 53 | -------------------------------------------------------------------------------- /examples/generate_mel_and_mfcc.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from glob import glob 4 | 5 | import numpy as np 6 | from scipy.io import wavfile 7 | 8 | import pypevoc.FFTFilters as ft 9 | 10 | 11 | def read_wav_file(sndfile): 12 | return wavfile.read(sndfile) 13 | 14 | def melspec(sr,w,twind=0.025,thop=0.01,mfcc=False): 15 | mfb = ft.MelFilterBank(sr=sr,twind=twind, thop=thop) 16 | wp = ft.preemph(w,hpFreq=50,Fs=sr) 17 | cs,ms,ts = mfb.mfcc_and_mel(wp) 18 | return cs,np.log(ms),ts 19 | 20 | 21 | def parse_args(): 22 | parser = argparse.ArgumentParser() 23 | 24 | parser.add_argument('input', help='Input file or dir') 25 | parser.add_argument('--window_sec', '-w', help='window duration in seconds', default=0.025, type=float) 26 | parser.add_argument('--hop_sec','-H', help='hop in seconds', default=0.01, type=float) 27 | return parser.parse_args() 28 | 29 | def process_file(sndfile, twind=0.025, thop=0.01, output=None): 30 | sr,w = read_wav_file(sndfile) 31 | mc, ms, tm = melspec(sr,w,twind=twind,thop=thop) 32 | if output is None: 33 | basename = os.path.splitext(sndfile)[0] 34 | dname = basename+'_MEL_MFCC.npz' 35 | else: 36 | dname = output 37 | np.savez(dname, mfcc=mc, melspec=ms, t=tm) 38 | 39 | def process_dir(directory): 40 | from glob import glob 41 | filelist = glob(os.path.join(directory,'*.wav')) 42 | for sndfile in filelist: 43 | process_file(sndfile) 44 | 45 | if __name__ == '__main__': 46 | args = parse_args() 47 | if os.path.isdir(args.input): 48 | datadir = args.input 49 | process_dir(datadir) 50 | else: 51 | process_file(args.input, twind=args.window_sec, thop=args.hop_sec) -------------------------------------------------------------------------------- /examples/glottal_flow.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import matplotlib.pyplot as pl 4 | from scipy.io import wavfile 5 | 6 | from pypevoc.speech.glottal import iaif_ola, lpcc2pole 7 | 8 | try: 9 | filename = sys.argv[1] 10 | except IndexError: 11 | filename = "hide.wav" 12 | 13 | sr, w = wavfile.read(filename) 14 | 15 | g, dg, vt, gf = iaif_ola(w, Fs=sr) 16 | 17 | t = np.arange(len(w))/sr 18 | 19 | fig,ax = pl.subplots(2,sharex=True) 20 | ax[0].plot(t,w) 21 | ax[1].plot(t,g) 22 | 23 | try: 24 | import matlab.engine 25 | import matlab 26 | eng = matlab.engine.start_matlab() 27 | except ImportError: 28 | pass 29 | else: 30 | eng.addpath(eng.genpath('~/Devel/covarep/')) 31 | try: 32 | g_m, dg_m, vt_m, gf_m = eng.iaif_ola(matlab.double(w.tolist()), 33 | float(sr), 34 | nargout=4) 35 | except matlab.engine.MatlabExecutionError: 36 | pass 37 | else: 38 | ax[1].plot(t,np.array(g_m).flatten()) 39 | finally: 40 | eng.quit() 41 | 42 | pl.figure() 43 | pl.specgram(w, Fs=sr, NFFT=2**10) 44 | for ii in range(vt.shape[0]): 45 | t = len(w)/sr*ii/vt.shape[0] 46 | p,bw = lpcc2pole(vt[ii,:],sr) 47 | pl.scatter(np.ones(len(bw))*t, p, s=1/np.sqrt(bw), color='k') 48 | 49 | 50 | pl.show() 51 | -------------------------------------------------------------------------------- /examples/hide.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/hide.wav -------------------------------------------------------------------------------- /examples/itches.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/itches.wav -------------------------------------------------------------------------------- /examples/mfcc_segmenter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import numpy as np 4 | import pypevoc.FFTFilters as ft 5 | import scipy.signal as sig 6 | from scipy.io import wavfile 7 | 8 | 9 | def read_wav_file(sndfile): 10 | return wavfile.read(sndfile) 11 | 12 | 13 | def mfcc_change_rate(sr,w, twind=0.025, thop=0.01, 14 | mode='MELSPEC',ncc=12): 15 | 16 | mfb = ft.MelFilterBank(sr=sr,twind=twind, thop=thop) 17 | wp = ft.preemph(w,hpFreq=50,Fs=sr) 18 | if mode == 'MELSPEC': 19 | feat,tfeat = mfb.specout(wp) 20 | feat = np.log(feat) 21 | elif mode == 'MFCC': 22 | feat, tfeat = mfb.mfcc(wp) 23 | feat = feat[:,1:ncc+1] 24 | else: 25 | raise NotImplementedError, "{} unknown".format(method) 26 | 27 | ndiff = int(np.round(max_tchange/thop)) 28 | dfeat = np.zeros((ndiff,len(tfeat))) 29 | for ii in range(1,ndiff): 30 | dfeat[ii,ndiff:-ndiff] = np.sum((feat[:-ndiff*2,:]-feat[ndiff*2:,:])**2,axis=1) 31 | dfsum = np.sum(dfeat,axis=0) 32 | 33 | 34 | def mfcc_segments(sr,w,twind=0.025,thop=0.01, 35 | max_tchange=0.05,percentile_thresh=50, 36 | mode='MELSPEC', 37 | ncc=12): 38 | mfb = ft.MelFilterBank(sr=sr,twind=twind, thop=thop) 39 | wp = ft.preemph(w,hpFreq=50,Fs=sr) 40 | if mode == 'MELSPEC': 41 | feat,tfeat = mfb.specout(wp) 42 | feat = np.log(feat) 43 | elif mode == 'MFCC': 44 | feat, tfeat = mfb.mfcc(wp) 45 | feat = feat[:,1:ncc+1] 46 | else: 47 | raise NotImplementedError, "{} unknown".format(method) 48 | 49 | ndiff = int(np.round(max_tchange/thop)) 50 | dfeat = np.zeros((ndiff,len(tfeat))) 51 | for ii in range(1,ndiff): 52 | dfeat[ii,ndiff:-ndiff] = np.sum((feat[:-ndiff*2,:]-feat[ndiff*2:,:])**2,axis=1) 53 | dfsum = np.sum(dfeat,axis=0) 54 | dfspks = sig.argrelmax(dfsum)[0] 55 | pkthresh = np.percentile(dfsum,percentile_thresh) 56 | dfspks = dfspks[dfsum[dfspks] > pkthresh] 57 | return tfeat[dfspks], dfsum[dfspks] 58 | 59 | 60 | def file_segments(sr,w): 61 | times, vals = mfcc_segments(sr,w) 62 | dictlist = [] 63 | for t,v in zip(times,vals): 64 | dictlist.append({'start':tst, 65 | 'end':t, 66 | 'strength':val}) 67 | import pandas 68 | return pandas.DataFrame(dictlist) 69 | 70 | 71 | def process_file(sndfile, mode='MELSPEC'): 72 | sr,w = read_wav_file(sndfile) 73 | times, values = mfcc_segments(sr,w,mode=mode) 74 | for t,v in zip(times, values): 75 | print("{:f},{:f}".format(t,v)) 76 | 77 | 78 | def process_dir(directory): 79 | from glob import glob 80 | filelist = glob(os.path.join(directory,'*.wav')) 81 | for sndfile in filelist: 82 | sr,w = read_wav_file(sndfile) 83 | ints = file_segments(sr,w) 84 | for thisi in ints: 85 | ts = thisi['start'] 86 | te = thisi['end'] 87 | tph = thisi['phonemes'] 88 | print('{}, {:7.3f}, Speech START'.format(sndfile,ts)) 89 | for t in tph: 90 | print('{}, {:7.3f}, New phoneme'.format(sndfile,t+ts)) 91 | print('{}, {:7.3f}, Speech END'.format(sndfile,te)) 92 | 93 | 94 | 95 | def parse_args(): 96 | parser = argparse.ArgumentParser() 97 | 98 | parser.add_argument('input', help='Input file or dir') 99 | return parser.parse_args() 100 | 101 | 102 | if __name__ == '__main__': 103 | args = parse_args() 104 | if os.path.isdir(args.input): 105 | datadir = args.input 106 | process_dir(datadir) 107 | else: 108 | process_file(args.input, mode='MFCC') -------------------------------------------------------------------------------- /examples/pepperCl.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/pepperCl.wav -------------------------------------------------------------------------------- /examples/pepperSx.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/pepperSx.wav -------------------------------------------------------------------------------- /examples/perlmanVn.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/perlmanVn.wav -------------------------------------------------------------------------------- /examples/phoneme_descriptor_plot.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas 3 | import matplotlib.pyplot as plt 4 | from scipy.io import wavfile 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser() 8 | 9 | parser.add_argument('soundfile', help='Sound file') 10 | parser.add_argument('descfile', help='Descriptor csv file') 11 | return parser.parse_args() 12 | 13 | if __name__ == '__main__': 14 | args = parse_args() 15 | sr, w = wavfile.read(args.soundfile) 16 | #df=pandas.read_csv(args.descfile,names=['t_start','t_end','label','f0','RMS','Harm','F1','F2','F3','F4','F5'], 17 | # index_col=False) 18 | df =pandas.read_csv(args.descfile,index_col=0) 19 | 20 | fig,ax = plt.subplots(3,sharex=True,figsize=(6,8)) 21 | ax[0].specgram(w,Fs=sr,NFFT=1024) 22 | for ir, row in df.iterrows(): 23 | ts = row['t_start'] 24 | te = row['t_end'] 25 | if row['label'].find('START')>-1: 26 | color='k' 27 | else: 28 | color='r' 29 | if row['label'].find('END')>-1: 30 | for axi in ax: 31 | axi.axvline(te, color='k',alpha=.5) 32 | for axi in ax: 33 | axi.axvline(ts, color=color, alpha=.5) 34 | 35 | tm = (df['t_start']+df['t_end'])/2 36 | ax[0].plot(tm,df['f0'],'o-',color='k') 37 | ax[0].plot(tm,df['Centroid'],'o-',color='blue') 38 | for ii in range(1,5): 39 | ax[0].plot(tm,df['F%d'%ii],'o-',color='r') 40 | ax[1].semilogy(tm,df['RMS']) 41 | ax[2].plot(tm,df['Harmonicity']) 42 | 43 | 44 | plt.show() 45 | -------------------------------------------------------------------------------- /examples/phoneme_descriptors.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import traceback 4 | import pandas 5 | from pypevoc.speech.SpeechAnalysis import Formants 6 | from pypevoc.SoundUtils import RMSWind 7 | from pypevoc.PVAnalysis import PV 8 | from pypevoc.Heterodyne import HeterodyneHarmonic 9 | 10 | from phoneme_segmenter import * 11 | 12 | 13 | formant_window = .05 14 | moment_window=.01 15 | 16 | def get_formants(w,sr, twind=formant_window, twind_min=0.01): 17 | while twind>=twind_min: 18 | try: 19 | t, f, bw = Formants(w.copy(), sr, tWind=twind) 20 | f[0][0] 21 | except (ValueError, IndexError): 22 | twind/=2 23 | f=[[]] 24 | continue 25 | break 26 | 27 | fmed = np.nanmedian(f,axis=0) 28 | ret = dict() 29 | for ii, ff in enumerate(fmed): 30 | ret['F{}'.format(ii+1)] = ff 31 | return ret 32 | 33 | def get_RMS(w,sr): 34 | a,t=RMSWind(w,sr) 35 | return {'RMS':np.nanmean(a)} 36 | 37 | def get_f0(w,sr, tfft=0.04,pkthresh=1e-8,npks=50, nfftmin=128): 38 | ret = {'RMS':np.nan,'f0':np.nan,'Harmonicity':np.nan} 39 | nfft = next_power_2(sr*tfft) 40 | nhop = nfft//2 41 | 42 | while nfft>nfftmin: 43 | a,t=RMSWind(w,sr,nwind=nfft,nhop=nhop) 44 | if len(a)<1: 45 | nfft=nfft//2 46 | nhop=nfft//2 47 | else: 48 | break 49 | ret['RMS'] = np.nanmean(a) 50 | 51 | try: 52 | pv = PV(w,sr,nfft=nfft,pkthresh=pkthresh, 53 | npks=npks,progress=False) 54 | pv.run_pv() 55 | f0 = pv.calc_f0(thr=0.01) 56 | ret['f0']=np.nanmean(f0) 57 | except Exception: 58 | sys.stderr.write('Error calculating f0\n') 59 | return ret 60 | 61 | try: 62 | hh=HeterodyneHarmonic(w,sr,f=np.nanmean(f0),nwind=nfft,nhop=nhop) 63 | except Exception: 64 | sys.stderr.write('Error in first pass of Heterodyne\n') 65 | return ret 66 | try: 67 | f0,tf0=hh.calc_adjusted_freq(hh.f0) 68 | hh=HeterodyneHarmonic(w,sr,tf=tf0,f=f0,nharm=20,nwind=nfft,nhop=nhop) 69 | except Exception: 70 | sys.stderr.write('Error in second pass of Heterodyne\n') 71 | 72 | hpct = np.sqrt(np.sum(np.abs(hh.camp)**2,axis=1))/a 73 | 74 | ret['Harmonicity'] = np.nanmedian(hpct) 75 | return ret 76 | 77 | def get_spectral_moments(w,sr,tfft=moment_window): 78 | nfft = next_power_2(sr*tfft) 79 | wo = w.copy() 80 | wo[:-1] -= wo[1:] 81 | 82 | fsg, tsg, sg = sig.spectrogram(wo, fs=sr, nfft=nfft) 83 | avs = np.mean(sg,axis=1) 84 | cent = np.sum(avs*fsg)/np.sum(avs) 85 | var = np.sum(avs*(fsg-cent)**2/np.sum(avs)) 86 | return {'Centroid': cent, 87 | 'Stdev': np.sqrt(var)} 88 | 89 | 90 | def describe_phoneme(w,sr): 91 | desc = {} 92 | try: 93 | desc.update(get_f0(w,sr)) 94 | except Exception: 95 | traceback.print_exc() 96 | #desc.update(get_RMS(w,sr)) 97 | try: 98 | desc.update(get_spectral_moments(w,sr)) 99 | except Exception: 100 | traceback.print_exc() 101 | 102 | try: 103 | desc.update(get_formants(w,sr)) 104 | except Exception: 105 | traceback.print_exc() 106 | return desc 107 | 108 | 109 | def parse_args(): 110 | parser = argparse.ArgumentParser() 111 | 112 | parser.add_argument('input', help='Input file or dir') 113 | return parser.parse_args() 114 | 115 | def output_interval(w,sr,ts,te,label): 116 | try: 117 | desc = describe_phoneme(w,sr) 118 | except Exception: 119 | traceback.print_exc() 120 | desc = {} 121 | dstr = '{:.3f}, {:.3f}, {}'.format(ts,te,label) 122 | for k,v in desc.items(): 123 | dstr+=',{}'.format(v) 124 | print(dstr) 125 | 126 | def dict_interval(w,sr,ts,te,label): 127 | try: 128 | desc = describe_phoneme(w,sr) 129 | except Exception: 130 | traceback.print_exc() 131 | desc = {} 132 | return desc 133 | 134 | def file_df(sndfile): 135 | sr,w = read_wav_file(sndfile) 136 | ints = file_segments(sr,w) 137 | tps = 0. 138 | tpe=0. 139 | alld = [] 140 | for thisi in ints: 141 | ts = thisi['start'] 142 | te = thisi['end'] 143 | tph = thisi['phonemes'] 144 | label = 'SILENCE' 145 | tps = tpe 146 | tpe = ts 147 | ww = w[int(sr*tps):int(sr*tpe)] 148 | alld.append(dict_interval(ww,sr,tps,tpe,label)) 149 | alld[-1].update({'t_start':tps, 150 | 't_end':tpe, 151 | 'label':label}) 152 | 153 | tps = ts 154 | tpe = tps 155 | label = 'Utteration START' 156 | for tper in tph: 157 | tpe = tper+ts 158 | ww = w[int(sr*tps):int(sr*tpe)] 159 | alld.append(dict_interval(ww,sr,tps,tpe,label)) 160 | alld[-1].update({'t_start':tps, 161 | 't_end':tpe, 162 | 'label':label}) 163 | tps=tpe 164 | label = 'phoneme' 165 | tps = tpe 166 | tpe = te 167 | label = 'Utteration END' 168 | ww = w[int(sr*tps):int(sr*tpe)] 169 | alld.append(dict_interval(ww,sr,tps,tpe,label)) 170 | alld[-1].update({'t_start':tps, 171 | 't_end':tpe, 172 | 'label':label}) 173 | df = pandas.DataFrame(alld) 174 | return df 175 | 176 | def process_file(sndfile): 177 | df = file_df(sndfile) 178 | df.to_csv(sys.stdout) 179 | 180 | def process_dir(directory): 181 | from glob import glob 182 | filelist = glob(os.path.join(directory,'*.wav')) 183 | for sndfile in filelist: 184 | df = file_df(sndfile) 185 | basepath, ext = os.path.splitext(sndfile) 186 | df.to_csv(basepath+'.csv') 187 | 188 | if __name__ == '__main__': 189 | args = parse_args() 190 | if os.path.isdir(args.input): 191 | datadir = args.input 192 | process_dir(datadir) 193 | else: 194 | process_file(args.input) -------------------------------------------------------------------------------- /examples/phoneme_segmenter.py: -------------------------------------------------------------------------------- 1 | import os 2 | from glob import glob 3 | import argparse 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | from scipy.io import wavfile 7 | import scipy.signal as sig 8 | 9 | from pypevoc.speech.SpeechChunker import SilenceDetector 10 | from pypevoc.speech.SpeechSegmenter import SpeechSegmenter 11 | 12 | bands = [200.,300.,500.,800.,1200.] 13 | segmenter_detect_thresh=.5 14 | chunker_fmin = 100 15 | chunker_fmax = 1000 16 | chunker_wind_sec=.2 17 | chunker_method = 'pct10' 18 | 19 | def estimate_noise_background_spcetrum(w, sr, tst, tend, nfft=1024): 20 | sg, fsg, tsg = sig.specgram(w, fs=sr, NFFT=nfft) 21 | for ts,te in zip(sd.tend[:-1],sd.tst[1:]): 22 | isil = (tsg>=ts)&(tsg<=te) 23 | silence_sg_chunks.append(sg[:,isil]) 24 | 25 | silence_sg = np.hstack(silence_sg_chunks) 26 | 27 | return fsg, np.median(silence_sg,axis=1) 28 | 29 | def read_wav_file(sndfile): 30 | return wavfile.read(sndfile) 31 | 32 | def segment_wav(w, sr, fmin=100, fmax=1000, wind_sec=.2,method='pct10'): 33 | sd = SilenceDetector(w,sr=sr,fmin=fmin,fmax=fmax,wind_sec=wind_sec,method=method) 34 | return sd.tst, sd.tend 35 | 36 | def next_power_2(x): 37 | return int(2**np.ceil(np.log2(x))) 38 | 39 | def phoneme_segment_wav(w,sr, bands=[200.,300.,500.,800.,1200.], 40 | detect_thresh=.5,twind=0.04): 41 | 42 | nrough = next_power_2(sr*twind) 43 | ss = SpeechSegmenter(sr=sr, bands=bands, 44 | detect_thresh=detect_thresh, 45 | rough_window=nrough) 46 | ss.set_signal(w,sr=sr) 47 | tph = ss.process(w) 48 | tph = ss.refine_all_all_bands() 49 | return tph 50 | 51 | def file_segments(sr,w): 52 | tst, tend = segment_wav(w,sr,fmin=chunker_fmin,fmax=chunker_fmax, 53 | wind_sec=chunker_wind_sec,method=chunker_method) 54 | ints = [] 55 | for ts, te in zip(tst,tend): 56 | ww = w[int(ts*sr):int(te*sr)] 57 | tph = phoneme_segment_wav(ww, sr, bands=bands, 58 | detect_thresh=segmenter_detect_thresh) 59 | ints.append({'start':ts, 60 | 'end':te, 61 | 'phonemes':tph}) 62 | return ints 63 | 64 | def process_file(sndfile): 65 | sr,w = read_wav_file(sndfile) 66 | ints = file_segments(sr,w) 67 | for thisi in ints: 68 | ts = thisi['start'] 69 | te = thisi['end'] 70 | tph = thisi['phonemes'] 71 | print('{:7.3f}, Speech START'.format(ts)) 72 | for t in tph: 73 | print('{:7.3f}, New phoneme'.format(t+ts)) 74 | print('{:7.3f}, Speech END'.format(te)) 75 | 76 | def process_dir(directory): 77 | from glob import glob 78 | filelist = glob(os.path.join(directory,'*.wav')) 79 | for sndfile in filelist: 80 | sr,w = read_wav_file(sndfile) 81 | ints = file_segments(sr,w) 82 | for thisi in ints: 83 | ts = thisi['start'] 84 | te = thisi['end'] 85 | tph = thisi['phonemes'] 86 | print('{}, {:7.3f}, Speech START'.format(sndfile,ts)) 87 | for t in tph: 88 | print('{}, {:7.3f}, New phoneme'.format(sndfile,t+ts)) 89 | print('{}, {:7.3f}, Speech END'.format(sndfile,te)) 90 | 91 | 92 | 93 | def parse_args(): 94 | parser = argparse.ArgumentParser() 95 | 96 | parser.add_argument('input', help='Input file or dir') 97 | return parser.parse_args() 98 | 99 | 100 | if __name__ == '__main__': 101 | args = parse_args() 102 | if os.path.isdir(args.input): 103 | datadir = args.input 104 | process_dir(datadir) 105 | else: 106 | process_file(args.input) 107 | -------------------------------------------------------------------------------- /examples/progress_bar.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pypevoc.ProgressDisplay import Progress, in_ipynb\n", 10 | "from time import sleep" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 4, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/plain": [ 21 | "True" 22 | ] 23 | }, 24 | "execution_count": 4, 25 | "metadata": {}, 26 | "output_type": "execute_result" 27 | } 28 | ], 29 | "source": [ 30 | "in_ipynb()" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 8, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "application/vnd.jupyter.widget-view+json": { 41 | "model_id": "48a6ae0e06a34643ad36169e03aa9319", 42 | "version_major": 2, 43 | "version_minor": 0 44 | }, 45 | "text/plain": [ 46 | "VBox(children=(HTML(value=''), IntProgress(value=1, bar_style='info')))" 47 | ] 48 | }, 49 | "metadata": {}, 50 | "output_type": "display_data" 51 | } 52 | ], 53 | "source": [ 54 | "n = 1000\n", 55 | "pd = Progress(n)\n", 56 | "for ii in range(n):\n", 57 | " pd.update(ii)\n", 58 | " sleep(0.002)\n", 59 | " \n", 60 | "pd.finish()" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 9, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "In IPYNB\n" 73 | ] 74 | }, 75 | { 76 | "data": { 77 | "application/vnd.jupyter.widget-view+json": { 78 | "model_id": "c9491944de314ba38a6f99456609b63d", 79 | "version_major": 2, 80 | "version_minor": 0 81 | }, 82 | "text/plain": [ 83 | "VBox(children=(HTML(value=''), IntProgress(value=1, bar_style='info')))" 84 | ] 85 | }, 86 | "metadata": {}, 87 | "output_type": "display_data" 88 | } 89 | ], 90 | "source": [ 91 | "%run progress_bar.py" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [] 100 | } 101 | ], 102 | "metadata": { 103 | "kernelspec": { 104 | "display_name": "unsw", 105 | "language": "python", 106 | "name": "unsw" 107 | }, 108 | "language_info": { 109 | "codemirror_mode": { 110 | "name": "ipython", 111 | "version": 3 112 | }, 113 | "file_extension": ".py", 114 | "mimetype": "text/x-python", 115 | "name": "python", 116 | "nbconvert_exporter": "python", 117 | "pygments_lexer": "ipython3", 118 | "version": "3.7.3" 119 | } 120 | }, 121 | "nbformat": 4, 122 | "nbformat_minor": 2 123 | } 124 | -------------------------------------------------------------------------------- /examples/progress_bar.py: -------------------------------------------------------------------------------- 1 | from pypevoc.ProgressDisplay import Progress, in_ipynb 2 | from time import sleep 3 | 4 | if in_ipynb(): 5 | print('In IPYNB') 6 | else: 7 | print('In console') 8 | 9 | n = 1000 10 | 11 | pd = Progress(n) 12 | 13 | for ii in range(n): 14 | pd.update(ii) 15 | sleep(.002) 16 | 17 | pd.finish() 18 | 19 | -------------------------------------------------------------------------------- /examples/smaller_than_star.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/smaller_than_star.wav -------------------------------------------------------------------------------- /examples/smirnoffVn.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/smirnoffVn.wav -------------------------------------------------------------------------------- /pypevoc/AMDF.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # AMDF.py 5 | # 6 | # Utilities based on the Average Maen Difference Function 7 | # * Fundamental frequency estimator 8 | # * Tonal character 9 | # 10 | # 11 | # Copyright 2014 Andre Almeida 12 | # 13 | # This program is free software; you can redistribute it and/or modify 14 | # it under the terms of the GNU General Public License as published by 15 | # the Free Software Foundation; either version 2 of the License, or 16 | # (at your option) any later version. 17 | # 18 | # This program is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 | # GNU General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU General Public License 24 | # along with this program; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 26 | # MA 02110-1301, USA. 27 | # 28 | # 29 | 30 | import sys 31 | import numpy as np 32 | import PeakFinder as pf 33 | import pylab as pl 34 | from matplotlib.colors import hsv_to_rgb 35 | 36 | 37 | def amdf(x, mindelay = 0, maxdelay = None): 38 | nx = len(x) 39 | if maxdelay is None: 40 | maxdelay = nx 41 | 42 | y = np.zeros(nx) 43 | for i in range(mindelay,maxdelay): 44 | n = nx - i 45 | y[i] = (np.abs(x[0:nx-i]-x[i:])).sum()/n 46 | 47 | return y 48 | 49 | # I will try to update this object so that data required for the initialisation of every instance stays in the caller. Thee caller passes itself as argument to the callee 50 | 51 | class Periodicity(object): 52 | """Single period object, including multiple periodicity candidates 53 | """ 54 | def __init__(self, xw, sr=1, ncand = 8, candthresh = .8, vthresh = .2, mindelay=0, maxdelay=None, method='xcorr'): 55 | """Calculate the periodicity estimation for a window of a time signal 56 | 57 | Arguments: 58 | x: signal 59 | sr: sample rate 60 | candthresh: ratio to lowest minima to keep as peak 61 | vthresh: voicing threshold 62 | mindelay: minimum value of period 63 | maxdelay: maximum value of period 64 | ncand: maximum number of period candidates 65 | method: type of correlation correlation / matching to use 66 | 'xcorr' - correlation 67 | 'amdf' - average mean difference function 68 | 'zc' - zero crossing 69 | """ 70 | 71 | nwind = len(xw) 72 | self.sr = sr 73 | 74 | self.mindelay = mindelay 75 | if maxdelay is None: 76 | self.maxdelay = round(nwind/2) 77 | else: 78 | self.maxdelay = maxdelay 79 | 80 | self.method = method 81 | self.threshold = candthresh 82 | self.vthresh = vthresh 83 | self.ncand = ncand 84 | 85 | self.cand_period = np.array([]) 86 | self.cand_strength = np.array([]) 87 | 88 | self._calc(xw) 89 | 90 | 91 | def _calc(self, xw): 92 | """Calculate the periodicity candidates 93 | 94 | Arguments: 95 | xw: the windowed portion of time signal where periodicity is to be estimated 96 | """ 97 | 98 | nwind = len(xw) 99 | 100 | # unvoiced 101 | pkpos = np.array([]) 102 | pkstr = np.array([]) 103 | 104 | 105 | try: 106 | if self.method is 'amdf': 107 | xc = amdf(xw) 108 | 109 | maxxc = max(xc) 110 | 111 | xcpos = (maxxc-xc[self.mindelay:self.maxdelay]) / maxxc 112 | xcth = self.threshold 113 | 114 | elif self.method is 'xcorr': 115 | 116 | xc = np.correlate(xw,xw,"full") / self.wind 117 | xcred = xc[nwind-1+self.mindelay:nwind-1+self.maxdelay] 118 | xcpos = xcred/max(xc) 119 | xcth = self.threshold 120 | 121 | #print "In xcorr. max %f, thr %f"%(max(xcpos),xcth) 122 | 123 | if max(xcpos) > self.vthresh: 124 | # this is equivlent to finding minima below the absolute minimum * threshold 125 | peaks = pf.PeakFinder(xcpos, minval = xcth, npeaks = self.ncand) 126 | 127 | 128 | peaks.refine_all() 129 | #peaks.plot() 130 | 131 | pkpos = peaks.get_pos() + self.mindelay 132 | pkstr = peaks.get_val() 133 | 134 | #keep = pkpos 0.0: 19 | wav.setpos(int(startTime*float(framerate*nchannels))/nchannels) 20 | 21 | if endTime: 22 | nrdframes = int((endTime-startTime)*float(framerate*nchannels))/nchannels 23 | else: 24 | nrdframes = nframes-wav.tell() 25 | 26 | frames = wav.readframes (nrdframes * nchannels) 27 | out = struct.unpack_from ("%dh" % nrdframes * nchannels, frames) 28 | 29 | # Convert 2 channles to numpy arrays 30 | if nchannels == 2: 31 | left = array (list (everyOther (out, 0))) 32 | right = array (list (everyOther (out, 1))) 33 | return framerate, array(left,right) 34 | else: 35 | left = array (out) 36 | #right = left 37 | return framerate, left 38 | 39 | def wavCopy (infile, outfile, startTime=0.0, endTime=None): 40 | inwav = wave.open (infile, "r") 41 | outwav = wave.open (outfile, "w") 42 | (nchannels, sampwidth, framerate, 43 | nframes, comptype, compname) = inwav.getparams () 44 | 45 | if startTime > 0.0: 46 | inwav.setpos(int(startTime*float(framerate*nchannels))/nchannels) 47 | 48 | if endTime: 49 | nrdframes = int((endTime-startTime)*float(framerate*nchannels))/nchannels 50 | else: 51 | nrdframes = nframes-inwav.tell() 52 | 53 | outwav.setnchannels(nchannels) 54 | outwav.setsampwidth(sampwidth) 55 | outwav.setframerate(framerate) 56 | 57 | for ii in range(nrdframes): 58 | frames = inwav.readframes (nchannels) 59 | outwav.writeframes(frames) 60 | 61 | outwav.close() 62 | inwav.close() 63 | 64 | def wavSave (data, framerate, fname, sampwidth=2): 65 | wav = wave.open (fname, "w") 66 | wav.setframerate(framerate) 67 | wav.setsampwidth(sampwidth) 68 | if hasattr(data[0], '__len__'): 69 | nchan = len(data[0]) 70 | values = [struct.pack('h',int(d)) for dd in data for d in dd ] 71 | else: 72 | nchan = 1 73 | values = [struct.pack('h',int(d)) for d in data] 74 | wav.setnchannels(nchan) 75 | 76 | valstr = ''.join(values) 77 | wav.writeframes (valstr) 78 | 79 | wav.close 80 | 81 | 82 | def play(w,sr): 83 | if type(w) is not ndarray: 84 | w=array(w) 85 | try: 86 | nchan = w.shape[1] 87 | except IndexError: 88 | nchan=1 89 | if w.shape[0] <20000: 90 | w=pad(w,pad_width=(4000,4000),mode='constant', constant_values=(0,0)) 91 | 92 | w16 = w.astype('int16').tobytes() 93 | # Open stream with correct settings 94 | 95 | import pyaudio 96 | pya = pyaudio.PyAudio() 97 | stream = pya.open(format=pya.get_format_from_width(width=2), channels=nchan, rate=sr, output=True) 98 | # Assuming you have a numpy array called samples 99 | stream.write(w16) 100 | stream.stop_stream() 101 | stream.close() 102 | 103 | pya.terminate() 104 | 105 | 106 | -------------------------------------------------------------------------------- /pypevoc/FFTFilters.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Filters.py 5 | # 6 | # Copyright 2017 Andre Almeida 7 | # 8 | # This program is free software; you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation; either version 2 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # This program is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with this program; if not, write to the Free Software 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 21 | # MA 02110-1301, USA. 22 | # 23 | # 24 | 25 | import numpy as np 26 | 27 | class BandError(Exception): 28 | """Exception raised for errors in band definition. 29 | 30 | Attributes: 31 | expression -- input expression in which the error occurred 32 | message -- explanation of the error 33 | """ 34 | 35 | def __init__(self, message): 36 | self.message = message 37 | Exception.__init__(self, message) 38 | 39 | 40 | def preemph(w,hpFreq=0,Fs=1): 41 | ''' 42 | Applies a pre-emphasis filter to the signal w 43 | amplifies the signal with a +6dB/octave 44 | filter above the cut-on frequency 45 | 46 | Arugments: 47 | * hpFreq = cut-on frequency 48 | * Fs = sampling frequency 49 | ''' 50 | 51 | if hpFreq>0: 52 | a=np.exp(-2.*np.pi*hpFreq/float(Fs)); 53 | #preEmphA = [a,1-a]; 54 | #wo = sig.lfilter([1],preEmphA,w); 55 | wo=w.astype('f') 56 | wo[:-1] -= wo[1:]*a 57 | else: 58 | wo=w 59 | return wo 60 | 61 | def _f_to_mel_py(freq): 62 | # mel = 1125 * ln(1+f/700) 63 | return 1125. + np.log(1.+freq/700.) 64 | 65 | def _mel_to_f_py(mel): 66 | return 700.*(np.exp(mel-1125.)-1) 67 | 68 | f_to_mel = np.vectorize(_f_to_mel_py) 69 | mel_to_f = np.vectorize(_mel_to_f_py) 70 | 71 | 72 | def peaks(x): 73 | ''' 74 | Return indexes of all local maxima in x 75 | ''' 76 | pkmask = np.logical_and(x[:-2]=fst, 223 | fvec<=fend) 224 | if fend!=fst: 225 | filter_mask[idx]=(fvec[idx]-fst)/(fend-fst)*(g[1]-g[0])+g[0] 226 | else: 227 | raise BandError('Band is too narrow: try increasing nwind') 228 | 229 | 230 | return filter_mask 231 | 232 | 233 | 234 | 235 | class FilterBank(object): 236 | ''' 237 | FilterBank object: Defines a FFT-based filter bank 238 | ''' 239 | label = [] 240 | fvec = np.zeros(0) 241 | fb=np.zeros((0,0)) 242 | sr=1. 243 | 244 | def __init__(self, fspec_list=None, sr=1.0, 245 | nwind=256, windfunc=np.hanning, 246 | nhop=None, align_edges=True): 247 | ''' 248 | Create a filter bank from a list of filter specification 249 | objects PiecewiseFilterSpec 250 | 251 | By default creates a 2-band filterbank 252 | dividing the range [0,sr/2] into two bands 253 | ''' 254 | self.sr = sr 255 | self.wind = windfunc(nwind) 256 | self.nwind = int(nwind) 257 | if nhop: 258 | self.hop = nhop 259 | else: 260 | self.hop = int(nwind/2) 261 | 262 | self.fvec = np.linspace(0.,sr,nwind) 263 | if not fspec_list: 264 | fc=0.25 265 | fspec_list=[PiecewiseFilterSpec(mode='lowpass',freq=fc,sr=sr), 266 | PiecewiseFilterSpec(mode='hipass',freq=fc,sr=sr)] 267 | 268 | self.fb = np.zeros((len(fspec_list),len(self.fvec))) 269 | self.label=[] 270 | for ii,fspec in enumerate(fspec_list): 271 | self.fb[ii,:]=fspec.apply_to_freq_vector(self.fvec,align_edges=align_edges) 272 | self.label.append(fspec.label) 273 | 274 | def specout(self,w): 275 | ''' 276 | Calculate the output of the filterbank applied to w 277 | ''' 278 | 279 | n=0 280 | bankout = [] 281 | tout=[] 282 | while n1.0: 323 | unit = 'Hz' 324 | else: 325 | unit = '' 326 | 327 | flim = np.sort(flim).astype('f') 328 | for n,cc in enumerate(flim[1:-1]): 329 | bandf = flim[n:n+3] 330 | bandg = np.array([0.0,1.0,0.0]) 331 | lab = '{}{} band ({}-{}{})'.format(cc,unit,flim[n],flim[n+2],unit) 332 | fsl.append(PiecewiseFilterSpec(freq=bandf,gain=bandg,label=lab,sr=sr)) 333 | 334 | super(TriangularFilterBank,self).__init__(fspec_list=fsl,nwind=nwind,sr=sr,nhop=nhop) 335 | 336 | 337 | def nextpow2(x): 338 | return 2**(np.ceil(np.log2(x))) 339 | 340 | 341 | 342 | class MelFilterBank(TriangularFilterBank): 343 | def __init__(self,n=26,fmin=300.,fmax=8000.,twind=.025, sr=44100., thop=.01): 344 | nwind = int(2**np.round(np.log2(twind*sr))) 345 | nhop = int(thop*sr) 346 | melmin = f_to_mel(fmin) 347 | melmax = f_to_mel(fmax) 348 | fc = mel_to_f(np.linspace(melmin,melmax,n+2)) 349 | 350 | super(MelFilterBank,self).__init__(flim=fc,nwind=nwind,sr=sr,nhop=nhop) 351 | 352 | def mfcc(self,w,mode='DCT2'): 353 | spec, tspec = self.specout(w) 354 | logs = np.log(spec) 355 | if mode[:3]=='DCT': 356 | dctype = int(mode[3]) 357 | from scipy.fftpack import dct 358 | return dct(logs,type=dctype), tspec 359 | elif mode=='IFFT': 360 | return np.fft.ifft(logs), tspec 361 | else: 362 | raise NotImplementedError 363 | 364 | def mfcc_and_mel(self,w,mode='DCT2'): 365 | spec, tspec = self.specout(w) 366 | logs = np.log(spec) 367 | if mode[:3]=='DCT': 368 | dctype = int(mode[3]) 369 | from scipy.fftpack import dct 370 | return dct(logs,type=dctype), spec, tspec 371 | elif mode=='IFFT': 372 | return np.fft.ifft(logs), spec, tspec 373 | else: 374 | raise NotImplementedError 375 | 376 | def fft_filter(x, bands, gains): 377 | ''' 378 | Filter signal x using FFT and IFFT 379 | * x input signal 380 | * bands: list of start and stop frequencies of each band 381 | * gains: start and stop gains in each band 382 | 383 | Example: 384 | 385 | y = FFTfilter(x, [(0,0.1),(0.1,1.0)], [(1.,1.),(0.,0.)]) 386 | 387 | filters signal x low pass at 0.1 times the nyquist rate 388 | (sampling rate / 2) 389 | ''' 390 | 391 | xf = np.fft.fft(x) 392 | nyq = len(xf)/2 393 | 394 | ffilter = np.zeros(len(xf)) 395 | for bb, gg in zip(bands,gains): 396 | fmin = int(bb[0]*nyq) 397 | fmax = int(bb[1]*nyq) 398 | ffilter[fmin:fmax]=np.linspace(gg[0],gg[1],fmax-fmin) 399 | if fmin>0: 400 | ffilter[-fmax+1:-fmin+1]=np.linspace(gg[1],gg[0],fmax-fmin) 401 | else: 402 | ffilter[-fmax+1:]=np.linspace(gg[1],gg[0],fmax-fmin-1) 403 | print('{}-{} : gains [{}, {}]'.format(fmin,fmax,gg[0],gg[1])) 404 | 405 | xf_filt = xf*ffilter 406 | return np.fft.ifft(xf_filt) 407 | 408 | -------------------------------------------------------------------------------- /pypevoc/Heterodyne.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Heterodyne.py 5 | # 6 | # Synchronous decomposition of periodic signals 7 | # 8 | # Copyright 2018 Andre Almeida 9 | # 10 | # This program is free software; you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation; either version 2 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program; if not, write to the Free Software 22 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 23 | # MA 02110-1301, USA. 24 | # 25 | # 26 | 27 | import sys 28 | import collections 29 | import numpy as np 30 | 31 | from .PVAnalysis import SinSum 32 | 33 | 34 | 35 | def heterodyne(x, hetsig, wind=None, hop=None): 36 | """ 37 | Heterodyner: calculates the complex amplitude of a sine wave centered at f 38 | 39 | Arguments: 40 | x: signal 41 | f: normalised frequency vector of same length as x 42 | (frequency/sr) 43 | wind: windowing function (array, defaults to 256 point rectangular) 44 | nhop: samples between windows (defaults to 1/2 the window length) 45 | """ 46 | 47 | ret = [] 48 | icent = [] 49 | if wind is None: 50 | wind = np.ones(2**8) 51 | wlen = len(wind) 52 | wnorm = np.sum(wind) 53 | #fvec[np.logical_not(np.isfinite(fvec))]=0 54 | xf = x*hetsig 55 | for ii in range(0,len(x)-wlen,hop): 56 | xx = xf[ii:ii+wlen] 57 | xw = xx*wind 58 | ret.append(np.sum(xw)/wnorm) 59 | icent.append(ii+wlen//2) 60 | return np.array(ret)*2,np.array(icent) 61 | 62 | 63 | def heterodyne_corr(x,sr,f,maxwind=2**14,nhop=2**10,nper=3,dc_cut=50, release_partials=True): 64 | """ 65 | Heterodyne from sequential residuals 66 | 67 | Performs partial heterodyne detection on a signal x for partials 68 | at frequency f. 69 | 70 | release_partials: heterodynes are performed sequenctially on the 71 | residuals from previous partial detections 72 | """ 73 | xx = x.copy() 74 | t = np.arange(len(x))/sr 75 | 76 | nharm = len(f) 77 | ret = [] 78 | part = np.zeros((len(x),nharm)) 79 | for ii,ff in enumerate(f): 80 | if ff==0.: 81 | nwind=maxwind 82 | foth = np.delete(f,ii) 83 | nwind = (sr/np.min(np.abs(foth-ff))*nper) 84 | print(nwind) 85 | hetsig = np.exp(1j*2*np.pi*ff*t) 86 | if release_partials: 87 | cc,ih = heterodyne(xx,hetsig,wind=np.hanning(nwind),hop=nhop) 88 | else: 89 | cc,ih = heterodyne(x,hetsig,wind=np.hanning(nwind),hop=nhop) 90 | if ff==0.: 91 | cc/=2 92 | th=ih/sr 93 | ret.append(ts.SampledTimeSeries(cc,th,label='%.2f'%ff)) 94 | ret[-1].f = ff 95 | hf = np.interp(t,th,cc) 96 | xp = np.real(np.conjugate(hf)*hetsig) 97 | xx-=xp 98 | part[:,ii]=xp 99 | return ret,xx,part 100 | 101 | class Heterodyne(object): 102 | """ 103 | Perform a sine sum decomposition based on a f0 track 104 | """ 105 | 106 | def __init__(self, x, sr=1.0, nwind=1024, wfun=np.hanning, nhop=None): 107 | """ 108 | Create a heterodyner object, storing the signal basic information 109 | of the analysis 110 | 111 | Arguments: 112 | * x: signal 113 | * sr: sampling rate 114 | * nwind: default window length 115 | * wfun: default windowing function 116 | * ampthr: amplitude threshold for filtering in resynthesis 117 | """ 118 | self.x = x 119 | self.sr = sr 120 | self.nwind = nwind 121 | self.nhop = nhop 122 | self.nsamp = len(x) 123 | self.wfun = wfun 124 | self.ampthr = ampthr 125 | 126 | self._fix_params() 127 | 128 | def add_partial(self, f, tf=None, fidx=None, 129 | wind=None, nhop=None, 130 | t=None, idx=None): 131 | """ 132 | set the starting indices for the windowed analysis 133 | """ 134 | self.idx = idx 135 | 136 | 137 | def harmonic_times(self, n=1): 138 | if self.variable_resolution: 139 | return self.th[n-1] 140 | else: 141 | return self.th 142 | 143 | def harmonic_amplitudes(self, n=1): 144 | if self.variable_resolution: 145 | return self.ah[n-1] 146 | else: 147 | return self.ah[:,n-1] 148 | 149 | def harmonic_frequencies(self, n=1): 150 | if self.variable_resolution: 151 | return self.f[self.idxh[n-1]]*n 152 | else: 153 | return self.f[self.idxh]*n 154 | 155 | 156 | def heterodyner_signal(self, n=1): 157 | """ 158 | return a reference variable-frequency signal 159 | with frequency equal to n* the harmonic of the frequency vector 160 | """ 161 | omega = self.fvec*2*np.pi*n 162 | phvec = np.cumsum(omega) 163 | return np.exp(1j*phvec) 164 | 165 | def set_fvec(self, f0c, th=None, adjust=False): 166 | tvec = np.arange(len(self.x))/self.sr 167 | 168 | 169 | if th is not None: 170 | fvec = np.interp(tvec, th, f0c) 171 | else: 172 | fvec = f0c 173 | 174 | # fix for single-frequency values 175 | if not isinstance(fvec, collections.abc.Sequence): 176 | fvec = fvec*np.ones(self.nsamp) 177 | 178 | self.fvec = fvec/self.sr 179 | self.fmin = max(self.fmin,min(fvec)) 180 | 181 | if adjust: 182 | f0c, th = self.calc_adjusted_freq(fvec) 183 | self.fvec = np.interp(tvec, th, f0c) 184 | if self.variable_resolution: 185 | self.th = [[] for ii in range(self.nharm)] 186 | self.ah = [[] for ii in range(self.nharm)] 187 | self.idxh = [[] for ii in range(self.nharm)] 188 | else: 189 | self.th = np.arange(self.nwind//2, self.nsamp-self.nwind//2, self.nhop)/self.sr 190 | self.idxh = np.arange(self.nwind//2, self.nsamp-self.nwind//2, 191 | self.nhop).astype('i') 192 | self.ah = np.zeros((self.th.shape[0],self.nharm),dtype='complex') 193 | 194 | 195 | def extract_partial(self, n): 196 | """ 197 | calculates complex amplitudes of partials 198 | """ 199 | x = self.x 200 | hetsig = self.heterodyner_signal(n=n) 201 | if self.variable_resolution: 202 | wind = self.wfun(int(self.nper/self.fmin*self.sr/n)) 203 | else: 204 | wind = self.wind 205 | h,th = heterodyne(x, hetsig, wind=wind, hop=self.nhop) 206 | return h, th 207 | 208 | def filter_harmonic(self, n): 209 | """ 210 | mute intervals not to be taken into account in resynthesis 211 | """ 212 | tvec = np.arange(self.nsamp)/self.sr 213 | hf = np.interp(tvec, self.harmonic_times(n), self.harmonic_amplitudes(n)) 214 | idx = (self.fself.fmax) | (self.f*n>self.sr/2.2) 215 | rmsmin = np.max(np.abs(hf))*self.ampthr 216 | idx = idx | (np.abs(hf)self.fmax) | (self.f0*n>self.sr/2.2) 480 | rmsmin = np.max(np.abs(hf))*self.ampthr 481 | idx = idx | (np.abs(hf) 10 | # 11 | # This program is free software; you can redistribute it and/or modify 12 | # it under the terms of the GNU General Public License as published by 13 | # the Free Software Foundation; either version 2 of the License, or 14 | # (at your option) any later version. 15 | # 16 | # This program is distributed in the hope that it will be useful, 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | # GNU General Public License for more details. 20 | # 21 | # You should have received a copy of the GNU General Public License 22 | # along with this program; if not, write to the Free Software 23 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 24 | # MA 02110-1301, USA. 25 | # 26 | # 27 | 28 | """ Defines a class for detecting peaks in a numpy array""" 29 | 30 | import numpy as np 31 | 32 | 33 | class PeakFinder(object): 34 | 35 | def __init__(self, y, x=None, npeaks=None, minrattomax=None, minval=None): 36 | """Creates the peak finder object from a numpy array 37 | 38 | Arguments: 39 | 40 | x: the numpy array in which to find peaks 41 | npeaks: maximum number of peaks to find 42 | 43 | Thresholds: 44 | minrattomax: ratio of minimum to maximum peak amplitude 45 | (has priority over minval if set to other 46 | than None) 47 | minval: an absolute minimum value of peak 48 | """ 49 | 50 | self.y = np.array(np.squeeze(y)) 51 | if x is not None: 52 | self.x = np.array(np.squeeze(x)) 53 | else: 54 | self.x = np.arange(len(self.y)) 55 | self._idx = np.array([]) 56 | self._val = np.array([]) 57 | if minrattomax is None: 58 | self.minamp = minval 59 | else: 60 | self.minamp = self.y.max()*minrattomax 61 | 62 | self.sorttype = 0 63 | 64 | if not npeaks: 65 | self.npeaks = len(self.y) 66 | else: 67 | self.npeaks = npeaks 68 | 69 | if not self.minamp: 70 | self.minamp = np.min(self.y) 71 | 72 | self.findpos() 73 | #self.sort_pos() 74 | # self.boundaries() 75 | 76 | @property 77 | def pos(self): 78 | return self._fine_pos[self._keep] 79 | 80 | @property 81 | def rough_pos(self): 82 | return self.x[self._idx[self._keep]] 83 | 84 | @property 85 | def all_pos(self): 86 | return self.x[self._idx] 87 | 88 | @property 89 | def val(self): 90 | return self._fine_val[self._keep] 91 | 92 | @property 93 | def all_val(self): 94 | return self._val 95 | 96 | @property 97 | def rough_val(self): 98 | return self._val[self._keep] 99 | 100 | @property 101 | def bounds(self): 102 | b = np.array(self._bounds) 103 | return self.x[b[self._keep,:]] 104 | 105 | @property 106 | def areas(self): 107 | return self._areas[self._keep] 108 | 109 | @property 110 | def prominence(self): 111 | return self._prominence[self._keep] 112 | 113 | def filter_by_salience(self, rad=1, sal=0): 114 | ''' Filters the peaks by salience. 115 | Any peak that is lower than the neighbouring 'rad' points 116 | is filtered out 117 | 118 | optional: 119 | * sal: salience (peaks must be at leas sal above other 120 | values in a radius rad) 121 | ''' 122 | 123 | npks = len(self._idx) 124 | # keep = np.ones(npks).astype('bool') 125 | 126 | for idx in range(npks): 127 | thispos = self._idx[idx] 128 | thisval = self._val[idx] 129 | wmin = max(thispos-rad, 1) 130 | wmax = min(thispos + rad, len(self.y)) 131 | w = self.y[wmin:wmax + 1] 132 | 133 | if any(w+sal > thisval): 134 | self._keep[idx] = False 135 | 136 | # self.keep = np.logical_and(self.keep, keep) 137 | 138 | def filter_by_prominence(self, prom=0.0, all=False): 139 | ''' 140 | Filter by peak prominence 141 | 142 | prominence at leas prom above relative minimum 143 | 144 | optional: 145 | * all: include peaks that were filtered out before 146 | ''' 147 | try: 148 | prominence = self._prominence 149 | except AttributeError: 150 | self.find_prominence() 151 | prominence = self._prominence 152 | 153 | self._keep[prominence= y[2:]).astype(int) 167 | pkmskamp = peakmask*(y[1:-1]-miny) 168 | # print(pkmskamp) 169 | 170 | pos = [] 171 | 172 | m = pkmskamp.max() 173 | b = pkmskamp.argmax() 174 | th = self.minamp-miny 175 | n = 1 176 | 177 | if m > th: 178 | pos.append(b + 1) 179 | pkmskamp[b] = th-1 180 | 181 | while m > th and n < self.npeaks: 182 | m = pkmskamp.max() 183 | b = pkmskamp.argmax() 184 | if m > th: 185 | pos.append(b + 1) 186 | pkmskamp[b] = th-1 187 | n += 1 188 | 189 | self._idx = np.array(np.sort(pos)) 190 | self._val = np.array([y[i] for i in self._idx]) 191 | self._keep = np.ones(len(self._idx),dtype='bool') 192 | self._order = np.arange(len(self._idx)) 193 | self._fine_pos = np.array([self.x[ii] for ii in self._idx]) 194 | self._fine_val = self._val 195 | 196 | def find_prominence(self, side_fun=np.min, all=False): 197 | if not all: 198 | pos = self._idx[self._keep] 199 | val = self._val[self._keep] 200 | else: 201 | pos = self.all_pos 202 | val = self.all_val 203 | lbound = np.concatenate(([0], pos)) 204 | rbound = np.concatenate((pos+1, [len(self.y)])) 205 | sal_l = [] 206 | sal_r = [] 207 | for lb, rb, v in zip(lbound[:-1], rbound[:-1], val): 208 | sal_l.append(v - np.min(self.y[lb:rb])) 209 | for lb, rb, v in zip(lbound[1:], rbound[1:], val): 210 | sal_r.append(v - np.min(self.y[lb:rb])) 211 | 212 | sal_l = np.array(sal_l) 213 | sal_r = np.array(sal_r) 214 | prominence = side_fun(np.array([sal_l,sal_r]),axis=0) 215 | 216 | if not all: 217 | self.prominence = prominence 218 | else: 219 | self.prominence = np.zeros(len(self.pos)) 220 | self.prominence[self._keep] = prominence 221 | return self.prominence[self._keep] 222 | 223 | def plot(self, logarithmic=False): 224 | """Plot a graphical representation of the peaks 225 | 226 | Arguments: 227 | (none) 228 | """ 229 | 230 | import pylab as pl 231 | 232 | pl.figure() 233 | pl.plot(self.x, self.y) 234 | pl.plot(self.all_pos, 235 | self.all_val, 'om') 236 | pl.plot(self.rough_pos, self.rough_val, 'og') 237 | if hasattr(self, 'bounds'): 238 | lmins = np.unique(self.bounds.flatten()) 239 | lminvals = self.y[lmins] 240 | pl.plot(lmins, lminvals, 'or') 241 | pl.plot(self.pos, self.val, 'dg') 242 | if logarithmic: 243 | pl.gca().set_yscale('log') 244 | 245 | def sort_ampl(self): 246 | """Sort the found peaks in decreasing order of amplitude 247 | 248 | Arguments: 249 | (none) 250 | """ 251 | if len(self.pos) > 1: 252 | idx = np.argsort(self._val)[::-1] 253 | self._order = idx 254 | self.sorttype = 2 255 | 256 | def sort_pos(self): 257 | """Sort the found peaks in order of position 258 | 259 | Arguments: 260 | (none) 261 | """ 262 | 263 | if len(self._idx) > 1: 264 | idx = np.argsort(self._idx) 265 | 266 | self._order = idx 267 | self.sorttype = 1 268 | 269 | def find_boundaries(self, all=False): 270 | """Find the local minima on either side of each peak 271 | 272 | Arguments: 273 | (none) 274 | """ 275 | try: 276 | prevb = np.argmin(self.y[0:self._idx[0]]) 277 | except IndexError: 278 | prevb = 0 279 | 280 | bounds = [] 281 | 282 | if not all: 283 | pos = self._idx[self._keep] 284 | else: 285 | pos = self._idx 286 | 287 | npks = len(pos) 288 | 289 | for i in range(npks): 290 | thismax = pos[i] 291 | if i < npks-1: 292 | nextmax = pos[i + 1] 293 | relb = np.argmin(self.y[thismax:nextmax]) 294 | nextb = relb + thismax 295 | else: 296 | nextmax = len(self.y)-1 297 | nextb = len(self.y)-1 298 | 299 | bounds.append([prevb, nextb]) 300 | prevb = nextb 301 | 302 | self._bounds = np.array(bounds) 303 | 304 | def refine_opt(self, idx, yvec=None, rad=2): 305 | """use fit to quadratic to locate a fine maximum of 306 | the peak position and value 307 | 308 | Arguments: 309 | idx: index of the peak to interpolate 310 | """ 311 | 312 | pos = self.pos[idx] 313 | if yvec is not None: 314 | y = yvec 315 | else: 316 | y = self.y 317 | 318 | # val = self.val[idx] 319 | imin = max(1, pos-rad) 320 | imax = min(pos + rad + 1, len(y)) 321 | sur = y[imin:imax] 322 | ifit = np.arange(imin-pos, imax-pos) 323 | 324 | pp = np.polyfit(ifit, sur, 2) 325 | lpos = - pp[1]/2.0/pp[0] 326 | fpos = float(pos) + lpos 327 | fval = pp[0]*lpos*lpos + pp[1]*lpos + pp[2] 328 | 329 | return fpos, fval.tolist() 330 | 331 | def refine(self, idx, fun=None, yvec=None): 332 | """use quadratic interpolation to locate a fine maximum of 333 | the peak position and value 334 | 335 | Arguments: 336 | idx: index of the peak to interpolate 337 | """ 338 | 339 | pos = self._idx[idx] 340 | if yvec is not None: 341 | y = yvec 342 | else: 343 | y = self.y 344 | 345 | if fun: 346 | from scipy.optimize import broyden1 as opt 347 | # val = fun(self.val[idx]) 348 | sur = fun(y[pos-1:pos+2]) 349 | else: 350 | # val = self.val[idx] 351 | sur = y[pos-1:pos+2] 352 | 353 | if sur[1] > sur[0] and sur[1] >= sur[2]: 354 | c = sur[1] 355 | b = (sur[2] - sur[0])/2 356 | a = (sur[2] + sur[0])/2 - c 357 | 358 | lpos = - b/2/a 359 | fpos = float(pos) + lpos 360 | if fun: 361 | ival = a*lpos*lpos + b*lpos + c 362 | # print "rpos = %d; rf(val) = %f; f(val) = %f; dpos = %f;"%(pos, sur[1], ival, lpos) 363 | fval = opt(lambda x: fun(x)-ival, self.val[idx]/2) 364 | else: 365 | fval = a*lpos*lpos + b*lpos + c 366 | # print "rpos = %d; rval = %f; val = %f; dpos = %f; pos = %f"%(pos, sur[1], fval, lpos, fpos) 367 | 368 | else: 369 | fpos = pos 370 | fval = sur[1] 371 | 372 | return np.interp(fpos, np.arange(len(self.x)), self.x), fval.tolist() 373 | 374 | def refine_all(self, logarithmic=False, rad=1): 375 | """use quadratic interpolation to refine all peaks 376 | 377 | Arguments: 378 | idx: index of the peak to interpolate 379 | """ 380 | 381 | if logarithmic: 382 | y = np.log10(self.y) 383 | else: 384 | y = self.y 385 | 386 | # rpos = self.pos 387 | # rval = self.val 388 | self._fine_pos = np.zeros(self._idx.shape) 389 | self._fine_val = np.zeros(self._idx.shape) 390 | 391 | for i in range(len(self._idx)): 392 | if logarithmic: 393 | if rad > 1: 394 | fpos, fval = self.refine_opt(i, yvec=y, rad=rad) 395 | else: 396 | fpos, fval = self.refine(i, yvec=y) 397 | else: 398 | if rad > 1: 399 | fpos, fval = self.refine_opt(i, rad=rad) 400 | else: 401 | fpos, fval = self.refine(i) 402 | self._fine_pos[i] = fpos 403 | if logarithmic: 404 | self._fine_val[i] = 10**fval 405 | else: 406 | self._fine_val[i] = fval 407 | 408 | def calc_individual_area(self, idx, funct=None, max_rad=None): 409 | lims = self._bounds[idx] 410 | if funct is None: 411 | return sum(self.y[lims[0]:lims[-1]]) 412 | else: 413 | return sum(funct(self.y[lims[0]:lims[-1]])) 414 | 415 | def get_areas(self, funct=None, max_rad=None): 416 | if not hasattr(self, '_bounds'): 417 | self.find_boundaries() 418 | 419 | areas = [] 420 | for idx in range(len(self._idx)): 421 | areas.append(self.calc_individual_area(idx, funct=funct)) 422 | 423 | self._areas = np.array(areas) 424 | 425 | return self._areas[self._keep] 426 | 427 | def get_pos_val(self, rough=False): 428 | """return a vector with peak position in first column 429 | and value in second column 430 | 431 | Arguments: 432 | rough: do not return the refined position 433 | """ 434 | 435 | rvec = np.array(zip(self.pos, self.val)) 436 | 437 | return rvec 438 | 439 | def to_dict(self): 440 | """ 441 | Return a list of dictionary with peak characteristics 442 | """ 443 | ret = [] 444 | for ii, (pos, val) in enumerate(zip(self.pos,self.val)): 445 | thisd = {'pos': pos, 446 | 'val': val} 447 | try: 448 | thisd['sal'] = self.prominence[self._keep][ii] 449 | except AttributeError: 450 | pass 451 | 452 | try: 453 | thisd['l_bound'] = self.bounds[ii,0] 454 | thisd['r_bound'] = self.bounds[ii,1] 455 | except AttributeError: 456 | pass 457 | 458 | try: 459 | thisd['area'] = self.areas[ii] 460 | except AttributeError: 461 | pass 462 | 463 | ret.append(thisd) 464 | 465 | return ret 466 | 467 | def to_data_frame(self): 468 | """ 469 | Return a pandas dataframe with peak information 470 | """ 471 | import pandas 472 | return pandas.DataFrame(self.to_dict()) 473 | 474 | # backwards compat 475 | def boundaries(self): 476 | try: 477 | self._bounds 478 | except AttributeError: 479 | self.find_boundaries() 480 | b = np.array(self._bounds) 481 | try: 482 | return self.x[b[self._keep, :]] 483 | except IndexError: 484 | return np.array([]) 485 | 486 | def get_pos(self): 487 | return np.array(self.pos) 488 | 489 | 490 | -------------------------------------------------------------------------------- /pypevoc/Periodicity.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Periodicity.py 5 | # 6 | # Utilities for frequency and periodicity estimation 7 | # * Fundamental frequency estimator 8 | # * Tonal character 9 | # 10 | # 11 | # Copyright 2014 Andre Almeida 12 | # 13 | # This program is free software; you can redistribute it and/or modify 14 | # it under the terms of the GNU General Public License as published by 15 | # the Free Software Foundation; either version 2 of the License, or 16 | # (at your option) any later version. 17 | # 18 | # This program is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 | # GNU General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU General Public License 24 | # along with this program; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 26 | # MA 02110-1301, USA. 27 | # 28 | # 29 | 30 | import sys 31 | import numpy as np 32 | import pylab as pl 33 | from matplotlib.colors import hsv_to_rgb 34 | from .PeakFinder import PeakFinder as pf 35 | from .ProgressDisplay import Progress 36 | #from AMDF import amdf 37 | 38 | def amdf(x, mindelay=0, maxdelay=None): 39 | nx = len(x) 40 | if maxdelay is None: 41 | maxdelay = nx 42 | 43 | y = np.zeros(nx) 44 | for i in range(mindelay, maxdelay): 45 | n = nx - i 46 | y[i] = (np.abs(x[0:nx-i]-x[i:])).sum()/n 47 | 48 | return y 49 | 50 | # I will try to update this object so that data required for 51 | # the initialisation of every instance stays in the caller. 52 | # Thee caller passes itself as argument to the callee 53 | 54 | 55 | class Periodicity(object): 56 | """Single period object, including multiple periodicity candidates 57 | """ 58 | def __init__(self, parent, index=0): 59 | """Calculate the periodicity estimation for a window 60 | of a time signal 61 | 62 | Arguments: 63 | parent: parent object contaigning entire signal 64 | idx: index of local peridoicity calulation 65 | """ 66 | 67 | self.parent = parent 68 | self.nwind = parent.nwind 69 | self.wnorm = parent.wnorm 70 | self.wind = parent.wind 71 | self.sr = parent.sr 72 | 73 | self.mindelay = parent.mindelay 74 | if parent.maxdelay is None: 75 | self.maxdelay = int(round(self.nwind/2)) 76 | else: 77 | self.maxdelay = int(parent.maxdelay) 78 | 79 | self.method = parent.method 80 | self.threshold = parent.threshold 81 | self.vthresh = parent.vthresh 82 | self.ncand = parent.ncand 83 | self.fftthresh = parent.fftthresh 84 | 85 | # Arrays with probable candidate periodicity and corresponding 86 | # strength 87 | self.cand_period = np.array([]) 88 | self.cand_strength = np.array([]) 89 | # Index of preferred candidate 90 | self.preferred = 0 91 | 92 | self.cand_method = parent.cand_method 93 | self.index=index 94 | 95 | self._calc() 96 | 97 | def _calc(self): 98 | """Calculate the periodicity candidates 99 | 100 | Arguments: 101 | xw: the windowed portion of time signal where periodicity 102 | is to be estimated 103 | """ 104 | nwleft = int(np.floor(self.nwind/2)) 105 | nwright = int(self.nwind - nwleft) 106 | idx = int(np.round(self.index)) 107 | ist = idx - nwleft 108 | iend = idx + nwright 109 | 110 | xs = self.parent.x[ist:iend] 111 | xw = (xs-np.mean(xs)) * self.wind 112 | 113 | nwind = self.nwind 114 | 115 | # unvoiced 116 | pkpos = np.array([]) 117 | pkstr = np.array([]) 118 | 119 | peaks = None 120 | 121 | try: 122 | if self.method is 'amdf': 123 | xc = amdf(xw) 124 | 125 | maxxc = max(xc[nwind-1-self.maxdelay:nwind-1+self.maxdelay]) 126 | xcn = (maxxc-xc)/maxxc 127 | imin = self.mindelay 128 | xcpos = xcn[imin:self.maxdelay] 129 | xcth = self.threshold 130 | 131 | elif self.method is 'xcorr': 132 | 133 | xc = np.correlate(xw, xw, "full") / self.wnorm 134 | 135 | negvals = np.flatnonzero(xc[nwind-1:] < 0) 136 | if len(negvals) > 0: 137 | firstneg = np.min(negvals) 138 | else: 139 | firstneg = self.mindelay 140 | imin = max(firstneg, self.mindelay) 141 | xcn = xc/max(xc[nwind-1-self.maxdelay:nwind-1+self.maxdelay]) 142 | xcpos = xcn[nwind-1+imin:nwind-1+self.maxdelay] 143 | 144 | xcth = self.threshold 145 | 146 | # print "In xcorr. max %f, thr %f"%(max(xcpos),xcth) 147 | 148 | if len(xcpos) > 0 and max(xcpos) > self.vthresh: 149 | # this is equivlent to finding minima 150 | # below the absolute minimum * threshold 151 | peaks = pf(xcpos, minval=xcth, 152 | npeaks=self.ncand) 153 | 154 | peaks.refine_all() 155 | # peaks.plot() 156 | 157 | pkpos = peaks.pos + imin 158 | pkstr = peaks.val 159 | 160 | # keep = pkpos 0: 168 | self.cand_period = pkpos 169 | self.cand_strength = pkstr 170 | 171 | if self.cand_method == 'fft': 172 | xf = np.fft.fft(xw) 173 | fftpeaks = pf(np.abs(xf[0:int(self.nwind/2)]), 174 | npeaks=self.ncand) 175 | # periodicity corresponding to fft peaks: 176 | fpos = fftpeaks.pos 177 | fval = fftpeaks.val 178 | fposkeep = fpos[fval > np.max(fval*self.fftthresh)] 179 | fftpkpos = self.nwind / fposkeep 180 | 181 | # minimum distance between correlation candidates 182 | # and fft peaks 183 | perdist = [np.min(np.abs(fftpkpos-thispos)) 184 | for thispos in pkpos] 185 | try: 186 | self.preferred = np.argmin(perdist) 187 | except ValueError: 188 | self.preferred = 0 189 | # print (fftpkpos) 190 | # print (pkpos) 191 | elif self.cand_method == 'min': 192 | self.preferred = np.argmin(pkpos) 193 | elif self.cand_method == 'similar': 194 | self.preferred = np.argmax(pkstr) 195 | else: 196 | self.preferred = 0 197 | # self.cand_period = np.array([np.nan]) 198 | # self.cand_strength = np.array([np.nan]) 199 | 200 | return xcn 201 | 202 | def plot_similarity(self, ax=None): 203 | 204 | xc = self._calc() 205 | 206 | if not ax: 207 | fig, ax = pl.subplots(1) 208 | ln = ax.plot(np.arange(len(xc))-self.nwind+1, xc) 209 | ax.hold('on') 210 | ax.plot(self.cand_period, self.cand_strength, 'o', 211 | color=ln[0].get_color()) 212 | 213 | def set_time_properties(self, index): 214 | """Set the sample and time value of this periodicity estimation 215 | 216 | Arguments: 217 | index: sample index 218 | """ 219 | 220 | self.index = float(index) 221 | self.time = float(index)/self.sr 222 | 223 | def sort_strength(self): 224 | """Sort candidates by periodicity strength 225 | 226 | Arguments: (None) 227 | """ 228 | 229 | idx = np.argsort(self.cand_strength)[::-1] 230 | self.cand_period = self.cand_period[idx] 231 | self.cand_strength = self.cand_strength[idx] 232 | pref = np.flatnonzero(idx == self.preferred) 233 | if len(pref) > 0: 234 | self.preferred = pref[0] 235 | else: 236 | self.preferred = [] 237 | 238 | def get_preferred_period(self): 239 | if len(self.cand_period) > 0: 240 | return self.cand_period[self.preferred] 241 | else: 242 | return 0 243 | 244 | def get_preferred_strength(self): 245 | if len(self.cand_period) > 0: 246 | return self.cand_strength[self.preferred] 247 | else: 248 | return 0 249 | 250 | 251 | class PeriodSeries(object): 252 | def __init__(self, x, sr=48000, window=None, hop=None, 253 | threshold = .8, vthresh = .2, 254 | fmin=50, fmax=5000, 255 | ncand=8, method='xcorr', 256 | cand_method='fft', fftthresh=0.1): 257 | """Calculate the average mean difference of x around index 258 | 259 | Arguments: 260 | x: signal 261 | sr: sample rate 262 | window: window around index used for difference calculations 263 | threshold: ratio to lowest minima to keep as peak 264 | vthresh: voicing threshold 265 | fmin: value of minimum possible frequency 266 | fmax: value of maximum possible frequency 267 | ncand: maximum number of period candidates 268 | method: type of correlation correlation / matching to use 269 | 'xcorr' - correlation 270 | 'amdf' - average mean difference function 271 | 'zc' - zero crossing 272 | cand_method: method for candidate selection: 273 | 'fft' - based on an fft of the window 274 | 'min' - minimum periodicity wins 275 | 'similar'- most similar wins 276 | fftthresh: threshold for fft peak selection (default=0.1) 277 | """ 278 | 279 | self.method = method 280 | self.x = x.astype(float) 281 | self.sr = sr 282 | 283 | self.nx = len(x) 284 | 285 | if fmin is None: 286 | maxdelay = None 287 | else: 288 | maxdelay = int(sr/fmin) 289 | 290 | if fmax is None: 291 | mindelay = 2 292 | else: 293 | mindelay = int(sr/fmax) 294 | 295 | if window is None: 296 | if maxdelay is None: 297 | window = self.nx 298 | else: 299 | window = 3*maxdelay 300 | 301 | if not np.iterable(window): 302 | window = np.ones(window) 303 | 304 | self.wind = window 305 | self._calc_window_norm() 306 | 307 | self.nwind = len(window) 308 | # self.windad = amdf(window) 309 | 310 | self.mindelay = mindelay 311 | if maxdelay is None: 312 | self.maxdelay = int(round(self.nwind/2)) 313 | else: 314 | self.maxdelay = maxdelay 315 | 316 | if hop is None: 317 | hop = self.nwind//2 318 | 319 | self.hop = hop 320 | 321 | self.method = method 322 | self.threshold = threshold 323 | self.vthresh = vthresh 324 | self.ncand = ncand 325 | self.cand_method = cand_method 326 | self.fftthresh = fftthresh 327 | 328 | # data storage 329 | self.periods = [] 330 | 331 | # progress indicator 332 | self.progress = Progress(end=self.nx) 333 | 334 | def _calc_window_norm(self): 335 | """Calculate the normalisation function for window 336 | 337 | Arguments: (None) 338 | """ 339 | 340 | if self.method is 'xcorr': 341 | w = self.wind 342 | self.wnorm = np.correlate(w, w, "full") 343 | else: 344 | self.wnorm = 1. 345 | 346 | def per_at_index(self, index): 347 | """Calculate the average mean difference of x around index 348 | 349 | Arguments: 350 | 351 | index: index of x for current amdf 352 | threshold: ratio to lowest minima to keep as peak 353 | """ 354 | 355 | pp = Periodicity(self, index) 356 | pp.set_time_properties(index) 357 | pp.sort_strength() 358 | 359 | # self.periods.append(pp) 360 | return pp 361 | 362 | def calc(self, hop=None, threshold=None): 363 | """Estimate local periodicity in the full time series 364 | 365 | Arguments: 366 | 367 | hop: samples bewteen estimations 368 | threshold: peak threshold for maintaining or rejecting 369 | candidates 370 | """ 371 | 372 | self.periods = [] 373 | if hop is None: 374 | hop = self.hop 375 | 376 | if threshold is not None: 377 | oldthresh = self.threshold 378 | self.threshold = threshold 379 | 380 | idxmax = self.nx - self.nwind 381 | idxvec = np.arange(self.nwind, idxmax, hop) 382 | 383 | sys.stderr.write("Calculating local periodicity... \n") 384 | 385 | for idx in idxvec: 386 | pp = self.per_at_index(idx) 387 | sys.stderr.write("\r{:6.2f}%%".format(idx*100/idxmax)) 388 | sys.stderr.flush() 389 | self.periods.append(pp) 390 | 391 | sys.stderr.write("\ndone\n" ) 392 | 393 | if threshold is not None: 394 | self.threshold = oldthresh 395 | 396 | def calcPeriodByPeriod(self, threshold=None, 397 | tf=None, f=None): 398 | """Estimate local periodicity in the full time series 399 | 400 | Arguments: 401 | 402 | hop: samples bewteen estimations 403 | threshold: peak threshold for maintaining or rejecting 404 | candidates 405 | """ 406 | 407 | self.periods = [] 408 | if threshold is not None: 409 | oldthresh = self.threshold 410 | self.threshold = threshold 411 | 412 | # Max index for starting window 413 | idxmax = self.nx - self.nwind 414 | 415 | sys.stdout.write("Calculating local periodicity... ") 416 | idx = self.nwind 417 | while idx < idxmax: 418 | pp = self.per_at_index(idx) 419 | oldidx = idx 420 | if f is None: 421 | di = pp.get_preferred_period() 422 | else: 423 | thisf = np.interp(pp.time, tf, f) 424 | if len(pp.cand_period)>0 and thisf>0: 425 | imin = np.argmin(np.abs(self.sr/thisf-pp.cand_period)) 426 | pp.preferred = imin 427 | di = pp.cand_period[imin] 428 | else: 429 | di=0 430 | if di: 431 | idx += di 432 | self.periods.append(pp) 433 | else: 434 | idx += self.mindelay 435 | 436 | # sys.stdout.write("\b"*15+"%6d / %6d" % (idx, self.nx)) 437 | # sys.stdout.flush() 438 | self.progress.update(idx) 439 | 440 | self.progress.update(self.nx) 441 | sys.stdout.write("\ndone\n") 442 | 443 | if threshold is not None: 444 | self.threshold = oldthresh 445 | 446 | def plot_candidates(self): 447 | """Plot a representation of candidate periodicity 448 | 449 | Size gives the periodicity strength, 450 | color the order of preference 451 | """ 452 | 453 | fig, ax = pl.subplots(2, sharex=True) 454 | 455 | hues = np.arange(self.ncand)/float(self.ncand) 456 | hsv = np.swapaxes(np.atleast_3d([[hues, np.ones(len(hues)), 457 | np.ones(len(hues))]]), 1, 2) 458 | cols = hsv_to_rgb(hsv).squeeze() 459 | 460 | for per in self.periods: 461 | nc = len(per.cand_period) 462 | 463 | ax[0].scatter(per.time*np.ones(nc), per.cand_period, 464 | s=per.cand_strength*100, 465 | c=cols[0:nc], alpha=.5) 466 | 467 | ax[0].plot(*zip(*[[per.time, float(per.get_preferred_period())] 468 | for per in self.periods]), color='k') 469 | 470 | ax[1].plot(self.get_times(), self.get_strength()) 471 | 472 | def get_f0(self, thresh=0.0): 473 | """Get f0 as a function of time 474 | 475 | thresh: threshod for period strength 476 | """ 477 | 478 | f0 = np.zeros(len(self.periods)) 479 | for ii, per in enumerate(self.periods): 480 | if per.get_preferred_strength() > thresh: 481 | f0[ii] = self.sr/per.get_preferred_period() 482 | else: 483 | f0[ii] = np.nan 484 | return f0 485 | 486 | def get_times(self): 487 | """Get f0 as a function of time 488 | """ 489 | 490 | f0 = np.zeros(len(self.periods)) 491 | for ii, per in enumerate(self.periods): 492 | f0[ii] = per.time 493 | return f0 494 | 495 | def get_strength(self): 496 | """Get f0 strength as a function of time 497 | """ 498 | 499 | ss = np.zeros(len(self.periods)) 500 | for ii, per in enumerate(self.periods): 501 | ss[ii] = per.get_preferred_strength() 502 | return ss 503 | 504 | 505 | class PeriodTimeSeries(PeriodSeries): 506 | pass 507 | 508 | 509 | class PeriodByPeriod(PeriodSeries): 510 | def __init__(self): 511 | super(PeriodByPeriod, self).__init__() 512 | 513 | def import_period_series(self, pts): 514 | """Imports a PeriodTimeSeries object 515 | 516 | :pts: PeriodTimeSeries object with 517 | time and frequency information 518 | :returns: None 519 | 520 | """ 521 | self.f = pts.f 522 | self.t = pts.t 523 | self.sr = pts.sr 524 | 525 | def period_marks_amdf(x, sr=1.0, t0=0.0, tf=[], f=[], window_size=1024, 526 | min_per=0.001): 527 | """add period marks information to file, 528 | based on sample per sample difference between adjacent periods 529 | 530 | :t0: first mark position 531 | :window_size: window to use for comparison between periods 532 | :returns: TODO 533 | 534 | """ 535 | marks_t = [t0] 536 | next_t = t0 537 | this_f0 = np.interp(marks_t[-1], tf, f) 538 | if np.isnan(this_f0): 539 | this_f0 = np.nanmean(f) 540 | period_samp = int(sr/this_f0) 541 | while next_t*sr < len(x) - period_samp - window_size: 542 | if not np.isnan(this_f0): 543 | period_samp = int(sr/this_f0) 544 | source_idx_st = int(next_t*sr) 545 | target_idx_st = source_idx_st + period_samp 546 | source_idx_end = source_idx_st + window_size 547 | target_idx_end = target_idx_st + window_size 548 | x_source = x[source_idx_st:source_idx_end] 549 | x_target = x[target_idx_st:target_idx_end] 550 | xc = amdf(x_source, x_target) 551 | # find max of xc near 0 lag 552 | # (at position window_size-1) 553 | peaks = pf(-xc) 554 | idx_min = np.argmin(np.abs(peaks.pos-window_size+1)) 555 | delay_samp, _ = peaks.refine(idx_min) 556 | # delay_samp = peaks.get_pos()[idx_min] 557 | delay_samp -= window_size-1 558 | # print delay_samp 559 | delay_t = (-delay_samp + period_samp)/sr 560 | if delay_t > min_per: 561 | marks_t.append(next_t+(window_size+period_samp/2)/sr) 562 | next_t += delay_t 563 | else: 564 | next_t += 1/this_f0 565 | 566 | else: 567 | next_t = next_t + delay_t 568 | 569 | this_f0 = np.interp(next_t, tf, f) 570 | return np.array(marks_t) 571 | 572 | 573 | def period_marks_corr(x, sr=1.0, t0=0.0, tf=[], f=[], window_size=1024, 574 | min_per=0.001): 575 | """add period marks information to file, 576 | based on correlation between adjacent periods 577 | 578 | :t0: first mark position 579 | :window_size: window to use for comparison between periods 580 | :returns: TODO 581 | 582 | """ 583 | marks_t = [t0] 584 | next_t = t0 585 | this_f0 = np.interp(marks_t[-1], tf, f) 586 | if np.isnan(this_f0): 587 | this_f0 = np.nanmean(f) 588 | period_samp = int(sr/this_f0) 589 | while next_t*sr < len(x) - period_samp - window_size: 590 | if not np.isnan(this_f0): 591 | period_samp = int(sr/this_f0) 592 | source_idx_st = int(next_t*sr) 593 | target_idx_st = source_idx_st + period_samp 594 | source_idx_end = source_idx_st + window_size 595 | target_idx_end = target_idx_st + window_size 596 | x_source = x[source_idx_st:source_idx_end] 597 | x_target = x[target_idx_st:target_idx_end] 598 | xc = np.correlate(x_source, x_target, "full") 599 | # find max of xc near 0 lag 600 | # (at position window_size-1) 601 | peaks = pf(xc) 602 | idx_min = np.argmin(np.abs(peaks.pos-window_size+1)) 603 | delay_samp, _ = peaks.refine(idx_min) 604 | # delay_samp = peaks.get_pos()[idx_min] 605 | delay_samp -= window_size-1 606 | # print delay_samp 607 | delay_t = (-delay_samp + period_samp)/sr 608 | if delay_t > min_per: 609 | marks_t.append(next_t+(window_size+period_samp/2)/sr) 610 | next_t += delay_t 611 | else: 612 | next_t += 1/this_f0 613 | 614 | else: 615 | next_t = next_t + delay_t 616 | 617 | this_f0 = np.interp(next_t, tf, f) 618 | return np.array(marks_t) 619 | 620 | 621 | def period_marks_peak(x, sr=1.0, tf=None, f=[], fit_points=3): 622 | """calculate period marks for x based on peak 623 | positions of the signal 624 | 625 | :x: signal 626 | :sr: sample rate (defalut 1 sample/sec) 627 | :tf: time at which frequency values are calulated 628 | (defaults to same samples as x) 629 | :f: frequency values 630 | :fit_points: number of points to use for peak fitting 631 | :returns: time markers 632 | """ 633 | 634 | # derivative of x 635 | # dx = np.diff(x) 636 | 637 | # make sure the rate is float 638 | sr = float(sr) 639 | 640 | # build time vector for signal 641 | tx = np.arange(len(x))/(sr) 642 | # interpolate frequency values 643 | if tf is None: 644 | try: 645 | assert(len(f) == len(x)) 646 | except(TypeError): 647 | f = f*np.ones(len(x)) 648 | else: 649 | # f_orig = f 650 | f = np.interp(tx, tf, f) 651 | 652 | real_mask = np.isfinite(f) 653 | idx_0 = np.nonzero(real_mask)[0][0] 654 | period_samp = int(sr/f[idx_0]) 655 | 656 | marks = [] 657 | maxval = [] 658 | 659 | # find the first minimum 660 | idx_start = idx_0 + np.argmin(x[idx_0:idx_0+period_samp]) 661 | while idx_start < len(x): 662 | idx_end = np.min([idx_start + period_samp, len(x)]) 663 | idx_max = np.argmax(x[idx_start:idx_end]) + idx_start 664 | 665 | if fit_points < 3: 666 | t_max = idx_max/sr 667 | # elif fit_points == 3: 668 | # # parabolic interpolation 669 | else: 670 | # parabolic fit 671 | rel_idx_start = int(np.max([0,-fit_points/2])) 672 | rel_idx_end = np.min([rel_idx_start + fit_points, 673 | len(x) - idx_max - 1]) 674 | # dx_fit = dx[idx_max+rel_idx_start:idx_max+rel_idx_end] 675 | # dx_abcissa = np.arange(rel_idx_start, rel_idx_end)+.5 676 | # fit_poly = np.polyfit(dx_abcissa, dx_fit, 1) 677 | # rel_refined_max = -fit_poly[1]/fit_poly[0] 678 | x_fit = x[idx_max+rel_idx_start:idx_max+rel_idx_end+1] 679 | x_abcissa = np.arange(rel_idx_start, rel_idx_end+1) 680 | try: 681 | fit_poly = np.polyfit(x_abcissa, x_fit, 2) 682 | rel_refined_max = -fit_poly[1]/fit_poly[0]/2 683 | except (ValueError, np.RankWarning): 684 | rel_refined_max = fit_points+1 685 | if np.abs(rel_refined_max) <= fit_points: 686 | t_max = (idx_max + rel_refined_max)/sr 687 | v_max = np.polyval(fit_poly, rel_refined_max) 688 | else: 689 | t_max = (idx_max)/sr 690 | v_max = x[idx_max] 691 | 692 | # prepare for next iteration 693 | this_f0 = f[idx_max] 694 | if np.isfinite(this_f0): 695 | period_samp = int(sr/this_f0) 696 | marks.append(t_max) 697 | maxval.append(v_max) 698 | 699 | # otherwise keep the same period 700 | # next starting point 701 | min_search_max = np.min([idx_max+period_samp, len(x)]) 702 | adv = np.argmin(x[idx_max:min_search_max]) 703 | if adv > 0: 704 | idx_start = idx_max + adv 705 | else: 706 | idx_start = idx_max + 1 707 | 708 | return np.array(marks)[:-1], np.array(maxval)[:-1] 709 | 710 | -------------------------------------------------------------------------------- /pypevoc/ProgressDisplay.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # ProgressDisplay.py 5 | # 6 | # An IPython-friendly progress bar 7 | # 8 | # 9 | # Copyright 2014 Andre Almeida 10 | # 11 | # This program is free software; you can redistribute it and/or modify 12 | # it under the terms of the GNU General Public License as published by 13 | # the Free Software Foundation; either version 2 of the License, or 14 | # (at your option) any later version. 15 | # 16 | # This program is distributed in the hope that it will be useful, 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | # GNU General Public License for more details. 20 | # 21 | # You should have received a copy of the GNU General Public License 22 | # along with this program; if not, write to the Free Software 23 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 24 | # MA 02110-1301, USA. 25 | # 26 | # 27 | 28 | import sys 29 | 30 | try: 31 | from IPython.core.display import clear_output 32 | have_ipython = True 33 | except ImportError: 34 | have_ipython = False 35 | 36 | try: 37 | from IPython.display import display 38 | from ipywidgets import IntProgress, HTML, VBox 39 | have_ipywidgets=True 40 | except ImportError: 41 | have_ipywidgets=False 42 | 43 | def in_ipynb(): 44 | try: 45 | cfg = get_ipython().config 46 | try: 47 | ipk = cfg['IPKernelApp'] 48 | if len(ipk)==0: 49 | return False 50 | except KeyError: 51 | return False 52 | return True 53 | except NameError: 54 | return False 55 | 56 | 57 | 58 | class Progress(object): 59 | def __init__(self, end=1.): 60 | """ 61 | Creates a progress bar display 62 | """ 63 | self.current_val = 0.0 64 | self.max_val = end 65 | if in_ipynb(): 66 | if have_ipywidgets: 67 | self.label = HTML() 68 | self.progress = IntProgress(min=0,max=100,value=1) 69 | self.progress.bar_style = 'info' 70 | self.progressHTML = VBox([self.label, self.progress]) 71 | display(self.progressHTML) 72 | self.redraw = self._redraw_ipywidgets 73 | self.cleanup = self._cleanup_ipywidgets 74 | else: 75 | self.redraw = self._redraw_ipython 76 | self.cleanup = self._cleanup_ipython 77 | else: 78 | self.redraw = self._redraw_console 79 | self.cleanup = self._cleanup_console 80 | 81 | 82 | def update(self, val): 83 | """ 84 | Update the progress bar value 85 | """ 86 | self.current_val = val 87 | self.redraw() 88 | 89 | def _redraw_ipywidgets(self): 90 | self.label.value = str(self) 91 | self.progress.value = self.current_val/self.max_val*100 92 | 93 | def _redraw_ipython(self): 94 | clear_output() 95 | print(str(self)) 96 | sys.stdout.flush() 97 | 98 | def _redraw_console(self): 99 | print('\r'+str(self),end=" ") 100 | sys.stdout.flush() 101 | 102 | def __str__(self): 103 | pct = self.current_val/self.max_val*100 104 | return '%d / %d (%.2f%%)'%(self.current_val,self.max_val,pct) 105 | 106 | def _cleanup_console(self): 107 | print('\n') 108 | 109 | def _cleanup_ipython(self): 110 | pass 111 | 112 | def _cleanup_ipywidgets(self): 113 | pass 114 | 115 | def finish(self): 116 | self.update(self.max_val) 117 | self.cleanup() 118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /pypevoc/SoundUtils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | 4 | 5 | def FftFilter(x, bands, gains): 6 | ''' 7 | Filter signal x using FFT and IFFT 8 | * x input signal 9 | * bands: list of start and stop frequencies of each band 10 | * gains: start and stop gains in each band 11 | 12 | Example: 13 | 14 | y = FFTfilter(x, [(0,0.1),(0.1,1.0)], [(1.,1.),(0.,0.)]) 15 | 16 | filters signal x low pass at 0.1 times the nyquist rate 17 | (sampling rate / 2) 18 | ''' 19 | 20 | xf = np.fft.fft(x) 21 | nyq = len(xf)/2 22 | 23 | ffilter = np.zeros(len(xf)) 24 | for bb, gg in zip(bands, gains): 25 | fmin = int(bb[0]*nyq) 26 | fmax = int(bb[1]*nyq) 27 | ffilter[fmin:fmax] = np.linspace(gg[0], gg[1], 28 | fmax-fmin) 29 | if fmin > 0: 30 | ffilter[-fmax+1:-fmin+1] = np.linspace(gg[1], gg[0], 31 | fmax-fmin) 32 | else: 33 | ffilter[-fmax+1:] = np.linspace(gg[1], gg[0], 34 | fmax-fmin-1) 35 | print('{}-{} : gains [{}, {}]'.format(fmin, fmax, 36 | gg[0], gg[1])) 37 | 38 | xf_filt = xf*ffilter 39 | return np.fft.ifft(xf_filt) 40 | 41 | 42 | def FuncWind(func, x, sr=1, nwind=1024, nhop=512, power=1, 43 | windfunc=np.blackman): 44 | ''' 45 | Applies a function window by window to a time series 46 | ''' 47 | 48 | nsam = len(x) 49 | ist = 0 50 | iend = ist+nwind 51 | 52 | t = [] 53 | ret = [] 54 | 55 | wind = windfunc(nwind) 56 | if power > 0: 57 | wsumpow = sum(wind**power) 58 | else: 59 | wsumpow = 1. 60 | 61 | while (iend < nsam): 62 | thisx = x[ist:iend] 63 | xw = thisx*wind 64 | 65 | ret.append(func(xw)/wsumpow) 66 | t.append(float(ist+iend)/2.0/float(sr)) 67 | 68 | ist = ist+nhop 69 | iend = ist+nwind 70 | 71 | return np.array(ret), np.array(t) 72 | 73 | 74 | def RMSWind(x, sr=1, nwind=1024, nhop=512, windfunc=np.blackman): 75 | ''' 76 | Calculates the RMS amplitude amplitude of x, in frames of 77 | length nwind, and in steps of nhop. windfunc is used as 78 | windowing function. 79 | 80 | nwind should be at least 3 periods if the signal is periodic. 81 | ''' 82 | 83 | nsam = len(x) 84 | ist = 0 85 | iend = ist+nwind 86 | 87 | t = [] 88 | ret = [] 89 | 90 | wind = windfunc(nwind) 91 | wsum2 = np.sum(wind**2) 92 | 93 | while (iend < nsam): 94 | thisx = x[ist:iend] 95 | xw = thisx*wind 96 | 97 | ret.append(np.sum(xw*xw/wsum2)) 98 | t.append(float(ist+iend)/2.0/float(sr)) 99 | 100 | ist = ist+nhop 101 | iend = ist+nwind 102 | 103 | return np.sqrt(np.array(ret)), np.array(t) 104 | 105 | 106 | def Heterodyn(x, f, sr=1, nwind=1024, nhop=512, 107 | windfunc=np.blackman): 108 | ''' 109 | Calculates the amplitude near frequency f in x 110 | 111 | nwind should be at least 3 periods if the signal is periodic. 112 | ''' 113 | sinsig = np.exp(2j*np.pi*np.arange(len(x))*f/float(sr)) 114 | hamp, t = FuncWind(np.sum, x*sinsig, power=1, sr=sr, 115 | nwind=nwind, nhop=nhop, 116 | windfunc=windfunc) 117 | return np.array(hamp)*2, np.array(t) 118 | 119 | 120 | def HeterodynWithF0Track(x, tf0, f0, sr=1, 121 | nwind=1024, nhop=512, 122 | windfunc=np.blackman): 123 | ''' 124 | Calculates the amplitude near frequency f0 in x 125 | (f0 is time-varying, values given at tf0 126 | 127 | nwind should be at least 3 periods if the signal 128 | is periodic. 129 | ''' 130 | valid_idx = np.logical_not(np.isnan(f0)) 131 | tx = np.arange(len(x))/float(sr) 132 | f0s = np.interp(tx, tf0[valid_idx], f0[valid_idx]) 133 | phs = np.cumsum(2*np.pi*f0s/sr) 134 | sinsig = np.exp(1j*phs) 135 | 136 | hamp, t = FuncWind(np.sum, x*sinsig, power=1, sr=sr, 137 | nwind=nwind, nhop=nhop, 138 | windfunc=windfunc) 139 | return np.array(hamp)*2, np.array(t) 140 | 141 | 142 | def SpecCentWind(x, sr=1, nwind=1024, nhop=512, windfunc=np.blackman): 143 | ''' 144 | Calculates the SpectralCentroid of x, in frames of 145 | length nwind, and in steps of nhop. windfunc is used as 146 | windowing function 147 | 148 | nwind should be at least 3 periods if the signal is periodic. 149 | ''' 150 | ff = np.arange(nwind/2)/float(nwind)*sr 151 | 152 | def SCvec(xw): 153 | xf = np.fft.fft(xw) 154 | xf2 = xf[:nwind/2] 155 | return sum(np.abs(xf2)*ff)/sum(np.abs(xf2)) 156 | 157 | amp, t = FuncWind(SCvec, x, power=0, sr=sr, 158 | nwind=nwind, nhop=nhop) 159 | 160 | return np.array(amp), np.array(t) 161 | 162 | 163 | def AvgWind(x, sr=1, nwind=1024, nhop=512, 164 | windfunc=np.blackman): 165 | ''' 166 | Calculates the RMS amplitude amplitude of x, in frames of 167 | length nwind, and in steps of nhop. windfunc is used as 168 | windowing function. 169 | 170 | nwind should be at least 3 periods if the signal is periodic. 171 | ''' 172 | 173 | nsam = len(x) 174 | ist = 0 175 | iend = ist+nwind 176 | 177 | t = [] 178 | amp = [] 179 | 180 | wind = windfunc(nwind) 181 | wsum = sum(wind) 182 | 183 | while (iend < nsam): 184 | thisx = x[ist:iend] 185 | xw = thisx*wind 186 | 187 | amp.append(sum(xw)/wsum) 188 | t.append(float(ist+iend)/2.0/float(sr)) 189 | 190 | ist = ist+nhop 191 | iend = ist+nwind 192 | 193 | return np.array(amp), np.array(t) 194 | 195 | 196 | def SpecFlux(x, sr=1, nwind=1024, nhop=512, minf=0, 197 | maxf=np.inf, windfunc=np.blackman): 198 | ''' 199 | Calculates the spectral flux in sunud 200 | ''' 201 | 202 | nsam = len(x) 203 | # first window 204 | ist = 0 205 | iend = ist+nwind 206 | 207 | t = [] 208 | res = [] 209 | 210 | wind = windfunc(nwind) 211 | minbin = int(minf/sr*nwind) 212 | maxbinf = (float(maxf)/sr*nwind) 213 | if maxbinf > nwind: 214 | maxbin = nwind 215 | else: 216 | maxbin = int(maxbinf) 217 | 218 | while (iend < nsam-nhop): 219 | thisx = x[ist:iend] 220 | nextx = x[ist+nhop:iend+nhop] 221 | 222 | ff = np.abs(np.fft.fft(thisx*wind)) 223 | fl = np.abs(np.fft.fft(nextx*wind)) 224 | 225 | res.append(np.sqrt(sum((ff[minbin:maxbin]-fl[minbin:maxbin])**2))) 226 | t.append(float(ist+iend+nhop)/2.0/float(sr)) 227 | 228 | ist = ist+nhop 229 | iend = ist+nwind 230 | 231 | return np.array(res), np.array(t) 232 | 233 | 234 | def aubio_f0yin(y, sr, nwind=1024, hop=512, 235 | method='yin', tolerance=None): 236 | ''' Applies f0 detection to a numpy vector using aubio 237 | ''' 238 | from aubio import pitch, fvec 239 | 240 | po = pitch(method, nwind, hop, sr) 241 | vs = fvec(nwind) 242 | 243 | if tolerance is not None: 244 | if tolerance > 0.0 and tolerance < 1.0: 245 | po.set_tolerance(tolerance) 246 | else: 247 | sys.stderr.write('Tolerance not set: Out of bounds\n') 248 | 249 | nsamples = y.shape[0] 250 | 251 | freq = [] 252 | time = [] 253 | conf = [] 254 | 255 | for ii in xrange(0,nsamples-nwind, hop): 256 | thisy = y[ii:ii+nwind] 257 | vs[:] = thisy 258 | time.append(float(ii+nwind/2)/sr) 259 | freq.append(po(vs)) 260 | conf.append(po.get_confidence()) 261 | return np.array(freq).squeeze(), np.array(time), np.array(conf) 262 | 263 | 264 | def PlaySound(w, sr=44100): 265 | import pyaudio 266 | 267 | p = pyaudio.PyAudio() 268 | stream = p.open(format=pyaudio.paFloat32, 269 | channels=1, rate=sr, output=1) 270 | 271 | stream.write(w.astype(np.float32).tostring()) 272 | 273 | stream.close() 274 | p.terminate() 275 | 276 | -------------------------------------------------------------------------------- /pypevoc/TransferFunctions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Defines some useful functions for the estimation of transfer functions 3 | """ 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as pl 7 | import scipy.signal as sig 8 | 9 | 10 | def tfe_sig(y, x, *args, **kwargs): 11 | """estimate transfer function from x to y, 12 | see csd for calling convention""" 13 | fxy, sxy = sig.csd(y, x, *args, **kwargs) 14 | fxx, sxx = sig.csd(x, x, *args, **kwargs) 15 | return sxy / sxx, fxx 16 | 17 | 18 | try: 19 | from matplotlib.mlab import psd, csd, cohere 20 | 21 | def tfe(y, x, *args, **kwargs): 22 | """estimate transfer function from x to y, 23 | see csd for calling convention""" 24 | sxy, fxy = csd(y, x, *args, **kwargs) 25 | sxx, fxx = psd(x, *args, **kwargs) 26 | return sxy / sxx, fxx 27 | 28 | 29 | except ImportError: 30 | tfe = tfe_sig 31 | 32 | 33 | def nextpow2(number): 34 | intlognum = int(np.log2(number)) 35 | return 2**intlognum 36 | 37 | 38 | def fft_filter(x, bands, gains): 39 | ''' 40 | Filter signal x using FFT and IFFT 41 | * x input signal 42 | * bands: list of start and stop frequencies of each band 43 | * gains: start and stop gains in each band 44 | 45 | Example: 46 | 47 | y = FFTfilter(x, [(0,0.1),(0.1,1.0)], [(1.,1.),(0.,0.)]) 48 | 49 | filters signal x low pass at 0.1 times the nyquist rate 50 | (sampling rate / 2) 51 | ''' 52 | 53 | xf = np.fft.fft(x) 54 | nyq = len(xf)/2 55 | 56 | ffilter = np.zeros(len(xf)) 57 | for bb, gg in zip(bands, gains): 58 | fmin = int(bb[0]*nyq) 59 | fmax = int(bb[1]*nyq) 60 | ffilter[fmin:fmax] = np.linspace(gg[0], gg[1], fmax-fmin) 61 | if fmin > 0: 62 | ffilter[-fmax+1:-fmin+1] = np.linspace(gg[1], gg[0], fmax-fmin) 63 | else: 64 | ffilter[-fmax+1:] = np.linspace(gg[1], gg[0], fmax-fmin-1) 65 | 66 | xf_filt = xf*ffilter 67 | return np.fft.ifft(xf_filt) 68 | 69 | 70 | def smthderiv(ff, ph, rad=1): 71 | dph = [] 72 | for i, phi in enumerate(ph): 73 | imin = max(0, i-rad) 74 | imax = min(len(ph), i+rad) 75 | pp = np.polyfit(ff[imin:imax], ph[imin:imax], 1) 76 | dph.append(pp[0]) 77 | return np.array(dph) 78 | 79 | 80 | def determineDelay(source, target, maxdel=2**16, ax=None): 81 | ''' 82 | Determine the delay between two signals 83 | (based on correlation extrema) 84 | 85 | Parameters: 86 | * Signals 87 | - source 88 | - target 89 | * maxdel: maximum delay to look for (in both directions) 90 | ''' 91 | sample_start = 0 92 | xd = source[sample_start:sample_start+maxdel] 93 | yd = target[sample_start:sample_start+maxdel] 94 | Cxx = np.correlate(xd, xd, 'full') 95 | Cxy = np.correlate(yd, xd, 'full') 96 | Pkx = np.argmax(np.abs(Cxx)) 97 | Pky = np.argmax(np.abs(Cxy)) 98 | if ax: 99 | try: 100 | ax.plot(Cxx) 101 | except AttributeError: 102 | fig, ax = pl.subplots(1) 103 | ax.plot(Cxx) 104 | ax.plot(Cxy) 105 | ax.axvline(Pkx, color='red') 106 | ax.plot(Pky, Cxy[Pky], 'o') 107 | 108 | delay = Pky-Pkx 109 | return delay 110 | 111 | 112 | def transferogram(source, target, rate=1, start_time=0., delta_time=1., 113 | sample_duration=.5, window_duration=.125, window_hop=None): 114 | ''' 115 | tfe, freqs, times, coherence = transferogram(...) 116 | 117 | Calculates a time-varying transfer function from source (x) 118 | to target (y) at intervals delta_time. 119 | 120 | Parameters: 121 | * source: source signal (reuqired) 122 | * target: target signal (required) 123 | * rate: sampling rate 124 | * start_time: starting time for tfe calculations 125 | * delta_time: distance between calculations 126 | * sample_duration: length of signals used in tfe estimates 127 | (longer than window_duration, used in averaging) 128 | * window_duration: inidvidual window length in tfe estimates 129 | * window_hop: hop between windows (defaults to window_duration/2) 130 | 131 | Returns: 132 | * tfe: transfer functions (complex matrix NxM) 133 | * freqs: frequencies corresponding to tfe estimates (array size N) 134 | * times: times corresponding to tfe estimates (array size M) 135 | * coherence: coherence matrix MxN 136 | ''' 137 | 138 | # convert time to samples 139 | sample_start = int(start_time*rate) 140 | sample_delta = int(delta_time*rate) 141 | sample_len = int(sample_duration*rate) 142 | 143 | if target is None: 144 | n_target = len(source) 145 | else: 146 | n_target = len(target) 147 | 148 | n_samples = min(len(source), n_target) 149 | sample_end = n_samples - sample_start - sample_len 150 | 151 | # windowing parameters 152 | nsamp_window = nextpow2(window_duration*rate) 153 | if window_hop: 154 | nsamp_window_hop = nextpow2(window_hop*rate) 155 | else: 156 | nsamp_window_hop = nsamp_window/2 157 | 158 | noverlap = nsamp_window - nsamp_window_hop 159 | 160 | resp = [] 161 | coherence = [] 162 | times = [] 163 | 164 | if target is None: 165 | for ii in np.arange(sample_start, sample_end, sample_delta): 166 | block_resp, freq = psd(source[ii:ii+sample_len], 167 | NFFT=nsamp_window, 168 | noverlap=noverlap, Fs=rate) 169 | block_coh = [] 170 | times.append((ii+sample_len/2)/float(rate)) 171 | resp.append(block_resp) 172 | coherence.append(block_coh) 173 | else: 174 | for ii in np.arange(sample_start, sample_end, sample_delta): 175 | block_resp, freq = tfe(target[ii:ii+sample_len], 176 | source[ii:ii+sample_len], NFFT=nsamp_window, 177 | noverlap=noverlap, Fs=rate) 178 | block_coh, _ = cohere(target[ii:ii+sample_len], 179 | source[ii:ii+sample_len], NFFT=nsamp_window, 180 | noverlap=noverlap, Fs=rate) 181 | times.append((ii+sample_len/2)/float(rate)) 182 | resp.append(block_resp) 183 | coherence.append(block_coh) 184 | 185 | return np.array(resp).T, freq, np.array(times), np.array(coherence).T 186 | 187 | 188 | def block_delay(source, target, window=None): 189 | if window is None: 190 | window = np.ones(len(source)) 191 | wind_source = window*source 192 | wind_target = window*target 193 | 194 | corr_st = np.correlate(wind_source, wind_target, "full") 195 | 196 | return np.argmax(corr_st)-len(source), np.max(corr_st) 197 | 198 | 199 | def maxdelwind(source, target, rate=1, start_time=0., delta_time=1., 200 | sample_duration=.5): 201 | ''' 202 | delay, times = maxdelwid(...) 203 | 204 | Calculates a time-varying delay function from source (x) 205 | to target (y) at intervals delta_time. 206 | 207 | Parameters: 208 | * source: source signal (reuqired) 209 | * target: target signal (required) 210 | * rate: sampling rate 211 | * start_time: starting time for tfe calculations 212 | * delta_time: distance between calculations 213 | * sample_duration: length of signals used in tfe estimates 214 | (longer than window_duration, used in averaging) 215 | Returns: 216 | * delay: max delay array 217 | * times: times corresponding to delay estimates (array size M) 218 | ''' 219 | 220 | # convert time to samples 221 | sample_start = int(start_time*rate) 222 | sample_delta = int(delta_time*rate) 223 | sample_len = int(sample_duration*rate) 224 | 225 | window = np.ones(sample_len) 226 | 227 | n_samples = min(len(source), len(target)) 228 | sample_end = n_samples - sample_start - sample_len 229 | 230 | delay = [] 231 | corr_strength = [] 232 | times = [] 233 | 234 | for block_start in np.arange(sample_start, sample_end, sample_delta): 235 | block_end = block_start + sample_len 236 | target_block = sig.detrend(target[block_start:block_end]) 237 | source_block = sig.detrend(source[block_start:block_end]) 238 | block_del, block_corr = block_delay(target_block, source_block, 239 | window=window) 240 | times.append((block_start+sample_len/2)/float(rate)) 241 | delay.append(block_del/float(rate)) 242 | corr_strength.append(block_corr) 243 | 244 | return np.array(delay), np.array(corr_strength), np.array(times) 245 | 246 | 247 | def plot_time_freq(tf_matrix, freq=None, time=None, ax=None, mask=None): 248 | if time is None: 249 | time = np.arange(tf_matrix.shape[1]) 250 | 251 | if freq is None: 252 | freq = np.arange(tf_matrix.shape[0]) 253 | 254 | if ax is None: 255 | fig, ax = pl.subplots(1) 256 | 257 | if mask is not None: 258 | tf_matrix[np.logical_not(mask)] = np.nan 259 | 260 | ax.imshow(tf_matrix, aspect='auto', origin='lower', 261 | extent=[min(time), max(time), min(freq), max(freq)]) 262 | 263 | -------------------------------------------------------------------------------- /pypevoc/__init__.py: -------------------------------------------------------------------------------- 1 | from .PVAnalysis import PV, PVHarmonic, SinSum 2 | from .Periodicity import PeriodSeries, period_marks_corr, period_marks_peak, period_marks_amdf 3 | from . import SoundUtils 4 | from . import TransferFunctions 5 | 6 | -------------------------------------------------------------------------------- /pypevoc/speech/DAP.py: -------------------------------------------------------------------------------- 1 | 2 | # pypevoc.speech.DAP.py 3 | # 4 | # Part of PyPeVoc python package 5 | # 6 | # Copyright (C) 2018 Andre Almeida 7 | # 8 | # based on covarep's env_dap.m: 9 | # https://github.com/covarep/covarep/blob/master/envelope/env_dap.m 10 | # 11 | # This program is free software: you can redistribute it and/or modify 12 | # it under the terms of the GNU General Public License as published by 13 | # the Free Software Foundation, either version 3 of the License, or 14 | # (at your option) any later version. 15 | # 16 | # This program is distributed in the hope that it will be useful, 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | # GNU General Public License for more details. 20 | # 21 | # You should have received a copy of the GNU General Public License 22 | # along with this program. If not, see . 23 | # 24 | 25 | import numpy as np 26 | import scipy.signal as sig 27 | import scipy.linalg as sla 28 | import logging 29 | 30 | class EnvelopeDAP(object): 31 | def __init__(self, sr=1.0, order=4, dftlen=2**12, maxit=50, alpha=.5, dISthresh=1e-6, 32 | minbw=None): 33 | self.sr = sr 34 | self.order = order 35 | self.dftlen = dftlen 36 | self.maxit = maxit 37 | self.alpha = alpha 38 | self.dISthresh = dISthresh 39 | if minbw is not None: 40 | self.minrr = np.exp(-np.pi/self.sr*minbw) 41 | 42 | def estimate(self, freqs, amps, order=None): 43 | if order is None: 44 | order = self.order 45 | omegas = 2*np.pi*freqs/self.sr 46 | amps = np.abs(amps) 47 | nharm = len(amps) 48 | 49 | # imaginary part of z variable 50 | ejw = np.exp(-1j*omegas * np.arange(0,order+1)) 51 | inv_ejw = np.exp(1j*omegas * np.arange(0,order+1)) 52 | 53 | # target autocorr matrix 54 | r = 1/nharm*np.real(amps**2*inv_ejw) 55 | rmx_inv = sla.inv(sla.toeplitz(r)) 56 | 57 | # initial guess (LPC) 58 | use_r = r[:order] 59 | a = sla.solve_toeplitz(r[:-1],-r[1:]) 60 | # calculate the prediction error 61 | 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /pypevoc/speech/PitchJumps.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # PitchJumps.py 5 | # 6 | # Detect pitch jumps in vocal glides 7 | # 8 | # Copyright 2017 Andre Almeida 9 | # 10 | # This program is free software; you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation; either version 2 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program; if not, write to the Free Software 22 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 23 | # MA 02110-1301, USA. 24 | # 25 | # 26 | 27 | import os 28 | import sys 29 | import numpy as np 30 | #import matplotlib.pyplot as pl 31 | import pandas 32 | from .. import PV 33 | from .SpeechChunker import SilenceDetector 34 | from scipy.stats import ttest_ind 35 | from scipy.signal import argrelmax 36 | 37 | 38 | try: 39 | from scipy.io.wavfile import read as wavread 40 | from scipy.io.wavfile import write as wavwrite 41 | except ImportError: 42 | sys.stderr.write('Scipy wav reader not found!\nUsing internal reader\n') 43 | from AudioInterface import wavLoad as wavread 44 | from AudioInterface import wavWrite as wavwrite 45 | 46 | def nextpow2(x): 47 | return int(2**np.ceil(np.log2(x))) 48 | 49 | 50 | def zscore_wind(x, wleft=5, wright=5, hop=None, kind='mean'): 51 | if hop is None: 52 | hop = 1 53 | 54 | zs = np.zeros(len(x)) 55 | for ii in range(wleft,len(x)-wright,hop): 56 | xx = x[ii-wleft:ii+wright] 57 | if kind=='mean': 58 | mx = np.nanmean(xx) 59 | sx = np.std(xx) 60 | elif kind == 'median': 61 | mx = np.nanmedian(xx) 62 | sx = np.percentile(xx,75)-np.percentile(xx,25) 63 | zs[ii] = (x[ii]-mx)/sx 64 | return zs 65 | 66 | 67 | def linreg_err(t, x, wleft=5, wright=5, hop=None): 68 | if hop is None: 69 | hop = 1 70 | 71 | zs = np.zeros(len(x)) 72 | if wright < 0: 73 | wm = 0 74 | else: 75 | wm=wright 76 | for ii in range(wleft,len(x)-wm,hop): 77 | xx = x[ii-wleft:ii+wright] 78 | tt = t[ii-wleft:ii+wright] 79 | 80 | p = np.polyfit(tt,xx,1) 81 | resid = xx-np.polyval(p,tt) 82 | std = np.std(resid) 83 | zs[ii] = (x[ii]-np.polyval(p,t[ii]))/std 84 | return zs 85 | 86 | 87 | def linreg2_err(t, x, wleft=5, wright=5, hop=None, use_l=True, use_r=True): 88 | if hop is None: 89 | hop=1 90 | 91 | zs = np.zeros(len(x)) 92 | if wright<0: 93 | wm = 0 94 | else: 95 | wm=wright 96 | for ii in range(wleft,len(x)-wm,hop): 97 | ts=[] 98 | xl = x[ii-wleft:ii] 99 | tl = t[ii-wleft:ii] 100 | xr = x[ii:ii+wright] 101 | tr = t[ii:ii+wright] 102 | 103 | if use_l>0: 104 | pl = np.polyfit(tl,xl,1) 105 | residll = xl-np.polyval(pl,tl) 106 | stdll = np.std(residll) 107 | residlr = xr-np.polyval(pl,tr) 108 | stdlr = np.std(residlr) 109 | ttl,pvl = ttest_ind(residll,residlr) 110 | ts.append(ttl) 111 | 112 | if use_r>0: 113 | pr = np.polyfit(tr,xr,1) 114 | residrr = xr-np.polyval(pr,tr) 115 | stdrr = np.std(residrr) 116 | residrl = xl-np.polyval(pr,tl) 117 | stdrl = np.std(residrl) 118 | ttr,pvr = ttest_ind(residrr,residrl) 119 | ts.append(-ttr) 120 | zs[ii] = np.mean(ts) 121 | return zs 122 | 123 | 124 | def avg_interpolator(tn, t, x, twind=0): 125 | xn = np.zeros(len(tn)) 126 | for ii, tt in enumerate(tn): 127 | try: 128 | ior = np.flatnonzero(t > tt+twind)[0] 129 | except IndexError: 130 | ior = len(t) 131 | try: 132 | iol = np.flatnonzero(t < tt-twind)[-1] 133 | except IndexError: 134 | iol = 0 135 | 136 | xn[ii] = np.mean(x[iol:ior]) 137 | return xn 138 | 139 | 140 | class JumpDetector(object): 141 | def __init__(self, min_freq=70, 142 | pitch_t_hop=0.02, 143 | regressor_t=0.5, 144 | t_threshold=10, 145 | mag_threshold=0.01): 146 | """ 147 | Pitch jump detector object, 148 | 149 | Calculates pitch track and detects jumps by comparing linear 150 | trends on each side of a smaple 151 | 152 | Arguments: 153 | * min_freq: minimum frequency for pitch detector 154 | * pitch_t_hop: time between pitch estimates 155 | * regressor_t: time for estimation of linear slopes 156 | in pitch track 157 | * t_threshold: threshold for t-test comparator 158 | * mag_threshold: magnitude threshold for pitch track 159 | """ 160 | self.min_freq = min_freq 161 | self.pitch_t_hop = pitch_t_hop 162 | self.mag_threshold = mag_threshold 163 | self.t_threshold = t_threshold 164 | self.regressor_t = regressor_t 165 | self.slope_t = regressor_t 166 | 167 | def detect_pitch(self, w, sr): 168 | nfft = nextpow2(sr/self.min_freq*2) 169 | n_hop = nextpow2(sr*self.pitch_t_hop) 170 | pv = PV(w, sr, nfft=nfft, hop=n_hop) 171 | pv.run_pv() 172 | self.mag = np.sqrt(np.sum(pv.mag**2, axis=1)) 173 | self.t = pv.get_time_vector() 174 | self.f0 = pv.calc_f0() 175 | self.nfft = nfft 176 | self.nhop = n_hop 177 | 178 | def detect_jumps(self): 179 | wle = int(self.regressor_t/self.pitch_t_hop) 180 | isel = self.mag > np.max(self.mag)*self.mag_threshold 181 | tsel = self.t[isel] 182 | fsel = self.f0[isel] 183 | self.isel = isel 184 | #pl.plot(np.flatnonzero(isel),20*np.log10(m[isel])) 185 | 186 | le = linreg2_err(tsel, fsel, wleft=wle, wright=wle, use_l=True) 187 | #ax[0].plot(tsel,fsel) 188 | #ax[1].plot(tsel,le) 189 | 190 | imax = argrelmax(le)[0] 191 | lemax = le[imax] 192 | idx = imax[lemax > self.t_threshold] 193 | ijup = idx 194 | #ax[0].plot(tsel[idx],fsel[idx],'o') 195 | #ax[1].plot(tsel[idx],le[idx],'o') 196 | 197 | imin = argrelmax(-le)[0] 198 | lemin = le[imin] 199 | idx = imin[lemin < -self.t_threshold] 200 | ijdn = idx 201 | #ax[0].plot(tsel[idx],fsel[idx],'o') 202 | #ax[1].plot(tsel[idx],le[idx],'o') 203 | 204 | self.down_jump_indices = np.asarray(ijdn) 205 | self.up_jump_indices = np.asarray(ijup) 206 | self.down_jump_times = tsel[ijdn] 207 | self.up_jump_times = tsel[ijup] 208 | 209 | def calc_jump_params(self): 210 | tsel = self.t[self.isel] 211 | fsel = self.f0[self.isel] 212 | ijup = self.up_jump_indices 213 | ijdn = self.down_jump_indices 214 | 215 | nsl = int(self.slope_t/self.pitch_t_hop) 216 | 217 | alli = np.sort(np.concatenate((ijup, ijdn))) 218 | # pl.figure() 219 | #pl.plot(tsel,fsel) 220 | p = [] 221 | intcpts = [] 222 | sumres = [] 223 | for ii in alli: 224 | il = max(0, ii-nsl) 225 | ir = min(ii+nsl, len(fsel)) 226 | polyl = np.polyfit(tsel[il:ii], fsel[il:ii], 1) 227 | intl = np.polyval(polyl, tsel[ii]) 228 | rsuml = np.sqrt(np.nansum((fsel[il:ii]-np.polyval(polyl, tsel[il:ii]))**2)/(ii-il)) 229 | 230 | polyr = np.polyfit(tsel[ii+1:ir], fsel[ii+1:ir], 1) 231 | intr = np.polyval(polyr, tsel[ii]) 232 | rsumr = np.sqrt(np.nansum((fsel[ii+1:ir]-np.polyval(polyr, tsel[ii+1:ir]))**2)/(ir-ii)) 233 | 234 | #pl.plot(tsel[il:ii+1],np.polyval(polyl,tsel[il:ii+1]),color='r',alpha=.5) 235 | #pl.plot(tsel[ii:ir],np.polyval(polyr,tsel[ii:ir]),color='m',alpha=.5) 236 | 237 | p.append([polyl, polyr]) 238 | intcpts.append([intl, intr]) 239 | sumres.append([rsuml, rsumr]) 240 | 241 | self.intcpts = np.array(intcpts) 242 | self.sumres = np.array(sumres) 243 | 244 | #pl.plot(tsel[alli],intcpts[:,0],'o') 245 | #pl.plot(tsel[alli],intcpts[:,1],'o') 246 | 247 | def process(self, w, sr): 248 | """ 249 | process pitch tracking and jump detection 250 | """ 251 | self.detect_pitch(w, sr) 252 | self.detect_jumps() 253 | self.calc_jump_params() 254 | return np.sort(np.concatenate([self.up_jump_times, 255 | self.down_jump_times])) 256 | 257 | def get_jump_table(self): 258 | allt = np.sort(np.concatenate([self.up_jump_times, 259 | self.down_jump_times])) 260 | 261 | df = pandas.DataFrame({'segment_time': allt, 262 | 'f_before': self.intcpts[:, 0], 263 | 'f_after': self.intcpts[:, 1], 264 | 'f_cent': np.mean(self.intcpts, axis=1), 265 | 'df': (np.diff(self.intcpts, axis=1))[:, 0], 266 | 'residue_before': self.sumres[:, 0], 267 | 'residue_after': self.sumres[:, 1], 268 | 'residue_total': np.sum(self.sumres, axis=1)}) 269 | 270 | return df 271 | 272 | 273 | def segment_and_detect_jumps(w, sr, **kwargs): 274 | sc = SilenceDetector(w, sr, fmin=50, fmax=1000) 275 | jd = JumpDetector(**kwargs) 276 | df = pandas.DataFrame() 277 | for ii, (tst, tend) in enumerate(zip(sc.tst, sc.tend)): 278 | ww = w[int(tst*sr):int(tend*sr)] 279 | tjmp = jd.process(ww, sr) 280 | try: 281 | dfi = jd.get_jump_table() 282 | dfi['rec_time'] = dfi['segment_time']+tst 283 | dfi['region_nbr'] = ii 284 | df = df.append(dfi, ignore_index=True) 285 | except IndexError: 286 | sys.stderr.write("Jump table empty between {:.2f} and {:.2f}\n".format(tst,tend)) 287 | 288 | segments = pandas.DataFrame({'nbr': np.arange(len(sc.tst)), 'start': sc.tst, 289 | 'end': sc.tend}) 290 | return df, segments 291 | 292 | def file_reader(filename, chan): 293 | file_base, file_ext = os.path.splitext(filename) 294 | if file_ext.lower() == '.aup': 295 | import audacity 296 | aud = audacity.Aup(filename) 297 | w = aud.get_channel_data(chan) 298 | sr = aud.rate 299 | else: 300 | sys.stderr.write("Format not recognized: %s" % file_ext) 301 | return 302 | return(sr, w) 303 | 304 | 305 | def pitch_jump_file(filename, channel_nbr=0): 306 | sr, w = file_reader(filename, channel_nbr) 307 | df,dfs = segment_and_detect_jumps(w, sr) 308 | df.to_csv('pitch_jumps.csv') 309 | dfs.to_csv('segments.csv') 310 | -------------------------------------------------------------------------------- /pypevoc/speech/SpeechAnalysis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | import scipy.signal as sig 4 | import scipy.linalg as lg 5 | from scipy.io import wavfile 6 | from .. import FFTFilters as ftf 7 | from ..PeakFinder import PeakFinder 8 | 9 | def lpc(w, order, axis=-1): 10 | """ 11 | Calculate the lpc coefficients of the waveform 12 | """ 13 | 14 | nsamp = w.shape[axis] 15 | if order > nsamp: 16 | raise ValueError('Order must be smaller than size of vector') 17 | 18 | r = np.correlate(w, w, 'full') 19 | #use_r = np.zeros(order+1) 20 | #use_r[:order+1] = r[nsamp-1:nsamp+order] 21 | use_r = r[nsamp-1:nsamp+order] 22 | a = lg.solve_toeplitz(use_r[:-1], -use_r[1:]) 23 | 24 | return a 25 | 26 | def refine_max(x, pos): 27 | ''' 28 | Given the position of a peak pos in a series x, 29 | interpolate the position assuming that the peak is 30 | approximated by a quadratic function 31 | ''' 32 | if pos==0: 33 | pos=1 34 | 35 | sur = x[pos-1:pos+2] 36 | 37 | if sur[1]>sur[0] and sur[1]>sur[2]: 38 | c = sur[1] 39 | b = (sur[2] - sur[0])/2 40 | a = (sur[2] + sur[0])/2 - c 41 | 42 | lpos = - b/2/a 43 | fpos = float(pos) + lpos 44 | fval = a*lpos*lpos + b*lpos + c 45 | #print "rpos = %d; rval = %f; val = %f; dpos = %f; pos = %f"%(pos,sur[1],fval, lpos, fpos) 46 | 47 | else: 48 | fpos = pos 49 | fval = sur[1] 50 | 51 | return fpos,fval.tolist() 52 | 53 | 54 | def DistribMoments(x,f, MaxMoments=4): 55 | '''Calculate the moments in a distribution f(x) 56 | x: abcissa - values at which distribution is given 57 | f: value - values of the distribution 58 | MaxMoments: maximum moment order to return 59 | 60 | returns: 61 | COG: center of gravity 62 | StDev: standard deviation 63 | skew: skewness 64 | kurt: kurtosis 65 | Moments: array with all raw central moments 66 | ''' 67 | 68 | moments = [] 69 | m0 = np.sum(f) 70 | m1 = np.sum(f*x)/m0 71 | moments.append(m1) 72 | for mn in range(1,MaxMoments): 73 | moments.append(np.sum((x-m1)**(mn+1)*f)/m0) 74 | 75 | cog = m1 76 | stdev = np.sqrt(moments[1]) 77 | skew = moments[2]/moments[1]**1.5 78 | kurt = moments[3]/moments[1]**2 - 3. 79 | 80 | return cog,stdev,skew,kurt,moments 81 | 82 | def SpectralMoments(w, Fs, tWind=0.025, tHop=0.0125, 83 | windFunc=sig.hamming, fCut=300, maxMoments=4): 84 | '''Calculates spectral moments in short windows of signal w 85 | 86 | w: signal 87 | Fs: sample rate 88 | tWind: window length in seconds 89 | tHop: hop length in seconds 90 | windFunc: windowing function 91 | fCut: high-pass cutoff 92 | MaxMoments: maximum moment order to return 93 | 94 | returns: 95 | cog: center of gravity 96 | stdev: standard deviation 97 | skew: skewness 98 | kurt: kurtosis 99 | moments: all the moments 100 | 101 | 102 | ''' 103 | 104 | wLen = len(w) 105 | hopLenSam = int(np.round(Fs*tHop)); 106 | windowLenSam = int(np.round(Fs*tWind)); 107 | #print 'SpectralMoments: Fs={}; wLen={}; hop={}'.format(Fs,windowLenSam,hopLenSam) 108 | specLen = int(windowLenSam/2) 109 | 110 | dt = 1./Fs 111 | nFrames = int((wLen - windowLenSam-1)/hopLenSam) 112 | 113 | wind = windFunc(windowLenSam) 114 | SxxSum = np.zeros(specLen) 115 | freqS = (np.arange(specLen))*float(Fs)/windowLenSam 116 | 117 | for FN in np.arange(nFrames): 118 | I0 = (FN)*hopLenSam; 119 | Iend = I0 + windowLenSam; 120 | X = w[I0:Iend]; 121 | 122 | XW = X*sig.hamming(len(X)); 123 | XF = np.fft.fft(XW) 124 | Sxx = np.abs(XF)**2 125 | SxxSum = SxxSum + Sxx[0:specLen] 126 | 127 | # periodogram 128 | SxxS = np.sqrt(SxxSum/float(nFrames)) 129 | 130 | # intensity 131 | intens = np.sqrt(np.mean(SxxSum/float(nFrames))) 132 | 133 | # filter out low frequencies 134 | idx = freqS > fCut 135 | 136 | # compute moments 137 | cog, std, skew, kurt, mm=DistribMoments(freqS[idx], SxxS[idx], maxMoments) 138 | 139 | return dict(cog=cog, std=std, skew=skew, kurt=kurt, level=intens) 140 | 141 | def Periodogram(w, Fs, tWind=0.025, tHop=0.0125, 142 | windFunc=sig.hamming): 143 | '''Calculates spectral moments in short windows of signal w 144 | 145 | w: signal 146 | Fs: sample rate 147 | tWind: window length in seconds 148 | tHop: hop length in seconds 149 | windFunc: windowing function 150 | 151 | returns: 152 | Sxx: power spectrum 153 | f: frequency values 154 | 155 | 156 | ''' 157 | 158 | wLen = len(w) 159 | hopLenSam = int(np.round(Fs*tHop)); 160 | windowLenSam = int(np.round(Fs*tWind)); 161 | #print 'SpectralMoments: Fs={}; wLen={}; hop={}'.format(Fs,windowLenSam,hopLenSam) 162 | specLen = windowLenSam/2 163 | 164 | dt = 1./Fs 165 | nFrames = (wLen - windowLenSam-1)/hopLenSam 166 | 167 | wind = windFunc(windowLenSam) 168 | SxxSum = np.zeros(specLen) 169 | freqS = (np.arange(specLen))*float(Fs)/windowLenSam 170 | 171 | for FN in np.arange(nFrames): 172 | I0 = (FN)*hopLenSam; 173 | Iend = I0 + windowLenSam; 174 | X = w[I0:Iend]; 175 | 176 | XW = X*sig.hamming(len(X)); 177 | XF = np.fft.fft(XW) 178 | Sxx = np.abs(XF)**2 179 | SxxSum = SxxSum + Sxx[0:specLen] 180 | 181 | # periodogram 182 | Sxx = (SxxSum/float(nFrames)) 183 | 184 | return Sxx, freqS 185 | 186 | def lpc2form(a, Fs=1.0): 187 | ''' 188 | Convert all-pole coefficients to resonance frequencies 189 | and bandwidths 190 | 191 | a: LPC coefficients (all-pole coefficients excluding order 0) 192 | Fs: sampling rate 193 | ''' 194 | RTS = np.roots(np.concatenate(([1],a))); 195 | 196 | # roots are complex conjugate pairs 197 | RTS = RTS[np.imag(RTS)>=0]; 198 | AngZ = np.arctan2(np.imag(RTS),np.real(RTS)); 199 | 200 | # Convert normalised frequency to freq. 201 | nFreq = AngZ*(Fs/(2*np.pi)) 202 | Indices = np.argsort(nFreq); 203 | FreqS = nFreq[Indices] 204 | FreqS = FreqS[FreqS>0] 205 | 206 | # Bandwidths are the distance to the unit circle 207 | BW = -1/2*(Fs/(2*np.pi))*np.log(np.abs(RTS[Indices])) 208 | 209 | return FreqS, BW 210 | 211 | def lpc2form_full(a, Fs=1.0, npts=1024): 212 | FreqS, BW = lpc2form(a, Fs) 213 | omega, h = sig.freqz([1],np.concatenate(([1], a)), worN=npts) 214 | f = omega/np.pi * Fs/2 215 | pks = PeakFinder(x=f, y=np.abs(h)) 216 | pks.refine_all() 217 | 218 | return FreqS, BW, pks.pos, pks.val 219 | 220 | def Formants(w, Fs, tWind=0.025, tHop=0.0125, 221 | fMin=50, fMax=5500, bwMax=400, 222 | modelOrd=10, hpFreq=50, full=False): 223 | '''Estimate formants from waveform w with sample rate Fs 224 | 225 | tWind: window length in seconds 226 | tHop: hop length in seconds 227 | fMin: minimum frequency of formant in Hz 228 | fMax: maximum frequency of formant in Hz 229 | (determines resampling rate) 230 | bwMax: maximum bandwidth (Hz) 231 | modelOrder: model order for linear prediction (LPC) 232 | hpFreq: cutoff frequency of pre-emphasis filter 233 | (high-pass, 1st order) 234 | full: also calclate amplitudes and freqs of peaks 235 | ''' 236 | 237 | # pre-emphasise 238 | # 239 | if hpFreq>0: 240 | a=np.exp(-2.*np.pi*hpFreq/float(Fs)); 241 | #preEmphA = [a,1-a]; 242 | #wo = sig.lfilter([1],preEmphA,w); 243 | wo=w 244 | wo[:-1] -= wo[1:] 245 | else: 246 | wo=w 247 | 248 | # resample the original wave file 249 | # AnalysisFs = 8000; 250 | 251 | underSample = int(Fs/fMax/2); 252 | FsO = Fs; 253 | 254 | # Fourier method: can be slow! 255 | #w = sig.resample(wo,len(wo)/underSample); 256 | 257 | # Resample: polyhase method (only in scipy v18.1) 258 | w = sig.resample_poly(wo,1,underSample); 259 | 260 | Fs = int(FsO*len(w)/float(len(wo))); 261 | Fsf = float(Fs) 262 | 263 | wLen = len(w); 264 | 265 | hopLenSam = int(round(Fs*tHop)); 266 | windowLenSam = int(round(Fs*tWind)); 267 | #print 'Formant: Fs={}; wLen={}; hop={}'.format(Fs,windowLenSam,hopLenSam) 268 | 269 | dt = 1./Fs; 270 | nFrames = int(np.floor((wLen-windowLenSam-1)/hopLenSam)) 271 | 272 | Form = np.nan*np.ones((nFrames,int(modelOrd/2))); 273 | BandWidths = np.nan*np.ones((nFrames,int(modelOrd/2))); 274 | if full: 275 | Peaks = np.nan*np.ones((nFrames,int(modelOrd/2))); 276 | Amplitudes = np.nan*np.ones((nFrames,int(modelOrd/2))); 277 | Time = np.arange(nFrames+0)*hopLenSam/Fsf+windowLenSam/Fsf/2 278 | 279 | 280 | for FN in np.arange(nFrames): 281 | I0 = (FN)*hopLenSam; 282 | Iend = I0 + windowLenSam; 283 | X = w[I0:Iend]; 284 | 285 | XW = X*sig.hamming(len(X)); 286 | #XW = X*sig.gaussian(len(X),0.4); 287 | 288 | # pre-emphasis filter 289 | # all-pole high pass filter 290 | 291 | #PreEmph = [1 0.63]; 292 | #XW = filter(1,PreEmph,XW); 293 | 294 | # call LPC 295 | # A, err, rcoeff = lpc(XW,modelOrd); 296 | A = lpc(XW,modelOrd); 297 | 298 | if full: 299 | FreqS, BW, pkF, pkA = lpc2form_full(A, Fs) 300 | else: 301 | FreqS, BW = lpc2form(A, Fs) 302 | NN = 0 303 | for KK in range(len(FreqS)): 304 | if (FreqS[KK] > fMin and FreqS[KK] < fMax-fMin and BW[KK] 0: 309 | idx = np.argmin(np.abs(pkF-FreqS[KK])) 310 | Peaks[FN, NN] = pkF[idx] 311 | Amplitudes[FN, NN] = pkA[idx] 312 | NN = NN + 1 313 | else: 314 | #print('Rejected f={}, bw={}'.format(FreqS[KK],BW[KK])) 315 | pass 316 | if full: 317 | return Time, Form, BandWidths, Peaks, Amplitudes 318 | else: 319 | return Time, Form, BandWidths 320 | 321 | def rmsWind(w, nwind=256, nhop=None, windfunc=np.ones, sr=1): 322 | ''' 323 | calculate RMS values in window chunks of data 324 | ''' 325 | 326 | if not nhop: 327 | nhop=nwind/2 328 | 329 | i=0 330 | 331 | nw=len(w) 332 | 333 | tl=[] 334 | rl=[] 335 | 336 | wvr = windfunc(nwind) 337 | wvnorm = np.sqrt(sum(wvr**2)/nwind) 338 | 339 | wv = wvr/wvnorm 340 | 341 | while i0: 390 | wo = ftf.preemph(w,Fs=sr,hpFreq=hpFreq) 391 | else: 392 | wo = w 393 | 394 | tamp,amp = rmsWind(w,nwind=wsize*4) 395 | 396 | fdict = {pp:pd.DataFrame() for pp in pos} 397 | 398 | for st,end in intervals: 399 | kept=[] 400 | for pp in pos: 401 | print('{:.3f}-{:.3f} @ {}'.format(st,end,pp)) 402 | try: 403 | duration = end-st 404 | imed = int((st+duration*(pp))*sr) 405 | imin = int(imed-wsize/2) 406 | imax = int(imed+wsize/2) 407 | if imin < st*sr: 408 | imin = int(row[('all',start_col)]*sr) 409 | imax = int(imin+wsize) 410 | if imax > end*sr: 411 | imax = int(row[('all',end_col)]*sr) 412 | imin = int(imax - wsize) 413 | 414 | ww = wo[imin:imax] 415 | 416 | fricdata = FricativeData(ww,sr=sr,nwind=wsize) 417 | 418 | fdict[pos].append(fricdata) 419 | except ValueError as e: 420 | print(' ERROR at {}-{}'.format(st,end)) 421 | print(e) 422 | fdict[pos].append([]) 423 | return fdict 424 | -------------------------------------------------------------------------------- /pypevoc/speech/SpeechChunker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # SpeechChunkerer.py 5 | # 6 | # Copyright 2017 Andre Almeida 7 | # 8 | # This program is free software; you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation; either version 2 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # This program is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with this program; if not, write to the Free Software 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 21 | # MA 02110-1301, USA. 22 | # 23 | # 24 | 25 | import sys 26 | import os 27 | 28 | import numpy as np 29 | 30 | from .SpeechAnalysis import rmsWind 31 | 32 | try: 33 | from scipy.io.wavfile import read as wavread 34 | from scipy.io.wavfile import write as wavwrite 35 | except ImportError: 36 | sys.stderr.write('Scipy wav reader not found!\nUsing internal reader\n') 37 | from AudioInterface import wavLoad as wavread 38 | from AudioInterface import wavWrite as wavwrite 39 | 40 | class SilenceDetector(object): 41 | ''' 42 | Detects regions of silence in a sound file 43 | ''' 44 | 45 | def __init__(self, x, sr=1, wind_sec=0.92, method = 'pct05', 46 | min_len = 0.1, max_len=5, fmin=None, fmax=None): 47 | ''' 48 | crate silence detector 49 | ''' 50 | self.x=x 51 | self.sr=sr 52 | self.nwind=int(wind_sec*sr) 53 | 54 | if fmin is None and fmax is None: 55 | self._calc_amplitude(nwind=self.nwind) 56 | else: 57 | self._calc_band_amplitude(nwind=self.nwind, fmin=fmin, fmax=fmax) 58 | if method[0:3].lower()=='pct': 59 | try: 60 | pctval = int(method[3:5]) 61 | except TypeError: 62 | pctval = 5 63 | self._percentile_discriminator(pct=pctval) 64 | elif method=='kmeans': 65 | self._k_means_discriminator() 66 | 67 | #return self._clusters_to_time_int() 68 | self.tst, self.tend = self._clusters_to_time_int(min_int=min_len, 69 | max_int=max_len) 70 | 71 | def _calc_amplitude(self,nwind=4096): 72 | ''' 73 | calculates amplitude for amplitude discriminator 74 | ''' 75 | self.nfr = int(nwind/2) 76 | self.at, ampl = rmsWind(self.x,sr=self.sr,nwind=self.nwind, 77 | nhop = self.nfr) 78 | self.ax = 20*np.log10(ampl) 79 | 80 | def _calc_band_amplitude(self,nwind=4096,fmin=50,fmax=5000): 81 | ''' 82 | calculates amplitude in a frequency band for amplitude discriminator 83 | ''' 84 | from ..FFTFilters import FilterBank, PiecewiseFilterSpec 85 | self.nfr = int(nwind/2) 86 | if fmin is None: 87 | fb = FilterBank([PiecewiseFilterSpec(freq=fmax,mode='lp',sr=self.sr)], 88 | sr=self.sr,nwind=self.nwind,nhop=self.nfr) 89 | 90 | elif fmax is None: 91 | fb = FilterBank([PiecewiseFilterSpec(freq=fmin,mode='hp',sr=self.sr)], 92 | sr=self.sr,nwind=self.nwind,nhop=self.nfr) 93 | else: 94 | fb = FilterBank([PiecewiseFilterSpec(freq=[fmin,fmax],mode='bp',sr=self.sr)], 95 | sr=self.sr,nwind=self.nwind,nhop=self.nfr) 96 | 97 | ampl, self.at = fb.specout(self.x) 98 | self.ax = 20*np.log10(ampl.flatten()) 99 | 100 | def _k_means_discriminator(self, batch_size=45): 101 | from sklearn.cluster import MiniBatchKMeans 102 | from sklearn.metrics.pairwise import pairwise_distances_argmin 103 | 104 | mbk = MiniBatchKMeans(init='k-means++', n_clusters=2, batch_size=batch_size, 105 | n_init=10, max_no_improvement=10, verbose=0) 106 | #t0 = time.time() 107 | X = np.log10(self.ax.reshape(-1, 1)) 108 | mbk.fit(X) 109 | cc = np.sort(mbk.cluster_centers_,axis=0) 110 | self.clusters = pairwise_distances_argmin(X,cc) 111 | 112 | 113 | 114 | def _percentile_discriminator(self, pct=5): 115 | ''' 116 | calculate threshold based on percentiles 117 | 118 | arguments: 119 | pct: percentile value 120 | ''' 121 | self.amin = np.percentile(self.ax, pct) 122 | self.amax = np.percentile(self.ax, 100-pct) 123 | self.ath = (self.amax+self.amin)/2 124 | 125 | self.clusters = np.zeros(self.ax.shape[0],dtype='int') 126 | self.clusters[self.ax>self.ath]=1 127 | 128 | def _clusters_to_time_int(self, min_int=0.0, max_int=None): 129 | 130 | tfr = self.nfr/float(self.sr) 131 | min_frames = int(np.round(min_int/tfr)) 132 | if max_int: 133 | maxlen = int(max_int/tfr) 134 | else: 135 | maxlen = len(self.ax) 136 | 137 | lastsplit=0 138 | 139 | i=0 140 | 141 | off=True 142 | 143 | nframes = len(self.clusters) 144 | 145 | tst=[] 146 | tend=[] 147 | 148 | while i+min_frames0 and off: 150 | tst.append(self.at[max(0,i-1)]) 151 | off=False 152 | i+=1 153 | elif self.clusters[i]<=0 and not off: 154 | if all(self.clusters[i:i+min_frames] <=0 ): 155 | off=True 156 | tend.append(self.at[i]) 157 | i+=min_frames 158 | else: 159 | i+=1 160 | else: 161 | i+=1 162 | if not off: 163 | tend.append(self.at[-1]) 164 | return tst,tend 165 | 166 | def to_textgrid(self, filename='segmentation.TextGrid', 167 | tiername='Segmentation'): 168 | 169 | from pympi import TextGrid 170 | tg=TextGrid(xmax=max(self.tend)) 171 | tier=tg.add_tier(tiername) 172 | for ii, (ts,te,lab) in enumerate(zip(self.tst,self.tend,self.label)): 173 | tier.add_interval(ts,te,'{}'.format(lab)) 174 | 175 | tg.to_file(filename) 176 | 177 | def output(self, file_handle): 178 | for ii, (ts,te,lab) in enumerate(zip(self.tst,self.tend,self.label)): 179 | file_handle.write('{},{},{}\n'.format(ts,te,lab)) 180 | 181 | def recognise(self, mode='sphinx', marg=0.2): 182 | import speech_recognition as srec 183 | # use the audio file as the audio source 184 | r = srec.Recognizer() 185 | 186 | if mode=='sphinx': 187 | recogniser = r.recognize_sphinx 188 | sys.stderr.write('Doing speech recognition with sphinx\n') 189 | if mode=='google': 190 | sys.stderr.write('Doing speech recognition with google\n') 191 | recogniser = r.recognise_google 192 | 193 | for ii, (ts,te,lab) in enumerate(zip(self.tst,self.tend,self.label)): 194 | tstart = ts-marg 195 | tend = te+marg 196 | wo=self.x[int(tstart*self.sr):int(tend*self.sr)] 197 | 198 | wavwrite('speech_sample.wav',self.sr,wo.astype('int16')) 199 | 200 | with srec.AudioFile('speech_sample.wav') as source: 201 | audio = r.record(source) # read the entire audio file 202 | 203 | try: 204 | # for testing purposes, we're just using the default API key 205 | # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")` 206 | # instead of `r.recognize_google(audio)` 207 | utt = recogniser(audio) 208 | #utt = r.recognize_sphinx(audio) 209 | self.label[ii] = utt 210 | sys.stderr.write('{}\n'.format(utt)) 211 | except srec.UnknownValueError: 212 | sys.stderr.write("Speech Recognition could not understand audio\n") 213 | except srec.RequestError as e: 214 | sys.stderr.write("Could not request results {}\n".format(e)) 215 | 216 | class MultiChannelSegmenter(object): 217 | ''' 218 | Detects regions from different sources from multi-channel recordings 219 | ''' 220 | 221 | def __init__(self, x, sr=1, nwind=4096, method = 'kmeans', 222 | nsources = 2, min_len = 0.1, max_len=5): 223 | ''' 224 | crate multi-channel analyser 225 | ''' 226 | self.x=x 227 | self.sr=sr 228 | self.nwind=nwind 229 | self.nsources = nsources 230 | 231 | self._calc_amplitude(nwind=nwind) 232 | if method[0:3].lower()=='pct': 233 | try: 234 | pctval = int(method[3:5]) 235 | except TypeError: 236 | pctval = 5 237 | self._percentile_discriminator(pct=pctval) 238 | elif method=='kmeans': 239 | self._k_means_discriminator() 240 | 241 | #return self._clusters_to_time_int() 242 | self.tst, self.tend, self.label = self._clusters_to_time_int( 243 | min_int=min_len, 244 | max_int=max_len) 245 | 246 | def _calc_amplitude(self,nwind=4096): 247 | ''' 248 | calculates amplitude for amplitude discriminator 249 | ''' 250 | self.nfr = int(nwind/2) 251 | 252 | ax=[] 253 | for i in range(self.x.shape[1]): 254 | self.at, axi = rmsWind(self.x[:,i],sr=self.sr, 255 | nwind=self.nwind, 256 | nhop = self.nfr) 257 | ax.append(axi) 258 | self.ax = np.array(ax).T 259 | self.dt = self.at[1]-self.at[0] 260 | 261 | def _k_means_discriminator(self, batch_size=45): 262 | from sklearn.cluster import MiniBatchKMeans 263 | from sklearn.metrics.pairwise import pairwise_distances_argmin 264 | 265 | mbk = MiniBatchKMeans(init='k-means++', n_clusters=self.nsources+1, 266 | batch_size=batch_size, 267 | n_init=10, max_no_improvement=10, verbose=0) 268 | #t0 = time.time() 269 | X = np.log10(self.ax) 270 | mbk.fit(X) 271 | cc = np.zeros(mbk.cluster_centers_.shape) 272 | # index of cluster corresponding to silence 273 | idx_silence = np.argmin(np.sum(mbk.cluster_centers_,axis=1)) 274 | cc[0,:] = mbk.cluster_centers_[idx_silence,:] 275 | idx_free = range(cc.shape[0]) 276 | idx_free.remove(idx_silence) 277 | cred = mbk.cluster_centers_-cc[0,:] 278 | # remaining indexes, sort them by channel 279 | used_chan=[] 280 | nchan = cc.shape[1] 281 | last_unmatched=0 282 | while idx_free: 283 | crem = cred[idx_free,:] 284 | r,idx_chan = np.unravel_index(crem.argmax(),crem.shape) 285 | idx_center = idx_free[r] 286 | if idx_chan not in used_chan: 287 | this_center = idx_chan+1 288 | else: 289 | # append to end of list 290 | this_center = cc.shape[0]-last_unmatched-1 291 | sys.stderr.write('Cluster {} not matched to channel\n'.format(idx_center)) 292 | cc[this_center,:]=mbk.cluster_centers_[idx_center,:] 293 | used_chan.append(idx_chan) 294 | idx_free.remove(idx_center) 295 | 296 | cc[1:,:] = np.delete(mbk.cluster_centers_,idx_silence,axis=0) 297 | #cc = mbk.cluster_centers_[idxs,:] 298 | self.clusters = pairwise_distances_argmin(X,cc) 299 | self.centers = cc 300 | 301 | 302 | def _clusters_to_time_int(self, min_int=0.0, max_int=None): 303 | 304 | tfr = self.nfr/float(self.sr) 305 | min_frames = int(np.round(min_int/tfr)) 306 | if max_int: 307 | maxlen = int(max_int/tfr) 308 | else: 309 | maxlen = len(self.ax) 310 | 311 | lastsplit=0 312 | 313 | i=0 314 | 315 | off=True 316 | 317 | nframes = len(self.clusters) 318 | 319 | tst=[] 320 | tend=[] 321 | label=[] 322 | 323 | lastlabel = 0 324 | 325 | while i+min_frames0 and off: 327 | tst.append(self.at[max(0,i-1)]) 328 | label.append(self.clusters[i]) 329 | lastlabel = self.clusters[i] 330 | off=False 331 | i+=1 332 | elif self.clusters[i]<=0 and not off: 333 | if all(self.clusters[i:i+min_frames] <=0 ): 334 | off=True 335 | tend.append(self.at[i]) 336 | i+=min_frames 337 | else: 338 | i+=1 339 | 340 | elif self.clusters[i] != lastlabel and not off: 341 | tend.append(self.at[i-1])#-self.dt/2) 342 | tst.append(self.at[i-1]) 343 | label.append(self.clusters[i]) 344 | lastlabel = self.clusters[i] 345 | off=False 346 | i+=1 347 | 348 | else: 349 | i+=1 350 | if not off: 351 | tend.append(self.at[-1]) 352 | return tst,tend,label 353 | 354 | def to_textgrid(self, filename='mc_segmentation.TextGrid', 355 | tiername='Segmentation'): 356 | 357 | from pympi import TextGrid 358 | tg=TextGrid(xmax=max(self.tend)) 359 | tier=tg.add_tier(tiername) 360 | for ii, (ts,te,lab) in enumerate(zip(self.tst,self.tend,self.label)): 361 | tier.add_interval(ts,te,'{}'.format(lab)) 362 | 363 | tg.to_file(filename) 364 | 365 | def output(self, file_handle): 366 | for ii, (ts,te,lab) in enumerate(zip(self.tst,self.tend,self.label)): 367 | filehandle.write('{},{},{}'.format(ts,te,lab)) 368 | 369 | def recognise(self, mode='sphinx', marg=0.2): 370 | # use the audio file as the audio source 371 | r = srec.Recognizer() 372 | 373 | if mode=='sphinx': 374 | recogniser = r.recognize_sphinx 375 | if mode=='google': 376 | recogniser = r.recognise_google 377 | 378 | for ii, (ts,te,lab) in enumerate(zip(self.tst,self.tend,self.label)): 379 | tstart = st-marg 380 | tend = end+marg 381 | wo=w[int(tstart*fs):int(tend*fs)] 382 | 383 | wavwrite('speech_sample.wav',fs,wo) 384 | 385 | with srec.AudioFile('speech_sample.wav') as source: 386 | audio = r.record(source) # read the entire audio file 387 | 388 | try: 389 | # for testing purposes, we're just using the default API key 390 | # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")` 391 | # instead of `r.recognize_google(audio)` 392 | utt = recogniser(audio) 393 | #utt = r.recognize_sphinx(audio) 394 | self.label[ii] = utt 395 | except srec.UnknownValueError: 396 | print("Speech Recognition could not understand audio") 397 | except srec.RequestError as e: 398 | print("Could not request results {}".format(e)) 399 | 400 | def output_results(seg, output_csv='', output_text_grid=''): 401 | if output_text_grid: 402 | seg.to_textgrid(output_text_grid) 403 | 404 | if output_csv: 405 | with open(output_csv,'w') as f: 406 | seg.output(f) 407 | else: 408 | seg.output(sys.stdout) 409 | 410 | 411 | def analyse_rec(sound_files, nsources=1, wind_sec=0.092, min_len=.3, 412 | recognise=None, output_csv='', output_text_grid=''): 413 | # segment recordings 414 | w=[] 415 | for ff in sound_files: 416 | sr,wi=wavread(ff) 417 | w.append(wi.T) 418 | 419 | w=np.vstack(w).T 420 | sys.stderr.write("Read {} files, {} channels, {} samples\n"\ 421 | .format(len(sound_files),w.shape[1],w.shape[0])) 422 | sys.stderr.write("Segmenting audio\n") 423 | if nsources>1: 424 | seg = MultiChannelSegmenter(w,sr=sr,min_len=min_len) 425 | else: 426 | #w=w.squeeze() 427 | if len(w.shape)>1: 428 | w = np.mean(w,axis=1) 429 | seg = SilenceDetector(w.squeeze(), sr=sr, method = 'pct05', 430 | min_len=min_len, wind_sec=wind_sec) 431 | seg.label = [1 for tst in seg.tst] 432 | seg.centers = np.array([[0,0],[1,0]]) 433 | 434 | if recognise: 435 | seg.recognise(mode=recognise) 436 | 437 | 438 | 439 | sys.stderr.write("Found {} chunks\n".format(len(seg.label))) 440 | 441 | output_results(seg, output_csv=output_csv, 442 | output_text_grid=output_text_grid) 443 | 444 | def process_file_list(batch_file, output_csv='', 445 | output_text_grid='', 446 | recognise=None, 447 | wind_sec=0.092, 448 | min_len=.3, 449 | nsources=0): 450 | 451 | import logging 452 | file_seq=[] 453 | 454 | suffix_csv = output_csv 455 | suffix_tg = output_text_grid 456 | out_csv='' 457 | out_tg='' 458 | 459 | if not (suffix_csv or suffix_tg): 460 | suffix_csv = '_segmentation.csv' 461 | 462 | with open(batch_file) as f: 463 | for line in f: 464 | files = [it.strip() for it in line.split(',') if len(it.strip())>0] 465 | 466 | if len(files)>0: 467 | basedir, filename = os.path.split(files[0]) 468 | if suffix_csv: 469 | out_csv,ext = os.path.splitext(files[0]) 470 | out_csv+=suffix_csv 471 | if suffix_tg: 472 | out_tg,ext = os.path.splitext(files[0]) 473 | out_tg+=suffix_tg 474 | try: 475 | analyse_rec(files, output_csv=out_csv, 476 | output_text_grid=out_tg, 477 | nsources=len(files), 478 | recognise=recognise, 479 | wind_sec=wind_sec, 480 | min_len=min_len) 481 | except Exception as e: 482 | message = 'ERROR while processing files:\n' 483 | for f in files: 484 | message+=f 485 | message+='/n' 486 | logging.exception(message) 487 | #~ sys.stderr.write('ERROR while processing files:\n') 488 | #~ for f in files: 489 | #~ sys.stderr.write(f+'\n') 490 | #~ sys.stderr.write(str(e)) 491 | #~ sys.stderr.write('\n') 492 | #~ sys.stderr.write(e.__doc__ ) 493 | #~ sys.stderr.write('\n') 494 | return 0 495 | 496 | 497 | def main(args): 498 | sound_files = args.infiles 499 | for sf in sound_files: 500 | sys.stderr.write(sf+', ') 501 | 502 | sys.stderr.write('\n') 503 | 504 | if args.batch: 505 | process_file_list(args.batch, nsources=args.n_sources, 506 | wind_sec=args.window, 507 | min_len=args.min_silence, 508 | output_csv=args.csv, 509 | output_text_grid=args.textgrid, 510 | recognise=args.recognise) 511 | 512 | else: 513 | if sound_files: 514 | analyse_rec(sound_files, nsources=args.n_sources, 515 | wind_sec=args.window, 516 | min_len=args.min_silence, 517 | output_csv=args.csv, 518 | output_text_grid=args.textgrid, 519 | recognise=args.recognise) 520 | else: 521 | sys.stderr.write('Input files or batch list (-b) are required!\n') 522 | 523 | 524 | return 0 525 | 526 | 527 | if __name__ == '__main__': 528 | import sys 529 | import argparse 530 | # construct the argument parse and parse the arguments 531 | ap = argparse.ArgumentParser() 532 | ap.add_argument("-n", "--min-silence", nargs='?', default = '0.3', type=float, 533 | help = "minimum silence duration in seconds") 534 | ap.add_argument("-w", "--window", nargs='?', default = '0.092', type=float, 535 | help = "window analysis duration in seconds") 536 | ap.add_argument("-b", "--batch", nargs='?', 537 | help = "input file list for batch processing") 538 | ap.add_argument("-r", "--recognise", nargs='?', 539 | help = "use speach recognition on each interval. Select method sphinx or google") 540 | ap.add_argument("-c", "--csv", nargs='?', default = '', 541 | help = "output to csv file name") 542 | ap.add_argument("-t", "--textgrid", nargs='?', default = '', 543 | help = "output to Praat Textgrid file name") 544 | 545 | 546 | 547 | ap.add_argument("-s", "--n-sources", type=float, nargs='?', default = '1', 548 | help = "number of expected sources in the file") 549 | 550 | 551 | ap.add_argument('infiles', nargs='*', help='Input sound files (required if not batch)') 552 | 553 | args = ap.parse_args() 554 | 555 | 556 | 557 | sys.exit(main(args)) 558 | 559 | -------------------------------------------------------------------------------- /pypevoc/speech/__init__.py: -------------------------------------------------------------------------------- 1 | from .SpeechSegmenter import SpeechSegmenter, SyllableSegmenter 2 | from .SpeechChunker import SilenceDetector, MultiChannelSegmenter 3 | from . import SpeechAnalysis as analysis 4 | -------------------------------------------------------------------------------- /pypevoc/speech/glottal.py: -------------------------------------------------------------------------------- 1 | # pypevoc.speech.glottal.py 2 | # 3 | # Part of PyPeVoc python package 4 | # 5 | # Copyright (C) 2018 Andre Almeida 6 | # 7 | # based on covarep's IAIF: 8 | # https://github.com/covarep/covarep/blob/master/glottalsource/iaif.m 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see . 22 | # 23 | 24 | import numpy as np 25 | import scipy.signal as sig 26 | import logging 27 | 28 | from .SpeechAnalysis import lpc as lpc_red 29 | 30 | # lpc_red ommits the first coefficient of an all-pole filter 31 | # needed for filtering 32 | def lpc(x, n): 33 | y = lpc_red(x, n) 34 | return np.concatenate([[1.], y]) 35 | 36 | def iaif_ola(x, Fs=1, nwind=None, nhop=None, 37 | tract_order=None, glottal_order=None, 38 | leaky_integration=0.99, wind_func=np.hanning, 39 | n_it=1): 40 | 41 | if nwind is None: 42 | nwind = int(np.round(25/1000*Fs)) 43 | if nhop is None: 44 | nhop = int(nwind/5) 45 | if tract_order is None: 46 | tract_order = 2*int(np.round(Fs/2000))+4 47 | if glottal_order is None: 48 | glottal_order = 2*int(np.round(Fs/4000)) 49 | 50 | wind = wind_func(nwind) 51 | 52 | # output signals 53 | glot = np.zeros(len(x)) 54 | dglot = np.zeros(len(x)) 55 | wins = np.zeros(len(x)) 56 | 57 | # filters, per frame 58 | vt_coef = [] 59 | glot_coef = [] 60 | 61 | ist = 0 62 | 63 | iaif = InverseFilter(Fs=Fs, nwind=nwind, 64 | tract_order=tract_order, 65 | glottal_order=glottal_order, 66 | leaky_integration=leaky_integration) 67 | 68 | while ist < len(x)-nwind: 69 | xw = x[ist:ist+nwind] 70 | g, gd, vt_f, g_f = iaif.apply(xw, n_it=n_it) 71 | 72 | glot[ist:ist+nwind] += g*wind 73 | dglot[ist:ist+nwind] += gd*wind 74 | wins[ist:ist+nwind] += wind 75 | 76 | vt_coef.append(vt_f) 77 | glot_coef.append(g_f) 78 | ist += nhop 79 | 80 | idx = wins>0 81 | glot[idx] /= wins[idx] 82 | dglot[idx] /= wins[idx] 83 | 84 | return glot, dglot, np.array(vt_coef), np.array(glot_coef) 85 | 86 | class PaddedFilter(object): 87 | def __init__(self, input_signal, 88 | n_before=0, n_after=0, 89 | mode='zeros'): 90 | # Padded filter object, applies filters to a signal 91 | # while first padding on left and/or right 92 | self.mode = mode 93 | self.n_before = n_before 94 | self.n_after = n_after 95 | self.input_signal = input_signal 96 | 97 | @property 98 | def input_signal(self): 99 | return self._input_signal 100 | 101 | @input_signal.setter 102 | def input_signal(self, x): 103 | self._input_signal = x 104 | if self.mode == 'ramp': 105 | pad_before = np.linspace(-x[0],x[0], self.n_before) 106 | pad_after = np.linspace(x[-1],-x[-1], self.n_after) 107 | else: 108 | pad_before = np.zeros(self.n_before) 109 | pad_after = np.zeros(self.n_after) 110 | self._padded_input = np.concatenate((pad_before, x, pad_after)) 111 | self._padded_output = self._padded_input 112 | 113 | @property 114 | def output_signal(self): 115 | if self.n_after: 116 | return self._padded_output[self.n_before:-self.n_after] 117 | else: 118 | return self._padded_output[self.n_before:] 119 | 120 | def apply_filter(self, b, a): 121 | self._padded_output = sig.lfilter(b, a, self._padded_input) 122 | return self.output_signal 123 | 124 | def apply_filter_to_last_output(self, b, a): 125 | self._padded_output = sig.lfilter(b, a, self._padded_output) 126 | return self.output_signal 127 | 128 | 129 | def fir_pre_phase(b, x, n_ramp=None): 130 | # applies a FIR filter with a pre-phase ramp 131 | # to reduce ripple 132 | # 133 | # Arguments: 134 | # * b: FIR coefficients 135 | # * x: input signal 136 | # * n_ramp: number of samples in pre-ramp 137 | # (default = len(b)) 138 | signal = np.concat((np.linspace(-x[0],x[0], n_ramp), x)) 139 | y = np.lfilter(b,1,signal) 140 | return y[n_ramp+1:] 141 | 142 | 143 | class InverseFilter(object): 144 | # implements an inverse filter object 145 | # based on Alku's IAIF 146 | # 147 | # P. Alku, "Glottal wave analysis with pitch synchronous iterative 148 | # adaptive inverse filtering", Speech Communication, vol. 11, no. 2-3, 149 | # pp. 109–118, 1992. 150 | def __init__(self, Fs=1, nwind=1024, wind_func=np.hanning, 151 | tract_order=None, glottal_order=None, 152 | leaky_integration=0.99, hpfilt=1): 153 | # Initialise an inverse filter object 154 | # 155 | # Fs: sample rate (default 1) 156 | # nwind: size of inpt chunks 157 | # tract_order: order for Vocal Tract LPC 158 | # (default: Fs/1000 + 4) 159 | # glottal_order: order fot Glottal Source LPC 160 | # (default: Fs/2000) 161 | # leaky_integration: leaky integration coef 162 | # hpfilt: number of high pass filters to apply 163 | 164 | if tract_order is None: 165 | tract_order = 2*int(np.round(Fs/2000)) 166 | 167 | if glottal_order is None: 168 | glottal_order = 2*int(np.round(Fs/4000))+4 169 | 170 | 171 | self.Fs = Fs 172 | self.tract_order = tract_order 173 | self.glottal_order = glottal_order 174 | try: 175 | assert nwind > self.tract_order 176 | except AssertionError: 177 | logging.warning('Frame not analysed') 178 | return 179 | self.nwind = int(nwind) 180 | self.hpfilt = hpfilt 181 | self.leaky_integrator = np.array([1, -leaky_integration]) 182 | #self.pre_filter = tract_order+1 183 | self.wind = wind_func(self.nwind) 184 | n_prel = self.init_preliminary_filter() 185 | n_pad = int(np.round(n_prel/2-1)) 186 | self.n_pad = tract_order+1 187 | # array for "a" coeffs of FIR filters 188 | self.id = np.array([1]) 189 | 190 | def apply(self,x,n_it=1): 191 | # Calculates the source and filter parameters 192 | # (independent of preliminary hp filter) 193 | # - Combined effect of lip radiation and glottal flow 194 | 195 | # create a padded filter object for chained filtering 196 | # hp_filterer = PaddedFilter(n_after=len(self.hpfilt_b), 197 | # input_signal=x, 198 | # mode='zeros') 199 | hp_pad = int(np.round(len(self.hpfilt_b)/2-1)) 200 | 201 | # HP filter to remove low frequency fluctuations 202 | for ii in range(self.hpfilt): 203 | # y = hp_filterer.apply_filter_to_last_output(self.hpfilt_b,self.id) 204 | y = np.concatenate([x, np.zeros(hp_pad)]) 205 | y = sig.lfilter(self.hpfilt_b, self.id, y) 206 | y = y[hp_pad:] 207 | # create a padded filter object for chained filtering 208 | filter_machine = PaddedFilter(n_before=self.n_pad, 209 | input_signal=y, 210 | mode='ramp') 211 | 212 | # first estimate of glottal flow and radiation filters 213 | Hg = lpc(y*self.wind, 1) 214 | y = filter_machine.apply_filter(Hg, self.id) 215 | 216 | # subsequent iterations of glottal and vt estimations 217 | for ii in range(n_it): 218 | Hvt = lpc(y*self.wind, self.tract_order) 219 | g = filter_machine.apply_filter(Hvt, self.id) 220 | g = filter_machine.apply_filter_to_last_output(self.id, 221 | self.leaky_integrator) 222 | 223 | Hg = lpc(g*self.wind, self.glottal_order) 224 | y = filter_machine.apply_filter(Hg, self.id) 225 | y = filter_machine.apply_filter_to_last_output(self.id, 226 | self.leaky_integrator) 227 | 228 | # final estimation of vocal tract and glottal flow 229 | Hvt = lpc(y*self.wind, self.tract_order) 230 | dg = filter_machine.apply_filter(Hvt, self.id) 231 | g = filter_machine.apply_filter_to_last_output(self.id, 232 | self.leaky_integrator) 233 | 234 | return g, dg, Hvt, Hg 235 | 236 | def init_preliminary_filter(self, order=None, freq_stop=40, freq_pass=70): 237 | # calculate filter coefficients for preliminary hp filter 238 | if order is None: 239 | order = int(np.round(300/16000*self.Fs)) 240 | logging.info('Preliminary high-pass filter order set to %d'%order) 241 | self.hpfilt_b = sig.firls(order, 242 | [0, freq_stop, freq_pass, self.Fs/2], 243 | [0, 0, 1, 1], 244 | [1, 1], 245 | fs=self.Fs) 246 | return len(self.hpfilt_b) 247 | 248 | 249 | def lpcc2pole(b, sr=1): 250 | def l2p_1(bb): 251 | rts = np.roots(bb) 252 | rts = rts[rts.imag>=0] 253 | omega_n = np.arctan2(rts.imag,rts.real) 254 | fp = omega_n*sr/2/np.pi 255 | idx = np.argsort(fp) 256 | fp = fp[idx] 257 | bw = -1/2*(sr/2/np.pi) * np.log(np.abs(rts[idx])) 258 | return fp, bw 259 | 260 | if len(b.shape)>1: 261 | poles = np.zeros((b.shape[0],int(b.shape[1]+1))) 262 | bws = np.zeros((b.shape[0],int(b.shape[1]+1))) 263 | for ii in range(b.shape[0]): 264 | p,bw = l2p_1(b[ii,:]) 265 | poles[ii,:len(p)]=(p) 266 | bws[ii,:len(p)]=(bw) 267 | poles=np.array(poles) 268 | bws=np.array(bws) 269 | else: 270 | poles, bws = l2p_1(b) 271 | 272 | return poles, bws 273 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | 2 | from setuptools import setup 3 | 4 | import unittest 5 | def my_tests(): 6 | test_loader = unittest.TestLoader() 7 | test_suite = test_loader.discover('tests', pattern='test_*.py') 8 | return test_suite 9 | 10 | setup(name='pypevoc', 11 | version='0.3', 12 | description='Pure python sound analysis tools', 13 | url='http://github.com/goiosunw/pypevoc', 14 | author='Andre Goios', 15 | author_email='a.almeida@unsw.edu.au', 16 | license='GPL v3', 17 | packages=['pypevoc', 'pypevoc.speech'], 18 | test_suite = 'setup.my_tests', 19 | zip_safe=False) 20 | 21 | -------------------------------------------------------------------------------- /tests/test_glottal.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | import pypevoc.speech.glottal as gl 5 | 6 | class testPaddedFilter(unittest.TestCase): 7 | def test_output_same_padding_as_input(self): 8 | filter = np.array([-1,1]) 9 | n_pad_before = 8 10 | n_signal = 16 11 | x = np.zeros(n_signal) 12 | glx = gl.PaddedFilter(x,n_before=n_pad_before) 13 | y = glx.apply_fir(filter) 14 | self.assertEqual(len(x),len(y)) 15 | 16 | def test_output_same_after_padding_as_input(self): 17 | filter = np.array([-1,1]) 18 | n_pad_before = 8 19 | n_pad_after = 8 20 | n_signal = 16 21 | x = np.zeros(n_signal) 22 | glx = gl.PaddedFilter(x,n_before=n_pad_before,n_after=n_pad_after) 23 | y = glx.apply_fir(filter) 24 | self.assertEqual(len(x),len(y)) 25 | 26 | def test_output_same_as_input(self): 27 | filter = np.array([1]) 28 | n_pad_before = 8 29 | n_signal = 16 30 | x = np.zeros(n_signal) 31 | glx = gl.PaddedFilter(x,n_before=n_pad_before) 32 | y = glx.apply_fir(filter) 33 | for xx, yy in zip(x, y): 34 | self.assertEqual(xx, yy) 35 | 36 | def test_private_buffer_1d(self): 37 | glx = gl.PaddedFilter(np.zeros(10),n_before=8) 38 | self.assertEqual(len(glx._padded_input.shape),1) 39 | 40 | def test_private_output_buffer_1d(self): 41 | glx = gl.PaddedFilter(np.zeros(10),n_before=8) 42 | self.assertEqual(len(glx._padded_output.shape),1) 43 | 44 | 45 | if __name__ == '__main__': 46 | unittest.main() 47 | -------------------------------------------------------------------------------- /tests/test_peak_finder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | import pypevoc.PeakFinder as pf 4 | 5 | import unittest 6 | 7 | def parabolic_peak(max_pos=1.0, max_val=1.0, n=3, a=-1.): 8 | x = np.arange(n) 9 | b = -max_pos * 2 * a 10 | c = max_val - a * max_pos * (b + max_pos) 11 | return a*x*x + b*x + c 12 | 13 | 14 | class testPeakFinder(unittest.TestCase): 15 | def testFindOnePeak(self): 16 | x = np.linspace(0, 1, 10) 17 | x = np.concatenate((x, np.linspace(.9, 1, 9))) 18 | peaks = pf.PeakFinder(x) 19 | assert(len(peaks.pos) == 1) 20 | self.assertEqual(peaks.pos, 9) 21 | 22 | def test_refine_one_peak_centered(self): 23 | x = parabolic_peak(max_pos=1.0) 24 | peaks = pf.PeakFinder(x) 25 | peaks.refine_all() 26 | assert(len(peaks.pos) == 1) 27 | self.assertEqual(peaks.pos, 1.0) 28 | 29 | def test_refine_one_peak_at_random_pos(self): 30 | mypos = 1.2 31 | x = parabolic_peak(max_pos=mypos, n=4) 32 | peaks = pf.PeakFinder(x) 33 | peaks.refine_all() 34 | self.assertListEqual(peaks.fpos.tolist(), [mypos]) 35 | 36 | def test_refine_one_peak_between_samples(self): 37 | x = parabolic_peak(max_pos=1.5, n=4) 38 | peaks = pf.PeakFinder(x) 39 | peaks.refine_all() 40 | self.assertListEqual(peaks.fpos.tolist(), [1.5]) 41 | 42 | def test_refine_one_peak_almost_between_samples(self): 43 | mypos = 1.499 44 | x = parabolic_peak(max_pos=mypos, n=4) 45 | peaks = pf.PeakFinder(x) 46 | peaks.refine_all() 47 | self.assertEqual(len(peaks.fpos), 1) 48 | self.assertAlmostEqual(peaks.fpos[0], mypos) 49 | 50 | 51 | 52 | def main(): 53 | unittest.main() 54 | 55 | if __name__ == '__main__': 56 | main() 57 | -------------------------------------------------------------------------------- /tests/test_periodicity.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | from pypevoc.Periodicity import period_marks_corr, PeriodTimeSeries 5 | 6 | 7 | def gen_sin(f=440, sr=48000, nsamp=4800): 8 | # nsamp = int(dur*sr) 9 | return np.sin(2.*np.pi*float(f)/sr*np.arange(nsamp)) 10 | 11 | 12 | class testPeriodicity(unittest.TestCase): 13 | def test_single_period_sin_xcorr(self): 14 | f0 = 500. 15 | sr = 48000 16 | nsam = 4800 17 | x = gen_sin(f=f0, sr=sr, nsamp=nsam) 18 | pts = PeriodTimeSeries(x, sr=sr, method='xcorr') 19 | period = pts.per_at_index(nsam/2) 20 | p0 = period.get_preferred_period() 21 | self.assertAlmostEqual(sr/p0, f0, delta=1.0) 22 | 23 | def test_preferred_period_is_scalar(self): 24 | x = gen_sin() 25 | nsam = len(x) 26 | pts = PeriodTimeSeries(x, method='xcorr') 27 | period = pts.per_at_index(nsam/2) 28 | p0 = period.get_preferred_period() 29 | self.assertIsInstance(p0, float) 30 | 31 | 32 | class testPeriodMarks(unittest.TestCase): 33 | def test_period_mark_corr_int_samples_per_period(self): 34 | sr = 1.0 35 | f0 = sr/8 36 | nsam = 1024 37 | x = gen_sin(f=f0, sr=sr, nsamp=nsam) 38 | marks = period_marks_corr(x, sr=sr, tf=[0, nsam], 39 | f=[f0, f0], window_size=256) 40 | period = 1./f0 41 | dmarks = np.diff(marks[1:]) 42 | for dm in dmarks: 43 | self.assertAlmostEqual(dm, period) 44 | 45 | def test_period_mark_corr_frac_samples_per_period(self): 46 | sr = 1.0 47 | f0 = sr/64.3 48 | nsam = 1024 49 | x = gen_sin(f=f0, sr=sr, nsamp=nsam) 50 | marks = period_marks_corr(x, sr=sr, tf=[0, nsam], 51 | f=[f0, f0], window_size=256) 52 | period = 1./f0 53 | dmarks = np.diff(marks[1:]) 54 | for dm in dmarks: 55 | self.assertAlmostEqual(dm, period, places=1) 56 | 57 | 58 | def main(): 59 | unittest.main() 60 | 61 | 62 | if __name__ == '__main__': 63 | main() 64 | -------------------------------------------------------------------------------- /tests/test_pypevoc.py: -------------------------------------------------------------------------------- 1 | import pypevoc.PVAnalysis as pv 2 | import numpy as np 3 | 4 | sr = 44100 5 | t = np.arange(sr)/float(sr) 6 | 7 | f = [400.,1200.] 8 | mag = [.1,.05] 9 | fmul=1.0 10 | 11 | minmag = min(mag)*0.001 12 | #minmag=-1 13 | 14 | xx = np.zeros(len(t)) 15 | for ff,mm in zip(f,mag): 16 | xx += mm*np.sin(2.0*np.pi*ff*fmul*t) 17 | 18 | p=pv.PV(xx,sr,nfft=2**10,hop=2**9) 19 | p.run_pv() 20 | ss=p.toSinSum() 21 | 22 | 23 | for ii,part in enumerate(ss.partial): 24 | avmag = np.mean(part.mag) 25 | if avmag > minmag: 26 | print('Partial %d, st=%d, len=%d, f=%f, mag =%f'%(ii,part.start_idx,len(part.f),np.mean(part.f),avmag)) 27 | -------------------------------------------------------------------------------- /tests/test_speech.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | import pypevoc.speech as sp 5 | 6 | def generate_recursive_noise(a = [-1], length=1000): 7 | x = np.random.rand(length) 8 | 9 | #y = np.zeros(len(x)-len(a_req)-1) 10 | y = [0] * len(a) 11 | for xx in x: 12 | yy = xx 13 | for ii, aa in enumerate(a): 14 | yy += -aa*y[-ii-1] 15 | y.append(yy) 16 | 17 | return np.array(y) 18 | 19 | 20 | class test_lpc(unittest.TestCase): 21 | def test_coef_length(self): 22 | a_req = [-.5,.25] 23 | y = generate_recursive_noise(a=a_req) 24 | order = len(a_req) 25 | a_pyp = sp.analysis.lpc(y,order=order) 26 | 27 | self.assertEqual(order, len(a_pyp)) 28 | 29 | 30 | def test_coef_equivalence(self): 31 | a_req = [-.5,.25] 32 | y = generate_recursive_noise(a=a_req) 33 | order = len(a_req) 34 | a_pyp = sp.analysis.lpc(y,order=order) 35 | 36 | for ap, at in zip(a_pyp, a_req): 37 | self.assertAlmostEqual(ap, at, delta=0.0001) 38 | 39 | def compare_to_talkbox(self): 40 | try: 41 | import scikits.talkbox as tbox 42 | except ImportError: 43 | return 44 | 45 | order = 1 46 | y = generate_recursive_noise() 47 | a_pyp = sp.analysis.lpc(y,order=order) 48 | a_tbx = tbox.lpc(y,order=order) 49 | 50 | for ap, at in zip(a_pyp, a_tbx): 51 | self.assertAlmostEqual(ap, at, delta=0.01) 52 | 53 | -------------------------------------------------------------------------------- /tests/vibrato_obj.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | from scipy.interpolate import interp1d 4 | 5 | def Centroid(hamp): 6 | '''Calculate the centroid of a harmonic sequence''' 7 | allamp = 0.0 8 | allfsum = 0.0 9 | for hno0, amp in enumerate(hamp): 10 | hno = hno0+1 11 | allamp += amp 12 | allfsum += hno*amp 13 | 14 | return allfsum/allamp 15 | 16 | def RMSampl(hamp): 17 | '''Calculate the RMS amplitude of a harmonic sequence''' 18 | ampsq = 0.0 19 | for hno0, amp in enumerate(hamp): 20 | hno = hno0+1 21 | ampsq += amp*amp 22 | 23 | return np.sqrt(ampsq) 24 | 25 | def SlopeToHmult(slope, nharm): 26 | '''Calculate a harmonic sequence for a constant dB slope''' 27 | 28 | base = np.exp(slope) 29 | hamp = [] 30 | for i in xrange(nharm): 31 | hn = i 32 | hamp.append(base**(hn-(nharm-1)/2.0)) 33 | #hamp.append(np.sqrt(((hn/float(nharm-1)-.5 )*2.*(-slope)+1.)/nharm)) 34 | ha = np.array(hamp) 35 | return ha /np.sqrt(sum(ha*ha)) 36 | 37 | 38 | class SlopeHarmonicScaler(object): 39 | '''Object for quick calculation of a harmonic for 40 | a desired spectral centroid 41 | * for val=0, centroid is on 1st harmonic 42 | * for val=1, centroid is on last harmonic 43 | * Centroid variation is produced by a change in spectral slope 44 | * Spectrum is a linear slope in dB''' 45 | def __init__(self, nharm=2, npoints=100, slopelim=4): 46 | self.nharm = nharm 47 | 48 | slopes = np.linspace(-slopelim,slopelim,npoints) 49 | cent = np.zeros(len(slopes)+2) 50 | hamps = np.zeros((len(slopes)+2,nharm)) 51 | 52 | 53 | 54 | for (ii,slope) in enumerate(slopes): 55 | hamp = SlopeToHmult(slope,nharm) 56 | cent[ii+1]=(Centroid(hamp)) 57 | hamps[ii+1,...] = hamp 58 | 59 | hamps[0,0]=1. 60 | hamps[-1,-1]=1. 61 | 62 | cent[0] = 1. 63 | cent[-1] = nharm 64 | 65 | self.cent = cent 66 | self.hamp = hamps 67 | 68 | self.generateInterpolators() 69 | 70 | self.vmin = np.min(cent) 71 | self.vmax = np.max(cent) 72 | 73 | def __call__(self, val): 74 | ''' 75 | Return harmonic amplitudes for a given spectral centroid 76 | ''' 77 | hh = [] 78 | cent = val*(self.nharm-1.)+1. 79 | for ii in xrange(self.nharm): 80 | hh.append(self.fharm[ii](cent)) 81 | return np.array(hh) 82 | 83 | def saveNumpy(self,filename): 84 | ''' 85 | Save a table of harmonic amplitudes to file 86 | ''' 87 | np.save(filename, (self.cent,self.hamp)) 88 | 89 | def loadNumpy(self,filename): 90 | ''' 91 | Load a table of harmonic amplitudes from file 92 | ''' 93 | self.cent, self.hamp = np.load(filename) 94 | self.generateInterpolators() 95 | 96 | def generateInterpolators(self): 97 | ''' 98 | Generates the interpolator function from a table 99 | of harmonic amplitudes 100 | ''' 101 | self.fharm=[] 102 | 103 | for ii in xrange(self.nharm): 104 | ff = interp1d(self.cent, self.hamp[...,ii], kind='cubic') 105 | self.fharm.append(ff) 106 | 107 | 108 | def outputJSArray(self, npoints=100, vlims=[0.,1.]): 109 | ''' 110 | Outputs an interpolated array of harmonic amplitudes 111 | for each value of spectral centroid (0-1) 112 | ''' 113 | 114 | sys.stdout.write("scvals = [ \n") 115 | 116 | for ii in xrange(npoints+1): 117 | vrange = max(vlims) - min(vlims) 118 | cent = min(vlims) + ii * vrange / float(npoints) 119 | hamp = self(cent) 120 | sys.stdout.write('[') 121 | for hh in hamp: 122 | sys.stdout.write('%f,'%hh) 123 | 124 | sys.stdout.write('], // %f\n'% cent) 125 | 126 | sys.stdout.write('];\n') 127 | 128 | 129 | class VibratoProfile(object): 130 | '''A vibrato time-profile''' 131 | def __init__(self, t_vals=[0.0,1.0], a_vals=[1.0,1.0], vibfreq=5.0): 132 | self.ti=np.array(t_vals) 133 | self.ai=np.array(a_vals) 134 | self.vibfreq = vibfreq 135 | 136 | # max of vibrato profile is 1 137 | amax = np.max(self.ai) 138 | if amax>0.: 139 | self.ai = self.ai/amax 140 | self.recalc_profile() 141 | 142 | 143 | def recalc_profile(self): 144 | 145 | t_max = max(self.ti) 146 | tout = np.arange(0,t_max,1./self.vibfreq/16.0) 147 | aout = np.interp(tout,self.ti,self.ai) 148 | self.t = tout 149 | 150 | amax = np.max(self.ai) 151 | if amax>0.: 152 | i_st = min(np.argmin(self.ai>0.0),1)-1 153 | t_st = self.ti[i_st] 154 | 155 | self.vibprof = aout*np.sin(2*np.pi*self.vibfreq*(tout-t_st)) 156 | else: 157 | self.vibprof = np.zeros_like(tout) 158 | 159 | 160 | def __call__(self,t): 161 | return np.interp(t,self.t,self.vibprof) 162 | 163 | def getDuration(self): 164 | return max(self.t) 165 | 166 | def setVibratoFreq(self, vibfreq): 167 | self.vibfreq=vibfreq 168 | self.recalc_profile() 169 | 170 | class Vibrato(object): 171 | '''Generate a sound from vibrato profile''' 172 | def __init__(self, harm0=[1.], sr=44100, f0=500., vibfreq=5.0): 173 | self.sr=sr 174 | self.f0=f0 175 | self.h0 = np.array(harm0) 176 | self.nharm = len(harm0) 177 | self.hs = SlopeHarmonicScaler(self.nharm) 178 | self.vibfreq=vibfreq 179 | 180 | self.setProfile() 181 | self.setEnvelope() 182 | 183 | 184 | 185 | def setProfile(self, t_prof=[0.0,1.0], v_prof=[1.0,1.0]): 186 | self.prof = VibratoProfile(t_prof,v_prof,vibfreq=self.vibfreq) 187 | 188 | def setVibratoFreq(self, vibfreq=5.0): 189 | self.prof.setVibFreq(vibfreq) 190 | 191 | def setEnvelope(self,t_att=0.0, t_rel=0.0): 192 | self.t_att=t_att 193 | self.t_rel=t_rel 194 | self.at_sam = int(round(t_att*self.sr)); 195 | self.rel_sam = int(round(t_rel*self.sr)); 196 | 197 | def getFrequencyTime(self,t,mult=1.0): 198 | '''Generates the values of frequency at times t''' 199 | vibsig = self.prof(t) 200 | return self.f0 * (1.0 + mult*vibsig) 201 | 202 | def getAmplitudeTime(self,t,hno=1,mult=1.0): 203 | '''Generates the values of amplitude of harmonic hno at times t''' 204 | vibsig = self.prof(t) 205 | 206 | # amplitude vector 207 | hamp = self.hs(bsig) 208 | aharm = hamp[hno-1]; 209 | a0 = self.h0[hno-1] * (1. + mult * vibsig) 210 | 211 | hsig = a0 * aharm 212 | 213 | return hsig 214 | 215 | def generateProfiles(self,brightness=[0.5,0.5], amplitude=0.0, frequency = 0.0, t=None): 216 | # Build amplitude and frequency profiles 217 | bmin = min(brightness) 218 | bmax = max(brightness) 219 | 220 | if t is None: 221 | t = np.arange(0,self.prof.getDuration(),1/float(self.sr)); 222 | 223 | vibsig = self.prof(t) 224 | bsig = vibsig * (bmax-bmin)/2. + (bmax+bmin)/2. 225 | 226 | hamp = self.hs(bsig) 227 | 228 | ## Build overal envelope 229 | env_a = np.ones_like(vibsig); 230 | 231 | 232 | if self.t_att>0: 233 | at_sam = np.min(np.nonzero(t>self.t_att)) 234 | env_a[0:at_sam] = np.linspace(0,1,at_sam); 235 | if self.t_rel>0: 236 | rel_sam = len(t)-np.min(np.nonzero(t>max(t)-self.t_rel)) 237 | env_a[-rel_sam:] = np.linspace(1,0,rel_sam); 238 | 239 | fh=np.zeros([len(vibsig),self.nharm]) 240 | ah=np.zeros([len(vibsig),self.nharm]) 241 | 242 | for i in range(1,self.nharm+1): 243 | # vector of frequency per sample 244 | fh[:,i-1] = i*self.f0 * (1.0 + frequency*vibsig) 245 | 246 | # amplitude vector 247 | hamp = self.hs(bsig) 248 | aavg = hamp[i-1]; 249 | a0 = self.h0[i-1] * (1. + amplitude * vibsig) 250 | 251 | ah[:,i-1] = a0 * aavg *env_a 252 | 253 | return fh,ah 254 | 255 | def calculateWav(self,brightness=[0.5,0.5], amplitude=0.0, frequency = 0.0): 256 | # Build signal 257 | bmin = min(brightness) 258 | bmax = max(brightness) 259 | 260 | t = np.arange(0,self.prof.getDuration(),1/float(self.sr)); 261 | sig = np.zeros_like(t); 262 | 263 | 264 | vibsig = self.prof(t) 265 | bsig = vibsig * (bmax-bmin)/2. + (bmax+bmin)/2. 266 | 267 | hamp = self.hs(bsig) 268 | for i in range(1,self.nharm+1): 269 | # vector of frequency per sample 270 | fharm = self.getFrequencyTime(t,mult=frequency) 271 | #fharm = i*self.f0 *np.ones_like(vibsig) 272 | # phase vector 273 | fcumsum = np.cumsum(2*np.pi*fharm)/self.sr; 274 | phi = np.concatenate(([0],fcumsum[0:-1])); 275 | 276 | # amplitude vector 277 | # amplitude vector 278 | hamp = self.hs(bsig) 279 | aharm = hamp[i-1]; 280 | a0 = self.h0[i-1] * (1. + amplitude * vibsig) 281 | hsig = a0 * aharm *env_a 282 | 283 | sig = sig+hsig; 284 | 285 | ## Build overal envelope 286 | env_a = np.ones_like(vibsig); 287 | 288 | if self.at_sam>0: 289 | env_a[0:self.at_sam] = np.linspace(0,1,self.at_sam); 290 | if self.rel_sam>0: 291 | env_a[-self.rel_sam:] = np.linspace(1,0,self.rel_sam); 292 | 293 | sig=sig*env_a; 294 | 295 | self.sig = sig 296 | return sig 297 | 298 | def saveWav(self,filename,sampwidth = 2): 299 | import wave 300 | import struct 301 | 302 | wav_file = wave.open(filename, "w") 303 | 304 | nchannels = 2 305 | amp = 2**(8*sampwidth) 306 | 307 | framerate = int(self.sr) 308 | nframes = len(self.sig) 309 | 310 | comptype = "NONE" 311 | compname = "not compressed" 312 | 313 | wav_file.setparams((nchannels, sampwidth, framerate, nframes, 314 | comptype, compname)) 315 | 316 | # numpy convert float to int 317 | xstereo = np.reshape(np.tile(self.sig,[2,1]).T*amp/2,2*len(self.sig)).astype('int16').tostring() 318 | 319 | wav_file.writeframes(xstereo) 320 | 321 | wav_file.close() 322 | 323 | 324 | def SlopeVibratoWAV(filename='out.wav', 325 | slope=0, 326 | nharm = 7, 327 | f0tonic=500.0, 328 | amp=0.1, 329 | hdepth = 6.0, 330 | vib_slope=1.0, 331 | sr=44100): 332 | '''Generate a sequence of similar vibrato notes:fluctuating in amplitude or slope 333 | ''' 334 | #sr=44100 335 | base = np.exp(slope) 336 | 337 | print vib_slope 338 | fact = 20./np.log(10) 339 | if vib_slope>0.0: 340 | hvib = [(float(hn)-(nharm-2.0)/2.0)*hdepth for hn in xrange(nharm-1)] 341 | else: 342 | hvib = [hdepth for hn in xrange(nharm-1)] 343 | print hvib 344 | #hvib = [fact*np.log10((hn-(nharm+1.0)/2.0)*slope) for hn in xrange(nharm-1)] 345 | 346 | hamp = np.array([(1.)**xx/xx**slope for xx in xrange(1,nharm)]) 347 | #hamp = np.concatenate(([0],hamp)) 348 | #hamp = np.zeros(nharm) 349 | #f0tonic = 500. 350 | #amp=0.05 351 | #amp=0.1 352 | #hamp = amp*np.ones(nharm) 353 | #hamp[0]=0.0 354 | 355 | 356 | #hvib = np.zeros(len(hamp)) 357 | # if vib_slope > 0.0: 358 | # for nn in range(nharm-1): 359 | # hvib[nn] = hdepth * (nharm/2. - float(nn)) 360 | # else: 361 | #for nn in range(nharm-1): 362 | # hvib[nn] = hdepth 363 | 364 | 365 | 366 | sig = HarmonicVibrato(ampseq=hamp,hvib=hvib,f0vib=0.00,f0=f0tonic, 367 | vib_prof_t=[0.0,0.3,0.7,1.5,1.6],vib_prof_a=[0.0,0.0,0.5,1.0,0.0],vibfreq=6.0, 368 | a0=amp,sr=sr,t_att=.05) 369 | 370 | write_wav(filename,sig,sr=sr) 371 | #wavwrite(filename,rate=sr,data=np.tile(sig,[1,2])) 372 | 373 | #return sig, sr 374 | #display(Audio(data=sig,rate=sr,autoplay=True)) 375 | 376 | 377 | --------------------------------------------------------------------------------