├── .gitignore
├── .gitmodules
├── README.md
├── conda.recipe
    └── meta.yaml
├── examples
    ├── DemoPyPeVoc.py
    ├── LPC on filtered noise.ipynb
    ├── PVexample.png
    ├── ProtectMarraigeInAmerica.wav
    ├── SoloGuitarArpegi.wav
    ├── SpeechRate.py
    ├── WavResynth.py
    ├── generate_mel_and_mfcc.py
    ├── glottal_flow.py
    ├── hide.wav
    ├── itches.wav
    ├── mfcc_segmenter.py
    ├── pepperCl.wav
    ├── pepperSx.wav
    ├── perlmanVn.wav
    ├── phoneme_descriptor_plot.py
    ├── phoneme_descriptors.py
    ├── phoneme_segmenter.py
    ├── progress_bar.ipynb
    ├── progress_bar.py
    ├── smaller_than_star.wav
    └── smirnoffVn.wav
├── pypevoc
    ├── AMDF.py
    ├── AudioInterface.py
    ├── FFTFilters.py
    ├── Heterodyne.py
    ├── PVAnalysis.py
    ├── PeakFinder.py
    ├── Periodicity.py
    ├── ProgressDisplay.py
    ├── SoundUtils.py
    ├── TransferFunctions.py
    ├── __init__.py
    └── speech
    │   ├── DAP.py
    │   ├── PitchJumps.py
    │   ├── SpeechAnalysis.py
    │   ├── SpeechChunker.py
    │   ├── SpeechSegmenter.py
    │   ├── __init__.py
    │   └── glottal.py
├── setup.py
└── tests
    ├── test_glottal.py
    ├── test_peak_finder.py
    ├── test_periodicity.py
    ├── test_pypevoc.py
    ├── test_speech.py
    └── vibrato_obj.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.swp
3 | .DS_Store
4 | build/
5 | dist/
6 | *.egg-info/
7 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "pypevoc/Yin"]
2 | 	path = pypevoc/Yin
3 | 	url = https://github.com/goiosunsw/Yin
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # PyPeVoc
 2 | 
 3 | *PyPeVoc* is a simple Phase Vocoder library intendend mostly to analysis of sounds. It aims to be flexible and easy to adapt, and is thus entirely written in Python, rathe than using some components in C, which would make it faster but harder to maintain.
 4 | 
 5 | Unlike other Phase vocoders, *PyPeVoc* keepsthe phase information of each sine wave, so that resynthesised waveforms match the original as close as possible. Other Phase Vocoders nly keep magnitude information. This is not a problem for the sound, but it is for analysis. 
 6 | 
 7 | ## Phase Vocoder
 8 | 
 9 | ***Phase Vocoding*** is a technique of decomposing a periodic sound into its ***quasi-sinusoidal*** components. These are sinewaves that have slowly-varying frequency and amplitude. 
10 | 
11 | Sine-wave decomposition allows a wide-range of analysis and resynthesis techniques, exemplified in the Wiki (soon to come) 
12 | 
13 | ## Example situation
14 | 
15 | ### Import required libraries
16 | 
17 | ```python
18 | import numpy as np
19 | from pypevoc import PV
20 | ```
21 | 
22 | ### Create a periodic sound
23 | 
24 | ```python
25 | # Sample rate
26 | sr = 44100
27 | 
28 | # Vibrato frequency
29 | vibfreq = 5.0
30 | 
31 | # Average amplitude of harmonics
32 | hamp0 = 0.1*np.array([1, .5, .3])
33 | 
34 | # Fraction variation of harmonics in vibrato
35 | hvib = 1.0*np.array([.5,0.1,.9])
36 | # relative phase of harmonic variation
37 | hph = np.array([0,np.pi/2,np.pi])
38 | f0 = 500
39 | 
40 | # Depth of frequency vibrato
41 | f0vib = 0.01
42 | 
43 | # signal duration
44 | dur = 1.0
45 | 
46 | # will contain the final sound vector
47 | sig = np.zeros(int(sr*dur)) + 0.01*(np.random.rand(int(sr*dur))-.5)
48 | 
49 | # time vector
50 | t = np.arange(0, dur, 1./sr)
51 | 
52 | # vectors for varying properties of the sound 
53 | hvibsig = np.zeros((int(sr*dur), len(hamp0)))
54 | vibsig = np.sin(2*np.pi*vibfreq*t)
55 | 
56 | f0sig = f0 * (1 + f0vib*vibsig)
57 | 
58 | for n,ha in enumerate(hamp0):
59 |     hno = n+1
60 |     fsig = f0sig*hno
61 |     phsig = np.cumsum(2*np.pi*fsig/sr)
62 |     hvibsig[:,n] = ha * (1 + hvib[n]*np.sin(2*np.pi*vibfreq*t+hph[n]))
63 |     sig += (hvibsig[:,n]) * np.sin(phsig)
64 | 
65 | ```
66 | 
67 | ### Analyse 
68 | 
69 | ``` python
70 | # creates a Phase vocoder object
71 | mypv = PV(sig, sr, nfft=2048, npks=len(hamp0))
72 | 
73 | # run the analysis
74 | mypv.run_pv()
75 | 
76 | # plot the results in a time-frequency graph
77 | mypv.plot_time_freq()
78 | ```
79 | 
80 | ### Result
81 | 
82 | ![ScreenShot](examples/PVexample.png)
83 | 


--------------------------------------------------------------------------------
/conda.recipe/meta.yaml:
--------------------------------------------------------------------------------
 1 | {% set version = "0.3.0" %}
 2 | 
 3 | package:
 4 |   name: "pypevoc"
 5 |   version: {{ version }}
 6 | 
 7 | source:
 8 |   path: ../
 9 |   # git-url: https://github.com/goiosunsw/pypevoc.git
10 | 
11 | # build:
12 | #   number: 0
13 | #   script: python setup.py install --single-version-externally-managed --record=record.txt
14 | 
15 | requirements:
16 |   host:
17 |     - numpy
18 |     - pip
19 |     - python
20 |   run:
21 |     - numpy
22 |     - python
23 | 
24 | about:
25 |   home: "https://github.com/goiosunsw/pypevoc.git"
26 |   license: "GNU Lesser General Public v3 or later (LGPLv3+)"
27 |   license_family: "LGPL"
28 |   license_file: ""
29 |   summary: "Pure python phase vocoder"
30 |   doc_url: ""
31 |   dev_url: ""
32 | 
33 | extra:
34 |   recipe-maintainers:
35 |     - goiosunsw
36 | 


--------------------------------------------------------------------------------
/examples/DemoPyPeVoc.py:
--------------------------------------------------------------------------------
 1 | import numpy  as np
 2 | import pylab  as pl
 3 | import pandas as pd
 4 | import sys
 5 | sys.path.append('..')
 6 | import PVAnalysis as pv
 7 | 
 8 | # Sample rate
 9 | sr = 44100
10 | #sr=150000
11 | 
12 | # Vibrato frequency
13 | vibfreq = 5.0
14 | 
15 | # Average amplitude of harmonics
16 | hamp0 = 0.1*np.array([1, .5, .3])
17 | #hamp0 = 0.1*np.array([0.9])
18 | 
19 | # Fraction variation of harmonics in vibrato
20 | hvib = 1.0*np.array([.5,0.1,.9])
21 | #hvib = 1.0*np.array([-.2])
22 | # relative phase of harmonic variation
23 | hph = np.array([0,np.pi/2,np.pi])
24 | # Mean fundamental frequency
25 | f0 = 500
26 | 
27 | # Depth of frequency vibrato
28 | f0vib = 0.01
29 | 
30 | # signal duration
31 | dur = 1.0
32 | 
33 | sig = np.zeros(int(sr*dur)) + 0.00*(np.random.rand(int(sr*dur))-.5)
34 | vibsig = np.zeros(int(sr*dur))
35 | hvibsig = np.zeros((int(sr*dur),len(hamp0)))
36 | t = np.arange(0,dur,1./sr)
37 | 
38 | vibsig = np.sin(2*np.pi*vibfreq*t)
39 | 
40 | f0sig = f0 * (1 + f0vib*vibsig)
41 | 
42 | for n,ha in enumerate(hamp0):
43 |     hno = n+1
44 |     fsig = f0sig*hno
45 |     phsig = np.cumsum(2*np.pi*fsig/sr)
46 |     hvibsig[:,n] = ha * (1+hvib[n]*np.sin(2*np.pi*vibfreq*t+hph[n]))
47 |     sig += (hvibsig[:,n]) * np.sin(phsig)
48 |     
49 | #pl.plot(sig)
50 | pl.figure()
51 | ss=pl.specgram(sig,NFFT=1024/2)
52 | 
53 | mypv=pv.PV(sig,sr,nfft=1024,npks=len(hamp0))
54 | mypv.run_pv()
55 | mypv.plot_time_freq()
56 | 
57 | ss=mypv.toSinSum()
58 | ss.plot_time_freq_mag(minlen=5)
59 | 
60 | w=ss.synth(sr,mypv.hop/1)
61 | 
62 | #pl.hold(True)
63 | pl.plot(sig,label='orig')
64 | pl.hold(True)
65 | pl.plot(w,label='resynth')
66 | pl.legend()
67 | 


--------------------------------------------------------------------------------
/examples/PVexample.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/PVexample.png


--------------------------------------------------------------------------------
/examples/ProtectMarraigeInAmerica.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/ProtectMarraigeInAmerica.wav


--------------------------------------------------------------------------------
/examples/SoloGuitarArpegi.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/SoloGuitarArpegi.wav


--------------------------------------------------------------------------------
/examples/SpeechRate.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #  SpeachRate.py
  5 | #  
  6 | #  estimate the speech rate of a file, 
  7 | #  also generating segmentation textgrids
  8 | #
  9 | #  Copyright 2017 Andre Almeida <goios@goios-UX305UA>
 10 | #  
 11 | #  This program is free software; you can redistribute it and/or modify
 12 | #  it under the terms of the GNU General Public License as published by
 13 | #  the Free Software Foundation; either version 2 of the License, or
 14 | #  (at your option) any later version.
 15 | #  
 16 | #  This program is distributed in the hope that it will be useful,
 17 | #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 19 | #  GNU General Public License for more details.
 20 | #  
 21 | #  You should have received a copy of the GNU General Public License
 22 | #  along with this program; if not, write to the Free Software
 23 | #  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 24 | #  MA 02110-1301, USA.
 25 | #  
 26 | #  
 27 | import SpeechSegmenter as ss
 28 | import SpeechChunker as sc
 29 | from scipy.io import wavfile
 30 | import numpy as np
 31 | import sys
 32 | import os
 33 | 
 34 | def segment_recording(sound_files):
 35 |     w=[]
 36 |     for ff in sound_files:
 37 |         sr,wi=wavfile.read(ff)
 38 |         w.append(wi)
 39 |     
 40 |     w=np.vstack(w).T
 41 |     sys.stderr.write("Read {} files, {} channels, {} samples\n"\
 42 |                      .format(len(sound_files),w.shape[1],w.shape[0]))
 43 |     sys.stderr.write("Segmenting audio\n")
 44 |     seg=sc.MultiChannelSegmenter(w,sr=sr)
 45 |     sys.stderr.write("Found {} chunks\n".format(len(seg.label)))
 46 |     return (seg.tst,seg.tend,seg.label)
 47 |     
 48 | def analyse_rec(sound_files, output_dir='.'):
 49 |     # segment recordings
 50 |     w=[]
 51 |     for ff in sound_files:
 52 |         sr,wi=wavfile.read(ff)
 53 |         w.append(wi)
 54 |     
 55 |     w=np.vstack(w).T
 56 |     sys.stderr.write("Read {} files, {} channels, {} samples\n"\
 57 |                      .format(len(sound_files),w.shape[1],w.shape[0]))
 58 |     sys.stderr.write("Segmenting audio\n")
 59 |     if w.shape[1]>1:
 60 |         seg=sc.MultiChannelSegmenter(w,sr=sr,min_len=args.min_silence)
 61 |     else:
 62 |         #w=w.squeeze()
 63 |         seg = sc.SilenceDetector(w.squeeze(), sr=sr, method = 'pct01',
 64 |                                 min_len=args.min_silence)
 65 |         seg.label = [1 for tst in seg.tst]
 66 |         seg.centers = np.array([[0,0],[1,0]])
 67 |                                 
 68 |         
 69 |     seg.to_textgrid(os.path.join(output_dir,"sources.TextGrid"))
 70 |     sys.stderr.write("Found {} chunks\n".format(len(seg.label)))
 71 |     
 72 |     intervals = (seg.tst,seg.tend,seg.label)
 73 |     
 74 |     # segment syllables for each channel
 75 |     for lab in set(seg.label):
 76 |         vi = [(ii[0],ii[1]) for ii in zip(*intervals) if ii[2]==lab]
 77 |         source = int(lab)
 78 |         # find the best channel to segment source
 79 |         chan = np.argmax(seg.centers[lab,:])
 80 |         
 81 |         syl=ss.SyllableSegmenter(w[:,chan],sr=sr,voice_intervals=vi)
 82 |         syl.segment_amplitude_bumps()
 83 |         syl.classify_voicing()
 84 |         syl.to_textgrid(os.path.join(output_dir,'voiced_syllables_{}.TextGrid'.format(lab)))
 85 |         # output spreadsheet
 86 |         df = syl.to_pandas()
 87 |         df.to_excel(os.path.join(output_dir,'syllables_{}.xls'.format(lab)))
 88 | 
 89 | 
 90 | def process_file_list(batch_file):
 91 |     import logging
 92 |     file_seq=[]
 93 |     with open(batch_file) as f:
 94 |         for line in f:
 95 |             files = [it.strip() for it in line.split(',') if len(it.strip())>0]
 96 |             if len(files)>0:
 97 |                 basedir, filename = os.path.split(files[0])
 98 |                 try:
 99 |                     analyse_rec(files, output_dir=basedir)
100 |                 except Exception as e:
101 |                     message = 'ERROR while processing files:\n'
102 |                     for f in files:
103 |                         message+=f
104 |                     message+='/n'
105 |                     logging.exception(message)
106 |                     #~ sys.stderr.write('ERROR while processing files:\n')
107 |                     #~ for f in files:
108 |                         #~ sys.stderr.write(f+'\n')
109 |                     #~ sys.stderr.write(str(e))
110 |                     #~ sys.stderr.write('\n')
111 |                     #~ sys.stderr.write(e.__doc__ )
112 |                     #~ sys.stderr.write('\n')
113 |     return 0
114 |     
115 | def main(args):
116 | 
117 |     sound_files = args.infiles
118 |     print sound_files
119 |     
120 |     if args.batch:
121 |         process_file_list(args.batch)
122 |         
123 |     else:
124 |         if sound_files:  
125 |             analyse_rec(sound_files)
126 |         else:
127 |             sys.stderr.write('Input files or batch list (-b) are required!\n')
128 |         
129 | 
130 |     return 0
131 | 
132 | if __name__ == '__main__':
133 |     import sys
134 |     import argparse
135 |     # construct the argument parse and parse the arguments
136 |     ap = argparse.ArgumentParser()
137 |     ap.add_argument("-o", "--output", nargs='?', default = '',
138 |         help = "output file name")
139 |     ap.add_argument("-n", "--min-silence", nargs='?', default = '0.3', type=float,
140 |         help = "minimum silence duration in seconds")
141 |     ap.add_argument("-b", "--batch", nargs='?', 
142 |         help = "input file list for batch processing")
143 | 
144 |     ap.add_argument("-s", "--start", type=float, nargs='?', default = '0',
145 |         help = "start time")
146 |     ap.add_argument("-e", "--end", type=float, nargs='?', default = '-1',
147 |         help = "end time")
148 | 
149 |     
150 |     ap.add_argument('infiles', nargs='*', help='Input sound files (required if not batch)')
151 |     
152 |     args = ap.parse_args()
153 | 
154 |     
155 | 
156 |     sys.exit(main(args))
157 | 


--------------------------------------------------------------------------------
/examples/WavResynth.py:
--------------------------------------------------------------------------------
 1 | import numpy  as np
 2 | import pylab  as pl
 3 | import pandas as pd
 4 | import sys
 5 | from scipy.io import wavfile as wf
 6 | 
 7 | sys.path.append('..')
 8 | import PVAnalysis as pv
 9 | 
10 | #sr, sig =  wf.read('pepperCl.wav')
11 | sr, sig =  wf.read('pepperSx.wav')
12 | #sr, sig =  wf.read('perlmanVn.wav')
13 | #sr, sig =  wf.read('smirnoffVn.wav')
14 | #sr, sig =  wf.read('ProtectMarraigeInAmerica.wav')
15 | #sr, sig =  wf.read('SoloGuitarArpegi.wav')
16 | 
17 | # scale to floating point (range -1 to 1)
18 | sig = sig/ float(np.iinfo(sig.dtype).max)
19 |     
20 | #pl.plot(sig)
21 | pl.figure()
22 | ss=pl.specgram(sig,NFFT=1024/2)
23 | 
24 | # Build the phase vocoder object
25 | mypv=pv.PV(sig,sr,nfft=1024*4,npks=25*4,hop=256*4)
26 | # Run the PV calculation
27 | mypv.run_pv()
28 | # plot the peaks that were found
29 | mypv.plot_time_freq()
30 | 
31 | # convert to sinusoidal lines
32 | ss=mypv.toSinSum()
33 | 
34 | # resynthesise based on PV analysis
35 | # (reduce hop to slow down, increase to accelerate)
36 | w=ss.synth(sr,mypv.hop/1)
37 | 
38 | # plot original and resynthesis
39 | pl.figure()
40 | pl.plot(sig,label='orig')
41 | pl.hold(True)
42 | pl.plot(w,label='resynth')
43 | pl.legend()
44 | pl.show()
45 | 
46 | fig,ax=pl.subplots(2,1,sharex=True)
47 | ax[0].plot(np.arange(len(sig))/float(sr),sig,label='orig')
48 | ax[0].hold(True)
49 | ax[0].plot(np.arange(len(w))/float(sr),w,label='resynth')
50 | ax[0].legend()
51 | mypv.plot_time_freq(ax=ax[1])
52 | 
53 | 


--------------------------------------------------------------------------------
/examples/generate_mel_and_mfcc.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | from glob import glob
 4 | 
 5 | import numpy as np
 6 | from scipy.io import wavfile
 7 | 
 8 | import pypevoc.FFTFilters as ft
 9 | 
10 | 
11 | def read_wav_file(sndfile):
12 |     return wavfile.read(sndfile)
13 | 
14 | def melspec(sr,w,twind=0.025,thop=0.01,mfcc=False):
15 |     mfb = ft.MelFilterBank(sr=sr,twind=twind, thop=thop)
16 |     wp = ft.preemph(w,hpFreq=50,Fs=sr)
17 |     cs,ms,ts = mfb.mfcc_and_mel(wp)
18 |     return cs,np.log(ms),ts
19 | 
20 | 
21 | def parse_args():
22 |     parser = argparse.ArgumentParser()
23 |     
24 |     parser.add_argument('input', help='Input file or dir')
25 |     parser.add_argument('--window_sec', '-w', help='window duration in seconds', default=0.025, type=float)
26 |     parser.add_argument('--hop_sec','-H', help='hop in seconds', default=0.01, type=float)
27 |     return parser.parse_args() 
28 |     
29 | def process_file(sndfile, twind=0.025, thop=0.01, output=None):
30 |     sr,w = read_wav_file(sndfile)
31 |     mc, ms, tm = melspec(sr,w,twind=twind,thop=thop)
32 |     if output is None:
33 |         basename = os.path.splitext(sndfile)[0]
34 |         dname = basename+'_MEL_MFCC.npz'
35 |     else:
36 |         dname = output
37 |     np.savez(dname, mfcc=mc, melspec=ms, t=tm)
38 | 
39 | def process_dir(directory):
40 |     from glob import glob
41 |     filelist = glob(os.path.join(directory,'*.wav'))
42 |     for sndfile in filelist:
43 |         process_file(sndfile)
44 | 
45 | if __name__ == '__main__':
46 |     args = parse_args()
47 |     if os.path.isdir(args.input):
48 |         datadir = args.input
49 |         process_dir(datadir)
50 |     else:
51 |         process_file(args.input, twind=args.window_sec, thop=args.hop_sec)


--------------------------------------------------------------------------------
/examples/glottal_flow.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import numpy as np
 3 | import matplotlib.pyplot as pl
 4 | from scipy.io import wavfile
 5 | 
 6 | from pypevoc.speech.glottal import iaif_ola, lpcc2pole
 7 | 
 8 | try:
 9 |     filename = sys.argv[1]
10 | except IndexError:
11 |     filename = "hide.wav"
12 | 
13 | sr, w = wavfile.read(filename)
14 | 
15 | g, dg, vt, gf = iaif_ola(w, Fs=sr)
16 | 
17 | t = np.arange(len(w))/sr
18 | 
19 | fig,ax = pl.subplots(2,sharex=True)
20 | ax[0].plot(t,w)
21 | ax[1].plot(t,g)
22 | 
23 | try: 
24 |     import matlab.engine
25 |     import matlab
26 |     eng = matlab.engine.start_matlab()
27 | except ImportError:
28 |     pass
29 | else:
30 |     eng.addpath(eng.genpath('~/Devel/covarep/'))
31 |     try:
32 |         g_m, dg_m, vt_m, gf_m = eng.iaif_ola(matlab.double(w.tolist()),
33 |                                      float(sr),
34 |                                      nargout=4)
35 |     except matlab.engine.MatlabExecutionError:
36 |         pass
37 |     else:
38 |         ax[1].plot(t,np.array(g_m).flatten())
39 |     finally:
40 |         eng.quit()
41 | 
42 | pl.figure()
43 | pl.specgram(w, Fs=sr, NFFT=2**10)
44 | for ii in range(vt.shape[0]):
45 |     t = len(w)/sr*ii/vt.shape[0]
46 |     p,bw = lpcc2pole(vt[ii,:],sr)
47 |     pl.scatter(np.ones(len(bw))*t, p, s=1/np.sqrt(bw), color='k')
48 | 
49 | 
50 | pl.show()
51 | 


--------------------------------------------------------------------------------
/examples/hide.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/hide.wav


--------------------------------------------------------------------------------
/examples/itches.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/itches.wav


--------------------------------------------------------------------------------
/examples/mfcc_segmenter.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import argparse
  3 | import numpy as np
  4 | import pypevoc.FFTFilters as ft
  5 | import scipy.signal as sig
  6 | from scipy.io import wavfile
  7 | 
  8 | 
  9 | def read_wav_file(sndfile):
 10 |     return wavfile.read(sndfile)
 11 | 
 12 | 
 13 | def mfcc_change_rate(sr,w, twind=0.025, thop=0.01,
 14 |                      mode='MELSPEC',ncc=12):
 15 |     
 16 |     mfb = ft.MelFilterBank(sr=sr,twind=twind, thop=thop)
 17 |     wp = ft.preemph(w,hpFreq=50,Fs=sr) 
 18 |     if mode == 'MELSPEC':
 19 |         feat,tfeat = mfb.specout(wp)
 20 |         feat = np.log(feat)
 21 |     elif mode == 'MFCC':
 22 |         feat, tfeat = mfb.mfcc(wp)
 23 |         feat = feat[:,1:ncc+1]
 24 |     else:
 25 |         raise NotImplementedError, "{} unknown".format(method)
 26 | 
 27 |     ndiff = int(np.round(max_tchange/thop))
 28 |     dfeat = np.zeros((ndiff,len(tfeat)))
 29 |     for ii in range(1,ndiff):
 30 |         dfeat[ii,ndiff:-ndiff] = np.sum((feat[:-ndiff*2,:]-feat[ndiff*2:,:])**2,axis=1)
 31 |     dfsum = np.sum(dfeat,axis=0)
 32 | 
 33 | 
 34 | def mfcc_segments(sr,w,twind=0.025,thop=0.01,
 35 |                   max_tchange=0.05,percentile_thresh=50,
 36 |                   mode='MELSPEC',
 37 |                   ncc=12):
 38 |     mfb = ft.MelFilterBank(sr=sr,twind=twind, thop=thop)
 39 |     wp = ft.preemph(w,hpFreq=50,Fs=sr) 
 40 |     if mode == 'MELSPEC':
 41 |         feat,tfeat = mfb.specout(wp)
 42 |         feat = np.log(feat)
 43 |     elif mode == 'MFCC':
 44 |         feat, tfeat = mfb.mfcc(wp)
 45 |         feat = feat[:,1:ncc+1]
 46 |     else:
 47 |         raise NotImplementedError, "{} unknown".format(method)
 48 | 
 49 |     ndiff = int(np.round(max_tchange/thop))
 50 |     dfeat = np.zeros((ndiff,len(tfeat)))
 51 |     for ii in range(1,ndiff):
 52 |         dfeat[ii,ndiff:-ndiff] = np.sum((feat[:-ndiff*2,:]-feat[ndiff*2:,:])**2,axis=1)
 53 |     dfsum = np.sum(dfeat,axis=0)
 54 |     dfspks = sig.argrelmax(dfsum)[0]
 55 |     pkthresh = np.percentile(dfsum,percentile_thresh)
 56 |     dfspks = dfspks[dfsum[dfspks] > pkthresh]
 57 |     return tfeat[dfspks], dfsum[dfspks]
 58 | 
 59 | 
 60 | def file_segments(sr,w):
 61 |     times, vals = mfcc_segments(sr,w)
 62 |     dictlist = []
 63 |     for t,v in zip(times,vals):
 64 |         dictlist.append({'start':tst,
 65 |                          'end':t,
 66 |                          'strength':val})    
 67 |     import pandas
 68 |     return pandas.DataFrame(dictlist)
 69 | 
 70 | 
 71 | def process_file(sndfile, mode='MELSPEC'):
 72 |     sr,w = read_wav_file(sndfile)
 73 |     times, values = mfcc_segments(sr,w,mode=mode)
 74 |     for t,v in zip(times, values):
 75 |         print("{:f},{:f}".format(t,v))
 76 | 
 77 | 
 78 | def process_dir(directory):
 79 |     from glob import glob
 80 |     filelist = glob(os.path.join(directory,'*.wav'))
 81 |     for sndfile in filelist:
 82 |         sr,w = read_wav_file(sndfile)
 83 |         ints = file_segments(sr,w)
 84 |         for thisi in ints:
 85 |             ts = thisi['start']
 86 |             te = thisi['end']
 87 |             tph = thisi['phonemes']
 88 |             print('{}, {:7.3f}, Speech START'.format(sndfile,ts))
 89 |             for t in tph:
 90 |                 print('{}, {:7.3f}, New phoneme'.format(sndfile,t+ts))
 91 |             print('{}, {:7.3f}, Speech END'.format(sndfile,te))
 92 |         
 93 | 
 94 | 
 95 | def parse_args():
 96 |     parser = argparse.ArgumentParser()
 97 |     
 98 |     parser.add_argument('input', help='Input file or dir')
 99 |     return parser.parse_args() 
100 | 
101 | 
102 | if __name__ == '__main__':
103 |     args = parse_args()
104 |     if os.path.isdir(args.input):
105 |         datadir = args.input
106 |         process_dir(datadir)
107 |     else:
108 |         process_file(args.input, mode='MFCC')


--------------------------------------------------------------------------------
/examples/pepperCl.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/pepperCl.wav


--------------------------------------------------------------------------------
/examples/pepperSx.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/pepperSx.wav


--------------------------------------------------------------------------------
/examples/perlmanVn.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/perlmanVn.wav


--------------------------------------------------------------------------------
/examples/phoneme_descriptor_plot.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import pandas
 3 | import matplotlib.pyplot as plt
 4 | from scipy.io import wavfile
 5 | 
 6 | def parse_args():
 7 |     parser = argparse.ArgumentParser()
 8 |     
 9 |     parser.add_argument('soundfile', help='Sound file')
10 |     parser.add_argument('descfile', help='Descriptor csv file')
11 |     return parser.parse_args() 
12 | 
13 | if __name__ == '__main__':
14 |     args = parse_args()
15 |     sr, w = wavfile.read(args.soundfile)
16 |     #df=pandas.read_csv(args.descfile,names=['t_start','t_end','label','f0','RMS','Harm','F1','F2','F3','F4','F5'],
17 |     #                   index_col=False)
18 |     df =pandas.read_csv(args.descfile,index_col=0) 
19 |     
20 |     fig,ax = plt.subplots(3,sharex=True,figsize=(6,8))
21 |     ax[0].specgram(w,Fs=sr,NFFT=1024)
22 |     for ir, row in df.iterrows():
23 |         ts = row['t_start']
24 |         te = row['t_end']
25 |         if row['label'].find('START')>-1:
26 |             color='k'
27 |         else:
28 |             color='r'
29 |         if row['label'].find('END')>-1:
30 |             for axi in ax:
31 |                 axi.axvline(te, color='k',alpha=.5)
32 |         for axi in ax:
33 |             axi.axvline(ts, color=color, alpha=.5)
34 |         
35 |     tm = (df['t_start']+df['t_end'])/2
36 |     ax[0].plot(tm,df['f0'],'o-',color='k')
37 |     ax[0].plot(tm,df['Centroid'],'o-',color='blue')
38 |     for ii in range(1,5):
39 |         ax[0].plot(tm,df['F%d'%ii],'o-',color='r')
40 |     ax[1].semilogy(tm,df['RMS'])
41 |     ax[2].plot(tm,df['Harmonicity'])
42 |     
43 | 
44 |     plt.show()
45 |     


--------------------------------------------------------------------------------
/examples/phoneme_descriptors.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import numpy as np
  3 | import traceback
  4 | import pandas
  5 | from pypevoc.speech.SpeechAnalysis import Formants
  6 | from pypevoc.SoundUtils import RMSWind
  7 | from pypevoc.PVAnalysis import PV
  8 | from pypevoc.Heterodyne import HeterodyneHarmonic
  9 | 
 10 | from phoneme_segmenter import *
 11 | 
 12 | 
 13 | formant_window = .05
 14 | moment_window=.01
 15 | 
 16 | def get_formants(w,sr, twind=formant_window, twind_min=0.01):
 17 |     while twind>=twind_min:
 18 |         try:
 19 |             t, f, bw = Formants(w.copy(), sr, tWind=twind)
 20 |             f[0][0]
 21 |         except (ValueError, IndexError):
 22 |             twind/=2
 23 |             f=[[]]
 24 |             continue
 25 |         break
 26 |         
 27 |     fmed = np.nanmedian(f,axis=0)
 28 |     ret = dict()
 29 |     for ii, ff in enumerate(fmed):
 30 |         ret['F{}'.format(ii+1)] = ff
 31 |     return ret
 32 | 
 33 | def get_RMS(w,sr):
 34 |     a,t=RMSWind(w,sr)
 35 |     return {'RMS':np.nanmean(a)}
 36 | 
 37 | def get_f0(w,sr, tfft=0.04,pkthresh=1e-8,npks=50, nfftmin=128):
 38 |     ret = {'RMS':np.nan,'f0':np.nan,'Harmonicity':np.nan}
 39 |     nfft = next_power_2(sr*tfft)
 40 |     nhop = nfft//2
 41 |     
 42 |     while nfft>nfftmin:
 43 |         a,t=RMSWind(w,sr,nwind=nfft,nhop=nhop)
 44 |         if len(a)<1:
 45 |             nfft=nfft//2
 46 |             nhop=nfft//2
 47 |         else:
 48 |             break
 49 |     ret['RMS'] = np.nanmean(a) 
 50 |     
 51 |     try:
 52 |         pv = PV(w,sr,nfft=nfft,pkthresh=pkthresh,
 53 |             npks=npks,progress=False)
 54 |         pv.run_pv()
 55 |         f0 = pv.calc_f0(thr=0.01)
 56 |         ret['f0']=np.nanmean(f0)
 57 |     except Exception:
 58 |         sys.stderr.write('Error calculating f0\n')
 59 |         return ret
 60 | 
 61 |     try:
 62 |         hh=HeterodyneHarmonic(w,sr,f=np.nanmean(f0),nwind=nfft,nhop=nhop)
 63 |     except Exception:
 64 |         sys.stderr.write('Error in first pass of Heterodyne\n')
 65 |         return ret
 66 |     try:
 67 |         f0,tf0=hh.calc_adjusted_freq(hh.f0)
 68 |         hh=HeterodyneHarmonic(w,sr,tf=tf0,f=f0,nharm=20,nwind=nfft,nhop=nhop)
 69 |     except Exception:
 70 |         sys.stderr.write('Error in second pass of Heterodyne\n')
 71 | 
 72 |     hpct = np.sqrt(np.sum(np.abs(hh.camp)**2,axis=1))/a
 73 |     
 74 |     ret['Harmonicity'] = np.nanmedian(hpct)
 75 |     return ret
 76 | 
 77 | def get_spectral_moments(w,sr,tfft=moment_window):
 78 |     nfft = next_power_2(sr*tfft)
 79 |     wo = w.copy()
 80 |     wo[:-1] -= wo[1:]
 81 |     
 82 |     fsg, tsg, sg = sig.spectrogram(wo, fs=sr, nfft=nfft)
 83 |     avs = np.mean(sg,axis=1)
 84 |     cent = np.sum(avs*fsg)/np.sum(avs)
 85 |     var = np.sum(avs*(fsg-cent)**2/np.sum(avs))
 86 |     return {'Centroid': cent,
 87 |             'Stdev': np.sqrt(var)}
 88 |     
 89 | 
 90 | def describe_phoneme(w,sr):
 91 |     desc = {}
 92 |     try:
 93 |         desc.update(get_f0(w,sr))
 94 |     except Exception:
 95 |         traceback.print_exc()
 96 |     #desc.update(get_RMS(w,sr))
 97 |     try:
 98 |         desc.update(get_spectral_moments(w,sr))
 99 |     except Exception:
100 |         traceback.print_exc()
101 |     
102 |     try:
103 |         desc.update(get_formants(w,sr))
104 |     except Exception:
105 |         traceback.print_exc()
106 |     return desc
107 |     
108 | 
109 | def parse_args():
110 |     parser = argparse.ArgumentParser()
111 |     
112 |     parser.add_argument('input', help='Input file or dir')
113 |     return parser.parse_args() 
114 | 
115 | def output_interval(w,sr,ts,te,label):
116 |     try:
117 |         desc = describe_phoneme(w,sr)
118 |     except Exception:
119 |         traceback.print_exc()
120 |         desc = {}
121 |     dstr = '{:.3f}, {:.3f}, {}'.format(ts,te,label)
122 |     for k,v in desc.items():
123 |         dstr+=',{}'.format(v)
124 |     print(dstr)
125 | 
126 | def dict_interval(w,sr,ts,te,label):
127 |     try:
128 |         desc = describe_phoneme(w,sr)
129 |     except Exception:
130 |         traceback.print_exc()
131 |         desc = {}
132 |     return desc
133 | 
134 | def file_df(sndfile):
135 |     sr,w = read_wav_file(sndfile)
136 |     ints = file_segments(sr,w)
137 |     tps = 0.
138 |     tpe=0.
139 |     alld = []
140 |     for thisi in ints:
141 |         ts = thisi['start']
142 |         te = thisi['end']
143 |         tph = thisi['phonemes']
144 |         label = 'SILENCE'
145 |         tps = tpe
146 |         tpe = ts
147 |         ww = w[int(sr*tps):int(sr*tpe)]
148 |         alld.append(dict_interval(ww,sr,tps,tpe,label))
149 |         alld[-1].update({'t_start':tps,
150 |                             't_end':tpe,
151 |                             'label':label})
152 |         
153 |         tps = ts
154 |         tpe = tps
155 |         label = 'Utteration START'
156 |         for tper in tph:
157 |             tpe = tper+ts
158 |             ww = w[int(sr*tps):int(sr*tpe)]
159 |             alld.append(dict_interval(ww,sr,tps,tpe,label))
160 |             alld[-1].update({'t_start':tps,
161 |                              't_end':tpe,
162 |                              'label':label})
163 |             tps=tpe
164 |             label = 'phoneme'
165 |         tps = tpe
166 |         tpe = te
167 |         label = 'Utteration END'
168 |         ww = w[int(sr*tps):int(sr*tpe)]
169 |         alld.append(dict_interval(ww,sr,tps,tpe,label))
170 |         alld[-1].update({'t_start':tps,
171 |                             't_end':tpe,
172 |                             'label':label})
173 |     df = pandas.DataFrame(alld)
174 |     return df
175 |     
176 | def process_file(sndfile):
177 |     df = file_df(sndfile)
178 |     df.to_csv(sys.stdout)
179 | 
180 | def process_dir(directory):
181 |     from glob import glob
182 |     filelist = glob(os.path.join(directory,'*.wav'))
183 |     for sndfile in filelist:
184 |         df = file_df(sndfile)
185 |         basepath, ext = os.path.splitext(sndfile)
186 |         df.to_csv(basepath+'.csv')
187 |  
188 | if __name__ == '__main__':
189 |     args = parse_args()
190 |     if os.path.isdir(args.input):
191 |         datadir = args.input
192 |         process_dir(datadir)
193 |     else:
194 |         process_file(args.input)


--------------------------------------------------------------------------------
/examples/phoneme_segmenter.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from glob import glob
  3 | import argparse
  4 | import matplotlib.pyplot as plt
  5 | import numpy as np
  6 | from scipy.io import wavfile
  7 | import scipy.signal as sig
  8 | 
  9 | from pypevoc.speech.SpeechChunker import SilenceDetector
 10 | from pypevoc.speech.SpeechSegmenter import SpeechSegmenter
 11 | 
 12 | bands = [200.,300.,500.,800.,1200.]
 13 | segmenter_detect_thresh=.5
 14 | chunker_fmin = 100
 15 | chunker_fmax = 1000
 16 | chunker_wind_sec=.2
 17 | chunker_method = 'pct10'
 18 | 
 19 | def estimate_noise_background_spcetrum(w, sr, tst, tend, nfft=1024):
 20 |     sg, fsg, tsg = sig.specgram(w, fs=sr, NFFT=nfft)
 21 |     for ts,te in zip(sd.tend[:-1],sd.tst[1:]):
 22 |         isil = (tsg>=ts)&(tsg<=te)
 23 |         silence_sg_chunks.append(sg[:,isil])
 24 | 
 25 |     silence_sg = np.hstack(silence_sg_chunks)
 26 | 
 27 |     return fsg, np.median(silence_sg,axis=1)
 28 | 
 29 | def read_wav_file(sndfile):
 30 |     return wavfile.read(sndfile)
 31 | 
 32 | def segment_wav(w, sr, fmin=100, fmax=1000, wind_sec=.2,method='pct10'):
 33 |     sd = SilenceDetector(w,sr=sr,fmin=fmin,fmax=fmax,wind_sec=wind_sec,method=method)
 34 |     return sd.tst, sd.tend
 35 |     
 36 | def next_power_2(x):
 37 |     return int(2**np.ceil(np.log2(x)))
 38 |     
 39 | def phoneme_segment_wav(w,sr, bands=[200.,300.,500.,800.,1200.],
 40 |                         detect_thresh=.5,twind=0.04):
 41 |     
 42 |     nrough = next_power_2(sr*twind)
 43 |     ss = SpeechSegmenter(sr=sr, bands=bands,
 44 |                          detect_thresh=detect_thresh,
 45 |                          rough_window=nrough)
 46 |     ss.set_signal(w,sr=sr)
 47 |     tph = ss.process(w)
 48 |     tph = ss.refine_all_all_bands()
 49 |     return tph
 50 | 
 51 | def file_segments(sr,w):
 52 |     tst, tend = segment_wav(w,sr,fmin=chunker_fmin,fmax=chunker_fmax,
 53 |                             wind_sec=chunker_wind_sec,method=chunker_method)
 54 |     ints = []
 55 |     for ts, te in zip(tst,tend):
 56 |         ww = w[int(ts*sr):int(te*sr)]
 57 |         tph = phoneme_segment_wav(ww, sr, bands=bands, 
 58 |                                   detect_thresh=segmenter_detect_thresh)
 59 |         ints.append({'start':ts,
 60 |                      'end':te,
 61 |                      'phonemes':tph})
 62 |     return ints
 63 | 
 64 | def process_file(sndfile):
 65 |     sr,w = read_wav_file(sndfile)
 66 |     ints = file_segments(sr,w)
 67 |     for thisi in ints:
 68 |         ts = thisi['start']
 69 |         te = thisi['end']
 70 |         tph = thisi['phonemes']
 71 |         print('{:7.3f}, Speech START'.format(ts))
 72 |         for t in tph:
 73 |             print('{:7.3f}, New phoneme'.format(t+ts))
 74 |         print('{:7.3f}, Speech END'.format(te))
 75 | 
 76 | def process_dir(directory):
 77 |     from glob import glob
 78 |     filelist = glob(os.path.join(directory,'*.wav'))
 79 |     for sndfile in filelist:
 80 |         sr,w = read_wav_file(sndfile)
 81 |         ints = file_segments(sr,w)
 82 |         for thisi in ints:
 83 |             ts = thisi['start']
 84 |             te = thisi['end']
 85 |             tph = thisi['phonemes']
 86 |             print('{}, {:7.3f}, Speech START'.format(sndfile,ts))
 87 |             for t in tph:
 88 |                 print('{}, {:7.3f}, New phoneme'.format(sndfile,t+ts))
 89 |             print('{}, {:7.3f}, Speech END'.format(sndfile,te))
 90 |         
 91 | 
 92 | 
 93 | def parse_args():
 94 |     parser = argparse.ArgumentParser()
 95 |     
 96 |     parser.add_argument('input', help='Input file or dir')
 97 |     return parser.parse_args() 
 98 |     
 99 | 
100 | if __name__ == '__main__':
101 |     args = parse_args()
102 |     if os.path.isdir(args.input):
103 |         datadir = args.input
104 |         process_dir(datadir)
105 |     else:
106 |         process_file(args.input)
107 | 


--------------------------------------------------------------------------------
/examples/progress_bar.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 3,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from pypevoc.ProgressDisplay import Progress, in_ipynb\n",
 10 |     "from time import sleep"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 4,
 16 |    "metadata": {},
 17 |    "outputs": [
 18 |     {
 19 |      "data": {
 20 |       "text/plain": [
 21 |        "True"
 22 |       ]
 23 |      },
 24 |      "execution_count": 4,
 25 |      "metadata": {},
 26 |      "output_type": "execute_result"
 27 |     }
 28 |    ],
 29 |    "source": [
 30 |     "in_ipynb()"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 8,
 36 |    "metadata": {},
 37 |    "outputs": [
 38 |     {
 39 |      "data": {
 40 |       "application/vnd.jupyter.widget-view+json": {
 41 |        "model_id": "48a6ae0e06a34643ad36169e03aa9319",
 42 |        "version_major": 2,
 43 |        "version_minor": 0
 44 |       },
 45 |       "text/plain": [
 46 |        "VBox(children=(HTML(value=''), IntProgress(value=1, bar_style='info')))"
 47 |       ]
 48 |      },
 49 |      "metadata": {},
 50 |      "output_type": "display_data"
 51 |     }
 52 |    ],
 53 |    "source": [
 54 |     "n = 1000\n",
 55 |     "pd = Progress(n)\n",
 56 |     "for ii in range(n):\n",
 57 |     "    pd.update(ii)\n",
 58 |     "    sleep(0.002)\n",
 59 |     "    \n",
 60 |     "pd.finish()"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": 9,
 66 |    "metadata": {},
 67 |    "outputs": [
 68 |     {
 69 |      "name": "stdout",
 70 |      "output_type": "stream",
 71 |      "text": [
 72 |       "In IPYNB\n"
 73 |      ]
 74 |     },
 75 |     {
 76 |      "data": {
 77 |       "application/vnd.jupyter.widget-view+json": {
 78 |        "model_id": "c9491944de314ba38a6f99456609b63d",
 79 |        "version_major": 2,
 80 |        "version_minor": 0
 81 |       },
 82 |       "text/plain": [
 83 |        "VBox(children=(HTML(value=''), IntProgress(value=1, bar_style='info')))"
 84 |       ]
 85 |      },
 86 |      "metadata": {},
 87 |      "output_type": "display_data"
 88 |     }
 89 |    ],
 90 |    "source": [
 91 |     "%run progress_bar.py"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": null,
 97 |    "metadata": {},
 98 |    "outputs": [],
 99 |    "source": []
100 |   }
101 |  ],
102 |  "metadata": {
103 |   "kernelspec": {
104 |    "display_name": "unsw",
105 |    "language": "python",
106 |    "name": "unsw"
107 |   },
108 |   "language_info": {
109 |    "codemirror_mode": {
110 |     "name": "ipython",
111 |     "version": 3
112 |    },
113 |    "file_extension": ".py",
114 |    "mimetype": "text/x-python",
115 |    "name": "python",
116 |    "nbconvert_exporter": "python",
117 |    "pygments_lexer": "ipython3",
118 |    "version": "3.7.3"
119 |   }
120 |  },
121 |  "nbformat": 4,
122 |  "nbformat_minor": 2
123 | }
124 | 


--------------------------------------------------------------------------------
/examples/progress_bar.py:
--------------------------------------------------------------------------------
 1 | from pypevoc.ProgressDisplay import Progress, in_ipynb
 2 | from time import sleep
 3 | 
 4 | if in_ipynb():
 5 |     print('In IPYNB')
 6 | else:
 7 |     print('In console')
 8 | 
 9 | n = 1000
10 | 
11 | pd = Progress(n)
12 | 
13 | for ii in range(n):
14 |     pd.update(ii)
15 |     sleep(.002)
16 | 
17 | pd.finish()
18 | 
19 | 


--------------------------------------------------------------------------------
/examples/smaller_than_star.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/smaller_than_star.wav


--------------------------------------------------------------------------------
/examples/smirnoffVn.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/goiosunsw/PyPeVoc/d1f118d4c17f2c5a44a865c84275d17922587f53/examples/smirnoffVn.wav


--------------------------------------------------------------------------------
/pypevoc/AMDF.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #  AMDF.py
  5 | #  
  6 | #  Utilities based on the Average Maen Difference Function
  7 | #  * Fundamental frequency estimator
  8 | #  * Tonal character
  9 | #
 10 | #
 11 | #  Copyright 2014 Andre Almeida <goios@AndreUbuntu>
 12 | #  
 13 | #  This program is free software; you can redistribute it and/or modify
 14 | #  it under the terms of the GNU General Public License as published by
 15 | #  the Free Software Foundation; either version 2 of the License, or
 16 | #  (at your option) any later version.
 17 | #  
 18 | #  This program is distributed in the hope that it will be useful,
 19 | #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 20 | #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 21 | #  GNU General Public License for more details.
 22 | #  
 23 | #  You should have received a copy of the GNU General Public License
 24 | #  along with this program; if not, write to the Free Software
 25 | #  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 26 | #  MA 02110-1301, USA.
 27 | #  
 28 | #  
 29 | 
 30 | import sys
 31 | import numpy as np
 32 | import PeakFinder as pf
 33 | import pylab as pl
 34 | from matplotlib.colors import hsv_to_rgb
 35 | 
 36 | 
 37 | def amdf(x, mindelay = 0, maxdelay = None):
 38 |     nx = len(x)
 39 |     if maxdelay is None:
 40 |         maxdelay = nx
 41 |         
 42 |     y = np.zeros(nx)
 43 |     for i in range(mindelay,maxdelay):
 44 |         n = nx - i
 45 |         y[i] = (np.abs(x[0:nx-i]-x[i:])).sum()/n
 46 |     
 47 |     return y
 48 | 
 49 | # I will try to update this object so that data required for the initialisation of every instance stays in the caller. Thee caller passes itself as argument to the callee
 50 | 
 51 | class Periodicity(object):
 52 |     """Single period object, including multiple periodicity candidates
 53 |     """
 54 |     def __init__(self, xw, sr=1, ncand = 8, candthresh = .8, vthresh = .2, mindelay=0, maxdelay=None, method='xcorr'):
 55 |         """Calculate the periodicity estimation for a window of a time signal
 56 |     
 57 |         Arguments: 
 58 |         x:          signal
 59 |         sr:         sample rate
 60 |         candthresh: ratio to lowest minima to keep as peak
 61 |         vthresh:    voicing threshold
 62 |         mindelay:   minimum value of period
 63 |         maxdelay:   maximum value of period
 64 |         ncand:      maximum number of period candidates
 65 |         method:     type of correlation correlation / matching to use
 66 |                     'xcorr' - correlation
 67 |                     'amdf'  - average mean difference function
 68 |                     'zc'    - zero crossing
 69 |         """
 70 | 
 71 |         nwind = len(xw)
 72 |         self.sr = sr
 73 |         
 74 |         self.mindelay = mindelay
 75 |         if maxdelay is None:
 76 |             self.maxdelay = round(nwind/2)
 77 |         else:
 78 |             self.maxdelay = maxdelay
 79 |             
 80 |         self.method = method
 81 |         self.threshold = candthresh
 82 |         self.vthresh = vthresh
 83 |         self.ncand = ncand
 84 |         
 85 |         self.cand_period = np.array([])
 86 |         self.cand_strength = np.array([])
 87 |         
 88 |         self._calc(xw)
 89 |         
 90 |         
 91 |     def _calc(self, xw):
 92 |         """Calculate the periodicity candidates
 93 |     
 94 |         Arguments: 
 95 |         xw: the windowed portion of time signal where periodicity is to be estimated
 96 |         """
 97 |         
 98 |         nwind = len(xw)
 99 |         
100 |         # unvoiced
101 |         pkpos = np.array([])
102 |         pkstr = np.array([])
103 | 
104 |         
105 |         try:
106 |             if self.method is 'amdf':
107 |                 xc = amdf(xw)
108 |             
109 |                 maxxc = max(xc)
110 |                 
111 |                 xcpos = (maxxc-xc[self.mindelay:self.maxdelay]) / maxxc
112 |                 xcth  = self.threshold
113 |                 
114 |             elif self.method is 'xcorr':
115 |                 
116 |                 xc = np.correlate(xw,xw,"full") / self.wind
117 |                 xcred = xc[nwind-1+self.mindelay:nwind-1+self.maxdelay]
118 |                 xcpos = xcred/max(xc)
119 |                 xcth = self.threshold
120 |                 
121 |                 #print "In xcorr. max %f, thr %f"%(max(xcpos),xcth)
122 |                 
123 |             if max(xcpos) > self.vthresh:
124 |                 # this is equivlent to finding minima below the absolute minimum * threshold
125 |                 peaks = pf.PeakFinder(xcpos, minval = xcth, npeaks = self.ncand)
126 |                 
127 |                 
128 |                 peaks.refine_all()
129 |                 #peaks.plot()
130 |                 
131 |                 pkpos = peaks.get_pos() + self.mindelay
132 |                 pkstr = peaks.get_val()
133 |                 
134 |                 #keep = pkpos<self.maxdelay
135 |                 #pkpos = pkpos[keep]
136 |                 #pkstr = pkstr[keep]
137 | 
138 |             
139 |         except IndexError:
140 |             pass
141 |             
142 |         self.cand_period = pkpos
143 |         self.cand_strength = pkstr
144 |         
145 |     def set_time_properties(self, index):
146 |         """Set the sample and time value of this periodicity estimation
147 |     
148 |         Arguments: 
149 |         index: sample index
150 |         """
151 | 
152 |         self.index = index
153 |         self.time = float(index)/self.sr
154 |         
155 |     def sort_strength(self):
156 |         """Sort candidates by periodicity strength
157 |     
158 |         Arguments: (None)
159 |         """
160 | 
161 |         idx = np.argsort(self.cand_strength)[::-1]
162 |         self.cand_period = self.cand_period[idx]
163 |         self.cand_strength = self.cand_strength[idx]
164 | 
165 | 
166 | 
167 | 
168 | 
169 | 
170 | 
171 | class PeriodTimeSeries(object):
172 |     def __init__(self, x, sr=1, window=None, hop=None, threshold = .8, vthresh = .2, mindelay=0, maxdelay=None, ncand = 8, method = 'xcorr'):
173 |         """Calculate the average mean difference of x around index
174 |     
175 |         Arguments: 
176 |         x:         signal
177 |         sr:        sample rate
178 |         window:    window around index used for difference calculations
179 |         threshold: ratio to lowest minima to keep as peak
180 |         vthresh:   voicing threshold
181 |         mindelay:  minimum value of period
182 |         maxdelay:  maximum value of period
183 |         ncand:     maximum number of period candidates
184 |         method:    type of correlation correlation / matching to use
185 |                    'xcorr' - correlation
186 |                    'amdf'  - average mean difference function
187 |                    'zc'    - zero crossing
188 |         """
189 |         
190 |         self.method = method
191 |         self.x = x.astype(float)
192 |         self.sr = sr
193 |         
194 |         self.nx = len(x)
195 |     
196 |         if window is None:
197 |             if maxdelay is None:
198 |                 window = self.nx
199 |             else:
200 |                 window = 2*self.maxedlay
201 |                 
202 |         if not np.iterable(window):
203 |             window = np.ones(window)
204 |         
205 |         self.wind = window
206 |         self.wnorm = self._calc_window_norm()
207 |     
208 |         self.nwind = len(window)
209 |         self.windad = amdf(window)
210 |         
211 |         if hop is None:
212 |             hop = round(self.nwind/2)
213 |         
214 |         self.hop = hop
215 |         
216 |         self.mindelay = mindelay
217 |         if maxdelay is None:
218 |             self.maxdelay = round(self.nwind/2)
219 |             
220 |         self.method = method
221 |         self.threshold = threshold
222 |         self.vthresh = vthresh
223 |         self.ncand = ncand
224 |         
225 |         # data storage
226 |         self.periods = []
227 |         
228 |     def _calc_window_norm(self):
229 |         """Calculate the normalisation function for window
230 |     
231 |         Arguments: (None)
232 |         """
233 |         
234 |         if self.method is 'xcorr':
235 |             w = self.wind
236 |             self.wnorm = np.correlate(w,w,"full")
237 |         else:
238 |             self.wnorm = 1.   
239 | 
240 | 
241 |     def per_at_index(self,index):
242 |         """Calculate the average mean difference of x around index
243 |     
244 |         Arguments: 
245 |     
246 |         index:  index of x for current amdf
247 |         threshold: ratio to lowest minima to keep as peak
248 |         """
249 |  
250 |         
251 |         nwleft = np.floor(self.nwind/2)
252 |         nwright = self.nwind - nwleft
253 |         ist = index - nwleft
254 |         iend = index + nwright
255 |         
256 |         xs = self.x[ist:iend]
257 |         xw = xs * self.wind
258 |         
259 |         pp = Periodicity(xw, sr=self.sr, candthresh = self.threshold, vthresh = self.vthresh, mindelay = self.mindelay, maxdelay = self.maxdelay, ncand=self.ncand, method=self.method)
260 |         pp.set_time_properties(index)
261 |         pp.sort_strength()
262 | 
263 |         self.periods.append(pp)
264 |             
265 |             
266 |     def calc(self, hop=None, threshold=None):
267 |         """Estimate local periodicity in the full time series
268 |     
269 |         Arguments: 
270 |     
271 |         hop:       samples bewteen estimations
272 |         threshold: peak threshold for maintaining or rejecting 
273 |                    candidates
274 |         """
275 | 
276 |         
277 |         if hop is None:
278 |             hop=self.hop
279 |         
280 |         if threshold is not None:
281 |             oldthresh = self.threshold
282 |             self.threshold = threshold
283 |         
284 |         idxmax = self.nx-self.nwind
285 |         idxvec = np.arange(self.nwind,idxmax,hop)
286 |         
287 |         sys.stdout.write("Calculating local periodicity... "  ) 
288 |         
289 |         for idx in idxvec:
290 |             self.per_at_index(idx)
291 |             sys.stdout.write("\b\b\b\b%3d%%" % (idx*100/idxmax) )
292 |             sys.stdout.flush()
293 | 
294 |         sys.stdout.write("\ndone\n"  ) 
295 |         
296 |         if threshold is not None:
297 |             self.threshold = oldthresh
298 |             
299 |     def plot_candidates(self):
300 |         """Plot a representation of candidate periodicity
301 |         
302 |         Size gives the periodicity strength, color the order of preference
303 |         """
304 |         
305 |         hues = np.arange(self.ncand)/float(self.ncand)
306 |         hsv = np.swapaxes(np.atleast_3d([[hues,np.ones(len(hues)),np.ones(len(hues))]]),1,2)
307 |         cols = hsv_to_rgb(hsv).squeeze()
308 |         
309 |         for per in self.periods:
310 |             nc = len(per.cand_period)
311 |             
312 |             pl.scatter(per.time*np.ones(nc),per.cand_period,s=per.cand_strength*100,c=cols[0:nc],alpha=.5)
313 |         
314 |         
315 | 


--------------------------------------------------------------------------------
/pypevoc/AudioInterface.py:
--------------------------------------------------------------------------------
  1 | from numpy import array, pad, array, ndarray
  2 | import wave
  3 | import struct
  4 | 
  5 | def everyOther (v, offset=0):
  6 |     return [v[i] for i in range(offset, len(v), 2)]
  7 | 
  8 | def wavInfo(fname):
  9 |     wav = wave.open (fname, "r")
 10 |     params = wav.getparams ()
 11 |     wav.close()
 12 |     return params
 13 |     # (nchannels, sampwidth, framerate, nframes, comptype, compname)
 14 | 
 15 | def wavLoad (fname, startTime=0.0, endTime=None):
 16 |     wav = wave.open (fname, "r")
 17 |     (nchannels, sampwidth, framerate, nframes, comptype, compname) = wav.getparams ()
 18 |     if startTime > 0.0:
 19 |         wav.setpos(int(startTime*float(framerate*nchannels))/nchannels)
 20 |     
 21 |     if endTime:
 22 |         nrdframes = int((endTime-startTime)*float(framerate*nchannels))/nchannels
 23 |     else:
 24 |         nrdframes = nframes-wav.tell()
 25 |         
 26 |     frames = wav.readframes (nrdframes * nchannels)
 27 |     out = struct.unpack_from ("%dh" % nrdframes * nchannels, frames)
 28 |     
 29 |     # Convert 2 channles to numpy arrays
 30 |     if nchannels == 2:
 31 |        left = array (list (everyOther (out, 0)))
 32 |        right = array (list  (everyOther (out, 1)))
 33 |        return framerate, array(left,right)
 34 |     else:
 35 |        left = array (out)
 36 |        #right = left
 37 |        return framerate, left
 38 | 
 39 | def wavCopy (infile, outfile, startTime=0.0, endTime=None):
 40 |     inwav = wave.open (infile, "r")
 41 |     outwav = wave.open (outfile, "w")
 42 |     (nchannels, sampwidth, framerate, 
 43 |         nframes, comptype, compname) = inwav.getparams ()
 44 | 
 45 |     if startTime > 0.0:
 46 |         inwav.setpos(int(startTime*float(framerate*nchannels))/nchannels)
 47 |     
 48 |     if endTime:
 49 |         nrdframes = int((endTime-startTime)*float(framerate*nchannels))/nchannels
 50 |     else:
 51 |         nrdframes = nframes-inwav.tell()
 52 |     
 53 |     outwav.setnchannels(nchannels)
 54 |     outwav.setsampwidth(sampwidth)
 55 |     outwav.setframerate(framerate)
 56 |     
 57 |     for ii in range(nrdframes):
 58 |         frames = inwav.readframes (nchannels)
 59 |         outwav.writeframes(frames)
 60 |     
 61 |     outwav.close()
 62 |     inwav.close()
 63 | 
 64 | def wavSave (data, framerate, fname, sampwidth=2):
 65 |     wav = wave.open (fname, "w")
 66 |     wav.setframerate(framerate)
 67 |     wav.setsampwidth(sampwidth)
 68 |     if hasattr(data[0], '__len__'):
 69 |         nchan = len(data[0])
 70 |         values = [struct.pack('h',int(d)) for dd in data for d in dd ]
 71 |     else:
 72 |         nchan = 1
 73 |         values = [struct.pack('h',int(d)) for d in data]
 74 |     wav.setnchannels(nchan)
 75 |         
 76 |     valstr = ''.join(values)
 77 |     wav.writeframes (valstr)
 78 |     
 79 |     wav.close
 80 | 
 81 |     
 82 | def play(w,sr):
 83 |     if type(w) is not ndarray:
 84 |         w=array(w)
 85 |     try:
 86 |         nchan = w.shape[1]
 87 |     except IndexError:
 88 |         nchan=1
 89 |         if w.shape[0] <20000:
 90 |             w=pad(w,pad_width=(4000,4000),mode='constant', constant_values=(0,0))
 91 |     
 92 |     w16 = w.astype('int16').tobytes()
 93 |     # Open stream with correct settings
 94 |     
 95 |     import pyaudio
 96 |     pya = pyaudio.PyAudio()
 97 |     stream = pya.open(format=pya.get_format_from_width(width=2), channels=nchan, rate=sr, output=True)
 98 |     # Assuming you have a numpy array called samples
 99 |     stream.write(w16)
100 |     stream.stop_stream()
101 |     stream.close()
102 | 
103 |     pya.terminate()
104 | 
105 |     
106 | 


--------------------------------------------------------------------------------
/pypevoc/FFTFilters.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #  Filters.py
  5 | #  
  6 | #  Copyright 2017 Andre Almeida <goios@goios-UX305UA>
  7 | #  
  8 | #  This program is free software; you can redistribute it and/or modify
  9 | #  it under the terms of the GNU General Public License as published by
 10 | #  the Free Software Foundation; either version 2 of the License, or
 11 | #  (at your option) any later version.
 12 | #  
 13 | #  This program is distributed in the hope that it will be useful,
 14 | #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | #  GNU General Public License for more details.
 17 | #  
 18 | #  You should have received a copy of the GNU General Public License
 19 | #  along with this program; if not, write to the Free Software
 20 | #  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 21 | #  MA 02110-1301, USA.
 22 | #  
 23 | #  
 24 | 
 25 | import numpy as np
 26 | 
 27 | class BandError(Exception):
 28 |     """Exception raised for errors in band definition.
 29 | 
 30 |     Attributes:
 31 |         expression -- input expression in which the error occurred
 32 |         message -- explanation of the error
 33 |     """
 34 | 
 35 |     def __init__(self,  message):
 36 |         self.message = message
 37 |         Exception.__init__(self, message)
 38 | 
 39 | 
 40 | def preemph(w,hpFreq=0,Fs=1):
 41 |     '''
 42 |     Applies a pre-emphasis filter to the signal w
 43 |         amplifies the signal with a +6dB/octave
 44 |         filter above the cut-on frequency
 45 |         
 46 |     Arugments:
 47 |     * hpFreq = cut-on frequency
 48 |     * Fs = sampling frequency
 49 |     '''
 50 |     
 51 |     if hpFreq>0:
 52 |         a=np.exp(-2.*np.pi*hpFreq/float(Fs));
 53 |         #preEmphA = [a,1-a];
 54 |         #wo = sig.lfilter([1],preEmphA,w);
 55 |         wo=w.astype('f')
 56 |         wo[:-1] -= wo[1:]*a
 57 |     else:
 58 |         wo=w
 59 |     return wo
 60 | 
 61 | def _f_to_mel_py(freq):
 62 |     # mel = 1125 * ln(1+f/700)
 63 |     return 1125. + np.log(1.+freq/700.)
 64 | 
 65 | def _mel_to_f_py(mel):
 66 |     return 700.*(np.exp(mel-1125.)-1)
 67 | 
 68 | f_to_mel = np.vectorize(_f_to_mel_py)
 69 | mel_to_f = np.vectorize(_mel_to_f_py)
 70 | 
 71 | 
 72 | def peaks(x):
 73 |     '''
 74 |     Return indexes of all local maxima in x
 75 |     '''
 76 |     pkmask = np.logical_and(x[:-2]<x[1:-1],x[2:]<x[1:-1])
 77 |     return np.flatnonzero(pkmask)+1
 78 | 
 79 |     
 80 | def nearest(a, b):
 81 |     '''Find elements of b nearest to elements of a'''
 82 |     an = np.zeros(len(a))
 83 |     for ia,aa in enumerate(a):
 84 |         idx = np.argmin(np.abs(b-aa))
 85 |         an[ia]=b[idx]
 86 |     return an
 87 | 
 88 | class PiecewiseFilterSpec(object):
 89 |     '''
 90 |     Builds and stores specifications for a FFT filter
 91 |     '''
 92 |     bandf = np.array([0.0, 0.5])
 93 |     bandg = np.array([1.0, 1.0])
 94 |     sr = 1.0
 95 |     label=''
 96 |     
 97 |     def __init__(self, mode='', cutoff=0.5, 
 98 |                  freq = np.array([0.0, 0.5]),
 99 |                  gain = np.array([1.0, 1.0]),
100 |                  sr = 1.0,
101 |                  label = ''):
102 |         '''
103 |         Create a new filter specification
104 |         
105 |         Can be called using the following presets passed as mode:
106 |         * Lowpass or lp: value in freq argument is cutoff
107 |         * Hipass or hp: value in freq argument is cuton
108 |         * Bandpass or bp: list in freq argument is cutoff and cuton
109 |         * Bandstop or bs: list in freq argument is cuton and cutoff
110 |         
111 |         Otherwise provide frequency vertexes and corresponding gains
112 |         in freq and gain arguments
113 |         
114 |         Frequencies are divided by sr
115 |         
116 |         Optionnaly provide a label for the filter
117 |         '''
118 |         
119 |         self.sr=sr
120 |         
121 |         if mode.lower()=='lp' or mode.lower()=='lowpass':
122 |             self.set_lowpass_cutoff(freq/float(sr))
123 |         elif mode.lower()=='hp' or mode.lower()=='hipass' or mode.lower()=='highpass':
124 |             self.set_hipass_cutoff(freq/float(sr))
125 |         elif mode.lower()=='bp' or mode.lower()=='bandpass':
126 |             self.set_bandpass_freqs(freq[0]/float(sr), freq[-1]/float(sr))
127 |         elif mode.lower()=='bs' or mode.lower()=='bandstop':
128 |             self.set_bandstop_freqs(freq[0]/float(sr), freq[-1]/float(sr))
129 |         else:
130 |             assert len(freq) == len(gain)
131 |             self.set_triangular_filter(freq,gain)
132 |             self.label = label
133 |         
134 |         if not self.label:
135 |             self.label = 'Piecewise filter with {} bands'.format(len(self.bandf)-1)
136 | 
137 |     def set_lowpass_cutoff(self,f):
138 |         self.bandf = np.array([[0.0, f],[f, 0.5]])
139 |         self.bandg = np.array([[1.0, 1.0],[0.0, 0.0]])
140 |         self.label = 'Lowpass filter, fc={}'.format(f*self.sr)
141 |         
142 |     def set_hipass_cutoff(self,f):
143 |         self.bandf = np.array([[0.0, f],[f, 0.5]])
144 |         self.bandg = np.array([[0.0, 0.0],[1.0, 1.0]])
145 |         self.label = 'Hipass filter, fc={}'.format(f*self.sr)
146 | 
147 |     def set_bandpass_freqs(self,f1,f2):
148 |         self.bandf = np.array([[0.0, f1],[f1, f2],[f2,0.5]])
149 |         self.bandg = np.array([[0.0, 0.0],[1.0, 1.0],[0.0, 0.0]])
150 |         self.label = 'Bandpass filter, fc={}'.format((f1/2+f2/2)*self.sr)
151 | 
152 |     def set_bandstop_freqs(self,f1,f2):
153 |         self.bandf = np.array([[0.0, f1],[f1, f2],[f2,0.5]])
154 |         self.bandg = np.array([[1.0, 1.0],[0.0, 0.0],[1.0, 1.0]])
155 |         self.label = 'Bandstop filter, fc={}'.format((f1/2+f2/2)*self.sr)
156 |     
157 |     def set_triangular_filter(self,freq,gain):
158 |         bands = []
159 |         bgains = []
160 |         idx = np.argsort(freq)
161 |         for iprev,inext in zip(idx[:-1],idx[1:]):
162 |             bands.append([freq[iprev]/self.sr,freq[inext]/self.sr])
163 |             bgains.append([gain[iprev],gain[inext]])
164 |             
165 |         self.bandf = np.array(bands)
166 |         self.bandg = np.array(bgains)
167 |         
168 | 
169 |         
170 |     def __repr__(self):
171 |         rep =  '{}:\n'.format(self.label)
172 |         for f,g in zip(self.bandf,self.bandg):
173 |             fst = f[0]*self.sr
174 |             fend = f[1]*self.sr
175 |             if g[0] == g[1]:
176 |                 rep+='  Freq = [{},{}]: gain = {}\n'.format(fst,fend,g[0])
177 |             else:
178 |                 rep+='  Freq = [{},{}]: gain = [{},{}]\n'.format(fst,fend,g[0],g[1])
179 |                 
180 |         return rep
181 |         
182 |     def get_frequency_gains(self):
183 |         '''
184 |         Returns frequency values and gains correspoinding to the
185 |         piecewise filter.
186 |         
187 |         fband, bandg = get_frequency_gains(self)
188 |         
189 |         fband and bandg ar Nx2 arrays
190 |         '''
191 |         return np.array(self.bandf)*self.sr, np.array(self.bandg)
192 | 
193 |     def get_frequency_edges(self):
194 |         '''
195 |         Returns the unique values of frequency edges
196 |         '''
197 |         return np.unique((np.array(self.bandf).flatten()*self.sr))
198 | 
199 |     
200 |     def apply_to_freq_vector(self,fvec, align_edges=False):
201 |         '''
202 |         Returns the values of gain at frequencies in fvec
203 |         '''
204 |         fvec=np.array(fvec)
205 |         flim = self.get_frequency_edges()
206 |         edge_dict=dict()
207 |         if align_edges:
208 |             for ff in flim:
209 |                 idx = np.argmin(np.abs(fvec-ff))
210 |                 edge_dict[ff] = fvec[idx]
211 |         else:
212 |             for ff in flim:
213 |                 edge_dict[ff] = ff
214 |         
215 |         #print edge_dict
216 |         
217 |         filter_mask = np.zeros(len(fvec))
218 |         freqs = self.bandf*self.sr
219 |         for f,g in zip(freqs,self.bandg):
220 |             fst = edge_dict[f[0]]
221 |             fend = edge_dict[f[1]]
222 |             idx = np.logical_and(fvec>=fst,
223 |                                  fvec<=fend)
224 |             if fend!=fst:
225 |                 filter_mask[idx]=(fvec[idx]-fst)/(fend-fst)*(g[1]-g[0])+g[0]
226 |             else:
227 |                 raise BandError('Band is too narrow: try increasing nwind')
228 |                     
229 | 
230 |         return filter_mask
231 | 
232 |         
233 |         
234 | 
235 | class FilterBank(object):
236 |     '''
237 |     FilterBank object: Defines a FFT-based filter bank
238 |     '''
239 |     label = []
240 |     fvec = np.zeros(0)
241 |     fb=np.zeros((0,0))
242 |     sr=1.
243 |     
244 |     def __init__(self, fspec_list=None, sr=1.0, 
245 |                  nwind=256, windfunc=np.hanning,
246 |                  nhop=None, align_edges=True):
247 |         '''
248 |         Create a filter bank from a list of filter specification 
249 |           objects PiecewiseFilterSpec
250 |           
251 |         By default creates a 2-band filterbank 
252 |           dividing the range [0,sr/2] into two bands 
253 |         '''
254 |         self.sr = sr
255 |         self.wind  = windfunc(nwind)
256 |         self.nwind = int(nwind)
257 |         if nhop:
258 |             self.hop = nhop
259 |         else:
260 |             self.hop = int(nwind/2)
261 | 
262 |         self.fvec = np.linspace(0.,sr,nwind)
263 |         if not fspec_list:
264 |             fc=0.25
265 |             fspec_list=[PiecewiseFilterSpec(mode='lowpass',freq=fc,sr=sr),
266 |                         PiecewiseFilterSpec(mode='hipass',freq=fc,sr=sr)]
267 |                         
268 |         self.fb = np.zeros((len(fspec_list),len(self.fvec)))
269 |         self.label=[]
270 |         for ii,fspec in enumerate(fspec_list):
271 |             self.fb[ii,:]=fspec.apply_to_freq_vector(self.fvec,align_edges=align_edges)
272 |             self.label.append(fspec.label)
273 |             
274 |     def specout(self,w):
275 |         '''
276 |         Calculate the output of the filterbank applied to w
277 |         '''
278 | 
279 |         n=0
280 |         bankout = []
281 |         tout=[]
282 |         while n<len(w)-self.nwind:
283 |             thisbank = []
284 |             ww = w[n:n+self.nwind]*self.wind
285 |             Sw = np.fft.fft(ww)
286 |             Sww = np.abs(Sw)**2
287 |             for i in range(self.fb.shape[0]):
288 |                 thisbank.append(sum(Sww*self.fb[i,:]))
289 |             bankout.append(thisbank)
290 |             tout.append((float(n)+self.nwind/2.)/float(self.sr))
291 |             n+=self.hop
292 |         return np.array(bankout),np.array(tout)
293 | 
294 |     def __repr__(self):
295 |         rstr = 'FilterBank with filters:\n'
296 |         for ll in self.label:
297 |             rstr+='  '+ll+'\n'
298 |         return rstr
299 | 
300 | class TriangularFilterBank(FilterBank):
301 |     '''
302 |     FilterBank object: Defines a FFT-based filter bank
303 |     '''
304 |     label = []
305 |     fvec = np.zeros(0)
306 |     fb=np.zeros((0,0))
307 |     sr=1.
308 | 
309 |     def __init__(self,flim=[0,.5,1.],nwind=256, sr=1., nhop=None):
310 |         '''
311 |         Create a filter bank:
312 |         * flim:  limits of frequency bands
313 |         * nwind: window for FFT
314 |         * nhop:  interval between successive filtered frames 
315 |                  (half window by default)
316 |         * sr:    sampling rate (by default = 1, in that case define flim
317 |             as a fraction of sampling rate [0,1)
318 |         '''
319 |         
320 |         fsl=[]
321 |         
322 |         if sr>1.0:
323 |             unit = 'Hz'
324 |         else:
325 |             unit = ''
326 |         
327 |         flim = np.sort(flim).astype('f')
328 |         for n,cc in enumerate(flim[1:-1]):
329 |             bandf = flim[n:n+3]
330 |             bandg = np.array([0.0,1.0,0.0])
331 |             lab = '{}{} band ({}-{}{})'.format(cc,unit,flim[n],flim[n+2],unit)
332 |             fsl.append(PiecewiseFilterSpec(freq=bandf,gain=bandg,label=lab,sr=sr))
333 |              
334 |         super(TriangularFilterBank,self).__init__(fspec_list=fsl,nwind=nwind,sr=sr,nhop=nhop)
335 | 
336 | 
337 | def nextpow2(x):
338 |     return 2**(np.ceil(np.log2(x)))
339 | 
340 | 
341 | 
342 | class MelFilterBank(TriangularFilterBank):
343 |     def __init__(self,n=26,fmin=300.,fmax=8000.,twind=.025, sr=44100., thop=.01):
344 |         nwind = int(2**np.round(np.log2(twind*sr)))
345 |         nhop = int(thop*sr)
346 |         melmin = f_to_mel(fmin)
347 |         melmax = f_to_mel(fmax)
348 |         fc = mel_to_f(np.linspace(melmin,melmax,n+2))
349 | 
350 |         super(MelFilterBank,self).__init__(flim=fc,nwind=nwind,sr=sr,nhop=nhop)
351 | 
352 |     def mfcc(self,w,mode='DCT2'):
353 |         spec, tspec = self.specout(w)
354 |         logs = np.log(spec)
355 |         if mode[:3]=='DCT':
356 |             dctype = int(mode[3])
357 |             from scipy.fftpack import dct
358 |             return dct(logs,type=dctype), tspec
359 |         elif mode=='IFFT':
360 |             return np.fft.ifft(logs), tspec
361 |         else:
362 |             raise NotImplementedError
363 | 
364 |     def mfcc_and_mel(self,w,mode='DCT2'):
365 |         spec, tspec = self.specout(w)
366 |         logs = np.log(spec)
367 |         if mode[:3]=='DCT':
368 |             dctype = int(mode[3])
369 |             from scipy.fftpack import dct
370 |             return dct(logs,type=dctype), spec, tspec
371 |         elif mode=='IFFT':
372 |             return np.fft.ifft(logs), spec, tspec
373 |         else:
374 |             raise NotImplementedError
375 |     
376 | def fft_filter(x, bands, gains):
377 |     '''
378 |     Filter signal x using FFT and IFFT
379 |     * x input signal
380 |     * bands: list of start and stop frequencies of each band
381 |     * gains: start and stop gains in each band
382 |     
383 |     Example:
384 |     
385 |     y = FFTfilter(x, [(0,0.1),(0.1,1.0)], [(1.,1.),(0.,0.)])
386 |     
387 |     filters signal x low pass at 0.1 times the nyquist rate
388 |       (sampling rate / 2)
389 |     '''
390 |     
391 |     xf = np.fft.fft(x)
392 |     nyq = len(xf)/2
393 |     
394 |     ffilter = np.zeros(len(xf))
395 |     for bb, gg in zip(bands,gains):
396 |         fmin = int(bb[0]*nyq)
397 |         fmax = int(bb[1]*nyq)
398 |         ffilter[fmin:fmax]=np.linspace(gg[0],gg[1],fmax-fmin)
399 |         if fmin>0:
400 |             ffilter[-fmax+1:-fmin+1]=np.linspace(gg[1],gg[0],fmax-fmin)
401 |         else:
402 |             ffilter[-fmax+1:]=np.linspace(gg[1],gg[0],fmax-fmin-1)
403 |         print('{}-{} : gains [{}, {}]'.format(fmin,fmax,gg[0],gg[1]))
404 |         
405 |     xf_filt = xf*ffilter
406 |     return np.fft.ifft(xf_filt) 
407 | 
408 | 


--------------------------------------------------------------------------------
/pypevoc/Heterodyne.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #  Heterodyne.py
  5 | #
  6 | #  Synchronous decomposition of periodic signals 
  7 | #
  8 | #  Copyright 2018 Andre Almeida <andregoios@gmail.com>
  9 | #
 10 | #  This program is free software; you can redistribute it and/or modify
 11 | #  it under the terms of the GNU General Public License as published by
 12 | #  the Free Software Foundation; either version 2 of the License, or
 13 | #  (at your option) any later version.
 14 | #
 15 | #  This program is distributed in the hope that it will be useful,
 16 | #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 | #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 18 | #  GNU General Public License for more details.
 19 | #
 20 | #  You should have received a copy of the GNU General Public License
 21 | #  along with this program; if not, write to the Free Software
 22 | #  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 23 | #  MA 02110-1301, USA.
 24 | #
 25 | #
 26 | 
 27 | import sys
 28 | import collections
 29 | import numpy as np
 30 | 
 31 | from .PVAnalysis import SinSum
 32 | 
 33 | 
 34 | 
 35 | def heterodyne(x, hetsig, wind=None, hop=None):
 36 |     """
 37 |     Heterodyner: calculates the complex amplitude of a sine wave centered at f
 38 | 
 39 |     Arguments:
 40 |         x: signal
 41 |         f: normalised frequency vector of same length as x
 42 |            (frequency/sr)
 43 |         wind: windowing function (array, defaults to 256 point rectangular)
 44 |         nhop: samples between windows (defaults to 1/2 the window length)
 45 |     """
 46 | 
 47 |     ret = []
 48 |     icent = []
 49 |     if wind is None:
 50 |         wind = np.ones(2**8)
 51 |     wlen = len(wind)
 52 |     wnorm = np.sum(wind)
 53 |     #fvec[np.logical_not(np.isfinite(fvec))]=0
 54 |     xf = x*hetsig
 55 |     for ii in range(0,len(x)-wlen,hop):
 56 |         xx = xf[ii:ii+wlen]
 57 |         xw = xx*wind
 58 |         ret.append(np.sum(xw)/wnorm)
 59 |         icent.append(ii+wlen//2)
 60 |     return np.array(ret)*2,np.array(icent)
 61 | 
 62 | 
 63 | def heterodyne_corr(x,sr,f,maxwind=2**14,nhop=2**10,nper=3,dc_cut=50, release_partials=True):
 64 |     """
 65 |     Heterodyne from sequential residuals
 66 | 
 67 |     Performs partial heterodyne detection on a signal x for partials
 68 |     at frequency f.
 69 |     
 70 |     release_partials: heterodynes are performed sequenctially on the
 71 |     residuals from previous partial detections
 72 |     """
 73 |     xx = x.copy()
 74 |     t = np.arange(len(x))/sr
 75 | 
 76 |     nharm = len(f)
 77 |     ret = []
 78 |     part = np.zeros((len(x),nharm))
 79 |     for ii,ff in enumerate(f):
 80 |         if ff==0.:
 81 |             nwind=maxwind
 82 |         foth = np.delete(f,ii)
 83 |         nwind = (sr/np.min(np.abs(foth-ff))*nper)
 84 |         print(nwind)
 85 |         hetsig = np.exp(1j*2*np.pi*ff*t)
 86 |         if release_partials:
 87 |             cc,ih = heterodyne(xx,hetsig,wind=np.hanning(nwind),hop=nhop)
 88 |         else:
 89 |             cc,ih = heterodyne(x,hetsig,wind=np.hanning(nwind),hop=nhop)
 90 |         if ff==0.:
 91 |             cc/=2
 92 |         th=ih/sr
 93 |         ret.append(ts.SampledTimeSeries(cc,th,label='%.2f'%ff))
 94 |         ret[-1].f = ff
 95 |         hf = np.interp(t,th,cc)
 96 |         xp = np.real(np.conjugate(hf)*hetsig)
 97 |         xx-=xp
 98 |         part[:,ii]=xp
 99 |     return ret,xx,part
100 | 
101 | class Heterodyne(object):
102 |     """
103 |     Perform a sine sum decomposition based on a f0 track
104 |     """
105 | 
106 |     def __init__(self, x, sr=1.0, nwind=1024, wfun=np.hanning, nhop=None):
107 |         """
108 |         Create a heterodyner object, storing the signal basic information
109 |         of the analysis
110 | 
111 |         Arguments:
112 |             * x:        signal
113 |             * sr:       sampling rate
114 |             * nwind:    default window length 
115 |             * wfun:     default windowing function
116 |             * ampthr:   amplitude threshold for filtering in resynthesis
117 |         """
118 |         self.x = x
119 |         self.sr = sr
120 |         self.nwind = nwind
121 |         self.nhop = nhop
122 |         self.nsamp = len(x)
123 |         self.wfun = wfun
124 |         self.ampthr = ampthr
125 | 
126 |         self._fix_params()
127 | 
128 |     def add_partial(self, f, tf=None, fidx=None,
129 |                     wind=None, nhop=None, 
130 |                     t=None, idx=None):
131 |         """
132 |         set the starting indices for the windowed analysis
133 |         """
134 |         self.idx = idx
135 |         
136 | 
137 |     def harmonic_times(self, n=1):
138 |         if self.variable_resolution:
139 |             return self.th[n-1]
140 |         else:
141 |             return self.th
142 | 
143 |     def harmonic_amplitudes(self, n=1):
144 |         if self.variable_resolution:
145 |             return self.ah[n-1]
146 |         else:
147 |             return self.ah[:,n-1]
148 | 
149 |     def harmonic_frequencies(self, n=1):
150 |         if self.variable_resolution:
151 |             return self.f[self.idxh[n-1]]*n
152 |         else:
153 |             return self.f[self.idxh]*n
154 | 
155 | 
156 |     def heterodyner_signal(self, n=1):
157 |         """
158 |         return a reference variable-frequency signal
159 |         with frequency equal to n* the harmonic of the frequency vector
160 |         """
161 |         omega = self.fvec*2*np.pi*n
162 |         phvec = np.cumsum(omega)
163 |         return np.exp(1j*phvec)
164 | 
165 |     def set_fvec(self, f0c, th=None, adjust=False):
166 |         tvec = np.arange(len(self.x))/self.sr
167 | 
168 |    
169 |         if th is not None:
170 |             fvec = np.interp(tvec, th, f0c)
171 |         else:
172 |             fvec = f0c
173 | 
174 |         # fix for single-frequency values
175 |         if not isinstance(fvec, collections.abc.Sequence):
176 |             fvec = fvec*np.ones(self.nsamp)
177 |    
178 |         self.fvec = fvec/self.sr
179 |         self.fmin = max(self.fmin,min(fvec))
180 | 
181 |         if adjust:
182 |             f0c, th = self.calc_adjusted_freq(fvec)
183 |             self.fvec = np.interp(tvec, th, f0c)
184 |         if self.variable_resolution:
185 |             self.th = [[] for ii in range(self.nharm)]
186 |             self.ah = [[] for ii in range(self.nharm)]
187 |             self.idxh = [[] for ii in range(self.nharm)]
188 |         else:
189 |             self.th = np.arange(self.nwind//2, self.nsamp-self.nwind//2, self.nhop)/self.sr
190 |             self.idxh = np.arange(self.nwind//2, self.nsamp-self.nwind//2,
191 |                                   self.nhop).astype('i')
192 |             self.ah = np.zeros((self.th.shape[0],self.nharm),dtype='complex')
193 | 
194 | 
195 |     def extract_partial(self, n):
196 |         """
197 |         calculates complex amplitudes of partials
198 |         """
199 |         x = self.x
200 |         hetsig = self.heterodyner_signal(n=n)
201 |         if self.variable_resolution:
202 |             wind = self.wfun(int(self.nper/self.fmin*self.sr/n))
203 |         else:
204 |             wind = self.wind
205 |         h,th = heterodyne(x, hetsig, wind=wind, hop=self.nhop)
206 |         return h, th
207 | 
208 |     def filter_harmonic(self, n):
209 |         """
210 |         mute intervals not to be taken into account in resynthesis
211 |         """
212 |         tvec = np.arange(self.nsamp)/self.sr
213 |         hf = np.interp(tvec, self.harmonic_times(n), self.harmonic_amplitudes(n))
214 |         idx = (self.f<self.fmin) | (self.f>self.fmax) | (self.f*n>self.sr/2.2)
215 |         rmsmin = np.max(np.abs(hf))*self.ampthr
216 |         idx = idx | (np.abs(hf)<rmsmin)
217 |         hf[idx] = 0
218 |         return hf
219 | 
220 |     def resynth_partial(self, n):
221 |         """
222 |         resynthesises a partial based on extracted complex amplitudes
223 |         """
224 |         th = self.harmonic_times(n)
225 |         h = self.harmonic_amplitudes(n)
226 |         hsig = self.heterodyner_signal(n)
227 |         tvec = np.arange(self.nsamp)/self.sr
228 | 
229 |         hf = np.interp(tvec, th, h)
230 |         ff = self.fvec
231 |         hf = self.filter_harmonic(n)
232 | 
233 |         return np.real(np.conjugate(hsig)*hf)
234 |         
235 |     def extract_partials(self):
236 |         """
237 |         extracts all partials in from 1 to nharm 
238 |         and puts them into a matrix
239 |         """
240 | 
241 |         for ii in range(1, self.nharm+1):
242 |             hh, th = self.extract_partial(ii)
243 |             self.ah[:,ii-1] = hh
244 | 
245 |         return self.ah, self.th
246 | 
247 | 
248 |     def resynth(self):
249 |         x = np.zeros(self.nsamp)
250 |         for ii in range(self.nharm):
251 |             x+=self.resynth_partial(ii)
252 |         return(x)
253 | 
254 |     def clone(self):
255 |         from copy import copy
256 |         return copy(self)
257 | 
258 | 
259 | 
260 | 
261 | class HeterodyneHarmonic:
262 |     """
263 |     Perform a sine sum decomposition based on a f0 track
264 |     """
265 | 
266 |     def __init__(self, x, sr=1.0, tf=None, f=None, nper=None, nwind=1024, nhop=None,
267 |                  wfun=np.hanning, ampthr=0.1, nharm=5, fmin=0.1, fmax=1000, include_dc=False):
268 |         """
269 |         Perform a sine sum analysis on signal x with sampling rate sr.
270 | 
271 |         Arguments:
272 |             * x:        signal
273 |             * sr:       sampling rate
274 |             * f:        frequency track (float or array)
275 |             * tf:       time values of f 
276 |                         (defaults to evenly distributed along x)
277 |             * nwind:    window length (defaults to 3* maximum period)
278 |             * nhop:     interval between estimations 
279 |                         (defaults to 1/2 nwind)
280 |             * nper:     number of periods to use in heterodyning
281 |                         (takes precedence over nwind)
282 |             * wfun:     windowing function
283 |             * nharm:    number of harmonics to use in decomposition
284 |             * ampthr:   amplitude threshold for filtering in resynthesis
285 |             * include_dc: calculate DC amplitude
286 |         """
287 |         self.x = x
288 |         self.sr = sr
289 |         self.nper = nper
290 |         self.nhop = nhop
291 |         self.nwind = nwind
292 |         self.tf = tf
293 |         self.fvals = f
294 |         self.nharm = nharm
295 |         self.nsamp = len(x)
296 |         self.wfun = wfun
297 |         self.ampthr = ampthr
298 |         self.fmin = fmin
299 |         self.fmax = fmax
300 |         self.include_dc = include_dc
301 | 
302 |         self._fix_params()
303 |         self.extract_partials()
304 | 
305 |     @property
306 |     def f0(self):
307 |         return self.fvec*self.sr
308 | 
309 |         
310 |     @property
311 |     def f(self):
312 |         tvec = np.arange(self.nsamp)/self.sr
313 |         f0 = np.interp(self.t, tvec, self.f0)
314 |         if self.include_dc:
315 |             return np.array([f0*(n) for n in range(self.nharm)]).T
316 |         else:
317 |             return np.array([f0*(n) for n in range(1,self.nharm)]).T
318 | 
319 |     @property
320 |     def angle_ratios(self):
321 |         ang = np.angle(self.camp/np.tile(self.camp[:,:1],(1,self.camp.shape[1])))
322 |         if self.include_dc:
323 |             ang=np.hstack((np.zeros((ang.shape[0],1)),ang))
324 |         return ang
325 |     
326 |     @property
327 |     def partial_frequencies(self):
328 |         dt = self.nhop/self.sr
329 |         newf=self.f[1:,:]-np.diff(np.unwrap(np.angle(self.camp)),axis=0)/dt/2/np.pi
330 |         if self.include_dc:
331 |             newf=np.hstack((np.zeros((newf.shape[0],1)),newf))
332 |         return newf
333 | 
334 |     @property
335 |     def t(self):
336 |         return self.th
337 | 
338 |     @property
339 |     def camp(self):
340 |         if self.include_dc:
341 |             return self.ah
342 |         else:
343 |             return self.ah[:,1:]
344 |     
345 |     @camp.setter
346 |     def camp(self, mx):
347 |         self.ah = mx
348 | 
349 |     def _fix_params(self):
350 |         if self.nhop is None:
351 |             self.nhop = self.nwind//2
352 |         self.wind = self.wfun(self.nwind)
353 |         #print(max(tvec))
354 |         tvec = np.arange(self.nsamp)/self.sr
355 | 
356 |         if self.nper is not None:
357 |             self.variable_resolution = True
358 |             self.nwind = int(round(self.nper/self.fmin))
359 |             print('window set to %d'%self.nwind)
360 |         else:
361 |             self.variable_resolution = False
362 | 
363 |         self.set_fvec(self.fvals,self.tf)
364 | 
365 |     def harmonic_times(self, n=1):
366 |         if self.variable_resolution:
367 |             return self.th[n]
368 |         else:
369 |             return self.th
370 | 
371 |     def harmonic_amplitudes(self, n=1):
372 |         if self.variable_resolution:
373 |             return self.ah[n]
374 |         else:
375 |             return self.ah[:,n]
376 | 
377 |     def harmonic_frequencies(self, n=1):
378 |         if self.variable_resolution:
379 |             return self.f[self.idxh[n]]*n
380 |         else:
381 |             return self.f[self.idxh]*n
382 | 
383 | 
384 |     def heterodyner_signal(self, n=1):
385 |         """
386 |         return a reference variable-frequency signal
387 |         with frequency equal to n* the harmonic of the frequency vector
388 |         """
389 |         return self.heterodyner_signal_from_f(self.fvec*n)
390 | 
391 |     def heterodyner_signal_from_f(self,f):
392 |         """
393 |         calculate a heterodyner signal based on the normalised
394 |         frequency vector
395 | 
396 |         input a normalised frequency vector
397 |         """
398 |         omega = f*2*np.pi
399 |         phvec = np.cumsum(omega)
400 |         return np.exp(1j*phvec)
401 | 
402 | 
403 |     def calc_adjusted_freq(self, fvec, nwind=None, nhop=None):
404 |         """
405 |         adjusts the frequency track accordin to a first-pass heterodyne 
406 |         analysis. 
407 | 
408 |         ARguments:
409 |             * nwind:        object nwind
410 |             * nhop:         object nhop
411 |         """
412 | 
413 |         x = self.x
414 |         sr = self.sr
415 |         tvec = np.arange(len(x))/sr
416 |         hetsig = self.heterodyner_signal_from_f(fvec)
417 |         if nwind is None:
418 |             wind = self.wind
419 |         else:
420 |             wind = self.wfun(nwind)
421 | 
422 |         if nhop is None:
423 |             nhop = len(wind)//2
424 | 
425 |         h, ih = heterodyne(x, hetsig, wind=wind, hop=nhop)
426 |         th = ih/sr
427 |         dph = np.concatenate(([0], np.diff(np.unwrap(np.angle(h)))))
428 |         f0c = np.interp(th,tvec,fvec)-dph/(nhop)/2/np.pi 
429 |         return f0c, th
430 | 
431 |     def set_fvec(self, f0c, th=None, adjust=False):
432 |         tvec = np.arange(len(self.x))/self.sr
433 | 
434 |         if th is not None:
435 |             fvec = np.interp(tvec, th, f0c)
436 |         else:
437 |             fvec = f0c
438 | 
439 |         # fix for single-frequency values
440 |         if not isinstance(fvec, collections.abc.Sequence):
441 |             fvec = fvec*np.ones(self.nsamp)
442 | 
443 |         self.fvec = fvec/self.sr
444 |         self.fmin = max(self.fmin,min(fvec))
445 | 
446 |         if adjust:
447 |             f0c, th = self.calc_adjusted_freq(fvec)
448 |             self.fvec = np.interp(tvec, th, f0c)
449 |         if self.variable_resolution:
450 |             self.th = [[] for ii in range(self.nharm)]
451 |             self.ah = [[] for ii in range(self.nharm)]
452 |             self.idxh = [[] for ii in range(self.nharm)]
453 |         else:
454 |             self.th = np.arange(self.nwind//2, self.nsamp-(self.nwind-self.nwind//2), self.nhop)/self.sr
455 |             self.idxh = np.arange(self.nwind//2, self.nsamp-self.nwind//2,
456 |                                   self.nhop).astype('i')
457 |             self.ah = np.zeros((self.th.shape[0],self.nharm),dtype='complex')
458 | 
459 | 
460 |     def extract_partial(self, n):
461 |         """
462 |         calculates complex amplitudes of partials
463 |         """
464 |         x = self.x
465 |         hetsig = self.heterodyner_signal(n=n)
466 |         if self.variable_resolution:
467 |             wind = self.wfun(int(self.nper/self.fmin*self.sr/n))
468 |         else:
469 |             wind = self.wind
470 |         h,th = heterodyne(x, hetsig, wind=wind, hop=self.nhop)
471 |         return h, th
472 | 
473 |     def filter_harmonic(self, n):
474 |         """
475 |         mute intervals not to be taken into account in resynthesis
476 |         """
477 |         tvec = np.arange(self.nsamp)/self.sr
478 |         hf = np.interp(tvec, self.harmonic_times(n), self.harmonic_amplitudes(n))
479 |         idx = (self.f0<self.fmin) | (self.f0>self.fmax) | (self.f0*n>self.sr/2.2)
480 |         rmsmin = np.max(np.abs(hf))*self.ampthr
481 |         idx = idx | (np.abs(hf)<rmsmin)
482 |         hf[idx] = 0
483 |         return hf
484 | 
485 |     def resynth_partial(self, n, filter=False):
486 |         """
487 |         resynthesises a partial based on extracted complex amplitudes
488 |         """
489 |         th = self.harmonic_times(n)
490 |         h = self.harmonic_amplitudes(n)
491 |         hsig = self.heterodyner_signal(n)
492 |         tvec = np.arange(self.nsamp)/self.sr
493 | 
494 |         hf = np.interp(tvec, th, h)
495 |         ff = self.fvec
496 |         if filter:
497 |             hf = self.filter_harmonic(n)
498 | 
499 |         return np.real(np.conjugate(hsig)*hf)
500 |         
501 |     def get_voice_component(self, x, sr, f, nharm, nfft=2**13, hop=2**11, ampthr=0.1, fmin=70, fmax=500):
502 |         xs = np.zeros(len(x))
503 |         tvec = np.arange(len(x))/sr
504 |         
505 |         idx = np.isnan(f)
506 |         fc = f
507 |         fc = f[np.logical_not(idx)]
508 |         tf = tvec[np.logical_not(idx)]
509 |         
510 |         for nn in range(nharm):
511 |             hno = nn+1
512 |             fh = fc*hno
513 |             h,th,hsig = heterodyne(x,sr,tf=tf,f=fh,wlen=nfft,hop=hop)
514 |             hf = np.interp(tvec,th,h)
515 |             #ff = f0f*hno
516 |             hf[idx]=0
517 |             xs += np.real(np.conjugate(hsig)*hf)
518 |             
519 |         return xs
520 | 
521 |     def extract_partials(self):
522 |         """
523 |         extracts all partials in from 1 to nharm 
524 |         and puts them into a matrix
525 |         """
526 | 
527 |         for ii in range(0, self.nharm):
528 |             hh, th = self.extract_partial(ii)
529 |             self.ah[:,ii] = hh
530 |         self.ah[:,0] /= 2
531 | 
532 |         return self.ah, self.th
533 | 
534 |     def resynth(self):
535 |         x = np.zeros(self.nsamp)
536 |         for ii in range(self.nharm):
537 |             x+=self.resynth_partial(ii)
538 |         return(x)
539 | 
540 |     def clone(self):
541 |         from copy import copy
542 |         return copy(self)
543 | 
544 | 
545 | 
546 | 


--------------------------------------------------------------------------------
/pypevoc/PeakFinder.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #  PeakFinder.py
  5 | #
  6 | #  Provides a simple way of finding peaks and their parameters,
  7 | #  from a data series
  8 | #
  9 | #  Copyright 2014 Andre Almeida <andre.almeida@univ-lemans.fr>
 10 | #
 11 | #  This program is free software; you can redistribute it and/or modify
 12 | #  it under the terms of the GNU General Public License as published by
 13 | #  the Free Software Foundation; either version 2 of the License, or
 14 | #  (at your option) any later version.
 15 | #
 16 | #  This program is distributed in the hope that it will be useful,
 17 | #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 19 | #  GNU General Public License for more details.
 20 | #
 21 | #  You should have received a copy of the GNU General Public License
 22 | #  along with this program; if not, write to the Free Software
 23 | #  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 24 | #  MA 02110-1301, USA.
 25 | #
 26 | #
 27 | 
 28 | """ Defines a class for detecting peaks in a numpy array"""
 29 | 
 30 | import numpy as np
 31 | 
 32 | 
 33 | class PeakFinder(object):
 34 | 
 35 |     def __init__(self, y, x=None, npeaks=None, minrattomax=None, minval=None):
 36 |         """Creates the peak finder object from a numpy array
 37 | 
 38 |         Arguments:
 39 | 
 40 |             x:           the numpy array in which to find peaks
 41 |             npeaks:      maximum number of peaks to find
 42 | 
 43 |           Thresholds:
 44 |             minrattomax: ratio of minimum to maximum peak amplitude
 45 |                          (has priority over minval if set to other
 46 |                           than None)
 47 |             minval:      an absolute minimum value of peak
 48 |         """
 49 | 
 50 |         self.y = np.array(np.squeeze(y))
 51 |         if x is not None:
 52 |             self.x = np.array(np.squeeze(x))
 53 |         else:
 54 |             self.x = np.arange(len(self.y))
 55 |         self._idx = np.array([])
 56 |         self._val = np.array([])
 57 |         if minrattomax is None:
 58 |             self.minamp = minval
 59 |         else:
 60 |             self.minamp = self.y.max()*minrattomax
 61 | 
 62 |         self.sorttype = 0
 63 | 
 64 |         if not npeaks:
 65 |             self.npeaks = len(self.y)
 66 |         else:
 67 |             self.npeaks = npeaks
 68 | 
 69 |         if not self.minamp:
 70 |             self.minamp = np.min(self.y)
 71 | 
 72 |         self.findpos()
 73 |         #self.sort_pos()
 74 |         # self.boundaries()
 75 | 
 76 |     @property
 77 |     def pos(self):
 78 |         return self._fine_pos[self._keep]
 79 | 
 80 |     @property
 81 |     def rough_pos(self):
 82 |         return self.x[self._idx[self._keep]]
 83 | 
 84 |     @property
 85 |     def all_pos(self):
 86 |         return self.x[self._idx]
 87 | 
 88 |     @property
 89 |     def val(self):
 90 |         return self._fine_val[self._keep]
 91 | 
 92 |     @property
 93 |     def all_val(self):
 94 |         return self._val
 95 | 
 96 |     @property
 97 |     def rough_val(self):
 98 |         return self._val[self._keep]
 99 | 
100 |     @property
101 |     def bounds(self):
102 |         b = np.array(self._bounds)
103 |         return self.x[b[self._keep,:]]
104 |     
105 |     @property
106 |     def areas(self):
107 |         return self._areas[self._keep]
108 | 
109 |     @property
110 |     def prominence(self):
111 |         return self._prominence[self._keep]
112 | 
113 |     def filter_by_salience(self, rad=1, sal=0):
114 |         ''' Filters the peaks by salience.
115 |             Any peak that is lower than the neighbouring 'rad' points
116 |             is filtered out
117 | 
118 |             optional:
119 |             * sal: salience (peaks must be at leas sal above other 
120 |                    values in a radius rad)
121 |         '''
122 | 
123 |         npks = len(self._idx)
124 |         # keep = np.ones(npks).astype('bool')
125 | 
126 |         for idx in range(npks):
127 |             thispos = self._idx[idx]
128 |             thisval = self._val[idx]
129 |             wmin = max(thispos-rad, 1)
130 |             wmax = min(thispos + rad, len(self.y))
131 |             w = self.y[wmin:wmax + 1]
132 | 
133 |             if any(w+sal > thisval):
134 |                 self._keep[idx] = False
135 | 
136 |         # self.keep = np.logical_and(self.keep, keep)
137 | 
138 |     def filter_by_prominence(self, prom=0.0, all=False):
139 |         '''
140 |         Filter by peak prominence
141 | 
142 |         prominence at leas prom above relative minimum
143 | 
144 |         optional:
145 |         * all: include peaks that were filtered out before
146 |         '''
147 |         try:
148 |             prominence = self._prominence
149 |         except AttributeError:
150 |             self.find_prominence()
151 |             prominence = self._prominence
152 | 
153 |         self._keep[prominence<prom] = False
154 | 
155 |     def findpos(self):
156 |         """Finds the peaks positions
157 | 
158 |         Arguments:
159 |             (none)
160 |         """
161 | 
162 |         y = self.y
163 | 
164 |         miny = np.min(y)
165 | 
166 |         peakmask = (y[0:-2] < y[1:-1])*(y[1:-1] >= y[2:]).astype(int)
167 |         pkmskamp = peakmask*(y[1:-1]-miny)
168 |         # print(pkmskamp)
169 | 
170 |         pos = []
171 | 
172 |         m = pkmskamp.max()
173 |         b = pkmskamp.argmax()
174 |         th = self.minamp-miny
175 |         n = 1
176 | 
177 |         if m > th:
178 |             pos.append(b + 1)
179 |             pkmskamp[b] = th-1
180 | 
181 |         while m > th and n < self.npeaks:
182 |             m = pkmskamp.max()
183 |             b = pkmskamp.argmax()
184 |             if m > th:
185 |                 pos.append(b + 1)
186 |                 pkmskamp[b] = th-1
187 |                 n += 1
188 | 
189 |         self._idx = np.array(np.sort(pos))
190 |         self._val = np.array([y[i] for i in self._idx])
191 |         self._keep = np.ones(len(self._idx),dtype='bool')
192 |         self._order = np.arange(len(self._idx))
193 |         self._fine_pos = np.array([self.x[ii] for ii in self._idx])
194 |         self._fine_val = self._val
195 | 
196 |     def find_prominence(self, side_fun=np.min, all=False):
197 |         if not all:
198 |             pos = self._idx[self._keep]
199 |             val = self._val[self._keep]
200 |         else:
201 |             pos = self.all_pos
202 |             val = self.all_val
203 |         lbound = np.concatenate(([0], pos))
204 |         rbound = np.concatenate((pos+1, [len(self.y)]))
205 |         sal_l = []
206 |         sal_r = []
207 |         for lb, rb, v in zip(lbound[:-1], rbound[:-1], val):
208 |             sal_l.append(v - np.min(self.y[lb:rb]))
209 |         for lb, rb, v in zip(lbound[1:], rbound[1:], val):
210 |             sal_r.append(v - np.min(self.y[lb:rb]))
211 | 
212 |         sal_l = np.array(sal_l)
213 |         sal_r = np.array(sal_r)
214 |         prominence = side_fun(np.array([sal_l,sal_r]),axis=0)
215 | 
216 |         if not all:
217 |             self.prominence = prominence
218 |         else:
219 |             self.prominence = np.zeros(len(self.pos))
220 |             self.prominence[self._keep] = prominence
221 |         return self.prominence[self._keep]
222 | 
223 |     def plot(self, logarithmic=False):
224 |         """Plot a graphical representation of the peaks
225 | 
226 |         Arguments:
227 |             (none)
228 |         """
229 | 
230 |         import pylab as pl
231 | 
232 |         pl.figure()
233 |         pl.plot(self.x, self.y)
234 |         pl.plot(self.all_pos,
235 |                 self.all_val, 'om')
236 |         pl.plot(self.rough_pos, self.rough_val, 'og')
237 |         if hasattr(self, 'bounds'):
238 |             lmins = np.unique(self.bounds.flatten())
239 |             lminvals = self.y[lmins]
240 |             pl.plot(lmins, lminvals, 'or')
241 |         pl.plot(self.pos, self.val, 'dg')
242 |         if logarithmic:
243 |             pl.gca().set_yscale('log')
244 | 
245 |     def sort_ampl(self):
246 |         """Sort the found peaks in decreasing order of amplitude
247 | 
248 |         Arguments:
249 |             (none)
250 |         """
251 |         if len(self.pos) > 1:
252 |             idx = np.argsort(self._val)[::-1]
253 |             self._order = idx
254 |             self.sorttype = 2
255 | 
256 |     def sort_pos(self):
257 |         """Sort the found peaks in order of position
258 | 
259 |         Arguments:
260 |             (none)
261 |         """
262 | 
263 |         if len(self._idx) > 1:
264 |             idx = np.argsort(self._idx)
265 | 
266 |             self._order = idx
267 |             self.sorttype = 1
268 | 
269 |     def find_boundaries(self, all=False):
270 |         """Find the local minima on either side of each peak
271 | 
272 |         Arguments:
273 |             (none)
274 |         """
275 |         try:
276 |             prevb = np.argmin(self.y[0:self._idx[0]])
277 |         except IndexError:
278 |             prevb = 0
279 | 
280 |         bounds = []
281 | 
282 |         if not all:
283 |             pos = self._idx[self._keep]
284 |         else:
285 |             pos = self._idx
286 | 
287 |         npks = len(pos)
288 | 
289 |         for i in range(npks):
290 |             thismax = pos[i]
291 |             if i < npks-1:
292 |                 nextmax = pos[i + 1]
293 |                 relb = np.argmin(self.y[thismax:nextmax])
294 |                 nextb = relb + thismax
295 |             else:
296 |                 nextmax = len(self.y)-1
297 |                 nextb = len(self.y)-1
298 | 
299 |             bounds.append([prevb, nextb])
300 |             prevb = nextb
301 | 
302 |         self._bounds = np.array(bounds)
303 | 
304 |     def refine_opt(self, idx, yvec=None, rad=2):
305 |         """use fit to quadratic to locate a fine maximum of
306 |         the peak position and value
307 | 
308 |         Arguments:
309 |             idx: index of the peak to interpolate
310 |         """
311 | 
312 |         pos = self.pos[idx]
313 |         if yvec is not None:
314 |             y = yvec
315 |         else:
316 |             y = self.y
317 | 
318 |         # val = self.val[idx]
319 |         imin = max(1, pos-rad)
320 |         imax = min(pos + rad + 1, len(y))
321 |         sur = y[imin:imax]
322 |         ifit = np.arange(imin-pos, imax-pos)
323 | 
324 |         pp = np.polyfit(ifit, sur, 2)
325 |         lpos = - pp[1]/2.0/pp[0]
326 |         fpos = float(pos) + lpos
327 |         fval = pp[0]*lpos*lpos + pp[1]*lpos + pp[2]
328 | 
329 |         return fpos, fval.tolist()
330 | 
331 |     def refine(self, idx, fun=None, yvec=None):
332 |         """use quadratic interpolation to locate a fine maximum of
333 |         the peak position and value
334 | 
335 |         Arguments:
336 |             idx: index of the peak to interpolate
337 |         """
338 | 
339 |         pos = self._idx[idx]
340 |         if yvec is not None:
341 |             y = yvec
342 |         else:
343 |             y = self.y
344 | 
345 |         if fun:
346 |             from scipy.optimize import broyden1 as opt
347 |             # val = fun(self.val[idx])
348 |             sur = fun(y[pos-1:pos+2])
349 |         else:
350 |             # val = self.val[idx]
351 |             sur = y[pos-1:pos+2]
352 | 
353 |         if sur[1] > sur[0] and sur[1] >= sur[2]:
354 |             c = sur[1]
355 |             b = (sur[2] - sur[0])/2
356 |             a = (sur[2] + sur[0])/2 - c
357 | 
358 |             lpos = - b/2/a
359 |             fpos = float(pos) + lpos
360 |             if fun:
361 |                 ival = a*lpos*lpos + b*lpos + c
362 |                 # print "rpos = %d; rf(val) = %f; f(val) = %f; dpos = %f;"%(pos, sur[1], ival, lpos)
363 |                 fval = opt(lambda x: fun(x)-ival, self.val[idx]/2)
364 |             else:
365 |                 fval = a*lpos*lpos + b*lpos + c
366 |                 # print "rpos = %d; rval = %f; val = %f; dpos = %f; pos = %f"%(pos, sur[1], fval, lpos, fpos)
367 | 
368 |         else:
369 |             fpos = pos
370 |             fval = sur[1]
371 | 
372 |         return np.interp(fpos, np.arange(len(self.x)), self.x), fval.tolist()
373 | 
374 |     def refine_all(self, logarithmic=False, rad=1):
375 |         """use quadratic interpolation to refine all peaks
376 | 
377 |         Arguments:
378 |             idx: index of the peak to interpolate
379 |         """
380 | 
381 |         if logarithmic:
382 |             y = np.log10(self.y)
383 |         else:
384 |             y = self.y
385 | 
386 |         # rpos = self.pos
387 |         # rval = self.val
388 |         self._fine_pos = np.zeros(self._idx.shape)
389 |         self._fine_val = np.zeros(self._idx.shape)
390 | 
391 |         for i in range(len(self._idx)):
392 |             if logarithmic:
393 |                 if rad > 1:
394 |                     fpos, fval = self.refine_opt(i, yvec=y, rad=rad)
395 |                 else:
396 |                     fpos, fval = self.refine(i, yvec=y)
397 |             else:
398 |                 if rad > 1:
399 |                     fpos, fval = self.refine_opt(i, rad=rad)
400 |                 else:
401 |                     fpos, fval = self.refine(i)
402 |             self._fine_pos[i] = fpos
403 |             if logarithmic:
404 |                 self._fine_val[i] = 10**fval
405 |             else:
406 |                 self._fine_val[i] = fval
407 | 
408 |     def calc_individual_area(self, idx, funct=None, max_rad=None):
409 |         lims = self._bounds[idx]
410 |         if funct is None:
411 |             return sum(self.y[lims[0]:lims[-1]])
412 |         else:
413 |             return sum(funct(self.y[lims[0]:lims[-1]]))
414 | 
415 |     def get_areas(self, funct=None, max_rad=None):
416 |         if not hasattr(self, '_bounds'):
417 |             self.find_boundaries()
418 | 
419 |         areas = []
420 |         for idx in range(len(self._idx)):
421 |             areas.append(self.calc_individual_area(idx, funct=funct))
422 | 
423 |         self._areas = np.array(areas)
424 | 
425 |         return self._areas[self._keep]
426 | 
427 |     def get_pos_val(self, rough=False):
428 |         """return a vector with peak position in first column
429 |         and value in second column
430 | 
431 |         Arguments:
432 |             rough: do not return the refined position
433 |         """
434 | 
435 |         rvec = np.array(zip(self.pos, self.val))
436 | 
437 |         return rvec
438 | 
439 |     def to_dict(self):
440 |         """
441 |         Return a list of dictionary with peak characteristics
442 |         """
443 |         ret = []
444 |         for ii, (pos, val) in enumerate(zip(self.pos,self.val)):
445 |             thisd = {'pos': pos,
446 |                      'val': val}
447 |             try:
448 |                 thisd['sal'] = self.prominence[self._keep][ii]
449 |             except AttributeError:
450 |                 pass
451 | 
452 |             try:
453 |                 thisd['l_bound'] = self.bounds[ii,0]
454 |                 thisd['r_bound'] = self.bounds[ii,1]
455 |             except AttributeError:
456 |                 pass
457 | 
458 |             try:
459 |                 thisd['area'] = self.areas[ii]
460 |             except AttributeError:
461 |                 pass
462 | 
463 |             ret.append(thisd)
464 | 
465 |         return ret
466 | 
467 |     def to_data_frame(self):
468 |         """
469 |         Return a pandas dataframe with peak information
470 |         """
471 |         import pandas
472 |         return pandas.DataFrame(self.to_dict())
473 | 
474 |     # backwards compat
475 |     def boundaries(self):
476 |         try:
477 |             self._bounds
478 |         except AttributeError:
479 |             self.find_boundaries()
480 |         b = np.array(self._bounds)
481 |         try:
482 |             return self.x[b[self._keep, :]]
483 |         except IndexError:
484 |             return np.array([])
485 | 
486 |     def get_pos(self):
487 |         return np.array(self.pos)
488 | 
489 | 
490 | 


--------------------------------------------------------------------------------
/pypevoc/Periodicity.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #  Periodicity.py
  5 | #
  6 | #  Utilities for frequency and periodicity estimation
  7 | #  * Fundamental frequency estimator
  8 | #  * Tonal character
  9 | #
 10 | #
 11 | #  Copyright 2014 Andre Almeida <goios@AndreUbuntu>
 12 | #
 13 | #  This program is free software; you can redistribute it and/or modify
 14 | #  it under the terms of the GNU General Public License as published by
 15 | #  the Free Software Foundation; either version 2 of the License, or
 16 | #  (at your option) any later version.
 17 | #
 18 | #  This program is distributed in the hope that it will be useful,
 19 | #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 20 | #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 21 | #  GNU General Public License for more details.
 22 | #
 23 | #  You should have received a copy of the GNU General Public License
 24 | #  along with this program; if not, write to the Free Software
 25 | #  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 26 | #  MA 02110-1301, USA.
 27 | #
 28 | #
 29 | 
 30 | import sys
 31 | import numpy as np
 32 | import pylab as pl
 33 | from matplotlib.colors import hsv_to_rgb
 34 | from .PeakFinder import PeakFinder as pf
 35 | from .ProgressDisplay import Progress
 36 | #from AMDF import amdf
 37 | 
 38 | def amdf(x, mindelay=0, maxdelay=None):
 39 |     nx = len(x)
 40 |     if maxdelay is None:
 41 |         maxdelay = nx
 42 | 
 43 |     y = np.zeros(nx)
 44 |     for i in range(mindelay, maxdelay):
 45 |         n = nx - i
 46 |         y[i] = (np.abs(x[0:nx-i]-x[i:])).sum()/n
 47 | 
 48 |     return y
 49 | 
 50 | # I will try to update this object so that data required for
 51 | # the initialisation of every instance stays in the caller.
 52 | # Thee caller passes itself as argument to the callee
 53 | 
 54 | 
 55 | class Periodicity(object):
 56 |     """Single period object, including multiple periodicity candidates
 57 |     """
 58 |     def __init__(self, parent, index=0):
 59 |         """Calculate the periodicity estimation for a window
 60 |            of a time signal
 61 | 
 62 |         Arguments:
 63 |         parent: parent object contaigning entire signal
 64 |         idx:    index of local peridoicity calulation
 65 |         """
 66 | 
 67 |         self.parent = parent
 68 |         self.nwind = parent.nwind
 69 |         self.wnorm = parent.wnorm
 70 |         self.wind = parent.wind
 71 |         self.sr = parent.sr
 72 | 
 73 |         self.mindelay = parent.mindelay
 74 |         if parent.maxdelay is None:
 75 |             self.maxdelay = int(round(self.nwind/2))
 76 |         else:
 77 |             self.maxdelay = int(parent.maxdelay)
 78 | 
 79 |         self.method = parent.method
 80 |         self.threshold = parent.threshold
 81 |         self.vthresh = parent.vthresh
 82 |         self.ncand = parent.ncand
 83 |         self.fftthresh = parent.fftthresh
 84 | 
 85 |         # Arrays with probable candidate periodicity and corresponding
 86 |         # strength
 87 |         self.cand_period = np.array([])
 88 |         self.cand_strength = np.array([])
 89 |         # Index of preferred candidate
 90 |         self.preferred = 0
 91 | 
 92 |         self.cand_method = parent.cand_method
 93 |         self.index=index
 94 | 
 95 |         self._calc()
 96 | 
 97 |     def _calc(self):
 98 |         """Calculate the periodicity candidates
 99 | 
100 |         Arguments:
101 |         xw: the windowed portion of time signal where periodicity
102 |             is to be estimated
103 |         """
104 |         nwleft = int(np.floor(self.nwind/2))
105 |         nwright = int(self.nwind - nwleft)
106 |         idx = int(np.round(self.index))
107 |         ist = idx - nwleft
108 |         iend = idx + nwright
109 | 
110 |         xs = self.parent.x[ist:iend]
111 |         xw = (xs-np.mean(xs)) * self.wind
112 | 
113 |         nwind = self.nwind
114 | 
115 |         # unvoiced
116 |         pkpos = np.array([])
117 |         pkstr = np.array([])
118 | 
119 |         peaks = None
120 | 
121 |         try:
122 |             if self.method is 'amdf':
123 |                 xc = amdf(xw)
124 | 
125 |                 maxxc = max(xc[nwind-1-self.maxdelay:nwind-1+self.maxdelay])
126 |                 xcn = (maxxc-xc)/maxxc
127 |                 imin = self.mindelay
128 |                 xcpos = xcn[imin:self.maxdelay]
129 |                 xcth = self.threshold
130 | 
131 |             elif self.method is 'xcorr':
132 | 
133 |                 xc = np.correlate(xw, xw, "full") / self.wnorm
134 | 
135 |                 negvals = np.flatnonzero(xc[nwind-1:] < 0)
136 |                 if len(negvals) > 0:
137 |                     firstneg = np.min(negvals)
138 |                 else:
139 |                     firstneg = self.mindelay
140 |                 imin = max(firstneg, self.mindelay)
141 |                 xcn = xc/max(xc[nwind-1-self.maxdelay:nwind-1+self.maxdelay])
142 |                 xcpos = xcn[nwind-1+imin:nwind-1+self.maxdelay]
143 | 
144 |                 xcth = self.threshold
145 | 
146 |                 # print "In xcorr. max %f, thr %f"%(max(xcpos),xcth)
147 | 
148 |             if len(xcpos) > 0 and max(xcpos) > self.vthresh:
149 |                 # this is equivlent to finding minima
150 |                 # below the absolute minimum * threshold
151 |                 peaks = pf(xcpos, minval=xcth,
152 |                                       npeaks=self.ncand)
153 | 
154 |                 peaks.refine_all()
155 |                 # peaks.plot()
156 | 
157 |                 pkpos = peaks.pos + imin
158 |                 pkstr = peaks.val
159 | 
160 |                 # keep = pkpos<self.maxdelay
161 |                 # pkpos = pkpos[keep]
162 |                 # pkstr = pkstr[keep]
163 | 
164 |         except IndexError as e:
165 |             print(e)
166 | 
167 |         if len(pkpos) > 0:
168 |             self.cand_period = pkpos
169 |             self.cand_strength = pkstr
170 | 
171 |             if self.cand_method == 'fft':
172 |                 xf = np.fft.fft(xw)
173 |                 fftpeaks = pf(np.abs(xf[0:int(self.nwind/2)]),
174 |                                          npeaks=self.ncand)
175 |                 # periodicity corresponding to fft peaks:
176 |                 fpos = fftpeaks.pos
177 |                 fval = fftpeaks.val
178 |                 fposkeep = fpos[fval > np.max(fval*self.fftthresh)]
179 |                 fftpkpos = self.nwind / fposkeep
180 | 
181 |                 # minimum distance between correlation candidates
182 |                 # and fft peaks
183 |                 perdist = [np.min(np.abs(fftpkpos-thispos))
184 |                            for thispos in pkpos]
185 |                 try:
186 |                     self.preferred = np.argmin(perdist)
187 |                 except ValueError:
188 |                     self.preferred = 0
189 |                 # print (fftpkpos)
190 |                 # print (pkpos)
191 |             elif self.cand_method == 'min':
192 |                 self.preferred = np.argmin(pkpos)
193 |             elif self.cand_method == 'similar':
194 |                 self.preferred = np.argmax(pkstr)
195 |         else:
196 |             self.preferred = 0
197 |             # self.cand_period = np.array([np.nan])
198 |             # self.cand_strength = np.array([np.nan])
199 | 
200 |         return xcn
201 | 
202 |     def plot_similarity(self, ax=None):
203 | 
204 |         xc = self._calc()
205 | 
206 |         if not ax:
207 |             fig, ax = pl.subplots(1)
208 |         ln = ax.plot(np.arange(len(xc))-self.nwind+1, xc)
209 |         ax.hold('on')
210 |         ax.plot(self.cand_period, self.cand_strength, 'o',
211 |                 color=ln[0].get_color())
212 | 
213 |     def set_time_properties(self, index):
214 |         """Set the sample and time value of this periodicity estimation
215 | 
216 |         Arguments:
217 |         index: sample index
218 |         """
219 | 
220 |         self.index = float(index)
221 |         self.time = float(index)/self.sr
222 | 
223 |     def sort_strength(self):
224 |         """Sort candidates by periodicity strength
225 | 
226 |         Arguments: (None)
227 |         """
228 | 
229 |         idx = np.argsort(self.cand_strength)[::-1]
230 |         self.cand_period = self.cand_period[idx]
231 |         self.cand_strength = self.cand_strength[idx]
232 |         pref = np.flatnonzero(idx == self.preferred)
233 |         if len(pref) > 0:
234 |             self.preferred = pref[0]
235 |         else:
236 |             self.preferred = []
237 | 
238 |     def get_preferred_period(self):
239 |         if len(self.cand_period) > 0:
240 |             return self.cand_period[self.preferred]
241 |         else:
242 |             return 0
243 | 
244 |     def get_preferred_strength(self):
245 |         if len(self.cand_period) > 0:
246 |             return self.cand_strength[self.preferred]
247 |         else:
248 |             return 0
249 | 
250 | 
251 | class PeriodSeries(object):
252 |     def __init__(self, x, sr=48000, window=None, hop=None,
253 |                  threshold = .8, vthresh = .2,
254 |                  fmin=50, fmax=5000, 
255 |                  ncand=8, method='xcorr',
256 |                  cand_method='fft', fftthresh=0.1):
257 |         """Calculate the average mean difference of x around index
258 | 
259 |         Arguments:
260 |         x:         signal
261 |         sr:        sample rate
262 |         window:    window around index used for difference calculations
263 |         threshold: ratio to lowest minima to keep as peak
264 |         vthresh:   voicing threshold
265 |         fmin:      value of minimum possible frequency
266 |         fmax:      value of maximum possible frequency
267 |         ncand:     maximum number of period candidates
268 |         method:    type of correlation correlation / matching to use
269 |                    'xcorr' - correlation
270 |                    'amdf'  - average mean difference function
271 |                    'zc'    - zero crossing
272 |         cand_method: method for candidate selection:
273 |                      'fft'    - based on an fft of the window
274 |                      'min'    - minimum periodicity wins
275 |                      'similar'- most similar wins
276 |         fftthresh: threshold for fft peak selection (default=0.1)
277 |         """
278 | 
279 |         self.method = method
280 |         self.x = x.astype(float)
281 |         self.sr = sr
282 | 
283 |         self.nx = len(x)
284 | 
285 |         if fmin is None:
286 |             maxdelay = None
287 |         else:
288 |             maxdelay = int(sr/fmin)
289 | 
290 |         if fmax is None:
291 |             mindelay = 2
292 |         else:
293 |             mindelay = int(sr/fmax)
294 | 
295 |         if window is None:
296 |             if maxdelay is None:
297 |                 window = self.nx
298 |             else:
299 |                 window = 3*maxdelay
300 | 
301 |         if not np.iterable(window):
302 |             window = np.ones(window)
303 | 
304 |         self.wind = window
305 |         self._calc_window_norm()
306 | 
307 |         self.nwind = len(window)
308 |         # self.windad = amdf(window)
309 | 
310 |         self.mindelay = mindelay
311 |         if maxdelay is None:
312 |             self.maxdelay = int(round(self.nwind/2))
313 |         else:
314 |             self.maxdelay = maxdelay
315 | 
316 |         if hop is None:
317 |             hop = self.nwind//2
318 | 
319 |         self.hop = hop
320 | 
321 |         self.method = method
322 |         self.threshold = threshold
323 |         self.vthresh = vthresh
324 |         self.ncand = ncand
325 |         self.cand_method = cand_method
326 |         self.fftthresh = fftthresh
327 | 
328 |         # data storage
329 |         self.periods = []
330 | 
331 |         # progress indicator
332 |         self.progress = Progress(end=self.nx)
333 | 
334 |     def _calc_window_norm(self):
335 |         """Calculate the normalisation function for window
336 | 
337 |         Arguments: (None)
338 |         """
339 | 
340 |         if self.method is 'xcorr':
341 |             w = self.wind
342 |             self.wnorm = np.correlate(w, w, "full")
343 |         else:
344 |             self.wnorm = 1.
345 | 
346 |     def per_at_index(self, index):
347 |         """Calculate the average mean difference of x around index
348 | 
349 |         Arguments:
350 | 
351 |         index:  index of x for current amdf
352 |         threshold: ratio to lowest minima to keep as peak
353 |         """
354 | 
355 |         pp = Periodicity(self, index)
356 |         pp.set_time_properties(index)
357 |         pp.sort_strength()
358 | 
359 |         # self.periods.append(pp)
360 |         return pp
361 | 
362 |     def calc(self, hop=None, threshold=None):
363 |         """Estimate local periodicity in the full time series
364 | 
365 |         Arguments:
366 | 
367 |         hop:       samples bewteen estimations
368 |         threshold: peak threshold for maintaining or rejecting
369 |                    candidates
370 |         """
371 | 
372 |         self.periods = []
373 |         if hop is None:
374 |             hop = self.hop
375 | 
376 |         if threshold is not None:
377 |             oldthresh = self.threshold
378 |             self.threshold = threshold
379 | 
380 |         idxmax = self.nx - self.nwind
381 |         idxvec = np.arange(self.nwind, idxmax, hop)
382 | 
383 |         sys.stderr.write("Calculating local periodicity... \n")
384 | 
385 |         for idx in idxvec:
386 |             pp = self.per_at_index(idx)
387 |             sys.stderr.write("\r{:6.2f}%%".format(idx*100/idxmax))
388 |             sys.stderr.flush()
389 |             self.periods.append(pp)
390 | 
391 |         sys.stderr.write("\ndone\n"  )
392 | 
393 |         if threshold is not None:
394 |             self.threshold = oldthresh
395 | 
396 |     def calcPeriodByPeriod(self, threshold=None, 
397 |                            tf=None, f=None):
398 |         """Estimate local periodicity in the full time series
399 | 
400 |         Arguments:
401 | 
402 |         hop:       samples bewteen estimations
403 |         threshold: peak threshold for maintaining or rejecting
404 |                    candidates
405 |         """
406 | 
407 |         self.periods = []
408 |         if threshold is not None:
409 |             oldthresh = self.threshold
410 |             self.threshold = threshold
411 | 
412 |         # Max index for starting window
413 |         idxmax = self.nx - self.nwind
414 | 
415 |         sys.stdout.write("Calculating local periodicity... ")
416 |         idx = self.nwind
417 |         while idx < idxmax:
418 |             pp = self.per_at_index(idx)
419 |             oldidx = idx
420 |             if f is None:
421 |                 di = pp.get_preferred_period()
422 |             else: 
423 |                 thisf = np.interp(pp.time, tf, f)
424 |                 if len(pp.cand_period)>0 and thisf>0:
425 |                     imin = np.argmin(np.abs(self.sr/thisf-pp.cand_period))
426 |                     pp.preferred = imin
427 |                     di = pp.cand_period[imin]
428 |                 else:
429 |                     di=0
430 |             if di:
431 |                 idx += di
432 |                 self.periods.append(pp)
433 |             else:
434 |                 idx += self.mindelay 
435 | 
436 |             # sys.stdout.write("\b"*15+"%6d / %6d" % (idx, self.nx))
437 |             # sys.stdout.flush()
438 |             self.progress.update(idx)
439 | 
440 |         self.progress.update(self.nx)
441 |         sys.stdout.write("\ndone\n") 
442 | 
443 |         if threshold is not None:
444 |             self.threshold = oldthresh
445 | 
446 |     def plot_candidates(self):
447 |         """Plot a representation of candidate periodicity
448 | 
449 |         Size gives the periodicity strength, 
450 |         color the order of preference
451 |         """
452 | 
453 |         fig, ax = pl.subplots(2, sharex=True)
454 | 
455 |         hues = np.arange(self.ncand)/float(self.ncand)
456 |         hsv = np.swapaxes(np.atleast_3d([[hues, np.ones(len(hues)),
457 |                                           np.ones(len(hues))]]), 1, 2)
458 |         cols = hsv_to_rgb(hsv).squeeze()
459 | 
460 |         for per in self.periods:
461 |             nc = len(per.cand_period)
462 | 
463 |             ax[0].scatter(per.time*np.ones(nc), per.cand_period,
464 |                           s=per.cand_strength*100,
465 |                           c=cols[0:nc], alpha=.5)
466 | 
467 |         ax[0].plot(*zip(*[[per.time, float(per.get_preferred_period())]
468 |                         for per in self.periods]), color='k')
469 | 
470 |         ax[1].plot(self.get_times(), self.get_strength())
471 | 
472 |     def get_f0(self, thresh=0.0):
473 |         """Get f0 as a function of time
474 | 
475 |         thresh: threshod for period strength
476 |         """
477 | 
478 |         f0 = np.zeros(len(self.periods))
479 |         for ii, per in enumerate(self.periods):
480 |             if per.get_preferred_strength() > thresh:
481 |                 f0[ii] = self.sr/per.get_preferred_period()
482 |             else:
483 |                 f0[ii] = np.nan
484 |         return f0
485 | 
486 |     def get_times(self):
487 |         """Get f0 as a function of time
488 |         """
489 | 
490 |         f0 = np.zeros(len(self.periods))
491 |         for ii, per in enumerate(self.periods):
492 |             f0[ii] = per.time
493 |         return f0
494 | 
495 |     def get_strength(self):
496 |         """Get f0 strength as a function of time
497 |         """
498 | 
499 |         ss = np.zeros(len(self.periods))
500 |         for ii, per in enumerate(self.periods):
501 |             ss[ii] = per.get_preferred_strength()
502 |         return ss
503 | 
504 | 
505 | class PeriodTimeSeries(PeriodSeries):
506 |     pass
507 | 
508 | 
509 | class PeriodByPeriod(PeriodSeries):
510 |     def __init__(self):
511 |         super(PeriodByPeriod, self).__init__()
512 | 
513 |     def import_period_series(self, pts):
514 |         """Imports a PeriodTimeSeries object
515 | 
516 |         :pts: PeriodTimeSeries object with
517 |               time and frequency information
518 |         :returns: None
519 | 
520 |         """
521 |         self.f = pts.f
522 |         self.t = pts.t
523 |         self.sr = pts.sr
524 | 
525 | def period_marks_amdf(x, sr=1.0, t0=0.0, tf=[], f=[], window_size=1024,
526 |                       min_per=0.001):
527 |     """add period marks information to file,
528 |     based on sample per sample difference between adjacent periods
529 | 
530 |     :t0: first mark position
531 |     :window_size: window to use for comparison between periods
532 |     :returns: TODO
533 | 
534 |     """
535 |     marks_t = [t0]
536 |     next_t = t0
537 |     this_f0 = np.interp(marks_t[-1], tf, f)
538 |     if np.isnan(this_f0):
539 |         this_f0 = np.nanmean(f)
540 |     period_samp = int(sr/this_f0)
541 |     while next_t*sr < len(x) - period_samp - window_size:
542 |         if not np.isnan(this_f0):
543 |             period_samp = int(sr/this_f0)
544 |             source_idx_st = int(next_t*sr)
545 |             target_idx_st = source_idx_st + period_samp
546 |             source_idx_end = source_idx_st + window_size
547 |             target_idx_end = target_idx_st + window_size
548 |             x_source = x[source_idx_st:source_idx_end]
549 |             x_target = x[target_idx_st:target_idx_end]
550 |             xc = amdf(x_source, x_target)
551 |             # find max of xc near 0 lag
552 |             # (at position window_size-1)
553 |             peaks = pf(-xc)
554 |             idx_min = np.argmin(np.abs(peaks.pos-window_size+1))
555 |             delay_samp, _ = peaks.refine(idx_min)
556 |             # delay_samp = peaks.get_pos()[idx_min]
557 |             delay_samp -= window_size-1
558 |             # print delay_samp
559 |             delay_t = (-delay_samp + period_samp)/sr
560 |             if delay_t > min_per:
561 |                 marks_t.append(next_t+(window_size+period_samp/2)/sr)
562 |                 next_t += delay_t
563 |             else:
564 |                 next_t += 1/this_f0
565 | 
566 |         else:
567 |             next_t = next_t + delay_t
568 | 
569 |         this_f0 = np.interp(next_t, tf, f)
570 |     return np.array(marks_t)
571 | 
572 | 
573 | def period_marks_corr(x, sr=1.0, t0=0.0, tf=[], f=[], window_size=1024,
574 |                       min_per=0.001):
575 |     """add period marks information to file,
576 |     based on correlation between adjacent periods
577 | 
578 |     :t0: first mark position
579 |     :window_size: window to use for comparison between periods
580 |     :returns: TODO
581 | 
582 |     """
583 |     marks_t = [t0]
584 |     next_t = t0
585 |     this_f0 = np.interp(marks_t[-1], tf, f)
586 |     if np.isnan(this_f0):
587 |         this_f0 = np.nanmean(f)
588 |     period_samp = int(sr/this_f0)
589 |     while next_t*sr < len(x) - period_samp - window_size:
590 |         if not np.isnan(this_f0):
591 |             period_samp = int(sr/this_f0)
592 |             source_idx_st = int(next_t*sr)
593 |             target_idx_st = source_idx_st + period_samp
594 |             source_idx_end = source_idx_st + window_size
595 |             target_idx_end = target_idx_st + window_size
596 |             x_source = x[source_idx_st:source_idx_end]
597 |             x_target = x[target_idx_st:target_idx_end]
598 |             xc = np.correlate(x_source, x_target, "full")
599 |             # find max of xc near 0 lag
600 |             # (at position window_size-1)
601 |             peaks = pf(xc)
602 |             idx_min = np.argmin(np.abs(peaks.pos-window_size+1))
603 |             delay_samp, _ = peaks.refine(idx_min)
604 |             # delay_samp = peaks.get_pos()[idx_min]
605 |             delay_samp -= window_size-1
606 |             # print delay_samp
607 |             delay_t = (-delay_samp + period_samp)/sr
608 |             if delay_t > min_per:
609 |                 marks_t.append(next_t+(window_size+period_samp/2)/sr)
610 |                 next_t += delay_t
611 |             else:
612 |                 next_t += 1/this_f0
613 | 
614 |         else:
615 |             next_t = next_t + delay_t
616 | 
617 |         this_f0 = np.interp(next_t, tf, f)
618 |     return np.array(marks_t)
619 | 
620 | 
621 | def period_marks_peak(x, sr=1.0, tf=None, f=[], fit_points=3):
622 |     """calculate period marks for x based on peak
623 |     positions of the signal
624 | 
625 |     :x: signal
626 |     :sr: sample rate (defalut 1 sample/sec)
627 |     :tf: time at which frequency values are calulated
628 |          (defaults to same samples as x)
629 |     :f: frequency values
630 |     :fit_points: number of points to use for peak fitting
631 |     :returns: time markers
632 |     """
633 | 
634 |     # derivative of x
635 |     # dx = np.diff(x)
636 | 
637 |     # make sure the rate is float
638 |     sr = float(sr)
639 | 
640 |     # build time vector for signal
641 |     tx = np.arange(len(x))/(sr)
642 |     # interpolate frequency values
643 |     if tf is None:
644 |         try:
645 |             assert(len(f) == len(x))
646 |         except(TypeError):
647 |             f = f*np.ones(len(x))
648 |     else:
649 |         # f_orig = f
650 |         f = np.interp(tx, tf, f)
651 | 
652 |     real_mask = np.isfinite(f)
653 |     idx_0 = np.nonzero(real_mask)[0][0]
654 |     period_samp = int(sr/f[idx_0])
655 | 
656 |     marks = []
657 |     maxval = []
658 | 
659 |     # find the first minimum
660 |     idx_start = idx_0 + np.argmin(x[idx_0:idx_0+period_samp])
661 |     while idx_start < len(x):
662 |         idx_end = np.min([idx_start + period_samp, len(x)])
663 |         idx_max = np.argmax(x[idx_start:idx_end]) + idx_start
664 | 
665 |         if fit_points < 3:
666 |             t_max = idx_max/sr
667 |         # elif fit_points == 3:
668 |         #    # parabolic interpolation
669 |         else:
670 |             # parabolic fit
671 |             rel_idx_start = int(np.max([0,-fit_points/2]))
672 |             rel_idx_end = np.min([rel_idx_start + fit_points,
673 |                                  len(x) - idx_max - 1])
674 |             # dx_fit = dx[idx_max+rel_idx_start:idx_max+rel_idx_end]
675 |             # dx_abcissa = np.arange(rel_idx_start, rel_idx_end)+.5
676 |             # fit_poly = np.polyfit(dx_abcissa, dx_fit, 1)
677 |             # rel_refined_max = -fit_poly[1]/fit_poly[0]
678 |             x_fit = x[idx_max+rel_idx_start:idx_max+rel_idx_end+1]
679 |             x_abcissa = np.arange(rel_idx_start, rel_idx_end+1)
680 |             try:
681 |                 fit_poly = np.polyfit(x_abcissa, x_fit, 2)
682 |                 rel_refined_max = -fit_poly[1]/fit_poly[0]/2
683 |             except (ValueError, np.RankWarning):
684 |                 rel_refined_max = fit_points+1
685 |             if np.abs(rel_refined_max) <= fit_points:
686 |                 t_max = (idx_max + rel_refined_max)/sr
687 |                 v_max = np.polyval(fit_poly, rel_refined_max)
688 |             else:
689 |                 t_max = (idx_max)/sr
690 |                 v_max = x[idx_max]
691 | 
692 |         # prepare for next iteration 
693 |         this_f0 = f[idx_max]
694 |         if np.isfinite(this_f0):
695 |             period_samp = int(sr/this_f0)
696 |             marks.append(t_max)
697 |             maxval.append(v_max)
698 | 
699 |         # otherwise keep the same period
700 |         # next starting point
701 |         min_search_max = np.min([idx_max+period_samp, len(x)])
702 |         adv = np.argmin(x[idx_max:min_search_max])
703 |         if adv > 0:
704 |             idx_start = idx_max + adv
705 |         else:
706 |             idx_start = idx_max + 1
707 |         
708 |     return np.array(marks)[:-1], np.array(maxval)[:-1]
709 | 
710 | 


--------------------------------------------------------------------------------
/pypevoc/ProgressDisplay.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #  ProgressDisplay.py
  5 | #  
  6 | #  An IPython-friendly progress bar
  7 | #
  8 | #
  9 | #  Copyright 2014 Andre Almeida <goios@AndreUbuntu>
 10 | #  
 11 | #  This program is free software; you can redistribute it and/or modify
 12 | #  it under the terms of the GNU General Public License as published by
 13 | #  the Free Software Foundation; either version 2 of the License, or
 14 | #  (at your option) any later version.
 15 | #  
 16 | #  This program is distributed in the hope that it will be useful,
 17 | #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 19 | #  GNU General Public License for more details.
 20 | #  
 21 | #  You should have received a copy of the GNU General Public License
 22 | #  along with this program; if not, write to the Free Software
 23 | #  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 24 | #  MA 02110-1301, USA.
 25 | #  
 26 | #  
 27 | 
 28 | import sys
 29 | 
 30 | try:
 31 |     from IPython.core.display import clear_output
 32 |     have_ipython = True
 33 | except ImportError:
 34 |     have_ipython = False
 35 | 
 36 | try:
 37 |     from IPython.display import display
 38 |     from ipywidgets import IntProgress, HTML, VBox
 39 |     have_ipywidgets=True
 40 | except ImportError:
 41 |     have_ipywidgets=False
 42 | 
 43 | def in_ipynb():
 44 |     try:
 45 |         cfg = get_ipython().config
 46 |         try:
 47 |             ipk = cfg['IPKernelApp']
 48 |             if len(ipk)==0:
 49 |                 return False
 50 |         except KeyError:
 51 |             return False
 52 |         return True
 53 |     except NameError:
 54 |         return False
 55 | 
 56 | 
 57 | 
 58 | class Progress(object):
 59 |     def __init__(self, end=1.):
 60 |         """
 61 |         Creates a progress bar display
 62 |         """
 63 |         self.current_val = 0.0
 64 |         self.max_val = end
 65 |         if in_ipynb():
 66 |             if have_ipywidgets:
 67 |                 self.label = HTML()
 68 |                 self.progress = IntProgress(min=0,max=100,value=1)
 69 |                 self.progress.bar_style = 'info'
 70 |                 self.progressHTML = VBox([self.label, self.progress])
 71 |                 display(self.progressHTML)
 72 |                 self.redraw = self._redraw_ipywidgets
 73 |                 self.cleanup = self._cleanup_ipywidgets
 74 |             else:
 75 |                 self.redraw = self._redraw_ipython
 76 |                 self.cleanup = self._cleanup_ipython
 77 |         else:
 78 |             self.redraw = self._redraw_console
 79 |             self.cleanup = self._cleanup_console
 80 | 
 81 | 
 82 |     def update(self, val):
 83 |         """
 84 |         Update the progress bar value
 85 |         """
 86 |         self.current_val = val
 87 |         self.redraw()
 88 | 
 89 |     def _redraw_ipywidgets(self):
 90 |         self.label.value = str(self)
 91 |         self.progress.value = self.current_val/self.max_val*100
 92 | 
 93 |     def _redraw_ipython(self):
 94 |         clear_output()
 95 |         print(str(self))
 96 |         sys.stdout.flush()
 97 | 
 98 |     def _redraw_console(self):
 99 |         print('\r'+str(self),end=" ")
100 |         sys.stdout.flush()
101 |         
102 |     def __str__(self):
103 |         pct = self.current_val/self.max_val*100
104 |         return '%d / %d (%.2f%%)'%(self.current_val,self.max_val,pct)
105 | 
106 |     def _cleanup_console(self):
107 |         print('\n')
108 | 
109 |     def _cleanup_ipython(self):
110 |         pass
111 | 
112 |     def _cleanup_ipywidgets(self):
113 |         pass
114 | 
115 |     def finish(self):
116 |         self.update(self.max_val)
117 |         self.cleanup()
118 | 
119 | 
120 |         
121 | 


--------------------------------------------------------------------------------
/pypevoc/SoundUtils.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import sys
  3 | 
  4 | 
  5 | def FftFilter(x, bands, gains):
  6 |     '''
  7 |     Filter signal x using FFT and IFFT
  8 |     * x input signal
  9 |     * bands: list of start and stop frequencies of each band
 10 |     * gains: start and stop gains in each band
 11 | 
 12 |     Example:
 13 | 
 14 |     y = FFTfilter(x, [(0,0.1),(0.1,1.0)], [(1.,1.),(0.,0.)])
 15 | 
 16 |     filters signal x low pass at 0.1 times the nyquist rate
 17 |       (sampling rate / 2)
 18 |     '''
 19 | 
 20 |     xf = np.fft.fft(x)
 21 |     nyq = len(xf)/2
 22 | 
 23 |     ffilter = np.zeros(len(xf))
 24 |     for bb, gg in zip(bands, gains):
 25 |         fmin = int(bb[0]*nyq)
 26 |         fmax = int(bb[1]*nyq)
 27 |         ffilter[fmin:fmax] = np.linspace(gg[0], gg[1],
 28 |                                          fmax-fmin)
 29 |         if fmin > 0:
 30 |             ffilter[-fmax+1:-fmin+1] = np.linspace(gg[1], gg[0],
 31 |                                                    fmax-fmin)
 32 |         else:
 33 |             ffilter[-fmax+1:] = np.linspace(gg[1], gg[0],
 34 |                                             fmax-fmin-1)
 35 |         print('{}-{} : gains [{}, {}]'.format(fmin, fmax,
 36 |                                               gg[0], gg[1]))
 37 | 
 38 |     xf_filt = xf*ffilter
 39 |     return np.fft.ifft(xf_filt)
 40 | 
 41 | 
 42 | def FuncWind(func, x, sr=1, nwind=1024, nhop=512, power=1,
 43 |              windfunc=np.blackman):
 44 |     '''
 45 |     Applies a function window by window to a time series
 46 |     '''
 47 | 
 48 |     nsam = len(x)
 49 |     ist = 0
 50 |     iend = ist+nwind
 51 | 
 52 |     t = []
 53 |     ret = []
 54 | 
 55 |     wind = windfunc(nwind)
 56 |     if power > 0:
 57 |         wsumpow = sum(wind**power)
 58 |     else:
 59 |         wsumpow = 1.
 60 | 
 61 |     while (iend < nsam):
 62 |         thisx = x[ist:iend]
 63 |         xw = thisx*wind
 64 | 
 65 |         ret.append(func(xw)/wsumpow)
 66 |         t.append(float(ist+iend)/2.0/float(sr))
 67 | 
 68 |         ist = ist+nhop
 69 |         iend = ist+nwind
 70 | 
 71 |     return np.array(ret), np.array(t)
 72 | 
 73 | 
 74 | def RMSWind(x, sr=1, nwind=1024, nhop=512, windfunc=np.blackman):
 75 |     '''
 76 |     Calculates the RMS amplitude amplitude of x, in frames of
 77 |     length nwind, and in steps of nhop. windfunc is used as
 78 |     windowing function.
 79 | 
 80 |     nwind should be at least 3 periods if the signal is periodic.
 81 |     '''
 82 | 
 83 |     nsam = len(x)
 84 |     ist = 0
 85 |     iend = ist+nwind
 86 | 
 87 |     t = []
 88 |     ret = []
 89 | 
 90 |     wind = windfunc(nwind)
 91 |     wsum2 = np.sum(wind**2)
 92 | 
 93 |     while (iend < nsam):
 94 |         thisx = x[ist:iend]
 95 |         xw = thisx*wind
 96 | 
 97 |         ret.append(np.sum(xw*xw/wsum2))
 98 |         t.append(float(ist+iend)/2.0/float(sr))
 99 | 
100 |         ist = ist+nhop
101 |         iend = ist+nwind
102 | 
103 |     return np.sqrt(np.array(ret)), np.array(t)
104 | 
105 | 
106 | def Heterodyn(x, f, sr=1, nwind=1024, nhop=512,
107 |               windfunc=np.blackman):
108 |     '''
109 |     Calculates the amplitude near frequency f in x
110 | 
111 |     nwind should be at least 3 periods if the signal is periodic.
112 |     '''
113 |     sinsig = np.exp(2j*np.pi*np.arange(len(x))*f/float(sr))
114 |     hamp, t = FuncWind(np.sum, x*sinsig, power=1, sr=sr,
115 |                        nwind=nwind, nhop=nhop,
116 |                        windfunc=windfunc)
117 |     return np.array(hamp)*2, np.array(t)
118 | 
119 | 
120 | def HeterodynWithF0Track(x, tf0, f0, sr=1,
121 |                          nwind=1024, nhop=512,
122 |                          windfunc=np.blackman):
123 |     '''
124 |     Calculates the amplitude near frequency f0 in x
125 |     (f0 is time-varying, values given at tf0
126 | 
127 |     nwind should be at least 3 periods if the signal
128 |     is periodic.
129 |     '''
130 |     valid_idx = np.logical_not(np.isnan(f0))
131 |     tx = np.arange(len(x))/float(sr)
132 |     f0s = np.interp(tx, tf0[valid_idx], f0[valid_idx])
133 |     phs = np.cumsum(2*np.pi*f0s/sr)
134 |     sinsig = np.exp(1j*phs)
135 | 
136 |     hamp, t = FuncWind(np.sum, x*sinsig, power=1, sr=sr,
137 |                        nwind=nwind, nhop=nhop,
138 |                        windfunc=windfunc)
139 |     return np.array(hamp)*2, np.array(t)
140 | 
141 | 
142 | def SpecCentWind(x, sr=1, nwind=1024, nhop=512, windfunc=np.blackman):
143 |     '''
144 |     Calculates the SpectralCentroid of x, in frames of
145 |     length nwind, and in steps of nhop. windfunc is used as
146 |     windowing function
147 | 
148 |     nwind should be at least 3 periods if the signal is periodic.
149 |     '''
150 |     ff = np.arange(nwind/2)/float(nwind)*sr
151 | 
152 |     def SCvec(xw):
153 |         xf = np.fft.fft(xw)
154 |         xf2 = xf[:nwind/2]
155 |         return sum(np.abs(xf2)*ff)/sum(np.abs(xf2))
156 | 
157 |     amp, t = FuncWind(SCvec, x, power=0, sr=sr,
158 |                       nwind=nwind, nhop=nhop)
159 | 
160 |     return np.array(amp), np.array(t)
161 | 
162 | 
163 | def AvgWind(x, sr=1, nwind=1024, nhop=512,
164 |             windfunc=np.blackman):
165 |     '''
166 |     Calculates the RMS amplitude amplitude of x, in frames of
167 |     length nwind, and in steps of nhop. windfunc is used as
168 |     windowing function.
169 | 
170 |     nwind should be at least 3 periods if the signal is periodic.
171 |     '''
172 | 
173 |     nsam = len(x)
174 |     ist = 0
175 |     iend = ist+nwind
176 | 
177 |     t = []
178 |     amp = []
179 | 
180 |     wind = windfunc(nwind)
181 |     wsum = sum(wind)
182 | 
183 |     while (iend < nsam):
184 |         thisx = x[ist:iend]
185 |         xw = thisx*wind
186 | 
187 |         amp.append(sum(xw)/wsum)
188 |         t.append(float(ist+iend)/2.0/float(sr))
189 | 
190 |         ist = ist+nhop
191 |         iend = ist+nwind
192 | 
193 |     return np.array(amp), np.array(t)
194 | 
195 | 
196 | def SpecFlux(x, sr=1, nwind=1024, nhop=512, minf=0,
197 |              maxf=np.inf, windfunc=np.blackman):
198 |     '''
199 |     Calculates the spectral flux in sunud
200 |     '''
201 | 
202 |     nsam = len(x)
203 |     # first window
204 |     ist = 0
205 |     iend = ist+nwind
206 | 
207 |     t = []
208 |     res = []
209 | 
210 |     wind = windfunc(nwind)
211 |     minbin = int(minf/sr*nwind)
212 |     maxbinf = (float(maxf)/sr*nwind)
213 |     if maxbinf > nwind:
214 |         maxbin = nwind
215 |     else:
216 |         maxbin = int(maxbinf)
217 | 
218 |     while (iend < nsam-nhop):
219 |         thisx = x[ist:iend]
220 |         nextx = x[ist+nhop:iend+nhop]
221 | 
222 |         ff = np.abs(np.fft.fft(thisx*wind))
223 |         fl = np.abs(np.fft.fft(nextx*wind))
224 | 
225 |         res.append(np.sqrt(sum((ff[minbin:maxbin]-fl[minbin:maxbin])**2)))
226 |         t.append(float(ist+iend+nhop)/2.0/float(sr))
227 | 
228 |         ist = ist+nhop
229 |         iend = ist+nwind
230 | 
231 |     return np.array(res), np.array(t)
232 | 
233 | 
234 | def aubio_f0yin(y, sr, nwind=1024, hop=512,
235 |                 method='yin', tolerance=None):
236 |     ''' Applies f0 detection to a numpy vector using aubio
237 |     '''
238 |     from aubio import pitch, fvec
239 | 
240 |     po = pitch(method, nwind, hop, sr)
241 |     vs = fvec(nwind)
242 | 
243 |     if tolerance is not None:
244 |         if tolerance > 0.0 and tolerance < 1.0:
245 |             po.set_tolerance(tolerance)
246 |         else:
247 |             sys.stderr.write('Tolerance not set: Out of bounds\n')
248 | 
249 |     nsamples = y.shape[0]
250 | 
251 |     freq = []
252 |     time = []
253 |     conf = []
254 | 
255 |     for ii in xrange(0,nsamples-nwind, hop):
256 |         thisy = y[ii:ii+nwind]
257 |         vs[:] = thisy
258 |         time.append(float(ii+nwind/2)/sr)
259 |         freq.append(po(vs))
260 |         conf.append(po.get_confidence())
261 |     return np.array(freq).squeeze(), np.array(time), np.array(conf)
262 | 
263 | 
264 | def PlaySound(w, sr=44100):
265 |     import pyaudio
266 | 
267 |     p = pyaudio.PyAudio()
268 |     stream = p.open(format=pyaudio.paFloat32,
269 |                     channels=1, rate=sr, output=1)
270 | 
271 |     stream.write(w.astype(np.float32).tostring())
272 | 
273 |     stream.close()
274 |     p.terminate()
275 | 
276 | 


--------------------------------------------------------------------------------
/pypevoc/TransferFunctions.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Defines some useful functions for the estimation of transfer functions
  3 | """
  4 | 
  5 | import numpy as np
  6 | import matplotlib.pyplot as pl
  7 | import scipy.signal as sig
  8 | 
  9 | 
 10 | def tfe_sig(y, x, *args, **kwargs):
 11 |     """estimate transfer function from x to y,
 12 |        see csd for calling convention"""
 13 |     fxy, sxy = sig.csd(y, x, *args, **kwargs)
 14 |     fxx, sxx = sig.csd(x, x, *args, **kwargs)
 15 |     return sxy / sxx, fxx
 16 | 
 17 | 
 18 | try:
 19 |     from matplotlib.mlab import psd, csd, cohere
 20 | 
 21 |     def tfe(y, x, *args, **kwargs):
 22 |         """estimate transfer function from x to y,
 23 |            see csd for calling convention"""
 24 |         sxy, fxy = csd(y, x, *args, **kwargs)
 25 |         sxx, fxx = psd(x, *args, **kwargs)
 26 |         return sxy / sxx, fxx
 27 | 
 28 | 
 29 | except ImportError:
 30 |     tfe = tfe_sig
 31 | 
 32 | 
 33 | def nextpow2(number):
 34 |     intlognum = int(np.log2(number))
 35 |     return 2**intlognum
 36 | 
 37 | 
 38 | def fft_filter(x, bands, gains):
 39 |     '''
 40 |     Filter signal x using FFT and IFFT
 41 |     * x input signal
 42 |     * bands: list of start and stop frequencies of each band
 43 |     * gains: start and stop gains in each band
 44 | 
 45 |     Example:
 46 | 
 47 |     y = FFTfilter(x, [(0,0.1),(0.1,1.0)], [(1.,1.),(0.,0.)])
 48 | 
 49 |     filters signal x low pass at 0.1 times the nyquist rate
 50 |       (sampling rate / 2)
 51 |     '''
 52 | 
 53 |     xf = np.fft.fft(x)
 54 |     nyq = len(xf)/2
 55 | 
 56 |     ffilter = np.zeros(len(xf))
 57 |     for bb, gg in zip(bands, gains):
 58 |         fmin = int(bb[0]*nyq)
 59 |         fmax = int(bb[1]*nyq)
 60 |         ffilter[fmin:fmax] = np.linspace(gg[0], gg[1], fmax-fmin)
 61 |         if fmin > 0:
 62 |             ffilter[-fmax+1:-fmin+1] = np.linspace(gg[1], gg[0], fmax-fmin)
 63 |         else:
 64 |             ffilter[-fmax+1:] = np.linspace(gg[1], gg[0], fmax-fmin-1)
 65 | 
 66 |     xf_filt = xf*ffilter
 67 |     return np.fft.ifft(xf_filt)
 68 | 
 69 | 
 70 | def smthderiv(ff, ph, rad=1):
 71 |     dph = []
 72 |     for i, phi in enumerate(ph):
 73 |         imin = max(0, i-rad)
 74 |         imax = min(len(ph), i+rad)
 75 |         pp = np.polyfit(ff[imin:imax], ph[imin:imax], 1)
 76 |         dph.append(pp[0])
 77 |     return np.array(dph)
 78 | 
 79 | 
 80 | def determineDelay(source, target, maxdel=2**16, ax=None):
 81 |     '''
 82 |     Determine the delay between two signals
 83 |     (based on correlation extrema)
 84 | 
 85 |     Parameters:
 86 |     * Signals
 87 |       - source
 88 |       - target
 89 |     * maxdel: maximum delay to look for (in both directions)
 90 |     '''
 91 |     sample_start = 0
 92 |     xd = source[sample_start:sample_start+maxdel]
 93 |     yd = target[sample_start:sample_start+maxdel]
 94 |     Cxx = np.correlate(xd, xd, 'full')
 95 |     Cxy = np.correlate(yd, xd, 'full')
 96 |     Pkx = np.argmax(np.abs(Cxx))
 97 |     Pky = np.argmax(np.abs(Cxy))
 98 |     if ax:
 99 |         try:
100 |             ax.plot(Cxx)
101 |         except AttributeError:
102 |             fig, ax = pl.subplots(1)
103 |             ax.plot(Cxx)
104 |         ax.plot(Cxy)
105 |         ax.axvline(Pkx, color='red')
106 |         ax.plot(Pky, Cxy[Pky], 'o')
107 | 
108 |     delay = Pky-Pkx
109 |     return delay
110 | 
111 | 
112 | def transferogram(source, target, rate=1, start_time=0., delta_time=1.,
113 |                   sample_duration=.5, window_duration=.125, window_hop=None):
114 |     '''
115 |     tfe, freqs, times, coherence = transferogram(...)
116 | 
117 |     Calculates a time-varying transfer function from source (x)
118 |     to target (y) at intervals delta_time.
119 | 
120 |     Parameters:
121 |     * source: source signal (reuqired)
122 |     * target: target signal (required)
123 |     * rate:   sampling rate
124 |     * start_time: starting time for tfe calculations
125 |     * delta_time: distance between calculations
126 |     * sample_duration: length of signals used in tfe estimates
127 |                        (longer than window_duration, used in averaging)
128 |     * window_duration: inidvidual window length in tfe estimates
129 |     * window_hop: hop between windows (defaults to window_duration/2)
130 | 
131 |     Returns:
132 |     * tfe:   transfer functions (complex matrix NxM)
133 |     * freqs: frequencies corresponding to tfe estimates (array size N)
134 |     * times: times corresponding to tfe estimates (array size M)
135 |     * coherence: coherence matrix MxN
136 |     '''
137 | 
138 |     # convert time to samples
139 |     sample_start = int(start_time*rate)
140 |     sample_delta = int(delta_time*rate)
141 |     sample_len = int(sample_duration*rate)
142 | 
143 |     if target is None:
144 |         n_target = len(source)
145 |     else:
146 |         n_target = len(target)
147 | 
148 |     n_samples = min(len(source), n_target)
149 |     sample_end = n_samples - sample_start - sample_len
150 | 
151 |     # windowing parameters
152 |     nsamp_window = nextpow2(window_duration*rate)
153 |     if window_hop:
154 |         nsamp_window_hop = nextpow2(window_hop*rate)
155 |     else:
156 |         nsamp_window_hop = nsamp_window/2
157 | 
158 |     noverlap = nsamp_window - nsamp_window_hop
159 | 
160 |     resp = []
161 |     coherence = []
162 |     times = []
163 | 
164 |     if target is None:
165 |         for ii in np.arange(sample_start, sample_end, sample_delta):
166 |             block_resp, freq = psd(source[ii:ii+sample_len],
167 |                                    NFFT=nsamp_window,
168 |                                    noverlap=noverlap, Fs=rate)
169 |             block_coh = []
170 |             times.append((ii+sample_len/2)/float(rate))
171 |             resp.append(block_resp)
172 |             coherence.append(block_coh)
173 |     else:
174 |         for ii in np.arange(sample_start, sample_end, sample_delta):
175 |             block_resp, freq = tfe(target[ii:ii+sample_len],
176 |                                    source[ii:ii+sample_len], NFFT=nsamp_window,
177 |                                    noverlap=noverlap, Fs=rate)
178 |             block_coh, _ = cohere(target[ii:ii+sample_len],
179 |                                   source[ii:ii+sample_len], NFFT=nsamp_window,
180 |                                   noverlap=noverlap, Fs=rate)
181 |             times.append((ii+sample_len/2)/float(rate))
182 |             resp.append(block_resp)
183 |             coherence.append(block_coh)
184 | 
185 |     return np.array(resp).T, freq, np.array(times), np.array(coherence).T
186 | 
187 | 
188 | def block_delay(source, target, window=None):
189 |     if window is None:
190 |         window = np.ones(len(source))
191 |     wind_source = window*source
192 |     wind_target = window*target
193 | 
194 |     corr_st = np.correlate(wind_source, wind_target, "full")
195 | 
196 |     return np.argmax(corr_st)-len(source), np.max(corr_st)
197 | 
198 | 
199 | def maxdelwind(source, target, rate=1, start_time=0., delta_time=1.,
200 |                sample_duration=.5):
201 |     '''
202 |     delay, times = maxdelwid(...)
203 | 
204 |     Calculates a time-varying delay function from source (x)
205 |     to target (y) at intervals delta_time.
206 | 
207 |     Parameters:
208 |     * source: source signal (reuqired)
209 |     * target: target signal (required)
210 |     * rate:   sampling rate
211 |     * start_time: starting time for tfe calculations
212 |     * delta_time: distance between calculations
213 |     * sample_duration: length of signals used in tfe estimates
214 |                        (longer than window_duration, used in averaging)
215 |     Returns:
216 |     * delay: max delay array
217 |     * times: times corresponding to delay estimates (array size M)
218 |     '''
219 |     
220 |     # convert time to samples
221 |     sample_start = int(start_time*rate)
222 |     sample_delta = int(delta_time*rate)
223 |     sample_len = int(sample_duration*rate)
224 | 
225 |     window = np.ones(sample_len)
226 | 
227 |     n_samples = min(len(source), len(target))
228 |     sample_end = n_samples - sample_start - sample_len
229 | 
230 |     delay = []
231 |     corr_strength = []
232 |     times = []
233 | 
234 |     for block_start in np.arange(sample_start, sample_end, sample_delta):
235 |         block_end = block_start + sample_len
236 |         target_block = sig.detrend(target[block_start:block_end])
237 |         source_block = sig.detrend(source[block_start:block_end])
238 |         block_del, block_corr = block_delay(target_block, source_block,
239 |                                             window=window)
240 |         times.append((block_start+sample_len/2)/float(rate))
241 |         delay.append(block_del/float(rate))
242 |         corr_strength.append(block_corr)
243 | 
244 |     return np.array(delay), np.array(corr_strength), np.array(times)
245 | 
246 | 
247 | def plot_time_freq(tf_matrix, freq=None, time=None, ax=None, mask=None):
248 |     if time is None:
249 |         time = np.arange(tf_matrix.shape[1])
250 | 
251 |     if freq is None:
252 |         freq = np.arange(tf_matrix.shape[0])
253 | 
254 |     if ax is None:
255 |         fig, ax = pl.subplots(1)
256 | 
257 |     if mask is not None:
258 |         tf_matrix[np.logical_not(mask)] = np.nan
259 | 
260 |     ax.imshow(tf_matrix, aspect='auto', origin='lower',
261 |               extent=[min(time), max(time), min(freq), max(freq)])
262 | 
263 | 


--------------------------------------------------------------------------------
/pypevoc/__init__.py:
--------------------------------------------------------------------------------
1 | from .PVAnalysis import PV, PVHarmonic, SinSum
2 | from .Periodicity import PeriodSeries, period_marks_corr, period_marks_peak, period_marks_amdf
3 | from . import SoundUtils
4 | from . import TransferFunctions
5 | 
6 | 


--------------------------------------------------------------------------------
/pypevoc/speech/DAP.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # pypevoc.speech.DAP.py
 3 | #
 4 | # Part of PyPeVoc python package
 5 | #
 6 | # Copyright (C) 2018 Andre Almeida
 7 | #
 8 | # based on covarep's env_dap.m:
 9 | # https://github.com/covarep/covarep/blob/master/envelope/env_dap.m
10 | #
11 | # This program is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # This program is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | #
24 | 
25 | import numpy as np
26 | import scipy.signal as sig
27 | import scipy.linalg as sla
28 | import logging
29 | 
30 | class EnvelopeDAP(object):
31 |     def __init__(self, sr=1.0, order=4, dftlen=2**12, maxit=50, alpha=.5, dISthresh=1e-6,
32 |                  minbw=None):
33 |         self.sr = sr
34 |         self.order = order
35 |         self.dftlen = dftlen
36 |         self.maxit = maxit
37 |         self.alpha = alpha
38 |         self.dISthresh = dISthresh
39 |         if minbw is not None:
40 |             self.minrr = np.exp(-np.pi/self.sr*minbw) 
41 | 
42 |     def estimate(self, freqs, amps, order=None):
43 |         if order is None:
44 |             order = self.order
45 |         omegas = 2*np.pi*freqs/self.sr
46 |         amps = np.abs(amps)
47 |         nharm = len(amps)
48 | 
49 |         # imaginary part of z variable
50 |         ejw = np.exp(-1j*omegas * np.arange(0,order+1))
51 |         inv_ejw = np.exp(1j*omegas * np.arange(0,order+1))
52 | 
53 |         # target autocorr matrix
54 |         r = 1/nharm*np.real(amps**2*inv_ejw)
55 |         rmx_inv = sla.inv(sla.toeplitz(r))
56 | 
57 |         # initial guess (LPC)
58 |         use_r = r[:order]
59 |         a = sla.solve_toeplitz(r[:-1],-r[1:])
60 |         # calculate the prediction error
61 |         
62 |         
63 | 
64 |     
65 | 
66 | 


--------------------------------------------------------------------------------
/pypevoc/speech/PitchJumps.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #  PitchJumps.py
  5 | #
  6 | #  Detect pitch jumps in vocal glides
  7 | #  
  8 | #  Copyright 2017 Andre Almeida <a.almeida@unsw.edu.au>
  9 | #  
 10 | #  This program is free software; you can redistribute it and/or modify
 11 | #  it under the terms of the GNU General Public License as published by
 12 | #  the Free Software Foundation; either version 2 of the License, or
 13 | #  (at your option) any later version.
 14 | #  
 15 | #  This program is distributed in the hope that it will be useful,
 16 | #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 | #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 18 | #  GNU General Public License for more details.
 19 | #  
 20 | #  You should have received a copy of the GNU General Public License
 21 | #  along with this program; if not, write to the Free Software
 22 | #  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 23 | #  MA 02110-1301, USA.
 24 | #  
 25 | #  
 26 | 
 27 | import os
 28 | import sys
 29 | import numpy as np
 30 | #import matplotlib.pyplot as pl
 31 | import pandas
 32 | from .. import PV
 33 | from .SpeechChunker import SilenceDetector
 34 | from scipy.stats import ttest_ind
 35 | from scipy.signal import argrelmax
 36 | 
 37 | 
 38 | try:
 39 |     from scipy.io.wavfile import read as wavread
 40 |     from scipy.io.wavfile import write as wavwrite
 41 | except ImportError:
 42 |     sys.stderr.write('Scipy wav reader not found!\nUsing internal reader\n')
 43 |     from AudioInterface import wavLoad as wavread
 44 |     from AudioInterface import wavWrite as wavwrite
 45 | 
 46 | def nextpow2(x):
 47 |     return int(2**np.ceil(np.log2(x)))
 48 | 
 49 | 
 50 | def zscore_wind(x, wleft=5, wright=5, hop=None, kind='mean'):
 51 |     if hop is None:
 52 |         hop = 1
 53 | 
 54 |     zs = np.zeros(len(x))
 55 |     for ii in range(wleft,len(x)-wright,hop):
 56 |         xx = x[ii-wleft:ii+wright]
 57 |         if kind=='mean':
 58 |             mx = np.nanmean(xx)
 59 |             sx = np.std(xx)
 60 |         elif kind == 'median':
 61 |             mx = np.nanmedian(xx)
 62 |             sx = np.percentile(xx,75)-np.percentile(xx,25)
 63 |         zs[ii] = (x[ii]-mx)/sx
 64 |     return zs
 65 | 
 66 | 
 67 | def linreg_err(t, x, wleft=5, wright=5, hop=None):
 68 |     if hop is None:
 69 |         hop = 1
 70 | 
 71 |     zs = np.zeros(len(x))
 72 |     if wright < 0:
 73 |         wm = 0
 74 |     else:
 75 |         wm=wright
 76 |     for ii in range(wleft,len(x)-wm,hop):
 77 |         xx = x[ii-wleft:ii+wright]
 78 |         tt = t[ii-wleft:ii+wright]
 79 |         
 80 |         p = np.polyfit(tt,xx,1)
 81 |         resid = xx-np.polyval(p,tt)
 82 |         std = np.std(resid)
 83 |         zs[ii] = (x[ii]-np.polyval(p,t[ii]))/std
 84 |     return zs
 85 | 
 86 |     
 87 | def linreg2_err(t, x, wleft=5, wright=5, hop=None, use_l=True, use_r=True):
 88 |     if hop is None:
 89 |         hop=1
 90 |         
 91 |     zs = np.zeros(len(x))
 92 |     if wright<0:
 93 |         wm = 0
 94 |     else:
 95 |         wm=wright
 96 |     for ii in range(wleft,len(x)-wm,hop):
 97 |         ts=[]
 98 |         xl = x[ii-wleft:ii]
 99 |         tl = t[ii-wleft:ii]
100 |         xr = x[ii:ii+wright]
101 |         tr = t[ii:ii+wright]
102 | 
103 |         if use_l>0:
104 |             pl = np.polyfit(tl,xl,1)
105 |             residll = xl-np.polyval(pl,tl)
106 |             stdll = np.std(residll)
107 |             residlr = xr-np.polyval(pl,tr)
108 |             stdlr = np.std(residlr)
109 |             ttl,pvl = ttest_ind(residll,residlr)
110 |             ts.append(ttl)
111 | 
112 |         if use_r>0:
113 |             pr = np.polyfit(tr,xr,1)
114 |             residrr = xr-np.polyval(pr,tr)
115 |             stdrr = np.std(residrr)
116 |             residrl = xl-np.polyval(pr,tl)
117 |             stdrl = np.std(residrl)
118 |             ttr,pvr = ttest_ind(residrr,residrl)
119 |             ts.append(-ttr)
120 |         zs[ii] = np.mean(ts)
121 |     return zs
122 | 
123 | 
124 | def avg_interpolator(tn, t, x, twind=0):
125 |     xn = np.zeros(len(tn))
126 |     for ii, tt in enumerate(tn):
127 |         try:
128 |             ior = np.flatnonzero(t > tt+twind)[0]
129 |         except IndexError:
130 |             ior = len(t)
131 |         try:
132 |             iol = np.flatnonzero(t < tt-twind)[-1]
133 |         except IndexError:
134 |             iol = 0
135 | 
136 |         xn[ii] = np.mean(x[iol:ior])
137 |     return xn
138 | 
139 | 
140 | class JumpDetector(object):
141 |     def __init__(self, min_freq=70,
142 |                  pitch_t_hop=0.02,
143 |                  regressor_t=0.5,
144 |                  t_threshold=10,
145 |                  mag_threshold=0.01):
146 |         """
147 |         Pitch jump detector object,
148 | 
149 |         Calculates pitch track and detects jumps by comparing linear
150 |         trends on each side of a smaple
151 | 
152 |         Arguments:
153 |         * min_freq: minimum frequency for pitch detector
154 |         * pitch_t_hop: time between pitch estimates
155 |         * regressor_t: time for estimation of linear slopes
156 |                        in pitch track
157 |         * t_threshold: threshold for t-test comparator
158 |         * mag_threshold: magnitude threshold for pitch track
159 |         """
160 |         self.min_freq = min_freq
161 |         self.pitch_t_hop = pitch_t_hop
162 |         self.mag_threshold = mag_threshold
163 |         self.t_threshold = t_threshold
164 |         self.regressor_t = regressor_t
165 |         self.slope_t = regressor_t
166 | 
167 |     def detect_pitch(self, w, sr):
168 |         nfft = nextpow2(sr/self.min_freq*2)
169 |         n_hop = nextpow2(sr*self.pitch_t_hop)
170 |         pv = PV(w, sr, nfft=nfft, hop=n_hop)
171 |         pv.run_pv()
172 |         self.mag = np.sqrt(np.sum(pv.mag**2, axis=1))
173 |         self.t = pv.get_time_vector()
174 |         self.f0 = pv.calc_f0()
175 |         self.nfft = nfft
176 |         self.nhop = n_hop
177 | 
178 |     def detect_jumps(self):
179 |         wle = int(self.regressor_t/self.pitch_t_hop)
180 |         isel = self.mag > np.max(self.mag)*self.mag_threshold
181 |         tsel = self.t[isel]
182 |         fsel = self.f0[isel]
183 |         self.isel = isel
184 |         #pl.plot(np.flatnonzero(isel),20*np.log10(m[isel]))
185 | 
186 |         le = linreg2_err(tsel, fsel, wleft=wle, wright=wle, use_l=True)
187 |         #ax[0].plot(tsel,fsel)
188 |         #ax[1].plot(tsel,le)
189 |         
190 |         imax = argrelmax(le)[0]
191 |         lemax = le[imax]
192 |         idx = imax[lemax > self.t_threshold]
193 |         ijup = idx
194 |         #ax[0].plot(tsel[idx],fsel[idx],'o')
195 |         #ax[1].plot(tsel[idx],le[idx],'o')
196 |         
197 |         imin = argrelmax(-le)[0]
198 |         lemin = le[imin]
199 |         idx = imin[lemin < -self.t_threshold]
200 |         ijdn = idx
201 |         #ax[0].plot(tsel[idx],fsel[idx],'o')
202 |         #ax[1].plot(tsel[idx],le[idx],'o')
203 | 
204 |         self.down_jump_indices = np.asarray(ijdn)
205 |         self.up_jump_indices = np.asarray(ijup)
206 |         self.down_jump_times = tsel[ijdn]
207 |         self.up_jump_times = tsel[ijup]
208 | 
209 |     def calc_jump_params(self):
210 |         tsel = self.t[self.isel]
211 |         fsel = self.f0[self.isel]
212 |         ijup = self.up_jump_indices
213 |         ijdn = self.down_jump_indices
214 |  
215 |         nsl = int(self.slope_t/self.pitch_t_hop)
216 | 
217 |         alli = np.sort(np.concatenate((ijup, ijdn)))
218 |         # pl.figure()
219 |         #pl.plot(tsel,fsel)
220 |         p = []
221 |         intcpts = []
222 |         sumres = []
223 |         for ii in alli:
224 |             il = max(0, ii-nsl)
225 |             ir = min(ii+nsl, len(fsel))
226 |             polyl = np.polyfit(tsel[il:ii], fsel[il:ii], 1)
227 |             intl = np.polyval(polyl, tsel[ii])
228 |             rsuml = np.sqrt(np.nansum((fsel[il:ii]-np.polyval(polyl, tsel[il:ii]))**2)/(ii-il))
229 | 
230 |             polyr = np.polyfit(tsel[ii+1:ir], fsel[ii+1:ir], 1)
231 |             intr = np.polyval(polyr, tsel[ii])
232 |             rsumr = np.sqrt(np.nansum((fsel[ii+1:ir]-np.polyval(polyr, tsel[ii+1:ir]))**2)/(ir-ii))
233 | 
234 |             #pl.plot(tsel[il:ii+1],np.polyval(polyl,tsel[il:ii+1]),color='r',alpha=.5)
235 |             #pl.plot(tsel[ii:ir],np.polyval(polyr,tsel[ii:ir]),color='m',alpha=.5)
236 | 
237 |             p.append([polyl, polyr])
238 |             intcpts.append([intl, intr])
239 |             sumres.append([rsuml, rsumr])
240 | 
241 |         self.intcpts = np.array(intcpts)
242 |         self.sumres = np.array(sumres)
243 | 
244 |         #pl.plot(tsel[alli],intcpts[:,0],'o')
245 |         #pl.plot(tsel[alli],intcpts[:,1],'o')
246 | 
247 |     def process(self, w, sr):
248 |         """
249 |         process pitch tracking and jump detection
250 |         """
251 |         self.detect_pitch(w, sr)
252 |         self.detect_jumps()
253 |         self.calc_jump_params()
254 |         return np.sort(np.concatenate([self.up_jump_times,
255 |                                        self.down_jump_times]))
256 | 
257 |     def get_jump_table(self):
258 |         allt = np.sort(np.concatenate([self.up_jump_times,
259 |                                        self.down_jump_times]))
260 | 
261 |         df = pandas.DataFrame({'segment_time': allt,
262 |                                'f_before': self.intcpts[:, 0],
263 |                                'f_after': self.intcpts[:, 1],
264 |                                'f_cent': np.mean(self.intcpts, axis=1),
265 |                                'df': (np.diff(self.intcpts, axis=1))[:, 0],
266 |                                'residue_before': self.sumres[:, 0],
267 |                                'residue_after': self.sumres[:, 1],
268 |                                'residue_total': np.sum(self.sumres, axis=1)})
269 | 
270 |         return df
271 | 
272 | 
273 | def segment_and_detect_jumps(w, sr, **kwargs):
274 |     sc = SilenceDetector(w, sr, fmin=50, fmax=1000)
275 |     jd = JumpDetector(**kwargs)
276 |     df = pandas.DataFrame()
277 |     for ii, (tst, tend) in enumerate(zip(sc.tst, sc.tend)):
278 |         ww = w[int(tst*sr):int(tend*sr)]
279 |         tjmp = jd.process(ww, sr)
280 |         try:
281 |             dfi = jd.get_jump_table()
282 |             dfi['rec_time'] = dfi['segment_time']+tst
283 |             dfi['region_nbr'] = ii
284 |             df = df.append(dfi, ignore_index=True)
285 |         except IndexError:
286 |             sys.stderr.write("Jump table empty between {:.2f} and {:.2f}\n".format(tst,tend))
287 | 
288 |     segments = pandas.DataFrame({'nbr': np.arange(len(sc.tst)), 'start': sc.tst,
289 |                                  'end': sc.tend})
290 |     return df, segments
291 | 
292 | def file_reader(filename, chan):
293 |     file_base, file_ext = os.path.splitext(filename)
294 |     if file_ext.lower() == '.aup':
295 |         import audacity
296 |         aud = audacity.Aup(filename)
297 |         w = aud.get_channel_data(chan)
298 |         sr = aud.rate
299 |     else:
300 |         sys.stderr.write("Format not recognized: %s" % file_ext)
301 |         return
302 |     return(sr, w)
303 | 
304 | 
305 | def pitch_jump_file(filename, channel_nbr=0):
306 |     sr, w = file_reader(filename, channel_nbr)
307 |     df,dfs = segment_and_detect_jumps(w, sr)
308 |     df.to_csv('pitch_jumps.csv')
309 |     dfs.to_csv('segments.csv')
310 | 


--------------------------------------------------------------------------------
/pypevoc/speech/SpeechAnalysis.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import sys
  3 | import scipy.signal as sig
  4 | import scipy.linalg as lg
  5 | from scipy.io import wavfile
  6 | from .. import FFTFilters as ftf
  7 | from ..PeakFinder import PeakFinder
  8 | 
  9 | def lpc(w, order, axis=-1):
 10 |     """
 11 |     Calculate the lpc coefficients of the waveform
 12 |     """
 13 | 
 14 |     nsamp = w.shape[axis]
 15 |     if order > nsamp:
 16 |         raise ValueError('Order must be smaller than size of vector')
 17 | 
 18 |     r = np.correlate(w, w, 'full')
 19 |     #use_r = np.zeros(order+1)
 20 |     #use_r[:order+1] = r[nsamp-1:nsamp+order]
 21 |     use_r = r[nsamp-1:nsamp+order]
 22 |     a = lg.solve_toeplitz(use_r[:-1], -use_r[1:])
 23 | 
 24 |     return a
 25 | 
 26 | def refine_max(x, pos):
 27 |     '''
 28 |     Given the position of a peak pos in a series x,
 29 |     interpolate the position assuming that the peak is
 30 |     approximated by a quadratic function
 31 |     '''
 32 |     if pos==0:
 33 |         pos=1
 34 | 
 35 |     sur = x[pos-1:pos+2]
 36 | 
 37 |     if sur[1]>sur[0] and sur[1]>sur[2]:
 38 |         c = sur[1]
 39 |         b = (sur[2] - sur[0])/2
 40 |         a = (sur[2] + sur[0])/2 - c
 41 | 
 42 |         lpos = - b/2/a
 43 |         fpos = float(pos) + lpos
 44 |         fval = a*lpos*lpos + b*lpos + c
 45 |         #print "rpos = %d; rval = %f; val = %f; dpos = %f; pos = %f"%(pos,sur[1],fval, lpos, fpos)
 46 | 
 47 |     else:
 48 |         fpos = pos
 49 |         fval = sur[1]
 50 | 
 51 |     return fpos,fval.tolist()
 52 | 
 53 | 
 54 | def DistribMoments(x,f, MaxMoments=4):
 55 |     '''Calculate the moments in a distribution f(x)
 56 |        x: abcissa - values at which distribution is given
 57 |        f: value -   values of the distribution
 58 |        MaxMoments: maximum moment order to return
 59 | 
 60 |        returns:
 61 |        COG: center of gravity
 62 |        StDev: standard deviation
 63 |        skew: skewness
 64 |        kurt: kurtosis
 65 |        Moments: array with all raw central moments
 66 |     '''
 67 | 
 68 |     moments = []
 69 |     m0 = np.sum(f)
 70 |     m1 = np.sum(f*x)/m0
 71 |     moments.append(m1)
 72 |     for mn in range(1,MaxMoments):
 73 |         moments.append(np.sum((x-m1)**(mn+1)*f)/m0)
 74 | 
 75 |     cog = m1
 76 |     stdev = np.sqrt(moments[1])
 77 |     skew = moments[2]/moments[1]**1.5
 78 |     kurt = moments[3]/moments[1]**2 - 3.
 79 | 
 80 |     return cog,stdev,skew,kurt,moments
 81 | 
 82 | def SpectralMoments(w, Fs, tWind=0.025, tHop=0.0125,
 83 |                     windFunc=sig.hamming, fCut=300, maxMoments=4):
 84 |     '''Calculates spectral moments in short windows  of signal w
 85 | 
 86 |         w:          signal
 87 |         Fs:         sample rate
 88 |         tWind:      window length in seconds
 89 |         tHop:       hop length in seconds
 90 |         windFunc:   windowing function
 91 |         fCut:       high-pass cutoff
 92 |         MaxMoments: maximum moment order to return
 93 | 
 94 |        returns:
 95 |         cog:     center of gravity
 96 |         stdev:     standard deviation
 97 |         skew:    skewness
 98 |         kurt:    kurtosis
 99 |         moments: all the moments
100 | 
101 | 
102 |     '''
103 | 
104 |     wLen = len(w)
105 |     hopLenSam = int(np.round(Fs*tHop));
106 |     windowLenSam = int(np.round(Fs*tWind));
107 |     #print 'SpectralMoments: Fs={}; wLen={}; hop={}'.format(Fs,windowLenSam,hopLenSam)
108 |     specLen = int(windowLenSam/2)
109 | 
110 |     dt = 1./Fs
111 |     nFrames = int((wLen - windowLenSam-1)/hopLenSam)
112 | 
113 |     wind = windFunc(windowLenSam)
114 |     SxxSum = np.zeros(specLen)
115 |     freqS = (np.arange(specLen))*float(Fs)/windowLenSam
116 | 
117 |     for FN in np.arange(nFrames):
118 |         I0 = (FN)*hopLenSam;
119 |         Iend = I0 + windowLenSam;
120 |         X = w[I0:Iend];
121 | 
122 |         XW = X*sig.hamming(len(X));
123 |         XF = np.fft.fft(XW)
124 |         Sxx = np.abs(XF)**2
125 |         SxxSum = SxxSum + Sxx[0:specLen]
126 | 
127 |     # periodogram
128 |     SxxS = np.sqrt(SxxSum/float(nFrames))
129 | 
130 |     # intensity
131 |     intens = np.sqrt(np.mean(SxxSum/float(nFrames)))
132 | 
133 |     # filter out low frequencies
134 |     idx = freqS > fCut
135 | 
136 |     # compute moments
137 |     cog, std, skew, kurt, mm=DistribMoments(freqS[idx], SxxS[idx], maxMoments)
138 | 
139 |     return dict(cog=cog, std=std, skew=skew, kurt=kurt, level=intens)
140 | 
141 | def Periodogram(w, Fs, tWind=0.025, tHop=0.0125,
142 |                     windFunc=sig.hamming):
143 |     '''Calculates spectral moments in short windows  of signal w
144 | 
145 |         w:          signal
146 |         Fs:         sample rate
147 |         tWind:      window length in seconds
148 |         tHop:       hop length in seconds
149 |         windFunc:   windowing function
150 | 
151 |        returns:
152 |         Sxx:       power spectrum
153 |         f:         frequency values
154 | 
155 | 
156 |     '''
157 | 
158 |     wLen = len(w)
159 |     hopLenSam = int(np.round(Fs*tHop));
160 |     windowLenSam = int(np.round(Fs*tWind));
161 |     #print 'SpectralMoments: Fs={}; wLen={}; hop={}'.format(Fs,windowLenSam,hopLenSam)
162 |     specLen = windowLenSam/2
163 | 
164 |     dt = 1./Fs
165 |     nFrames = (wLen - windowLenSam-1)/hopLenSam
166 | 
167 |     wind = windFunc(windowLenSam)
168 |     SxxSum = np.zeros(specLen)
169 |     freqS = (np.arange(specLen))*float(Fs)/windowLenSam
170 | 
171 |     for FN in np.arange(nFrames):
172 |         I0 = (FN)*hopLenSam;
173 |         Iend = I0 + windowLenSam;
174 |         X = w[I0:Iend];
175 | 
176 |         XW = X*sig.hamming(len(X));
177 |         XF = np.fft.fft(XW)
178 |         Sxx = np.abs(XF)**2
179 |         SxxSum = SxxSum + Sxx[0:specLen]
180 | 
181 |     # periodogram
182 |     Sxx = (SxxSum/float(nFrames))
183 | 
184 |     return Sxx, freqS
185 | 
186 | def lpc2form(a, Fs=1.0):
187 |     '''
188 |     Convert all-pole coefficients to resonance frequencies
189 |     and bandwidths
190 | 
191 |     a: LPC coefficients (all-pole coefficients excluding order 0)
192 |     Fs: sampling rate
193 |     '''
194 |     RTS = np.roots(np.concatenate(([1],a)));
195 | 
196 |     # roots are complex conjugate pairs
197 |     RTS = RTS[np.imag(RTS)>=0];
198 |     AngZ = np.arctan2(np.imag(RTS),np.real(RTS));
199 | 
200 |     # Convert normalised frequency to freq.
201 |     nFreq = AngZ*(Fs/(2*np.pi))
202 |     Indices = np.argsort(nFreq);
203 |     FreqS = nFreq[Indices]
204 |     FreqS = FreqS[FreqS>0]
205 | 
206 |     # Bandwidths are the distance to the unit circle
207 |     BW = -1/2*(Fs/(2*np.pi))*np.log(np.abs(RTS[Indices]))
208 | 
209 |     return FreqS, BW
210 | 
211 | def lpc2form_full(a, Fs=1.0, npts=1024):
212 |     FreqS, BW = lpc2form(a, Fs)
213 |     omega, h = sig.freqz([1],np.concatenate(([1], a)), worN=npts)
214 |     f = omega/np.pi * Fs/2
215 |     pks = PeakFinder(x=f, y=np.abs(h))
216 |     pks.refine_all()
217 | 
218 |     return FreqS, BW, pks.pos, pks.val
219 | 
220 | def Formants(w, Fs, tWind=0.025, tHop=0.0125,
221 |                     fMin=50, fMax=5500, bwMax=400,
222 |                     modelOrd=10, hpFreq=50, full=False):
223 |     '''Estimate formants from waveform w with sample rate Fs
224 | 
225 |        tWind:      window length in seconds
226 |        tHop:       hop length in seconds
227 |        fMin:       minimum frequency of formant in Hz
228 |        fMax:       maximum frequency of formant in Hz
229 |                     (determines resampling rate)
230 |        bwMax:      maximum bandwidth (Hz)
231 |        modelOrder: model order for linear prediction (LPC)
232 |        hpFreq:     cutoff frequency of pre-emphasis filter
233 |                     (high-pass, 1st order)
234 |        full:       also calclate amplitudes and freqs of peaks
235 |     '''
236 | 
237 |     # pre-emphasise
238 |     #
239 |     if hpFreq>0:
240 |         a=np.exp(-2.*np.pi*hpFreq/float(Fs));
241 |         #preEmphA = [a,1-a];
242 |         #wo = sig.lfilter([1],preEmphA,w);
243 |         wo=w
244 |         wo[:-1] -= wo[1:]
245 |     else:
246 |         wo=w
247 | 
248 |     # resample the original wave file
249 |     # AnalysisFs = 8000;
250 | 
251 |     underSample = int(Fs/fMax/2);
252 |     FsO = Fs;
253 | 
254 |     # Fourier method: can be slow!
255 |     #w = sig.resample(wo,len(wo)/underSample);
256 | 
257 |     # Resample: polyhase method (only in scipy v18.1)
258 |     w = sig.resample_poly(wo,1,underSample);
259 | 
260 |     Fs = int(FsO*len(w)/float(len(wo)));
261 |     Fsf = float(Fs)
262 | 
263 |     wLen = len(w);
264 | 
265 |     hopLenSam = int(round(Fs*tHop));
266 |     windowLenSam = int(round(Fs*tWind));
267 |     #print 'Formant:         Fs={}; wLen={}; hop={}'.format(Fs,windowLenSam,hopLenSam)
268 | 
269 |     dt = 1./Fs;
270 |     nFrames = int(np.floor((wLen-windowLenSam-1)/hopLenSam))
271 | 
272 |     Form = np.nan*np.ones((nFrames,int(modelOrd/2)));
273 |     BandWidths = np.nan*np.ones((nFrames,int(modelOrd/2)));
274 |     if full:
275 |         Peaks = np.nan*np.ones((nFrames,int(modelOrd/2)));
276 |         Amplitudes = np.nan*np.ones((nFrames,int(modelOrd/2)));
277 |     Time = np.arange(nFrames+0)*hopLenSam/Fsf+windowLenSam/Fsf/2
278 | 
279 | 
280 |     for FN in np.arange(nFrames):
281 |         I0 = (FN)*hopLenSam;
282 |         Iend = I0 + windowLenSam;
283 |         X = w[I0:Iend];
284 | 
285 |         XW = X*sig.hamming(len(X));
286 |         #XW = X*sig.gaussian(len(X),0.4);
287 | 
288 |         # pre-emphasis filter
289 |         # all-pole high pass filter
290 | 
291 |         #PreEmph = [1 0.63];
292 |         #XW = filter(1,PreEmph,XW);
293 | 
294 |         # call LPC
295 |         # A, err, rcoeff = lpc(XW,modelOrd);
296 |         A = lpc(XW,modelOrd);
297 | 
298 |         if full:
299 |             FreqS, BW, pkF, pkA = lpc2form_full(A, Fs)
300 |         else:
301 |             FreqS, BW = lpc2form(A, Fs)
302 |         NN = 0
303 |         for KK in range(len(FreqS)):
304 |             if (FreqS[KK] > fMin and FreqS[KK] < fMax-fMin and BW[KK] <bwMax):
305 |                 Form[FN, NN] = FreqS[KK]
306 |                 BandWidths[FN, NN] = BW[KK]
307 |                 if full:
308 |                     if len(pkF)>0:
309 |                         idx = np.argmin(np.abs(pkF-FreqS[KK]))
310 |                         Peaks[FN, NN] = pkF[idx]
311 |                         Amplitudes[FN, NN] = pkA[idx]
312 |                 NN = NN + 1
313 |             else:
314 |                 #print('Rejected f={}, bw={}'.format(FreqS[KK],BW[KK]))
315 |                 pass
316 |     if full:
317 |         return Time, Form, BandWidths, Peaks, Amplitudes
318 |     else:
319 |         return Time, Form, BandWidths
320 | 
321 | def rmsWind(w, nwind=256, nhop=None, windfunc=np.ones, sr=1):
322 |     '''
323 |     calculate RMS values in window chunks of data
324 |     '''
325 | 
326 |     if not nhop:
327 |         nhop=nwind/2
328 | 
329 |     i=0
330 | 
331 |     nw=len(w)
332 | 
333 |     tl=[]
334 |     rl=[]
335 | 
336 |     wvr = windfunc(nwind)
337 |     wvnorm = np.sqrt(sum(wvr**2)/nwind)
338 | 
339 |     wv = wvr/wvnorm
340 | 
341 |     while i<nw-nwind:
342 |         rl.append(np.std(w[i:i+nwind]*wv))
343 |         tl.append((i+nwind/2)/float(sr))
344 |         i+=nhop
345 | 
346 |     return np.array(tl),np.array(rl)
347 | 
348 | 
349 | def FricativeDataFromSnip(w, nwind=256, sr=1):
350 |     fricdata = dict()
351 | 
352 |     freq,Sww = sig.welch(w,nperseg=nwind,fs=sr)
353 | 
354 |     logSww = 10*np.log10(Sww)
355 | 
356 |     # Peak position
357 |     imax = np.argmax(Sww)
358 |     maxref,maxv = refine_max(logSww,imax)
359 |     fmax = np.interp(maxref,np.arange(len(Sww)),freq)
360 |     fricdata['f_max']=fmax
361 | 
362 |     # RMS
363 |     rms = np.std(w)
364 |     fricdata['rms']=rms
365 | 
366 |     # spectral slopes
367 |     p_left = np.polyfit(freq[:imax+1],logSww[:imax+1],1)
368 |     slp_left = p_left[0]*1000
369 | 
370 |     p_right = np.polyfit(freq[imax:],logSww[imax:],1)
371 |     slp_right = p_right[0]*1000
372 | 
373 | 
374 |     fricdata['slope_left']=(slp_left)
375 |     fricdata['slope_right']=(slp_right)
376 | 
377 | 
378 |     cent,stdev,skew,kurt,_=DistribMoments(freq,Sww)
379 |     fricdata['cent']=(cent)
380 |     fricdata['stdev']=(stdev)
381 |     fricdata['skew']=(skew)
382 |     fricdata['kurt']=(kurt)
383 | 
384 |     return fricdata
385 | 
386 | def FricativeDataFromWav(wavname, intervals=[], pos=[0.5],
387 |                         hpFreq=50,wsize=1024):
388 |     sr,w = wavfile.read(wavname)
389 |     if hpFreq>0:
390 |         wo = ftf.preemph(w,Fs=sr,hpFreq=hpFreq)
391 |     else:
392 |         wo = w
393 | 
394 |     tamp,amp = rmsWind(w,nwind=wsize*4)
395 | 
396 |     fdict = {pp:pd.DataFrame() for pp in pos}
397 | 
398 |     for st,end in intervals:
399 |         kept=[]
400 |         for pp in pos:
401 |             print('{:.3f}-{:.3f} @ {}'.format(st,end,pp))
402 |             try:
403 |                 duration = end-st
404 |                 imed = int((st+duration*(pp))*sr)
405 |                 imin = int(imed-wsize/2)
406 |                 imax = int(imed+wsize/2)
407 |                 if imin < st*sr:
408 |                     imin = int(row[('all',start_col)]*sr)
409 |                     imax = int(imin+wsize)
410 |                 if imax > end*sr:
411 |                     imax = int(row[('all',end_col)]*sr)
412 |                     imin = int(imax - wsize)
413 | 
414 |                 ww = wo[imin:imax]
415 | 
416 |                 fricdata = FricativeData(ww,sr=sr,nwind=wsize)
417 | 
418 |                 fdict[pos].append(fricdata)
419 |             except ValueError as e:
420 |                 print(' ERROR at {}-{}'.format(st,end))
421 |                 print(e)
422 |                 fdict[pos].append([])
423 |     return fdict
424 | 


--------------------------------------------------------------------------------
/pypevoc/speech/SpeechChunker.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | #  SpeechChunkerer.py
  5 | #  
  6 | #  Copyright 2017 Andre Almeida <a.almeida@unsw.edu.au>
  7 | #  
  8 | #  This program is free software; you can redistribute it and/or modify
  9 | #  it under the terms of the GNU General Public License as published by
 10 | #  the Free Software Foundation; either version 2 of the License, or
 11 | #  (at your option) any later version.
 12 | #  
 13 | #  This program is distributed in the hope that it will be useful,
 14 | #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | #  GNU General Public License for more details.
 17 | #  
 18 | #  You should have received a copy of the GNU General Public License
 19 | #  along with this program; if not, write to the Free Software
 20 | #  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 21 | #  MA 02110-1301, USA.
 22 | #  
 23 | #  
 24 | 
 25 | import sys
 26 | import os
 27 | 
 28 | import numpy as np
 29 | 
 30 | from .SpeechAnalysis import rmsWind
 31 | 
 32 | try:
 33 |     from scipy.io.wavfile import read as wavread
 34 |     from scipy.io.wavfile import write as wavwrite
 35 | except ImportError:
 36 |     sys.stderr.write('Scipy wav reader not found!\nUsing internal reader\n')
 37 |     from AudioInterface import wavLoad as wavread
 38 |     from AudioInterface import wavWrite as wavwrite
 39 | 
 40 | class SilenceDetector(object):
 41 |     '''
 42 |     Detects regions of silence in a sound file
 43 |     '''
 44 |     
 45 |     def __init__(self, x, sr=1, wind_sec=0.92, method = 'pct05',
 46 |                  min_len = 0.1, max_len=5, fmin=None, fmax=None):
 47 |         '''
 48 |         crate silence detector
 49 |         '''
 50 |         self.x=x
 51 |         self.sr=sr
 52 |         self.nwind=int(wind_sec*sr)
 53 |         
 54 |         if fmin is None and fmax is None:
 55 |             self._calc_amplitude(nwind=self.nwind)
 56 |         else:
 57 |             self._calc_band_amplitude(nwind=self.nwind, fmin=fmin, fmax=fmax)
 58 |         if method[0:3].lower()=='pct':
 59 |             try:
 60 |                 pctval = int(method[3:5])
 61 |             except TypeError:
 62 |                 pctval = 5
 63 |             self._percentile_discriminator(pct=pctval)
 64 |         elif method=='kmeans':
 65 |             self._k_means_discriminator()
 66 |         
 67 |         #return self._clusters_to_time_int()
 68 |         self.tst, self.tend = self._clusters_to_time_int(min_int=min_len,
 69 |                                                          max_int=max_len)
 70 |         
 71 |     def _calc_amplitude(self,nwind=4096):
 72 |         '''
 73 |         calculates amplitude for amplitude discriminator
 74 |         '''
 75 |         self.nfr = int(nwind/2)
 76 |         self.at, ampl = rmsWind(self.x,sr=self.sr,nwind=self.nwind,
 77 |                                     nhop = self.nfr)
 78 |         self.ax = 20*np.log10(ampl)
 79 | 
 80 |     def _calc_band_amplitude(self,nwind=4096,fmin=50,fmax=5000):
 81 |         '''
 82 |         calculates amplitude in a frequency band for amplitude discriminator
 83 |         '''
 84 |         from ..FFTFilters import FilterBank, PiecewiseFilterSpec
 85 |         self.nfr = int(nwind/2)
 86 |         if fmin is None:
 87 |             fb = FilterBank([PiecewiseFilterSpec(freq=fmax,mode='lp',sr=self.sr)],
 88 |                             sr=self.sr,nwind=self.nwind,nhop=self.nfr)
 89 | 
 90 |         elif fmax is None:
 91 |             fb = FilterBank([PiecewiseFilterSpec(freq=fmin,mode='hp',sr=self.sr)],
 92 |                             sr=self.sr,nwind=self.nwind,nhop=self.nfr)
 93 |         else:
 94 |             fb = FilterBank([PiecewiseFilterSpec(freq=[fmin,fmax],mode='bp',sr=self.sr)],
 95 |                             sr=self.sr,nwind=self.nwind,nhop=self.nfr)
 96 |         
 97 |         ampl, self.at = fb.specout(self.x)
 98 |         self.ax = 20*np.log10(ampl.flatten())
 99 |         
100 |     def _k_means_discriminator(self, batch_size=45):
101 |         from sklearn.cluster import MiniBatchKMeans
102 |         from sklearn.metrics.pairwise import pairwise_distances_argmin
103 |         
104 |         mbk = MiniBatchKMeans(init='k-means++', n_clusters=2, batch_size=batch_size,
105 |                       n_init=10, max_no_improvement=10, verbose=0)
106 |         #t0 = time.time()
107 |         X = np.log10(self.ax.reshape(-1, 1))
108 |         mbk.fit(X)
109 |         cc = np.sort(mbk.cluster_centers_,axis=0)
110 |         self.clusters = pairwise_distances_argmin(X,cc)
111 |         
112 | 
113 |         
114 |     def _percentile_discriminator(self, pct=5):
115 |         '''
116 |         calculate threshold based on percentiles
117 |         
118 |         arguments:
119 |         pct: percentile value
120 |         '''
121 |         self.amin = np.percentile(self.ax, pct)
122 |         self.amax = np.percentile(self.ax, 100-pct)
123 |         self.ath  = (self.amax+self.amin)/2
124 |         
125 |         self.clusters = np.zeros(self.ax.shape[0],dtype='int')
126 |         self.clusters[self.ax>self.ath]=1
127 | 
128 |     def _clusters_to_time_int(self, min_int=0.0, max_int=None):
129 |         
130 |         tfr = self.nfr/float(self.sr)
131 |         min_frames = int(np.round(min_int/tfr))
132 |         if max_int:
133 |             maxlen = int(max_int/tfr)
134 |         else:
135 |             maxlen = len(self.ax)
136 |         
137 |         lastsplit=0
138 |         
139 |         i=0
140 |         
141 |         off=True
142 |         
143 |         nframes = len(self.clusters)
144 |         
145 |         tst=[]
146 |         tend=[]
147 |         
148 |         while i+min_frames<nframes:
149 |             if self.clusters[i]>0 and off:
150 |                 tst.append(self.at[max(0,i-1)])
151 |                 off=False
152 |                 i+=1
153 |             elif self.clusters[i]<=0 and not off:
154 |                 if all(self.clusters[i:i+min_frames] <=0 ):
155 |                     off=True
156 |                     tend.append(self.at[i])
157 |                     i+=min_frames
158 |                 else:
159 |                     i+=1
160 |             else:
161 |                 i+=1
162 |         if not off:
163 |             tend.append(self.at[-1])
164 |         return tst,tend
165 |     
166 |     def to_textgrid(self, filename='segmentation.TextGrid', 
167 |                     tiername='Segmentation'):
168 |         
169 |         from pympi import TextGrid
170 |         tg=TextGrid(xmax=max(self.tend))
171 |         tier=tg.add_tier(tiername)
172 |         for ii, (ts,te,lab) in enumerate(zip(self.tst,self.tend,self.label)):
173 |             tier.add_interval(ts,te,'{}'.format(lab))
174 |         
175 |         tg.to_file(filename)
176 | 
177 |     def output(self, file_handle):
178 |         for ii, (ts,te,lab) in enumerate(zip(self.tst,self.tend,self.label)):
179 |             file_handle.write('{},{},{}\n'.format(ts,te,lab))
180 | 
181 |     def recognise(self, mode='sphinx', marg=0.2):
182 |         import speech_recognition as srec
183 |         # use the audio file as the audio source
184 |         r = srec.Recognizer()
185 |         
186 |         if mode=='sphinx':
187 |             recogniser = r.recognize_sphinx
188 |             sys.stderr.write('Doing speech recognition with sphinx\n')
189 |         if mode=='google':
190 |             sys.stderr.write('Doing speech recognition with google\n')
191 |             recogniser = r.recognise_google
192 |         
193 |         for ii, (ts,te,lab) in enumerate(zip(self.tst,self.tend,self.label)):
194 |             tstart = ts-marg
195 |             tend = te+marg
196 |             wo=self.x[int(tstart*self.sr):int(tend*self.sr)]
197 |     
198 |             wavwrite('speech_sample.wav',self.sr,wo.astype('int16'))
199 |     
200 |             with srec.AudioFile('speech_sample.wav') as source:
201 |                 audio = r.record(source)  # read the entire audio file
202 |     
203 |             try:
204 |                 # for testing purposes, we're just using the default API key
205 |                 # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
206 |                 # instead of `r.recognize_google(audio)`
207 |                 utt = recogniser(audio)
208 |                 #utt = r.recognize_sphinx(audio)
209 |                 self.label[ii] = utt
210 |                 sys.stderr.write('{}\n'.format(utt))
211 |             except srec.UnknownValueError:
212 |                 sys.stderr.write("Speech Recognition could not understand audio\n")
213 |             except srec.RequestError as e:
214 |                 sys.stderr.write("Could not request results {}\n".format(e))
215 | 
216 | class MultiChannelSegmenter(object):
217 |     '''
218 |     Detects regions from different sources from multi-channel recordings
219 |     '''
220 |     
221 |     def __init__(self, x, sr=1, nwind=4096, method = 'kmeans',
222 |                  nsources = 2, min_len = 0.1, max_len=5):
223 |         '''
224 |         crate multi-channel analyser
225 |         '''
226 |         self.x=x
227 |         self.sr=sr
228 |         self.nwind=nwind
229 |         self.nsources = nsources
230 |         
231 |         self._calc_amplitude(nwind=nwind)
232 |         if method[0:3].lower()=='pct':
233 |             try:
234 |                 pctval = int(method[3:5])
235 |             except TypeError:
236 |                 pctval = 5
237 |             self._percentile_discriminator(pct=pctval)
238 |         elif method=='kmeans':
239 |             self._k_means_discriminator()
240 |         
241 |         #return self._clusters_to_time_int()
242 |         self.tst, self.tend, self.label = self._clusters_to_time_int(
243 |                                                         min_int=min_len,
244 |                                                         max_int=max_len)
245 |         
246 |     def _calc_amplitude(self,nwind=4096):
247 |         '''
248 |         calculates amplitude for amplitude discriminator
249 |         '''
250 |         self.nfr = int(nwind/2)
251 |         
252 |         ax=[]
253 |         for i in range(self.x.shape[1]):
254 |             self.at, axi = rmsWind(self.x[:,i],sr=self.sr,
255 |                                   nwind=self.nwind,
256 |                                   nhop = self.nfr)
257 |             ax.append(axi)
258 |         self.ax = np.array(ax).T
259 |         self.dt = self.at[1]-self.at[0]
260 |         
261 |     def _k_means_discriminator(self, batch_size=45):
262 |         from sklearn.cluster import MiniBatchKMeans
263 |         from sklearn.metrics.pairwise import pairwise_distances_argmin
264 |         
265 |         mbk = MiniBatchKMeans(init='k-means++', n_clusters=self.nsources+1, 
266 |                               batch_size=batch_size,
267 |                               n_init=10, max_no_improvement=10, verbose=0)
268 |         #t0 = time.time()
269 |         X = np.log10(self.ax)
270 |         mbk.fit(X)
271 |         cc = np.zeros(mbk.cluster_centers_.shape)
272 |         # index of cluster corresponding to silence
273 |         idx_silence = np.argmin(np.sum(mbk.cluster_centers_,axis=1))
274 |         cc[0,:] = mbk.cluster_centers_[idx_silence,:]
275 |         idx_free = range(cc.shape[0])
276 |         idx_free.remove(idx_silence)
277 |         cred = mbk.cluster_centers_-cc[0,:]
278 |         # remaining indexes, sort them by channel
279 |         used_chan=[]
280 |         nchan = cc.shape[1]
281 |         last_unmatched=0
282 |         while idx_free:
283 |             crem = cred[idx_free,:]
284 |             r,idx_chan = np.unravel_index(crem.argmax(),crem.shape)
285 |             idx_center = idx_free[r]
286 |             if idx_chan not in used_chan:
287 |                 this_center = idx_chan+1
288 |             else:
289 |                 # append to end of list
290 |                 this_center = cc.shape[0]-last_unmatched-1
291 |                 sys.stderr.write('Cluster {} not matched to channel\n'.format(idx_center))
292 |             cc[this_center,:]=mbk.cluster_centers_[idx_center,:]
293 |             used_chan.append(idx_chan)
294 |             idx_free.remove(idx_center)
295 |         
296 |         cc[1:,:] = np.delete(mbk.cluster_centers_,idx_silence,axis=0)
297 |         #cc = mbk.cluster_centers_[idxs,:]
298 |         self.clusters = pairwise_distances_argmin(X,cc)
299 |         self.centers = cc
300 |         
301 | 
302 |     def _clusters_to_time_int(self, min_int=0.0, max_int=None):
303 |         
304 |         tfr = self.nfr/float(self.sr)
305 |         min_frames = int(np.round(min_int/tfr))
306 |         if max_int:
307 |             maxlen = int(max_int/tfr)
308 |         else:
309 |             maxlen = len(self.ax)
310 |         
311 |         lastsplit=0
312 |         
313 |         i=0
314 |         
315 |         off=True
316 |         
317 |         nframes = len(self.clusters)
318 |         
319 |         tst=[]
320 |         tend=[]
321 |         label=[]
322 |         
323 |         lastlabel = 0
324 |         
325 |         while i+min_frames<nframes:
326 |             if self.clusters[i]>0 and off:
327 |                 tst.append(self.at[max(0,i-1)])
328 |                 label.append(self.clusters[i])
329 |                 lastlabel = self.clusters[i]
330 |                 off=False
331 |                 i+=1
332 |             elif self.clusters[i]<=0 and not off:
333 |                 if all(self.clusters[i:i+min_frames] <=0 ):
334 |                     off=True
335 |                     tend.append(self.at[i])
336 |                     i+=min_frames
337 |                 else:
338 |                     i+=1
339 | 
340 |             elif self.clusters[i] != lastlabel and not off:
341 |                 tend.append(self.at[i-1])#-self.dt/2)
342 |                 tst.append(self.at[i-1])
343 |                 label.append(self.clusters[i])
344 |                 lastlabel = self.clusters[i]
345 |                 off=False
346 |                 i+=1
347 | 
348 |             else:
349 |                 i+=1
350 |         if not off:
351 |             tend.append(self.at[-1])
352 |         return tst,tend,label
353 |     
354 |     def to_textgrid(self, filename='mc_segmentation.TextGrid', 
355 |                     tiername='Segmentation'):
356 |         
357 |         from pympi import TextGrid
358 |         tg=TextGrid(xmax=max(self.tend))
359 |         tier=tg.add_tier(tiername)
360 |         for ii, (ts,te,lab) in enumerate(zip(self.tst,self.tend,self.label)):
361 |             tier.add_interval(ts,te,'{}'.format(lab))
362 |         
363 |         tg.to_file(filename)
364 |         
365 |     def output(self, file_handle):
366 |         for ii, (ts,te,lab) in enumerate(zip(self.tst,self.tend,self.label)):
367 |             filehandle.write('{},{},{}'.format(ts,te,lab))
368 |             
369 |     def recognise(self, mode='sphinx', marg=0.2):
370 |         # use the audio file as the audio source
371 |         r = srec.Recognizer()
372 |         
373 |         if mode=='sphinx':
374 |             recogniser = r.recognize_sphinx
375 |         if mode=='google':
376 |             recogniser = r.recognise_google
377 |         
378 |         for ii, (ts,te,lab) in enumerate(zip(self.tst,self.tend,self.label)):
379 |             tstart = st-marg
380 |             tend = end+marg
381 |             wo=w[int(tstart*fs):int(tend*fs)]
382 |     
383 |             wavwrite('speech_sample.wav',fs,wo)
384 |     
385 |             with srec.AudioFile('speech_sample.wav') as source:
386 |                 audio = r.record(source)  # read the entire audio file
387 |     
388 |             try:
389 |                 # for testing purposes, we're just using the default API key
390 |                 # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
391 |                 # instead of `r.recognize_google(audio)`
392 |                 utt = recogniser(audio)
393 |                 #utt = r.recognize_sphinx(audio)
394 |                 self.label[ii] = utt
395 |             except srec.UnknownValueError:
396 |                 print("Speech Recognition could not understand audio")
397 |             except srec.RequestError as e:
398 |                 print("Could not request results {}".format(e))
399 |         
400 | def output_results(seg, output_csv='', output_text_grid=''):
401 |     if output_text_grid:
402 |         seg.to_textgrid(output_text_grid)
403 | 
404 |     if output_csv:
405 |         with open(output_csv,'w') as f:
406 |             seg.output(f)
407 |     else:
408 |         seg.output(sys.stdout)
409 | 
410 | 
411 | def analyse_rec(sound_files, nsources=1, wind_sec=0.092, min_len=.3,
412 |                 recognise=None, output_csv='', output_text_grid=''):
413 |     # segment recordings
414 |     w=[]
415 |     for ff in sound_files:
416 |         sr,wi=wavread(ff)
417 |         w.append(wi.T)
418 |     
419 |     w=np.vstack(w).T
420 |     sys.stderr.write("Read {} files, {} channels, {} samples\n"\
421 |                      .format(len(sound_files),w.shape[1],w.shape[0]))
422 |     sys.stderr.write("Segmenting audio\n")
423 |     if nsources>1:
424 |         seg = MultiChannelSegmenter(w,sr=sr,min_len=min_len)
425 |     else:
426 |         #w=w.squeeze()
427 |         if len(w.shape)>1:
428 |             w = np.mean(w,axis=1)
429 |         seg = SilenceDetector(w.squeeze(), sr=sr, method = 'pct05',
430 |                                 min_len=min_len, wind_sec=wind_sec)
431 |         seg.label = [1 for tst in seg.tst]
432 |         seg.centers = np.array([[0,0],[1,0]])
433 |         
434 |     if recognise:
435 |         seg.recognise(mode=recognise)
436 | 
437 |                                 
438 |             
439 |     sys.stderr.write("Found {} chunks\n".format(len(seg.label)))
440 |     
441 |     output_results(seg, output_csv=output_csv, 
442 |                         output_text_grid=output_text_grid)
443 | 
444 | def process_file_list(batch_file, output_csv='', 
445 |                                   output_text_grid='',
446 |                                   recognise=None, 
447 |                                   wind_sec=0.092, 
448 |                                   min_len=.3, 
449 |                                   nsources=0):
450 |                                   
451 |     import logging
452 |     file_seq=[]
453 |     
454 |     suffix_csv = output_csv
455 |     suffix_tg = output_text_grid
456 |     out_csv=''
457 |     out_tg=''
458 |     
459 |     if not (suffix_csv or suffix_tg):
460 |         suffix_csv = '_segmentation.csv'
461 |     
462 |     with open(batch_file) as f:
463 |         for line in f:
464 |             files = [it.strip() for it in line.split(',') if len(it.strip())>0]
465 |             
466 |             if len(files)>0:
467 |                 basedir, filename = os.path.split(files[0])
468 |                 if suffix_csv:
469 |                     out_csv,ext = os.path.splitext(files[0])
470 |                     out_csv+=suffix_csv
471 |                 if suffix_tg:
472 |                     out_tg,ext = os.path.splitext(files[0])
473 |                     out_tg+=suffix_tg
474 |                 try:
475 |                     analyse_rec(files, output_csv=out_csv, 
476 |                                 output_text_grid=out_tg,
477 |                                 nsources=len(files),
478 |                                 recognise=recognise,
479 |                                 wind_sec=wind_sec,
480 |                                 min_len=min_len)
481 |                 except Exception as e:
482 |                     message = 'ERROR while processing files:\n'
483 |                     for f in files:
484 |                         message+=f
485 |                     message+='/n'
486 |                     logging.exception(message)
487 |                     #~ sys.stderr.write('ERROR while processing files:\n')
488 |                     #~ for f in files:
489 |                         #~ sys.stderr.write(f+'\n')
490 |                     #~ sys.stderr.write(str(e))
491 |                     #~ sys.stderr.write('\n')
492 |                     #~ sys.stderr.write(e.__doc__ )
493 |                     #~ sys.stderr.write('\n')
494 |     return 0
495 | 
496 | 
497 | def main(args):
498 |     sound_files = args.infiles
499 |     for sf in sound_files:
500 |         sys.stderr.write(sf+', ')
501 |     
502 |     sys.stderr.write('\n')
503 |     
504 |     if args.batch:
505 |         process_file_list(args.batch, nsources=args.n_sources, 
506 |                                      wind_sec=args.window,
507 |                                      min_len=args.min_silence,
508 |                                      output_csv=args.csv,
509 |                                      output_text_grid=args.textgrid,
510 |                                      recognise=args.recognise)
511 |         
512 |     else:
513 |         if sound_files:  
514 |             analyse_rec(sound_files, nsources=args.n_sources, 
515 |                                      wind_sec=args.window,
516 |                                      min_len=args.min_silence,
517 |                                      output_csv=args.csv,
518 |                                      output_text_grid=args.textgrid,
519 |                                      recognise=args.recognise)
520 |         else:
521 |             sys.stderr.write('Input files or batch list (-b) are required!\n')
522 |         
523 | 
524 |     return 0
525 | 
526 | 
527 | if __name__ == '__main__':
528 |     import sys
529 |     import argparse
530 |     # construct the argument parse and parse the arguments
531 |     ap = argparse.ArgumentParser()
532 |     ap.add_argument("-n", "--min-silence", nargs='?', default = '0.3', type=float,
533 |         help = "minimum silence duration in seconds")
534 |     ap.add_argument("-w", "--window", nargs='?', default = '0.092', type=float,
535 |         help = "window analysis duration in seconds")
536 |     ap.add_argument("-b", "--batch", nargs='?', 
537 |         help = "input file list for batch processing")
538 |     ap.add_argument("-r", "--recognise", nargs='?', 
539 |         help = "use speach recognition on each interval. Select method sphinx or google")
540 |     ap.add_argument("-c", "--csv", nargs='?', default = '',
541 |         help = "output to csv file name")
542 |     ap.add_argument("-t", "--textgrid", nargs='?', default = '',
543 |         help = "output to Praat Textgrid file name")
544 |     
545 | 
546 | 
547 |     ap.add_argument("-s", "--n-sources", type=float, nargs='?', default = '1',
548 |         help = "number of expected sources in the file")
549 |     
550 |     
551 |     ap.add_argument('infiles', nargs='*', help='Input sound files (required if not batch)')
552 |     
553 |     args = ap.parse_args()
554 | 
555 |     
556 | 
557 |     sys.exit(main(args))
558 | 
559 | 


--------------------------------------------------------------------------------
/pypevoc/speech/__init__.py:
--------------------------------------------------------------------------------
1 | from .SpeechSegmenter import SpeechSegmenter, SyllableSegmenter
2 | from .SpeechChunker import SilenceDetector, MultiChannelSegmenter
3 | from . import SpeechAnalysis as analysis
4 | 


--------------------------------------------------------------------------------
/pypevoc/speech/glottal.py:
--------------------------------------------------------------------------------
  1 | # pypevoc.speech.glottal.py
  2 | #
  3 | # Part of PyPeVoc python package
  4 | #
  5 | # Copyright (C) 2018 Andre Almeida
  6 | #
  7 | # based on covarep's IAIF:
  8 | # https://github.com/covarep/covarep/blob/master/glottalsource/iaif.m
  9 | #
 10 | # This program is free software: you can redistribute it and/or modify
 11 | # it under the terms of the GNU General Public License as published by
 12 | # the Free Software Foundation, either version 3 of the License, or
 13 | # (at your option) any later version.
 14 | #
 15 | # This program is distributed in the hope that it will be useful,
 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 18 | # GNU General Public License for more details.
 19 | #
 20 | # You should have received a copy of the GNU General Public License
 21 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
 22 | #
 23 | 
 24 | import numpy as np
 25 | import scipy.signal as sig
 26 | import logging
 27 | 
 28 | from .SpeechAnalysis import lpc as lpc_red
 29 | 
 30 | # lpc_red ommits the first coefficient of an all-pole filter
 31 | # needed for filtering
 32 | def lpc(x, n):
 33 |     y = lpc_red(x, n)
 34 |     return np.concatenate([[1.], y])
 35 | 
 36 | def iaif_ola(x, Fs=1, nwind=None, nhop=None, 
 37 |              tract_order=None, glottal_order=None,
 38 |              leaky_integration=0.99, wind_func=np.hanning,
 39 |              n_it=1):
 40 |     
 41 |     if nwind is None:
 42 |         nwind = int(np.round(25/1000*Fs))
 43 |     if nhop is None:
 44 |         nhop = int(nwind/5)
 45 |     if tract_order is None:
 46 |         tract_order = 2*int(np.round(Fs/2000))+4
 47 |     if glottal_order is None:
 48 |         glottal_order = 2*int(np.round(Fs/4000))
 49 | 
 50 |     wind = wind_func(nwind)
 51 | 
 52 |     # output signals
 53 |     glot = np.zeros(len(x))
 54 |     dglot = np.zeros(len(x))
 55 |     wins = np.zeros(len(x))
 56 | 
 57 |     # filters, per frame
 58 |     vt_coef = []
 59 |     glot_coef = []
 60 | 
 61 |     ist = 0
 62 | 
 63 |     iaif = InverseFilter(Fs=Fs, nwind=nwind, 
 64 |                          tract_order=tract_order,
 65 |                          glottal_order=glottal_order,
 66 |                          leaky_integration=leaky_integration)
 67 | 
 68 |     while ist < len(x)-nwind:
 69 |         xw = x[ist:ist+nwind]
 70 |         g, gd, vt_f, g_f = iaif.apply(xw, n_it=n_it)
 71 | 
 72 |         glot[ist:ist+nwind] += g*wind
 73 |         dglot[ist:ist+nwind] += gd*wind
 74 |         wins[ist:ist+nwind] += wind
 75 | 
 76 |         vt_coef.append(vt_f)
 77 |         glot_coef.append(g_f)
 78 |         ist += nhop
 79 | 
 80 |     idx = wins>0
 81 |     glot[idx] /= wins[idx]
 82 |     dglot[idx] /= wins[idx]
 83 |     
 84 |     return glot, dglot, np.array(vt_coef), np.array(glot_coef)
 85 | 
 86 | class PaddedFilter(object):
 87 |     def __init__(self, input_signal,
 88 |                  n_before=0, n_after=0,
 89 |                  mode='zeros'):
 90 |         # Padded filter object, applies filters to a signal
 91 |         # while first padding on left and/or right
 92 |         self.mode = mode
 93 |         self.n_before = n_before
 94 |         self.n_after = n_after
 95 |         self.input_signal = input_signal
 96 | 
 97 |     @property
 98 |     def input_signal(self):
 99 |         return self._input_signal
100 | 
101 |     @input_signal.setter
102 |     def input_signal(self, x):
103 |         self._input_signal = x
104 |         if self.mode == 'ramp':
105 |             pad_before = np.linspace(-x[0],x[0], self.n_before)
106 |             pad_after = np.linspace(x[-1],-x[-1], self.n_after)
107 |         else:
108 |             pad_before = np.zeros(self.n_before)
109 |             pad_after = np.zeros(self.n_after)
110 |         self._padded_input = np.concatenate((pad_before, x, pad_after))
111 |         self._padded_output = self._padded_input
112 | 
113 |     @property
114 |     def output_signal(self):
115 |         if self.n_after:
116 |             return self._padded_output[self.n_before:-self.n_after]
117 |         else:
118 |             return self._padded_output[self.n_before:]
119 | 
120 |     def apply_filter(self, b, a):
121 |         self._padded_output = sig.lfilter(b, a, self._padded_input)
122 |         return self.output_signal
123 | 
124 |     def apply_filter_to_last_output(self, b, a):
125 |         self._padded_output = sig.lfilter(b, a, self._padded_output)
126 |         return self.output_signal
127 | 
128 | 
129 | def fir_pre_phase(b, x, n_ramp=None):
130 |     # applies a FIR filter with a pre-phase ramp
131 |     # to reduce ripple
132 |     #
133 |     # Arguments:
134 |     # * b: FIR coefficients
135 |     # * x: input signal
136 |     # * n_ramp: number of samples in pre-ramp 
137 |     #           (default = len(b))
138 |     signal = np.concat((np.linspace(-x[0],x[0], n_ramp), x))
139 |     y = np.lfilter(b,1,signal)
140 |     return y[n_ramp+1:]
141 | 
142 | 
143 | class InverseFilter(object):
144 |     # implements an inverse filter object
145 |     # based on Alku's IAIF
146 |     #
147 |     # P. Alku, "Glottal wave analysis with pitch synchronous iterative
148 |     # adaptive inverse filtering", Speech Communication, vol. 11, no. 2-3,
149 |     # pp. 109–118, 1992.
150 |     def __init__(self, Fs=1, nwind=1024, wind_func=np.hanning, 
151 |             tract_order=None, glottal_order=None,
152 |             leaky_integration=0.99, hpfilt=1):
153 |         # Initialise an inverse filter object
154 |         #
155 |         # Fs:                sample rate (default 1)
156 |         # nwind:             size of inpt chunks
157 |         # tract_order:       order for Vocal Tract LPC
158 |         #                    (default: Fs/1000 + 4)
159 |         # glottal_order:     order fot Glottal Source LPC
160 |         #                    (default: Fs/2000)
161 |         # leaky_integration: leaky integration coef
162 |         # hpfilt:            number of high pass filters to apply
163 | 
164 |         if tract_order is None:
165 |             tract_order = 2*int(np.round(Fs/2000))
166 | 
167 |         if glottal_order is None:
168 |             glottal_order = 2*int(np.round(Fs/4000))+4
169 | 
170 | 
171 |         self.Fs = Fs
172 |         self.tract_order = tract_order
173 |         self.glottal_order = glottal_order
174 |         try:
175 |             assert nwind > self.tract_order
176 |         except AssertionError:
177 |             logging.warning('Frame not analysed')
178 |             return
179 |         self.nwind = int(nwind)
180 |         self.hpfilt = hpfilt
181 |         self.leaky_integrator = np.array([1, -leaky_integration])
182 |         #self.pre_filter = tract_order+1
183 |         self.wind = wind_func(self.nwind)
184 |         n_prel = self.init_preliminary_filter()
185 |         n_pad = int(np.round(n_prel/2-1))
186 |         self.n_pad = tract_order+1
187 |         # array for "a" coeffs of FIR filters
188 |         self.id = np.array([1])
189 | 
190 |     def apply(self,x,n_it=1):
191 |         # Calculates the source and filter parameters
192 |         # (independent of preliminary hp filter)
193 |         # - Combined effect of lip radiation and glottal flow
194 | 
195 |         # create a padded filter object for chained filtering
196 |         # hp_filterer = PaddedFilter(n_after=len(self.hpfilt_b),
197 |         #         input_signal=x,
198 |         #         mode='zeros')
199 |         hp_pad = int(np.round(len(self.hpfilt_b)/2-1))
200 | 
201 |         # HP filter to remove low frequency fluctuations
202 |         for ii in range(self.hpfilt):
203 |             # y = hp_filterer.apply_filter_to_last_output(self.hpfilt_b,self.id)
204 |             y = np.concatenate([x, np.zeros(hp_pad)])
205 |             y = sig.lfilter(self.hpfilt_b, self.id, y)
206 |             y = y[hp_pad:]
207 |         # create a padded filter object for chained filtering
208 |         filter_machine = PaddedFilter(n_before=self.n_pad,
209 |                                       input_signal=y,
210 |                                       mode='ramp')
211 | 
212 |         # first estimate of glottal flow and radiation filters
213 |         Hg = lpc(y*self.wind, 1)
214 |         y = filter_machine.apply_filter(Hg, self.id)
215 | 
216 |         # subsequent iterations of glottal and vt estimations
217 |         for ii in range(n_it):
218 |             Hvt = lpc(y*self.wind, self.tract_order)
219 |             g = filter_machine.apply_filter(Hvt, self.id)
220 |             g = filter_machine.apply_filter_to_last_output(self.id,
221 |                     self.leaky_integrator)
222 | 
223 |             Hg = lpc(g*self.wind, self.glottal_order)
224 |             y = filter_machine.apply_filter(Hg, self.id)
225 |             y = filter_machine.apply_filter_to_last_output(self.id,
226 |                     self.leaky_integrator)
227 | 
228 |         # final estimation of vocal tract and glottal flow
229 |         Hvt = lpc(y*self.wind, self.tract_order)
230 |         dg = filter_machine.apply_filter(Hvt, self.id)
231 |         g = filter_machine.apply_filter_to_last_output(self.id,
232 |                 self.leaky_integrator)
233 | 
234 |         return g, dg, Hvt, Hg
235 | 
236 |     def init_preliminary_filter(self, order=None, freq_stop=40, freq_pass=70):
237 |         # calculate filter coefficients for preliminary hp filter
238 |         if order is None:
239 |             order = int(np.round(300/16000*self.Fs))
240 |         logging.info('Preliminary high-pass filter order set to %d'%order)
241 |         self.hpfilt_b = sig.firls(order,
242 |                                   [0, freq_stop, freq_pass, self.Fs/2],
243 |                                   [0, 0, 1, 1],
244 |                                   [1, 1],
245 |                                   fs=self.Fs)
246 |         return len(self.hpfilt_b)
247 | 
248 | 
249 | def lpcc2pole(b, sr=1):
250 |     def l2p_1(bb):
251 |         rts = np.roots(bb)
252 |         rts = rts[rts.imag>=0]
253 |         omega_n = np.arctan2(rts.imag,rts.real)
254 |         fp = omega_n*sr/2/np.pi
255 |         idx = np.argsort(fp)
256 |         fp = fp[idx]
257 |         bw = -1/2*(sr/2/np.pi) * np.log(np.abs(rts[idx]))
258 |         return fp, bw
259 | 
260 |     if len(b.shape)>1:
261 |         poles = np.zeros((b.shape[0],int(b.shape[1]+1)))
262 |         bws = np.zeros((b.shape[0],int(b.shape[1]+1)))
263 |         for ii in range(b.shape[0]):
264 |             p,bw = l2p_1(b[ii,:])
265 |             poles[ii,:len(p)]=(p)
266 |             bws[ii,:len(p)]=(bw)
267 |         poles=np.array(poles)
268 |         bws=np.array(bws)
269 |     else:
270 |         poles, bws = l2p_1(b)
271 | 
272 |     return poles, bws
273 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from setuptools import setup
 3 | 
 4 | import unittest
 5 | def my_tests():
 6 |     test_loader = unittest.TestLoader()
 7 |     test_suite = test_loader.discover('tests', pattern='test_*.py')
 8 |     return test_suite
 9 | 
10 | setup(name='pypevoc',
11 |       version='0.3',
12 |       description='Pure python sound analysis tools',
13 |       url='http://github.com/goiosunw/pypevoc',
14 |       author='Andre Goios',
15 |       author_email='a.almeida@unsw.edu.au',
16 |       license='GPL v3',
17 |       packages=['pypevoc', 'pypevoc.speech'],
18 |       test_suite = 'setup.my_tests',
19 |       zip_safe=False)
20 | 
21 | 


--------------------------------------------------------------------------------
/tests/test_glottal.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import numpy as np
 3 | 
 4 | import pypevoc.speech.glottal as gl
 5 | 
 6 | class testPaddedFilter(unittest.TestCase):
 7 |     def test_output_same_padding_as_input(self):
 8 |         filter = np.array([-1,1])
 9 |         n_pad_before = 8
10 |         n_signal = 16
11 |         x = np.zeros(n_signal)
12 |         glx = gl.PaddedFilter(x,n_before=n_pad_before)
13 |         y = glx.apply_fir(filter)
14 |         self.assertEqual(len(x),len(y))
15 | 
16 |     def test_output_same_after_padding_as_input(self):
17 |         filter = np.array([-1,1])
18 |         n_pad_before = 8
19 |         n_pad_after = 8
20 |         n_signal = 16
21 |         x = np.zeros(n_signal)
22 |         glx = gl.PaddedFilter(x,n_before=n_pad_before,n_after=n_pad_after)
23 |         y = glx.apply_fir(filter)
24 |         self.assertEqual(len(x),len(y))
25 | 
26 |     def test_output_same_as_input(self):
27 |         filter = np.array([1])
28 |         n_pad_before = 8
29 |         n_signal = 16
30 |         x = np.zeros(n_signal)
31 |         glx = gl.PaddedFilter(x,n_before=n_pad_before)
32 |         y = glx.apply_fir(filter)
33 |         for xx, yy in zip(x, y):
34 |             self.assertEqual(xx, yy)
35 | 
36 |     def test_private_buffer_1d(self):
37 |         glx = gl.PaddedFilter(np.zeros(10),n_before=8)
38 |         self.assertEqual(len(glx._padded_input.shape),1)
39 | 
40 |     def test_private_output_buffer_1d(self):
41 |         glx = gl.PaddedFilter(np.zeros(10),n_before=8)
42 |         self.assertEqual(len(glx._padded_output.shape),1)
43 | 
44 | 
45 | if __name__ == '__main__':
46 |     unittest.main()
47 | 


--------------------------------------------------------------------------------
/tests/test_peak_finder.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import sys
 3 | import pypevoc.PeakFinder as pf
 4 | 
 5 | import unittest
 6 | 
 7 | def parabolic_peak(max_pos=1.0, max_val=1.0, n=3, a=-1.):
 8 |     x = np.arange(n)
 9 |     b = -max_pos * 2 * a
10 |     c = max_val - a * max_pos * (b + max_pos)
11 |     return a*x*x + b*x + c
12 | 
13 | 
14 | class testPeakFinder(unittest.TestCase):
15 |     def testFindOnePeak(self):
16 |         x = np.linspace(0, 1, 10)
17 |         x = np.concatenate((x, np.linspace(.9, 1, 9)))
18 |         peaks = pf.PeakFinder(x)
19 |         assert(len(peaks.pos) == 1)
20 |         self.assertEqual(peaks.pos, 9)
21 | 
22 |     def test_refine_one_peak_centered(self):
23 |         x = parabolic_peak(max_pos=1.0)
24 |         peaks = pf.PeakFinder(x)
25 |         peaks.refine_all()
26 |         assert(len(peaks.pos) == 1)
27 |         self.assertEqual(peaks.pos, 1.0)
28 | 
29 |     def test_refine_one_peak_at_random_pos(self):
30 |         mypos = 1.2
31 |         x = parabolic_peak(max_pos=mypos, n=4)
32 |         peaks = pf.PeakFinder(x)
33 |         peaks.refine_all()
34 |         self.assertListEqual(peaks.fpos.tolist(), [mypos])
35 | 
36 |     def test_refine_one_peak_between_samples(self):
37 |         x = parabolic_peak(max_pos=1.5, n=4)
38 |         peaks = pf.PeakFinder(x)
39 |         peaks.refine_all()
40 |         self.assertListEqual(peaks.fpos.tolist(), [1.5])
41 | 
42 |     def test_refine_one_peak_almost_between_samples(self):
43 |         mypos = 1.499
44 |         x = parabolic_peak(max_pos=mypos, n=4)
45 |         peaks = pf.PeakFinder(x)
46 |         peaks.refine_all()
47 |         self.assertEqual(len(peaks.fpos), 1)
48 |         self.assertAlmostEqual(peaks.fpos[0], mypos)
49 | 
50 | 
51 | 
52 | def main():
53 |     unittest.main()
54 | 
55 | if __name__ == '__main__':
56 |     main()
57 | 


--------------------------------------------------------------------------------
/tests/test_periodicity.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import numpy as np
 3 | 
 4 | from pypevoc.Periodicity import period_marks_corr, PeriodTimeSeries
 5 | 
 6 | 
 7 | def gen_sin(f=440, sr=48000, nsamp=4800):
 8 |     # nsamp = int(dur*sr)
 9 |     return np.sin(2.*np.pi*float(f)/sr*np.arange(nsamp))
10 | 
11 | 
12 | class testPeriodicity(unittest.TestCase):
13 |     def test_single_period_sin_xcorr(self):
14 |         f0 = 500.
15 |         sr = 48000
16 |         nsam = 4800
17 |         x = gen_sin(f=f0, sr=sr, nsamp=nsam)
18 |         pts = PeriodTimeSeries(x, sr=sr, method='xcorr')
19 |         period = pts.per_at_index(nsam/2)
20 |         p0 = period.get_preferred_period()
21 |         self.assertAlmostEqual(sr/p0, f0, delta=1.0)
22 | 
23 |     def test_preferred_period_is_scalar(self):
24 |         x = gen_sin()
25 |         nsam = len(x)
26 |         pts = PeriodTimeSeries(x, method='xcorr')
27 |         period = pts.per_at_index(nsam/2)
28 |         p0 = period.get_preferred_period()
29 |         self.assertIsInstance(p0, float)
30 | 
31 | 
32 | class testPeriodMarks(unittest.TestCase):
33 |     def test_period_mark_corr_int_samples_per_period(self):
34 |         sr = 1.0
35 |         f0 = sr/8
36 |         nsam = 1024
37 |         x = gen_sin(f=f0, sr=sr, nsamp=nsam)
38 |         marks = period_marks_corr(x, sr=sr, tf=[0, nsam],
39 |                                   f=[f0, f0], window_size=256)
40 |         period = 1./f0
41 |         dmarks = np.diff(marks[1:])
42 |         for dm in dmarks:
43 |             self.assertAlmostEqual(dm, period)
44 | 
45 |     def test_period_mark_corr_frac_samples_per_period(self):
46 |         sr = 1.0
47 |         f0 = sr/64.3
48 |         nsam = 1024
49 |         x = gen_sin(f=f0, sr=sr, nsamp=nsam)
50 |         marks = period_marks_corr(x, sr=sr, tf=[0, nsam],
51 |                                   f=[f0, f0], window_size=256)
52 |         period = 1./f0
53 |         dmarks = np.diff(marks[1:])
54 |         for dm in dmarks:
55 |             self.assertAlmostEqual(dm, period, places=1)
56 | 
57 | 
58 | def main():
59 |     unittest.main()
60 | 
61 | 
62 | if __name__ == '__main__':
63 |     main()
64 | 


--------------------------------------------------------------------------------
/tests/test_pypevoc.py:
--------------------------------------------------------------------------------
 1 | import pypevoc.PVAnalysis as pv
 2 | import numpy as np
 3 | 
 4 | sr = 44100
 5 | t = np.arange(sr)/float(sr)
 6 | 
 7 | f = [400.,1200.]
 8 | mag = [.1,.05]
 9 | fmul=1.0
10 | 
11 | minmag = min(mag)*0.001
12 | #minmag=-1
13 | 
14 | xx = np.zeros(len(t))
15 | for ff,mm in zip(f,mag):
16 |     xx += mm*np.sin(2.0*np.pi*ff*fmul*t)
17 | 
18 | p=pv.PV(xx,sr,nfft=2**10,hop=2**9)
19 | p.run_pv()
20 | ss=p.toSinSum()
21 | 
22 | 
23 | for ii,part in enumerate(ss.partial):
24 |     avmag = np.mean(part.mag)
25 |     if avmag > minmag:
26 |         print('Partial %d, st=%d, len=%d, f=%f, mag =%f'%(ii,part.start_idx,len(part.f),np.mean(part.f),avmag))
27 | 


--------------------------------------------------------------------------------
/tests/test_speech.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import numpy as np
 3 | 
 4 | import pypevoc.speech as sp
 5 | 
 6 | def generate_recursive_noise(a = [-1], length=1000):
 7 |     x = np.random.rand(length)
 8 | 
 9 |     #y = np.zeros(len(x)-len(a_req)-1)
10 |     y = [0] * len(a)
11 |     for xx in x:
12 |         yy = xx
13 |         for ii, aa in enumerate(a):
14 |             yy += -aa*y[-ii-1]
15 |         y.append(yy)
16 | 
17 |     return np.array(y)
18 | 
19 | 
20 | class test_lpc(unittest.TestCase):
21 |     def test_coef_length(self):
22 |         a_req = [-.5,.25]
23 |         y = generate_recursive_noise(a=a_req)
24 |         order = len(a_req)
25 |         a_pyp = sp.analysis.lpc(y,order=order)
26 | 
27 |         self.assertEqual(order, len(a_pyp))
28 | 
29 | 
30 |     def test_coef_equivalence(self):
31 |         a_req = [-.5,.25]
32 |         y = generate_recursive_noise(a=a_req)
33 |         order = len(a_req)
34 |         a_pyp = sp.analysis.lpc(y,order=order)
35 | 
36 |         for ap, at in zip(a_pyp, a_req):
37 |             self.assertAlmostEqual(ap, at, delta=0.0001)
38 | 
39 |     def compare_to_talkbox(self):
40 |         try:
41 |             import scikits.talkbox as tbox
42 |         except ImportError:
43 |             return
44 |         
45 |         order = 1
46 |         y = generate_recursive_noise()
47 |         a_pyp = sp.analysis.lpc(y,order=order)
48 |         a_tbx = tbox.lpc(y,order=order)
49 | 
50 |         for ap, at in zip(a_pyp, a_tbx):
51 |             self.assertAlmostEqual(ap, at, delta=0.01)
52 | 
53 | 


--------------------------------------------------------------------------------
/tests/vibrato_obj.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import sys
  3 | from scipy.interpolate import interp1d
  4 | 
  5 | def Centroid(hamp):
  6 |     '''Calculate the centroid of a harmonic sequence'''
  7 |     allamp = 0.0
  8 |     allfsum = 0.0
  9 |     for hno0, amp in enumerate(hamp):
 10 |         hno = hno0+1
 11 |         allamp += amp
 12 |         allfsum += hno*amp
 13 |     
 14 |     return allfsum/allamp
 15 | 
 16 | def RMSampl(hamp):
 17 |     '''Calculate the RMS amplitude of a harmonic sequence'''
 18 |     ampsq = 0.0
 19 |     for hno0, amp in enumerate(hamp):
 20 |         hno = hno0+1
 21 |         ampsq += amp*amp
 22 |     
 23 |     return np.sqrt(ampsq)
 24 | 
 25 | def SlopeToHmult(slope, nharm):
 26 |     '''Calculate a harmonic sequence for a constant dB slope'''
 27 |     
 28 |     base = np.exp(slope)
 29 |     hamp = []
 30 |     for i in xrange(nharm):
 31 |         hn = i
 32 |         hamp.append(base**(hn-(nharm-1)/2.0))
 33 |         #hamp.append(np.sqrt(((hn/float(nharm-1)-.5 )*2.*(-slope)+1.)/nharm)) 
 34 |     ha = np.array(hamp)
 35 |     return ha /np.sqrt(sum(ha*ha))
 36 | 
 37 | 
 38 | class SlopeHarmonicScaler(object):
 39 |     '''Object for quick calculation of a harmonic for 
 40 |     a desired spectral centroid
 41 |     * for val=0, centroid is on 1st harmonic
 42 |     * for val=1, centroid is on last harmonic
 43 |     * Centroid variation is produced by a change in spectral slope
 44 |     * Spectrum is a linear slope in dB'''
 45 |     def __init__(self, nharm=2, npoints=100, slopelim=4):
 46 |         self.nharm = nharm
 47 |         
 48 |         slopes = np.linspace(-slopelim,slopelim,npoints)
 49 |         cent = np.zeros(len(slopes)+2)
 50 |         hamps = np.zeros((len(slopes)+2,nharm))
 51 |         
 52 |         
 53 |         
 54 |         for (ii,slope) in enumerate(slopes):
 55 |             hamp = SlopeToHmult(slope,nharm)
 56 |             cent[ii+1]=(Centroid(hamp))
 57 |             hamps[ii+1,...] = hamp
 58 |         
 59 |         hamps[0,0]=1.
 60 |         hamps[-1,-1]=1.
 61 |         
 62 |         cent[0] = 1.
 63 |         cent[-1] = nharm
 64 |         
 65 |         self.cent = cent
 66 |         self.hamp = hamps
 67 |         
 68 |         self.generateInterpolators()
 69 |         
 70 |         self.vmin = np.min(cent)
 71 |         self.vmax = np.max(cent)
 72 |         
 73 |     def __call__(self, val):
 74 |         '''
 75 |         Return harmonic amplitudes for a given spectral centroid
 76 |         '''
 77 |         hh = []
 78 |         cent = val*(self.nharm-1.)+1.
 79 |         for ii in xrange(self.nharm):
 80 |             hh.append(self.fharm[ii](cent))
 81 |         return np.array(hh)
 82 |     
 83 |     def saveNumpy(self,filename):
 84 |         '''
 85 |         Save a table of harmonic amplitudes to file
 86 |         '''
 87 |         np.save(filename, (self.cent,self.hamp))
 88 |         
 89 |     def loadNumpy(self,filename):
 90 |         '''
 91 |         Load a table of harmonic amplitudes from file
 92 |         '''
 93 |         self.cent, self.hamp = np.load(filename)
 94 |         self.generateInterpolators()
 95 |         
 96 |     def generateInterpolators(self):
 97 |         '''
 98 |         Generates the interpolator function from a table 
 99 |         of harmonic amplitudes
100 |         '''
101 |         self.fharm=[]
102 |         
103 |         for ii in xrange(self.nharm):
104 |             ff = interp1d(self.cent, self.hamp[...,ii], kind='cubic')
105 |             self.fharm.append(ff)
106 |         
107 |         
108 |     def outputJSArray(self, npoints=100, vlims=[0.,1.]):
109 |         '''
110 |         Outputs an interpolated array of harmonic amplitudes
111 |         for each value of spectral centroid (0-1)
112 |         '''
113 | 
114 |         sys.stdout.write("scvals = [ \n")
115 | 
116 |         for ii in xrange(npoints+1):
117 |             vrange = max(vlims) - min(vlims)
118 |             cent = min(vlims) + ii * vrange / float(npoints)
119 |             hamp = self(cent)
120 |             sys.stdout.write('[')
121 |             for hh in hamp:
122 |                 sys.stdout.write('%f,'%hh)
123 |                 
124 |             sys.stdout.write('], // %f\n'% cent)
125 |             
126 |         sys.stdout.write('];\n')
127 |         
128 | 
129 | class VibratoProfile(object):
130 |     '''A vibrato time-profile'''
131 |     def __init__(self, t_vals=[0.0,1.0], a_vals=[1.0,1.0], vibfreq=5.0):
132 |         self.ti=np.array(t_vals)
133 |         self.ai=np.array(a_vals)
134 |         self.vibfreq = vibfreq
135 |         
136 |         # max of vibrato profile is 1
137 |         amax = np.max(self.ai)
138 |         if amax>0.:
139 |             self.ai = self.ai/amax
140 |         self.recalc_profile()
141 |         
142 |         
143 |     def recalc_profile(self):
144 | 
145 |         t_max = max(self.ti)
146 |         tout = np.arange(0,t_max,1./self.vibfreq/16.0)
147 |         aout = np.interp(tout,self.ti,self.ai)
148 |         self.t = tout
149 |         
150 |         amax = np.max(self.ai)
151 |         if amax>0.:
152 |             i_st = min(np.argmin(self.ai>0.0),1)-1
153 |             t_st = self.ti[i_st]
154 | 
155 |             self.vibprof = aout*np.sin(2*np.pi*self.vibfreq*(tout-t_st))
156 |         else:
157 |             self.vibprof = np.zeros_like(tout)
158 |         
159 |     
160 |     def __call__(self,t):
161 |         return np.interp(t,self.t,self.vibprof)
162 |         
163 |     def getDuration(self):
164 |         return max(self.t)
165 |         
166 |     def setVibratoFreq(self, vibfreq):
167 |         self.vibfreq=vibfreq
168 |         self.recalc_profile()
169 | 
170 | class Vibrato(object):
171 |     '''Generate a sound from vibrato profile'''
172 |     def __init__(self, harm0=[1.],  sr=44100, f0=500., vibfreq=5.0):
173 |         self.sr=sr
174 |         self.f0=f0
175 |         self.h0 = np.array(harm0)
176 |         self.nharm = len(harm0)
177 |         self.hs = SlopeHarmonicScaler(self.nharm)
178 |         self.vibfreq=vibfreq
179 |         
180 |         self.setProfile()
181 |         self.setEnvelope()
182 |         
183 | 
184 | 
185 |     def setProfile(self, t_prof=[0.0,1.0], v_prof=[1.0,1.0]):
186 |         self.prof = VibratoProfile(t_prof,v_prof,vibfreq=self.vibfreq)
187 | 
188 |     def setVibratoFreq(self, vibfreq=5.0):
189 |         self.prof.setVibFreq(vibfreq)
190 |         
191 |     def setEnvelope(self,t_att=0.0, t_rel=0.0):
192 |         self.t_att=t_att
193 |         self.t_rel=t_rel
194 |         self.at_sam = int(round(t_att*self.sr));
195 |         self.rel_sam = int(round(t_rel*self.sr));
196 |         
197 |     def getFrequencyTime(self,t,mult=1.0):
198 |         '''Generates the values of frequency at times t'''
199 |         vibsig = self.prof(t)
200 |         return self.f0 * (1.0 + mult*vibsig)
201 | 
202 |     def getAmplitudeTime(self,t,hno=1,mult=1.0):
203 |         '''Generates the values of amplitude of harmonic hno at times t'''
204 |         vibsig = self.prof(t)
205 |         
206 |         # amplitude vector
207 |         hamp = self.hs(bsig)
208 |         aharm = hamp[hno-1];
209 |         a0 = self.h0[hno-1] * (1. + mult * vibsig) 
210 |         
211 |         hsig = a0 * aharm 
212 |         
213 |         return hsig
214 |  
215 |     def generateProfiles(self,brightness=[0.5,0.5], amplitude=0.0, frequency = 0.0, t=None):
216 |         # Build amplitude and frequency profiles
217 |         bmin = min(brightness)
218 |         bmax = max(brightness)
219 |         
220 |         if t is None:
221 |             t = np.arange(0,self.prof.getDuration(),1/float(self.sr));
222 |         
223 |         vibsig = self.prof(t)
224 |         bsig = vibsig * (bmax-bmin)/2. + (bmax+bmin)/2.
225 |         
226 |         hamp = self.hs(bsig)
227 |         
228 |         ## Build overal envelope
229 |         env_a = np.ones_like(vibsig);
230 |         
231 |         
232 |         if self.t_att>0:
233 |             at_sam = np.min(np.nonzero(t>self.t_att))
234 |             env_a[0:at_sam]    = np.linspace(0,1,at_sam);
235 |         if self.t_rel>0:
236 |             rel_sam = len(t)-np.min(np.nonzero(t>max(t)-self.t_rel))
237 |             env_a[-rel_sam:] = np.linspace(1,0,rel_sam);
238 | 
239 |         fh=np.zeros([len(vibsig),self.nharm])
240 |         ah=np.zeros([len(vibsig),self.nharm])
241 | 
242 |         for i in range(1,self.nharm+1):
243 |             # vector of frequency per sample
244 |             fh[:,i-1] = i*self.f0 * (1.0 + frequency*vibsig)
245 |             
246 |             # amplitude vector
247 |             hamp = self.hs(bsig)
248 |             aavg = hamp[i-1];
249 |             a0 = self.h0[i-1] * (1. + amplitude * vibsig) 
250 |             
251 |             ah[:,i-1] = a0 * aavg *env_a 
252 |             
253 |         return fh,ah
254 |        
255 |     def calculateWav(self,brightness=[0.5,0.5], amplitude=0.0, frequency = 0.0):
256 |         # Build signal
257 |         bmin = min(brightness)
258 |         bmax = max(brightness)
259 |         
260 |         t = np.arange(0,self.prof.getDuration(),1/float(self.sr));
261 |         sig = np.zeros_like(t);
262 |         
263 |         
264 |         vibsig = self.prof(t)
265 |         bsig = vibsig * (bmax-bmin)/2. + (bmax+bmin)/2.
266 |         
267 |         hamp = self.hs(bsig)
268 |         for i in range(1,self.nharm+1):
269 |             # vector of frequency per sample
270 |             fharm = self.getFrequencyTime(t,mult=frequency)
271 |             #fharm = i*self.f0 *np.ones_like(vibsig)
272 |             # phase vector
273 |             fcumsum = np.cumsum(2*np.pi*fharm)/self.sr;
274 |             phi = np.concatenate(([0],fcumsum[0:-1]));
275 | 
276 |             # amplitude vector
277 |             # amplitude vector
278 |             hamp = self.hs(bsig)
279 |             aharm = hamp[i-1];
280 |             a0 = self.h0[i-1] * (1. + amplitude * vibsig) 
281 |             hsig = a0 * aharm *env_a 
282 |             
283 |             sig = sig+hsig;
284 | 
285 |         ## Build overal envelope
286 |         env_a = np.ones_like(vibsig);
287 |         
288 |         if self.at_sam>0:
289 |             env_a[0:self.at_sam]    = np.linspace(0,1,self.at_sam);
290 |         if self.rel_sam>0:
291 |             env_a[-self.rel_sam:] = np.linspace(1,0,self.rel_sam);
292 | 
293 |         sig=sig*env_a;
294 | 
295 |         self.sig = sig
296 |         return sig
297 |         
298 |     def saveWav(self,filename,sampwidth = 2):
299 |         import wave
300 |         import struct
301 |         
302 |         wav_file = wave.open(filename, "w")
303 | 
304 |         nchannels = 2
305 |         amp = 2**(8*sampwidth)
306 | 
307 |         framerate = int(self.sr)
308 |         nframes = len(self.sig)
309 | 
310 |         comptype = "NONE"
311 |         compname = "not compressed"
312 | 
313 |         wav_file.setparams((nchannels, sampwidth, framerate, nframes,
314 |             comptype, compname))
315 | 
316 |         # numpy convert float to int
317 |         xstereo = np.reshape(np.tile(self.sig,[2,1]).T*amp/2,2*len(self.sig)).astype('int16').tostring()
318 | 
319 |         wav_file.writeframes(xstereo)
320 | 
321 |         wav_file.close()
322 |         
323 |         
324 | def SlopeVibratoWAV(filename='out.wav',
325 |         slope=0,
326 |         nharm = 7,
327 |         f0tonic=500.0,
328 |         amp=0.1,
329 |         hdepth = 6.0,
330 |         vib_slope=1.0,
331 |         sr=44100):
332 |     '''Generate a sequence of similar vibrato notes:fluctuating in amplitude or slope
333 |     '''
334 |     #sr=44100
335 |     base = np.exp(slope)
336 |     
337 |     print vib_slope
338 |     fact = 20./np.log(10)
339 |     if vib_slope>0.0:
340 |         hvib = [(float(hn)-(nharm-2.0)/2.0)*hdepth for hn in xrange(nharm-1)]
341 |     else:
342 |         hvib = [hdepth for hn in xrange(nharm-1)]
343 |     print hvib
344 |     #hvib = [fact*np.log10((hn-(nharm+1.0)/2.0)*slope) for hn in xrange(nharm-1)]
345 |     
346 |     hamp = np.array([(1.)**xx/xx**slope for xx in xrange(1,nharm)])
347 |     #hamp = np.concatenate(([0],hamp))
348 |     #hamp = np.zeros(nharm)
349 |     #f0tonic = 500.
350 |     #amp=0.05
351 |     #amp=0.1
352 |     #hamp = amp*np.ones(nharm)
353 |     #hamp[0]=0.0
354 | 
355 | 
356 |     #hvib = np.zeros(len(hamp))
357 |     # if vib_slope > 0.0:
358 |     #     for nn in range(nharm-1):
359 |     #         hvib[nn] = hdepth * (nharm/2. - float(nn))
360 |     # else:
361 |     #for nn in range(nharm-1):
362 |     #    hvib[nn] = hdepth
363 |     
364 |     
365 |     
366 |     sig = HarmonicVibrato(ampseq=hamp,hvib=hvib,f0vib=0.00,f0=f0tonic,
367 |                         vib_prof_t=[0.0,0.3,0.7,1.5,1.6],vib_prof_a=[0.0,0.0,0.5,1.0,0.0],vibfreq=6.0,
368 |                         a0=amp,sr=sr,t_att=.05)
369 |     
370 |     write_wav(filename,sig,sr=sr)
371 |     #wavwrite(filename,rate=sr,data=np.tile(sig,[1,2]))
372 | 
373 |     #return sig, sr
374 |     #display(Audio(data=sig,rate=sr,autoplay=True))
375 | 
376 | 
377 | 


--------------------------------------------------------------------------------