├── examples
    ├── audio_tools.py
    └── test_audio_extract.py
├── README.md
├── LICENSE
├── mcts
    ├── toy_state_manager.py
    └── puct_mcts.py
├── image
    └── image_tools.py
├── graph
    └── graph_tools.py
└── audio
    └── audio_tools.py


/examples/audio_tools.py:
--------------------------------------------------------------------------------
1 | ../audio_tools.py


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # tools
2 | Various tools for graphs, audio, images
3 | 
4 | The goal is to keep these as self-contained scripts for ease of integration into larger projects
5 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2017, Kyle Kastner
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | * Neither the name of the copyright holder nor the names of its
17 |   contributors may be used to endorse or promote products derived from
18 |   this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/mcts/toy_state_manager.py:
--------------------------------------------------------------------------------
  1 | STATE_MAX = 50
  2 | 
  3 | class RightPolicyStateManager(object):
  4 |     """
  5 |     A state manager with a goal state and action space of 2
  6 |     """
  7 |     def __init__(self, goal_state, random_state, rollout_limit=1000):
  8 |         self.rollout_limit = rollout_limit
  9 |         self.goal_state = goal_state
 10 |         self.random_state = random_state
 11 | 
 12 |     def get_next_state(self, state, action):
 13 |         if action == 1:
 14 |             return state + 1
 15 |         elif action == 0:
 16 |             return state - 1
 17 | 
 18 |     def get_action_space(self):
 19 |         # go down, 0
 20 |         # go up, 1
 21 |         return list(range(2))
 22 | 
 23 |     def get_valid_actions(self, state):
 24 |         if state > 0 and state < STATE_MAX:
 25 |             return list(range(2))
 26 |         elif state == 0:
 27 |             return [1]
 28 |         elif state == STATE_MAX:
 29 |             return [0]
 30 | 
 31 |     def get_init_state(self):
 32 |         # start in the worst state
 33 |         return 0
 34 |         #return self.random_state.randint(0, STATE_MAX)
 35 | 
 36 |     def rollout_fn(self, state):
 37 |         # can define custom rollout function
 38 |         return self.random_state.choice(self.get_valid_actions(state))
 39 | 
 40 |     def score(self, state):
 41 |         # if these numbers are big, it tends to run slower
 42 | 
 43 |         # example of custom finish, score
 44 |         # sparse / goal discovery reward
 45 |         #return 1. if state == self.goal_state else 0.
 46 |         # distance / goal conditioned reward
 47 |         return 1. if state == self.goal_state else -(1. / self.goal_state) * (self.goal_state - state)
 48 | 
 49 |     def is_finished(self, state):
 50 |         # if this check is slow
 51 |         # can rewrite as _is_finished
 52 |         # then add
 53 |         # self.is_finished = MemoizeMutable(self._is_finished)
 54 |         # to __init__ instead
 55 | 
 56 |         # return winner, score, end
 57 |         # winner normally in [-1, 0, 1]
 58 |         # if it's one player, can just use [0, 1] and it's fine
 59 |         # score arbitrary float value
 60 |         # end in [True, False]
 61 |         return (1, 1., True) if state == self.goal_state else (0, 0., False)
 62 | 
 63 |     def rollout_from_state(self, state):
 64 |         # example rollout function
 65 |         s = state
 66 |         w, sc, e = self.is_finished(state)
 67 |         if e:
 68 |             return self.score(s)
 69 | 
 70 |         c = 0
 71 |         while True:
 72 |             a = self.rollout_fn(s)
 73 |             s = self.get_next_state(s, a)
 74 | 
 75 |             e = self.is_finished(s)
 76 |             c += 1
 77 |             if e:
 78 |                 return self.score(s)
 79 |             if c > self.rollout_limit:
 80 |                 # can also return different score if rollout limit hit
 81 |                 return self.score(s)
 82 | 
 83 | 
 84 | if __name__ == "__main__":
 85 |     from puct_mcts import MCTS, MemoizeMutable
 86 |     import numpy as np
 87 |     mcts_random = np.random.RandomState(1110)
 88 |     state_random = np.random.RandomState(11)
 89 |     exact = True
 90 | 
 91 |     state_man = RightPolicyStateManager(STATE_MAX, state_random)
 92 |     mcts = MCTS(state_man, n_playout=1000, random_state=mcts_random)
 93 |     state = mcts.state_manager.get_init_state()
 94 |     winner, score, end = mcts.state_manager.is_finished(state)
 95 |     states = [state]
 96 |     while True:
 97 |         if not end:
 98 |             if not exact:
 99 |                 a, ap = mcts.sample_action(state, temp=temp, add_noise=noise)
100 |             else:
101 |                 a, ap = mcts.get_action(state)
102 | 
103 |             for i in mcts.root.children_.keys():
104 |                 print(i, mcts.root.children_[i].__dict__)
105 |                 print("")
106 |             mcts.update_tree_root(a)
107 |             state = mcts.state_manager.get_next_state(state, a)
108 |             states.append(state)
109 |             print(states)
110 |             winner, score, end = mcts.state_manager.is_finished(state)
111 |         if end:
112 |             print(states[-1])
113 |             print("Ended")
114 |             mcts.reconstruct_tree()
115 |             break
116 | 


--------------------------------------------------------------------------------
/image/image_tools.py:
--------------------------------------------------------------------------------
  1 | # Author: Kyle Kastner
  2 | # License: BSD 3-Clause
  3 | 
  4 | import numpy as np
  5 | from scipy.linalg import eigh
  6 | from scipy.misc import imresize
  7 | 
  8 | 
  9 | def ind2sub(array_shape, ind):
 10 |     # Gives repeated indices, replicates matlabs ind2sub
 11 |     rows = (ind.astype("int32") // array_shape[1])
 12 |     cols = (ind.astype("int32") % array_shape[1])
 13 |     return (rows, cols)
 14 | 
 15 | 
 16 | def graphcut(im, n_splits=2, split_type="mean", rad=5, sigma_x=.3,
 17 |              sigma_p=.1, scaling=255.):
 18 |     # im: grayscale image
 19 |     sz = im.shape[0] * im.shape[1]
 20 |     ind = np.arange(sz)
 21 | 
 22 |     I, J = ind2sub(im.shape, ind)
 23 |     I = I + 1
 24 |     J = J + 1
 25 | 
 26 |     scaled_im = im.ravel() / float(scaling)
 27 | 
 28 |     # float32 gives the wrong answer...
 29 |     scaled_im = scaled_im.astype("float64")
 30 |     sim = np.zeros((sz, sz)).astype("float64")
 31 | 
 32 |     # Faster with broadcast tricks
 33 |     # Still wasting computation - einsum might be fastest
 34 |     x1 = I[None]
 35 |     x2 = I[:, None]
 36 |     y1 = J[None]
 37 |     y2 = J[:, None]
 38 |     dist = (x1 - x2) ** 2 + (y1 - y2) ** 2
 39 |     scale = np.exp(-(dist / (sigma_x ** 2)))
 40 |     sim = scale
 41 |     sim[np.sqrt(dist) >= rad] = 0.
 42 |     del x1
 43 |     del x2
 44 |     del y1
 45 |     del y2
 46 |     del dist
 47 | 
 48 |     p1 = scaled_im[None]
 49 |     p2 = scaled_im[:, None]
 50 |     pdist = (p1 - p2) ** 2
 51 |     pscale = np.exp(-(pdist / (sigma_p ** 2)))
 52 | 
 53 |     sim *= pscale
 54 | 
 55 |     dind = np.diag_indices_from(sim)
 56 |     sim[dind] = 1.
 57 | 
 58 |     d = np.sum(sim, axis=1)
 59 |     D = np.diag(d)
 60 |     A = (D - sim)
 61 | 
 62 |     # Want second smallest eigenvector onward
 63 |     S, V = eigh(A, D, eigvals=(1, n_splits + 1),
 64 |                 overwrite_a=True, overwrite_b=True)
 65 |     sort_ind = np.argsort(S)
 66 |     S = S[sort_ind]
 67 |     V = V[:, sort_ind]
 68 |     segs = V
 69 |     segs[:, -1] = ind
 70 | 
 71 |     def cut(im, matches, ix, split_type="mean"):
 72 |         # Can choose how to split
 73 |         if split_type == "mean":
 74 |             split = np.mean(segs[:, ix])
 75 |         elif split_type == "median":
 76 |             split = np.median(segs[:, ix])
 77 |         elif split_type == "zero":
 78 |             split = 0.
 79 |         else:
 80 |             raise ValueError("Unknown split type %s" % split_type)
 81 | 
 82 |         meets = np.where(matches[:, ix] >= split)[0]
 83 |         match1 = matches[meets, :]
 84 |         res1 = np.zeros_like(im)
 85 |         match_inds = match1[:, -1].astype("int32")
 86 |         res1.ravel()[match_inds] = im.ravel()[match_inds]
 87 | 
 88 |         meets = np.where(matches[:, ix] < split)[0]
 89 |         match2 = matches[meets, :]
 90 |         res2 = np.zeros_like(im)
 91 |         match_inds = match2[:, -1].astype("int32")
 92 |         res2.ravel()[match_inds] = im.ravel()[match_inds]
 93 |         return (match1, match2), (res1, res2)
 94 | 
 95 |     # Recursively split partitions
 96 |     # Currently also stores intermediates
 97 |     all_splits = []
 98 |     all_matches = [[segs]]
 99 |     for i in range(n_splits):
100 |         matched = all_matches[-1]
101 |         current_splits = []
102 |         current_matches = []
103 |         for s in matched:
104 |             matches, splits = cut(im, s, i, split_type=split_type)
105 |             current_splits.extend(splits)
106 |             current_matches.extend(matches)
107 |         all_splits.append(current_splits)
108 |         all_matches.append(current_matches)
109 |     return all_matches, all_splits
110 | 
111 | 
def test_graphcut():
    """
    Visual smoke test: run graphcut on a 50x50 test image and plot the
    pieces of the last split level in a two-row grid.
    """
    # NOTE(review): scipy.misc.lena and scipy.misc.imresize are not
    # available in recent SciPy releases - confirm the targeted version.
    import matplotlib.pyplot as plt
    from scipy.misc import lena

    # Any bigger and my weak laptop gets memory errors
    bounds = (50, 50)
    im = imresize(lena(), bounds, interp="bicubic")
    all_matches, all_splits = graphcut(im, split_type="mean")

    pieces = all_splits[-1]
    f, axarr = plt.subplots(2, len(pieces) // 2)
    for n, piece in enumerate(pieces):
        ax = axarr.ravel()[n]
        ax.imshow(piece, cmap="gray")
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()


if __name__ == "__main__":
    test_graphcut()
131 | 


--------------------------------------------------------------------------------
/examples/test_audio_extract.py:
--------------------------------------------------------------------------------
  1 | from audio_tools import fetch_sample_speech_tapestry, world_synthesis
  2 | from audio_tools import harvest, cheaptrick, d4c, sp2mgc, mgc2sp
  3 | from audio_tools import soundsc
  4 | from scipy.io import wavfile
  5 | from scipy import fftpack
  6 | import numpy as np
  7 | import time
  8 | 
  9 | 
 10 | def run_world_mgc_example():
 11 |     # run on chromebook
 12 |     # enc 839.71
 13 |     # synth 48.79
 14 |     fs, d = fetch_sample_speech_tapestry()
 15 |     d = d.astype("float32") / 2 ** 15
 16 | 
 17 |     # harcoded for 16k from
 18 |     # https://github.com/CSTR-Edinburgh/merlin/blob/master/misc/scripts/vocoder/world/extract_features_for_merlin.sh
 19 |     mgc_alpha = 0.58
 20 |     #mgc_order = 59
 21 |     mgc_order = 59
 22 |     # this is actually just mcep
 23 |     mgc_gamma = 0.0
 24 | 
 25 |     def enc():
 26 |         temporal_positions_h, f0_h, vuv_h, f0_candidates_h = harvest(d, fs)
 27 |         temporal_positions_ct, spectrogram_ct, fs_ct = cheaptrick(d, fs,
 28 |                 temporal_positions_h, f0_h, vuv_h)
 29 |         temporal_positions_d4c, f0_d4c, vuv_d4c, aper_d4c, coarse_aper_d4c = d4c(d, fs,
 30 |                 temporal_positions_h, f0_h, vuv_h)
 31 | 
 32 |         mgc_arr = sp2mgc(spectrogram_ct, mgc_order, mgc_alpha, mgc_gamma,
 33 |                 verbose=True)
 34 |         return mgc_arr, spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c
 35 | 
 36 | 
 37 |     start = time.time()
 38 |     mgc_arr, spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c = enc()
 39 |     enc_done = time.time()
 40 | 
 41 |     sp_r = mgc2sp(mgc_arr, mgc_alpha, mgc_gamma, fs=fs, verbose=True)
 42 |     synth_done = time.time()
 43 | 
 44 |     print("enc time: {}".format(enc_done - start))
 45 |     print("synth time: {}".format(synth_done - enc_done))
 46 |     y = world_synthesis(f0_d4c, vuv_d4c, coarse_aper_d4c, sp_r, fs)
 47 |     #y = world_synthesis(f0_d4c, vuv_d4c, aper_d4c, sp_r, fs)
 48 |     wavfile.write("out_mgc.wav", fs, soundsc(y))
 49 | 
 50 | 
 51 | def run_world_base_example():
 52 |     # on chromebook
 53 |     # enc 114.229
 54 |     # synth 5.165
 55 |     fs, d = fetch_sample_speech_tapestry()
 56 |     d = d.astype("float32") / 2 ** 15
 57 | 
 58 |     def enc():
 59 |         temporal_positions_h, f0_h, vuv_h, f0_candidates_h = harvest(d, fs)
 60 |         temporal_positions_ct, spectrogram_ct, fs_ct = cheaptrick(d, fs,
 61 |                 temporal_positions_h, f0_h, vuv_h)
 62 |         temporal_positions_d4c, f0_d4c, vuv_d4c, aper_d4c, coarse_aper_d4c = d4c(d, fs,
 63 |                 temporal_positions_h, f0_h, vuv_h)
 64 | 
 65 |         return spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c
 66 | 
 67 | 
 68 |     start = time.time()
 69 |     spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c = enc()
 70 |     enc_done = time.time()
 71 | 
 72 |     y = world_synthesis(f0_d4c, vuv_d4c, coarse_aper_d4c, spectrogram_ct, fs)
 73 |     synth_done = time.time()
 74 | 
 75 |     print("enc time: {}".format(enc_done - start))
 76 |     print("synth time: {}".format(synth_done - enc_done))
 77 |     #y = world_synthesis(f0_d4c, vuv_d4c, aper_d4c, sp_r, fs)
 78 |     wavfile.write("out_base.wav", fs, soundsc(y))
 79 | 
 80 | 
 81 | def run_world_dct_example():
 82 |     # on chromebook
 83 |     # enc 114.229
 84 |     # synth 5.165
 85 |     fs, d = fetch_sample_speech_tapestry()
 86 |     d = d.astype("float32") / 2 ** 15
 87 | 
 88 |     def enc():
 89 |         temporal_positions_h, f0_h, vuv_h, f0_candidates_h = harvest(d, fs)
 90 |         temporal_positions_ct, spectrogram_ct, fs_ct = cheaptrick(d, fs,
 91 |                 temporal_positions_h, f0_h, vuv_h)
 92 |         temporal_positions_d4c, f0_d4c, vuv_d4c, aper_d4c, coarse_aper_d4c = d4c(d, fs,
 93 |                 temporal_positions_h, f0_h, vuv_h)
 94 | 
 95 |         return spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c
 96 | 
 97 |     start = time.time()
 98 |     spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c = enc()
 99 |     dct_buf = fftpack.dct(spectrogram_ct)
100 |     n_fft = 512
101 |     n_dct = 20
102 |     dct_buf = dct_buf[:, :n_dct]
103 |     idct_buf = np.zeros((dct_buf.shape[0], n_fft + 1))
104 |     idct_buf[:, :n_dct] = dct_buf
105 |     ispectrogram_ct = fftpack.idct(idct_buf)
106 |     enc_done = time.time()
107 | 
108 |     y = world_synthesis(f0_d4c, vuv_d4c, coarse_aper_d4c, spectrogram_ct, fs)
109 |     synth_done = time.time()
110 | 
111 |     print("enc time: {}".format(enc_done - start))
112 |     print("synth time: {}".format(synth_done - enc_done))
113 |     #y = world_synthesis(f0_d4c, vuv_d4c, aper_d4c, sp_r, fs)
114 |     wavfile.write("out_dct.wav", fs, soundsc(y))
115 | 
116 | 
# Example selection: uncomment the example(s) to run.
# NOTE: there is no __main__ guard, so the call below also runs on import.
#run_world_mgc_example()
#run_world_base_example()
run_world_dct_example()
120 | 


--------------------------------------------------------------------------------
/graph/graph_tools.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | # Author: Kyle Kastner
  3 | # License: BSD 3-Clause
  4 | 
  5 | # Using code modified from the following authors, collected in one place
  6 | # http://www.gilles-bertrand.com/2014/03/dijkstra-algorithm-python-example-source-code-shortest-path.html
  7 | # http://eddmann.com/posts/depth-first-search-and-breadth-first-search-in-python/
  8 | # https://gist.github.com/joninvski/701720
  9 | # https://jlmedina123.wordpress.com/2014/05/17/floyd-warshall-algorithm-in-python/
 10 | # http://code.activestate.com/recipes/119466-dijkstras-algorithm-for-shortest-paths/
 11 | import matplotlib.pyplot as plt
 12 | import matplotlib.image as mpimg
 13 | import subprocess
 14 | import os
 15 | 
 16 | 
 17 | def pwrap(args, shell=False):
 18 |     p = subprocess.Popen(args, shell=shell, stdout=subprocess.PIPE,
 19 |                          stdin=subprocess.PIPE, stderr=subprocess.PIPE,
 20 |                          universal_newlines=True)
 21 |     return p
 22 | 
 23 | # Print output
 24 | # http://stackoverflow.com/questions/4417546/constantly-print-subprocess-output-while-process-is-running
 25 | def execute(cmd, shell=False):
 26 |     popen = pwrap(cmd, shell=shell)
 27 |     for stdout_line in iter(popen.stdout.readline, ""):
 28 |         yield stdout_line
 29 | 
 30 |     popen.stdout.close()
 31 |     return_code = popen.wait()
 32 |     if return_code:
 33 |         raise subprocess.CalledProcessError(return_code, cmd)
 34 | 
 35 | 
 36 | def pe(cmd, shell=False):
 37 |     """
 38 |     Print and execute command on system
 39 |     """
 40 |     for line in execute(cmd, shell=shell):
 41 |         print(line, end="")
 42 | 
 43 | 
 44 | def _paths(graph, start, end, pop):
 45 |     # dfs or bfs depending on pop
 46 |     q = [(start, [start])]
 47 |     while q:
 48 |         if pop is None:
 49 |             (vertex, path) = q.pop()
 50 |         else:
 51 |             (vertex, path) = q.pop(0)
 52 |         for nx in set(graph[vertex].keys()) - set(path):
 53 |             if nx == end:
 54 |                 yield path + [nx]
 55 |             else:
 56 |                 q.append((nx, path + [nx]))
 57 | 
 58 | 
 59 | def dfs_paths(graph, start, end):
 60 |     return _paths(graph, start, end, None)
 61 | 
 62 | 
 63 | def bfs_paths(graph, start, end):
 64 |     return _paths(graph, start, end, 0)
 65 | 
 66 | 
 67 | def initialize_bf(graph, source):
 68 |     d = {} # Stands for destination
 69 |     p = {} # Stands for predecessor
 70 |     for node in graph:
 71 |         d[node] = float("inf")
 72 |         p[node] = None
 73 |     d[source] = 0
 74 |     return d, p
 75 | 
 76 | 
 77 | def relax_bf(node, neighbour, graph, d, p):
 78 |     if d[neighbour] > d[node] + graph[node][neighbour]:
 79 |         d[neighbour] = d[node] + graph[node][neighbour]
 80 |         p[neighbour] = node
 81 | 
 82 | 
 83 | def bellman_ford_paths(graph, source):
 84 |     # returns distances and paths
 85 |     d, p = initialize_bf(graph, source)
 86 |     for i in range(len(graph)-1):
 87 |         for u in graph:
 88 |             for v in graph[u]:
 89 |                 relax_bf(u, v, graph, d, p)
 90 | 
 91 |     # Check for negative-weight cycles
 92 |     for u in graph:
 93 |         for v in graph[u]:
 94 |             assert d[v] <= d[u] + graph[u][v]
 95 |     return d, p
 96 | 
 97 | 
 98 | def floyd_warshall_paths(graph):
 99 |     # returns distances and paths
100 |     # Initialize dist and pred:
101 |     # copy graph into dist, but add infinite where there is
102 |     # no edge, and 0 in the diagonal
103 |     dist = {}
104 |     pred = {}
105 |     for u in graph:
106 |         dist[u] = {}
107 |         pred[u] = {}
108 |         for v in graph:
109 |             dist[u][v] = float("inf")
110 |             pred[u][v] = -1
111 |         dist[u][u] = 0
112 |         for neighbor in graph[u]:
113 |             dist[u][neighbor] = graph[u][neighbor]
114 |             pred[u][neighbor] = u
115 | 
116 |     for t in graph:
117 |         # given dist u to v, check if path u - t - v is shorter
118 |         for u in graph:
119 |             for v in graph:
120 |                 newdist = dist[u][t] + dist[t][v]
121 |                 if newdist < dist[u][v]:
122 |                     dist[u][v] = newdist
123 |                     pred[u][v] = pred[t][v] # route new path through t
124 |     return dist, pred
125 | 
126 | 
def dijkstra_path(graph, start, end, visited=None, distances=None, predecessors=None):
    """
    Find the shortest path between start and end nodes in a graph.

    Parameters
    ----------
    graph : dict
        Maps each node to a dict of {neighbor: edge weight}.
    start, end : hashable
        Source and target nodes.
    visited, distances, predecessors : list, dict, dict, optional
        Working state, created fresh on every call unless supplied. They
        used to be mutable defaults, so state from one call leaked into
        the next. Supplied containers are updated in place.

    Returns
    -------
    (distance, path) : total weight and the list of nodes from start to end.

    Raises
    ------
    KeyError or ValueError
        When `end` cannot be reached from `start`.
    """
    visited = [] if visited is None else visited
    distances = {} if distances is None else distances
    predecessors = {} if predecessors is None else predecessors

    # detect if it's the first time through, set current distance to zero
    if not visited:
        distances[start] = 0

    current = start
    while current != end:
        # process neighbors as per algorithm, keep track of predecessors
        for neighbor in graph[current]:
            if neighbor not in visited:
                neighbordist = distances.get(neighbor, float("inf"))
                tentativedist = distances[current] + graph[current][neighbor]
                if tentativedist < neighbordist:
                    distances[neighbor] = tentativedist
                    predecessors[neighbor] = current
        # neighbors processed, now mark the current node as visited
        visited.append(current)
        # the closest unvisited node becomes the next current node
        unvisiteds = dict((k, distances.get(k, float("inf")))
                          for k in graph if k not in visited)
        current = min(unvisiteds, key=unvisiteds.get)

    # we've found our end node, now walk the predecessors back to start
    path = []
    node = end
    while node is not None:
        path.append(node)
        node = predecessors.get(node, None)
    return distances[end], path[::-1]
155 | 
156 | 
def graphviz_plot(graph, fname="tmp_dotgraph.dot", show=True):
    """
    Write `graph` as a Graphviz dot file, render it to PNG with the `dot`
    command, and optionally display the image.

    Parameters
    ----------
    graph : dict
        Maps each node to a dict of {neighbor: edge label}.
    fname : str
        Path of the dot file to write; an existing file is overwritten
        after a warning. The PNG is written next to it with the extension
        replaced by ".png".
    show : bool
        When True, display the rendered PNG with matplotlib.
    """
    if os.path.exists(fname):
        print("WARNING: Overwriting existing file {} for new plots".format(fname))

    lines = ['digraph G {\nnode [width=.3,height=.3,shape=octagon,style=filled,color=skyblue];\noverlap="false";\nrankdir="LR";\n']
    for i in graph:
        for j in graph[i]:
            # str() lets non-string node ids be written too
            lines.append('      ' + str(i) + ' -> ' + str(j)
                         + ' [label="' + str(graph[i][j]) + '"];\n')
    lines.append('}')
    # the with block closes the file even if a write fails
    with open(fname, 'w') as f:
        f.writelines(lines)

    # splitext only strips the final extension, so "./x.dot" gives
    # "./x.png" (splitting on the first "." gave ".png")
    graphname = os.path.splitext(fname)[0] + ".png"
    pe(["dot", "-Tpng", fname, "-o", graphname])

    if show:
        plt.imshow(mpimg.imread(graphname))
        plt.show()
176 | 
177 | 
def test_graph_tools():
    """
    Smoke test: print the result of every path routine on a small
    weighted digraph, then render the graph with graphviz.
    """
    graph = {
        's': {'a': 2, 'b': 1},
        'a': {'s': 3, 'b': 4, 'c': 8},
        'b': {'s': 4, 'a': 2, 'd': 2},
        'c': {'a': 2, 'd': 7, 't': 4},
        'd': {'b': 1, 'c': 11, 't': 5},
        't': {'c': 4, 'd': 5},
    }

    print(list(bfs_paths(graph, 'a', 't')))
    print(list(dfs_paths(graph, 'a', 't')))
    print(dijkstra_path(graph, 'a', 't'))
    print(floyd_warshall_paths(graph))
    print(bellman_ford_paths(graph, 'a'))
    graphviz_plot(graph)


if __name__ == "__main__":
    test_graph_tools()
196 | 


--------------------------------------------------------------------------------
/mcts/puct_mcts.py:
--------------------------------------------------------------------------------
import copy

import numpy as np

try:
    import cPickle  # Python 2
except ImportError:
    import pickle as cPickle  # Python 3: same dumps() interface
  4 | 
  5 | class MemoizeMutable(object):
  6 |     def __init__(self, fn):
  7 |         self.fn = fn
  8 |         self.memo = {}
  9 | 
 10 |     def __call__(self, *args, **kwds):
 11 |         str = cPickle.dumps(args, 1) + cPickle.dumps(kwds, 1)
 12 |         if not self.memo.has_key(str):
 13 |             self.memo[str] = self.fn(*args, **kwds)
 14 |         else:
 15 |             pass
 16 |         return self.memo[str]
 17 | 
 18 | 
 19 | def softmax(x):
 20 |     assert len(x.shape) == 1
 21 |     probs = np.exp(x - np.max(x))
 22 |     probs /= np.sum(probs)
 23 |     return probs
 24 | 
 25 | 
 26 | class TreeNode(object):
 27 |     def __init__(self, prior_prob, parent):
 28 |         self.parent = parent
 29 |         self.Q_ = 0.
 30 |         self.P_ = float(prior_prob)
 31 |         # action -> tree node
 32 |         self.children_ = {}
 33 |         self.n_visits_ = 0
 34 | 
 35 |     def expand(self, actions_and_probs):
 36 |         for action, prob in actions_and_probs:
 37 |             if action not in self.children_:
 38 |                 self.children_[action] = TreeNode(prob, self)
 39 | 
 40 |     def is_leaf(self):
 41 |         return self.children_ == {}
 42 | 
 43 |     def is_root(self):
 44 |         return self.parent is None
 45 | 
 46 |     def _update(self, value):
 47 |         self.n_visits_ += 1
 48 |         # not tracking W directly
 49 |         # original update is
 50 |         # n_visits += 1
 51 |         # W += v
 52 |         # Q = W / n_visits
 53 |         # so,
 54 |         # the old W = Q * (n_visits - 1)
 55 |         # new W = old W + v
 56 |         # new Q = new W / n_visits
 57 |         # plugging in new W
 58 |         # new Q = (old W + v) / n_visits
 59 |         # plugging in old W
 60 |         # new Q = (Q * (n_visits - 1) + v)/n_visits
 61 |         # new_Q = (Q * n_visits - Q + v)/n_visits
 62 |         # new_Q = Q * n_visits/n_visits - Q/n_visits + v/n_visits
 63 |         # new_Q = Q - Q/n_visits + v/n_visits
 64 |         # new_Q = Q + (v - Q) / n_visits
 65 |         # new_Q += (v - Q) / n_visits
 66 |         self.Q_ += (value - self.Q_) / float(self.n_visits_)
 67 | 
 68 |     def update(self, value):
 69 |         if self.parent != None:
 70 |             # negative in the original code due to being the opposing player
 71 |             self.parent.update(value)
 72 |         self._update(value)
 73 | 
 74 |     def get_value(self, c_puct):
 75 |         self.U_ = c_puct * self.P_ * np.sqrt(float(self.parent.n_visits_)) / float(1. + self.n_visits_)
 76 |         return self.Q_ + self.U_
 77 | 
 78 |     def get_best(self, c_puct):
 79 |         best = max(self.children_.iteritems(), key=lambda x: x[1].get_value(c_puct))
 80 |         return best
 81 | 
 82 | 
 83 | class MCTS(object):
 84 |     def __init__(self, state_manager, c_puct=1.4, n_playout=1000, random_state=None):
 85 |         if random_state is None:
 86 |             raise ValueError("Must pass random_state object")
 87 |         self.random_state = random_state
 88 |         self.root = TreeNode(1., None)
 89 |         # state manager must, itself have *NO* state / updating behavior
 90 |         # internally. Otherwise we need deepcopy() in get_move_probs
 91 |         self.state_manager = state_manager
 92 |         self.c_puct = c_puct
 93 |         self.n_playout = n_playout
 94 |         self.tree_subs_ = []
 95 |         self.warn_at_ = 10000
 96 | 
 97 |     def playout(self, state):
 98 |         node = self.root
 99 |         while True:
100 |             if node.is_leaf():
101 |                 break
102 |             action, node = node.get_best(self.c_puct)
103 |             state = self.state_manager.get_next_state(state, action)
104 |         winner, score, end = self.state_manager.is_finished(state)
105 |         if not end:
106 |             # uniform prior probs
107 |             actions = self.state_manager.get_valid_actions(state)
108 |             action_space = self.state_manager.get_action_space()
109 |             probs = np.ones((len(actions))) / float(len(actions))
110 |             actions_and_probs = list(zip(actions, probs))
111 |             node.expand(actions_and_probs)
112 |         value = self.state_manager.rollout_from_state(state)
113 |         # negative here
114 |         node.update(value)
115 |         return None
116 | 
117 |     def get_action_probs(self, state, temp=1E-3):
118 |         # low temp -> nearly argmax
119 |         for n in range(self.n_playout):
120 |             self.playout(state)
121 | 
122 |         act_visits = [(act, node.n_visits_) for act, node in self.root.children_.items()]
123 |         if len(act_visits) == 0:
124 |             return None, None
125 |         actions, visits = zip(*act_visits)
126 |         action_probs = softmax(1. / temp * np.log(visits))
127 |         return actions, action_probs
128 | 
129 |     def sample_action(self, state, temp=1E-3, add_noise=True,
130 |                       dirichlet_coeff1=0.25, dirichlet_coeff2=0.3):
131 |         vsz = len(self.state_manager.get_action_space())
132 |         act_probs = np.zeros((vsz,))
133 |         acts, probs = self.get_action_probs(state, temp)
134 |         if acts == None:
135 |             return acts, probs
136 |         act_probs[list(acts)] = probs
137 |         if add_noise:
138 |             act = self.random_state.choice(acts, p=(1. - dirichlet_coeff1) * probs + dirichlet_coeff1 * self.random_state.dirichlet(dirichlet_coeff2 * np.ones(len(probs))))
139 |         else:
140 |             act = self.random_state.choice(acts, p=probs)
141 |         return act, act_probs
142 | 
143 |     def get_action(self, state):
144 |         vsz = len(self.state_manager.get_action_space())
145 |         act_probs = np.zeros((vsz,))
146 |         # temp doesn't matter for argmax
147 |         acts, probs = self.get_action_probs(state, temp=1.)
148 |         if acts == None:
149 |             return acts, probs
150 |         act_probs[list(acts)] = probs
151 |         maxes = np.max(act_probs)
152 |         opts = np.where(act_probs == maxes)[0]
153 |         if len(opts) > 1:
154 |             # choose the one with the highest win score if equal?
155 |             # if 2 options are *exactly* equal, just choose 1 at random
156 |             self.random_state.shuffle(opts)
157 |         act = opts[0]
158 |         return act, act_probs
159 | 
160 |     def update_tree_root(self, action):
161 |         if action in self.root.children_:
162 |             self.tree_subs_.append((self.root, self.root.children_[action]))
163 |             if len(self.tree_subs_) > self.warn_at_:
164 |                 print("WARNING: Over {} tree_subs_ detected, watch memory".format(self.warn_at_))
165 |                 # only print the warning a few times
166 |                 self.warn_at_ = 10 * self.warn_at_
167 |             self.root = self.root.children_[action]
168 |             self.root.parent = None
169 |         else:
170 |             raise ValueError("Action argument {} neither in root.children_ {} and not == -1 (reset)".format(self.root.children_.keys()))
171 | 
172 |     def reconstruct_tree(self):
173 |         # walk the list back to front, putting parents back in place
174 |         # should reconstruct tree while still preserving counts...
175 |         # this might be a bad idea for large state spaces
176 |         for pair in self.tree_subs_[::-1]:
177 |             self.root.parent = pair[0]
178 |             self.root = pair[0]
179 |         self.tree_subs_ = []
180 | 
181 |     def reset_tree(self):
182 |         print("Resetting tree")
183 |         self.root = TreeNode(1., None)
184 |         self.tree_subs_ = []
185 | 


--------------------------------------------------------------------------------
/audio/audio_tools.py:
--------------------------------------------------------------------------------
   1 | # License: BSD 3-clause
   2 | # Authors: Kyle Kastner
   3 | # LTSD routine from jfsantos (Joao Felipe Santos)
   4 | # Harvest, Cheaptrick, D4C, WORLD routines based on MATLAB code from M. Morise
   5 | # http://ml.cs.yamanashi.ac.jp/world/english/
   6 | # MGC code based on r9y9 (Ryuichi Yamamoto) MelGeneralizedCepstrums.jl
   7 | # Pieces also adapted from SPTK
   8 | from __future__ import division
   9 | import numpy as np
  10 | import scipy as sp
  11 | from numpy.lib.stride_tricks import as_strided
  12 | import scipy.signal as sg
  13 | from scipy.interpolate import interp1d
  14 | import wave
  15 | from scipy.cluster.vq import vq
  16 | from scipy import linalg, fftpack
  17 | from numpy.testing import assert_almost_equal
  18 | from scipy.linalg import svd
  19 | from scipy.io import wavfile
  20 | from scipy.signal import firwin
  21 | import zipfile
  22 | import tarfile
  23 | import os
  24 | import copy
  25 | import multiprocessing
  26 | from multiprocessing import Pool
  27 | import functools
  28 | import time
  29 | try:
  30 |     import urllib.request as urllib  # for backwards compatibility
  31 | except ImportError:
  32 |     import urllib2 as urllib
  33 | 
  34 | 
  35 | def download(url, server_fname, local_fname=None, progress_update_percentage=5,
  36 |              bypass_certificate_check=False):
  37 |     """
  38 |     An internet download utility modified from
  39 |     http://stackoverflow.com/questions/22676/
  40 |     how-do-i-download-a-file-over-http-using-python/22776#22776
  41 |     """
  42 |     if bypass_certificate_check:
  43 |         import ssl
  44 |         ctx = ssl.create_default_context()
  45 |         ctx.check_hostname = False
  46 |         ctx.verify_mode = ssl.CERT_NONE
  47 |         u = urllib.urlopen(url, context=ctx)
  48 |     else:
  49 |         u = urllib.urlopen(url)
  50 |     if local_fname is None:
  51 |         local_fname = server_fname
  52 |     full_path = local_fname
  53 |     meta = u.info()
  54 |     with open(full_path, 'wb') as f:
  55 |         try:
  56 |             file_size = int(meta.get("Content-Length"))
  57 |         except TypeError:
  58 |             print("WARNING: Cannot get file size, displaying bytes instead!")
  59 |             file_size = 100
  60 |         print("Downloading: %s Bytes: %s" % (server_fname, file_size))
  61 |         file_size_dl = 0
  62 |         block_sz = int(1E7)
  63 |         p = 0
  64 |         while True:
  65 |             buffer = u.read(block_sz)
  66 |             if not buffer:
  67 |                 break
  68 |             file_size_dl += len(buffer)
  69 |             f.write(buffer)
  70 |             if (file_size_dl * 100. / file_size) > p:
  71 |                 status = r"%10d  [%3.2f%%]" % (file_size_dl, file_size_dl *
  72 |                                                100. / file_size)
  73 |                 print(status)
  74 |                 p += progress_update_percentage
  75 | 
  76 | 
  77 | def fetch_sample_speech_tapestry():
  78 |     url = "https://www.dropbox.com/s/qte66a7haqspq2g/tapestry.wav?dl=1"
  79 |     wav_path = "tapestry.wav"
  80 |     if not os.path.exists(wav_path):
  81 |         download(url, wav_path)
  82 |     fs, d = wavfile.read(wav_path)
  83 |     d = d.astype('float32') / (2 ** 15)
  84 |     # file is stereo? - just choose one channel
  85 |     return fs, d
  86 | 
  87 | 
  88 | def fetch_sample_file(wav_path):
  89 |     if not os.path.exists(wav_path):
  90 |         raise ValueError("Unable to find file at path %s" % wav_path)
  91 |     fs, d = wavfile.read(wav_path)
  92 |     d = d.astype('float32') / (2 ** 15)
  93 |     # file is stereo - just choose one channel
  94 |     if len(d.shape) > 1:
  95 |         d = d[:, 0]
  96 |     return fs, d
  97 | 
  98 | 
  99 | def fetch_sample_music():
 100 |     url = "http://www.music.helsinki.fi/tmt/opetus/uusmedia/esim/"
 101 |     url += "a2002011001-e02-16kHz.wav"
 102 |     wav_path = "test.wav"
 103 |     if not os.path.exists(wav_path):
 104 |         download(url, wav_path)
 105 |     fs, d = wavfile.read(wav_path)
 106 |     d = d.astype('float32') / (2 ** 15)
 107 |     # file is stereo - just choose one channel
 108 |     d = d[:, 0]
 109 |     return fs, d
 110 | 
 111 | 
 112 | def fetch_sample_speech_fruit(n_samples=None):
 113 |     url = 'https://dl.dropboxusercontent.com/u/15378192/audio.tar.gz'
 114 |     wav_path = "audio.tar.gz"
 115 |     if not os.path.exists(wav_path):
 116 |         download(url, wav_path)
 117 |     tf = tarfile.open(wav_path)
 118 |     wav_names = [fname for fname in tf.getnames()
 119 |                  if ".wav" in fname.split(os.sep)[-1]]
 120 |     speech = []
 121 |     print("Loading speech files...")
 122 |     for wav_name in wav_names[:n_samples]:
 123 |         f = tf.extractfile(wav_name)
 124 |         fs, d = wavfile.read(f)
 125 |         d = d.astype('float32') / (2 ** 15)
 126 |         speech.append(d)
 127 |     return fs, speech
 128 | 
 129 | 
 130 | def fetch_sample_speech_eustace(n_samples=None):
 131 |     """
 132 |     http://www.cstr.ed.ac.uk/projects/eustace/download.html
 133 |     """
 134 |     # data
 135 |     url = "http://www.cstr.ed.ac.uk/projects/eustace/down/eustace_wav.zip"
 136 |     wav_path = "eustace_wav.zip"
 137 |     if not os.path.exists(wav_path):
 138 |         download(url, wav_path)
 139 | 
 140 |     # labels
 141 |     url = "http://www.cstr.ed.ac.uk/projects/eustace/down/eustace_labels.zip"
 142 |     labels_path = "eustace_labels.zip"
 143 |     if not os.path.exists(labels_path):
 144 |         download(url, labels_path)
 145 | 
 146 |     # Read wavfiles
 147 |     # 16 kHz wav
 148 |     zf = zipfile.ZipFile(wav_path, 'r')
 149 |     wav_names = [fname for fname in zf.namelist()
 150 |                  if ".wav" in fname.split(os.sep)[-1]]
 151 |     fs = 16000
 152 |     speech = []
 153 |     print("Loading speech files...")
 154 |     for wav_name in wav_names[:n_samples]:
 155 |         wav_str = zf.read(wav_name)
 156 |         d = np.frombuffer(wav_str, dtype=np.int16)
 157 |         d = d.astype('float32') / (2 ** 15)
 158 |         speech.append(d)
 159 | 
 160 |     zf = zipfile.ZipFile(labels_path, 'r')
 161 |     label_names = [fname for fname in zf.namelist()
 162 |                    if ".lab" in fname.split(os.sep)[-1]]
 163 |     labels = []
 164 |     print("Loading label files...")
 165 |     for label_name in label_names[:n_samples]:
 166 |         label_file_str = zf.read(label_name)
 167 |         labels.append(label_file_str)
 168 |     return fs, speech
 169 | 
 170 | 
 171 | def stft(X, fftsize=128, step="half", mean_normalize=True, real=False,
 172 |          compute_onesided=True):
 173 |     """
 174 |     Compute STFT for 1D real valued input X
 175 |     """
 176 |     if real:
 177 |         local_fft = fftpack.rfft
 178 |         cut = -1
 179 |     else:
 180 |         local_fft = fftpack.fft
 181 |         cut = None
 182 |     if compute_onesided:
 183 |         cut = fftsize // 2 + 1
 184 |     if mean_normalize:
 185 |         X -= X.mean()
 186 |     if step == "half":
 187 |         X = halfoverlap(X, fftsize)
 188 |     else:
 189 |         X = overlap(X, fftsize, step)
 190 |     size = fftsize
 191 |     win = 0.54 - .46 * np.cos(2 * np.pi * np.arange(size) / (size - 1))
 192 |     X = X * win[None]
 193 |     X = local_fft(X)[:, :cut]
 194 |     return X
 195 | 
 196 | 
 197 | def istft(X, fftsize=128, step="half", wsola=False, mean_normalize=True,
 198 |           real=False, compute_onesided=True):
 199 |     """
 200 |     Compute ISTFT for STFT transformed X
 201 |     """
 202 |     if real:
 203 |         local_ifft = fftpack.irfft
 204 |         X_pad = np.zeros((X.shape[0], X.shape[1] + 1)) + 0j
 205 |         X_pad[:, :-1] = X
 206 |         X = X_pad
 207 |     else:
 208 |         local_ifft = fftpack.ifft
 209 |     if compute_onesided:
 210 |         X_pad = np.zeros((X.shape[0], 2 * X.shape[1])) + 0j
 211 |         X_pad[:, :fftsize // 2 + 1] = X
 212 |         X_pad[:, fftsize // 2 + 1:] = 0
 213 |         X = X_pad
 214 |     X = local_ifft(X).astype("float64")
 215 |     if step == "half":
 216 |         X = invert_halfoverlap(X)
 217 |     else:
 218 |         X = overlap_add(X, step, wsola=wsola)
 219 |     if mean_normalize:
 220 |         X -= np.mean(X)
 221 |     return X
 222 | 
 223 | 
 224 | def mdct_slow(X, dctsize=128):
 225 |     M = dctsize
 226 |     N = 2 * dctsize
 227 |     N_0 = (M + 1) / 2
 228 |     X = halfoverlap(X, N)
 229 |     X = sine_window(X)
 230 |     n, k = np.meshgrid(np.arange(N), np.arange(M))
 231 |     # Use transpose due to "samples as rows" convention
 232 |     tf = np.cos(np.pi * (n + N_0) * (k + 0.5) / M).T
 233 |     return np.dot(X, tf)
 234 | 
 235 | 
 236 | def imdct_slow(X, dctsize=128):
 237 |     M = dctsize
 238 |     N = 2 * dctsize
 239 |     N_0 = (M + 1) / 2
 240 |     N_4 = N / 4
 241 |     n, k = np.meshgrid(np.arange(N), np.arange(M))
 242 |     # inverse *is not* transposed
 243 |     tf = np.cos(np.pi * (n + N_0) * (k + 0.5) / M)
 244 |     X_r = np.dot(X, tf) / N_4
 245 |     X_r = sine_window(X_r)
 246 |     X = invert_halfoverlap(X_r)
 247 |     return X
 248 | 
 249 | 
 250 | def nsgcwin(fmin, fmax, n_bins, fs, signal_len, gamma):
 251 |     """
 252 |     Nonstationary Gabor window calculation
 253 | 
 254 |     References
 255 |     ----------
 256 |     Velasco G. A., Holighaus N., Dorfler M., Grill T.
 257 |     Constructing an invertible constant-Q transform with nonstationary Gabor
 258 |     frames, Proceedings of the 14th International Conference on Digital
 259 |     Audio Effects (DAFx 11), Paris, France, 2011
 260 | 
 261 |     Holighaus N., Dorfler M., Velasco G. A. and Grill T.
 262 |     A framework for invertible, real-time constant-Q transforms, submitted.
 263 | 
 264 |     Original matlab code copyright follows:
 265 | 
 266 |     AUTHOR(s) : Monika Dorfler, Gino Angelo Velasco, Nicki Holighaus, 2010-2011
 267 | 
 268 |     COPYRIGHT : (c) NUHAG, Dept.Math., University of Vienna, AUSTRIA
 269 |     http://nuhag.eu/
 270 |     Permission is granted to modify and re-distribute this
 271 |     code in any manner as long as this notice is preserved.
 272 |     All standard disclaimers apply.
 273 |     """
 274 |     # use a hanning window
 275 |     # no fractional shifts
 276 |     fftres = fs / signal_len
 277 |     fmin = float(fmin)
 278 |     fmax = float(fmax)
 279 |     gamma = float(gamma)
 280 |     nyq = fs / 2.
 281 |     b = np.floor(n_bins * np.log2(fmax / fmin))
 282 |     fbas = fmin * 2 ** (np.arange(b + 1) / float(n_bins))
 283 |     Q = 2 ** (1. / n_bins) - 2 ** (-1. / n_bins)
 284 |     cqtbw = Q * fbas + gamma
 285 |     cqtbw = cqtbw.ravel()
 286 |     maxidx = np.where(fbas + cqtbw / 2. > nyq)[0]
 287 |     if len(maxidx) > 0:
 288 |         # replicate bug in MATLAB version...
 289 |         # or is it a feature
 290 |         if sum(maxidx) == 0:
 291 |             first = len(cqtbw) - 1
 292 |         else:
 293 |             first = maxidx[0]
 294 |         fbas = fbas[:first]
 295 |         cqtbw = cqtbw[:first]
 296 |     minidx = np.where(fbas - cqtbw / 2. < 0)[0]
 297 |     if len(minidx) > 0:
 298 |         fbas = fbas[minidx[-1]+1:]
 299 |         cqtbw = cqtbw[minidx[-1]+1:]
 300 | 
 301 |     fbas_len = len(fbas)
 302 |     fbas_new = np.zeros((2 * (len(fbas) + 1)))
 303 |     fbas_new[1:len(fbas) + 1] = fbas
 304 |     fbas = fbas_new
 305 |     fbas[fbas_len + 1] = nyq
 306 |     fbas[fbas_len + 2:] = fs - fbas[1:fbas_len + 1][::-1]
 307 |     bw = np.zeros_like(fbas)
 308 |     bw[0] = 2 * fmin
 309 |     bw[1:len(cqtbw) + 1] = cqtbw
 310 |     bw[len(cqtbw) + 1] = fbas[fbas_len + 2] - fbas[fbas_len]
 311 |     bw[-len(cqtbw):] = cqtbw[::-1]
 312 |     bw = bw / fftres
 313 |     fbas = fbas / fftres
 314 | 
 315 |     posit = np.zeros_like(fbas)
 316 |     posit[:fbas_len + 2] = np.floor(fbas[:fbas_len + 2])
 317 |     posit[fbas_len + 2:] = np.ceil(fbas[fbas_len + 2:])
 318 |     base_shift = -posit[-1] % signal_len
 319 |     shift = np.zeros_like(posit).astype("int32")
 320 |     shift[1:] = (posit[1:] - posit[:-1]).astype("int32")
 321 |     shift[0] = base_shift
 322 | 
 323 |     bw = np.round(bw)
 324 |     bwfac = 1
 325 |     M = bw
 326 | 
 327 |     min_win = 4
 328 |     for ii in range(len(bw)):
 329 |         if bw[ii] < min_win:
 330 |             bw[ii] = min_win
 331 |             M[ii] = bw[ii]
 332 | 
 333 |     def _win(numel):
 334 |         if numel % 2 == 0:
 335 |             s1 = np.arange(0, .5, 1. / numel)
 336 |             if len(s1) != numel // 2:
 337 |                 # edge case with small floating point numbers...
 338 |                 s1 = s1[:-1]
 339 |             s2 = np.arange(-.5, 0, 1. / numel)
 340 |             if len(s2) != numel // 2:
 341 |                 # edge case with small floating point numbers...
 342 |                 s2 = s2[:-1]
 343 |             x = np.concatenate((s1, s2))
 344 |         else:
 345 |             s1 = np.arange(0, .5, 1. / numel)
 346 |             s2 = np.arange(-.5 + .5 / numel, 0, 1. / numel)
 347 |             if len(s2) != numel // 2:  # assume integer truncate 27 // 2 = 13
 348 |                 s2 = s2[:-1]
 349 |             x = np.concatenate((s1, s2))
 350 |         assert len(x) == numel
 351 |         g = .5 + .5 * np.cos(2 * np.pi * x)
 352 |         return g
 353 | 
 354 |     multiscale = [_win(bi) for bi in bw]
 355 |     bw = bwfac * np.ceil(M / bwfac)
 356 | 
 357 |     for kk in [0, fbas_len + 1]:
 358 |         if M[kk] > M[kk + 1]:
 359 |             multiscale[kk] = np.ones(M[kk]).astype(multiscale[0].dtype)
 360 |             i1 = np.floor(M[kk] / 2) - np.floor(M[kk + 1] / 2)
 361 |             i2 = np.floor(M[kk] / 2) + np.ceil(M[kk + 1] / 2)
 362 |             # Very rarely, gets an off by 1 error? Seems to be at the end...
 363 |             # for now, slice
 364 |             multiscale[kk][i1:i2] = _win(M[kk + 1])
 365 |             multiscale[kk] = multiscale[kk] / np.sqrt(M[kk])
 366 |     return multiscale, shift, M
 367 | 
 368 | 
 369 | def nsgtf_real(X, multiscale, shift, window_lens):
 370 |     """
 371 |     Nonstationary Gabor Transform for real values
 372 | 
 373 |     References
 374 |     ----------
 375 |     Velasco G. A., Holighaus N., Dorfler M., Grill T.
 376 |     Constructing an invertible constant-Q transform with nonstationary Gabor
 377 |     frames, Proceedings of the 14th International Conference on Digital
 378 |     Audio Effects (DAFx 11), Paris, France, 2011
 379 | 
 380 |     Holighaus N., Dorfler M., Velasco G. A. and Grill T.
 381 |     A framework for invertible, real-time constant-Q transforms, submitted.
 382 | 
 383 |     Original matlab code copyright follows:
 384 | 
 385 |     AUTHOR(s) : Monika Dorfler, Gino Angelo Velasco, Nicki Holighaus, 2010-2011
 386 | 
 387 |     COPYRIGHT : (c) NUHAG, Dept.Math., University of Vienna, AUSTRIA
 388 |     http://nuhag.eu/
 389 |     Permission is granted to modify and re-distribute this
 390 |     code in any manner as long as this notice is preserved.
 391 |     All standard disclaimers apply.
 392 |     """
 393 |     # This will break with multchannel input
 394 |     signal_len = len(X)
 395 |     N = len(shift)
 396 |     X_fft = np.fft.fft(X)
 397 | 
 398 |     fill = np.sum(shift) - signal_len
 399 |     if fill > 0:
 400 |         X_fft_tmp = np.zeros((signal_len + shift))
 401 |         X_fft_tmp[:len(X_fft)] = X_fft
 402 |         X_fft = X_fft_tmp
 403 |     posit = np.cumsum(shift) - shift[0]
 404 |     scale_lens = np.array([len(m) for m in multiscale])
 405 |     N = np.where(posit - np.floor(scale_lens) <= (signal_len + fill) / 2)[0][-1]
 406 |     c = []
 407 |     # c[0] is almost exact
 408 |     for ii in range(N):
 409 |         idx_l = np.arange(np.ceil(scale_lens[ii] / 2), scale_lens[ii])
 410 |         idx_r = np.arange(np.ceil(scale_lens[ii] / 2))
 411 |         idx = np.concatenate((idx_l, idx_r))
 412 |         idx = idx.astype("int32")
 413 |         subwin_range = posit[ii] + np.arange(-np.floor(scale_lens[ii] / 2),
 414 |                                              np.ceil(scale_lens[ii] / 2))
 415 |         win_range = subwin_range % (signal_len + fill)
 416 |         win_range = win_range.astype("int32")
 417 |         if window_lens[ii] < scale_lens[ii]:
 418 |             raise ValueError("Not handling 'not enough channels' case")
 419 |         else:
 420 |             temp = np.zeros((window_lens[ii],)).astype(X_fft.dtype)
 421 |             temp_idx_l = np.arange(len(temp) - np.floor(scale_lens[ii] / 2),
 422 |                                    len(temp))
 423 |             temp_idx_r = np.arange(np.ceil(scale_lens[ii] / 2))
 424 |             temp_idx = np.concatenate((temp_idx_l, temp_idx_r))
 425 |             temp_idx = temp_idx.astype("int32")
 426 |             temp[temp_idx] = X_fft[win_range] * multiscale[ii][idx]
 427 |             fs_new_bins = window_lens[ii]
 428 |             fk_bins = posit[ii]
 429 |             displace = fk_bins - np.floor(fk_bins / fs_new_bins) * fs_new_bins
 430 |             displace = displace.astype("int32")
 431 |             temp = np.roll(temp, displace)
 432 |         c.append(np.fft.ifft(temp))
 433 | 
 434 |     if 0:
 435 |         # cell2mat concatenation
 436 |         c = np.concatenate(c)
 437 |     return c
 438 | 
 439 | 
 440 | def nsdual(multiscale, shift, window_lens):
 441 |     """
 442 |     Calculation of nonstationary inverse gabor filters
 443 | 
 444 |     References
 445 |     ----------
 446 |     Velasco G. A., Holighaus N., Dorfler M., Grill T.
 447 |     Constructing an invertible constant-Q transform with nonstationary Gabor
 448 |     frames, Proceedings of the 14th International Conference on Digital
 449 |     Audio Effects (DAFx 11), Paris, France, 2011
 450 | 
 451 |     Holighaus N., Dorfler M., Velasco G. A. and Grill T.
 452 |     A framework for invertible, real-time constant-Q transforms, submitted.
 453 | 
 454 |     Original matlab code copyright follows:
 455 | 
 456 |     AUTHOR(s) : Monika Dorfler, Gino Angelo Velasco, Nicki Holighaus, 2010-2011
 457 | 
 458 |     COPYRIGHT : (c) NUHAG, Dept.Math., University of Vienna, AUSTRIA
 459 |     http://nuhag.eu/
 460 |     Permission is granted to modify and re-distribute this
 461 |     code in any manner as long as this notice is preserved.
 462 |     All standard disclaimers apply.
 463 |     """
 464 |     N = len(shift)
 465 |     posit = np.cumsum(shift)
 466 |     seq_len = posit[-1]
 467 |     posit = posit - shift[0]
 468 | 
 469 |     diagonal = np.zeros((seq_len,))
 470 |     win_range = []
 471 | 
 472 |     for ii in range(N):
 473 |         filt_len = len(multiscale[ii])
 474 |         idx = np.arange(-np.floor(filt_len / 2), np.ceil(filt_len / 2))
 475 |         win_range.append((posit[ii] + idx) % seq_len)
 476 |         subdiag = window_lens[ii] * np.fft.fftshift(multiscale[ii]) ** 2
 477 |         ind = win_range[ii].astype(np.int)
 478 |         diagonal[ind] = diagonal[ind] + subdiag
 479 | 
 480 |     dual_multiscale = multiscale
 481 |     for ii in range(N):
 482 |         ind = win_range[ii].astype(np.int)
 483 |         dual_multiscale[ii] = np.fft.ifftshift(
 484 |             np.fft.fftshift(dual_multiscale[ii]) / diagonal[ind])
 485 |     return dual_multiscale
 486 | 
 487 | 
 488 | def nsgitf_real(c, c_dc, c_nyq, multiscale, shift):
 489 |     """
 490 |     Nonstationary Inverse Gabor Transform on real valued signal
 491 | 
 492 |     References
 493 |     ----------
 494 |     Velasco G. A., Holighaus N., Dorfler M., Grill T.
 495 |     Constructing an invertible constant-Q transform with nonstationary Gabor
 496 |     frames, Proceedings of the 14th International Conference on Digital
 497 |     Audio Effects (DAFx 11), Paris, France, 2011
 498 | 
 499 |     Holighaus N., Dorfler M., Velasco G. A. and Grill T.
 500 |     A framework for invertible, real-time constant-Q transforms, submitted.
 501 | 
 502 |     Original matlab code copyright follows:
 503 | 
 504 |     AUTHOR(s) : Monika Dorfler, Gino Angelo Velasco, Nicki Holighaus, 2010-2011
 505 | 
 506 |     COPYRIGHT : (c) NUHAG, Dept.Math., University of Vienna, AUSTRIA
 507 |     http://nuhag.eu/
 508 |     Permission is granted to modify and re-distribute this
 509 |     code in any manner as long as this notice is preserved.
 510 |     All standard disclaimers apply.
 511 |     """
 512 |     c_l = []
 513 |     c_l.append(c_dc)
 514 |     c_l.extend([ci for ci in c])
 515 |     c_l.append(c_nyq)
 516 | 
 517 |     posit = np.cumsum(shift)
 518 |     seq_len = posit[-1]
 519 |     posit -= shift[0]
 520 |     out = np.zeros((seq_len,)).astype(c_l[1].dtype)
 521 | 
 522 |     for ii in range(len(c_l)):
 523 |         filt_len = len(multiscale[ii])
 524 |         win_range = posit[ii] + np.arange(-np.floor(filt_len / 2),
 525 |                                           np.ceil(filt_len / 2))
 526 |         win_range = (win_range % seq_len).astype(np.int)
 527 |         temp = np.fft.fft(c_l[ii]) * len(c_l[ii])
 528 | 
 529 |         fs_new_bins = len(c_l[ii])
 530 |         fk_bins = posit[ii]
 531 |         displace = int(fk_bins - np.floor(fk_bins / fs_new_bins) * fs_new_bins)
 532 |         temp = np.roll(temp, -displace)
 533 |         l = np.arange(len(temp) - np.floor(filt_len / 2), len(temp))
 534 |         r = np.arange(np.ceil(filt_len / 2))
 535 |         temp_idx = (np.concatenate((l, r)) % len(temp)).astype(np.int)
 536 |         temp = temp[temp_idx]
 537 |         lf = np.arange(filt_len - np.floor(filt_len / 2), filt_len)
 538 |         rf = np.arange(np.ceil(filt_len / 2))
 539 |         filt_idx = np.concatenate((lf, rf)).astype(np.int)
 540 |         m = multiscale[ii][filt_idx]
 541 |         out[win_range] = out[win_range] + m * temp
 542 | 
 543 |     nyq_bin = np.floor(seq_len / 2) + 1
 544 |     out_idx = np.arange(
 545 |         nyq_bin - np.abs(1 - seq_len % 2) - 1, 0, -1).astype(np.int)
 546 |     out[nyq_bin:] = np.conj(out[out_idx])
 547 |     t_out = np.real(np.fft.ifft(out)).astype(np.float64)
 548 |     return t_out
 549 | 
 550 | 
 551 | def cqt(X, fs, n_bins=48, fmin=27.5, fmax="nyq", gamma=20):
 552 |     """
 553 |     Constant Q Transform
 554 | 
 555 |     References
 556 |     ----------
 557 |     Velasco G. A., Holighaus N., Dorfler M., Grill T.
 558 |     Constructing an invertible constant-Q transform with nonstationary Gabor
 559 |     frames, Proceedings of the 14th International Conference on Digital
 560 |     Audio Effects (DAFx 11), Paris, France, 2011
 561 | 
 562 |     Holighaus N., Dorfler M., Velasco G. A. and Grill T.
 563 |     A framework for invertible, real-time constant-Q transforms, submitted.
 564 | 
 565 |     Original matlab code copyright follows:
 566 | 
 567 |     AUTHOR(s) : Monika Dorfler, Gino Angelo Velasco, Nicki Holighaus, 2010-2011
 568 | 
 569 |     COPYRIGHT : (c) NUHAG, Dept.Math., University of Vienna, AUSTRIA
 570 |     http://nuhag.eu/
 571 |     Permission is granted to modify and re-distribute this
 572 |     code in any manner as long as this notice is preserved.
 573 |     All standard disclaimers apply.
 574 |     """
 575 |     if fmax == "nyq":
 576 |         fmax = fs / 2.
 577 |     multiscale, shift, window_lens = nsgcwin(fmin, fmax, n_bins, fs,
 578 |                                              len(X), gamma)
 579 |     fbas = fs * np.cumsum(shift[1:]) / len(X)
 580 |     fbas = fbas[:len(window_lens) // 2 - 1]
 581 |     bins = window_lens.shape[0] // 2 - 1
 582 |     window_lens[1:bins + 1] = window_lens[bins + 2]
 583 |     window_lens[bins + 2:] = window_lens[1:bins + 1][::-1]
 584 |     norm = 2. * window_lens[:bins + 2] / float(len(X))
 585 |     norm = np.concatenate((norm, norm[1:-1][::-1]))
 586 |     multiscale = [norm[ii] * multiscale[ii] for ii in range(2 * (bins + 1))]
 587 | 
 588 |     c = nsgtf_real(X, multiscale, shift, window_lens)
 589 |     c_dc = c[0]
 590 |     c_nyq = c[-1]
 591 |     c_sub = c[1:-1]
 592 |     c = np.vstack(c_sub)
 593 |     return c, c_dc, c_nyq, multiscale, shift, window_lens
 594 | 
 595 | 
 596 | def icqt(X_cq, c_dc, c_nyq, multiscale, shift, window_lens):
 597 |     """
 598 |     Inverse constant Q Transform
 599 | 
 600 |     References
 601 |     ----------
 602 |     Velasco G. A., Holighaus N., Dorfler M., Grill T.
 603 |     Constructing an invertible constant-Q transform with nonstationary Gabor
 604 |     frames, Proceedings of the 14th International Conference on Digital
 605 |     Audio Effects (DAFx 11), Paris, France, 2011
 606 | 
 607 |     Holighaus N., Dorfler M., Velasco G. A. and Grill T.
 608 |     A framework for invertible, real-time constant-Q transforms, submitted.
 609 | 
 610 |     Original matlab code copyright follows:
 611 | 
 612 |     AUTHOR(s) : Monika Dorfler, Gino Angelo Velasco, Nicki Holighaus, 2010-2011
 613 | 
 614 |     COPYRIGHT : (c) NUHAG, Dept.Math., University of Vienna, AUSTRIA
 615 |     http://nuhag.eu/
 616 |     Permission is granted to modify and re-distribute this
 617 |     code in any manner as long as this notice is preserved.
 618 |     All standard disclaimers apply.
 619 |     """
 620 |     new_multiscale = nsdual(multiscale, shift, window_lens)
 621 |     X = nsgitf_real(X_cq, c_dc, c_nyq, new_multiscale, shift)
 622 |     return X
 623 | 
 624 | 
 625 | def rolling_mean(X, window_size):
 626 |     w = 1.0 / window_size * np.ones((window_size))
 627 |     return np.correlate(X, w, 'valid')
 628 | 
 629 | 
 630 | def rolling_window(X, window_size):
 631 |     # for 1d data
 632 |     shape = X.shape[:-1] + (X.shape[-1] - window_size + 1, window_size)
 633 |     strides = X.strides + (X.strides[-1],)
 634 |     return np.lib.stride_tricks.as_strided(X, shape=shape, strides=strides)
 635 | 
 636 | 
 637 | def voiced_unvoiced(X, window_size=256, window_step=128, copy=True):
 638 |     """
 639 |     Voiced unvoiced detection from a raw signal
 640 | 
 641 |     Based on code from:
 642 |         https://www.clear.rice.edu/elec532/PROJECTS96/lpc/code.html
 643 | 
 644 |     Other references:
 645 |         http://www.seas.ucla.edu/spapl/code/harmfreq_MOLRT_VAD.m
 646 | 
 647 |     Parameters
 648 |     ----------
 649 |     X : ndarray
 650 |         Raw input signal
 651 | 
 652 |     window_size : int, optional (default=256)
 653 |         The window size to use, in samples.
 654 | 
 655 |     window_step : int, optional (default=128)
 656 |         How far the window steps after each calculation, in samples.
 657 | 
 658 |     copy : bool, optional (default=True)
 659 |         Whether to make a copy of the input array or allow in place changes.
 660 |     """
 661 |     X = np.array(X, copy=copy)
 662 |     if len(X.shape) < 2:
 663 |         X = X[None]
 664 |     n_points = X.shape[1]
 665 |     n_windows = n_points // window_step
 666 |     # Padding
 667 |     pad_sizes = [(window_size - window_step) // 2,
 668 |                  window_size - window_step // 2]
 669 |     # TODO: Handling for odd window sizes / steps
 670 |     X = np.hstack((np.zeros((X.shape[0], pad_sizes[0])), X,
 671 |                    np.zeros((X.shape[0], pad_sizes[1]))))
 672 | 
 673 |     clipping_factor = 0.6
 674 |     b, a = sg.butter(10, np.pi * 9 / 40)
 675 |     voiced_unvoiced = np.zeros((n_windows, 1))
 676 |     period = np.zeros((n_windows, 1))
 677 |     for window in range(max(n_windows - 1, 1)):
 678 |         XX = X.ravel()[window * window_step + np.arange(window_size)]
 679 |         XX *= sg.hamming(len(XX))
 680 |         XX = sg.lfilter(b, a, XX)
 681 |         left_max = np.max(np.abs(XX[:len(XX) // 3]))
 682 |         right_max = np.max(np.abs(XX[-len(XX) // 3:]))
 683 |         clip_value = clipping_factor * np.min([left_max, right_max])
 684 |         XX_clip = np.clip(XX, clip_value, -clip_value)
 685 |         XX_corr = np.correlate(XX_clip, XX_clip, mode='full')
 686 |         center = np.argmax(XX_corr)
 687 |         right_XX_corr = XX_corr[center:]
 688 |         prev_window = max([window - 1, 0])
 689 |         if voiced_unvoiced[prev_window] > 0:
 690 |             # Want it to be harder to turn off than turn on
 691 |             strength_factor = .29
 692 |         else:
 693 |             strength_factor = .3
 694 |         start = np.where(right_XX_corr < .3 * XX_corr[center])[0]
 695 |         # 20 is hardcoded but should depend on samplerate?
 696 |         try:
 697 |             start = np.max([20, start[0]])
 698 |         except IndexError:
 699 |             start = 20
 700 |         search_corr = right_XX_corr[start:]
 701 |         index = np.argmax(search_corr)
 702 |         second_max = search_corr[index]
 703 |         if (second_max > strength_factor * XX_corr[center]):
 704 |             voiced_unvoiced[window] = 1
 705 |             period[window] = start + index - 1
 706 |         else:
 707 |             voiced_unvoiced[window] = 0
 708 |             period[window] = 0
 709 |     return np.array(voiced_unvoiced), np.array(period)
 710 | 
 711 | 
 712 | def lpc_analysis(X, order=8, window_step=128, window_size=2 * 128,
 713 |                  emphasis=0.9, voiced_start_threshold=.9,
 714 |                  voiced_stop_threshold=.6, truncate=False, copy=True):
 715 |     """
 716 |     Extract LPC coefficients from a signal
 717 | 
 718 |     Based on code from:
 719 |         http://labrosa.ee.columbia.edu/matlab/sws/
 720 | 
 721 |     _rParameters
 722 |     ----------
 723 |     X : ndarray
 724 |         Signals to extract LPC coefficients from
 725 | 
 726 |     order : int, optional (default=8)
 727 |         Order of the LPC coefficients. For speech, use the general rule that the
 728 |         order is two times the expected number of formants plus 2.
 729 |         This can be formulated as 2 + 2 * (fs // 2000). For approx. signals
 730 |         with fs = 7000, this is 8 coefficients - 2 + 2 * (7000 // 2000).
 731 | 
 732 |     window_step : int, optional (default=128)
 733 |         The size (in samples) of the space between each window
 734 | 
 735 |     window_size : int, optional (default=2 * 128)
 736 |         The size of each window (in samples) to extract coefficients over
 737 | 
 738 |     emphasis : float, optional (default=0.9)
 739 |         The emphasis coefficient to use for filtering
 740 | 
 741 |     voiced_start_threshold : float, optional (default=0.9)
 742 |         Upper power threshold for estimating when speech has started
 743 | 
 744 |     voiced_stop_threshold : float, optional (default=0.6)
 745 |         Lower power threshold for estimating when speech has stopped
 746 | 
 747 |     truncate : bool, optional (default=False)
 748 |         Whether to cut the data at the last window or do zero padding.
 749 | 
 750 |     copy : bool, optional (default=True)
 751 |         Whether to copy the input X or modify in place
 752 | 
 753 |     Returns
 754 |     -------
 755 |     lp_coefficients : ndarray
 756 |         lp coefficients to describe the frame
 757 | 
 758 |     per_frame_gain : ndarray
 759 |         calculated gain for each frame
 760 | 
 761 |     residual_excitation : ndarray
 762 |         leftover energy which is not described by lp coefficents and gain
 763 | 
 764 |     voiced_frames : ndarray
 765 |         array of [0, 1] values which holds voiced/unvoiced decision for each
 766 |         frame.
 767 | 
 768 |     References
 769 |     ----------
 770 |     D. P. W. Ellis (2004), "Sinewave Speech Analysis/Synthesis in Matlab",
 771 |     Web resource, available: http://www.ee.columbia.edu/ln/labrosa/matlab/sws/
 772 |     """
 773 |     X = np.array(X, copy=copy)
 774 |     if len(X.shape) < 2:
 775 |         X = X[None]
 776 | 
 777 |     n_points = X.shape[1]
 778 |     n_windows = int(n_points // window_step)
 779 |     if not truncate:
 780 |         pad_sizes = [(window_size - window_step) // 2,
 781 |                      window_size - window_step // 2]
 782 |         # TODO: Handling for odd window sizes / steps
 783 |         X = np.hstack((np.zeros((X.shape[0], int(pad_sizes[0]))), X,
 784 |                        np.zeros((X.shape[0], int(pad_sizes[1])))))
 785 |     else:
 786 |         pad_sizes = [0, 0]
 787 |         X = X[0, :n_windows * window_step]
 788 | 
 789 |     lp_coefficients = np.zeros((n_windows, order + 1))
 790 |     per_frame_gain = np.zeros((n_windows, 1))
 791 |     residual_excitation = np.zeros(
 792 |         int(((n_windows - 1) * window_step + window_size)))
 793 |     # Pre-emphasis high-pass filter
 794 |     X = sg.lfilter([1, -emphasis], 1, X)
 795 |     # stride_tricks.as_strided?
 796 |     autocorr_X = np.zeros((n_windows, int(2 * window_size - 1)))
 797 |     for window in range(max(n_windows - 1, 1)):
 798 |         wtws = int(window * window_step)
 799 |         XX = X.ravel()[wtws + np.arange(window_size, dtype="int32")]
 800 |         WXX = XX * sg.hanning(window_size)
 801 |         autocorr_X[window] = np.correlate(WXX, WXX, mode='full')
 802 |         center = np.argmax(autocorr_X[window])
 803 |         RXX = autocorr_X[window,
 804 |                          np.arange(center, window_size + order, dtype="int32")]
 805 |         R = linalg.toeplitz(RXX[:-1])
 806 |         solved_R = linalg.pinv(R).dot(RXX[1:])
 807 |         filter_coefs = np.hstack((1, -solved_R))
 808 |         residual_signal = sg.lfilter(filter_coefs, 1, WXX)
 809 |         gain = np.sqrt(np.mean(residual_signal ** 2))
 810 |         lp_coefficients[window] = filter_coefs
 811 |         per_frame_gain[window] = gain
 812 |         assign_range = wtws + np.arange(window_size, dtype="int32")
 813 |         residual_excitation[assign_range] += residual_signal / gain
 814 |     # Throw away first part in overlap mode for proper synthesis
 815 |     residual_excitation = residual_excitation[int(pad_sizes[0]):]
 816 |     return lp_coefficients, per_frame_gain, residual_excitation
 817 | 
 818 | 
 819 | def lpc_to_frequency(lp_coefficients, per_frame_gain):
 820 |     """
 821 |     Extract resonant frequencies and magnitudes from LPC coefficients and gains.
 822 |     Parameters
 823 |     ----------
 824 |     lp_coefficients : ndarray
 825 |         LPC coefficients, such as those calculated by ``lpc_analysis``
 826 | 
 827 |     per_frame_gain : ndarray
 828 |        Gain calculated for each frame, such as those calculated
 829 |        by ``lpc_analysis``
 830 | 
 831 |     Returns
 832 |     -------
 833 |     frequencies : ndarray
 834 |        Resonant frequencies calculated from LPC coefficients and gain. Returned
 835 |        frequencies are from 0 to 2 * pi
 836 | 
 837 |     magnitudes : ndarray
 838 |        Magnitudes of resonant frequencies
 839 | 
 840 |     References
 841 |     ----------
 842 |     D. P. W. Ellis (2004), "Sinewave Speech Analysis/Synthesis in Matlab",
 843 |     Web resource, available: http://www.ee.columbia.edu/ln/labrosa/matlab/sws/
 844 |     """
 845 |     n_windows, order = lp_coefficients.shape
 846 | 
 847 |     frame_frequencies = np.zeros((n_windows, (order - 1) // 2))
 848 |     frame_magnitudes = np.zeros_like(frame_frequencies)
 849 | 
 850 |     for window in range(n_windows):
 851 |         w_coefs = lp_coefficients[window]
 852 |         g_coefs = per_frame_gain[window]
 853 |         roots = np.roots(np.hstack(([1], w_coefs[1:])))
 854 |         # Roots doesn't return the same thing as MATLAB... agh
 855 |         frequencies, index = np.unique(
 856 |             np.abs(np.angle(roots)), return_index=True)
 857 |         # Make sure 0 doesn't show up...
 858 |         gtz = np.where(frequencies > 0)[0]
 859 |         frequencies = frequencies[gtz]
 860 |         index = index[gtz]
 861 |         magnitudes = g_coefs / (1. - np.abs(roots))
 862 |         sort_index = np.argsort(frequencies)
 863 |         frame_frequencies[window, :len(sort_index)] = frequencies[sort_index]
 864 |         frame_magnitudes[window, :len(sort_index)] = magnitudes[sort_index]
 865 |     return frame_frequencies, frame_magnitudes
 866 | 
 867 | 
 868 | def lpc_to_lsf(all_lpc):
 869 |     if len(all_lpc.shape) < 2:
 870 |         all_lpc = all_lpc[None]
 871 |     order = all_lpc.shape[1] - 1
 872 |     all_lsf = np.zeros((len(all_lpc), order))
 873 |     for i in range(len(all_lpc)):
 874 |         lpc = all_lpc[i]
 875 |         lpc1 = np.append(lpc, 0)
 876 |         lpc2 = lpc1[::-1]
 877 |         sum_filt = lpc1 + lpc2
 878 |         diff_filt = lpc1 - lpc2
 879 | 
 880 |         if order % 2 != 0:
 881 |             deconv_diff, _ = sg.deconvolve(diff_filt, [1, 0, -1])
 882 |             deconv_sum = sum_filt
 883 |         else:
 884 |             deconv_diff, _ = sg.deconvolve(diff_filt, [1, -1])
 885 |             deconv_sum, _ = sg.deconvolve(sum_filt, [1, 1])
 886 | 
 887 |         roots_diff = np.roots(deconv_diff)
 888 |         roots_sum = np.roots(deconv_sum)
 889 |         angle_diff = np.angle(roots_diff[::2])
 890 |         angle_sum = np.angle(roots_sum[::2])
 891 |         lsf = np.sort(np.hstack((angle_diff, angle_sum)))
 892 |         if len(lsf) != 0:
 893 |             all_lsf[i] = lsf
 894 |     return np.squeeze(all_lsf)
 895 | 
 896 | 
 897 | def lsf_to_lpc(all_lsf):
 898 |     if len(all_lsf.shape) < 2:
 899 |         all_lsf = all_lsf[None]
 900 |     order = all_lsf.shape[1]
 901 |     all_lpc = np.zeros((len(all_lsf), order + 1))
 902 |     for i in range(len(all_lsf)):
 903 |         lsf = all_lsf[i]
 904 |         zeros = np.exp(1j * lsf)
 905 |         sum_zeros = zeros[::2]
 906 |         diff_zeros = zeros[1::2]
 907 |         sum_zeros = np.hstack((sum_zeros, np.conj(sum_zeros)))
 908 |         diff_zeros = np.hstack((diff_zeros, np.conj(diff_zeros)))
 909 |         sum_filt = np.poly(sum_zeros)
 910 |         diff_filt = np.poly(diff_zeros)
 911 | 
 912 |         if order % 2 != 0:
 913 |             deconv_diff = sg.convolve(diff_filt, [1, 0, -1])
 914 |             deconv_sum = sum_filt
 915 |         else:
 916 |             deconv_diff = sg.convolve(diff_filt, [1, -1])
 917 |             deconv_sum = sg.convolve(sum_filt, [1, 1])
 918 | 
 919 |         lpc = .5 * (deconv_sum + deconv_diff)
 920 |         # Last coefficient is 0 and not returned
 921 |         all_lpc[i] = lpc[:-1]
 922 |     return np.squeeze(all_lpc)
 923 | 
 924 | 
 925 | def lpc_synthesis(lp_coefficients, per_frame_gain, residual_excitation=None,
 926 |                   voiced_frames=None, window_step=128, emphasis=0.9):
 927 |     """
 928 |     Synthesize a signal from LPC coefficients
 929 | 
 930 |     Based on code from:
 931 |         http://labrosa.ee.columbia.edu/matlab/sws/
 932 |         http://web.uvic.ca/~tyoon/resource/auditorytoolbox/auditorytoolbox/synlpc.html
 933 | 
 934 |     Parameters
 935 |     ----------
 936 |     lp_coefficients : ndarray
 937 |         Linear prediction coefficients
 938 | 
 939 |     per_frame_gain : ndarray
 940 |         Gain coefficients
 941 | 
 942 |     residual_excitation : ndarray or None, optional (default=None)
 943 |         Residual excitations. If None, this will be synthesized with white noise
 944 | 
 945 |     voiced_frames : ndarray or None, optional (default=None)
 946 |         Voiced frames. If None, all frames assumed to be voiced.
 947 | 
 948 |     window_step : int, optional (default=128)
 949 |         The size (in samples) of the space between each window
 950 | 
 951 |     emphasis : float, optional (default=0.9)
 952 |         The emphasis coefficient to use for filtering
 953 | 
 954 |     overlap_add : bool, optional (default=True)
 955 |         What type of processing to use when joining windows
 956 | 
 957 |     copy : bool, optional (default=True)
 958 |        Whether to copy the input X or modify in place
 959 | 
 960 |     Returns
 961 |     -------
 962 |     synthesized : ndarray
 963 |         Sound vector synthesized from input arguments
 964 | 
 965 |     References
 966 |     ----------
 967 |     D. P. W. Ellis (2004), "Sinewave Speech Analysis/Synthesis in Matlab",
 968 |     Web resource, available: http://www.ee.columbia.edu/ln/labrosa/matlab/sws/
 969 |     """
 970 |     # TODO: Incorporate better synthesis from
 971 |     # http://eecs.oregonstate.edu/education/docs/ece352/CompleteManual.pdf
 972 |     window_size = 2 * window_step
 973 |     [n_windows, order] = lp_coefficients.shape
 974 | 
 975 |     n_points = (n_windows + 1) * window_step
 976 |     n_excitation_points = n_points + window_step + window_step // 2
 977 | 
 978 |     random_state = np.random.RandomState(1999)
 979 |     if residual_excitation is None:
 980 |         # Need to generate excitation
 981 |         if voiced_frames is None:
 982 |             # No voiced/unvoiced info
 983 |             voiced_frames = np.ones((lp_coefficients.shape[0], 1))
 984 |         residual_excitation = np.zeros((n_excitation_points))
 985 |         f, m = lpc_to_frequency(lp_coefficients, per_frame_gain)
 986 |         t = np.linspace(0, 1, window_size, endpoint=False)
 987 |         hanning = sg.hanning(window_size)
 988 |         for window in range(n_windows):
 989 |             window_base = window * window_step
 990 |             index = window_base + np.arange(window_size)
 991 |             if voiced_frames[window]:
 992 |                 sig = np.zeros_like(t)
 993 |                 cycles = np.cumsum(f[window][0] * t)
 994 |                 sig += sg.sawtooth(cycles, 0.001)
 995 |                 residual_excitation[index] += hanning * sig
 996 |             residual_excitation[index] += hanning * 0.01 * random_state.randn(
 997 |                 window_size)
 998 |     else:
 999 |         n_excitation_points = residual_excitation.shape[0]
1000 |         n_points = n_excitation_points + window_step + window_step // 2
1001 |     residual_excitation = np.hstack((residual_excitation,
1002 |                                      np.zeros(window_size)))
1003 |     if voiced_frames is None:
1004 |         voiced_frames = np.ones_like(per_frame_gain)
1005 | 
1006 |     synthesized = np.zeros((n_points))
1007 |     for window in range(n_windows):
1008 |         window_base = window * window_step
1009 |         oldbit = synthesized[window_base + np.arange(window_step)]
1010 |         w_coefs = lp_coefficients[window]
1011 |         if not np.all(w_coefs):
1012 |             # Hack to make lfilter avoid
1013 |             # ValueError: BUG: filter coefficient a[0] == 0 not supported yet
1014 |             # when all coeffs are 0
1015 |             w_coefs = [1]
1016 |         g_coefs = voiced_frames[window] * per_frame_gain[window]
1017 |         index = window_base + np.arange(window_size)
1018 |         newbit = g_coefs * sg.lfilter([1], w_coefs,
1019 |                                       residual_excitation[index])
1020 |         synthesized[index] = np.hstack((oldbit, np.zeros(
1021 |             (window_size - window_step))))
1022 |         synthesized[index] += sg.hanning(window_size) * newbit
1023 |     synthesized = sg.lfilter([1], [1, -emphasis], synthesized)
1024 |     return synthesized
1025 | 
1026 | 
def soundsc(X, gain_scale=.9, copy=True):
    """
    Approximate implementation of soundsc from MATLAB without the audio playing.

    Parameters
    ----------
    X : ndarray
        Signal to be rescaled

    gain_scale : float
        Gain multipler, default .9 (90% of maximum representation)

    copy : bool, optional (default=True)
        Whether to make a copy of input signal before rescaling.

    Returns
    -------
    X_sc : ndarray
        Min/max normalized version of X scaled by ``gain_scale * 2 ** 15`` and
        clipped to the int16 range, as int16, suitable for writing with
        scipy.io.wavfile. Empty input yields an empty array; a constant
        signal (no dynamic range) yields all zeros.
    """
    # np.array(..., copy=False) raises on NumPy 2 when a copy is unavoidable
    X = np.array(X) if copy else np.asarray(X)
    if X.size == 0:
        return X.astype('int16')
    low = float(X.min())
    span = float(X.max()) - low
    if span == 0:
        # Constant signal: normalizing would divide by zero
        return np.zeros(X.shape, dtype='int16')
    X = (X - low) / span
    X = 2 * X - 1
    X = gain_scale * X
    X = X * 2 ** 15
    # +1.0 * 2 ** 15 does not fit in int16; clip instead of wrapping around
    return np.clip(X, -2 ** 15, 2 ** 15 - 1).astype('int16')
1054 | 
1055 | 
def _wav2array(nchannels, sampwidth, data):
    # wavio.py
    # Author: Warren Weckesser
    # License: BSD 3-Clause (http://opensource.org/licenses/BSD-3-Clause)

    """
    Convert raw wav frame bytes to an array of shape (num_samples, nchannels).

    data must be the string containing the bytes from the wav file.

    Raises ValueError if len(data) is not a multiple of
    sampwidth * nchannels, or if sampwidth is greater than 4.
    """
    num_samples, remainder = divmod(len(data), sampwidth * nchannels)
    if remainder > 0:
        raise ValueError('The length of data is not a multiple of '
                         'sampwidth * num_channels.')
    if sampwidth > 4:
        raise ValueError("sampwidth must not be greater than 4.")

    if sampwidth == 3:
        # Widen 24 bit samples to 32 bit: copy the three data bytes, then
        # fill the fourth byte with the sign bit (0x00 or 0xFF).
        a = np.empty((num_samples, nchannels, 4), dtype=np.uint8)
        raw_bytes = np.frombuffer(data, dtype=np.uint8)
        a[:, :, :sampwidth] = raw_bytes.reshape(-1, nchannels, sampwidth)
        a[:, :, sampwidth:] = (a[:, :, sampwidth - 1:sampwidth] >> 7) * 255
        result = a.view('<i4').reshape(a.shape[:-1])
    else:
        # 8 bit samples are stored as unsigned ints; others as signed ints.
        dt_char = 'u' if sampwidth == 1 else 'i'
        a = np.frombuffer(data, dtype='<%s%d' % (dt_char, sampwidth))
        # frombuffer yields a read-only view of ``data``; copy so the
        # caller gets a writable array, as np.fromstring used to return.
        result = a.reshape(-1, nchannels).copy()
    return result
1081 | 
1082 | 
def readwav(file):
    # wavio.py
    # Author: Warren Weckesser
    # License: BSD 3-Clause (http://opensource.org/licenses/BSD-3-Clause)
    """
    Read a wav file.

    Returns the frame rate, sample width (in bytes) and a numpy array
    containing the data.

    This function does not read compressed wav files.
    """
    wav = wave.open(file)
    try:
        rate = wav.getframerate()
        nchannels = wav.getnchannels()
        sampwidth = wav.getsampwidth()
        nframes = wav.getnframes()
        data = wav.readframes(nframes)
    finally:
        # Always release the handle, even if reading the frames fails
        wav.close()
    array = _wav2array(nchannels, sampwidth, data)
    return rate, sampwidth, array
1104 | 
1105 | 
def csvd(arr):
    """
    Do the complex SVD of a 2D array, returning real valued U, S, VT

    The complex input is embedded in a real block matrix of twice the size
    in each dimension, and the reduced SVD of that real matrix is returned.

    http://stemblab.github.io/complex-svd/
    """
    real_part = arr.real
    imag_part = arr.imag
    n_rows = real_part.shape[0]
    n_cols = real_part.shape[1]

    top, bottom = slice(None, n_rows), slice(n_rows, None)
    left, right = slice(None, n_cols), slice(n_cols, None)

    # Real block layout:
    #   [  real  imag ]
    #   [ -imag  real ]
    K = np.zeros((2 * n_rows, 2 * n_cols))
    K[top, left] = real_part
    K[top, right] = imag_part
    K[bottom, left] = -imag_part
    K[bottom, right] = real_part
    return svd(K, full_matrices=False)
1126 | 
1127 | 
def icsvd(U, S, VT):
    """
    Invert back to complex values from the output of csvd

    U, S, VT = csvd(X)
    X_rec = icsvd(U, S, VT)

    The real block matrix is rebuilt as U * diag(S) * VT; its upper-left
    block becomes the real part and its upper-right block the imaginary part.
    """
    K = U.dot(np.diag(S)).dot(VT)
    block_x = U.shape[0] // 2
    # The column count must come from VT: with a reduced SVD, U.shape[1] is
    # min(rows, cols) of K and is too small for wide inputs.
    block_y = VT.shape[1] // 2
    arr_rec = np.zeros((block_x, block_y)) + 0j
    arr_rec.real = K[:block_x, :block_y]
    arr_rec.imag = K[:block_x, block_y:]
    return arr_rec
1142 | 
1143 | 
def sinusoid_analysis(X, input_sample_rate, resample_block=128, copy=True):
    """
    Contruct a sinusoidal model for the input signal.

    The signal is decimated to 8 kHz when needed, analysed with an order 8
    ``lpc_analysis``, and the result is converted with ``lpc_to_frequency``.

    Parameters
    ----------
    X : ndarray
        Input signal to model

    input_sample_rate : int
        The sample rate of the input signal. Must be 8000 or an integer
        multiple of it.

    resample_block : int, optional (default=128)
       Controls the step size of the sinusoidal model

    copy : bool, optional (default=True)
       Passed to ``np.array`` when converting X

    Returns
    -------
    frequencies_hz : ndarray
       Frequencies for the sinusoids, in Hz.

    magnitudes : ndarray
       Magnitudes of sinusoids returned in ``frequencies``

    Raises
    ------
    ValueError
        If input_sample_rate is not a multiple of 8000.

    References
    ----------
    D. P. W. Ellis (2004), "Sinewave Speech Analysis/Synthesis in Matlab",
    Web resource, available: http://www.ee.columbia.edu/ln/labrosa/matlab/sws/
    """
    target_rate = 8000
    signal = np.array(X, copy=copy)
    if input_sample_rate != target_rate:
        if input_sample_rate % target_rate != 0:
            raise ValueError("Input sample rate must be a multiple of 8k!")
        # Integer-factor decimation; sg.resample was considered upstream
        # but is not used.
        decimation = input_sample_rate // target_rate
        signal = sg.decimate(signal, decimation, zero_phase=True)
    # Step is rounded to an even number of samples at the target rate
    half_step = round(resample_block / input_sample_rate * target_rate / 2.)
    step_size = 2 * half_step
    coefs, gains, _ = lpc_analysis(signal, order=8, window_step=step_size,
                                   window_size=2 * step_size)
    freqs, mags = lpc_to_frequency(coefs, gains)
    # Convert radians to Hz at the target rate
    return freqs * target_rate / (2 * np.pi), mags
1187 | 
1188 | 
def slinterp(X, factor, copy=True):
    """
    Slow-ish linear interpolation of a 1D numpy array. There must be some
    better function to do this in numpy.

    Parameters
    ----------
    X : ndarray
        1D input array to interpolate

    factor : int
        Integer factor to interpolate by

    copy : bool, optional (default=True)
        Whether to copy the input X before processing

    Return
    ------
    X_r : ndarray, shape=((len(X) - 1) * factor + 1,)
        X with ``factor - 1`` linearly interpolated points inserted between
        each pair of consecutive samples.
    """
    # Convert first so array-likes work; np.array(..., copy=False) raises
    # on NumPy 2 when a copy is unavoidable.
    X = np.array(X) if copy else np.asarray(X)
    # np.product was removed in NumPy 2.0; X.size is the same element count
    sz = X.size
    # Each sample's right-hand neighbour (0 after the last sample)
    X_s = np.hstack((X[1:], [0]))
    X_r = np.zeros((factor, sz))
    for i in range(factor):
        X_r[i, :] = (factor - i) / float(factor) * X + (i / float(factor)) * X_s
    # Drop the points interpolated past the final sample
    return X_r.T.ravel()[:(sz - 1) * factor + 1]
1213 | 
1214 | 
def sinusoid_synthesis(frequencies_hz, magnitudes, input_sample_rate=16000,
                       resample_block=128):
    """
    Create a time series based on input frequencies and magnitudes.

    Parameters
    ----------
    frequencies_hz : ndarray, shape=(n_frames, n_sinusoids)
        Per-frame frequency of each sinusoid, in Hz

    magnitudes : ndarray, shape=(n_frames, n_sinusoids)
        Per-frame magnitude of each sinusoid

    input_sample_rate : int, optional (default=16000)
        The sample rate parameter that the sinusoid analysis was run with

    resample_block : int, optional (default=128)
       Controls the step size of the sinusoidal model; each frame track is
       linearly interpolated by this factor.

    Returns
    -------
    synthesized : ndarray, shape=(1 + (n_frames - 1) * resample_block,)
        Sound vector synthesized from input arguments

    References
    ----------
    D. P. W. Ellis (2004), "Sinewave Speech Analysis/Synthesis in Matlab",
    Web resource, available: http://www.ee.columbia.edu/ln/labrosa/matlab/sws/
    """
    n_frames, n_tracks = frequencies_hz.shape
    synthesized = np.zeros((1 + ((n_frames - 1) * resample_block),))
    radians_per_hz = 2 * np.pi
    for track in range(n_tracks):
        # Upsample the frame-rate tracks to sample rate
        amplitude = slinterp(magnitudes[:, track], resample_block)
        hz = slinterp(frequencies_hz[:, track], resample_block)
        # Integrate the instantaneous frequency to get the phase
        phase = np.cumsum(radians_per_hz * hz / float(input_sample_rate))
        synthesized += amplitude * np.cos(phase)
    return synthesized
1253 | 
1254 | 
def dct_compress(X, n_components, window_size=128):
    """
    Compress using the DCT

    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        The input signal to compress. Should be 1-dimensional

    n_components : int or None
        The number of DCT components to keep. Setting n_components to about
        .5 * window_size can give compression with fairly good reconstruction.
        If None, all components are kept.

    window_size : int
        The input X is zero padded to a multiple of window_size and broken
        into windows of window_size, each of which are then compressed with
        the DCT.

    Returns
    -------
    X_compressed : ndarray, shape=(num_windows, n_components)
       A 2D array of non-overlapping DCT coefficients. For use with uncompress

    Reference
    ---------
    http://nbviewer.ipython.org/github/craffel/crucialpython/blob/master/week3/stride_tricks.ipynb
    """
    leftover = len(X) % window_size
    if leftover != 0:
        # Zero pad up to a whole number of windows
        X = np.hstack((X, np.zeros((window_size - leftover))))
    frames = X.reshape((len(X) // window_size, window_size))
    coefs = fftpack.dct(frames, norm='ortho')
    if n_components is None:
        return coefs
    return coefs[:, :n_components]
1290 | 
1291 | 
def dct_uncompress(X_compressed, window_size=128):
    """
    Uncompress a DCT compressed signal (such as returned by ``dct_compress``).

    Parameters
    ----------
    X_compressed : ndarray, shape=(n_windows, n_components)
        Windowed and compressed array.

    window_size : int, optional (default=128)
        Size of the window used when ``dct_compress`` was called. Rows are
        zero padded up to a multiple of this size before the inverse DCT.

    Returns
    -------
    X_reconstructed : ndarray, shape=(n_samples)
        Reconstructed version of X.
    """
    n_frames, n_kept = X_compressed.shape[0], X_compressed.shape[1]
    shortfall = n_kept % window_size
    if shortfall != 0:
        # Restore the discarded coefficients as zeros
        zero_fill = np.zeros((n_frames, window_size - shortfall))
        X_compressed = np.hstack((X_compressed, zero_fill))
    return fftpack.idct(X_compressed, norm='ortho').ravel()
1315 | 
1316 | 
def sine_window(X):
    """
    Apply a sinusoid window to X.

    Parameters
    ----------
    X : ndarray, shape=(n_samples, n_features)
        Input array of samples

    Returns
    -------
    X_windowed : ndarray, shape=(n_samples, n_features)
        Windowed version of X. Every row is multiplied elementwise by
        sin(pi * (i + 0.5) / n_features) for i in range(n_features).
    """
    n_features = X.shape[1]
    win = np.sin(np.pi * (np.arange(n_features) + 0.5) / n_features)
    # Ordinary broadcasting applies the window to every row; no need for a
    # hand-built zero-stride view.
    return X * win
1338 | 
1339 | 
def kaiserbessel_window(X, alpha=6.5):
    """
    Apply a Kaiser-Bessel window to X.

    Parameters
    ----------
    X : ndarray, shape=(n_samples, n_features)
        Input array of samples

    alpha : float, optional (default=6.5)
        Tuning parameter for Kaiser-Bessel function. alpha=6.5 should make
        perfect reconstruction possible for DCT. The Kaiser ``beta`` used is
        pi * alpha.

    Returns
    -------
    X_windowed : ndarray, shape=(n_samples, n_features)
        Windowed version of X.
    """
    beta = np.pi * alpha
    # The window functions are no longer exported from current SciPy's
    # scipy.signal namespace; scipy.signal.windows holds the same implementation.
    win = sg.windows.kaiser(X.shape[1], beta)
    # Ordinary broadcasting applies the window to every row
    return X * win
1365 | 
1366 | 
def overlap(X, window_size, window_step):
    """
    Create an overlapped version of X

    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        Input signal to window and overlap

    window_size : int
        Size of windows to take. Must be even.

    window_step : int
        Step size between windows

    Returns
    -------
    X_strided : shape=(n_windows, window_size)
        2D array of overlapped X, built as a strided view over a zero
        padded copy of X.

    Raises
    ------
    ValueError
        If window_size is odd.
    """
    if window_size % 2 != 0:
        raise ValueError("Window size must be even!")
    # Zero pad up to the next multiple of window_size (a full extra window
    # when the length is already a multiple)
    tail = np.zeros((window_size - len(X) % window_size))
    padded = np.hstack((X, tail))
    sample_stride = padded.strides[-1]
    shared = window_size - window_step
    n_windows = (padded.shape[-1] - shared) // window_step
    view_shape = padded.shape[:-1] + (n_windows, window_size)
    view_strides = padded.strides[:-1] + (window_step * sample_stride,
                                          sample_stride)
    return as_strided(padded, shape=view_shape, strides=view_strides)
1397 | 
1398 | 
def halfoverlap(X, window_size):
    """
    Create an overlapped version of X using 50% of window_size as overlap.

    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        Input signal to window and overlap

    window_size : int
        Size of windows to take. Must be even.

    Returns
    -------
    X_strided : shape=(n_windows, window_size)
        2D array of overlapped X, built as a strided view over a zero
        padded copy of X.

    Raises
    ------
    ValueError
        If window_size is odd.
    """
    if window_size % 2 != 0:
        raise ValueError("Window size must be even!")
    hop = window_size // 2
    # Zero pad up to the next multiple of window_size (a full extra window
    # when the length is already a multiple)
    padded = np.hstack((X, np.zeros((window_size - len(X) % window_size))))
    bytes_per_sample = padded.itemsize
    n_frames = len(padded) // hop - 1
    return as_strided(padded, shape=(n_frames, window_size),
                      strides=(bytes_per_sample * hop, bytes_per_sample))
1428 | 
1429 | 
def invert_halfoverlap(X_strided):
    """
    Invert ``halfoverlap`` function to reconstruct X

    Frames are summed back into a flat buffer at a hop of half the window
    size; overlapping regions are added, not averaged.

    Parameters
    ----------
    X_strided : ndarray, shape=(n_windows, window_size)
        X as overlapped windows

    Returns
    -------
    X : ndarray, shape=((n_windows // 2 + 1) * window_size,)
        Reconstructed version of X
    """
    # Hardcoded 50% overlap! Can generalize later...
    n_rows, n_cols = X_strided.shape
    hop = n_cols // 2
    out_len = (int(n_rows // 2) + 1) * n_cols
    X = np.zeros((out_len,)).astype(X_strided.dtype)
    for row, frame in enumerate(X_strided):
        begin = row * hop
        X[begin:begin + n_cols] += frame
    return X
1455 | 
1456 | 
def overlap_add(X_strided, window_step, wsola=False):
    """
    overlap add to reconstruct X

    Parameters
    ----------
    X_strided : ndarray, shape=(n_windows, window_size)
        X as overlapped windows

    window_step : int
       step size for overlap add

    wsola : bool, optional (default=False)
       If True, each frame is shifted by the offset returned from
       ``xcorr_offset`` (computed against the already-written overlap
       region) before being added.

    Returns
    -------
    X : ndarray, shape=(n_samples,)
        Reconstructed version of X. Overlapping regions are summed and are
        not normalized by any window envelope. Empty when there are no
        windows.
    """
    n_rows, window_size = X_strided.shape

    # Start with largest size (no overlap) then truncate after we finish
    # +2 for one window on each side
    X = np.zeros(((n_rows + 2) * window_size,)).astype(X_strided.dtype)
    start_index = 0
    # Defined up front so an input with zero windows returns an empty array
    # instead of failing on an unbound name.
    end_index = 0

    for i in range(n_rows):
        end_index = start_index + window_size
        if wsola:
            offset_size = window_size - window_step
            offset = xcorr_offset(X[start_index:start_index + offset_size],
                                  X_strided[i, :offset_size])
            ss = start_index - offset
            st = end_index - offset
            if ss < 0:
                # Never write before the start of the buffer
                ss = 0
                st = window_size
            X[ss:st] += X_strided[i]
        else:
            X[start_index:end_index] += X_strided[i]
        start_index += window_step
    return X[:end_index]
1506 | 
1507 | 
def overlap_dct_compress(X, n_components, window_size):
    """
    Overlap (at 50% of window_size) and compress X.

    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        Input signal to compress

    n_components : int or None
        number of DCT components to keep. If None, all are kept.

    window_size : int
        Size of windows to take

    Returns
    -------
    X_dct : ndarray, shape=(n_windows, n_components)
        Windowed and compressed version of X
    """
    frames = halfoverlap(X, window_size)
    coefs = fftpack.dct(frames, norm='ortho')
    if n_components is None:
        return coefs
    return coefs[:, :n_components]
1533 | 
1534 | 
1535 | # Evil voice is caused by adding double the zeros before inverse DCT...
1536 | # Very cool bug but makes sense
def overlap_dct_uncompress(X_compressed, window_size):
    """
    Uncompress X as returned from ``overlap_dct_compress``.

    Parameters
    ----------
    X_compressed : ndarray, shape=(n_windows, n_components)
        Windowed and compressed version of X

    window_size : int
        Size of windows originally used when compressing X. Rows are zero
        padded up to a multiple of this size before the inverse DCT.

    Returns
    -------
    X_reconstructed : ndarray, shape=(n_samples,)
        Reconstructed version of X
    """
    n_frames, n_kept = X_compressed.shape[0], X_compressed.shape[1]
    shortfall = n_kept % window_size
    if shortfall != 0:
        # Restore the discarded coefficients as zeros
        zero_fill = np.zeros((n_frames, window_size - shortfall))
        X_compressed = np.hstack((X_compressed, zero_fill))
    frames = fftpack.idct(X_compressed, norm='ortho')
    return invert_halfoverlap(frames)
1560 | 
1561 | 
def herz_to_mel(freqs):
    """
    Convert frequencies in Hz to the mel scale.

    The mapping is linear below 1000 Hz and logarithmic from 1000 Hz up.
    Inputs that are not ndarrays are converted with ``np.array`` and gain a
    leading axis.

    Based on code by Dan Ellis

    http://labrosa.ee.columbia.edu/matlab/tf_agc/
    """
    if not isinstance(freqs, np.ndarray):
        freqs = np.array(freqs)[None]

    f_0 = 0  # 133.33333
    f_sp = 200 / 3.  # 66.66667
    bark_freq = 1000.
    bark_pt = (bark_freq - f_0) / f_sp
    # The magic 1.0711703 which is the ratio needed to get from 1000 Hz
    # to 6400 Hz in 27 steps, and is *almost* the ratio between 1000 Hz
    # and the preceding linear filter center at 933.33333 Hz
    # (actually 1000/933.33333 = 1.07142857142857 and
    # exp(log(6.4)/27) = 1.07117028749447)
    log_step = np.exp(np.log(6.4) / 27)

    linear_region = freqs < bark_freq
    log_region = ~linear_region
    mel = 0. * freqs
    mel[linear_region] = (freqs[linear_region] - f_0) / f_sp
    mel[log_region] = bark_pt + np.log(
        freqs[log_region] / bark_freq) / np.log(log_step)
    return mel
1586 | 
1587 | 
def mel_to_herz(mel):
    """
    Convert mel-scale values back to frequencies in Hz.

    Based on code by Dan Ellis

    http://labrosa.ee.columbia.edu/matlab/tf_agc/

    Inverse of the piecewise mapping used by ``herz_to_mel``: linear below
    the mel value of 1000 Hz, exponential from there up.

    Parameters
    ----------
    mel : ndarray or scalar or sequence
        Mel values. Anything that is not an ndarray is converted with
        ``np.array(mel)[None]``, which adds a leading axis.

    Returns
    -------
    freqs : ndarray
        Frequencies in Hz, same shape as the (possibly converted) input
    """
    origin_hz = 0  # 133.33333
    linear_step = 200 / 3.  # 66.66667
    knee_hz = 1000.
    knee_mel = (knee_hz - origin_hz) / linear_step
    # The magic 1.0711703 which is the ratio needed to get from 1000 Hz
    # to 6400 Hz in 27 steps, and is *almost* the ratio between 1000 Hz
    # and the preceding linear filter center at 933.33333 Hz
    # (actually 1000/933.33333 = 1.07142857142857 and
    # exp(log(6.4)/27) = 1.07117028749447)
    if not isinstance(mel, np.ndarray):
        mel = np.array(mel)[None]
    ratio = np.exp(np.log(6.4) / 27)
    below_knee = (mel < knee_mel)
    above_knee = ~below_knee

    freqs = 0. * mel
    freqs[below_knee] = origin_hz + linear_step * mel[below_knee]
    freqs[above_knee] = knee_hz * np.exp(np.log(ratio) * (
        mel[above_knee] - knee_mel))
    return freqs
1613 | 
1614 | 
def mel_freq_weights(n_fft, fs, n_filts=None, width=None):
    """
    Build a bank of triangular, mel-spaced filters over FFT bins.

    Based on code by Dan Ellis

    http://labrosa.ee.columbia.edu/matlab/tf_agc/

    Parameters
    ----------
    n_fft : int
        FFT size. The returned matrix has ``n_fft`` columns, but only the
        first ``n_fft // 2`` ever hold non-zero weights.

    fs : int
        Sample rate in Hz; filters span 0 Hz to ``fs // 2``

    n_filts : int or None
        Number of filters. When None it is derived from the mel value of
        ``fs // 2`` (half of it, truncated, plus one).

    width : float or None
        Factor by which each triangle is widened around its center
        frequency. None means 1.

    Returns
    -------
    weights : ndarray, shape=(n_filts, n_fft)
        One triangular filter per row, scaled by 2 / (upper - lower edge)
    """
    min_freq = 0
    max_freq = fs // 2
    if width is None:
        width = 1.
    if n_filts is None:
        # herz_to_mel hands back an array even for scalar input; pull the
        # single element out explicitly, since int() on a 1-element array
        # is deprecated in recent NumPy releases.
        n_filts = int(np.ravel(herz_to_mel(max_freq))[0] / 2) + 1
    else:
        n_filts = int(n_filts)
        assert n_filts > 0
    weights = np.zeros((n_filts, n_fft))
    fft_freqs = np.arange(n_fft // 2) / n_fft * fs
    min_mel = herz_to_mel(min_freq)
    max_mel = herz_to_mel(max_freq)
    # n_filts + 2 edge frequencies, evenly spaced on the mel axis
    partial = np.arange(n_filts + 2) / (n_filts + 1.) * (max_mel - min_mel)
    bin_freqs = mel_to_herz(min_mel + partial)
    for i in range(n_filts):
        # lower edge, center, upper edge of the i-th triangle
        fs_i = bin_freqs[i + np.arange(3)]
        fs_i = fs_i[1] + width * (fs_i - fs_i[1])
        lo_slope = (fft_freqs - fs_i[0]) / float(fs_i[1] - fs_i[0])
        hi_slope = (fs_i[2] - fft_freqs) / float(fs_i[2] - fs_i[1])
        weights[i, :n_fft // 2] = np.maximum(
            0, np.minimum(lo_slope, hi_slope))
    # Constant amplitude multiplier
    weights = np.diag(2. / (bin_freqs[2:n_filts + 2]
                      - bin_freqs[:n_filts])).dot(weights)
    weights[:, n_fft // 2:] = 0
    return weights
1649 | 
1650 | 
def time_attack_agc(X, fs, t_scale=0.5, f_scale=1.):
    """
    Automatic gain control in the time-frequency domain.

    AGC based on code by Dan Ellis

    http://labrosa.ee.columbia.edu/matlab/tf_agc/

    Parameters
    ----------
    X : ndarray
        Input signal, passed straight to ``stft``

    fs : int
        Sample rate in Hz

    t_scale : float
        Time constant of the decaying envelope follower; it enters as
        ``exp(-(1 / frame_rate) / t_scale)``

    f_scale : float
        Frequency scale; sets the number of mel bands
        (``max(10, 20 / f_scale)``) and their width

    Returns
    -------
    ts : ndarray
        ``istft`` of the spectrogram divided by the gain envelope

    X_freq : ndarray
        The spectrogram returned by ``stft``

    E : ndarray
        Per-bin gain envelope that ``X_freq`` was divided by
    """
    # 32 ms grid for FFT
    n_fft = 2 ** int(np.log(0.032 * fs) / np.log(2))
    f_scale = float(f_scale)
    hop = n_fft // 2
    X_freq = stft(X, n_fft, mean_normalize=False)
    frame_rate = fs / hop
    n_bands = max(10, 20 / f_scale)
    mel_width = f_scale * n_bands / 10.
    band_weights = mel_freq_weights(n_fft, fs, n_bands, mel_width)
    band_weights = band_weights[:, :n_fft // 2 + 1]
    audiogram = np.abs(X_freq).dot(band_weights.T)

    # Envelope follower: jumps up immediately, decays by `decay` per frame
    decay = np.exp(-(1. / frame_rate) / t_scale)
    envelope = np.zeros_like(audiogram)
    running = np.zeros((audiogram.shape[1],))
    for idx, frame in enumerate(audiogram):
        running = np.maximum(decay * running, frame)
        envelope[idx] = running

    # Map the band envelope back onto FFT bins; columns whose weights sum
    # to zero are divided by 1 instead of 0
    column_sums = np.sum(band_weights, axis=0)
    normalizer = np.diag(1. / (column_sums + (column_sums == 0)))
    back_projection = normalizer.dot(band_weights.T)
    E = envelope.dot(back_projection.T)
    # Replace non-positive gains with the smallest positive one
    E[E <= 0] = np.min(E[E > 0])
    ts = istft(X_freq / E, n_fft, mean_normalize=False)
    return ts, X_freq, E
1683 | 
1684 | 
def hebbian_kmeans(X, n_clusters=10, n_epochs=10, W=None, learning_rate=0.01,
                   batch_size=100, random_state=None, verbose=True):
    """
    Minibatch k-means with a Hebbian-style update rule.

    Modified from existing code from R. Memisevic
    See http://www.cs.toronto.edu/~rfm/code/hebbian_kmeans.py

    Parameters
    ----------
    X : ndarray, shape=(n_samples, n_features)
        Data to cluster

    n_clusters : int
        Number of cluster centers; must equal ``W.shape[0]`` when W is given

    n_epochs : int
        Number of passes over X

    W : ndarray, shape=(n_clusters, n_features) or None
        Initial centers. When given, it is updated in place. When None,
        centers are drawn as ``0.1 * randn``.

    learning_rate : float
        Step size of each minibatch update

    batch_size : int
        Number of rows of X per update

    random_state : np.random.RandomState or None
        Source of randomness for the initial centers

    verbose : bool
        Print the cost of the last minibatch every few epochs

    Returns
    -------
    W : ndarray, shape=(n_clusters, n_features)
        Learned cluster centers
    """
    if W is not None:
        assert n_clusters == W.shape[0]
    else:
        if random_state is None:
            random_state = np.random.RandomState()
        W = 0.1 * random_state.randn(n_clusters, X.shape[1])
    sample_sq_norms = (X ** 2).sum(axis=1, keepdims=True)
    n_samples = X.shape[0]
    last_print = 0
    for epoch in range(n_epochs):
        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            batch = X[start:stop]
            # Squared distance of every center (rows) to every sample (cols)
            D = -2 * np.dot(W, batch.T)
            D += (W ** 2).sum(axis=1, keepdims=True)
            D += sample_sq_norms[start:stop].T
            # One-hot (ties give several ones) assignment, samples as rows
            S = (D == D.min(axis=0)[None, :]).astype("float").T
            members_per_center = S.sum(axis=0)[:, None]
            W += learning_rate * (
                np.dot(S.T, batch) - members_per_center * W)
        if not verbose:
            continue
        if epoch == 0 or epoch > (.05 * n_epochs + last_print):
            last_print = epoch
            print("Epoch %i of %i, cost %.4f" % (
                epoch + 1, n_epochs, D.min(axis=0).sum()))
    return W
1715 | 
1716 | 
def complex_to_real_view(arr_c):
    """
    Reinterpret a 2D complex array as real with twice as many columns.

    Each complex entry becomes two adjacent real columns (real part, then
    imaginary part). complex128 maps to float64, complex64 to float32.

    Parameters
    ----------
    arr_c : ndarray, shape=(n_rows, n_cols), complex64 or complex128

    Returns
    -------
    arr_r : ndarray, shape=(n_rows, 2 * n_cols)
    """
    assert arr_c.dtype in [np.complex64, np.complex128]
    n_rows = arr_c.shape[0]
    n_cols = arr_c.shape[1]
    if arr_c.dtype == np.complex128:
        real_dtype = np.float64
    else:
        real_dtype = np.float32
    flat = arr_c.ravel()
    return flat.view(dtype=real_dtype).reshape(n_rows, 2 * n_cols)
1724 | 
1725 | 
def real_to_complex_view(arr_r):
    """
    Reinterpret a 2D real array as complex with half as many columns.

    Adjacent column pairs are read as (real, imaginary). float64 maps to
    complex128; every other non-complex dtype is viewed as complex64.

    Parameters
    ----------
    arr_r : ndarray, shape=(n_rows, 2 * n_cols), non-complex dtype

    Returns
    -------
    arr_c : ndarray, shape=(n_rows, n_cols)
    """
    assert arr_r.dtype not in [np.complex64, np.complex128]
    n_rows = arr_r.shape[0]
    n_cols = arr_r.shape[1]
    if arr_r.dtype == np.float64:
        complex_dtype = np.complex128
    else:
        complex_dtype = np.complex64
    flat = arr_r.ravel()
    return flat.view(dtype=complex_dtype).reshape(n_rows, n_cols // 2)
1733 | 
1734 | 
def complex_to_abs(arr_c):
    """Return the element-wise magnitude of ``arr_c``."""
    magnitude = np.abs(arr_c)
    return magnitude
1737 | 
1738 | 
def complex_to_angle(arr_c):
    """Return the element-wise phase angle of ``arr_c`` in radians."""
    phase = np.angle(arr_c)
    return phase
1741 | 
1742 | 
def abs_and_angle_to_complex(arr_abs, arr_angle):
    """
    Rebuild complex values from magnitude and phase (radians).

    Round-tripping through ``np.abs`` / ``np.angle`` and back is accurate
    to roughly 1E-15, per the original author's note.
    """
    unit_phasor = np.exp(1j * arr_angle)
    return arr_abs * unit_phasor
1746 | 
1747 | 
def angle_to_sin_cos(arr_angle):
    """
    Encode angles as sine and cosine, stacked with ``np.hstack``.

    The sine block comes first, followed by the cosine block.
    """
    sines = np.sin(arr_angle)
    cosines = np.cos(arr_angle)
    return np.hstack((sines, cosines))
1750 | 
1751 | 
def sin_cos_to_angle(arr_sin, arr_cos):
    """Recover angles in radians from sine and cosine via ``np.arctan2``."""
    angle = np.arctan2(arr_sin, arr_cos)
    return angle
1754 | 
1755 | 
def polyphase_core(x, m, f):
    """
    Compute the per-branch outputs of a polyphase decimating filter.

    Summing the returned rows over axis 0 gives every ``m``-th sample of
    the full convolution of the (zero-padded) ``x`` and ``f``.

    Parameters
    ----------
    x : ndarray, shape=(n_samples,)
        Input data

    m : int
        Decimation rate

    f : ndarray, shape=(n_taps,)
        Filter coefficients

    Returns
    -------
    p : ndarray, shape=(m, (len(x) + len(f)) // m)
        One row per polyphase branch, with lengths taken after zero-padding
        x and f to multiples of m. The dtype follows the padded ``x``.
    """
    # Hack job - append zeros to match decimation rate
    if x.shape[0] % m != 0:
        x = np.append(x, np.zeros((m - x.shape[0] % m,)))
    if f.shape[0] % m != 0:
        f = np.append(f, np.zeros((m - f.shape[0] % m,)))
    # Both lengths are multiples of m here, so floor division is exact.
    # True division would pass a float dimension to np.zeros, which raises.
    n_cols = (x.shape[0] + f.shape[0]) // m
    p = np.zeros((m, n_cols), dtype=x.dtype)
    p[0, :-1] = np.convolve(x[::m], f[::m])
    # Invert the x values when applying filters
    for i in range(1, m):
        p[i, 1:] = np.convolve(x[m - i::m], f[i::m])
    return p
1771 | 
1772 | 
def polyphase_single_filter(x, m, f):
    """
    Filter ``x`` with ``f`` and decimate by ``m`` in one polyphase pass.

    Sums the branch outputs of ``polyphase_core`` over the branch axis.
    """
    branches = polyphase_core(x, m, f)
    return np.sum(branches, axis=0)
1775 | 
1776 | 
def polyphase_lowpass(arr, downsample=2, n_taps=50, filter_pad=1.1):
    """
    Low-pass filter and decimate ``arr`` with a polyphase FIR filter.

    Parameters
    ----------
    arr : ndarray, shape=(n_samples,)
        Signal to filter and decimate

    downsample : int
        Decimation rate

    n_taps : int
        Taps per polyphase branch; the filter has ``downsample * n_taps``
        taps in total

    filter_pad : float
        Divides the ``1 / downsample`` cutoff handed to ``firwin``

    Returns
    -------
    filtered : ndarray
        Output of ``polyphase_single_filter``
    """
    cutoff = 1 / (downsample * filter_pad)
    taps = firwin(downsample * n_taps, cutoff)
    return polyphase_single_filter(arr, downsample, taps)
1781 | 
1782 | 
def window(arr, window_size, window_step=1, axis=0):
    """
    Build a strided sliding-window view over one axis of ``arr``.

    Directly taken from Erik Rigtorp's post to numpy-discussion.
    <http://www.mail-archive.com/numpy-discussion@scipy.org/msg29450.html>

    <http://stackoverflow.com/questions/4936620/using-strides-for-an-efficient-moving-average-filter>

    Parameters
    ----------
    arr : ndarray
        Array to window

    window_size : int
        Samples per window. Checked against the size of the LAST axis of
        ``arr``, before ``axis`` is applied.

    window_step : int
        Keep every ``window_step``-th window

    axis : int
        Axis to slide along

    Returns
    -------
    strided : ndarray
        Result of ``as_strided`` on ``arr``. The first two axes are
        (window index, position in window); any remaining axes follow in
        reverse order. 1D input gives shape (n_windows, window_size).

    Raises
    ------
    ValueError
        If ``window_size`` is below 1 or exceeds ``arr.shape[-1]``
    """
    if window_size < 1:
        raise ValueError("`window_size` must be at least 1.")
    if window_size > arr.shape[-1]:
        raise ValueError("`window_size` is too long.")

    # Swap the requested axis with the last one so windows run along the end
    ndim = len(arr.shape)
    to_last = list(range(ndim))
    to_last[axis], to_last[-1] = ndim - 1, to_last[axis]
    arr = arr.transpose(to_last)

    # Reusing the last stride for the new axis makes window k start at k
    n_windows = arr.shape[-1] - window_size + 1
    view_shape = arr.shape[:-1] + (n_windows, window_size)
    view_strides = arr.strides + (arr.strides[-1],)
    strided = as_strided(arr, shape=view_shape, strides=view_strides)

    if window_step > 1:
        strided = strided[..., ::window_step, :]

    # Bring (window index, position in window) to the front; the remaining
    # axes follow in reversed order
    n_axes = len(strided.shape)
    final_order = [n_axes - 2, n_axes - 1] + list(range(n_axes - 3, -1, -1))
    return strided.transpose(final_order)
1815 | 
1816 | 
def unwindow(arr, window_size, window_step=1, axis=0):
    """
    Undo windowing by repeating each window and averaging inside it.

    Parameters
    ----------
    arr : ndarray
        Windowed array; axis 0 indexes windows and axis 1 is averaged out

    window_size : int
        Unused; accepted for symmetry with ``window``

    window_step : int
        Number of times each window is repeated along axis 0

    axis : int
        Only 0 is supported

    Returns
    -------
    unwindowed : ndarray
        Mean over axis 1 of the tiled array

    Raises
    ------
    ValueError
        If ``axis`` is not 0
    """
    # undo windows by broadcast
    if axis != 0:
        raise ValueError("axis != 0 currently unsupported")
    original_shape = arr.shape
    repeated = np.tile(arr[:, None, ...], (1, window_step, 1, 1))
    target_shape = (original_shape[0] * window_step,) + tuple(original_shape[1:])
    repeated = repeated.reshape(target_shape)
    return repeated.mean(axis=1)
1825 | 
1826 | 
def xcorr_offset(x1, x2):
    """
    Estimate the alignment offset between two frames by cross-correlation.

    Under MSR-LA License

    Based on MATLAB implementation from Spectrogram Inversion Toolbox

    Both inputs are mean-centered and correlated in float32. The first and
    last ``len(x2) // 2`` lags are excluded from the peak search, and the
    result is the peak index minus ``len(x1)``.

    References
    ----------
    D. Griffin and J. Lim. Signal estimation from modified
    short-time Fourier transform. IEEE Trans. Acoust. Speech
    Signal Process., 32(2):236-243, 1984.

    Malcolm Slaney, Daniel Naar and Richard F. Lyon. Auditory
    Model Inversion for Sound Separation. Proc. IEEE-ICASSP,
    Adelaide, 1994, II.77-80.

    Xinglei Zhu, G. Beauregard, L. Wyse. Real-Time Signal
    Estimation from Modified Short-Time Fourier Transform
    Magnitude Spectra. IEEE Transactions on Audio Speech and
    Language Processing, 08/2007.
    """
    centered_1 = x1 - x1.mean()
    centered_2 = x2 - x2.mean()
    half = len(centered_2) // 2
    # Convolving with the reversed frame is cross-correlation
    corrs = np.convolve(centered_1.astype('float32'),
                        centered_2[::-1].astype('float32'))
    # Push the edge lags far below any real correlation value
    corrs[:half] = -1E30
    corrs[-half:] = -1E30
    return corrs.argmax() - len(centered_1)
1857 | 
1858 | 
def invert_spectrogram(X_s, step, calculate_offset=True, set_zero_phase=True):
    """
    Overlap-add a time-domain estimate from a two-sided spectrogram.

    Under MSR-LA License

    Based on MATLAB implementation from Spectrogram Inversion Toolbox

    Parameters
    ----------
    X_s : ndarray, shape=(n_frames, n_bins)
        Spectrogram, one frame per row. The synthesis frame length is
        ``n_bins // 2``.

    step : int
        Hop between successive frames in samples

    calculate_offset : bool
        If True, each frame after the first is shifted by the lag from
        ``xcorr_offset`` against the signal built so far

    set_zero_phase : bool
        If True, only the real part of each frame is used; otherwise the
        frame is used as given (already complex)

    Returns
    -------
    wave : ndarray, shape=(n_frames * step + n_bins // 2,)
        Reconstruction, normalized by the accumulated window weights

    References
    ----------
    D. Griffin and J. Lim. Signal estimation from modified
    short-time Fourier transform. IEEE Trans. Acoust. Speech
    Signal Process., 32(2):236-243, 1984.

    Malcolm Slaney, Daniel Naar and Richard F. Lyon. Auditory
    Model Inversion for Sound Separation. Proc. IEEE-ICASSP,
    Adelaide, 1994, II.77-80.

    Xinglei Zhu, G. Beauregard, L. Wyse. Real-Time Signal
    Estimation from Modified Short-Time Fourier Transform
    Magnitude Spectra. IEEE Transactions on Audio Speech and
    Language Processing, 08/2007.
    """
    size = int(X_s.shape[1] // 2)
    wave = np.zeros((X_s.shape[0] * step + size))
    # Getting overflow warnings with 32 bit...
    wave = wave.astype('float64')
    total_windowing_sum = np.zeros((X_s.shape[0] * step + size))
    # Hamming-shaped synthesis window
    win = 0.54 - .46 * np.cos(2 * np.pi * np.arange(size) / (size - 1))

    est_start = int(size // 2) - 1
    est_end = est_start + size
    for i in range(X_s.shape[0]):
        wave_start = int(step * i)
        wave_end = wave_start + size
        if set_zero_phase:
            spectral_slice = X_s[i].real + 0j
        else:
            # already complex
            spectral_slice = X_s[i]

        # Don't need fftshift due to different impl.
        wave_est = np.real(np.fft.ifft(spectral_slice))[::-1]
        if calculate_offset and i > 0:
            offset_size = size - step
            if offset_size <= 0:
                # A plain "%" here: the former "\%" was an invalid escape
                # sequence and printed a stray backslash.
                print("WARNING: Large step size >50% detected! "
                      "This code works best with high overlap - try "
                      "with 75% or greater")
                offset_size = step
            offset = xcorr_offset(wave[wave_start:wave_start + offset_size],
                                  wave_est[est_start:est_start + offset_size])
        else:
            offset = 0
        wave[wave_start:wave_end] += win * wave_est[
            est_start - offset:est_end - offset]
        total_windowing_sum[wave_start:wave_end] += win
    # The small constant keeps samples with no window coverage finite
    wave = np.real(wave) / (total_windowing_sum + 1E-6)
    return wave
1916 | 
1917 | 
def iterate_invert_spectrogram(X_s, fftsize, step, n_iter=10, verbose=False,
                               complex_input=False):
    """
    Iteratively estimate a signal from a spectrogram.

    Under MSR-LA License

    Based on MATLAB implementation from Spectrogram Inversion Toolbox

    Each iteration inverts the current estimate with
    ``invert_spectrogram``, re-analyses it with ``stft``, and combines the
    resulting phase with the values of ``X_s``.

    Parameters
    ----------
    X_s : ndarray, shape=(n_frames, n_bins)
        Spectrogram computed with ``stft(..., compute_onesided=False)``

    fftsize : int
        FFT size passed to ``stft``

    step : int
        Hop size passed to ``stft`` and ``invert_spectrogram``

    n_iter : int
        Number of refinement iterations

    verbose : bool
        Print the iteration index

    complex_input : bool
        If False, the first iteration uses only the real part of ``X_s``
        (``set_zero_phase=True``)

    Returns
    -------
    X_t : ndarray
        Real-valued time-domain estimate

    Raises
    ------
    ValueError
        If a ValueError occurs inside the iteration loop; the message
        points at the most likely cause, a one-sided spectrogram.

    References
    ----------
    D. Griffin and J. Lim. Signal estimation from modified
    short-time Fourier transform. IEEE Trans. Acoust. Speech
    Signal Process., 32(2):236-243, 1984.

    Malcolm Slaney, Daniel Naar and Richard F. Lyon. Auditory
    Model Inversion for Sound Separation. Proc. IEEE-ICASSP,
    Adelaide, 1994, II.77-80.

    Xinglei Zhu, G. Beauregard, L. Wyse. Real-Time Signal
    Estimation from Modified Short-Time Fourier Transform
    Magnitude Spectra. IEEE Transactions on Audio Speech and
    Language Processing, 08/2007.
    """
    # Floor for the magnitude used when normalizing to unit phase
    reg = np.max(X_s) / 1E8
    X_best = copy.deepcopy(X_s)
    try:
        for i in range(n_iter):
            if verbose:
                print("Runnning iter %i" % i)
            # Only the very first pass of a non-complex input drops phase
            zero_phase = (i == 0 and not complex_input)
            # Calculate offset was False in the MATLAB version
            # but in mine it massively improves the result
            # Possible bug in my impl?
            X_t = invert_spectrogram(X_best, step, calculate_offset=True,
                                     set_zero_phase=zero_phase)
            est = stft(X_t, fftsize=fftsize, step=step, compute_onesided=False)
            phase = est / np.maximum(reg, np.abs(est))
            # Trim both to a common number of frames before combining
            phase = phase[:len(X_s)]
            X_s = X_s[:len(phase)]
            X_best = X_s * phase
    except ValueError:
        # One message string: a stray comma used to split this into two
        # exception arguments, which rendered as a tuple.
        raise ValueError("The iterate_invert_spectrogram algorithm requires"
                         " stft(..., compute_onesided=False),"
                         " be sure you have calculated stft with this argument")
    X_t = invert_spectrogram(X_best, step, calculate_offset=True,
                             set_zero_phase=False)
    return np.real(X_t)
1967 | 
1968 | 
def harvest_get_downsampled_signal(x, fs, target_fs):
    """
    Decimate ``x`` towards ``target_fs`` and remove its mean.

    The signal is edge-padded before decimation and the padding is trimmed
    again afterwards.

    Parameters
    ----------
    x : ndarray, shape=(n_samples,)
        Input signal

    fs : int or float
        Sample rate of ``x``

    target_fs : int or float
        Desired sample rate; must not exceed ``fs``

    Returns
    -------
    y : ndarray
        Decimated, zero-mean signal

    actual_fs : float
        ``fs`` divided by the rounded integer decimation ratio

    Raises
    ------
    ValueError
        If ``fs < target_fs`` (upsampling is not implemented)
    """
    # Check before any arithmetic: for fs much smaller than target_fs the
    # ratio rounds to 0 and the padding maths fails with an unrelated error.
    if fs < target_fs:
        raise ValueError("CASE NOT HANDLED IN harvest_get_downsampled_signal")

    decimation_ratio = np.round(fs / target_fs)
    # Padding length: the smallest multiple of the ratio that is >= 140
    offset = np.ceil(140. / decimation_ratio) * decimation_ratio
    start_pad = x[0] * np.ones(int(offset), dtype=np.float32)
    end_pad = x[-1] * np.ones(int(offset), dtype=np.float32)
    x = np.concatenate((start_pad, x, end_pad), axis=0)

    try:
        y0 = sg.decimate(x, int(decimation_ratio), 3, zero_phase=True)
    except TypeError:
        # Presumably a fallback for SciPy releases without the zero_phase
        # keyword; an unknown keyword raises TypeError.
        y0 = sg.decimate(x, int(decimation_ratio), 3)
    actual_fs = fs / decimation_ratio
    trim = int(offset / decimation_ratio)
    y = y0[trim:-trim]
    y = y - np.mean(y)
    return y, actual_fs
1987 | 
1988 | 
def harvest_get_raw_f0_candidates(number_of_frames, boundary_f0_list,
      y_length, temporal_positions, actual_fs, y_spectrum, f0_floor,
      f0_ceil):
    """
    Collect one f0 candidate contour per boundary frequency.

    Each row is filled by ``harvest_get_f0_candidate_from_raw_event`` for
    the matching entry of ``boundary_f0_list``.

    Returns
    -------
    raw_f0_candidates : ndarray, float32,
        shape=(len(boundary_f0_list), number_of_frames)
    """
    n_channels = len(boundary_f0_list)
    raw_f0_candidates = np.zeros((n_channels, number_of_frames), dtype=np.float32)
    for channel, boundary_f0 in enumerate(boundary_f0_list):
        raw_f0_candidates[channel, :] = harvest_get_f0_candidate_from_raw_event(
                boundary_f0, actual_fs, y_spectrum, y_length,
                temporal_positions, f0_floor, f0_ceil)
    return raw_f0_candidates
1998 | 
1999 | 
def harvest_nuttall(N):
    """
    Return an ``N``-point four-term cosine-sum (Nuttall) window.

    Parameters
    ----------
    N : int
        Window length

    Returns
    -------
    window : ndarray, shape=(N,)
    """
    coefs = np.array([0.355768, -0.487396, 0.144232, -0.012604])
    harmonics = np.array([0., 1., 2., 3.])
    t = np.arange(0, N) * 2 * np.pi / (N - 1)
    # (N, 4) table of k * t, then a weighted sum of the four cosines
    phases = t[:, None].dot(harmonics[None])
    window = np.cos(phases).dot(coefs[:, None])
    # 1D window...
    return window.ravel()
2006 | 
2007 | 
def harvest_get_f0_candidate_from_raw_event(boundary_f0,
        fs, y_spectrum, y_length, temporal_positions, f0_floor,
        f0_ceil):
    """
    Estimate an f0 contour from one band-pass channel around ``boundary_f0``.

    The signal (given by its spectrum) is filtered with a Nuttall window
    modulated by a cosine at ``boundary_f0``. Interval-based f0 values from
    the zero crossings, peaks and dips of the result are merged by
    ``harvest_get_f0_candidate_contour``. Values outside +/-10% of
    ``boundary_f0`` or outside [f0_floor, f0_ceil] are set to 0.

    Returns
    -------
    f0_candidate : ndarray
        One value per entry of ``temporal_positions``
    """
    half_length = int(np.round(fs / boundary_f0 * 2))
    taps = np.arange(-half_length, half_length + 1)
    carrier = np.cos(2 * np.pi * boundary_f0 * taps / float(fs))
    band_pass_filter = harvest_nuttall(half_length * 2 + 1) * carrier

    # possible numerical issues if 32 bit
    filter_spectrum = np.fft.fft(band_pass_filter.astype("float64"), len(y_spectrum))
    filtered_signal = np.real(np.fft.ifft(filter_spectrum * y_spectrum))
    # Skip the leading samples of the filtered signal before taking y_length
    index_bias = half_length + 1
    filtered_signal = filtered_signal[index_bias + np.arange(y_length).astype("int32")]

    d_filtered_signal = filtered_signal[1:] - filtered_signal[:-1]
    negative_zero_cross = harvest_zero_crossing_engine(filtered_signal, fs)
    positive_zero_cross = harvest_zero_crossing_engine(-filtered_signal, fs)
    peak = harvest_zero_crossing_engine(d_filtered_signal, fs)
    dip = harvest_zero_crossing_engine(-d_filtered_signal, fs)
    f0_candidate = harvest_get_f0_candidate_contour(negative_zero_cross,
            positive_zero_cross, peak, dip, temporal_positions)

    rejected = (
        (f0_candidate > (boundary_f0 * 1.1))
        | (f0_candidate < (boundary_f0 * .9))
        | (f0_candidate > f0_ceil)
        | (f0_candidate < f0_floor)
    )
    f0_candidate[rejected] = 0.
    return f0_candidate
2034 | 
2035 | 
def harvest_get_f0_candidate_contour(negative_zero_cross_tup,
        positive_zero_cross_tup, peak_tup, dip_tup, temporal_positions):
    """
    Average four interval-based f0 tracks at ``temporal_positions``.

    Each ``*_tup`` argument is ``(interval_locations, interval_based_f0)``.
    Every track is linearly interpolated (and extrapolated) onto
    ``temporal_positions`` and the four results are averaged. If any track
    has two or fewer interval locations the result is all zeros.

    Returns
    -------
    f0_candidate : ndarray
        One value per entry of ``temporal_positions``
    """
    # index 0 is interval locations
    # index 1 is interval based f0
    events = (negative_zero_cross_tup, positive_zero_cross_tup,
              peak_tup, dip_tup)
    if not all(len(event[0]) > 2 for event in events):
        return temporal_positions * 0

    interpolated_f0_list = np.zeros((4, len(temporal_positions)))
    for row, event in enumerate(events):
        interpolate = interp1d(event[0], event[1], kind="linear",
                               bounds_error=False, fill_value="extrapolate")
        interpolated_f0_list[row, :] = interpolate(temporal_positions)
    return np.mean(interpolated_f0_list, axis=0)
2062 | 
2063 | 
def harvest_zero_crossing_engine(x, fs, debug=False):
    """
    Locate negative-going zero crossings and derive interval-based f0.

    A crossing is a sample whose successor has the opposite sign and a
    smaller value. Its position is refined by linear interpolation
    between the two samples.

    Parameters
    ----------
    x : ndarray, shape=(n_samples,)
        Signal to analyse

    fs : int or float
        Sample rate in Hz

    debug : bool
        Unused

    Returns
    -------
    interval_locations : ndarray
        Midpoints between consecutive crossings, divided by ``fs``

    interval_based_f0 : ndarray
        ``fs`` divided by the spacing between consecutive crossings
    """
    # negative zero crossing, going from positive to negative
    successor = x.copy()
    successor[:-1] = successor[1:]
    successor[-1] = x[-1]
    # +1 here to avoid edge case at 0
    one_based = np.arange(len(x)) + 1
    is_negative_going = (successor * x < 0) * (successor < x)
    marked = one_based * is_negative_going
    edge_list = marked[marked > 0]
    # -1 to correct index
    before = x[edge_list - 1]
    rise = (x[edge_list] - before).astype("float32")
    fine_edge_list = edge_list - before / rise
    earlier = fine_edge_list[:-1]
    later = fine_edge_list[1:]
    interval_locations = (earlier + later) / float(2) / fs
    interval_based_f0 = float(fs) / (later - earlier)
    return interval_locations, interval_based_f0
2078 | 
2079 | 
def harvest_detect_official_f0_candidates(raw_f0_candidates):
    """
    Collapse runs of adjacent non-zero channels into one candidate each.

    For every frame (column), runs of consecutive channels with a positive
    raw f0 are located. The first and last channel are always treated as
    empty. Runs spanning at least 10 channels yield one candidate: the
    mean raw f0 over the run.

    Parameters
    ----------
    raw_f0_candidates : ndarray, shape=(n_channels, n_frames)

    Returns
    -------
    f0_candidates : ndarray, shape=(round(n_channels / 10), n_frames)
        Candidates per frame, filled from row 0; unused slots stay 0

    number_of_candidates : int
        Largest number of candidates found in any single frame
    """
    min_run = 10
    n_channels, n_frames = raw_f0_candidates.shape
    n_slots = int(np.round(n_channels / 10.))
    f0_candidates = np.zeros((n_slots, n_frames))
    most_found = 0
    for frame in range(n_frames):
        column = raw_f0_candidates[:, frame]
        voiced = column.copy()
        voiced[voiced > 0] = 1.
        voiced[0] = 0
        voiced[-1] = 0
        # +1 marks the channel before a run starts, -1 its last channel
        edges = voiced[1:] - voiced[:-1]
        rises = np.where(edges == 1)[0]
        falls = np.where(edges == -1)[0]
        found = 0
        for k, rise in enumerate(rises):
            fall = falls[k]
            if fall - rise < min_run:
                continue
            f0_candidates[found, frame] = np.mean(column[rise + 1: fall + 1])
            found = found + 1
        most_found = max(most_found, found)
    return f0_candidates, most_found
2102 | 
2103 | 
def harvest_overlap_f0_candidates(f0_candidates, max_number_of_f0_candidates):
    """
    Stack time-shifted copies of the leading candidate rows.

    For each of 7 shifts (+3 ... -3 frames) the first
    ``max_number_of_f0_candidates`` rows of ``f0_candidates`` are written,
    shifted along the frame axis, into their own block of rows of a
    scratch array. Only those written rows are returned.

    NOTE(review): the scratch array is seeded from
    ``f0_candidates[7, :]``, so the input needs at least 8 rows, and the
    scratch array has as many rows as there are frames. Both look like
    artifacts of the MATLAB port - confirm against the reference
    implementation.

    Parameters
    ----------
    f0_candidates : ndarray, shape=(n_candidates, n_frames)

    max_number_of_f0_candidates : int
        Number of leading rows of ``f0_candidates`` to copy per shift

    Returns
    -------
    new_f0_candidates : ndarray
        The rows written during the shift loop. Row order follows the
        iteration order of a Python ``set`` of row indices.
    """
    n = 3 # this is the optimized parameter... apparently
    # one block per shift in -n..n
    number_of_candidates = n * 2 + 1
    new_f0_candidates = f0_candidates[number_of_candidates, :].copy()
    new_f0_candidates = new_f0_candidates[None]
    # hack to bypass magic matlab-isms of allocating when indexing OOB
    # (the template row is followed by n_frames - 1 rows of zeros)
    new_f0_candidates = np.vstack([new_f0_candidates] + (new_f0_candidates.shape[-1] - 1) * [np.zeros_like(new_f0_candidates)])
    # this indexing is megagross, possible source for bugs!
    all_nonzero = []
    for i in range(number_of_candidates):
        # shift = n - i; st is its positive part, ed its negative part
        st = max(-(i - n), 0)
        ed = min(-(i - n), 0)
        # source rows, and the destination rows of the i-th block
        f1_b = np.arange(max_number_of_f0_candidates).astype("int32")
        f1 = f1_b + int(i * max_number_of_f0_candidates)
        # remember every destination row written so far (deduplicated)
        all_nonzero = list(set(all_nonzero + list(f1)))
        # None stands in for "to the end", since a slice stop of 0 would be empty
        f2 = None if ed == 0 else ed
        f3 = -ed
        f4 = None if st == 0 else -st
        # destination columns st:f2 receive source columns f3:f4
        new_f0_candidates[f1, st:f2] = f0_candidates[f1_b, f3:f4]
    new_f0_candidates = new_f0_candidates[all_nonzero, :]
    return new_f0_candidates
2125 | 
2126 | 
def harvest_refine_candidates(x, fs, temporal_positions, f0_candidates,
        f0_floor, f0_ceil):
    """
    Refine every non-zero f0 candidate and score it.

    Each non-zero entry of ``f0_candidates`` is passed to
    ``harvest_get_refined_f0`` together with the time of its frame. Zero
    entries are left at zero with a zero score.

    Parameters
    ----------
    f0_candidates : ndarray, shape=(n_candidates, n_frames)
        Candidates; column ``i`` belongs to ``temporal_positions[i]``

    Returns
    -------
    new_f0_candidates : ndarray, same shape as ``f0_candidates``
        Refined candidates

    f0_scores : ndarray, same shape as ``f0_candidates``
        Score of each refined candidate
    """
    refined = f0_candidates.copy()
    scores = f0_candidates * 0.
    n_channels = len(f0_candidates)
    for frame, position in enumerate(temporal_positions):
        for channel in range(n_channels):
            candidate = f0_candidates[channel, frame]
            if candidate == 0:
                continue
            refined[channel, frame], scores[channel, frame] = (
                harvest_get_refined_f0(x, fs, position, candidate,
                                       f0_floor, f0_ceil))
    return refined, scores
2141 | 
2142 | 
def harvest_get_refined_f0(x, fs, current_time, current_f0, f0_floor,
        f0_ceil):
    """
    Refine one f0 candidate using instantaneous frequency of its harmonics.

    A window of about three periods of ``current_f0`` is centered on
    ``current_time``. The instantaneous frequency at up to six harmonic
    bins is averaged, weighted by amplitude, to give the refined f0. The
    score is the inverse of the mean relative deviation of the harmonics
    from ``current_f0``.

    Parameters
    ----------
    x : ndarray, shape=(n_samples,)
        Signal

    fs : int or float
        Sample rate in Hz

    current_time : float
        Analysis time in seconds

    current_f0 : float
        Candidate f0 in Hz (non-zero)

    f0_floor, f0_ceil : float
        Accepted f0 range

    Returns
    -------
    refined_f0 : float
        Refined estimate, or 0 if the candidate is rejected

    refined_score : float
        Reliability score, or 0 if the candidate is rejected
    """
    half_window_length = np.ceil(3. * fs / current_f0 / 2.)
    window_length_in_time = (2. * half_window_length + 1) / float(fs)
    base_time = np.arange(-half_window_length, half_window_length + 1) / float(fs)
    fft_size = int(2 ** np.ceil(np.log2((half_window_length * 2 + 1)) + 1))
    frequency_axis = np.arange(fft_size) / fft_size * float(fs)

    # 1-based sample indices (MATLAB convention) around current_time
    base_index = np.round((current_time + base_time) * fs + 0.001)
    index_time = (base_index - 1) / float(fs)
    window_time = index_time - current_time
    # Blackman-shaped (0.42 / 0.5 / 0.08 cosine-sum) window centered on current_time
    part1 = np.cos(2 * np.pi * window_time / window_length_in_time)
    part2 = np.cos(4 * np.pi * window_time / window_length_in_time)
    main_window = 0.42 + 0.5 * part1 + 0.08 * part2
    # Negated central difference of the zero-padded window
    ext = np.zeros((len(main_window) + 2))
    ext[1:-1] = main_window
    diff_window = -((ext[1:-1] - ext[:-2]) + (ext[2:] - ext[1:-1])) / float(2)
    # Clamp to the valid sample range, then convert to 0-based indices
    safe_index = np.maximum(1, np.minimum(len(x), base_index)).astype("int32") - 1
    spectrum = np.fft.fft(x[safe_index] * main_window, fft_size)
    diff_spectrum = np.fft.fft(x[safe_index] * diff_window, fft_size)
    numerator_i = np.real(spectrum) * np.imag(diff_spectrum) - np.imag(spectrum) * np.real(diff_spectrum)
    power_spectrum = np.abs(spectrum) ** 2
    instantaneous_frequency = frequency_axis + numerator_i / power_spectrum * float(fs) / 2. / np.pi

    number_of_harmonics = int(min(np.floor(float(fs) / 2. / current_f0), 6.))
    harmonics_index = np.arange(number_of_harmonics) + 1
    index_list = np.round(current_f0 * fft_size / fs * harmonics_index).astype("int32")
    instantaneous_frequency_list = instantaneous_frequency[index_list]
    amplitude_list = np.sqrt(power_spectrum[index_list])
    refined_f0 = np.sum(amplitude_list * instantaneous_frequency_list)
    refined_f0 /= np.sum(amplitude_list * harmonics_index.astype("float32"))

    variation = np.abs(((instantaneous_frequency_list / harmonics_index.astype("float32")) - current_f0) / float(current_f0))
    refined_score = 1. / (0.000000000001 + np.mean(variation))

    if (refined_f0 < f0_floor) or (refined_f0 > f0_ceil) or (refined_score < 2.5):
        # Rejected candidates report zero for BOTH values; a misspelled
        # variable name used to leave the score untouched here.
        refined_f0 = 0.
        refined_score = 0.
    return refined_f0, refined_score
2182 | 
2183 | 
def harvest_select_best_f0(reference_f0, f0_candidates, allowed_range):
    """
    Pick the candidate closest to ``reference_f0`` in relative terms.

    Parameters
    ----------
    reference_f0 : float
        Frequency to compare against

    f0_candidates : sequence or ndarray
        Candidate frequencies

    allowed_range : float
        Largest relative error that still counts as a match

    Returns
    -------
    best_f0 : float
        The selected candidate, or 0 if none is within ``allowed_range``

    best_error : float
        Relative error of the selection, or ``allowed_range`` if there is
        none. Among equal errors the later candidate wins.
    """
    best_f0 = 0
    best_error = allowed_range

    for candidate in f0_candidates:
        relative_error = np.abs(reference_f0 - candidate) / reference_f0
        if relative_error > best_error:
            continue
        best_f0, best_error = candidate, relative_error
    return best_f0, best_error
2195 | 
2196 | 
def harvest_remove_unreliable_candidates(f0_candidates, f0_scores):
    """
    Zero out candidates with no similar candidate in a neighboring frame.

    A non-zero candidate in an interior frame is kept only if the previous
    or next frame has a candidate within 5% of it, as measured by
    ``harvest_select_best_f0``. The first and last frame are never
    modified.

    Parameters
    ----------
    f0_candidates : ndarray, shape=(n_candidates, n_frames)

    f0_scores : ndarray, shape=(n_candidates, n_frames)

    Returns
    -------
    new_f0_candidates, new_f0_scores : ndarray
        Copies of the inputs with unreliable entries set to 0
    """
    max_relative_error = 0.05
    kept_candidates = f0_candidates.copy()
    kept_scores = f0_scores.copy()
    n_candidates = len(f0_candidates)
    n_frames = f0_candidates.shape[1]

    for frame in range(1, n_frames - 1):
        following = f0_candidates[:, frame + 1]
        preceding = f0_candidates[:, frame - 1]
        for channel in range(n_candidates):
            reference_f0 = f0_candidates[channel, frame]
            if reference_f0 == 0:
                continue
            error_next = harvest_select_best_f0(reference_f0, following, 1)[1]
            error_prev = harvest_select_best_f0(reference_f0, preceding, 1)[1]
            if min([error_next, error_prev]) > max_relative_error:
                kept_candidates[channel, frame] = 0
                kept_scores[channel, frame] = 0
    return kept_candidates, kept_scores
2216 | 
2217 | 
def harvest_search_f0_base(f0_candidates, f0_scores):
    """
    For every frame, pick the candidate with the highest score.

    Parameters
    ----------
    f0_candidates : ndarray, shape=(n_candidates, n_frames)

    f0_scores : ndarray, shape=(n_candidates, n_frames)

    Returns
    -------
    f0_base : ndarray, shape=(n_frames,)
        Best-scoring candidate per frame; the first one wins a tie
    """
    f0_base = f0_candidates[0, :] * 0.
    n_frames = len(f0_base)
    # Row of the best score in each column, then one pick per column
    best_rows = np.argmax(f0_scores[:, :n_frames], axis=0)
    f0_base[:] = f0_candidates[best_rows, np.arange(n_frames)]
    return f0_base
2224 | 
2225 | 
def harvest_fix_step_1(f0_base, allowed_range):
    """
    Step 1: Rapid change of f0 contour is replaced by 0.

    The first two frames are always zeroed. A later non-zero frame is
    zeroed when it deviates by more than ``allowed_range`` (relative) from
    BOTH the previous frame and the linear extrapolation of the previous
    two frames. All comparisons read the unmodified ``f0_base``.

    Parameters
    ----------
    f0_base : ndarray, shape=(n_frames,)

    allowed_range : float
        Relative deviation threshold

    Returns
    -------
    f0_step1 : ndarray, shape=(n_frames,)
        Modified copy of ``f0_base``
    """
    f0_step1 = f0_base.copy()
    f0_step1[0] = 0.
    f0_step1[1] = 0.

    for i in range(2, len(f0_base)):
        current = f0_base[i]
        if current == 0:
            continue
        previous = f0_base[i - 1]
        extrapolated = previous * 2 - f0_base[i - 2]
        off_trend = np.abs((current - extrapolated) / extrapolated) > allowed_range
        off_previous = np.abs((current - previous) / previous) > allowed_range
        if off_trend and off_previous:
            f0_step1[i] = 0.
    return f0_step1
2241 | 
2242 | 
def harvest_fix_step_2(f0_step1, voice_range_minimum):
    """
    Step 2: zero out sections that are shorter than a minimum length.

    ``harvest_get_boundary_list`` is read as consecutive (first, last)
    index pairs - presumably the bounds of voiced sections; verify against
    that helper. Any pair spanning fewer than ``voice_range_minimum``
    frames is set to 0, end index included.

    Parameters
    ----------
    f0_step1 : ndarray, shape=(n_frames,)

    voice_range_minimum : int or float
        Minimum accepted value of ``last - first``

    Returns
    -------
    f0_step2 : ndarray, shape=(n_frames,)
        Modified copy of ``f0_step1``
    """
    f0_step2 = f0_step1.copy()
    boundary_list = harvest_get_boundary_list(f0_step1)

    n_sections = int(len(boundary_list) / 2.)
    for section in range(n_sections):
        first = boundary_list[2 * section]
        last = boundary_list[2 * section + 1]
        if last - first < voice_range_minimum:
            # need one more due to range not including last index
            f0_step2[first:last + 1] = 0.
    return f0_step2
2255 | 
2256 | 
def harvest_fix_step_3(f0_step2, f0_candidates, allowed_range, f0_scores):
    """
    Step 3: extend each section in both directions and merge the results.

    For every (start, end) pair from ``harvest_get_boundary_list`` the
    matching row of ``harvest_get_multichannel_f0`` is extended forward
    and then backward with ``harvest_extend_f0``, by at most 100 frames
    each way. Extended sections that are long enough relative to their
    mean f0 are kept and merged with ``harvest_merge_f0``. If none
    qualifies, a copy of ``f0_step2`` is returned.

    NOTE(review): the meaning of the boundary pairs and the return values
    of the helpers are inferred from how they are used here - confirm
    against those helpers.

    Parameters
    ----------
    f0_step2 : ndarray, shape=(n_frames,)
        Contour from the previous fix step

    f0_candidates : ndarray
        Candidates passed through to the extend/merge helpers

    allowed_range : float
        Passed through to ``harvest_extend_f0``

    f0_scores : ndarray
        Scores passed through to ``harvest_merge_f0``

    Returns
    -------
    f0_step3 : ndarray
        Merged contour, or a copy of ``f0_step2``
    """
    f0_step3 = f0_step2.copy()
    boundary_list = harvest_get_boundary_list(f0_step2)
    multichannel_f0 = harvest_get_multichannel_f0(f0_step2, boundary_list)
    # one (first, last) index pair per section that gets kept
    rrange = np.zeros((int(len(boundary_list) / 2), 2))
    # maximum number of frames to extend in either direction
    threshold1 = 100
    # length criterion: kept when threshold2 / mean_f0 < extended length
    threshold2 = 2200
    # number of kept sections; also the next free row for compaction
    count = 0
    for i in range(1, int(len(boundary_list) / 2) + 1):
        # changed to 2 * i - 2
        # forward pass: from the section end towards the end of the contour
        extended_f0, tmp_range_1 = harvest_extend_f0(multichannel_f0[i - 1, :],
                boundary_list[(2 * i) - 1],
                min([len(f0_step2) - 1, boundary_list[(2 * i) - 1] + threshold1]),
                1, f0_candidates, allowed_range)
        # backward pass: from the section start towards frame 2
        tmp_f0_sequence, tmp_range_0 = harvest_extend_f0(extended_f0,
                boundary_list[(2 * i) - 2],
                max([2, boundary_list[(2 * i) - 2] - threshold1]), -1,
                f0_candidates, allowed_range)

        mean_f0 = np.mean(tmp_f0_sequence[tmp_range_0 : tmp_range_1 + 1])
        if threshold2 / mean_f0 < (tmp_range_1 - tmp_range_0):
            # count <= i - 1, so this never overwrites a row not yet read
            multichannel_f0[count, :] = tmp_f0_sequence
            rrange[count, :] = np.array([tmp_range_0, tmp_range_1])
            count = count + 1
    if count > 0:
        # drop the unused tail rows before merging
        multichannel_f0 = multichannel_f0[:count, :]
        rrange = rrange[:count, :]
        f0_step3 = harvest_merge_f0(multichannel_f0, rrange, f0_candidates,
                f0_scores)
    return f0_step3
2287 | 
2288 | 
def harvest_merge_f0(multichannel_f0, rrange, f0_candidates, f0_scores):
    """Merge per-section f0 contours into a single contour.

    Parameters
    ----------
    multichannel_f0 : 2-D array, one contour per row.
    rrange : 2-D array; ``rrange[k]`` is the inclusive (start, end) frame
        range covered by row ``k``.
    f0_candidates, f0_scores : forwarded to ``harvest_merge_f0_sub`` to
        arbitrate overlapping sections.

    Returns
    -------
    The merged contour (a new array; the inputs are left untouched).

    Sections are visited in order of their start frame. A section starting
    after the end of the running range is copied in as is; an overlapping
    one is resolved by ``harvest_merge_f0_sub``.
    """
    number_of_channels = len(multichannel_f0)
    # Private copies: the running range is tracked in the first sorted row,
    # and the merged contour must not alias the caller's data.
    rrange = rrange.copy()
    sorted_order = np.argsort(rrange[:, 0])
    first = sorted_order[0]
    f0 = multichannel_f0[first, :].copy()
    for i in range(1, number_of_channels):
        current = sorted_order[i]
        st = rrange[current, 0]
        ed = rrange[current, 1]
        if st - rrange[first, 1] > 0:
            # no overlapping; `ed` is inclusive, so the slice must end at
            # ed + 1 or the last frame of the section would be lost
            lo = int(st)
            hi = int(ed) + 1
            f0[lo:hi] = multichannel_f0[current, lo:hi]
            rrange[first, 0] = st
            rrange[first, 1] = ed
        else:
            f0, new_ed = harvest_merge_f0_sub(
                f0, rrange[first, 0], rrange[first, 1],
                multichannel_f0[current, :], st, ed,
                f0_candidates, f0_scores)
            rrange[first, 1] = new_ed
    return f0
2310 | 
2311 | 
def harvest_merge_f0_sub(f0_1, st1, ed1, f0_2, st2, ed2, f0_candidates,
        f0_scores):
    """Merge contour ``f0_2`` (range st2..ed2) into ``f0_1`` (range st1..ed1).

    ``f0_1`` is modified in place and returned together with the new end of
    the merged range. When range 2 lies inside range 1 nothing changes.
    Otherwise the summed candidate scores over frames st2..ed1 decide
    whether ``f0_2`` replaces ``f0_1`` from ``ed1`` on (contour 1 scores
    higher) or from ``st2`` on.
    """
    if (st1 <= st2) and (ed1 >= ed2):
        return f0_1, ed1

    total_1 = 0.
    total_2 = 0.
    for frame in range(int(st2), int(ed1) + 1):
        frame_candidates = f0_candidates[:, frame]
        frame_scores = f0_scores[:, frame]
        total_1 = total_1 + harvest_serach_score(f0_1[frame], frame_candidates, frame_scores)
        total_2 = total_2 + harvest_serach_score(f0_2[frame], frame_candidates, frame_scores)

    copy_from = int(ed1) if total_1 > total_2 else int(st2)
    copy_stop = int(ed2) + 1
    f0_1[copy_from:copy_stop] = f0_2[copy_from:copy_stop]
    return f0_1, ed2
2330 | 
2331 | 
def harvest_serach_score(f0, f0_candidates, f0_scores):
    """Return the highest score among candidates exactly equal to ``f0``.

    Returns 0 when no candidate matches or no matching score is positive.
    """
    best = 0
    for idx, candidate in enumerate(f0_candidates):
        if f0 == candidate and best < f0_scores[idx]:
            best = f0_scores[idx]
    return best
2338 | 
2339 | 
def harvest_extend_f0(f0, origin, last_point, shift, f0_candidates,
        allowed_range):
    """Extend a contour from ``origin`` towards ``last_point``.

    Walks in steps of ``shift`` (+1 or -1 at the call sites in this file)
    and fills each next frame with the value chosen by
    ``harvest_select_best_f0`` relative to the last non-zero f0. Stops after
    4 consecutive frames without a match. Returns the extended copy and the
    index of the last frame that received a non-zero value (``origin`` if
    none did).
    """
    max_misses = 4
    extended = f0.copy()
    reference_f0 = extended[origin]
    reached = origin
    misses = 0
    n_frames = f0_candidates.shape[1]

    for position in np.arange(origin, last_point + shift, shift):
        target = position + shift
        # off by 1 issues: never step past the last candidate column
        if target >= n_frames:
            continue
        best_f0, _ = harvest_select_best_f0(reference_f0,
                f0_candidates[:, target], allowed_range)
        extended[target] = best_f0
        if extended[target] != 0:
            reference_f0 = extended[target]
            misses = 0
            reached = target
        else:
            misses += 1
        if misses == max_misses:
            break
    return extended, reached
2364 | 
2365 | 
def harvest_get_multichannel_f0(f0, boundary_list):
    """Split a contour into one row per voiced section.

    ``boundary_list`` holds (first, last) inclusive index pairs. Row ``k``
    of the result is zero except for the span of section ``k``, which is
    copied from ``f0``.
    """
    n_sections = int(len(boundary_list) / 2)
    channels = np.zeros((n_sections, len(f0)))
    section_bounds = zip(boundary_list[0::2], boundary_list[1::2])
    for row, (first, last) in enumerate(section_bounds):
        channels[row, first:last + 1] = f0[first:last + 1]
    return channels
2373 | 
2374 | 
def harvest_get_boundary_list(f0):
    """Return the boundaries of the voiced (non-zero) sections of ``f0``.

    The result alternates first index, last index (both inclusive) of each
    section. The first and last frame are always treated as unvoiced.
    """
    voiced = f0.copy()
    voiced[voiced != 0] = 1.
    voiced[0] = 0
    voiced[-1] = 0
    # A non-zero difference marks a voiced/unvoiced transition.
    edges = np.flatnonzero(voiced[1:] - voiced[:-1])
    # Onsets are detected one frame early; shift them onto the first voiced frame.
    edges[::2] += 1
    return edges
2384 | 
2385 | 
def harvest_fix_step_4(f0_step3, threshold):
    """Step 4: bridge short unvoiced gaps between voiced sections.

    For each gap shorter than ``threshold`` frames, the gap is filled with a
    straight line running from (f0 at the end of the previous section + 1)
    towards (f0 at the start of the next section - 1). Returns a modified
    copy.
    """
    f0_step4 = f0_step3.copy()
    boundaries = harvest_get_boundary_list(f0_step3)

    for pair in range(1, int(len(boundaries) / 2.)):
        left = boundaries[2 * pair - 1]   # last frame of the previous section
        right = boundaries[2 * pair]      # first frame of the next section
        gap = right - left - 1
        if gap >= threshold:
            continue
        start_value = f0_step3[left] + 1
        end_value = f0_step3[right] - 1
        slope = (end_value - start_value) / float((gap + 1))
        for step, frame in enumerate(range(left + 1, right), start=1):
            f0_step4[frame] = start_value + slope * step
    return f0_step4
2404 | 
2405 | 
def harvest_fix_f0_contour(f0_candidates, f0_scores):
    """Turn scored candidates into one f0 contour plus a voicing mask.

    Runs the base search followed by fix steps 1-4 with their fixed tuning
    constants. Returns ``(f0, vuv)`` where ``vuv`` is 1 wherever ``f0`` is
    non-zero and 0 elsewhere.
    """
    contour = harvest_search_f0_base(f0_candidates, f0_scores)
    contour = harvest_fix_step_1(contour, 0.008)  # optimized?
    contour = harvest_fix_step_2(contour, 6)  # optimized?
    contour = harvest_fix_step_3(contour, f0_candidates, 0.18, f0_scores)  # optimized?
    f0 = harvest_fix_step_4(contour, 9)  # optimized
    vuv = f0.copy()
    vuv[vuv != 0] = 1.
    return f0, vuv
2415 | 
2416 | 
def harvest_filter_f0_contour(f0, st, ed, b, a):
    """Filter the section st..ed (inclusive) of a contour forward and backward.

    Frames outside the section are first padded with the section's edge
    values, the filter (b, a) is applied forward and then on the reversed
    signal, and finally everything outside the section is set to 0.
    """
    padded = f0.copy()
    padded[:st] = padded[st]
    padded[ed + 1:] = padded[ed]
    forward = sg.lfilter(b, a, padded)
    backward = sg.lfilter(b, a, forward[::-1])
    result = backward[::-1].copy()
    result[:st] = 0.
    result[ed + 1:] = 0.
    return result
2427 | 
2428 | 
def harvest_smooth_f0_contour(f0):
    """Smooth every voiced section of ``f0`` with a fixed second-order filter.

    The contour is padded with 300 zeros on each side, each voiced section
    is filtered independently by ``harvest_filter_f0_contour``, and the
    padding is removed again before returning.
    """
    b = np.array([0.0078202080334971724, 0.015640416066994345, 0.0078202080334971724])
    a = np.array([1.0, -1.7347257688092754, 0.76600660094326412])
    pad = 300
    padded = np.concatenate([np.zeros(pad,), f0, np.zeros(pad,)])
    boundaries = harvest_get_boundary_list(padded)
    channels = harvest_get_multichannel_f0(padded, boundaries)
    for section in range(int(len(boundaries) / 2)):
        first = boundaries[2 * section]
        last = boundaries[2 * section + 1]
        filtered = harvest_filter_f0_contour(channels[section, :], first, last, b, a)
        padded[first:last + 1] = filtered[first:last + 1]
    return padded[pad:-pad]
2443 | 
2444 | 
2445 | def _world_get_temporal_positions(x_len, fs):
2446 |     frame_period = 5
2447 |     basic_frame_period = 1
2448 |     basic_temporal_positions = np.arange(0, x_len / float(fs), basic_frame_period / float(1000))
2449 |     temporal_positions = np.arange(0,
2450 |             x_len / float(fs),
2451 |             frame_period / float(1000))
2452 |     return basic_temporal_positions, temporal_positions
2453 | 
2454 | 
def harvest(x, fs):
    """Estimate the f0 contour of signal ``x`` sampled at ``fs`` Hz.

    Pipeline: downsample towards 8 kHz, collect raw candidates over a bank
    of 40-per-octave channels covering 71-800 Hz (with a 10% margin), refine
    and prune the candidates, connect them into a contour, smooth it, and
    sample the result on the 5 ms grid.

    Returns ``(temporal_positions, f0, vuv, f0_candidates)``.
    """
    f0_floor = 71
    f0_ceil = 800
    target_fs = 8000
    channels_in_octave = 40.
    basic_temporal_positions, temporal_positions = _world_get_temporal_positions(len(x), fs)

    # Channel boundaries, log-spaced between the widened floor and ceiling.
    adjusted_f0_floor = f0_floor * 0.9
    adjusted_f0_ceil = f0_ceil * 1.1
    n_channels = np.ceil(np.log2(adjusted_f0_ceil / adjusted_f0_floor) * channels_in_octave)
    octave_steps = np.arange(1, n_channels + 1) / float(channels_in_octave)
    boundary_f0_list = adjusted_f0_floor * 2.0 ** octave_steps

    y, actual_fs = harvest_get_downsampled_signal(x, fs, target_fs)
    fft_size = 2. ** np.ceil(np.log2(len(y) + np.round(fs / f0_floor * 4) + 1))
    y_spectrum = np.fft.fft(y, int(fft_size))

    raw_f0_candidates = harvest_get_raw_f0_candidates(
        len(basic_temporal_positions),
        boundary_f0_list, len(y), basic_temporal_positions, actual_fs,
        y_spectrum, f0_floor, f0_ceil)
    f0_candidates, number_of_candidates = harvest_detect_official_f0_candidates(raw_f0_candidates)
    f0_candidates = harvest_overlap_f0_candidates(f0_candidates, number_of_candidates)
    f0_candidates, f0_scores = harvest_refine_candidates(
        y, actual_fs, basic_temporal_positions, f0_candidates, f0_floor, f0_ceil)
    f0_candidates, f0_scores = harvest_remove_unreliable_candidates(f0_candidates, f0_scores)

    connected_f0, vuv = harvest_fix_f0_contour(f0_candidates, f0_scores)
    smoothed_f0 = harvest_smooth_f0_contour(connected_f0)

    # Map the 5 ms grid onto the 1 ms contour, clamping at the last frame.
    frame_index = np.round(temporal_positions * 1000)
    idx = np.minimum(len(smoothed_f0) - 1, frame_index).astype("int32")
    return temporal_positions, smoothed_f0[idx], vuv[idx], f0_candidates
2487 | 
2488 | 
def cheaptrick_get_windowed_waveform(x, fs, current_f0, current_position):
    """Cut a pitch-adaptive, Hann-windowed segment around a time position.

    The window spans 1.5 periods of ``current_f0`` on each side of
    ``current_position`` (seconds). Indices outside the signal are clamped
    to its first/last sample. The window is scaled to unit energy and a
    window-shaped offset is subtracted from the windowed segment.
    """
    f0 = float(current_f0)
    half_window_length = np.round(1.5 * fs / f0)
    offsets = np.arange(-half_window_length, half_window_length + 1)
    # Positions are computed 1-based, clamped to [1, len(x)], then made 0-based.
    positions = np.round(current_position * fs + 0.001) + offsets + 1
    clamped = np.minimum(len(x), np.maximum(1, np.round(positions))).astype("int32")
    segment = x[clamped - 1]

    time_axis = offsets / float(fs) / 1.5
    window = 0.5 * np.cos(np.pi * time_axis * f0) + 0.5
    window = window / np.sqrt(np.sum(window ** 2))
    windowed = segment * window
    return windowed - window * np.mean(windowed) / np.mean(window)
2501 | 
2502 | 
def cheaptrick_get_power_spectrum(waveform, fs, fft_size, f0):
    """Return the power spectrum of ``waveform`` with the band below f0 corrected.

    Bins below ``f0`` are increased by a replica of the spectrum mirrored
    about ``f0`` (linear interpolation, extrapolated where needed). The
    upper half of the spectrum is then overwritten with the mirrored lower
    half.
    """
    power_spectrum = np.abs(np.fft.fft(waveform, fft_size)) ** 2
    frequency_axis = np.arange(fft_size) / float(fft_size) * float(fs)

    # Bins up to one bin above f0 feed the interpolator.
    low_mask = frequency_axis < (f0 + fs / fft_size)
    low_frequency_axis = frequency_axis[low_mask]
    mirror = interp1d(f0 - low_frequency_axis, power_spectrum[low_mask],
                      kind="linear", fill_value="extrapolate")
    replica = mirror(low_frequency_axis)

    below_f0 = frequency_axis < f0
    power_spectrum[below_f0] = replica[below_f0[:len(replica)]] + power_spectrum[below_f0]

    half = int(fft_size / 2)
    power_spectrum[half + 1:] = power_spectrum[1:half][::-1]
    return power_spectrum
2519 | 
2520 | 
def cheaptrick_linear_smoothing(power_spectrum, f0, fs, fft_size):
    """Smooth a power spectrum with a rectangular window of width 2 * f0 / 3.

    The spectrum is duplicated to cover [-fs, fs), integrated, and the
    integral is read (via ``cheaptrick_interp1h``) at ``f - f0/3`` and
    ``f + f0/3`` for every bin from 0 to fs/2. The difference is scaled by
    ``1.5 / f0``.
    """
    bin_width = fs / float(fft_size)
    double_frequency_axis = np.arange(2 * fft_size) / float(fft_size) * fs - fs
    double_spectrum = np.concatenate([power_spectrum, power_spectrum])
    double_segment = np.cumsum(double_spectrum * bin_width)

    center_frequency = np.arange(int(fft_size / 2) + 1) / float(fft_size) * fs
    # The running integral is sampled at bin centres, half a bin higher.
    shifted_axis = double_frequency_axis + bin_width / 2.
    low_levels = cheaptrick_interp1h(shifted_axis, double_segment,
                                     center_frequency - f0 / 3.)
    high_levels = cheaptrick_interp1h(shifted_axis, double_segment,
                                      center_frequency + f0 / 3.)
    return (high_levels - low_levels) * 1.5 / f0
2533 | 
2534 | 
def cheaptrick_interp1h(x, y, xi):
    """Linearly interpolate ``y`` at ``xi`` on the equally spaced grid ``x``.

    The grid step is taken from the first two entries of ``x``. Query points
    are clamped to ``[x[0], x[-1]]``.
    """
    origin = x[0]
    step = float(x[1] - origin)
    clamped = np.maximum(origin, np.minimum(x[-1], xi))
    position = (clamped - origin) / step
    base = np.floor(position).astype("int32")
    fraction = position - base
    # Forward differences; the last entry stays 0 so the final knot is reachable.
    slopes = np.zeros_like(y)
    slopes[:-1] = y[1:] - y[:-1]
    return y[base] + slopes[base] * fraction
2544 | 
2545 | 
def cheaptrick_smoothing_with_recovery(smoothed_spectrum, f0, fs, fft_size, q1):
    """Apply the smoothing and compensation lifters in the cepstral domain.

    Parameters
    ----------
    smoothed_spectrum : full-length (``fft_size``) positive spectrum.
    f0 : fundamental frequency used to shape both lifters.
    fs : sampling frequency.
    fft_size : FFT length.
    q1 : weight of the cosine term in the compensation lifter.

    Returns
    -------
    The liftered spectral envelope for bins 0 .. fft_size / 2.

    The smoothing lifter is sin(pi f0 t) / (pi f0 t) with value 1 at
    t = 0. The division is only performed where the denominator is
    non-zero, so no 0/0 (NaN and floating-point warning) is produced.
    """
    half = int(fft_size / 2)
    quefrency_axis = np.arange(fft_size) / float(fs)

    phase = np.pi * f0 * quefrency_axis
    smoothing_lifter = np.ones_like(phase)
    nonzero = phase != 0
    smoothing_lifter[nonzero] = np.sin(phase[nonzero]) / phase[nonzero]
    # Make the lifter symmetric about the centre.
    smoothing_lifter[half + 1:] = smoothing_lifter[1:half][::-1].copy()

    compensation_lifter = (1 - 2. * q1) + 2. * q1 * np.cos(2 * np.pi * quefrency_axis * f0)
    compensation_lifter[half + 1:] = compensation_lifter[1:half][::-1].copy()

    tandem_cepstrum = np.fft.fft(np.log(smoothed_spectrum))
    liftered = tandem_cepstrum * smoothing_lifter * compensation_lifter
    tmp_spectral_envelope = np.exp(np.real(np.fft.ifft(liftered)))
    return tmp_spectral_envelope[:half + 1]
2560 | 
2561 | 
def cheaptrick_estimate_one_slice(x, fs, current_f0,
    current_position, fft_size, q1):
    """Estimate the spectral envelope of one frame.

    Chains windowing, power-spectrum computation, linear smoothing and
    cepstral liftering. The half-length smoothed spectrum is mirrored to
    full length before liftering.
    """
    windowed = cheaptrick_get_windowed_waveform(x, fs, current_f0,
                                                current_position)
    spectrum = cheaptrick_get_power_spectrum(windowed, fs, fft_size,
                                             current_f0)
    smoothed = cheaptrick_linear_smoothing(spectrum, current_f0, fs, fft_size)
    # Mirror bins 1..N/2-1 to rebuild a full-length symmetric spectrum.
    mirrored = np.concatenate([smoothed, smoothed[1:-1][::-1]])
    return cheaptrick_smoothing_with_recovery(mirrored, current_f0, fs,
                                              fft_size, q1)
2574 | 
2575 | 
def cheaptrick(x, fs, temporal_positions, f0_sequence,
        vuv, fftlen="auto", q1=-0.15):
    """Estimate a spectral envelope for every frame.

    Unvoiced frames (``vuv == 0``) and frames whose f0 is below the lowest
    value the FFT size supports are analysed with a default f0 of 500.
    With ``fftlen="auto"`` the FFT size is derived from ``fs`` and a 71 Hz
    floor.

    Returns ``(temporal_positions, spectrogram, fs)`` where ``spectrogram``
    has one row per frame and ``fft_size / 2 + 1`` columns.
    """
    default_f0 = 500
    if fftlen == "auto":
        initial_f0_floor = 71
        fftlen = int(2 ** np.ceil(np.log2(3. * float(fs) / initial_f0_floor + 1)))
    fft_size = fftlen
    # Lowest f0 whose three-period window still fits the FFT size.
    f0_low_limit = fs * 3.0 / (fft_size - 3.0)

    f0_track = f0_sequence.copy()
    f0_track[vuv == 0] = default_f0
    n_frames = len(f0_track)
    spectrogram = np.zeros((int(fft_size / 2.) + 1, n_frames))
    for frame in range(n_frames):
        if f0_track[frame] < f0_low_limit:
            f0_track[frame] = default_f0
        spectrogram[:, frame] = cheaptrick_estimate_one_slice(
            x, fs, f0_track[frame], temporal_positions[frame], fft_size, q1)
    return temporal_positions, spectrogram.T, fs
2594 | 
2595 | 
def d4c_love_train(x, fs, current_f0, current_position, threshold):
    """Decide whether a frame is voiced (1) or not (0) from its spectrum.

    A frame with ``current_f0 == 0`` is 0 immediately. Otherwise the power
    below 100 Hz is discarded and the frame is 1 when the cumulative power
    up to 4000 Hz divided by the cumulative power up to 7900 Hz exceeds
    ``threshold``.
    """
    if current_f0 == 0:
        return 0
    lowest_f0 = 40
    current_f0 = max([current_f0, lowest_f0])
    fft_size = int(2 ** np.ceil(np.log2(3. * fs / lowest_f0 + 1)))
    bin_width = float(fs) / fft_size
    boundary0 = int(np.ceil(100 / bin_width))
    boundary1 = int(np.ceil(4000 / bin_width))
    boundary2 = int(np.ceil(7900 / bin_width))

    waveform = d4c_get_windowed_waveform(x, fs, current_f0, current_position,
                                         1.5, 2)
    power_spectrum = np.abs(np.fft.fft(waveform, int(fft_size)) ** 2)
    power_spectrum[0:boundary0 + 1] = 0.
    cumulative_spectrum = np.cumsum(power_spectrum)
    ratio = cumulative_spectrum[boundary1] / cumulative_spectrum[boundary2]
    if ratio > threshold:
        return 1
    return 0
2615 | 
2616 | 
def d4c_get_windowed_waveform(x, fs, current_f0, current_position, half_length,
        window_type):
    """Cut a pitch-adaptive windowed segment around ``current_position``.

    The window covers ``half_length`` periods of ``current_f0`` on each
    side. ``window_type`` 1 uses the 0.5/0.5 cosine window, 2 uses the
    0.42/0.5/0.08 cosine-sum window; anything else raises ``ValueError``.
    Out-of-range indices are clamped to the first/last sample, and a
    window-shaped offset is subtracted from the windowed segment.
    """
    half_window_length = int(np.round(half_length * fs / current_f0))
    offsets = np.arange(-half_window_length, half_window_length + 1)
    # 1-based positions clamped to [1, len(x)], then shifted to 0-based.
    positions = np.round(current_position * fs + 0.001) + offsets + 1
    safe_index = np.minimum(len(x), np.maximum(1, np.round(positions))).astype("int32") - 1

    segment = x[safe_index]
    time_axis = offsets / float(fs) / float(half_length)
    phase = np.pi * time_axis * current_f0
    if window_type == 1:
        window = 0.5 * np.cos(phase) + 0.5
    elif window_type == 2:
        window = 0.08 * np.cos(phase * 2)
        window += 0.5 * np.cos(phase) + 0.42
    else:
        raise ValueError("Unknown window type")
    windowed = segment * window
    return windowed - window * np.mean(windowed) / np.mean(window)
2635 | 
2636 | 
def d4c_get_static_centroid(x, fs, current_f0, current_position, fft_size):
    """Combine the centroids of two windows placed a quarter period apart.

    One window is centred a quarter period after ``current_position`` and
    one a quarter period before. Their centroids are summed and passed
    through ``d4c_dc_correction``.
    """
    quarter_period = 1. / current_f0 / 4.
    later = d4c_get_windowed_waveform(x, fs, current_f0,
                                      current_position + quarter_period, 2, 2)
    earlier = d4c_get_windowed_waveform(x, fs, current_f0,
                                        current_position - quarter_period, 2, 2)
    centroid_sum = d4c_get_centroid(later, fft_size) + d4c_get_centroid(earlier, fft_size)
    return d4c_dc_correction(centroid_sum, fs, fft_size, current_f0)
2647 | 
2648 | 
def d4c_get_centroid(x, fft_size):
    """Return the per-bin temporal centroid of the unit-energy signal ``x``.

    Computed from the spectrum of ``x`` and the spectrum of ``x`` weighted
    by the 1-based sample number. The input array is not modified.
    """
    n_fft = int(fft_size)
    sample_number = np.arange(1, len(x) + 1)
    unit = x / np.sqrt(np.sum(x ** 2))

    spectrum = np.fft.fft(unit, n_fft)
    weighted = np.fft.fft(-unit * 1j * sample_number, n_fft)
    return -(weighted.imag) * spectrum.real + spectrum.imag * weighted.real
2659 | 
2660 | 
def d4c_dc_correction(signal, fs, fft_size, f0):
    """Correct the band below ``f0`` of a full-length spectrum, in place.

    Bins below ``f0`` are increased by a replica of the spectrum mirrored
    about ``f0`` (linear interpolation with extrapolation). The upper half
    is then overwritten with the mirrored lower half. ``signal`` itself is
    modified and returned.
    """
    n_fft = int(fft_size)
    frequency_axis = np.arange(n_fft) / n_fft * fs
    # Bins up to one bin above f0 feed the interpolator.
    low_mask = frequency_axis < f0 + fs / n_fft
    low_frequency_axis = frequency_axis[low_mask]
    mirror = interp1d(f0 - low_frequency_axis, signal[low_mask],
                      kind="linear", fill_value="extrapolate")
    replica = mirror(low_frequency_axis)

    below_f0 = frequency_axis < f0
    signal[below_f0] = replica[below_f0[:len(replica)]] + signal[below_f0]
    half = int(n_fft / 2.)
    signal[half + 1:] = signal[1:half][::-1]
    return signal
2673 | 
2674 | 
def d4c_linear_smoothing(group_delay, fs, fft_size, width):
    """Smooth a full-length spectrum-like array with a rectangular window.

    The input is duplicated to cover [-fs, fs), integrated, and the integral
    is read (via ``cheaptrick_interp1h``) at ``f - width/2`` and
    ``f + width/2`` for every bin from 0 to fs/2. The difference divided by
    ``width`` is returned.
    """
    bin_width = fs / float(fft_size)
    double_frequency_axis = np.arange(2 * fft_size) / float(fft_size) * fs - fs
    doubled = np.concatenate([group_delay, group_delay])
    double_segment = np.cumsum(doubled * bin_width)

    center_frequency = np.arange(int(fft_size / 2) + 1) / float(fft_size) * fs
    # The running integral is sampled at bin centres, half a bin higher.
    shifted_axis = double_frequency_axis + bin_width / 2.
    low_levels = cheaptrick_interp1h(shifted_axis, double_segment,
                                     center_frequency - width / 2.)
    high_levels = cheaptrick_interp1h(shifted_axis, double_segment,
                                      center_frequency + width / 2.)
    return (high_levels - low_levels) / width
2687 | 
2688 | 
def d4c_get_smoothed_power_spectrum(waveform, fs, f0, fft_size):
    """Return a full-length, f0-wide smoothed power spectrum of ``waveform``.

    The power spectrum is DC-corrected, smoothed with a window of width
    ``f0`` and mirrored back to full FFT length.
    """
    raw_power = np.abs(np.fft.fft(waveform, int(fft_size))) ** 2
    corrected = d4c_dc_correction(raw_power, fs, fft_size, f0)
    smoothed = d4c_linear_smoothing(corrected, fs, fft_size, f0)
    return np.concatenate([smoothed, smoothed[1:-1][::-1]])
2696 | 
2697 | 
def d4c_get_static_group_delay(static_centroid, smoothed_power_spectrum, fs, f0,
        fft_size):
    """Turn a static centroid into a detrended, full-length group delay.

    The centroid is normalised by the smoothed power spectrum, smoothed with
    width ``f0 / 2``, and a second smoothing with width ``f0`` is subtracted
    from it. The result is mirrored to full FFT length.
    """
    half = int(fft_size / 2)
    normalised = static_centroid / smoothed_power_spectrum
    fine = d4c_linear_smoothing(normalised, fs, fft_size, f0 / 2.)
    fine_full = np.concatenate([fine, fine[1:-1][::-1]])
    trend = d4c_linear_smoothing(fine_full, fs, fft_size, f0)
    detrended = fine_full[:half + 1] - trend
    return np.concatenate([detrended, detrended[1:-1][::-1]])
2707 | 
2708 | 
def d4c_get_coarse_aperiodicity(group_delay, fs, fft_size,
        frequency_interval, number_of_aperiodicities, window1):
    """Estimate one aperiodicity value (dB) per frequency band.

    Band ``k`` (1-based) is centred at ``k * frequency_interval``. The
    group delay around that centre is windowed with ``window1``, its power
    spectrum is sorted, and the ratio of a partial cumulative sum to the
    total is converted with ``-10 * log10``. Returns a column vector of
    shape ``(number_of_aperiodicities, 1)``.
    """
    n_fft = int(fft_size)
    half_spectrum = int(fft_size / 2) + 1
    boundary = np.round(fft_size / len(window1) * 8)
    half_window_length = np.floor(len(window1) / 2)
    reference_bin = int(fft_size / 2 - boundary) - 1
    bin_width = fs / float(fft_size)

    coarse_aperiodicity = np.zeros((number_of_aperiodicities, 1))
    for band in range(number_of_aperiodicities):
        center = np.floor(frequency_interval * (band + 1) / bin_width)
        lower = int(center - half_window_length)
        upper = int(center + half_window_length + 1)
        segment = group_delay[lower:upper] * window1
        power = np.abs(np.fft.fft(segment, n_fft)) ** 2
        cumulative = np.cumsum(np.sort(power[:half_spectrum]))
        coarse_aperiodicity[band] = -10 * np.log10(cumulative[reference_bin] / cumulative[-1])
    return coarse_aperiodicity
2721 | 
2722 | 
def d4c_estimate_one_slice(x, fs, current_f0, frequency_interval,
        current_position, fft_size, number_of_aperiodicities, window1):
    """Estimate the coarse aperiodicity of a single frame.

    A frame with ``current_f0 == 0`` yields an all-zero column of shape
    ``(number_of_aperiodicities, 1)``. Otherwise the static centroid, the
    smoothed power spectrum and the static group delay are computed and
    reduced to band-wise values.
    """
    if current_f0 == 0:
        return np.zeros((number_of_aperiodicities, 1))

    centroid = d4c_get_static_centroid(x, fs, current_f0,
                                       current_position, fft_size)
    windowed = d4c_get_windowed_waveform(x, fs, current_f0, current_position,
                                         2, 1)
    smoothed_power = d4c_get_smoothed_power_spectrum(windowed, fs,
                                                     current_f0, fft_size)
    group_delay = d4c_get_static_group_delay(centroid, smoothed_power, fs,
                                             current_f0, fft_size)
    return d4c_get_coarse_aperiodicity(group_delay, fs, fft_size,
                                       frequency_interval,
                                       number_of_aperiodicities, window1)
2740 | 
2741 | 
def d4c(x, fs, temporal_positions_h, f0_h, vuv_h, threshold="default",
        fft_size="auto"):
    """Estimate band aperiodicity for every frame.

    Parameters
    ----------
    x : signal samples.
    fs : sampling frequency in Hz.
    temporal_positions_h : frame positions in seconds.
    f0_h : f0 per frame.
    vuv_h : voicing flag per frame; frames with 0 are treated as f0 == 0.
    threshold : voicing threshold handed to ``d4c_love_train``. The string
        ``"default"`` selects 0.85; a numeric value is used as given.
    fft_size : only ``"auto"`` is supported.

    Returns
    -------
    ``(temporal_positions_h, f0_h, vuv_h, aperiodicity, coarse_ap)`` where
    ``aperiodicity`` has one row per frame and ``coarse_ap`` holds the first
    coarse band value of each frame.

    Raises
    ------
    ValueError
        If ``fft_size`` is not ``"auto"`` or ``threshold`` is a string other
        than ``"default"``.
    """
    f0_low_limit = 47
    if fft_size == "auto":
        fft_size = 2 ** np.ceil(np.log2(4. * fs / f0_low_limit + 1.))
    else:
        raise ValueError("Only fft_size auto currently supported")
    # Honour a caller-supplied threshold instead of always using 0.85.
    if isinstance(threshold, str):
        if threshold != "default":
            raise ValueError("threshold must be 'default' or a number")
        threshold = 0.85
    f0_low_limit_for_spectrum = 71
    fft_size_for_spectrum = 2 ** np.ceil(np.log2(3 * fs / f0_low_limit_for_spectrum + 1.))
    upper_limit = 15000
    frequency_interval = 3000
    f0 = f0_h.copy()
    temporal_positions = temporal_positions_h.copy()
    f0[vuv_h == 0] = 0.

    number_of_aperiodicities = int(np.floor(np.min([upper_limit, fs / 2. - frequency_interval]) / float(frequency_interval)))
    window_length = np.floor(frequency_interval / (fs / float(fft_size))) * 2 + 1
    window1 = harvest_nuttall(window_length)
    n_bins = int(fft_size_for_spectrum / 2) + 1
    aperiodicity = np.zeros((n_bins, len(f0)))
    coarse_ap = np.zeros((1, len(f0)))

    frequency_axis = np.arange(n_bins) * float(fs) / fft_size_for_spectrum
    # Float axis: an integer axis would truncate fs / 2. for odd fs and put
    # the top of frequency_axis outside the interpolation range.
    coarse_axis = np.arange(number_of_aperiodicities + 2, dtype=float) * frequency_interval
    coarse_axis[-1] = fs / 2.

    for i in range(len(f0)):
        r = d4c_love_train(x, fs, f0[i], temporal_positions_h[i], threshold)
        if r == 0:
            # Frame judged unvoiced: aperiodicity just below 1 in every bin.
            aperiodicity[:, i] = 1 - 0.000000000001
            continue
        current_f0 = max([f0_low_limit, f0[i]])
        coarse_aperiodicity = d4c_estimate_one_slice(x, fs, current_f0,
            frequency_interval, temporal_positions[i], fft_size,
            number_of_aperiodicities, window1)
        coarse_ap[0, i] = coarse_aperiodicity.ravel()[0]
        coarse_aperiodicity = np.maximum(0, coarse_aperiodicity - (current_f0 - 100) * 2. / 100.)
        # Band values in dB, anchored at -60 dB (0 Hz) and ~0 dB (fs / 2).
        piece = np.concatenate([[-60], -coarse_aperiodicity.ravel(), [-0.000000000001]])
        part = interp1d(coarse_axis, piece, kind="linear")(frequency_axis) / 20.
        aperiodicity[:, i] = 10 ** part
    return temporal_positions_h, f0_h, vuv_h, aperiodicity.T, coarse_ap.squeeze()
2783 | 
2784 | 
def world_synthesis_time_base_generation(temporal_positions, f0, fs, vuv,
        time_axis, default_f0):
    """Place excitation pulses along ``time_axis``.

    f0 and the voicing flag are linearly interpolated (with extrapolation)
    onto ``time_axis``. Positions whose interpolated voicing is not above
    0.5 use ``default_f0``. A pulse is placed wherever the wrapped
    accumulated phase jumps by more than pi / 2 between samples.

    Returns ``(pulse_locations, pulse_locations_index, vuv_interpolated)``.
    """
    def on_time_axis(values):
        resampler = interp1d(temporal_positions, values, kind="linear",
                             fill_value="extrapolate")
        return resampler(time_axis)

    f0_interpolated_raw = on_time_axis(f0)
    vuv_interpolated = on_time_axis(vuv) > 0.5
    f0_interpolated = f0_interpolated_raw * vuv_interpolated.astype("float32")
    zero_f0 = f0_interpolated == 0
    f0_interpolated[zero_f0] = f0_interpolated[zero_f0] + default_f0

    total_phase = np.cumsum(2 * np.pi * f0_interpolated / float(fs))
    wrapped = np.mod(total_phase, 2 * np.pi)
    jumps = np.abs(wrapped[1:] - wrapped[:-1])
    # jumps is one element shorter than time_axis, hence the [:-1]
    pulse_locations = time_axis[:-1][jumps > (np.pi / 2.)]
    pulse_locations_index = np.round(pulse_locations * fs).astype("int32")
    return pulse_locations, pulse_locations_index, vuv_interpolated
2802 | 
2803 | 
def world_synthesis_get_spectral_parameters(temporal_positions,
        temporal_position_index, spectrogram, amplitude_periodic,
        amplitude_random, pulse_locations):
    """Fetch spectrum and amplitude slices for one pulse.

    ``temporal_position_index`` is a 1-based, possibly fractional frame
    index. When its floor and ceiling refer to the same time, the column of
    each matrix is returned directly. Otherwise each matrix is linearly
    interpolated between the two neighbouring columns at ``pulse_locations``
    clamped to their time span.

    Returns ``(spectrum_slice, periodic_slice, aperiodic_slice)``.
    """
    floor_index = int(np.floor(temporal_position_index) - 1)
    assert floor_index >= 0
    ceil_index = int(np.ceil(temporal_position_index) - 1)
    t1 = temporal_positions[floor_index]
    t2 = temporal_positions[ceil_index]

    if t1 == t2:
        return (spectrogram[:, floor_index],
                amplitude_periodic[:, floor_index],
                amplitude_random[:, floor_index])

    # Clamp the pulse time into [t1, t2] before interpolating.
    clamped_time = max([t1, min([t2, pulse_locations])])

    def between_frames(matrix):
        pair = np.concatenate([matrix[:, floor_index][None],
                               matrix[:, ceil_index][None]], axis=0)
        return interp1d(np.array([t1, t2]), pair,
                        kind="linear", axis=0)(clamped_time.copy())

    spectrum_slice = between_frames(spectrogram)
    periodic_slice = between_frames(amplitude_periodic)
    aperiodic_slice = between_frames(amplitude_random)
    return spectrum_slice, periodic_slice, aperiodic_slice
2832 | 
2833 | """
2834 | Filter data with an FIR filter using the overlap-add method.
2835 | from http://projects.scipy.org/scipy/attachment/ticket/837/fftfilt.py
2836 | """
def nextpow2(x):
    """Return the smallest N (as a float) such that 2**N >= abs(x)."""
    magnitude = np.abs(x)
    return np.ceil(np.log2(magnitude))
2840 | 
2841 | 
def fftfilt(b, x, *n):
    """Filter the signal x with the FIR filter described by the
    coefficients in b using the overlap-add method. If the FFT
    length n is not specified, it and the overlap-add block length
    are selected so as to minimize the computational cost of
    the filtering operation.

    Returns a float32 array with the same length as x. Only the real part
    of the filtered signal is kept.

    Raises ValueError if n is given and is not a positive integer."""

    N_x = len(x)
    N_b = len(b)

    # Determine the FFT length to use:
    N_fft = None
    if len(n):
        # Use the specified FFT length (rounded up to the nearest
        # power of 2), provided that it is no less than the filter
        # length:
        n = n[0]
        if n != int(n) or n <= 0:
            raise ValueError('n must be a positive integer')
        if n < N_b:
            n = N_b
        N_fft = 2**nextpow2(n)
    elif N_x > N_b:
        # When the filter length is smaller than the signal,
        # choose the FFT length and block size that minimize the
        # FLOPS cost. Since the cost for a length-N FFT is
        # (N/2)*log2(N) and the filtering operation of each block
        # involves 2 FFT operations and N multiplications, the
        # cost of the overlap-add method for 1 length-N block is
        # N*(1+log2(N)). For the sake of efficiency, only FFT
        # lengths that are powers of 2 are considered:
        N = 2**np.arange(np.ceil(np.log2(N_b)),
                         np.floor(np.log2(N_x)))
        # The candidate list is empty when no power of two lies between
        # the filter length and the signal length (e.g. N_b=5, N_x=7);
        # fall through to the single-block case instead of crashing.
        if len(N):
            cost = np.ceil(N_x/(N-N_b+1))*N*(np.log2(N)+1)
            N_fft = N[np.argmin(cost)]
    if N_fft is None:
        # Filter the signal using a single block:
        N_fft = 2**nextpow2(N_b+N_x-1)

    N_fft = int(N_fft)

    # Compute the block length:
    L = int(N_fft - N_b + 1)

    # Compute the transform of the filter:
    H = np.fft.fft(b, N_fft)

    y = np.zeros(N_x, dtype=np.float32)
    i = 0
    # Strictly less than: a block starting at N_x would be empty.
    while i < N_x:
        il = min([i+L, N_x])
        k = min([i+N_fft, N_x])
        yt = np.fft.ifft(np.fft.fft(x[i:il], N_fft)*H, N_fft)  # Overlap..
        # ..and add. Take the real part explicitly so the complex ifft
        # output is not implicitly cast (which emits a ComplexWarning).
        y[i:k] += yt[:k-i].real
        i += L
    return y
2899 | 
2900 | 
def world_synthesis(f0_d4c, vuv_d4c, aperiodicity_d4c,
        spectrogram_ct, fs_ct, random_seed=1999):
    """
    Synthesize a waveform from f0, voicing, aperiodicity and a spectrogram.

    f0_d4c is indexed per frame. vuv_d4c is forwarded unchanged to
    world_synthesis_time_base_generation.

    aperiodicity_d4c is treated as "coarse" when it is 1D or has a single
    column; it is then interpolated onto the full frequency axis. Otherwise
    it is transposed and used as is.

    spectrogram_ct is transposed on entry, so after the transpose axis 0 is
    the frequency bin and axis 1 is the frame.

    fs_ct is the sample rate used to build the output time axis.

    random_seed seeds the RandomState that generates the noise excitation.

    Returns a 1D array sampled every 1 / fs between the first and last
    temporal position.
    """

    # swap 0 and 1 axis
    spectrogram_ct = spectrogram_ct.T
    fs = fs_ct
    # coarse -> fine aper
    if len(aperiodicity_d4c.shape) == 1 or aperiodicity_d4c.shape[1] == 1:
        print("Coarse aperiodicity detected - interpolating to full size")
        aper = np.zeros_like(spectrogram_ct)
        if len(aperiodicity_d4c.shape) == 1:
            aperiodicity_d4c = aperiodicity_d4c[None, :]
        else:
            aperiodicity_d4c = aperiodicity_d4c.T
        coarse_aper_d4c = aperiodicity_d4c
        frequency_interval = 3000
        upper_limit = 15000
        number_of_aperiodicities = int(np.floor(np.min([upper_limit, fs / 2. - frequency_interval]) / float(frequency_interval)))
        coarse_axis = np.arange(number_of_aperiodicities + 2) * frequency_interval
        coarse_axis[-1] = fs / 2.
        f0_low_limit_for_spectrum = 71
        fft_size_for_spectrum = 2 ** np.ceil(np.log2(3 * fs / f0_low_limit_for_spectrum + 1.))

        frequency_axis = np.arange(int(fft_size_for_spectrum / 2) + 1) * float(fs) / fft_size_for_spectrum

        for i in range(len(f0_d4c)):
            ca = coarse_aper_d4c[0, i]
            cf = f0_d4c[i]
            # NOTE(review): coarse_aperiodicity is computed but never used;
            # piece below is built from the raw ca value. Confirm intent.
            coarse_aperiodicity = np.maximum(0, ca - (cf - 100) * 2. / 100.)
            # NOTE(review): piece always has 3 entries, so it only lines up
            # with coarse_axis when number_of_aperiodicities == 1.
            piece = np.concatenate([[-60], -ca.ravel(), [-0.000000000001]])
            # values are divided by 20 and used as a power of 10 below
            part = interp1d(coarse_axis, piece, kind="linear")(frequency_axis) / 20.
            aper[:, i] = 10 ** part
        aperiodicity_d4c = aper
    else:
        aperiodicity_d4c = aperiodicity_d4c.T

    default_f0 = 500.
    # honor the caller supplied seed (the default keeps the old fixed 1999)
    random_state = np.random.RandomState(random_seed)
    spectrogram = spectrogram_ct
    aperiodicity = aperiodicity_d4c
    # upper bound on the number of samples used to generate frame positions
    max_len = 5000000
    _, temporal_positions = _world_get_temporal_positions(max_len, fs)
    temporal_positions = temporal_positions[:spectrogram.shape[1]]
    vuv = vuv_d4c
    f0 = f0_d4c

    time_axis = np.arange(temporal_positions[0], temporal_positions[-1],
            1. / fs)
    y = 0. * time_axis
    r = world_synthesis_time_base_generation(temporal_positions, f0, fs, vuv,
            time_axis, default_f0)
    pulse_locations, pulse_locations_index, interpolated_vuv = r
    # spectrogram rows are one-sided bins, so the FFT size is 2 * (bins - 1)
    fft_size = int((len(spectrogram) - 1) * 2)
    base_index = np.arange(-fft_size / 2, fft_size / 2) + 1
    y_length = len(y)
    tmp_complex_cepstrum = np.zeros((fft_size,), dtype=np.complex128)
    latter_index = np.arange(int(fft_size / 2) + 1, fft_size + 1) - 1

    temporal_position_index = interp1d(temporal_positions, np.arange(1, len(temporal_positions) + 1), kind="linear", fill_value="extrapolate")(pulse_locations)
    # NOTE(review): the clamped value is stored under a misspelled name and
    # never read; the loop below uses the unclamped temporal_position_index.
    # Left untouched because switching would change the synthesized output.
    temporal_postion_index = np.maximum(1, np.minimum(len(temporal_positions),
        temporal_position_index)) - 1

    amplitude_aperiodic = aperiodicity ** 2
    amplitude_periodic = np.maximum(0.001, (1. - amplitude_aperiodic))

    for i in range(len(pulse_locations_index)):
        spectrum_slice, periodic_slice, aperiodic_slice = world_synthesis_get_spectral_parameters(
            temporal_positions, temporal_position_index[i], spectrogram,
            amplitude_periodic, amplitude_aperiodic, pulse_locations[i])
        # distance (in samples) to the next pulse, 0 for the last pulse
        idx = min(len(pulse_locations_index), i + 2) - 1
        noise_size = pulse_locations_index[idx] - pulse_locations_index[i]
        # window of output samples around this pulse, clamped to the buffer
        output_buffer_index = np.maximum(1, np.minimum(y_length, pulse_locations_index[i] + 1 + base_index)).astype("int32") - 1

        if interpolated_vuv[pulse_locations_index[i]] >= 0.5:
            tmp_periodic_spectrum = spectrum_slice * periodic_slice
            # eps in matlab/octave
            tmp_periodic_spectrum[tmp_periodic_spectrum == 0] = 2.2204E-16
            periodic_spectrum = np.concatenate([tmp_periodic_spectrum,
                tmp_periodic_spectrum[1:-1][::-1]])
            tmp_cepstrum = np.real(np.fft.fft(np.log(np.abs(periodic_spectrum)) / 2.))
            tmp_complex_cepstrum[latter_index] = tmp_cepstrum[latter_index] * 2
            tmp_complex_cepstrum[0] = tmp_cepstrum[0]

            response = np.fft.fftshift(np.real(np.fft.ifft(np.exp(np.fft.ifft(
                tmp_complex_cepstrum)))))
            y[output_buffer_index] += response * np.sqrt(
                   max([1, noise_size]))
            tmp_aperiodic_spectrum = spectrum_slice * aperiodic_slice
        else:
            tmp_aperiodic_spectrum = spectrum_slice

        tmp_aperiodic_spectrum[tmp_aperiodic_spectrum == 0] = 2.2204E-16
        aperiodic_spectrum = np.concatenate([tmp_aperiodic_spectrum,
            tmp_aperiodic_spectrum[1:-1][::-1]])
        tmp_cepstrum = np.real(np.fft.fft(np.log(np.abs(aperiodic_spectrum)) / 2.))
        tmp_complex_cepstrum[latter_index] = tmp_cepstrum[latter_index] * 2
        tmp_complex_cepstrum[0] = tmp_cepstrum[0]
        rc = np.fft.ifft(tmp_complex_cepstrum)
        erc = np.exp(rc)
        response = np.fft.fftshift(np.real(np.fft.ifft(erc)))
        noise_input = random_state.randn(max([3, noise_size]),)

        # zero-mean noise filtered by the aperiodic response
        y[output_buffer_index] = y[output_buffer_index] + fftfilt(noise_input - np.mean(noise_input), response)
    return y
3009 | 
3010 | 
def _mgc_b2c(wc, c, alpha):
    """
    Run the b2c recursion over c and return the result as a new array.

    wc is only used as a template for the size and dtype of the output;
    it is not modified. The entries of c are consumed from last to first.
    """
    out = np.zeros_like(wc)
    top = len(wc) - 1
    one_minus_a2 = 1. - alpha ** 2
    for k in reversed(range(len(c))):
        # snapshot of the previous pass, the update reads old values
        before = copy.copy(out)
        out[0] = c[k]
        if top >= 1:
            out[1] = one_minus_a2 * before[0] + alpha * before[1]
        for n in range(2, top + 1):
            out[n] = before[n - 1] + alpha * (before[n] - out[n - 1])
    return out
3022 | 
3023 | 
def _mgc_ptrans(p, m, alpha):
    """
    In place update of p[0] ... p[m - 1]; p[m] is only read.

    Walks from index m - 1 down to 1 adding alpha times the old value of
    the right neighbour, then rescales p[0]. Returns nothing.
    """
    carry = p[m]
    for k in reversed(range(1, m)):
        current = p[k]
        p[k] = current + alpha * carry
        carry = current

    # carry now holds the old p[1] (or p[m] when m == 1)
    p[0] = (1. - alpha ** 2) * p[0] + 2 * (alpha * carry)
3036 | 
3037 | 
def _mgc_qtrans(q, m, alpha):
    """
    In place update of q[2] ... q[2 * m].

    Each entry gets alpha times the old value of its left neighbour added.
    q[0] and q[1] are left untouched. Returns nothing.
    """
    carry = q[1]
    for k in range(2, 2 * m + 1):
        current = q[k]
        q[k] = current + alpha * carry
        carry = current
3044 | 
3045 | 
def _mgc_gain(er, c, m, g):
    """
    Return er[0] + g * sum(er[i] * c[i] for i in 1..m).

    When g is zero the sum is skipped and er[0] is returned directly.
    """
    if g == 0:
        return er[0]
    weighted = sum((er[k] * c[k] for k in range(1, m + 1)), 0.)
    return er[0] + g * weighted
3054 | 
3055 | 
def _mgc_fill_toeplitz(A, t):
    """
    Fill the leading len(t) x len(t) part of A, in place, as a symmetric
    Toeplitz matrix: A[i, j] = t[|i - j|].
    """
    size = len(t)
    for row in range(size):
        for col in range(size):
            A[row, col] = t[abs(row - col)]
3061 | 
3062 | 
def _mgc_fill_hankel(A, t):
    """
    Fill the leading n x n part of A, in place, as a Hankel matrix with
    A[i, j] = t[i + j], where n = len(t) // 2 + 1.
    """
    size = len(t) // 2 + 1
    for row in range(size):
        for col in range(size):
            # constant along anti-diagonals
            A[row, col] = t[row + col]
3068 | 
3069 | 
def _mgc_ignorm(c, gamma):
    """
    Inverse gain normalization, applied to c in place.

    For gamma == 0 only c[0] changes (it is replaced by its natural log).
    Otherwise c[1:] is multiplied by c[0] ** gamma and c[0] becomes
    (c[0] ** gamma - 1) / gamma.

    Returns c itself. Previously only the gamma == 0 branch returned the
    array and the other branch returned None; the in place update is
    unchanged so callers that ignore the return value are unaffected.
    """
    if gamma == 0.:
        c[0] = np.log(c[0])
    else:
        gain = c[0] ** gamma
        c[1:] *= gain
        c[0] = (gain - 1.) / gamma
    return c
3077 | 
3078 | 
def _mgc_gnorm(c, gamma):
    """
    Gain normalization, applied to c in place.

    For gamma == 0 only c[0] changes (it is replaced by exp(c[0])).
    Otherwise c[1:] is divided by gain = 1 + gamma * c[0] and c[0] becomes
    gain ** (1 / gamma).

    Returns c itself. Previously only the gamma == 0 branch returned the
    array and the other branch returned None; the in place update is
    unchanged so callers that ignore the return value are unaffected.
    """
    if gamma == 0.:
        c[0] = np.exp(c[0])
    else:
        gain = 1. + gamma * c[0]
        c[1:] /= gain
        c[0] = gain ** (1. / gamma)
    return c
3086 | 
3087 | 
def _mgc_b2mc(mc, alpha):
    """
    In place update of mc: walking from the second to last entry down to
    the first, each entry gets alpha times the old value of its right
    neighbour added. The last entry is left unchanged. Returns nothing.
    """
    carry = mc[len(mc) - 1]
    for k in reversed(range(len(mc) - 1)):
        current = mc[k]
        mc[k] = current + alpha * carry
        carry = current
3096 | 
3097 | 
def _mgc_mc2b(mc, alpha):
    """
    In place update of mc: walking from the second to last entry down to
    the first, alpha times the (already updated) right neighbour is
    subtracted from each entry. Returns nothing.
    """
    k = len(mc) - 2
    while k >= 0:
        mc[k] = mc[k] - alpha * mc[k + 1]
        k -= 1
3102 | 
3103 | 
def _mgc_gc2gc(src_ceps, src_gamma=0., dst_order=None, dst_gamma=0.):
    """
    Convert coefficients from src_gamma to dst_gamma and return a new
    array of length dst_order + 1.

    src_ceps is a numpy array and is only read. When dst_order is None
    the output has the same length as src_ceps. Entries beyond the end of
    src_ceps are built from the recursion term alone.
    """
    if dst_order is None:
        dst_order = len(src_ceps) - 1

    dst_ceps = np.zeros((dst_order + 1,), dtype=src_ceps.dtype)
    dst_order = len(dst_ceps) - 1
    m1 = len(src_ceps) - 1
    dst_ceps[0] = src_ceps[0]

    # m and k follow the 1-based indexing of the reference implementation,
    # hence the "- 1" on every array access
    for m in range(2, dst_order + 2):
        ss1 = 0.
        ss2 = 0.
        min_1 = m1 if (m1 < m - 1) else m - 2
        # k = 2 .. min_1 + 1; empty for the first output coefficient
        k = np.arange(2, min_1 + 2)

        if len(k) > 0:
            cc = src_ceps[k - 1] * dst_ceps[m - k]
            ss2 += ((k - 1) * cc).sum()
            ss1 += ((m - k) * cc).sum()

        correction = (dst_gamma * ss2 - src_gamma * ss1) / (m - 1.)
        if m <= m1 + 1:
            dst_ceps[m - 1] = src_ceps[m - 1] + correction
        else:
            dst_ceps[m - 1] = correction
    return dst_ceps
3145 | 
3146 | 
def _mgc_newton(mgc_stored, periodogram, order, alpha, gamma,
        recursion_order, iter_number, y_fft, z_fft, cr, pr, rr, ri,
        qr, qi, Tm, Hm, Tm_plus_Hm, b):
    """
    One update step of the coefficient estimate held in mgc_stored.

    mgc_stored is updated in place: entries 1..order receive the solution
    of the (Toeplitz + Hankel) linear system and entry 0 is set to
    sqrt(eta). periodogram is only read. iter_number is accepted but not
    used here.

    y_fft, z_fft, cr, pr, rr, ri, qr, qi, Tm, Hm, Tm_plus_Hm and b are
    caller owned work buffers that are overwritten.

    Returns (log(eta), new_pr) where new_pr is a copy of pr taken before
    pr is replaced by the real part of its scaled FFT.
    """
    # a lot of inplace operations to match the Julia code
    cr[1:order + 1] = mgc_stored[1:order + 1]

    if alpha != 0:
        cr_res = _mgc_b2c(cr[:recursion_order + 1], cr[:order + 1], -alpha)
        cr[:recursion_order + 1] = cr_res[:]

    # np.cast was removed in NumPy 2.0; astype gives the same float64 copy
    y = sp.fftpack.fft(np.asarray(cr).astype("float64"))
    c = mgc_stored
    x = periodogram

    if gamma == -1.:
        pr[:] = copy.deepcopy(x)
        new_pr = copy.deepcopy(pr)
    elif gamma == 0.:
        pr[:] = copy.deepcopy(x) / np.exp(2 * np.real(y))
        new_pr = copy.deepcopy(pr)
    else:
        # only this branch needs 1 / gamma, and gamma is nonzero here
        gamma_inv = 1. / gamma
        tr = 1. + gamma * np.real(y)
        ti = -gamma * np.imag(y)
        trr = tr * tr
        tii = ti * ti
        s = trr + tii
        t = x * np.power(s, (-gamma_inv))
        t /= s
        pr[:] = t
        rr[:] = tr * t
        ri[:] = ti * t
        t /= s
        qr[:] = (trr - tii) * t
        s = tr * ti * t
        qi[:] = (s + s)
        new_pr = copy.deepcopy(pr)

    y_fft[:] = copy.deepcopy(pr) + 0.j
    z_fft[:] = np.fft.fft(y_fft) / len(y_fft)
    pr[:] = copy.deepcopy(np.real(z_fft))
    if alpha != 0.:
        idx_1 = pr[:2 * order + 1]
        idx_2 = pr[:recursion_order + 1]
        idx_3 = _mgc_b2c(idx_1, idx_2, alpha)
        pr[:2 * order + 1] = idx_3[:]

    if gamma == 0. or gamma == -1.:
        qr[:2 * order + 1] = pr[:2 * order + 1]
        rr[:order + 1] = copy.deepcopy(pr[:order + 1])
    else:
        for i in range(len(qr)):
            y_fft[i] = qr[i] + 1j * qi[i]
        z_fft[:] = np.fft.fft(y_fft) / len(y_fft)
        qr[:] = np.real(z_fft)

        for i in range(len(rr)):
            y_fft[i] = rr[i] + 1j * ri[i]
        z_fft[:] = np.fft.fft(y_fft) / len(y_fft)
        rr[:] = np.real(z_fft)

        if alpha != 0.:
            qr_new = _mgc_b2c(qr[:recursion_order + 1], qr[:recursion_order + 1], alpha)
            qr[:recursion_order + 1] = qr_new[:]
            rr_new = _mgc_b2c(rr[:order + 1], rr[:recursion_order + 1], alpha)
            rr[:order + 1] = rr_new[:]

    if alpha != 0:
        _mgc_ptrans(pr, order, alpha)
        _mgc_qtrans(qr, order, alpha)

    eta = 0.
    if gamma != -1.:
        eta = _mgc_gain(rr, c, order, gamma)
        c[0] = np.sqrt(eta)

    if gamma == -1.:
        qr[:] = 0.
    elif gamma != 0.:
        for i in range(2, 2 * order + 1):
            qr[i] *= 1. + gamma

    # build and solve (Toeplitz + Hankel) * delta = rr[1:order + 1]
    te = pr[:order]
    _mgc_fill_toeplitz(Tm, te)
    he = qr[2: 2 * order + 1]
    _mgc_fill_hankel(Hm, he)

    Tm_plus_Hm[:] = Hm[:] + Tm[:]
    b[:order] = rr[1:order + 1]
    res = np.linalg.solve(Tm_plus_Hm, b)
    b[:] = res[:]

    c[1:order + 1] += res[:order]

    # for gamma == -1 the gain is evaluated after the coefficient update
    if gamma == -1.:
        eta = _mgc_gain(rr, c, order, gamma)
        c[0] = np.sqrt(eta)
    return np.log(eta), new_pr
3260 | 
3261 | 
def _mgc_mgcepnorm(b_gamma, alpha, gamma, otype):
    """
    Return a converted copy of b_gamma; the input is left untouched.

    The copy goes through _mgc_ignorm (with gamma) and then _mgc_b2mc
    (with alpha). Only otype == 0 is supported, anything else raises
    ValueError.
    """
    if otype != 0:
        raise ValueError("Not yet implemented for otype != 0")

    converted = copy.deepcopy(b_gamma)
    for step, param in ((_mgc_ignorm, gamma), (_mgc_b2mc, alpha)):
        # both helpers work in place on the copy
        step(converted, param)
    return converted
3270 | 
3271 | 
def _sp2mgc(sp, order=20, alpha=0.35, gamma=-0.41, miniter=2, maxiter=30, criteria=0.001, otype=0, verbose=False):
    """
    Estimate mel generalized cepstral coefficients for a single spectrum.

    sp is a 1D spectrum (complex or real); its squared magnitude is used
    as the periodogram. One _mgc_newton step is always run with
    gamma = -1. For any other gamma the result is converted to that gamma
    and refined for at most maxiter steps, stopping once at least miniter
    steps have run and the relative change of the returned criterion drops
    below criteria.

    Returns the array produced by _mgc_mgcepnorm (order + 1 entries).
    Only otype == 0 is supported there.

    Based on r9y9 Julia code
    https://github.com/r9y9/MelGeneralizedCepstrums.jl
    """
    periodogram = np.abs(sp) ** 2
    recursion_order = len(periodogram) - 1
    slen = len(periodogram)
    iter_number = 1

    def _z():
        return np.zeros((slen,), dtype="float64")

    def _o():
        return np.zeros((order,), dtype="float64")

    def _o2():
        return np.zeros((order, order), dtype="float64")

    # work buffers shared by every _mgc_newton call
    cr = _z()
    pr = _z()
    rr = _z()
    # np.longdouble is the same type as "float128" wherever that name
    # exists, and still works on platforms that do not define it
    ri = _z().astype(np.longdouble)
    qr = _z()
    qi = _z().astype(np.longdouble)
    Tm = _o2()
    Hm = _o2()
    Tm_plus_Hm = _o2()
    b = _o()
    y = _z() + 0j
    z = _z() + 0j
    b_gamma = np.zeros((order + 1,), dtype="float64")
    # return pr_new due to oddness with Julia having different numbers
    # in pr at end of function vs back in this scope
    eta0, pr_new = _mgc_newton(b_gamma, periodogram, order, alpha, -1.,
                               recursion_order, iter_number, y, z, cr, pr, rr,
                               ri, qr, qi, Tm, Hm, Tm_plus_Hm, b)
    pr[:] = pr_new

    if gamma != -1.:
        # convert the gamma = -1 solution into the requested gamma
        if alpha != 0.:
            _mgc_ignorm(b_gamma, -1.)
            _mgc_b2mc(b_gamma, alpha)
            d = copy.deepcopy(b_gamma)
            _mgc_gnorm(d, -1.)
            # numbers are slightly different here - numerical diffs?
        else:
            d = copy.deepcopy(b_gamma)
        b_gamma = _mgc_gc2gc(d, -1., order, gamma)

        if alpha != 0.:
            _mgc_ignorm(b_gamma, gamma)
            _mgc_mc2b(b_gamma, alpha)
            _mgc_gnorm(b_gamma, gamma)

        # refine with the requested gamma until the criterion settles
        eta_t = eta0
        for i in range(1, maxiter + 1):
            eta, pr_new = _mgc_newton(b_gamma, periodogram, order, alpha,
                    gamma, recursion_order, i, y, z, cr, pr, rr,
                    ri, qr, qi, Tm, Hm, Tm_plus_Hm, b)
            pr[:] = pr_new
            err = np.abs((eta_t - eta) / eta)
            if verbose:
                print("iter %i, criterion: %f" % (i, err))
            if i >= miniter:
                if err < criteria:
                    if verbose:
                        print("optimization complete at iter %i" % i)
                    break
            eta_t = eta
    mgc_arr = _mgc_mgcepnorm(b_gamma, alpha, gamma, otype)
    return mgc_arr
3379 | 
3380 | 
# Module level accumulator filled by _sp_collect_result.
_sp_convert_results = list()


def _sp_collect_result(result):
    """Append result to the module level _sp_convert_results list."""
    _sp_convert_results.append(result)
3385 | 
3386 | 
def _sp_convert(c_i, order, alpha, gamma, miniter, maxiter, criteria,
        otype, verbose):
    """
    Worker wrapper around _sp2mgc for one frame.

    c_i is a (frame index, frame count, spectrum) triple. The index is
    passed through so results can be put back in order. Returns
    (frame index, coefficients).
    """
    frame_index, _frame_count, frame_spectrum = c_i[0], c_i[1], c_i[2]
    coefficients = _sp2mgc(frame_spectrum, order=order, alpha=alpha,
                           gamma=gamma, miniter=miniter, maxiter=maxiter,
                           criteria=criteria, otype=otype, verbose=verbose)
    return (frame_index, coefficients)
3396 | 
3397 | 
def sp2mgc(sp, order=20, alpha=0.35, gamma=-0.41, miniter=2,
        maxiter=30, criteria=0.001, otype=0, verbose=False):
    """
    Accepts 1D or 2D one-sided spectrum (complex or real valued).

    If 2D, assumes time is axis 0.

    The spectrum is mirrored (all bins but the first, reversed, are
    appended) before the conversion. 2D input is converted frame by frame
    in a multiprocessing Pool; an exception raised in a worker is
    re-raised here.

    Returns mel generalized cepstral coefficients, one row per frame for
    2D input.

    Based on r9y9 Julia code
    https://github.com/r9y9/MelGeneralizedCepstrums.jl
    """

    if len(sp.shape) == 1:
        # 1D counterpart of the 2D mirroring below; indexing a 1D array
        # with two axes (sp[:, 1:]) raised IndexError
        sp = np.concatenate((sp, sp[1:][::-1]), axis=0)
        return _sp2mgc(sp, order=order, alpha=alpha, gamma=gamma,
                miniter=miniter, maxiter=maxiter, criteria=criteria,
                otype=otype, verbose=verbose)

    sp = np.concatenate((sp, sp[:, 1:][:, ::-1]), axis=1)
    # Slooow, use multiprocessing to speed up a bit
    # http://blog.shenwei.me/python-multiprocessing-pool-difference-between-map-apply-map_async-apply_async/
    # http://stackoverflow.com/questions/5666576/show-the-progress-of-a-python-multiprocessing-pool-map-call
    c = [(i + 1, sp.shape[0], sp[i]) for i in range(sp.shape[0])]
    convert = functools.partial(_sp_convert, order=order, alpha=alpha,
                                gamma=gamma, miniter=miniter,
                                maxiter=maxiter, criteria=criteria,
                                otype=otype, verbose=False)
    p = Pool()
    start = time.time()
    if verbose:
        print("Starting conversion of %i frames" % sp.shape[0])
        print("This may take some time...")

    try:
        # takes ~360s for 630 frames, 1 process
        itr = p.map_async(convert, c)

        # chunk bookkeeping is only needed for the progress printout;
        # the chunk size is 0 for an empty input
        chunksize = itr._chunksize
        sz = len(c) // chunksize if chunksize else 0
        if chunksize and (sz * chunksize) != len(c):
            sz += 1

        last_remaining = None
        while True:
            remaining = itr._number_left
            if verbose:
                if remaining != last_remaining:
                    last_remaining = remaining
                    print("%i chunks of %i complete" % (sz - remaining, sz))
            if itr.ready():
                break
            time.sleep(.5)

        # get() returns the results in input order and re-raises any
        # exception from a worker instead of silently returning nothing
        results = itr.get()
        p.close()
    except BaseException:
        # do not leave worker processes behind on failure or interrupt
        p.terminate()
        raise
    finally:
        p.join()

    stop = time.time()
    if verbose:
        print("Processed %i frames in %s seconds" % (sp.shape[0], stop - start))
    final = [o[1] for o in sorted(results, key=lambda x: x[0])]
    return np.array(final)
3470 | 
3471 | 
def win2mgc(windowed_signal, order=20, alpha=0.35, gamma=-0.41, miniter=2,
        maxiter=30, criteria=0.001, otype=0, verbose=False):
    """
    Convert one windowed signal frame to mel generalized cepstral
    coefficients.

    Only 1D input is handled: the frame goes through np.fft.fft and the
    spectrum is handed to _sp2mgc. Any other dimensionality raises
    ValueError.

    Based on r9y9 Julia code
    https://github.com/r9y9/MelGeneralizedCepstrums.jl
    """
    if len(windowed_signal.shape) != 1:
        raise ValueError("2D input not yet complete for win2mgc")

    spectrum = np.fft.fft(windowed_signal)
    return _sp2mgc(spectrum, order=order, alpha=alpha, gamma=gamma,
                   miniter=miniter, maxiter=maxiter, criteria=criteria,
                   otype=otype, verbose=verbose)
3491 | 
3492 | 
def _mgc_freqt(wc, c, alpha):
    """
    Run the freqt recursion over c, writing the result into wc in place.

    wc is zeroed (by multiplying with 0) before the recursion starts. The
    entries of c are consumed from last to first. Returns nothing.
    """
    last = len(wc) - 1
    before = np.zeros_like(wc)
    wc *= 0
    for k in range(len(c) - 1, -1, -1):
        # snapshot of the previous pass, the update reads old values
        before[:] = wc
        if last >= 0:
            wc[0] = c[k] + alpha * before[0]
        if last >= 1:
            wc[1] = (1. - alpha * alpha) * before[0] + alpha * before[1]
        for n in range(2, last + 1):
            wc[n] = before[n - 1] + alpha * (before[n] - wc[n - 1])
3506 | 
3507 | 
def _mgc_mgc2mgc(src_ceps, src_alpha, src_gamma, dst_order, dst_alpha, dst_gamma):
    """
    Convert coefficients from (src_alpha, src_gamma) to (dst_alpha,
    dst_gamma) and return a new array with dst_order + 1 entries.

    The alpha change is applied with _mgc_freqt unless the combined alpha
    is exactly zero, in which case a copy of src_ceps is used directly.
    The gamma change is applied with _mgc_gnorm, _mgc_gc2gc and
    _mgc_ignorm. src_ceps is not modified.
    """
    warped = np.zeros((dst_order + 1,))
    alpha = (dst_alpha - src_alpha) / (1. - dst_alpha * src_alpha)
    if alpha == 0.:
        # no frequency warping needed, work on a private copy of the input
        work = copy.deepcopy(src_ceps)
    else:
        _mgc_freqt(warped, src_ceps, alpha)
        work = warped

    _mgc_gnorm(work, src_gamma)
    converted = _mgc_gc2gc(work, src_gamma, dst_order, dst_gamma)
    _mgc_ignorm(converted, dst_gamma)
    return converted
3523 | 
3524 | 
# Module level accumulator filled by _mgc_collect_result.
_mgc_convert_results = list()


def _mgc_collect_result(result):
    """Append result to the module level _mgc_convert_results list."""
    _mgc_convert_results.append(result)
3529 | 
3530 | 
def _mgc_convert(c_i, alpha, gamma, fftlen):
    """
    Worker wrapper around _mgc_mgc2mgc for one frame.

    c_i is a (frame index, frame count, coefficients) triple. The frame
    is converted to alpha = 0, gamma = 0 with order fftlen // 2. Returns
    (frame index, converted coefficients).
    """
    frame_index, _frame_count, frame_mgc = c_i[0], c_i[1], c_i[2]
    converted = _mgc_mgc2mgc(frame_mgc, src_alpha=alpha, src_gamma=gamma,
                             dst_order=fftlen // 2, dst_alpha=0.,
                             dst_gamma=0.)
    return (frame_index, converted)
3538 | 
3539 | 
def mgc2sp(mgc_arr, alpha=0.35, gamma=-0.41, fftlen="auto", fs=None,
        mode="world_pad", verbose=False):
    """
    Accepts 1D or 2D array of mgc

    If 2D, assume time is on axis 0

    fftlen may be an integer or "auto"; "auto" derives it from fs, which
    must then be given. Only mode "world_pad" is supported.

    2D input is converted frame by frame in a multiprocessing Pool; an
    exception raised in a worker is re-raised here.

    Returns reconstructed smooth spectrum for 2D input (exp of the real
    part of the rfft). For 1D input the raw rfft of the converted
    coefficients is returned.

    Raises ValueError for an unsupported mode, or when fftlen is "auto"
    and fs is missing.

    Based on r9y9 Julia code
    https://github.com/r9y9/MelGeneralizedCepstrums.jl
    """
    if mode != "world_pad":
        raise ValueError("Only currently supported mode is world_pad")

    if fftlen == "auto":
        if fs is None:
            raise ValueError("fs must be provided for fftlen 'auto'")
        f0_low_limit = 71
        # next power of two above 3 * fs / f0_low_limit + 1
        fftlen = int(2 ** np.ceil(np.log2(3. * float(fs) / f0_low_limit + 1)))
        if verbose:
            print("setting fftlen to %i" % fftlen)

    if len(mgc_arr.shape) == 1:
        c = _mgc_mgc2mgc(mgc_arr, alpha, gamma, fftlen // 2, 0., 0.)
        buf = np.zeros((fftlen,), dtype=c.dtype)
        buf[:len(c)] = c[:]
        # NOTE(review): unlike the 2D branch this does not apply
        # exp(real(...)); kept as is because callers may rely on it
        return np.fft.rfft(buf)

    # Slooow, use multiprocessing to speed up a bit
    # http://blog.shenwei.me/python-multiprocessing-pool-difference-between-map-apply-map_async-apply_async/
    # http://stackoverflow.com/questions/5666576/show-the-progress-of-a-python-multiprocessing-pool-map-call
    c = [(i + 1, mgc_arr.shape[0], mgc_arr[i]) for i in range(mgc_arr.shape[0])]
    convert = functools.partial(_mgc_convert, alpha=alpha, gamma=gamma,
                                fftlen=fftlen)
    p = Pool()
    start = time.time()
    if verbose:
        print("Starting conversion of %i frames" % mgc_arr.shape[0])
        print("This may take some time...")

    try:
        # 500.1 s for 630 frames process
        itr = p.map_async(convert, c)

        # chunk bookkeeping is only needed for the progress printout;
        # the chunk size is 0 for an empty input
        chunksize = itr._chunksize
        sz = len(c) // chunksize if chunksize else 0
        if chunksize and (sz * chunksize) != len(c):
            sz += 1

        last_remaining = None
        while True:
            remaining = itr._number_left
            if verbose:
                if last_remaining != remaining:
                    last_remaining = remaining
                    print("%i chunks of %i complete" % (sz - remaining, sz))
            if itr.ready():
                break
            time.sleep(.5)

        # get() returns the results in input order and re-raises any
        # exception from a worker instead of silently returning nothing
        results = itr.get()
        p.close()
    except BaseException:
        # do not leave worker processes behind on failure or interrupt
        p.terminate()
        raise
    finally:
        p.join()

    stop = time.time()
    if verbose:
        print("Processed %i frames in %s seconds" % (mgc_arr.shape[0], stop - start))
    final = [o[1] for o in sorted(results, key=lambda x: x[0])]
    c = np.array(final)
    buf = np.zeros((len(c), fftlen), dtype=c.dtype)
    buf[:, :c.shape[1]] = c[:]
    return np.exp(np.fft.rfft(buf, axis=-1).real)
3612 | 
3613 | 
def implot(arr, scale=None, title="", cmap="gray"):
    """
    Show a 2D array with matshow on a new figure, axes hidden.

    With scale="specgram" the array is converted to 20 * log10(|arr|),
    transposed and flipped vertically before plotting. Any other scale
    plots arr unchanged.

    The aspect is set to columns / rows of the plotted array so the image
    fills a square.
    """
    import matplotlib.pyplot as plt
    # compare by value; "is" on a string literal depends on interning
    if scale == "specgram":
        # plotting part
        mag = 20. * np.log10(np.abs(arr))
        # Transpose so time is X axis, and invert y axis so
        # frequency is low at bottom
        mag = mag.T[::-1, :]
    else:
        mag = arr
    f, ax = plt.subplots()
    ax.matshow(mag, cmap=cmap)
    plt.axis("off")
    n_rows = mag.shape[0]
    n_cols = mag.shape[1]

    # Both branches of the old min / max helper reduce to this ratio
    asp = n_cols / float(n_rows)
    ax.set_aspect(asp)
    plt.title(title)
3643 | 
3644 | 
def test_lpc_to_lsf():
    """
    Round trip check between LPC coefficients and line spectral
    frequencies, for both 2D (two identical rows) and 1D input, to four
    decimals.
    """
    # Matlab style vectors for testing
    # lsf = [0.7842 1.5605 1.8776 1.8984 2.3593]
    # a = [1.0000  0.6149  0.9899  0.0000  0.0031 -0.0082];
    lsf_row = [0.7842, 1.5605, 1.8776, 1.8984, 2.3593]
    a_row = [1.0000, 0.6149, 0.9899, 0.0000, 0.0031, -0.0082]
    lsf = np.array([lsf_row, lsf_row])
    a = np.array([a_row, a_row])

    # 2D input
    assert_almost_equal(lsf, lpc_to_lsf(a), decimal=4)
    assert_almost_equal(a, lsf_to_lpc(lsf), decimal=4)
    # 1D input
    assert_almost_equal(lsf[0], lpc_to_lsf(a[0]), decimal=4)
    assert_almost_equal(a[0], lsf_to_lpc(lsf[0]), decimal=4)
3663 | 
3664 | 
def test_lpc_analysis_truncate():
    """
    lpc_analysis with truncate=True on 85 random samples and 80 sample
    windows must run and give exactly one row of coefficients.
    """
    signal = np.random.randn(85)
    a, g, e = lpc_analysis(signal, order=8, window_step=80,
                           window_size=80, emphasis=0.9, truncate=True)
    assert a.shape[0] == 1
3670 | 
3671 | 
def test_feature_build():
    """
    Smoke test: build a per frame feature matrix from LPC coefficients,
    gains and compressed excitation of the sample music, placing each row
    in the first or second half depending on the voiced/unvoiced value.
    There are no assertions, it only has to run.
    """
    samplerate, X = fetch_sample_music()
    # MATLAB wavread does normalization
    X = X.astype('float32') / (2 ** 15)
    wsz = 256
    wst = 128
    a, g, e = lpc_analysis(X, order=8, window_step=wst,
                           window_size=wsz, emphasis=0.9,
                           copy=True)
    v, p = voiced_unvoiced(X, window_size=wsz,
                           window_step=wst)
    c = compress(e, n_components=64)
    # First component of a is always 1
    combined = np.hstack((a[:, 1:], g, c[:a.shape[0]]))
    width = combined.shape[1]
    features = np.zeros((a.shape[0], 2 * width))
    # v picks which half of the row receives the combined features
    lo = (v * width).astype('int32')
    hi = ((v + 1) * width).astype('int32')
    for row in range(features.shape[0]):
        features[row, lo[row]:hi[row]] = combined[row]
3693 | 
3694 | 
def test_mdct_and_inverse():
    """Check that ``imdct_slow`` undoes ``mdct_slow`` on the sample music.

    The reconstruction, cropped to the input length, must stay within 1e-3
    of the input at every sample and within 1e-6 on average.
    """
    _, signal = fetch_sample_music()
    coeffs = mdct_slow(signal)
    rebuilt = imdct_slow(coeffs)
    abs_err = np.abs(rebuilt[:len(signal)] - signal)
    assert np.all(abs_err < 1E-3)
    assert abs_err.mean() < 1E-6
3701 | 
3702 | 
def test_all():
    """Run the four self-tests below in a fixed order."""
    checks = (
        test_lpc_analysis_truncate,
        test_feature_build,
        test_lpc_to_lsf,
        test_mdct_and_inverse,
    )
    for check in checks:
        check()
3708 | 
3709 | 
def run_lpc_example():
    """Write several resyntheses of the sample music to WAV files.

    In order: an overlapped-DCT compress/uncompress round trip, a
    sinusoidal analysis/synthesis pass, and then, for every combination of
    LPC order, DCT component count and window size listed below, three LPC
    resyntheses -- one driven by a block-DCT excitation, one by an
    overlapped-DCT excitation, and one given only the voiced/unvoiced
    frames.  All files go to the current working directory.
    """
    # ae.wav is from
    # http://www.linguistics.ucla.edu/people/hayes/103/Charts/VChart/ae.wav
    # Partially following the formant tutorial here
    # http://www.mathworks.com/help/signal/ug/formant-estimation-with-lpc-coefficients.html

    samplerate, X = fetch_sample_music()

    dct_codes = overlap_dct_compress(X, 200, 400)
    X_r = overlap_dct_uncompress(dct_codes, 400)
    wavfile.write('lpc_uncompress.wav', samplerate, soundsc(X_r))

    print("Calculating sinusoids")
    f_hz, m = sinusoid_analysis(X, input_sample_rate=16000)
    Xs_sine = sinusoid_synthesis(f_hz, m)
    wavfile.write('lpc_orig.wav', samplerate, soundsc(X))
    wavfile.write('lpc_sine_synth.wav', samplerate, soundsc(Xs_sine))

    # Seems like a dct component size of ~2/3rds the step
    # (1/3rd the window for 50% overlap) works well.
    for lpc_order in (8,):
        for dct_components in (200,):
            for window_size in (400,):
                # 50% overlap
                window_step = window_size // 2
                a, g, e = lpc_analysis(X, order=lpc_order,
                                       window_step=window_step,
                                       window_size=window_size, emphasis=0.9,
                                       copy=True)
                print("Calculating LSF")
                lsf = lpc_to_lsf(a)

                # Not window_size - window_step! Need to implement overlap
                print("Calculating compression")
                dct_kwargs = dict(n_components=dct_components,
                                  window_size=window_step)
                block_codes = dct_compress(e, **dct_kwargs)
                overlap_codes = overlap_dct_compress(e, **dct_kwargs)
                block_excitation = dct_uncompress(
                    block_codes, window_size=window_step)
                overlap_excitation = overlap_dct_uncompress(
                    overlap_codes, window_size=window_step)

                # Round-trip the coefficients through LSF before synthesis.
                a_r = lsf_to_lpc(lsf)
                # Result is not used further; the call is kept as-is.
                f, m = lpc_to_frequency(a_r, g)

                synth_kwargs = dict(emphasis=0.9, window_step=window_step)
                block_lpc = lpc_synthesis(a_r, g, block_excitation,
                                          **synth_kwargs)
                overlap_lpc = lpc_synthesis(a_r, g, overlap_excitation,
                                            **synth_kwargs)
                v, p = voiced_unvoiced(X, window_size=window_size,
                                       window_step=window_step)
                noisy_lpc = lpc_synthesis(a_r, g, voiced_frames=v,
                                          **synth_kwargs)

                if dct_components is None:
                    dct_components = window_size
                tag = (window_size, lpc_order, dct_components)
                outputs = (
                    ('lpc_noisy_synth_%iwin_%ilpc_%idct.wav' % tag,
                     noisy_lpc),
                    ('lpc_block_synth_%iwin_%ilpc_%idct.wav' % tag,
                     block_lpc),
                    ('lpc_overlap_synth_%iwin_%ilpc_%idct.wav' % tag,
                     overlap_lpc),
                )
                for fname, audio in outputs:
                    wavfile.write(fname, samplerate, soundsc(audio))
3781 | 
3782 | 
def run_fft_vq_example():
    """Vector-quantise STFT features of the fruit speech set and resynthesise.

    Every eighth clip (offset 7) is held out as test data; the rest is
    training data.  The "codebook" is simply every training feature row (no
    clustering is performed).  Train and test clips are encoded against it
    with ``vq``, decoded back to audio with ``istft`` and written to WAV
    files in the current working directory, once as-is and once after
    ``time_attack_agc``.
    """
    n_fft = 512
    time_smoothing = 4

    def _pre(list_of_data):
        # STFT every clip and stack the frames of all clips.
        f_c = np.vstack([stft(dd, n_fft) for dd in list_of_data])
        # Drop trailing frames so that the frame count is a multiple of
        # time_smoothing (required by the reshape below).
        if len(f_c) % time_smoothing != 0:
            newlen = len(f_c) - len(f_c) % time_smoothing
            f_c = f_c[:newlen]
        f_mag = complex_to_abs(f_c)
        f_phs = complex_to_angle(f_c)
        f_sincos = angle_to_sin_cos(f_phs)
        f_r = np.hstack((f_mag, f_sincos))
        # Concatenate each run of time_smoothing consecutive frames into
        # a single feature row.
        f_r = f_r.reshape((len(f_r) // time_smoothing,
                           time_smoothing * f_r.shape[1]))
        return f_r, n_fft

    def preprocess_train(list_of_data, random_state):
        # random_state is accepted but unused: all rows become codewords.
        f_r, n_fft = _pre(list_of_data)
        clusters = f_r
        return clusters

    def apply_preprocess(list_of_data, clusters):
        f_r, n_fft = _pre(list_of_data)
        memberships, distances = vq(f_r, clusters)
        vq_r = clusters[memberships]
        # Undo the time_smoothing grouping: back to one row per frame.
        vq_r = vq_r.reshape((time_smoothing * len(vq_r),
                             vq_r.shape[1] // time_smoothing))
        f_mag = vq_r[:, :n_fft // 2 + 1]
        f_sincos = vq_r[:, n_fft // 2 + 1:]
        extent = f_sincos.shape[1] // 2
        f_phs = sin_cos_to_angle(f_sincos[:, :extent], f_sincos[:, extent:])
        vq_c = abs_and_angle_to_complex(f_mag, f_phs)
        d_k = istft(vq_c, fftsize=n_fft)
        return d_k

    random_state = np.random.RandomState(1999)

    """
    fs, d = fetch_sample_music()
    sub = int(.8 * d.shape[0])
    d1 = [d[:sub]]
    d2 = [d[sub:]]
    """

    fs, d = fetch_sample_speech_fruit()
    d1 = d[::8] + d[1::8] + d[2::8] + d[3::8] + d[4::8] + d[5::8] + d[6::8]
    d2 = d[7::8]
    # make sure d1 and d2 aren't the same!
    assert [len(di) for di in d1] != [len(di) for di in d2]

    clusters = preprocess_train(d1, random_state)
    # Training data
    vq_d1 = apply_preprocess(d1, clusters)
    vq_d2 = apply_preprocess(d2, clusters)
    # The two reconstructions must differ somewhere in their common prefix.
    # (Asserting a list of booleans, as before, is true for any non-empty
    # list and therefore checked nothing.)
    n_common = min(vq_d1.size, vq_d2.size)
    assert np.any(vq_d1.ravel()[:n_common] != vq_d2.ravel()[:n_common])

    fix_d1 = np.concatenate(d1)
    fix_d2 = np.concatenate(d2)

    # NOTE(review): soundsc is called with the signal only, like every
    # other call in this file; three calls here used to pass fs as a second
    # positional argument -- confirm against soundsc's signature.
    wavfile.write("fft_train_no_agc.wav", fs, soundsc(fix_d1))
    wavfile.write("fft_test_no_agc.wav", fs, soundsc(fix_d2))
    wavfile.write("fft_vq_train_no_agc.wav", fs, soundsc(vq_d1))
    wavfile.write("fft_vq_test_no_agc.wav", fs, soundsc(vq_d2))

    agc_d1, freq_d1, energy_d1 = time_attack_agc(fix_d1, fs, .5, 5)
    agc_d2, freq_d2, energy_d2 = time_attack_agc(fix_d2, fs, .5, 5)
    agc_vq_d1, freq_vq_d1, energy_vq_d1 = time_attack_agc(vq_d1, fs, .5, 5)
    agc_vq_d2, freq_vq_d2, energy_vq_d2 = time_attack_agc(vq_d2, fs, .5, 5)

    wavfile.write("fft_train_agc.wav", fs, soundsc(agc_d1))
    wavfile.write("fft_test_agc.wav", fs, soundsc(agc_d2))
    wavfile.write("fft_vq_train_agc.wav", fs, soundsc(agc_vq_d1))
    wavfile.write("fft_vq_test_agc.wav", fs, soundsc(agc_vq_d2))
3856 | 
3857 | 
def run_dct_vq_example():
    """Vector-quantise MDCT frames of the sample music and resynthesise.

    The first 80% of the signal is training data and the remainder is test
    data.  The "codebook" is simply every training MDCT frame (no
    clustering is performed).  Both parts are encoded against it with
    ``vq``, decoded with ``imdct_slow`` and written to WAV files in the
    current working directory, once as-is and once after
    ``time_attack_agc``.
    """
    def _pre(list_of_data):
        # Temporal window setting is crucial! - 512 seems OK for music, 256
        # fruit perhaps due to samplerates
        n_dct = 512
        f_r = np.vstack([mdct_slow(dd, n_dct) for dd in list_of_data])
        return f_r, n_dct

    def preprocess_train(list_of_data, random_state):
        # random_state is accepted but unused: all frames become codewords.
        f_r, n_dct = _pre(list_of_data)
        clusters = f_r
        return clusters

    def apply_preprocess(list_of_data, clusters):
        f_r, n_dct = _pre(list_of_data)
        f_clust = f_r
        memberships, distances = vq(f_clust, clusters)
        vq_r = clusters[memberships]
        d_k = imdct_slow(vq_r, n_dct)
        return d_k

    random_state = np.random.RandomState(1999)

    # This doesn't work very well due to only taking a sample from the end as
    # test
    fs, d = fetch_sample_music()
    sub = int(.8 * d.shape[0])
    d1 = [d[:sub]]
    d2 = [d[sub:]]

    """
    fs, d = fetch_sample_speech_fruit()
    d1 = d[::8] + d[1::8] + d[2::8] + d[3::8] + d[4::8] + d[5::8] + d[6::8]
    d2 = d[7::8]
    # make sure d1 and d2 aren't the same!
    assert [len(di) for di in d1] != [len(di) for di in d2]
    """

    clusters = preprocess_train(d1, random_state)
    # Training data
    vq_d1 = apply_preprocess(d1, clusters)
    vq_d2 = apply_preprocess(d2, clusters)
    # The train and test reconstructions must differ somewhere in their
    # common prefix.  (The previous check zipped vq_d2 with itself and
    # asserted a list, which is true for any non-empty list.)
    n_common = min(vq_d1.size, vq_d2.size)
    assert np.any(vq_d1.ravel()[:n_common] != vq_d2.ravel()[:n_common])

    fix_d1 = np.concatenate(d1)
    fix_d2 = np.concatenate(d2)

    wavfile.write("dct_train_no_agc.wav", fs, soundsc(fix_d1))
    wavfile.write("dct_test_no_agc.wav", fs, soundsc(fix_d2))
    wavfile.write("dct_vq_train_no_agc.wav", fs, soundsc(vq_d1))
    wavfile.write("dct_vq_test_no_agc.wav", fs, soundsc(vq_d2))

    """
    import matplotlib.pyplot as plt
    plt.specgram(vq_d2, cmap="gray")
    plt.figure()
    plt.specgram(fix_d2, cmap="gray")
    plt.show()
    """

    agc_d1, freq_d1, energy_d1 = time_attack_agc(fix_d1, fs, .5, 5)
    agc_d2, freq_d2, energy_d2 = time_attack_agc(fix_d2, fs, .5, 5)
    agc_vq_d1, freq_vq_d1, energy_vq_d1 = time_attack_agc(vq_d1, fs, .5, 5)
    agc_vq_d2, freq_vq_d2, energy_vq_d2 = time_attack_agc(vq_d2, fs, .5, 5)

    wavfile.write("dct_train_agc.wav", fs, soundsc(agc_d1))
    wavfile.write("dct_test_agc.wav", fs, soundsc(agc_d2))
    wavfile.write("dct_vq_train_agc.wav", fs, soundsc(agc_vq_d1))
    wavfile.write("dct_vq_test_agc.wav", fs, soundsc(agc_vq_d2))
3927 | 
3928 | 
def run_phase_reconstruction_example():
    """Rebuild the tapestry speech sample from its STFT magnitude alone.

    Takes the magnitude of a two-sided STFT, hands it to
    ``iterate_invert_spectrogram`` and writes both the original and the
    reconstructed signal to WAV files in the current working directory.
    """
    sample_rate, speech = fetch_sample_speech_tapestry()
    # actually gives however many components you say! So double what .m file
    # says
    fftsize = 512
    step = 64
    spectrum = stft(speech, fftsize=fftsize, step=step, real=False,
                    compute_onesided=False)
    magnitude = np.abs(spectrum)
    rebuilt = iterate_invert_spectrogram(magnitude, fftsize, step,
                                         verbose=True)

    """
    import matplotlib.pyplot as plt
    plt.specgram(d, cmap="gray")
    plt.savefig("1.png")
    plt.close()
    plt.imshow(X_s, cmap="gray")
    plt.savefig("2.png")
    plt.close()
    """

    wavfile.write("phase_original.wav", sample_rate, soundsc(speech))
    wavfile.write("phase_reconstruction.wav", sample_rate, soundsc(rebuilt))
3951 | 
3952 | 
def run_phase_vq_example():
    """Vector-quantise STFT magnitudes of fruit speech and invert them.

    Every ninth clip forms the training set and the first five clips of
    ``d[7::8]`` form the test set.  The codebook is a deep copy of all
    training magnitude frames.  Test frames are replaced by their ``vq``
    codeword and turned back into audio by ``iterate_invert_spectrogram``.
    Results are written to WAV files in the current working directory, with
    and without ``time_attack_agc``.
    """
    def _pre(list_of_data):
        # Temporal window setting is crucial! - 512 seems OK for music, 256
        # fruit perhaps due to samplerates
        n_fft = 256
        step = 32
        magnitudes = [
            np.abs(stft(clip, n_fft, step=step, real=False,
                        compute_onesided=False))
            for clip in list_of_data
        ]
        return np.vstack(magnitudes), n_fft, step

    def preprocess_train(list_of_data, random_state):
        frames, n_fft, step = _pre(list_of_data)
        return copy.deepcopy(frames)

    def apply_preprocess(list_of_data, clusters):
        frames, n_fft, step = _pre(list_of_data)
        # Nondeterministic ?
        memberships, distances = vq(frames, clusters)
        quantised = clusters[memberships]
        return iterate_invert_spectrogram(quantised, n_fft, step,
                                          verbose=True)

    random_state = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    d1 = d[::9]
    d2 = d[7::8][:5]
    # make sure d1 and d2 aren't the same!
    train_lengths = [len(di) for di in d1]
    test_lengths = [len(di) for di in d2]
    assert train_lengths != test_lengths

    clusters = preprocess_train(d1, random_state)
    fix_d1 = np.concatenate(d1)
    fix_d2 = np.concatenate(d2)
    vq_d2 = apply_preprocess(d2, clusters)

    wavfile.write("phase_train_no_agc.wav", fs, soundsc(fix_d1))
    wavfile.write("phase_vq_test_no_agc.wav", fs, soundsc(vq_d2))

    # Only the first element of each AGC result is used below.
    agc_d1, _freq_d1, _energy_d1 = time_attack_agc(fix_d1, fs, .5, 5)
    agc_d2, _freq_d2, _energy_d2 = time_attack_agc(fix_d2, fs, .5, 5)
    agc_vq_d2, _freq_vq_d2, _energy_vq_d2 = time_attack_agc(
        vq_d2, fs, .5, 5)

    """
    import matplotlib.pyplot as plt
    plt.specgram(agc_vq_d2, cmap="gray")
    #plt.title("Fake")
    plt.figure()
    plt.specgram(agc_d2, cmap="gray")
    #plt.title("Real")
    plt.show()
    """

    wavfile.write("phase_train_agc.wav", fs, soundsc(agc_d1))
    wavfile.write("phase_test_agc.wav", fs, soundsc(agc_d2))
    wavfile.write("phase_vq_test_agc.wav", fs, soundsc(agc_vq_d2))
4011 | 
4012 | 
def run_cqt_example():
    """Round-trip the first 44100 samples of a recording through cqt/icqt.

    Tries a hard-coded local file first and falls back to the sample music
    when ``fetch_sample_file`` raises ``ValueError``.  The original excerpt
    and its reconstruction are written to WAV files in the current working
    directory.
    """
    try:
        fs, d = fetch_sample_file("/Users/User/cqt_resources/kempff1.wav")
    except ValueError:
        print("WARNING: Using sample music instead but kempff1.wav is the example")
        fs, d = fetch_sample_music()

    excerpt = d[:44100]
    coeffs, c_dc, c_nyq, multiscale, shift, window_lens = cqt(excerpt, fs)
    rebuilt = icqt(coeffs, c_dc, c_nyq, multiscale, shift, window_lens)

    # Error-to-signal norm ratio in dB; computed but not reported.
    error_norm = np.linalg.norm(excerpt - rebuilt)
    signal_norm = np.linalg.norm(excerpt)
    SNR = 20 * np.log10(error_norm / signal_norm)

    wavfile.write("cqt_original.wav", fs, soundsc(excerpt))
    wavfile.write("cqt_reconstruction.wav", fs, soundsc(rebuilt))
4025 | 
4026 | 
def run_fft_dct_example():
    """Perturb frame-to-frame DCT differences of an STFT and resynthesise.

    The first fruit clip is taken through ``stft``, a real view and an
    orthonormal DCT.  Differences between consecutive rows get Gaussian
    noise scaled to 1% of their per-column standard deviation, are summed
    back up with ``cumsum`` and inverted again.  The error-to-signal ratio
    in dB is printed and both signals are written to WAV files in the
    current working directory.
    """
    rng = np.random.RandomState(1999)

    fs, clips = fetch_sample_speech_fruit()
    n_fft = 64
    X = clips[0]

    # Forward chain: STFT -> real view -> DCT along the last axis.
    real_view = complex_to_real_view(stft(X, n_fft))
    dct_rows = fftpack.dct(real_view, axis=-1, norm='ortho')

    # Row-to-row differences, perturbed in place.
    deltas = dct_rows[1:] - dct_rows[:-1]
    col_std = deltas.std(axis=0, keepdims=True)
    noise = rng.randn(deltas.shape[0], deltas.shape[1])
    deltas += .01 * col_std * noise

    # Backward chain: cumulative sum -> inverse DCT -> complex view -> ISTFT.
    summed = np.cumsum(deltas, axis=0)
    inv_dct = fftpack.idct(summed, axis=-1, norm='ortho')
    X_r = istft(real_to_complex_view(inv_dct), n_fft)[:len(X)]

    SNR = 20 * np.log10(np.linalg.norm(X - X_r) / np.linalg.norm(X))
    print(SNR)

    wavfile.write("fftdct_orig.wav", fs, soundsc(X))
    wavfile.write("fftdct_rec.wav", fs, soundsc(X_r))
4050 | 
4051 | 
def run_world_example():
    """Analyse and resynthesise the tapestry speech sample, writing out.wav.

    Runs ``harvest``, ``cheaptrick`` and ``d4c`` on the rescaled signal and
    feeds their outputs (using the coarse aperiodicity) to
    ``world_synthesis``.
    """
    sample_rate, speech = fetch_sample_speech_tapestry()
    speech = speech.astype("float32") / 2 ** 15

    times_h, f0_h, vuv_h, _f0_candidates = harvest(speech, sample_rate)
    _times_ct, spectrogram_ct, fs_ct = cheaptrick(
        speech, sample_rate, times_h, f0_h, vuv_h)
    d4c_result = d4c(speech, sample_rate, times_h, f0_h, vuv_h)
    _times_d4c, f0_d4c, vuv_d4c, aper_d4c, coarse_aper_d4c = d4c_result

    #y = world_synthesis(f0_d4c, vuv_d4c, aper_d4c, spectrogram_ct, fs_ct)
    resynth = world_synthesis(f0_d4c, vuv_d4c, coarse_aper_d4c,
                              spectrogram_ct, fs_ct)
    wavfile.write("out.wav", sample_rate, soundsc(resynth))
4063 | 
4064 | 
def run_mgc_example():
    """Plot one frame's log spectrum against its MGC-derived envelope.

    Reads ``test16k.wav`` from the current working directory, windows 1024
    samples starting at sample 3000 with an energy-normalised Blackman
    window, fits coefficients with ``win2mgc`` and plots the frame's FFT
    log-magnitude together with the real part of the ``mgc2sp`` output
    (scaled by ``20 / ln(10)``).  Opens a matplotlib window.
    """
    import matplotlib.pyplot as plt
    fs, x = wavfile.read("test16k.wav")
    pos = 3000
    fftlen = 1024
    # Build the window once and normalise it to unit energy.
    blackman = np.blackman(fftlen)
    win = blackman / np.sqrt(np.sum(blackman ** 2))
    xw = x[pos:pos + fftlen] * win
    mgc_order = 20
    mgc_alpha = 0.41
    mgc_gamma = -0.35
    mgc_arr = win2mgc(xw, order=mgc_order, alpha=mgc_alpha, gamma=mgc_gamma, verbose=True)
    # Log-magnitude spectrum of the windowed frame (the reference curve).
    xwsp = 20 * np.log10(np.abs(np.fft.rfft(xw)))
    # Named so that it does not shadow the module-level ``sp`` alias.
    mgc_spectrum = mgc2sp(mgc_arr, mgc_alpha, mgc_gamma, fftlen)
    plt.plot(xwsp)
    plt.plot(20. / np.log(10) * np.real(mgc_spectrum), "r")
    plt.xlim(1, len(xwsp))
    plt.show()
4083 | 
4084 | 
def run_world_mgc_example():
    """Resynthesise the tapestry sample through an MGC-coded spectrogram.

    The ``cheaptrick`` spectrogram is converted with ``sp2mgc``, converted
    back with ``mgc2sp`` and passed, together with the ``d4c`` outputs
    (coarse aperiodicity), to ``world_synthesis``.  The result is written
    to ``out_mgc.wav`` in the current working directory.
    """
    fs, d = fetch_sample_speech_tapestry()
    d = d.astype("float32") / 2 ** 15

    # harcoded for 16k from
    # https://github.com/CSTR-Edinburgh/merlin/blob/master/misc/scripts/vocoder/world/extract_features_for_merlin.sh
    mgc_alpha = 0.58
    #mgc_order = 59
    mgc_order = 59
    # this is actually just mcep
    mgc_gamma = 0.0

    #from sklearn.externals import joblib
    #mem = joblib.Memory("/tmp")
    #mem.clear()

    def enc():
        # Analysis stage: reads d, fs and the mgc_* settings from the
        # enclosing scope.
        times_h, f0_h, vuv_h, _f0_candidates = harvest(d, fs)
        _times_ct, spectrogram, _fs_ct = cheaptrick(
            d, fs, times_h, f0_h, vuv_h)
        d4c_result = d4c(d, fs, times_h, f0_h, vuv_h)
        _times_d4c, f0, vuv, _aper, coarse_aper = d4c_result

        coded = sp2mgc(spectrogram, mgc_order, mgc_alpha, mgc_gamma,
                       verbose=True)
        return coded, spectrogram, f0, vuv, coarse_aper

    mgc_arr, spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c = enc()
    sp_r = mgc2sp(mgc_arr, mgc_alpha, mgc_gamma, fs=fs, verbose=True)

    """
    import matplotlib.pyplot as plt
    plt.imshow(20 * np.log10(sp_r))
    plt.figure()
    plt.imshow(20 * np.log10(spectrogram_ct))
    plt.show()
    raise ValueError()
    """

    resynth = world_synthesis(f0_d4c, vuv_d4c, coarse_aper_d4c, sp_r, fs)
    #y = world_synthesis(f0_d4c, vuv_d4c, aper_d4c, sp_r, fs)
    wavfile.write("out_mgc.wav", fs, soundsc(resynth))
4128 | 
4129 | 
def get_frame(signal, winsize, no):
    """Return frame number ``no`` of ``signal`` for 50%-overlapping windows.

    Frames are ``winsize`` long and start every ``winsize // 2`` samples;
    the result is the plain slice, so it is shorter than ``winsize`` (or
    empty) when the frame runs past the end of ``signal``.
    """
    hop = winsize // 2
    begin = hop * no
    return signal[begin:begin + winsize]
4135 | 
4136 | 
class LTSD():
    """
    LTSD VAD code from jfsantos

    Computes one long-term spectral divergence value (in dB) per
    50%-overlapping frame: the squared maximum spectral envelope over
    ``2 * order + 1`` neighbouring frames, divided by an average noise
    power spectrum estimated from the head of the signal.

    Parameters
    ----------
    winsize : int
        Frame length in samples (frames advance by ``winsize // 2``).
    window : array-like of length ``winsize``
        Analysis window multiplied onto every frame before the FFT.
    order : int
        Number of neighbouring frames on each side used for the envelope.
    """
    def __init__(self,winsize,window,order):
        self.winsize = int(winsize)
        self.window = window
        self.order = order
        # Cache of per-frame amplitude spectra, keyed by frame index.
        self.amplitude = {}

    def _frame_amplitude(self, signal, l):
        # Magnitude of the FFT of windowed frame ``l``.  numpy is used
        # directly because the top-level scipy aliases (``sp.fft`` as a
        # function, ``sp.absolute``) are gone from current SciPy.
        return np.abs(np.fft.fft(get_frame(signal, self.winsize, l) * self.window))

    def get_amplitude(self,signal,l):
        """Return the cached amplitude spectrum of frame ``l``, computing
        and storing it on first use."""
        # ``dict.has_key`` does not exist on Python 3; ``in`` works on both.
        if l in self.amplitude:
            return self.amplitude[l]
        amp = self._frame_amplitude(signal, l)
        self.amplitude[l] = amp
        return amp

    def compute_noise_avg_spectrum(self, nsignal):
        """Return the mean amplitude spectrum over all frames of ``nsignal``."""
        windownum = int(len(nsignal)//(self.winsize//2) - 1)
        avgamp = np.zeros(self.winsize)
        for l in range(windownum):
            avgamp += self._frame_amplitude(nsignal, l)
        return avgamp/float(windownum)

    def compute(self,signal):
        """Return an array with one LTSD value per frame of ``signal``.

        Frames closer than ``order`` to either end get the value 0.
        """
        self.windownum = int(len(signal)//(self.winsize//2) - 1)
        ltsds = np.zeros(self.windownum)
        # The cache is keyed by frame index only, so spectra left over from
        # a previously analysed signal must not be reused.
        self.amplitude = {}
        #Calculate the average noise spectrum amplitude based 20 frames in the head parts of input signal.
        self.avgnoise = self.compute_noise_avg_spectrum(signal[0:self.winsize*20])**2
        for l in range(self.windownum):
            # Honour the order given to the constructor (previously a
            # hard-coded 5, which is what the caller in this file passes).
            ltsds[l] = self.ltsd(signal,l,self.order)
        return ltsds

    def ltse(self,signal,l,order):
        """Return the element-wise maximum amplitude spectrum over frames
        ``l - order`` .. ``l + order`` (inclusive)."""
        maxamp = np.zeros(self.winsize)
        for idx in range(l-order,l+order+1):
            amp = self.get_amplitude(signal,idx)
            maxamp = np.maximum(maxamp,amp)
        return maxamp

    def ltsd(self,signal,l,order):
        """Return the divergence of frame ``l`` in dB, or 0 when the
        ``order``-frame neighbourhood does not fit inside the signal.

        Requires ``self.windownum`` and ``self.avgnoise``, which
        ``compute`` sets.
        """
        if l < order or l+order >= self.windownum:
            return 0
        return 10.0 * np.log10(np.sum(self.ltse(signal,l,order)**2/self.avgnoise)/float(len(self.avgnoise)))
4182 | 
4183 | 
def ltsd_vad(x, fs, threshold=9, winsize=8192):
    """Voice-activity detection on ``x`` using the ``LTSD`` class.

    Parameters
    ----------
    x : ndarray
        Input signal (1-D assumed -- TODO confirm against callers).
    fs : int or float
        Sample rate of ``x``.
    threshold : float, optional
        Frames whose LTSD value is not below this are marked as active.
    winsize : int, optional
        LTSD frame length in samples.

    Returns
    -------
    active : ndarray
        The samples of ``x`` selected by ``mask``.  ``x`` is first
        rescaled to the int32 range ``[0, 2 ** 15]`` and mapped back to the
        original range and dtype, so values are quantised copies.
    mask : ndarray of bool
        Per-sample activity mask with the shape of ``x``.
    """
    # winsize based on sample rate
    # 1024 for fs = 16000
    orig_dtype = x.dtype
    orig_scale_min = x.min()
    orig_scale_max = x.max()
    scale = orig_scale_max - orig_scale_min
    if scale == 0:
        # A constant signal cannot be rescaled (division by zero) and
        # carries no activity: report everything as inactive.
        f_vad = np.zeros_like(x, dtype=bool)
        return x[f_vad], f_vad
    x = (x - orig_scale_min) / scale
    # works with 16 bit
    x = x * (2 ** 15)
    x = x.astype("int32")
    # np.hanning replaces the removed top-level scipy alias sp.hanning.
    window = np.hanning(winsize)
    ltsd = LTSD(winsize, window, 5)
    s_vad = ltsd.compute(x)
    # LTSD is 50% overlap, so each "step" covers winsize // 2 samples
    # +1 to cover the extra edge window
    n_samples = int(((len(s_vad) + 1) * winsize) // 2)
    time_s = n_samples / float(fs)
    time_points = np.linspace(0, time_s, len(s_vad))
    time_samples = (fs * time_points).astype(np.int32)
    # Builtin bool: the np.bool alias no longer exists in current NumPy.
    # The mask starts all-False, so samples past the last processed
    # segment need no explicit clearing.
    f_vad = np.zeros_like(x, dtype=bool)
    offset = winsize
    for n, (ss, es) in enumerate(zip(time_samples[:-1], time_samples[1:])):
        # Shift each segment back by one window, clamping at the start.
        sss = max(ss - offset, 0)
        ses = max(es - offset, 0)
        if s_vad[n + 1] < threshold:
            f_vad[sss:ses] = False
        else:
            f_vad[sss:ses] = True
    # Undo the int32 rescaling so the returned samples are in the original
    # range and dtype.
    x = x.astype("float64")
    x = x / float(2 ** 15)
    x = x * (orig_scale_max - orig_scale_min) + orig_scale_min
    x = x.astype(orig_dtype)
    return x[f_vad], f_vad
4223 | 
4224 | 
def run_ltsd_example():
    """Trim the tapestry speech sample with ``ltsd_vad`` and write it out.

    Three seconds of low-level Gaussian noise are prepended to the
    zero-mean signal, ``ltsd_vad`` is run on the result, and the span from
    the first active sample to half a second past the last active one is
    written to ``tapestry_out.wav`` in the current working directory.
    """
    fs, d = fetch_sample_speech_tapestry()
    winsize = 1024
    d = d.astype("float32") / 2 ** 15
    d -= d.mean()

    # Noise lead-in scaled by the squared 1st percentile (floored at 1e-9).
    pad = 3 * fs
    noise_pwr = max(1E-9, np.percentile(d, 1) ** 2)
    lead_in = np.zeros((pad,)) + noise_pwr * np.random.randn(pad)
    d = np.concatenate((lead_in, d))

    _, vad_segments = ltsd_vad(d, fs, winsize=winsize)
    active_idx = np.nonzero(vad_segments == True)[0]
    first_active = active_idx[0]
    cut_end = active_idx[-1] + int(.5 * fs)
    d = d[first_active:cut_end]

    bname = "tapestry.wav".split(".")[0]
    out_name = "%s_out.wav" % bname
    wavfile.write(out_name, fs, soundsc(d))
4243 | 
4244 | 
if __name__ == "__main__":
    # Only the LTSD example runs when this file is executed as a script.
    run_ltsd_example()
    # The string literal below is never executed; it lists the other
    # entry points defined in this file that can be called here instead.
    """
    Trying to run all examples will seg fault on my laptop - probably memory!
    Comment individually
    run_ltsd_example()
    run_world_mgc_example()
    run_world_example()
    run_mgc_example()
    run_phase_reconstruction_example()
    run_phase_vq_example()
    run_dct_vq_example()
    run_fft_vq_example()
    run_lpc_example()
    run_cqt_example()
    run_fft_dct_example()
    test_all()
    """
4263 | 


--------------------------------------------------------------------------------