├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── python
    ├── hpss.py
    ├── hpss_test.py
    ├── stretch.py
    └── stretch_test.py
└── src
    ├── WavIO.hpp
    ├── fft_utils.h
    ├── hpss.cpp
    ├── hpss.h
    ├── hpss_test.cpp
    ├── stretch.cpp
    ├── stretch.h
    └── stretch_test.cpp


/.gitignore:
--------------------------------------------------------------------------------
 1 | # cpp artifacts
 2 | build/
 3 | 
 4 | # python artifacts
 5 | __pycache__
 6 | 
 7 | # extras
 8 | *.wav
 9 | .vscode/
10 | .idea/
11 | .DS_Store
12 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Builds the time_stretch static library and, optionally, its command-line test tools.
 2 | # 3.13 is required for pkg_check_modules(... IMPORTED_TARGET GLOBAL) and so that
 3 | # option() leaves alone any variable a parent project has already set (CMP0077).
 4 | cmake_minimum_required(VERSION 3.13)
 5 | project(time-stretch LANGUAGES CXX)
 6 | 
 7 | set(CMAKE_CXX_STANDARD 17)
 8 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
 9 | 
10 | # Both switches default to OFF, which matches leaving them undefined.
11 | option(TIMESTRETCH_USING_JUCE "Use juce_dsp for the FFTs instead of FFTW" OFF)
12 | option(BUILD_TESTS "Build the hpss and stretch command-line test tools" OFF)
13 | 
14 | add_library(time_stretch STATIC
15 |     src/hpss.cpp
16 |     src/stretch.cpp
17 | )
18 | 
19 | # Let consumers include the library headers in src/ without extra setup.
20 | target_include_directories(time_stretch PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/src")
21 | 
22 | # Find requirements
23 | if(TIMESTRETCH_USING_JUCE)
24 |     # NOTE(review): no JUCE target is linked here; presumably the parent project
25 |     # provides the juce_dsp headers and module - confirm.
26 |     message(STATUS "time-stretch using JUCE")
27 |     target_compile_definitions(time_stretch PUBLIC TIMESTRETCH_USING_JUCE=1)
28 | else()
29 |     message(STATUS "time-stretch using FFTW")
30 |     find_package(PkgConfig REQUIRED)
31 |     # The sources call the single-precision (fftwf_*) API, which is described by
32 |     # the fftw3f pkg-config module. GLOBAL keeps the imported target visible to
33 |     # parent directories that link against time_stretch.
34 |     pkg_check_modules(FFTW REQUIRED IMPORTED_TARGET GLOBAL fftw3f)
35 |     target_link_libraries(time_stretch PUBLIC PkgConfig::FFTW)
36 | endif()
37 | 
38 | if(BUILD_TESTS)
39 |     message(STATUS "Building tests for time-stretch library")
40 | 
41 |     find_package(PkgConfig REQUIRED)
42 |     pkg_check_modules(SNDFILE REQUIRED IMPORTED_TARGET sndfile>=1.0.2)
43 | 
44 |     # test executable for hpss part
45 |     add_executable(hpss src/hpss_test.cpp)
46 |     target_link_libraries(hpss PRIVATE time_stretch PkgConfig::SNDFILE)
47 | 
48 |     # test executable for time stretching
49 |     add_executable(stretch src/stretch_test.cpp)
50 |     target_link_libraries(stretch PRIVATE time_stretch PkgConfig::SNDFILE)
51 | endif()
41 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2020, jatinchowdhury18
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Time Stretcher
 2 | 
 3 | C++ audio time-stretching implementation, based on the algorithms
 4 | presented in:
 5 | - [Audio Time Stretching with an Adaptive Phase Vocoder](http://www.pitchtech.ch/Confs/ICASSP2017/0000716.pdf), Nicolas Juillerat and Beat Hirsbrunner (ICASSP 2017)
 6 | - [Harmonic/Percussive Separation using Median Filtering](https://arrow.tudublin.ie/cgi/viewcontent.cgi?article=1078&context=argcon), Derry Fitzgerald (DAFx 2010)
 7 | 
 8 | ## Dependencies
 9 | Building the time-stretching library requires FFTW.
10 | Building the library tests requires libsndfile.
11 | 
12 | With apt:
13 | ```bash
14 | sudo apt-get install libfftw3-dev
15 | sudo apt-get install libsndfile1-dev
16 | ```
17 | 
18 | Or with HomeBrew:
19 | ```bash
20 | brew install fftw
21 | brew install libsndfile
22 | ```
23 | 
24 | ## Building Tests
25 | Building the code requires CMake.
26 | ```bash
27 | cmake -Bbuild -DBUILD_TESTS=ON
28 | cmake --build build --config Release
29 | ```
30 | 
31 | `./build/hpss` can be used to test the
32 | harmonic/percussive source separation algorithm.
33 | `./build/stretch` can be used to test the
34 | time-stretching algorithm. Use the `--help`
35 | flag for more information about how to use each test.
36 | 
37 | ## License
38 | The code in this repository is licensed under the BSD 3-clause license.
39 | 
40 | Enjoy!
41 | 


--------------------------------------------------------------------------------
/python/hpss.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from scipy.signal import windows
 3 | 
 4 | eps = np.finfo(np.float32).eps
 5 | 
 6 | def median_filter(x, kernel_size):
 7 |     pad = (kernel_size - 1) // 2
 8 |     x_pad = np.concatenate((np.zeros(pad), x, np.zeros(pad)))
 9 |     y = np.zeros_like(x)
10 |     for i in range(len(y)):
11 |         median_arg = np.argsort(np.abs(x_pad[i : i + kernel_size]))[pad]
12 |         y[i] = x_pad[i + median_arg]
13 | 
14 |     return y
15 | 
16 | def spectrogram(x, fft_size, hop_size, zero_pad=1):
17 |     S = None
18 |     for i in range(0, len(x), hop_size):
19 |         x_win = x[i : i + fft_size]
20 |         x_pad = np.zeros(fft_size * zero_pad)
21 |         x_pad[:len(x_win)] = x_win
22 | 
23 |         if S is None:
24 |             S = np.array([np.fft.fft(x_pad)])
25 |         else:
26 |             S = np.append(S, np.array([np.fft.fft(x_pad)]), axis=0)
27 | 
28 |     return S
29 | 
30 | def hpss(x, perc_kernel=17, harm_kernel=17, mask_power=2, fft_size=4096, hop_size=1024, zero_pad=2):
31 |     ''' Simple harmonic/percussive source separation based on median filter method '''
32 | 
33 |     print('Computing HPSS...')
34 |     print('\tComputing STFTs...')
35 |     S = spectrogram(x, fft_size, hop_size, zero_pad)
36 | 
37 |     # percussive signal
38 |     print('\tSeparating percussive signal...')
39 |     P = np.copy(S)
40 |     for i in range(S.shape[0]):
41 |         P[i, :] = median_filter(np.abs(S[i, :]), kernel_size=perc_kernel)
42 | 
43 |     # harmonic signal
44 |     print('\tSeparating harmonic signal...')
45 |     H = np.copy(S)
46 |     for h in range(S.shape[1]):
47 |         H[:, h] = median_filter(np.abs(S[:, h]), kernel_size=harm_kernel)
48 | 
49 |     # create filter masks
50 |     print('\tCreating filter masks...')
51 |     M_H = np.copy(S)
52 |     M_P = np.copy(S)
53 |     for i in range(S.shape[0]):
54 |         for h in range(S.shape[1]):
55 |             H_p = H[i,h]**mask_power
56 |             P_p = P[i,h]**mask_power
57 |             denom = H_p + P_p + eps
58 | 
59 |             M_H[i, h] = H_p / denom
60 |             M_P[i, h] = P_p / denom
61 | 
62 |     H_hat = np.multiply(S, M_H)
63 |     P_hat = np.multiply(S, M_P)
64 | 
65 |     print('\tComputing time-domain signal...')
66 |     h_sig = np.zeros_like(x)
67 |     p_sig = np.zeros_like(x)
68 |     for i in range(S.shape[0]):
69 |         start_idx = int(i * hop_size)
70 |         n_samples = min(fft_size, len(x) - start_idx)
71 |         win = windows.hann(fft_size)[:n_samples] / ((fft_size // hop_size) // 2)
72 |         h_sig[start_idx : start_idx + fft_size] += win * np.real(np.fft.ifft(H_hat[i,:])[:n_samples])
73 |         p_sig[start_idx : start_idx + fft_size] += win * np.real(np.fft.ifft(P_hat[i,:])[:n_samples])
74 | 
75 |     return h_sig, p_sig
76 | 


--------------------------------------------------------------------------------
/python/hpss_test.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from scipy.io import wavfile
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | from hpss import hpss
 6 | 
 7 | START_SECONDS = 125
 8 | NUM_SECONDS = 5
 9 | TEST_FILE = '/Users/jachowdhury/Downloads/Tennyson - Old Singles/Tennyson - Old Singles - 01 All Yours.wav'
10 | 
11 | fs, x = wavfile.read(TEST_FILE)
12 | x = (np.transpose(x) / np.max(np.abs(x))).astype(np.float32)
13 | 
14 | start_idx = int(START_SECONDS * fs)
15 | n_samples = int(NUM_SECONDS * fs)
16 | ref_signal = x[:,start_idx : start_idx + n_samples]
17 | wavfile.write('ref.wav', fs, np.transpose(ref_signal))
18 | 
19 | h_signal = np.zeros_like(ref_signal)
20 | p_signal = np.zeros_like(ref_signal)
21 | h_signal[0], p_signal[0] = hpss(ref_signal[0])
22 | h_signal[1], p_signal[1] = hpss(ref_signal[1])
23 | 
24 | wavfile.write('harmonic.wav', fs, np.transpose(h_signal))
25 | wavfile.write('percussive.wav', fs, np.transpose(p_signal))
26 | wavfile.write('sum.wav', fs, np.transpose(h_signal + p_signal))
27 | 


--------------------------------------------------------------------------------
/python/stretch.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from scipy.signal import windows
  3 | import matplotlib.pyplot as plt
  4 | 
  5 | from hpss import hpss
  6 | 
  7 | def next_pow_2(x):
  8 |     return int(2**(np.ceil(np.log2(x))))
  9 | 
 10 | def phase_propagation(S, Fs, Ha, Hs, N):
    |     ''' Re-derives the phases of spectrogram S for hop size Hs instead of Ha.
    | 
    |     S is indexed [frame, bin]. Fs is the sample rate and N the FFT length used
    |     to turn bin indices into frequencies. Phases are handled in cycles
    |     (angle / (2*pi) + 0.5) rather than radians. Returns an array shaped like
    |     the phase of S; frame 0 keeps its original phase.
    |     '''
 11 |     def p_arg(x):
    |         # wrap x into the interval [-0.5, 0.5)
 12 |         return (x + 0.5) % 1.0 - 0.5
 13 | 
 14 |     k = np.arange(S.shape[1])
 15 |     omega = k * Fs / N  # nominal frequency of each bin
 16 | 
 17 |     delta_T = Ha / Fs  # time between consecutive analysis frames
 18 |     phi = np.angle(S) / (2 * np.pi) + 0.5  # phase in cycles, shifted into [0, 1]
 19 | 
 20 |     phi_mod = np.copy(phi)
 21 |     for m in range(S.shape[0] - 1):
    |         # bin frequency corrected by the wrapped deviation between the measured
    |         # and the predicted phase advance (presumably the instantaneous frequency)
 22 |         F_if = omega + p_arg(phi[m+1] - (phi[m] + omega * delta_T)) / delta_T
    |         # advance the previous output phase by that frequency over one Hs hop
 23 |         phi_mod[m+1] = p_arg(phi_mod[m] + F_if * Hs / Fs) + 0.5
 24 | 
 25 |     return phi_mod
 26 | 
 27 | def spectrogram(x, fft_size, hop_size, zero_pad=1):
 28 |     S = None
 29 |     win = np.sqrt(windows.hann(fft_size))
 30 |     for i in range(0, len(x), hop_size):
 31 |         x_win = np.copy(x[i : i + fft_size])
 32 |         x_win *= win[:len(x_win)]
 33 |         x_pad = np.zeros(fft_size * zero_pad)
 34 |         x_pad[:len(x_win)] = x_win
 35 | 
 36 |         if S is None:
 37 |             S = np.array([np.fft.fft(x_pad)])
 38 |         else:
 39 |             S = np.append(S, np.array([np.fft.fft(x_pad)]), axis=0)
 40 | 
 41 |     return S
 42 | 
 43 | def reconstruct(S, hop_size, window_size, L):
 44 |     y = np.zeros(L)
 45 |     win = np.sqrt(windows.hann(window_size))
 46 |     for i in range(S.shape[0]):
 47 |         start_idx = int(i * hop_size)
 48 |         n_samples = min(window_size, L - start_idx)
 49 |         y[start_idx : start_idx + window_size] += win[:n_samples] * np.real(np.fft.ifft(S[i,:])[:n_samples])
 50 |     return y
 51 | 
 52 | def stretch(x, fs, stretch_factor):
    |     ''' Time-stretches x by stretch_factor using separate harmonic/percussive processing.
    | 
    |     x is indexed [channel, sample] and fs is its sample rate. Returns an array
    |     of shape (2, int(len(x[0]) * stretch_factor) + 5000). The mono mix is
    |     divided by 2 and the output has two rows, so stereo input is assumed.
    |     '''
 53 |     window_size_sec = 0.1 # 100 milliseconds
 54 |     window_size = next_pow_2(window_size_sec * fs)
 55 |     short_window_size = next_pow_2(0.005 * fs)
 56 |     
 57 |     Hs = window_size // 2 # synthesis hop size
 58 |     Ha = int(float(Hs) / stretch_factor) # analysis hop size
 59 | 
 60 |     Hs_short = short_window_size // 2 # synthesis hop size
 61 |     Ha_short = int(float(Hs_short) / stretch_factor) # analysis hop size
 62 | 
 63 |     print('Computing mono reference phases...')
 64 |     x_sum = np.sum(x, axis=0) / 2.0
 65 |     X_sum = spectrogram(x_sum, window_size, Ha, zero_pad=1)
 66 |     phase_mods = phase_propagation(X_sum, fs, Ha, Hs, window_size)
 67 | 
    |     # 5000 extra samples of headroom beyond the nominal stretched length
 68 |     stretch_len = int(len(x[0]) * stretch_factor) + 5000
 69 |     y = np.zeros((2, stretch_len))
 70 |     for ch in range(x.shape[0]):
 71 |         print(f'Processing channel {ch}...')
 72 |         h_signal, p_signal = hpss(x[ch])
 73 | 
 74 |         print('Performing time-stretching...')
 75 |         H_full = spectrogram(h_signal, window_size, Ha)
 76 |         P_full = spectrogram(p_signal, window_size, Ha)
 77 | 
    |         # analyse at the analysis hop, resynthesize at the synthesis hop with
    |         # the original phases: long window for harmonic, short for percussive
 78 |         print('\tSeparated magnitude-only PV...')
 79 |         h_x_long = reconstruct(H_full, Hs, window_size, stretch_len)
 80 |         P_short = spectrogram(p_signal, short_window_size, Ha_short)
 81 |         p_x_short = reconstruct(P_short, Hs_short, short_window_size, stretch_len)
 82 | 
    |         # NOTE(review): phase_propagation returns phases in cycles
    |         # (angle / (2*pi) + 0.5) but they are used here as radians - confirm
    |         # whether a conversion back to radians is intended.
 83 |         print('\tApplying reference phases...')
 84 |         H_full = np.multiply(np.abs(H_full), np.exp(1j * phase_mods))
 85 |         P_full = np.multiply(np.abs(P_full), np.exp(1j * phase_mods))
 86 | 
 87 |         print('\tReconstructing references...')
 88 |         h_v = reconstruct(H_full, Hs, window_size, stretch_len)
 89 |         p_v = reconstruct(P_full, Hs, window_size, stretch_len)
 90 | 
    |         # combine the magnitudes of the magnitude-only signals with the phases
    |         # of the reference-phase signals
 91 |         print('\tPerforming magnitude correction...')
 92 |         H_v_long = spectrogram(h_v, window_size, Ha)
 93 |         P_v_short = spectrogram(p_v, short_window_size, Ha_short)
 94 |         
 95 |         H_w_long = spectrogram(h_x_long, window_size, Ha)
 96 |         P_w_short = spectrogram(p_x_short, short_window_size, Ha_short)
 97 | 
 98 |         H_y = np.multiply(np.abs(H_w_long), np.exp(1j * np.angle(H_v_long)))
 99 |         P_y = np.multiply(np.abs(P_w_short), np.exp(1j * np.angle(P_v_short)))
100 | 
101 |         print('\tReconstructing final signal...')
102 |         h_y = reconstruct(H_y, Ha, window_size, stretch_len)
103 |         p_y = reconstruct(P_y, Ha_short, short_window_size, stretch_len)
104 | 
    |         # rescale each part so its peak matches the peak of the unstretched part
105 |         print('\tNormalizing separated signal...')
106 |         h_mag = np.max(np.abs(h_signal))
107 |         p_mag = np.max(np.abs(p_signal))
108 |         h_y *= (h_mag / np.max(np.abs(h_y)))
109 |         p_y *= (p_mag / np.max(np.abs(p_y)))
110 | 
111 |         y[ch] = h_y + p_y
112 | 
113 |     # normalize if needed...
114 |     # mag = np.max(np.abs(y))
115 |     # print(f'Original Magnitude {np.max(np.abs(x))}')
116 |     # print(f'Stretched Magnitude {mag}')
117 |     # if mag > 1.0:
118 |     #     y /= mag
119 | 
120 |     return y
121 | 


--------------------------------------------------------------------------------
/python/stretch_test.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from scipy.io import wavfile
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | from stretch import stretch
 6 | 
 7 | START_SECONDS = 125
 8 | NUM_SECONDS = 10
 9 | TEST_FILE = '/Users/jachowdhury/Downloads/Tennyson - Old Singles/Tennyson - Old Singles - 01 All Yours.wav'
10 | STRETCH_AMOUNT = 0.5
11 | 
12 | fs, x = wavfile.read(TEST_FILE)
13 | x = (np.transpose(x) / np.max(np.abs(x))).astype(np.float32)
14 | start_idx = int(START_SECONDS * fs)
15 | n_samples = int(NUM_SECONDS * fs)
16 | ref_signal = x[:,start_idx : start_idx + n_samples]
17 | 
18 | # fs = 44100
19 | # N = int(NUM_SECONDS * fs)
20 | # FREQ = 50
21 | # ref_signal = np.zeros((2, N))
22 | # for ch in range(2):
23 | #     ref_signal[ch] = np.sin(2 * np.pi * np.arange(N) * FREQ / fs)
24 | 
25 | wavfile.write('ref.wav', fs, np.transpose(ref_signal))
26 | 
27 | stretch_signal = stretch(ref_signal, fs, STRETCH_AMOUNT)
28 | 
29 | wavfile.write(f'stretch_{STRETCH_AMOUNT}.wav', fs, np.transpose(stretch_signal))
30 | 
31 | # plt.plot(ref_signal[0])
32 | # plt.plot(np.sin(2 * np.pi * np.arange(len(stretch_signal[0])) * FREQ / fs))
33 | # plt.plot(stretch_signal[0])
34 | # plt.show()
35 | 


--------------------------------------------------------------------------------
/src/WavIO.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <sndfile.h>
 4 | #include <string>
 5 | #include <iostream>
 6 | #include <vector>
 7 | 
 8 | /**
 9 |  * Utility functions for loading .wav files
10 |  * into 2D vectors using libsndfile.
11 |  */ 
12 | namespace WavIO
13 | {
14 | 
15 | using Vec2d = std::vector<std::vector<float>>;
16 | using SND_PTR = std::unique_ptr<SNDFILE, decltype(&sf_close)>;
17 | 
18 | /**
19 |  * Reads a whole audio file into a [channel][frame] vector.
20 |  * sf_info is filled in by sf_open. Prints a message and exits the
21 |  * process when the file cannot be opened or holds no frames.
22 |  * Declared inline because this header defines the function.
23 |  */
24 | inline Vec2d load_file (const char* file, SF_INFO& sf_info)
25 | {
26 |     std::cout << "Loading file: " << file << std::endl;
27 | 
28 |     SND_PTR wavFile { sf_open(file, SFM_READ, &sf_info), &sf_close };
29 | 
30 |     // A null handle means the open failed; sf_info is only meaningful otherwise.
31 |     if (! wavFile || sf_info.frames == 0)
32 |     {
33 |         std::cout << "File could not be opened!" << std::endl;
34 |         exit (1);
35 |     }
36 | 
37 |     // size_t arithmetic: frames * channels can exceed the range of int
38 |     const auto channels = (size_t) sf_info.channels;
39 |     const auto frames = (size_t) sf_info.frames;
40 | 
41 |     std::vector<float> readInterleaved(channels * frames, 0.0f);
42 |     sf_readf_float(wavFile.get(), readInterleaved.data(), sf_info.frames);
43 | 
44 |     Vec2d audio (channels, std::vector<float> (frames, 0.0f));
45 | 
46 |     // de-interleave channels
47 |     for (size_t i = 0; i < frames; ++i)
48 |     {
49 |         const size_t interleavedPtr = i * channels;
50 |         for(size_t ch = 0; ch < channels; ++ch)
51 |             audio[ch][i] = readInterleaved[interleavedPtr + ch];
52 |     }
53 | 
54 |     return audio;
55 | }
56 | 
57 | /**
58 |  * Writes a [channel][frame] vector to a file described by sf_info.
59 |  * The frame count is taken from the first channel and stored in
60 |  * sf_info.frames. Prints a message and returns without writing when
61 |  * audio has no channels or the file cannot be opened.
62 |  */
63 | inline void write_file (const char* file, const Vec2d& audio, SF_INFO& sf_info)
64 | {
65 |     std::cout << "Writing to file: " << file << std::endl;
66 | 
67 |     if (audio.empty())
68 |     {
69 |         std::cout << "No audio channels to write!" << std::endl;
70 |         return;
71 |     }
72 | 
73 |     const auto channels = audio.size();
74 |     const auto frames = audio[0].size();
75 |     sf_info.frames = (sf_count_t) frames;
76 | 
77 |     SND_PTR wavFile { sf_open(file, SFM_WRITE, &sf_info), &sf_close };
78 |     if (! wavFile)
79 |     {
80 |         std::cout << "File could not be opened!" << std::endl;
81 |         return;
82 |     }
83 | 
84 |     std::vector<float> writeInterleaved(channels * frames, 0.0f);
85 | 
86 |     // interleave channels
87 |     for (size_t i = 0; i < frames; ++i)
88 |     {
89 |         const size_t interleavedPtr = i * channels;
90 |         for(size_t ch = 0; ch < channels; ++ch)
91 |             writeInterleaved[interleavedPtr + ch] = audio[ch][i];
92 |     }
93 | 
94 |     sf_writef_float(wavFile.get(), writeInterleaved.data(), (sf_count_t) frames);
95 | }
96 | 
97 | } // namespace WavIO
69 | 


--------------------------------------------------------------------------------
/src/fft_utils.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #if TIMESTRETCH_USING_JUCE
  4 | #define _USE_MATH_DEFINES
  5 | #include <juce_dsp/juce_dsp.h>
  6 | 
  7 | #else
  8 | #include <algorithm>
  9 | #include <complex> // need to include this before fftw for std::complex compatibility
 10 | #include <fftw3.h>
 11 | #endif
 12 | 
 13 | #include <cmath>
 14 | #include <cstddef>
 15 | #include <new>
 16 | #include <vector>
 12 | 
 13 | namespace fft_utils
 14 | {
 15 | 
 16 | /** Returns ceil(log2(x)): the exponent of the smallest power of two that is >= x */
 17 | static int next_pow2_log(int x)
 18 | {
 19 |     const double exponent = std::log2(static_cast<double> (x));
 20 |     return static_cast<int> (std::ceil(exponent));
 21 | }
 20 | 
 21 | /** Finds the smallest power of two that is greater than or equal to the given number (for x >= 1) */
 22 | static int next_pow2(int x)
 23 | {
 24 |     const auto exponent = (float) next_pow2_log(x);
 25 |     return static_cast<int> (std::pow(2.0f, exponent));
 26 | }
 26 | 
 27 | #if TIMESTRETCH_USING_JUCE
 28 | using fftw_real_vec = std::vector<float>;
 29 | using fftw_complex_vec = std::vector<std::complex<float>>;
 30 | 
 31 | static void applyWindow(const std::vector<float>& data, const std::vector<float>& win, std::vector<float>& out)
 32 | {
    |     // Processes win.size() samples, so data and out must hold at least that many.
    |     // presumably this multiplies win by data element-wise into out - confirm against the JUCE docs
 33 |     juce::FloatVectorOperations::multiply(out.data(), win.data(), data.data(), (int) win.size());
 34 | }
 35 | 
 36 | /** Helper struct for performing forward FFTs */
    | // Usage: fill x_in, call perform(), read the spectrum from Y_out.
 37 | struct ForwardFFT
 38 | {
 39 |     std::vector<float> x_in;    // real input samples, n values
 40 |     fftw_complex_vec Y_out;     // complex output, n values
 41 | 
    |     // The JUCE FFT is constructed with order next_pow2_log(n); perform()
    |     // asserts that the resulting FFT size equals n.
 42 |     ForwardFFT(int n) : fft(next_pow2_log(n))
 43 |     {
 44 |         x_in.resize(n, 0.0f);
 45 |         x_tmp.resize(2 * n, 0.0f);
 46 |         Y_out.resize(n, { 0.0f, 0.0f });
 47 |     }
 48 | 
 49 |     void perform()
 50 |     {
 51 |         jassert ((int) x_tmp.size() == fft.getSize() * 2);
 52 | 
    |         // the transform runs in place on the double-length scratch buffer
 53 |         std::copy(x_in.begin(), x_in.end(), x_tmp.begin());
 54 |         fft.performRealOnlyForwardTransform(x_tmp.data());
 55 | 
    |         // the scratch buffer is read back as interleaved (real, imaginary) pairs
 56 |         for(int i = 0; i < (int) x_tmp.size(); i += 2)
 57 |             Y_out[i / 2] = std::complex<float> { x_tmp[i], x_tmp[i+1] };
 58 |     }
 59 | 
 60 | private:
 61 |     juce::dsp::FFT fft;
 62 |     std::vector<float> x_tmp;   // scratch buffer of 2 * n floats
 63 | };
 64 | 
 65 | /** Helper struct for performing inverse FFTs */
    | // Usage: fill X_in, call perform(), read the samples from y_out.
 66 | struct InverseFFT
 67 | {
 68 |     fftw_complex_vec X_in;      // complex input, n values
 69 |     std::vector<float> y_out;   // real output samples, n values
 70 | 
    |     // The JUCE FFT is constructed with order next_pow2_log(n); perform()
    |     // asserts that the resulting FFT size equals n.
 71 |     InverseFFT(int n) : fft(next_pow2_log(n))
 72 |     {
 73 |         X_in.resize(n, { 0.0f, 0.0f });
 74 |         x_tmp.resize(2 * n, 0.0f);
 75 |         y_out.resize(n, 0.0f);
 76 |     }
 77 | 
 78 |     void perform()
 79 |     {
 80 |         jassert ((int) x_tmp.size() == fft.getSize() * 2);
 81 | 
    |         // pack the spectrum as interleaved (real, imaginary) pairs
 82 |         for(int i = 0; i < (int) x_tmp.size(); i += 2)
 83 |         {
 84 |             x_tmp[i] = X_in[i / 2].real();
 85 |             x_tmp[i+1] = X_in[i / 2].imag();
 86 |         }
 87 | 
    |         // in-place transform; only the first n floats of the scratch buffer are kept
    |         // NOTE(review): no explicit 1/n scaling here, unlike the FFTW variant - confirm JUCE applies it
 88 |         fft.performRealOnlyInverseTransform(x_tmp.data());
 89 |         std::copy(x_tmp.begin(), x_tmp.begin() + y_out.size(), y_out.begin());
 90 |     }
 91 | 
 92 | private:
 93 |     juce::dsp::FFT fft;
 94 |     std::vector<float> x_tmp;   // scratch buffer of 2 * n floats
 95 | };
 96 | 
 97 | #else // use FFTW
 98 | /**
 99 |  * Custom allocator for making vectors compatible with FFTW.
100 |  * Memory comes from fftwf_malloc and is released with fftwf_free.
101 |  * The allocator is stateless, so all instances compare equal.
102 |  */
103 | template<typename T>
104 | class fftw_allocator
105 | {
106 | public:
107 |     using value_type = T;
108 | 
109 |     fftw_allocator() noexcept = default;
110 | 
111 |     /** Converting constructor, needed when a container rebinds the allocator */
112 |     template <typename U>
113 |     fftw_allocator(const fftw_allocator<U>&) noexcept {}
114 | 
115 |     template <typename U>
116 |     struct rebind { typedef fftw_allocator<U> other; };
117 | 
118 |     /** Allocates room for n objects; throws std::bad_alloc on failure */
119 |     T* allocate(std::size_t n)
120 |     {
121 |         if (n == 0)
122 |             return nullptr;
123 | 
124 |         // reject requests whose byte count would overflow size_t
125 |         if (n > static_cast<std::size_t> (-1) / sizeof(T))
126 |             throw std::bad_alloc();
127 | 
128 |         if (auto* data = static_cast<T*> (fftwf_malloc(sizeof(T) * n)))
129 |             return data;
130 | 
131 |         throw std::bad_alloc();
132 |     }
133 | 
134 |     void deallocate(T* data, std::size_t) noexcept { fftwf_free(data); }
135 | };
136 | 
137 | template <typename T, typename U>
138 | bool operator==(const fftw_allocator<T>&, const fftw_allocator<U>&) noexcept { return true; }
139 | 
140 | template <typename T, typename U>
141 | bool operator!=(const fftw_allocator<T>&, const fftw_allocator<U>&) noexcept { return false; }
108 | 
109 | using fftw_real_vec = std::vector<float, fftw_allocator<float>>;
110 | using fftw_complex_vec = std::vector<std::complex<float>, fftw_allocator<std::complex<float>>>;
111 | 
112 | /** Exposes the vector's std::complex<float> storage as fftwf_complex* (requires including <complex> before fftw3.h) */
113 | inline fftwf_complex* toFFTW (fftw_complex_vec& vec)
114 | {
115 |     auto* raw = vec.data();
116 |     return reinterpret_cast<fftwf_complex*> (raw);
117 | }
117 | 
118 | /** Multiplies the first win.size() samples of data by win and stores them in out (out may be data itself) */
119 | static void applyWindow(const std::vector<float>& data, const std::vector<float>& win, std::vector<float>& out)
120 | {
121 |     std::transform(win.begin(), win.end(), data.begin(), out.begin(),
122 |                    [] (float w, float sample) { return sample * w; });
123 | }
124 | 
125 | /**
126 |  * Helper struct for performing forward (real-to-complex) FFTs with FFTW.
127 |  * Usage: fill x_in, call perform(), read the spectrum from Y_out.
128 |  * The plan keeps raw pointers to the storage of x_in and Y_out, so
129 |  * neither vector may be resized after construction.
130 |  */
131 | struct ForwardFFT
132 | {
133 |     std::vector<float> x_in;    // real input samples, n values
134 |     fftw_complex_vec Y_out;     // complex output, n values allocated
135 | 
136 |     ForwardFFT(int n)
137 |     {
138 |         x_in.resize(n, 0.0f);
139 |         Y_out.resize(n, { 0.0f, 0.0f });
140 |         fft_plan = fftwf_plan_dft_r2c_1d(n, x_in.data(), toFFTW(Y_out), FFTW_ESTIMATE);
141 |     }
142 | 
143 |     // Not copyable: a copy would destroy the same plan a second time and
144 |     // would still transform the original object's buffers.
145 |     ForwardFFT(const ForwardFFT&) = delete;
146 |     ForwardFFT& operator=(const ForwardFFT&) = delete;
147 | 
148 |     ~ForwardFFT()
149 |     {
150 |         if (fft_plan != nullptr)
151 |             fftwf_destroy_plan(fft_plan);
152 |     }
153 | 
154 |     void perform()
155 |     {
156 |         fftwf_execute(fft_plan);
157 |     }
158 | 
159 | private:
160 |     fftwf_plan fft_plan = nullptr;
161 | };
151 | 
152 | /**
153 |  * Helper struct for performing inverse (complex-to-real) FFTs with FFTW.
154 |  * Usage: fill X_in, call perform(), read the samples from y_out, which
155 |  * are scaled by 1/n. Refill X_in before every call: FFTW may overwrite
156 |  * the input of a complex-to-real transform.
157 |  * The plan keeps raw pointers to the storage of X_in and y_out, so
158 |  * neither vector may be resized after construction.
159 |  */
160 | struct InverseFFT
161 | {
162 |     fftw_complex_vec X_in;      // complex input, n values allocated
163 |     std::vector<float> y_out;   // real output samples, n values
164 | 
165 |     InverseFFT(int n)
166 |     {
167 |         X_in.resize(n, { 0.0f, 0.0f });
168 |         y_out.resize(n, 0.0f);
169 |         fft_plan = fftwf_plan_dft_c2r_1d(n, toFFTW(X_in), y_out.data(), FFTW_ESTIMATE);
170 |         oneOverN = 1.0f / (float) n;
171 |     }
172 | 
173 |     // Not copyable: a copy would destroy the same plan a second time and
174 |     // would still transform the original object's buffers.
175 |     InverseFFT(const InverseFFT&) = delete;
176 |     InverseFFT& operator=(const InverseFFT&) = delete;
177 | 
178 |     ~InverseFFT()
179 |     {
180 |         if (fft_plan != nullptr)
181 |             fftwf_destroy_plan(fft_plan);
182 |     }
183 | 
184 |     void perform()
185 |     {
186 |         fftwf_execute(fft_plan);
187 | 
188 |         // apply the 1/n normalization to the raw transform output
189 |         for(auto& sample : y_out)
190 |             sample *= oneOverN;
191 |     }
192 | 
193 | private:
194 |     fftwf_plan fft_plan = nullptr;
195 |     float oneOverN = 1.0f;
196 | };
182 | #endif // TIMESTRETCH_USING_JUCE
183 | 
184 | /**
185 |  * Helper function for generating Hann window.
186 |  * Returns N samples of a symmetric Hann window, each divided by
187 |  * normalization. N <= 0 gives an empty window and N == 1 gives the
188 |  * single value 1 / normalization.
189 |  */
190 | inline std::vector<float> hann(int N, float normalization = 1.0f)
191 | {
192 |     if (N <= 0)
193 |         return {};
194 | 
195 |     // the general formula divides by N - 1, which is zero for a single sample
196 |     if (N == 1)
197 |         return { 1.0f / normalization };
198 | 
199 |     std::vector<float> win (N, 0.0f);
200 |     for(int i = 0; i < N; ++i)
201 |     {
202 |         win[i] = 0.5f - 0.5f * std::cos(2.0f * (float)M_PI * (float)i / (float(N - 1)));
203 |         win[i] /= normalization;
204 |     }
205 | 
206 |     return win;
207 | }
196 | 
197 | } // namespace fft_utils
198 | 


--------------------------------------------------------------------------------
/src/hpss.cpp:
--------------------------------------------------------------------------------
  1 | #include "hpss.h"
  2 | #include "fft_utils.h"
  3 | #include <iostream>
  4 | #include <algorithm>
  5 | #include <limits>
  6 | 
  7 | namespace HPSS
  8 | {
  9 | 
 10 | using namespace fft_utils;
 11 | using Vec2D = std::vector<std::vector<float>>;
 12 | 
 13 | /**
 14 |  * Performs "horizontal" median filtering to obtain harmonic signal.
 15 |  * For every frequency bin, the magnitudes are median-filtered across
 16 |  * frames with zero padding at both ends. S is indexed [frame][bin] and
 17 |  * the result has the same layout. An empty S gives an empty result;
 18 |  * kernel sizes below 1 are treated as 1.
 19 |  */
 20 | Vec2D median_filter_harm(const std::vector<fftw_complex_vec>& S, int kernel_size)
 21 | {
 22 |     if (S.empty())
 23 |         return {};
 24 | 
 25 |     kernel_size = std::max(kernel_size, 1);
 26 | 
 27 |     const int n_frames = (int) S.size();
 28 |     const int n_bins = (int) S[0].size();
 29 |     Vec2D H (n_frames, std::vector<float> (n_bins, 0.0f));
 30 | 
 31 |     const int pad = (kernel_size - 1) / 2;
 32 |     // kernel_size - 1 padding samples in total, so the last window also fits
 33 |     // when kernel_size is even (identical to 2 * pad for odd sizes)
 34 |     std::vector<float> med_vec (n_frames + kernel_size - 1, 0.0f);
 35 |     std::vector<float> kernel_vec (kernel_size, 0.0f);
 36 | 
 37 |     for(int h = 0; h < n_bins; ++h)
 38 |     {
 39 |         for(int i = 0; i < n_frames; ++i)
 40 |             med_vec[i + pad] = std::abs(S[i][h]);
 41 | 
 42 |         for(int i = 0; i < n_frames; ++i)
 43 |         {
 44 |             std::copy(med_vec.begin() + i, med_vec.begin() + i + kernel_size, kernel_vec.begin());
 45 |             std::nth_element(kernel_vec.begin(), kernel_vec.begin() + pad, kernel_vec.end());
 46 |             H[i][h] = kernel_vec[pad];
 47 |         }
 48 |     }
 49 | 
 50 |     return H;
 51 | }
 37 | 
 38 | /**
 39 |  * Performs "vertical" median filtering to obtain percussive signal.
 40 |  * For every frame, the magnitudes are median-filtered across frequency
 41 |  * bins with zero padding at both ends. S is indexed [frame][bin] and
 42 |  * the result has the same layout. An empty S gives an empty result;
 43 |  * kernel sizes below 1 are treated as 1.
 44 |  */
 45 | Vec2D median_filter_perc(const std::vector<fftw_complex_vec>& S, int kernel_size)
 46 | {
 47 |     if (S.empty())
 48 |         return {};
 49 | 
 50 |     kernel_size = std::max(kernel_size, 1);
 51 | 
 52 |     const int n_frames = (int) S.size();
 53 |     const int n_bins = (int) S[0].size();
 54 |     Vec2D P (n_frames, std::vector<float> (n_bins, 0.0f));
 55 | 
 56 |     const int pad = (kernel_size - 1) / 2;
 57 |     // kernel_size - 1 padding samples in total, so the last window also fits
 58 |     // when kernel_size is even (identical to 2 * pad for odd sizes)
 59 |     std::vector<float> med_vec (n_bins + kernel_size - 1, 0.0f);
 60 |     std::vector<float> kernel_vec (kernel_size, 0.0f);
 61 | 
 62 |     for(int i = 0; i < n_frames; ++i)
 63 |     {
 64 |         for(int h = 0; h < n_bins; ++h)
 65 |             med_vec[h + pad] = std::abs(S[i][h]);
 66 | 
 67 |         for(int h = 0; h < n_bins; ++h)
 68 |         {
 69 |             // the window slides along the bin axis, so it is positioned by h
 70 |             std::copy(med_vec.begin() + h, med_vec.begin() + h + kernel_size, kernel_vec.begin());
 71 |             std::nth_element(kernel_vec.begin(), kernel_vec.begin() + pad, kernel_vec.end());
 72 |             P[i][h] = kernel_vec[pad];
 73 |         }
 74 |     }
 75 | 
 76 |     return P;
 77 | }
 62 | 
 63 | /**
 64 |  * Computes a time-frequency spectrogram with no window (for now).
 65 |  * One frame of fft_size samples is taken every hop_size samples and
 66 |  * transformed with an FFT of fft_size * zero_pad points. Only frames
 67 |  * that fit entirely inside x are produced, so the result is empty when
 68 |  * x is shorter than fft_size or when a size argument is not positive.
 69 |  */
 70 | inline std::vector<fftw_complex_vec> spectrogram(const std::vector<float>& x, int fft_size, int hop_size, int zero_pad = 1)
 71 | {
 72 |     std::vector<fftw_complex_vec> S;
 73 | 
 74 |     // a non-positive hop would never advance the loop below
 75 |     if(fft_size <= 0 || hop_size <= 0 || zero_pad <= 0)
 76 |         return S;
 77 | 
 78 |     const auto n_fft = fft_size * zero_pad;
 79 |     fft_utils::ForwardFFT fft { n_fft };
 80 | 
 81 |     // "<=" so that a final frame ending exactly at the last sample is kept
 82 |     for(int i = 0; i + fft_size <= (int) x.size(); i += hop_size)
 83 |     {
 84 |         // samples past fft_size in x_in are never written and stay zero (the zero padding)
 85 |         std::copy(x.begin() + i, x.begin() + i + fft_size, fft.x_in.data());
 86 |         fft.perform();
 87 |         S.push_back(fft.Y_out);
 88 |     }
 89 | 
 90 |     return S;
 91 | }
 79 | 
 80 | /**
 81 |  * Turns the harmonic and percussive spectrograms back into time-domain
 82 |  * signals of n_samples each: every frame is inverse-transformed, weighted
 83 |  * with a Hann window and overlap-added at i * hop_size.
 84 |  */
 85 | std::pair<std::vector<float>, std::vector<float>> spec_reconstruct(std::vector<fftw_complex_vec>& H_hat,
 86 |                                                                    std::vector<fftw_complex_vec>& P_hat,
 87 |                                                                    int n_samples,
 88 |                                                                    const int fft_size,
 89 |                                                                    const int hop_size)
 90 | {
 91 |     // window gain is divided by half the number of hops per frame
 92 |     const float overlap_norm = float(fft_size / hop_size) / 2.0f;
 93 |     const auto win = hann(fft_size, overlap_norm);
 94 | 
 95 |     std::vector<float> h_sig (n_samples, 0.0f);
 96 |     std::vector<float> p_sig (n_samples, 0.0f);
 97 | 
 98 |     fft_utils::InverseFFT ifft { (int) H_hat[0].size() };
 99 | 
100 |     // inverse-transforms one frame, windows it and adds `count` samples into dest at `offset`
101 |     auto overlap_add = [&ifft, &win] (const fftw_complex_vec& frame, std::vector<float>& dest, int offset, int count)
102 |     {
103 |         std::copy(frame.begin(), frame.end(), ifft.X_in.data());
104 |         ifft.perform();
105 |         fft_utils::applyWindow(ifft.y_out, win, ifft.y_out);
106 | 
107 |         for(int n = 0; n < count; ++n)
108 |             dest[n + offset] += ifft.y_out[n];
109 |     };
110 | 
111 |     const int n_frames = (int) H_hat.size();
112 |     for(int frame = 0; frame < n_frames; ++frame)
113 |     {
114 |         const int start_idx = frame * hop_size;
115 |         // frames reaching past the end of the signal are truncated
116 |         const int samples = std::min(fft_size, n_samples - start_idx);
117 | 
118 |         overlap_add(H_hat[frame], h_sig, start_idx, samples);
119 |         overlap_add(P_hat[frame], p_sig, start_idx, samples);
120 |     }
121 | 
122 |     return std::make_pair(h_sig, p_sig);
123 | }
119 | 
120 | /** Prints str followed by a newline to stdout, but only when debug is set */
121 | static void debug_print(const std::string& str, bool debug)
122 | {
123 |     if(! debug)
124 |         return;
125 | 
126 |     std::cout << str << std::endl;
127 | }
125 | 
126 | /**
127 |  * Splits x into harmonic and percussive parts (returned in that order),
128 |  * each as long as x. Window and hop lengths from params are converted to
129 |  * samples and rounded up to powers of two. When x is too short to hold a
130 |  * single analysis frame, both returned signals are all zeros.
131 |  */
132 | std::pair<std::vector<float>, std::vector<float>> hpss(std::vector<float> x, const HPSS_PARAMS& params)
133 | {
134 |     const auto fft_size = next_pow2(int(params.window_length_ms * 0.001 * params.sample_rate));
135 |     const auto hop_size = next_pow2(int(params.hop_length_ms * 0.001 * params.sample_rate));
136 | 
137 |     debug_print("Computing HPSS...", params.debug);
138 |     debug_print("\tComputing STFTs...", params.debug);
139 |     auto S = spectrogram(x, fft_size, hop_size, params.zero_pad);
140 | 
141 |     // Without any frame there is nothing to filter, and S[0] below would be invalid.
142 |     if(S.empty())
143 |     {
144 |         std::vector<float> silence (x.size(), 0.0f);
145 |         return std::make_pair(silence, silence);
146 |     }
147 | 
148 |     debug_print("\tSeparating percussive signal...", params.debug);
149 |     auto P = median_filter_perc(S, params.perc_kernel);
150 | 
151 |     debug_print("\tSeparating harmonic signal...", params.debug);
152 |     auto H = median_filter_harm(S, params.harm_kernel);
153 | 
154 |     debug_print("\tApplying filter masks...", params.debug);
155 |     const auto n_bins = S[0].size();
156 |     // epsilon keeps the mask denominator non-zero in silent bins
157 |     const float eps = std::numeric_limits<float>::epsilon();
158 |     std::vector<fftw_complex_vec> H_hat (S.size(), fftw_complex_vec(n_bins));
159 |     std::vector<fftw_complex_vec> P_hat (S.size(), fftw_complex_vec(n_bins));
160 |     for(int i = 0; i < (int) S.size(); ++i)
161 |     {
162 |         for(int h = 0; h < (int) n_bins; ++h)
163 |         {
164 |             float H_p = std::pow(H[i][h], params.mask_exp);
165 |             float P_p = std::pow(P[i][h], params.mask_exp);
166 |             float denom = H_p + P_p + eps;
167 | 
168 |             H_hat[i][h] = S[i][h] * (H_p / denom);
169 |             P_hat[i][h] = S[i][h] * (P_p / denom);
170 |         }
171 |     }
172 | 
173 |     debug_print("\tComputing time-domain signal...", params.debug);
174 |     return spec_reconstruct(H_hat, P_hat, (int) x.size(), fft_size, hop_size);
175 | }
160 | 
161 | } // namespace HPSS
162 | 
163 | 


--------------------------------------------------------------------------------
/src/hpss.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <utility>
 4 | #include <vector>
 5 | 
 6 | /**
 7 |  * Algorithm for harmonic/percussive source-separation
 8 |  * using median filtering. Based on the algorithm
 9 |  * proposed by Derry Fitzgerald at DAFx 2010:
10 |  * https://arrow.tudublin.ie/cgi/viewcontent.cgi?article=1078&context=argcon
11 |  */ 
12 | namespace HPSS
13 | {
14 | 
15 | struct HPSS_PARAMS
16 | {
17 |     int perc_kernel = 17;           // size of median filter kernel for percussive signal
18 |     int harm_kernel = 17;           // size of median filter kernel for harmonic signal
19 |     float mask_exp = 3.0f;          // exponent used for Wiener filter to construct mask
20 |     float sample_rate = 44100.0f;   // sample rate of the audio being processed
21 |     float window_length_ms = 40.0f; // length of window to use for FFT
22 |     float hop_length_ms = 20.0f;    // length of hop size to use for FFT (should be 1x, 0.5x, or 0.25x of the window length)
23 |     int zero_pad = 2;               // zero-padding factor to use for FFT
24 |     bool debug = false;             // enable print debug statements for the algorithm
25 | };
26 | 
27 | /**
28 |  * Accepts a single vector of audio samples, and returns a pair of vectors
29 |  * with the separated harmonic and percussive signals (in that order).
30 |  *
31 |  * ```
32 |  * std::vector<float> audio;
33 |  * HPSS_PARAMS params;
34 |  * auto [harmonic, percussive] = hpss(audio, params);
35 |  * ```
36 |  */
37 | std::pair<std::vector<float>, std::vector<float>> hpss(std::vector<float> x, const HPSS_PARAMS& params);
38 | 
39 | } // namespace HPSS
40 | 


--------------------------------------------------------------------------------
/src/hpss_test.cpp:
--------------------------------------------------------------------------------
 1 | #include "hpss.h"
 2 | #include "WavIO.hpp"
 3 | 
 4 | /** Prints a one-line description of the tool followed by its command-line usage */
 5 | void help()
 6 | {
 7 |     static const char* const usage_lines[] = {
 8 |         "Utility to separate harmonic and percussive signals from a .wav file",
 9 |         "Usage: hpss <wav_file> [<num_seconds> <start_seconds>]",
10 |     };
11 | 
12 |     for(const char* line : usage_lines)
13 |         std::cout << line << std::endl;
14 | }
 9 | 
10 | /**
11 |  * Loads a .wav file, trims it to num_seconds starting at start_seconds
12 |  * (defaults: 10 s from the beginning), runs HPSS on every channel and writes
13 |  * ref.wav, harmonic.wav, percussive.wav and sum.wav to the working directory.
14 |  *
15 |  * Returns 0 on success and 1 when the arguments are unusable or the requested
16 |  * range contains no audio.
17 |  */
18 | int main(int argc, char* argv[])
19 | {
20 |     if(argc < 2 || argc > 4)
21 |     {
22 |         help();
23 |         return 1;
24 |     }
25 | 
26 |     if(argc == 2 && std::string(argv[1]) == "--help")
27 |     {
28 |         help();
29 |         return 1;
30 |     }
31 | 
32 |     std::string test_file = std::string(argv[1]);
33 | 
34 |     float num_seconds = 10.0f;
35 |     if(argc >= 3)
36 |         num_seconds = (float) std::atof(argv[2]);
37 | 
38 |     float start_seconds = 0.0f;
39 |     if(argc >= 4)
40 |         start_seconds = (float) std::atof(argv[3]);
41 | 
42 |     // libsndfile expects SF_INFO to be zeroed before a file is opened for reading
43 |     SF_INFO sf_info {};
44 |     auto wav_signal = WavIO::load_file(test_file.c_str(), sf_info);
45 |     const float fs = (float) sf_info.samplerate;
46 | 
47 |     // trim signal, keeping the requested range inside the audio that was loaded
48 |     std::vector<std::vector<float>> ref_signal;
49 |     {
50 |         const int total_frames = (int) sf_info.frames;
51 |         const int start_sample = std::max(0, std::min(int(fs * start_seconds), total_frames));
52 |         const int end_sample = std::max(start_sample, std::min(start_sample + int(fs * num_seconds), total_frames));
53 |         for(const auto& channel : wav_signal)
54 |         {
55 |             // never step past the samples this channel actually holds
56 |             const int first = std::min(start_sample, (int) channel.size());
57 |             const int last = std::min(end_sample, (int) channel.size());
58 |             ref_signal.emplace_back(channel.begin() + first, channel.begin() + last);
59 |         }
60 |     }
61 | 
62 |     if(ref_signal.empty() || ref_signal[0].empty())
63 |     {
64 |         std::cerr << "Error: no audio in the requested range of " << test_file << std::endl;
65 |         return 1;
66 |     }
67 | 
68 |     HPSS::HPSS_PARAMS params;
69 |     params.sample_rate = fs;
70 |     params.debug = true;
71 | 
72 |     std::vector<std::vector<float>> h_signal;
73 |     std::vector<std::vector<float>> p_signal;
74 |     std::vector<std::vector<float>> sum_signal;
75 |     for(int ch = 0; ch < (int) ref_signal.size(); ++ch)
76 |     {
77 |         auto[h_ch, p_ch] = HPSS::hpss(ref_signal[ch], params);
78 |         h_signal.push_back(h_ch);
79 |         p_signal.push_back(p_ch);
80 | 
81 |         // sum.wav lets the two parts be compared against ref.wav; only the
82 |         // samples present in both parts are added
83 |         std::vector<float> sum_ch (h_ch.size(), 0.0f);
84 |         const int n_common = (int) std::min(h_ch.size(), p_ch.size());
85 |         for(int i = 0; i < n_common; ++i)
86 |             sum_ch[i] = h_ch[i] + p_ch[i];
87 | 
88 |         sum_signal.push_back(sum_ch);
89 |     }
90 | 
91 |     WavIO::write_file("ref.wav", ref_signal, sf_info);
92 |     WavIO::write_file("harmonic.wav", h_signal, sf_info);
93 |     WavIO::write_file("percussive.wav", p_signal, sf_info);
94 |     WavIO::write_file("sum.wav", sum_signal, sf_info);
95 | 
96 |     return 0;
97 | }
74 | 


--------------------------------------------------------------------------------
/src/stretch.cpp:
--------------------------------------------------------------------------------
  1 | #include "stretch.h"
  2 | #include "fft_utils.h"
  3 | #include <cmath>
  4 | #include <iostream>
  5 | 
  6 | namespace time_stretch
  7 | {
  8 | 
  9 | using namespace fft_utils;
 10 | 
 11 | /**
 12 |  * Computes the correct phase propagation for a phase vocoder.
 13 |  *
 14 |  * S is the analysis spectrogram indexed as S[frame][bin], fs the sample rate,
 15 |  * Ha / Hs the analysis / synthesis hop sizes in samples and N the FFT length
 16 |  * used to turn a bin index into a frequency (k * fs / N).
 17 |  *
 18 |  * Returns one row per frame of S holding the propagated phase of every bin.
 19 |  * Phases are expressed in cycles (not radians) and stored with a +0.5 offset,
 20 |  * i.e. row 0 is arg(S[0][k]) / (2*pi) + 0.5, so every value lies in [0, 1].
 21 |  * An empty S yields an empty result. Ha and fs must be positive.
 22 |  */
 23 | std::vector<std::vector<float>> phase_propagation(const std::vector<fftw_complex_vec>& S, float fs, int Ha, int Hs, int N)
 24 | {
 25 |     // nothing to propagate; this also avoids reading S[0] below
 26 |     if(S.empty())
 27 |         return {};
 28 | 
 29 |     // principal argument: wraps a phase difference in cycles into [-0.5, 0.5).
 30 |     // std::floor is used because std::fmod keeps the sign of its first operand
 31 |     // and would therefore leave negative inputs unwrapped.
 32 |     auto p_arg = [] (float x) { return x - std::floor(x + 0.5f); };
 33 | 
 34 |     // wraps a phase in cycles into [0, 1)
 35 |     auto wrap_cycle = [] (float x) { return x - std::floor(x); };
 36 | 
 37 |     const int M = (int) S.size();
 38 |     const int K = (int) S[0].size();
 39 | 
 40 |     // centre frequency of every bin
 41 |     std::vector<float> omega (K, 0.0f);
 42 |     for(int k = 0; k < K; ++k)
 43 |         omega[k] = (float) k * fs / (float) N;
 44 | 
 45 |     // measured phases, in cycles, shifted by +0.5
 46 |     std::vector<std::vector<float>> phi (M, std::vector<float> (K, 0.0f));
 47 |     for(int m = 0; m < M; ++m)
 48 |         for(int k = 0; k < K; ++k)
 49 |             phi[m][k] = std::arg(S[m][k]) / (2.0f * (float)M_PI) + 0.5f;
 50 | 
 51 |     const float delta_T = (float) Ha / fs;
 52 |     std::vector<std::vector<float>> phi_mod (M, std::vector<float> (K, 0.0f));
 53 |     std::copy(phi[0].begin(), phi[0].end(), phi_mod[0].begin());
 54 |     for(int m = 0; m < M - 1; ++m)
 55 |     {
 56 |         for(int k = 0; k < K; ++k)
 57 |         {
 58 |             // instantaneous frequency: bin centre plus the wrapped deviation
 59 |             // between the measured and the expected phase advance over one analysis hop
 60 |             auto F_if = omega[k] + p_arg(phi[m+1][k] - (phi[m][k] + omega[k] * delta_T)) / delta_T;
 61 | 
 62 |             // advance by one synthesis hop; wrapping into [0, 1) carries the
 63 |             // +0.5 storage offset of row 0 unchanged from frame to frame
 64 |             phi_mod[m+1][k] = wrap_cycle(phi_mod[m][k] + F_if * (float) Hs / fs);
 65 |         }
 66 |     }
 67 | 
 68 |     return phi_mod;
 69 | }
 41 | 
 42 | /** Builds a Hann window of length N and replaces each of its N samples by its square root */
 43 | std::vector<float> sqrt_hann(int N)
 44 | {
 45 |     auto window = fft_utils::hann(N);
 46 | 
 47 |     int n = 0;
 48 |     while(n < N)
 49 |     {
 50 |         window[n] = std::sqrt(window[n]);
 51 |         ++n;
 52 |     }
 53 | 
 54 |     return window;
 55 | }
 51 | 
 52 | /**
 53 |  * Computes a time-frequency spectrogram with a sqrt Hann window.
 54 |  *
 55 |  * Frames of fft_size samples are taken every hop_size samples for as long as
 56 |  * a whole frame ends strictly before the end of x; each one is windowed and
 57 |  * transformed with an FFT of size fft_size * zero_pad. hop_size must be
 58 |  * positive, otherwise the framing loop never advances.
 59 |  */
 60 | std::vector<fftw_complex_vec> spectrogram(const std::vector<float>& x, int fft_size, int hop_size, int zero_pad = 1)
 61 | {
 62 |     const auto padded_size = fft_size * zero_pad;
 63 |     auto window = sqrt_hann(fft_size);
 64 | 
 65 |     std::vector<fftw_complex_vec> frames;
 66 |     fft_utils::ForwardFFT transform { padded_size };
 67 | 
 68 |     const auto signal_len = (int) x.size();
 69 |     int frame_start = 0;
 70 |     while(frame_start + fft_size < signal_len)
 71 |     {
 72 |         const auto first_sample = x.begin() + frame_start;
 73 |         std::copy(first_sample, first_sample + fft_size, transform.x_in.data());
 74 |         fft_utils::applyWindow(transform.x_in, window, transform.x_in);
 75 |         transform.perform();
 76 |         frames.push_back(transform.Y_out);
 77 | 
 78 |         frame_start += hop_size;
 79 |     }
 80 | 
 81 |     return frames;
 82 | }
 70 | 
 71 | /**
 72 |  * Reconstructs the signal from its spectrogram with a sqrt Hann window.
 73 |  *
 74 |  * Each frame of S is inverse-transformed, windowed and overlap-added into the
 75 |  * output at i * hop_size. L is the maximum output length; the result is cut
 76 |  * just after the last sample reached by the final frame, so it can be shorter
 77 |  * than L. hop_size is expected to be positive. An empty S or a non-positive L
 78 |  * yields an empty vector.
 79 |  */
 80 | std::vector<float> reconstruct(std::vector<fftw_complex_vec>& S, int hop_size, int window_size, int L)
 81 | {
 82 |     // no frames (or no room for output) means no signal; this also avoids reading S[0] below
 83 |     if(S.empty() || L <= 0)
 84 |         return {};
 85 | 
 86 |     std::vector<float> y (L, 0.0f);
 87 |     int final_idx = 0;
 88 |     auto win = sqrt_hann(window_size);
 89 | 
 90 |     const auto n_fft = (int) S[0].size();
 91 |     fft_utils::InverseFFT ifft { n_fft };
 92 |     for(int i = 0; i < (int) S.size(); ++i)
 93 |     {
 94 |         const auto start_idx = int(i * hop_size);
 95 |         if(start_idx >= L)
 96 |         {
 97 |             // this frame and all later ones start past the end of the output,
 98 |             // so there is no point in transforming them
 99 |             final_idx = L;
100 |             break;
101 |         }
102 | 
103 |         // the last frame may only partly fit into the output
104 |         const auto n_samples = std::min(window_size, L - start_idx);
105 | 
106 |         std::copy(S[i].begin(), S[i].end(), ifft.X_in.data());
107 |         ifft.perform();
108 |         fft_utils::applyWindow(ifft.y_out, win, ifft.y_out);
109 | 
110 |         for(int n = 0; n < n_samples; ++n)
111 |             y[n + start_idx] += ifft.y_out[n];
112 |         final_idx = start_idx + n_samples;
113 |     }
114 | 
115 |     return { y.begin(), y.begin() + final_idx };
116 | }
 96 | 
 97 | /** Strict "less than" on magnitudes: true when |a| is smaller than |b| */
 98 | template<typename T>
 99 | static bool abs_compare(T a, T b)
100 | {
101 |     const auto magnitude_a = std::abs(a);
102 |     const auto magnitude_b = std::abs(b);
103 |     return magnitude_a < magnitude_b;
104 | }
103 | 
104 | /** Returns the maximum absolute value in a vector, or 0 when the vector is empty */
105 | static float max_abs(const std::vector<float>& vec)
106 | {
107 |     // std::max_element returns end() for an empty range, which must not be dereferenced
108 |     if(vec.empty())
109 |         return 0.0f;
110 | 
111 |     return std::abs(*std::max_element(vec.begin(), vec.end(), abs_compare<float>));
112 | }
109 | 
110 | /**
111 |  * Normalizes the signal in a vector to have the same magnitude as the reference:
112 |  * vec_cur is scaled in place so that its peak absolute value matches that of
113 |  * vec_ref. A vec_cur without a positive peak (all zeros) is left untouched,
114 |  * because no gain can be derived from it.
115 |  */
116 | static void normalize_vec(const std::vector<float>& vec_ref, std::vector<float>& vec_cur)
117 | {
118 |     const auto mag_ref = max_abs(vec_ref);
119 |     const auto mag_cur = max_abs(vec_cur);
120 | 
121 |     // dividing by a zero peak would fill the signal with NaN/inf
122 |     if(! (mag_cur > 0.0f))
123 |         return;
124 | 
125 |     const auto gain = mag_ref / mag_cur;
126 |     for(float& val : vec_cur)
127 |         val *= gain;
128 | }
118 | 
119 | /** Writes str followed by a newline to stdout, but only when debug is set */
120 | static void debug_print(const std::string& str, bool debug)
121 | {
122 |     if(! debug)
123 |         return;
124 | 
125 |     std::cout << str << std::endl;
126 | }
124 | 
125 | /**
126 |  * Overwrites the phase of each bin of S with the matching entry of phases,
127 |  * keeping the bin's magnitude. phases holds the values produced by
128 |  * phase_propagation(): cycles stored with a +0.5 offset, so they are shifted
129 |  * back and scaled by 2*pi because std::polar expects radians.
130 |  * Only frames and bins present in both arguments are touched.
131 |  */
132 | static void apply_reference_phases(std::vector<fftw_complex_vec>& S, const std::vector<std::vector<float>>& phases)
133 | {
134 |     const auto two_pi = 2.0f * (float) M_PI;
135 |     const auto n_frames = std::min((int) S.size(), (int) phases.size());
136 |     for(int m = 0; m < n_frames; ++m)
137 |     {
138 |         const auto n_bins = std::min((int) S[m].size(), (int) phases[m].size());
139 |         for(int k = 0; k < n_bins; ++k)
140 |             S[m][k] = std::polar(std::abs(S[m][k]), two_pi * (phases[m][k] - 0.5f));
141 |     }
142 | }
143 | 
144 | /**
145 |  * Magnitude correction: gives every bin of dst the magnitude of the matching
146 |  * bin of src while keeping dst's own phase. Only frames and bins present in
147 |  * both spectrograms are touched.
148 |  */
149 | static void copy_magnitudes(const std::vector<fftw_complex_vec>& src, std::vector<fftw_complex_vec>& dst)
150 | {
151 |     const auto n_frames = std::min((int) src.size(), (int) dst.size());
152 |     for(int m = 0; m < n_frames; ++m)
153 |     {
154 |         const auto n_bins = std::min((int) src[m].size(), (int) dst[m].size());
155 |         for(int k = 0; k < n_bins; ++k)
156 |             dst[m][k] = std::polar(std::abs(src[m][k]), std::arg(dst[m][k]));
157 |     }
158 | }
159 | 
160 | /**
161 |  * Performs time-stretching on a multi-channel audio signal.
162 |  *
163 |  * x holds one vector of samples per channel. params.hpss_params.sample_rate is
164 |  * overwritten with params.sample_rate before processing. Every channel is split
165 |  * into a harmonic and a percussive part, which are stretched with the long and
166 |  * the short window respectively, using reference phases computed once from the
167 |  * mono mix of all channels.
168 |  *
169 |  * Returns one stretched vector per channel; all channels get the length of the
170 |  * first one. An empty result is returned when x has no channels, when
171 |  * params.stretch_factor is not a positive number, or when a channel is not
172 |  * longer than the long analysis window.
173 |  */
174 | std::vector<std::vector<float>> time_stretch(const std::vector<std::vector<float>>& x, STRETCH_PARAMS& params)
175 | {
176 |     params.hpss_params.sample_rate = params.sample_rate; // make sure hpss is using the same sample rate
177 | 
178 |     // a zero, negative or NaN factor cannot be turned into a usable analysis hop
179 |     if(x.empty() || ! (params.stretch_factor > 0.0f))
180 |     {
181 |         debug_print("Nothing to stretch: no channels or non-positive stretch factor", params.debug);
182 |         return {};
183 |     }
184 | 
185 |     const auto long_window_size = next_pow2(int(params.long_window_ms * 0.001 * params.sample_rate));
186 |     const auto short_window_size = next_pow2(int(params.short_window_ms * 0.001 * params.sample_rate));
187 | 
188 |     // spectrogram() only emits a frame while a whole window fits strictly inside
189 |     // the signal, so a shorter channel would leave its spectrograms empty
190 |     for(const auto& channel : x)
191 |     {
192 |         if((int) channel.size() <= (int) long_window_size)
193 |         {
194 |             debug_print("Nothing to stretch: input is not longer than the long window", params.debug);
195 |             return {};
196 |         }
197 |     }
198 | 
199 |     // synthesis hops are half a window; analysis hops are scaled by the stretch
200 |     // factor and kept >= 1 so that the framing loops always advance
201 |     const auto Hs_long = long_window_size / 2;
202 |     const auto Ha_long = std::max(1, int((float) Hs_long / params.stretch_factor));
203 | 
204 |     const auto Hs_short = short_window_size / 2;
205 |     const auto Ha_short = std::max(1, int((float) Hs_short / params.stretch_factor));
206 | 
207 |     debug_print("Computing mono reference phases...", params.debug);
208 |     std::vector<float> x_sum (x[0].size(), 0.0f);
209 |     for(const auto& channel : x)
210 |     {
211 |         // channels may differ in length; only mix the samples that exist
212 |         const auto n_mix = (int) std::min(x_sum.size(), channel.size());
213 |         for(int n = 0; n < n_mix; ++n)
214 |             x_sum[n] += channel[n] / (float) x.size();
215 |     }
216 |     auto X_sum = spectrogram(x_sum, long_window_size, Ha_long);
217 |     const auto phase_mods = phase_propagation(X_sum, params.sample_rate, Ha_long, Hs_long, long_window_size);
218 | 
219 |     // upper bound for the length of every reconstructed signal
220 |     const auto stretch_len = int((float) x[0].size() * params.stretch_factor) + 2000;
221 |     int out_len = -1;
222 |     std::vector<std::vector<float>> y;
223 |     for(int ch = 0; ch < (int) x.size(); ++ch)
224 |     {
225 |         debug_print("Processing channel " + std::to_string(ch) + "...", params.debug);
226 |         auto[h_signal, p_signal] = HPSS::hpss(x[ch], params.hpss_params);
227 | 
228 |         debug_print("Performing time-stretching...", params.debug);
229 |         auto H_full = spectrogram(h_signal, long_window_size, Ha_long);
230 |         auto P_full = spectrogram(p_signal, long_window_size, Ha_long);
231 | 
232 |         debug_print("\tSeparating magnitude-only PV...", params.debug);
233 |         // H_full still holds the unmodified analysis frames here, so it doubles
234 |         // as the long-window spectrogram of the harmonic part
235 |         const auto h_x_long = reconstruct(H_full, Hs_long, long_window_size, stretch_len);
236 |         auto P_short = spectrogram(p_signal, short_window_size, Ha_short);
237 |         const auto p_x_short = reconstruct(P_short, Hs_short, short_window_size, stretch_len);
238 | 
239 |         debug_print("\tApplying reference phases...", params.debug);
240 |         apply_reference_phases(H_full, phase_mods);
241 |         apply_reference_phases(P_full, phase_mods);
242 | 
243 |         debug_print("\tReconstructing references...", params.debug);
244 |         const auto h_v = reconstruct(H_full, Hs_long, long_window_size, stretch_len);
245 |         const auto p_v = reconstruct(P_full, Hs_long, long_window_size, stretch_len);
246 | 
247 |         debug_print("\tPerforming magnitude correction...", params.debug);
248 |         auto H_v_long = spectrogram(h_v, long_window_size, Ha_long);
249 |         auto P_v_short = spectrogram(p_v, short_window_size, Ha_short);
250 | 
251 |         const auto H_w_long = spectrogram(h_x_long, long_window_size, Ha_long);
252 |         const auto P_w_short = spectrogram(p_x_short, short_window_size, Ha_short);
253 | 
254 |         // phases come from the phase-coherent references, magnitudes from the
255 |         // magnitude-only stretches
256 |         copy_magnitudes(H_w_long, H_v_long);
257 |         copy_magnitudes(P_w_short, P_v_short);
258 | 
259 |         debug_print("\tReconstructing final signal...", params.debug);
260 |         auto h_y = reconstruct(H_v_long, Ha_long, long_window_size, stretch_len);
261 |         auto p_y = reconstruct(P_v_short, Ha_short, short_window_size, stretch_len);
262 | 
263 |         debug_print("\tNormalizing and combining signals...", params.debug);
264 |         normalize_vec(h_signal, h_y);
265 |         normalize_vec(p_signal, p_y);
266 | 
267 |         // the first channel fixes the output length used for all channels
268 |         if(out_len < 0)
269 |             out_len = std::min((int) h_y.size(), (int) p_y.size());
270 | 
271 |         y.emplace_back(out_len, 0.0f);
272 |         auto& y_ch = y.back();
273 |         for(int i = 0; i < std::min(out_len, (int) h_y.size()); ++i)
274 |             y_ch[i] += h_y[i];
275 | 
276 |         for(int i = 0; i < std::min(out_len, (int) p_y.size()); ++i)
277 |             y_ch[i] += p_y[i];
278 | 
279 |         normalize_vec(x[ch], y_ch);
280 |     }
281 | 
282 |     return y;
283 | }
217 | 
218 | } // namespace time_stretch
219 | 


--------------------------------------------------------------------------------
/src/stretch.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "hpss.h"
 4 | 
 5 | /**
 6 |  * Algorithm for time-stretching audio without altering
 7 |  * the pitch. Based on the algorithm proposed by Nicolas
 8 |  * Juillerat and Beat Hirsbrunner at ICASSP 2017:
 9 |  * http://www.pitchtech.ch/Confs/ICASSP2017/0000716.pdf
10 |  * 
11 |  * The basic idea is to separate harmonic and percussive
12 |  * parts of the signal and stretch them using a phase vocoder
13 |  * with different window sizes for the different parts of
14 |  * the signal. However, a single phase correction is used
15 |  * across all parts and all channels to preserve phase
16 |  * coherence.
17 |  */ 
18 | namespace time_stretch
19 | {
20 | 
21 | struct STRETCH_PARAMS
22 | {
23 |     HPSS::HPSS_PARAMS hpss_params;  // parameters for harmonic/percussive source-separation
24 |     float stretch_factor = 1.0f;    // the time-stretching factor (output length is roughly input length times this)
25 |     float sample_rate = 44100.0f;   // the sample-rate of the incoming audio signal
26 |     float long_window_ms = 100.0f;  // the (long) window length to use for the harmonic signal
27 |     float short_window_ms = 1.0f;   // the (short) window length to use for the percussive signal
28 |     bool debug = false;             // enable print debug statements for the algorithm
29 | };
30 | 
31 | /** Performs time-stretching on a multi-channel audio signal (x holds one vector of samples per channel) */
32 | std::vector<std::vector<float>> time_stretch(const std::vector<std::vector<float>>& x, STRETCH_PARAMS& params);
33 | 
34 | } // namespace time_stretch
35 | 


--------------------------------------------------------------------------------
/src/stretch_test.cpp:
--------------------------------------------------------------------------------
 1 | #include "stretch.h"
 2 | #include "WavIO.hpp"
 3 | 
 4 | /**
 5 |  * Prints a one-line description of the tool followed by its command-line usage.
 6 |  * NOTE(review): the usage line names the program `hpss`, which looks copied
 7 |  * from hpss_test.cpp - confirm the executable name in CMakeLists.txt.
 8 |  */
 9 | void help()
10 | {
11 |     static const char* const usage_lines[] = {
12 |         "Utility to time-stretch a .wav file",
13 |         "Usage: hpss <wav_file> <stretch_factor> [<num_seconds> <start_seconds>]",
14 |     };
15 | 
16 |     for(const char* line : usage_lines)
17 |         std::cout << line << std::endl;
18 | }
 9 | 
10 | /**
11 |  * Loads a .wav file, trims it to num_seconds starting at start_seconds
12 |  * (defaults: 10 s from the beginning), time-stretches it by stretch_factor and
13 |  * writes ref.wav and stretch.wav to the working directory.
14 |  *
15 |  * Returns 0 on success and 1 when the arguments are unusable, the requested
16 |  * range contains no audio, or the stretcher produced no output.
17 |  */
18 | int main(int argc, char* argv[])
19 | {
20 |     if(argc < 3 || argc > 5)
21 |     {
22 |         help();
23 |         return 1;
24 |     }
25 | 
26 |     std::string test_file = std::string(argv[1]);
27 |     const auto stretch_factor = (float) std::atof(argv[2]);
28 | 
29 |     // atof yields 0 for text it cannot parse; a factor that is not positive
30 |     // cannot be used to derive the analysis hop sizes
31 |     if(! (stretch_factor > 0.0f))
32 |     {
33 |         std::cerr << "Error: <stretch_factor> must be a number greater than zero" << std::endl;
34 |         help();
35 |         return 1;
36 |     }
37 | 
38 |     float num_seconds = 10.0f;
39 |     if(argc >= 4)
40 |         num_seconds = (float) std::atof(argv[3]);
41 | 
42 |     float start_seconds = 0.0f;
43 |     if(argc >= 5)
44 |         start_seconds = (float) std::atof(argv[4]);
45 | 
46 |     // libsndfile expects SF_INFO to be zeroed before a file is opened for reading
47 |     SF_INFO sf_info {};
48 |     auto wav_signal = WavIO::load_file(test_file.c_str(), sf_info);
49 |     const float fs = (float) sf_info.samplerate;
50 | 
51 |     // trim signal, keeping the requested range inside the audio that was loaded
52 |     std::vector<std::vector<float>> ref_signal;
53 |     {
54 |         const int total_frames = (int) sf_info.frames;
55 |         const int start_sample = std::max(0, std::min(int(fs * start_seconds), total_frames));
56 |         const int end_sample = std::max(start_sample, std::min(start_sample + int(fs * num_seconds), total_frames));
57 |         for(const auto& channel : wav_signal)
58 |         {
59 |             // never step past the samples this channel actually holds
60 |             const int first = std::min(start_sample, (int) channel.size());
61 |             const int last = std::min(end_sample, (int) channel.size());
62 |             ref_signal.emplace_back(channel.begin() + first, channel.begin() + last);
63 |         }
64 |     }
65 | 
66 |     if(ref_signal.empty() || ref_signal[0].empty())
67 |     {
68 |         std::cerr << "Error: no audio in the requested range of " << test_file << std::endl;
69 |         return 1;
70 |     }
71 | 
72 |     time_stretch::STRETCH_PARAMS params;
73 |     params.sample_rate = fs;
74 |     params.stretch_factor = stretch_factor;
75 |     params.debug = true;
76 |     params.hpss_params.debug = true;
77 | 
78 |     auto stretch_signal = time_stretch::time_stretch(ref_signal, params);
79 |     if(stretch_signal.empty())
80 |     {
81 |         std::cerr << "Error: time-stretching produced no output" << std::endl;
82 |         return 1;
83 |     }
84 | 
85 |     WavIO::write_file("ref.wav", ref_signal, sf_info);
86 |     WavIO::write_file("stretch.wav", stretch_signal, sf_info);
87 | 
88 |     return 0;
89 | }
55 | 


--------------------------------------------------------------------------------