├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── python ├── hpss.py ├── hpss_test.py ├── stretch.py └── stretch_test.py └── src ├── WavIO.hpp ├── fft_utils.h ├── hpss.cpp ├── hpss.h ├── hpss_test.cpp ├── stretch.cpp ├── stretch.h └── stretch_test.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # cpp artifacts 2 | build/ 3 | 4 | # python artifacts 5 | __pycache__ 6 | 7 | # extras 8 | *.wav 9 | .vscode/ 10 | .idea/ 11 | .DS_Store 12 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1) 2 | project(time-stretch) 3 | set(CMAKE_CXX_STANDARD 17) 4 | 5 | include(FindPkgConfig) 6 | 7 | add_library(time_stretch STATIC 8 | src/hpss.cpp 9 | src/stretch.cpp 10 | ) 11 | 12 | # Find requirements 13 | if(TIMESTRETCH_USING_JUCE) 14 | message(STATUS "time-stretch using JUCE") 15 | target_compile_definitions(time_stretch PUBLIC TIMESTRETCH_USING_JUCE=1) 16 | else() 17 | message(STATUS "time-stretch using FFTW") 18 | pkg_check_modules(FFTW REQUIRED fftw3) 19 | target_include_directories(time_stretch PUBLIC ${FFTW_INCLUDE_DIRS}) 20 | target_link_directories(time_stretch PUBLIC ${FFTW_LIBRARY_DIRS}) 21 | target_link_libraries(time_stretch PUBLIC fftw3f) 22 | endif() 23 | 24 | if(BUILD_TESTS) 25 | message(STATUS "Building tests for time-stretch library") 26 | 27 | pkg_check_modules(SNDFILE REQUIRED sndfile>=1.0.2) 28 | 29 | # test executable for hpss part 30 | add_executable(hpss src/hpss_test.cpp) 31 | target_include_directories(hpss PUBLIC ${SNDFILE_INCLUDE_DIRS}) 32 | target_link_directories(hpss PUBLIC ${SNDFILE_LIBRARY_DIRS}) 33 | target_link_libraries(hpss PUBLIC time_stretch ${SNDFILE_LIBRARIES}) 34 | 35 | # test executable for time stretching 36 | add_executable(stretch src/stretch_test.cpp) 37 | target_include_directories(stretch PUBLIC 
${SNDFILE_INCLUDE_DIRS}) 38 | target_link_directories(stretch PUBLIC ${SNDFILE_LIBRARY_DIRS}) 39 | target_link_libraries(stretch PUBLIC time_stretch ${SNDFILE_LIBRARIES}) 40 | endif() 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, jatinchowdhury18 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Time Stretcher 2 | 3 | C++ audio time-stretching implementation, based on the algorithms 4 | presented in: 5 | - [Audio Time Stretching with an Adaptive Phase Vocoder](http://www.pitchtech.ch/Confs/ICASSP2017/0000716.pdf), Nicolas Juillerat and Beat Hirsbrunner (ICASSP 2017) 6 | - [Harmonic/Percussive Separation using Median Filtering](https://arrow.tudublin.ie/cgi/viewcontent.cgi?article=1078&context=argcon), Derry Fitzgerald (DAFx 2010) 7 | 8 | ## Dependencies 9 | Building the time-stretching library requires FFTW. 10 | Building the library tests requires libsndfile. 11 | 12 | With apt: 13 | ```bash 14 | sudo apt-get install fftw3 15 | sudo apt-get install libsndfile 16 | ``` 17 | 18 | Or with Homebrew: 19 | ```bash 20 | brew install fftw3 21 | brew install libsndfile 22 | ``` 23 | 24 | ## Building Tests 25 | Building the code requires CMake. 26 | ```bash 27 | cmake -Bbuild -DBUILD_TESTS=ON 28 | cmake --build build --config Release 29 | ``` 30 | 31 | `./build/hpss` can be used to test the 32 | harmonic/percussive source separation algorithm. 33 | `./build/stretch` can be used to test the 34 | time-stretching algorithm. Use the `--help` 35 | flag for more information about how to use each test. 36 | 37 | ## License 38 | The code in this repository is licensed under the BSD 3-clause license. 39 | 40 | Enjoy! 
import numpy as np
from scipy.signal import windows

# Added to the soft-mask denominator so it can never be exactly zero.
eps = np.finfo(np.float32).eps


def median_filter(x, kernel_size):
    """Median-filter a 1-D array, ranking samples by absolute value.

    The input is zero-padded with ``(kernel_size - 1) // 2`` samples on each
    side, so the result has the same length as ``x``. For every position the
    window is ordered by ``abs`` and the element at the middle rank is copied
    to the output (the original, signed value - not its magnitude).

    Args:
        x: 1-D array to filter.
        kernel_size: number of samples in the sliding window.

    Returns:
        Array with the same shape and dtype as ``x``.
    """
    pad = (kernel_size - 1) // 2
    x_pad = np.concatenate((np.zeros(pad), x, np.zeros(pad)))
    y = np.zeros_like(x)
    for i in range(len(y)):
        median_arg = np.argsort(np.abs(x_pad[i : i + kernel_size]))[pad]
        y[i] = x_pad[i + median_arg]

    return y


def spectrogram(x, fft_size, hop_size, zero_pad=1):
    """Compute an un-windowed STFT of ``x``.

    A frame starts every ``hop_size`` samples; each frame holds up to
    ``fft_size`` samples and is zero-padded to ``fft_size * zero_pad`` before
    the FFT. Frames that run past the end of ``x`` are kept (zero-filled).

    Returns:
        Complex array of shape ``(n_frames, fft_size * zero_pad)``, or
        ``None`` when ``x`` is empty.
    """
    frames = []
    for i in range(0, len(x), hop_size):
        x_win = x[i : i + fft_size]
        x_pad = np.zeros(fft_size * zero_pad)
        x_pad[:len(x_win)] = x_win
        # Collect in a list: np.append would re-copy the whole array per frame.
        frames.append(np.fft.fft(x_pad))

    return np.array(frames) if frames else None


def hpss(x, perc_kernel=17, harm_kernel=17, mask_power=2, fft_size=4096, hop_size=1024, zero_pad=2):
    """Simple harmonic/percussive source separation based on median filtering.

    Args:
        x: 1-D floating-point signal.
        perc_kernel: median kernel length applied along frequency (per frame).
        harm_kernel: median kernel length applied along time (per bin).
        mask_power: exponent applied to the filtered magnitudes when the soft
            masks are built.
        fft_size: analysis frame length in samples.
        hop_size: distance between frame starts in samples.
        zero_pad: zero-padding factor applied to every frame before the FFT.

    Returns:
        ``(h_sig, p_sig)``: harmonic and percussive signals, each shaped
        like ``x``.
    """
    print('Computing HPSS...')
    print('\tComputing STFTs...')
    S = spectrogram(x, fft_size, hop_size, zero_pad)
    if S is None:
        # Empty input: nothing to separate.
        return np.zeros_like(x), np.zeros_like(x)
    mag = np.abs(S)

    # percussive signal: median across frequency within each frame
    print('\tSeparating percussive signal...')
    P = np.empty_like(mag)
    for i in range(mag.shape[0]):
        P[i, :] = median_filter(mag[i, :], kernel_size=perc_kernel)

    # harmonic signal: median across time within each frequency bin
    print('\tSeparating harmonic signal...')
    H = np.empty_like(mag)
    for h in range(mag.shape[1]):
        H[:, h] = median_filter(mag[:, h], kernel_size=harm_kernel)

    # create filter masks (vectorised over the whole spectrogram)
    print('\tCreating filter masks...')
    H_p = H ** mask_power
    P_p = P ** mask_power
    denom = H_p + P_p + eps

    H_hat = S * (H_p / denom)
    P_hat = S * (P_p / denom)

    print('\tComputing time-domain signal...')
    h_sig = np.zeros_like(x)
    p_sig = np.zeros_like(x)
    # The overlap-based normalisation is 0 whenever hop_size > fft_size / 2;
    # clamp it to 1 so the window is never divided by zero.
    norm = max(1, (fft_size // hop_size) // 2)
    win = windows.hann(fft_size) / norm
    for i in range(S.shape[0]):
        start_idx = i * hop_size
        n_samples = min(fft_size, len(x) - start_idx)
        seg = slice(start_idx, start_idx + n_samples)
        h_sig[seg] += win[:n_samples] * np.real(np.fft.ifft(H_hat[i, :])[:n_samples])
        p_sig[seg] += win[:n_samples] * np.real(np.fft.ifft(P_hat[i, :])[:n_samples])

    return h_sig, p_sig
import numpy as np
from scipy.signal import windows


def next_pow_2(x):
    """Return the smallest power of two that is >= ``x`` (as an ``int``)."""
    return int(2**(np.ceil(np.log2(x))))


def phase_propagation(S, Fs, Ha, Hs, N):
    """Propagate phases from analysis hop ``Ha`` to synthesis hop ``Hs``.

    Phases are handled in normalised form: ``angle / (2*pi) + 0.5``.

    Args:
        S: complex spectrogram, shape ``(n_frames, n_bins)``.
        Fs: sample rate.
        Ha: analysis hop size in samples.
        Hs: synthesis hop size in samples.
        N: value used to turn a bin index into a frequency (``k * Fs / N``).

    Returns:
        Array shaped like ``S`` holding the modified normalised phases. Frame
        0 keeps its analysed phase.
    """
    def p_arg(x):
        # principal argument: wrap into [-0.5, 0.5)
        return (x + 0.5) % 1.0 - 0.5

    k = np.arange(S.shape[1])
    omega = k * Fs / N

    delta_T = Ha / Fs
    phi = np.angle(S) / (2 * np.pi) + 0.5

    phi_mod = np.copy(phi)
    for m in range(S.shape[0] - 1):
        # instantaneous frequency estimated between frames m and m + 1
        F_if = omega + p_arg(phi[m+1] - (phi[m] + omega * delta_T)) / delta_T
        phi_mod[m+1] = p_arg(phi_mod[m] + F_if * Hs / Fs) + 0.5

    return phi_mod


def spectrogram(x, fft_size, hop_size, zero_pad=1):
    """Compute an STFT of ``x`` using a square-root Hann analysis window.

    A frame starts every ``hop_size`` samples; frames that run past the end of
    ``x`` are kept and zero-filled. Each frame is zero-padded to
    ``fft_size * zero_pad`` samples before the FFT.

    Returns:
        Complex array of shape ``(n_frames, fft_size * zero_pad)``, or
        ``None`` when ``x`` is empty.
    """
    frames = []
    win = np.sqrt(windows.hann(fft_size))
    for i in range(0, len(x), hop_size):
        x_win = np.copy(x[i : i + fft_size])
        x_win *= win[:len(x_win)]
        x_pad = np.zeros(fft_size * zero_pad)
        x_pad[:len(x_win)] = x_win
        # Collect in a list: np.append would re-copy the whole array per frame.
        frames.append(np.fft.fft(x_pad))

    return np.array(frames) if frames else None


def reconstruct(S, hop_size, window_size, L):
    """Overlap-add the frames of ``S`` into a signal of length ``L``.

    Every frame is inverse-transformed, weighted with a square-root Hann
    window and added at ``i * hop_size``. Frames that would start at or past
    ``L`` are ignored.
    """
    y = np.zeros(L)
    win = np.sqrt(windows.hann(window_size))
    for i in range(S.shape[0]):
        start_idx = int(i * hop_size)
        n_samples = min(window_size, L - start_idx)
        if n_samples <= 0:
            # A negative count used to slice from the end of the window and
            # fail to broadcast into the (empty) output slice.
            break
        frame = np.real(np.fft.ifft(S[i, :])[:n_samples])
        y[start_idx : start_idx + n_samples] += win[:n_samples] * frame
    return y


def _match_peak(signal, reference):
    """Scale ``signal`` in place so its peak magnitude equals ``reference``'s."""
    peak = np.max(np.abs(signal))
    if peak > 0.0:  # a silent signal has nothing to scale (avoids 0 / 0)
        signal *= (np.max(np.abs(reference)) / peak)


def stretch(x, fs, stretch_factor):
    """Time-stretch a multi-channel signal by ``stretch_factor``.

    Args:
        x: array of shape ``(n_channels, n_samples)``.
        fs: sample rate in Hz.
        stretch_factor: ratio of synthesis hop to analysis hop; the output
            buffer holds ``n_samples * stretch_factor + 5000`` samples.

    Returns:
        Array of shape ``(n_channels, int(n_samples * stretch_factor) + 5000)``.
    """
    # Imported here so the helpers above stay importable on their own.
    from hpss import hpss

    window_size_sec = 0.1 # 100 milliseconds
    window_size = next_pow_2(window_size_sec * fs)
    short_window_size = next_pow_2(0.005 * fs)

    Hs = window_size // 2 # synthesis hop size
    Ha = int(float(Hs) / stretch_factor) # analysis hop size

    Hs_short = short_window_size // 2 # synthesis hop size (short window)
    Ha_short = int(float(Hs_short) / stretch_factor)

    n_channels = x.shape[0]

    print('Computing mono reference phases...')
    # Only the phase of the mix is used below, so averaging over however many
    # channels there are matches the former stereo-only "sum / 2".
    x_sum = np.sum(x, axis=0) / float(n_channels)
    X_sum = spectrogram(x_sum, window_size, Ha, zero_pad=1)
    phase_mods = phase_propagation(X_sum, fs, Ha, Hs, window_size)

    stretch_len = int(len(x[0]) * stretch_factor) + 5000
    y = np.zeros((n_channels, stretch_len))
    for ch in range(n_channels):
        print(f'Processing channel {ch}...')
        h_signal, p_signal = hpss(x[ch])

        print('Performing time-stretching...')
        H_full = spectrogram(h_signal, window_size, Ha)
        P_full = spectrogram(p_signal, window_size, Ha)

        print('\tSeparated magnitude-only PV...')
        h_x_long = reconstruct(H_full, Hs, window_size, stretch_len)
        P_short = spectrogram(p_signal, short_window_size, Ha_short)
        p_x_short = reconstruct(P_short, Hs_short, short_window_size, stretch_len)

        print('\tApplying reference phases...')
        # NOTE(review): phase_mods holds normalised phases (cycles + 0.5) but is
        # passed to exp() as if it were radians - confirm this is intended.
        H_full = np.multiply(np.abs(H_full), np.exp(1j * phase_mods))
        P_full = np.multiply(np.abs(P_full), np.exp(1j * phase_mods))

        print('\tReconstructing references...')
        h_v = reconstruct(H_full, Hs, window_size, stretch_len)
        p_v = reconstruct(P_full, Hs, window_size, stretch_len)

        print('\tPerforming magnitude correction...')
        H_v_long = spectrogram(h_v, window_size, Ha)
        P_v_short = spectrogram(p_v, short_window_size, Ha_short)

        H_w_long = spectrogram(h_x_long, window_size, Ha)
        P_w_short = spectrogram(p_x_short, short_window_size, Ha_short)

        # magnitudes from the magnitude-only stretch, phases from the reference
        H_y = np.multiply(np.abs(H_w_long), np.exp(1j * np.angle(H_v_long)))
        P_y = np.multiply(np.abs(P_w_short), np.exp(1j * np.angle(P_v_short)))

        print('\tReconstructing final signal...')
        h_y = reconstruct(H_y, Ha, window_size, stretch_len)
        p_y = reconstruct(P_y, Ha_short, short_window_size, stretch_len)

        print('\tNormalizing separated signal...')
        _match_peak(h_y, h_signal)
        _match_peak(p_y, p_signal)

        y[ch] = h_y + p_y

    # The summed output is not normalised; callers that need |y| <= 1 must
    # rescale it themselves.
    return y
libsndfile. 11 | */ 12 | namespace WavIO 13 | { 14 | 15 | using Vec2d = std::vector>; 16 | using SND_PTR = std::unique_ptr; 17 | 18 | Vec2d load_file (const char* file, SF_INFO& sf_info) 19 | { 20 | std::cout << "Loading file: " << file << std::endl; 21 | 22 | SND_PTR wavFile { sf_open(file, SFM_READ, &sf_info), &sf_close }; 23 | 24 | if (sf_info.frames == 0) 25 | { 26 | std::cout << "File could not be opened!" << std::endl; 27 | exit (1); 28 | } 29 | 30 | std::vector readInterleaved(sf_info.channels * sf_info.frames, 0.0); 31 | sf_readf_float(wavFile.get(), readInterleaved.data(), sf_info.frames); 32 | 33 | Vec2d audio (sf_info.channels, std::vector (sf_info.frames, 0.0)); 34 | 35 | // de-interleave channels 36 | for (int i = 0; i < sf_info.frames; ++i) 37 | { 38 | int interleavedPtr = i * sf_info.channels; 39 | for(size_t ch = 0; ch < sf_info.channels; ++ch) 40 | audio[ch][i] = readInterleaved[interleavedPtr + ch]; 41 | } 42 | 43 | return audio; 44 | } 45 | 46 | void write_file (const char* file, const Vec2d& audio, SF_INFO& sf_info) 47 | { 48 | std::cout << "Writing to file: " << file << std::endl; 49 | 50 | const auto channels = (int) audio.size(); 51 | const auto frames = (sf_count_t) audio[0].size(); 52 | sf_info.frames = frames; 53 | 54 | SND_PTR wavFile { sf_open(file, SFM_WRITE, &sf_info), &sf_close }; 55 | std::vector writeInterleaved(channels * frames, 0.0); 56 | 57 | // de-interleave channels 58 | for (int i = 0; i < frames; ++i) 59 | { 60 | int interleavedPtr = i * channels; 61 | for(int ch = 0; ch < channels; ++ch) 62 | writeInterleaved[interleavedPtr + ch] = audio[ch][i]; 63 | } 64 | 65 | sf_writef_float(wavFile.get(), writeInterleaved.data(), frames); 66 | } 67 | 68 | } // namespace WavIO 69 | -------------------------------------------------------------------------------- /src/fft_utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #if TIMESTRETCH_USING_JUCE 4 | #define 
_USE_MATH_DEFINES 5 | #include 6 | 7 | #else 8 | #include 9 | #include // need to include this before fftw for std::complex compatibility 10 | #include 11 | #endif 12 | 13 | namespace fft_utils 14 | { 15 | 16 | static int next_pow2_log(int x) 17 | { 18 | return (int) std::ceil(std::log2((double) x)); 19 | } 20 | 21 | /** Finds the next power of two larger than the given number */ 22 | static int next_pow2(int x) 23 | { 24 | return (int) std::pow(2.0f, (float) next_pow2_log(x)); 25 | } 26 | 27 | #if TIMESTRETCH_USING_JUCE 28 | using fftw_real_vec = std::vector; 29 | using fftw_complex_vec = std::vector>; 30 | 31 | static void applyWindow(const std::vector& data, const std::vector& win, std::vector& out) 32 | { 33 | juce::FloatVectorOperations::multiply(out.data(), win.data(), data.data(), (int) win.size()); 34 | } 35 | 36 | /** Helper struct for performing forward FFTs */ 37 | struct ForwardFFT 38 | { 39 | std::vector x_in; 40 | fftw_complex_vec Y_out; 41 | 42 | ForwardFFT(int n) : fft(next_pow2_log(n)) 43 | { 44 | x_in.resize(n, 0.0f); 45 | x_tmp.resize(2 * n, 0.0f); 46 | Y_out.resize(n, { 0.0f, 0.0f }); 47 | } 48 | 49 | void perform() 50 | { 51 | jassert ((int) x_tmp.size() == fft.getSize() * 2); 52 | 53 | std::copy(x_in.begin(), x_in.end(), x_tmp.begin()); 54 | fft.performRealOnlyForwardTransform(x_tmp.data()); 55 | 56 | for(int i = 0; i < (int) x_tmp.size(); i += 2) 57 | Y_out[i / 2] = std::complex { x_tmp[i], x_tmp[i+1] }; 58 | } 59 | 60 | private: 61 | juce::dsp::FFT fft; 62 | std::vector x_tmp; 63 | }; 64 | 65 | /** Helper struct for performing inverse FFTs */ 66 | struct InverseFFT 67 | { 68 | fftw_complex_vec X_in; 69 | std::vector y_out; 70 | 71 | InverseFFT(int n) : fft(next_pow2_log(n)) 72 | { 73 | X_in.resize(n, { 0.0f, 0.0f }); 74 | x_tmp.resize(2 * n, 0.0f); 75 | y_out.resize(n, 0.0f); 76 | } 77 | 78 | void perform() 79 | { 80 | jassert ((int) x_tmp.size() == fft.getSize() * 2); 81 | 82 | for(int i = 0; i < (int) x_tmp.size(); i += 2) 83 | { 84 | 
x_tmp[i] = X_in[i / 2].real(); 85 | x_tmp[i+1] = X_in[i / 2].imag(); 86 | } 87 | 88 | fft.performRealOnlyInverseTransform(x_tmp.data()); 89 | std::copy(x_tmp.begin(), x_tmp.begin() + y_out.size(), y_out.begin()); 90 | } 91 | 92 | private: 93 | juce::dsp::FFT fft; 94 | std::vector x_tmp; 95 | }; 96 | 97 | #else // use FFTW 98 | /** Custom allocator for making vectors compatible with FFTW */ 99 | template 100 | class fftw_allocator : public std::allocator 101 | { 102 | public: 103 | template 104 | struct rebind { typedef fftw_allocator other; }; 105 | T* allocate(size_t n) { return (T*) fftwf_malloc(sizeof(T) * n); } 106 | void deallocate(T* data, std::size_t size) { fftwf_free(data); } 107 | }; 108 | 109 | using fftw_real_vec = std::vector>; 110 | using fftw_complex_vec = std::vector, fftw_allocator>>; 111 | 112 | /** Re-interpret std::complex as fftw_complex* (requires including before fftw3.h) */ 113 | inline fftwf_complex* toFFTW (fftw_complex_vec& vec) 114 | { 115 | return reinterpret_cast (vec.data()); 116 | } 117 | 118 | /** Applies a window to the given data (out-of-place) */ 119 | static void applyWindow(const std::vector& data, const std::vector& win, std::vector& out) 120 | { 121 | for(int n = 0; n < (int) win.size(); ++n) 122 | out[n] = data[n] * win[n]; 123 | } 124 | 125 | /** Helper struct for performing forward FFTs */ 126 | struct ForwardFFT 127 | { 128 | std::vector x_in; 129 | fftw_complex_vec Y_out; 130 | 131 | ForwardFFT(int n) 132 | { 133 | x_in.resize(n, 0.0f); 134 | Y_out.resize(n, { 0.0f, 0.0f }); 135 | fft_plan = fftwf_plan_dft_r2c_1d(n, x_in.data(), toFFTW(Y_out), FFTW_ESTIMATE); 136 | } 137 | 138 | ~ForwardFFT() 139 | { 140 | fftwf_destroy_plan(fft_plan); 141 | } 142 | 143 | void perform() 144 | { 145 | fftwf_execute(fft_plan); 146 | } 147 | 148 | private: 149 | fftwf_plan fft_plan; 150 | }; 151 | 152 | /** Helper struct for performing inverse FFTs */ 153 | struct InverseFFT 154 | { 155 | fftw_complex_vec X_in; 156 | std::vector y_out; 157 
| 158 | InverseFFT(int n) 159 | { 160 | X_in.resize(n, { 0.0f, 0.0f }); 161 | y_out.resize(n, 0.0f); 162 | fft_plan = fftwf_plan_dft_c2r_1d(n, toFFTW(X_in), y_out.data(), FFTW_ESTIMATE); 163 | oneOverN = 1.0f / (float) n; 164 | } 165 | 166 | ~InverseFFT() 167 | { 168 | fftwf_destroy_plan(fft_plan); 169 | } 170 | 171 | void perform() 172 | { 173 | fftwf_execute(fft_plan); 174 | for(int i = 0; i < (int) y_out.size(); ++i) 175 | y_out[i] *= oneOverN; 176 | } 177 | 178 | private: 179 | fftwf_plan fft_plan; 180 | float oneOverN; 181 | }; 182 | #endif // TIMESTRETCH_USING_JUCE 183 | 184 | /** Helper function for generating Hann window */ 185 | inline std::vector hann(int N, float normalization = 1.0f) 186 | { 187 | std::vector win (N, 0.0f); 188 | for(int i = 0; i < N; ++i) 189 | { 190 | win[i] = 0.5f - 0.5f * std::cos(2.0f * (float)M_PI * (float)i / (float(N - 1))); 191 | win[i] /= normalization; 192 | } 193 | 194 | return win; 195 | } 196 | 197 | } // namespace fft_utils 198 | -------------------------------------------------------------------------------- /src/hpss.cpp: -------------------------------------------------------------------------------- 1 | #include "hpss.h" 2 | #include "fft_utils.h" 3 | #include 4 | #include 5 | #include 6 | 7 | namespace HPSS 8 | { 9 | 10 | using namespace fft_utils; 11 | using Vec2D = std::vector>; 12 | 13 | /** Performs "horizontal" median filtering to obtain harmonic signal */ 14 | Vec2D median_filter_harm(const std::vector& S, int kernel_size) 15 | { 16 | Vec2D H (S.size(), std::vector (S[0].size(), 0.0f)); 17 | 18 | const int pad = (kernel_size - 1) / 2; 19 | std::vector med_vec (S.size() + 2 * pad, 0.0f); 20 | std::vector kernel_vec (kernel_size, 0.0f); 21 | 22 | for(int h = 0; h < (int) S[0].size(); ++h) 23 | { 24 | for(int i = 0; i < (int) S.size(); ++i) 25 | med_vec[i + pad] = std::abs(S[i][h]); 26 | 27 | for(int i = 0; i < (int) S.size(); ++i) 28 | { 29 | std::copy(med_vec.begin() + i, med_vec.begin() + i + kernel_size, 
kernel_vec.begin()); 30 | std::nth_element(kernel_vec.begin(), kernel_vec.begin() + pad, kernel_vec.end()); 31 | H[i][h] = kernel_vec[pad]; 32 | } 33 | } 34 | 35 | return H; 36 | } 37 | 38 | /** Performs "vertical" median filtering to obtain percussive signal */ 39 | Vec2D median_filter_perc(const std::vector& S, int kernel_size) 40 | { 41 | Vec2D P (S.size(), std::vector (S[0].size(), 0.0f)); 42 | 43 | const int pad = (kernel_size - 1) / 2; 44 | std::vector med_vec (S[0].size() + 2 * pad, 0.0f); 45 | std::vector kernel_vec (kernel_size, 0.0f); 46 | 47 | for(int i = 0; i < (int) S.size(); ++i) 48 | { 49 | for(int h = 0; h < (int) S[i].size(); ++h) 50 | med_vec[h + pad] = std::abs(S[i][h]); 51 | 52 | for(int h = 0; h < (int) S[i].size(); ++h) 53 | { 54 | std::copy(med_vec.begin() + i, med_vec.begin() + i + kernel_size, kernel_vec.begin()); 55 | std::nth_element(kernel_vec.begin(), kernel_vec.begin() + pad, kernel_vec.end()); 56 | P[i][h] = kernel_vec[pad]; 57 | } 58 | } 59 | 60 | return P; 61 | } 62 | 63 | /** Computes a time-frequency spectrogram with no window (for now) */ 64 | inline std::vector spectrogram(const std::vector& x, int fft_size, int hop_size, int zero_pad = 1) 65 | { 66 | const auto n_fft = fft_size * zero_pad; 67 | 68 | std::vector S; 69 | fft_utils::ForwardFFT fft { n_fft }; 70 | for(int i = 0; i + fft_size < (int) x.size(); i += hop_size) 71 | { 72 | std::copy(x.begin() + i, x.begin() + i + fft_size, fft.x_in.data()); 73 | fft.perform(); 74 | S.push_back(fft.Y_out); 75 | } 76 | 77 | return S; 78 | } 79 | 80 | /** Reconstructs the harmonic and percussive signals from their spectrograms with a Hann window */ 81 | std::pair, std::vector> spec_reconstruct(std::vector& H_hat, 82 | std::vector& P_hat, 83 | int n_samples, 84 | const int fft_size, 85 | const int hop_size) 86 | { 87 | std::vector h_sig (n_samples, 0.0f); 88 | std::vector p_sig (n_samples, 0.0f); 89 | const auto win = hann(fft_size, float(fft_size / hop_size) / 2.0f); 90 | 91 | const int 
n_fft = (int) H_hat[0].size(); 92 | fft_utils::InverseFFT ifft { n_fft }; 93 | for(int i = 0; i < (int) H_hat.size(); ++i) 94 | { 95 | int start_idx = i * hop_size; 96 | int samples = std::min(fft_size, n_samples - start_idx); 97 | 98 | { // do H 99 | std::copy(H_hat[i].begin(), H_hat[i].end(), ifft.X_in.data()); 100 | ifft.perform(); 101 | fft_utils::applyWindow(ifft.y_out, win, ifft.y_out); 102 | 103 | for(int n = 0; n < samples; ++n) 104 | h_sig[n + start_idx] += ifft.y_out[n]; 105 | } 106 | 107 | { // do P 108 | std::copy(P_hat[i].begin(), P_hat[i].end(), ifft.X_in.data()); 109 | ifft.perform(); 110 | fft_utils::applyWindow(ifft.y_out, win, ifft.y_out); 111 | 112 | for(int n = 0; n < samples; ++n) 113 | p_sig[n + start_idx] += ifft.y_out[n]; 114 | } 115 | } 116 | 117 | return std::make_pair(h_sig, p_sig); 118 | } 119 | 120 | static void debug_print(const std::string& str, bool debug) 121 | { 122 | if(debug) 123 | std::cout << str << std::endl; 124 | } 125 | 126 | std::pair, std::vector> hpss(std::vector x, const HPSS_PARAMS& params) 127 | { 128 | const auto fft_size = next_pow2(int(params.window_length_ms * 0.001 * params.sample_rate)); 129 | const auto hop_size = next_pow2(int(params.hop_length_ms * 0.001 * params.sample_rate)); 130 | 131 | debug_print("Computing HPSS...", params.debug); 132 | debug_print("\tComputing STFTs...", params.debug); 133 | auto S = spectrogram(x, fft_size, hop_size, params.zero_pad); 134 | 135 | debug_print("\tSeparating percussive signal...", params.debug); 136 | auto P = median_filter_perc(S, params.perc_kernel); 137 | 138 | debug_print("\tSeparating harmonic signal...", params.debug); 139 | auto H = median_filter_harm(S, params.harm_kernel); 140 | 141 | debug_print("\tApplying filter masks...", params.debug); 142 | std::vector H_hat (S.size(), fftw_complex_vec(S[0].size())); 143 | std::vector P_hat (S.size(), fftw_complex_vec(S[0].size())); 144 | for(int i = 0; i < (int) S.size(); ++i) 145 | { 146 | for(int h = 0; h < (int) 
S[0].size(); ++h) 147 | { 148 | float H_p = std::pow(H[i][h], params.mask_exp); 149 | float P_p = std::pow(P[i][h], params.mask_exp); 150 | float denom = H_p + P_p + std::numeric_limits::epsilon(); 151 | 152 | H_hat[i][h] = S[i][h] * (H_p / denom); 153 | P_hat[i][h] = S[i][h] * (P_p / denom); 154 | } 155 | } 156 | 157 | debug_print("\tComputing time-domain signal...", params.debug); 158 | return spec_reconstruct(H_hat, P_hat, (int) x.size(), fft_size, hop_size); 159 | } 160 | 161 | } // namespace HPSS 162 | 163 | -------------------------------------------------------------------------------- /src/hpss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | /** 7 | * Algorithm for harmonic/percussive source-separation 8 | * using median filtering. Based on the algorithm 9 | * proposed by Derry Fitzgerald at DAFx 2010: 10 | * https://arrow.tudublin.ie/cgi/viewcontent.cgi?article=1078&context=argcon 11 | */ 12 | namespace HPSS 13 | { 14 | 15 | struct HPSS_PARAMS 16 | { 17 | int perc_kernel = 17; // size of median filter kernel for percussive siganl 18 | int harm_kernel = 17; // size of median filter kernel for harmonic signal 19 | float mask_exp = 3.0f; // exponent used for Weiner filter to construct mask 20 | float sample_rate = 44100.0f; // sample rate of the audio being processed 21 | float window_length_ms = 40.0f; // length of window to use for FFT 22 | float hop_length_ms = 20.0f; // length of hop size to use for FFT (should 1x, 0.5x, or 0.25x of the window length) 23 | int zero_pad = 2; // zero-padding factor to use for FFT 24 | bool debug = false; // enable print debug statements for the algorithm 25 | }; 26 | 27 | /** 28 | * Accepts a single vector of audio samples, and returns a pair of vectors 29 | * with the separate harmonic and percussive signal. 
30 | * 31 | * ``` 32 | * std::vector audio; 33 | * HPSS_PARAMS params; 34 | * auto [harmonic, percussive] = hpss(audio, params); 35 | * ``` 36 | */ 37 | std::pair, std::vector> hpss(std::vector x, const HPSS_PARAMS& params); 38 | 39 | } // namespace HPSS 40 | -------------------------------------------------------------------------------- /src/hpss_test.cpp: -------------------------------------------------------------------------------- 1 | #include "hpss.h" 2 | #include "WavIO.hpp" 3 | 4 | void help() 5 | { 6 | std::cout << "Utility to separate harmonic and percussive signals from a .wav file" << std::endl; 7 | std::cout << "Usage: hpss [ ]" << std::endl; 8 | } 9 | 10 | int main(int argc, char* argv[]) 11 | { 12 | if(argc < 2 || argc > 4) 13 | { 14 | help(); 15 | return 1; 16 | } 17 | 18 | if(argc == 2 && std::string(argv[1]) == "--help") 19 | { 20 | help(); 21 | return 1; 22 | } 23 | 24 | std::string test_file = std::string(argv[1]); 25 | 26 | float num_seconds = 10.0f; 27 | if(argc >= 3) 28 | num_seconds = (float) std::atof(argv[2]); 29 | 30 | float start_seconds = 0.0f; 31 | if(argc >= 4) 32 | start_seconds = (float) std::atof(argv[3]); 33 | 34 | SF_INFO sf_info; 35 | auto wav_signal = WavIO::load_file(test_file.c_str(), sf_info); 36 | const float fs = (float) sf_info.samplerate; 37 | 38 | // trim signal 39 | std::vector> ref_signal; 40 | { 41 | int start_sample = int(fs * start_seconds); 42 | int end_sample = std::min(start_sample + int(fs * num_seconds), (int) sf_info.frames); 43 | for(int ch = 0; ch < (int) wav_signal.size(); ++ch) 44 | ref_signal.push_back(std::vector (&wav_signal[ch][start_sample], &wav_signal[ch][end_sample])); 45 | } 46 | 47 | HPSS::HPSS_PARAMS params; 48 | params.sample_rate = fs; 49 | params.debug = true; 50 | 51 | std::vector> h_signal; 52 | std::vector> p_signal; 53 | std::vector> sum_signal; 54 | for(int ch = 0; ch < (int) ref_signal.size(); ++ch) 55 | { 56 | auto[h_ch, p_ch] = HPSS::hpss(ref_signal[ch], params); 57 | 
h_signal.push_back(h_ch); 58 | p_signal.push_back(p_ch); 59 | 60 | std::vector sum_ch (h_ch.size(), 0.0f); 61 | for(int i = 0; i < (int) h_ch.size(); ++i) 62 | sum_ch[i] = h_ch[i] + p_ch[i]; 63 | 64 | sum_signal.push_back(sum_ch); 65 | } 66 | 67 | WavIO::write_file("ref.wav", ref_signal, sf_info); 68 | WavIO::write_file("harmonic.wav", h_signal, sf_info); 69 | WavIO::write_file("percussive.wav", p_signal, sf_info); 70 | WavIO::write_file("sum.wav", sum_signal, sf_info); 71 | 72 | return 0; 73 | } 74 | -------------------------------------------------------------------------------- /src/stretch.cpp: -------------------------------------------------------------------------------- 1 | #include "stretch.h" 2 | #include "fft_utils.h" 3 | #include 4 | #include 5 | 6 | namespace time_stretch 7 | { 8 | 9 | using namespace fft_utils; 10 | 11 | /** Computes the correct phase propagation for a phase vocoder */ 12 | std::vector> phase_propagation(const std::vector& S, float fs, int Ha, int Hs, int N) 13 | { 14 | auto p_arg = [] (float x) { return std::fmod(x + 0.5f, 1.0f) - 0.5f; }; 15 | 16 | const int M = (int) S.size(); 17 | const int K = (int) S[0].size(); 18 | std::vector omega (K, 0.0f); 19 | for(int k = 0; k < (int) omega.size(); ++k) 20 | omega[k] = (float) k * fs / (float) N; 21 | 22 | std::vector> phi (M, std::vector (K, 0.0f)); 23 | for(int m = 0; m < M; ++m) 24 | for(int k = 0; k < K; ++k) 25 | phi[m][k] = std::arg(S[m][k]) / (2.0f * (float)M_PI) + 0.5f; 26 | 27 | const float delta_T = (float) Ha / fs; 28 | std::vector> phi_mod (M, std::vector (K, 0.0f)); 29 | std::copy(phi[0].begin(), phi[0].end(), phi_mod[0].begin()); 30 | for(int m = 0; m < M - 1; ++m) 31 | { 32 | for(int k = 0; k < K; ++k) 33 | { 34 | auto F_if = omega[k] + p_arg(phi[m+1][k] - (phi[m][k] + omega[k] * delta_T)) / delta_T; 35 | phi_mod[m+1][k] = p_arg(phi_mod[m][k] + F_if * (float) Hs / fs) + 0.5f; 36 | } 37 | } 38 | 39 | return phi_mod; 40 | } 41 | 42 | /** Generates a square-root Hann window */ 
43 | std::vector sqrt_hann(int N) 44 | { 45 | auto win = fft_utils::hann(N); 46 | for(int n = 0; n < N; ++n) 47 | win[n] = std::sqrt(win[n]); 48 | 49 | return win; 50 | } 51 | 52 | /** Computes a time-frequency spectrogram with a sqrt Hann window */ 53 | std::vector spectrogram(const std::vector& x, int fft_size, int hop_size, int zero_pad = 1) 54 | { 55 | const auto n_fft = fft_size * zero_pad; 56 | auto win = sqrt_hann(fft_size); 57 | 58 | std::vector S; 59 | fft_utils::ForwardFFT fft { n_fft }; 60 | for(int i = 0; i + fft_size < (int) x.size(); i += hop_size) 61 | { 62 | std::copy(x.begin() + i, x.begin() + i + fft_size, fft.x_in.data()); 63 | fft_utils::applyWindow(fft.x_in, win, fft.x_in); 64 | fft.perform(); 65 | S.push_back(fft.Y_out); 66 | } 67 | 68 | return S; 69 | } 70 | 71 | /** Reconstructs the signal from its spectrograms with a sqrt Hann window */ 72 | std::vector reconstruct(std::vector& S, int hop_size, int window_size, int L) 73 | { 74 | std::vector y (L, 0.0f); 75 | int final_idx = 0; 76 | auto win = sqrt_hann(window_size); 77 | 78 | const auto n_fft = (int) S[0].size(); 79 | fft_utils::InverseFFT ifft { n_fft }; 80 | for(int i = 0; i < (int) S.size(); ++i) 81 | { 82 | const auto start_idx = int(i * hop_size); 83 | const auto n_samples = std::min(window_size, L - start_idx); 84 | 85 | std::copy(S[i].begin(), S[i].end(), ifft.X_in.data()); 86 | ifft.perform(); 87 | fft_utils::applyWindow(ifft.y_out, win, ifft.y_out); 88 | 89 | for(int n = 0; n < n_samples; ++n) 90 | y[n + start_idx] += ifft.y_out[n]; 91 | final_idx = start_idx + n_samples; 92 | } 93 | 94 | return { y.begin(), y.begin() + final_idx }; 95 | } 96 | 97 | /** Compares greater/less than with absolute value */ 98 | template 99 | static bool abs_compare(T a, T b) 100 | { 101 | return (std::abs(a) < std::abs(b)); 102 | } 103 | 104 | /** Returns the maximum absolute value in a vector */ 105 | static float max_abs(const std::vector& vec) 106 | { 107 | return 
std::abs(*std::max_element(vec.begin(), vec.end(), abs_compare)); 108 | } 109 | 110 | /** Normalizes the signal in a vector to have the same magnitude as the reference */ 111 | static void normalize_vec(const std::vector& vec_ref, std::vector& vec_cur) 112 | { 113 | auto mag_ref = max_abs(vec_ref); 114 | auto mag_cur = max_abs(vec_cur); 115 | for(float& val : vec_cur) 116 | val *= mag_ref / mag_cur; 117 | } 118 | 119 | static void debug_print(const std::string& str, bool debug) 120 | { 121 | if(debug) 122 | std::cout << str << std::endl; 123 | } 124 | 125 | std::vector> time_stretch(const std::vector>& x, STRETCH_PARAMS& params) 126 | { 127 | params.hpss_params.sample_rate = params.sample_rate; // make sure hpss is using the same sample rate 128 | 129 | const auto long_window_size = next_pow2(int(params.long_window_ms * 0.001 * params.sample_rate)); 130 | const auto short_window_size = next_pow2(int(params.short_window_ms * 0.001 * params.sample_rate)); 131 | 132 | const auto Hs_long = long_window_size / 2; 133 | const auto Ha_long = int((float) Hs_long / params.stretch_factor); 134 | 135 | const auto Hs_short = short_window_size / 2; 136 | const auto Ha_short = int((float) Hs_short / params.stretch_factor); 137 | 138 | debug_print("Computing mono reference phases...", params.debug); 139 | std::vector x_sum (x[0].size(), 0.0f); 140 | for(int ch = 0; ch < (int) x.size(); ++ch) 141 | for(int n = 0; n < (int) x_sum.size(); ++n) 142 | x_sum[n] += x[ch][n] / (float) x.size(); 143 | auto X_sum = spectrogram(x_sum, long_window_size, Ha_long); 144 | const auto phase_mods = phase_propagation(X_sum, params.sample_rate, Ha_long, Hs_long, long_window_size); 145 | 146 | const auto stretch_len = int((float) x[0].size() * params.stretch_factor) + 2000; 147 | int out_len = -1; 148 | std::vector> y; 149 | for(int ch = 0; ch < (int) x.size(); ++ch) 150 | { 151 | debug_print("Processing channel " + std::to_string(ch) + "...", params.debug); 152 | auto[h_signal, p_signal] = 
HPSS::hpss(x[ch], params.hpss_params); 153 | 154 | debug_print("Performing time-stretching...", params.debug); 155 | auto H_full = spectrogram(h_signal, long_window_size, Ha_long); 156 | auto P_full = spectrogram(p_signal, long_window_size, Ha_long); 157 | 158 | debug_print("\tSeparating magnitude-only PV...", params.debug); 159 | auto H_long = spectrogram(h_signal, long_window_size, Ha_long); 160 | const auto h_x_long = reconstruct(H_long, Hs_long, long_window_size, stretch_len); 161 | auto P_short = spectrogram(p_signal, short_window_size, Ha_short); 162 | const auto p_x_short = reconstruct(P_short, Hs_short, short_window_size, stretch_len); 163 | 164 | debug_print("\tApplying reference phases...", params.debug); 165 | for(int m = 0; m < (int) H_full.size(); ++m) 166 | { 167 | for(int k = 0; k < (int) H_full[m].size(); ++k) 168 | { 169 | H_full[m][k] = std::polar(std::abs(H_full[m][k]), phase_mods[m][k]); 170 | P_full[m][k] = std::polar(std::abs(P_full[m][k]), phase_mods[m][k]); 171 | } 172 | } 173 | 174 | debug_print("\tReconstructing references...", params.debug); 175 | const auto h_v = reconstruct(H_full, Hs_long, long_window_size, stretch_len); 176 | const auto p_v = reconstruct(P_full, Hs_long, long_window_size, stretch_len); 177 | 178 | debug_print("\tPerforming magnitude correction...", params.debug); 179 | auto H_v_long = spectrogram(h_v, long_window_size, Ha_long); 180 | auto P_v_short = spectrogram(p_v, short_window_size, Ha_short); 181 | 182 | auto H_w_long = spectrogram(h_x_long, long_window_size, Ha_long); 183 | auto P_w_short = spectrogram(p_x_short, short_window_size, Ha_short); 184 | 185 | for(int m = 0; m < (int) H_w_long.size(); ++m) 186 | for(int k = 0; k < (int) H_w_long[m].size(); ++k) 187 | H_v_long[m][k] = std::polar(std::abs(H_w_long[m][k]), std::arg(H_v_long[m][k])); 188 | 189 | const auto M = (int) std::min(P_v_short.size(), P_w_short.size()); 190 | for(int m = 0; m < M; ++m) 191 | for(int k = 0; k < (int) P_w_short[m].size(); ++k) 192 | 
P_v_short[m][k] = std::polar(std::abs(P_w_short[m][k]), std::arg(P_v_short[m][k])); 193 | 194 | debug_print("\tReconstructing final signal...", params.debug); 195 | auto h_y = reconstruct(H_v_long, Ha_long, long_window_size, stretch_len); 196 | auto p_y = reconstruct(P_v_short, Ha_short, short_window_size, stretch_len); 197 | 198 | debug_print("\tNormalizing and combining signals...", params.debug); 199 | normalize_vec(h_signal, h_y); 200 | normalize_vec(p_signal, p_y); 201 | 202 | if(out_len < 0) 203 | out_len = std::min((int) h_y.size(), (int) p_y.size()); 204 | 205 | y.emplace_back(out_len, 0.0f); 206 | for(int i = 0; i < std::min(out_len, (int) h_y.size()); ++i) 207 | y[ch][i] += h_y[i]; 208 | 209 | for(int i = 0; i < std::min(out_len, (int) p_y.size()); ++i) 210 | y[ch][i] += p_y[i]; 211 | 212 | normalize_vec(x[ch], y[ch]); 213 | } 214 | 215 | return y; 216 | } 217 | 218 | } // namespace time_stretch 219 | -------------------------------------------------------------------------------- /src/stretch.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "hpss.h" 4 | 5 | /** 6 | * Algorithm for time-stretching audio without altering 7 | * the pitch. Based on the algorithm proposed by Nicolas 8 | * Juillerat and Beat Hirsbrunner at ICASP 2017: 9 | * http://www.pitchtech.ch/Confs/ICASSP2017/0000716.pdf 10 | * 11 | * The basic idea is to separate harmonic and percussive 12 | * parts of the signal and stretch them using a phase vocoder 13 | * with different window sizes for the different parts of 14 | * the signal. However, a single phase correction is used 15 | * across all parts and all channels to preserve phase 16 | * coherence. 
17 | */ 18 | namespace time_stretch 19 | { 20 | 21 | struct STRETCH_PARAMS 22 | { 23 | HPSS::HPSS_PARAMS hpss_params; // parameters for harmonic/percussive source-separation 24 | float stretch_factor = 1.0f; // the time-stretching factor used by the stretcher 25 | float sample_rate = 44100.0f; // the sample-rate of the incomind audio signal 26 | float long_window_ms = 100.0f; // the (long) window length to use for the harmonic signal 27 | float short_window_ms = 1.0f; // the (short) window length to use for the percussive signal 28 | bool debug = false; // enable print debug statements for the algorithm 29 | }; 30 | 31 | /** Performs time-stretching on a multi-channel audio signal */ 32 | std::vector> time_stretch(const std::vector>& x, STRETCH_PARAMS& params); 33 | 34 | } // namespace time_stretch 35 | -------------------------------------------------------------------------------- /src/stretch_test.cpp: -------------------------------------------------------------------------------- 1 | #include "stretch.h" 2 | #include "WavIO.hpp" 3 | 4 | void help() 5 | { 6 | std::cout << "Utility to time-stretch a .wav file" << std::endl; 7 | std::cout << "Usage: hpss [ ]" << std::endl; 8 | } 9 | 10 | int main(int argc, char* argv[]) 11 | { 12 | if(argc < 3 || argc > 5) 13 | { 14 | help(); 15 | return 1; 16 | } 17 | 18 | std::string test_file = std::string(argv[1]); 19 | const auto stretch_factor = (float) std::atof(argv[2]); 20 | 21 | float num_seconds = 10.0f; 22 | if(argc >= 4) 23 | num_seconds = (float) std::atof(argv[3]); 24 | 25 | float start_seconds = 0.0f; 26 | if(argc >= 5) 27 | start_seconds = (float) std::atof(argv[4]); 28 | 29 | SF_INFO sf_info; 30 | auto wav_signal = WavIO::load_file(test_file.c_str(), sf_info); 31 | const float fs = (float) sf_info.samplerate; 32 | 33 | // trim signal 34 | std::vector> ref_signal; 35 | { 36 | int start_sample = int(fs * start_seconds); 37 | int end_sample = std::min(start_sample + int(fs * num_seconds), (int) sf_info.frames); 38 
| for(int ch = 0; ch < (int) wav_signal.size(); ++ch) 39 | ref_signal.push_back(std::vector (&wav_signal[ch][start_sample], &wav_signal[ch][end_sample])); 40 | } 41 | 42 | time_stretch::STRETCH_PARAMS params; 43 | params.sample_rate = fs; 44 | params.stretch_factor = stretch_factor; 45 | params.debug = true; 46 | params.hpss_params.debug = true; 47 | 48 | auto stretch_signal = time_stretch::time_stretch(ref_signal, params); 49 | 50 | WavIO::write_file("ref.wav", ref_signal, sf_info); 51 | WavIO::write_file("stretch.wav", stretch_signal, sf_info); 52 | 53 | return 0; 54 | } 55 | --------------------------------------------------------------------------------