├── LICENSE ├── README.md ├── __init__.py └── swingify.py /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Colin Fahy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
"""swingify -- make any song swing.

Usage::

    python swingify.py [-h] [-f FACTOR] [--format FORMAT] audio_path output

The input is split at detected beat positions.  Each beat is cut in half,
the first half is time-stretched at one rate and the second half at
another, and the pieces are joined back together (optionally with a short
cross-fade) to produce a swung rhythm.
"""
import argparse
import math

import numpy as np

# NOTE: librosa and soundfile are imported inside the functions that use
# them.  librosa is slow to import, and deferring it keeps ``--help`` and
# the NumPy-only helpers (``sum_signals``, ``ola``) usable without it.


def _swing_rates(factor):
    """Return the ``(first_half, second_half)`` time-stretch rates for *factor*."""
    val = (factor - 1) / (5 * factor + 2)
    return 1 - 2 * val, 1 + 5 * val


def swingify(file_path, outfile, factor, sr=44100, hop_length=512, format=None, max_length=None):
    """Load an audio file, swing it, and write the result to *outfile*.

    Args:
        file_path: Path of the audio file to read.
        outfile: Path (or file object) handed to ``soundfile.write``.
        factor: Swing factor; see ``synthesize``.
        sr: Sample rate the audio is resampled to on load.
        hop_length: Hop length (in samples) used for beat tracking.
        format: Output format passed through to ``soundfile.write``.
        max_length: Maximum duration to load, passed to ``librosa.load``
            as ``duration``.

    Returns:
        The list of ``(start, end)`` sample pairs produced by ``get_beats``.
    """
    import librosa
    import soundfile as sf

    y, sr = librosa.load(file_path, mono=False, sr=sr, duration=max_length)
    print(y.shape)
    # Beat tracking runs on a mono mix-down; synthesis uses the raw channels.
    mono_samples = librosa.to_mono(y)
    raw_samples = np.atleast_2d(y)
    # force stereo
    if raw_samples.shape[0] < 2:
        print('doubling mono signal to be stereo')
        raw_samples = np.vstack([raw_samples, raw_samples])

    beats = get_beats(mono_samples, sr, hop_length)

    output = synthesize(raw_samples, beats, factor)

    # Scale down to leave headroom for the summed cross-fade regions.
    output = output * 0.7
    print(sr)
    sf.write(outfile, output.T, int(sr), format=format)
    return beats


def get_beats(samples, sr=44100, hop_length=512):
    """Return beat intervals of a mono signal as ``(start, end)`` sample pairs.

    Beat frames from ``librosa.beat.beat_track`` are converted to sample
    positions, and ``0`` and ``len(samples)`` are added as boundaries.
    Each pair is ``(s, t - 1)`` for consecutive boundaries ``s`` and ``t``.
    """
    import librosa

    _, beat_frames = librosa.beat.beat_track(
        y=samples, sr=sr, trim=False, hop_length=hop_length)

    boundaries = beat_frames * hop_length
    boundaries = librosa.util.fix_frames(boundaries, x_min=0, x_max=len(samples))

    return [(s, t - 1) for s, t in zip(boundaries, boundaries[1:])]


def synthesize(raw_samples, beats, factor):
    """Swing a stereo signal beat by beat, cross-fading every join.

    Args:
        raw_samples: Array of shape ``(2, n_samples)``.
        beats: Iterable of ``(start, end)`` sample pairs.
        factor: Swing factor.  For ``factor > 1`` the first half of each
            beat is stretched at a rate below 1 and the second half at a
            rate above 1.

    Returns:
        Array of shape ``(2, n_out)`` holding the swung signal.

    Beats shorter than two cross-fade windows are skipped: they cannot be
    tapered, and would otherwise fail with a broadcasting error.
    """
    output = np.zeros((2, raw_samples.shape[1] * 2))
    offset = 0
    rate1, rate2 = _swing_rates(factor)

    winsize = 128
    window = np.hanning(winsize * 2 - 1)
    fade_in = window[:winsize]
    fade_out = window[-winsize:]
    # Extra input samples taken around the midpoint so that, after
    # stretching, each half gains roughly ``winsize`` samples of overlap.
    winsize1 = int(math.floor(winsize * rate1))
    winsize2 = int(math.floor(winsize * rate2))

    for start, end in beats:
        frame = raw_samples[:, start:end]
        if frame.shape[1] < 2 * winsize:
            continue

        # timestretch the eighth notes
        mid = frame.shape[1] // 2
        left = timestretch(frame[:, :mid + winsize1], rate1)
        right = timestretch(frame[:, max(0, mid - winsize2):], rate2)

        # taper the ends to 0 to avoid discontinuities
        left[:, :winsize] *= fade_in
        left[:, -winsize:] *= fade_out
        right[:, :winsize] *= fade_in
        right[:, -winsize:] *= fade_out

        # overlap-add the tail of the left half with the head of the right
        seam = sum_signals([left[:, -winsize:], right[:, :winsize]])
        frame = np.hstack([left[:, :-winsize], seam, right[:, winsize:]])

        if offset > 0:
            # overlap-add this beat's fade-in onto the previous fade-out
            output[:, offset - winsize:offset] += frame[:, :winsize]
        body = frame[:, winsize:]
        output[:, offset:offset + body.shape[1]] = body

        offset += body.shape[1]

    return output[:, :offset]


def synthesize_no_crossfade(raw_samples, beats, factor):
    """Swing a stereo signal beat by beat, butt-joining the pieces.

    Takes the same arguments and returns the same kind of array as
    ``synthesize``, but the stretched halves are concatenated directly
    rather than cross-faded.
    """
    output = np.zeros((2, raw_samples.shape[1] * 2))
    offset = 0
    rate1, rate2 = _swing_rates(factor)

    for start, end in beats:
        # take one extra sample at end of frame
        frame = raw_samples[:, start:end + 1]

        # timestretch the eighth notes
        mid = (frame.shape[1] - 1) // 2
        # take one extra sample at end of left frame
        left = timestretch(frame[:, :mid + 1], rate1)
        right = timestretch(frame[:, mid:], rate2)

        # trim extra samples before joining back together
        # (slice the sample axis; ``right[::-1]`` would swap the channels)
        frame = np.hstack([left[:, :-1], right[:, :-1]])

        output[:, offset:offset + frame.shape[1]] = frame
        offset += frame.shape[1]

    return output[:, :offset]


def timestretch(signal, factor):
    """Time-stretch both channels of a ``(2, n)`` signal at rate *factor*.

    ``rate`` is passed by keyword because it is keyword-only in
    librosa >= 0.10; older versions accept the keyword as well.
    """
    import librosa

    left = librosa.effects.time_stretch(signal[0, :], rate=factor)
    right = librosa.effects.time_stretch(signal[1, :], rate=factor)
    return np.vstack([left, right])


def sum_signals(signals):
    """
    Sum together a list of stereo signals
    append zeros to match the longest array

    Returns an empty array when *signals* is empty.
    """
    if not signals:
        return np.array([])
    max_length = max(sig.shape[1] for sig in signals)
    total = np.zeros([2, max_length])
    for sig in signals:
        total[:, :sig.shape[1]] += sig
    return total


def ola(samples, win_length, hop_length, factor):
    """Time-scale a 1-D signal by plain windowed overlap-add.

    Frames of ``win_length`` samples are read every ``hop_length * factor``
    samples (offset by ``hop_length``), multiplied by a Hann window and
    summed into the output at the read position divided by *factor*.

    Returns:
        Array of length ``int(len(samples) / factor) + win_length``.

    Sizes and positions are truncated to integers; NumPy does not accept
    float shapes or float slice indices.
    """
    window = np.hanning(win_length)
    result = np.zeros(int(len(samples) / factor) + win_length)

    last_start = len(samples) - (win_length + hop_length)
    for pos in np.arange(0, last_start, hop_length * factor):
        src = int(pos) + hop_length
        dst = int(pos / factor)
        result[dst:dst + win_length] += window * samples[src:src + win_length]

    return result


def main(argv=None):
    """Parse command-line arguments and run ``swingify``."""
    parser = argparse.ArgumentParser(
        description="Make a song swing")
    parser.add_argument('audio_path', type=str, help='Input audio file path')
    parser.add_argument('output', type=str, help='Output file path')
    parser.add_argument('-f', '--factor', type=float, default=2.0,
                        help='Swing factor {light: 1.5, medium: 2.0, hard: 3.0}')
    parser.add_argument('--format', type=str, default='wav',
                        help='Output audio format')
    args = parser.parse_args(argv)

    swingify(args.audio_path, args.output, args.factor, format=args.format)


if __name__ == '__main__':
    main()