├── Audio ├── Better Man_clip.wav └── BecauseOfLove_clip.wav ├── README.md └── voiceMusicSeparation.py /Audio/Better Man_clip.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iamhankai/voiceMusicSeparation/HEAD/Audio/Better Man_clip.wav -------------------------------------------------------------------------------- /Audio/BecauseOfLove_clip.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iamhankai/voiceMusicSeparation/HEAD/Audio/BecauseOfLove_clip.wav -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Voice Music Separation 2 | -------- 3 | 4 | 该工程用于参加第六届浙大华为杯题目: 5 | [流行歌曲歌声提取](http://paas-developer.huawei.com/competition#!/competition/subjects/551234f01bd7a2a52753d995) 6 | 7 | 主要功能为歌声音乐分离`fuck()`以及播放音乐`playAudio()`,还有内部函数矩阵恢复(RPCA)的IALM算法等。 8 | 9 | -------- 10 | 11 | 该工程基于Python开发,需要安装Python及工程相关包:NumPy、SciPy、Pymedia、stft 12 | 13 | 使用步骤 14 | 15 | - 下载并解压工程,解压后路径如`E:\Python\voiceMusicSeparation` 16 | 17 | - 打开Python,把工程文件夹加入到搜索路径 18 | 19 | ``` 20 | import sys 21 | sys.path.append(r'E:\Python\voiceMusicSeparation') 22 | ``` 23 | 如果要永久添加该路径,可以在Python的`D:\Python27\Lib\site-packages`文件夹下新建`mypkpath.pth`,里面写上要添加的路径 24 | ``` 25 | # .pth file for my project(这行是注释) 26 | E:\Python\voiceMusicSeparation 27 | ``` 28 | 29 | - 导入该工程module 30 | ``` 31 | import voiceMusicSeparation as vms 32 | ``` 33 | 34 | - 把你的`wav`格式歌曲放到`Audio`文件夹,注意歌曲名称改成英文。进行歌声和音乐的分离 35 | ``` 36 | vms.fuck('Audio/歌曲名称.wav') 37 | ``` 38 | 39 | - 打开工程主目录下生成的歌声`outputE.wav`和音乐`outputA.wav`查看分离效果。也可以在Python中播放: 40 | ``` 41 | vms.playAudio('outputE.wav') 42 | ``` 43 | 44 | - - - 45 | 46 | 作者:Luwak队@ZJU 47 | 48 | 说明:仅用于学习交流,禁止用于商业目的 49 | -------------------------------------------------------------------------------- /voiceMusicSeparation.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | voice music separation 4 | -------------------------------------- 5 | reference 6 | Po-Sen Huang / Singing-voice separation from monaural recordings using robust principal component analysis 7 | Zhouchen Lin / The augmented lagrange multiplier method for exact recovery of corrupted low-rank matrices 8 | --------------------------------------- 9 | used package 10 | math/scipy/numpy 11 | stft0.4.7 URL:https://pypi.python.org/pypi/stft/0.4.7 12 | pymedia1.3.7.3 URL:http://www.lfd.uci.edu/~gohlke/pythonlibs/#pymedia 13 | ------------------------------------- 14 | @author: HanSir 15 | website: http://hansir.net 16 | Created on Fri Apr 17 09:00:18 2015 17 | """ 18 | import math 19 | import scipy 20 | import numpy as np 21 | # 播放音频 22 | def playAudio(filename): 23 | import pymedia.audio.acodec as acodec 24 | import pymedia.muxer as muxer 25 | import pymedia.audio.sound as sound 26 | import time 27 | 28 | name1 = str.split(filename, '.') 29 | # Open demuxer first 30 | dm = muxer.Demuxer(name1[-1].lower()) 31 | dec = None 32 | snd = None 33 | s = " " 34 | f = open(filename, 'rb') 35 | 36 | while len(s): 37 | s = f.read(20000) 38 | if len(s): 39 | # 解析出最初的几帧音频数据 40 | frames = dm.parse(s) 41 | for fr in frames: 42 | if dec == None: 43 | # Open decoder 44 | dec = acodec.Decoder(dm.streams[0]) 45 | #音频数据在 frame 数组的第二个元素中 46 | r = dec.decode(fr[1]) 47 | if r and r.data: 48 | if snd == None: 49 | snd = sound.Output(r.sample_rate, r.channels, sound.AFMT_S16_LE) 50 | snd.play(r.data) 51 | #8.延时,直到播放完毕 52 | while snd.isPlaying(): 53 | time.sleep(0.5) 54 | 55 | # 收缩算子 56 | def shrinkage(X,eps): 57 | S = np.sign(X)*(abs(X)-1.0*eps)*(abs(X)>eps) 58 | return S 59 | # RPCA的IALM算法 60 | def ialmRPCA(D, lamb=1, tol1=0.00001, tol2=0.0001, maxIter=1000): 61 | n1,n2 = np.shape(D) 62 | lamb = lamb/math.sqrt(max(n1,n2)) 63 | A = np.zeros((n1,n2), dtype=float) 64 | E = 
np.zeros((n1,n2), dtype=float) 65 | S = np.zeros((n1,n2), dtype=float) 66 | # 受启发于对偶方法,Y=sgn(D)/J(sgn(D)) 67 | normDfro = np.linalg.norm(D,'fro') 68 | normD2 = np.linalg.norm(D,2) 69 | normDinf = np.linalg.norm(D,np.inf) 70 | JD = max(normD2,normDinf) 71 | Y = np.sign(D)/JD 72 | # 根据Lin的论文 73 | mu = 1.25/normD2 74 | rho = 1.6 75 | for i in range(maxIter): 76 | U, s, Vh = scipy.linalg.svd(Y/mu+D-E,full_matrices=True) 77 | for j in range(len(s)): 78 | S[j][j] = s[j] 79 | S = shrinkage(S,1/mu) 80 | A = np.dot(np.dot(U,S),Vh) 81 | tempE = E; 82 | E = shrinkage(Y/mu+D-A,lamb/mu) 83 | # update Y and mu 84 | Y += mu*(D-A-E) 85 | mu = rho*mu 86 | # dispaly 87 | stop1 = np.linalg.norm(D-A-E,'fro')/normDfro 88 | stop2 = np.linalg.norm(E-tempE,'fro')/normDfro 89 | print("iteration:%d, |D-A-E|_F/|D|_F:%.8f" % (i,stop1)) 90 | # stop 91 | if(stop1=maxIter): 94 | print("Max number of iter reached.") 95 | break; 96 | return A,E,i 97 | 98 | # 歌声音乐分离 99 | def voiceMusicSeparation(audio,masktype=1,lamb=1.25,gain=1.25): 100 | import stft 101 | # stft 102 | specgram = stft.spectrogram(audio) 103 | # rpca 104 | D = abs(specgram) 105 | angle = np.angle(specgram) 106 | A_mag,E_mag,numiter = ialmRPCA(D,lamb) 107 | A = A_mag*scipy.exp(angle*1j) 108 | E = E_mag*scipy.exp(angle*1j) 109 | # binary mask 110 | if(masktype): 111 | m = 1.0*(abs(E_mag)>abs(gain*A_mag)) 112 | Emask = m*specgram 113 | Amask = specgram-Emask 114 | else: 115 | Emask = E 116 | Amask = A 117 | # istft 118 | outputA = stft.ispectrogram(Amask) 119 | outputE = stft.ispectrogram(Emask) 120 | #output 121 | wavoutA = np.array(outputA[:len(audio)],dtype=np.int16) 122 | wavoutE = np.array(outputE[:len(audio)],dtype=np.int16) 123 | return wavoutA, wavoutE 124 | 125 | # 对歌曲进行歌声音乐分离(单双通道都可以处理) 126 | def fuck(filename,masktype=1,lamb=1.25,gain=1.25): 127 | import scipy.io.wavfile as wav 128 | # input 129 | fs, audio = wav.read(filename) 130 | print("Voice Music Separation Starts...") 131 | if(audio.shape[1]==1): 132 | # 
voiceMusicSeparation 133 | wavoutA, wavoutE = voiceMusicSeparation(audio,masktype,lamb,gain) 134 | # output 135 | wav.write('outputA.wav', fs, wavoutA) 136 | wav.write('outputE.wav', fs, wavoutE) 137 | elif(audio.shape[1]==2): 138 | audio0 = np.array([audio[i][0] for i in range(len(audio))]) 139 | audio1 = np.array([audio[i][1] for i in range(len(audio))]) 140 | # voiceMusicSeparation 141 | wavoutA0, wavoutE0 = voiceMusicSeparation(audio0,masktype,lamb,gain) 142 | wavoutA1, wavoutE1 = voiceMusicSeparation(audio1,masktype,lamb,gain) 143 | # output 144 | wavoutA = np.array([[wavoutA0[i],wavoutA1[i]] for i in range(len(audio))]) 145 | wav.write('outputA.wav', fs, wavoutA) 146 | wavoutE = np.array([[wavoutE0[i],wavoutE1[i]] for i in range(len(audio))]) 147 | wav.write('outputE.wav', fs, wavoutE) 148 | else: 149 | print("Sorry, your song is too complex to deal with.") 150 | print("Voice Music Separation Completes.") 151 | print("voice: outputE.wav") 152 | print("music: outputA.wav") 153 | ##########################################################################33 154 | #filename = 'Audio\Better Man_clip.wav' 155 | #masktype = 1 # 1: binary mask, 0: no mask 156 | #lamb = 1.25 # lambda in ialm_rpca 157 | #gain = 1.25 158 | #fuck(filename) 159 | --------------------------------------------------------------------------------