├── 主成分分析.doc ├── 主成分分析.docx ├── 逻辑斯蒂回归实验.docx ├── 主成分分析~EDF9F.tmp ├── 多项式函数拟合实验.docx ├── EM算法求解高斯混合模型.docx ├── he.py ├── he.py~ ├── pca~ ├── pca_final.py ├── pca_final.py~ ├── pca.py~ ├── pca.py ├── gonge.m~ ├── gonge.m ├── test ├── test~ ├── emsamesigma.py ├── emsamesigma.py~ ├── regulazation.py~ ├── regulazation.py ├── emsolute.py ├── emsolute.py~ ├── regulazationl2.py ├── regulazationl2.py~ ├── em.py ├── lr.py ├── lr.py~ ├── lrmap.py~ ├── em.py~ └── lrmap.py /主成分分析.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hitcszq/machine_learning_basic_algo/HEAD/主成分分析.doc -------------------------------------------------------------------------------- /主成分分析.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hitcszq/machine_learning_basic_algo/HEAD/主成分分析.docx -------------------------------------------------------------------------------- /逻辑斯蒂回归实验.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hitcszq/machine_learning_basic_algo/HEAD/逻辑斯蒂回归实验.docx -------------------------------------------------------------------------------- /主成分分析~EDF9F.tmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hitcszq/machine_learning_basic_algo/HEAD/主成分分析~EDF9F.tmp -------------------------------------------------------------------------------- /多项式函数拟合实验.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hitcszq/machine_learning_basic_algo/HEAD/多项式函数拟合实验.docx -------------------------------------------------------------------------------- /EM算法求解高斯混合模型.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hitcszq/machine_learning_basic_algo/HEAD/EM算法求解高斯混合模型.docx 
-------------------------------------------------------------------------------- /he.py: -------------------------------------------------------------------------------- 1 | W=[1,2,3,4] 2 | A=[] 3 | A=W 4 | 5 | print A 6 | def ni(A): 7 | A[1]=20 8 | ni(A) 9 | print A 10 | -------------------------------------------------------------------------------- /he.py~: -------------------------------------------------------------------------------- 1 | W=[1,2,3,4] 2 | global A 3 | A=W 4 | 5 | print A 6 | def ni(A): 7 | global A 8 | A[1]=20 9 | ni(A) 10 | -------------------------------------------------------------------------------- /pca~: -------------------------------------------------------------------------------- 1 | from numpy import * 2 | #coding utf-8 3 | def pca(dataMat, topNfeat=5): 4 | meanVals = mean(dataMat, axis=0) 5 | meanRemoved = dataMat - meanVals #减去均值 6 | stded = meanRemoved / std(dataMat) #用标准差归一化 7 | covMat = cov(stded, rowvar=0) #求协方差方阵 8 | eigVals, eigVects = linalg.eig(mat(covMat)) #求特征值和特征向量 9 | eigValInd = argsort(eigVals) #对特征值进行排序 10 | eigValInd = eigValInd[:-(topNfeat + 1):-1] 11 | redEigVects = eigVects[:, eigValInd] # 除去不需要的特征向量 12 | lowDDataMat = stded * redEigVects #求新的数据矩阵 13 | reconMat = (lowDDataMat * redEigVects.T) * std(dataMat) + meanVals 14 | return lowDDataMat, reconMat 15 | 16 | randArray = random.random(size=(10,8)) 17 | print randArray 18 | a,b=pca(randArray) 19 | print a,b 20 | 21 | 22 | -------------------------------------------------------------------------------- /pca_final.py: -------------------------------------------------------------------------------- 1 | from numpy import * 2 | #coding: utf-8 3 | def pca(dataMat, K=5): 4 | meanVals = mean_value(dataMat) 5 | 6 | meanRemoved = dataMat - meanVals 7 | stded = meanRemoved / std(dataMat,axis=0) 8 | covMat = cov(stded, axis=0) 9 | eigVals, eigVects = linalg.eig(mat(covMat)) 10 | eigValInd = argsort(eigVals) 11 | eigValInd = eigValInd[-K:] 12 | seleEigVects = eigVects[:, 
eigValInd] 13 | lowDDataMat = stded * seleEigVects 14 | return lowDDataMat 15 | def mean_value(MAT): 16 | mean_va=random.random(size=(1,8)) 17 | for j in range(MAT.shape[1]): 18 | sum0=0 19 | for i in range(MAT.shape[0]): 20 | sum0=sum0+MAT[i][j] 21 | mean_va[0,j]=sum0 22 | return mean_va 23 | 24 | randArray = random.random(size=(10,8)) 25 | a=pca(randArray) 26 | print a 27 | 28 | 29 | -------------------------------------------------------------------------------- /pca_final.py~: -------------------------------------------------------------------------------- 1 | from numpy import * 2 | #coding: utf-8 3 | def pca(dataMat, topNfeat=5): 4 | meanVals = mean_value(dataMat) 5 | 6 | meanRemoved = dataMat - meanVals 7 | stded = meanRemoved / std(dataMat,axis=0) 8 | covMat = cov(stded, rowvar=0) #求协方差方阵 9 | eigVals, eigVects = linalg.eig(mat(covMat)) 10 | eigValInd = argsort(eigVals) #对特征值进行排序 11 | eigValInd = eigValInd[-topNfeat:] 12 | redEigVects = eigVects[:, eigValInd] # 除去不需要的特征向量 13 | lowDDataMat = stded * redEigVects #求新的数据矩阵 14 | return lowDDataMat 15 | def mean_value(MAT): 16 | mean_va=random.random(size=(1,8)) 17 | for j in range(MAT.shape[1]): 18 | sum0=0 19 | for i in range(MAT.shape[0]): 20 | sum0=sum0+MAT[i][j] 21 | mean_va[0,j]=sum0 22 | return mean_va 23 | 24 | randArray = random.random(size=(10,8)) 25 | a=pca(randArray) 26 | print a 27 | 28 | 29 | -------------------------------------------------------------------------------- /pca.py~: -------------------------------------------------------------------------------- 1 | from numpy import * 2 | #coding: utf-8 3 | def pca(dataMat, topNfeat=5): 4 | meanVals = mean(dataMat, axis=0) 5 | meanRemoved = dataMat - meanVals #减去均值 6 | stded = meanRemoved / std(dataMat) #用标准差归一化 7 | covMat = cov(stded, rowvar=0) #求协方差方阵 8 | eigVals, eigVects = linalg.eig(mat(covMat)) #求特征值和特征向量 9 | print eigVals 10 | print eigVects 11 | eigValInd = argsort(eigVals) #对特征值进行排序 12 | print eigValInd 13 | eigValIndre = 
eigValInd[-(topNfeat ):] 14 | #eigValInd=eigValIndre.reverse() 15 | #print eigValInd 16 | redEigVects = eigVects[:, eigValIndre] # 除去不需要的特征向量 17 | print redEigVects 18 | lowDDataMat = stded * redEigVects #求新的数据矩阵 19 | reconMat = (lowDDataMat * redEigVects.T) * std(dataMat) + meanVals 20 | return lowDDataMat, reconMat 21 | 22 | randArray = random.random(size=(10,8)) 23 | a,b=pca(randArray) 24 | print a,b 25 | 26 | 27 | -------------------------------------------------------------------------------- /pca.py: -------------------------------------------------------------------------------- 1 | from numpy import * 2 | #coding: utf-8 3 | def pca(dataMat, topNfeat=5): 4 | meanVals = mean(dataMat, axis=0) 5 | meanRemoved = dataMat - meanVals #减去均值 6 | stded = meanRemoved / std(dataMat) #用标准差归一化 7 | covMat = cov(stded, rowvar=0) #求协方差方阵 8 | eigVals, eigVects = linalg.eig(mat(covMat)) #求特征值和特征向量 9 | #print eigVals 10 | #print eigVects 11 | eigValInd = argsort(eigVals) #对特征值进行排序 12 | #print eigValInd 13 | eigValIndre = eigValInd[-(topNfeat ):] 14 | #eigValInd=eigValIndre.reverse() 15 | #print eigValInd 16 | redEigVects = eigVects[:, eigValIndre] # 除去不需要的特征向量 17 | #print redEigVects 18 | lowDDataMat = stded * redEigVects #求新的数据矩阵 19 | reconMat = (lowDDataMat * redEigVects.T) * std(dataMat) + meanVals 20 | return lowDDataMat, reconMat 21 | 22 | randArray = random.random(size=(10,8)) 23 | print randArray 24 | a,b=pca(randArray) 25 | print a,b 26 | 27 | 28 | -------------------------------------------------------------------------------- /gonge.m~: -------------------------------------------------------------------------------- 1 | lamata=0.5 2 | trainnum=100 3 | X=rand(1,trainnum); 4 | Y=sin(X); 5 | G=zeros(10,10); 6 | b=zeros(10,1) 7 | Gtemp=zeros(10,10); 8 | 9 | for i=1:10 10 | for j=1:10 11 | for l=1:100 12 | Gtemp(i,j)=Gtemp(i,j)+X(1,l)^(i+j-2); 13 | end 14 | end 15 | end 16 | 17 | for i=1:10 18 | for j=1:10 19 | G(i,j)=Gtemp(i,j)/2*trainnum 20 | if i==j 21 | 
G(i,j)=G(i,j)+lamata; 22 | end 23 | end 24 | end 25 | 26 | for i=1:10 27 | for l in 1:100 28 | b(i,1)=b(i,1)-2*Y(1,l)*X(1,l)^(i-1) 29 | end 30 | end 31 | 32 | 33 | function  [x,n]=conjgrad(A,b,x0)      34 | r1=b-A*x0;      35 | p=r1;      36 | n=0;  37 |      for i=1:rank(A)          38 | if(dot(p,A*p)<1.0e-10)  39 |              break;          40 | end  41 |          alpha=dot(r1,r1)*(dot(p,A*p))^-1; 42 |          x=x0+alpha*p;            43 | r2=r1-alpha*A*p;  44 |         if(r2<1.0e-10)                 45 | break;             46 | end  47 |             belta=dot(r2,r2)*(dot(r1,r1))^-1; 48 |         p=r2+belta*p;            49 | n=n+1;      50 | end  51 | 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /gonge.m: -------------------------------------------------------------------------------- 1 | lamata=0.5 2 | trainnum=100 3 | X=rand(1,trainnum); 4 | Y=sin(X); 5 | G=zeros(10,10); 6 | b=zeros(10,1) 7 | Gtemp=zeros(10,10); 8 | 9 | for i=1:10 10 | for j=1:10 11 | for l=1:100 12 | Gtemp(i,j)=Gtemp(i,j)+X(1,l)^(i+j-2); 13 | end 14 | end 15 | end 16 | 17 | for i=1:10 18 | for j=1:10 19 | G(i,j)=Gtemp(i,j)/2*trainnum 20 | if i==j 21 | G(i,j)=G(i,j)+lamata; 22 | end 23 | end 24 | end 25 | 26 | for i=1:10 %一次项系数 27 | for l in 1:100 28 | b(i,1)=b(i,1)-2*Y(1,l)*X(1,l)^(i-1) 29 | end 30 | end 31 | 32 | 33 | function  [x,n]=conjgrad(A,b,x0)      34 | r1=b-A*x0;      35 | p=r1;      36 | n=0;  37 |      for i=1:rank(A)          38 | if(dot(p,A*p)<1.0e-10)  39 |              break;          40 | end  41 |          alpha=dot(r1,r1)*(dot(p,A*p))^-1; 42 |          x=x0+alpha*p;            43 | r2=r1-alpha*A*p;  44 |         if(r2<1.0e-10)                 45 | break;             46 | end  47 |             belta=dot(r2,r2)*(dot(r1,r1))^-1; 48 |         p=r2+belta*p;            49 | n=n+1;      50 | end  51 | 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /test: 
-------------------------------------------------------------------------------- 1 | import math 2 | import random 3 | import numpy 4 | import scipy as sp 5 | import matplotlib.pyplot as plt 6 | X=[] 7 | Y=[] 8 | XTEST=[] 9 | YTEST=[] 10 | trainnum=100 11 | for i in range(trainnum): 12 | X.append(random.uniform(-1,1)) 13 | Y.append(math.sin(X[i])) 14 | XTEST.append(random.uniform(-1,1)) 15 | YTEST.append(math.sin(XTEST[i])) 16 | #plt.plot(X[trainnum],Y[trainnum],'.') 17 | def error(f, x, y): 18 | return sp.sum((f(x)-y)**2) 19 | fp1, residuals, rank, sv, rcond = sp.polyfit(X, Y, 1, full=True) 20 | f1 = sp.poly1d(fp1) 21 | plt.plot(X,Y,'.') 22 | plt.show() 23 | print(error(f1, X, Y)) 24 | print("Model parameters: %s" % fp1) 25 | trainingerrors=[] 26 | testerrors=[] 27 | for i in range(20): 28 | fpi, residuals, rank, sv, rcond = sp.polyfit(X, Y, i, full=True) 29 | fi=sp.poly1d(fpi) 30 | fx=sp.linspace(-1,1,5) 31 | plt.plot(fx, fi(fx), linewidth=1) 32 | plt.legend(["d=%i" % i], loc="upper left") 33 | er=error(fi, X, Y) 34 | trainingerrors.append(er) 35 | err=error(fi,XTEST,YTEST) 36 | testerrors.append(err) 37 | plt.show() 38 | print trainingerrors 39 | print testerrors 40 | 41 | -------------------------------------------------------------------------------- /test~: -------------------------------------------------------------------------------- 1 | import math 2 | import random 3 | import numpy 4 | import scipy as sp 5 | import matplotlib.pyplot as plt 6 | X=[] 7 | Y=[] 8 | XTEST=[] 9 | YTEST=[] 10 | trainnum=100 11 | for i in range(trainnum): 12 | X.append(random.uniform(-1,1)) 13 | Y.append(math.sin(X[i])) 14 | XTEST.append(random.uniform(-1,1)) 15 | YTEST.append(math.sin(XTEST[i])) 16 | #plt.plot(X[trainnum],Y[trainnum],'.') 17 | def error(f, x, y): 18 | return sp.sum((f(x)-y)**2) 19 | fp1, residuals, rank, sv, rcond = sp.polyfit(X, Y, 1, full=True) 20 | f1 = sp.poly1d(fp1) 21 | plt.plot(X,Y,'.') 22 | plt.show() 23 | print(error(f1, X, Y)) 24 | print("Model 
parameters: %s" % fp1) 25 | trainingerrors=[] 26 | testerrors=[] 27 | for i in range(20): 28 | fpi, residuals, rank, sv, rcond = sp.polyfit(X, Y, i, full=True) 29 | fi=sp.poly1d(fpi) 30 | fx=sp.linspace(-1,1,5) 31 | plt.plot(fx, fi(fx), linewidth=10) 32 | plt.legend(["d=%i" % i], loc="upper left") 33 | er=error(fi, X, Y) 34 | trainingerrors.append(er) 35 | err=error(fi,XTEST,YTEST) 36 | testerrors.append(err) 37 | plt.show() 38 | print trainingerrors 39 | print testerrors 40 | 41 | -------------------------------------------------------------------------------- /emsamesigma.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | import math 3 | import copy 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | isdebug = True 8 | 9 | # 指定k个高斯分布参数,这里指定k=2。注意2个高斯分布具有相同均方差Sigma,分别为Mu1,Mu2。 10 | def ini_data(Sigma,Mu1,Mu2,k,N): 11 | global X 12 | global Mu 13 | global Expectations 14 | X = np.zeros((1,N)) 15 | Mu = np.random.random(2) 16 | Expectations = np.zeros((N,k)) 17 | for i in xrange(0,N): 18 | if np.random.random(1) > 0.5: 19 | X[0,i] = np.random.normal()*Sigma + Mu1 20 | else: 21 | X[0,i] = np.random.normal()*Sigma + Mu2 22 | if isdebug: 23 | print "***********" 24 | print u"初始观测数据X:" 25 | print X 26 | # EM算法:步骤1,计算E[zij] 27 | def e_step(Sigma,k,N): 28 | global Expectations 29 | global Mu 30 | global X 31 | for i in xrange(0,N): 32 | Denom = 0 33 | for j in xrange(0,k): 34 | Denom += math.exp((-1/(2*(float(Sigma**2))))*(float(X[0,i]-Mu[j]))**2) 35 | for j in xrange(0,k): 36 | Numer = math.exp((-1/(2*(float(Sigma**2))))*(float(X[0,i]-Mu[j]))**2) 37 | Expectations[i,j] = Numer / Denom 38 | if isdebug: 39 | print "***********" 40 | print u"隐藏变量E(Z):" 41 | print Expectations 42 | # EM算法:步骤2,求最大化E[zij]的参数Mu 43 | def m_step(k,N): 44 | global Expectations 45 | global X 46 | for j in xrange(0,k): 47 | Numer = 0 48 | Denom = 0 49 | for i in xrange(0,N): 50 | Numer += Expectations[i,j]*X[0,i] 51 | Denom 
+=Expectations[i,j] 52 | Mu[j] = Numer / Denom 53 | # 算法迭代iter_num次,或达到精度Epsilon停止迭代 54 | def run(Sigma,Mu1,Mu2,k,N,iter_num,Epsilon): 55 | ini_data(Sigma,Mu1,Mu2,k,N) 56 | print u"初始:", Mu 57 | for i in range(iter_num): 58 | Old_Mu = copy.deepcopy(Mu) 59 | e_step(Sigma,k,N) 60 | m_step(k,N) 61 | print i,Mu 62 | if sum(abs(Mu-Old_Mu)) < Epsilon: 63 | break 64 | if __name__ == '__main__': 65 | run(6,40,20,2,1000,1000,0.0001) 66 | plt.hist(X[0,:],50) 67 | plt.show() 68 | -------------------------------------------------------------------------------- /emsamesigma.py~: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | import math 3 | import copy 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | isdebug = False 8 | 9 | # 指定k个高斯分布参数,这里指定k=2。注意2个高斯分布具有相同均方差Sigma,分别为Mu1,Mu2。 10 | def ini_data(Sigma,Mu1,Mu2,k,N): 11 | global X 12 | global Mu 13 | global Expectations 14 | X = np.zeros((1,N)) 15 | Mu = np.random.random(2) 16 | Expectations = np.zeros((N,k)) 17 | for i in xrange(0,N): 18 | if np.random.random(1) > 0.5: 19 | X[0,i] = np.random.normal()*Sigma + Mu1 20 | else: 21 | X[0,i] = np.random.normal()*Sigma + Mu2 22 | if isdebug: 23 | print "***********" 24 | print u"初始观测数据X:" 25 | print X 26 | # EM算法:步骤1,计算E[zij] 27 | def e_step(Sigma,k,N): 28 | global Expectations 29 | global Mu 30 | global X 31 | for i in xrange(0,N): 32 | Denom = 0 33 | for j in xrange(0,k): 34 | Denom += math.exp((-1/(2*(float(Sigma**2))))*(float(X[0,i]-Mu[j]))**2) 35 | for j in xrange(0,k): 36 | Numer = math.exp((-1/(2*(float(Sigma**2))))*(float(X[0,i]-Mu[j]))**2) 37 | Expectations[i,j] = Numer / Denom 38 | if isdebug: 39 | print "***********" 40 | print u"隐藏变量E(Z):" 41 | print Expectations 42 | # EM算法:步骤2,求最大化E[zij]的参数Mu 43 | def m_step(k,N): 44 | global Expectations 45 | global X 46 | for j in xrange(0,k): 47 | Numer = 0 48 | Denom = 0 49 | for i in xrange(0,N): 50 | Numer += Expectations[i,j]*X[0,i] 51 | Denom 
+=Expectations[i,j] 52 | Mu[j] = Numer / Denom 53 | # 算法迭代iter_num次,或达到精度Epsilon停止迭代 54 | def run(Sigma,Mu1,Mu2,k,N,iter_num,Epsilon): 55 | ini_data(Sigma,Mu1,Mu2,k,N) 56 | print u"初始:", Mu 57 | for i in range(iter_num): 58 | Old_Mu = copy.deepcopy(Mu) 59 | e_step(Sigma,k,N) 60 | m_step(k,N) 61 | print i,Mu 62 | if sum(abs(Mu-Old_Mu)) < Epsilon: 63 | break 64 | if __name__ == '__main__': 65 | run(6,40,20,2,1000,1000,0.0001) 66 | plt.hist(X[0,:],50) 67 | plt.show() 68 | -------------------------------------------------------------------------------- /regulazation.py~: -------------------------------------------------------------------------------- 1 | import math 2 | import random 3 | import scipy as sp 4 | import matplotlib.pyplot as plt 5 | W=[] 6 | X=[] 7 | Y=[] 8 | 9 | step=0.0001 10 | trainnum=500 11 | testnum=100 12 | polynum=10 13 | X1=[] 14 | Y1=[] 15 | select={} 16 | for i in range(polynum): 17 | W.append(1); 18 | for i in range(trainnum): 19 | X.append(random.uniform(-1,1)) 20 | Y.append(math.sin(X[i])) 21 | for i in range(testnum): 22 | X1.append(random.uniform(-1,1)) 23 | Y1.append(math.sin(X[i])) 24 | 25 | 26 | def qiupiandao(i,W,lamata,X,Y): 27 | sumh=2*lamata*W[i] 28 | for j in range(trainnum): 29 | sum0=0 30 | for q in range(polynum): 31 | sum0=sum0+W[q]*(X[j]**q) 32 | if q==0: 33 | sum0=sum0-Y[j] 34 | sumh=sumh+sum0*i*W[i]*X[j]**(i-1)/trainnum 35 | return sumh 36 | 37 | def lostfuction(lamata,W,X,Y): 38 | suml=0 39 | for i in range(polynum): 40 | suml=suml+lamata*(W[i]**2) 41 | for j in range(trainnum): 42 | sum0=0 43 | for q in range(polynum): 44 | sum0=sum0+W[q]*(X[j]**q) 45 | if q==0: 46 | sum0=sum0-Y[j] 47 | suml=suml+sum0**2/(2*trainnum) 48 | return suml 49 | 50 | def error(f, x, y): 51 | return sp.sum((f(x)-y)**2) 52 | 53 | for lamata in range(5): 54 | while True: 55 | prelost=lostfuction(lamata,W,X,Y) 56 | for i in range(polynum): 57 | W[i]=W[i]-step*qiupiandao(i,W,lamata,X,Y) 58 | postlost=lostfuction(lamata,W,X,Y) 59 | print 
prelost-postlost 60 | if prelost-postlost<0.001: 61 | break 62 | f1 = sp.poly1d(W) 63 | fx=sp.linspace(-1,1,100) 64 | er=error(f1, X1, Y1) 65 | plt.plot(fx, f1(fx)) 66 | #plt.legend(["lamata=%i" % lamata], loc="upper left") 67 | select[lamata]=[] 68 | select[lamata].append(er) 69 | select[lamata].append(list(W)) 70 | for i in range(polynum): 71 | W[i]=1 72 | 73 | 74 | for lamata in range(5): 75 | miner=0 76 | print lamata,select[lamata][0],select[lamata][1] 77 | if select[lamata][0] < select[miner][0]: 78 | miner=lamata 79 | 80 | 81 | 82 | print "lamata,lost,W" 83 | print miner,select[miner][0],select[miner][1] 84 | plt.plot(X,Y,'.') 85 | plt.show() 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /regulazation.py: -------------------------------------------------------------------------------- 1 | import math 2 | import random 3 | import scipy as sp 4 | import matplotlib.pyplot as plt 5 | W=[] 6 | X=[] 7 | Y=[] 8 | 9 | step=0.000001 10 | trainnum=500 11 | testnum=100 12 | polynum=10 13 | X1=[] 14 | Y1=[] 15 | select={} 16 | for i in range(polynum): 17 | W.append(1); 18 | for i in range(trainnum): 19 | X.append(random.uniform(-1,1)) 20 | Y.append(math.sin(X[i])) 21 | for i in range(testnum): 22 | X1.append(random.uniform(-1,1)) 23 | Y1.append(math.sin(X[i])) 24 | 25 | 26 | def qiupiandao(i,W,lamata,X,Y): 27 | sumh=2*lamata*W[i] 28 | for j in range(trainnum): 29 | sum0=0 30 | for q in range(polynum): 31 | sum0=sum0+W[q]*(X[j]**q) 32 | if q==0: 33 | sum0=sum0-Y[j] 34 | sumh=sumh+sum0*i*W[i]*X[j]**(i-1)/trainnum 35 | return sumh 36 | 37 | def lostfuction(lamata,W,X,Y): 38 | suml=0 39 | for i in range(polynum): 40 | suml=suml+lamata*(W[i]**2) 41 | for j in range(trainnum): 42 | sum0=0 43 | for q in range(polynum): 44 | sum0=sum0+W[q]*(X[j]**q) 45 | if q==0: 46 | sum0=sum0-Y[j] 47 | suml=suml+sum0**2/(2*trainnum) 48 | return suml 49 | 50 | def error(f, x, y): 51 | return 
sp.sum((f(x)-y)**2) 52 | 53 | for lamata in range(5): 54 | while True: 55 | prelost=lostfuction(lamata,W,X,Y) 56 | for i in range(polynum): 57 | W[i]=W[i]-step*qiupiandao(i,W,lamata,X,Y) 58 | postlost=lostfuction(lamata,W,X,Y) 59 | print prelost-postlost 60 | if prelost-postlost<0.000001: 61 | break 62 | f1 = sp.poly1d(W) 63 | fx=sp.linspace(-1,1,100) 64 | er=error(f1, X1, Y1) 65 | plt.plot(fx, f1(fx)) 66 | #plt.legend(["lamata=%i" % lamata], loc="upper left") 67 | select[lamata]=[] 68 | select[lamata].append(er) 69 | select[lamata].append(list(W)) 70 | for i in range(polynum): 71 | W[i]=1 72 | 73 | 74 | for lamata in range(5): 75 | miner=0 76 | print lamata,select[lamata][0],select[lamata][1] 77 | if select[lamata][0] < select[miner][0]: 78 | miner=lamata 79 | 80 | 81 | 82 | print "lamata,lost,W" 83 | print miner,select[miner][0],select[miner][1] 84 | plt.plot(X,Y,'.') 85 | plt.show() 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /emsolute.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | import math 3 | import copy 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | isdebug = False 8 | 9 | # 指定k个高斯分布参数,这里指定k=2。注意2个高斯分布具有相同均方差Sigma,分别为Mu1,Mu2。 10 | def ini_data(Sigma1,Sigma2,Mu1,Mu2,k,N): 11 | global X 12 | global Mu 13 | global Expectations 14 | global Sigma 15 | X = np.zeros((1,N)) 16 | Mu = np.random.random(2) 17 | Sigma=np.random.random(2) 18 | Expectations = np.zeros((N,k)) 19 | for i in xrange(0,N): 20 | if np.random.random(1) > 0.5: 21 | X[0,i] = np.random.normal()*Sigma1 + Mu1 22 | else: 23 | X[0,i] = np.random.normal()*Sigma2 + Mu2 24 | if isdebug: 25 | print "***********" 26 | print u"初始观测数据X:" 27 | print X 28 | # EM算法:步骤1,计算E[zij] 29 | def e_step(k,N): 30 | global Expectations 31 | global Mu 32 | global X 33 | global Sigma 34 | for i in xrange(0,N): 35 | Denom = 0 36 | for j in xrange(0,k): 37 | 
Denom += math.exp((-1/(2*(float(Sigma[j]**2))))*(float(X[0,i]-Mu[j]))**2) 38 | for j in xrange(0,k): 39 | Numer = math.exp((-1/(2*(float(Sigma[j]**2))))*(float(X[0,i]-Mu[j]))**2) 40 | Expectations[i,j] = Numer / Denom 41 | if isdebug: 42 | print "***********" 43 | print u"隐藏变量E(Z):" 44 | print Expectations 45 | # EM算法:步骤2,求最大化E[zij]的参数Mu 46 | def m_step(k,N): 47 | global Expectations 48 | global X 49 | for j in xrange(0,k): 50 | Numer = 0 51 | mosig = 0 52 | Denom = 0 53 | for i in xrange(0,N): 54 | Numer += Expectations[i,j]*X[0,i] 55 | Denom +=Expectations[i,j] 56 | mosig +=Expectations[i,j]*((X[0,i])-Mu[j])**2 57 | Mu[j] = Numer / Denom 58 | Sigma[j]=mosig/Denom 59 | 60 | # 算法迭代iter_num次,或达到精度Epsilon停止迭代 61 | def run(Sigma1,Sigma2,Mu1,Mu2,k,N,iter_num,Epsilon): 62 | ini_data(Sigma1,Sigma2,Mu1,Mu2,k,N) 63 | print u"初始:", Mu 64 | for i in range(iter_num): 65 | Old_Mu = copy.deepcopy(Mu) 66 | e_step(k,N) 67 | m_step(k,N) 68 | print i,Mu,Sigma 69 | 70 | if sum(abs(Mu-Old_Mu)) < Epsilon: 71 | break 72 | if __name__ == '__main__': 73 | run(4,6,40,20,2,1000,1000,0.0001) 74 | plt.hist(X[0,:],50) 75 | plt.show() 76 | -------------------------------------------------------------------------------- /emsolute.py~: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | import math 3 | import copy 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | isdebug = False 8 | 9 | # 指定k个高斯分布参数,这里指定k=2。注意2个高斯分布具有相同均方差Sigma,分别为Mu1,Mu2。 10 | def ini_data(Sigma1,Sigma2,Mu1,Mu2,k,N): 11 | global X 12 | global Mu 13 | global Expectations 14 | global Sigma 15 | X = np.zeros((1,N)) 16 | Mu = np.random.random(2) 17 | Sigma=np.random.random(2) 18 | Expectations = np.zeros((N,k)) 19 | for i in xrange(0,N): 20 | if np.random.random(1) > 0.5: 21 | X[0,i] = np.random.normal()*Sigma1 + Mu1 22 | else: 23 | X[0,i] = np.random.normal()*Sigma2 + Mu2 24 | if isdebug: 25 | print "***********" 26 | print u"初始观测数据X:" 27 | print X 28 | # 
EM算法:步骤1,计算E[zij] 29 | def e_step(k,N): 30 | global Expectations 31 | global Mu 32 | global X 33 | global Sigma 34 | for i in xrange(0,N): 35 | Denom = 0 36 | for j in xrange(0,k): 37 | Denom += math.exp((-1/(2*(float(Sigma[j]**2))))*(float(X[0,i]-Mu[j]))**2) 38 | for j in xrange(0,k): 39 | Numer = math.exp((-1/(2*(float(Sigma[j]**2))))*(float(X[0,i]-Mu[j]))**2) 40 | Expectations[i,j] = Numer / Denom 41 | if isdebug: 42 | print "***********" 43 | print u"隐藏变量E(Z):" 44 | print Expectations 45 | # EM算法:步骤2,求最大化E[zij]的参数Mu 46 | def m_step(k,N): 47 | global Expectations 48 | global X 49 | for j in xrange(0,k): 50 | Numer = 0 51 | mosig = 0 52 | Denom = 0 53 | for i in xrange(0,N): 54 | Numer += Expectations[i,j]*X[0,i] 55 | Denom +=Expectations[i,j] 56 | mosig +=Expectations[i,j]*((X[0,i])-Mu[j])**2 57 | Mu[j] = Numer / Denom 58 | Sigma[j]=mosig/Denom 59 | 60 | # 算法迭代iter_num次,或达到精度Epsilon停止迭代 61 | def run(Sigma1,Sigma2,Mu1,Mu2,k,N,iter_num,Epsilon): 62 | ini_data(Sigma1,Sigma2,Mu1,Mu2,k,N) 63 | print u"初始:", Mu 64 | for i in range(iter_num): 65 | Old_Mu = copy.deepcopy(Mu) 66 | e_step(k,N) 67 | m_step(k,N) 68 | print i,Mu,Sigma 69 | 70 | if sum(abs(Mu-Old_Mu)) < Epsilon: 71 | break 72 | if __name__ == '__main__': 73 | run(4,6,40,20,2,1000,1000,0.0001) 74 | plt.hist(X[0,:],50) 75 | plt.show() 76 | -------------------------------------------------------------------------------- /regulazationl2.py: -------------------------------------------------------------------------------- 1 | import math 2 | import random 3 | import scipy as sp 4 | import matplotlib.pyplot as plt 5 | W=[] 6 | X=[] 7 | Y=[] 8 | 9 | step=0.0001 10 | trainnum=500 11 | testnum=100 12 | polynum=10 13 | X1=[] 14 | Y1=[] 15 | select={} 16 | for i in range(polynum): 17 | W.append(1); 18 | for i in range(trainnum): 19 | X.append(random.uniform(-1,1)) 20 | Y.append(math.sin(X[i])) 21 | for i in range(testnum): 22 | X1.append(random.uniform(-1,1)) 23 | Y1.append(math.sin(X[i])) 24 | 25 | 26 | def 
qiupiandao(i,W,lamata,X,Y): 27 | suml=0 28 | for q in range(polynum): 29 | suml=suml+W[q]**2 30 | suml=math.sqrt(suml) 31 | sumh=lamata*W[i]/suml 32 | for j in range(trainnum): 33 | sum0=0 34 | for q in range(polynum): 35 | sum0=sum0+W[q]*(X[j]**q) 36 | if q==0: 37 | sum0=sum0-Y[j] 38 | sumh=sumh+sum0*i*W[i]*X[j]**(i-1)/trainnum 39 | return sumh 40 | 41 | def lostfuction(lamata,W,X,Y): 42 | suml=0 43 | for i in range(polynum): 44 | suml=suml+W[i]**2 45 | 46 | suml=lamata*math.sqrt(suml) 47 | for j in range(trainnum): 48 | sum0=0 49 | for q in range(polynum): 50 | sum0=sum0+W[q]*(X[j]**q) 51 | if q==0: 52 | sum0=sum0-Y[j] 53 | suml=suml+sum0**2/(2*trainnum) 54 | return suml 55 | 56 | def error(f, x, y): 57 | return sp.sum((f(x)-y)**2) 58 | 59 | for lamata in range(5): 60 | while True: 61 | prelost=lostfuction(lamata,W,X,Y) 62 | for i in range(polynum): 63 | W[i]=W[i]-step*qiupiandao(i,W,lamata,X,Y) 64 | postlost=lostfuction(lamata,W,X,Y) 65 | print prelost-postlost 66 | if prelost-postlost<0.001: 67 | break 68 | f1 = sp.poly1d(W) 69 | fx=sp.linspace(-1,1,100) 70 | er=error(f1, X1, Y1) 71 | plt.plot(fx, f1(fx)) 72 | #plt.legend(["lamata=%i" % lamata], loc="upper left") 73 | select[lamata]=[] 74 | select[lamata].append(er) 75 | select[lamata].append(list(W)) 76 | for i in range(polynum): 77 | W[i]=1 78 | 79 | 80 | for lamata in range(5): 81 | miner=0 82 | print lamata,select[lamata][0],select[lamata][1] 83 | if select[lamata][0] < select[miner][0]: 84 | miner=lamata 85 | 86 | 87 | 88 | print "lamata,lost,W" 89 | print miner,select[miner][0],select[miner][1] 90 | plt.plot(X,Y,'.') 91 | plt.show() 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /regulazationl2.py~: -------------------------------------------------------------------------------- 1 | import math 2 | import random 3 | import scipy as sp 4 | import matplotlib.pyplot as plt 5 | W=[] 6 | X=[] 7 | Y=[] 8 | 9 | step=0.0001 10 | 
trainnum=500 11 | testnum=100 12 | polynum=10 13 | X1=[] 14 | Y1=[] 15 | select={} 16 | for i in range(polynum): 17 | W.append(1); 18 | for i in range(trainnum): 19 | X.append(random.uniform(-1,1)) 20 | Y.append(math.sin(X[i])) 21 | for i in range(testnum): 22 | X1.append(random.uniform(-1,1)) 23 | Y1.append(math.sin(X[i])) 24 | 25 | 26 | def qiupiandao(i,W,lamata,X,Y): 27 | suml=0 28 | for q in range(polynum): 29 | suml=suml+W[q]**2 30 | suml=math.sqrt(suml) 31 | sumh=lamata*W[i]/suml 32 | for j in range(trainnum): 33 | sum0=0 34 | for q in range(polynum): 35 | sum0=sum0+W[q]*(X[j]**q) 36 | if q==0: 37 | sum0=sum0-Y[j] 38 | sumh=sumh+sum0*i*W[i]*X[j]**(i-1)/trainnum 39 | return sumh 40 | 41 | def lostfuction(lamata,W,X,Y): 42 | suml=0 43 | for i in range(polynum): 44 | suml=suml+W[i]**2 45 | 46 | suml=lamata*math.sqrt(suml) 47 | for j in range(trainnum): 48 | sum0=0 49 | for q in range(polynum): 50 | sum0=sum0+W[q]*(X[j]**q) 51 | if q==0: 52 | sum0=sum0-Y[j] 53 | suml=suml+sum0**2/(2*trainnum) 54 | return suml 55 | 56 | def error(f, x, y): 57 | return sp.sum((f(x)-y)**2) 58 | 59 | for lamata in range(5): 60 | while True: 61 | prelost=lostfuction(lamata,W,X,Y) 62 | for i in range(polynum): 63 | W[i]=W[i]-step*qiupiandao(i,W,lamata,X,Y) 64 | postlost=lostfuction(lamata,W,X,Y) 65 | print prelost-postlost 66 | if prelost-postlost<0.001: 67 | break 68 | f1 = sp.poly1d(W) 69 | fx=sp.linspace(-1,1,100) 70 | er=error(f1, X1, Y1) 71 | plt.plot(fx, f1(fx)) 72 | #plt.legend(["lamata=%i" % lamata], loc="upper left") 73 | select[lamata]=[] 74 | select[lamata].append(er) 75 | select[lamata].append(list(W)) 76 | for i in range(polynum): 77 | W[i]=0.1 78 | 79 | 80 | for lamata in range(5): 81 | miner=0 82 | print lamata,select[lamata][0],select[lamata][1] 83 | if select[lamata][0] < select[miner][0]: 84 | miner=lamata 85 | 86 | 87 | 88 | print "lamata,lost,W" 89 | print miner,select[miner][0],select[miner][1] 90 | plt.plot(X,Y,'.') 91 | plt.show() 92 | 93 | 94 | 95 | 96 | 97 | 98 
# -*- coding: utf-8 -*-
"""Consolidated clean-up of a scraped repository dump.

The original chunk concatenated six files — em.py, lr.py, lrmap.py and
their editor-backup ``~`` twins (near-identical duplicates) — with the
scraper having eaten every ``<...>`` span as if it were an HTML tag.
This module keeps one copy of each distinct program, ported from
Python 2 to Python 3:

* EM for a two-component 1-D Gaussian mixture (em.py).
* Logistic regression trained by batch gradient ascent (lr.py); the
  MAP / L2-regularised variant (lrmap.py) is folded in via the optional
  ``lamata`` parameter of :func:`solute`.

Bug fixes are flagged inline where they occur.
"""
import copy
import math
import random

import numpy as np

# ---------------------------------------------------------------------------
# EM for a 1-D two-component Gaussian mixture (from em.py)
# Module-level EM state (created by ini_data): X, Mu, Sigma, pi, E.
# ---------------------------------------------------------------------------
isdebug = True  # print X and the responsibilities after each (re)computation


def ini_data(Sigma1, Sigma2, Mu1, Mu2, k, N):
    """Draw N samples from a 50/50 mixture of N(Mu1, Sigma1^2) and
    N(Mu2, Sigma2^2) and initialise the EM state.

    Side effects: (re)binds the module globals X (1xN sample array),
    Mu, Sigma, pi (per-component parameters, deliberately poor starting
    guesses) and E (NxK responsibility matrix, overwritten by e_step).
    """
    global X, Mu, E, Sigma, pi
    pi = [0.5, 0.5]               # initial mixing weights
    Mu = [1, 100]                 # initial means (far from the truth on purpose)
    Sigma = [10, 10]              # initial standard deviations
    X = np.zeros((1, N))
    E = np.random.random((N, k))  # placeholder; e_step overwrites it
    for i in range(N):
        if np.random.random(1) > 0.5:
            X[0, i] = np.random.normal() * Sigma1 + Mu1
        else:
            X[0, i] = np.random.normal() * Sigma2 + Mu2
    if isdebug:
        print(X)


def _component_weight(j, x):
    """Unnormalised p(x, component j) = pi_j * N(x | Mu_j, Sigma_j).

    Bug fix: the original omitted the 1/Sigma_j factor of the Gaussian
    density, which skews the responsibilities whenever the two sigmas
    differ.  The constant 1/sqrt(2*pi) cancels between numerator and
    denominator of the E-step and is still left out.
    """
    return pi[j] / Sigma[j] * math.exp(-((x - Mu[j]) ** 2) / (2.0 * Sigma[j] ** 2))


def e_step(k, N):
    """E-step: set E[i, j] = p(component j | x_i) for every sample."""
    for i in range(N):
        weights = [_component_weight(j, X[0, i]) for j in range(k)]
        denom = sum(weights)
        for j in range(k):
            E[i, j] = weights[j] / denom
    if isdebug:
        print(E)


def m_step(k, N):
    """M-step: update Mu, Sigma and pi in place from the responsibilities."""
    for j in range(k):
        weighted_sum = 0.0   # sum_i E[i,j] * x_i
        weighted_sq = 0.0    # sum_i E[i,j] * (x_i - Mu_j)^2
        total_resp = 0.0     # sum_i E[i,j]
        for i in range(N):
            weighted_sum += E[i, j] * X[0, i]
            total_resp += E[i, j]
            weighted_sq += E[i, j] * (X[0, i] - Mu[j]) ** 2
        Mu[j] = weighted_sum / total_resp
        Sigma[j] = (weighted_sq / total_resp) ** 0.5
        pi[j] = total_resp / N


def objec(k, N):
    """Convergence monitor: log of prod_i sum_j pi_j * E[i, j].

    Not the true data log-likelihood — kept as the original's proxy —
    but computed in log space.  Bug fix: the original multiplied ~N
    sub-unity factors, underflowing to exactly 0.0 for N ~ 1000, which
    made the convergence test meaningless.
    """
    resul = 0.0
    for i in range(N):
        resul += math.log(sum(pi[j] * E[i, j] for j in range(k)))
    print(resul)
    return resul


def run(Sigma1, Sigma2, Mu1, Mu2, k, N, iter_num, Epsilon):
    """Run at most iter_num EM iterations, stopping early once the
    objective changes by less than Epsilon.

    Bug fixes: the original threshold 2.4e-1000 underflows to 0.0 (the
    early stop could never fire) and ``old`` was never refreshed, so each
    iteration compared against the *initial* objective.  Epsilon —
    previously an accepted-but-unused parameter — is now the tolerance.
    """
    ini_data(Sigma1, Sigma2, Mu1, Mu2, k, N)
    print(Mu, Sigma, pi)
    old = objec(k, N)
    for i in range(iter_num):
        e_step(k, N)
        m_step(k, N)
        print(i, Mu, Sigma, pi)
        new = objec(k, N)
        if abs(new - old) < Epsilon:
            break
        old = new


# ---------------------------------------------------------------------------
# Logistic regression (from lr.py; lrmap.py's MAP variant = lamata > 0)
# ---------------------------------------------------------------------------
dim = 5         # features per sample; each data row is dim features + a 0/1 label
num = 1000      # training-set size
testnum = 500   # test-set size
step = 0.0001   # gradient-ascent learning rate
Epsilon = 0.01  # convergence tolerance on the log-likelihood
lamata = 0.01   # L2 (Gaussian-prior) strength used by the lrmap variant


def ini(sigma, u1, u0, X, Xtest, W):
    """Fill the supplied (empty) lists in place with synthetic data.

    sigma/u1/u0 get dim per-feature scales and the two class means; X and
    Xtest get num / testnum rows of [dim features..., label]; W gets
    dim weights plus a bias slot (W[dim]), all initialised to 1.0.
    """
    for i in range(dim):
        sigma.append(0.5)
        u1.append(random.random())
        u0.append(random.random())

    def _sample():
        # Flip a fair coin for the class, then draw Gaussian features
        # around that class's mean; the label rides in the last slot.
        row = []
        if random.random() > 0.5:
            for i in range(dim):
                row.append(np.random.normal() * sigma[i] + u1[i])
            row.append(1)
        else:
            for i in range(dim):
                row.append(np.random.normal() * sigma[i] + u0[i])
            row.append(0)
        return row

    for _ in range(num):
        X.append(_sample())
    for _ in range(testnum):
        Xtest.append(_sample())
    for _ in range(dim + 1):
        W.append(1.0)


def Probr(X, W):
    """P(label = 1 | features X) under the logistic model; W[dim] is the bias."""
    temp = 0.0
    for i in range(dim):
        temp += X[i] * W[i]
    son = math.exp(W[dim] + temp)
    return son / (son + 1)


def l(W, X):
    """Log-likelihood of the data X under weights W.

    Labels sit at row[dim].  Generalised to iterate the rows actually
    present (the original hard-coded range(num) and the label index 5).
    """
    su = 0.0
    for row in X:
        temp = 0.0
        for i in range(dim):
            temp += W[i] * row[i]
        z = W[dim] + temp
        su += row[dim] * z - math.log(1 + math.exp(z))
    return su


def solute(W, X, Epsilon, lamata=0.0):
    """Batch gradient ascent on the log-likelihood, updating W in place,
    until l() changes by less than Epsilon between sweeps.

    lamata > 0 adds an L2 (MAP) penalty, subsuming the separate lrmap.py
    copy.  Bug fix: lrmap.py applied the penalty as -step*lamata*Wa[i],
    which merely rescales the gradient; the gradient of -(lamata/2)*||W||^2
    is -lamata*W[i].  With lamata=0 the update matches lr.py exactly.
    """
    while True:
        Wold = copy.deepcopy(W)
        Wa = [0.0] * (dim + 1)            # gradient accumulator (bias last)
        for row in X:
            p = Probr(row, W)             # hoisted: constant within one row
            for i in range(dim):
                Wa[i] += row[i] * (row[dim] - p)
            Wa[dim] += row[dim] - p
        for i in range(dim + 1):
            W[i] += step * (Wa[i] - lamata * W[i])
        print(abs(l(W, X) - l(Wold, X)))
        # Reconstructed: the scrape stripped "< Epsilon: break" here.
        if abs(l(W, X) - l(Wold, X)) < Epsilon:
            break


def _evaluate(W, data):
    """Classify each row of data at threshold 0.5 and print the accuracy.

    Reconstructed from the scrape-damaged tail of lr.py.  NOTE(review):
    the damaged original appears to index ``X`` while looping over the
    test range — evaluated here on the data actually passed in; confirm
    against the upstream repository.
    """
    numright = 0.0
    for row in data:
        Y1 = 1 if Probr(row, W) > 0.5 else 0
        if row[dim] == Y1:
            numright += 1.0
    print(W)
    print("right rate" + str(numright / len(data)))
    print("right number" + str(numright))
    print("test number" + str(len(data)))


if __name__ == '__main__':
    # EM demo (em.py): fit the mixture, then plot a histogram of the samples.
    run(15, 8, 20, 70, 2, 1000, 10, 0.0001)
    import matplotlib.pyplot as plt  # demo-only dependency, imported lazily
    plt.hist(X[0, :], 50)
    plt.show()

    # Logistic-regression demo (lr.py).  Pass lamata=lamata to solute for
    # the MAP-regularised behaviour that used to live in lrmap.py.
    sigma_d, u1_d, u0_d, Xd, Xt, Wd = [], [], [], [], [], []
    ini(sigma_d, u1_d, u0_d, Xd, Xt, Wd)
    solute(Wd, Xd, Epsilon)
    _evaluate(Wd, Xt)