├── 10 Dimension Reduction
│   ├── ecoli.py
│   ├── factoranalysis.py
│   ├── floyd.py
│   ├── iris.py
│   ├── isomap.py
│   ├── kernelpca.py
│   ├── kpcademo.py
│   ├── lda.py
│   ├── lle.py
│   ├── pca.py
│   └── pcademo.py
├── 11 Optimisation
│   ├── CG.py
│   ├── LevenbergMarquardt.py
│   ├── LevenbergMarquardt_leastsq.py
│   ├── Newton.py
│   ├── TSP.py
│   └── steepest.py
├── 12 Evolutionary
│   ├── PBIL.py
│   ├── exhaustiveKnapsack.py
│   ├── fourpeaks.py
│   ├── ga.py
│   ├── greedyKnapsack.py
│   ├── knapsack.py
│   └── run_ga.py
├── 13 Reinforcement
│   ├── SARSA.py
│   ├── SARSA_cliff.py
│   ├── TDZero.py
│   └── TDZero_cliff.py
├── 14 MCMC
│   ├── BoxMuller.py
│   ├── Gibbs.py
│   ├── MH.py
│   ├── SIR.py
│   ├── importancesampling.py
│   ├── lcg.py
│   └── rejectionsampling.py
├── 15 Graphical Models
│   ├── Gibbs.py
│   ├── HMM.py
│   ├── Kalman.py
│   ├── MRF.py
│   ├── graphdemo.py
│   └── world.png
├── 2 Linear
│   ├── auto-mpg.py
│   ├── linreg.py
│   ├── linreg_logic_eg.py
│   ├── logic.py
│   ├── pcn.py
│   ├── pcn_logic_eg.py
│   └── pima.py
├── 3 MLP
│   ├── PNOz.py
│   ├── PNoz.dat
│   ├── iris.py
│   ├── iris_proc.data
│   ├── logic.py
│   ├── mlp.py
│   └── sinewave.py
├── 4 RBF
│   ├── iris.py
│   ├── least_squares.py
│   └── rbf.py
├── 6 Trees
│   ├── dtree.py
│   ├── party.data
│   └── party.py
├── 7 Committee
│   ├── bagging.py
│   ├── boost.py
│   ├── car.data
│   ├── car.py
│   ├── dtw.py
│   └── party.py
├── 8 Probability
│   ├── GMM.py
│   ├── gaussian.py
│   ├── kdtree.py
│   ├── knn.py
│   ├── knnSmoother.py
│   ├── plotGaussian.py
│   └── ruapehu.dat
└── 9 Unsupervised
    ├── iris.py
    ├── kmeans.py
    ├── kmeansnet.py
    ├── moredemos.py
    ├── shortecoli.data
    ├── som.py
    └── somdemo.py
--------------------------------------------------------------------------------
/10 Dimension Reduction/ecoli.py:
--------------------------------------------------------------------------------

# Code from Chapter 10 of Machine Learning: An Algorithmic Perspective
# by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html)

# You are free to use, change, or redistribute the code in any way you wish for
# non-commercial purposes, but please maintain the name of the original author.
# This code comes with no warranty of any kind.
8 | 9 | # Stephen Marsland, 2008 10 | 11 | # Simple example of LDA, PCA, and kernel PCA, on the Wine and e-coli datasets 12 | from pylab import * 13 | from numpy import * 14 | 15 | wine = loadtxt('../9 Unsupervised/wine.data',delimiter=',') 16 | 17 | labels = wine[:,0] 18 | data = wine[:,1:] 19 | data -= mean(data,axis=0) 20 | data /= data.max(axis=0) 21 | 22 | #ecoli = loadtxt('../9 Unsupervised/shortecoli.data') 23 | #labels = ecoli[:,7:] 24 | #data = ecoli[:,:7] 25 | #data -= mean(data,axis=0) 26 | #data /= data.max(axis=0) 27 | 28 | order = range(shape(data)[0]) 29 | random.shuffle(order) 30 | data = data[order] 31 | w0 = where(labels==1) 32 | w1 = where(labels==2) 33 | w2 = where(labels==3) 34 | 35 | import lda 36 | newData,w = lda.lda(data,labels,2) 37 | 38 | plot(data[w0,0],data[w0,1],'ok') 39 | plot(data[w1,0],data[w1,1],'^k') 40 | plot(data[w2,0],data[w2,1],'vk') 41 | axis([-1.5,1.8,-1.5,1.8]) 42 | axis('off') 43 | figure(2) 44 | plot(newData[w0,0],newData[w0,1],'ok') 45 | plot(newData[w1,0],newData[w1,1],'^k') 46 | plot(newData[w2,0],newData[w2,1],'vk') 47 | axis([-1.5,1.8,-1.5,1.8]) 48 | axis('off') 49 | 50 | import pca 51 | x,y,evals,evecs = pca.pca(data,2) 52 | figure(3) 53 | plot(y[w0,0],y[w0,1],'ok') 54 | plot(y[w1,0],y[w1,1],'^k') 55 | plot(y[w2,0],y[w2,1],'vk') 56 | axis('off') 57 | 58 | import kernelpca 59 | newData = kernelpca.kernelpca(data,'gaussian',2) 60 | figure(4) 61 | plot(newData[w0,0],newData[w0,1],'ok') 62 | plot(newData[w1,0],newData[w1,1],'^k') 63 | plot(newData[w2,0],newData[w2,1],'vk') 64 | axis('off') 65 | 66 | show() 67 | -------------------------------------------------------------------------------- /10 Dimension Reduction/factoranalysis.py: -------------------------------------------------------------------------------- 1 | 2 | # Code from Chapter 10 of Machine Learning: An Algorithmic Perspective 3 | # by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html) 4 | 5 | # You are free to use, change, or redistribute the code in any way you wish for 6 | # non-commercial purposes, but please maintain the name of the original author. 7 | # This code comes with no warranty of any kind. 
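# A usage sketch for the factoranalysis() function defined below. The toy
# dataset and the choice of two latent factors here are illustrative
# assumptions, not part of the original demo at the bottom of this file:
#
#   data = random.normal(0,1,(200,3))
#   data[:,2] = data[:,0] + 0.1*random.normal(0,1,200)   # a nearly redundant dimension
#   newData = factoranalysis(data,2)
#   print shape(newData)                                 # -> (200, 2)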
8 | 9 | # Stephen Marsland, 2008 10 | 11 | # The Factor Analysis algorithm 12 | from pylab import * 13 | from numpy import * 14 | 15 | def factoranalysis(y,nRedDim): 16 | Ndata = shape(y)[0] 17 | N = shape(y)[1] 18 | 19 | y = y-y.mean(axis=0) 20 | C = cov(transpose(y)) 21 | Cd = C.diagonal() 22 | Psi = Cd 23 | scaling = linalg.det(C)**(1./N) 24 | 25 | W = random.normal(0,sqrt(scaling/nRedDim),(N,nRedDim)) 26 | 27 | nits = 1000 28 | oldL = -inf 29 | 30 | for i in range(nits): 31 | 32 | # E-step 33 | A = dot(W,transpose(W)) + diag(Psi) 34 | logA = log(abs(linalg.det(A))) 35 | A = linalg.inv(A) 36 | 37 | WA = dot(transpose(W),A) 38 | WAC = dot(WA,C) 39 | Exx = eye(nRedDim) - dot(WA,W) + dot(WAC,transpose(WA)) 40 | 41 | # M-step 42 | W = dot(transpose(WAC),linalg.inv(Exx)) 43 | Psi = Cd - (dot(W,WAC)).diagonal() 44 | #Sigma1 = (dot(transpose(y),y) - dot(W,WAC)).diagonal()/Ndata 45 | 46 | tAC = (A*transpose(C)).sum() 47 | 48 | L = -N/2*log(2.*pi) -0.5*logA - 0.5*tAC 49 | if (L-oldL)<(1e-4): 50 | print "Stop",i 51 | break 52 | print L 53 | oldL = L 54 | A = linalg.inv(dot(W,transpose(W))+diag(Psi)) 55 | Ex = dot(transpose(A),W) 56 | 57 | return dot(y,Ex) 58 | 59 | data = array([[0.1,0.1],[0.2,0.2],[0.3,0.3],[0.35,0.3],[0.4,0.4],[0.6,0.4],[0.7,0.45],[0.75,0.4],[0.8,0.35]]) 60 | newData = factoranalysis(data,2) 61 | plot(newData[:,0],newData[:,1],'.') 62 | show() 63 | -------------------------------------------------------------------------------- /10 Dimension Reduction/floyd.py: -------------------------------------------------------------------------------- 1 | 2 | # Code from Chapter 10 of Machine Learning: An Algorithmic Perspective 3 | # by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html) 4 | 5 | # You are free to use, change, or redistribute the code in any way you wish for 6 | # non-commercial purposes, but please maintain the name of the original author. 7 | # This code comes with no warranty of any kind. 8 | 9 | # Stephen Marsland, 2008 10 | 11 | from numpy import * 12 | import time 13 | 14 | def floyd(): 15 | 16 | ndata = 100 17 | neighbours = zeros((ndata,10)) 18 | g = random.rand(ndata,ndata) 19 | for i in range(ndata): 20 | neighbours[i,:] = random.randint(0,100,10) 21 | 22 | t0 = time.time() 23 | print "Floyd's algorithm" 24 | for k in range(ndata): 25 | for i in range(ndata): 26 | for j in range(ndata): 27 | if g[i,j] > g[i,k] + g[k,j]: 28 | g[i,j] = g[i,k] + g[k,j] 29 | 30 | t1 = time.time() 31 | print "Complete" 32 | print t1-t0 33 | x = g.copy() 34 | 35 | t2 = time.time() 36 | q = g.copy() 37 | for i in range(ndata): 38 | for j in range(ndata): 39 | k = argmin(q[i,:]) 40 | while not(isnan(q[i,k])): 41 | q[i,k] = nan 42 | for l in neighbours[k,:]: 43 | possible = q[i,l] + q[l,k] 44 | if possible < q[i,k]: 45 | g[i,k] = possible 46 | k = argmin(q[i,:]) 47 | t3 = time.time() 48 | y = g 49 | print t3-t2 50 | return x,y 51 | -------------------------------------------------------------------------------- /10 Dimension Reduction/iris.py: -------------------------------------------------------------------------------- 1 | 2 | # Code from Chapter 10 of Machine Learning: An Algorithmic Perspective 3 | # by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html) 4 | 5 | # You are free to use, change, or redistribute the code in any way you wish for 6 | # non-commercial purposes, but please maintain the name of the original author. 7 | # This code comes with no warranty of any kind. 
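# A quick standalone check of the all-pairs shortest path update used in
# floyd.py above; the 4-node graph is a made-up example:
from numpy import array, inf
g = array([[0.,1.,inf,10.],[1.,0.,2.,inf],[inf,2.,0.,3.],[10.,inf,3.,0.]])
for k in range(4):
    for i in range(4):
        for j in range(4):
            if g[i,j] > g[i,k] + g[k,j]:
                g[i,j] = g[i,k] + g[k,j]
print g[0,3]   # 6.0 -- the route 0-1-2-3 beats the direct edge of length 10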
8 | 9 | # Stephen Marsland, 2008 10 | 11 | # Various dimensionality reductions running on the Iris dataset 12 | from pylab import * 13 | from numpy import * 14 | 15 | iris = loadtxt('../3 MLP/iris_proc.data',delimiter=',') 16 | iris[:,:4] = iris[:,:4]-iris[:,:4].mean(axis=0) 17 | imax = concatenate((iris.max(axis=0)*ones((1,5)),iris.min(axis=0)*ones((1,5))),axis=0).max(axis=0) 18 | iris[:,:4] = iris[:,:4]/imax[:4] 19 | labels = iris[:,4] 20 | 21 | order = range(shape(iris)[0]) 22 | random.shuffle(order) 23 | iris = iris[order,:] 24 | labels = labels[order,:] 25 | 26 | w0 = where(labels==0) 27 | w1 = where(labels==1) 28 | w2 = where(labels==2) 29 | 30 | #import lda 31 | #newData,w = lda.lda(iris,labels,2) 32 | # 33 | #plot(iris[w0,0],iris[w0,1],'ok') 34 | #plot(iris[w1,0],iris[w1,1],'^k') 35 | #plot(iris[w2,0],iris[w2,1],'vk') 36 | #axis([-1.5,1.8,-1.5,1.8]) 37 | #axis('off') 38 | #figure(2) 39 | #plot(newData[w0,0],newData[w0,1],'ok') 40 | #plot(newData[w1,0],newData[w1,1],'^k') 41 | #plot(newData[w2,0],newData[w2,1],'vk') 42 | #axis([-1.5,1.8,-1.5,1.8]) 43 | #axis('off') 44 | # 45 | #import pca 46 | #x,y,evals,evecs = pca.pca(iris,2) 47 | #figure(3) 48 | #plot(y[w0,0],y[w0,1],'ok') 49 | #plot(y[w1,0],y[w1,1],'^k') 50 | #plot(y[w2,0],y[w2,1],'vk') 51 | #axis('off') 52 | 53 | #import kernelpca 54 | #newData = kernelpca.kernelpca(iris,'gaussian',2) 55 | #figure(4) 56 | #plot(newData[w0,0],newData[w0,1],'ok') 57 | #plot(newData[w1,0],newData[w1,1],'^k') 58 | #plot(newData[w2,0],newData[w2,1],'vk') 59 | #axis('off') 60 | 61 | #import factoranalysis 62 | #newData = factoranalysis.factoranalysis(iris,2) 63 | ##print newData 64 | ##figure(5) 65 | #plot(newData[w0,0],newData[w0,1],'ok') 66 | #plot(newData[w1,0],newData[w1,1],'^k') 67 | #plot(newData[w2,0],newData[w2,1],'vk') 68 | #axis('off') 69 | 70 | #import lle 71 | #print shape(iris) 72 | #a,b,newData = lle.lle(iris,2,12) 73 | #print shape(newData) 74 | #print newData[w0,:] 75 | #print "---" 76 | #print newData[w1,:] 77 | #print "---" 78 | #print newData[w2,:] 79 | # 80 | #plot(newData[w0,0],newData[w0,1],'ok') 81 | #plot(newData[w1,0],newData[w1,1],'^k') 82 | #plot(newData[w2,0],newData[w2,1],'vk') 83 | #axis('off') 84 | 85 | import isomap 86 | print labels 87 | newData,newLabels = isomap.isomap(iris,2,100) 88 | print shape(newData) 89 | print newLabels 90 | w0 = where(newLabels==0) 91 | w1 = where(newLabels==1) 92 | w2 = where(newLabels==2) 93 | plot(newData[w0,0],newData[w0,1],'ok') 94 | plot(newData[w1,0],newData[w1,1],'^k') 95 | plot(newData[w2,0],newData[w2,1],'vk') 96 | axis('off') 97 | 98 | show() 99 | -------------------------------------------------------------------------------- /10 Dimension Reduction/isomap.py: -------------------------------------------------------------------------------- 1 | 2 | # Code from Chapter 10 of Machine Learning: An Algorithmic Perspective 3 | # by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html) 4 | 5 | # You are free to use, change, or redistribute the code in any way you wish for 6 | # non-commercial purposes, but please maintain the name of the original author. 7 | # This code comes with no warranty of any kind. 8 | 9 | # Stephen Marsland, 2008 10 | 11 | # The Isomap algorithm 12 | from pylab import * 13 | from numpy import * 14 | 15 | def swissroll(): 16 | # Make the swiss roll dataset 17 | N = 1000 18 | noise = 0.05 19 | 20 | t = 3.*math.pi/2 * (1. + 2.*random.rand(1,N)) 21 | h = 21. 
* random.rand(1,N) 22 | data = concatenate((t*cos(t),h,t*sin(t))) + noise*random.randn(3,N) 23 | return transpose(data), squeeze(t) 24 | 25 | def isomap(data,newdim=2,K=12,labels=None): 26 | 27 | ndata = shape(data)[0] 28 | ndim = shape(data)[1] 29 | d = zeros((ndata,ndata),dtype=float) 30 | 31 | # Compute the distance matrix 32 | # Inefficient -- not matrices 33 | for i in range(ndata): 34 | for j in range(i+1,ndata): 35 | for k in range(ndim): 36 | d[i,j] += (data[i,k] - data[j,k])**2 37 | d[i,j] = sqrt(d[i,j]) 38 | d[j,i] = d[i,j] 39 | 40 | # K-nearest neighbours 41 | indices = d.argsort() 42 | #notneighbours = indices[:,K+1:] 43 | neighbours = indices[:,:K+1] 44 | # Alternative: epsilon 45 | # epsilon = 0.1 46 | #neighbours = where(d<=epsilon) 47 | #notneighbours = where(d>epsilon) 48 | 49 | h = ones((ndata,ndata),dtype=float)*inf 50 | for i in range(ndata): 51 | h[i,neighbours[i,:]] = d[i,neighbours[i,:]] 52 | 53 | # Compute the full distance matrix over all paths 54 | print "Floyd's algorithm" 55 | for k in range(ndata): 56 | for i in range(ndata): 57 | for j in range(ndata): 58 | if h[i,j] > h[i,k] + h[k,j]: 59 | h[i,j] = h[i,k] + h[k,j] 60 | 61 | # print "Dijkstra's algorithm" 62 | # q = h.copy() 63 | # for i in range(ndata): 64 | # for j in range(ndata): 65 | # k = argmin(q[i,:]) 66 | # while not(isinf(q[i,k])): 67 | # q[i,k] = inf 68 | # for l in neighbours[k,:]: 69 | # possible = h[i,l] + h[l,k] 70 | # if possible < h[i,k]: 71 | # h[i,k] = possible 72 | # k = argmin(q[i,:]) 73 | # print "Complete" 74 | 75 | # remove lines full of infs 76 | x = isinf(h[:,0]).nonzero() 77 | if size(x)>0: 78 | print x 79 | if x[0][0]>0: 80 | new = h[0:x[0][0],:] 81 | newlabels = labels[0:x[0][0]] 82 | start = 1 83 | else: 84 | new = h[x[0][0]+1,:] 85 | newlabels = labels[x[0][0]+1] 86 | start = 2 87 | for i in range(start,size(x)): 88 | new = concatenate((new,h[x[0][i-1]+1:x[0][i],:]),axis=0) 89 | newlabels = concatenate((newlabels,labels[x[0][i-1]+1:x[0][i]]),axis=0) 90 | new = concatenate((new,h[x[0][i]+1:,:]),axis=0) 91 | newlabels = concatenate((newlabels,labels[x[0][i]+1:]),axis=0) 92 | 93 | new2 = new[:,0:x[0][0]] 94 | if x[0][0]>0: 95 | new2 = new[:,0:x[0][0]] 96 | start = 1 97 | else: 98 | new2 = new[:,x[0][0]+1] 99 | start = 2 100 | for i in range(start,size(x)): 101 | new2 = concatenate((new2,new[:,x[0][i-1]+1:x[0][i]]),axis=1) 102 | new2 = concatenate((new2,new[:,x[0][i]+1:]),axis=1) 103 | 104 | g = new2.copy() 105 | ndata = ndata - size(x) 106 | else: 107 | g = h.copy() 108 | newlabels = labels 109 | 110 | # Map computations, following by the dimensionality reduction 111 | M = -0.5*(g**2 - transpose(sum(g*g,axis=0) * ones((ndata,1))/ndata) - ones((ndata,1))* sum(g*g,axis=0)/ndata + sum(sum(g*g))/ndata**2) 112 | 113 | eval,evec = linalg.eig(M) 114 | eval = real(eval) 115 | ind = argsort(eval) 116 | eval = real(diag(eval[ind[-1::-1]])) 117 | evec = evec[:,ind[-1::-1]] 118 | y = real(dot(evec,transpose((sqrt(eval))))) 119 | print shape(y) 120 | print shape(eval), shape(evec) 121 | return y, newlabels 122 | 123 | data,t = swissroll() 124 | y,u = isomap(data) 125 | 126 | t -= t.min() 127 | t /= t.max() 128 | #scatter(y[:,0],y[:,1],c=t,cmap=cm.jet) 129 | scatter(y[:,1],y[:,2],s=50,c=t,cmap=cm.gray) 130 | #scatter(data[:,0],data[:,1],s=50,c=t,cmap=cm.gray) 131 | 132 | show() 133 | -------------------------------------------------------------------------------- /10 Dimension Reduction/kernelpca.py: -------------------------------------------------------------------------------- 1 | 2 | # Code 
from Chapter 10 of Machine Learning: An Algorithmic Perspective 3 | # by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html) 4 | 5 | # You are free to use, change, or redistribute the code in any way you wish for 6 | # non-commercial purposes, but please maintain the name of the original author. 7 | # This code comes with no warranty of any kind. 8 | 9 | # Stephen Marsland, 2008 10 | 11 | # The Kernel PCA algorithm 12 | 13 | from pylab import * 14 | from numpy import * 15 | 16 | def kernelmatrix(data,kernel,param=array([3,2])): 17 | 18 | if kernel=='linear': 19 | return dot(data,transpose(data)) 20 | elif kernel=='gaussian': 21 | K = zeros((shape(data)[0],shape(data)[0])) 22 | for i in range(shape(data)[0]): 23 | for j in range(i+1,shape(data)[0]): 24 | K[i,j] = sum((data[i,:]-data[j,:])**2) 25 | K[j,i] = K[i,j] 26 | return exp(-K**2/(2*param[0]**2)) 27 | elif kernel=='polynomial': 28 | return (dot(data,transpose(data))+param[0])**param[1] 29 | 30 | def kernelpca(data,kernel,redDim): 31 | 32 | nData = shape(data)[0] 33 | nDim = shape(data)[1] 34 | 35 | K = kernelmatrix(data,kernel) 36 | 37 | # Compute the transformed data 38 | D = sum(K,axis=0)/nData 39 | E = sum(D)/nData 40 | J = ones((nData,1))*D 41 | K = K - J - transpose(J) + E*ones((nData,nData)) 42 | 43 | # Perform the dimensionality reduction 44 | evals,evecs = linalg.eig(K) 45 | indices = argsort(evals) 46 | indices = indices[::-1] 47 | evecs = evecs[:,indices[:redDim]] 48 | evals = evals[indices[:redDim]] 49 | 50 | sqrtE = zeros((len(evals),len(evals))) 51 | for i in range(len(evals)): 52 | sqrtE[i,i] = sqrt(evals[i]) 53 | 54 | #print shape(sqrtE), shape(data) 55 | newData = transpose(dot(sqrtE,transpose(evecs))) 56 | 57 | return newData 58 | 59 | #data = array([[0.1,0.1],[0.2,0.2],[0.3,0.3],[0.35,0.3],[0.4,0.4],[0.6,0.4],[0.7,0.45],[0.75,0.4],[0.8,0.35]]) 60 | #newData = kernelpca(data,'gaussian',2) 61 | #plot(data[:,0],data[:,1],'o',newData[:,0],newData[:,0],'.') 62 | #show() 63 | -------------------------------------------------------------------------------- /10 Dimension Reduction/kpcademo.py: -------------------------------------------------------------------------------- 1 | 2 | # Code from Chapter 10 of Machine Learning: An Algorithmic Perspective 3 | # by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html) 4 | 5 | # You are free to use, change, or redistribute the code in any way you wish for 6 | # non-commercial purposes, but please maintain the name of the original author. 7 | # This code comes with no warranty of any kind. 
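# The Gaussian kernel matrix construction used by kernelmatrix() in
# kernelpca.py above, written out for three hand-picked points (sigma=3
# matches the default param there; the points are illustrative):
from numpy import array, zeros, exp, sum
pts = array([[0.,0.],[1.,0.],[0.,2.]])
K = zeros((3,3))
for i in range(3):
    for j in range(i+1,3):
        K[i,j] = sum((pts[i,:]-pts[j,:])**2)
        K[j,i] = K[i,j]
K = exp(-K**2/(2*3.**2))   # same form as kernelpca.py: K holds squared distances
print K                    # symmetric, with ones on the diagonal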
8 | 9 | # Stephen Marsland, 2008 10 | 11 | # Demonstration of PCA and kernel PCA on the circular dataset 12 | from pylab import * 13 | from numpy import * 14 | 15 | import pca 16 | import kernelpca 17 | 18 | data = zeros((150,2)) 19 | 20 | theta = random.normal(0,pi,50) 21 | r = random.normal(0,0.1,50) 22 | data[0:50,0] = r*cos(theta) 23 | data[0:50,1] = r*sin(theta) 24 | 25 | theta = random.normal(0,pi,50) 26 | r = random.normal(2,0.1,50) 27 | data[50:100,0] = r*cos(theta) 28 | data[50:100,1] = r*sin(theta) 29 | 30 | theta = random.normal(0,pi,50) 31 | r = random.normal(5,0.1,50) 32 | data[100:150,0] = r*cos(theta) 33 | data[100:150,1] = r*sin(theta) 34 | 35 | figure() 36 | plot(data[:50,0],data[:50,1],'ok') 37 | plot(data[50:100,0],data[50:100,1],'^k') 38 | plot(data[100:150,0],data[100:150,1],'vk') 39 | title('Original dataset') 40 | 41 | x,y,evals,evecs = pca.pca(data,2) 42 | figure() 43 | plot(x[:50,0],x[:50,1],'ok') 44 | plot(x[50:100,0],x[50:100,1],'^k') 45 | plot(x[100:150,0],x[100:150,1],'vk') 46 | title('Reconstructed points after PCA') 47 | 48 | figure() 49 | y = kernelpca.kernelpca(data,'gaussian',2) 50 | plot(y[:50,0],y[:50,1],'ok') 51 | plot(y[50:100,0],y[50:100,1],'^k') 52 | plot(y[100:150,0],y[100:150,1],'vk') 53 | title('Reconstructed points after kernel PCA') 54 | 55 | show() 56 | -------------------------------------------------------------------------------- /10 Dimension Reduction/lda.py: -------------------------------------------------------------------------------- 1 | 2 | # Code from Chapter 10 of Machine Learning: An Algorithmic Perspective 3 | # by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html) 4 | 5 | # You are free to use, change, or redistribute the code in any way you wish for 6 | # non-commercial purposes, but please maintain the name of the original author. 7 | # This code comes with no warranty of any kind. 
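# A usage sketch for lda() below, on two made-up Gaussian classes (the data
# and the one-dimensional projection are illustrative assumptions):
#
#   data = concatenate((random.normal(0,0.5,(20,2)),random.normal(2,0.5,(20,2))))
#   labels = concatenate((zeros(20),ones(20)))
#   newData,w = lda(data,labels,1)   # newData is 40x1, chosen to separate the classes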
8 | 9 | # Stephen Marsland, 2008 10 | 11 | # The LDA algorithm 12 | 13 | from pylab import * 14 | from numpy import * 15 | from scipy import linalg as la 16 | 17 | def lda(data,labels,redDim): 18 | 19 | # Centre data 20 | data -= data.mean(axis=0) 21 | nData = shape(data)[0] 22 | nDim = shape(data)[1] 23 | 24 | Sw = zeros((nDim,nDim)) 25 | Sb = zeros((nDim,nDim)) 26 | 27 | C = cov(transpose(data)) 28 | 29 | # Loop over classes 30 | classes = unique(labels) 31 | for i in range(len(classes)): 32 | # Find relevant datapoints 33 | indices = squeeze(where(labels==classes[i])) 34 | d = squeeze(data[indices,:]) 35 | classcov = cov(transpose(d)) 36 | Sw += float(shape(indices)[0])/nData * classcov 37 | 38 | Sb = C - Sw 39 | # Now solve for W 40 | # Compute eigenvalues, eigenvectors and sort into order 41 | #evals,evecs = linalg.eig(dot(linalg.pinv(Sw),sqrt(Sb))) 42 | evals,evecs = la.eig(Sw,Sb) 43 | indices = argsort(evals) 44 | indices = indices[::-1] 45 | evecs = evecs[:,indices] 46 | evals = evals[indices] 47 | w = evecs[:,:redDim] 48 | #print evals, w 49 | 50 | newData = dot(data,w) 51 | return newData,w 52 | 53 | #data = array([[0.1,0.1],[0.2,0.2],[0.3,0.3],[0.35,0.3],[0.4,0.4],[0.6,0.4],[0.7,0.45],[0.75,0.4],[0.8,0.35]]) 54 | #labels = array([0,0,0,0,0,1,1,1,1]) 55 | #newData,w = lda(data,labels,2) 56 | #print w 57 | #plot(data[:,0],data[:,1],'o',newData[:,0],newData[:,0],'.') 58 | #show() 59 | -------------------------------------------------------------------------------- /10 Dimension Reduction/lle.py: -------------------------------------------------------------------------------- 1 | 2 | # Code from Chapter 10 of Machine Learning: An Algorithmic Perspective 3 | # by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html) 4 | 5 | # You are free to use, change, or redistribute the code in any way you wish for 6 | # non-commercial purposes, but please maintain the name of the original author. 7 | # This code comes with no warranty of any kind. 
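# The heart of lda.py above is a generalized eigenproblem A v = lambda B v,
# solved by scipy's two-argument eig. A standalone check on a tiny made-up
# pair of matrices:
from numpy import array, dot, real, allclose
from scipy import linalg as la
A = array([[2.,0.],[0.,1.]])
B = array([[1.,0.],[0.,2.]])
evals,evecs = la.eig(A,B)            # solves A v = lambda B v
v = evecs[:,0]
print allclose(dot(A,v),real(evals[0])*dot(B,v))   # True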
8 | 9 | # Stephen Marsland, 2008 10 | 11 | # The Locally Linear Embedding algorithm, and the swissroll example 12 | from pylab import * 13 | from numpy import * 14 | 15 | def swissroll(): 16 | # Make the swiss roll dataset 17 | N = 1000 18 | noise = 0.05 19 | 20 | t = 3*math.pi/2 * (1 + 2*random.rand(1,N)) 21 | h = 21 * random.rand(1,N) 22 | data = concatenate((t*cos(t),h,t*sin(t))) + noise*random.randn(3,N) 23 | return transpose(data), squeeze(t) 24 | 25 | def lle(data,nRedDim=2,K=12): 26 | 27 | ndata = shape(data)[0] 28 | ndim = shape(data)[1] 29 | d = zeros((ndata,ndata),dtype=float) 30 | 31 | # Inefficient -- not matrices 32 | for i in range(ndata): 33 | for j in range(i+1,ndata): 34 | for k in range(ndim): 35 | d[i,j] += (data[i,k] - data[j,k])**2 36 | d[i,j] = sqrt(d[i,j]) 37 | d[j,i] = d[i,j] 38 | 39 | indices = d.argsort(axis=1) 40 | neighbours = indices[:,1:K+1] 41 | 42 | W = zeros((K,ndata),dtype=float) 43 | 44 | for i in range(ndata): 45 | Z = data[neighbours[i,:],:] - kron(ones((K,1)),data[i,:]) 46 | C = dot(Z,transpose(Z)) 47 | C = C+identity(K)*1e-3*trace(C) 48 | W[:,i] = transpose(linalg.solve(C,ones((K,1)))) 49 | W[:,i] = W[:,i]/sum(W[:,i]) 50 | 51 | M = eye(ndata,dtype=float) 52 | for i in range(ndata): 53 | w = transpose(ones((1,shape(W)[0]))*transpose(W[:,i])) 54 | j = neighbours[i,:] 55 | #print shape(w), shape(dot(w,transpose(w))), shape(M[i,j]) 56 | ww = dot(w,transpose(w)) 57 | for k in range(K): 58 | M[i,j[k]] -= w[k] 59 | M[j[k],i] -= w[k] 60 | for l in range(K): 61 | M[j[k],j[l]] += ww[k,l] 62 | 63 | evals,evecs = linalg.eig(M) 64 | ind = argsort(evals) 65 | y = evecs[:,ind[1:nRedDim+1]]*sqrt(ndata) 66 | return evals,evecs,y 67 | 68 | data,t = swissroll() 69 | evals,evecs,y = lle(data) 70 | 71 | t -= t.min() 72 | t /= t.max() 73 | scatter(y[:,0],y[:,1],s=50,c=t,cmap=cm.gray) 74 | axis('off') 75 | show() 76 | -------------------------------------------------------------------------------- /10 Dimension Reduction/pca.py: -------------------------------------------------------------------------------- 1 | 2 | # Code from Chapter 10 of Machine Learning: An Algorithmic Perspective 3 | # by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html) 4 | 5 | # You are free to use, change, or redistribute the code in any way you wish for 6 | # non-commercial purposes, but please maintain the name of the original author. 7 | # This code comes with no warranty of any kind. 8 | 9 | # Stephen Marsland, 2008 10 | 11 | # An algorithm to compute PCA. 
Not as fast as the NumPy implementation 12 | from pylab import * 13 | from numpy import * 14 | 15 | def pca(data,nRedDim=0,normalise=1): 16 | 17 | # Centre data 18 | m = mean(data,axis=0) 19 | data -= m 20 | 21 | # Covariance matrix 22 | C = cov(transpose(data)) 23 | 24 | # Compute eigenvalues and sort into descending order 25 | evals,evecs = linalg.eig(C) 26 | indices = argsort(evals) 27 | indices = indices[::-1] 28 | evecs = evecs[:,indices] 29 | evals = evals[indices] 30 | 31 | if nRedDim>0: 32 | evecs = evecs[:,:nRedDim] 33 | 34 | if normalise: 35 | for i in range(shape(evecs)[1]): 36 | evecs[:,i] / linalg.norm(evecs[:,i]) * sqrt(evals[i]) 37 | 38 | # Produce the new data matrix 39 | x = dot(transpose(evecs),transpose(data)) 40 | # Compute the original data again 41 | y=transpose(dot(evecs,x))+m 42 | return x,y,evals,evecs 43 | 44 | -------------------------------------------------------------------------------- /10 Dimension Reduction/pcademo.py: -------------------------------------------------------------------------------- 1 | 2 | # Code from Chapter 10 of Machine Learning: An Algorithmic Perspective 3 | # by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html) 4 | 5 | # You are free to use, change, or redistribute the code in any way you wish for 6 | # non-commercial purposes, but please maintain the name of the original author. 7 | # This code comes with no warranty of any kind. 8 | 9 | # Stephen Marsland, 2008 10 | 11 | # A simple example of PCA 12 | from pylab import * 13 | from numpy import * 14 | 15 | import pca 16 | 17 | x = random.normal(5,.5,1000) 18 | y = random.normal(3,1,1000) 19 | a = x*cos(pi/4) + y*sin(pi/4) 20 | b = -x*sin(pi/4) + y*cos(pi/4) 21 | 22 | plot(a,b,'.') 23 | xlabel('x') 24 | ylabel('y') 25 | title('Original dataset') 26 | data = zeros((1000,2)) 27 | data[:,0] = a 28 | data[:,1] = b 29 | 30 | x,y,evals,evecs = pca.pca(data,1) 31 | print y 32 | figure() 33 | plot(y[:,0],y[:,1],'.') 34 | xlabel('x') 35 | ylabel('y') 36 | title('Reconstructed data after PCA') 37 | show() 38 | -------------------------------------------------------------------------------- /11 Optimisation/CG.py: -------------------------------------------------------------------------------- 1 | 2 | # Code from Chapter 11 of Machine Learning: An Algorithmic Perspective 3 | # by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html) 4 | 5 | # You are free to use, change, or redistribute the code in any way you wish for 6 | # non-commercial purposes, but please maintain the name of the original author. 7 | # This code comes with no warranty of any kind. 
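# As the comment in pca.py above notes, the eigendecomposition route is not
# the fastest; an SVD of the centred data gives the same principal components.
# A standalone check of the equivalence (random data, illustrative only):
from numpy import random, mean, cov, transpose, linalg, sort
data = random.normal(0,1,(50,3))
data = data - mean(data,axis=0)
evals,evecs = linalg.eig(cov(transpose(data)))
u,s,vt = linalg.svd(data,full_matrices=False)
# eigenvalues of the covariance are the squared singular values over N-1
print sort(s**2/49.) - sort(evals.real)   # ~zero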
# Stephen Marsland, 2008

# The conjugate gradients algorithm
from numpy import *

def Jacobian(x):
    #return array([.4*x[0],2*x[1]])
    return array([x[0], 0.4*x[1], 1.2*x[2]])

def Hessian(x):
    #return array([[.2,0],[0,1]])
    return array([[1,0,0],[0,0.4,0],[0,0,1.2]])

def CG(x0):

    i=0
    k=0

    r = -Jacobian(x0)
    p=r

    betaTop = dot(r.transpose(),r)
    beta0 = betaTop

    iMax = 3
    epsilon = 10**(-2)
    jMax = 5

    # Restart every nDim iterations
    nRestart = shape(x0)[0]
    x = x0

    while i < iMax and betaTop > epsilon**2*beta0:
        j=0
        dp = dot(p.transpose(),p)
        alpha = (epsilon+1)**2
        # Newton-Raphson iteration
        while j < jMax and alpha**2 * dp > epsilon**2:
            # Line search
            alpha = -dot(Jacobian(x).transpose(),p) / (dot(p.transpose(),dot(Hessian(x),p)))
            print "N-R",x, alpha, p
            x = x + alpha * p
            j += 1
        print x
        # Now construct beta
        r = -Jacobian(x)
        print "r: ", r
        betaBottom = betaTop
        betaTop = dot(r.transpose(),r)
        beta = betaTop/betaBottom
        print "Beta: ",beta
        # Update the estimate
        p = r + beta*p
        print "p: ",p
        print "----"
        k += 1

        if k==nRestart or dot(r.transpose(),p) <= 0:
            p = r
            k = 0
            print "Restarting"
        i +=1

    print x

x0 = array([-2,2,-2])
CG(x0)
--------------------------------------------------------------------------------
/11 Optimisation/LevenbergMarquardt.py:
--------------------------------------------------------------------------------

# Code from Chapter 11 of Machine Learning: An Algorithmic Perspective
# by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html)

# You are free to use, change, or redistribute the code in any way you wish for
# non-commercial purposes, but please maintain the name of the original author.
# This code comes with no warranty of any kind.

# Stephen Marsland, 2008

# The Levenberg Marquardt algorithm
from numpy import *

def function(p):
    r = array([10*(p[1]-p[0]**2),(1-p[0])])
    fp = dot(transpose(r),r) #= 100*(p[1]-p[0]**2)**2 + (1-p[0])**2
    J = (array([[-20*p[0],10],[-1,0]]))
    grad = dot(transpose(J),transpose(r))
    return fp,r,grad,J

def lm(p0,tol=10**(-5),maxits=100):

    nvars=shape(p0)[0]
    nu=0.01
    p = p0
    fp,r,grad,J = function(p)
    e = sum(dot(transpose(r),r))
    nits = 0
    while nits<maxits and e>tol:
        nits += 1
        fp,r,grad,J = function(p)
        H=dot(transpose(J),J) + nu*eye(nvars)

        pnew = zeros(shape(p))
        nits2 = 0
        while (p!=pnew).all() and nits2<maxits:
            nits2 += 1
            # Compute the Levenberg-Marquardt step and evaluate the new estimate
            dp = squeeze(linalg.solve(H,grad))
            pnew = p - dp
            fpnew,rnew,gradnew,Jnew = function(pnew)
            enew = sum(dot(transpose(rnew),rnew))
            # Gain ratio: actual error reduction over the reduction the model predicts
            rho = (e - enew) / (0.5*sum(dp*(nu*dp + squeeze(grad))) + 1e-10)

        if rho>0:
            update = 1
            p = pnew
            e = enew
            if rho>0.25:
                nu=nu/10
        else:
            nu=nu*10
            update = 0
        print fp, p, e, linalg.norm(grad), nu

p0 = array([-1.92,2])
lm(p0)
--------------------------------------------------------------------------------
/11 Optimisation/LevenbergMarquardt_leastsq.py:
--------------------------------------------------------------------------------

# Code from Chapter 11 of Machine Learning: An Algorithmic Perspective
# by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html)

# You are free to use, change, or redistribute the code in any way you wish for
# non-commercial purposes, but please maintain the name of the original author.
# This code comes with no warranty of any kind.

# Stephen Marsland, 2008

# The Levenberg Marquardt algorithm solving a least-squares problem

from pylab import *
from numpy import *

def function(p,x,ydata):
    fp = p[0]*cos(p[1]*x)+ p[1]*sin([p[0]*x])
    r = ydata - fp
    J = transpose([-cos(p[0]*x)-p[1]*cos(p[0]*x)*x, p[0] * sin(p[1]*x)*x-sin(p[0]*x)])
    grad = dot(transpose(J),transpose(r))
    return fp,r,grad,J

def lm(p0,x,f,tol=10**(-5),maxits=100):

    nvars=shape(p0)[0]
    nu=0.01
    p = p0
    fp,r,grad,J = function(p,x,f)
    e = sum(dot(transpose(r),r))
    nits = 0
    while nits<maxits and e>tol:
        nits += 1

        # Compute current Jacobian and approximate Hessian
        fp,r,grad,J = function(p,x,f)
        H=dot(transpose(J),J) + nu*eye(nvars)
        pnew = zeros(shape(p))
        nits2 = 0
        while (p!=pnew).all() and nits2<maxits:
            nits2 += 1
            # Compute the Levenberg-Marquardt step and evaluate the new estimate
            dp = squeeze(linalg.solve(H,grad))
            pnew = p - dp
            fpnew,rnew,gradnew,Jnew = function(pnew,x,f)
            enew = sum(dot(transpose(rnew),rnew))
            # Gain ratio: actual error reduction over the reduction the model predicts
            rho = (e - enew) / (0.5*sum(dp*(nu*dp + squeeze(grad))) + 1e-10)

        if rho>0:
            # Keep new estimate
            p = pnew
            e = enew
            if rho>0.25:
                # Make trust region larger (reduce nu)
                nu=nu/10
        else:
            # Make trust region smaller (increase nu)
            nu=nu*10
        print p, e, linalg.norm(grad), nu
    return p

p0 = array([100.5,102.5]) #[ 100.0001126   101.99969709] 1078.36915936 8.87386341319e-06 1e-10 (8 itns)
#p0 = array([101,101]) #[ 100.88860713  101.12607589] 631.488571159 9.36938417155e-06 1e-67

p = array([100,102])

x = arange(0,2*pi,0.1)
y = p[0]*cos(p[1]*x)+ p[1]*sin([p[0]*x]) + random.rand(len(x))
p = lm(p0,x,y)
y1 = p[0]*cos(p[1]*x)+ p[1]*sin([p[0]*x]) #+ random.rand(len(x))

plot(x,squeeze(y),'-')
plot(x,squeeze(y1),'r--')
legend(['Actual Data','Fitted Data'])
show()
--------------------------------------------------------------------------------
/11 Optimisation/Newton.py:
--------------------------------------------------------------------------------

# Code from Chapter 11 of Machine Learning: An Algorithmic Perspective
# by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html)

# You are free to use, change, or redistribute the code in any way you wish for
# non-commercial purposes, but please maintain the name of the original author.
# This code comes with no warranty of any kind.

# Stephen Marsland, 2008

# Gradient Descent using Newton's method
from numpy import *

def Jacobian(x):
    #return array([.4*x[0],2*x[1]])
    return array([x[0], 0.4*x[1], 1.2*x[2]])

def Hessian(x):
    #return array([[.2,0],[0,1]])
    return array([[1,0,0],[0,0.4,0],[0,0,1.2]])

def Newton(x0):

    i = 0
    iMax = 10
    x = x0
    Delta = 1
    alpha = 1

    while i<iMax and Delta>10**(-5):
        p = -dot(linalg.inv(Hessian(x)),Jacobian(x))
        xOld = x
        x = x + alpha*p
        Delta = sum((x-xOld)**2)
        i += 1
        print x

x0 = array([-2,2,-2])
Newton(x0)
--------------------------------------------------------------------------------
/11 Optimisation/TSP.py:
--------------------------------------------------------------------------------

# Code from Chapter 11 of Machine Learning: An Algorithmic Perspective
# by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html)

# You are free to use, change, or redistribute the code in any way you wish for
# non-commercial purposes, but please maintain the name of the original author.
# This code comes with no warranty of any kind.
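# A check of Newton.py above: for that quadratic cost the gradient is
# J(x) = H x with constant Hessian H, so one full Newton step lands exactly
# on the minimum: x - H^{-1} J(x) = x - x = 0.
from numpy import array, dot, linalg
x0 = array([-2.,2.,-2.])
J = array([x0[0],0.4*x0[1],1.2*x0[2]])     # Jacobian(x0) from Newton.py
H = array([[1,0,0],[0,0.4,0],[0,0,1.2]])   # Hessian from Newton.py
print x0 - dot(linalg.inv(H),J)            # [ 0.  0.  0.]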
# Stephen Marsland, 2008

# A demonstration of four methods of solving the Travelling Salesman Problem
from numpy import *

def makeTSP(nCities):
    positions = 2*random.rand(nCities,2)-1;
    distances = zeros((nCities,nCities))

    for i in range(nCities):
        for j in range(i+1,nCities):
            distances[i,j] = sqrt((positions[i,0] - positions[j,0])**2 + (positions[i,1] - positions[j,1])**2);
            distances[j,i] = distances[i,j];

    return distances

def exhaustive(distances):
    nCities = shape(distances)[0]

    cityOrder = arange(nCities)

    distanceTravelled = 0
    for i in range(nCities-1):
        distanceTravelled += distances[cityOrder[i],cityOrder[i+1]]
    distanceTravelled += distances[cityOrder[nCities-1],0]

    for newOrder in permutation(range(nCities)):
        possibleDistanceTravelled = 0
        for i in range(nCities-1):
            possibleDistanceTravelled += distances[newOrder[i],newOrder[i+1]]
        possibleDistanceTravelled += distances[newOrder[nCities-1],0]

        if possibleDistanceTravelled < distanceTravelled:
            distanceTravelled = possibleDistanceTravelled
            cityOrder = newOrder

    return cityOrder, distanceTravelled

def permutation(order):
    order = tuple(order)
    if len(order)==1:
        yield order
    else:
        for i in range(len(order)):
            rest = order[:i] + order[i+1:]
            move = (order[i],)
            for smaller in permutation(rest):
                yield move + smaller

def greedy(distances):
    nCities = shape(distances)[0]
    distanceTravelled = 0

    # Need a version of the matrix we can trash
    dist = distances.copy()

    # Integer entries, since cityOrder is used to index the distance matrix
    cityOrder = zeros(nCities,dtype=int)
    cityOrder[0] = random.randint(nCities)
    dist[:,cityOrder[0]] = Inf

    for i in range(nCities-1):
        cityOrder[i+1] = argmin(dist[cityOrder[i],:])
        distanceTravelled += dist[cityOrder[i],cityOrder[i+1]]
        # Now exclude the chance of travelling to that city again
        dist[:,cityOrder[i+1]] = Inf

    # Now return to the original city
    distanceTravelled += distances[cityOrder[nCities-1],0]

    return cityOrder, distanceTravelled

def hillClimbing(distances):

    nCities = shape(distances)[0]

    cityOrder = arange(nCities)
    random.shuffle(cityOrder)

    distanceTravelled = 0
    for i in range(nCities-1):
        distanceTravelled += distances[cityOrder[i],cityOrder[i+1]]
    distanceTravelled += distances[cityOrder[nCities-1],0]

    for i in range(1000):
        # Choose cities to swap
        city1 = random.randint(nCities)
        city2 = random.randint(nCities)

        if city1 != city2:
            # Reorder the set of cities
            possibleCityOrder = cityOrder.copy()
            possibleCityOrder = where(possibleCityOrder==city1,-1,possibleCityOrder)
            possibleCityOrder = where(possibleCityOrder==city2,city1,possibleCityOrder)
            possibleCityOrder = where(possibleCityOrder==-1,city2,possibleCityOrder)

            # Work out the new distances
            # This can be done more efficiently
            newDistanceTravelled = 0
            for j in range(nCities-1):
                newDistanceTravelled += distances[possibleCityOrder[j],possibleCityOrder[j+1]]
            # Close the candidate tour (not the current one)
            newDistanceTravelled += distances[possibleCityOrder[nCities-1],0]

            if newDistanceTravelled < distanceTravelled:
                distanceTravelled = newDistanceTravelled
                cityOrder = possibleCityOrder

    return cityOrder, distanceTravelled


def simulatedAnnealing(distances):
    nCities = shape(distances)[0]

    cityOrder = arange(nCities)
    random.shuffle(cityOrder)

    distanceTravelled = 0
    for i in range(nCities-1):
        distanceTravelled += distances[cityOrder[i],cityOrder[i+1]]
    distanceTravelled += distances[cityOrder[nCities-1],0]

    T = 500
    c = 0.8
    nTests = 10

    while T>1:
        for i in range(nTests):
            # Choose cities to swap
            city1 = random.randint(nCities)
            city2 = random.randint(nCities)

            if city1 != city2:
                # Reorder the set of cities
                possibleCityOrder = cityOrder.copy()
                possibleCityOrder = where(possibleCityOrder==city1,-1,possibleCityOrder)
                possibleCityOrder = where(possibleCityOrder==city2,city1,possibleCityOrder)
                possibleCityOrder = where(possibleCityOrder==-1,city2,possibleCityOrder)

                # Work out the new distances
                # This can be done more efficiently
                newDistanceTravelled = 0
                for j in range(nCities-1):
                    newDistanceTravelled += distances[possibleCityOrder[j],possibleCityOrder[j+1]]
                # Close the candidate tour (not the current one)
                newDistanceTravelled += distances[possibleCityOrder[nCities-1],0]

                # Accept a better tour always, a worse one with the Metropolis probability
                if newDistanceTravelled < distanceTravelled or (distanceTravelled - newDistanceTravelled) > T*log(random.rand()):
                    distanceTravelled = newDistanceTravelled
                    cityOrder = possibleCityOrder

        # Annealing schedule
        T = c*T

    return cityOrder, distanceTravelled

def runAll():
    import time

    nCities = 5
    distances = makeTSP(nCities)

    print "Exhaustive search"
    start = time.time()
    print exhaustive(distances)
    finish = time.time()
    print finish-start

    print "Greedy search"
    start = time.time()
    print greedy(distances)
    finish = time.time()
    print finish-start

    print "Hill Climbing"
    start = time.time()
    print hillClimbing(distances)
    finish = time.time()
    print finish-start

    print "Simulated Annealing"
    start = time.time()
    print simulatedAnnealing(distances)
    finish = time.time()
    print finish-start

runAll()
--------------------------------------------------------------------------------
/11 Optimisation/steepest.py:
--------------------------------------------------------------------------------

# Code from Chapter 11 of Machine Learning: An Algorithmic Perspective
# by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html)

# You are free to use, change, or redistribute the code in any way you wish for
# non-commercial purposes, but please maintain the name of the original author.
# This code comes with no warranty of any kind.
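# The simulated annealing acceptance step in TSP.py above follows the
# Metropolis rule: a move that worsens the tour by d > 0 is accepted with
# probability exp(-d/T). A standalone sketch with illustrative numbers:
from numpy import exp, random
T = 500.
d = 20.                          # cost increase of a candidate swap
accept = random.rand() < exp(-d/T)
print accept                     # True about 96% of the time here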
# Stephen Marsland, 2008

# Gradient Descent using steepest descent

from numpy import *

def Jacobian(x):
    #return array([.4*x[0],2*x[1]])
    return array([x[0], 0.4*x[1], 1.2*x[2]])

def steepest(x0):

    i = 0
    iMax = 10
    x = x0
    Delta = 1
    alpha = 1

    while i<iMax and Delta>10**(-5):
        p = -Jacobian(x)
        xOld = x
        x = x + alpha*p
        Delta = sum((x-xOld)**2)
        print x
        i += 1

x0 = array([-2,2,-2])
steepest(x0)
--------------------------------------------------------------------------------
/12 Evolutionary/PBIL.py:
--------------------------------------------------------------------------------

# Code from Chapter 12 of Machine Learning: An Algorithmic Perspective
# by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html)

# You are free to use, change, or redistribute the code in any way you wish for
# non-commercial purposes, but please maintain the name of the original author.
# This code comes with no warranty of any kind.

# Stephen Marsland, 2008

# The Population Based Incremental Learning algorithm
# Comment and uncomment fitness functions as appropriate (as an import and the fitnessFunction variable)

from pylab import *
from numpy import *
#import fourpeaks as fF
import knapsack as fF

def PBIL():
    ion()

    populationSize = 100
    stringLength = 20
    eta = 0.005

    #fitnessFunction = 'fF.fourpeaks'
    fitnessFunction = 'fF.knapsack'
    p = 0.5*ones(stringLength)
    best = zeros(501,dtype=float)

    for count in range(501):
        # Generate samples
        population = random.rand(populationSize,stringLength)
        for i in range(stringLength):
            population[:,i] = where(population[:,i] best and fitness<500:
            best = fitness
            bestString = string
    print best
    print bestString

exhaustive()
--------------------------------------------------------------------------------
/12 Evolutionary/fourpeaks.py:
--------------------------------------------------------------------------------

# Code from Chapter 12 of Machine Learning: An Algorithmic Perspective
# by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html)

# You are free to use, change, or redistribute the code in any way you wish for
# non-commercial purposes, but please maintain the name of the original author.
# This code comes with no warranty of any kind.
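# A worked example of the two quantities measured inside fourpeaks() below:
# "start" is the index of the first 1 and "finish" counts the 1s after the
# last 0. For a 20-bit string of 17 zeros then 3 ones (made up for
# illustration) they are 17 and 3, so with T=15 the fitness is max(17,3)=17
# and the +100 bonus is not awarded:
from numpy import array, where, shape
pop = array([[0]*17 + [1]*3])
s = where(pop[0,:]==1)
f = where(pop[0,:]==0)
print s[0][0], shape(pop)[1] - f[-1][-1] - 1   # 17 3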
8 | 9 | # Stephen Marsland, 2008 10 | 11 | # The four peaks fitness function 12 | from numpy import * 13 | def fourpeaks(population): 14 | 15 | T = 15 16 | start = zeros((shape(population)[0],1)) 17 | finish = zeros((shape(population)[0],1)) 18 | 19 | fitness = zeros((shape(population)[0],1)) 20 | 21 | for i in range(shape(population)[0]): 22 | s = where(population[i,:]==1) 23 | f = where(population[i,:]==0) 24 | if size(s)>0: 25 | start = s[0][0] 26 | else: 27 | start = 0 28 | 29 | if size(f)>0: 30 | finish = shape(population)[1] - f[-1][-1] -1 31 | else: 32 | finish = 0 33 | 34 | if start>T and finish>T: 35 | fitness[i] = maximum(start,finish)+100 36 | else: 37 | fitness[i] = maximum(start,finish) 38 | 39 | fitness = squeeze(fitness) 40 | return fitness 41 | -------------------------------------------------------------------------------- /12 Evolutionary/ga.py: -------------------------------------------------------------------------------- 1 | 2 | # Code from Chapter 12 of Machine Learning: An Algorithmic Perspective 3 | # by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html) 4 | 5 | # You are free to use, change, or redistribute the code in any way you wish for 6 | # non-commercial purposes, but please maintain the name of the original author. 7 | # This code comes with no warranty of any kind. 8 | 9 | # Stephen Marsland, 2008 10 | 11 | 12 | # The Genetic algorithm 13 | # Comment and uncomment fitness functions as appropriate (as an import and the fitnessFunction variable) 14 | 15 | from pylab import * 16 | from numpy import * 17 | import knapsack as fF 18 | 19 | class ga: 20 | 21 | def __init__(self,stringLength,fitnessFunction,nEpochs,populationSize=100,mutationProb=-1,crossover='un',nElite=4,tournament=True): 22 | """ Constructor""" 23 | self.stringLength = stringLength 24 | 25 | # Population size should be even 26 | if mod(populationSize,2)==0: 27 | self.populationSize = populationSize 28 | else: 29 | self.populationSize = populationSize+1 30 | 31 | if mutationProb < 0: 32 | self.mutationProb = 1/stringLength 33 | else: 34 | self.mutationProb = mutationProb 35 | 36 | self.nEpochs = nEpochs 37 | 38 | self.fitnessFunction = fitnessFunction 39 | 40 | self.crossover = crossover 41 | self.nElite = nElite 42 | self.tournment = tournament 43 | 44 | self.population = random.rand(self.populationSize,self.stringLength) 45 | self.population = where(self.population<0.5,0,1) 46 | 47 | def runGA(self): 48 | """The basic loop""" 49 | ion() 50 | plotfig = figure 51 | bestfit = zeros(self.nEpochs) 52 | 53 | for i in range(self.nEpochs): 54 | # Compute fitness of the population 55 | fitness = eval(self.fitnessFunction)(self.population) 56 | 57 | # Pick parents -- can do in order since they are randomised 58 | newPopulation = self.fps(self.population,fitness) 59 | 60 | # Apply the genetic operators 61 | if self.crossover == 'sp': 62 | newPopulation = self.spCrossover(newPopulation) 63 | elif self.crossover == 'un': 64 | newPopulation = self.uniformCrossover(newPopulation) 65 | newPopulation = self.mutate(newPopulation) 66 | 67 | # Apply elitism and tournaments if using 68 | if self.nElite>0: 69 | newPopulation = self.elitism(self.population,newPopulation,fitness) 70 | 71 | if self.tournament: 72 | newPopulation = self.tournament(self.population,newPopulation,fitness,self.fitnessFunction) 73 | 74 | self.population = newPopulation 75 | bestfit[i] = fitness.max() 76 | 77 | if (mod(i,100)==0): 78 | print i, fitness.max() 79 | #plot([i],[fitness.max()],'r+') 80 | 
plot(bestfit,'kx-') 81 | show() 82 | 83 | def fps(self,population,fitness): 84 | 85 | # Scale fitness by total fitness 86 | fitness = fitness/sum(fitness) 87 | fitness = 10*fitness/fitness.max() 88 | 89 | # Put repeated copies of each string in according to fitness 90 | # Deal with strings with very low fitness 91 | j=0 92 | while round(fitness[j])<1: 93 | j = j+1 94 | 95 | newPopulation = kron(ones((round(fitness[j]),1)),population[j,:]) 96 | 97 | # Add multiple copies of strings into the newPopulation 98 | for i in range(j+1,self.populationSize): 99 | if round(fitness[i])>=1: 100 | newPopulation = concatenate((newPopulation,kron(ones((round(fitness[i]),1)),population[i,:])),axis=0) 101 | 102 | # Shuffle the order (note that there are still too many) 103 | indices = range(shape(newPopulation)[0]) 104 | random.shuffle(indices) 105 | newPopulation = newPopulation[indices[:self.populationSize],:] 106 | return newPopulation 107 | 108 | def spCrossover(self,population): 109 | # Single point crossover 110 | newPopulation = zeros(shape(population)) 111 | crossoverPoint = random.randint(0,self.stringLength,self.populationSize) 112 | for i in range(0,self.populationSize,2): 113 | newPopulation[i,:crossoverPoint[i]] = population[i,:crossoverPoint[i]] 114 | newPopulation[i+1,:crossoverPoint[i]] = population[i+1,:crossoverPoint[i]] 115 | newPopulation[i,crossoverPoint[i]:] = population[i+1,crossoverPoint[i]:] 116 | newPopulation[i+1,crossoverPoint[i]:] = population[i,crossoverPoint[i]:] 117 | return newPopulation 118 | 119 | def uniformCrossover(self,population): 120 | # Uniform crossover 121 | newPopulation = zeros(shape(population)) 122 | which = random.rand(self.populationSize,self.stringLength) 123 | which1 = which>=0.5 124 | for i in range(0,self.populationSize,2): 125 | newPopulation[i,:] = population[i,:]*which1[i,:] + population[i+1,:]*(1-which1[i,:]) 126 | newPopulation[i+1,:] = population[i,:]*(1-which1[i,:]) + population[i+1,:]*which1[i,:] 127 | return newPopulation 128 | 129 | def mutate(self,population): 130 | # Mutation 131 | whereMutate = random.rand(shape(population)[0],shape(population)[1]) 132 | population[where(whereMutate < self.mutationProb)] = 1 - population[where(whereMutate < self.mutationProb)] 133 | return population 134 | 135 | def elitism(self,oldPopulation,population,fitness): 136 | best = argsort(fitness) 137 | best = squeeze(oldPopulation[best[-self.nElite:],:]) 138 | indices = range(shape(population)[0]) 139 | random.shuffle(indices) 140 | population = population[indices,:] 141 | population[0:self.nElite,:] = best 142 | return population 143 | 144 | def tournament(self,oldPopulation,population,fitness,fitnessFunction): 145 | newFitness = eval(self.fitnessFunction)(population) 146 | for i in range(0,shape(population)[0],2): 147 | f = concatenate((fitness[i:i+2],newFitness[i:i+2]),axis=1) 148 | indices = argsort(f) 149 | if indices[-1]<2 and indices[-2]<2: 150 | population[i,:] = oldPopulation[i,:] 151 | population[i+1,:] = oldPopulation[i+1,:] 152 | elif indices[-1]<2: 153 | if indices[0]>=2: 154 | population[i+indices[0]-2,:] = oldPopulation[i+indices[-1]] 155 | else: 156 | population[i+indices[1]-2,:] = oldPopulation[i+indices[-1]] 157 | elif indices[-2]<2: 158 | if indices[0]>=2: 159 | population[i+indices[0]-2,:] = oldPopulation[i+indices[-2]] 160 | else: 161 | population[i+indices[1]-2,:] = oldPopulation[i+indices[-2]] 162 | return population 163 | 164 | -------------------------------------------------------------------------------- /12 
Evolutionary/greedyKnapsack.py: -------------------------------------------------------------------------------- 1 | 2 | # Code from Chapter 12 of Machine Learning: An Algorithmic Perspective 3 | # by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html) 4 | 5 | # You are free to use, change, or redistribute the code in any way you wish for 6 | # non-commercial purposes, but please maintain the name of the original author. 7 | # This code comes with no warranty of any kind. 8 | 9 | # Stephen Marsland, 2008 10 | 11 | # A greedy algorithm to solve the Knapsack problem 12 | from numpy import * 13 | 14 | def greedy(): 15 | maxSize = 500 16 | sizes = array([109.60,125.48,52.16,195.55,58.67,61.87,92.95,93.14,155.05,110.89,13.34,132.49,194.03,121.29,179.33,139.02,198.78,192.57,81.66,128.90]) 17 | 18 | sizes.sort() 19 | newSizes = sizes[-1:0:-1] 20 | space = maxSize 21 | 22 | while len(newSizes)>0 and space>newSizes[-1]: 23 | # Pick largest item that will fit 24 | item = where(space>newSizes)[0][0] 25 | print newSizes[item] 26 | space = space-newSizes[item] 27 | newSizes = concatenate((newSizes[:item],newSizes[item+1:])) 28 | print "Size = ",maxSize-space 29 | 30 | greedy() 31 | -------------------------------------------------------------------------------- /12 Evolutionary/knapsack.py: -------------------------------------------------------------------------------- 1 | 2 | # Code from Chapter 12 of Machine Learning: An Algorithmic Perspective 3 | # by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html) 4 | 5 | # You are free to use, change, or redistribute the code in any way you wish for 6 | # non-commercial purposes, but please maintain the name of the original author. 7 | # This code comes with no warranty of any kind. 8 | 9 | # Stephen Marsland, 2008 10 | 11 | # A fitness function for the Knapsack problem 12 | from numpy import * 13 | 14 | def knapsack(pop): 15 | maxSize = 500 16 | #sizes = array([193.71,60.15,89.08,88.98,15.39,238.14,68.78,107.47,119.66,183.70]) 17 | 18 | sizes = array([109.60,125.48,52.16,195.55,58.67,61.87,92.95,93.14,155.05,110.89,13.34,132.49,194.03,121.29,179.33,139.02,198.78,192.57,81.66,128.90]) 19 | 20 | fitness = sum(sizes*pop,axis=1) 21 | fitness = where(fitness>maxSize,500-2*(fitness-maxSize),fitness) 22 | 23 | return fitness 24 | -------------------------------------------------------------------------------- /12 Evolutionary/run_ga.py: -------------------------------------------------------------------------------- 1 | 2 | # Code from Chapter 12 of Machine Learning: An Algorithmic Perspective 3 | # by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html) 4 | 5 | # You are free to use, change, or redistribute the code in any way you wish for 6 | # non-commercial purposes, but please maintain the name of the original author. 7 | # This code comes with no warranty of any kind. 
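# The knapsack fitness defined in knapsack.py above (and used by the GA runner
# below) scores a 20-bit selection by total packed size, with anything over
# the 500 limit penalised to 500 - 2*(total - 500). Two hand-picked
# selections as a standalone check:
from numpy import array, sum, where
sizes = array([109.60,125.48,52.16,195.55,58.67,61.87,92.95,93.14,155.05,110.89,13.34,132.49,194.03,121.29,179.33,139.02,198.78,192.57,81.66,128.90])
pop = array([[1,1,1,0,1,1]+[0]*14,[1]*20])   # one feasible pick, one overfull
fitness = sum(sizes*pop,axis=1)
fitness = where(fitness>500,500-2*(fitness-500),fitness)
print fitness   # first ~407.78 (fits), second heavily penalised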
8 | 9 | # Stephen Marsland, 2008 10 | 11 | # A runner for the Genetic Algorithm 12 | import ga 13 | 14 | ga = ga.ga(20,'fF.knapsack',101,100,-1,'sp',4,True) 15 | ga.runGA() 16 | -------------------------------------------------------------------------------- /13 Reinforcement/SARSA.py: -------------------------------------------------------------------------------- 1 | 2 | # Code from Chapter 13 of Machine Learning: An Algorithmic Perspective 3 | # by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html) 4 | 5 | # You are free to use, change, or redistribute the code in any way you wish for 6 | # non-commercial purposes, but please maintain the name of the original author. 7 | # This code comes with no warranty of any kind. 8 | 9 | # Stephen Marsland, 2008 10 | 11 | # The basic SARSA algorithm with the Europe example 12 | 13 | from numpy import * 14 | 15 | def SARSA(): 16 | 17 | R = array([[-5,0,-inf,-inf,-inf,-inf],[0,-5,0,0,-inf,-inf],[-inf,0,-5,0,-inf,100],[-inf,0,0,-5,0,-inf],[-inf,-inf,-inf,0,-5,100],[-inf,-inf,0,-inf,-inf,0]]) 18 | t = array([[1,1,0,0,0,0],[1,1,1,1,0,0],[0,1,1,1,0,1],[0,1,1,1,1,0],[0,0,0,1,1,1],[0,0,1,0,1,1]]) 19 | 20 | 21 | 22 | nStates = shape(R)[0] 23 | nActions = shape(R)[1] 24 | Q = random.rand(nStates,nActions)*0.1-0.05 25 | mu = 0.7 26 | gamma = 0.4 27 | epsilon = 0.1 28 | nits = 0 29 | 30 | while nits < 1000: 31 | # Pick initial state 32 | s = random.randint(nStates) 33 | # epsilon-greedy 34 | if (random.rand()0: 54 | t[i,j,k,0] = i-1 55 | t[i,j,k,1] = j 56 | else: 57 | t[i,j,k,0] = i 58 | t[i,j,k,1] = j 59 | 60 | if i==1 and 1<=j<=5: 61 | t[i,j,k,0] = 0 62 | t[i,j,k,1] = 0 63 | else: 64 | if j>0: 65 | t[i,j,k,0] = i 66 | t[i,j,k,1] = j-1 67 | else: 68 | t[i,j,k,0] = i 69 | t[i,j,k,1] = j 70 | if i==0 and j==6: 71 | t[i,j,k,0] = 0 72 | t[i,j,k,1] = 0 73 | 74 | #print t[:,:,3,0] ,t[:,:,3,1] 75 | 76 | Q = random.random_sample(shape(R))*0.1-0.05 77 | mu = 0.7 78 | gamma = 0.4 79 | epsilon = 0.05 80 | nits = 0 81 | 82 | while nits < 1000: 83 | # Pick initial state 84 | s = array([0,0]) #array([random.randint(4),random.randint(7)]) 85 | 86 | r=-inf 87 | while r==-inf: 88 | # epsilon-greedy 89 | if (random.rand()0: 53 | t[i,j,k,0] = i-1 54 | t[i,j,k,1] = j 55 | else: 56 | t[i,j,k,0] = i 57 | t[i,j,k,1] = j 58 | 59 | if i==1 and 1<=j<=5: 60 | t[i,j,k,0] = 0 61 | t[i,j,k,1] = 0 62 | else: 63 | if j>0: 64 | t[i,j,k,0] = i 65 | t[i,j,k,1] = j-1 66 | else: 67 | t[i,j,k,0] = i 68 | t[i,j,k,1] = j 69 | if i==0 and j==6: 70 | t[i,j,k,0] = 0 71 | t[i,j,k,1] = 0 72 | 73 | #print t[:,:,3,0] ,t[:,:,3,1] 74 | 75 | #Q = random.random_sample(shape(R))*0.1-0.05 76 | Q = zeros(shape(R)) 77 | mu = 0.7 78 | gamma = 0.4 79 | epsilon = 0.05 80 | nits = 0 81 | 82 | while nits < 1000: 83 | # Pick initial state 84 | s = array([0,0]) #array([random.randint(4),random.randint(7)]) 85 | 86 | #print s, shape(s) 87 | #print shape(Q), shape(Q[s[0],s[1],:]) 88 | inEpisode = 1 89 | # Stop when the accepting state is reached 90 | while inEpisode: 91 | r=-inf 92 | while r==-inf: 93 | # epsilon-greedy 94 | if (random.rand()