├── Ch10
│   ├── PBIL.py
│   ├── billsfit.py
│   ├── exhaustiveKnapsack.py
│   ├── fourpeaks.py
│   ├── ga.py
│   ├── greedyKnapsack.py
│   ├── knapsack.py
│   ├── onemax.py
│   └── run_ga.py
├── Ch11
│   ├── SARSA.py
│   ├── SARSA_cliff.py
│   ├── TDZero.py
│   └── TDZero_cliff.py
├── Ch12
│   ├── dtree.py
│   ├── party.data
│   └── party.py
├── Ch13
│   ├── .DS_Store
│   ├── bagging.py
│   ├── boost.py
│   ├── car.data
│   ├── car.py
│   ├── car1.py
│   ├── dtree.py
│   ├── dtw.py
│   ├── mushroom.py
│   ├── party.py
│   └── randomforest.py
├── Ch14
│   ├── iris.py
│   ├── iris_somperc.py
│   ├── kmeans.py
│   ├── kmeansnet.py
│   ├── moredemos.py
│   ├── som.py
│   └── somdemo.py
├── Ch15
│   ├── BoxMuller.py
│   ├── Gibbs.py
│   ├── MH.py
│   ├── SIR.py
│   ├── importancesampling.py
│   ├── lcg.py
│   └── rejectionsampling.py
├── Ch16
│   ├── EKF.py
│   ├── Gibbs.py
│   ├── HMM.py
│   ├── Kalman.py
│   ├── Kalman_full.py
│   ├── MRF.py
│   ├── graphdemo.py
│   ├── particle_filter.py
│   ├── pftrack.py
│   ├── world.gif
│   └── world.png
├── Ch17
│   ├── binaryalphadigs.mat
│   ├── dbn.py
│   ├── hopfield.py
│   └── rbm.py
├── Ch18
│   ├── data.txt
│   ├── gp.py
│   ├── gpc.py
│   ├── gpcdemo.py
│   ├── plotdist.py
│   └── plotgp.py
├── Ch2
│   ├── gaussian.py
│   └── plotGaussian.py
├── Ch3
│   ├── autompg.py
│   ├── linreg.py
│   ├── linreg_logic_eg.py
│   ├── logic.py
│   ├── mnist.py
│   ├── pcn.py
│   ├── pcn_logic_eg.py
│   └── pima.py
├── Ch4
│   ├── PNOz.py
│   ├── iris.py
│   ├── iris_proc.data
│   ├── logic.py
│   ├── mlp.py
│   ├── mnist.py
│   └── sinewave.py
├── Ch5
│   ├── iris.py
│   ├── least_squares.py
│   └── rbf.py
├── Ch6
│   ├── ecoli.py
│   ├── factoranalysis.py
│   ├── floyd.py
│   ├── iris.py
│   ├── isomap.py
│   ├── kernelpca.py
│   ├── kpcademo.py
│   ├── lda.py
│   ├── lle.py
│   ├── pca.py
│   └── pcademo.py
├── Ch7
│   ├── GMM.py
│   ├── kdtree.py
│   ├── knn.py
│   └── knnSmoother.py
├── Ch8
│   ├── svm.py
│   ├── svmdemo.py
│   └── svmdemo2.py
├── Ch9
│   ├── CG.py
│   ├── LevenbergMarquardt.py
│   ├── LevenbergMarquardt_leastsq.py
│   ├── Newton.py
│   ├── TSP.py
│   ├── iris.py
│   ├── mlp_cg.py
│   └── steepest.py
├── Data
│   ├── PNoz.dat
│   ├── PROSTATE_TEST.TXT
│   ├── PROSTATE_TRAIN.TXT
│   ├── ruapehu.dat
│   └── shortecoli.dat
└── README.md
/Ch10/PBIL.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 10 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # The Population Based Incremental Learning algorithm
12 | # Comment and uncomment fitness functions as appropriate (as an import and the fitnessFunction variable)
13 |
14 | import pylab as pl
15 | import numpy as np
16 |
17 | #import fourpeaks as fF
18 | import knapsack as fF
19 |
20 | def PBIL():
21 | pl.ion()
22 |
23 | populationSize = 100
24 | stringLength = 20
25 | eta = 0.005
26 |
27 | #fitnessFunction = 'fF.fourpeaks'
28 | fitnessFunction = 'fF.knapsack'
29 | p = 0.5*np.ones(stringLength)
30 | best = np.zeros(501,dtype=float)
31 |
32 | for count in range(501):
33 | # Generate samples
34 | population = np.random.rand(populationSize,stringLength)
35 | for i in range(stringLength):
 36 |                 population[:,i] = np.where(population[:,i]<p[i],1,0)
--------------------------------------------------------------------------------
/Ch10/billsfit.py:
--------------------------------------------------------------------------------
 22 |     fitness = np.where(fitness>maxSize,500-2*(fitness-maxSize),fitness)
23 |
24 | return fitness
25 |
--------------------------------------------------------------------------------
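
PBIL.py stops just after the samples are generated; the remaining step of Population Based Incremental Learning is to evaluate the samples and nudge the probability vector p towards the fittest strings. A minimal sketch of that update (an illustration of the standard rule, not the book's code; the choice of the two fittest strings and the helper name pbil_update are assumptions):

    import numpy as np

    def pbil_update(p, population, fitness, eta=0.005):
        # Move each entry of p a little way towards the two fittest strings
        ordered = np.argsort(fitness)
        best_pair = population[ordered[-2:], :]
        return p*(1 - eta) + eta*0.5*(best_pair[0, :] + best_pair[1, :])
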
/Ch10/exhaustiveKnapsack.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 10 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # An exhaustive search to solve the Knapsack problem
12 | import numpy as np
13 |
14 | def exhaustive():
15 | maxSize = 500
16 | sizes = np.array([109.60,125.48,52.16,195.55,58.67,61.87,92.95,93.14,155.05,110.89,13.34,132.49,194.03,121.29,179.33,139.02,198.78,192.57,81.66,128.90])
17 |
18 | best = 0
19 |
20 | twos = np.arange(-len(sizes),0,1)
21 | twos = 2.0**twos
22 |
23 | for i in range(2**len(sizes)-1):
24 | string = np.remainder(np.floor(i*twos),2)
25 | fitness = np.sum(string*sizes)
26 | if fitness > best and fitness<500:
27 | best = fitness
28 | bestString = string
29 | print best
30 | print bestString
31 |
32 | exhaustive()
33 |
--------------------------------------------------------------------------------
/Ch10/fourpeaks.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 10 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # The four peaks fitness function
12 | import numpy as np
13 | def fourpeaks(population):
14 |
15 | T = 15
16 | start = np.zeros((np.shape(population)[0],1))
17 | finish = np.zeros((np.shape(population)[0],1))
18 |
19 | fitness = np.zeros((np.shape(population)[0],1))
20 |
21 | for i in range(np.shape(population)[0]):
22 | s = np.where(population[i,:]==1)
23 | f = np.where(population[i,:]==0)
24 | if np.size(s)>0:
25 | start = s[0][0]
26 | else:
27 | start = 0
28 |
29 | if np.size(f)>0:
30 | finish = np.shape(population)[1] - f[-1][-1] -1
31 | else:
32 | finish = 0
33 |
34 | if start>T and finish>T:
35 | fitness[i] = np.maximum(start,finish)+100
36 | else:
37 | fitness[i] = np.maximum(start,finish)
38 |
39 | fitness = np.squeeze(fitness)
40 | return fitness
41 |
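
The function scores a whole population at once: each row is a binary string and the result is one fitness per row. As the code shows, start counts the leading 0s (the index of the first 1) and finish counts the trailing 1s (positions after the last 0); the +100 bonus only applies when both exceed T=15. A minimal call sketch (assumed usage, matching how ga.py and PBIL.py evaluate their fitness functions):

    import numpy as np
    import fourpeaks

    pop = np.where(np.random.rand(4, 40) < 0.5, 0, 1)   # four random 40-bit strings
    print(fourpeaks.fourpeaks(pop))                      # four fitness values
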
--------------------------------------------------------------------------------
/Ch10/ga.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 10 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 |
12 | # The Genetic algorithm
13 | # Comment and uncomment fitness functions as appropriate (as an import and the fitnessFunction variable)
14 |
15 | import pylab as pl
16 | import numpy as np
17 | import fourpeaks as fF
18 |
19 | class ga:
20 |
21 | def __init__(self,stringLength,fitnessFunction,nEpochs,populationSize=100,mutationProb=-1,crossover='un',nElite=4,tournament=True):
22 | """ Constructor"""
23 | self.stringLength = stringLength
24 |
25 | # Population size should be even
26 | if np.mod(populationSize,2)==0:
27 | self.populationSize = populationSize
28 | else:
29 | self.populationSize = populationSize+1
30 |
31 | if mutationProb < 0:
 32 |             self.mutationProb = 1./stringLength   # float, so the default is 1/L rather than 0
33 | else:
34 | self.mutationProb = mutationProb
35 |
36 | self.nEpochs = nEpochs
37 |
38 | self.fitnessFunction = fitnessFunction
39 |
40 | self.crossover = crossover
41 | self.nElite = nElite
 42 |         self.useTournament = tournament
43 |
44 | self.population = np.random.rand(self.populationSize,self.stringLength)
45 | self.population = np.where(self.population<0.5,0,1)
46 |
47 | def runGA(self,plotfig):
48 | """The basic loop"""
49 | pl.ion()
50 | #plotfig = pl.figure()
51 | bestfit = np.zeros(self.nEpochs)
52 |
53 | for i in range(self.nEpochs):
54 | # Compute fitness of the population
55 | fitness = eval(self.fitnessFunction)(self.population)
56 |
57 | # Pick parents -- can do in order since they are randomised
58 | newPopulation = self.fps(self.population,fitness)
59 |
60 | # Apply the genetic operators
61 | if self.crossover == 'sp':
62 | newPopulation = self.spCrossover(newPopulation)
63 | elif self.crossover == 'un':
64 | newPopulation = self.uniformCrossover(newPopulation)
65 | newPopulation = self.mutate(newPopulation)
66 |
67 | # Apply elitism and tournaments if using
68 | if self.nElite>0:
69 | newPopulation = self.elitism(self.population,newPopulation,fitness)
70 |
 71 |             if self.useTournament:
72 | newPopulation = self.tournament(self.population,newPopulation,fitness,self.fitnessFunction)
73 |
74 | self.population = newPopulation
75 | bestfit[i] = fitness.max()
76 |
77 | if (np.mod(i,100)==0):
78 | print i, fitness.max()
79 | #pl.plot([i],[fitness.max()],'r+')
80 | pl.plot(bestfit,'kx-')
81 | #pl.show()
82 |
83 | def fps(self,population,fitness):
84 |
85 | # Scale fitness by total fitness
86 | fitness = fitness/np.sum(fitness)
87 | fitness = 10*fitness/fitness.max()
88 |
89 | # Put repeated copies of each string in according to fitness
90 | # Deal with strings with very low fitness
91 | j=0
92 | while np.round(fitness[j])<1:
93 | j = j+1
94 |
95 | newPopulation = np.kron(np.ones((np.round(fitness[j]),1)),population[j,:])
96 |
97 | # Add multiple copies of strings into the newPopulation
98 | for i in range(j+1,self.populationSize):
99 | if np.round(fitness[i])>=1:
100 | newPopulation = np.concatenate((newPopulation,np.kron(np.ones((np.round(fitness[i]),1)),population[i,:])),axis=0)
101 |
102 | # Shuffle the order (note that there are still too many)
103 | indices = range(np.shape(newPopulation)[0])
104 | np.random.shuffle(indices)
105 | newPopulation = newPopulation[indices[:self.populationSize],:]
106 | return newPopulation
107 |
108 | def spCrossover(self,population):
109 | # Single point crossover
110 | newPopulation = np.zeros(np.shape(population))
111 | crossoverPoint = np.random.randint(0,self.stringLength,self.populationSize)
112 | for i in range(0,self.populationSize,2):
113 | newPopulation[i,:crossoverPoint[i]] = population[i,:crossoverPoint[i]]
114 | newPopulation[i+1,:crossoverPoint[i]] = population[i+1,:crossoverPoint[i]]
115 | newPopulation[i,crossoverPoint[i]:] = population[i+1,crossoverPoint[i]:]
116 | newPopulation[i+1,crossoverPoint[i]:] = population[i,crossoverPoint[i]:]
117 | return newPopulation
118 |
119 | def uniformCrossover(self,population):
120 | # Uniform crossover
121 | newPopulation = np.zeros(np.shape(population))
122 | which = np.random.rand(self.populationSize,self.stringLength)
123 | which1 = which>=0.5
124 | for i in range(0,self.populationSize,2):
125 | newPopulation[i,:] = population[i,:]*which1[i,:] + population[i+1,:]*(1-which1[i,:])
126 | newPopulation[i+1,:] = population[i,:]*(1-which1[i,:]) + population[i+1,:]*which1[i,:]
127 | return newPopulation
128 |
129 | def mutate(self,population):
130 | # Mutation
131 | whereMutate = np.random.rand(np.shape(population)[0],np.shape(population)[1])
132 | population[np.where(whereMutate < self.mutationProb)] = 1 - population[np.where(whereMutate < self.mutationProb)]
133 | return population
134 |
135 | def elitism(self,oldPopulation,population,fitness):
136 | best = np.argsort(fitness)
137 | best = np.squeeze(oldPopulation[best[-self.nElite:],:])
138 | indices = range(np.shape(population)[0])
139 | np.random.shuffle(indices)
140 | population = population[indices,:]
141 | population[0:self.nElite,:] = best
142 | return population
143 |
144 | def tournament(self,oldPopulation,population,fitness,fitnessFunction):
145 | newFitness = eval(self.fitnessFunction)(population)
146 | for i in range(0,np.shape(population)[0],2):
147 |             f = np.concatenate((fitness[i:i+2],newFitness[i:i+2]),axis=0)
148 | indices = np.argsort(f)
149 | if indices[-1]<2 and indices[-2]<2:
150 | population[i,:] = oldPopulation[i,:]
151 | population[i+1,:] = oldPopulation[i+1,:]
152 | elif indices[-1]<2:
153 | if indices[0]>=2:
154 | population[i+indices[0]-2,:] = oldPopulation[i+indices[-1]]
155 | else:
156 | population[i+indices[1]-2,:] = oldPopulation[i+indices[-1]]
157 | elif indices[-2]<2:
158 | if indices[0]>=2:
159 | population[i+indices[0]-2,:] = oldPopulation[i+indices[-2]]
160 | else:
161 | population[i+indices[1]-2,:] = oldPopulation[i+indices[-2]]
162 | return population
163 |
164 |
--------------------------------------------------------------------------------
/Ch10/greedyKnapsack.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 10 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # A greedy algorithm to solve the Knapsack problem
12 | import numpy as np
13 |
14 | def greedy():
15 | maxSize = 500
16 | sizes = np.array([109.60,125.48,52.16,195.55,58.67,61.87,92.95,93.14,155.05,110.89,13.34,132.49,194.03,121.29,179.33,139.02,198.78,192.57,81.66,128.90])
17 |
18 | sizes.sort()
 19 |     newSizes = sizes[::-1]   # all items, largest first
20 | space = maxSize
21 |
22 | while len(newSizes)>0 and space>newSizes[-1]:
23 | # Pick largest item that will fit
24 | item = np.where(space>newSizes)[0][0]
25 | print newSizes[item]
26 | space = space-newSizes[item]
27 | newSizes = np.concatenate((newSizes[:item],newSizes[item+1:]))
28 | print "Size = ",maxSize-space
29 |
30 | greedy()
31 |
--------------------------------------------------------------------------------
/Ch10/knapsack.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 10 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # A fitness function for the Knapsack problem
12 | import numpy as np
13 |
14 | def knapsack(pop):
15 | maxSize = 500
16 | #sizes = np.array([193.71,60.15,89.08,88.98,15.39,238.14,68.78,107.47,119.66,183.70])
17 |
18 | sizes = np.array([109.60,125.48,52.16,195.55,58.67,61.87,92.95,93.14,155.05,110.89,13.34,132.49,194.03,121.29,179.33,139.02,198.78,192.57,81.66,128.90])
19 |
20 | fitness = np.sum(sizes*pop,axis=1)
21 | fitness = np.where(fitness>maxSize,500-2*(fitness-maxSize),fitness)
22 |
23 | return fitness
24 |
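
The np.where line is a soft penalty rather than a hard rejection: a selection whose items total 600 scores 500 - 2*(600 - 500) = 300, while a selection totalling 480 keeps its raw value of 480, so overweight packs stay in the population but rank below feasible ones. A quick check of that behaviour (a usage sketch, not part of the book's files):

    import numpy as np
    import knapsack

    pop = np.zeros((2, 20))
    pop[0, :4] = 1        # a light, feasible selection
    pop[1, :] = 1         # every item -- well over maxSize
    print(knapsack.knapsack(pop))
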
--------------------------------------------------------------------------------
/Ch10/onemax.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 10 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # A fitness function for the onemax problem
12 | import numpy as np
13 |
14 | def onemax(pop):
15 |
16 | fitness = np.sum(pop,axis=1)
17 |
18 | return fitness
19 |
--------------------------------------------------------------------------------
/Ch10/run_ga.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 10 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # A runner for the Genetic Algorithm
12 | import ga
13 | import pylab as pl
14 |
15 | pl.ion()
16 | pl.show()
17 |
18 | plotfig = pl.figure()
19 |
20 | ga = ga.ga(30,'fF.fourpeaks',301,100,-1,'un',4,True)
21 | ga.runGA(plotfig)
22 |
23 | pl.pause(0)
24 | #pl.show()
25 |
--------------------------------------------------------------------------------
/Ch11/SARSA.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 11 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # The basic SARSA algorithm with the Europe example
12 |
13 | import numpy as np
14 | def SARSA():
15 |
16 | R = np.array([[-5,0,-np.inf,-np.inf,-np.inf,-np.inf],[0,-5,0,0,-np.inf,-np.inf],[-np.inf,0,-5,0,-np.inf,100],[-np.inf,0,0,-5,0,-np.inf],[-np.inf,-np.inf,-np.inf,0,-5,100],[-np.inf,-np.inf,0,-np.inf,-np.inf,0]])
17 | t = np.array([[1,1,0,0,0,0],[1,1,1,1,0,0],[0,1,1,1,0,1],[0,1,1,1,1,0],[0,0,0,1,1,1],[0,0,1,0,1,1]])
18 |
19 | nStates = np.shape(R)[0]
20 | nActions = np.shape(R)[1]
21 | Q = np.random.rand(nStates,nActions)*0.1-0.05
22 | mu = 0.7
23 | gamma = 0.4
24 | epsilon = 0.1
25 | nits = 0
26 |
27 | while nits < 1000:
28 | # Pick initial state
29 | s = np.random.randint(nStates)
30 | # epsilon-greedy
 31 |         if (np.random.rand()<epsilon):
--------------------------------------------------------------------------------
/Ch11/SARSA_cliff.py:
--------------------------------------------------------------------------------
 53 |                     if i>0:
54 | t[i,j,k,0] = i-1
55 | t[i,j,k,1] = j
56 | else:
57 | t[i,j,k,0] = i
58 | t[i,j,k,1] = j
59 |
60 | if i==1 and 1<=j<=5:
61 | t[i,j,k,0] = 0
62 | t[i,j,k,1] = 0
63 | else:
64 | if j>0:
65 | t[i,j,k,0] = i
66 | t[i,j,k,1] = j-1
67 | else:
68 | t[i,j,k,0] = i
69 | t[i,j,k,1] = j
70 | if i==0 and j==6:
71 | t[i,j,k,0] = 0
72 | t[i,j,k,1] = 0
73 |
74 | #print t[:,:,3,0] ,t[:,:,3,1]
75 |
76 | Q = np.random.random_sample(np.shape(R))*0.1-0.05
77 | mu = 0.7
78 | gamma = 0.4
79 | epsilon = 0.05
80 | nits = 0
81 |
82 | while nits < 1000:
83 | # Pick initial state
84 | s = np.array([0,0]) #np.array([np.random.randint(4),np.random.randint(7)])
85 |
86 | r=-np.inf
87 | while r==-np.inf:
88 | # epsilon-greedy
 89 |             if (np.random.rand()<epsilon):
--------------------------------------------------------------------------------
/Ch11/TDZero_cliff.py:
--------------------------------------------------------------------------------
 52 |                     if i>0:
53 | t[i,j,k,0] = i-1
54 | t[i,j,k,1] = j
55 | else:
56 | t[i,j,k,0] = i
57 | t[i,j,k,1] = j
58 |
59 | if i==1 and 1<=j<=5:
60 | t[i,j,k,0] = 0
61 | t[i,j,k,1] = 0
62 | else:
63 | if j>0:
64 | t[i,j,k,0] = i
65 | t[i,j,k,1] = j-1
66 | else:
67 | t[i,j,k,0] = i
68 | t[i,j,k,1] = j
69 | if i==0 and j==6:
70 | t[i,j,k,0] = 0
71 | t[i,j,k,1] = 0
72 |
73 | #print t[:,:,3,0] ,t[:,:,3,1]
74 |
75 | #Q = np.random.random_sample(np.shape(R))*0.1-0.05
76 | Q = np.zeros(np.shape(R))
77 | mu = 0.7
78 | gamma = 0.4
79 | epsilon = 0.05
80 | nits = 0
81 |
82 | while nits < 1000:
83 | # Pick initial state
84 | s = np.array([0,0]) #np.array([np.random.randint(4),np.random.randint(7)])
85 |
86 | #print s, np.shape(s)
87 | #print np.shape(Q), np.shape(Q[s[0],s[1],:])
88 | inEpisode = 1
89 | # Stop when the accepting state is reached
90 | while inEpisode:
91 | r=-np.inf
92 | while r==-np.inf:
93 | # epsilon-greedy
 94 |                 if (np.random.rand()<epsilon):
--------------------------------------------------------------------------------
/Ch13/bagging.py:
--------------------------------------------------------------------------------
 61 |             if len(out)>0:
62 | for each in out:
63 | frequency[index] = outputs.count(each)
64 | index += 1
65 | decision.append(out[frequency.argmax()])
66 | else:
67 | decision.append(None)
68 | return decision
69 |
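
Only the majority-voting tail of bagging.py appears above. For orientation, the bag step it pairs with is ordinary bagging: draw a bootstrap sample of the training data (sampling with replacement) for each tree and grow one decision tree per sample, much as randomforest.rf below does but without restricting the features at each split. A rough sketch of that idea (standard bagging for illustration, not the author's code; it reuses the dtree interface that car.py calls):

    import numpy as np
    import dtree

    def bag_sketch(data, targets, features, nTrees):
        tree = dtree.dtree()
        nPoints = len(data)
        classifiers = []
        for i in range(nTrees):
            # Bootstrap sample: nPoints draws with replacement
            picks = np.random.randint(0, nPoints, nPoints)
            sample = [data[p] for p in picks]
            sampleTarget = [targets[p] for p in picks]
            classifiers.append(tree.make_tree(sample, sampleTarget, features))
        return classifiers
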
--------------------------------------------------------------------------------
/Ch13/car.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 13 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # An example of bagging on the Car Safety dataset
12 | import numpy as np
13 | import dtree
14 | import bagging
15 | import randomforest
16 |
17 | tree = dtree.dtree()
18 | bagger = bagging.bagger()
19 | forest = randomforest.randomforest()
20 |
21 | data,classes,features = tree.read_data('car.data')
22 |
23 | train = data[::2][:]
24 | test = data[1::2][:]
25 | trainc = classes[::2]
26 | testc = classes[1::2]
27 |
28 | t=tree.make_tree(train,trainc,features)
29 | out = tree.classifyAll(t,test)
30 | tree.printTree(t,' ')
31 |
32 | a = np.zeros(len(out))
33 | b = np.zeros(len(out))
34 | d = np.zeros(len(out))
35 |
36 | for i in range(len(out)):
37 | if testc[i] == 'good' or testc[i]== 'v-good':
38 | b[i] = 1
39 | if out[i] == testc[i]:
40 | d[i] = 1
41 | if out[i] == testc[i]:
42 | a[i] = 1
43 |
44 | print "Tree"
45 | print "Number correctly predicted",np.sum(a)
46 | print "Number of testpoints ",len(a)
47 | print "Percentage Accuracy ",np.sum(a)/len(a)*100.0
48 | print ""
49 | print "Number of cars rated as good or very good", np.sum(b)
50 | print "Number correctly identified as good or very good",np.sum(d)
51 | print "Percentage Accuracy",np.sum(d)/np.sum(b)*100.0
52 |
53 | c=bagger.bag(train,trainc,features,100)
54 | out = bagger.bagclass(c,test)
55 |
56 | a = np.zeros(len(out))
57 | b = np.zeros(len(out))
58 | d = np.zeros(len(out))
59 |
60 | for i in range(len(out)):
61 | if testc[i] == 'good' or testc[i]== 'v-good':
62 | b[i] = 1
63 | if out[i] == testc[i]:
64 | d[i] = 1
65 | if out[i] == testc[i]:
66 | a[i] = 1
67 | print "-----"
68 | print "Bagger"
69 | print "Number correctly predicted",np.sum(a)
70 | print "Number of testpoints ",len(a)
71 | print "Percentage Accuracy ",np.sum(a)/len(a)*100.0
72 | print ""
73 | print "Number of cars rated as good or very good", np.sum(b)
74 | print "Number correctly identified as good or very good",np.sum(d)
75 | print "Percentage Accuracy",np.sum(d)/np.sum(b)*100.0
76 |
 77 | f = forest.rf(train,trainc,features,100,200,2)
78 | out = forest.rfclass(f,test)
79 |
80 | a = np.zeros(len(out))
81 | b = np.zeros(len(out))
82 | d = np.zeros(len(out))
83 |
84 | for i in range(len(out)):
85 | if testc[i] == 'good' or testc[i]== 'v-good':
86 | b[i] = 1
87 | if out[i] == testc[i]:
88 | d[i] = 1
89 | if out[i] == testc[i]:
90 | a[i] = 1
91 | print "-----"
92 | print "Forest"
93 | print "Number correctly predicted",np.sum(a)
94 | print "Number of testpoints ",len(a)
95 | print "Percentage Accuracy ",np.sum(a)/len(a)*100.0
96 | print ""
97 | print "Number of cars rated as good or very good", np.sum(b)
98 | print "Number correctly identified as good or very good",np.sum(d)
99 | print "Percentage Accuracy",np.sum(d)/np.sum(b)*100.0
100 |
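
The same scoring block is written out three times above (and again in car1.py). If you adapt the script, it factors naturally into a helper along these lines (a sketch, not part of the original file; it reports the same quantities):

    import numpy as np

    def report(name, out, testc):
        correct = np.array([o == t for o, t in zip(out, testc)])
        good = np.array([t in ('good', 'v-good') for t in testc])
        print(name)
        print("Correct: %d / %d (%.1f%%)" % (correct.sum(), len(out), 100.0*correct.mean()))
        print("Good or v-good correctly identified: %d / %d" % ((correct & good).sum(), good.sum()))
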
--------------------------------------------------------------------------------
/Ch13/car1.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 13 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | import dtree
12 | import randomforest
13 | tree = dtree.dtree()
14 | forest = randomforest.randomforest()
15 | data,classes,features = tree.read_data('car.data')
16 | train = data[::2][:]
17 | test = data[1::2][:]
18 | trainc = classes[::2]
19 | testc = classes[1::2]
 20 | f = forest.rf(train,trainc,features,50,100,2,maxlevel=3)
 21 | #f = forest.rf(train,trainc,features,100,200,2)
22 | out = forest.rfclass(f,test)
23 |
24 | import numpy as np
25 |
26 | a = np.zeros(len(out))
27 | b = np.zeros(len(out))
28 | d = np.zeros(len(out))
29 |
30 | for i in range(len(out)):
31 | if testc[i] == 'good' or testc[i]== 'v-good':
32 | b[i] = 1
33 | if out[i] == testc[i]:
34 | d[i] = 1
35 | if out[i] == testc[i]:
36 | a[i] = 1
37 | print "-----"
38 | print "Forest"
39 | print "Number correctly predicted",np.sum(a)
40 | print "Number of testpoints ",len(a)
41 | print "Percentage Accuracy ",np.sum(a)/len(a)*100.0
42 | print ""
43 | print "Number of cars rated as good or very good", np.sum(b)
44 | print "Number correctly identified as good or very good",np.sum(d)
45 | print "Percentage Accuracy",np.sum(d)/np.sum(b)*100.0
46 |
--------------------------------------------------------------------------------
/Ch13/mushroom.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 13 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # Comparison of stumping and bagging on the mushroom dataset
12 | import numpy as np
13 | import dtw
14 | import bagging
15 | import randomforest
16 |
17 | tree = dtw.dtree()
18 | bagger = bagging.bagger()
19 | forest = randomforest.randomforest()
20 | mushroom,classes,features = tree.read_data('agaricus-lepiota.data')
21 |
22 | w = np.ones((np.shape(mushroom)[0]),dtype = float)/np.shape(mushroom)[0]
23 |
24 | f = forest.rf(mushroom,classes,features,10,7,2)
25 | print forest.rfclass(f,mushroom)
26 |
27 | t=tree.make_tree(mushroom,w,classes,features,1)
28 | tree.printTree(t,' ')
29 |
30 | print "Tree Stump Prediction"
31 | print tree.classifyAll(t,mushroom)
32 | print "True Classes"
33 | print classes
34 |
35 | c=bagger.bag(mushroom,classes,features,20)
36 | print "Bagged Results"
37 | print bagger.bagclass(c,mushroom)
38 |
--------------------------------------------------------------------------------
/Ch13/party.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 13 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # Comparison of stumping and bagging on the Party dataset
12 | import numpy as np
13 | #import dtree
14 | import dtw
15 | import bagging
16 | import randomforest
17 |
18 | tree = dtw.dtree()
19 | #tree = dtree.dtree()
20 | bagger = bagging.bagger()
21 | forest = randomforest.randomforest()
 22 | party,classes,features = tree.read_data('../Ch12/party.data')
23 |
24 | #w = np.random.rand((np.shape(party)[0]))/np.shape(party)[0]
25 | w = np.ones((np.shape(party)[0]),dtype = float)/np.shape(party)[0]
26 |
27 | f = forest.rf(party,classes,features,10,7,2,maxlevel=2)
28 | print "RF prediction"
29 | print forest.rfclass(f,party)
30 |
31 | #t=tree.make_tree(party,classes,features)
32 | t=tree.make_tree(party,w,classes,features)
33 | #tree.printTree(t,' ')
34 | print "Decision Tree prediction"
35 | print tree.classifyAll(t,party)
36 |
37 | print "Tree Stump Prediction"
38 | print tree.classifyAll(t,party)
39 |
40 | c=bagger.bag(party,classes,features,20)
41 | print "Bagged Results"
42 | print bagger.bagclass(c,party)
43 |
44 | print "True Classes"
45 | print classes
46 |
--------------------------------------------------------------------------------
/Ch13/randomforest.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 13 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2014
10 |
11 | import numpy as np
12 | import dtree
13 |
14 | class randomforest:
15 |
16 | """The random forest algorithm based on the decision tree of Chapter 6"""
17 | def __init__(self):
18 | """ Constructor """
19 | self.tree = dtree.dtree()
20 |
21 |
22 | def rf(self,data,targets,features,nTrees,nSamples,nFeatures,maxlevel=5):
23 |
24 | nPoints = np.shape(data)[0]
25 | nDim = np.shape(data)[1]
26 | self.nSamples = nSamples
27 | self.nTrees = nTrees
28 |
29 | classifiers = []
30 |
31 | for i in range(nTrees):
32 | print i
33 | # Compute bootstrap samples
34 | samplePoints = np.random.randint(0,nPoints,(nPoints,nSamples))
35 |
36 | for j in range(nSamples):
37 | sample = []
38 | sampleTarget = []
39 | for k in range(nPoints):
40 | sample.append(data[samplePoints[k,j]])
41 | sampleTarget.append(targets[samplePoints[k,j]])
42 | # Train classifiers
43 | classifiers.append(self.tree.make_tree(sample,sampleTarget,features,maxlevel,forest=nFeatures))
44 | return classifiers
45 |
46 | def rfclass(self,classifiers,data):
47 |
48 | decision = []
49 | # Majority voting
50 | for j in range(len(data)):
51 | outputs = []
52 | #print data[j]
53 | for i in range(self.nTrees):
54 | out = self.tree.classify(classifiers[i],data[j])
55 | if out is not None:
56 | outputs.append(out)
57 | # List the possible outputs
58 | out = []
59 | for each in outputs:
60 | if out.count(each)==0:
61 | out.append(each)
62 | frequency = np.zeros(len(out))
63 |
64 | index = 0
65 | if len(out)>0:
66 | for each in out:
67 | frequency[index] = outputs.count(each)
68 | index += 1
69 | decision.append(out[frequency.argmax()])
70 | else:
71 | decision.append(None)
72 | return decision
73 |
--------------------------------------------------------------------------------
/Ch14/iris.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 14 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # Examples of using the k-means and SOM algorithms on the Iris dataset
12 |
13 | import pylab as pl
14 | import numpy as np
15 |
 16 | iris = np.loadtxt('../Ch4/iris_proc.data',delimiter=',')
17 | iris[:,:4] = iris[:,:4]-iris[:,:4].mean(axis=0)
18 | imax = np.concatenate((iris.max(axis=0)*np.ones((1,5)),iris.min(axis=0)*np.ones((1,5))),axis=0).max(axis=0)
19 | iris[:,:4] = iris[:,:4]/imax[:4]
20 |
21 | target = iris[:,4]
22 |
23 | order = range(np.shape(iris)[0])
24 | np.random.shuffle(order)
25 | iris = iris[order,:]
26 | target = target[order]
27 |
28 | train = iris[::2,0:4]
29 | traint = target[::2]
30 | valid = iris[1::4,0:4]
31 | validt = target[1::4]
32 | test = iris[3::4,0:4]
33 | testt = target[3::4]
34 |
35 | #print train.max(axis=0), train.min(axis=0)
36 |
37 | import kmeansnet
38 | #import kmeans as kmeansnet
39 | net = kmeansnet.kmeans(3,train)
40 | net.kmeanstrain(train)
41 | cluster = net.kmeansfwd(test)
42 | print 1.*cluster
43 | print iris[3::4,4]
44 |
45 | import som
46 | net = som.som(6,6,train)
47 | net.somtrain(train,400)
48 |
49 | best = np.zeros(np.shape(train)[0],dtype=int)
50 | for i in range(np.shape(train)[0]):
51 | best[i],activation = net.somfwd(train[i,:])
52 |
53 | pl.plot(net.map[0,:],net.map[1,:],'k.',ms=15)
54 | where = pl.find(traint == 0)
55 | pl.plot(net.map[0,best[where]],net.map[1,best[where]],'rs',ms=30)
56 | where = pl.find(traint == 1)
57 | pl.plot(net.map[0,best[where]],net.map[1,best[where]],'gv',ms=30)
58 | where = pl.find(traint == 2)
59 | pl.plot(net.map[0,best[where]],net.map[1,best[where]],'b^',ms=30)
60 | pl.axis([-0.1,1.1,-0.1,1.1])
61 | pl.axis('off')
62 | pl.figure(2)
63 |
64 | best = np.zeros(np.shape(test)[0],dtype=int)
65 | for i in range(np.shape(test)[0]):
66 | best[i],activation = net.somfwd(test[i,:])
67 |
68 | pl.plot(net.map[0,:],net.map[1,:],'k.',ms=15)
69 | where = pl.find(testt == 0)
70 | pl.plot(net.map[0,best[where]],net.map[1,best[where]],'rs',ms=30)
71 | where = pl.find(testt == 1)
72 | pl.plot(net.map[0,best[where]],net.map[1,best[where]],'gv',ms=30)
73 | where = pl.find(testt == 2)
74 | pl.plot(net.map[0,best[where]],net.map[1,best[where]],'b^',ms=30)
75 | pl.axis([-0.1,1.1,-0.1,1.1])
76 | pl.axis('off')
77 | pl.show()
78 |
--------------------------------------------------------------------------------
/Ch14/iris_somperc.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 14 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # Examples of using the k-means and SOM algorithms on the Iris dataset
12 |
13 | import pylab as pl
14 | import numpy as np
15 |
 16 | iris = np.loadtxt('../Ch4/iris_proc.data',delimiter=',')
17 | iris[:,:4] = iris[:,:4]-iris[:,:4].mean(axis=0)
18 | imax = np.concatenate((iris.max(axis=0)*np.ones((1,5)),iris.min(axis=0)*np.ones((1,5))),axis=0).max(axis=0)
19 | iris[:,:4] = iris[:,:4]/imax[:4]
20 |
21 | target = iris[:,4]
22 |
23 | order = range(np.shape(iris)[0])
24 | np.random.shuffle(order)
25 | iris = iris[order,:]
26 | target = target[order]
27 |
28 | train = iris[::2,0:4]
29 | traint = target[::2]
30 | valid = iris[1::4,0:4]
31 | validt = target[1::4]
32 | test = iris[3::4,0:4]
33 | testt = target[3::4]
34 |
35 | #print train.max(axis=0), train.min(axis=0)
36 |
37 | import som
38 | net = som.som(6,6,train)
39 | net.somtrain(train,400)
40 |
41 | best = np.zeros(np.shape(train)[0],dtype=int)
42 | for i in range(np.shape(train)[0]):
43 | best[i],activation = net.somfwd(train[i,:])
44 |
45 | pl.plot(net.map[0,:],net.map[1,:],'k.',ms=15)
46 | where = pl.find(traint == 0)
47 | pl.plot(net.map[0,best[where]],net.map[1,best[where]],'rs',ms=30)
48 | where = pl.find(traint == 1)
49 | pl.plot(net.map[0,best[where]],net.map[1,best[where]],'gv',ms=30)
50 | where = pl.find(traint == 2)
51 | pl.plot(net.map[0,best[where]],net.map[1,best[where]],'b^',ms=30)
52 | pl.axis([-0.1,1.1,-0.1,1.1])
53 | pl.axis('off')
54 | pl.figure(2)
55 |
56 | best = np.zeros(np.shape(test)[0],dtype=int)
57 | for i in range(np.shape(test)[0]):
58 | best[i],activation = net.somfwd(test[i,:])
59 |
60 | pl.plot(net.map[0,:],net.map[1,:],'k.',ms=15)
61 | where = pl.find(testt == 0)
62 | pl.plot(net.map[0,best[where]],net.map[1,best[where]],'rs',ms=30)
63 | where = pl.find(testt == 1)
64 | pl.plot(net.map[0,best[where]],net.map[1,best[where]],'gv',ms=30)
65 | where = pl.find(testt == 2)
66 | pl.plot(net.map[0,best[where]],net.map[1,best[where]],'b^',ms=30)
67 | pl.axis([-0.1,1.1,-0.1,1.1])
68 | pl.axis('off')
69 | pl.show()
70 |
--------------------------------------------------------------------------------
/Ch14/kmeans.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 14 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | import numpy as np
12 |
13 | class kmeans:
14 | """ The k-Means algorithm"""
15 | def __init__(self,k,data):
16 |
17 | self.nData = np.shape(data)[0]
18 | self.nDim = np.shape(data)[1]
19 | self.k = k
20 |
21 | def kmeanstrain(self,data,maxIterations=10):
22 |
23 | # Find the minimum and maximum values for each feature
24 | minima = data.min(axis=0)
25 | maxima = data.max(axis=0)
26 |
27 | # Pick the centre locations randomly
28 | self.centres = np.random.rand(self.k,self.nDim)*(maxima-minima)+minima
29 | oldCentres = np.random.rand(self.k,self.nDim)*(maxima-minima)+minima
30 |
31 | count = 0
32 | #print centres
 33 |         while np.sum(np.sum(oldCentres-self.centres))!= 0 and count<maxIterations:
 50 |             if np.sum(thisCluster)>0:
51 | self.centres[j,:] = np.sum(data*thisCluster,axis=0)/np.sum(thisCluster)
52 | #plot(data[:,0],data[:,1],'kx')
53 | #plot(centres[:,0],centres[:,1],'ro')
54 | return self.centres
55 |
56 | def kmeansfwd(self,data):
57 |
58 | nData = np.shape(data)[0]
59 | # Compute distances
60 | distances = np.ones((1,nData))*np.sum((data-self.centres[0,:])**2,axis=1)
61 | for j in range(self.k-1):
62 | distances = np.append(distances,np.ones((1,nData))*np.sum((data-self.centres[j+1,:])**2,axis=1),axis=0)
63 |
64 | # Identify the closest cluster
65 | cluster = distances.argmin(axis=0)
66 | cluster = np.transpose(cluster*np.ones((1,nData)))
67 |
68 | return cluster
69 |
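
A small usage sketch for the class on synthetic data (the blob positions are only for illustration):

    import numpy as np
    import kmeans

    data = np.concatenate((np.random.randn(50, 2), np.random.randn(50, 2) + 5))
    net = kmeans.kmeans(2, data)
    centres = net.kmeanstrain(data)
    print(centres)                  # the two centres, roughly (0,0) and (5,5)
    print(net.kmeansfwd(data[:5]))  # cluster index for the first five points
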
--------------------------------------------------------------------------------
/Ch14/kmeansnet.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 14 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | import numpy as np
12 |
13 | class kmeans:
14 | """The k-Means Algorithm implemented as a neural network"""
15 | def __init__(self,k,data,nEpochs=1000,eta=0.25):
16 |
17 | self.nData = np.shape(data)[0]
18 | self.nDim = np.shape(data)[1]
19 | self.k = k
20 | self.nEpochs = nEpochs
21 | self.weights = np.random.rand(self.nDim,self.k)
22 | self.eta = eta
23 |
24 | def kmeanstrain(self,data):
25 | # Preprocess data (won't work if (0,0,...0) is in data)
26 | normalisers = np.sqrt(np.sum(data**2,axis=1))*np.ones((1,np.shape(data)[0]))
27 | data = np.transpose(np.transpose(data)/normalisers)
28 |
29 | for i in range(self.nEpochs):
30 | for j in range(self.nData):
31 | activation = np.sum(self.weights*np.transpose(data[j:j+1,:]),axis=0)
32 | winner = np.argmax(activation)
 33 |                 self.weights[:,winner] += self.eta * (data[j,:] - self.weights[:,winner])
34 |
35 | def kmeansfwd(self,data):
36 | best = np.zeros(np.shape(data)[0])
37 | for i in range(np.shape(data)[0]):
38 | activation = np.sum(self.weights*np.transpose(data[i:i+1,:]),axis=0)
39 | best[i] = np.argmax(activation)
40 | return best
41 |
42 |
--------------------------------------------------------------------------------
/Ch14/moredemos.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 14 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # Demonstration of the SOM algorithm on the Wine dataset (and the e-coli dataset)
12 | import pylab as pl
13 | import numpy as np
14 | import som
15 |
16 | wine = np.loadtxt('wine.data',delimiter=',')
17 |
18 | classes = wine[:,0]
19 | data = wine[:,1:]
20 | data -= np.mean(data,axis=0)
21 | data /= data.max(axis=0)
22 |
23 | #ecoli = loadtxt('shortecoli.dat')
24 | #classes = ecoli[:,7:]
25 | #data = ecoli[:,:7]
26 | #data -= mean(data,axis=0)
27 | #data /= data.max(axis=0)
28 |
 29 | order = range(np.shape(data)[0])
30 | np.random.shuffle(order)
31 | split = int(np.round(np.shape(data)[0]/2))
32 | train = data[order[:split],:]
 33 | target = classes[order[:split]]
34 |
35 | test = data[order[split:],:]
 36 | ttarget = classes[order[split:]]
37 |
38 | net = som.som(15,15,train,eta_b=0.3,eta_n=0.1,nSize=0.5,alpha=1,usePCA=1,useBCs=1,eta_bfinal=0.03,eta_nfinal=0.01,nSizefinal=0.05)
39 | net.somtrain(train,12000)
40 |
 41 | best = np.zeros(np.shape(train)[0],dtype=int)
 42 | 
 43 | for i in range(np.shape(train)[0]):
44 | best[i],activation = net.somfwd(train[i,:])
45 |
46 | #print best
47 | #print ttarget
48 |
49 | pl.plot(net.map[0,:],net.map[1,:],'k.',ms=15)
50 | where = pl.find(target == 0)
51 | pl.plot(net.map[0,best[where]],net.map[1,best[where]],'rs',ms=30)
52 | where = pl.find(target == 1)
53 | pl.plot(net.map[0,best[where]],net.map[1,best[where]],'gv',ms=30)
54 | where = pl.find(target == 2)
55 | pl.plot(net.map[0,best[where]],net.map[1,best[where]],'b^',ms=30)
56 | pl.axis([-0.1,1.1,-0.1,1.1])
57 | pl.axis('off')
58 |
59 | pl.figure(2)
 60 | best = np.zeros(np.shape(test)[0],dtype=int)
 61 | 
 62 | for i in range(np.shape(test)[0]):
63 | best[i],activation = net.somfwd(test[i,:])
64 |
65 | pl.plot(net.map[0,:],net.map[1,:],'k.',ms=15)
66 | where = pl.find(ttarget == 0)
67 | pl.plot(net.map[0,best[where]],net.map[1,best[where]],'rs',ms=30)
68 | where = pl.find(ttarget == 1)
69 | pl.plot(net.map[0,best[where]],net.map[1,best[where]],'gv',ms=30)
70 | where = pl.find(ttarget == 2)
71 | pl.plot(net.map[0,best[where]],net.map[1,best[where]],'b^',ms=30)
72 | pl.axis([-0.1,1.1,-0.1,1.1])
73 | pl.axis('off')
74 |
75 | pl.show()
76 |
--------------------------------------------------------------------------------
/Ch14/som.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 14 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | import numpy as np
12 | import pca
13 |
14 | class som:
15 | """A Basic 2D Self-Organising Map
16 | The map connections can be initialised randomly or with PCA"""
17 | def __init__(self,x,y,inputs,eta_b=0.3,eta_n=0.1,nSize=0.5,alpha=1,usePCA=1,useBCs=0,eta_bfinal=0.03,eta_nfinal=0.01,nSizefinal=0.05):
18 | self.nData = np.shape(inputs)[0]
19 | self.nDim = np.shape(inputs)[1]
20 | self.mapDim = 2
21 |
22 | self.x = x
23 | self.y = y
24 | self.eta_b = eta_b
25 | self.eta_bfinal = eta_bfinal
26 | self.eta_n = eta_n
27 | self.eta_nfinal = eta_nfinal
28 | self.nSize = nSize
29 | self.nSizefinal = nSizefinal
30 | self.alpha = alpha
31 |
32 | self.map = np.mgrid[0:1:np.complex(0,x),0:1:np.complex(0,y)]
33 | self.map = np.reshape(self.map,(2,x*y))
34 |
35 | if usePCA:
36 | dummy1,dummy2,evals,evecs = pca.pca(inputs,2)
37 | self.weights = np.zeros((self.nDim,x*y))
38 | for i in range(x*y):
39 | for j in range(self.mapDim):
40 | self.weights[:,i] += (self.map[j,i]-0.5)*2*evecs[:,j]
41 | else:
42 | self.weights = (np.random.rand(self.nDim,x*y)-0.5)*2
43 |
44 | self.mapDist = np.zeros((self.x*self.y,self.x*self.y))
45 | if useBCs:
46 | for i in range(self.x*self.y):
47 | for j in range(i+1,self.x*self.y):
 48 |                     xdist = np.min([(self.map[0,i]-self.map[0,j])**2,(self.map[0,i]+1+1./self.x-self.map[0,j])**2,(self.map[0,i]-1-1./self.x-self.map[0,j])**2,(self.map[0,i]-self.map[0,j]+1+1./self.x)**2,(self.map[0,i]-self.map[0,j]-1-1./self.x)**2])
 49 |                     ydist = np.min([(self.map[1,i]-self.map[1,j])**2,(self.map[1,i]+1+1./self.y-self.map[1,j])**2,(self.map[1,i]-1-1./self.y-self.map[1,j])**2,(self.map[1,i]-self.map[1,j]+1+1./self.y)**2,(self.map[1,i]-self.map[1,j]-1-1./self.y)**2])
50 | self.mapDist[i,j] = np.sqrt(xdist+ydist)
51 | self.mapDist[j,i] = self.mapDist[i,j]
52 | else:
53 | for i in range(self.x*self.y):
54 | for j in range(i+1,self.x*self.y):
55 | self.mapDist[i,j] = np.sqrt((self.map[0,i] - self.map[0,j])**2 + (self.map[1,i] - self.map[1,j])**2)
56 | self.mapDist[j,i] = self.mapDist[i,j]
57 |
58 | def somtrain(self,inputs,nIterations):
59 | self.eta_binit = self.eta_b
60 | self.eta_ninit = self.eta_n
61 | self.nSizeinit = self.nSize
62 |
63 | for iterations in range(nIterations):
64 | for i in range(self.nData):
65 | #print inputs[i,:]
66 | best,activation = self.somfwd(inputs[i,:])
67 | # Update the weights of the best match
68 | self.weights[:,best] += self.eta_b * (inputs[i,:] - self.weights[:,best])
69 | #print self.weights
70 | # Find the neighbours and update their weights
71 | neighbours = np.where(self.mapDist[best,:]<=self.nSize,1,0)
72 | neighbours[best] = 0
73 | #print neighbours
74 | self.weights += self.eta_n * neighbours*np.transpose((inputs[i,:] - np.transpose(self.weights)))
75 | #print self.weights
76 | # Modify learning rates
77 | self.eta_b = self.eta_binit*np.power(self.eta_bfinal/self.eta_binit,float(iterations)/nIterations)
78 | self.eta_n = self.eta_ninit*np.power(self.eta_nfinal/self.eta_ninit,float(iterations)/nIterations)
79 |
80 | # Modify neighbourhood size
81 | self.nSize = self.nSizeinit*np.power(self.nSizefinal/self.nSizeinit,float(iterations)/nIterations)
82 |
83 | def somfwd(self,inputs):
84 | activations = np.sum((np.transpose(np.tile(inputs,(self.x*self.y,1)))-self.weights)**2,axis=0)
85 | best = np.argmin(activations)
86 | return best,activations
87 |
--------------------------------------------------------------------------------
/Ch14/somdemo.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 14 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # A simple example of using the SOM on a 2D dataset showing the neighbourhood connections
12 |
13 | import pylab as pl
14 | import numpy as np
15 |
16 | import som
17 | nNodesEdge = 8
18 | data = (np.random.rand(2000,2)-0.5)*2
19 |
20 | # Set up the network and decide on parameters
21 | net = som.som(nNodesEdge,nNodesEdge,data,usePCA=0)
22 | step = 0.2
23 |
24 | pl.figure(1)
25 | pl.plot(data[:,0],data[:,1],'.')
26 | # Train the network for 0 iterations (to get the position of the nodes)
27 | net.somtrain(data,0)
28 | for i in range(net.x*net.y):
29 | neighbours = np.where(net.mapDist[i,:]<=step)
30 |
31 | t = np.zeros((np.shape(neighbours)[1]*2,np.shape(net.weights)[0]))
32 | t[::2,:] = np.tile(net.weights[:,i],(np.shape(neighbours)[1],1))
33 | t[1::2,:] = np.transpose(net.weights[:,neighbours[0][:]])
34 | pl.plot(t[:,0],t[:,1],'g-')
35 | pl.axis('off')
36 |
37 | pl.figure(2)
38 | pl.plot(data[:,0],data[:,1],'.')
39 | net.somtrain(data,5)
40 | for i in range(net.x*net.y):
41 | neighbours = np.where(net.mapDist[i,:]<=step)
42 |
43 | t = np.zeros((np.shape(neighbours)[1]*2,np.shape(net.weights)[0]))
44 | t[::2,:] = np.tile(net.weights[:,i],(np.shape(neighbours)[1],1))
45 | t[1::2,:] = np.transpose(net.weights[:,neighbours[0][:]])
46 | pl.plot(t[:,0],t[:,1],'g-')
47 | pl.axis([-1,1,-1,1])
48 | pl.axis('off')
49 |
50 | net.somtrain(data,100)
51 | pl.figure(3)
52 | pl.plot(data[:,0],data[:,1],'.')
53 | for i in range(net.x*net.y):
54 | neighbours = np.where(net.mapDist[i,:]<=step)
55 | #print neighbours
56 | #n = tile(net.weights[:,i],(shape(neighbours)[1],1))
57 | t = np.zeros((np.shape(neighbours)[1]*2,np.shape(net.weights)[0]))
58 | t[::2,:] = np.tile(net.weights[:,i],(np.shape(neighbours)[1],1))
59 | t[1::2,:] = np.transpose(net.weights[:,neighbours[0][:]])
60 | pl.plot(t[:,0],t[:,1],'g-')
61 |
62 | #net.somtrain(data,100)
63 | #pl.figure(4)
64 | #pl.plot(data[:,0],data[:,1],'.')
65 | #for i in range(net.x*net.y):
66 | # neighbours = np.where(net.mapDist[i,:]<=step)
67 | # #print neighbours
68 | # #n = np.tile(net.weights[:,i],(np.shape(neighbours)[1],1))
69 | # t = np.zeros((np.shape(neighbours)[1]*2,np.shape(net.weights)[0]))
70 | # t[::2,:] = np.tile(net.weights[:,i],(np.shape(neighbours)[1],1))
71 | # t[1::2,:] = np.transpose(net.weights[:,neighbours[0][:]])
72 | # pl.plot(t[:,0],t[:,1],'g-')
73 | #
74 | #net.somtrain(data,100)
75 | #pl.figure(5)
76 | #pl.plot(data[:,0],data[:,1],'.')
77 | #for i in range(net.x*net.y):
78 | # neighbours = np.where(net.mapDist[i,:]<=step)
79 | # #print neighbours
80 | # #n = np.tile(net.weights[:,i],(snp.hape(neighbours)[1],1))
81 | # t = np.zeros((np.shape(neighbours)[1]*2,np.shape(net.weights)[0]))
82 | # t[::2,:] = np.tile(net.weights[:,i],(np.shape(neighbours)[1],1))
83 | # t[1::2,:] = np.transpose(net.weights[:,neighbours[0][:]])
84 | # pl.plot(t[:,0],t[:,1],'g-')
85 |
86 | pl.show()
87 |
--------------------------------------------------------------------------------
/Ch15/BoxMuller.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 15 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # The Box-Muller algorithm for constructing pseudo-random Gaussian-distributed numbers
12 |
13 | import pylab as pl
14 | import numpy as np
15 |
16 | def boxmuller(n):
17 |
18 | x = np.zeros((n,2))
19 | y = np.zeros((n,2))
20 |
21 | for i in range(n):
22 | x[i,:] = np.array([2,2])
23 | x2 = x[i,0]*x[i,0]+x[i,1]*x[i,1]
24 | while (x2)>1:
25 | x[i,:] = np.random.rand(2)*2-1
26 | x2 = x[i,0]*x[i,0]+x[i,1]*x[i,1]
27 |
28 | y[i,:] = x[i,:] * np.sqrt((-2*np.log(x2))/x2)
29 |
 30 |     y = np.reshape(y,(2*n,1))
31 | return y
32 |
33 | y = boxmuller(1000)
34 | pl.hist(y,normed=1,fc='k')
35 | x = np.arange(-4,4,0.1)
36 | pl.plot(x,1/np.sqrt(2*np.pi)*np.exp(-0.5*x**2),'k',lw=6)
37 | pl.xlabel('x',fontsize=24)
38 | pl.ylabel('p(x)',fontsize=24)
39 | pl.show()
40 |
41 |
42 |
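
The while loop is the polar (Marsaglia) form of Box-Muller: a point is drawn uniformly in the unit disc and scaled by sqrt(-2*ln(x2)/x2), which turns each accepted pair into two independent N(0,1) samples. A quick numerical check (a sketch to run after the definition above, not part of the original script):

    y = boxmuller(100000)
    print("mean %.3f, std %.3f" % (np.mean(y), np.std(y)))   # expect roughly 0 and 1
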
--------------------------------------------------------------------------------
/Ch15/Gibbs.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 15 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # A simple Gibbs sampler
12 | import pylab as pl
13 | import numpy as np
14 |
15 | def pxgiveny(y,mx,my,s1,s2):
16 | return np.random.normal(mx + (y-my)/s2,s1)
17 | #return random.binomial(16,y,1)
18 |
19 | def pygivenx(x,mx,my,s1,s2):
20 | return np.random.normal(my + (x-mx)/s1,s2)
21 | #return random.beta(x+2,16-x+4,1)
22 |
23 | def gibbs(N=500):
24 | k=10
25 | x0 = np.zeros(N,dtype=float)
26 | m1 = 10
27 | m2 = 20
28 | s1 = 2
29 | s2 = 3
30 | for i in range(N):
31 | y = np.random.rand(1)
32 | for j in range(k):
33 | x = pxgiveny(y,m1,m2,s1,s2)
34 | y = pygivenx(x,m1,m2,s1,s2)
35 | x0[i] = x
36 |
37 | return x0
38 |
39 | #def f(x):
40 | # n = 16
41 | # alph = 2
42 | # bet = 4
43 | # return 20.0*(np.factorial(n)/(np.factorial(x)*np.factorial(n-x)))*np.factorial(x+1)*np.factorial(19-x)/np.factorial(21)
44 | #
45 | #def factorial(n):
46 | # x = 1
47 | # for i in range(n):
48 | # x *= (i+1)
49 | # return x
50 |
51 | def f(x):
52 | return np.exp(-(x-10)**2/10)
53 |
54 | N=500
55 | s=gibbs(N)
56 | x1 = np.arange(0,17,1)
57 | pl.hist(s,bins=x1,fc='k')
58 | x1 = np.arange(0,17,0.1)
59 | px1 = np.zeros(len(x1))
60 | for i in range(len(x1)):
61 | px1[i] = f(x1[i])
62 | pl.plot(x1, px1*N*10/np.sum(px1), color='k',linewidth=3)
63 |
64 | pl.show()
65 |
--------------------------------------------------------------------------------
/Ch15/MH.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 15 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # The Metropolis-Hastings algorithm
12 | import pylab as pl
13 | import numpy as np
14 |
15 | def p(x):
16 | mu1 = 3
17 | mu2 = 10
18 | v1 = 10
19 | v2 = 3
20 | return 0.3*np.exp(-(x-mu1)**2/v1) + 0.7* np.exp(-(x-mu2)**2/v2)
21 |
22 | def q(x):
23 | mu = 5
24 | sigma = 10
25 | return np.exp(-(x-mu)**2/(sigma**2))
26 |
27 | stepsize = 0.5
28 | x = np.arange(-10,20,stepsize)
29 | px = np.zeros(np.shape(x))
30 | for i in range(len(x)):
31 | px[i] = p(x[i])
32 | N = 5000
33 |
34 | # independence chain
35 | mu = 5
36 | sigma = 10
37 | u = np.random.rand(N)
38 | y = np.zeros(N)
39 | y[0] = np.random.normal(mu,sigma)
40 | for i in range(N-1):
41 | ynew = np.random.normal(mu,sigma)
42 | alpha = min(1,p(ynew)*q(y[i])/(p(y[i])*q(ynew)))
43 | if u[i] < alpha:
44 | y[i+1] = ynew
45 | else:
46 | y[i+1] = y[i]
47 |
48 | # random walk chain
49 | sigma = 10
50 | u2 = np.random.rand(N)
51 | y2 = np.zeros(N)
52 | y2[0] = np.random.normal(0,sigma)
53 | for i in range(N-1):
54 | y2new = y2[i] + np.random.normal(0,sigma)
55 | alpha = min(1,p(y2new)/p(y2[i]))
56 | if u2[i] < alpha:
57 | y2[i+1] = y2new
58 | else:
59 | y2[i+1] = y2[i]
60 |
61 | pl.figure(1)
62 | nbins = 30
63 | pl.hist(y, bins = x)
64 | pl.plot(x, px*N/sum(px), color='r', linewidth=2)
65 |
66 | pl.figure(2)
67 | nbins = 30
68 | pl.hist(y2, bins = x)
69 | pl.plot(x, px*N/sum(px), color='r', linewidth=2)
70 |
71 | pl.show()
72 |
--------------------------------------------------------------------------------
/Ch15/SIR.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 15 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # The Sampling-Importance-Resampling algorithm
12 | import pylab as pl
13 | import numpy as np
14 |
15 | def p(x):
16 | return 0.3*np.exp(-(x-0.3)**2) + 0.7* np.exp(-(x-2.)**2/0.3)
17 |
18 | def q(x):
19 | return 4.0
20 |
21 | def sir(n):
22 |
23 | sample1 = np.zeros(n)
24 | w = np.zeros(n)
25 | sample2 = np.zeros(n)
26 |
27 | # Sample from q
28 | sample1 = np.random.rand(n)*4
29 |
30 | # Compute weights
31 | w = p(sample1)/q(sample1)
32 | w /= np.sum(w)
33 |
34 | # Sample from sample1 according to w
35 | cumw = np.zeros(len(w))
36 | cumw[0] = w[0]
37 | for i in range(1,len(w)):
38 | cumw[i] = cumw[i-1]+w[i]
39 |
40 | u = np.random.rand(n)
41 |
42 | index = 0
43 | for i in range(n):
 44 |         indices = np.where(u<cumw[i])
--------------------------------------------------------------------------------
/Ch16/HMM.py:
--------------------------------------------------------------------------------
 91 |     while ((error > tol) & (nits < maxits)):
92 | nits += 1
93 | oldpi = pi.copy()
94 | olda = a.copy()
95 | oldb = b.copy()
96 |
97 | # E step
98 | alpha,c = HMMfwd(pi,a,b,obs)
99 | beta = HMMbwd(a,b,obs,c)
100 |
101 | for t in range(T-1):
102 | for i in range(nStates):
103 | for j in range(nStates):
104 | xi[i,j,t] = alpha[i,t]*a[i,j]*b[j,obs[t+1]]*beta[j,t+1]
105 | xi[:,:,t] /= np.sum(xi[:,:,t])
106 |
107 | # The last step has no b, beta in
108 | for i in range(nStates):
109 | for j in range(nStates):
110 | xi[i,j,T-1] = alpha[i,T-1]*a[i,j]
111 | xi[:,:,T-1] /= np.sum(xi[:,:,T-1])
112 |
113 | # M step
114 | for i in range(nStates):
115 | pi[i] = np.sum(xi[i,:,0])
116 | for j in range(nStates):
117 | a[i,j] = np.sum(xi[i,j,:T-1])/np.sum(xi[i,:,:T-1])
118 |
119 |             for k in range(max(obs)+1):
120 | found = (obs==k).nonzero()
121 | b[i,k] = np.sum(xi[i,:,found])/np.sum(xi[i,:,:])
122 |
123 | error = (np.abs(a-olda)).max() + (np.abs(b-oldb)).max()
124 | print nits, error, 1./np.sum(1./c), np.sum(alpha[:,T-1])
125 |
126 | return pi, a, b
127 |
128 | def evenings():
129 | pi = np.array([0.25, 0.25, 0.25, 0.25])
130 | a = np.array([[0.05,0.7, 0.05, 0.2],[0.1,0.4,0.3,0.2],[0.1,0.6,0.05,0.25],[0.25,0.3,0.4,0.05]])
131 | b = np.array([[0.3,0.4,0.2,0.1],[0.2,0.1,0.2,0.5],[0.4,0.2,0.1,0.3],[0.3,0.05,0.3,0.35]])
132 |
133 | obs = np.array([3,1,1,3,0,3,3,3,1,1,0,2,2])
134 | print Viterbi(pi,a,b,obs)[0]
135 | alpha,c = HMMfwd(pi,a,b,obs)
136 | print np.sum(alpha[:,-1])
137 |
138 | def test():
139 | np.random.seed(4)
140 | pi = np.array([0.25,0.25,0.25,0.25])
141 | aLast = np.array([0.25,0.25,0.25,0.25])
142 | #a = np.array([[.7,.3],[.4,.6]] )
143 | a = np.array([[.4,.3,.1,.2],[.6,.05,.1,.25],[.7,.05,.05,.2],[.3,.4,.25,.05]])
144 | #b = np.array([[.2,.4,.4],[.5,.4,.1]] )
145 | b = np.array([[.2,.1,.2,.5],[.4,.2,.1,.3],[.3,.4,.2,.1],[.3,.05,.3,.35]])
146 | obs = np.array([0,0,3,1,1,2,1,3])
147 | #obs = np.array([2,0,2])
148 | HMMfwd(pi,a,b,obs)
149 | Viterbi(pi,a,b,obs)
150 | print BaumWelch(obs,4)
151 |
152 | def biased_coins():
153 | a = np.array([[0.4,0.6],[0.9,0.1]])
154 | b = np.array([[0.49,0.51],[0.85,0.15]])
155 | pi = np.array([0.5,0.5])
156 |
157 | obs = np.array([0,1,1,0,1,1,0,0,1,1,0,1,1,1,0,0,1,0,0,1,1,0,1,1,1,1,0,1,0,0,1,0,1,0,0,1,1,1,0])
158 | print Viterbi(pi,a,b,obs)[0]
159 |
160 | print BaumWelch(obs,2)
161 |
162 |
--------------------------------------------------------------------------------
/Ch16/Kalman.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 16 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # The 1D Kalman filter
12 |
13 | import pylab as pl
14 | import numpy as np
15 |
16 | def Kalman(obs=None,mu_init=np.array([-0.37727]),cov_init=0.1*np.ones((1)),nsteps=50):
17 |
18 | ndim = np.shape(mu_init)[0]
19 |
20 | if obs is None:
21 | mu_init = np.tile(mu_init,(1,nsteps))
22 | cov_init = np.tile(cov_init,(1,nsteps))
23 | obs = np.random.normal(mu_init,cov_init,(ndim,nsteps))
24 |
25 | Sigma_x = np.eye(ndim)*1e-5
26 | A = np.eye(ndim)
27 | H = np.eye(ndim)
28 | mu_hat = 0
29 | cov = np.eye(ndim)
30 | R = np.eye(ndim)*0.01
31 |
32 | m = np.zeros((ndim,nsteps),dtype=float)
33 | ce = np.zeros((ndim,nsteps),dtype=float)
34 |
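# Standard Kalman recursion (here A = H = identity):
#   predict: mu_pred = A*mu,  cov_pred = A*cov*A^T + Sigma_x
#   update:  K = cov_pred*H^T*(H*cov_pred*H^T + R)^{-1}
#            mu = mu_pred + K*(y - H*mu_pred),  cov = (I - K*H)*cov_pred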
35 | for t in range(1,nsteps):
36 | # Make prediction
37 | mu_hat_est = np.dot(A,mu_hat)
38 | cov_est = np.dot(A,np.dot(cov,np.transpose(A))) + Sigma_x
39 |
40 | # Update estimate
41 | error_mu = obs[:,t] - np.dot(H,mu_hat_est)
42 | error_cov = np.dot(H,np.dot(cov_est,np.transpose(H))) + R
43 | K = np.dot(np.dot(cov_est,np.transpose(H)),np.linalg.inv(error_cov))
44 | mu_hat = mu_hat_est + np.dot(K,error_mu)
45 | #m[:,:,t] = mu_hat
46 | m[:,t] = mu_hat
47 | if ndim>1:
48 | cov = np.dot((np.eye(ndim) - np.dot(K,H)),cov_est)
49 | else:
50 | cov = (1-K)*cov_est
51 | ce[:,t] = cov
52 |
53 | pl.figure()
54 | pl.plot(obs[0,:],'ko',ms=6)
55 | pl.plot(m[0,:],'k-',lw=3)
56 | pl.plot(m[0,:]+20*ce[0,:],'k--',lw=2)
57 | pl.plot(m[0,:]-20*ce[0,:],'k--',lw=2)
58 | pl.legend(['Noisy Datapoints','Kalman estimate','Covariance'])
59 | pl.xlabel('Time')
60 |
61 |
62 | pl.show()
63 |
64 | Kalman()
65 |
--------------------------------------------------------------------------------
/Ch16/Kalman_full.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 16 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | import numpy as np
12 | import pylab as pl
13 |
14 | def Kalman_update(A,H,Q,R,y,x,Sig,B=None,u=None):
15 |
16 | if B is None:
17 | xpred = np.dot(A,x)
18 | else:
19 | xpred = np.dot(A,x) + np.dot(B,u)
20 |
21 | SigPred = np.dot(A,np.dot(Sig,A.T)) + Q
22 |
23 | e = y - np.dot(H,xpred)
24 | Sinv = np.linalg.inv(np.dot(H,np.dot(SigPred,H.T)) + R)
25 | K = np.dot(SigPred,np.dot(H.T,Sinv))
26 |
27 | xnew = xpred + np.dot(K,e)
28 | SigNew = np.dot((np.eye(np.shape(A)[0]) - np.dot(K,H)),SigPred)
29 |
30 | return xnew.T,SigNew
31 |
32 | def Kalman_smoother_update(A,Q,B,u,xs_t,Sigs_t,xfilt,Sigfilt,Sigfilt_t):
33 |
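# Rauch-Tung-Striebel backward step: the filtered estimate is corrected using
# the smoother gain J = Sigfilt*A^T*(A*Sigfilt*A^T + Q)^{-1}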
34 | if B is None:
35 | xpred = np.dot(A,xfilt)
36 | else:
37 | xpred = np.dot(A,xfilt) + np.dot(B,u)
38 |
39 | SigPred = np.dot(A,np.dot(Sigfilt,A.T)) + Q
40 | J = np.dot(Sigfilt,np.dot(A.T,np.linalg.inv(SigPred)))
41 | xs = xfilt + np.dot(J,(xs_t - xpred))
42 | Sigs = Sigfilt + np.dot(J,np.dot((Sigs_t - SigPred),J.T))
43 |
44 | return xs.T, Sigs
45 |
46 |
47 | def Kalman_filter(y,A,H,Q,R,x0,Sig0,B=None,u=None):
48 |
49 | obs_size,T = np.shape(y)
50 | state_size = np.shape(A)[0]
51 |
52 | x = np.zeros((state_size,T))
53 | Sig = np.zeros((state_size,state_size,T))
54 |
55 | [x[:,0],Sig[:,:,0]] = Kalman_update(A,H,Q,R,y[:,0].reshape(len(y),1),x0,Sig0,B,u)
56 | for t in range(1,T):
57 | prevx = x[:,t-1].reshape(state_size,1)
58 | prevSig = Sig[:,:,t-1]
59 | [x[:,t],Sig[:,:,t]] = Kalman_update(A,H,Q,R,y[:,t].reshape(len(y),1),prevx,prevSig,B,u)
60 |
61 | return x,Sig
62 |
63 | def Kalman_smoother(y,A,H,Q,R,x0,Sig0,B=None,u=None):
64 |
65 | obs_size,T = np.shape(y)
66 | state_size = np.shape(A)[0]
67 |
68 | xs = np.zeros((state_size,T))
69 | Sigs = np.zeros((state_size,state_size,T))
70 |
71 | [xfilt,Sigfilt] = Kalman_filter(y,A,H,Q,R,x0,Sig0,B,u)
72 |
73 | xs[:,T-1] = xfilt[:,T-1]
74 | Sigs[:,:,T-1] = Sigfilt[:,:,T-1]
75 |
76 | for t in range(T-2,-1,-1):
77 | [xs[:,t],Sigs[:,:,t]] = Kalman_smoother_update(A,Q,B,u,xs[:,t+1].reshape(len(xs),1),Sigs[:,:,t+1],xfilt[:,t].reshape(len(xfilt),1),Sigfilt[:,:,t],Sigfilt[:,:,t+1])
78 |
79 | return xs,Sigs
80 |
81 | def lds_sample(A,H,Q,R,state0,T):
82 | # x(t+1) = Ax(t) + state_noise(t), state_noise ~ N(O,Q), x(0) = state0
83 | # y(t) = Hx(t) + obs_noise(t), obs_noise~N(O,R)
84 |
85 | state_noise_samples = np.random.multivariate_normal(np.zeros((len(Q))),Q,T).T
86 | obs_noise_samples = np.random.multivariate_normal(np.zeros((len(R))),R,T).T
87 |
88 | x = np.zeros((np.shape(H)[1],T))
89 | y = np.zeros((np.shape(H)[0],T))
90 |
91 | x[:,0] = state0.T
92 | y[:,0] = np.dot(H,x[:,0]) + obs_noise_samples[:,0]
93 |
94 | for t in range(1,T):
95 | x[:,t] = np.dot(A,x[:,t-1]) + state_noise_samples[:,t]
96 | y[:,t] = np.dot(H,x[:,t]) + obs_noise_samples[:,t]
97 |
98 | return [x,y]
99 |
100 | def Kalman_demo():
101 | state_size = 4
102 | observation_size = 2
103 | A = np.array([[1,0,1,0],[0,1,0,1],[0,0,1,0],[0,0,0,1]],dtype=float)
104 | H = np.array([[1,0,0,0],[0,1,0,0]],dtype=float)
105 |
106 | Q = 0.1*np.eye((state_size))
107 | R = np.eye(observation_size,dtype=float)
108 |
109 | x0 = np.array([[10],[10],[1],[0]],dtype=float)
110 | Sig0 = 10. * np.eye(state_size)
111 |
112 | np.random.seed(3)
113 | T = 15
114 |
115 | [x,y] = lds_sample(A,H,Q,R,x0,T)
116 |
117 | [xfilt,Sigfilt] = Kalman_filter(y,A,H,Q,R,x0,Sig0)
118 | [xsmooth,Sigsmooth] = Kalman_smoother(y,A,H,Q,R,x0,Sig0)
119 |
120 | dfilt = x[[0,1],:] - xfilt[[0,1],:]
121 | mse_filt = np.sqrt(np.sum(dfilt**2))
122 |
123 | dsmooth = x[[0,1],:] - xsmooth[[0,1],:]
124 | mse_smooth = np.sqrt(np.sum(dsmooth**2))
125 |
126 | plot_track(x,y,xfilt,Sigfilt)
127 | plot_track(x,y,xsmooth,Sigsmooth)
128 |
129 | def plot_track(x,y,Kx,Sig):
130 | fig = pl.figure()
131 | ax = fig.add_subplot(111, aspect='equal')
132 | pl.plot(x[0,:],x[1,:],'ks-')
133 | pl.plot(y[0,:],y[1,:],'k*')
134 | pl.plot(Kx[0,:],Kx[1,:],'kx:')
135 | pl.legend(('True','Observed','Filtered'))
136 |
137 | obs_size,T = np.shape(y)
138 |
139 | from matplotlib.patches import Ellipse
140 | # Axes of ellipse are eigenvectors of covariance matrix, lengths are square roots of eigenvalues
141 | ellsize = np.zeros((obs_size,T))
142 | ellangle = np.zeros((T))
143 | for t in range(T):
144 | [evals,evecs] = np.linalg.eig(Sig[:2,:2,t])
145 | ellsize[:,t] = np.sqrt(evals)
146 | ellangle[t] = np.angle(evecs[0,0]+0.j*evecs[0,1])
147 |
148 | ells = [Ellipse(xy=[Kx[0,t],Kx[1,t]] ,width=ellsize[0,t],height=ellsize[1,t], angle=ellangle[t]) for t in range(T)]
149 | for e in ells:
150 | ax.add_artist(e)
151 | e.set_alpha(0.1)
152 | e.set_facecolor([0.7,0.7,0.7])
153 | pl.xlabel('x')
154 | pl.ylabel('y')
155 |
156 |
157 | def Kalman_demo1d():
158 |
159 | x0 = np.array([-0.37727])
160 | Sig0 = 0.1*np.ones((1))
161 | T = 50
162 |
163 | y = np.random.normal(x0,Sig0,(1,T))
164 |
165 | A = np.eye(1)
166 | H = np.eye(1)
167 | Q = np.eye(1)*1e-5
168 | R = np.eye(1)*0.01
169 |
170 | xfilt = np.zeros((1,T),dtype=float)
171 | Sigfilt = np.zeros((1,T),dtype=float)
172 |
173 | [xfilt,Sigfilt] = Kalman_filter(y,A,H,Q,R,x0,Sig0)
174 | xfilt = np.squeeze(xfilt)
175 | Sigfilt = np.squeeze(Sigfilt)
176 |
177 | pl.figure()
178 | time = np.arange(T)
179 | pl.plot(time,y[0,:],'ko',ms=6)
180 | pl.plot(time,xfilt,'k-',lw=3)
181 | pl.plot(time,xfilt+20*Sigfilt,'k--',lw=2)
182 | pl.plot(time,xfilt-20*Sigfilt,'k--',lw=2)
183 | pl.legend(['Noisy Datapoints','Kalman estimate','20*Covariance'])
184 | pl.xlabel('Time')
185 |
--------------------------------------------------------------------------------
/Ch16/MRF.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 16 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # Demonstration of the Markov Random Field method of image denoising
12 | import pylab as pl
13 | import numpy as np
14 |
15 | def MRF(I,J,eta=2.0,zeta=1.5):
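# I is the observed noisy image and J the current estimate, both coded as +/-1.
# Each pixel is set to whichever sign gives the lower Ising-style energy
# E = -eta*sum(I*J) - zeta*sum(J_i*J_j over neighbouring pixels)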
16 | ind =np.arange(np.shape(I)[0])
17 | np.random.shuffle(ind)
18 | orderx = ind.copy()
19 | np.random.shuffle(ind)
20 |
21 | for i in orderx:
22 | for j in ind:
23 | oldJ = J[i,j]
24 | J[i,j]=1
25 | patch = 0
26 | for k in range(-1,1):
27 | for l in range(-1,1):
28 | patch += J[i,j] * J[i+k,j+l]
29 | energya = -eta*np.sum(I*J) - zeta*patch
30 | J[i,j]=-1
31 | patch = 0
32 | for k in range(-1,1):
33 | for l in range(-1,1):
34 | patch += J[i,j] * J[i+k,j+l]
35 | energyb = -eta*np.sum(I*J) - zeta*patch
36 | if energya<energyb:
37 | J[i,j] = 1
--------------------------------------------------------------------------------
/Ch16/particle_filter.py:
--------------------------------------------------------------------------------
15 | def systematic(w,N):
16 | # Systematic resampling
17 | # One too many to make sure it is >1
18 | samples = np.random.rand(N+1)
19 | indices = np.arange(N+1)
20 | u = (samples+indices)/(N+1)
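# u holds one stratified uniform point in each subinterval of [0,1], so
# particle i is kept roughly N*w[i] times after resampling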
21 | cumw = np.cumsum(w)
22 | keep = np.zeros((N))
23 | # ni copies of particle xi where ni = number of u between ws[i-1] and ws[i]
24 | j = 0
25 | for i in range(N2):
26 | while((u[j]<cumw[i])):
64 | if Nleft>0:
65 | print "sys resample"
66 | probs = (weights[:,t]*N - Ncopies)/Nleft
67 | extrakeep = systematic(probs,Nleft)
68 | keep[j:] = extrakeep
69 |
70 | particles[:,t] = particlepred[keep.astype('int'),t]
71 |
72 | return particles, particlepred, ypred, weights
73 |
74 | def ffun(x,t):
75 | return 1 + np.sin(4e-2*np.pi*t) + 0.5*x
76 |
77 | def hfun(x,t):
78 | if t<30:
79 | return x**2/5.0
80 | else:
81 | return x/2. - 2.
82 |
83 | def pf_demo():
84 |
85 | T = 50
86 | N = 10
87 | sigma = 1.0
88 |
89 | x = np.zeros((T))
90 | x[1] = 1
91 | y = np.zeros((T))
92 | for t in range(T):
93 | x[t] = ffun(x[t-1],t) + np.random.randn(1)
94 | y[t] = hfun(x[t],t) + np.sqrt(sigma)*np.random.randn(1,1)
95 |
96 | p, pp, yp, w = pf(x,y,sigma,T,N)
97 |
98 | pl.figure()
99 | time = np.arange(T)
100 | pl.plot(time,y,'k+')
101 | pl.plot(time,x,'k:')
102 | #pl.plot(time,np.mean(p,axis=0),'.')
103 | pl.plot(time[:30],hfun(np.mean(p,axis=0)[:30],0),'k')
104 | pl.plot(time,p.T,'k.')
105 | pl.plot(time[30:],hfun(np.mean(p,axis=0)[30:],40),'k')
106 | #pl.axis([-0.5,9.5,-1,7])
107 | pl.legend(['Observation','Process','Output','Particles'])
108 |
109 |
--------------------------------------------------------------------------------
/Ch16/pftrack.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 16 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2014
10 |
11 | # 2D particle filter tracking. Euclidean distance based
12 | import numpy as np
13 | import pylab as pl
14 |
15 | def systematic(w,N):
16 | # Systematic resampling
17 | # One too many to make sure it is >1
18 | samples = np.random.rand(N+1)
19 | indices = np.arange(N+1)
20 | u = (samples+indices)/N
21 | cumw = np.cumsum(w)
22 | Ncopies = np.zeros((N))
23 | keep = np.zeros((N))
24 | # ni copies of particle xi where ni = number of u between ws[i-1] and ws[i]
25 | j = 0
26 | for i in range(N):
27 | while((u[j]<cumw[i])):
84 | if Nleft>0:
85 | print "sys resample"
86 | probs = (weights[:,t]*N - Ncopies)/Nleft
87 | extrakeep = systematic(probs,Nleft)
88 | keep[j:] = extrakeep
89 | else:
90 | keep = range(N)
91 |
92 | print keep
93 | # output
94 | for i in range(N):
95 | particles[i,:,t+1] = particlepred[keep[i],:,t]
96 | #print "here"
97 | print x[:,t]
98 | print particlepred[:,:,t]
99 | #x[:,t+1] = x[:,t] + xdot*np.random.uniform(-1,1,(1,2))
100 | x[:,t+1] = x[:,t] + xdot #+ np.random.uniform(-1,1,(1,2))
101 | #print particles[:,:,t]
102 |
103 | return particles, x,weights
104 |
105 | def pf_demo():
106 |
107 | x0 = np.array([10,12])
108 | xdot = np.array([10,8])
109 |
110 | np.random.seed(3)
111 | T = 15
112 | N = 30
113 | sigma = 1.0
114 |
115 | [particles,x,weights] = pf(x0,xdot,sigma,T,N,15)
116 | x = x[:,:T]
117 | particles = particles[:,:,:T]
118 | #print particles
119 | #print x
120 |
121 | dfilt = x[[0,1],:] - particles[[0,1],:]
122 | mse_filt = np.sqrt(np.sum(dfilt**2))
123 |
124 | #plot_track(x,y,xfilt,Pfilt)
125 | plot_position(x,particles,T)
126 |
127 | def plot_position(x,particles,T):
128 | import time
129 |
130 | pl.ion()
131 | pl.figure()
132 | colours = pl.cm.gray(np.linspace(0, 1, T))
133 |
134 | #for t in [0,5,10,14]:
135 | for t in range(T):
136 | #print particles[:,:,t]
137 | pl.plot(x[0,t],x[1,t],'x',color=colours[t],ms=10.)
138 | pl.plot(particles[:,0,t],particles[:,1,t],'o',color=colours[t])
139 | #pl.plot(particles[:,0,5],particles[:,1,5],'go')
140 | #pl.plot(particles[:,0,10],particles[:,1,10],'co')
141 | #pl.plot(particles[:,0,14],particles[:,1,14],'ko')
142 | pl.xlim((0,150))
143 | pl.ylim((0,150))
144 |
145 | def plot_track(x,y,Kx,P):
146 | fig = pl.figure()
147 | ax = fig.add_subplot(111, aspect='equal')
148 | pl.plot(x[0,:],x[1,:],'ks-')
149 | pl.plot(y[0,:],y[1,:],'k*')
150 | pl.plot(Kx[0,:],Kx[1,:],'kx:')
151 |
152 | obs_size,T = np.shape(y)
153 |
154 | from matplotlib.patches import Ellipse
155 | # Axes of ellipse are eigenvectors of covariance matrix, lengths are square roots of eigenvalues
156 | ellsize = np.zeros((obs_size,T))
157 | ellangle = np.zeros((T))
158 | for t in range(T):
159 | [evals,evecs] = np.linalg.eig(P[:2,:2,t])
160 | ellsize[:,t] = np.sqrt(evals)
161 | ellangle[t] = np.angle(evecs[0,0]+0.j*evecs[0,1])
162 |
163 | ells = [Ellipse(xy=[Kx[0,t],Kx[1,t]] ,width=ellsize[0,t],height=ellsize[1,t], angle=ellangle[t]) for t in range(T)]
164 | for e in ells:
165 | ax.add_artist(e)
166 | e.set_alpha(0.1)
167 | e.set_facecolor([0.7,0.7,0.7])
168 |
--------------------------------------------------------------------------------
/Ch16/world.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jpub/MLGuide/e1956f4e372ec0ed6f482e2c864811aedbb1744f/Ch16/world.gif
--------------------------------------------------------------------------------
/Ch16/world.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jpub/MLGuide/e1956f4e372ec0ed6f482e2c864811aedbb1744f/Ch16/world.png
--------------------------------------------------------------------------------
/Ch18/data.txt:
--------------------------------------------------------------------------------
1 | 7.70253e-01 -4.25080e-01
2 | 1.20205e+00 -1.56972e+00
3 | 3.03586e+00 -3.51676e+00
4 | 2.11230e+00 -3.23995e+00
5 | -2.77646e-01 -8.33153e-01
6 | 1.04043e+00 1.08034e-02
7 | 8.63193e-01 2.95915e-01
8 | 8.26474e-01 -9.66392e-01
9 | 1.21781e+00 -4.08146e-01
10 | -1.59987e+00 -2.34009e+00
11 | 1.25398e+00 -1.05705e+00
12 | 1.97716e+00 -2.02539e+00
13 |
--------------------------------------------------------------------------------
/Ch18/gpcdemo.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 18 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2014
10 |
11 | import numpy as np
12 | import pylab as pl
13 |
14 | iris = np.loadtxt('iris_proc.data',delimiter=',')
15 | #iris[:,:4] = iris[:,:4]-iris[:,:4].mean(axis=0)
16 | imax = np.concatenate((iris.max(axis=0)*np.ones((1,5)),iris.min(axis=0)*np.ones((1,5))),axis=0).max(axis=0)
17 | #iris[:,:4] = iris[:,:4]/imax[:4]
18 |
19 | target = -np.ones((np.shape(iris)[0],3));
20 | indices = np.where(iris[:,4]==0)
21 | target[indices,0] = 1
22 | indices = np.where(iris[:,4]==1)
23 | target[indices,1] = 1
24 | indices = np.where(iris[:,4]==2)
25 | target[indices,2] = 1
26 |
27 | # Randomly order the data
28 | order = range(np.shape(iris)[0])
29 | np.random.shuffle(order)
30 | iris = iris[order,:]
31 | target = target[order,:]
32 |
33 | train = iris[::2,0:4]
34 | traint = target[::2]
35 | test = iris[1::2,0:4]
36 | testt = target[1::2]
37 |
38 | #print train.max(axis=0), train.min(axis=0)
39 |
40 | # Train the machines
41 | output = np.zeros((np.shape(test)[0],3))
42 |
43 | theta =np.zeros((3,1))
44 | theta[0] = 1.0 #np.random.rand()*3
45 | theta[1] = 0.7 #np.random.rand()*3
46 | theta[2] = 0.
47 |
48 | import gpc
49 | import scipy.optimize as so
50 |
51 | args = (train[:,:2],traint[:,0])
52 | newTheta0 = so.fmin_cg(gpc.logPosterior, theta, fprime=gpc.gradLogPosterior, args=[args], gtol=1e-4,maxiter=20,disp=1)
53 | pred = np.squeeze(np.array([gpc.predict(np.reshape(i,(1,2)),train,traint,newTheta0) for i in test[:,:2]]))
54 | output[:,0] = np.reshape(np.where(pred[:,0]<0,-1,1),(np.shape(pred)[1],1))
55 | print np.sum(np.abs(output-testt))
56 |
57 | args = (train[:,:2],traint[:,1])
58 | newTheta1 = so.fmin_cg(gpc.logPosterior, theta, fprime=gpc.gradLogPosterior, args=[args], gtol=1e-4,maxiter=20,disp=1)
59 | pred = np.squeeze(np.array([gpc.predict(np.reshape(i,(1,2)),train,traint,newTheta1) for i in test[:,:2]]))
60 | output[:,1] = np.reshape(np.where(pred[:,0]<0,-1,1),(np.shape(pred)[1],1))
61 | print np.sum(np.abs(output-testt))
62 |
63 | args = (train[:,:2],traint[:,2])
64 | newTheta2 = so.fmin_cg(gpc.logPosterior, theta, fprime=gpc.gradLogPosterior, args=[args], gtol=1e-4,maxiter=20,disp=1)
65 | pred = np.squeeze(np.array([gpc.predict(np.reshape(i,(1,2)),train,traint,newTheta2) for i in test[:,:2]]))
66 | output[:,2] = np.reshape(np.where(pred[:,0]<0,-1,1),(np.shape(pred)[1],1))
67 | print np.sum(np.abs(output-testt))
68 |
69 | #err1 = np.where((output==1.) & (test==-1.))[0]
70 | #err2 = np.where((output==-1.) & (test==1.))[0]
71 | #print "Class 1 errors ",len(err1)," from ",len(test[test==1])
72 | #print "Class 2 errors ",len(err2)," from ",len(test[test==-1])
73 | #print "Test accuracy ",1. -(float(len(err1)+len(err2)))/ (len(test[test==1]) + len(test[test==-1]))
74 |
75 |
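# The SVM comparison below also needs the svm module (Ch8) and a classifier for
# the first class; a minimal sketch mirroring the svm1/svm2 setup that follows:
import svm
svm0 = svm.svm(kernel='poly',degree=1.)
svm0.train_svm(train[:,:2],traint[:,0])
output[:,0] = svm0.classifier(test[:,:2],soft=True).T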
76 | #svm1 = svm.svm(kernel='linear',sigma=3.)
77 | svm1 = svm.svm(kernel='poly',degree=1.)
78 | #svm1 = svm.svm(kernel='rbf',gamma=0.7,sigma=2.)
79 | svm1.train_svm(train[:,:2],traint[:,1])
80 | output[:,1] = svm1.classifier(test[:,:2],soft=True).T
81 |
82 | #svm2 = svm.svm(kernel='linear',sigma=3.)
83 | svm2 = svm.svm(kernel='poly',degree=1.)
84 | #svm2 = svm.svm(kernel='rbf',gamma=0.7,sigma=2.)
85 | svm2.train_svm(train[:,:2],traint[:,2])
86 | output[:,2] = svm2.classifier(test[:,:2],soft=True).T
87 |
88 | # Make a decision about which class
89 | # Pick the one with the largest margin
90 | bestclass = np.argmax(output,axis=1)
91 | err = np.where(bestclass!=iris[1::2,4])[0]
92 | print len(err), np.shape(iris)[0]/2.
93 |
94 | # Make a plot
95 | pl.figure()
96 | step=0.01
97 | f0,f1 = np.meshgrid(np.arange(np.min(train[:,0])-0.5, np.max(train[:,0])+0.5, step), np.arange(np.min(train[:,1])-0.5, np.max(train[:,1])+0.5, step))
98 |
99 | out = np.zeros((np.shape(f0.ravel())[0],3))
100 | out[:,0] = svm0.classifier(np.c_[np.ravel(f0), np.ravel(f1)],soft=True).T
101 | out[:,1] = svm1.classifier(np.c_[np.ravel(f0), np.ravel(f1)],soft=True).T
102 | out[:,2]= svm2.classifier(np.c_[np.ravel(f0), np.ravel(f1)],soft=True).T
103 | out = np.argmax(out[:,:3],axis=1)
104 |
105 | # Put the result into a color plot
106 | out = out.reshape(f0.shape)
107 | pl.contourf(f0, f1, out, cmap=pl.cm.Paired)
108 | #pl.axis('off')
109 |
110 | # Plot also the training points
111 | #traint = np.where(traint==-1,0,1)
112 | pl.plot(train[svm0.sv,0],train[svm0.sv,1],'o',markerfacecolor=None,markeredgecolor='r',markeredgewidth=3)
113 | pl.scatter(train[:, 0], train[:, 1], c=iris[::2,4], cmap=pl.cm.Paired)
114 | #pl.plot(train[:, 0], train[:, 1],'o', c=traint, cmap=pl.cm.Paired)
115 |
--------------------------------------------------------------------------------
/Ch18/plotdist.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 18 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2014
10 |
11 | import pylab as pl
12 | import numpy as np
13 |
14 | # Weibull and Gaussian fits
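# Weibull density: f(x) = (k/l)*(x/l)**(k-1)*exp(-(x/l)**k) for x >= 0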
15 |
16 | pl.ion()
17 | pl.figure()
18 | l = 1.
19 | k = 2.
20 | x = np.random.random(27)*2.
21 | x = np.concatenate((x,np.random.rand(9)+2.))
22 | xx = k/l * (x/l)**(k-1) * np.exp(-(x/l)**k) + np.random.random(36)*0.2-0.1
23 | pl.plot(x,xx,'o')
24 |
25 | pl.figure()
26 | pl.plot(x,xx,'o')
27 | x = np.arange(0,3,0.01)
28 | s = 0.5
29 | mu = 0.7
30 | y = 1/(np.sqrt(2*np.pi)*s) * np.exp(-0.5*(x-mu)**2/s**2)
31 | pl.plot(x,y,'k')
32 |
33 |
34 | z = k/l * (x/l)**(k-1) * np.exp(-(x/l)**k)
35 | pl.plot(x,z,'r--')
36 |
--------------------------------------------------------------------------------
/Ch18/plotgp.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 18 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2014
10 |
11 | import numpy as np
12 | import pylab as pl
13 |
14 | a = 1. + 0.25*np.random.randn(10)
15 | #a = 0.25*np.random.randn(10)
16 | b = 1. + np.random.randn(10)
17 |
18 | #x = np.linspace(0,2,100)
19 | #f = lambda x: np.exp(ai*x) * np.cos(bi*x)
20 | x = np.linspace(-2,2,100)
21 | f = lambda x: np.exp(-(ai*x)**2)
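# Each sampled a[i] gives one draw from this family of random functions;
# b is only used by the commented-out exp(a*x)*cos(b*x) alternative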
22 |
23 | pl.figure()
24 | for i in range(10):
25 | ai = a[i]
26 | bi = b[i]
27 | pl.plot(x,f(x),'k-')
28 | pl.xlabel('x')
29 | pl.ylabel('f(x)')
30 |
31 | x1 = 0.5
32 | y1 = 0.5
33 | x2 = 1.5
34 | y2 = 1.0
35 |
36 |
37 | a = 1.+0.25*np.random.randn(10000)
38 | #a = 0.25*np.random.randn(10000)
39 | b = 1. + np.random.randn(10000)
40 |
41 | nbins = 25
42 | pl.figure()
43 | f1 = np.exp(-a**2)
44 | #f1 = np.exp(a) * np.cos(b)
45 | c = pl.hist(f1,bins=nbins)
46 | p = c[0]/np.max(c[0])
47 | pl.figure()
48 | pl.plot(c[1][:nbins],p,'-k')
49 | pl.xlabel('f(1)')
50 | pl.ylabel('Pr(f(1))')
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/Ch2/gaussian.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 2 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # Plots of three 2D Gaussians
12 |
13 | import pylab as pl
14 | import numpy as np
15 |
16 | x = np.arange(-5,5,0.01)
17 | s = 1
18 | mu = 0
19 | y = 1/(np.sqrt(2*np.pi)*s) * np.exp(-0.5*(x-mu)**2/s**2)
20 | pl.plot(x,y,'k')
21 |
22 | pl.close('all')
23 | mu = np.array([2,-3])
24 | s = np.array([1,1])
25 | #s = array([0.5,2])
26 | x = np.random.normal(mu,scale=s,size = (500,2))
27 | pl.plot(x[:,0],x[:,1],'ko')
28 | #axis(array([0,3,-8,4]))
29 | pl.axis('equal')
30 |
31 | theta = np.arange(0,2.1*np.pi,np.pi/20)
32 |
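# 2-sigma and 3-sigma circles around the mean (both variances are 1 here)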
33 | pl.plot(mu[0]+2*np.cos(theta),mu[1]+2*np.sin(theta),'k-')
34 | pl.plot(mu[0]+3*np.cos(theta),mu[1]+3*np.sin(theta),'k-')
35 |
36 |
37 | pl.figure()
38 |
39 | mu = np.array([2,-3])
40 | s = np.array([0.5,2])
41 | x = np.random.normal(mu,scale=s,size = (500,2))
42 | phi = 2*np.pi/3
43 | pl.plot(x[:,0]*np.cos(phi)+x[:,1]*np.sin(phi),x[:,0]*(-np.sin(phi)) + x[:,1]*np.cos(phi),'ko')
44 | pl.axis('equal')
45 |
46 | theta = np.arange(0,2.1*np.pi,np.pi/20)
47 | pl.plot((mu[0]+3*s[0]*np.cos(theta))*np.cos(phi)+(mu[1]+3*s[1]*np.sin(theta))*np.sin(phi), (mu[0]+3*s[0]*np.cos(theta))*np.sin(-phi)+(mu[1]+3*s[1]*np.sin(theta))*np.cos(phi), 'k-')
48 |
49 | pl.figure()
50 | mu = np.array([2,-3])
51 | s = np.array([0.5,2])
52 | x = np.random.normal(mu,scale=s,size = (500,2))
53 | pl.plot(x[:,0],x[:,1],'ko')
54 | pl.axis('equal')
55 |
56 | theta = np.arange(0,2.1*np.pi,np.pi/20)
57 | pl.plot(mu[0]+3*s[0]*np.cos(theta),mu[1]+3*s[1]*np.sin(theta), 'k-')
58 |
59 | pl.show()
60 |
--------------------------------------------------------------------------------
/Ch2/plotGaussian.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 2 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # Plots a 1D Gaussian function
12 | import pylab as pl
13 | import numpy as np
14 |
15 | gaussian = lambda x: 1/(np.sqrt(2*np.pi)*1.5)*np.exp(-(x-0)**2/(2*(1.5**2)))
16 | x = np.arange(-5,5,0.01)
17 | y = gaussian(x)
18 | pl.ion()
19 | pl.plot(x,y,'k',linewidth=3)
20 | pl.xlabel('x')
21 | pl.ylabel('y(x)')
22 | pl.axis([-5,5,0,0.3])
23 | pl.title('Gaussian Function (mean 0, standard deviation 1.5)')
24 | pl.show()
25 |
--------------------------------------------------------------------------------
/Ch3/autompg.py:
--------------------------------------------------------------------------------
1 | # Code from Chapter 3 of Machine Learning: An Algorithmic Perspective (2nd Edition)
2 | # by Stephen Marsland (http://stephenmonika.net)
3 |
4 | # You are free to use, change, or redistribute the code in any way you wish for
5 | # non-commercial purposes, but please maintain the name of the original author.
6 | # This code comes with no warranty of any kind.
7 |
8 | # Stephen Marsland, 2008, 2014
9 |
10 | # This is the start of a script for you to complete
11 | import numpy as np
12 | import linreg
13 |
14 | auto = np.loadtxt('/Users/srmarsla/Book/Datasets/auto-mpg/auto-mpg.data.txt',comments='"')
15 |
16 | # Separate the data into training and testing sets
17 |
18 | # Normalise the data
19 |
20 | # This is the training part
21 | beta = linreg.linreg(trainin,traintgt)
22 | testin = np.concatenate((testin,-np.ones((np.shape(testin)[0],1))),axis=1)
23 | testout = np.dot(testin,beta)
24 | error = np.sum((testout - testtgt)**2)
25 | print error
26 |
--------------------------------------------------------------------------------
/Ch3/linreg.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 3 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | import numpy as np
12 |
13 | def linreg(inputs,targets):
14 |
15 | inputs = np.concatenate((inputs,-np.ones((np.shape(inputs)[0],1))),axis=1)
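# Ordinary least squares via the normal equations: beta = (X^T X)^{-1} X^T t,
# where X includes the -1 bias column added above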
16 | beta = np.dot(np.dot(np.linalg.inv(np.dot(np.transpose(inputs),inputs)),np.transpose(inputs)),targets)
17 |
18 | outputs = np.dot(inputs,beta)
19 | #print shape(beta)
20 | #print outputs
21 | return beta
22 |
--------------------------------------------------------------------------------
/Ch3/linreg_logic_eg.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 3 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # Demonstration of the Perceptron and Linear Regressor on the basic logic functions
12 |
13 | import numpy as np
14 | import linreg
15 |
16 | inputs = np.array([[0,0],[0,1],[1,0],[1,1]])
17 | testin = np.concatenate((inputs,-np.ones((np.shape(inputs)[0],1))),axis=1)
18 |
19 | # AND data
20 | ANDtargets = np.array([[0],[0],[0],[1]])
21 | # OR data
22 | ORtargets = np.array([[0],[1],[1],[1]])
23 | # XOR data
24 | XORtargets = np.array([[0],[1],[1],[0]])
25 |
26 | print "AND data"
27 | ANDbeta = linreg.linreg(inputs,ANDtargets)
28 | ANDout = np.dot(testin,ANDbeta)
29 | print ANDout
30 |
31 | print "OR data"
32 | ORbeta = linreg.linreg(inputs,ORtargets)
33 | ORout = np.dot(testin,ORbeta)
34 | print ORout
35 |
36 | print "XOR data"
37 | XORbeta = linreg.linreg(inputs,XORtargets)
38 | XORout = np.dot(testin,XORbeta)
39 | print XORout
40 |
--------------------------------------------------------------------------------
/Ch3/logic.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 3 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # Demonstration of the Perceptron and Linear Regressor on the basic logic functions
12 |
13 | import numpy as np
14 | inputs = np.array([[0,0],[0,1],[1,0],[1,1]])
15 | # AND data
16 | ANDtargets = np.array([[0],[0],[0],[1]])
17 | # OR data
18 | ORtargets = np.array([[0],[1],[1],[1]])
19 | # XOR data
20 | XORtargets = np.array([[0],[1],[1],[0]])
21 | import pcn_logic_eg
22 |
23 | print "AND logic function"
24 | pAND = pcn_logic_eg.pcn(inputs,ANDtargets)
25 | pAND.pcntrain(inputs,ANDtargets,0.25,6)
26 |
27 | print "OR logic function"
28 | pOR = pcn_logic_eg.pcn(inputs,ORtargets)
29 | pOR.pcntrain(inputs,ORtargets,0.25,6)
30 |
31 | print "XOR logic function"
32 | pXOR = pcn_logic_eg.pcn(inputs,XORtargets)
33 | pXOR.pcntrain(inputs,XORtargets,0.25,6)
34 |
--------------------------------------------------------------------------------
/Ch3/mnist.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 3 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 | import pylab as pl
11 | import numpy as np
12 | import pcn
13 | import cPickle, gzip
14 |
15 | # Read the dataset in (code from sheet)
16 | f = gzip.open('mnist.pkl.gz','rb')
17 | tset, vset, teset = cPickle.load(f)
18 | f.close()
19 |
20 | nread = 200
21 | # Just use the first few images
22 | train_in = tset[0][:nread,:]
23 |
24 | # This is a little bit of work -- 1 of N encoding
25 | # Make sure you understand how it does it
26 | train_tgt = np.zeros((nread,10))
27 | for i in range(nread):
28 | train_tgt[i,tset[1][i]] = 1
29 |
30 | test_in = teset[0][:nread,:]
31 | test_tgt = np.zeros((nread,10))
32 | for i in range(nread):
33 | test_tgt[i,teset[1][i]] = 1
34 |
35 | # Train a Perceptron on training set
36 | p = pcn.pcn(train_in, train_tgt)
37 | p.pcntrain(train_in, train_tgt,0.25,100)
38 |
39 | # This isn't really good practice since it's on the training data,
40 | # but it does show that it is learning.
41 | p.confmat(train_in,train_tgt)
42 |
43 | # Now test it
44 | p.confmat(test_in,test_tgt)
45 |
46 |
--------------------------------------------------------------------------------
/Ch3/pcn.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 3 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | import numpy as np
12 |
13 | class pcn:
14 | """ A basic Perceptron"""
15 |
16 | def __init__(self,inputs,targets):
17 | """ Constructor """
18 | # Set up network size
19 | if np.ndim(inputs)>1:
20 | self.nIn = np.shape(inputs)[1]
21 | else:
22 | self.nIn = 1
23 |
24 | if np.ndim(targets)>1:
25 | self.nOut = np.shape(targets)[1]
26 | else:
27 | self.nOut = 1
28 |
29 | self.nData = np.shape(inputs)[0]
30 |
31 | # Initialise network
32 | self.weights = np.random.rand(self.nIn+1,self.nOut)*0.1-0.05
33 |
34 | def pcntrain(self,inputs,targets,eta,nIterations):
35 | """ Train the thing """
36 | # Add the inputs that match the bias node
37 | inputs = np.concatenate((inputs,-np.ones((self.nData,1))),axis=1)
38 | # Training
39 | change = range(self.nData)
40 |
41 | for n in range(nIterations):
42 |
43 | self.activations = self.pcnfwd(inputs);
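# Batch perceptron learning rule: w <- w - eta * X^T (y - t)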
44 | self.weights -= eta*np.dot(np.transpose(inputs),self.activations-targets)
45 |
46 | # Randomise order of inputs
47 | #np.random.shuffle(change)
48 | #inputs = inputs[change,:]
49 | #targets = targets[change,:]
50 |
51 | #return self.weights
52 |
53 | def pcnfwd(self,inputs):
54 | """ Run the network forward """
55 | # Compute activations
56 | activations = np.dot(inputs,self.weights)
57 |
58 | # Threshold the activations
59 | return np.where(activations>0,1,0)
60 |
61 |
62 | def confmat(self,inputs,targets):
63 | """Confusion matrix"""
64 |
65 | # Add the inputs that match the bias node
66 | inputs = np.concatenate((inputs,-np.ones((np.shape(inputs)[0],1))),axis=1)
67 |
68 | outputs = np.dot(inputs,self.weights)
69 |
70 | nClasses = np.shape(targets)[1]
71 |
72 | if nClasses==1:
73 | nClasses = 2
74 | outputs = np.where(outputs>0,1,0)
75 | else:
76 | # 1-of-N encoding
77 | outputs = np.argmax(outputs,1)
78 | targets = np.argmax(targets,1)
79 |
80 | cm = np.zeros((nClasses,nClasses))
81 | for i in range(nClasses):
82 | for j in range(nClasses):
83 | cm[i,j] = np.sum(np.where(outputs==i,1,0)*np.where(targets==j,1,0))
84 |
85 | print cm
86 | print np.trace(cm)/np.sum(cm)
87 |
88 | def logic():
89 | """ Run AND and XOR logic functions"""
90 | import pcn
91 |
92 | a = np.array([[0,0,0],[0,1,0],[1,0,0],[1,1,1]])
93 | b = np.array([[0,0,0],[0,1,1],[1,0,1],[1,1,0]])
94 |
95 | p = pcn.pcn(a[:,0:2],a[:,2:])
96 | p.pcntrain(a[:,0:2],a[:,2:],0.25,10)
97 | p.confmat(a[:,0:2],a[:,2:])
98 |
99 | q = pcn.pcn(b[:,0:2],b[:,2:])
100 | q.pcntrain(b[:,0:2],b[:,2:],0.25,10)
101 | q.confmat(b[:,0:2],b[:,2:])
102 |
103 |
--------------------------------------------------------------------------------
/Ch3/pcn_logic_eg.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 3 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | import numpy as np
12 |
13 | class pcn:
14 | """ A basic Perceptron (the same pcn.py except with the weights printed
15 | and it does not reorder the inputs)"""
16 |
17 | def __init__(self,inputs,targets):
18 | """ Constructor """
19 | # Set up network size
20 | if np.ndim(inputs)>1:
21 | self.nIn = np.shape(inputs)[1]
22 | else:
23 | self.nIn = 1
24 |
25 | if np.ndim(targets)>1:
26 | self.nOut = np.shape(targets)[1]
27 | else:
28 | self.nOut = 1
29 |
30 | self.nData = np.shape(inputs)[0]
31 |
32 | # Initialise network
33 | self.weights = np.random.rand(self.nIn+1,self.nOut)*0.1-0.05
34 |
35 | def pcntrain(self,inputs,targets,eta,nIterations):
36 | """ Train the thing """
37 | # Add the inputs that match the bias node
38 | inputs = np.concatenate((inputs,-np.ones((self.nData,1))),axis=1)
39 |
40 | # Training
41 | change = range(self.nData)
42 |
43 | for n in range(nIterations):
44 |
45 | self.activations = self.pcnfwd(inputs);
46 | self.weights -= eta*np.dot(np.transpose(inputs),self.activations-targets)
47 | print "Iteration: ", n
48 | print self.weights
49 |
50 | activations = self.pcnfwd(inputs)
51 | print "Final outputs are:"
52 | print activations
53 | #return self.weights
54 |
55 | def pcnfwd(self,inputs):
56 | """ Run the network forward """
57 |
58 | # Compute activations
59 | activations = np.dot(inputs,self.weights)
60 |
61 | # Threshold the activations
62 | return np.where(activations>0,1,0)
63 |
64 | def confmat(self,inputs,targets):
65 | """Confusion matrix"""
66 |
67 | # Add the inputs that match the bias node
68 | inputs = np.concatenate((inputs,-np.ones((np.shape(inputs)[0],1))),axis=1)
69 | outputs = np.dot(inputs,self.weights)
70 |
71 | nClasses = np.shape(targets)[1]
72 |
73 | if nClasses==1:
74 | nClasses = 2
75 | outputs = np.where(outputs>0,1,0)
76 | else:
77 | # 1-of-N encoding
78 | outputs = np.argmax(outputs,1)
79 | targets = np.argmax(targets,1)
80 |
81 | cm = np.zeros((nClasses,nClasses))
82 | for i in range(nClasses):
83 | for j in range(nClasses):
84 | cm[i,j] = np.sum(np.where(outputs==i,1,0)*np.where(targets==j,1,0))
85 |
86 | print cm
87 | print np.trace(cm)/np.sum(cm)
88 |
89 |
90 |
--------------------------------------------------------------------------------
/Ch3/pima.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 3 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # Demonstration of the Perceptron on the Pima Indian dataset
12 |
13 | import pylab as pl
14 | import numpy as np
15 | import pcn
16 |
17 | pima = np.loadtxt('/Users/srmarsla/Book/Datasets/pima/pima-indians-diabetes.data',delimiter=',')
18 |
19 | # Plot the first and second values for the two classes
20 | indices0 = np.where(pima[:,8]==0)
21 | indices1 = np.where(pima[:,8]==1)
22 |
23 | pl.ion()
24 | pl.plot(pima[indices0,0],pima[indices0,1],'go')
25 | pl.plot(pima[indices1,0],pima[indices1,1],'rx')
26 |
27 | # Perceptron training on the original dataset
28 | print "Output on original data"
29 | p = pcn.pcn(pima[:,:8],pima[:,8:9])
30 | p.pcntrain(pima[:,:8],pima[:,8:9],0.25,100)
31 | p.confmat(pima[:,:8],pima[:,8:9])
32 |
33 | # Various preprocessing steps
34 | pima[np.where(pima[:,0]>8),0] = 8
35 |
36 | pima[np.where(pima[:,7]<=30),7] = 1
37 | pima[np.where((pima[:,7]>30) & (pima[:,7]<=40)),7] = 2
38 | pima[np.where((pima[:,7]>40) & (pima[:,7]<=50)),7] = 3
39 | pima[np.where((pima[:,7]>50) & (pima[:,7]<=60)),7] = 4
40 | pima[np.where(pima[:,7]>60),7] = 5
41 |
42 | pima[:,:8] = pima[:,:8]-pima[:,:8].mean(axis=0)
43 | pima[:,:8] = pima[:,:8]/pima[:,:8].var(axis=0)
44 |
45 | #print pima.mean(axis=0)
46 | #print pima.var(axis=0)
47 | #print pima.max(axis=0)
48 | #print pima.min(axis=0)
49 |
50 | trainin = pima[::2,:8]
51 | testin = pima[1::2,:8]
52 | traintgt = pima[::2,8:9]
53 | testtgt = pima[1::2,8:9]
54 |
55 | # Perceptron training on the preprocessed dataset
56 | print "Output after preprocessing of data"
57 | p1 = pcn.pcn(trainin,traintgt)
58 | p1.pcntrain(trainin,traintgt,0.25,100)
59 | p1.confmat(testin,testtgt)
60 |
61 |
62 |
63 | pl.show()
64 |
--------------------------------------------------------------------------------
/Ch4/PNOz.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 4 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # The Palmerston North Ozone time series example
12 |
13 | import pylab as pl
14 | import numpy as np
15 |
16 | PNoz = np.loadtxt('PNOz.dat')
17 | pl.ion()
18 | pl.plot(np.arange(np.shape(PNoz)[0]),PNoz[:,2],'.')
19 | pl.xlabel('Time (Days)')
20 | pl.ylabel('Ozone (Dobson units)')
21 |
22 | # Normalise data
23 | PNoz[:,2] = PNoz[:,2]-PNoz[:,2].mean()
24 | PNoz[:,2] = PNoz[:,2]/PNoz[:,2].max()
25 |
26 | # Assemble input vectors
27 | t = 2
28 | k = 3
29 |
30 | lastPoint = np.shape(PNoz)[0]-t*(k+1)
31 | inputs = np.zeros((lastPoint,k))
32 | targets = np.zeros((lastPoint,1))
33 | for i in range(lastPoint):
34 | inputs[i,:] = PNoz[i:i+t*k:t,2]
35 | targets[i] = PNoz[i+t*(k+1),2]
36 |
37 | test = inputs[-400:,:]
38 | testtargets = targets[-400:]
39 | train = inputs[:-400:2,:]
40 | traintargets = targets[:-400:2]
41 | valid = inputs[1:-400:2,:]
42 | validtargets = targets[1:-400:2]
43 |
44 | # Randomly order the data
45 | change = range(np.shape(inputs)[0])
46 | np.random.shuffle(change)
47 | inputs = inputs[change,:]
48 | targets = targets[change,:]
49 |
50 | # Train the network
51 | import mlp
52 | net = mlp.mlp(train,traintargets,3,outtype='linear')
53 | net.earlystopping(train,traintargets,valid,validtargets,0.25)
54 |
55 | test = np.concatenate((test,-np.ones((np.shape(test)[0],1))),axis=1)
56 | testout = net.mlpfwd(test)
57 |
58 | pl.figure()
59 | pl.plot(np.arange(np.shape(test)[0]),testout,'.')
60 | pl.plot(np.arange(np.shape(test)[0]),testtargets,'x')
61 | pl.legend(('Predictions','Targets'))
62 | print 0.5*np.sum((testtargets-testout)**2)
63 | pl.show()
64 |
--------------------------------------------------------------------------------
/Ch4/iris.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 4 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # The iris classification example
12 |
13 | def preprocessIris(infile,outfile):
14 |
15 | stext1 = 'Iris-setosa'
16 | stext2 = 'Iris-versicolor'
17 | stext3 = 'Iris-virginica'
18 | rtext1 = '0'
19 | rtext2 = '1'
20 | rtext3 = '2'
21 |
22 | fid = open(infile,"r")
23 | oid = open(outfile,"w")
24 |
25 | for s in fid:
26 | if s.find(stext1)>-1:
27 | oid.write(s.replace(stext1, rtext1))
28 | elif s.find(stext2)>-1:
29 | oid.write(s.replace(stext2, rtext2))
30 | elif s.find(stext3)>-1:
31 | oid.write(s.replace(stext3, rtext3))
32 | fid.close()
33 | oid.close()
34 |
35 | import numpy as np
36 | # Preprocessor to replace the text class names with numbers (only needed once)
37 | #preprocessIris('/Users/srmarsla/Book/Datasets/Iris/iris.data','iris_proc.data')
38 |
39 | iris = np.loadtxt('iris_proc.data',delimiter=',')
40 | iris[:,:4] = iris[:,:4]-iris[:,:4].mean(axis=0)
41 | imax = np.concatenate((iris.max(axis=0)*np.ones((1,5)),np.abs(iris.min(axis=0)*np.ones((1,5)))),axis=0).max(axis=0)
42 | iris[:,:4] = iris[:,:4]/imax[:4]
43 | print iris[0:5,:]
44 |
45 | # Split into training, validation, and test sets
46 | target = np.zeros((np.shape(iris)[0],3));
47 | indices = np.where(iris[:,4]==0)
48 | target[indices,0] = 1
49 | indices = np.where(iris[:,4]==1)
50 | target[indices,1] = 1
51 | indices = np.where(iris[:,4]==2)
52 | target[indices,2] = 1
53 |
54 | # Randomly order the data
55 | order = range(np.shape(iris)[0])
56 | np.random.shuffle(order)
57 | iris = iris[order,:]
58 | target = target[order,:]
59 |
60 | train = iris[::2,0:4]
61 | traint = target[::2]
62 | valid = iris[1::4,0:4]
63 | validt = target[1::4]
64 | test = iris[3::4,0:4]
65 | testt = target[3::4]
66 |
67 | #print train.max(axis=0), train.min(axis=0)
68 |
69 | # Train the network
70 | import mlp
71 | net = mlp.mlp(train,traint,5,outtype='logistic')
72 | net.earlystopping(train,traint,valid,validt,0.1)
73 | net.confmat(test,testt)
74 |
--------------------------------------------------------------------------------
/Ch4/iris_proc.data:
--------------------------------------------------------------------------------
1 | 5.1,3.5,1.4,0.2,0
2 | 4.9,3.0,1.4,0.2,0
3 | 4.7,3.2,1.3,0.2,0
4 | 4.6,3.1,1.5,0.2,0
5 | 5.0,3.6,1.4,0.2,0
6 | 5.4,3.9,1.7,0.4,0
7 | 4.6,3.4,1.4,0.3,0
8 | 5.0,3.4,1.5,0.2,0
9 | 4.4,2.9,1.4,0.2,0
10 | 4.9,3.1,1.5,0.1,0
11 | 5.4,3.7,1.5,0.2,0
12 | 4.8,3.4,1.6,0.2,0
13 | 4.8,3.0,1.4,0.1,0
14 | 4.3,3.0,1.1,0.1,0
15 | 5.8,4.0,1.2,0.2,0
16 | 5.7,4.4,1.5,0.4,0
17 | 5.4,3.9,1.3,0.4,0
18 | 5.1,3.5,1.4,0.3,0
19 | 5.7,3.8,1.7,0.3,0
20 | 5.1,3.8,1.5,0.3,0
21 | 5.4,3.4,1.7,0.2,0
22 | 5.1,3.7,1.5,0.4,0
23 | 4.6,3.6,1.0,0.2,0
24 | 5.1,3.3,1.7,0.5,0
25 | 4.8,3.4,1.9,0.2,0
26 | 5.0,3.0,1.6,0.2,0
27 | 5.0,3.4,1.6,0.4,0
28 | 5.2,3.5,1.5,0.2,0
29 | 5.2,3.4,1.4,0.2,0
30 | 4.7,3.2,1.6,0.2,0
31 | 4.8,3.1,1.6,0.2,0
32 | 5.4,3.4,1.5,0.4,0
33 | 5.2,4.1,1.5,0.1,0
34 | 5.5,4.2,1.4,0.2,0
35 | 4.9,3.1,1.5,0.1,0
36 | 5.0,3.2,1.2,0.2,0
37 | 5.5,3.5,1.3,0.2,0
38 | 4.9,3.1,1.5,0.1,0
39 | 4.4,3.0,1.3,0.2,0
40 | 5.1,3.4,1.5,0.2,0
41 | 5.0,3.5,1.3,0.3,0
42 | 4.5,2.3,1.3,0.3,0
43 | 4.4,3.2,1.3,0.2,0
44 | 5.0,3.5,1.6,0.6,0
45 | 5.1,3.8,1.9,0.4,0
46 | 4.8,3.0,1.4,0.3,0
47 | 5.1,3.8,1.6,0.2,0
48 | 4.6,3.2,1.4,0.2,0
49 | 5.3,3.7,1.5,0.2,0
50 | 5.0,3.3,1.4,0.2,0
51 | 7.0,3.2,4.7,1.4,1
52 | 6.4,3.2,4.5,1.5,1
53 | 6.9,3.1,4.9,1.5,1
54 | 5.5,2.3,4.0,1.3,1
55 | 6.5,2.8,4.6,1.5,1
56 | 5.7,2.8,4.5,1.3,1
57 | 6.3,3.3,4.7,1.6,1
58 | 4.9,2.4,3.3,1.0,1
59 | 6.6,2.9,4.6,1.3,1
60 | 5.2,2.7,3.9,1.4,1
61 | 5.0,2.0,3.5,1.0,1
62 | 5.9,3.0,4.2,1.5,1
63 | 6.0,2.2,4.0,1.0,1
64 | 6.1,2.9,4.7,1.4,1
65 | 5.6,2.9,3.6,1.3,1
66 | 6.7,3.1,4.4,1.4,1
67 | 5.6,3.0,4.5,1.5,1
68 | 5.8,2.7,4.1,1.0,1
69 | 6.2,2.2,4.5,1.5,1
70 | 5.6,2.5,3.9,1.1,1
71 | 5.9,3.2,4.8,1.8,1
72 | 6.1,2.8,4.0,1.3,1
73 | 6.3,2.5,4.9,1.5,1
74 | 6.1,2.8,4.7,1.2,1
75 | 6.4,2.9,4.3,1.3,1
76 | 6.6,3.0,4.4,1.4,1
77 | 6.8,2.8,4.8,1.4,1
78 | 6.7,3.0,5.0,1.7,1
79 | 6.0,2.9,4.5,1.5,1
80 | 5.7,2.6,3.5,1.0,1
81 | 5.5,2.4,3.8,1.1,1
82 | 5.5,2.4,3.7,1.0,1
83 | 5.8,2.7,3.9,1.2,1
84 | 6.0,2.7,5.1,1.6,1
85 | 5.4,3.0,4.5,1.5,1
86 | 6.0,3.4,4.5,1.6,1
87 | 6.7,3.1,4.7,1.5,1
88 | 6.3,2.3,4.4,1.3,1
89 | 5.6,3.0,4.1,1.3,1
90 | 5.5,2.5,4.0,1.3,1
91 | 5.5,2.6,4.4,1.2,1
92 | 6.1,3.0,4.6,1.4,1
93 | 5.8,2.6,4.0,1.2,1
94 | 5.0,2.3,3.3,1.0,1
95 | 5.6,2.7,4.2,1.3,1
96 | 5.7,3.0,4.2,1.2,1
97 | 5.7,2.9,4.2,1.3,1
98 | 6.2,2.9,4.3,1.3,1
99 | 5.1,2.5,3.0,1.1,1
100 | 5.7,2.8,4.1,1.3,1
101 | 6.3,3.3,6.0,2.5,2
102 | 5.8,2.7,5.1,1.9,2
103 | 7.1,3.0,5.9,2.1,2
104 | 6.3,2.9,5.6,1.8,2
105 | 6.5,3.0,5.8,2.2,2
106 | 7.6,3.0,6.6,2.1,2
107 | 4.9,2.5,4.5,1.7,2
108 | 7.3,2.9,6.3,1.8,2
109 | 6.7,2.5,5.8,1.8,2
110 | 7.2,3.6,6.1,2.5,2
111 | 6.5,3.2,5.1,2.0,2
112 | 6.4,2.7,5.3,1.9,2
113 | 6.8,3.0,5.5,2.1,2
114 | 5.7,2.5,5.0,2.0,2
115 | 5.8,2.8,5.1,2.4,2
116 | 6.4,3.2,5.3,2.3,2
117 | 6.5,3.0,5.5,1.8,2
118 | 7.7,3.8,6.7,2.2,2
119 | 7.7,2.6,6.9,2.3,2
120 | 6.0,2.2,5.0,1.5,2
121 | 6.9,3.2,5.7,2.3,2
122 | 5.6,2.8,4.9,2.0,2
123 | 7.7,2.8,6.7,2.0,2
124 | 6.3,2.7,4.9,1.8,2
125 | 6.7,3.3,5.7,2.1,2
126 | 7.2,3.2,6.0,1.8,2
127 | 6.2,2.8,4.8,1.8,2
128 | 6.1,3.0,4.9,1.8,2
129 | 6.4,2.8,5.6,2.1,2
130 | 7.2,3.0,5.8,1.6,2
131 | 7.4,2.8,6.1,1.9,2
132 | 7.9,3.8,6.4,2.0,2
133 | 6.4,2.8,5.6,2.2,2
134 | 6.3,2.8,5.1,1.5,2
135 | 6.1,2.6,5.6,1.4,2
136 | 7.7,3.0,6.1,2.3,2
137 | 6.3,3.4,5.6,2.4,2
138 | 6.4,3.1,5.5,1.8,2
139 | 6.0,3.0,4.8,1.8,2
140 | 6.9,3.1,5.4,2.1,2
141 | 6.7,3.1,5.6,2.4,2
142 | 6.9,3.1,5.1,2.3,2
143 | 5.8,2.7,5.1,1.9,2
144 | 6.8,3.2,5.9,2.3,2
145 | 6.7,3.3,5.7,2.5,2
146 | 6.7,3.0,5.2,2.3,2
147 | 6.3,2.5,5.0,1.9,2
148 | 6.5,3.0,5.2,2.0,2
149 | 6.2,3.4,5.4,2.3,2
150 | 5.9,3.0,5.1,1.8,2
151 |
--------------------------------------------------------------------------------
/Ch4/logic.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 4 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | import numpy as np
12 | import mlp
13 |
14 | anddata = np.array([[0,0,0],[0,1,0],[1,0,0],[1,1,1]])
15 | xordata = np.array([[0,0,0],[0,1,1],[1,0,1],[1,1,0]])
16 |
17 | p = mlp.mlp(anddata[:,0:2],anddata[:,2:3],2)
18 | p.mlptrain(anddata[:,0:2],anddata[:,2:3],0.25,1001)
19 | p.confmat(anddata[:,0:2],anddata[:,2:3])
20 |
21 | q = mlp.mlp(xordata[:,0:2],xordata[:,2:3],2,outtype='logistic')
22 | q.mlptrain(xordata[:,0:2],xordata[:,2:3],0.25,5001)
23 | q.confmat(xordata[:,0:2],xordata[:,2:3])
24 |
25 | #anddata = array([[0,0,1,0],[0,1,1,0],[1,0,1,0],[1,1,0,1]])
26 | #xordata = array([[0,0,1,0],[0,1,0,1],[1,0,0,1],[1,1,1,0]])
27 | #
28 | #p = mlp.mlp(anddata[:,0:2],anddata[:,2:4],2,outtype='linear')
29 | #p.mlptrain(anddata[:,0:2],anddata[:,2:4],0.25,1001)
30 | #p.confmat(anddata[:,0:2],anddata[:,2:4])
31 | #
32 | #q = mlp.mlp(xordata[:,0:2],xordata[:,2:4],2,outtype='linear')
33 | #q.mlptrain(xordata[:,0:2],xordata[:,2:4],0.15,5001)
34 | #q.confmat(xordata[:,0:2],xordata[:,2:4])
35 |
--------------------------------------------------------------------------------
/Ch4/mlp.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 4 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | import numpy as np
12 |
13 | class mlp:
14 | """ A Multi-Layer Perceptron"""
15 |
16 | def __init__(self,inputs,targets,nhidden,beta=1,momentum=0.9,outtype='logistic'):
17 | """ Constructor """
18 | # Set up network size
19 | self.nin = np.shape(inputs)[1]
20 | self.nout = np.shape(targets)[1]
21 | self.ndata = np.shape(inputs)[0]
22 | self.nhidden = nhidden
23 |
24 | self.beta = beta
25 | self.momentum = momentum
26 | self.outtype = outtype
27 |
28 | # Initialise network
29 | self.weights1 = (np.random.rand(self.nin+1,self.nhidden)-0.5)*2/np.sqrt(self.nin)
30 | self.weights2 = (np.random.rand(self.nhidden+1,self.nout)-0.5)*2/np.sqrt(self.nhidden)
31 |
32 | def earlystopping(self,inputs,targets,valid,validtargets,eta,niterations=100):
33 |
34 | valid = np.concatenate((valid,-np.ones((np.shape(valid)[0],1))),axis=1)
35 |
36 | old_val_error1 = 100002
37 | old_val_error2 = 100001
38 | new_val_error = 100000
39 |
40 | count = 0
41 | while (((old_val_error1 - new_val_error) > 0.001) or ((old_val_error2 - old_val_error1)>0.001)):
42 | count+=1
43 | print count
44 | self.mlptrain(inputs,targets,eta,niterations)
45 | old_val_error2 = old_val_error1
46 | old_val_error1 = new_val_error
47 | validout = self.mlpfwd(valid)
48 | new_val_error = 0.5*np.sum((validtargets-validout)**2)
49 |
50 | print "Stopped", new_val_error,old_val_error1, old_val_error2
51 | return new_val_error
52 |
53 | def mlptrain(self,inputs,targets,eta,niterations):
54 | """ Train the thing """
55 | # Add the inputs that match the bias node
56 | inputs = np.concatenate((inputs,-np.ones((self.ndata,1))),axis=1)
57 | change = range(self.ndata)
58 |
59 | updatew1 = np.zeros((np.shape(self.weights1)))
60 | updatew2 = np.zeros((np.shape(self.weights2)))
61 |
62 | for n in range(niterations):
63 |
64 | self.outputs = self.mlpfwd(inputs)
65 |
66 | error = 0.5*np.sum((self.outputs-targets)**2)
67 | if (np.mod(n,100)==0):
68 | print "Iteration: ",n, " Error: ",error
69 |
70 | # Different types of output neurons
71 | if self.outtype == 'linear':
72 | deltao = (self.outputs-targets)/self.ndata
73 | elif self.outtype == 'logistic':
74 | deltao = self.beta*(self.outputs-targets)*self.outputs*(1.0-self.outputs)
75 | elif self.outtype == 'softmax':
76 | deltao = (self.outputs-targets)*(self.outputs*(-self.outputs)+self.outputs)/self.ndata
77 | else:
78 | print "error"
79 |
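# Backpropagate the error to the hidden layer using the logistic derivative
# beta*h*(1-h); the bias column of deltah is dropped in the weight update below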
80 | deltah = self.hidden*self.beta*(1.0-self.hidden)*(np.dot(deltao,np.transpose(self.weights2)))
81 |
82 | updatew1 = eta*(np.dot(np.transpose(inputs),deltah[:,:-1])) + self.momentum*updatew1
83 | updatew2 = eta*(np.dot(np.transpose(self.hidden),deltao)) + self.momentum*updatew2
84 | self.weights1 -= updatew1
85 | self.weights2 -= updatew2
86 |
87 | # Randomise order of inputs (not necessary for matrix-based calculation)
88 | #np.random.shuffle(change)
89 | #inputs = inputs[change,:]
90 | #targets = targets[change,:]
91 |
92 | def mlpfwd(self,inputs):
93 | """ Run the network forward """
94 |
95 | self.hidden = np.dot(inputs,self.weights1);
96 | self.hidden = 1.0/(1.0+np.exp(-self.beta*self.hidden))
97 | self.hidden = np.concatenate((self.hidden,-np.ones((np.shape(inputs)[0],1))),axis=1)
98 |
99 | outputs = np.dot(self.hidden,self.weights2);
100 |
101 | # Different types of output neurons
102 | if self.outtype == 'linear':
103 | return outputs
104 | elif self.outtype == 'logistic':
105 | return 1.0/(1.0+np.exp(-self.beta*outputs))
106 | elif self.outtype == 'softmax':
107 | normalisers = np.sum(np.exp(outputs),axis=1)*np.ones((1,np.shape(outputs)[0]))
108 | return np.transpose(np.transpose(np.exp(outputs))/normalisers)
109 | else:
110 | print "error"
111 |
112 | def confmat(self,inputs,targets):
113 | """Confusion matrix"""
114 |
115 | # Add the inputs that match the bias node
116 | inputs = np.concatenate((inputs,-np.ones((np.shape(inputs)[0],1))),axis=1)
117 | outputs = self.mlpfwd(inputs)
118 |
119 | nclasses = np.shape(targets)[1]
120 |
121 | if nclasses==1:
122 | nclasses = 2
123 | outputs = np.where(outputs>0.5,1,0)
124 | else:
125 | # 1-of-N encoding
126 | outputs = np.argmax(outputs,1)
127 | targets = np.argmax(targets,1)
128 |
129 | cm = np.zeros((nclasses,nclasses))
130 | for i in range(nclasses):
131 | for j in range(nclasses):
132 | cm[i,j] = np.sum(np.where(outputs==i,1,0)*np.where(targets==j,1,0))
133 |
134 | print "Confusion matrix is:"
135 | print cm
136 | print "Percentage Correct: ",np.trace(cm)/np.sum(cm)*100
137 |
--------------------------------------------------------------------------------
/Ch4/mnist.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 4 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 | import pylab as pl
11 | import numpy as np
12 | import mlp
13 | import cPickle, gzip
14 |
15 | # Read the dataset in (code from sheet)
16 | f = gzip.open('../2 Linear/mnist.pkl.gz','rb')
17 | tset, vset, teset = cPickle.load(f)
18 | f.close()
19 |
20 | nread = 200
21 | # Just use the first few images
22 | train_in = tset[0][:nread,:]
23 |
24 | # This is a little bit of work -- 1 of N encoding
25 | # Make sure you understand how it does it
26 | train_tgt = np.zeros((nread,10))
27 | for i in range(nread):
28 | train_tgt[i,tset[1][i]] = 1
29 |
30 | test_in = teset[0][:nread,:]
31 | test_tgt = np.zeros((nread,10))
32 | for i in range(nread):
33 | test_tgt[i,teset[1][i]] = 1
34 |
35 | # We will need the validation set
36 | valid_in = vset[0][:nread,:]
37 | valid_tgt = np.zeros((nread,10))
38 | for i in range(nread):
39 | valid_tgt[i,vset[1][i]] = 1
40 |
41 | for i in [1,2,5,10,20]:
42 | print "----- "+str(i)
43 | net = mlp.mlp(train_in,train_tgt,i,outtype='softmax')
44 | net.earlystopping(train_in,train_tgt,valid_in,valid_tgt,0.1)
45 | net.confmat(test_in,test_tgt)
46 |
--------------------------------------------------------------------------------
/Ch4/sinewave.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 4 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # The sinewave regression example
12 |
13 | import pylab as pl
14 | import numpy as np
15 |
16 | # Set up the data
17 | x = np.linspace(0,1,40).reshape((40,1))
18 | t = np.sin(2*np.pi*x) + np.cos(4*np.pi*x) + np.random.randn(40).reshape((40,1))*0.2
19 | x = (x-0.5)*2
20 |
21 | # Split into training, testing, and validation sets
22 | train = x[0::2,:]
23 | test = x[1::4,:]
24 | valid = x[3::4,:]
25 | traintarget = t[0::2,:]
26 | testtarget = t[1::4,:]
27 | validtarget = t[3::4,:]
28 |
29 | # Plot the data
30 | pl.plot(x,t,'o')
31 | pl.xlabel('x')
32 | pl.ylabel('t')
33 |
34 | # Perform basic training with a small MLP
35 | import mlp
36 | net = mlp.mlp(train,traintarget,3,outtype='linear')
37 | net.mlptrain(train,traintarget,0.25,101)
38 |
39 | # Use early stopping
40 | net.earlystopping(train,traintarget,valid,validtarget,0.25)
41 |
42 | # Test out different sizes of network
43 | #count = 0
44 | #out = zeros((10,7))
45 | #for nnodes in [1,2,3,5,10,25,50]:
46 | # for i in range(10):
47 | # net = mlp.mlp(train,traintarget,nnodes,outtype='linear')
48 | # out[i,count] = net.earlystopping(train,traintarget,valid,validtarget,0.25)
49 | # count += 1
50 | #
51 | #test = concatenate((test,-ones((shape(test)[0],1))),axis=1)
52 | #outputs = net.mlpfwd(test)
53 | #print 0.5*sum((outputs-testtarget)**2)
54 | #
55 | #print out
56 | #print out.mean(axis=0)
57 | #print out.var(axis=0)
58 | #print out.max(axis=0)
59 | #print out.min(axis=0)
60 |
61 | pl.show()
62 |
--------------------------------------------------------------------------------
/Ch5/iris.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 5 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | import numpy as np
12 |
13 | iris = np.loadtxt('../3 MLP/iris_proc.data',delimiter=',')
14 | iris[:,:4] = iris[:,:4]-iris[:,:4].mean(axis=0)
15 | imax = np.concatenate((iris.max(axis=0)*np.ones((1,5)),iris.min(axis=0)*np.ones((1,5))),axis=0).max(axis=0)
16 | iris[:,:4] = iris[:,:4]/imax[:4]
17 | #print iris[0:5,:]
18 |
19 | #target = zeros((shape(iris)[0],2));
20 | #indices = where(iris[:,4]==0)
21 | #target[indices,0] = 1
22 | #indices = where(iris[:,4]==1)
23 | #target[indices,1] = 1
24 | #indices = where(iris[:,4]==2)
25 | #target[indices,0] = 1
26 | #target[indices,1] = 1
27 |
28 | target = np.zeros((np.shape(iris)[0],3));
29 | indices = np.where(iris[:,4]==0)
30 | target[indices,0] = 1
31 | indices = np.where(iris[:,4]==1)
32 | target[indices,1] = 1
33 | indices = np.where(iris[:,4]==2)
34 | target[indices,2] = 1
35 |
36 |
37 | order = range(np.shape(iris)[0])
38 | np.random.shuffle(order)
39 | iris = iris[order,:]
40 | target = target[order,:]
41 |
42 | train = iris[::2,0:4]
43 | traint = target[::2]
44 | valid = iris[1::4,0:4]
45 | validt = target[1::4]
46 | test = iris[3::4,0:4]
47 | testt = target[3::4]
48 |
49 | #print train.max(axis=0), train.min(axis=0)
50 |
51 | import rbf
52 | net = rbf.rbf(train,traint,5,1,1)
53 |
54 | net.rbftrain(train,traint,0.25,2000)
55 | #net.confmat(train,traint)
56 | net.confmat(test,testt)
57 |
--------------------------------------------------------------------------------
/Ch5/least_squares.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 5 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | import pylab as pl
12 | import numpy as np
13 |
14 | x = np.arange(-3,10,0.05)
15 | y = 2.5 * np.exp(-(x)**2/9) + 3.2 * np.exp(-(x-0.5)**2/4) + np.random.normal(0.0, 1.0, len(x))
16 | nParam = 2
17 | A = np.zeros((len(x),nParam), float)
18 | A[:,0] = np.exp(-(x)**2/9)
19 | A[:,1] = np.exp(-(x-0.5)**2/4)
20 | (p, residuals, rank, s) = np.linalg.lstsq(A,y)
21 |
22 | print p
23 | pl.ion()
24 | pl.plot(x,y,'.')
25 | pl.plot(x,p[0]*A[:,0]+p[1]*A[:,1],'x')
26 |
27 | pl.show()
28 |
--------------------------------------------------------------------------------
/Ch5/rbf.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 5 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | import numpy as np
12 | import pcn
13 | import kmeans
14 |
15 | class rbf:
16 | """ The Radial Basis Function network
17 | Parameters are number of RBFs, and their width, how to train the network
18 | (pseudo-inverse or kmeans) and whether the RBFs are normalised"""
19 |
20 | def __init__(self,inputs,targets,nRBF,sigma=0,usekmeans=0,normalise=0):
21 | self.nin = np.shape(inputs)[1]
22 | self.nout = np.shape(targets)[1]
23 | self.ndata = np.shape(inputs)[0]
24 | self.nRBF = nRBF
25 | self.usekmeans = usekmeans
26 | self.normalise = normalise
27 |
28 | if usekmeans:
29 | self.kmeansnet = kmeans.kmeans(self.nRBF,inputs)
30 |
31 | self.hidden = np.zeros((self.ndata,self.nRBF+1))
32 |
33 | if sigma==0:
34 | # Set width of Gaussians
35 | d = (inputs.max(axis=0)-inputs.min(axis=0)).max()
36 | self.sigma = d/np.sqrt(2*nRBF)
37 | else:
38 | self.sigma = sigma
39 |
40 | self.perceptron = pcn.pcn(self.hidden[:,:-1],targets)
41 |
42 | # Initialise network
43 | self.weights1 = np.zeros((self.nin,self.nRBF))
44 |
45 | def rbftrain(self,inputs,targets,eta=0.25,niterations=100):
46 |
47 | if self.usekmeans==0:
48 | # Version 1: set RBFs to be datapoints
49 | indices = range(self.ndata)
50 | np.random.shuffle(indices)
51 | for i in range(self.nRBF):
52 | self.weights1[:,i] = inputs[indices[i],:]
53 | else:
54 | # Version 2: use k-means
55 | self.weights1 = np.transpose(self.kmeansnet.kmeanstrain(inputs))
56 |
57 | for i in range(self.nRBF):
58 | self.hidden[:,i] = np.exp(-np.sum((inputs - np.ones((1,self.nin))*self.weights1[:,i])**2,axis=1)/(2*self.sigma**2))
59 | if self.normalise:
60 | self.hidden[:,:-1] /= np.transpose(np.ones((1,np.shape(self.hidden)[0]))*self.hidden[:,:-1].sum(axis=1))
61 |
62 | # Call Perceptron without bias node (since it adds its own)
63 | self.perceptron.pcntrain(self.hidden[:,:-1],targets,eta,niterations)
64 |
65 | def rbffwd(self,inputs):
66 |
67 | hidden = np.zeros((np.shape(inputs)[0],self.nRBF+1))
68 |
69 | for i in range(self.nRBF):
70 | hidden[:,i] = np.exp(-np.sum((inputs - np.ones((1,self.nin))*self.weights1[:,i])**2,axis=1)/(2*self.sigma**2))
71 |
72 | if self.normalise:
73 |             hidden[:,:-1] /= np.transpose(np.ones((1,np.shape(hidden)[0]))*hidden[:,:-1].sum(axis=1))
74 |
75 | # Add the bias
76 | hidden[:,-1] = -1
77 |
78 | outputs = self.perceptron.pcnfwd(hidden)
79 | return outputs
80 |
81 | def confmat(self,inputs,targets):
82 | """Confusion matrix"""
83 |
84 | outputs = self.rbffwd(inputs)
85 | nClasses = np.shape(targets)[1]
86 |
87 | if nClasses==1:
88 | nClasses = 2
89 | outputs = np.where(outputs>0,1,0)
90 | else:
91 | # 1-of-N encoding
92 | outputs = np.argmax(outputs,1)
93 | targets = np.argmax(targets,1)
94 |
95 | cm = np.zeros((nClasses,nClasses))
96 | for i in range(nClasses):
97 | for j in range(nClasses):
98 | cm[i,j] = np.sum(np.where(outputs==i,1,0)*np.where(targets==j,1,0))
99 |
100 | print cm
101 | print np.trace(cm)/np.sum(cm)
102 |
--------------------------------------------------------------------------------
/Ch6/ecoli.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 6 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # Simple example of LDA, PCA, and kernel PCA, on the Wine and e-coli datasets
12 | import pylab as pl
13 | import numpy as np
14 |
15 | #wine = np.loadtxt('../9 Unsupervised/wine.data',delimiter=',')
16 | #
17 | #labels = wine[:,0]
18 | #data = wine[:,1:]
19 | #data -= np.mean(data,axis=0)
20 | #data /= data.max(axis=0)
21 |
22 | ecoli = np.loadtxt('../9 Unsupervised/shortecoli.data')
23 | labels = ecoli[:,7:]
24 | data = ecoli[:,:7]
25 | data -= np.mean(data,axis=0)
26 | data /= data.max(axis=0)
27 |
28 | order = range(np.shape(data)[0])
29 | np.random.shuffle(order)
30 | data = data[order]; labels = labels[order]
31 | w0 = np.where(labels==1)
32 | w1 = np.where(labels==2)
33 | w2 = np.where(labels==3)
34 |
35 | import lda
36 | newData,w = lda.lda(data,labels,2)
37 |
38 | pl.plot(data[w0,0],data[w0,1],'ok')
39 | pl.plot(data[w1,0],data[w1,1],'^k')
40 | pl.plot(data[w2,0],data[w2,1],'vk')
41 | pl.axis([-1.5,1.8,-1.5,1.8])
42 | pl.axis('off')
43 | pl.figure(2)
44 | pl.plot(newData[w0,0],newData[w0,1],'ok')
45 | pl.plot(newData[w1,0],newData[w1,1],'^k')
46 | pl.plot(newData[w2,0],newData[w2,1],'vk')
47 | pl.axis([-1.5,1.8,-1.5,1.8])
48 | pl.axis('off')
49 |
50 | import pca
51 | x,y,evals,evecs = pca.pca(data,2)
52 | pl.figure(3)
53 | pl.plot(y[w0,0],y[w0,1],'ok')
54 | pl.plot(y[w1,0],y[w1,1],'^k')
55 | pl.plot(y[w2,0],y[w2,1],'vk')
56 | pl.axis('off')
57 |
58 | import kernelpca
59 | newData = kernelpca.kernelpca(data,'gaussian',2)
60 | pl.figure(4)
61 | pl.plot(newData[w0,0],newData[w0,1],'ok')
62 | pl.plot(newData[w1,0],newData[w1,1],'^k')
63 | pl.plot(newData[w2,0],newData[w2,1],'vk')
64 | pl.axis('off')
65 |
66 | pl.show()
67 |
--------------------------------------------------------------------------------
/Ch6/factoranalysis.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 6 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # The Factor Analysis algorithm
12 | import pylab as pl
13 | import numpy as np
14 |
15 | def factoranalysis(y,nRedDim):
16 | Ndata = np.shape(y)[0]
17 | N = np.shape(y)[1]
18 |
19 | y = y-y.mean(axis=0)
20 | C = np.cov(np.transpose(y))
21 | Cd = C.diagonal()
22 | Psi = Cd
23 | scaling = np.linalg.det(C)**(1./N)
24 |
25 | W = np.random.normal(0,np.sqrt(scaling/nRedDim),(N,nRedDim))
26 |
27 | nits = 1000
28 | oldL = -np.inf
29 |
30 | for i in range(nits):
31 |
32 | # E-step
33 | A = np.dot(W,np.transpose(W)) + np.diag(Psi)
34 | logA = np.log(np.abs(np.linalg.det(A)))
35 | A = np.linalg.inv(A)
36 |
37 | WA = np.dot(np.transpose(W),A)
38 | WAC = np.dot(WA,C)
39 | Exx = np.eye(nRedDim) - np.dot(WA,W) + np.dot(WAC,np.transpose(WA))
40 |
41 | # M-step
42 | W = np.dot(np.transpose(WAC),np.linalg.inv(Exx))
43 | Psi = Cd - (np.dot(W,WAC)).diagonal()
44 | #Sigma1 = (dot(transpose(y),y) - dot(W,WAC)).diagonal()/Ndata
45 |
46 | tAC = (A*np.transpose(C)).sum()
47 |
48 | L = -N/2*np.log(2.*np.pi) -0.5*logA - 0.5*tAC
49 | if (L-oldL)<(1e-4):
50 | print "Stop",i
51 | break
52 | print L
53 | oldL = L
54 | A = np.linalg.inv(np.dot(W,np.transpose(W))+np.diag(Psi))
55 | Ex = np.dot(np.transpose(A),W)
56 |
57 | return np.dot(y,Ex)
58 |
59 | data = np.array([[0.1,0.1],[0.2,0.2],[0.3,0.3],[0.35,0.3],[0.4,0.4],[0.6,0.4],[0.7,0.45],[0.75,0.4],[0.8,0.35]])
60 | newData = factoranalysis(data,2)
61 | pl.plot(newData[:,0],newData[:,1],'.')
62 | pl.show()
63 |
--------------------------------------------------------------------------------
/Ch6/floyd.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 6 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | import numpy as np
12 | import time
13 |
14 | def floyd():
15 |
16 | ndata = 100
17 |     neighbours = np.zeros((ndata,10),dtype=int)
18 | g = np.random.rand(ndata,ndata)
19 | for i in range(ndata):
20 | neighbours[i,:] = np.random.randint(0,100,10)
21 |
22 | t0 = time.time()
23 | print "Floyd's algorithm"
24 | for k in range(ndata):
25 | for i in range(ndata):
26 | for j in range(ndata):
27 | if g[i,j] > g[i,k] + g[k,j]:
28 | g[i,j] = g[i,k] + g[k,j]
29 |
30 | t1 = time.time()
31 | print "Complete"
32 | print t1-t0
33 | x = g.copy()
34 |
35 | t2 = time.time()
36 | q = g.copy()
37 | for i in range(ndata):
38 | for j in range(ndata):
39 | k = np.argmin(q[i,:])
40 | while not(np.isnan(q[i,k])):
41 | q[i,k] = np.nan
42 | for l in neighbours[k,:]:
43 | possible = q[i,l] + q[l,k]
44 | if possible < q[i,k]:
45 | g[i,k] = possible
46 | k = np.argmin(q[i,:])
47 | t3 = time.time()
48 | y = g
49 | print t3-t2
50 | return x,y
51 |
--------------------------------------------------------------------------------
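
A minimal sketch of the all-pairs shortest-path update used in floyd() above, on a hand-built four-node graph where the direct edge 0→3 (cost 10) should be replaced by the path 0→1→2→3 (cost 6):

```python
import numpy as np

# Adjacency matrix; np.inf marks a missing edge
INF = np.inf
g = np.array([[0.,  1.,  INF, 10.],
              [INF, 0.,  2.,  INF],
              [INF, INF, 0.,  3. ],
              [INF, INF, INF, 0. ]])

# Floyd-Warshall: allow each node k in turn as an intermediate stop
n = g.shape[0]
for k in range(n):
    for i in range(n):
        for j in range(n):
            if g[i, j] > g[i, k] + g[k, j]:
                g[i, j] = g[i, k] + g[k, j]

print g[0, 3]   # 6.0
```
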
/Ch6/iris.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 6 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # Various dimensionality reductions running on the Iris dataset
12 | import pylab as pl
13 | import numpy as np
14 |
15 | iris = np.loadtxt('../3 MLP/iris_proc.data',delimiter=',')
16 | iris[:,:4] = iris[:,:4]-iris[:,:4].mean(axis=0)
17 | imax = np.concatenate((iris.max(axis=0)*np.ones((1,5)),iris.min(axis=0)*np.ones((1,5))),axis=0).max(axis=0)
18 | iris[:,:4] = iris[:,:4]/imax[:4]
19 | labels = iris[:,4:]
20 | iris = iris[:,:4]
21 |
22 | order = range(np.shape(iris)[0])
23 | np.random.shuffle(order)
24 | iris = iris[order,:]
25 | labels = labels[order,0]
26 |
27 | w0 = np.where(labels==0)
28 | w1 = np.where(labels==1)
29 | w2 = np.where(labels==2)
30 |
31 | import lda
32 | newData,w = lda.lda(iris,labels,2)
33 | print np.shape(newData)
34 | pl.plot(iris[w0,0],iris[w0,1],'ok')
35 | pl.plot(iris[w1,0],iris[w1,1],'^k')
36 | pl.plot(iris[w2,0],iris[w2,1],'vk')
37 | pl.axis([-1.5,1.8,-1.5,1.8])
38 | pl.axis('off')
39 | pl.figure(2)
40 | pl.plot(newData[w0,0],newData[w0,1],'ok')
41 | pl.plot(newData[w1,0],newData[w1,1],'^k')
42 | pl.plot(newData[w2,0],newData[w2,1],'vk')
43 | pl.axis([-1.5,1.8,-1.5,1.8])
44 | pl.axis('off')
45 |
46 | import pca
47 | x,y,evals,evecs = pca.pca(iris,2)
48 | pl.figure(3)
49 | pl.plot(y[w0,0],y[w0,1],'ok')
50 | pl.plot(y[w1,0],y[w1,1],'^k')
51 | pl.plot(y[w2,0],y[w2,1],'vk')
52 | pl.axis('off')
53 |
54 | import kernelpca
55 | newData = kernelpca.kernelpca(iris,'gaussian',2)
56 | pl.figure(4)
57 | pl.plot(newData[w0,0],newData[w0,1],'ok')
58 | pl.plot(newData[w1,0],newData[w1,1],'^k')
59 | pl.plot(newData[w2,0],newData[w2,1],'vk')
60 | pl.axis('off')
61 |
62 | import factoranalysis
63 | newData = factoranalysis.factoranalysis(iris,2)
64 | #print newData
65 | pl.figure(5)
66 | pl.plot(newData[w0,0],newData[w0,1],'ok')
67 | pl.plot(newData[w1,0],newData[w1,1],'^k')
68 | pl.plot(newData[w2,0],newData[w2,1],'vk')
69 | pl.axis('off')
70 |
71 | import lle
72 | print np.shape(iris)
73 | a,b,newData = lle.lle(iris,2,12)
74 | print np.shape(newData)
75 | print newData[w0,:]
76 | print "---"
77 | print newData[w1,:]
78 | print "---"
79 | print newData[w2,:]
80 |
81 | pl.plot(newData[w0,0],newData[w0,1],'ok')
82 | pl.plot(newData[w1,0],newData[w1,1],'^k')
83 | pl.plot(newData[w2,0],newData[w2,1],'vk')
84 | pl.axis('off')
85 |
86 | import isomap
87 | print labels
88 | newData,newLabels = isomap.isomap(iris,2,100)
89 | print np.shape(newData)
90 | print newLabels
91 | w0 = np.where(newLabels==0)
92 | w1 = np.where(newLabels==1)
93 | w2 = np.where(newLabels==2)
94 | pl.plot(newData[w0,0],newData[w0,1],'ok')
95 | pl.plot(newData[w1,0],newData[w1,1],'^k')
96 | pl.plot(newData[w2,0],newData[w2,1],'vk')
97 | pl.axis('off')
98 |
99 | print "Done"
100 |
101 | pl.show()
102 |
--------------------------------------------------------------------------------
/Ch6/isomap.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 6 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # The Isomap algorithm
12 | import pylab as pl
13 | import numpy as np
14 |
15 | def swissroll():
16 | # Make the swiss roll dataset
17 | N = 1000
18 | noise = 0.05
19 |
20 | t = 3.*np.pi/2 * (1. + 2.*np.random.rand(1,N))
21 | h = 21. * np.random.rand(1,N)
22 | data = np.concatenate((t*np.cos(t),h,t*np.sin(t))) + noise*np.random.randn(3,N)
23 | return np.transpose(data), np.squeeze(t)
24 |
25 | def isomap(data,newdim=2,K=12,labels=None):
26 |
27 | ndata = np.shape(data)[0]
28 | ndim = np.shape(data)[1]
29 | d = np.zeros((ndata,ndata),dtype=float)
30 |
31 | # Compute the distance matrix
32 | # Inefficient -- not matrices
33 | for i in range(ndata):
34 | for j in range(i+1,ndata):
35 | for k in range(ndim):
36 | d[i,j] += (data[i,k] - data[j,k])**2
37 | d[i,j] = np.sqrt(d[i,j])
38 | d[j,i] = d[i,j]
39 |
40 | # K-nearest neighbours
41 | indices = d.argsort()
42 | #notneighbours = indices[:,K+1:]
43 | neighbours = indices[:,:K+1]
44 | # Alternative: epsilon
45 | # epsilon = 0.1
46 | #neighbours = where(d<=epsilon)
47 | #notneighbours = where(d>epsilon)
48 |
49 | h = np.ones((ndata,ndata),dtype=float)*np.inf
50 | for i in range(ndata):
51 | h[i,neighbours[i,:]] = d[i,neighbours[i,:]]
52 |
53 | # Compute the full distance matrix over all paths
54 | print "Floyd's algorithm"
55 | for k in range(ndata):
56 | for i in range(ndata):
57 | for j in range(ndata):
58 | if h[i,j] > h[i,k] + h[k,j]:
59 | h[i,j] = h[i,k] + h[k,j]
60 |
61 | # print "Dijkstra's algorithm"
62 | # q = h.copy()
63 | # for i in range(ndata):
64 | # for j in range(ndata):
65 | # k = np.argmin(q[i,:])
66 | # while not(np.isinf(q[i,k])):
67 | # q[i,k] = np.inf
68 | # for l in neighbours[k,:]:
69 | # possible = h[i,l] + h[l,k]
70 | # if possible < h[i,k]:
71 | # h[i,k] = possible
72 | # k = np.argmin(q[i,:])
73 | # print "Complete"
74 |
75 | # remove lines full of infs
76 | x = np.isinf(h[:,0]).nonzero()
77 | if np.size(x)>0:
78 | print x
79 | if x[0][0]>0:
80 | new = h[0:x[0][0],:]
81 | newlabels = labels[0:x[0][0]]
82 | start = 1
83 | else:
84 | new = h[x[0][0]+1,:]
85 | newlabels = labels[x[0][0]+1]
86 | start = 2
87 |         for i in range(start,np.size(x)):
88 | new = np.concatenate((new,h[x[0][i-1]+1:x[0][i],:]),axis=0)
89 | newlabels = np.concatenate((newlabels,labels[x[0][i-1]+1:x[0][i]]),axis=0)
90 | new = np.concatenate((new,h[x[0][i]+1:,:]),axis=0)
91 | newlabels = np.concatenate((newlabels,labels[x[0][i]+1:]),axis=0)
92 |
93 | new2 = new[:,0:x[0][0]]
94 | if x[0][0]>0:
95 | new2 = new[:,0:x[0][0]]
96 | start = 1
97 | else:
98 | new2 = new[:,x[0][0]+1]
99 | start = 2
100 |         for i in range(start,np.size(x)):
101 | new2 = np.concatenate((new2,new[:,x[0][i-1]+1:x[0][i]]),axis=1)
102 | new2 = np.concatenate((new2,new[:,x[0][i]+1:]),axis=1)
103 |
104 | g = new2.copy()
105 |         ndata = ndata - np.size(x)
106 | else:
107 | g = h.copy()
108 | newlabels = labels
109 |
110 |     # Map computations, followed by the dimensionality reduction
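    |     # (classical MDS: double-centre the squared geodesic distance matrix before the eigendecomposition)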
111 | M = -0.5*(g**2 - np.transpose(np.sum(g*g,axis=0) * np.ones((ndata,1))/ndata) - np.ones((ndata,1))* np.sum(g*g,axis=0)/ndata + np.sum(np.sum(g*g))/ndata**2)
112 |
113 | eval,evec = np.linalg.eig(M)
114 | eval = np.real(eval)
115 | ind = np.argsort(eval)
116 | eval = np.real(np.diag(eval[ind[-1::-1]]))
117 | evec = evec[:,ind[-1::-1]]
118 | y = np.real(np.dot(evec,np.transpose((np.sqrt(eval)))))
119 | print np.shape(y)
120 | print np.shape(eval), np.shape(evec)
121 | return y, newlabels
122 |
123 | data,t = swissroll()
124 | y,u = isomap(data)
125 |
126 | t -= t.min()
127 | t /= t.max()
128 | #pl.scatter(y[:,0],y[:,1],c=t,cmap=pl.cm.jet)
129 | pl.scatter(y[:,1],y[:,2],s=50,c=t,cmap=pl.cm.gray)
130 | #pl.scatter(data[:,0],data[:,1],s=50,c=t,cmap=pl.cm.gray)
131 |
132 | pl.show()
133 |
--------------------------------------------------------------------------------
/Ch6/kernelpca.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 6 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # The Kernel PCA algorithm
12 |
13 | import numpy as np
14 | import pylab as pl
15 |
16 | def kernelmatrix(data,kernel,param=np.array([3,2])):
17 |
18 | if kernel=='linear':
19 |         return np.dot(data,np.transpose(data))
20 | elif kernel=='gaussian':
21 | K = np.zeros((np.shape(data)[0],np.shape(data)[0]))
22 | for i in range(np.shape(data)[0]):
23 | for j in range(i+1,np.shape(data)[0]):
24 | K[i,j] = np.sum((data[i,:]-data[j,:])**2)
25 | K[j,i] = K[i,j]
26 |         return np.exp(-K/(2*param[0]**2))    # K already holds squared distances
27 | elif kernel=='polynomial':
28 | return (np.dot(data,np.transpose(data))+param[0])**param[1]
29 |
30 | def kernelpca(data,kernel,redDim):
31 |
32 | nData = np.shape(data)[0]
33 | nDim = np.shape(data)[1]
34 |
35 | K = kernelmatrix(data,kernel)
36 |
37 | # Compute the transformed data
38 | D = np.sum(K,axis=0)/nData
39 | E = np.sum(D)/nData
40 | J = np.ones((nData,1))*D
41 | K = K - J - np.transpose(J) + E*np.ones((nData,nData))
42 |
43 | # Perform the dimensionality reduction
44 | evals,evecs = np.linalg.eig(K)
45 | indices = np.argsort(evals)
46 | indices = indices[::-1]
47 | evecs = evecs[:,indices[:redDim]]
48 | evals = evals[indices[:redDim]]
49 |
50 | sqrtE = np.zeros((len(evals),len(evals)))
51 | for i in range(len(evals)):
52 | sqrtE[i,i] = np.sqrt(evals[i])
53 |
54 | #print shape(sqrtE), shape(data)
55 | newData = np.transpose(np.dot(sqrtE,np.transpose(evecs)))
56 |
57 | return newData
58 |
59 | #data = array([[0.1,0.1],[0.2,0.2],[0.3,0.3],[0.35,0.3],[0.4,0.4],[0.6,0.4],[0.7,0.45],[0.75,0.4],[0.8,0.35]])
60 | #newData = kernelpca(data,'gaussian',2)
61 | #plot(data[:,0],data[:,1],'o',newData[:,0],newData[:,0],'.')
62 | #show()
63 |
--------------------------------------------------------------------------------
/Ch6/kpcademo.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 6 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # Demonstration of PCA and kernel PCA on the circular dataset
12 | import pylab as pl
13 | import numpy as np
14 |
15 | import pca
16 | import kernelpca
17 |
18 | data = np.zeros((150,2))
19 |
20 | theta = np.random.normal(0,np.pi,50)
21 | r = np.random.normal(0,0.1,50)
22 | data[0:50,0] = r*np.cos(theta)
23 | data[0:50,1] = r*np.sin(theta)
24 |
25 | theta = np.random.normal(0,np.pi,50)
26 | r = np.random.normal(2,0.1,50)
27 | data[50:100,0] = r*np.cos(theta)
28 | data[50:100,1] = r*np.sin(theta)
29 |
30 | theta = np.random.normal(0,np.pi,50)
31 | r = np.random.normal(5,0.1,50)
32 | data[100:150,0] = r*np.cos(theta)
33 | data[100:150,1] = r*np.sin(theta)
34 |
35 | pl.figure()
36 | pl.plot(data[:50,0],data[:50,1],'ok')
37 | pl.plot(data[50:100,0],data[50:100,1],'^k')
38 | pl.plot(data[100:150,0],data[100:150,1],'vk')
39 | pl.title('Original dataset')
40 |
41 | x,y,evals,evecs = pca.pca(data,2)
42 | pl.figure()
43 | pl.plot(x[:50,0],x[:50,1],'ok')
44 | pl.plot(x[50:100,0],x[50:100,1],'^k')
45 | pl.plot(x[100:150,0],x[100:150,1],'vk')
46 | pl.title('Reconstructed points after PCA')
47 |
48 | pl.figure()
49 | y = kernelpca.kernelpca(data,'gaussian',2)
50 | pl.plot(y[:50,0],y[:50,1],'ok')
51 | pl.plot(y[50:100,0],y[50:100,1],'^k')
52 | pl.plot(y[100:150,0],y[100:150,1],'vk')
53 | pl.title('Reconstructed points after kernel PCA')
54 |
55 | pl.show()
56 |
--------------------------------------------------------------------------------
/Ch6/lda.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 6 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # The LDA algorithm
12 |
13 | import pylab as pl
14 | import numpy as np
15 | from scipy import linalg as la
16 |
17 | def lda(data,labels,redDim):
18 |
19 | # Centre data
20 | data -= data.mean(axis=0)
21 | nData = np.shape(data)[0]
22 | nDim = np.shape(data)[1]
23 |
24 | Sw = np.zeros((nDim,nDim))
25 | Sb = np.zeros((nDim,nDim))
26 |
27 | C = np.cov(np.transpose(data))
28 |
29 | # Loop over classes
30 | classes = np.unique(labels)
31 | for i in range(len(classes)):
32 | # Find relevant datapoints
33 | indices = np.squeeze(np.where(labels==classes[i]))
34 | d = np.squeeze(data[indices,:])
35 | classcov = np.cov(np.transpose(d))
36 | Sw += np.float(np.shape(indices)[0])/nData * classcov
37 |
38 | Sb = C - Sw
39 | # Now solve for W and compute mapped data
40 | # Compute eigenvalues, eigenvectors and sort into order
41 | evals,evecs = la.eig(Sw,Sb)
42 | indices = np.argsort(evals)
43 | indices = indices[::-1]
44 | evecs = evecs[:,indices]
45 | evals = evals[indices]
46 | w = evecs[:,:redDim]
47 | newData = np.dot(data,w)
48 | return newData,w
49 |
50 | #data = np.array([[0.1,0.1],[0.2,0.2],[0.3,0.3],[0.35,0.3],[0.4,0.4],[0.6,0.4],[0.7,0.45],[0.75,0.4],[0.8,0.35]])
51 | #labels = np.array([0,0,0,0,0,1,1,1,1])
52 | #newData,w = lda(data,labels,2)
53 | #print w
54 | #pl.plot(data[:,0],data[:,1],'o',newData[:,0],newData[:,0],'.')
55 | #pl.show()
56 |
--------------------------------------------------------------------------------
/Ch6/lle.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 6 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # The Locally Linear Embedding algorithm, and the swissroll example
12 | import pylab as pl
13 | import numpy as np
14 |
15 | def swissroll():
16 | # Make the swiss roll dataset
17 | N = 1000
18 | noise = 0.05
19 |
20 | t = 3*np.pi/2 * (1 + 2*np.random.rand(1,N))
21 | h = 21 * np.random.rand(1,N)
22 | data = np.concatenate((t*np.cos(t),h,t*np.sin(t))) + noise*np.random.randn(3,N)
23 | return np.transpose(data), np.squeeze(t)
24 |
25 | def lle(data,nRedDim=2,K=12):
26 |
27 | ndata = np.shape(data)[0]
28 | ndim = np.shape(data)[1]
29 | d = np.zeros((ndata,ndata),dtype=float)
30 |
31 | # Inefficient -- not matrices
32 | for i in range(ndata):
33 | for j in range(i+1,ndata):
34 | for k in range(ndim):
35 | d[i,j] += (data[i,k] - data[j,k])**2
36 | d[i,j] = np.sqrt(d[i,j])
37 | d[j,i] = d[i,j]
38 |
39 | indices = d.argsort(axis=1)
40 | neighbours = indices[:,1:K+1]
41 |
42 | W = np.zeros((K,ndata),dtype=float)
43 |
44 | for i in range(ndata):
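    |         # Solve for the weights that best reconstruct point i from its K neighbours, then normalise them to sum to 1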
45 | Z = data[neighbours[i,:],:] - np.kron(np.ones((K,1)),data[i,:])
46 | C = np.dot(Z,np.transpose(Z))
47 | C = C+np.identity(K)*1e-3*np.trace(C)
48 | W[:,i] = np.transpose(np.linalg.solve(C,np.ones((K,1))))
49 | W[:,i] = W[:,i]/np.sum(W[:,i])
50 |
51 | M = np.eye(ndata,dtype=float)
52 | for i in range(ndata):
53 | w = np.transpose(np.ones((1,np.shape(W)[0]))*np.transpose(W[:,i]))
54 | j = neighbours[i,:]
55 | #print shape(w), np.shape(np.dot(w,np.transpose(w))), np.shape(M[i,j])
56 | ww = np.dot(w,np.transpose(w))
57 | for k in range(K):
58 | M[i,j[k]] -= w[k]
59 | M[j[k],i] -= w[k]
60 | for l in range(K):
61 | M[j[k],j[l]] += ww[k,l]
62 |
63 | evals,evecs = np.linalg.eig(M)
64 | ind = np.argsort(evals)
65 | y = evecs[:,ind[1:nRedDim+1]]*np.sqrt(ndata)
66 | return evals,evecs,y
67 |
68 | data,t = swissroll()
69 | evals,evecs,y = lle(data)
70 |
71 | t -= t.min()
72 | t /= t.max()
73 | pl.scatter(y[:,0],y[:,1],s=50,c=t,cmap=pl.cm.gray)
74 | pl.axis('off')
75 | pl.show()
76 |
--------------------------------------------------------------------------------
/Ch6/pca.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 6 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # An algorithm to compute PCA. Not as fast as the NumPy implementation
12 | import numpy as np
13 |
14 | def pca(data,nRedDim=0,normalise=1):
15 |
16 | # Centre data
17 | m = np.mean(data,axis=0)
18 | data -= m
19 |
20 | # Covariance matrix
21 | C = np.cov(np.transpose(data))
22 |
23 | # Compute eigenvalues and sort into descending order
24 | evals,evecs = np.linalg.eig(C)
25 | indices = np.argsort(evals)
26 | indices = indices[::-1]
27 | evecs = evecs[:,indices]
28 | evals = evals[indices]
29 |
30 | if nRedDim>0:
31 | evecs = evecs[:,:nRedDim]
32 |
33 | if normalise:
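    |         # Rescale each eigenvector to length sqrt(eigenvalue)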
34 | for i in range(np.shape(evecs)[1]):
35 |             evecs[:,i] = evecs[:,i] / np.linalg.norm(evecs[:,i]) * np.sqrt(evals[i])
36 |
37 | # Produce the new data matrix
38 | x = np.dot(np.transpose(evecs),np.transpose(data))
39 | # Compute the original data again
40 | y=np.transpose(np.dot(evecs,x))+m
41 | return x,y,evals,evecs
42 |
43 |
--------------------------------------------------------------------------------
/Ch6/pcademo.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 6 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # A simple example of PCA
12 | import pylab as pl
13 | import numpy as np
14 |
15 | import pca
16 |
17 | x = np.random.normal(5,.5,1000)
18 | y = np.random.normal(3,1,1000)
19 | a = x*np.cos(np.pi/4) + y*np.sin(np.pi/4)
20 | b = -x*np.sin(np.pi/4) + y*np.cos(np.pi/4)
21 |
22 | pl.plot(a,b,'.')
23 | pl.xlabel('x')
24 | pl.ylabel('y')
25 | pl.title('Original dataset')
26 | data = np.zeros((1000,2))
27 | data[:,0] = a
28 | data[:,1] = b
29 |
30 | x,y,evals,evecs = pca.pca(data,1)
31 | print y
32 | pl.figure()
33 | pl.plot(y[:,0],y[:,1],'.')
34 | pl.xlabel('x')
35 | pl.ylabel('y')
36 | pl.title('Reconstructed data after PCA')
37 | pl.show()
38 |
--------------------------------------------------------------------------------
/Ch7/GMM.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 7 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | import pylab as pl
12 | import numpy as np
13 |
14 | def GMM():
15 |
16 | """ Fits two Gaussians to data using the EM algorithm """
17 | N = 100
18 | pl.ion()
19 |
20 | y = 1.*np.zeros(N)
21 | # Set up data
22 | out1 = np.random.normal(6,1,N)
23 | out2 = np.random.normal(1,1,N)
24 | choice = np.random.rand(N)
25 |
26 | w = [choice>=0.5]
27 | y[w] = out1[w]
28 | w = [choice<0.5]
29 | y[w] = out2[w]
30 |
31 | pl.clf()
32 | pl.hist(y,fc='0.5')
33 |
34 | # Now do some learning
35 |
36 | # Initialisation
37 | mu1 = y[np.random.randint(0,N-1,1)]
38 | mu2 = y[np.random.randint(0,N-1,1)]
39 | s1 = np.sum((y-np.mean(y))**2)/N
40 | s2 = s1
41 | pi = 0.5
42 |
43 | # EM loop
44 | count = 0
45 | gamma = 1.*np.zeros(N)
46 | nits = 20
47 |
48 | ll = 1.*np.zeros(nits)
49 |
50 | while count epsilon**2*beta0:
42 | j=0
43 | dp = np.dot(p.transpose(),p)
44 | alpha = (epsilon+1)**2
45 | # Newton-Raphson iteration
46 | while j < jMax and alpha**2 * dp > epsilon**2:
47 | # Line search
48 | alpha = -np.dot(Jacobian(x).transpose(),p) / (np.dot(p.transpose(),np.dot(Hessian(x),p)))
49 | print "N-R",x, alpha, p
50 | x = x + alpha * p
51 | j += 1
52 | print x
53 | # Now construct beta
54 | r = -Jacobian(x)
55 | print "r: ", r
56 | betaBottom = betaTop
57 | betaTop = np.dot(r.transpose(),r)
58 | beta = betaTop/betaBottom
59 | print "Beta: ",beta
60 | # Update the estimate
61 | p = r + beta*p
62 | print "p: ",p
63 | print "----"
64 | k += 1
65 |
66 | if k==nRestart or np.dot(r.transpose(),p) <= 0:
67 | p = r
68 | k = 0
69 | print "Restarting"
70 | i +=1
71 |
72 | print x
73 |
74 | x0 = np.array([-2,2,-2])
75 | CG(x0)
76 |
--------------------------------------------------------------------------------
/Ch9/LevenbergMarquardt.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 9 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # The Levenberg Marquardt algorithm
12 | import numpy as np
13 |
14 | def function(p):
15 | r = np.array([10*(p[1]-p[0]**2),(1-p[0])])
16 | fp = np.dot(np.transpose(r),r) #= 100*(p[1]-p[0]**2)**2 + (1-p[0])**2
17 | J = (np.array([[-20*p[0],10],[-1,0]]))
18 | grad = np.dot(J.T,r.T)
19 | return fp,r,grad,J
20 |
21 | def lm(p0,tol=10**(-5),maxits=100):
22 |
23 | nvars=np.shape(p0)[0]
24 | nu=0.01
25 | p = p0
26 | fp,r,grad,J = function(p)
27 | e = np.sum(np.dot(np.transpose(r),r))
28 | nits = 0
29 |     while nits<maxits and np.linalg.norm(grad)>tol:
30 | nits += 1
31 | fp,r,grad,J = function(p)
32 | H=np.dot(np.transpose(J),J) + nu*np.eye(nvars)
33 |
34 | pnew = np.zeros(np.shape(p))
35 | nits2 = 0
36 |         while (p!=pnew).all() and nits2<10:
46 | update = 1
47 | p = pnew
48 | e = enew
49 | if rho>0.25:
50 | nu=nu/10
51 | else:
52 | nu=nu*10
53 | update = 0
54 | print fp, p, e, np.linalg.norm(grad), nu
55 |
56 | p0 = np.array([-1.92,2])
57 | lm(p0)
58 |
--------------------------------------------------------------------------------
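
A minimal sketch of one damped Gauss-Newton step on the same residuals and Jacobian as function() above: solve (J^T J + nu*I) dp = J^T r and keep the new point only if the sum-of-squares error decreases. The one_lm_step helper below is purely illustrative; the full lm() loop above additionally grows or shrinks nu depending on the gain ratio rho.

```python
import numpy as np

def one_lm_step(p, nu=0.01):
    # Illustrative helper: same residuals and Jacobian as function() above
    r = np.array([10*(p[1]-p[0]**2), (1-p[0])])
    J = np.array([[-20*p[0], 10], [-1, 0]])
    grad = np.dot(J.T, r)

    # Damped Gauss-Newton step: (J^T J + nu*I) dp = J^T r
    H = np.dot(J.T, J) + nu*np.eye(2)
    dp = np.linalg.solve(H, grad)
    pnew = p - dp

    # Keep the step only if the sum-of-squares error decreased
    rnew = np.array([10*(pnew[1]-pnew[0]**2), (1-pnew[0])])
    if np.dot(rnew, rnew) < np.dot(r, r):
        return pnew
    return p

print one_lm_step(np.array([-1.92, 2.]))
```
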
/Ch9/LevenbergMarquardt_leastsq.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 11 of Machine Learning: An Algorithmic Perspective
3 | # by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008
10 |
11 | # The Levenberg Marquardt algorithm solving a least-squares problem
12 |
13 | import pylab as pl
14 | import numpy as np
15 |
16 | def function(p,x,ydata):
17 | fp = p[0]*np.cos(p[1]*x)+ p[1]*np.sin([p[0]*x])
18 | r = ydata - fp
19 | J = np.transpose([-np.cos(p[0]*x)-p[1]*np.cos(p[0]*x)*x, p[0] * np.sin(p[1]*x)*x-np.sin(p[0]*x)])
20 | grad = np.dot(J.T,r.T)
21 | return fp,r,grad,J
22 |
23 | def lm(p0,x,f,tol=10**(-5),maxits=100):
24 |
25 | nvars=np.shape(p0)[0]
26 | nu=0.01
27 | p = p0
28 | fp,r,grad,J = function(p,x,f)
29 | e = np.sum(np.dot(np.transpose(r),r))
30 | nits = 0
31 |     while nits<maxits and np.linalg.norm(grad)>tol:
32 | nits += 1
33 |
34 | # Compute current Jacobian and approximate Hessian
35 | fp,r,grad,J = function(p,x,f)
36 | H=np.dot(np.transpose(J),J) + nu*np.eye(nvars)
37 | pnew = np.zeros(np.shape(p))
38 | nits2 = 0
39 |         while (p!=pnew).all() and nits2<10:
55 | # Keep new estimate
56 | p = pnew
57 | e = enew
58 | if rho>0.25:
59 | # Make trust region larger (reduce nu)
60 | nu=nu/10
61 | else:
62 | # Make trust region smaller (increase nu)
63 | nu=nu*10
64 | print p, e, np.linalg.norm(grad), nu
65 | return p
66 |
67 | p0 = np.array([100.5,102.5]) #[ 100.0001126 101.99969709] 1078.36915936 8.87386341319e-06 1e-10 (8 itns)
68 | #p0 = np.array([101,101]) #[ 100.88860713 101.12607589] 631.488571159 9.36938417155e-06 1e-67
69 |
70 | p = np.array([100,102])
71 |
72 | x = np.arange(0,2*np.pi,0.1)
73 | y = p[0]*np.cos(p[1]*x)+ p[1]*np.sin([p[0]*x]) + np.random.rand(len(x))
74 | p = lm(p0,x,y)
75 | y1 = p[0]*np.cos(p[1]*x)+ p[1]*np.sin([p[0]*x]) #+ np.random.rand(len(x))
76 |
77 | pl.plot(x,np.squeeze(y),'-')
78 | pl.plot(x,np.squeeze(y1),'r--')
79 | pl.legend(['Actual Data','Fitted Data'])
80 | pl.show()
81 |
--------------------------------------------------------------------------------
/Ch9/Newton.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 9 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # Gradient Descent using Newton's method
12 | import numpy as np
13 |
14 | def Jacobian(x):
15 | #return array([.4*x[0],2*x[1]])
16 | return np.array([x[0], 0.4*x[1], 1.2*x[2]])
17 |
18 | def Hessian(x):
19 | #return array([[.2,0],[0,1]])
20 | return np.array([[1,0,0],[0,0.4,0],[0,0,1.2]])
21 |
22 | def Newton(x0):
23 |
24 | i = 0
25 | iMax = 10
26 | x = x0
27 | Delta = 1
28 | alpha = 1
29 |
30 |     while i<iMax and Delta>10**(-5):
31 | p = -np.dot(np.linalg.inv(Hessian(x)),Jacobian(x))
32 | xOld = x
33 | x = x + alpha*p
34 | Delta = np.sum((x-xOld)**2)
35 | i += 1
36 | print x
37 |
38 | x0 = np.array([-2,2,-2])
39 | Newton(x0)
40 |
--------------------------------------------------------------------------------
/Ch9/TSP.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 9 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # A demonstration of four methods of solving the Travelling Salesman Problem
12 | import numpy as np
13 |
14 | def makeTSP(nCities):
15 | positions = 2*np.random.rand(nCities,2)-1;
16 | distances = np.zeros((nCities,nCities))
17 |
18 | for i in range(nCities):
19 | for j in range(i+1,nCities):
20 | distances[i,j] = np.sqrt((positions[i,0] - positions[j,0])**2 + (positions[i,1] - positions[j,1])**2);
21 | distances[j,i] = distances[i,j];
22 |
23 | return distances
24 |
25 | def exhaustive(distances):
26 | nCities = np.shape(distances)[0]
27 |
28 | cityOrder = np.arange(nCities)
29 |
30 | distanceTravelled = 0
31 | for i in range(nCities-1):
32 | distanceTravelled += distances[cityOrder[i],cityOrder[i+1]]
33 | distanceTravelled += distances[cityOrder[nCities-1],0]
34 |
35 | for newOrder in permutation(range(nCities)):
36 | possibleDistanceTravelled = 0
37 | for i in range(nCities-1):
38 | possibleDistanceTravelled += distances[newOrder[i],newOrder[i+1]]
39 | possibleDistanceTravelled += distances[newOrder[nCities-1],0]
40 |
41 | if possibleDistanceTravelled < distanceTravelled:
42 | distanceTravelled = possibleDistanceTravelled
43 | cityOrder = newOrder
44 |
45 | return cityOrder, distanceTravelled
46 |
47 | def permutation(order):
48 | order = tuple(order)
49 | if len(order)==1:
50 | yield order
51 | else:
52 | for i in range(len(order)):
53 | rest = order[:i] + order[i+1:]
54 | move = (order[i],)
55 | for smaller in permutation(rest):
56 | yield move + smaller
57 |
58 | def greedy(distances):
59 | nCities = np.shape(distances)[0]
60 | distanceTravelled = 0
61 |
62 | # Need a version of the matrix we can trash
63 | dist = distances.copy()
64 |
65 |     cityOrder = np.zeros(nCities,dtype=int)
66 | cityOrder[0] = np.random.randint(nCities)
67 | dist[:,cityOrder[0]] = np.Inf
68 |
69 | for i in range(nCities-1):
70 | cityOrder[i+1] = np.argmin(dist[cityOrder[i],:])
71 | distanceTravelled += dist[cityOrder[i],cityOrder[i+1]]
72 | # Now exclude the chance of travelling to that city again
73 | dist[:,cityOrder[i+1]] = np.Inf
74 |
75 | # Now return to the original city
76 | distanceTravelled += distances[cityOrder[nCities-1],0]
77 |
78 | return cityOrder, distanceTravelled
79 |
80 | def hillClimbing(distances):
81 |
82 | nCities = np.shape(distances)[0]
83 |
84 | cityOrder = np.arange(nCities)
85 | np.random.shuffle(cityOrder)
86 |
87 | distanceTravelled = 0
88 | for i in range(nCities-1):
89 | distanceTravelled += distances[cityOrder[i],cityOrder[i+1]]
90 | distanceTravelled += distances[cityOrder[nCities-1],0]
91 |
92 | for i in range(1000):
93 | # Choose cities to swap
94 | city1 = np.random.randint(nCities)
95 | city2 = np.random.randint(nCities)
96 |
97 | if city1 != city2:
98 | # Reorder the set of cities
99 | possibleCityOrder = cityOrder.copy()
100 | possibleCityOrder = np.where(possibleCityOrder==city1,-1,possibleCityOrder)
101 | possibleCityOrder = np.where(possibleCityOrder==city2,city1,possibleCityOrder)
102 | possibleCityOrder = np.where(possibleCityOrder==-1,city2,possibleCityOrder)
103 |
104 | # Work out the new distances
105 | # This can be done more efficiently
106 | newDistanceTravelled = 0
107 | for j in range(nCities-1):
108 | newDistanceTravelled += distances[possibleCityOrder[j],possibleCityOrder[j+1]]
109 |             newDistanceTravelled += distances[possibleCityOrder[nCities-1],0]
110 |
111 | if newDistanceTravelled < distanceTravelled:
112 | distanceTravelled = newDistanceTravelled
113 | cityOrder = possibleCityOrder
114 |
115 | return cityOrder, distanceTravelled
116 |
117 |
118 | def simulatedAnnealing(distances):
119 |
120 | nCities = np.shape(distances)[0]
121 |
122 | cityOrder = np.arange(nCities)
123 | np.random.shuffle(cityOrder)
124 |
125 | distanceTravelled = 0
126 | for i in range(nCities-1):
127 | distanceTravelled += distances[cityOrder[i],cityOrder[i+1]]
128 | distanceTravelled += distances[cityOrder[nCities-1],0]
129 |
130 | T = 500
131 | c = 0.8
132 | nTests = 10
133 |
134 | while T>1:
135 | for i in range(nTests):
136 | # Choose cities to swap
137 | city1 = np.random.randint(nCities)
138 | city2 = np.random.randint(nCities)
139 |
140 | if city1 != city2:
141 | # Reorder the set of cities
142 | possibleCityOrder = cityOrder.copy()
143 | possibleCityOrder = np.where(possibleCityOrder==city1,-1,possibleCityOrder)
144 | possibleCityOrder = np.where(possibleCityOrder==city2,city1,possibleCityOrder)
145 | possibleCityOrder = np.where(possibleCityOrder==-1,city2,possibleCityOrder)
146 |
147 | # Work out the new distances
148 | # This can be done more efficiently
149 | newDistanceTravelled = 0
150 | for j in range(nCities-1):
151 | newDistanceTravelled += distances[possibleCityOrder[j],possibleCityOrder[j+1]]
152 |                 newDistanceTravelled += distances[possibleCityOrder[nCities-1],0]
153 |
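    |                 # Metropolis rule: always accept a shorter tour; accept a longer one with probability exp(-increase/T)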
154 |                 if newDistanceTravelled < distanceTravelled or np.exp((distanceTravelled - newDistanceTravelled)/T) > np.random.rand():
155 | distanceTravelled = newDistanceTravelled
156 | cityOrder = possibleCityOrder
157 |
158 | # Annealing schedule
159 | T = c*T
160 |
161 | return cityOrder, distanceTravelled
162 |
163 | def runAll():
164 | import time
165 |
166 | nCities = 5
167 | distances = makeTSP(nCities)
168 |
169 | print "Exhaustive search"
170 | start = time.time()
171 | print exhaustive(distances)
172 | finish = time.time()
173 | print finish-start
174 |
175 | print "Greedy search"
176 | start = time.time()
177 | print greedy(distances)
178 | finish = time.time()
179 | print finish-start
180 |
181 | print "Hill Climbing"
182 | start = time.time()
183 | print hillClimbing(distances)
184 | finish = time.time()
185 | print finish-start
186 |
187 | print "Simulated Annealing"
188 | start = time.time()
189 | print simulatedAnnealing(distances)
190 | finish = time.time()
191 | print finish-start
192 |
193 | runAll()
194 |
--------------------------------------------------------------------------------
/Ch9/iris.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 9 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # The iris classification example
12 |
13 | def preprocessIris(infile,outfile):
14 |
15 | stext1 = 'Iris-setosa'
16 | stext2 = 'Iris-versicolor'
17 | stext3 = 'Iris-virginica'
18 | rtext1 = '0'
19 | rtext2 = '1'
20 | rtext3 = '2'
21 |
22 | fid = open(infile,"r")
23 | oid = open(outfile,"w")
24 |
25 | for s in fid:
26 | if s.find(stext1)>-1:
27 | oid.write(s.replace(stext1, rtext1))
28 | elif s.find(stext2)>-1:
29 | oid.write(s.replace(stext2, rtext2))
30 | elif s.find(stext3)>-1:
31 | oid.write(s.replace(stext3, rtext3))
32 | fid.close()
33 | oid.close()
34 |
35 | import numpy as np
36 | # Preprocessor to remove the test (only needed once)
37 | #preprocessIris('/Users/srmarsla/Book/Datasets/Iris/iris.data','iris_proc.data')
38 |
39 | iris = np.loadtxt('iris_proc.data',delimiter=',')
40 | iris[:,:4] = iris[:,:4]-iris[:,:4].mean(axis=0)
41 | imax = np.concatenate((iris.max(axis=0)*np.ones((1,5)),iris.min(axis=0)*np.ones((1,5))),axis=0).max(axis=0)
42 | iris[:,:4] = iris[:,:4]/imax[:4]
43 | #print iris[0:5,:]
44 |
45 | # Split into training, validation, and test sets
46 | target = np.zeros((np.shape(iris)[0],3));
47 | indices = np.where(iris[:,4]==0)
48 | target[indices,0] = 1
49 | indices = np.where(iris[:,4]==1)
50 | target[indices,1] = 1
51 | indices = np.where(iris[:,4]==2)
52 | target[indices,2] = 1
53 |
54 | # Randomly order the data
55 | order = range(np.shape(iris)[0])
56 | np.random.shuffle(order)
57 | iris = iris[order,:]
58 | target = target[order,:]
59 |
60 | train = iris[::2,0:4]
61 | traint = target[::2]
62 | valid = iris[1::4,0:4]
63 | validt = target[1::4]
64 | test = iris[3::4,0:4]
65 | testt = target[3::4]
66 |
67 | #print train.max(axis=0), train.min(axis=0)
68 |
69 | # Train the network
70 | import mlp_cg
71 | reload(mlp_cg)
72 | net = mlp_cg.mlp_cg(train,traint,5,outtype='softmax')
73 | net.confmat(test,testt)
74 | net.mlptrain(train,traint)
75 | net.confmat(test,testt)
76 |
--------------------------------------------------------------------------------
/Ch9/steepest.py:
--------------------------------------------------------------------------------
1 |
2 | # Code from Chapter 9 of Machine Learning: An Algorithmic Perspective (2nd Edition)
3 | # by Stephen Marsland (http://stephenmonika.net)
4 |
5 | # You are free to use, change, or redistribute the code in any way you wish for
6 | # non-commercial purposes, but please maintain the name of the original author.
7 | # This code comes with no warranty of any kind.
8 |
9 | # Stephen Marsland, 2008, 2014
10 |
11 | # Gradient Descent using steepest descent
12 |
13 | import numpy as np
14 |
15 | def Jacobian(x):
16 | #return array([.4*x[0],2*x[1]])
17 | return np.array([x[0], 0.4*x[1], 1.2*x[2]])
18 |
19 | def steepest(x0):
20 |
21 | i = 0
22 | iMax = 10
23 | x = x0
24 | Delta = 1
25 | alpha = 1
26 |
27 |     while i<iMax and Delta>10**(-5):
28 | p = -Jacobian(x)
29 | xOld = x
30 | x = x + alpha*p
31 | Delta = np.sum((x-xOld)**2)
32 | print x
33 | i += 1
34 |
35 | x0 = np.array([-2,2,-2])
36 | steepest(x0)
37 |
--------------------------------------------------------------------------------
/Data/PROSTATE_TEST.TXT:
--------------------------------------------------------------------------------
1 | 0.73716 3.47352 64 0.61519 0 -1.38629 6 0 0.76547
2 | -0.77653 3.53951 47 -1.38629 0 -1.38629 6 0 1.04732
3 | 0.22314 3.24454 63 -1.38629 0 -1.38629 6 0 1.04732
4 | 1.20597 3.44202 57 -1.38629 0 -0.43078 7 5 1.39872
5 | 2.05924 3.50104 60 1.47476 0 1.34807 7 20 1.65823
6 | 0.38526 3.66740 69 1.59939 0 -1.38629 6 0 1.73166
7 | 1.44692 3.12457 68 0.30010 0 -1.38629 6 0 1.76644
8 | -0.40048 3.86598 67 1.81645 0 -1.38629 7 20 1.81645
9 | 0.18232 6.10758 65 1.70475 0 -1.38629 6 0 2.00821
10 | 0.00995 3.26767 54 -1.38629 0 -1.38629 6 0 2.02155
11 | 1.30833 4.11985 64 2.17134 0 -1.38629 7 5 2.08567
12 | 1.44220 3.68261 68 -1.38629 0 -1.38629 7 10 2.30757
13 | 1.77156 3.89691 61 -1.38629 0 0.81093 7 6 2.37491
14 | 1.16315 4.03512 68 1.71380 0 -0.43078 7 40 2.56879
15 | 1.74572 3.49802 43 -1.38629 0 -1.38629 6 0 2.59152
16 | 1.22083 3.56812 70 1.37372 0 -0.79851 6 0 2.59152
17 | 0.51282 3.63363 64 1.49290 0 0.04879 7 70 2.68444
18 | 2.12704 4.12147 68 1.76644 0 1.44692 7 40 2.69124
19 | 3.15359 3.51601 59 -1.38629 0 -1.38629 7 5 2.70471
20 | 0.97456 2.86505 47 -1.38629 0 0.50078 7 4 2.78809
21 | 1.99742 3.71965 63 1.61939 1 1.90954 7 40 2.85359
22 | 2.03471 3.91701 66 2.00821 1 2.11021 7 60 2.88200
23 | 2.07317 3.62301 64 -1.38629 0 -1.38629 6 0 2.88200
24 | 1.45862 3.83622 61 1.32176 0 -0.43078 7 20 2.88759
25 | 1.21491 3.82538 69 -1.38629 1 0.22314 7 20 3.05636
26 | 1.83896 3.23672 60 0.43825 1 1.17866 9 90 3.07501
27 | 2.77944 3.82319 63 -1.38629 0 0.37156 7 50 3.51304
28 | 2.67759 3.83838 65 1.11514 0 1.74920 9 70 3.57094
29 | 2.90745 3.39619 52 -1.38629 1 2.46385 7 10 5.14312
30 | 3.47197 3.97500 68 0.43825 1 2.90417 7 20 5.58293
31 |
--------------------------------------------------------------------------------
/Data/PROSTATE_TRAIN.TXT:
--------------------------------------------------------------------------------
1 | -0.57982 2.76946 50 -1.38629 0 -1.38629 6 0 -0.43078
2 | -0.99425 3.31963 58 -1.38629 0 -1.38629 6 0 -0.16252
3 | -0.51083 2.69124 74 -1.38629 0 -1.38629 7 20 -0.16252
4 | -1.20397 3.28279 58 -1.38629 0 -1.38629 6 0 -0.16252
5 | 0.75142 3.43237 62 -1.38629 0 -1.38629 6 0 0.37156
6 | -1.04982 3.22883 50 -1.38629 0 -1.38629 6 0 0.76547
7 | 0.69315 3.53951 58 1.53687 0 -1.38629 6 0 0.85442
8 | 0.25464 3.60414 65 -1.38629 0 -1.38629 6 0 1.26695
9 | -1.34707 3.59868 63 1.26695 0 -1.38629 6 0 1.26695
10 | 1.61343 3.02286 63 -1.38629 0 -0.59784 7 30 1.26695
11 | 1.47705 2.99823 67 -1.38629 0 -1.38629 7 5 1.34807
12 | 1.54116 3.06105 66 -1.38629 0 -1.38629 6 0 1.44692
13 | -0.41552 3.51601 70 1.24415 0 -0.59784 7 30 1.47018
14 | 2.28849 3.64936 66 -1.38629 0 0.37156 6 0 1.49290
15 | -0.56212 3.26767 41 -1.38629 0 -1.38629 6 0 1.55814
16 | 0.18232 3.82538 70 1.65823 0 -1.38629 6 0 1.59939
17 | 1.14740 3.41937 59 -1.38629 0 -1.38629 6 0 1.63900
18 | -0.54473 3.37588 59 -0.79851 0 -1.38629 6 0 1.69562
19 | 1.78171 3.45157 63 0.43825 0 1.17866 7 60 1.71380
20 | 0.51282 3.71965 65 -1.38629 0 -0.79851 7 70 1.80006
21 | 1.04028 3.12895 67 0.22314 0 0.04879 7 80 1.84845
22 | 2.40964 3.37588 65 -1.38629 0 1.61939 6 0 1.89462
23 | 0.28518 4.09017 65 1.96291 0 -0.79851 6 0 1.92425
24 | 1.27536 3.03735 71 1.26695 0 -1.38629 6 0 2.00821
25 | -0.01005 3.21687 63 -1.38629 0 -0.79851 6 0 2.04769
26 | 1.42311 3.65713 73 -0.57982 0 1.65823 8 15 2.15756
27 | 0.45742 2.37491 64 -1.38629 0 -1.38629 7 15 2.19165
28 | 2.66096 4.08514 68 1.37372 1 1.83258 7 35 2.21375
29 | 0.79751 3.01308 56 0.93609 0 -0.16252 7 5 2.27727
30 | 0.62058 3.14200 60 -1.38629 0 -1.38629 9 80 2.29757
31 | 0.58222 3.86598 62 1.71380 0 -0.43078 6 0 2.32728
32 | 1.48614 3.40950 66 1.74920 0 -0.43078 7 20 2.52172
33 | 1.66393 3.39283 61 0.61519 0 -1.38629 7 15 2.55334
34 | 2.72785 3.99545 79 1.87947 1 2.65676 9 100 2.56879
35 | 1.09192 3.99360 68 -1.38629 0 -1.38629 7 50 2.65676
36 | 1.66013 4.23483 64 2.07317 0 -1.38629 6 0 2.67759
37 | 1.26695 4.28013 66 2.12226 0 -1.38629 7 15 2.71800
38 | 0.46373 3.76468 49 1.42311 0 -1.38629 6 0 2.79423
39 | 0.54232 4.17823 70 0.43825 0 -1.38629 7 20 2.80639
40 | 1.06126 3.85121 61 1.29473 0 -1.38629 7 40 2.81241
41 | 0.45742 4.52450 73 2.32630 0 -1.38629 6 0 2.84200
42 | 2.77571 3.52489 72 -1.38629 0 1.55814 9 95 2.85359
43 | 2.02287 3.87847 68 1.78339 0 1.32176 7 70 2.92047
44 | 2.19834 4.05091 72 2.30757 0 -0.43078 7 10 2.96269
45 | -0.44629 4.40855 69 -1.38629 0 -1.38629 6 0 2.96269
46 | 1.19392 4.78038 72 2.32630 0 -0.79851 7 5 2.97298
47 | 1.86408 3.59319 60 -1.38629 1 1.32176 7 60 3.01308
48 | 1.16002 3.34109 77 1.74920 0 -1.38629 7 25 3.03735
49 | 2.99923 3.84908 69 -1.38629 1 1.90954 7 20 3.27526
50 | 3.14113 3.26385 68 -0.05129 1 2.42037 7 50 3.33755
51 | 2.01089 4.43379 72 2.12226 0 0.50078 7 60 3.39283
52 | 2.53766 4.35478 78 2.32630 0 -1.38629 7 10 3.43560
53 | 2.64830 3.58213 69 -1.38629 1 2.58400 7 70 3.45789
54 | 1.46787 3.07038 66 0.55962 0 0.22314 7 40 3.51601
55 | 2.51366 3.47352 57 0.43825 0 2.32728 7 60 3.53076
56 | 2.61301 3.88875 77 -0.52763 1 0.55962 7 30 3.56530
57 | 1.56235 3.70991 60 1.69562 0 0.81093 7 30 3.58768
58 | 3.30285 3.51898 64 -1.38629 1 2.32728 7 60 3.63099
59 | 2.02419 3.73170 58 1.63900 0 -1.38629 6 0 3.68009
60 | 1.73166 3.36902 62 -1.38629 1 0.30010 7 30 3.71235
61 | 2.80759 4.71805 65 -1.38629 1 2.46385 7 60 3.98434
62 | 1.56235 3.69511 76 0.93609 1 0.81093 7 75 3.99360
63 | 3.24649 4.10182 68 -1.38629 0 -1.38629 6 0 4.02981
64 | 2.53290 3.67757 61 1.34807 1 -1.38629 7 15 4.12955
65 | 2.83027 3.87640 68 -1.38629 1 1.32176 7 60 4.38515
66 | 3.82100 3.89691 44 -1.38629 1 2.16905 7 40 4.68444
67 | 2.88256 3.77391 68 1.55814 1 1.55814 7 80 5.47751
68 |
--------------------------------------------------------------------------------
/Data/ruapehu.dat:
--------------------------------------------------------------------------------
1 | 1861.12 1861.37
2 | 1869.50 1869.67
3 | 1881.20 1881.37
4 | 1886.29 1886.45
5 | 1889.33 1889.62
6 | 1890.20 1890.37
7 | 1895.19 1895.20
8 | 1897.50 1897.67
9 | 1903.50 1903.79
10 | 1906.20 1906.49
11 | 1907.13 1907.29
12 | 1910.16 1910.33
13 | 1918.49 1918.78
14 | 1921.79 1922.08
15 | 1925.06 1925.35
16 | 1934.61 1934.90
17 | 1934.96 1935.13
18 | 1936.35 1936.36
19 | 1940.29 1940.58
20 | 1942.61 1942.90
21 | 1944.79 1945.08
22 | 1945.18 1945.96
23 | 1946.29 1946.45
24 | 1946.89 1947.01
25 | 1947.13 1947.29
26 | 1948.06 1948.23
27 | 1948.33 1948.50
28 | 1949.37 1949.54
29 | 1949.71 1949.87
30 | 1950.48 1950.48
31 | 1951.21 1951.38
32 | 1952.54 1952.70
33 | 1954.79 1954.95
34 | 1956.88 1956.88
35 | 1959.39 1959.67
36 | 1964.29 1964.45
37 | 1966.26 1966.74
38 | 1967.56 1967.76
39 | 1968.26 1968.44
40 | 1969.47 1969.48
41 | 1970.71 1970.71
42 | 1971.26 1971.84
43 | 1972.81 1973.03
44 | 1973.83 1974.82
45 | 1975.31 1975.32
46 | 1975.80 1975.80
47 | 1976.18 1976.18
48 | 1976.70 1976.89
49 | 1977.54 1977.75
50 | 1977.84 1978.00
51 | 1978.18 1978.35
52 | 1979.50 1979.54
53 | 1980.04 1980.24
54 | 1980.80 1980.84
55 | 1981.82 1982.28
56 | 1984.25 1984.42
57 | 1984.82 1984.98
58 | 1985.39 1985.44
59 | 1985.87 1985.87
60 | 1986.11 1986.11
61 | 1987.65 1987.66
62 | 1988.22 1988.40
63 | 1988.94 1989.18
64 | 1989.50 1989.72
65 | 1990.02 1990.07
66 | 1991.51 1991.53
67 | 1992.11 1992.18
68 | 1994.12 1994.25
69 | 1995.03 1995.86
70 | 1996.46 1996.67
71 | 1997.77 1997.80
72 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # An Algorithm-Centered Guide to Machine Learning
3 |
4 | 
5 |
6 | **Publisher** JPub (제이펍)
7 | **Original publisher** Chapman and Hall
8 | **Original title** Machine Learning: An Algorithmic Perspective, Second Edition (ISBN: 9781466583283)
9 | **Author** Stephen Marsland
10 | **Translator** 강전형
11 | **Publication date** December 28, 2016
12 | **Pages** 532
13 | **Series** I♥A.I. 02
14 | **ISBN** 979-11-85890-72-2 (93000)
15 |
16 | [Go to the book introduction page](http://jpub.tistory.com/648)
17 |
18 |
19 |
--------------------------------------------------------------------------------
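
The files under `Data/` above are plain whitespace-delimited numeric tables (the two prostate files have nine columns per row, `ruapehu.dat` has two), so they can be read directly with numpy. The following is a minimal sketch, not code from the repository; the relative paths and the assumption that the last prostate column is the regression target are illustrative only.

```python
import numpy as np

# Minimal sketch (assumption): paths are relative to the repository root.
prostate_train = np.loadtxt('Data/PROSTATE_TRAIN.TXT')  # 67 rows x 9 columns
prostate_test = np.loadtxt('Data/PROSTATE_TEST.TXT')    # 30 rows x 9 columns
# ruapehu.dat: two numeric columns per row (decimal years); their exact
# meaning is not documented in the repository itself.
ruapehu = np.loadtxt('Data/ruapehu.dat')                 # 71 rows x 2 columns

# Assumption: treat the last column of the prostate files as the target
# and the first eight as inputs (the files carry no header to confirm this).
X_train, y_train = prostate_train[:, :-1], prostate_train[:, -1]
X_test, y_test = prostate_test[:, :-1], prostate_test[:, -1]

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, ruapehu.shape)
```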