├── Python3
    ├── README.md
    ├── data.csv
    ├── test.py
    ├── make_data.py
    ├── Plot2D.py
    ├── Adaboost.py
    └── WeakClassifier.py
├── README.md
├── data.csv
├── test.py
├── make_data.py
├── Plot2D.py
├── Adaboost.py
└── WeakClassifier.py


/Python3/README.md:
--------------------------------------------------------------------------------
1 | # AdaboostExample
2 | AdaboostExample
3 | 
4 | Python3.7
5 | 
6 | You can dircet run test.py to view the result, the introduction can be found in the CSDN blog:
7 | https://blog.csdn.net/px_528/article/details/72963977
8 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # AdaboostExample
 2 | AdaboostExample
 3 | 
 4 | Python2.7 --> python test.py
 5 | 
 6 | Python3.7 --> python Python3/test.py
 7 | 
 8 | You can dircet run test.py to view the result, the introduction can be found in the CSDN blog:
 9 | https://blog.csdn.net/px_528/article/details/72963977
10 | 


--------------------------------------------------------------------------------
/data.csv:
--------------------------------------------------------------------------------
 1 | 0,1,label
 2 | 0.55,4.4,-1
 3 | 1.1,2.8,-1
 4 | 1.85,1.95,-1
 5 | 3.15,1.7,-1
 6 | 4.0,2.7,-1
 7 | 3.75,3.95,-1
 8 | 2.8,4.4,-1
 9 | 2.35,3.2,-1
10 | 3.05,2.25,-1
11 | 3.55,2.6,-1
12 | 3.1,3.0,-1
13 | 3.0,3.4,-1
14 | 1.0,7.3,1
15 | 1.4,6.7,1
16 | 3.05,6.9,1
17 | 4.3,7.15,1
18 | 4.75,7.0,1
19 | 5.5,5.85,1
20 | 5.95,4.75,1
21 | 6.45,3.15,1
22 | 6.5,1.35,1
23 | 6.3,0.95,1
24 | 5.95,0.85,1
25 | 5.95,1.6,1
26 | 5.85,2.75,1
27 | 5.65,4.0,1
28 | 5.35,5.25,1
29 | 5.0,6.15,1
30 | 4.7,6.3,1
31 | 3.85,6.5,1
32 | 2.55,6.55,1
33 | 1.4,6.65,1
34 | 0.6,6.75,1
35 | 0.6,6.85,1
36 | 5.35,0.9,1
37 | 


--------------------------------------------------------------------------------
/Python3/data.csv:
--------------------------------------------------------------------------------
 1 | 0,1,label
 2 | 0.55,4.4,-1
 3 | 1.1,2.8,-1
 4 | 1.85,1.95,-1
 5 | 3.15,1.7,-1
 6 | 4.0,2.7,-1
 7 | 3.75,3.95,-1
 8 | 2.8,4.4,-1
 9 | 2.35,3.2,-1
10 | 3.05,2.25,-1
11 | 3.55,2.6,-1
12 | 3.1,3.0,-1
13 | 3.0,3.4,-1
14 | 1.0,7.3,1
15 | 1.4,6.7,1
16 | 3.05,6.9,1
17 | 4.3,7.15,1
18 | 4.75,7.0,1
19 | 5.5,5.85,1
20 | 5.95,4.75,1
21 | 6.45,3.15,1
22 | 6.5,1.35,1
23 | 6.3,0.95,1
24 | 5.95,0.85,1
25 | 5.95,1.6,1
26 | 5.85,2.75,1
27 | 5.65,4.0,1
28 | 5.35,5.25,1
29 | 5.0,6.15,1
30 | 4.7,6.3,1
31 | 3.85,6.5,1
32 | 2.55,6.55,1
33 | 1.4,6.65,1
34 | 0.6,6.75,1
35 | 0.6,6.85,1
36 | 5.35,0.9,1
37 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
 1 | from Adaboost import *
 2 | from Plot2D import *
 3 | import pandas as pd
 4 | 
 5 | #data.csv is created by make_data.py
 6 | data=pd.read_csv('data.csv')
 7 | 
 8 | #get X and y
 9 | X=data.iloc[:,:-1].values
10 | y=data.iloc[:,-1].values
11 | 
12 | #train the AdaboostClassifier
13 | clf=AdaboostClassifier()
14 | times=clf.fit(X,y)
15 | 
16 | #plot original data
17 | Plot2D(data).pause(3)
18 | 
19 | #plot Adaboost decision_threshold
20 | for i in xrange(times):
21 | 	if clf.weak[i].decision_feature==0:
22 | 		plt.plot([clf.weak[i].decision_threshold,clf.weak[i].decision_threshold],[0,8])
23 | 	else:
24 | 		plt.plot([0,8],[clf.weak[i].decision_threshold,clf.weak[i].decision_threshold])
25 | plt.pause(3)
26 | 


--------------------------------------------------------------------------------
/Python3/test.py:
--------------------------------------------------------------------------------
 1 | from Adaboost import *
 2 | from Plot2D import *
 3 | import pandas as pd
 4 | 
 5 | #data.csv is created by make_data.py
 6 | data=pd.read_csv('data.csv')
 7 | 
 8 | #get X and y
 9 | X=data.iloc[:,:-1].values
10 | y=data.iloc[:,-1].values
11 | 
12 | #train the AdaboostClassifier
13 | clf=AdaboostClassifier()
14 | times=clf.fit(X,y)
15 | 
16 | #plot original data
17 | Plot2D(data).pause(3)
18 | 
19 | #plot Adaboost decision_threshold
20 | for i in range(times):
21 | 	if clf.weak[i].decision_feature==0:
22 | 		plt.plot([clf.weak[i].decision_threshold,clf.weak[i].decision_threshold],[0,8])
23 | 	else:
24 | 		plt.plot([0,8],[clf.weak[i].decision_threshold,clf.weak[i].decision_threshold])
25 | plt.pause(3)
26 | 


--------------------------------------------------------------------------------
/make_data.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | 
 4 | X=np.array([
 5 | [0.55,4.4],
 6 | [1.1,2.8],
 7 | [1.85,1.95],
 8 | [3.15,1.7],
 9 | [4,2.7],
10 | [3.75,3.95],
11 | [2.8,4.4],
12 | [2.35,3.2],
13 | [3.05,2.25],
14 | [3.55,2.6],
15 | [3.1,3],
16 | [3,3.4],
17 | [1,7.3],
18 | [1.4,6.7],
19 | [3.05,6.9],
20 | [4.3,7.15],
21 | [4.75,7],
22 | [5.5,5.85],
23 | [5.95,4.75],
24 | [6.45,3.15],
25 | [6.5,1.35],
26 | [6.3,0.95],
27 | [5.95,0.85],
28 | [5.95,1.6],
29 | [5.85,2.75],
30 | [5.65,4],
31 | [5.35,5.25],
32 | [5,6.15],
33 | [4.7,6.3],
34 | [3.85,6.5],
35 | [2.55,6.55],
36 | [1.4,6.65],
37 | [0.6,6.75],
38 | [0.6,6.85],
39 | [5.35,0.9]])
40 | 
41 | 
42 | y=np.array([
43 | [-1],
44 | [-1],
45 | [-1],
46 | [-1],
47 | [-1],
48 | [-1],
49 | [-1],
50 | [-1],
51 | [-1],
52 | [-1],
53 | [-1],
54 | [-1],
55 | [1],
56 | [1],
57 | [1],
58 | [1],
59 | [1],
60 | [1],
61 | [1],
62 | [1],
63 | [1],
64 | [1],
65 | [1],
66 | [1],
67 | [1],
68 | [1],
69 | [1],
70 | [1],
71 | [1],
72 | [1],
73 | [1],
74 | [1],
75 | [1],
76 | [1],
77 | [1]])
78 | 
79 | X=pd.DataFrame(X)
80 | X['label']=y
81 | X.to_csv('data.csv',index=None)
82 | 


--------------------------------------------------------------------------------
/Python3/make_data.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | 
 4 | X=np.array([
 5 | [0.55,4.4],
 6 | [1.1,2.8],
 7 | [1.85,1.95],
 8 | [3.15,1.7],
 9 | [4,2.7],
10 | [3.75,3.95],
11 | [2.8,4.4],
12 | [2.35,3.2],
13 | [3.05,2.25],
14 | [3.55,2.6],
15 | [3.1,3],
16 | [3,3.4],
17 | [1,7.3],
18 | [1.4,6.7],
19 | [3.05,6.9],
20 | [4.3,7.15],
21 | [4.75,7],
22 | [5.5,5.85],
23 | [5.95,4.75],
24 | [6.45,3.15],
25 | [6.5,1.35],
26 | [6.3,0.95],
27 | [5.95,0.85],
28 | [5.95,1.6],
29 | [5.85,2.75],
30 | [5.65,4],
31 | [5.35,5.25],
32 | [5,6.15],
33 | [4.7,6.3],
34 | [3.85,6.5],
35 | [2.55,6.55],
36 | [1.4,6.65],
37 | [0.6,6.75],
38 | [0.6,6.85],
39 | [5.35,0.9]])
40 | 
41 | 
42 | y=np.array([
43 | [-1],
44 | [-1],
45 | [-1],
46 | [-1],
47 | [-1],
48 | [-1],
49 | [-1],
50 | [-1],
51 | [-1],
52 | [-1],
53 | [-1],
54 | [-1],
55 | [1],
56 | [1],
57 | [1],
58 | [1],
59 | [1],
60 | [1],
61 | [1],
62 | [1],
63 | [1],
64 | [1],
65 | [1],
66 | [1],
67 | [1],
68 | [1],
69 | [1],
70 | [1],
71 | [1],
72 | [1],
73 | [1],
74 | [1],
75 | [1],
76 | [1],
77 | [1]])
78 | 
79 | X=pd.DataFrame(X)
80 | X['label']=y
81 | X.to_csv('data.csv',index=None)
82 | 


--------------------------------------------------------------------------------
/Plot2D.py:
--------------------------------------------------------------------------------
 1 | from sklearn.preprocessing import LabelEncoder
 2 | from matplotlib import pyplot as plt
 3 | import pandas as pd
 4 | import warnings
 5 | 
 6 | class Plot2D:
 7 | 	
 8 | 	def __init__(self,data):
 9 | 		self.__data=data
10 | 		self.__X=data.iloc[:,:-1].values
11 | 		self.__le=LabelEncoder()
12 | 		self.__y=self.__le.fit_transform(data.iloc[:,-1].values)
13 | 		self.__xmin=self.__X[:,0].min()
14 | 		self.__xmax=self.__X[:,0].max()
15 | 		self.__ymin=self.__X[:,1].min()
16 | 		self.__ymax=self.__X[:,1].max()
17 | 		self.__marker_list=['o','x']
18 | 
19 | 	def show(self):
20 | 		plt.xlim(self.__xmin-(self.__xmax-self.__xmin)*0.1,self.__xmax+(self.__xmax-self.__xmin)*0.1)
21 | 		plt.ylim(self.__ymin-(self.__ymax-self.__ymin)*0.1,self.__ymax+(self.__ymax-self.__ymin)*0.1)
22 | 		for i in xrange(2):
23 | 			temp=self.__data.loc[:][self.__data['label']==self.__le.inverse_transform(i)]
24 | 			plt.scatter(temp.iloc[:,0],temp.iloc[:,1],label=self.__le.inverse_transform(i),marker=self.__marker_list[i])
25 | 			plt.legend()
26 | 		plt.xlabel(self.__data.columns[0])
27 | 		plt.ylabel(self.__data.columns[1])
28 | 		plt.show()
29 | 
30 | 	def pause(self,Seconds):
31 | 		warnings.filterwarnings("ignore",".*GUI is implemented.*")
32 | 		
33 | 		plt.ion()
34 | 		plt.xlim(self.__xmin-(self.__xmax-self.__xmin)*0.1,self.__xmax+(self.__xmax-self.__xmin)*0.1)
35 | 		plt.ylim(self.__ymin-(self.__ymax-self.__ymin)*0.1,self.__ymax+(self.__ymax-self.__ymin)*0.1)
36 | 		for i in xrange(2):
37 | 			temp=self.__data.loc[:][self.__data['label']==self.__le.inverse_transform(i)]
38 | 			plt.scatter(temp.iloc[:,0],temp.iloc[:,1],label=self.__le.inverse_transform(i),marker=self.__marker_list[i])
39 | 			plt.legend()
40 | 		plt.xlabel(self.__data.columns[0])
41 | 		plt.ylabel(self.__data.columns[1])
42 | 		plt.pause(Seconds)
43 | 
44 | '''
45 | data=pd.read_csv('data.csv')
46 | Plot2D(data).show()
47 | '''


--------------------------------------------------------------------------------
/Python3/Plot2D.py:
--------------------------------------------------------------------------------
 1 | from sklearn.preprocessing import LabelEncoder
 2 | from matplotlib import pyplot as plt
 3 | import pandas as pd
 4 | import warnings
 5 | 
 6 | class Plot2D:
 7 | 	
 8 | 	def __init__(self,data):
 9 | 		self.__data=data
10 | 		self.__X=data.iloc[:,:-1].values
11 | 		self.__le=LabelEncoder()
12 | 		self.__y=self.__le.fit_transform(data.iloc[:,-1].values)
13 | 		self.__xmin=self.__X[:,0].min()
14 | 		self.__xmax=self.__X[:,0].max()
15 | 		self.__ymin=self.__X[:,1].min()
16 | 		self.__ymax=self.__X[:,1].max()
17 | 		self.__marker_list=['o','x']
18 | 
19 | 	def show(self):
20 | 		plt.xlim(self.__xmin-(self.__xmax-self.__xmin)*0.1,self.__xmax+(self.__xmax-self.__xmin)*0.1)
21 | 		plt.ylim(self.__ymin-(self.__ymax-self.__ymin)*0.1,self.__ymax+(self.__ymax-self.__ymin)*0.1)
22 | 		length=len(self.__data.loc[:])
23 | 		for i in range(2):
24 | 			temp=self.__data.loc[:][self.__data['label']==self.__le.inverse_transform([i]*length)]
25 | 			plt.scatter(temp.iloc[:,0],temp.iloc[:,1],label=self.__le.inverse_transform([i]),marker=self.__marker_list[i])
26 | 			plt.legend()
27 | 		plt.xlabel(self.__data.columns[0])
28 | 		plt.ylabel(self.__data.columns[1])
29 | 		plt.show()
30 | 
31 | 	def pause(self,Seconds):
32 | 		warnings.filterwarnings("ignore",".*GUI is implemented.*")
33 | 		
34 | 		plt.ion()
35 | 		plt.xlim(self.__xmin-(self.__xmax-self.__xmin)*0.1,self.__xmax+(self.__xmax-self.__xmin)*0.1)
36 | 		plt.ylim(self.__ymin-(self.__ymax-self.__ymin)*0.1,self.__ymax+(self.__ymax-self.__ymin)*0.1)
37 | 		length=len(self.__data.loc[:])
38 | 		for i in range(2):
39 | 			temp=self.__data.loc[:][self.__data['label']==self.__le.inverse_transform([i]*length)]
40 | 			plt.scatter(temp.iloc[:,0],temp.iloc[:,1],label=self.__le.inverse_transform([i]),marker=self.__marker_list[i])
41 | 			plt.legend()
42 | 		plt.xlabel(self.__data.columns[0])
43 | 		plt.ylabel(self.__data.columns[1])
44 | 		plt.pause(Seconds)
45 | 
46 | 
47 | '''
48 | data=pd.read_csv('data.csv')
49 | Plot2D(data).show()
50 | '''


--------------------------------------------------------------------------------
/Python3/Adaboost.py:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | import numpy as np 
  4 | from WeakClassifier import *
  5 | 
  6 | class AdaboostClassifier:
  7 | 
  8 | 	#calculate new Weight
  9 | 	def cal_W(self,W,alpha,y,pred):
 10 | 		ret=0
 11 | 		new_W=[]
 12 | 		for i in range(len(y)):
 13 | 			new_W.append(W[i]*np.exp(-alpha*y[i]*pred[i]))
 14 | 		return np.array(new_W/sum(new_W)).reshape([len(y),1])
 15 | 
 16 | 	#calculate error rate per iteration
 17 | 	def cal_e(self,y,pred,W):
 18 | 		ret=0
 19 | 		for i in range(len(y)):
 20 | 			if y[i]!=pred[i]:
 21 | 				ret+=W[i]
 22 | 		return ret
 23 | 
 24 | 	#calculate alpha
 25 | 	def cal_alpha(self,e):
 26 | 		if e==0:
 27 | 			return 10000
 28 | 		elif e==0.5:
 29 | 			return 0.001
 30 | 		else:
 31 | 			return 0.5*np.log((1-e)/e)
 32 | 
 33 | 	#calculate final predict value
 34 | 	def cal_final_pred(self,i,alpha,weak,y):
 35 | 		ret=np.array([0.0]*len(y))
 36 | 		for j in range(i+1):
 37 | 			ret+=alpha[j]*weak[j].pred
 38 | 		return np.sign(ret)
 39 | 
 40 | 	#calculate final error rate
 41 | 	def cal_final_e(self,y,cal_final_predict):	
 42 | 		ret=0
 43 | 		for i in range(len(y)):
 44 | 			if y[i]!=cal_final_predict[i]:
 45 | 				ret+=1
 46 | 		return ret/len(y)
 47 | 
 48 | 	#train
 49 | 	def fit(self,X,y,M=15):
 50 | 		W={}
 51 | 		self.weak={}
 52 | 		alpha={}
 53 | 		pred={}
 54 | 
 55 | 		for i in range(M):
 56 | 			W.setdefault(i)
 57 | 			self.weak.setdefault(i)
 58 | 			alpha.setdefault(i)
 59 | 			pred.setdefault(i)
 60 | 
 61 | 		#per iteration (all:M times)
 62 | 		for i in range(M):
 63 | 			#for the first iteration,initial W
 64 | 			if i == 0:
 65 | 				W[i]=np.array([1]*len(y))/len(y)
 66 | 				W[i]=W[i].reshape([len(y),1])
 67 | 			#if not the first iteration,calculate new Weight
 68 | 			else:
 69 | 				W[i]=self.cal_W(W[i-1],alpha[i-1],y,pred[i-1])
 70 | 
 71 | 			#using train weak learner and get this learner predict value
 72 | 			self.weak[i]=WeakClassifier()
 73 | 			self.weak[i].fit(X,y,W[i])
 74 | 			pred[i]=self.weak[i].pred
 75 | 
 76 | 			#calculate error rate this iteration
 77 | 			e=self.cal_e(y,pred[i],W[i])
 78 | 			#calculate alpha this iteration
 79 | 			alpha[i]=self.cal_alpha(e)
 80 | 			#calculate the final predict value
 81 | 			cal_final_predict=self.cal_final_pred(i,alpha,self.weak,y)
 82 | 
 83 | 			print('iteration:%d'%(i+1))
 84 | 			print('self.decision_key=%s'%(self.weak[i].decision_key))
 85 | 			print('self.decision_feature=%d'%(self.weak[i].decision_feature))
 86 | 			print('decision_threshold=%f'%(self.weak[i].decision_threshold))
 87 | 			print('W=%s'%(W[i]))
 88 | 			print('pred=%s'%(pred[i]))
 89 | 			print('e:%f alpha:%f'%(e,alpha[i]))
 90 | 			print('cal_final_predict:%s'%(cal_final_predict))
 91 | 			print('cal_final_e:%s%%'%(self.cal_final_e(y,cal_final_predict)*100))
 92 | 			print('')
 93 | 
 94 | 			#calculate the final error rate,if it is zero,stop iteration.
 95 | 			if self.cal_final_e(y,cal_final_predict)==0 or e==0:
 96 | 				break
 97 | 		#return the iteration times,from 1 on.
 98 | 		return i+1
 99 | 
100 | 


--------------------------------------------------------------------------------
/Adaboost.py:
--------------------------------------------------------------------------------
  1 | 
  2 | from __future__ import division
  3 | import numpy as np 
  4 | from WeakClassifier import *
  5 | 
  6 | class AdaboostClassifier:
  7 | 
  8 | 	#calculate new Weight
  9 | 	def cal_W(self,W,alpha,y,pred):
 10 | 		ret=0
 11 | 		new_W=[]
 12 | 		for i in range(len(y)):
 13 | 			new_W.append(W[i]*np.exp(-alpha*y[i]*pred[i]))
 14 | 		return np.array(new_W/sum(new_W)).reshape([len(y),1])
 15 | 
 16 | 	#calculate error rate per iteration
 17 | 	def cal_e(self,y,pred,W):
 18 | 		ret=0
 19 | 		for i in range(len(y)):
 20 | 			if y[i]!=pred[i]:
 21 | 				ret+=W[i]
 22 | 		return ret
 23 | 
 24 | 	#calculate alpha
 25 | 	def cal_alpha(self,e):
 26 | 		if e==0:
 27 | 			return 10000
 28 | 		elif e==0.5:
 29 | 			return 0.001
 30 | 		else:
 31 | 			return 0.5*np.log((1-e)/e)
 32 | 
 33 | 	#calculate final predict value
 34 | 	def cal_final_pred(self,i,alpha,weak,y):
 35 | 		ret=np.array([0.0]*len(y))
 36 | 		for j in range(i+1):
 37 | 			ret+=alpha[j]*weak[j].pred
 38 | 		return np.sign(ret)
 39 | 
 40 | 	#calculate final error rate
 41 | 	def cal_final_e(self,y,cal_final_predict):	
 42 | 		ret=0
 43 | 		for i in range(len(y)):
 44 | 			if y[i]!=cal_final_predict[i]:
 45 | 				ret+=1
 46 | 		return ret/len(y)
 47 | 
 48 | 	#train
 49 | 	def fit(self,X,y,M=15):
 50 | 		W={}
 51 | 		self.weak={}
 52 | 		alpha={}
 53 | 		pred={}
 54 | 
 55 | 		for i in range(M):
 56 | 			W.setdefault(i)
 57 | 			self.weak.setdefault(i)
 58 | 			alpha.setdefault(i)
 59 | 			pred.setdefault(i)
 60 | 
 61 | 		#per iteration (all:M times)
 62 | 		for i in range(M):
 63 | 			#for the first iteration,initial W
 64 | 			if i == 0:
 65 | 				W[i]=np.array([1]*len(y))/len(y)
 66 | 				W[i]=W[i].reshape([len(y),1])
 67 | 			#if not the first iteration,calculate new Weight
 68 | 			else:
 69 | 				W[i]=self.cal_W(W[i-1],alpha[i-1],y,pred[i-1])
 70 | 
 71 | 			#using train weak learner and get this learner predict value
 72 | 			self.weak[i]=WeakClassifier()
 73 | 			self.weak[i].fit(X,y,W[i])
 74 | 			pred[i]=self.weak[i].pred
 75 | 
 76 | 			#calculate error rate this iteration
 77 | 			e=self.cal_e(y,pred[i],W[i])
 78 | 			#calculate alpha this iteration
 79 | 			alpha[i]=self.cal_alpha(e)
 80 | 			#calculate the final predict value
 81 | 			cal_final_predict=self.cal_final_pred(i,alpha,self.weak,y)
 82 | 
 83 | 			print 'iteration:%d'%(i+1)
 84 | 			print 'self.decision_key=%s'%(self.weak[i].decision_key)
 85 | 			print 'self.decision_feature=%d'%(self.weak[i].decision_feature)
 86 | 			print 'decision_threshold=%f'%(self.weak[i].decision_threshold)
 87 | 			print 'W=%s'%(W[i])
 88 | 			print 'pred=%s'%(pred[i])
 89 | 			print 'e:%f alpha:%f'%(e,alpha[i])
 90 | 			print 'cal_final_predict:%s'%(cal_final_predict)
 91 | 			print 'cal_final_e:%s%%'%(self.cal_final_e(y,cal_final_predict)*100)
 92 | 			print ''
 93 | 
 94 | 			#calculate the final error rate,if it is zero,stop iteration.
 95 | 			if self.cal_final_e(y,cal_final_predict)==0 or e==0:
 96 | 				break
 97 | 		#return the iteration times,from 1 on.
 98 | 		return i+1
 99 | 
100 | 


--------------------------------------------------------------------------------
/Python3/WeakClassifier.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import numpy as np 
  3 | 
  4 | class WeakClassifier:
  5 | 
  6 | 	'''
  7 | 	for every feature,calculate the all the possible decision_threshold\
  8 | 	remember 'gt':great than or 'lt':less than. Finally,get a dictionary\
  9 | 	as dic={'gt':{'0':...},{'1':...},...,'lt':{'0':...},{'1':...},...}\
 10 | 	the symbol '...' above is a two dimension np.array([[],[],[],...])\
 11 | 	'''
 12 | 	def cal_dic(self,X):
 13 | 		ret_gt={}
 14 | 		for i in range(X.shape[1]):
 15 | 			ret_gt[i]=[]
 16 | 			for j in range(X.shape[0]):
 17 | 				temp_threshold=X[j,i]
 18 | 				temp_line=[]
 19 | 				for k in range(X.shape[0]):
 20 | 					if X[k,i]>=temp_threshold:
 21 | 						temp_line.append(1)
 22 | 					else:
 23 | 						temp_line.append(-1)
 24 | 				ret_gt[i].append(temp_line)
 25 | 
 26 | 		ret_lt={}
 27 | 		for i in range(X.shape[1]):
 28 | 			ret_lt[i]=[]
 29 | 			for j in range(X.shape[0]):
 30 | 				temp_threshold=X[j,i]
 31 | 				temp_line=[]
 32 | 				for k in range(X.shape[0]):
 33 | 					if X[k,i]<=temp_threshold:
 34 | 						temp_line.append(1)
 35 | 					else:
 36 | 						temp_line.append(-1)
 37 | 				ret_lt[i].append(temp_line)
 38 | 		ret={}
 39 | 		ret['gt']=ret_gt
 40 | 		ret['lt']=ret_lt
 41 | 		return ret
 42 | 
 43 | 	#calculate error for one dimension array
 44 | 	def cal_e_line(self,y,line):
 45 | 		ret=0
 46 | 		for i in range(len(y)):
 47 | 			if y[i]!=line[i]:
 48 | 				ret+=self.W[i]
 49 | 		return ret
 50 | 
 51 | 	#calculate error for two dimension array
 52 | 	def cal_e_lines(self,y,lines):
 53 | 		ret=[]
 54 | 		for i in lines:
 55 | 			ret.append(self.cal_e_line(y,i))
 56 | 		return ret
 57 | 
 58 | 	#calculate error for all possible data and get e_dic
 59 | 	def cal_e_dic(self,y,dic):
 60 | 		ret_gt={}
 61 | 		for i in dic['gt']:
 62 | 			ret_gt[i]=(self.cal_e_lines(y,dic['gt'][i]))
 63 | 		ret_lt={}
 64 | 		for i in dic['lt']:
 65 | 			ret_lt[i]=(self.cal_e_lines(y,dic['lt'][i]))
 66 | 		ret={}
 67 | 		ret['gt']=ret_gt
 68 | 		ret['lt']=ret_lt
 69 | 		return ret
 70 | 
 71 | 	#select min error for e_dic
 72 | 	def cal_e_min(self,e_dic):
 73 | 		ret=100000
 74 | 		for key in e_dic:
 75 | 			for i in e_dic[key]:
 76 | 				temp=min(e_dic[key][i])
 77 | 				if ret>temp:
 78 | 					ret=temp
 79 | 		for key in e_dic:
 80 | 			for i in e_dic[key]:
 81 | 				if ret == min(e_dic[key][i]):
 82 | 					#return key,feature_index,index
 83 | 					return ret,key,i,e_dic[key][i].index(ret)
 84 | 
 85 | 	#train
 86 | 	def fit(self,X,y,W):
 87 | 		self.W=W
 88 | 		dic=self.cal_dic(X)
 89 | 		e_dic=self.cal_e_dic(y,dic)
 90 | 		e_min,self.decision_key,self.decision_feature,e_min_i=self.cal_e_min(e_dic)
 91 | 		self.decision_threshold=X[e_min_i,self.decision_feature]
 92 | 		self.pred=dic[self.decision_key][self.decision_feature][e_min_i]
 93 | 		'''
 94 | 		print dic
 95 | 		print e_dic
 96 | 		print e_min,self.decision_key,self.decision_feature,e_min_i
 97 | 		print self.decision_threshold
 98 | 		print self.pred
 99 | 		'''
100 | 		return
101 | 
102 | '''
103 | X=np.array([0,12,15,23,33,46,51,72,82,100]).reshape([10,1])
104 | y=np.array([1,1,1,-1,-1,-1,1,1,1,-1])
105 | W=np.array([0.1]*10).reshape([10,1])/10
106 | wk=WeakClassifier()
107 | wk.fit(X,y,W)
108 | '''
109 | 


--------------------------------------------------------------------------------
/WeakClassifier.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | import numpy as np 
  3 | 
  4 | class WeakClassifier:
  5 | 
  6 | 	'''
  7 | 	for every feature,calculate the all the possible decision_threshold\
  8 | 	remember 'gt':great than or 'lt':less than. Finally,get a dictionary\
  9 | 	as dic={'gt':{'0':...},{'1':...},...,'lt':{'0':...},{'1':...},...}\
 10 | 	the symbol '...' above is a two dimension np.array([[],[],[],...])\
 11 | 	'''
 12 | 	def cal_dic(self,X):
 13 | 		ret_gt={}
 14 | 		for i in range(X.shape[1]):
 15 | 			ret_gt[i]=[]
 16 | 			for j in range(X.shape[0]):
 17 | 				temp_threshold=X[j,i]
 18 | 				temp_line=[]
 19 | 				for k in range(X.shape[0]):
 20 | 					if X[k,i]>=temp_threshold:
 21 | 						temp_line.append(1)
 22 | 					else:
 23 | 						temp_line.append(-1)
 24 | 				ret_gt[i].append(temp_line)
 25 | 
 26 | 		ret_lt={}
 27 | 		for i in range(X.shape[1]):
 28 | 			ret_lt[i]=[]
 29 | 			for j in range(X.shape[0]):
 30 | 				temp_threshold=X[j,i]
 31 | 				temp_line=[]
 32 | 				for k in range(X.shape[0]):
 33 | 					if X[k,i]<=temp_threshold:
 34 | 						temp_line.append(1)
 35 | 					else:
 36 | 						temp_line.append(-1)
 37 | 				ret_lt[i].append(temp_line)
 38 | 		ret={}
 39 | 		ret['gt']=ret_gt
 40 | 		ret['lt']=ret_lt
 41 | 		return ret
 42 | 
 43 | 	#calculate error for one dimension array
 44 | 	def cal_e_line(self,y,line):
 45 | 		ret=0
 46 | 		for i in range(len(y)):
 47 | 			if y[i]!=line[i]:
 48 | 				ret+=self.W[i]
 49 | 		return ret
 50 | 
 51 | 	#calculate error for two dimension array
 52 | 	def cal_e_lines(self,y,lines):
 53 | 		ret=[]
 54 | 		for i in lines:
 55 | 			ret.append(self.cal_e_line(y,i))
 56 | 		return ret
 57 | 
 58 | 	#calculate error for all possible data and get e_dic
 59 | 	def cal_e_dic(self,y,dic):
 60 | 		ret_gt={}
 61 | 		for i in dic['gt']:
 62 | 			ret_gt[i]=(self.cal_e_lines(y,dic['gt'][i]))
 63 | 		ret_lt={}
 64 | 		for i in dic['lt']:
 65 | 			ret_lt[i]=(self.cal_e_lines(y,dic['lt'][i]))
 66 | 		ret={}
 67 | 		ret['gt']=ret_gt
 68 | 		ret['lt']=ret_lt
 69 | 		return ret
 70 | 
 71 | 	#select min error for e_dic
 72 | 	def cal_e_min(self,e_dic):
 73 | 		ret=100000
 74 | 		for key in e_dic:
 75 | 			for i in e_dic[key]:
 76 | 				temp=min(e_dic[key][i])
 77 | 				if ret>temp:
 78 | 					ret=temp
 79 | 		for key in e_dic:
 80 | 			for i in e_dic[key]:
 81 | 				if ret == min(e_dic[key][i]):
 82 | 					#return key,feature_index,index
 83 | 					return ret,key,i,e_dic[key][i].index(ret)
 84 | 
 85 | 	#train
 86 | 	def fit(self,X,y,W):
 87 | 		self.W=W
 88 | 		dic=self.cal_dic(X)
 89 | 		e_dic=self.cal_e_dic(y,dic)
 90 | 		e_min,self.decision_key,self.decision_feature,e_min_i=self.cal_e_min(e_dic)
 91 | 		self.decision_threshold=X[e_min_i,self.decision_feature]
 92 | 		self.pred=dic[self.decision_key][self.decision_feature][e_min_i]
 93 | 		'''
 94 | 		print dic
 95 | 		print e_dic
 96 | 		print e_min,self.decision_key,self.decision_feature,e_min_i
 97 | 		print self.decision_threshold
 98 | 		print self.pred
 99 | 		'''
100 | 		return
101 | 
102 | '''
103 | X=np.array([0,12,15,23,33,46,51,72,82,100]).reshape([10,1])
104 | y=np.array([1,1,1,-1,-1,-1,1,1,1,-1])
105 | W=np.array([0.1]*10).reshape([10,1])/10
106 | wk=WeakClassifier()
107 | wk.fit(X,y,W)
108 | '''
109 | 


--------------------------------------------------------------------------------