├── Python3 ├── README.md ├── data.csv ├── test.py ├── make_data.py ├── Plot2D.py ├── Adaboost.py └── WeakClassifier.py ├── README.md ├── data.csv ├── test.py ├── make_data.py ├── Plot2D.py ├── Adaboost.py └── WeakClassifier.py /Python3/README.md: -------------------------------------------------------------------------------- 1 | # AdaboostExample 2 | AdaboostExample 3 | 4 | Python3.7 5 | 6 | You can dircet run test.py to view the result, the introduction can be found in the CSDN blog: 7 | https://blog.csdn.net/px_528/article/details/72963977 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AdaboostExample 2 | AdaboostExample 3 | 4 | Python2.7 --> python test.py 5 | 6 | Python3.7 --> python Python3/test.py 7 | 8 | You can dircet run test.py to view the result, the introduction can be found in the CSDN blog: 9 | https://blog.csdn.net/px_528/article/details/72963977 10 | -------------------------------------------------------------------------------- /data.csv: -------------------------------------------------------------------------------- 1 | 0,1,label 2 | 0.55,4.4,-1 3 | 1.1,2.8,-1 4 | 1.85,1.95,-1 5 | 3.15,1.7,-1 6 | 4.0,2.7,-1 7 | 3.75,3.95,-1 8 | 2.8,4.4,-1 9 | 2.35,3.2,-1 10 | 3.05,2.25,-1 11 | 3.55,2.6,-1 12 | 3.1,3.0,-1 13 | 3.0,3.4,-1 14 | 1.0,7.3,1 15 | 1.4,6.7,1 16 | 3.05,6.9,1 17 | 4.3,7.15,1 18 | 4.75,7.0,1 19 | 5.5,5.85,1 20 | 5.95,4.75,1 21 | 6.45,3.15,1 22 | 6.5,1.35,1 23 | 6.3,0.95,1 24 | 5.95,0.85,1 25 | 5.95,1.6,1 26 | 5.85,2.75,1 27 | 5.65,4.0,1 28 | 5.35,5.25,1 29 | 5.0,6.15,1 30 | 4.7,6.3,1 31 | 3.85,6.5,1 32 | 2.55,6.55,1 33 | 1.4,6.65,1 34 | 0.6,6.75,1 35 | 0.6,6.85,1 36 | 5.35,0.9,1 37 | -------------------------------------------------------------------------------- /Python3/data.csv: -------------------------------------------------------------------------------- 1 | 0,1,label 2 | 0.55,4.4,-1 3 | 
1.1,2.8,-1 4 | 1.85,1.95,-1 5 | 3.15,1.7,-1 6 | 4.0,2.7,-1 7 | 3.75,3.95,-1 8 | 2.8,4.4,-1 9 | 2.35,3.2,-1 10 | 3.05,2.25,-1 11 | 3.55,2.6,-1 12 | 3.1,3.0,-1 13 | 3.0,3.4,-1 14 | 1.0,7.3,1 15 | 1.4,6.7,1 16 | 3.05,6.9,1 17 | 4.3,7.15,1 18 | 4.75,7.0,1 19 | 5.5,5.85,1 20 | 5.95,4.75,1 21 | 6.45,3.15,1 22 | 6.5,1.35,1 23 | 6.3,0.95,1 24 | 5.95,0.85,1 25 | 5.95,1.6,1 26 | 5.85,2.75,1 27 | 5.65,4.0,1 28 | 5.35,5.25,1 29 | 5.0,6.15,1 30 | 4.7,6.3,1 31 | 3.85,6.5,1 32 | 2.55,6.55,1 33 | 1.4,6.65,1 34 | 0.6,6.75,1 35 | 0.6,6.85,1 36 | 5.35,0.9,1 37 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | from Adaboost import * 2 | from Plot2D import * 3 | import pandas as pd 4 | 5 | #data.csv is created by make_data.py 6 | data=pd.read_csv('data.csv') 7 | 8 | #get X and y 9 | X=data.iloc[:,:-1].values 10 | y=data.iloc[:,-1].values 11 | 12 | #train the AdaboostClassifier 13 | clf=AdaboostClassifier() 14 | times=clf.fit(X,y) 15 | 16 | #plot original data 17 | Plot2D(data).pause(3) 18 | 19 | #plot Adaboost decision_threshold 20 | for i in xrange(times): 21 | if clf.weak[i].decision_feature==0: 22 | plt.plot([clf.weak[i].decision_threshold,clf.weak[i].decision_threshold],[0,8]) 23 | else: 24 | plt.plot([0,8],[clf.weak[i].decision_threshold,clf.weak[i].decision_threshold]) 25 | plt.pause(3) 26 | -------------------------------------------------------------------------------- /Python3/test.py: -------------------------------------------------------------------------------- 1 | from Adaboost import * 2 | from Plot2D import * 3 | import pandas as pd 4 | 5 | #data.csv is created by make_data.py 6 | data=pd.read_csv('data.csv') 7 | 8 | #get X and y 9 | X=data.iloc[:,:-1].values 10 | y=data.iloc[:,-1].values 11 | 12 | #train the AdaboostClassifier 13 | clf=AdaboostClassifier() 14 | times=clf.fit(X,y) 15 | 16 | #plot original data 17 | Plot2D(data).pause(3) 18 | 19 
| #plot Adaboost decision_threshold 20 | for i in range(times): 21 | if clf.weak[i].decision_feature==0: 22 | plt.plot([clf.weak[i].decision_threshold,clf.weak[i].decision_threshold],[0,8]) 23 | else: 24 | plt.plot([0,8],[clf.weak[i].decision_threshold,clf.weak[i].decision_threshold]) 25 | plt.pause(3) 26 | -------------------------------------------------------------------------------- /make_data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | X=np.array([ 5 | [0.55,4.4], 6 | [1.1,2.8], 7 | [1.85,1.95], 8 | [3.15,1.7], 9 | [4,2.7], 10 | [3.75,3.95], 11 | [2.8,4.4], 12 | [2.35,3.2], 13 | [3.05,2.25], 14 | [3.55,2.6], 15 | [3.1,3], 16 | [3,3.4], 17 | [1,7.3], 18 | [1.4,6.7], 19 | [3.05,6.9], 20 | [4.3,7.15], 21 | [4.75,7], 22 | [5.5,5.85], 23 | [5.95,4.75], 24 | [6.45,3.15], 25 | [6.5,1.35], 26 | [6.3,0.95], 27 | [5.95,0.85], 28 | [5.95,1.6], 29 | [5.85,2.75], 30 | [5.65,4], 31 | [5.35,5.25], 32 | [5,6.15], 33 | [4.7,6.3], 34 | [3.85,6.5], 35 | [2.55,6.55], 36 | [1.4,6.65], 37 | [0.6,6.75], 38 | [0.6,6.85], 39 | [5.35,0.9]]) 40 | 41 | 42 | y=np.array([ 43 | [-1], 44 | [-1], 45 | [-1], 46 | [-1], 47 | [-1], 48 | [-1], 49 | [-1], 50 | [-1], 51 | [-1], 52 | [-1], 53 | [-1], 54 | [-1], 55 | [1], 56 | [1], 57 | [1], 58 | [1], 59 | [1], 60 | [1], 61 | [1], 62 | [1], 63 | [1], 64 | [1], 65 | [1], 66 | [1], 67 | [1], 68 | [1], 69 | [1], 70 | [1], 71 | [1], 72 | [1], 73 | [1], 74 | [1], 75 | [1], 76 | [1], 77 | [1]]) 78 | 79 | X=pd.DataFrame(X) 80 | X['label']=y 81 | X.to_csv('data.csv',index=None) 82 | -------------------------------------------------------------------------------- /Python3/make_data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | X=np.array([ 5 | [0.55,4.4], 6 | [1.1,2.8], 7 | [1.85,1.95], 8 | [3.15,1.7], 9 | [4,2.7], 10 | [3.75,3.95], 11 | [2.8,4.4], 12 | [2.35,3.2], 13 | 
[3.05,2.25], 14 | [3.55,2.6], 15 | [3.1,3], 16 | [3,3.4], 17 | [1,7.3], 18 | [1.4,6.7], 19 | [3.05,6.9], 20 | [4.3,7.15], 21 | [4.75,7], 22 | [5.5,5.85], 23 | [5.95,4.75], 24 | [6.45,3.15], 25 | [6.5,1.35], 26 | [6.3,0.95], 27 | [5.95,0.85], 28 | [5.95,1.6], 29 | [5.85,2.75], 30 | [5.65,4], 31 | [5.35,5.25], 32 | [5,6.15], 33 | [4.7,6.3], 34 | [3.85,6.5], 35 | [2.55,6.55], 36 | [1.4,6.65], 37 | [0.6,6.75], 38 | [0.6,6.85], 39 | [5.35,0.9]]) 40 | 41 | 42 | y=np.array([ 43 | [-1], 44 | [-1], 45 | [-1], 46 | [-1], 47 | [-1], 48 | [-1], 49 | [-1], 50 | [-1], 51 | [-1], 52 | [-1], 53 | [-1], 54 | [-1], 55 | [1], 56 | [1], 57 | [1], 58 | [1], 59 | [1], 60 | [1], 61 | [1], 62 | [1], 63 | [1], 64 | [1], 65 | [1], 66 | [1], 67 | [1], 68 | [1], 69 | [1], 70 | [1], 71 | [1], 72 | [1], 73 | [1], 74 | [1], 75 | [1], 76 | [1], 77 | [1]]) 78 | 79 | X=pd.DataFrame(X) 80 | X['label']=y 81 | X.to_csv('data.csv',index=None) 82 | -------------------------------------------------------------------------------- /Plot2D.py: -------------------------------------------------------------------------------- 1 | from sklearn.preprocessing import LabelEncoder 2 | from matplotlib import pyplot as plt 3 | import pandas as pd 4 | import warnings 5 | 6 | class Plot2D: 7 | 8 | def __init__(self,data): 9 | self.__data=data 10 | self.__X=data.iloc[:,:-1].values 11 | self.__le=LabelEncoder() 12 | self.__y=self.__le.fit_transform(data.iloc[:,-1].values) 13 | self.__xmin=self.__X[:,0].min() 14 | self.__xmax=self.__X[:,0].max() 15 | self.__ymin=self.__X[:,1].min() 16 | self.__ymax=self.__X[:,1].max() 17 | self.__marker_list=['o','x'] 18 | 19 | def show(self): 20 | plt.xlim(self.__xmin-(self.__xmax-self.__xmin)*0.1,self.__xmax+(self.__xmax-self.__xmin)*0.1) 21 | plt.ylim(self.__ymin-(self.__ymax-self.__ymin)*0.1,self.__ymax+(self.__ymax-self.__ymin)*0.1) 22 | for i in xrange(2): 23 | temp=self.__data.loc[:][self.__data['label']==self.__le.inverse_transform(i)] 24 | 
plt.scatter(temp.iloc[:,0],temp.iloc[:,1],label=self.__le.inverse_transform(i),marker=self.__marker_list[i]) 25 | plt.legend() 26 | plt.xlabel(self.__data.columns[0]) 27 | plt.ylabel(self.__data.columns[1]) 28 | plt.show() 29 | 30 | def pause(self,Seconds): 31 | warnings.filterwarnings("ignore",".*GUI is implemented.*") 32 | 33 | plt.ion() 34 | plt.xlim(self.__xmin-(self.__xmax-self.__xmin)*0.1,self.__xmax+(self.__xmax-self.__xmin)*0.1) 35 | plt.ylim(self.__ymin-(self.__ymax-self.__ymin)*0.1,self.__ymax+(self.__ymax-self.__ymin)*0.1) 36 | for i in xrange(2): 37 | temp=self.__data.loc[:][self.__data['label']==self.__le.inverse_transform(i)] 38 | plt.scatter(temp.iloc[:,0],temp.iloc[:,1],label=self.__le.inverse_transform(i),marker=self.__marker_list[i]) 39 | plt.legend() 40 | plt.xlabel(self.__data.columns[0]) 41 | plt.ylabel(self.__data.columns[1]) 42 | plt.pause(Seconds) 43 | 44 | ''' 45 | data=pd.read_csv('data.csv') 46 | Plot2D(data).show() 47 | ''' -------------------------------------------------------------------------------- /Python3/Plot2D.py: -------------------------------------------------------------------------------- 1 | from sklearn.preprocessing import LabelEncoder 2 | from matplotlib import pyplot as plt 3 | import pandas as pd 4 | import warnings 5 | 6 | class Plot2D: 7 | 8 | def __init__(self,data): 9 | self.__data=data 10 | self.__X=data.iloc[:,:-1].values 11 | self.__le=LabelEncoder() 12 | self.__y=self.__le.fit_transform(data.iloc[:,-1].values) 13 | self.__xmin=self.__X[:,0].min() 14 | self.__xmax=self.__X[:,0].max() 15 | self.__ymin=self.__X[:,1].min() 16 | self.__ymax=self.__X[:,1].max() 17 | self.__marker_list=['o','x'] 18 | 19 | def show(self): 20 | plt.xlim(self.__xmin-(self.__xmax-self.__xmin)*0.1,self.__xmax+(self.__xmax-self.__xmin)*0.1) 21 | plt.ylim(self.__ymin-(self.__ymax-self.__ymin)*0.1,self.__ymax+(self.__ymax-self.__ymin)*0.1) 22 | length=len(self.__data.loc[:]) 23 | for i in range(2): 24 | 
temp=self.__data.loc[:][self.__data['label']==self.__le.inverse_transform([i]*length)] 25 | plt.scatter(temp.iloc[:,0],temp.iloc[:,1],label=self.__le.inverse_transform([i]),marker=self.__marker_list[i]) 26 | plt.legend() 27 | plt.xlabel(self.__data.columns[0]) 28 | plt.ylabel(self.__data.columns[1]) 29 | plt.show() 30 | 31 | def pause(self,Seconds): 32 | warnings.filterwarnings("ignore",".*GUI is implemented.*") 33 | 34 | plt.ion() 35 | plt.xlim(self.__xmin-(self.__xmax-self.__xmin)*0.1,self.__xmax+(self.__xmax-self.__xmin)*0.1) 36 | plt.ylim(self.__ymin-(self.__ymax-self.__ymin)*0.1,self.__ymax+(self.__ymax-self.__ymin)*0.1) 37 | length=len(self.__data.loc[:]) 38 | for i in range(2): 39 | temp=self.__data.loc[:][self.__data['label']==self.__le.inverse_transform([i]*length)] 40 | plt.scatter(temp.iloc[:,0],temp.iloc[:,1],label=self.__le.inverse_transform([i]),marker=self.__marker_list[i]) 41 | plt.legend() 42 | plt.xlabel(self.__data.columns[0]) 43 | plt.ylabel(self.__data.columns[1]) 44 | plt.pause(Seconds) 45 | 46 | 47 | ''' 48 | data=pd.read_csv('data.csv') 49 | Plot2D(data).show() 50 | ''' -------------------------------------------------------------------------------- /Python3/Adaboost.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import numpy as np 4 | from WeakClassifier import * 5 | 6 | class AdaboostClassifier: 7 | 8 | #calculate new Weight 9 | def cal_W(self,W,alpha,y,pred): 10 | ret=0 11 | new_W=[] 12 | for i in range(len(y)): 13 | new_W.append(W[i]*np.exp(-alpha*y[i]*pred[i])) 14 | return np.array(new_W/sum(new_W)).reshape([len(y),1]) 15 | 16 | #calculate error rate per iteration 17 | def cal_e(self,y,pred,W): 18 | ret=0 19 | for i in range(len(y)): 20 | if y[i]!=pred[i]: 21 | ret+=W[i] 22 | return ret 23 | 24 | #calculate alpha 25 | def cal_alpha(self,e): 26 | if e==0: 27 | return 10000 28 | elif e==0.5: 29 | return 0.001 30 | else: 31 | return 0.5*np.log((1-e)/e) 32 | 33 | #calculate final predict 
def cal_final_pred(self, i, alpha, weak, y):
    """Combine weak learners 0..i into the boosted ensemble prediction.

    Returns sign(sum_j alpha[j] * h_j) over the training samples.

    `weak[j].pred` is a plain Python list, so it must be converted to a
    float array first: with a non-numpy alpha (e.g. the plain int 10000
    that cal_alpha returns when e == 0), `alpha * list` would REPLICATE
    the list instead of scaling it and crash the accumulation.
    """
    ret = np.array([0.0] * len(y))
    for j in range(i + 1):
        ret += alpha[j] * np.asarray(weak[j].pred, dtype=float)
    return np.sign(ret)

def cal_final_e(self, y, cal_final_predict):
    """Return the fraction of samples where the boosted prediction
    disagrees with the true labels y."""
    ret = 0
    for i in range(len(y)):
        if y[i] != cal_final_predict[i]:
            ret += 1
    return ret / len(y)

def fit(self, X, y, M=15):
    """Train the Adaboost ensemble for at most M boosting rounds.

    Each round re-weights the samples (cal_W), fits a WeakClassifier on
    the current weights, and accumulates its alpha-weighted vote.
    Training stops early once the ensemble (or the current weak learner)
    classifies every sample correctly.

    Returns the number of weak learners actually trained (1-based).
    """
    W = {}            # sample-weight column vector per iteration
    self.weak = {}    # trained WeakClassifier per iteration
    alpha = {}        # learner vote weight per iteration
    pred = {}         # weak-learner predictions per iteration

    for i in range(M):
        if i == 0:
            # uniform initial weights, shaped (n, 1) like cal_W's output
            W[i] = np.array([1] * len(y)) / len(y)
            W[i] = W[i].reshape([len(y), 1])
        else:
            # re-weight from the previous round's alpha and predictions
            W[i] = self.cal_W(W[i - 1], alpha[i - 1], y, pred[i - 1])

        # train this round's weak learner on the current weights
        self.weak[i] = WeakClassifier()
        self.weak[i].fit(X, y, W[i])
        pred[i] = self.weak[i].pred

        # weighted error and vote weight for this round
        e = self.cal_e(y, pred[i], W[i])
        alpha[i] = self.cal_alpha(e)
        # boosted prediction using all learners trained so far
        cal_final_predict = self.cal_final_pred(i, alpha, self.weak, y)

        print('iteration:%d' % (i + 1))
        print('self.decision_key=%s' % (self.weak[i].decision_key))
        print('self.decision_feature=%d' % (self.weak[i].decision_feature))
        print('decision_threshold=%f' % (self.weak[i].decision_threshold))
        print('W=%s' % (W[i]))
        print('pred=%s' % (pred[i]))
        print('e:%f alpha:%f' % (e, alpha[i]))
        print('cal_final_predict:%s' % (cal_final_predict))
        print('cal_final_e:%s%%' % (self.cal_final_e(y, cal_final_predict) * 100))
        print('')

        # stop once the ensemble (or this single learner) is perfect
        if self.cal_final_e(y, cal_final_predict) == 0 or e == 0:
            break
    # iteration count, from 1 on
    return i + 1
98 | return i+1 99 | 100 | -------------------------------------------------------------------------------- /Adaboost.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import division 3 | import numpy as np 4 | from WeakClassifier import * 5 | 6 | class AdaboostClassifier: 7 | 8 | #calculate new Weight 9 | def cal_W(self,W,alpha,y,pred): 10 | ret=0 11 | new_W=[] 12 | for i in range(len(y)): 13 | new_W.append(W[i]*np.exp(-alpha*y[i]*pred[i])) 14 | return np.array(new_W/sum(new_W)).reshape([len(y),1]) 15 | 16 | #calculate error rate per iteration 17 | def cal_e(self,y,pred,W): 18 | ret=0 19 | for i in range(len(y)): 20 | if y[i]!=pred[i]: 21 | ret+=W[i] 22 | return ret 23 | 24 | #calculate alpha 25 | def cal_alpha(self,e): 26 | if e==0: 27 | return 10000 28 | elif e==0.5: 29 | return 0.001 30 | else: 31 | return 0.5*np.log((1-e)/e) 32 | 33 | #calculate final predict value 34 | def cal_final_pred(self,i,alpha,weak,y): 35 | ret=np.array([0.0]*len(y)) 36 | for j in range(i+1): 37 | ret+=alpha[j]*weak[j].pred 38 | return np.sign(ret) 39 | 40 | #calculate final error rate 41 | def cal_final_e(self,y,cal_final_predict): 42 | ret=0 43 | for i in range(len(y)): 44 | if y[i]!=cal_final_predict[i]: 45 | ret+=1 46 | return ret/len(y) 47 | 48 | #train 49 | def fit(self,X,y,M=15): 50 | W={} 51 | self.weak={} 52 | alpha={} 53 | pred={} 54 | 55 | for i in range(M): 56 | W.setdefault(i) 57 | self.weak.setdefault(i) 58 | alpha.setdefault(i) 59 | pred.setdefault(i) 60 | 61 | #per iteration (all:M times) 62 | for i in range(M): 63 | #for the first iteration,initial W 64 | if i == 0: 65 | W[i]=np.array([1]*len(y))/len(y) 66 | W[i]=W[i].reshape([len(y),1]) 67 | #if not the first iteration,calculate new Weight 68 | else: 69 | W[i]=self.cal_W(W[i-1],alpha[i-1],y,pred[i-1]) 70 | 71 | #using train weak learner and get this learner predict value 72 | self.weak[i]=WeakClassifier() 73 | self.weak[i].fit(X,y,W[i]) 74 | 
pred[i]=self.weak[i].pred 75 | 76 | #calculate error rate this iteration 77 | e=self.cal_e(y,pred[i],W[i]) 78 | #calculate alpha this iteration 79 | alpha[i]=self.cal_alpha(e) 80 | #calculate the final predict value 81 | cal_final_predict=self.cal_final_pred(i,alpha,self.weak,y) 82 | 83 | print 'iteration:%d'%(i+1) 84 | print 'self.decision_key=%s'%(self.weak[i].decision_key) 85 | print 'self.decision_feature=%d'%(self.weak[i].decision_feature) 86 | print 'decision_threshold=%f'%(self.weak[i].decision_threshold) 87 | print 'W=%s'%(W[i]) 88 | print 'pred=%s'%(pred[i]) 89 | print 'e:%f alpha:%f'%(e,alpha[i]) 90 | print 'cal_final_predict:%s'%(cal_final_predict) 91 | print 'cal_final_e:%s%%'%(self.cal_final_e(y,cal_final_predict)*100) 92 | print '' 93 | 94 | #calculate the final error rate,if it is zero,stop iteration. 95 | if self.cal_final_e(y,cal_final_predict)==0 or e==0: 96 | break 97 | #return the iteration times,from 1 on. 98 | return i+1 99 | 100 | -------------------------------------------------------------------------------- /Python3/WeakClassifier.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | class WeakClassifier: 5 | 6 | ''' 7 | for every feature,calculate the all the possible decision_threshold\ 8 | remember 'gt':great than or 'lt':less than. Finally,get a dictionary\ 9 | as dic={'gt':{'0':...},{'1':...},...,'lt':{'0':...},{'1':...},...}\ 10 | the symbol '...' 
def cal_dic(self, X):
    """Enumerate every candidate decision stump.

    Every sample value of every feature is tried as a threshold.
    Returns {'gt': {feat: rows}, 'lt': {feat: rows}} where rows[j][k]
    is the +/-1 prediction for sample k when sample j's value is the
    threshold: 'gt' stumps predict +1 where X[k, f] >= threshold,
    'lt' stumps predict +1 where X[k, f] <= threshold.
    (The gt and lt tables are built in one pass instead of two
    duplicated loops.)
    """
    n_samples = X.shape[0]
    n_features = X.shape[1]
    ret_gt = {}
    ret_lt = {}
    for i in range(n_features):
        ret_gt[i] = []
        ret_lt[i] = []
        for j in range(n_samples):
            threshold = X[j, i]
            ret_gt[i].append([1 if X[k, i] >= threshold else -1
                              for k in range(n_samples)])
            ret_lt[i].append([1 if X[k, i] <= threshold else -1
                              for k in range(n_samples)])
    return {'gt': ret_gt, 'lt': ret_lt}

def cal_e_line(self, y, line):
    """Weighted error of one stump row against labels y (weights self.W)."""
    ret = 0
    for i in range(len(y)):
        if y[i] != line[i]:
            ret += self.W[i]
    return ret

def cal_e_lines(self, y, lines):
    """Weighted error for each candidate stump row in `lines`."""
    return [self.cal_e_line(y, line) for line in lines]

def cal_e_dic(self, y, dic):
    """Weighted error for every candidate stump produced by cal_dic,
    keyed the same way ('gt'/'lt' -> feature -> threshold index)."""
    return {key: {i: self.cal_e_lines(y, dic[key][i]) for i in dic[key]}
            for key in dic}

def cal_e_min(self, e_dic):
    """Locate the stump with the smallest weighted error.

    Single pass with strict '<' keeps the FIRST minimum in iteration
    order ('gt' before 'lt', then feature, then threshold index) —
    the same stump the original two-scan lookup selected — while
    avoiding the fragile float-equality re-scan.

    Returns (error, key, feature_index, threshold_index).
    """
    best = None
    for key in e_dic:
        for feat in e_dic[key]:
            for idx, err in enumerate(e_dic[key][feat]):
                if best is None or err < best[0]:
                    best = (err, key, feat, idx)
    return best

def fit(self, X, y, W):
    """Pick the best single-threshold stump under sample weights W.

    Stores the chosen stump as decision_key ('gt'/'lt'),
    decision_feature, decision_threshold, and its training
    predictions in self.pred.
    """
    self.W = W
    dic = self.cal_dic(X)
    e_dic = self.cal_e_dic(y, dic)
    e_min, self.decision_key, self.decision_feature, e_min_i = self.cal_e_min(e_dic)
    # threshold is the feature value of the selected training sample
    self.decision_threshold = X[e_min_i, self.decision_feature]
    # predictions of the chosen stump on the training set
    self.pred = dic[self.decision_key][self.decision_feature][e_min_i]
    return
e_min,self.decision_key,self.decision_feature,e_min_i=self.cal_e_min(e_dic) 91 | self.decision_threshold=X[e_min_i,self.decision_feature] 92 | self.pred=dic[self.decision_key][self.decision_feature][e_min_i] 93 | ''' 94 | print dic 95 | print e_dic 96 | print e_min,self.decision_key,self.decision_feature,e_min_i 97 | print self.decision_threshold 98 | print self.pred 99 | ''' 100 | return 101 | 102 | ''' 103 | X=np.array([0,12,15,23,33,46,51,72,82,100]).reshape([10,1]) 104 | y=np.array([1,1,1,-1,-1,-1,1,1,1,-1]) 105 | W=np.array([0.1]*10).reshape([10,1])/10 106 | wk=WeakClassifier() 107 | wk.fit(X,y,W) 108 | ''' 109 | -------------------------------------------------------------------------------- /WeakClassifier.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | 4 | class WeakClassifier: 5 | 6 | ''' 7 | for every feature,calculate the all the possible decision_threshold\ 8 | remember 'gt':great than or 'lt':less than. Finally,get a dictionary\ 9 | as dic={'gt':{'0':...},{'1':...},...,'lt':{'0':...},{'1':...},...}\ 10 | the symbol '...' 
def cal_dic(self, X):
    """Build the prediction table of every candidate stump.

    Each observed value of each feature is used as a cut point.
    The result is {'gt': {col: table}, 'lt': {col: table}}, where
    table[row][k] is the +/-1 label assigned to sample k when the
    cut point is sample `row`'s value: 'gt' stumps output +1 at or
    above the cut, 'lt' stumps output +1 at or below it.
    """
    rows = X.shape[0]
    cols = X.shape[1]
    greater = {}
    for col in range(cols):
        table = []
        for row in range(rows):
            cut = X[row, col]
            table.append([1 if X[k, col] >= cut else -1 for k in range(rows)])
        greater[col] = table
    less = {}
    for col in range(cols):
        table = []
        for row in range(rows):
            cut = X[row, col]
            table.append([1 if X[k, col] <= cut else -1 for k in range(rows)])
        less[col] = table
    return {'gt': greater, 'lt': less}

def cal_e_line(self, y, line):
    """Weighted misclassification cost of a single stump row."""
    total = 0
    for label, guess, weight in zip(y, line, self.W):
        if label != guess:
            total += weight
    return total

def cal_e_lines(self, y, lines):
    """Weighted cost of every row in a stump table."""
    return [self.cal_e_line(y, row) for row in lines]

def cal_e_dic(self, y, dic):
    """Weighted cost for every stump in the table built by cal_dic,
    mirroring its 'gt'/'lt' -> column -> row layout."""
    errors = {}
    for side in ('gt', 'lt'):
        errors[side] = {}
        for col in dic[side]:
            errors[side][col] = self.cal_e_lines(y, dic[side][col])
    return errors

def cal_e_min(self, e_dic):
    """Find the cheapest stump.

    Returns (error, side, column, row): the smallest weighted error
    and where it occurs, taking the first occurrence in iteration
    order on ties.
    """
    # first pass: the smallest weighted error anywhere
    lowest = 100000
    for side in e_dic:
        for col in e_dic[side]:
            candidate = min(e_dic[side][col])
            if candidate < lowest:
                lowest = candidate
    # second pass: the first stump whose error equals that minimum
    for side in e_dic:
        for col in e_dic[side]:
            if lowest == min(e_dic[side][col]):
                return lowest, side, col, e_dic[side][col].index(lowest)

def fit(self, X, y, W):
    """Select the best threshold stump for sample weights W and store
    it as decision_key / decision_feature / decision_threshold, with
    its training-set predictions in self.pred."""
    self.W = W
    table = self.cal_dic(X)
    errors = self.cal_e_dic(y, table)
    e_min, self.decision_key, self.decision_feature, e_min_i = self.cal_e_min(errors)
    self.decision_threshold = X[e_min_i, self.decision_feature]
    self.pred = table[self.decision_key][self.decision_feature][e_min_i]
    return
e_min,self.decision_key,self.decision_feature,e_min_i=self.cal_e_min(e_dic) 91 | self.decision_threshold=X[e_min_i,self.decision_feature] 92 | self.pred=dic[self.decision_key][self.decision_feature][e_min_i] 93 | ''' 94 | print dic 95 | print e_dic 96 | print e_min,self.decision_key,self.decision_feature,e_min_i 97 | print self.decision_threshold 98 | print self.pred 99 | ''' 100 | return 101 | 102 | ''' 103 | X=np.array([0,12,15,23,33,46,51,72,82,100]).reshape([10,1]) 104 | y=np.array([1,1,1,-1,-1,-1,1,1,1,-1]) 105 | W=np.array([0.1]*10).reshape([10,1])/10 106 | wk=WeakClassifier() 107 | wk.fit(X,y,W) 108 | ''' 109 | --------------------------------------------------------------------------------