├── .gitignore
├── BroadLearning
│   ├── bls.py
│   ├── bls_addinput.py
│   ├── bls_enhence.py
│   ├── bls_enhmap.py
│   └── bls_mapping.py
├── LICENSE
└── README.md

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 |
--------------------------------------------------------------------------------
/BroadLearning/bls.py:
--------------------------------------------------------------------------------
1 | import numpy as np 2 | from sklearn import preprocessing 3 | from numpy import random 4 | from sklearn.datasets import load_iris 5 | from sklearn.model_selection import train_test_split 6 | import pandas as pd 7 | import datetime 8 | import skimage.io as io 9 | import skimage 10 | import math 11 | from sklearn.decomposition import PCA 12 | import scipy.io as sio 13 | 14 | def show_accuracy(predictLabel,Label): 15 | Label = np.ravel(Label).tolist() 16 | predictLabel = predictLabel.tolist() 17 | count = 0 18 | for i in range(len(Label)): 19 | if Label[i] == predictLabel[i]: 20 | count += 1 21 | return (round(count/len(Label),5)) 22 | 23 | class node_generator: 24 | def __init__(self,whiten = False): 25 | self.Wlist = [] 26 | self.blist = [] 27 | self.nonlinear = 0 28 | 
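        # Wlist / blist keep one random weight matrix and bias per generated
        # batch of nodes; whiten = True additionally orthonormalizes each
        # weight matrix via orth(), as used for the enhancement nodes.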
self.whiten = whiten 29 | 30 | def sigmoid(self,data): 31 | return 1.0/(1+np.exp(-data)) 32 | 33 | def linear(self,data): 34 | return data 35 | 36 | def tanh(self,data): 37 | return (np.exp(data)-np.exp(-data))/(np.exp(data)+np.exp(-data)) 38 | 39 | def relu(self,data): 40 | return np.maximum(data,0) 41 | 42 | def orth(self,W): 43 | for i in range(0,W.shape[1]): 44 | w = np.mat(W[:,i].copy()).T 45 | w_sum = 0 46 | for j in range(i): 47 | wj = np.mat(W[:,j].copy()).T 48 | w_sum += (w.T.dot(wj))[0,0]*wj 49 | w -= w_sum 50 | w = w/np.sqrt(w.T.dot(w)) 51 | W[:,i] = np.ravel(w) 52 | return W 53 | 54 | def generator(self,shape,times): 55 | for i in range(times): 56 | W = 2*random.random(size=shape)-1 57 | if self.whiten == True: 58 | W = self.orth(W) 59 | b = 2*random.random()-1 60 | yield (W,b) 61 | 62 | def generator_nodes(self, data, times, batchsize, nonlinear): 63 | self.Wlist = [elem[0] for elem in self.generator((data.shape[1],batchsize),times)] 64 | self.blist = [elem[1] for elem in self.generator((data.shape[1],batchsize),times)] 65 | 66 | self.nonlinear = {'linear':self.linear, 67 | 'sigmoid':self.sigmoid, 68 | 'tanh':self.tanh, 69 | 'relu':self.relu 70 | }[nonlinear] 71 | nodes = self.nonlinear(data.dot(self.Wlist[0])+self.blist[0]) 72 | for i in range(1,len(self.Wlist)): 73 | nodes = np.column_stack((nodes, self.nonlinear(data.dot(self.Wlist[i])+self.blist[i]))) 74 | return nodes 75 | 76 | def transform(self,testdata): 77 | testnodes = self.nonlinear(testdata.dot(self.Wlist[0])+self.blist[0]) 78 | for i in range(1,len(self.Wlist)): 79 | testnodes = np.column_stack((testnodes, self.nonlinear(testdata.dot(self.Wlist[i])+self.blist[i]))) 80 | return testnodes 81 | 82 | def update(self,otherW, otherb): 83 | self.Wlist += otherW 84 | self.blist += otherb 85 | 86 | class scaler: 87 | def __init__(self): 88 | self._mean = 0 89 | self._std = 0 90 | 91 | def fit_transform(self,traindata): 92 | self._mean = traindata.mean(axis = 0) 93 | self._std = traindata.std(axis = 0) 94 | return (traindata-self._mean)/(self._std+0.001) 95 | 96 | def transform(self,testdata): 97 | return (testdata-self._mean)/(self._std+0.001) 98 | 99 | 100 | class broadnet: 101 | def __init__(self, 102 | maptimes = 10, 103 | enhencetimes = 10, 104 | map_function = 'linear', 105 | enhence_function = 'linear', 106 | batchsize = 'auto', 107 | reg = 0.001): 108 | 109 | self._maptimes = maptimes 110 | self._enhencetimes = enhencetimes 111 | self._batchsize = batchsize 112 | self._reg = reg 113 | self._map_function = map_function 114 | self._enhence_function = enhence_function 115 | 116 | self.W = 0 117 | self.pesuedoinverse = 0 118 | self.normalscaler = scaler() 119 | self.onehotencoder = preprocessing.OneHotEncoder(sparse = False) 120 | self.mapping_generator = node_generator() 121 | self.enhence_generator = node_generator(whiten = True) 122 | 123 | def fit(self,data,label): 124 | if self._batchsize == 'auto': 125 | self._batchsize = data.shape[1] 126 | data = self.normalscaler.fit_transform(data) 127 | label = self.onehotencoder.fit_transform(np.mat(label).T) 128 | 129 | mappingdata = self.mapping_generator.generator_nodes(data,self._maptimes,self._batchsize,self._map_function) 130 | enhencedata = self.enhence_generator.generator_nodes(mappingdata,self._enhencetimes,self._batchsize,self._enhence_function) 131 | 132 | print('number of mapping nodes {0}, number of enhence nodes {1}'.format(mappingdata.shape[1],enhencedata.shape[1])) 133 | print('mapping nodes maxvalue {0} minvalue {1} 
'.format(round(np.max(mappingdata),5),round(np.min(mappingdata),5))) 134 | print('enhence nodes maxvalue {0} minvalue {1} '.format(round(np.max(enhencedata),5),round(np.min(enhencedata),5))) 135 | 136 | inputdata = np.column_stack((mappingdata,enhencedata)) 137 | pesuedoinverse = self.pinv(inputdata,self._reg) 138 | self.W = pesuedoinverse.dot(label) 139 | 140 | def pinv(self,A,reg): 141 | return np.mat(reg*np.eye(A.shape[1])+A.T.dot(A)).I.dot(A.T) 142 | 143 | def decode(self,Y_onehot): 144 | Y = [] 145 | for i in range(Y_onehot.shape[0]): 146 | lis = np.ravel(Y_onehot[i,:]).tolist() 147 | Y.append(lis.index(max(lis))) 148 | return np.array(Y) 149 | 150 | def accuracy(self,predictlabel,label): 151 | label = np.ravel(label).tolist() 152 | predictlabel = predictlabel.tolist() 153 | count = 0 154 | for i in range(len(label)): 155 | if label[i] == predictlabel[i]: 156 | count += 1 157 | return (round(count/len(label),5)) 158 | 159 | def predict(self,testdata): 160 | testdata = self.normalscaler.transform(testdata) 161 | test_mappingdata = self.mapping_generator.transform(testdata) 162 | test_enhencedata = self.enhence_generator.transform(test_mappingdata) 163 | 164 | test_inputdata = np.column_stack((test_mappingdata,test_enhencedata)) 165 | return self.decode(test_inputdata.dot(self.W)) 166 | 167 | 168 | def LoadData(number): 169 | if number == 1: 170 | path = '/Users/zhuxiaoxiansheng/Desktop/日常/数据集/yale_faces/*.bmp' 171 | elif number == 2: 172 | path = '/Users/zhuxiaoxiansheng/Desktop/日常/数据集/orl_faces_full/*.pgm' 173 | elif number == 3: 174 | path = '/Users/zhuxiaoxiansheng/Desktop/日常/数据集/jaffe/*.tiff' 175 | elif number == 4: 176 | path = '/Volumes/TOSHIBA EXT/数据集/YaleB/*.pgm' 177 | 178 | pictures = io.ImageCollection(path) 179 | data = [] 180 | for i in range(len(pictures)): 181 | picture = pictures[i] 182 | picture = skimage.color.rgb2gray(picture) 183 | data.append(np.ravel(picture.reshape((1,picture.shape[0]*picture.shape[1])))) 184 | label = [] 185 | if number == 1: 186 | for i in range(len(data)): 187 | label.append(int(i/11)) 188 | elif number == 2: 189 | for i in range(len(data)): 190 | label.append(int(i/10)) 191 | elif number == 3: 192 | for i in range(len(data)): 193 | label.append(int(i/20)) 194 | elif number == 4: 195 | label = [0]*64+[1]*64+[2]*64+[3]*64+[4]*64+[5]*64+[6]*64+[7]*64+[8]*64+[9]*64+[10]*60+[11]*59+[12]*60+[13]*63+[14]*62+[15]*63+[16]*63+[17]*64+[18]*64+[19]*64+[20]*64+[21]*64+[22]*64+[23]*64+[24]*64+[25]*64+[26]*64+[27]*64+[28]*64+[29]*64+[30]*64+[31]*64+[32]*64+[33]*64+[34]*64+[35]*64+[36]*64+[37]*64 196 | return np.matrix(data),np.matrix(label).T 197 | 198 | 199 | def SplitData(data,label,number,propotion): 200 | if number == 1: 201 | classes = 15 202 | elif number == 2: 203 | classes = 40 204 | elif number == 3: 205 | classes = 10 206 | elif number == 4: 207 | trainData = [] 208 | testData = [] 209 | trainLabel = [] 210 | testLabel = [] 211 | lis = [] 212 | while len(lis) < int(data.shape[0]*propotion): 213 | t = random.randint(0,data.shape[0]-1) 214 | if t not in lis: 215 | trainData.append(np.ravel(data[t,:])) 216 | trainLabel.append(np.ravel(label[t])) 217 | lis.append(t) 218 | for i in range(data.shape[0]): 219 | if i not in lis: 220 | testData.append(np.ravel(data[i,:])) 221 | testLabel.append(np.ravel(label[i])) 222 | lis.append(i) 223 | return np.matrix(trainData),np.matrix(trainLabel),np.matrix(testData),np.matrix(testLabel) 224 | 225 | samples = data.shape[0] 226 | perClass = int(samples/classes) 227 | selected = int(perClass*propotion) 228 | 229 | 
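    # Datasets 1-3 are stored class by class, perClass samples per class:
    # draw `selected` distinct random indices from each class block for
    # training and keep the remaining samples for testing.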
trainData,testData = [],[] 230 | trainLabel,testLabel = [],[] 231 | count1 = [] 232 | for i in range(classes): 233 | count2,k = [],math.inf 234 | for j in range(selected): 235 | count2.append(k) 236 | k = random.randint(0,perClass-1) 237 | while k in count2: 238 | k = random.randint(0,perClass-1) 239 | trainData.append(np.ravel(data[perClass*i+k])) 240 | trainLabel.append(np.ravel(label[perClass*i+k])) 241 | count1.append(perClass*i+k) 242 | for i in range(samples): 243 | if i not in count1: 244 | testData.append(np.ravel(data[i])) 245 | testLabel.append(np.ravel(label[i])) 246 | return np.mat(trainData),np.ravel(np.mat(trainLabel)),np.mat(testData),np.ravel(np.mat(testLabel)) 247 | 248 | 249 | #from sklearn.datasets import load_digits 250 | #digits = load_digits() 251 | #data = digits['data'] 252 | #label = digits['target'] 253 | # 254 | #print(data.shape,max(label)+1) 255 | #traindata,testdata,trainlabel,testlabel = train_test_split(data,label,test_size=0.2,random_state = 1) 256 | #print(traindata.shape,trainlabel.shape,testdata.shape,testlabel.shape) 257 | # 258 | # 259 | #from sklearn.datasets import load_breast_cancer 260 | #breast_cancer = load_breast_cancer() 261 | #data = breast_cancer['data'] 262 | #label = breast_cancer['target'] 263 | # 264 | #traindata,testdata,trainlabel,testlabel = train_test_split(data,label,test_size=0.2,random_state = 2018) 265 | 266 | 267 | #dataset = 4 268 | #data,label = LoadData(dataset) 269 | #print(data.shape,max(label)+1) 270 | # 271 | #pca = PCA(0.99) 272 | #data = pca.fit_transform(data) 273 | #data = data/255. 274 | # 275 | #def split(data,label,propotion): 276 | # train_index = np.random.choice(2414,size=(int(propotion*2414)),replace=False) 277 | # test_index = list(set(np.arange(2414))-set(train_index)) 278 | # return data[train_index],label[train_index],data[test_index],label[test_index] 279 | # 280 | # 281 | #traindata,trainlabel,testdata,testlabel = SplitData(data,label,dataset,0.8) 282 | #trainlabel = np.ravel(trainlabel) 283 | #testlabel = np.ravel(testlabel) 284 | #print(traindata.shape,trainlabel.shape,testdata.shape,testlabel.shape) 285 | 286 | 287 | #fault1_1 = sio.loadmat(u'/Volumes/TOSHIBA EXT/数据集/CVACaseStudy/CVACaseStudy/FaultyCase1.mat')['Set1_1'] 288 | #fault1_2 = sio.loadmat(u'/Volumes/TOSHIBA EXT/数据集/CVACaseStudy/CVACaseStudy/FaultyCase1.mat')['Set1_2'] 289 | #fault1_3 = sio.loadmat(u'/Volumes/TOSHIBA EXT/数据集/CVACaseStudy/CVACaseStudy/FaultyCase1.mat')['Set1_3'] 290 | #fault2_1 = sio.loadmat(u'/Volumes/TOSHIBA EXT/数据集/CVACaseStudy/CVACaseStudy/FaultyCase2.mat')['Set2_1'] 291 | #fault2_2 = sio.loadmat(u'/Volumes/TOSHIBA EXT/数据集/CVACaseStudy/CVACaseStudy/FaultyCase2.mat')['Set2_2'] 292 | #fault2_3 = sio.loadmat(u'/Volumes/TOSHIBA EXT/数据集/CVACaseStudy/CVACaseStudy/FaultyCase2.mat')['Set2_3'] 293 | #fault3_1 = sio.loadmat(u'/Volumes/TOSHIBA EXT/数据集/CVACaseStudy/CVACaseStudy/FaultyCase3.mat')['Set3_1'] 294 | #fault3_2 = sio.loadmat(u'/Volumes/TOSHIBA EXT/数据集/CVACaseStudy/CVACaseStudy/FaultyCase3.mat')['Set3_2'] 295 | #fault3_3 = sio.loadmat(u'/Volumes/TOSHIBA EXT/数据集/CVACaseStudy/CVACaseStudy/FaultyCase3.mat')['Set3_3'] 296 | #fault4_1 = sio.loadmat(u'/Volumes/TOSHIBA EXT/数据集/CVACaseStudy/CVACaseStudy/FaultyCase4.mat')['Set4_1'] 297 | #fault4_2 = sio.loadmat(u'/Volumes/TOSHIBA EXT/数据集/CVACaseStudy/CVACaseStudy/FaultyCase4.mat')['Set4_2'] 298 | #fault4_3 = sio.loadmat(u'/Volumes/TOSHIBA EXT/数据集/CVACaseStudy/CVACaseStudy/FaultyCase4.mat')['Set4_3'] 299 | #fault5_1 = sio.loadmat(u'/Volumes/TOSHIBA
EXT/数据集/CVACaseStudy/CVACaseStudy/FaultyCase5.mat')['Set5_1'] 300 | #fault5_2 = sio.loadmat(u'/Volumes/TOSHIBA EXT/数据集/CVACaseStudy/CVACaseStudy/FaultyCase5.mat')['Set5_2'] 301 | #fault6_1 = sio.loadmat(u'/Volumes/TOSHIBA EXT/数据集/CVACaseStudy/CVACaseStudy/FaultyCase6.mat')['Set6_1'] 302 | #fault6_2 = sio.loadmat(u'/Volumes/TOSHIBA EXT/数据集/CVACaseStudy/CVACaseStudy/FaultyCase6.mat')['Set6_2'] 303 | # 304 | #data = np.vstack([fault1_1,fault1_2,fault1_3,fault2_1,fault2_2,fault2_3,fault3_1,fault3_2,fault3_3,fault4_1,fault4_2,fault4_3,fault5_1,fault5_2,fault6_1,fault6_2]) 305 | #label = [0]*(fault1_1.shape[0]+fault1_2.shape[0]+fault1_3.shape[0])+[1]*(fault2_1.shape[0]+fault2_2.shape[0]+fault2_3.shape[0])+[2]*(fault3_1.shape[0]+fault3_2.shape[0]+fault3_3.shape[0])+[3]*(fault4_1.shape[0]+fault4_2.shape[0]+fault4_3.shape[0])+[4]*(fault5_1.shape[0]+fault5_2.shape[0])+[5]*(fault6_1.shape[0]+fault6_2.shape[0]) 306 | #label = np.array(label) 307 | #print(data.shape,max(label)+1) 308 | # 309 | # 310 | #traindata,testdata,trainlabel,testlabel = train_test_split(data,label,test_size=0.2,random_state = 2018) 311 | #print(traindata.shape,trainlabel.shape,testdata.shape,testlabel.shape) 312 | 313 | 314 | #data = pd.read_csv(u'/Users/zhuxiaoxiansheng/Desktop/GBN/GBN_data/dataset_diabetes/diabetic_data.csv') 315 | #print(data.shape) 316 | # 317 | #le = preprocessing.LabelEncoder() 318 | #for item in data.columns: 319 | # data[item] = le.fit_transform(data[item]) 320 | #label = data['diabetesMed'].values 321 | #data = data.drop('diabetesMed',axis=1) 322 | #data = data.drop('encounter_id',axis=1) 323 | #data = data.drop('patient_nbr',axis=1) 324 | #data = data.drop('weight',axis=1) 325 | #data = data.drop('payer_code',axis=1) 326 | #data = data.drop('max_glu_serum',axis=1) 327 | #data = data.drop('A1Cresult',axis=1).values 328 | # 329 | #print(data.shape,max(label)+1) 330 | # 331 | #traindata,testdata,trainlabel,testlabel = train_test_split(data,label,test_size=0.2,random_state = 2018) 332 | #print(traindata.shape,trainlabel.shape,testdata.shape,testlabel.shape) 333 | 334 | #def Decode(Y_onehot): 335 | # Y = [] 336 | # for i in range(Y_onehot.shape[0]): 337 | # lis = np.ravel(Y_onehot[i,:]).tolist() 338 | # Y.append(lis.index(max(lis))) 339 | # return np.array(Y) 340 | #data = pd.read_excel(u'/Users/zhuxiaoxiansheng/Desktop/GBN/GBN_data/Steel_Plates_Faults.xlsx') 341 | #data = data.fillna(data.median()) 342 | #label = data.iloc[:,-7:].values 343 | #label = Decode(label) 344 | #data = data.drop([28,29,30,31,32,33,34],axis = 1) 345 | #print(data.shape,label.shape) 346 | #print(data.shape,max(label)+1) 347 | # 348 | #traindata,testdata,trainlabel,testlabel = train_test_split(data.values,label,test_size=0.2,random_state = 4) 349 | #print(traindata.shape,trainlabel.shape,testdata.shape,testlabel.shape) 350 | 351 | #data = sio.loadmat('/Users/zhuxiaoxiansheng/Desktop/GBN/GBN_data/USPS美国邮政服务手写数字识别库/USPStrainingdata.mat') 352 | #traindata = data['traindata'] 353 | #traintarg = data['traintarg'] 354 | #trainlabel = Decode(traintarg) 355 | #data = sio.loadmat('/Users/zhuxiaoxiansheng/Desktop/GBN/GBN_data/USPS美国邮政服务手写数字识别库/USPStestingdata.mat') 356 | #testdata = data['testdata'] 357 | #testtarg = data['testtarg'] 358 | #testlabel = Decode(testtarg) 359 | #print(traindata.shape,trainlabel.shape,testdata.shape,testlabel.shape) 360 | # 361 | #data = np.row_stack((traindata,testdata)) 362 | #label = np.ravel((np.row_stack((np.mat(trainlabel).T,np.mat(testlabel).T)))) 363 | #print(data.shape,max(label)+1) 364 | 
#traindata,testdata,trainlabel,testlabel = train_test_split(data,label,test_size=0.2,random_state = 2018) 365 | #print(traindata.shape,trainlabel.shape,testdata.shape,testlabel.shape) 366 | # 367 | 368 | #data = pd.read_excel(u'/Users/zhuxiaoxiansheng/Desktop/GBN/GBN_data/car_evaluation.xlsx') 369 | # 370 | #le = preprocessing.LabelEncoder() 371 | #for item in data.columns: 372 | # data[item] = le.fit_transform(data[item]) 373 | #label = data['label'].values 374 | #data = data.drop('label',axis=1) 375 | #data = data.values 376 | #print(data.shape,max(label)+1) 377 | #traindata,testdata,trainlabel,testlabel = train_test_split(data,label,test_size=0.2,random_state = 1) 378 | #print(traindata.shape,trainlabel.shape,testdata.shape,testlabel.shape) 379 | 380 | 381 | #data = pd.read_csv(u'/Users/zhuxiaoxiansheng/Desktop/GBN/GBN_data/HTRU_2.csv') 382 | #label = data['label'].values 383 | #data = data.drop('label',axis=1) 384 | #data = data.values 385 | #print(data.shape,max(label)+1) 386 | # 387 | #traindata,testdata,trainlabel,testlabel = train_test_split(data,label,test_size=0.2,random_state = 2018) 388 | #print(traindata.shape,trainlabel.shape,testdata.shape,testlabel.shape) 389 | # 390 | #data = pd.read_excel(u'/Users/zhuxiaoxiansheng/Desktop/GBN/GBN_data/mushroom_expand.xlsx') 391 | # 392 | #le = preprocessing.LabelEncoder() 393 | #for item in data.columns: 394 | # data[item] = le.fit_transform(data[item]) 395 | # 396 | #label = data['label'].values 397 | #data = data.drop('label',axis=1) 398 | #data = data.values 399 | #print(data.shape,max(label)+1) 400 | # 401 | #traindata,testdata,trainlabel,testlabel = train_test_split(data,label,test_size=0.2,random_state = 1) 402 | #print(traindata.shape,trainlabel.shape,testdata.shape,testlabel.shape) 403 | 404 | 405 | 406 | #traindata = pd.read_csv('/Users/zhuxiaoxiansheng/Desktop/GBN/GBN_data/Crowdsourced Mapping/training.csv') 407 | # 408 | #le = preprocessing.LabelEncoder() 409 | #for item in traindata.columns: 410 | # traindata[item] = le.fit_transform(traindata[item]) 411 | # 412 | #trainlabel = traindata['label'].values 413 | #traindata = traindata.drop('label',axis = 1).values 414 | # 415 | #testdata = pd.read_csv('/Users/zhuxiaoxiansheng/Desktop/GBN/GBN_data/Crowdsourced Mapping/testing.csv') 416 | # 417 | #le = preprocessing.LabelEncoder() 418 | #for item in testdata.columns: 419 | # testdata[item] = le.fit_transform(testdata[item]) 420 | # 421 | #testlabel = testdata['label'].values 422 | #testdata = testdata.drop('label',axis = 1).values 423 | # 424 | #data = np.row_stack((traindata,testdata)) 425 | #label = np.ravel((np.row_stack((np.mat(trainlabel).T,np.mat(testlabel).T)))) 426 | # 427 | #print(data.shape,max(label)+1) 428 | # 429 | #traindata,testdata,trainlabel,testlabel = train_test_split(data,label,test_size=0.2,random_state = 1) 430 | #print(traindata.shape,trainlabel.shape,testdata.shape,testlabel.shape) 431 | # 432 | 433 | 434 | 435 | data = pd.read_excel(u'/Users/zhuxiaoxiansheng/Desktop/GBN/GBN_data/balance-scale.xlsx') 436 | 437 | le = preprocessing.LabelEncoder() 438 | for item in data.columns: 439 | data[item] = le.fit_transform(data[item]) 440 | 441 | 442 | label = data['label'].values 443 | data = data.drop('label',axis=1) 444 | data = data.values 445 | print(data.shape,max(label)+1) 446 | 447 | traindata,testdata,trainlabel,testlabel = train_test_split(data,label,test_size=0.2,random_state = 0) 448 | print(traindata.shape,trainlabel.shape,testdata.shape,testlabel.shape) 449 | 450 | 451 | bls = broadnet(maptimes = 10, 452 
| enhencetimes = 10, 453 | map_function = 'relu', 454 | enhence_function = 'relu', 455 | batchsize = 100, 456 | reg = 0.001) 457 | 458 | starttime = datetime.datetime.now() 459 | bls.fit(traindata,trainlabel) 460 | endtime = datetime.datetime.now() 461 | print('the training time of BLS is {0} seconds'.format((endtime - starttime).total_seconds())) 462 | 463 | predictlabel = bls.predict(testdata) 464 | print(show_accuracy(predictlabel,testlabel)) 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | -------------------------------------------------------------------------------- /BroadLearning/bls_addinput.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn import preprocessing 3 | from numpy import random 4 | from sklearn.datasets import load_iris 5 | from sklearn.model_selection import train_test_split 6 | import pandas as pd 7 | 8 | def show_accuracy(predictLabel,Label): 9 | Label = np.ravel(Label).tolist() 10 | predictLabel = predictLabel.tolist() 11 | count = 0 12 | for i in range(len(Label)): 13 | if Label[i] == predictLabel[i]: 14 | count += 1 15 | return (round(count/len(Label),5)) 16 | 17 | class scaler: 18 | def __init__(self): 19 | self._mean = 0 20 | self._std = 0 21 | 22 | def fit_transform(self,traindata): 23 | self._mean = traindata.mean(axis = 0) 24 | self._std = traindata.std(axis = 0) 25 | return (traindata-self._mean)/self._std 26 | 27 | def transform(self,testdata): 28 | return (testdata-self._mean)/self._std 29 | 30 | class node_generator: 31 | def __init__(self,whiten = False): 32 | self.Wlist = [] 33 | self.blist = [] 34 | self.nonlinear = 0 35 | self.whiten = whiten 36 | 37 | def sigmoid(self,data): 38 | return 1.0/(1+np.exp(-data)) 39 | 40 | def linear(self,data): 41 | return data 42 | 43 | def tanh(self,data): 44 | return (np.exp(data)-np.exp(-data))/(np.exp(data)+np.exp(-data)) 45 | 46 | def relu(self,data): 47 | return np.maximum(data,0) 48 | 49 | def orth(self,W): 50 | for i in range(0,W.shape[1]): 51 | w = np.mat(W[:,i].copy()).T 52 | w_sum = 0 53 | for j in range(i): 54 | wj = np.mat(W[:,j].copy()).T 55 | w_sum += (w.T.dot(wj))[0,0]*wj 56 | w -= w_sum 57 | w = w/np.sqrt(w.T.dot(w)) 58 | W[:,i] = np.ravel(w) 59 | return W 60 | 61 | def generator(self,shape,times): 62 | for i in range(times): 63 | W = 2*random.random(size=shape)-1 64 | if self.whiten == True: 65 | W = self.orth(W) 66 | b = 2*random.random()-1 67 | yield (W,b) 68 | 69 | def generator_nodes(self, data, times, batchsize, nonlinear): 70 | self.Wlist = [elem[0] for elem in self.generator((data.shape[1],batchsize),times)] 71 | self.blist = [elem[1] for elem in self.generator((data.shape[1],batchsize),times)] 72 | 73 | self.nonlinear = {'linear':self.linear, 74 | 'sigmoid':self.sigmoid, 75 | 'tanh':self.tanh, 76 | 'relu':self.relu 77 | }[nonlinear] 78 | nodes = self.nonlinear(data.dot(self.Wlist[0])+self.blist[0]) 79 | for i in range(1,len(self.Wlist)): 80 | nodes = np.column_stack((nodes, self.nonlinear(data.dot(self.Wlist[i])+self.blist[i]))) 81 | return nodes 82 | 83 | def transform(self,testdata): 84 | testnodes = self.nonlinear(testdata.dot(self.Wlist[0])+self.blist[0]) 85 | for i in range(1,len(self.Wlist)): 86 | testnodes = np.column_stack((testnodes, self.nonlinear(testdata.dot(self.Wlist[i])+self.blist[i]))) 87 | return testnodes 88 | 89 | def update(self,otherW, otherb): 90 | self.Wlist += otherW 91 | self.blist += otherb 92 | 93 | 94 | class broadnet_enhmap: 95 | def __init__(self, 96 | maptimes 
= 10, 97 | enhencetimes = 10, 98 | traintimes = 100, 99 | map_function = 'linear', 100 | enhence_function = 'linear', 101 | batchsize = 'auto', 102 | acc = 1, 103 | mapstep = 1, 104 | enhencestep = 1, 105 | reg = 0.001): 106 | 107 | self._maptimes = maptimes 108 | self._enhencetimes = enhencetimes 109 | self._batchsize = batchsize 110 | self._traintimes = traintimes 111 | self._acc = acc 112 | self._mapstep = mapstep 113 | self._enhencestep = enhencestep 114 | self._reg = reg 115 | self._map_function = map_function 116 | self._enhence_function = enhence_function 117 | 118 | self.W = 0 119 | self.pesuedoinverse = 0 120 | 121 | self.normalscaler = scaler() 122 | self.onehotencoder = preprocessing.OneHotEncoder(sparse = False) 123 | self.mapping_generator = node_generator() 124 | self.enhence_generator = node_generator(whiten = True) 125 | self.local_mapgeneratorlist = [] 126 | self.local_enhgeneratorlist = [] 127 | 128 | def fit(self,oridata,orilabel): 129 | if self._batchsize == 'auto': 130 | self._batchsize = oridata.shape[1] 131 | data = self.normalscaler.fit_transform(oridata) 132 | label = self.onehotencoder.fit_transform(np.mat(orilabel).T) 133 | 134 | mappingdata = self.mapping_generator.generator_nodes(data,self._maptimes,self._batchsize,self._map_function) 135 | enhencedata = self.enhence_generator.generator_nodes(mappingdata,self._enhencetimes,self._batchsize,self._enhence_function) 136 | inputdata = np.column_stack((mappingdata,enhencedata)) 137 | 138 | self.pesuedoinverse = self.pinv(inputdata) 139 | self.W = self.pesuedoinverse.dot(label) 140 | 141 | Y = self.predict(oridata) 142 | accuracy, i = self.accuracy(Y,orilabel),0 143 | # print("inital setting, number of mapping nodes {0}, number of enhence nodes {1}, accuracy {2}".format(mappingdata.shape[1],enhencedata.shape[1],round(accuracy,5))) 144 | 145 | while i < self._traintimes and accuracy < self._acc: 146 | Y = self.adding_predict(oridata, orilabel, self._mapstep, self._enhencestep, self._batchsize) 147 | accuracy = self.accuracy(Y,orilabel) 148 | i += 1 149 | # print("adding {3}, number of mapping nodes {0}, number of enhence nodes {1}, accuracy {2}".format((len(self.mapping_generator.Wlist)+len(self.local_mapgeneratorlist)*len(self.local_mapgeneratorlist[0].Wlist))*self._batchsize,(len(self.enhence_generator.Wlist)+len(self.local_enhgeneratorlist)*len(self.local_enhgeneratorlist[0].Wlist))*self._batchsize,round(accuracy,5),i)) 150 | 151 | def pinv(self, A): 152 | return np.mat(self._reg*np.eye(A.shape[1])+A.T.dot(A)).I.dot(A.T) 153 | 154 | def decode(self, Y_onehot): 155 | Y = [] 156 | for i in range(Y_onehot.shape[0]): 157 | lis = np.ravel(Y_onehot[i,:]).tolist() 158 | Y.append(lis.index(max(lis))) 159 | return np.array(Y) 160 | 161 | def accuracy(self, predictlabel,label): 162 | label = np.ravel(label).tolist() 163 | predictlabel = predictlabel.tolist() 164 | count = 0 165 | for i in range(len(label)): 166 | if label[i] == predictlabel[i]: 167 | count += 1 168 | return (round(count/len(label),5)) 169 | 170 | def predict(self, testdata): 171 | testdata = self.normalscaler.transform(testdata) 172 | test_inputdata = self.transform(testdata) 173 | return self.decode(test_inputdata.dot(self.W)) 174 | 175 | def transform(self, data): 176 | mappingdata = self.mapping_generator.transform(data) 177 | enhencedata = self.enhence_generator.transform(mappingdata) 178 | inputdata = np.column_stack((mappingdata,enhencedata)) 179 | for elem1,elem2 in zip(self.local_mapgeneratorlist,self.local_enhgeneratorlist): 180 | inputdata = 
np.column_stack((inputdata, elem1.transform(data))) 181 | inputdata = np.column_stack((inputdata, elem2.transform(mappingdata))) 182 | return inputdata 183 | 184 | def adding_nodes(self, data, label, mapstep = 1, enhencestep = 1, batchsize = 'auto'): 185 | if batchsize == 'auto': 186 | batchsize = data.shape[1] 187 | 188 | mappingdata = self.mapping_generator.transform(data) 189 | inputdata = self.transform(data) 190 | 191 | localmap_generator = node_generator() 192 | extramap_nodes = localmap_generator.generator_nodes(data,mapstep,batchsize,self._map_function) 193 | localenhence_generator = node_generator() 194 | extraenh_nodes = localenhence_generator.generator_nodes(mappingdata,enhencestep,batchsize,self._map_function) 195 | extra_nodes = np.column_stack((extramap_nodes,extraenh_nodes)) 196 | 197 | D = self.pesuedoinverse.dot(extra_nodes) 198 | C = extra_nodes - inputdata.dot(D) 199 | BT = self.pinv(C) if (C == 0).any() else np.mat((D.T.dot(D)+np.eye(D.shape[1]))).I.dot(D.T).dot(self.pesuedoinverse) 200 | 201 | self.W = np.row_stack((self.W-D.dot(BT).dot(label),BT.dot(label))) 202 | self.pesuedoinverse = np.row_stack((self.pesuedoinverse - D.dot(BT),BT)) 203 | self.local_mapgeneratorlist.append(localmap_generator) 204 | self.local_enhgeneratorlist.append(localenhence_generator) 205 | 206 | def adding_predict(self, data, label, mapstep = 1, enhencestep = 1, batchsize = 'auto'): 207 | data = self.normalscaler.transform(data) 208 | label = self.onehotencoder.transform(np.mat(label).T) 209 | self.adding_nodes(data, label, mapstep, enhencestep, batchsize) 210 | test_inputdata = self.transform(data) 211 | return self.decode(test_inputdata.dot(self.W)) 212 | 213 | def incremental_input(self, traindata, extratraindata, extratrainlabel): 214 | data = self.normalscaler.transform(traindata) 215 | data = self.transform(data) 216 | 217 | xdata = self.normalscaler.transform(extratraindata) 218 | xdata = self.transform(xdata).T 219 | xlabel = self.onehotencoder.transform(np.mat(extratrainlabel).T).T 220 | 221 | DT = xdata.T.dot(self.pesuedoinverse) 222 | CT = xdata.T - DT.dot(data) 223 | B = self.pinv(CT) if (CT.T == 0).any() else self.pesuedoinverse.dot(DT.T).dot(np.mat((DT.dot(DT.T)+np.eye(DT.shape[0]))).I) 224 | 225 | self.W = self.W + B.dot((xlabel.T-xdata.T.dot(self.W))) 226 | self.pesuedoinverse = np.column_stack((self.pesuedoinverse-B.dot(DT),B)) 227 | 228 | 229 | iris=load_iris() 230 | X = iris.data 231 | Y = iris.target 232 | traindata1,testdata1,trainlabel1,testlabel1 = train_test_split(X,Y,test_size=0.9) 233 | 234 | traindata21,testdata21,trainlabel21,testlabel21 = train_test_split(testdata1,testlabel1,test_size=0.9,random_state = 2018) 235 | traindata22,testdata22,trainlabel22,testlabel22 = train_test_split(testdata21,testlabel21,test_size=0.9,random_state = 2018) 236 | traindata31,testdata31,trainlabel31,testlabel31 = train_test_split(testdata22,testlabel22,test_size=0.9,random_state = 2018) 237 | traindata32,testdata32,trainlabel32,testlabel32 = train_test_split(testdata31,testlabel31,test_size=0.9,random_state = 2018) 238 | 239 | #print(traindata.shape,trainlabel.shape,testdata.shape,testlabel.shape) 240 | 241 | 242 | 243 | bls = broadnet_enhmap(maptimes = 10, 244 | enhencetimes = 10, 245 | traintimes = 10, 246 | map_function = 'tanh', 247 | enhence_function = 'sigmoid', 248 | batchsize = 'auto', 249 | acc = 1, 250 | mapstep = 10, 251 | enhencestep = 10, 252 | reg = 0.001) 253 | 254 | bls.fit(traindata1,trainlabel1) 255 | predictlabel = bls.predict(testdata32) 256 | 
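# accuracy of the initial model, trained on traindata1 only, before any
# incremental-input updates: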
print(show_accuracy(predictlabel,testlabel32)) 257 | 258 | bls.incremental_input(traindata1, traindata21, trainlabel21) 259 | predictlabel = bls.predict(testdata32) 260 | print(show_accuracy(predictlabel,testlabel32)) 261 | 262 | bls.incremental_input(np.row_stack((traindata1,traindata21)), traindata22, trainlabel22) 263 | predictlabel = bls.predict(testdata32) 264 | print(show_accuracy(predictlabel,testlabel32)) 265 | 266 | bls.incremental_input(np.row_stack((traindata1,traindata21,traindata22)), traindata31, trainlabel31) 267 | predictlabel = bls.predict(testdata32) 268 | print(show_accuracy(predictlabel,testlabel32)) 269 | 270 | bls.incremental_input(np.row_stack((traindata1,traindata21,traindata22,traindata31)), traindata32, trainlabel32) 271 | predictlabel = bls.predict(testdata32) 272 | print(show_accuracy(predictlabel,testlabel32)) 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | -------------------------------------------------------------------------------- /BroadLearning/bls_enhence.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn import preprocessing 3 | from numpy import random 4 | from sklearn.datasets import load_iris 5 | from sklearn.model_selection import train_test_split 6 | import pandas as pd 7 | 8 | def show_accuracy(predictLabel,Label): 9 | Label = np.ravel(Label).tolist() 10 | predictLabel = predictLabel.tolist() 11 | count = 0 12 | for i in range(len(Label)): 13 | if Label[i] == predictLabel[i]: 14 | count += 1 15 | return (round(count/len(Label),5)) 16 | 17 | class scaler: 18 | def __init__(self): 19 | self._mean = 0 20 | self._std = 0 21 | 22 | def fit_transform(self,traindata): 23 | self._mean = traindata.mean(axis = 0) 24 | self._std = traindata.std(axis = 0) 25 | return (traindata-self._mean)/self._std 26 | 27 | def transform(self,testdata): 28 | return (testdata-self._mean)/self._std 29 | 30 | class node_generator: 31 | def __init__(self,whiten = False): 32 | self.Wlist = [] 33 | self.blist = [] 34 | self.nonlinear = 0 35 | self.whiten = whiten 36 | 37 | def sigmoid(self,data): 38 | return 1.0/(1+np.exp(-data)) 39 | 40 | def linear(self,data): 41 | return data 42 | 43 | def tanh(self,data): 44 | return (np.exp(data)-np.exp(-data))/(np.exp(data)+np.exp(-data)) 45 | 46 | def relu(self,data): 47 | return np.maximum(data,0) 48 | 49 | def orth(self,W): 50 | for i in range(0,W.shape[1]): 51 | w = np.mat(W[:,i].copy()).T 52 | w_sum = 0 53 | for j in range(i): 54 | wj = np.mat(W[:,j].copy()).T 55 | w_sum += (w.T.dot(wj))[0,0]*wj 56 | w -= w_sum 57 | w = w/np.sqrt(w.T.dot(w)) 58 | W[:,i] = np.ravel(w) 59 | return W 60 | 61 | def generator(self,shape,times): 62 | for i in range(times): 63 | W = 2*random.random(size=shape)-1 64 | if self.whiten == True: 65 | W = self.orth(W) 66 | b = 2*random.random()-1 67 | yield (W,b) 68 | 69 | def generator_nodes(self, data, times, batchsize, nonlinear): 70 | self.Wlist = [elem[0] for elem in self.generator((data.shape[1],batchsize),times)] 71 | self.blist = [elem[1] for elem in self.generator((data.shape[1],batchsize),times)] 72 | 73 | self.nonlinear = {'linear':self.linear, 74 | 'sigmoid':self.sigmoid, 75 | 'tanh':self.tanh, 76 | 'relu':self.relu 77 | }[nonlinear] 78 | nodes = self.nonlinear(data.dot(self.Wlist[0])+self.blist[0]) 79 | for i in range(1,len(self.Wlist)): 80 | nodes = np.column_stack((nodes, self.nonlinear(data.dot(self.Wlist[i])+self.blist[i]))) 81 | return nodes 82 | 83 | def 
transform(self,testdata): 84 | testnodes = self.nonlinear(testdata.dot(self.Wlist[0])+self.blist[0]) 85 | for i in range(1,len(self.Wlist)): 86 | testnodes = np.column_stack((testnodes, self.nonlinear(testdata.dot(self.Wlist[i])+self.blist[i]))) 87 | return testnodes 88 | 89 | def update(self,otherW, otherb): 90 | self.Wlist += otherW 91 | self.blist += otherb 92 | 93 | 94 | class broadnet_enhence: 95 | def __init__(self, 96 | maptimes = 10, 97 | enhencetimes = 10, 98 | traintimes = 100, 99 | map_function = 'linear', 100 | enhence_function = 'linear', 101 | batchsize = 'auto', 102 | acc = 1, 103 | step = 1, 104 | reg = 0.001): 105 | 106 | self._maptimes = maptimes 107 | self._enhencetimes = enhencetimes 108 | self._batchsize = batchsize 109 | self._traintimes = traintimes 110 | self._acc = acc 111 | self._step = step 112 | self._reg = reg 113 | self._map_function = map_function 114 | self._enhence_function = enhence_function 115 | 116 | self.W = 0 117 | self.pesuedoinverse = 0 118 | 119 | self.normalscaler = scaler() 120 | self.onehotencoder = preprocessing.OneHotEncoder(sparse = False) 121 | self.mapping_generator = node_generator() 122 | self.enhence_generator = node_generator(whiten = True) 123 | 124 | def fit(self,oridata,orilabel): 125 | if self._batchsize == 'auto': 126 | self._batchsize = oridata.shape[1] 127 | data = self.normalscaler.fit_transform(oridata) 128 | label = self.onehotencoder.fit_transform(np.mat(orilabel).T) 129 | 130 | mappingdata = self.mapping_generator.generator_nodes(data,self._maptimes,self._batchsize,self._map_function) 131 | enhencedata = self.enhence_generator.generator_nodes(mappingdata,self._enhencetimes,self._batchsize,self._enhence_function) 132 | inputdata = np.column_stack((mappingdata,enhencedata)) 133 | 134 | self.pesuedoinverse = self.pinv(inputdata) 135 | self.W = self.pesuedoinverse.dot(label) 136 | 137 | Y = self.predict(oridata) 138 | accuracy, i = self.accuracy(Y,orilabel),0 139 | print("inital setting, number of mapping nodes {0}, number of enhence nodes {1}, accuracy {2}".format(mappingdata.shape[1],enhencedata.shape[1],round(accuracy,5))) 140 | 141 | while i < self._traintimes and accuracy < self._acc: 142 | Y = self.addingenhence_predict(data, label, self._step,self._batchsize) 143 | accuracy = self.accuracy(Y,orilabel) 144 | i += 1 145 | print("enhencing {3}, number of mapping nodes {0}, number of enhence nodes {1}, accuracy {2}".format(len(self.mapping_generator.Wlist)*self._batchsize,len(self.enhence_generator.Wlist)*self._batchsize,round(accuracy,5),i)) 146 | 147 | def pinv(self,A): 148 | return np.mat(self._reg*np.eye(A.shape[1])+A.T.dot(A)).I.dot(A.T) 149 | 150 | def decode(self,Y_onehot): 151 | Y = [] 152 | for i in range(Y_onehot.shape[0]): 153 | lis = np.ravel(Y_onehot[i,:]).tolist() 154 | Y.append(lis.index(max(lis))) 155 | return np.array(Y) 156 | 157 | def accuracy(self,predictlabel,label): 158 | label = np.ravel(label).tolist() 159 | predictlabel = predictlabel.tolist() 160 | count = 0 161 | for i in range(len(label)): 162 | if label[i] == predictlabel[i]: 163 | count += 1 164 | return (round(count/len(label),5)) 165 | 166 | def predict(self,testdata): 167 | testdata = self.normalscaler.transform(testdata) 168 | test_inputdata = self.transform(testdata) 169 | return self.decode(test_inputdata.dot(self.W)) 170 | 171 | def transform(self,data): 172 | mappingdata = self.mapping_generator.transform(data) 173 | enhencedata = self.enhence_generator.transform(mappingdata) 174 | return np.column_stack((mappingdata,enhencedata)) 175 | 
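    # Incremental learning step: when a new block H of enhancement nodes is
    # appended to the current input matrix A, the ridge pseudoinverse A+ is
    # updated below rather than recomputed from scratch. With
    #     D = A+ . H,   C = H - A . D,
    # the BLS paper takes B^T = pinv(C) when C != 0 and
    # B^T = (I + D^T D)^-1 . D^T . A+ when C = 0, which gives
    #     [A | H]+ = [A+ - D . B^T ; B^T],   W_new = [W - D.B^T.Y ; B^T.Y],
    # so only the small block C (or D^T D) is inverted in each update.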
176 | def addingenhence_nodes(self, data, label, step = 1, batchsize = 'auto'): 177 | if batchsize == 'auto': 178 | batchsize = data.shape[1] 179 | 180 | mappingdata = self.mapping_generator.transform(data) 181 | inputdata = self.transform(data) 182 | localenhence_generator = node_generator() 183 | extraenhence_nodes = localenhence_generator.generator_nodes(mappingdata,step,batchsize,self._enhence_function) 184 | 185 | D = self.pesuedoinverse.dot(extraenhence_nodes) 186 | C = extraenhence_nodes - inputdata.dot(D) 187 | BT = self.pinv(C) if (C == 0).any() else np.mat((D.T.dot(D)+np.eye(D.shape[1]))).I.dot(D.T).dot(self.pesuedoinverse) 188 | 189 | self.W = np.row_stack((self.W-D.dot(BT).dot(label),BT.dot(label))) 190 | self.enhence_generator.update(localenhence_generator.Wlist,localenhence_generator.blist) 191 | self.pesuedoinverse = np.row_stack((self.pesuedoinverse - D.dot(BT),BT)) 192 | 193 | def addingenhence_predict(self, data, label, step = 1, batchsize = 'auto'): 194 | self.addingenhence_nodes(data, label, step, batchsize) 195 | test_inputdata = self.transform(data) 196 | return self.decode(test_inputdata.dot(self.W)) 197 | 198 | 199 | iris=load_iris() 200 | X = iris.data 201 | Y = iris.target 202 | traindata,testdata,trainlabel,testlabel = train_test_split(X,Y,test_size=0.2,random_state = 2018) 203 | #print(traindata.shape,trainlabel.shape,testdata.shape,testlabel.shape) 204 | 205 | #data = pd.read_excel('/Users/zhuxiaoxiansheng/Desktop/日常/数据集/糖尿病数据集.xlsx') 206 | #label = np.mat(data['标签'].values) 207 | #data = data.drop('标签',axis = 1) 208 | #traindata,testdata,trainlabel,testlabel = train_test_split(data.values,np.ravel(label),test_size=0.2,random_state = 2018) 209 | 210 | bls = broadnet_enhence(maptimes = 10, 211 | enhencetimes = 10, 212 | traintimes = 10, 213 | map_function = 'tanh', 214 | enhence_function = 'sigmoid', 215 | batchsize = 1, 216 | acc = 1, 217 | step = 5, 218 | reg = 0.001) 219 | 220 | bls.fit(traindata,trainlabel) 221 | predictlabel = bls.predict(testdata) 222 | print(show_accuracy(predictlabel,testlabel)) 223 | 224 | -------------------------------------------------------------------------------- /BroadLearning/bls_enhmap.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn import preprocessing 3 | from numpy import random 4 | from sklearn.datasets import load_iris 5 | from sklearn.model_selection import train_test_split 6 | import pandas as pd 7 | 8 | def show_accuracy(predictLabel,Label): 9 | Label = np.ravel(Label).tolist() 10 | predictLabel = predictLabel.tolist() 11 | count = 0 12 | for i in range(len(Label)): 13 | if Label[i] == predictLabel[i]: 14 | count += 1 15 | return (round(count/len(Label),5)) 16 | 17 | class scaler: 18 | def __init__(self): 19 | self._mean = 0 20 | self._std = 0 21 | 22 | def fit_transform(self,traindata): 23 | self._mean = traindata.mean(axis = 0) 24 | self._std = traindata.std(axis = 0) 25 | return (traindata-self._mean)/self._std 26 | 27 | def transform(self,testdata): 28 | return (testdata-self._mean)/self._std 29 | 30 | class node_generator: 31 | def __init__(self,whiten = False): 32 | self.Wlist = [] 33 | self.blist = [] 34 | self.nonlinear = 0 35 | self.whiten = whiten 36 | 37 | def sigmoid(self,data): 38 | return 1.0/(1+np.exp(-data)) 39 | 40 | def linear(self,data): 41 | return data 42 | 43 | def tanh(self,data): 44 | return (np.exp(data)-np.exp(-data))/(np.exp(data)+np.exp(-data)) 45 | 46 | def relu(self,data): 47 | return np.maximum(data,0) 48 | 49 | 
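    # orth() below orthonormalizes the columns of W with classical
    # Gram-Schmidt: each column has its projections onto the previous
    # columns removed and is then scaled to unit length, so the random
    # enhancement weights are (approximately) orthonormal.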
def orth(self,W): 50 | for i in range(0,W.shape[1]): 51 | w = np.mat(W[:,i].copy()).T 52 | w_sum = 0 53 | for j in range(i): 54 | wj = np.mat(W[:,j].copy()).T 55 | w_sum += (w.T.dot(wj))[0,0]*wj 56 | w -= w_sum 57 | w = w/np.sqrt(w.T.dot(w)) 58 | W[:,i] = np.ravel(w) 59 | return W 60 | 61 | def generator(self,shape,times): 62 | for i in range(times): 63 | W = 2*random.random(size=shape)-1 64 | if self.whiten == True: 65 | W = self.orth(W) 66 | b = 2*random.random()-1 67 | yield (W,b) 68 | 69 | def generator_nodes(self, data, times, batchsize, nonlinear): 70 | self.Wlist = [elem[0] for elem in self.generator((data.shape[1],batchsize),times)] 71 | self.blist = [elem[1] for elem in self.generator((data.shape[1],batchsize),times)] 72 | 73 | self.nonlinear = {'linear':self.linear, 74 | 'sigmoid':self.sigmoid, 75 | 'tanh':self.tanh, 76 | 'relu':self.relu 77 | }[nonlinear] 78 | nodes = self.nonlinear(data.dot(self.Wlist[0])+self.blist[0]) 79 | for i in range(1,len(self.Wlist)): 80 | nodes = np.column_stack((nodes, self.nonlinear(data.dot(self.Wlist[i])+self.blist[i]))) 81 | return nodes 82 | 83 | def transform(self,testdata): 84 | testnodes = self.nonlinear(testdata.dot(self.Wlist[0])+self.blist[0]) 85 | for i in range(1,len(self.Wlist)): 86 | testnodes = np.column_stack((testnodes, self.nonlinear(testdata.dot(self.Wlist[i])+self.blist[i]))) 87 | return testnodes 88 | 89 | def update(self,otherW, otherb): 90 | self.Wlist += otherW 91 | self.blist += otherb 92 | 93 | 94 | class broadnet_enhmap: 95 | def __init__(self, 96 | maptimes = 10, 97 | enhencetimes = 10, 98 | traintimes = 100, 99 | map_function = 'linear', 100 | enhence_function = 'linear', 101 | batchsize = 'auto', 102 | acc = 1, 103 | mapstep = 1, 104 | enhencestep = 1, 105 | reg = 0.001): 106 | 107 | self._maptimes = maptimes 108 | self._enhencetimes = enhencetimes 109 | self._batchsize = batchsize 110 | self._traintimes = traintimes 111 | self._acc = acc 112 | self._mapstep = mapstep 113 | self._enhencestep = enhencestep 114 | self._reg = reg 115 | self._map_function = map_function 116 | self._enhence_function = enhence_function 117 | 118 | self.W = 0 119 | self.pesuedoinverse = 0 120 | 121 | self.normalscaler = scaler() 122 | self.onehotencoder = preprocessing.OneHotEncoder(sparse = False) 123 | self.mapping_generator = node_generator() 124 | self.enhence_generator = node_generator(whiten = True) 125 | self.local_mapgeneratorlist = [] 126 | self.local_enhgeneratorlist = [] 127 | 128 | def fit(self,oridata,orilabel): 129 | if self._batchsize == 'auto': 130 | self._batchsize = oridata.shape[1] 131 | data = self.normalscaler.fit_transform(oridata) 132 | label = self.onehotencoder.fit_transform(np.mat(orilabel).T) 133 | 134 | mappingdata = self.mapping_generator.generator_nodes(data,self._maptimes,self._batchsize,self._map_function) 135 | enhencedata = self.enhence_generator.generator_nodes(mappingdata,self._enhencetimes,self._batchsize,self._enhence_function) 136 | inputdata = np.column_stack((mappingdata,enhencedata)) 137 | 138 | self.pesuedoinverse = self.pinv(inputdata) 139 | self.W = self.pesuedoinverse.dot(label) 140 | 141 | Y = self.predict(oridata) 142 | accuracy, i = self.accuracy(Y,orilabel),0 143 | print("inital setting, number of mapping nodes {0}, number of enhence nodes {1}, accuracy {2}".format(mappingdata.shape[1],enhencedata.shape[1],round(accuracy,5))) 144 | 145 | while i < self._traintimes and accuracy < self._acc: 146 | Y = self.adding_predict(data, label, self._mapstep, self._enhencestep, self._batchsize) 147 | 
accuracy = self.accuracy(Y,orilabel) 148 | i += 1 149 | print("adding {3}, number of mapping nodes {0}, number of enhence nodes {1}, accuracy {2}".format((len(self.mapping_generator.Wlist)+len(self.local_mapgeneratorlist)*len(self.local_mapgeneratorlist[0].Wlist))*self._batchsize,(len(self.enhence_generator.Wlist)+len(self.local_enhgeneratorlist)*len(self.local_enhgeneratorlist[0].Wlist))*self._batchsize,round(accuracy,5),i)) 150 | 151 | def pinv(self,A): 152 | return np.mat(self._reg*np.eye(A.shape[1])+A.T.dot(A)).I.dot(A.T) 153 | 154 | def decode(self,Y_onehot): 155 | Y = [] 156 | for i in range(Y_onehot.shape[0]): 157 | lis = np.ravel(Y_onehot[i,:]).tolist() 158 | Y.append(lis.index(max(lis))) 159 | return np.array(Y) 160 | 161 | def accuracy(self,predictlabel,label): 162 | label = np.ravel(label).tolist() 163 | predictlabel = predictlabel.tolist() 164 | count = 0 165 | for i in range(len(label)): 166 | if label[i] == predictlabel[i]: 167 | count += 1 168 | return (round(count/len(label),5)) 169 | 170 | def predict(self,testdata): 171 | testdata = self.normalscaler.transform(testdata) 172 | test_inputdata = self.transform(testdata) 173 | return self.decode(test_inputdata.dot(self.W)) 174 | 175 | def transform(self,data): 176 | mappingdata = self.mapping_generator.transform(data) 177 | enhencedata = self.enhence_generator.transform(mappingdata) 178 | inputdata = np.column_stack((mappingdata,enhencedata)) 179 | for elem1,elem2 in zip(self.local_mapgeneratorlist,self.local_enhgeneratorlist): 180 | inputdata = np.column_stack((inputdata, elem1.transform(data))) 181 | inputdata = np.column_stack((inputdata, elem2.transform(mappingdata))) 182 | return inputdata 183 | 184 | def adding_nodes(self, data, label, mapstep = 1, enhencestep = 1, batchsize = 'auto'): 185 | if batchsize == 'auto': 186 | batchsize = data.shape[1] 187 | 188 | mappingdata = self.mapping_generator.transform(data) 189 | inputdata = self.transform(data) 190 | 191 | localmap_generator = node_generator() 192 | extramap_nodes = localmap_generator.generator_nodes(data,mapstep,batchsize,self._map_function) 193 | localenhence_generator = node_generator() 194 | extraenh_nodes = localenhence_generator.generator_nodes(mappingdata,enhencestep,batchsize,self._map_function) 195 | extra_nodes = np.column_stack((extramap_nodes,extraenh_nodes)) 196 | 197 | D = self.pesuedoinverse.dot(extra_nodes) 198 | C = extra_nodes - inputdata.dot(D) 199 | BT = self.pinv(C) if (C == 0).any() else np.mat((D.T.dot(D)+np.eye(D.shape[1]))).I.dot(D.T).dot(self.pesuedoinverse) 200 | 201 | self.W = np.row_stack((self.W-D.dot(BT).dot(label),BT.dot(label))) 202 | self.pesuedoinverse = np.row_stack((self.pesuedoinverse - D.dot(BT),BT)) 203 | self.local_mapgeneratorlist.append(localmap_generator) 204 | self.local_enhgeneratorlist.append(localenhence_generator) 205 | 206 | def adding_predict(self, data, label, mapstep = 1, enhencestep = 1, batchsize = 'auto'): 207 | self.adding_nodes(data, label, mapstep, enhencestep, batchsize) 208 | test_inputdata = self.transform(data) 209 | return self.decode(test_inputdata.dot(self.W)) 210 | 211 | 212 | iris=load_iris() 213 | X = iris.data 214 | Y = iris.target 215 | traindata,testdata,trainlabel,testlabel = train_test_split(X,Y,test_size=0.2,random_state = 2018) 216 | print(traindata.shape,trainlabel.shape,testdata.shape,testlabel.shape) 217 | 218 | 219 | 220 | bls = broadnet_enhmap(maptimes = 10, 221 | enhencetimes = 10, 222 | traintimes = 100, 223 | map_function = 'linear', 224 | enhence_function = 'sigmoid', 225 | 
batchsize = 'auto', 226 | acc = 1, 227 | mapstep = 5, 228 | enhencestep = 5, 229 | reg = 0.001) 230 | 231 | bls.fit(traindata,trainlabel) 232 | predictlabel = bls.predict(testdata) 233 | print(show_accuracy(predictlabel,testlabel)) 234 | 235 | -------------------------------------------------------------------------------- /BroadLearning/bls_mapping.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn import preprocessing 3 | from numpy import random 4 | from sklearn.datasets import load_iris 5 | from sklearn.model_selection import train_test_split 6 | import pandas as pd 7 | 8 | def show_accuracy(predictLabel,Label): 9 | Label = np.ravel(Label).tolist() 10 | predictLabel = predictLabel.tolist() 11 | count = 0 12 | for i in range(len(Label)): 13 | if Label[i] == predictLabel[i]: 14 | count += 1 15 | return (round(count/len(Label),5)) 16 | 17 | class scaler: 18 | def __init__(self): 19 | self._mean = 0 20 | self._std = 0 21 | 22 | def fit_transform(self,traindata): 23 | self._mean = traindata.mean(axis = 0) 24 | self._std = traindata.std(axis = 0) 25 | return (traindata-self._mean)/self._std 26 | 27 | def transform(self,testdata): 28 | return (testdata-self._mean)/self._std 29 | 30 | class node_generator: 31 | def __init__(self,whiten = False): 32 | self.Wlist = [] 33 | self.blist = [] 34 | self.nonlinear = 0 35 | self.whiten = whiten 36 | 37 | def sigmoid(self,data): 38 | return 1.0/(1+np.exp(-data)) 39 | 40 | def linear(self,data): 41 | return data 42 | 43 | def tanh(self,data): 44 | return (np.exp(data)-np.exp(-data))/(np.exp(data)+np.exp(-data)) 45 | 46 | def relu(self,data): 47 | return np.maximum(data,0) 48 | 49 | def orth(self,W): 50 | for i in range(0,W.shape[1]): 51 | w = np.mat(W[:,i].copy()).T 52 | w_sum = 0 53 | for j in range(i): 54 | wj = np.mat(W[:,j].copy()).T 55 | w_sum += (w.T.dot(wj))[0,0]*wj 56 | w -= w_sum 57 | w = w/np.sqrt(w.T.dot(w)) 58 | W[:,i] = np.ravel(w) 59 | return W 60 | 61 | def generator(self,shape,times): 62 | for i in range(times): 63 | W = 2*random.random(size=shape)-1 64 | if self.whiten == True: 65 | W = self.orth(W) 66 | b = 2*random.random()-1 67 | yield (W,b) 68 | 69 | def generator_nodes(self, data, times, batchsize, nonlinear): 70 | self.Wlist = [elem[0] for elem in self.generator((data.shape[1],batchsize),times)] 71 | self.blist = [elem[1] for elem in self.generator((data.shape[1],batchsize),times)] 72 | 73 | self.nonlinear = {'linear':self.linear, 74 | 'sigmoid':self.sigmoid, 75 | 'tanh':self.tanh, 76 | 'relu':self.relu 77 | }[nonlinear] 78 | nodes = self.nonlinear(data.dot(self.Wlist[0])+self.blist[0]) 79 | for i in range(1,len(self.Wlist)): 80 | nodes = np.column_stack((nodes, self.nonlinear(data.dot(self.Wlist[i])+self.blist[i]))) 81 | return nodes 82 | 83 | def transform(self,testdata): 84 | testnodes = self.nonlinear(testdata.dot(self.Wlist[0])+self.blist[0]) 85 | for i in range(1,len(self.Wlist)): 86 | testnodes = np.column_stack((testnodes, self.nonlinear(testdata.dot(self.Wlist[i])+self.blist[i]))) 87 | return testnodes 88 | 89 | def update(self,otherW, otherb): 90 | self.Wlist += otherW 91 | self.blist += otherb 92 | 93 | 94 | class broadnet_mapping: 95 | def __init__(self, 96 | maptimes = 10, 97 | enhencetimes = 10, 98 | traintimes = 100, 99 | map_function = 'linear', 100 | enhence_function = 'linear', 101 | batchsize = 'auto', 102 | acc = 1, 103 | step = 1, 104 | reg = 0.001): 105 | 106 | self._maptimes = maptimes 107 | self._enhencetimes = enhencetimes 108 | 
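        # traintimes caps how many batches of extra mapping nodes may be
        # added, acc is the training accuracy at which adding stops early,
        # and step is the number of node batches added per iteration.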
self._batchsize = batchsize 109 | self._traintimes = traintimes 110 | self._acc = acc 111 | self._step = step 112 | self._reg = reg 113 | self._map_function = map_function 114 | self._enhence_function = enhence_function 115 | 116 | self.W = 0 117 | self.pesuedoinverse = 0 118 | 119 | self.normalscaler = scaler() 120 | self.onehotencoder = preprocessing.OneHotEncoder(sparse = False) 121 | self.mapping_generator = node_generator() 122 | self.enhence_generator = node_generator(whiten = True) 123 | self.local_mapgeneratorlist = [] 124 | 125 | def fit(self,oridata,orilabel): 126 | if self._batchsize == 'auto': 127 | self._batchsize = oridata.shape[1] 128 | data = self.normalscaler.fit_transform(oridata) 129 | label = self.onehotencoder.fit_transform(np.mat(orilabel).T) 130 | 131 | mappingdata = self.mapping_generator.generator_nodes(data,self._maptimes,self._batchsize,self._map_function) 132 | enhencedata = self.enhence_generator.generator_nodes(mappingdata,self._enhencetimes,self._batchsize,self._enhence_function) 133 | inputdata = np.column_stack((mappingdata,enhencedata)) 134 | 135 | self.pesuedoinverse = self.pinv(inputdata) 136 | self.W = self.pesuedoinverse.dot(label) 137 | 138 | Y = self.predict(oridata) 139 | accuracy, i = self.accuracy(Y,orilabel),0 140 | print("inital setting, number of mapping nodes {0}, number of enhence nodes {1}, accuracy {2}".format(mappingdata.shape[1],enhencedata.shape[1],round(accuracy,5))) 141 | 142 | while i < self._traintimes and accuracy < self._acc: 143 | Y = self.addingmap_predict(data, label, self._step,self._batchsize) 144 | accuracy = self.accuracy(Y,orilabel) 145 | i += 1 146 | print("mapping {3}, number of mapping nodes {0}, number of enhence nodes {1}, accuracy {2}".format((len(self.mapping_generator.Wlist)+len(self.local_mapgeneratorlist)*len(self.local_mapgeneratorlist[0].Wlist))*self._batchsize,len(self.enhence_generator.Wlist)*self._batchsize,round(accuracy,5),i)) 147 | 148 | def pinv(self,A): 149 | return np.mat(self._reg*np.eye(A.shape[1])+A.T.dot(A)).I.dot(A.T) 150 | 151 | def decode(self,Y_onehot): 152 | Y = [] 153 | for i in range(Y_onehot.shape[0]): 154 | lis = np.ravel(Y_onehot[i,:]).tolist() 155 | Y.append(lis.index(max(lis))) 156 | return np.array(Y) 157 | 158 | def accuracy(self,predictlabel,label): 159 | label = np.ravel(label).tolist() 160 | predictlabel = predictlabel.tolist() 161 | count = 0 162 | for i in range(len(label)): 163 | if label[i] == predictlabel[i]: 164 | count += 1 165 | return (round(count/len(label),5)) 166 | 167 | def predict(self,testdata): 168 | testdata = self.normalscaler.transform(testdata) 169 | test_inputdata = self.transform(testdata) 170 | return self.decode(test_inputdata.dot(self.W)) 171 | 172 | def transform(self,data): 173 | mappingdata = self.mapping_generator.transform(data) 174 | enhencedata = self.enhence_generator.transform(mappingdata) 175 | inputdata = np.column_stack((mappingdata,enhencedata)) 176 | for elem in self.local_mapgeneratorlist: 177 | inputdata = np.column_stack((inputdata , elem.transform(data))) 178 | return inputdata 179 | 180 | def addingmap_nodes(self, data, label, step = 1, batchsize = 'auto'): 181 | if batchsize == 'auto': 182 | batchsize = data.shape[1] 183 | 184 | inputdata = self.transform(data) 185 | localmap_generator = node_generator() 186 | extramap_nodes = localmap_generator.generator_nodes(data,step,batchsize,self._map_function) 187 | 188 | D = self.pesuedoinverse.dot(extramap_nodes) 189 | C = extramap_nodes - inputdata.dot(D) 190 | BT = self.pinv(C) if (C == 
0).any() else np.mat((D.T.dot(D)+np.eye(D.shape[1]))).I.dot(D.T).dot(self.pesuedoinverse) 191 | 192 | self.W = np.row_stack((self.W-D.dot(BT).dot(label),BT.dot(label))) 193 | self.pesuedoinverse = np.row_stack((self.pesuedoinverse - D.dot(BT),BT)) 194 | self.local_mapgeneratorlist.append(localmap_generator) 195 | 196 | def addingmap_predict(self, data, label, step = 1, batchsize = 'auto'): 197 | self.addingmap_nodes(data, label, step, batchsize) 198 | test_inputdata = self.transform(data) 199 | return self.decode(test_inputdata.dot(self.W)) 200 | 201 | 202 | iris=load_iris() 203 | X = iris.data 204 | Y = iris.target 205 | traindata,testdata,trainlabel,testlabel = train_test_split(X,Y,test_size=0.2,random_state = 2018) 206 | print(traindata.shape,trainlabel.shape,testdata.shape,testlabel.shape) 207 | 208 | 209 | 210 | bls = broadnet_mapping(maptimes = 10, 211 | enhencetimes = 10, 212 | traintimes = 100, 213 | map_function = 'linear', 214 | enhence_function = 'sigmoid', 215 | batchsize = 'auto', 216 | acc = 1, 217 | step = 5, 218 | reg = 0.001) 219 | 220 | bls.fit(traindata,trainlabel) 221 | predictlabel = bls.predict(testdata) 222 | print(show_accuracy(predictlabel,testlabel)) 223 | 224 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Mr. Little Zhu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Broad-Learning-System 2 | BLS Code 3 | 4 | A related introduction can be found at https://blog.csdn.net/Liangjun_Feng/article/details/80541689 5 | 6 | The code follows the paper "Broad Learning System: An Effective and Efficient Incremental Learning System Without the Need for Deep Architecture". 7 | 
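## Quick start

A minimal usage sketch, assuming scikit-learn is installed and that the `broadnet` class and `show_accuracy` helper from `BroadLearning/bls.py` are available in the session (each script also runs its own demo at module level, and `OneHotEncoder(sparse = False)` requires an older scikit-learn release):

```python
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# iris stands in here for any (n_samples x n_features, integer-label) dataset
X, y = load_iris(return_X_y=True)
traindata, testdata, trainlabel, testlabel = train_test_split(X, y, test_size=0.2)

bls = broadnet(maptimes=10,             # batches of mapping nodes
               enhencetimes=10,         # batches of enhancement nodes
               map_function='relu',
               enhence_function='relu',
               batchsize=100,           # nodes per batch
               reg=0.001)               # ridge term used in the pseudoinverse
bls.fit(traindata, trainlabel)
predictlabel = bls.predict(testdata)
print(show_accuracy(predictlabel, testlabel))
```
--------------------------------------------------------------------------------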