├── .gitignore
├── examples
│   ├── Eclat_TEST.py
│   ├── Apriori_TEST.py
│   ├── FP_growth_TEST.py
│   ├── GMM_TEST.py
│   ├── KMeans_TEST.py
│   ├── DBSCAN_TEST.py
│   ├── Logistic_TEST.py
│   ├── SVM_TEST.py
│   ├── AdaptiveBoost_TEST.py
│   ├── NaiveBayes_TEST.py
│   ├── KNN_TEST.py
│   ├── RandomForest_TEST.py
│   ├── Perceptron_TEST.py
│   ├── DecisionTree_TEST.py
│   ├── HMM_TEST.py
│   ├── PCA_TEST.py
│   ├── LDA_TEST.py
│   ├── LinearRegression_TEST.py
│   ├── TreeRegression_TEST.py
│   ├── Stacking_TEST.py
│   └── Blending_TEST.py
├── preProcess.py
├── dataset
│   ├── dataset2
│   │   ├── train.txt
│   │   └── test.txt
│   ├── dataset3
│   │   └── test.txt
│   ├── dataset5
│   │   ├── train.txt
│   │   └── test.txt
│   └── dataset1
│       ├── test.txt
│       └── train.txt
├── README.md
├── GradientBoostingDecisionTree.py
├── GMM.py
├── Blending.py
├── KNN.py
├── Stacking.py
├── DimensionReduction.py
├── FeatureCombination.py
├── LogisticRegression.py
├── NaiveBayes.py
├── Perceptron.py
├── AdaBoost.py
├── RandomForest.py
├── LinearRegression.py
├── TreeRegression.py
├── DecisionTree.py
├── LICENSE
└── SVM.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.idea/

--------------------------------------------------------------------------------
/examples/Eclat_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: Eclat_TEST.py
@ Author: Ryuk
@ Create Date: 2019-06-02
@ Update Date: 2019-06-02
@ Description: Test Eclat
"""

from AssociationAnalysis import Eclat
import numpy as np
import pandas as pd
import time

trainData = [['bread', 'milk', 'vegetable', 'fruit', 'eggs'],
             ['noodle', 'beef', 'pork', 'water', 'socks', 'gloves', 'shoes', 'rice'],
             ['socks', 'gloves'],
             ['bread', 'milk', 'shoes', 'socks', 'eggs'],
             ['socks', 'shoes', 'sweater', 'cap', 'milk', 'vegetable', 'gloves'],
             ['eggs', 'bread', 'milk', 'fish', 'crab', 'shrimp', 'rice']]

time_start1 = time.time()
clf1 = Eclat()
pred1 = clf1.train(trainData)
time_end1 = time.time()
print("Runtime of Eclat:", time_end1-time_start1)

--------------------------------------------------------------------------------
/examples/Apriori_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: Apriori_TEST.py
@ Author: Ryuk
@ Create Date: 2019-05-28
@ Update Date: 2019-05-31
@ Description: Test Apriori
"""

from AssociationAnalysis import Apriori
import numpy as np
import pandas as pd
import time

trainData = [['bread', 'milk', 'vegetable', 'fruit', 'eggs'],
             ['noodle', 'beef', 'pork', 'water', 'socks', 'gloves', 'shoes', 'rice'],
             ['socks', 'gloves'],
             ['bread', 'milk', 'shoes', 'socks', 'eggs'],
             ['socks', 'shoes', 'sweater', 'cap', 'milk', 'vegetable', 'gloves'],
             ['eggs', 'bread', 'milk', 'fish', 'crab', 'shrimp', 'rice']]

time_start1 = time.time()
clf1 = Apriori()
pred1 = clf1.train(trainData)
time_end1 = time.time()
print("Runtime of Apriori:", time_end1-time_start1)

--------------------------------------------------------------------------------
/examples/FP_growth_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: FP_growth_TEST.py
@ Author: Ryuk
@ Create Date: 2019-05-30
@ Update Date: 2019-05-31
@ Description: Test FP-growth
"""

from AssociationAnalysis import FPgrowth
import numpy as np
import pandas as pd
import time

trainData = [['bread', 'milk', 'vegetable', 'fruit', 'eggs'],
             ['noodle', 'beef', 'pork', 'water', 'socks', 'gloves', 'shoes', 'rice'],
             ['socks', 'gloves'],
             ['bread', 'milk', 'shoes', 'socks', 'eggs'],
             ['socks', 'shoes', 'sweater', 'cap', 'milk', 'vegetable', 'gloves'],
             ['eggs', 'bread', 'milk', 'fish', 'crab', 'shrimp', 'rice']]

time_start1 = time.time()
clf1 = FPgrowth()
pred1 = clf1.train(trainData)
time_end1 = time.time()
print("Runtime of FP-growth:", time_end1-time_start1)
--------------------------------------------------------------------------------
/examples/GMM_TEST.py:
--------------------------------------------------------------------------------
"""
@FileName: GMM_TEST.py
@Description: Test GMM
@Author: Ryuk
@CreateDate: 2021/06/03
@LastEditTime: 2021/06/03
@Version: v0.1
"""

from sklearn.mixture import GaussianMixture
from GMM import *
import matplotlib.pyplot as plt
import time
from sklearn.datasets import make_blobs


X, y_true = make_blobs(n_samples=400, centers=4, cluster_std=0.60, random_state=0)

time_start1 = time.time()
clf1 = GaussianMixtureModel(K=4)
pred = clf1.train(X)
time_end1 = time.time()
print("Runtime of GMM:", time_end1-time_start1)


time_start2 = time.time()
clf2 = GaussianMixture(n_components=4)
pred2 = clf2.fit_predict(X)
time_end2 = time.time()
print("Runtime of Sklearn GMM:", time_end2-time_start2)
plt.scatter(X[:, 0], X[:, 1], c=pred2)
plt.title('Sklearn GMM')
plt.show()

--------------------------------------------------------------------------------
/examples/KMeans_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: KMeans_TEST.py
@ Author: Ryuk
@ Create Date: 2019-05-16
@ Update Date: 2019-05-28
@ Description: Test KMeans
"""
import matplotlib.pyplot as plt
from Cluster import KMeans as kmeans
from sklearn.cluster import KMeans
import numpy as np
import pandas as pd
import time

trainData = pd.read_table('../dataset/dataset6/train.txt', header=None, encoding='gb2312', delim_whitespace=True)
trainData = np.array(trainData)

time_start1 = time.time()
clf1 = kmeans(k=4, cluster_type="KMeans")
pred1 = clf1.train(trainData)
time_end1 = time.time()
print("Runtime of KMeans:", time_end1-time_start1)

time_start2 = time.time()
clf2 = kmeans(k=4, cluster_type="biKMeans")
pred = clf2.train(trainData)
time_end2 = time.time()
print("Runtime of biKMeans:", time_end2-time_start2)

time_start3 = time.time()
clf3 = kmeans(k=4, cluster_type="KMeans++")
pred3 = clf3.train(trainData)
time_end3 = time.time()
print("Runtime of KMeans++:", time_end3-time_start3)
--------------------------------------------------------------------------------
/examples/DBSCAN_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: DBSCAN_TEST.py
@ Author: Ryuk
@ Create Date: 2019-05-20
@ Update Date: 2019-05-20
@ Description: Test DBSCAN
"""

from Cluster import KMeans as kmeans
from Cluster import DBSCAN as dbscan
from sklearn.cluster import DBSCAN

import time
import matplotlib.pyplot as plt
from sklearn import datasets

X1, y1 = datasets.make_circles(n_samples=5000, factor=.6, noise=.05)
trainData = X1[0:1000]

time_start1 = time.time()
clf1 = kmeans(k=4, cluster_type="KMeans")
pred1 = clf1.train(trainData)
time_end1 = time.time()
print("Runtime of KMeans:", time_end1-time_start1)

time_start2 = time.time()
clf2 = dbscan()
pred = clf2.train(trainData)
time_end2 = time.time()
print("Runtime of DBSCAN:", time_end2-time_start2)

time_start3 = time.time()
clf3 = DBSCAN(eps=0.1, min_samples=10)
clf3.fit(trainData)
pred3 = clf3.labels_
time_end3 = time.time()
plt.scatter(trainData[:, 0], trainData[:, 1], c=pred3)
plt.title('Sklearn DBSCAN')
plt.show()
print("Runtime of Sklearn DBSCAN:", time_end3-time_start3)

--------------------------------------------------------------------------------
/examples/Logistic_TEST.py:
--------------------------------------------------------------------------------
from sklearn.linear_model import LogisticRegression
from LogisticRegression import *
import numpy as np
import pandas as pd
import time

trainData = np.array(pd.read_table('../dataset/dataset3/train.txt', header=None, encoding='gb2312', delim_whitespace=True))
testData = np.array(pd.read_table('../dataset/dataset3/test.txt', header=None, encoding='gb2312', delim_whitespace=True))
trainLabel = trainData[:, -1]
trainData = np.delete(trainData, -1, axis=1)
testLabel = testData[:, -1]
testData = np.delete(testData, -1, axis=1)

time_start1 = time.time()
clf1 = LogisticRegressionClassifier()
clf1.train(trainData, trainLabel)
clf1.predict(testData)
score1 = clf1.accuarcy(testLabel)
time_end1 = time.time()
print("Accuracy of self-LogisticRegression: %f" % score1)
print("Runtime of self-LogisticRegression:", time_end1-time_start1)

time_start = time.time()
clf = LogisticRegression()
clf.fit(trainData, trainLabel)
clf.predict(testData)
score = clf.score(testData, testLabel, sample_weight=None)
time_end = time.time()
print("Accuracy of sklearn-LogisticRegression: %f" % score)
print("Runtime of sklearn-LogisticRegression:", time_end-time_start)

--------------------------------------------------------------------------------
/examples/SVM_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: SVM_TEST.py
@ Author: Ryuk
@ Create Date: 2019-05-05
@ Update Date: 2019-05-05
@ Description: Test SVM
"""

from sklearn.svm import SVC
from SVM import *
import numpy as np
import pandas as pd
import time

trainData = np.array(pd.read_table('../dataset/dataset2/train.txt', header=None, encoding='gb2312', delim_whitespace=True))
testData = np.array(pd.read_table('../dataset/dataset2/test.txt', header=None, encoding='gb2312', delim_whitespace=True))
trainLabel = trainData[:, -1]
trainData = np.delete(trainData, -1, axis=1)
testLabel = testData[:, -1]
testData = np.delete(testData, -1, axis=1)

time_start1 = time.time()
clf1 = SVMClassifier()
clf1.train(trainData, trainLabel)
clf1.predict(testData)
score1 = clf1.accuarcy(testLabel)
time_end1 = time.time()
print("Accuracy of self-SVM: %f" % score1)
print("Runtime of self-SVM:", time_end1-time_start1)

time_start = time.time()
clf = SVC()
clf.fit(trainData, trainLabel)
clf.predict(testData)
score = clf.score(testData, testLabel, sample_weight=None)
time_end = time.time()
print("Accuracy of sklearn-SVM: %f" % score)
print("Runtime of sklearn-SVM:", time_end-time_start)
--------------------------------------------------------------------------------
/examples/AdaptiveBoost_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: AdaptiveBoost_TEST.py
@ Author: Ryuk
@ Create Date: 2019-05-28
@ Update Date: 2019-05-31
@ Description: Test AdaBoost
"""
from AdaBoost import *
from SVM import *
import numpy as np
import pandas as pd
import time

trainData = np.array(pd.read_table('../dataset/dataset2/train.txt', header=None, encoding='gb2312', delim_whitespace=True))
testData = np.array(pd.read_table('../dataset/dataset2/test.txt', header=None, encoding='gb2312', delim_whitespace=True))
trainLabel = trainData[:, -1]
trainData = np.delete(trainData, -1, axis=1)
testLabel = testData[:, -1]
testData = np.delete(testData, -1, axis=1)

time_start1 = time.time()
clf1 = SVMClassifier()
clf1.train(trainData, trainLabel)
clf1.predict(testData)
score1 = clf1.accuarcy(testLabel)
time_end1 = time.time()
print("Accuracy of SVM: %f" % score1)
print("Runtime of SVM:", time_end1-time_start1)

time_start2 = time.time()
clf2 = Adaboost()
clf2.train(trainData, trainLabel)
clf2.predict(testData)
score2 = clf2.accuarcy(testLabel)
time_end2 = time.time()
print("Accuracy of Adaboost: %f" % score2)
print("Runtime of Adaboost:", time_end2-time_start2)

--------------------------------------------------------------------------------
/examples/NaiveBayes_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: NaiveBayes_TEST.py
@ Author: Ryuk
@ Create Date: 2019-05-16
@ Update Date: 2019-05-16
@ Description: Test NaiveBayes
"""
from sklearn.naive_bayes import BernoulliNB
from NaiveBayes import *
import numpy as np
import pandas as pd
import time

trainData = pd.read_table('../dataset/dataset1/train.txt', header=None, encoding='gb2312', delim_whitespace=True)
testData = pd.read_table('../dataset/dataset1/test.txt', header=None, encoding='gb2312', delim_whitespace=True)
trainLabel = np.array(trainData.pop(3))
trainData = np.array(trainData)
testLabel = np.array(testData.pop(3))
testData = np.array(testData)

time_start1 = time.time()
clf1 = BayesClassifier()
clf1.train(trainData, trainLabel)
clf1.predict(testData)
score1 = clf1.accuarcy(testLabel)
time_end1 = time.time()
print("Accuracy of self-Bayes: %f" % score1)
print("Runtime of self-Bayes:", time_end1-time_start1)

time_start = time.time()
clf = BernoulliNB()
clf.fit(trainData, trainLabel)
clf.predict(testData)
score = clf.score(testData, testLabel, sample_weight=None)
time_end = time.time()
print("Accuracy of sklearn-Bayes: %f" % score)
print("Runtime of sklearn-Bayes:", time_end-time_start)
--------------------------------------------------------------------------------
/examples/KNN_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: KNN_TEST.py
@ Author: Ryuk
@ Create Date: 2019-05-16
@ Update Date: 2019-05-28
@ Description: Test KNN
"""
from sklearn.neighbors import KNeighborsClassifier
from KNN import *
import numpy as np
import pandas as pd
import time

trainData = pd.read_table('../dataset/dataset1/train.txt', header=None, encoding='gb2312', delim_whitespace=True)
testData = pd.read_table('../dataset/dataset1/test.txt', header=None, encoding='gb2312', delim_whitespace=True)
trainLabel = np.array(trainData.pop(3))
trainData = np.array(trainData)
testLabel = np.array(testData.pop(3))
testData = np.array(testData)

time_start1 = time.time()
clf1 = KNNClassifier(k=6)
clf1.train(trainData, trainLabel)
clf1.predict(testData)
score1 = clf1.showDetectionResult(testData, testLabel)
time_end1 = time.time()
print("Accuracy of self-KNN: %f" % score1)
print("Runtime of self-KNN:", time_end1-time_start1)

time_start = time.time()
knn = KNeighborsClassifier(n_neighbors=6)
knn.fit(trainData, trainLabel)
knn.predict(testData)
score = knn.score(testData, testLabel, sample_weight=None)
time_end = time.time()
print("Accuracy of sklearn-KNN: %f" % score)
print("Runtime of sklearn-KNN:", time_end-time_start)

--------------------------------------------------------------------------------
/examples/RandomForest_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: RandomForest_TEST.py
@ Author: Ryuk
@ Create Date: 2019-07-10
@ Update Date: 2019-07-10
@ Description: Test RandomForest
"""
from RandomForest import RandomForestClassifier, RandomForestRegression
import numpy as np
import pandas as pd
import time
from DecisionTree import *

trainData = pd.read_table('../dataset/dataset1/train.txt', header=None, encoding='gb2312', delim_whitespace=True)
testData = pd.read_table('../dataset/dataset1/test.txt', header=None, encoding='gb2312', delim_whitespace=True)
trainLabel = np.array(trainData.pop(3))
trainData = np.array(trainData)
testLabel = np.array(testData.pop(3))
testData = np.array(testData)

time_start1 = time.time()
clf1 = DecisionTreeClassifier()
clf1.train(trainData, trainLabel)
clf1.predict(testData)
score1 = clf1.accuarcy(testLabel)
time_end1 = time.time()
print("Accuracy of self-DecisionTree: %f" % score1)
print("Runtime of self-DecisionTree:", time_end1-time_start1)

time_start = time.time()
clf = RandomForestClassifier()
clf.train(trainData, trainLabel)
clf.predict(testData)
score = clf.accuarcy(testLabel)
time_end = time.time()
print("Accuracy of RandomForest: %f" % score)
print("Runtime of RandomForest:", time_end-time_start)
--------------------------------------------------------------------------------
/preProcess.py:
--------------------------------------------------------------------------------
import numpy as np

'''
Function: Normalization
Description: Normalize input data. For vector x, the normalization process is given by
             normalization(x) = (x - min(x)) / (max(x) - min(x))
Input:  data      dataType: ndarray   description: input data
Output: normdata  dataType: ndarray   description: output data after normalization
'''

def Normalization(data):
    # get the max and min value of each column
    minValue = data.min(axis=0)
    maxValue = data.max(axis=0)
    diff = maxValue - minValue
    # normalization
    mindata = np.tile(minValue, (data.shape[0], 1))
    normdata = (data - mindata) / np.tile(diff, (data.shape[0], 1))
    return normdata

'''
Function: Standardization
Description: Standardize input data. For vector x, the standardization process is given by
             standardization(x) = (x - mean(x)) / std(x)
Input:  data          dataType: ndarray   description: input data
Output: standarddata  dataType: ndarray   description: output data after standardization
'''

def Standardization(data):
    # get the mean and the standard deviation of each column
    meanValue = data.mean(axis=0)
    stdValue = data.std(axis=0)
    standarddata = (data - np.tile(meanValue, (data.shape[0], 1))) / np.tile(stdValue, (data.shape[0], 1))
    return standarddata
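A minimal usage sketch for these two helpers; the toy matrix below is made up purely for illustration, and both functions treat columns as features:

```python
import numpy as np
from preProcess import Normalization, Standardization

# toy matrix: 3 samples x 2 features (illustrative values only)
data = np.array([[1.0, 200.0],
                 [2.0, 300.0],
                 [3.0, 400.0]])

print(Normalization(data))    # each column rescaled into [0, 1]
print(Standardization(data))  # each column shifted to zero mean, unit std
```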
--------------------------------------------------------------------------------
/examples/Perceptron_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: Perceptron_TEST.py
@ Author: Ryuk
@ Create Date: 2019-05-05
@ Update Date: 2019-05-05
@ Description: Test Perceptron
"""

from sklearn.neural_network import MLPClassifier
from Perceptron import *
import numpy as np
import pandas as pd
import time

trainData = np.array(pd.read_table('../dataset/dataset3/train.txt', header=None, encoding='gb2312', delim_whitespace=True))
testData = np.array(pd.read_table('../dataset/dataset3/test.txt', header=None, encoding='gb2312', delim_whitespace=True))
trainLabel = trainData[:, -1]
trainData = np.delete(trainData, -1, axis=1)
testLabel = testData[:, -1]
testData = np.delete(testData, -1, axis=1)

time_start1 = time.time()
clf1 = PerceptronClassifier()
clf1.train(trainData, trainLabel)
clf1.predict(testData)
score1 = clf1.accuarcy(testLabel)
time_end1 = time.time()
print("Accuracy of self-Perceptron: %f" % score1)
print("Runtime of self-Perceptron:", time_end1-time_start1)

time_start = time.time()
clf = MLPClassifier()
clf.fit(trainData, trainLabel)
clf.predict(testData)
score = clf.score(testData, testLabel, sample_weight=None)
time_end = time.time()
print("Accuracy of sklearn-MLP: %f" % score)
print("Runtime of sklearn-MLP:", time_end-time_start)

--------------------------------------------------------------------------------
/examples/DecisionTree_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: DecisionTree_TEST.py
@ Author: Ryuk
@ Create Date: 2019-05-16
@ Update Date: 2019-05-28
@ Description: Test DecisionTree
"""
from sklearn import tree
from DecisionTree import *
import numpy as np
import pandas as pd
import time

trainData = pd.read_table('../dataset/dataset1/train.txt', header=None, encoding='gb2312', delim_whitespace=True)
testData = pd.read_table('../dataset/dataset1/test.txt', header=None, encoding='gb2312', delim_whitespace=True)
trainLabel = np.array(trainData.pop(3))
trainData = np.array(trainData)
testLabel = np.array(testData.pop(3))
testData = np.array(testData)

time_start1 = time.time()
clf1 = DecisionTreeClassifier()
clf1.train(trainData, trainLabel)
clf1.predict(testData)
score1 = clf1.accuarcy(testLabel)
time_end1 = time.time()
print("Accuracy of self-DecisionTree: %f" % score1)
print("Runtime of self-DecisionTree:", time_end1-time_start1)

time_start = time.time()
clf = tree.DecisionTreeClassifier()
clf.fit(trainData, trainLabel)
clf.predict(testData)
score = clf.score(testData, testLabel, sample_weight=None)
time_end = time.time()
print("Accuracy of sklearn-DecisionTree: %f" % score)
print("Runtime of sklearn-DecisionTree:", time_end-time_start)

--------------------------------------------------------------------------------
/examples/HMM_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: HMM_TEST.py
@ Author: Ryuk
@ Create Date: 2019-06-12
@ Update Date: 2019-06-16
@ Description: Test HMM
"""

from HMM import HiddenMarkovModel
import numpy as np
import time

Q = np.array([0, 1])  # hot 0, cold 1
V = np.array([0, 1, 2])
O = np.array([[2, 2, 1], [0, 0, 1], [0, 1, 2]])
I = np.array([[0, 0, 1], [1, 1, 1], [1, 0, 0]])
test = np.array([0, 1, 2])

# supervised learning algorithm
time_start1 = time.time()
clf1 = HiddenMarkovModel(Q, V)
clf1.train(O, I)
time_end1 = time.time()
print("Supervised learning parameters:")
print("Transition probability matrix\n", clf1.A)
print("Observation probability matrix\n", clf1.B)
print("Initial state probability\n", clf1.Pi)
print("Prediction of supervised learning", clf1.predict(test))
print("Runtime of supervised learning:", time_end1-time_start1)
print("________________BOUNDARY_______________________________________")

# unsupervised learning algorithm
time_start2 = time.time()
clf2 = HiddenMarkovModel(Q, V)
clf2.train(O)
time_end2 = time.time()
print("Unsupervised learning parameters:")
print("Transition probability matrix\n", clf2.A)
print("Observation probability matrix\n", clf2.B)
print("Initial state probability\n", clf2.Pi)
print("Prediction of unsupervised learning", clf2.predict(test))
print("Runtime of unsupervised learning:", time_end2-time_start2)
--------------------------------------------------------------------------------
/examples/PCA_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: PCA_TEST.py
@ Author: Ryuk
@ Create Date: 2019-06-03
@ Update Date: 2019-06-06
@ Description: Test PCA
"""

from DimensionReduction import PCA
from sklearn.decomposition import PCA as pca
import numpy as np
import time
from sklearn.linear_model import LogisticRegression
import pandas as pd

trainData = np.array(pd.read_table('../dataset/dataset1/train.txt', header=None, encoding='gb2312', delim_whitespace=True))
testData = np.array(pd.read_table('../dataset/dataset1/test.txt', header=None, encoding='gb2312', delim_whitespace=True))
train_y = trainData[:, -1]
train_x = np.delete(trainData, -1, axis=1)
test_y = testData[:, -1]
test_x = np.delete(testData, -1, axis=1)

time_start1 = time.time()
clf1 = PCA()
clf1.train(train_x)
train_x = clf1.transformData(train_x)
test_x = clf1.transformData(test_x)
clf = LogisticRegression(solver='liblinear', multi_class='ovr')
clf.fit(train_x, train_y)
print("Accuracy of PCA:", clf.score(test_x, test_y))
time_end1 = time.time()
print("Runtime of PCA:", time_end1-time_start1)

time_start2 = time.time()
# restore the raw features, which the transforms above overwrote
train_x = np.delete(trainData, -1, axis=1)
test_x = np.delete(testData, -1, axis=1)
clf2 = pca(n_components=1)
train_x = clf2.fit_transform(train_x)
test_x = clf2.transform(test_x)
clf = LogisticRegression(solver='liblinear', multi_class='ovr')
clf.fit(train_x, train_y)
print("Accuracy of sklearn PCA:", clf.score(test_x, test_y))
time_end2 = time.time()
print("Runtime of sklearn PCA:", time_end2-time_start2)

--------------------------------------------------------------------------------
/examples/LDA_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: LDA_TEST.py
@ Author: Ryuk
@ Create Date: 2019-06-04
@ Update Date: 2019-06-04
@ Description: Test LDA
"""

from sklearn.model_selection import train_test_split
from DimensionReduction import LDA
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import time
from sklearn.linear_model import LogisticRegression
import pandas as pd

trainData = np.array(pd.read_table('../dataset/dataset1/train.txt', header=None, encoding='gb2312', delim_whitespace=True))
testData = np.array(pd.read_table('../dataset/dataset1/test.txt', header=None, encoding='gb2312', delim_whitespace=True))
train_y = trainData[:, -1]
train_x = np.delete(trainData, -1, axis=1)
test_y = testData[:, -1]
test_x = np.delete(testData, -1, axis=1)

time_start1 = time.time()
clf1 = LDA()
clf1.train(train_x, train_y)
train_x = clf1.transformData(train_x)
test_x = clf1.transformData(test_x)
clf = LogisticRegression()
clf.fit(train_x, train_y)
print("Accuracy of LDA:", clf.score(test_x, test_y))
time_end1 = time.time()
print("Runtime of LDA:", time_end1-time_start1)


time_start2 = time.time()
# restore the raw features, which the transforms above overwrote
train_x = np.delete(trainData, -1, axis=1)
test_x = np.delete(testData, -1, axis=1)
clf2 = LinearDiscriminantAnalysis(n_components=1)
train_x = clf2.fit_transform(train_x, train_y)
test_x = clf2.transform(test_x)
clf = LogisticRegression()
clf.fit(train_x, train_y)
print("Accuracy of sklearn LDA:", clf.score(test_x, test_y))
time_end2 = time.time()
print("Runtime of sklearn LDA:", time_end2-time_start2)
--------------------------------------------------------------------------------
/examples/LinearRegression_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: LinearRegression_TEST.py
@ Author: Ryuk
@ Create Date: 2019-05-10
@ Update Date: 2019-05-10
@ Description: Test LinearRegression
"""


from LinearRegression import *
from sklearn import linear_model
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time

def plot(real_label, regression_label):
    # test_label = np.expand_dims(test_label, axis=1)
    plot1 = plt.plot(regression_label, 'r*', label='Regression values')
    plot2 = plt.plot(real_label, 'b', label='Real values')
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.legend(loc=3)
    plt.title('Regression')
    plt.show()

trainData = np.array(pd.read_table('../dataset/dataset4/train.txt', header=None, encoding='gb2312', delim_whitespace=True))
testData = np.array(pd.read_table('../dataset/dataset4/test.txt', header=None, encoding='gb2312', delim_whitespace=True))
trainLabel = trainData[:, -1]
trainData = np.delete(trainData, -1, axis=1)
testLabel = testData[:, -1]
testData = np.delete(testData, -1, axis=1)

time_start1 = time.time()
clf1 = linear_model.LinearRegression()
clf1.fit(trainData, trainLabel)
regression_label = clf1.predict(testData)
time_end1 = time.time()
plot(testLabel, regression_label)
print("Runtime of Sklearn-linear regression:", time_end1-time_start1)

time_start2 = time.time()
clf2 = Regression()
clf2.train(trainData, trainLabel)
regression_label2 = clf2.predict(testData)
time_end2 = time.time()
plot(testLabel, regression_label2)
print("Runtime of self-linear regression:", time_end2-time_start2)

--------------------------------------------------------------------------------
/examples/TreeRegression_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: TreeRegression_TEST.py
@ Author: Ryuk
@ Create Date: 2019-05-13
@ Update Date: 2019-05-15
@ Description: Test TreeRegression
"""

import matplotlib.pyplot as plt
from sklearn import linear_model

from TreeRegression import *
from sklearn.tree import DecisionTreeRegressor
import numpy as np
import pandas as pd
import time

def plot(real_label, regression_label):
    # test_label = np.expand_dims(test_label, axis=1)
    plot1 = plt.plot(regression_label, 'r*', label='Regression values')
    plot2 = plt.plot(real_label, 'b', label='Real values')
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.legend(loc=3)
    plt.title('Tree Regression')
    plt.show()


trainData = np.array(pd.read_table('../dataset/dataset5/train.txt', header=None, encoding='gb2312', delim_whitespace=True))
testData = np.array(pd.read_table('../dataset/dataset5/test.txt', header=None, encoding='gb2312', delim_whitespace=True))
trainLabel = trainData[:, -1]
trainData = np.delete(trainData, -1, axis=1)
testLabel = testData[:, -1]
testData = np.delete(testData, -1, axis=1)


time_start1 = time.time()
clf1 = DecisionTreeRegressor()
clf1.fit(trainData, trainLabel)
regression_label = clf1.predict(testData)
time_end1 = time.time()
plot(testLabel, regression_label)
print("Runtime of Sklearn-tree regression:", time_end1-time_start1)

time_start2 = time.time()
clf2 = treeRegression()
clf2.train(trainData, trainLabel)
regression_label2 = clf2.predict(testData)
time_end2 = time.time()
plot(testLabel, regression_label2)
print("Runtime of self-tree regression:", time_end2-time_start2)
--------------------------------------------------------------------------------
/examples/Stacking_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: Stacking_TEST.py
@ Author: Ryuk
@ Create Date: 2019-05-05
@ Update Date: 2019-05-05
@ Description: Test Stacking
"""

from Stacking import *
from Perceptron import *
from LogisticRegression import *
import numpy as np
import pandas as pd
import time

trainData = np.array(pd.read_table('../dataset/dataset3/train.txt', header=None, encoding='gb2312', delim_whitespace=True))
testData = np.array(pd.read_table('../dataset/dataset3/test.txt', header=None, encoding='gb2312', delim_whitespace=True))
trainLabel = trainData[:, -1]
trainData = np.delete(trainData, -1, axis=1)
testLabel = testData[:, -1]
testData = np.delete(testData, -1, axis=1)

clfs = [PerceptronClassifier(), PerceptronClassifier(), LogisticRegressionClassifier(), LogisticRegressionClassifier()]

time_start1 = time.time()
clf1 = StackingClassifier(classifier_set=clfs)
clf1.train(trainData, trainLabel)
clf1.predict(testData)
score1 = clf1.accuarcy(testLabel)
time_end1 = time.time()
print("Accuracy of self-Stacking: %f" % score1)
print("Runtime of self-Stacking:", time_end1-time_start1)

time_start2 = time.time()
clf2 = LogisticRegressionClassifier()
clf2.train(trainData, trainLabel)
clf2.predict(testData)
score2 = clf2.accuarcy(testLabel)
time_end2 = time.time()
print("Accuracy of self-Logistic: %f" % score2)
print("Runtime of self-Logistic:", time_end2-time_start2)

time_start3 = time.time()
clf3 = PerceptronClassifier()
clf3.train(trainData, trainLabel)
clf3.predict(testData)
score3 = clf3.accuarcy(testLabel)
time_end3 = time.time()
print("Accuracy of self-Perceptron: %f" % score3)
print("Runtime of self-Perceptron:", time_end3-time_start3)

--------------------------------------------------------------------------------
/examples/Blending_TEST.py:
--------------------------------------------------------------------------------
"""
@ Filename: Blending_TEST.py
@ Author: Ryuk
@ Create Date: 2019-05-04
@ Update Date: 2019-05-04
@ Description: Test Blending
"""

from Blending import *
from Perceptron import *
from LogisticRegression import *
import numpy as np
import pandas as pd
import time

trainData = np.array(pd.read_table('../dataset/dataset3/train.txt', header=None, encoding='gb2312', delim_whitespace=True))
testData = np.array(pd.read_table('../dataset/dataset3/test.txt', header=None, encoding='gb2312', delim_whitespace=True))
trainLabel = trainData[:, -1]
trainData = np.delete(trainData, -1, axis=1)
testLabel = testData[:, -1]
testData = np.delete(testData, -1, axis=1)

clfs = [PerceptronClassifier(), PerceptronClassifier(), LogisticRegressionClassifier(), LogisticRegressionClassifier()]

time_start1 = time.time()
clf1 = BlendingClassifier(classifier_set=clfs)
clf1.train(trainData, trainLabel)
clf1.predict(testData)
score1 = clf1.accuarcy(testLabel)
time_end1 = time.time()
print("Accuracy of self-Blending: %f" % score1)
print("Runtime of self-Blending:", time_end1-time_start1)

time_start2 = time.time()
clf2 = LogisticRegressionClassifier()
clf2.train(trainData, trainLabel)
clf2.predict(testData)
score2 = clf2.accuarcy(testLabel)
time_end2 = time.time()
print("Accuracy of self-Logistic: %f" % score2)
print("Runtime of self-Logistic:", time_end2-time_start2)

time_start3 = time.time()
clf3 = PerceptronClassifier()
clf3.train(trainData, trainLabel)
clf3.predict(testData)
score3 = clf3.accuarcy(testLabel)
time_end3 = time.time()
print("Accuracy of self-Perceptron: %f" % score3)
print("Runtime of 
self-Perceptron:", time_end3-time_start3) 51 | 52 | 53 | -------------------------------------------------------------------------------- /dataset/dataset2/train.txt: -------------------------------------------------------------------------------- 1 | -0.214824 0.662756 -1.000000 2 | -0.061569 -0.091875 1.000000 3 | 0.406933 0.648055 -1.000000 4 | 0.223650 0.130142 1.000000 5 | 0.231317 0.766906 -1.000000 6 | -0.748800 -0.531637 -1.000000 7 | -0.557789 0.375797 -1.000000 8 | 0.207123 -0.019463 1.000000 9 | 0.286462 0.719470 -1.000000 10 | 0.195300 -0.179039 1.000000 11 | -0.152696 -0.153030 1.000000 12 | 0.384471 0.653336 -1.000000 13 | -0.117280 -0.153217 1.000000 14 | -0.238076 0.000583 1.000000 15 | -0.413576 0.145681 1.000000 16 | 0.490767 -0.680029 -1.000000 17 | 0.199894 -0.199381 1.000000 18 | -0.356048 0.537960 -1.000000 19 | -0.392868 -0.125261 1.000000 20 | 0.353588 -0.070617 1.000000 21 | 0.020984 0.925720 -1.000000 22 | -0.475167 -0.346247 -1.000000 23 | 0.074952 0.042783 1.000000 24 | 0.394164 -0.058217 1.000000 25 | 0.663418 0.436525 -1.000000 26 | 0.402158 0.577744 -1.000000 27 | -0.449349 -0.038074 1.000000 28 | 0.619080 -0.088188 -1.000000 29 | 0.268066 -0.071621 1.000000 30 | -0.015165 0.359326 1.000000 31 | 0.539368 -0.374972 -1.000000 32 | -0.319153 0.629673 -1.000000 33 | 0.694424 0.641180 -1.000000 34 | 0.079522 0.193198 1.000000 35 | 0.253289 -0.285861 1.000000 36 | -0.035558 -0.010086 1.000000 37 | -0.403483 0.474466 -1.000000 38 | -0.034312 0.995685 -1.000000 39 | -0.590657 0.438051 -1.000000 40 | -0.098871 -0.023953 1.000000 41 | -0.250001 0.141621 1.000000 42 | -0.012998 0.525985 -1.000000 43 | 0.153738 0.491531 -1.000000 44 | 0.388215 -0.656567 -1.000000 45 | 0.049008 0.013499 1.000000 46 | 0.068286 0.392741 1.000000 47 | 0.747800 -0.066630 -1.000000 48 | 0.004621 -0.042932 1.000000 49 | -0.701600 0.190983 -1.000000 50 | 0.055413 -0.024380 1.000000 51 | 0.035398 -0.333682 1.000000 52 | 0.211795 0.024689 1.000000 53 | -0.045677 0.172907 1.000000 54 | 0.595222 0.209570 -1.000000 55 | 0.229465 0.250409 1.000000 56 | -0.089293 0.068198 1.000000 57 | 0.384300 -0.176570 1.000000 58 | 0.834912 -0.110321 -1.000000 59 | -0.307768 0.503038 -1.000000 60 | -0.777063 -0.348066 -1.000000 61 | 0.017390 0.152441 1.000000 62 | -0.293382 -0.139778 1.000000 63 | -0.203272 0.286855 1.000000 64 | 0.957812 -0.152444 -1.000000 65 | 0.004609 -0.070617 1.000000 66 | -0.755431 0.096711 -1.000000 67 | -0.526487 0.547282 -1.000000 68 | -0.246873 0.833713 -1.000000 69 | 0.185639 -0.066162 1.000000 70 | 0.851934 0.456603 -1.000000 71 | -0.827912 0.117122 -1.000000 72 | 0.233512 -0.106274 1.000000 73 | 0.583671 -0.709033 -1.000000 74 | -0.487023 0.625140 -1.000000 75 | -0.448939 0.176725 1.000000 76 | 0.155907 -0.166371 1.000000 77 | 0.334204 0.381237 -1.000000 78 | 0.081536 -0.106212 1.000000 79 | 0.227222 0.527437 -1.000000 80 | 0.759290 0.330720 -1.000000 81 | 0.204177 -0.023516 1.000000 82 | 0.577939 0.403784 -1.000000 83 | -0.568534 0.442948 -1.000000 84 | -0.011520 0.021165 1.000000 85 | 0.875720 0.422476 -1.000000 86 | 0.297885 -0.632874 -1.000000 87 | -0.015821 0.031226 1.000000 88 | 0.541359 -0.205969 -1.000000 89 | -0.689946 -0.508674 -1.000000 90 | -0.343049 0.841653 -1.000000 91 | 0.523902 -0.436156 -1.000000 92 | 0.249281 -0.711840 -1.000000 93 | 0.193449 0.574598 -1.000000 94 | -0.257542 -0.753885 -1.000000 95 | -0.021605 0.158080 1.000000 96 | 0.601559 -0.727041 -1.000000 97 | -0.791603 0.095651 -1.000000 98 | -0.908298 -0.053376 -1.000000 99 | 0.122020 0.850966 -1.000000 100 | 
-0.725568 -0.292022 -1.000000 101 | -------------------------------------------------------------------------------- /dataset/dataset2/test.txt: -------------------------------------------------------------------------------- 1 | 0.676771 -0.486687 -1.000000 2 | 0.008473 0.186070 1.000000 3 | -0.727789 0.594062 -1.000000 4 | 0.112367 0.287852 1.000000 5 | 0.383633 -0.038068 1.000000 6 | -0.927138 -0.032633 -1.000000 7 | -0.842803 -0.423115 -1.000000 8 | -0.003677 -0.367338 1.000000 9 | 0.443211 -0.698469 -1.000000 10 | -0.473835 0.005233 1.000000 11 | 0.616741 0.590841 -1.000000 12 | 0.557463 -0.373461 -1.000000 13 | -0.498535 -0.223231 -1.000000 14 | -0.246744 0.276413 1.000000 15 | -0.761980 -0.244188 -1.000000 16 | 0.641594 -0.479861 -1.000000 17 | -0.659140 0.529830 -1.000000 18 | -0.054873 -0.238900 1.000000 19 | -0.089644 -0.244683 1.000000 20 | -0.431576 -0.481538 -1.000000 21 | -0.099535 0.728679 -1.000000 22 | -0.188428 0.156443 1.000000 23 | 0.267051 0.318101 1.000000 24 | 0.222114 -0.528887 -1.000000 25 | 0.030369 0.113317 1.000000 26 | 0.392321 0.026089 1.000000 27 | 0.298871 -0.915427 -1.000000 28 | -0.034581 -0.133887 1.000000 29 | 0.405956 0.206980 1.000000 30 | 0.144902 -0.605762 -1.000000 31 | 0.274362 -0.401338 1.000000 32 | 0.397998 -0.780144 -1.000000 33 | 0.037863 0.155137 1.000000 34 | -0.010363 -0.004170 1.000000 35 | 0.506519 0.486619 -1.000000 36 | 0.000082 -0.020625 1.000000 37 | 0.057761 -0.155140 1.000000 38 | 0.027748 -0.553763 -1.000000 39 | -0.413363 -0.746830 -1.000000 40 | 0.081500 -0.014264 1.000000 41 | 0.047137 -0.491271 1.000000 42 | -0.267459 0.024770 1.000000 43 | -0.148288 -0.532471 -1.000000 44 | -0.225559 -0.201622 1.000000 45 | 0.772360 -0.518986 -1.000000 46 | -0.440670 0.688739 -1.000000 47 | 0.329064 -0.095349 1.000000 48 | 0.970170 -0.010671 -1.000000 49 | -0.689447 -0.318722 -1.000000 50 | -0.465493 -0.227468 -1.000000 51 | -0.049370 0.405711 1.000000 52 | -0.166117 0.274807 1.000000 53 | 0.054483 0.012643 1.000000 54 | 0.021389 0.076125 1.000000 55 | -0.104404 -0.914042 -1.000000 56 | 0.294487 0.440886 -1.000000 57 | 0.107915 -0.493703 -1.000000 58 | 0.076311 0.438860 1.000000 59 | 0.370593 -0.728737 -1.000000 60 | 0.409890 0.306851 -1.000000 61 | 0.285445 0.474399 -1.000000 62 | -0.870134 -0.161685 -1.000000 63 | -0.654144 -0.675129 -1.000000 64 | 0.285278 -0.767310 -1.000000 65 | 0.049548 -0.000907 1.000000 66 | 0.030014 -0.093265 1.000000 67 | -0.128859 0.278865 1.000000 68 | 0.307463 0.085667 1.000000 69 | 0.023440 0.298638 1.000000 70 | 0.053920 0.235344 1.000000 71 | 0.059675 0.533339 -1.000000 72 | 0.817125 0.016536 -1.000000 73 | -0.108771 0.477254 1.000000 74 | -0.118106 0.017284 1.000000 75 | 0.288339 0.195457 1.000000 76 | 0.567309 -0.200203 -1.000000 77 | -0.202446 0.409387 1.000000 78 | -0.330769 -0.240797 1.000000 79 | -0.422377 0.480683 -1.000000 80 | -0.295269 0.326017 1.000000 81 | 0.261132 0.046478 1.000000 82 | -0.492244 -0.319998 -1.000000 83 | -0.384419 0.099170 1.000000 84 | 0.101882 -0.781145 -1.000000 85 | 0.234592 -0.383446 1.000000 86 | -0.020478 -0.901833 -1.000000 87 | 0.328449 0.186633 1.000000 88 | -0.150059 -0.409158 1.000000 89 | -0.155876 -0.843413 -1.000000 90 | -0.098134 -0.136786 1.000000 91 | 0.110575 -0.197205 1.000000 92 | 0.219021 0.054347 1.000000 93 | 0.030152 0.251682 1.000000 94 | 0.033447 -0.122824 1.000000 95 | -0.686225 -0.020779 -1.000000 96 | -0.911211 -0.262011 -1.000000 97 | 0.572557 0.377526 -1.000000 98 | -0.073647 -0.519163 -1.000000 99 | -0.281830 -0.797236 -1.000000 100 | -0.555263 
0.126232 -1.000000 101 | -------------------------------------------------------------------------------- /dataset/dataset3/test.txt: -------------------------------------------------------------------------------- 1 | 2 1 38.50 54 20 0 1 2 2 3 4 1 2 2 5.90 0 2 42.00 6.30 0 0 1 2 | 2 1 37.60 48 36 0 0 1 1 0 3 0 0 0 0 0 0 44.00 6.30 1 5.00 1 3 | 1 1 37.7 44 28 0 4 3 2 5 4 4 1 1 0 3 5 45 70 3 2 1 4 | 1 1 37 56 24 3 1 4 2 4 4 3 1 1 0 0 0 35 61 3 2 0 5 | 2 1 38.00 42 12 3 0 3 1 1 0 1 0 0 0 0 2 37.00 5.80 0 0 1 6 | 1 1 0 60 40 3 0 1 1 0 4 0 3 2 0 0 5 42 72 0 0 1 7 | 2 1 38.40 80 60 3 2 2 1 3 2 1 2 2 0 1 1 54.00 6.90 0 0 1 8 | 2 1 37.80 48 12 2 1 2 1 3 0 1 2 0 0 2 0 48.00 7.30 1 0 1 9 | 2 1 37.90 45 36 3 3 3 2 2 3 1 2 1 0 3 0 33.00 5.70 3 0 1 10 | 2 1 39.00 84 12 3 1 5 1 2 4 2 1 2 7.00 0 4 62.00 5.90 2 2.20 0 11 | 2 1 38.20 60 24 3 1 3 2 3 3 2 3 3 0 4 4 53.00 7.50 2 1.40 1 12 | 1 1 0 140 0 0 0 4 2 5 4 4 1 1 0 0 5 30 69 0 0 0 13 | 1 1 37.90 120 60 3 3 3 1 5 4 4 2 2 7.50 4 5 52.00 6.60 3 1.80 0 14 | 2 1 38.00 72 36 1 1 3 1 3 0 2 2 1 0 3 5 38.00 6.80 2 2.00 1 15 | 2 9 38.00 92 28 1 1 2 1 1 3 2 3 0 7.20 0 0 37.00 6.10 1 1.10 1 16 | 1 1 38.30 66 30 2 3 1 1 2 4 3 3 2 8.50 4 5 37.00 6.00 0 0 1 17 | 2 1 37.50 48 24 3 1 1 1 2 1 0 1 1 0 3 2 43.00 6.00 1 2.80 1 18 | 1 1 37.50 88 20 2 3 3 1 4 3 3 0 0 0 0 0 35.00 6.40 1 0 0 19 | 2 9 0 150 60 4 4 4 2 5 4 4 0 0 0 0 0 0 0 0 0 0 20 | 1 1 39.7 100 30 0 0 6 2 4 4 3 1 0 0 4 5 65 75 0 0 0 21 | 1 1 38.30 80 0 3 3 4 2 5 4 3 2 1 0 4 4 45.00 7.50 2 4.60 1 22 | 2 1 37.50 40 32 3 1 3 1 3 2 3 2 1 0 0 5 32.00 6.40 1 1.10 1 23 | 1 1 38.40 84 30 3 1 5 2 4 3 3 2 3 6.50 4 4 47.00 7.50 3 0 0 24 | 1 1 38.10 84 44 4 0 4 2 5 3 1 1 3 5.00 0 4 60.00 6.80 0 5.70 0 25 | 2 1 38.70 52 0 1 1 1 1 1 3 1 0 0 0 1 3 4.00 74.00 0 0 1 26 | 2 1 38.10 44 40 2 1 3 1 3 3 1 0 0 0 1 3 35.00 6.80 0 0 1 27 | 2 1 38.4 52 20 2 1 3 1 1 3 2 2 1 0 3 5 41 63 1 1 1 28 | 1 1 38.20 60 0 1 0 3 1 2 1 1 1 1 0 4 4 43.00 6.20 2 3.90 1 29 | 2 1 37.70 40 18 1 1 1 0 3 2 1 1 1 0 3 3 36.00 3.50 0 0 1 30 | 1 1 39.1 60 10 0 1 1 0 2 3 0 0 0 0 4 4 0 0 0 0 1 31 | 2 1 37.80 48 16 1 1 1 1 0 1 1 2 1 0 4 3 43.00 7.50 0 0 1 32 | 1 1 39.00 120 0 4 3 5 2 2 4 3 2 3 8.00 0 0 65.00 8.20 3 4.60 1 33 | 1 1 38.20 76 0 2 3 2 1 5 3 3 1 2 6.00 1 5 35.00 6.50 2 0.90 1 34 | 2 1 38.30 88 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 35 | 1 1 38.00 80 30 3 3 3 1 0 0 0 0 0 6.00 0 0 48.00 8.30 0 4.30 1 36 | 1 1 0 0 0 3 1 1 1 2 3 3 1 3 6.00 4 4 0 0 2 0 0 37 | 1 1 37.60 40 0 1 1 1 1 1 1 1 0 0 0 1 1 0 0 2 2.10 1 38 | 2 1 37.50 44 0 1 1 1 1 3 3 2 0 0 0 0 0 45.00 5.80 2 1.40 1 39 | 2 1 38.2 42 16 1 1 3 1 1 3 1 0 0 0 1 0 35 60 1 1 1 40 | 2 1 38 56 44 3 3 3 0 0 1 1 2 1 0 4 0 47 70 2 1 1 41 | 2 1 38.30 45 20 3 3 2 2 2 4 1 2 0 0 4 0 0 0 0 0 1 42 | 1 1 0 48 96 1 1 3 1 0 4 1 2 1 0 1 4 42.00 8.00 1 0 1 43 | 1 1 37.70 55 28 2 1 2 1 2 3 3 0 3 5.00 4 5 0 0 0 0 1 44 | 2 1 36.00 100 20 4 3 6 2 2 4 3 1 1 0 4 5 74.00 5.70 2 2.50 0 45 | 1 1 37.10 60 20 2 0 4 1 3 0 3 0 2 5.00 3 4 64.00 8.50 2 0 1 46 | 2 1 37.10 114 40 3 0 3 2 2 2 1 0 0 0 0 3 32.00 0 3 6.50 1 47 | 1 1 38.1 72 30 3 3 3 1 4 4 3 2 1 0 3 5 37 56 3 1 1 48 | 1 1 37.00 44 12 3 1 1 2 1 1 1 0 0 0 4 2 40.00 6.70 3 8.00 1 49 | 1 1 38.6 48 20 3 1 1 1 4 3 1 0 0 0 3 0 37 75 0 0 1 50 | 1 1 0 82 72 3 1 4 1 2 3 3 0 3 0 4 4 53 65 3 2 0 51 | 1 9 38.20 78 60 4 4 6 0 3 3 3 0 0 0 1 0 59.00 5.80 3 3.10 0 52 | 2 1 37.8 60 16 1 1 3 1 2 3 2 1 2 0 3 0 41 73 0 0 0 53 | 1 1 38.7 34 30 2 0 3 1 2 3 0 0 0 0 0 0 33 69 0 2 0 54 | 1 1 0 36 12 1 1 1 1 1 2 1 1 1 0 1 5 44.00 0 0 0 1 55 | 2 1 38.30 44 60 0 0 1 1 0 0 0 0 0 0 0 0 6.40 36.00 0 0 1 56 | 2 
1 37.40 54 18 3 0 1 1 3 4 3 2 2 0 4 5 30.00 7.10 2 0 1
57 | 1 1 0 0 0 4 3 0 2 2 4 1 0 0 0 0 0 54 76 3 2 1
58 | 1 1 36.6 48 16 3 1 3 1 4 1 1 1 1 0 0 0 27 56 0 0 0
59 | 1 1 38.5 90 0 1 1 3 1 3 3 3 2 3 2 4 5 47 79 0 0 1
60 | 1 1 0 75 12 1 1 4 1 5 3 3 0 3 5.80 0 0 58.00 8.50 1 0 1
61 | 2 1 38.20 42 0 3 1 1 1 1 1 2 2 1 0 3 2 35.00 5.90 2 0 1
62 | 1 9 38.20 78 60 4 4 6 0 3 3 3 0 0 0 1 0 59.00 5.80 3 3.10 0
63 | 2 1 38.60 60 30 1 1 3 1 4 2 2 1 1 0 0 0 40.00 6.00 1 0 1
64 | 2 1 37.80 42 40 1 1 1 1 1 3 1 0 0 0 3 3 36.00 6.20 0 0 1
65 | 1 1 38 60 12 1 1 2 1 2 1 1 1 1 0 1 4 44 65 3 2 0
66 | 2 1 38.00 42 12 3 0 3 1 1 1 1 0 0 0 0 1 37.00 5.80 0 0 1
67 | 2 1 37.60 88 36 3 1 1 1 3 3 2 1 3 1.50 0 0 44.00 6.00 0 0 0

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# MachineLearning
[![GPL-3.0 Licensed](https://img.shields.io/crates/l/rustc-serialize)](https://opensource.org/licenses/GPL-3.0) [![Python Version](https://img.shields.io/badge/Python-3.x-blue.svg)](https://www.python.org/)

Machine learning algorithms implemented from scratch in Python 3.6

## What's in it?
+ **Classification**
    1. [AdaBoost](https://github.com/DandelionLau/MachineLearning/tree/master/AdaBoost.py)
    2. [Blending](https://github.com/DandelionLau/MachineLearning/blob/master/Blending.py)
    3. [DecisionTree](https://github.com/DandelionLau/MachineLearning/blob/master/Tree.py)
    4. [GBDT](https://github.com/DandelionLau/MachineLearning/blob/master/GradientBoostingDecisionTree.py)
    5. [KNN](https://github.com/DandelionLau/MachineLearning/blob/master/KNN.py)
    6. [LogisticRegression](https://github.com/DandelionLau/MachineLearning/blob/master/LogisticRegression.py)
    7. [NaiveBayes](https://github.com/DandelionLau/MachineLearning/blob/master/NaiveBayes.py)
    8. [Perceptron](https://github.com/DandelionLau/MachineLearning/blob/master/Perceptron.py)
    9. [RandomForest](https://github.com/DandelionLau/MachineLearning/blob/master/RandomForest.py)
    10. [Stacking](https://github.com/DandelionLau/MachineLearning/blob/master/Stacking.py)
    11. [SVM](https://github.com/DandelionLau/MachineLearning/blob/master/SVM.py)

+ **Regression**
    1. [GBDT](https://github.com/DandelionLau/MachineLearning/blob/master/GradientBoostingDecisionTree.py)
    2. [LinearRegression](https://github.com/DandelionLau/MachineLearning/blob/master/LinearRegression.py)
    3. [LocallyWeightedLinearRegression](https://github.com/DandelionLau/MachineLearning/blob/master/LinearRegression.py)
    4. [LassoRegression](https://github.com/DandelionLau/MachineLearning/blob/master/LinearRegression.py)
    5. [RandomForest](https://github.com/DandelionLau/MachineLearning/blob/master/RandomForest.py)
    6. [RidgeRegression](https://github.com/DandelionLau/MachineLearning/blob/master/LinearRegression.py)
    7. [StepWiseRegression](https://github.com/DandelionLau/MachineLearning/blob/master/LinearRegression.py)
    8. [TreeRegression](https://github.com/DandelionLau/MachineLearning/blob/master/Tree.py)

+ **Cluster**
    1. [BiKmeans](https://github.com/DandelionLau/MachineLearning/blob/master/Cluster.py)
    2. [DBSCAN](https://github.com/DandelionLau/MachineLearning/blob/master/Cluster.py)
    3. [KMeans](https://github.com/DandelionLau/MachineLearning/blob/master/Cluster.py)
    4. [KMeans++](https://github.com/DandelionLau/MachineLearning/blob/master/Cluster.py)
    5. [GMM](https://github.com/Ryuk17/MachineLearning/blob/master/GMM.py)

+ **Association Analysis**
    1. [Apriori](https://github.com/DandelionLau/MachineLearning/blob/master/AssociationAnalysis.py)
    2. [Eclat](https://github.com/DandelionLau/MachineLearning/blob/master/AssociationAnalysis.py)
    3. [FP-growth](https://github.com/DandelionLau/MachineLearning/blob/master/AssociationAnalysis.py)

+ **Dimensionality Reduction**
    1. [LDA](https://github.com/DandelionLau/MachineLearning/blob/master/DimensionReduction.py)
    2. [PCA](https://github.com/DandelionLau/MachineLearning/blob/master/DimensionReduction.py)

+ **Others**
    1. [HMM](https://github.com/DandelionLau/MachineLearning/blob/master/HMM.py)


## Tutorials
Chinese tutorial: [从零实现机器学习算法](https://blog.csdn.net/sinat_35821976/category_9276758.html)
English tutorial: [Step-by-Step Guide To Implement Machine Learning](https://www.codeproject.com/script/Articles/MemberArticles.aspx?amid=14354398)

## Main References
1. [CS229: Machine Learning](http://cs229.stanford.edu/)
2. [Machine Learning IN ACTION](https://www.manning.com/books/machine-learning-in-action)
3. [统计学习方法 (Statistical Learning Methods)](https://baike.baidu.com/item/%E7%BB%9F%E8%AE%A1%E5%AD%A6%E4%B9%A0%E6%96%B9%E6%B3%95/10430179)

## Dependencies
1. Install [Python 3.6](https://www.python.org/)
2. Install [NumPy](http://www.numpy.org/)
3. Install [Scikit-learn](https://scikit-learn.org/)
4. Install [Pandas](https://pandas.pydata.org/)
5. Install [Matplotlib](https://matplotlib.org/)
--------------------------------------------------------------------------------
/GradientBoostingDecisionTree.py:
--------------------------------------------------------------------------------
"""
@ Filename: GradientBoostingDecisionTree.py
@ Author: Ryuk
@ Create Date: 2019-07-09
@ Update Date: 2019-07-10
@ Description: Implement GradientBoostingDecisionTree
"""

import numpy as np
from TreeRegression import RegressionTree
import pickle

class GBDTClassifier:
    def __init__(self, tree_num=10):
        self.tree_num = tree_num


class GBDTRegression:
    def __init__(self, tree_num=10, error_threshold=1, N=4, alpha=0.01, iterations=100):
        self.tree_num = tree_num
        self.error_threshold = error_threshold
        self.N = N
        self.alpha = alpha
        self.trees = []
        self.gamma = []                  # multiplier for each model
        self.residual = None
        self.iterations = iterations     # iterations for gamma
        self.last_prediction = None
        self.prediction = None

    '''
    Function: initializeModel
    Description: initialize the model with a constant first prediction
    Input: train_label    dataType: ndarray   description: train_label
    '''
    def initializeModel(self, train_label):
        # fit the constant initial prediction F0 by gradient descent on the squared loss
        x = np.mean(train_label)
        for i in range(self.iterations):
            error = train_label - x
            x = x + self.alpha * np.mean(error)
        self.residual = train_label - x
        self.last_prediction = x
        self.trees.append(x)
        self.gamma.append(1)

    '''
    Function: getGamma
    Description: get the multiplier gamma for the latest model
    Input: train_label         dataType: ndarray   description: labels
           last_prediction     dataType: ndarray   description: prediction of the previous ensemble
           current_prediction  dataType: ndarray   description: prediction of the new tree
    '''
    def getGamma(self, train_label, last_prediction, current_prediction):
        # fit gamma by gradient descent on the squared loss of the combined prediction
        gamma = 1.0
        for i in range(self.iterations):
            error = train_label - last_prediction - gamma * current_prediction
            gamma = gamma + self.alpha * np.mean(error * current_prediction)
        self.residual = train_label - last_prediction - gamma * current_prediction
        self.last_prediction = last_prediction + gamma * current_prediction
        self.gamma.append(gamma)

    '''
    Function: train
    Description: train the model
    Input: train_data     dataType: ndarray   description: features
           train_label    dataType: ndarray   description: labels
    Output: self          dataType: obj       description: the trained model
    '''
    def train(self, train_data, train_label):
        # initialize with the constant model
        self.initializeModel(train_label)

        # fit each tree to the current residuals
        for i in range(self.tree_num):
            clf = RegressionTree(self.error_threshold, self.N, self.alpha)
            clf.train(train_data, self.residual)
            prediction = clf.predict(train_data)
            self.trees.append(clf)
            self.getGamma(train_label, self.last_prediction, prediction)
        return self

    '''
    Function: predict
    Description: predict the testing set
    Input: test_data      dataType: ndarray   description: features
    Output: prediction    dataType: ndarray   description: the prediction results for testing set
    '''
    def predict(self, test_data):
        prediction = np.zeros(len(test_data))
        for i in range(len(self.trees)):
            if i == 0:
                # the first entry is the constant initial prediction
                prediction += self.gamma[i] * self.trees[i]
            else:
                clf_prediction = self.trees[i].predict(test_data)
                prediction += self.gamma[i] * clf_prediction

        self.prediction = prediction
        return prediction

    '''
    Function: save
    Description: save the model as pkl
    Input: filename    dataType: str   description: the path to save model
    '''
    def save(self, filename):
        f = open(filename, 'wb')
        model = {'trees': self.trees, 'gamma': self.gamma}
        pickle.dump(model, f)
        f.close()

    '''
    Function: load
    Description: load the model
    Input: filename    dataType: str   description: the path of the saved model
    Output: self       dataType: obj   description: the trained model
    '''
    def load(self, filename):
        f = open(filename, 'rb')
        model = pickle.load(f)
        self.trees = model['trees']
        self.gamma = model['gamma']
        return self
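A minimal usage sketch for GBDTRegression, assuming it is run from the repository root, that dataset5 keeps the last-column-is-target layout used by the other regression examples, and that `TreeRegression.RegressionTree` behaves as it is used inside `train()`:

```python
import numpy as np
import pandas as pd
from GradientBoostingDecisionTree import GBDTRegression

# load a regression dataset: the last column is the target (assumed layout)
trainData = np.array(pd.read_table('dataset/dataset5/train.txt', header=None,
                                   encoding='gb2312', delim_whitespace=True))
train_y = trainData[:, -1]
train_x = np.delete(trainData, -1, axis=1)

clf = GBDTRegression(tree_num=10)
clf.train(train_x, train_y)
print(clf.predict(train_x)[:5])  # first few fitted values
```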
--------------------------------------------------------------------------------
/GMM.py:
--------------------------------------------------------------------------------
"""
@FileName: GMM.py
@Description: Implement GMM
@Author: Ryuk
@CreateDate: 2021/05/30
@LastEditTime: 2021/05/30
@Version: v0.1
"""

import numpy as np
import pickle
import preProcess
from tqdm import tqdm
from scipy.stats import multivariate_normal
import matplotlib.pyplot as plt

class GaussianMixtureModel:
    def __init__(self, K, D=2, iterations=100, norm_type="Normalization"):
        self.norm_type = norm_type
        self.iterations = iterations
        self.K = K
        self.D = D
        self.N = 0
        self.alpha = np.random.dirichlet(np.ones(self.K))
        self.mu = np.random.rand(K, D)
        self.sigma = np.array([np.eye(self.D)] * K)
        self.gamma = None
        self.label = None

    '''
    Function: GaussianPDF
    Description: evaluate a Gaussian density with the given mu and sigma at x
    Input: mu       dataType: ndarray   description: mean vector
           sigma    dataType: ndarray   description: covariance matrix
           x        dataType: ndarray   description: samples
    Output: pdf     dataType: ndarray   description: density of each sample
    '''
    def GaussianPDF(self, mu, sigma, x):
        gaussian = multivariate_normal(mu, sigma)
        return gaussian.pdf(x)

    '''
    Function: train
    Description: train the model with EM
    Input: train_data    dataType: ndarray   description: features
    Output: label        dataType: ndarray   description: cluster label of each sample
    '''
    def train(self, train_data, plotResult=True):
        self.N = len(train_data)
        self.gamma = np.zeros([self.N, self.K])

        # if self.norm_type == "Standardization":
        #     train_data = preProcess.Standardization(train_data)
        # else:
        #     train_data = preProcess.Normalization(train_data)

        for i in tqdm(range(self.iterations)):
            # E-step: responsibilities gamma[j, k] = alpha_k * N(x_j | mu_k, sigma_k), then normalize per sample
            for k in range(self.K):
                self.gamma[:, k] = self.alpha[k] * self.GaussianPDF(self.mu[k], self.sigma[k], train_data)

            for j in range(self.N):
                self.gamma[j, :] = self.gamma[j, :] / np.sum(self.gamma[j, :])

            # M-step: update the weight, mean and covariance of each component
            for k in range(self.K):
                gamma_sum = np.sum(self.gamma[:, k])
                self.mu[k] = np.sum(np.dot(self.gamma[None, :, k], train_data), axis=0) / gamma_sum
                self.sigma[k] = (train_data - self.mu[k]).T * np.multiply(np.mat(train_data - self.mu[k]), np.mat(self.gamma[:, k]).T) / gamma_sum
                self.alpha[k] = gamma_sum / self.N
        self.label = np.argmax(self.gamma, axis=1)

        if plotResult:
            self.plotResult(train_data)
        return self.label


    '''
    Function: predict
    Description: predict the test data
    Input: test_data    dataType: ndarray   description: features
    Output: label       dataType: ndarray   description: the predicted label
    '''
    def predict(self, test_data):
        self.N = len(test_data)
        self.gamma = np.zeros([self.N, self.K])

        # a single E-step under the trained parameters
        for k in range(self.K):
            self.gamma[:, k] = self.alpha[k] * self.GaussianPDF(self.mu[k], self.sigma[k], test_data)
        for j in range(self.N):
            self.gamma[j, :] = self.gamma[j, :] / np.sum(self.gamma[j, :])
        self.label = np.argmax(self.gamma, axis=1)
        return self.label

    '''
    Function: plotResult
    Description: show the clustering result
    '''
    def plotResult(self, train_data):
        plt.scatter(train_data[:, 0], train_data[:, 1], c=self.label)
        plt.title('GMM')
        plt.show()

    '''
    Function: save
    Description: save the model as pkl
    Input: filename    dataType: str   description: the path to save model
    '''
    def save(self, filename):
        f = open(filename, 'wb')
        model = {'alpha': self.alpha, 'mu': self.mu, 'sigma': self.sigma}
        pickle.dump(model, f)
        f.close()

    '''
    Function: load
    Description: load the model
    Input: filename    dataType: str   description: the path of the saved model
    Output: self       dataType: obj   description: the trained model
    '''
    def load(self, filename):
        f = open(filename, 'rb')
        model = pickle.load(f)
        self.alpha = model['alpha']
        self.mu = model['mu']
        self.sigma = model['sigma']
        return self
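A short sketch of the save/load cycle, reusing the synthetic blobs from examples/GMM_TEST.py; the model filename here is arbitrary:

```python
from sklearn.datasets import make_blobs
from GMM import GaussianMixtureModel

X, _ = make_blobs(n_samples=400, centers=4, cluster_std=0.60, random_state=0)

clf = GaussianMixtureModel(K=4)
clf.train(X, plotResult=False)
clf.save('gmm_model.pkl')      # pickles alpha, mu and sigma

clf2 = GaussianMixtureModel(K=4)
clf2.load('gmm_model.pkl')
print(clf2.predict(X)[:10])    # E-step under the loaded parameters
```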
--------------------------------------------------------------------------------
/dataset/dataset5/train.txt:
--------------------------------------------------------------------------------
1 | 3.000000 46.852122
2 | 23.000000 178.676107
3 | 0.000000 86.154024
4 | 6.000000 68.707614
5 | 15.000000 139.737693
6 | 17.000000 141.988903
7 | 12.000000 94.477135
8 | 8.000000 86.083788
9 | 9.000000 97.265824
10 | 7.000000 80.400027
11 | 8.000000 83.414554
12 | 1.000000 52.525471
13 | 16.000000 127.060008
14 | 9.000000 101.639269
15 | 14.000000 146.412680
16 | 
15.000000 144.157101 17 | 17.000000 152.699910 18 | 19.000000 136.669023 19 | 21.000000 166.971736 20 | 21.000000 165.467251 21 | 3.000000 38.455193 22 | 6.000000 75.557721 23 | 4.000000 22.171763 24 | 5.000000 50.321915 25 | 0.000000 74.412428 26 | 5.000000 42.052392 27 | 1.000000 42.489057 28 | 14.000000 139.185416 29 | 21.000000 140.713725 30 | 5.000000 63.222944 31 | 5.000000 56.294626 32 | 9.000000 91.674826 33 | 22.000000 173.497655 34 | 17.000000 152.692482 35 | 9.000000 113.920633 36 | 1.000000 51.552411 37 | 9.000000 100.075315 38 | 16.000000 137.803868 39 | 18.000000 135.925777 40 | 3.000000 45.550762 41 | 16.000000 149.933224 42 | 2.000000 27.914173 43 | 6.000000 62.103546 44 | 20.000000 173.942381 45 | 12.000000 119.200505 46 | 6.000000 70.730214 47 | 16.000000 156.260832 48 | 15.000000 132.467643 49 | 19.000000 161.164086 50 | 17.000000 138.031844 51 | 23.000000 169.747881 52 | 11.000000 116.761920 53 | 4.000000 34.305905 54 | 6.000000 68.841160 55 | 10.000000 119.535227 56 | 20.000000 158.104763 57 | 18.000000 138.390511 58 | 5.000000 59.375794 59 | 7.000000 80.802300 60 | 11.000000 108.611485 61 | 10.000000 91.169028 62 | 15.000000 154.104819 63 | 5.000000 51.100287 64 | 3.000000 32.334330 65 | 15.000000 150.551655 66 | 10.000000 111.023073 67 | 0.000000 87.489950 68 | 2.000000 46.726299 69 | 7.000000 92.540440 70 | 15.000000 135.715438 71 | 19.000000 152.960552 72 | 19.000000 162.789223 73 | 21.000000 167.176240 74 | 22.000000 164.323358 75 | 12.000000 104.823071 76 | 1.000000 35.554328 77 | 11.000000 114.784640 78 | 1.000000 36.819570 79 | 12.000000 130.266826 80 | 12.000000 126.053312 81 | 18.000000 153.378289 82 | 7.000000 70.089159 83 | 15.000000 139.528624 84 | 19.000000 157.137999 85 | 23.000000 183.595248 86 | 7.000000 73.431043 87 | 11.000000 128.176167 88 | 22.000000 183.181247 89 | 13.000000 112.685801 90 | 18.000000 161.634783 91 | 6.000000 63.169478 92 | 7.000000 63.393975 93 | 19.000000 165.779578 94 | 14.000000 143.973398 95 | 22.000000 185.131852 96 | 3.000000 45.275591 97 | 6.000000 62.018003 98 | 0.000000 83.193398 99 | 7.000000 76.847802 100 | 19.000000 147.087386 101 | 7.000000 62.812086 102 | 1.000000 49.910068 103 | 11.000000 102.169335 104 | 11.000000 105.108121 105 | 6.000000 63.429817 106 | 12.000000 121.301542 107 | 17.000000 163.253962 108 | 13.000000 119.588698 109 | 0.000000 87.333807 110 | 20.000000 144.484066 111 | 21.000000 168.792482 112 | 23.000000 159.751246 113 | 20.000000 162.843592 114 | 14.000000 145.664069 115 | 19.000000 146.838515 116 | 12.000000 132.049377 117 | 18.000000 155.756119 118 | 22.000000 155.686345 119 | 7.000000 73.913958 120 | 1.000000 66.761881 121 | 7.000000 65.855450 122 | 6.000000 56.271026 123 | 19.000000 155.308523 124 | 12.000000 124.372873 125 | 17.000000 136.025960 126 | 14.000000 132.996861 127 | 21.000000 172.639791 128 | 17.000000 135.672594 129 | 8.000000 90.323742 130 | 5.000000 62.462698 131 | 16.000000 159.048794 132 | 14.000000 139.991227 133 | 3.000000 37.026678 134 | 9.000000 100.839901 135 | 9.000000 93.097395 136 | 15.000000 123.645221 137 | 15.000000 147.327185 138 | 1.000000 40.055830 139 | 0.000000 88.192829 140 | 17.000000 139.174517 141 | 22.000000 169.354493 142 | 17.000000 136.354272 143 | 9.000000 90.692829 144 | 7.000000 63.987997 145 | 14.000000 128.972231 146 | 10.000000 108.433394 147 | 2.000000 49.321034 148 | 19.000000 171.615671 149 | 9.000000 97.894855 150 | 0.000000 68.962453 151 | 9.000000 72.063371 152 | 22.000000 157.000070 153 | 12.000000 114.461754 154 | 6.000000 58.239465 155 
| 9.000000 104.601048 156 | 8.000000 90.772359 157 | 22.000000 164.428791 158 | 5.000000 34.804083 159 | 5.000000 37.089459 160 | 22.000000 177.987605 161 | 10.000000 89.439608 162 | 6.000000 70.711362 163 | 23.000000 181.731482 164 | 20.000000 151.538932 165 | 7.000000 66.067228 166 | 6.000000 61.565125 167 | 20.000000 184.441687 168 | 9.000000 91.569158 169 | 9.000000 98.833425 170 | 17.000000 144.352866 171 | 9.000000 94.498314 172 | 15.000000 121.922732 173 | 18.000000 166.408274 174 | 10.000000 89.571299 175 | 8.000000 75.373772 176 | 22.000000 161.001478 177 | 8.000000 90.594227 178 | 5.000000 57.180933 179 | 20.000000 161.643007 180 | 8.000000 87.197370 181 | 8.000000 95.584308 182 | 15.000000 126.207221 183 | 7.000000 84.528209 184 | 18.000000 161.056986 185 | 10.000000 86.762615 186 | 1.000000 33.325906 187 | 9.000000 105.095502 188 | 2.000000 22.440421 189 | 9.000000 93.449284 190 | 14.000000 106.249595 191 | 21.000000 163.254385 192 | 22.000000 161.746628 193 | 20.000000 152.973085 194 | 17.000000 122.918987 195 | 7.000000 58.536412 196 | 1.000000 45.013277 197 | 13.000000 137.294148 198 | 10.000000 88.123737 199 | 2.000000 45.847376 200 | 20.000000 163.385797 201 | -------------------------------------------------------------------------------- /dataset/dataset5/test.txt: -------------------------------------------------------------------------------- 1 | 12.000000 121.010516 2 | 19.000000 157.337044 3 | 12.000000 116.031825 4 | 15.000000 132.124872 5 | 2.000000 52.719612 6 | 6.000000 39.058368 7 | 3.000000 50.757763 8 | 20.000000 166.740333 9 | 11.000000 115.808227 10 | 21.000000 165.582995 11 | 3.000000 41.956087 12 | 3.000000 34.432370 13 | 13.000000 116.954676 14 | 1.000000 32.112553 15 | 7.000000 50.380243 16 | 7.000000 94.107791 17 | 23.000000 188.943179 18 | 18.000000 152.637773 19 | 9.000000 104.122082 20 | 18.000000 127.805226 21 | 0.000000 83.083232 22 | 15.000000 148.180104 23 | 3.000000 38.480247 24 | 8.000000 77.597839 25 | 7.000000 75.625803 26 | 11.000000 124.620208 27 | 13.000000 125.186698 28 | 5.000000 51.165922 29 | 3.000000 31.179113 30 | 15.000000 132.505727 31 | 19.000000 137.978043 32 | 9.000000 106.481123 33 | 20.000000 172.149955 34 | 11.000000 104.116556 35 | 4.000000 22.457996 36 | 20.000000 175.735047 37 | 18.000000 165.350412 38 | 22.000000 177.461724 39 | 16.000000 138.672986 40 | 17.000000 156.791788 41 | 19.000000 150.327544 42 | 19.000000 156.992196 43 | 23.000000 163.624262 44 | 8.000000 92.537227 45 | 3.000000 32.341399 46 | 16.000000 144.445614 47 | 11.000000 119.985586 48 | 16.000000 145.149335 49 | 12.000000 113.284662 50 | 5.000000 47.742716 51 | 11.000000 115.852585 52 | 3.000000 31.579325 53 | 1.000000 43.758671 54 | 1.000000 61.049125 55 | 13.000000 132.751826 56 | 23.000000 163.233087 57 | 12.000000 115.134296 58 | 8.000000 91.370839 59 | 8.000000 86.137955 60 | 14.000000 120.857934 61 | 3.000000 33.777477 62 | 10.000000 110.831763 63 | 10.000000 104.174775 64 | 20.000000 155.920696 65 | 4.000000 30.619132 66 | 0.000000 71.880474 67 | 7.000000 86.399516 68 | 7.000000 72.632906 69 | 5.000000 58.632985 70 | 18.000000 143.584511 71 | 23.000000 187.059504 72 | 6.000000 65.067119 73 | 6.000000 69.110280 74 | 19.000000 142.388056 75 | 15.000000 137.174489 76 | 21.000000 159.719092 77 | 9.000000 102.179638 78 | 20.000000 176.416294 79 | 21.000000 146.516385 80 | 18.000000 147.808343 81 | 23.000000 154.790810 82 | 16.000000 137.385285 83 | 18.000000 166.885975 84 | 15.000000 136.989000 85 | 20.000000 144.668679 86 | 14.000000 137.060671 87 | 
19.000000 140.468283 88 | 11.000000 98.344084 89 | 16.000000 132.497910 90 | 1.000000 59.143101 91 | 20.000000 152.299381 92 | 13.000000 134.487271 93 | 0.000000 77.805718 94 | 3.000000 28.543764 95 | 10.000000 97.751817 96 | 4.000000 41.223659 97 | 11.000000 110.017015 98 | 12.000000 119.391386 99 | 20.000000 158.872126 100 | 2.000000 38.776222 101 | 19.000000 150.496148 102 | 15.000000 131.505967 103 | 22.000000 179.856157 104 | 13.000000 143.090102 105 | 14.000000 142.611861 106 | 13.000000 120.757410 107 | 4.000000 27.929324 108 | 16.000000 151.530849 109 | 15.000000 148.149702 110 | 5.000000 44.188084 111 | 16.000000 141.135406 112 | 12.000000 119.817665 113 | 8.000000 80.991524 114 | 3.000000 29.308640 115 | 6.000000 48.203468 116 | 8.000000 92.179834 117 | 22.000000 162.720371 118 | 10.000000 91.971158 119 | 2.000000 33.481943 120 | 8.000000 88.528612 121 | 1.000000 54.042173 122 | 8.000000 92.002928 123 | 5.000000 45.614646 124 | 3.000000 34.319635 125 | 14.000000 129.140558 126 | 17.000000 146.807901 127 | 17.000000 157.694058 128 | 4.000000 37.080929 129 | 20.000000 169.942381 130 | 10.000000 114.675638 131 | 5.000000 34.913029 132 | 14.000000 137.889747 133 | 0.000000 79.043129 134 | 16.000000 139.084390 135 | 6.000000 53.340135 136 | 13.000000 142.772612 137 | 0.000000 73.103173 138 | 3.000000 37.717487 139 | 15.000000 134.116395 140 | 18.000000 138.748257 141 | 23.000000 180.779121 142 | 10.000000 93.721894 143 | 23.000000 166.958335 144 | 6.000000 74.473589 145 | 6.000000 73.006291 146 | 3.000000 34.178656 147 | 1.000000 33.395482 148 | 22.000000 149.933384 149 | 18.000000 154.858982 150 | 6.000000 66.121084 151 | 1.000000 60.816800 152 | 5.000000 55.681020 153 | 6.000000 61.251558 154 | 15.000000 125.452206 155 | 16.000000 134.310255 156 | 19.000000 167.999681 157 | 5.000000 40.074830 158 | 22.000000 162.658997 159 | 12.000000 109.473909 160 | 4.000000 44.743405 161 | 11.000000 122.419496 162 | 14.000000 139.852014 163 | 21.000000 160.045407 164 | 15.000000 131.999358 165 | 15.000000 135.577799 166 | 20.000000 173.494629 167 | 8.000000 82.497177 168 | 12.000000 123.122032 169 | 10.000000 97.592026 170 | 16.000000 141.345706 171 | 8.000000 79.588881 172 | 3.000000 54.308878 173 | 4.000000 36.112937 174 | 19.000000 165.005336 175 | 23.000000 172.198031 176 | 15.000000 127.699625 177 | 1.000000 47.305217 178 | 13.000000 115.489379 179 | 8.000000 103.956569 180 | 4.000000 53.669477 181 | 0.000000 76.220652 182 | 12.000000 114.153306 183 | 6.000000 74.608728 184 | 3.000000 41.339299 185 | 5.000000 21.944048 186 | 22.000000 181.455655 187 | 20.000000 171.691444 188 | 10.000000 104.299002 189 | 21.000000 168.307123 190 | 20.000000 169.556523 191 | 23.000000 175.960552 192 | 1.000000 42.554778 193 | 14.000000 137.286185 194 | 16.000000 136.126561 195 | 12.000000 119.269042 196 | 6.000000 63.426977 197 | 4.000000 27.728212 198 | 4.000000 32.687588 199 | 23.000000 151.153204 200 | 15.000000 129.767331 201 | -------------------------------------------------------------------------------- /Blending.py: -------------------------------------------------------------------------------- 1 | """ 2 | @ Filename: Blending .py 3 | @ Author: Ryuk 4 | @ Create Date: 2019-05-04 5 | @ Update Date: 2019-05-04 6 | @ Description: Implement Blending 7 | """ 8 | 9 | from sklearn.model_selection import StratifiedKFold, train_test_split 10 | from Perceptron import * 11 | import numpy as np 12 | import preProcess 13 | import pickle 14 | import random 15 | 16 | 17 | class BlendingClassifier: 18 | def 
__init__(self, norm_type="Normalization", classifier_set=None): 19 | self.norm_type = norm_type 20 | self.classifier_set = classifier_set 21 | self.k = len(self.classifier_set) # the number of classifiers 22 | self.layer1_classifier_set = None 23 | self.layer2_classifier = None 24 | self.prediction = None 25 | self.probability = None 26 | 27 | ''' 28 | Function: train 29 | Description: train the model 30 | Input: train_data dataType: ndarray description: features 31 | train_label dataType: ndarray description: labels 32 | Output: self dataType: obj description: the trained model 33 | ''' 34 | def train(self, train_data, train_label): 35 | if self.norm_type == "Standardization": 36 | train_data = preProcess.Standardization(train_data) 37 | else: 38 | train_data = preProcess.Normalization(train_data) 39 | 40 | train_data1, train_data2, train_label1, train_label2 = train_test_split(train_data, train_label, test_size=0.5, random_state=2019) 41 | # meta-features: predictions on the held-out half become the training set of the second layer 42 | train_predict_feature = np.zeros((train_data2.shape[0], self.k)) 43 | trained_model = [] 44 | 45 | # the first layer in Blending 46 | for j, clf in enumerate(self.classifier_set): 47 | # train each submodel on the first half 48 | print(j, clf) 49 | clf.train(train_data1, train_label1) 50 | train_predict_feature[:, j] = clf.predict(train_data2)[:, 0] 51 | # save the trained model in the first layer 52 | trained_model.append(clf) 53 | 54 | # the second layer in Blending 55 | layer2_clf = PerceptronClassifier() 56 | layer2_clf.train(train_predict_feature, train_label2) 57 | 58 | self.layer1_classifier_set = trained_model 59 | self.layer2_classifier = layer2_clf 60 | 61 | return self 62 | 63 | ''' 64 | Function: predict 65 | Description: predict the testing set 66 | Input: test_data dataType: ndarray description: features 67 | prob dataType: bool description: whether to return the probability instead of the hard label 68 | Output: prediction dataType: ndarray description: the prediction results for the testing set 69 | ''' 70 | 71 | def predict(self, test_data, prob=False): # bug fix: prob was the string "False", which is always truthy 72 | # Normalization 73 | if self.norm_type == "Standardization": 74 | test_data = preProcess.Standardization(test_data) 75 | else: 76 | test_data = preProcess.Normalization(test_data) 77 | 78 | test_predict_feature = np.zeros((test_data.shape[0], self.k)) 79 | # the first layer in Blending 80 | for j, clf in enumerate(self.layer1_classifier_set): 81 | test_predict_feature[:, j] = clf.predict(test_data)[:, 0] 82 | 83 | # the second layer in Blending 84 | probability = self.layer2_classifier.predict(test_predict_feature) 85 | prediction = (probability > 0.5)*1 86 | 87 | self.probability = probability 88 | self.prediction = prediction 89 | if prob: 90 | return probability 91 | else: 92 | return prediction 93 | 94 | ''' 95 | Function: accuracy 96 | Description: show detection result 97 | Input: test_label dataType: ndarray description: labels of test data 98 | Output: accuracy dataType: float description: detection accuracy 99 | ''' 100 | 101 | def accuarcy(self, test_label): 102 | test_label = np.expand_dims(test_label, axis=1) 103 | prediction = self.prediction 104 | accuarcy = sum(prediction == test_label) / len(test_label) 105 | return accuarcy 106 | 107 | ''' 108 | Function: save 109 | Description: save the model as pkl 110 | Input: filename dataType: str description: the path to save the model 111 | ''' 112 | 113 | def save(self, filename): 114 | f = open(filename, 'wb') # bug fix: pickle requires binary mode 115 | model = {'layer1_classifiers': self.layer1_classifier_set, 'layer2_classifier': self.layer2_classifier} 116 | pickle.dump(model, f) 117 | f.close() 118 | 119 | ''' 120 | Function: load 121 | Description: load the model 122 | Input: filename dataType: str description: the path of the saved model 123 | Output: self dataType: obj description: the trained model 124 | ''' 125 | 126 | def load(self, filename): 127 | f = open(filename, 'rb') 128 | model = pickle.load(f) 129 | self.layer1_classifier_set = model['layer1_classifiers'] 130 | self.layer2_classifier = model['layer2_classifier'] 131 | return self
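Editor's note: a minimal usage sketch for BlendingClassifier above, assuming the repo's classifiers with their train/predict interface; the data is synthetic and purely illustrative.

import numpy as np
from Blending import BlendingClassifier
from Perceptron import PerceptronClassifier
from LogisticRegression import LogisticRegressionClassifier

X = np.random.rand(200, 5)
y = (X.sum(axis=1) > 2.5).astype(int)

clf = BlendingClassifier(classifier_set=[PerceptronClassifier(), LogisticRegressionClassifier()])
clf.train(X, y)
pred = clf.predict(X)      # hard 0/1 labels from the layer-2 perceptron
print(clf.accuarcy(y))     # note: the method name is spelled 'accuarcy' throughout this repo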
-------------------------------------------------------------------------------- /KNN.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Filename: KNN.py 3 | @Author: Ryuk 4 | @Create Date: 2019-04-29 5 | @Update Date: 2019-05-03 6 | @Description: Implementation of KNN 7 | """ 8 | 9 | import numpy as np 10 | import operator as op 11 | 12 | class KNNClassifier: 13 | def __init__(self, k=5, norm_type="Normalization"): # default k so callers such as AdaBoost can construct it without arguments 14 | self.k = k 15 | self.norm_type = norm_type # bug fix: the argument was previously ignored 16 | self.x_train = None 17 | self.y_train = None 18 | 19 | ''' 20 | Function: Normalization 21 | Description: Normalize input data. For vector x, the normalization process is given by 22 | normalization(x) = (x - min(x))/(max(x) - min(x)) 23 | Input: data dataType: ndarray description: input data 24 | Output: norm_data dataType: ndarray description: output data after normalization 25 | ''' 26 | def Normalization(self, data): 27 | # get the max and min value of each column 28 | min_value = data.min(axis=0) 29 | max_value = data.max(axis=0) 30 | diff = max_value - min_value 31 | # normalization 32 | min_data = np.tile(min_value, (data.shape[0], 1)) 33 | norm_data = (data - min_data)/np.tile(diff, (data.shape[0], 1)) 34 | return norm_data 35 | 36 | ''' 37 | Function: Standardization 38 | Description: Standardize input data.
For vector x, the standardization process is given by 39 | Standardization(x) = (x - mean(x))/std(x) 40 | Input: data dataType: ndarray description: input data 41 | Output: standard_data dataType: ndarray description: output data after standardization 42 | ''' 43 | def Standardization(self, data): 44 | # get the mean and the standard deviation of each column 45 | mean_value = data.mean(axis=0) 46 | std_value = data.std(axis=0) 47 | standard_data = (data - np.tile(mean_value, (data.shape[0], 1)))/np.tile(std_value, (data.shape[0], 1)) 48 | return standard_data 49 | 50 | ''' 51 | Function: train 52 | Description: train the model (KNN is lazy: it only normalizes and stores the data) 53 | Input: train_data dataType: ndarray description: features 54 | train_label dataType: ndarray description: labels 55 | Output: self dataType: obj description: the trained model 56 | ''' 57 | def train(self, train_data, train_label): 58 | if self.norm_type == "Standardization": # bug fix: the attribute is norm_type, not normType 59 | train_data = self.Standardization(train_data) 60 | else: 61 | train_data = self.Normalization(train_data) 62 | self.x_train = train_data 63 | self.y_train = train_label 64 | return self 65 | 66 | ''' 67 | Function: predict 68 | Description: give the predictions for test data 69 | Input: test_data dataType: ndarray description: data for testing 70 | Output: prediction dataType: ndarray description: predicted labels 71 | ''' 72 | def predict(self, test_data): 73 | # Normalization 74 | if self.norm_type == "Standardization": 75 | testData = self.Standardization(test_data) 76 | else: 77 | testData = self.Normalization(test_data) 78 | 79 | test_num = testData.shape[0] 80 | prediction = np.zeros([test_num, 1]) 81 | probability = np.zeros([test_num, 1]) 82 | # predict each sample in the test data 83 | for i in range(test_num): 84 | prediction[i], probability[i] = self.calculateDistance(testData[i], self.x_train, self.y_train, self.k) 85 | 86 | return prediction 87 | 88 | ''' 89 | Function: calculateDistance 90 | Description: calculate the distance between the input vector and the training data 91 | Input: input dataType: ndarray description: input vector 92 | train_data dataType: ndarray description: data for training 93 | train_label dataType: ndarray description: labels of train data 94 | k dataType: int description: select the first k distances 95 | Output: label dataType: int description: predicted label of the input vector 96 | prob dataType: float description: fraction of the k neighbours voting for that label 97 | ''' 98 | def calculateDistance(self, input, train_data, train_label, k): 99 | train_num = train_data.shape[0] 100 | # calculate the Euclidean distances 101 | distances = np.tile(input, (train_num, 1)) - train_data 102 | distances = distances**2 103 | distances = distances.sum(axis=1) 104 | distances = distances**0.5 105 | 106 | # get the labels of the first k distances 107 | disIndex = distances.argsort() 108 | labelCount = {} 109 | for i in range(k): 110 | label = train_label[disIndex[i]] 111 | labelCount[label] = labelCount.get(label, 0) + 1 112 | 113 | prediction = sorted(labelCount.items(), key=op.itemgetter(1), reverse=True) 114 | label = prediction[0][0] 115 | prob = prediction[0][1]/k 116 | return label, prob 117 | 118 | ''' 119 | Function: showDetectionResult 120 | Description: show detection result 121 | Input: test_data dataType: ndarray description: data for test 122 | test_label dataType: ndarray description: labels of test data 123 | Output: accuracy dataType: float description: detection accuracy 124 | ''' 125 | def showDetectionResult(self, test_data, test_label): 126 | test_label = np.expand_dims(test_label, axis=1) 127 | prediction = self.predict(test_data) 128 | accuarcy = sum(prediction == test_label)/len(test_label) 129 | return accuarcy
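Editor's note: an illustrative, vectorized alternative to the tile-based distance loop above (NumPy broadcasting); a sketch, not part of the original file.

import numpy as np

def knn_predict(x, train_data, train_label, k=5):
    # squared Euclidean distance from x to every training sample
    d2 = ((train_data - x) ** 2).sum(axis=1)
    nearest = np.argsort(d2)[:k]             # indices of the k nearest samples
    labels, counts = np.unique(train_label[nearest], return_counts=True)
    return labels[np.argmax(counts)]         # majority vote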
-------------------------------------------------------------------------------- /Stacking.py: -------------------------------------------------------------------------------- 1 | """ 2 | @ Filename: Stacking.py 3 | @ Author: Ryuk 4 | @ Create Date: 2019-05-05 5 | @ Update Date: 2019-05-05 6 | @ Description: Implement Stacking 7 | """ 8 | from sklearn.model_selection import StratifiedKFold, train_test_split 9 | from Perceptron import * 10 | import numpy as np 11 | import preProcess 12 | import pickle 13 | import copy # bug fix: was 'import random' (unused); copy is needed to snapshot fold models 14 | 15 | class StackingClassifier: 16 | def __init__(self, norm_type="Normalization", classifier_set=None, fusion_type="Weighing", n_folds=5): 17 | self.norm_type = norm_type 18 | self.classifier_set = classifier_set 19 | self.k = len(self.classifier_set) # the number of classifiers 20 | self.trained_classifier_set = None 21 | self.n_folds = n_folds # the number of folds for cross validation 22 | self.fusion_type = fusion_type # fusion method in the second layer 23 | self.prediction = None 24 | self.probability = None 25 | 26 | ''' 27 | Function: train 28 | Description: train the model 29 | Input: train_data dataType: ndarray description: features 30 | train_label dataType: ndarray description: labels 31 | Output: self dataType: obj description: the trained model 32 | ''' 33 | 34 | def train(self, train_data, train_label): 35 | if self.norm_type == "Standardization": 36 | train_data = preProcess.Standardization(train_data) 37 | else: 38 | train_data = preProcess.Normalization(train_data) 39 | 40 | skf = StratifiedKFold(self.n_folds) 41 | prediction_feature = np.zeros((train_data.shape[0], len(self.classifier_set))) 42 | trained_model = [] 43 | 44 | # the first layer in Stacking 45 | for j, clf in enumerate(self.classifier_set): 46 | # train each submodel 47 | subtrained_model = [] 48 | # cross validation 49 | for (train_index, test_index) in skf.split(train_data, train_label): 50 | X_train, X_test = train_data[train_index], train_data[test_index] 51 | y_train, y_test = train_label[train_index], train_label[test_index] 52 | # train on this fold's training split and keep a snapshot of the fitted model 53 | clf.train(X_train, y_train) 54 | subtrained_model.append(copy.deepcopy(clf)) # bug fix: appending clf itself would store the same object n_folds times 55 | # out-of-fold predictions become the meta-feature of submodel j 56 | prediction_feature[test_index, j] = clf.predict(X_test)[:, 0] 57 | # save the models 58 | trained_model.append(subtrained_model) 59 | 60 | self.trained_classifier_set = trained_model 61 | return self 62 | 63 | ''' 64 | Function: predict 65 | Description: predict the testing set 66 | Input: test_data dataType: ndarray description: features 67 | prob dataType: bool description: whether to return the probability instead of the hard label 68 | Output: prediction dataType: ndarray description: the prediction results for the testing set 69 | ''' 70 | 71 | def predict(self, test_data, prob=False): # bug fix: prob was the string "False", which is always truthy 72 | # Normalization 73 | if self.norm_type == "Standardization": 74 | test_data = preProcess.Standardization(test_data) 75 | else: 76 | test_data = preProcess.Normalization(test_data) 77 | 78 | pre_prediction = np.zeros((test_data.shape[0], self.k)) # bug fix: one column per base classifier (was n_folds) 79 | # the first layer in Stacking
80 | for j, sub_model in enumerate(self.trained_classifier_set): 81 | sub_prediction_feature = np.zeros((test_data.shape[0], self.n_folds)) 82 | i = 0 83 | for clf in sub_model: 84 | sub_prediction_feature[:, i] = clf.predict(test_data)[:, 0] 85 | i = i + 1 86 | pre_prediction[:, j] = sub_prediction_feature.mean(1) # average the n_folds fold-models of classifier j 87 | 88 | # the second layer in Stacking 89 | if self.fusion_type == "Averaging": 90 | probability = pre_prediction.mean(1) 91 | elif self.fusion_type == "Voting": 92 | probability = np.sum(pre_prediction, axis=1)/self.k 93 | elif self.fusion_type == "Weighing": 94 | w = [row/row.sum() for row in pre_prediction] # per-sample self-normalized weights 95 | probability = np.sum(np.multiply(pre_prediction, w), axis=1) 96 | 97 | prediction = (probability > 0.5) * 1 98 | self.probability = probability 99 | self.prediction = prediction 100 | if prob: 101 | return probability 102 | else: 103 | return prediction 104 | 105 | ''' 106 | Function: accuracy 107 | Description: show detection result 108 | Input: test_label dataType: ndarray description: labels of test data 109 | Output: accuracy dataType: float description: detection accuracy 110 | ''' 111 | 112 | def accuarcy(self, test_label): 113 | # test_label = np.expand_dims(test_label, axis=1) 114 | prediction = self.prediction 115 | accuarcy = sum(prediction == test_label) / len(test_label) 116 | return accuarcy 117 | 118 | ''' 119 | Function: save 120 | Description: save the model as pkl 121 | Input: filename dataType: str description: the path to save the model 122 | ''' 123 | 124 | def save(self, filename): 125 | f = open(filename, 'wb') # bug fix: pickle requires binary mode 126 | pickle.dump(self.trained_classifier_set, f) 127 | f.close() 128 | 129 | ''' 130 | Function: load 131 | Description: load the model 132 | Input: filename dataType: str description: the path of the saved model 133 | Output: self dataType: obj description: the trained model 134 | ''' 135 | 136 | def load(self, filename): 137 | f = open(filename, 'rb') 138 | self.trained_classifier_set = pickle.load(f) 139 | return self
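Editor's note: a sketch of the out-of-fold idea behind StackingClassifier.train, using the repo's PerceptronClassifier on synthetic data (illustrative only).

import numpy as np
from sklearn.model_selection import StratifiedKFold
from Perceptron import PerceptronClassifier

X = np.random.rand(100, 4)
y = np.random.randint(0, 2, 100)
oof = np.zeros(len(y))                      # one out-of-fold prediction per sample
for train_index, test_index in StratifiedKFold(5).split(X, y):
    clf = PerceptronClassifier()
    clf.train(X[train_index], y[train_index])
    oof[test_index] = clf.predict(X[test_index])[:, 0]
# 'oof' plays exactly the role of prediction_feature[:, j] above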
-------------------------------------------------------------------------------- /DimensionReduction.py: -------------------------------------------------------------------------------- 1 | """ 2 | @ Filename: DimensionReduction.py 3 | @ Author: Ryuk 4 | @ Create Date: 2019-06-02 5 | @ Update Date: 2019-06-06 6 | @ Description: Implement DimensionReduction 7 | """ 8 | import numpy as np 9 | import pickle 10 | import preProcess 11 | 12 | class PCA: 13 | def __init__(self, norm_type="Standardization", rate=0.9): 14 | self.norm_type = norm_type 15 | self.matrix = None 16 | self.contribute_rate = None 17 | self.acc_contribute_rate = None 18 | self.rate = rate # target accumulated contribution (explained variance) rate 19 | 20 | ''' 21 | Function: train 22 | Description: train the model 23 | Input: train_data dataType: ndarray description: features 24 | Output: self dataType: obj description: the trained model 25 | ''' 26 | def train(self, train_data): 27 | # centre the data 28 | data = train_data - train_data.mean(axis=0) 29 | 30 | # calculate the eigenvalues and eigenvectors of the covariance matrix 31 | covariance_matrix = np.cov(data, rowvar=False) 32 | eigenvalue, eigenvector = np.linalg.eig(covariance_matrix) 33 | index = np.argsort(-eigenvalue) 34 | eigenvalue = eigenvalue[index] 35 | eigenvector = eigenvector[:, index] 36 | 37 | # calculate the contribution rate of each component 38 | contribute_rate = np.zeros(len(index)) 39 | acc_contribute_rate = np.zeros(len(index)) 40 | value_sum = eigenvalue.sum() 41 | acc = 0 42 | for i in range(len(eigenvalue)): 43 | acc = acc + eigenvalue[i] 44 | contribute_rate[i] = eigenvalue[i]/value_sum 45 | acc_contribute_rate[i] = acc/value_sum 46 | self.contribute_rate = contribute_rate 47 | self.acc_contribute_rate = acc_contribute_rate 48 | 49 | # first index whose accumulated contribution reaches the target rate 50 | # (bug fix: the old i-1 comparison misfired at i = 0) 51 | k = int(np.argmax(acc_contribute_rate >= self.rate)) 52 | # keep the first k+1 components (bug fix: [:, k] kept a single column only) 53 | matrix = np.mat(eigenvector)[:, :k + 1] 54 | self.matrix = matrix 55 | return self 56 | 57 | ''' 58 | Function: transformData 59 | Description: transform data 60 | Input: data dataType: ndarray description: original data 61 | Output: transformed_data dataType: ndarray description: transformed data 62 | ''' 63 | def transformData(self, data): 64 | data = data - data.mean(axis=0) 65 | transformed_data = np.dot(data, self.matrix) 66 | return transformed_data 67 | 68 | ''' 69 | Function: save 70 | Description: save the model as pkl 71 | Input: filename dataType: str description: the path to save the model 72 | ''' 73 | def save(self, filename): 74 | f = open(filename, 'wb') # bug fix: pickle requires binary mode 75 | pickle.dump(self.matrix, f) 76 | f.close() 77 | 78 | ''' 79 | Function: load 80 | Description: load the model 81 | Input: filename dataType: str description: the path of the saved model 82 | Output: self dataType: obj description: the trained model 83 | ''' 84 | def load(self, filename): 85 | f = open(filename, 'rb') 86 | self.matrix = pickle.load(f) 87 | return self 88 | 89 | 90 | class LDA: 91 | def __init__(self, norm_type="Standardization", rate=0.9): 92 | self.norm_type = norm_type 93 | self.matrix = None 94 | self.contribute_rate = None 95 | self.acc_contribute_rate = None 96 | self.rate = rate 97 | 98 | ''' 99 | Function: train 100 | Description: train the model 101 | Input: data dataType: ndarray description: features 102 | label dataType: ndarray description: labels 103 | Output: self dataType: obj description: the trained model 104 | ''' 105 | def train(self, data, label): 106 | # Normalization 107 | if self.norm_type == "Standardization": 108 | data = preProcess.Standardization(data) 109 | else: 110 | data = preProcess.Normalization(data) 111 | unique_label = np.unique(label) 112 | mu = np.mean(data, axis=0) 113 | 114 | Sw = 0 # within-class scatter 115 | Sb = 0 # between-class scatter 116 | for c in unique_label: 117 | index = np.where(label == c) 118 | Ni = len(index[0]) # bug fix: np.where returns a tuple, so take the index array 119 | xi = data[index] 120 | mui = np.mean(xi, axis=0) 121 | 122 | # calculate Sw 123 | Si = np.dot((xi - mui).T, xi - mui) 124 | Sw = Sw + Si 125 | 126 | # calculate Sb 127 | delta = np.expand_dims(mu - mui, axis=1) 128 | Sb = Sb + Ni * np.dot(delta, delta.T) 129 | 130 | # calculate the eigenvalues and eigenvectors of Sw^-1 * Sb 131 | eigenvalue, eigenvector = np.linalg.eig(np.dot(np.linalg.inv(Sw), Sb)) 132 | index = np.argsort(-eigenvalue) 133 | eigenvalue = eigenvalue[index] 134 | eigenvector = eigenvector[:, index] 135 | # calculate the contribution rate of each direction 136 | contribute_rate = np.zeros(len(index)) 137 | acc_contribute_rate = np.zeros(len(index)) 138 | value_sum = eigenvalue.sum() 139 | acc = 0 140 | for i in range(len(eigenvalue)): 141 | acc = acc + eigenvalue[i] 142 | contribute_rate[i] = eigenvalue[i] / value_sum 143 | acc_contribute_rate[i] = acc / value_sum 144 | self.contribute_rate = contribute_rate 145 | self.acc_contribute_rate = acc_contribute_rate 146 | 147 | # first index whose accumulated contribution reaches the target rate 148 | k = int(np.argmax(acc_contribute_rate >= self.rate)) 149 | # keep the first k+1 directions (bug fix: [:, k] kept a single column only) 150 | matrix = np.mat(eigenvector)[:, :k + 1] 151 | self.matrix = matrix 152 | return self 153 | 154 | ''' 155 | Function: transformData 156 | Description: transform data 157 | Input: data dataType: ndarray description: original data 158 | Output: transformed_data dataType: ndarray description: transformed data 159 | ''' 160 | def transformData(self, data): 161 | transformed_data = np.dot(data, self.matrix) 162 | return transformed_data
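Editor's note: a minimal usage sketch for the PCA class above (synthetic data, illustrative only).

import numpy as np
from DimensionReduction import PCA

X = np.random.rand(100, 6)
pca = PCA(rate=0.9)              # keep enough components for 90% of the variance
pca.train(X)
X_low = pca.transformData(X)     # project onto the retained directions
print(X_low.shape)               # (100, k+1) for the smallest sufficient k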
-------------------------------------------------------------------------------- /FeatureCombination.py: -------------------------------------------------------------------------------- 1 | """ 2 | @ Filename: FeatureCombination.py 3 | @ Author: Ryuk 4 | @ Create Date: 2019-11-18 5 | @ Update Date: 2019-11-20 6 | @ Description: Implement FM 7 | """ 8 | 9 | import numpy as np 10 | import preProcess 11 | import pickle 12 | 13 | class FM: 14 | def __init__(self, n, norm_type="Standardization", k=5): 15 | self.norm_type = norm_type 16 | self.n = n # the number of features 17 | self.k = k # the dimension of the latent factor vectors 18 | self.w_0 = 0 # bias term 19 | self.W = np.random.random([self.n, 1]) # first-order parameters 20 | self.V = np.random.random([self.n, self.k]) # second-order (latent factor) parameters 21 | self.sample_num = None # the number of training samples, set in train() 22 | 23 | ''' 24 | Function: sigmoid 25 | Description: sigmoid function 26 | Input: x dataType: ndarray description: input vector 27 | derivative dataType: bool description: whether to calculate the derivative of sigmoid 28 | Output: output dataType: float description: output 29 | ''' 30 | def sigmoid(self, x, derivative=False): 31 | output = 1/(1 + np.exp(-x)) 32 | if derivative: 33 | output = output * (1 - output) 34 | return output 35 |
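Editor's note: train() below relies on the standard FM identity sum_{i<j} <v_i, v_j> x_i x_j = 0.5 * sum_f [(sum_i v_if x_i)^2 - sum_i v_if^2 x_i^2], which is what inter_1 and inter_2 compute in O(k*n). A NumPy check of the identity (illustrative):

import numpy as np

n, k = 6, 3
x, V = np.random.rand(n), np.random.rand(n, k)
brute = sum(V[i] @ V[j] * x[i] * x[j] for i in range(n) for j in range(i + 1, n))
fast = 0.5 * (((x @ V) ** 2) - ((x ** 2) @ (V ** 2))).sum()
assert np.isclose(brute, fast)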
36 | ''' 37 | Function: train 38 | Description: train the model with stochastic gradient descent 39 | Input: train_data dataType: ndarray description: features 40 | train_label dataType: ndarray description: labels, assumed to be in {-1, +1} for the logit loss 41 | alpha dataType: float description: learning rate 42 | iterations dataType: int description: the times of iteration 43 | Output: self dataType: obj description: the trained model 44 | ''' 45 | def train(self, train_data, train_label, alpha=0.01, iterations=100): 46 | if self.norm_type == "Standardization": 47 | train_data = preProcess.Standardization(train_data) 48 | else: 49 | train_data = preProcess.Normalization(train_data) 50 | 51 | self.sample_num = train_data.shape[0] # bug fix: was never set before being used below 52 | 53 | for epoch in range(iterations): 54 | for id in range(self.sample_num): 55 | x = train_data[id] 56 | # second-order term via the FM identity (see the editor's note above) 57 | inter_1 = x @ self.V # shape [k]: sum_i v_if * x_i 58 | inter_2 = (x ** 2) @ (self.V ** 2) # shape [k]: sum_i v_if^2 * x_i^2 59 | interaction = np.sum(inter_1 ** 2 - inter_2) / 2. 60 | 61 | # prediction result 62 | pred = self.w_0 + float(x @ self.W) + interaction 63 | 64 | # gradient base of the logit loss -log(sigmoid(y * pred)) w.r.t. pred 65 | # (bug fix: the old expression mixed the loss value into the update) 66 | base = (self.sigmoid(train_label[id] * pred) - 1) * train_label[id] 67 | 68 | # update the bias 69 | self.w_0 -= alpha * base 70 | 71 | for i in range(self.n): 72 | if x[i] != 0: 73 | # first-order update (bug fix: W was wrongly indexed as W[id, i]) 74 | self.W[i] -= alpha * base * x[i] 75 | for f in range(self.k): 76 | # latent-factor update: dpred/dv_if = x_i * inter_1[f] - v_if * x_i^2 77 | self.V[i, f] -= alpha * base * (x[i] * inter_1[f] - self.V[i, f] * x[i] ** 2) 78 | 79 | return self 80 | 81 | ''' 82 | Function: predict 83 | Description: predict the testing set 84 | Input: test_data dataType: ndarray description: features 85 | prob dataType: bool description: whether to return the probability instead of the hard label 86 | Output: prediction dataType: ndarray description: the prediction results for the testing set 87 | ''' 88 | def predict(self, test_data, prob=False): 89 | # Normalization 90 | if self.norm_type == "Standardization": 91 | test_data = preProcess.Standardization(test_data) 92 | else: 93 | test_data = preProcess.Normalization(test_data) 94 | 95 | test_num = test_data.shape[0] 96 | prediction = np.zeros([test_num, 1]) 97 | probability = np.zeros([test_num, 1]) 98 | for i in range(test_num): 99 | x = test_data[i] 100 | inter_1 = x @ self.V 101 | inter_2 = (x ** 2) @ (self.V ** 2) 102 | interaction = np.sum(inter_1 ** 2 - inter_2) / 2. 103 | pred = self.w_0 + float(x @ self.W) + interaction 104 | probability[i] = self.sigmoid(pred) # bug fix: the whole array was being overwritten by a scalar 105 | if probability[i] > 0.5: 106 | prediction[i] = 1 107 | else: 108 | prediction[i] = -1 # bug fix: was 0.5; the labels are {-1, +1} 109 | 110 | self.prediction = prediction 111 | self.probability = probability 112 | if prob: 113 | return probability 114 | else: 115 | return prediction 116 | 117 | ''' 118 | Function: accuracy 119 | Description: show detection result 120 | Input: test_label dataType: ndarray description: labels of test data 121 | Output: accuracy dataType: float description: detection accuracy 122 | ''' 123 | def accuarcy(self, test_label): 124 | test_label = np.expand_dims(test_label, axis=1) 125 | prediction = self.prediction 126 | accuarcy = sum(prediction == test_label)/len(test_label) 127 | return accuarcy 128 | 129 | ''' 130 | Function: save 131 | Description: save the model as pkl 132 | Input: filename dataType: str description: the path to save the model 133 | ''' 134 | def save(self, filename): 135 | f = open(filename, 'wb') # bug fix: pickle requires binary mode 136 | model = {'w_0': self.w_0, 'W': self.W, 'V': self.V} # bug fix: FM has no attribute 'weights' 137 | pickle.dump(model, f) 138 | f.close() 139 | 140 | ''' 141 | Function: load 142 | Description: load the model 143 | Input: filename dataType: str description: the path of the saved model 144 | Output: self dataType: obj description: the trained model 145 | ''' 146 | def load(self, filename): 147 | f = open(filename, 'rb') 148 | model = pickle.load(f) 149 | self.w_0 = model['w_0'] 150 | self.W = model['W'] 151 | self.V = model['V'] 152 | return self
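Editor's note: a minimal usage sketch for the FM class above (synthetic data; labels in {-1, +1}, as the logit loss in train() assumes).

import numpy as np
from FeatureCombination import FM

X = np.random.rand(200, 8)
y = np.where(X[:, 0] * X[:, 1] > 0.25, 1, -1)   # a pairwise interaction to learn
fm = FM(n=8, k=4)
fm.train(X, y, alpha=0.01, iterations=50)
pred = fm.predict(X)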
-------------------------------------------------------------------------------- /LogisticRegression.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Filename: LogisticRegression.py 3 | @Author: Ryuk 4 | @Create Date: 2019-04-30 5 | @Update Date: 2019-05-03 6 | @Description: Implementation of logistic regression 7 | """ 8 | 9 | import numpy as np 10 | import preProcess 11 | import pickle 12 | import random 13 | 14 | 15 | class LogisticRegressionClassifier: 16 | def __init__(self, norm_type="Normalization"): 17 | self.norm_type = norm_type 18 | self.weights = None 19 | self.prediction = None 20 | self.probability = None 21 | 22 | ''' 23 | Function: sigmoid 24 | Description: sigmoid function 25 | Input: x dataType: ndarray description: input vector 26 | derivative dataType: bool description: whether to calculate the derivative of sigmoid 27 | Output: output dataType: float description: output 28 | ''' 29 | def sigmoid(self, x, derivative=False): 30 | output = 1/(1 + np.exp(-x)) 31 | if derivative: 32 | output = output * (1 - output) 33 | return output 34 | 35 | ''' 36 | Function: updateAlpha 37 | Description: update alpha after each sample 38 | Input: alpha dataType: float description: original alpha 39 | method dataType: int description: decay schedule of alpha 40 | Output: alpha dataType: float description: decayed alpha 41 | ''' 42 | def updateAlpha(self, alpha, epoch, method=1): 43 | if method == 1: 44 | alpha = 0.95 ** epoch * alpha 45 | elif method == 2: 46 | k = 3 47 | alpha = k/((epoch + 1) ** 0.5) * alpha # bug fix: epoch starts at 0, so +1 avoids division by zero 48 | elif method == 3: 49 | decay_rate = 0.001 50 | alpha = alpha / (1 + decay_rate * epoch) 51 | return alpha 52 | 53 | ''' 54 | Function: train 55 | Description: train the model 56 | Input: train_data dataType: ndarray description: features 57 | train_label dataType: ndarray description: labels 58 | method dataType: string description: "GA": Gradient Ascent; "SGA": Stochastic Gradient Ascent 59 | alpha dataType: float description: the learning rate 60 | iterations dataType: int description: the times of iteration 61 | Output: self dataType: obj description: the trained model 62 | ''' 63 | def train(self, train_data, train_label, method="GA", alpha=0.1, iterations=100): 64 | if self.norm_type == "Standardization": 65 | train_data = preProcess.Standardization(train_data) 66 | else: 67 | train_data = preProcess.Normalization(train_data) 68 | 69 | train_label = np.expand_dims(train_label, axis=1) 70 | feature_dim = len(train_data[1]) 71 | 72 | if method == "GA": 73 | weights = np.random.normal(0, 1, [feature_dim, 1]) 74 | for i in range(iterations): 75 | pred = self.sigmoid(np.dot(train_data, weights)) 76 | errors = train_label - pred 77 | # update the weights 78 | weights = weights + alpha * np.dot(train_data.T, errors) 79 | self.weights = weights 80 | return self 81 | 82 | if method == "SGA": 83 | weights = np.random.normal(0, 1, feature_dim) 84 | sample_num = len(train_data) 85 | random_index = np.random.randint(sample_num, size=sample_num) 86 | for i in range(iterations): 87 | for j in range(sample_num): 88 | alpha = self.updateAlpha(alpha, i, 1) 89 | pred = self.sigmoid(np.dot(train_data[random_index[j], :], weights)) 90 | sample_error = train_label[random_index[j]] - pred 91 | weights = weights + alpha * sample_error * train_data[random_index[j], :] 92 | 93 | self.weights = weights 94 | return self 95 |
96 | ''' 97 | Function: predict 98 | Description: predict the testing set 99 | Input: test_data dataType: ndarray description: features 100 | prob dataType: bool description: whether to return the probability instead of the hard label 101 | Output: prediction dataType: ndarray description: the prediction results for the testing set 102 | ''' 103 | def predict(self, test_data, prob=False): # bug fix: prob was the string "False", which is always truthy 104 | # Normalization 105 | if self.norm_type == "Standardization": 106 | test_data = preProcess.Standardization(test_data) 107 | else: 108 | test_data = preProcess.Normalization(test_data) 109 | 110 | test_num = test_data.shape[0] 111 | prediction = np.zeros([test_num, 1]) 112 | probability = np.zeros([test_num, 1]) 113 | for i in range(test_num): 114 | probability[i] = self.sigmoid(np.dot(test_data[i, :], self.weights)) 115 | if probability[i] > 0.5: 116 | prediction[i] = 1 117 | else: 118 | prediction[i] = 0 # bug fix: was 0.5, which is not a valid class label 119 | 120 | self.prediction = prediction 121 | self.probability = probability 122 | if prob: 123 | return probability 124 | else: 125 | return prediction 126 | 127 | ''' 128 | Function: accuracy 129 | Description: show detection result 130 | Input: test_label dataType: ndarray description: labels of test data 131 | Output: accuracy dataType: float description: detection accuracy 132 | ''' 133 | def accuarcy(self, test_label): 134 | test_label = np.expand_dims(test_label, axis=1) 135 | prediction = self.prediction 136 | accuarcy = sum(prediction == test_label)/len(test_label) 137 | return accuarcy 138 | 139 | ''' 140 | Function: save 141 | Description: save the model as pkl 142 | Input: filename dataType: str description: the path to save the model 143 | ''' 144 | def save(self, filename): 145 | f = open(filename, 'wb') # bug fix: pickle requires binary mode 146 | pickle.dump(self.weights, f) 147 | f.close() 148 | 149 | ''' 150 | Function: load 151 | Description: load the model 152 | Input: filename dataType: str description: the path of the saved model 153 | Output: self dataType: obj description: the trained model 154 | ''' 155 | def load(self, filename): 156 | f = open(filename, 'rb') 157 | self.weights = pickle.load(f) 158 | return self
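Editor's note: a minimal usage sketch for LogisticRegressionClassifier above (synthetic 0/1-labelled data, illustrative only).

import numpy as np
from LogisticRegression import LogisticRegressionClassifier

X = np.random.rand(200, 3)
y = (X @ np.array([2.0, -1.0, 0.5]) > 0.7).astype(int)
clf = LogisticRegressionClassifier()
clf.train(X, y, method="GA", alpha=0.1, iterations=200)
pred = clf.predict(X)
print(clf.accuarcy(y))   # the method name is spelled 'accuarcy' in this repo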
-------------------------------------------------------------------------------- /NaiveBayes.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Filename: NaiveBayes.py 3 | @Author: Ryuk 4 | @Create Date: 2019-05-02 5 | @Update Date: 2019-05-03 6 | @Description: Implementation of naive Bayes 7 | """ 8 | 9 | import numpy as np 10 | import operator as op 11 | import preProcess 12 | import math 13 | import pickle 14 | 15 | 16 | class BayesClassifier: 17 | def __init__(self, norm_type="Normalization", laplace=1): 18 | self.norm_type = norm_type 19 | self.laplace = laplace 20 | self.label_value = None 21 | self.feature_value = None 22 | self.S = None 23 | self.prior_probability = None 24 | self.conditional_probability = None 25 | self.prediction = None 26 | self.probability = None 27 | ''' 28 | Function: train 29 | Description: train the model 30 | Input: train_data dataType: ndarray description: features 31 | train_label dataType: ndarray description: labels 32 | Output: self dataType: obj description: the trained model 33 | ''' 34 | def train(self, train_data, train_label): 35 | if self.norm_type == "Standardization": 36 | train_data = preProcess.Standardization(train_data) 37 | else: 38 | train_data = preProcess.Normalization(train_data) 39 | 40 | label_count = {} 41 | feature_dim = len(train_data[1]) 42 | 43 | # count the occurrences of each label 44 | for c in train_label: 45 | label_count[c] = label_count.get(c, 0) + 1 46 | label_value = sorted(label_count.items(), key=op.itemgetter(0), reverse=False) 47 | self.label_value = label_value 48 | 49 | K = len(label_value) # the number of unique labels 50 | N = len(train_label) # the number of samples 51 | 52 | # get the prior probability 53 | prior_probability = {} 54 | for key in range(len(label_value)): 55 | prior_probability[label_value[key][0]] = (label_value[key][1] + self.laplace) / (N + K * self.laplace) # Laplace smoothing 56 | self.prior_probability = prior_probability 57 | 58 | # get the value set of each feature 59 | feature_value = [] # distinct values of each feature 60 | S = [] # the number of unique values of each feature 61 | for feat in range(feature_dim): 62 | unique_feature = np.unique(train_data[:, feat]) 63 | S.append(len(unique_feature)) 64 | feature_value.append(unique_feature) 65 | self.S = S 66 | self.feature_value = feature_value 67 | 68 | # calculate the conditional probability 69 | prob = [] 70 | # count the co-occurrences (x = a & y = c) 71 | for j in range(feature_dim): 72 | count = np.zeros([S[j], len(label_count)]) # rows: feature values, columns: labels 73 | feature_temp = train_data[:, j] 74 | feature_value_temp = feature_value[j] 75 | for i in range(len(feature_temp)): 76 | for k in range(len(feature_value_temp)): 77 | for t in range(len(label_count)): 78 | if feature_temp[i] == feature_value_temp[k] and train_label[i] == label_value[t][0]: 79 | count[k][t] += 1 # x = value and y = label 80 | # calculate the conditional probability with Laplace smoothing 81 | for m in range(len(label_value)): 82 | count[:, m] = (count[:, m] + self.laplace) / (label_value[m][1] + self.laplace*S[j]) 83 | prob.append(count) 84 | self.conditional_probability = prob 85 | return self 86 | 87 | ''' 88 | Function: predict 89 | Description: predict the testing set 90 | Input: test_data dataType: ndarray description: features 91 | prob dataType: bool description: whether to return the posterior score instead of the hard label 92 | Output: prediction dataType: ndarray description: the prediction results for the testing set 93 | ''' 94 | def predict(self, test_data, prob=False): # bug fix: prob was the string "False", which is always truthy 95 | # Normalization 96 | if self.norm_type == "Standardization": 97 | test_data = preProcess.Standardization(test_data) 98 | else: 99 | test_data = preProcess.Normalization(test_data) 100 | 101 | test_num = test_data.shape[0] 102 | prediction = np.zeros([test_num, 1]) 103 | probability = np.zeros([test_num, 1]) 104 | for i in range(test_num): 105 | result = self.classify(test_data[i, :]) 106 | result = sorted(result.items(), key=op.itemgetter(1), reverse=True) 107 | prediction[i] = result[0][0] 108 | probability[i] = result[0][1] # unnormalized posterior score of the chosen label 109 | 110 | self.prediction = prediction 111 | self.probability = probability 112 | if prob: 113 | return probability 114 | else: 115 | return prediction 116 | ''' 117 | Function: classify 118 | Description: calculate the posterior score of each label for one sample 119 | Input: sample dataType: ndarray description: input vector to be classified 120 | Output: predict dataType: dict description: unnormalized posterior score per label 121 | ''' 122 | def classify(self, sample): 123 | predict = {} 124 | for m in range(len(self.label_value)): 125 | temp = self.prior_probability[self.label_value[m][0]] # the prior probability of the m-th label 126 | for n in range(len(sample)): 127 | if sample[n] in self.feature_value[n]: 128 | index = np.where(self.feature_value[n] == sample[n])[0][0] 129 | temp = temp * self.conditional_probability[n][index][m] 130 | else: 131 | temp = temp * self.laplace / (self.label_value[m][1] + self.laplace * self.S[n]) # bug fix: multiply into the running product (it was overwritten); unseen values get the Laplace-smoothed probability 132 | predict[self.label_value[m][0]] = temp 133 | return predict 134 | 135 | ''' 136 | Function: accuracy 137 | Description: show detection result 138 | Input: test_label dataType: ndarray description: labels of test data 139 | Output: accuracy dataType: float description: detection accuracy 140 | ''' 141 | def accuarcy(self, test_label): 142 | test_label = np.expand_dims(test_label, axis=1) 143 | prediction = self.prediction 144 | accuarcy = sum(prediction == test_label)/len(test_label) 145 | return accuarcy
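Editor's note: a minimal usage sketch for BayesClassifier above. The implementation enumerates the distinct values of every feature, so it effectively treats features as categorical; small integer-valued features (synthetic here, illustrative only) suit it best.

import numpy as np
from NaiveBayes import BayesClassifier

X = np.random.randint(0, 3, size=(200, 4)).astype(float)   # categorical features
y = (X[:, 0] == X[:, 1]).astype(int)
clf = BayesClassifier(norm_type="Normalization", laplace=1)
clf.train(X, y)
pred = clf.predict(X)
print(clf.accuarcy(y))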
149 | 150 | 151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /Perceptron.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Filename: Perceptron.py 3 | @Author: Ryuk 4 | @Create Date: 2019-04-30 5 | @Update Date: 2019-05-03 6 | @Description: Implement of perceptron.py 7 | """ 8 | 9 | import numpy as np 10 | import preProcess 11 | import pickle 12 | import random 13 | 14 | class PerceptronClassifier: 15 | def __init__(self, norm_type="Normalization", iterations=500, learning_rate=0.01): 16 | self.norm_type = norm_type 17 | self.iterations = iterations 18 | self.learning_rate = learning_rate 19 | self.gradients = None 20 | self.loss = None 21 | self.w = None 22 | self.b = None 23 | self.prediction = None 24 | self.probability = None 25 | 26 | ''' 27 | Function: sigmoid 28 | Description: sigmoid function 29 | Input: x dataType: ndarray description: input vector 30 | derivative dataType: bool description: whether to calculate the derivative of sigmoid 31 | Output: output dataType: float description: output 32 | ''' 33 | def sigmoid(self, x, derivative=False): 34 | output = 1/(1 + np.exp(-x)) 35 | if derivative: 36 | output = output * (1 - output) 37 | return output 38 | 39 | ''' 40 | Function: initializeParameter 41 | Description: initialize parameter 42 | Input: feature_dim dataType: int description: feature dimension 43 | ''' 44 | def initializeParameter(self, feature_dim): 45 | w = np.random.normal(0, 1, [feature_dim, 1]) 46 | b = 0 47 | self.w = w 48 | self.b = b 49 | 50 | ''' 51 | Function: BackPropagate 52 | Description: BackPropagate function 53 | Input: w dataType: dict description: the weights in network 54 | b dataType: dict description: the bias in network 55 | train_data dataType: ndarray description: train data 56 | train_label dataType: ndarray description: train label 57 | Output: gradients dataType: dict description: gradients 58 | cost dataType: float description: loss 59 | ''' 60 | def backPropagate(self, train_data, train_label): 61 | num = train_label.shape[0] 62 | 63 | # forward 64 | A = self.sigmoid(np.dot(train_data, self.w) + self.b) 65 | cost = -1 / num * np.sum(train_label * np.log(A) + (1 - train_label) * np.log(1 - A)) 66 | 67 | # backward 68 | dw = 1 / num * np.dot(train_data.T, A - train_label) 69 | db = 1 / num * np.sum(A - train_label) 70 | 71 | # save gradients 72 | gradients = {"dw": dw, 73 | "db": db} 74 | return gradients, cost 75 | 76 | ''' 77 | Function: train 78 | Description: train the model 79 | Input: train_data dataType: ndarray description: features 80 | train_label dataType: ndarray description: labels 81 | Output: self dataType: obj description: the trained model 82 | ''' 83 | def train(self, train_data, train_label): 84 | if self.norm_type == "Standardization": 85 | train_data = preProcess.Standardization(train_data) 86 | else: 87 | train_data = preProcess.Normalization(train_data) 88 | 89 | feature_dim = len(train_data[1]) 90 | train_label = np.expand_dims(train_label, axis=1) 91 | self.initializeParameter(feature_dim) 92 | 93 | self.loss = [] 94 | # training process 95 | for i in range(self.iterations): 96 | gradients, cost = self.backPropagate(train_data, train_label) 97 | # get the derivative 98 | dw = gradients["dw"] 99 | db = gradients["db"] 100 | 101 | # update parameter 102 | self.w = self.w - self.learning_rate * dw 103 | self.b = self.b - self.learning_rate * db 104 | self.loss.append(cost) 105 | 106 | return self 107 | 108 | ''' 109 | 
Function: predict 110 | Description: predict the testing set 111 | Input: test_data dataType: ndarray description: features 112 | prob dataType: bool description: whether to return the probability instead of the hard label 113 | Output: prediction dataType: ndarray description: the prediction results for the testing set 114 | ''' 115 | 116 | def predict(self, test_data, prob=False): # bug fix: prob was the string "False", which is always truthy 117 | # Normalization 118 | if self.norm_type == "Standardization": 119 | test_data = preProcess.Standardization(test_data) 120 | else: 121 | test_data = preProcess.Normalization(test_data) 122 | 123 | test_num = test_data.shape[0] 124 | prediction = np.zeros([test_num, 1]) 125 | probability = np.zeros([test_num, 1]) 126 | for i in range(test_num): 127 | probability[i] = self.sigmoid(np.dot(self.w.T, test_data[i, :]) + self.b) # prediction = self.sigmoid(np.dot(self.w.T, test_data) + self.b) can speed this up 128 | if probability[i] > 0.5: # bug fix: sigmoid output is always > 0, so the old threshold of 0 labelled everything 1 129 | prediction[i] = 1 130 | else: 131 | prediction[i] = 0 # the model is trained on 0/1 labels (cross-entropy above), so predict 0 rather than -1 132 | 133 | self.prediction = prediction 134 | self.probability = probability 135 | if prob: 136 | return probability 137 | else: 138 | return prediction 139 | 140 | 141 | ''' 142 | Function: accuracy 143 | Description: show detection result 144 | Input: test_label dataType: ndarray description: labels of test data 145 | Output: accuracy dataType: float description: detection accuracy 146 | ''' 147 | def accuarcy(self, test_label): 148 | test_label = np.expand_dims(test_label, axis=1) 149 | prediction = self.prediction 150 | accuarcy = sum(prediction == test_label)/len(test_label) 151 | return accuarcy 152 | 153 | ''' 154 | Function: save 155 | Description: save the model as pkl 156 | Input: filename dataType: str description: the path to save the model 157 | ''' 158 | def save(self, filename): 159 | f = open(filename, 'wb') # bug fix: pickle requires binary mode 160 | model = {'w': self.w, 'b': self.b} 161 | pickle.dump(model, f) 162 | f.close() 163 | 164 | ''' 165 | Function: load 166 | Description: load the model 167 | Input: filename dataType: str description: the path of the saved model 168 | Output: self dataType: obj description: the trained model 169 | ''' 170 | def load(self, filename): 171 | f = open(filename, 'rb') 172 | model = pickle.load(f) 173 | self.w = model['w'] 174 | self.b = model['b'] 175 | return self
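Editor's note: despite its name, PerceptronClassifier above is a single sigmoid unit trained with cross-entropy (i.e. logistic regression by gradient descent). A minimal usage sketch on synthetic 0/1 data:

import numpy as np
from Perceptron import PerceptronClassifier

X = np.random.rand(200, 2)
y = (X[:, 0] + X[:, 1] > 1.0).astype(int)
clf = PerceptronClassifier(iterations=500, learning_rate=0.01)
clf.train(X, y)
pred = clf.predict(X)    # 0/1 labels, thresholded at 0.5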
-------------------------------------------------------------------------------- /AdaBoost.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Filename: AdaptiveBoost.py 3 | @Author: Ryuk 4 | @Create Date: 2019-05-03 5 | @Update Date: 2019-05-24 6 | @Description: Implementation of Adaptive Boosting 7 | """ 8 | 9 | import numpy as np 10 | import preProcess 11 | import pickle 12 | import copy # bug fix: was 'import random' (unused); copy is needed for fresh weak learners 13 | import SVM, KNN, DecisionTree, LogisticRegression, Perceptron # bug fix: the module is LogisticRegression, not Logistic 14 | import math 15 | 16 | class Adaboost: 17 | def __init__(self, norm_type="Normalization", iterations=5, base_classifier="SVM"): 18 | self.iterations = iterations 19 | self.norm_type = norm_type 20 | self.prediction = None 21 | self.probability = None 22 | self.classifier_set = None 23 | 24 | if base_classifier == "SVM": 25 | self.base_classifier = SVM.SVMClassifier() 26 | elif base_classifier == "KNN": 27 | self.base_classifier = KNN.KNNClassifier() 28 | elif base_classifier == "DecisionTree": 29 | self.base_classifier = DecisionTree.DecisionTreeClassifier() 30 | elif base_classifier == "Logistic": 31 | self.base_classifier = LogisticRegression.LogisticRegressionClassifier() 32 | elif base_classifier == "Perceptron": 33 | self.base_classifier = Perceptron.PerceptronClassifier() 34 | 35 | ''' 36 | Function: baseClassifier 37 | Description: generate a weak classifier 38 | Input: train_data dataType: ndarray description: train_data 39 | train_label dataType: ndarray description: train_label 40 | w dataType: ndarray description: sample weights 41 | Output: clf dataType: object description: weak classifier 42 | weighted_error dataType: float description: weighted error 43 | base_predictions dataType: object description: base predictions 44 | ''' 45 | def baseClassifier(self, train_data, train_label, w): 46 | sample_num = len(train_data) 47 | error_index = np.ones([sample_num, 1]) 48 | clf = copy.deepcopy(self.base_classifier) # bug fix: train a fresh copy each round instead of re-fitting one shared instance 49 | # note: the base learner itself is trained unweighted; w only enters through the weighted error and alpha 50 | clf.train(train_data, train_label) 51 | base_predictions = np.sign(clf.predict(train_data)) 52 | 53 | for i in range(sample_num): 54 | if base_predictions[i] == train_label[i]: 55 | error_index[i] = 0 56 | weighted_error = np.dot(w.T, error_index) 57 | return clf, weighted_error, base_predictions 58 | 59 | ''' 60 | Function: updateAlpha 61 | Description: update alpha 62 | Input: error dataType: float description: weighted error 63 | Output: new_alpha dataType: float description: new alpha 64 | ''' 65 | def updateAlpha(self, error): 66 | temp = (1.0 - error)/max(error, 10e-6) 67 | new_alpha = 1/2 * math.log(temp, math.e) 68 | return new_alpha 69 | 70 | ''' 71 | Function: train 72 | Description: train the model 73 | Input: train_data dataType: ndarray description: features 74 | train_label dataType: ndarray description: labels, assumed to be in {-1, +1} 75 | Output: clf_set dataType: list description: set of (alpha, classifier) pairs 76 | ''' 77 | def train(self, train_data, train_label): 78 | if self.norm_type == "Standardization": 79 | train_data = preProcess.Standardization(train_data) 80 | else: 81 | train_data = preProcess.Normalization(train_data) 82 | 83 | train_label = np.expand_dims(train_label, axis=1) 84 | sample_num = len(train_data) 85 | 86 | weak_classifier = [] 87 | 88 | # initialize the weights uniformly 89 | w = np.ones([sample_num, 1]) 90 | w = w/sample_num 91 | 92 | # predictions 93 | agg_predicts = np.zeros([sample_num, 1]) # aggregated value of the prediction 94 | 95 | # start training 96 | for i in range(self.iterations): 97 | base_clf, error, base_prediction = self.baseClassifier(train_data, train_label, w) 98 | alpha = self.updateAlpha(error) 99 | weak_classifier.append((alpha, base_clf)) 100 | 101 | # update the sample weights, cf. Eq.(8.4) on p.139 of the reference 102 | expon = np.multiply(-1 * alpha * train_label, base_prediction) 103 | w = np.multiply(w, np.exp(expon)) 104 | w = w/w.sum() 105 | 106 | # calculate the total error rate 107 | agg_predicts += alpha*base_prediction 108 | error_rate = np.multiply(np.sign(agg_predicts) != train_label, np.ones([sample_num, 1])) 109 | error_rate = error_rate.sum()/sample_num 110 | 111 | if error_rate == 0: 112 | break 113 | self.classifier_set = weak_classifier 114 | return weak_classifier 115 |
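Editor's note: a worked numeric example of the update used in train() above (pure NumPy, illustrative). With weighted error e, alpha = 0.5*ln((1-e)/e); each sample weight is then scaled by exp(-alpha * y * h(x)) and renormalized, so misclassified samples gain weight.

import numpy as np

y = np.array([1, 1, -1, -1])        # true labels
h = np.array([1, -1, -1, -1])       # a weak learner that gets one sample wrong
w = np.full(4, 0.25)                # uniform initial weights
e = w[h != y].sum()                 # weighted error = 0.25
alpha = 0.5 * np.log((1 - e) / max(e, 1e-6))   # ~0.549
w = w * np.exp(-alpha * y * h)
w = w / w.sum()                     # the misclassified sample now carries weight 0.5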
    '''
    Function: train
    Description: train the model
    Input:  train_data   dataType: ndarray  description: features
            train_label  dataType: ndarray  description: labels
    Output: clf_set      dataType: list     description: classifiers set
    '''
    def train(self, train_data, train_label):
        if self.norm_type == "Standardization":
            train_data = preProcess.Standardization(train_data)
        else:
            train_data = preProcess.Normalization(train_data)

        train_label = np.expand_dims(train_label, axis=1)
        sample_num = len(train_data)

        weak_classifier = []

        # initialize weights uniformly
        w = np.ones([sample_num, 1])
        w = w/sample_num

        # aggregate value of the ensemble prediction
        agg_predicts = np.zeros([sample_num, 1])

        # start training
        for i in range(self.iterations):
            base_clf, error, base_prediction = self.baseClassifier(train_data, train_label, w)
            alpha = self.updateAlpha(error)
            weak_classifier.append((alpha, base_clf))

            # update the sample weights, Eq.(8.4) on page 139
            expon = np.multiply(-1 * alpha * train_label, base_prediction)
            w = np.multiply(w, np.exp(expon))
            w = w/w.sum()

            # calculate the total error rate of the current ensemble
            agg_predicts += alpha * base_prediction
            error_rate = np.multiply(np.sign(agg_predicts) != train_label, np.ones([sample_num, 1]))
            error_rate = error_rate.sum()/sample_num

            if error_rate == 0:
                break
        self.classifier_set = weak_classifier
        return weak_classifier

    '''
    Function: predict
    Description: predict the testing set
    Input:  test_data  dataType: ndarray  description: features
            prob       dataType: bool     description: return probability of label
    Output: prediction dataType: ndarray  description: the prediction results for testing set
    '''
    def predict(self, test_data, prob=False):
        # Normalization
        if self.norm_type == "Standardization":
            test_data = preProcess.Standardization(test_data)
        else:
            test_data = preProcess.Normalization(test_data)

        test_num = test_data.shape[0]
        probability = np.zeros([test_num, 1])

        # weighted vote of the weak classifiers
        for alpha, clf in self.classifier_set:
            probability += alpha * clf.predict(test_data)

        # the sign of the weighted sum gives the ensemble label
        prediction = np.sign(probability)
        self.prediction = prediction
        self.probability = probability
        if prob:
            return probability
        else:
            return prediction

    '''
    Function: accuarcy
    Description: show detection result
    Input:  test_label dataType: ndarray  description: labels of test data
    Output: accuracy   dataType: float    description: detection accuracy
    '''
    def accuarcy(self, test_label):   # name kept as-is for compatibility with the example scripts
        test_label = np.expand_dims(test_label, axis=1)
        prediction = self.prediction
        accuracy = sum(prediction == test_label)/len(test_label)
        return accuracy

    '''
    Function: save
    Description: save the model as pkl
    Input:  filename  dataType: str  description: the path to save model
    '''
    def save(self, filename):
        f = open(filename, 'wb')   # pickle needs binary mode
        pickle.dump(self.classifier_set, f)
        f.close()

    '''
    Function: load
    Description: load the model
    Input:  filename  dataType: str  description: the path of the saved model
    Output: self      dataType: obj  description: the trained model
    '''
    def load(self, filename):
        f = open(filename, 'rb')
        self.classifier_set = pickle.load(f)
        return self

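A minimal end-to-end run of the class above — a sketch under the assumption that labels are in {-1, 1} (the convention predict() returns); it uses synthetic data, not one of the repo's example scripts.

```python
import numpy as np
from AdaBoost import Adaboost

# Two separable Gaussian blobs with labels in {-1, 1}
rng = np.random.RandomState(0)
X = np.vstack([rng.randn(40, 2) + 2, rng.randn(40, 2) - 2])
y = np.hstack([np.ones(40), -np.ones(40)])

clf = Adaboost(iterations=5, base_classifier="DecisionTree")
clf.train(X, y)
pred = clf.predict(X)
print(clf.accuarcy(y))   # method keeps the repo's original spelling
```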
--------------------------------------------------------------------------------
/RandomForest.py:
--------------------------------------------------------------------------------
"""
@ Filename: RandomForest.py
@ Author: Ryuk
@ Create Date: 2019-07-09
@ Update Date: 2019-07-09
@ Description: Implement RandomForest
"""
import numpy as np
import operator as op
import pickle
from DecisionTree import DecisionTreeClassifier
from TreeRegression import treeRegression   # TreeRegression.py defines the class treeRegression

class RandomForestClassifier:
    def __init__(self, tree_num=10, alpha=1e-5):
        self.tree_num = tree_num
        self.alpha = alpha          # information-gain threshold handed to each tree
        self.trees = []
        self.prediction = None
        self.probability = None

    '''
    Function: bootstrap
    Description: bootstrap sampling and train a model
    Input:  train_data   dataType: ndarray  description: features
            train_label  dataType: ndarray  description: labels
    Output: clf          dataType: obj      description: a tree trained on the resample
    '''
    def bootstrap(self, train_data, train_label):
        # draw n indices with replacement so every tree sees a different resample
        index = np.random.randint(0, len(train_data), len(train_data))
        x = train_data[index]
        y = train_label[index]
        clf = DecisionTreeClassifier(t=self.alpha)
        clf.train(x, y)
        return clf

    '''
    Function: train
    Description: train the model
    Input:  train_data   dataType: ndarray  description: features
            train_label  dataType: ndarray  description: labels
    Output: self         dataType: obj      description: the trained model
    '''
    def train(self, train_data, train_label):
        for i in range(self.tree_num):
            clf = self.bootstrap(train_data, train_label)
            self.trees.append(clf)
        return self

    '''
    Function: vote
    Description: return the label of the majority
    Input:  labels  dataType: ndarray  description: labels
    Output: pred    dataType: int      description: prediction label of input vector
    '''
    def vote(self, labels):
        label_count = {}
        # get the counts of each label
        for c in labels:
            label_count[c] = label_count.get(c, 0) + 1
        # get the label of the majority
        prediction = sorted(label_count.items(), key=op.itemgetter(1), reverse=True)
        pred = prediction[0][0]
        return pred

    '''
    Function: predict
    Description: predict the testing set
    Input:  test_data   dataType: ndarray  description: features
    Output: prediction  dataType: ndarray  description: the prediction results for testing set
    '''
    def predict(self, test_data):
        labels = np.zeros([len(test_data), self.tree_num])
        for i in range(self.tree_num):
            clf = self.trees[i]
            labels[:, i] = clf.predict(test_data).reshape(len(test_data))

        # majority vote across the trees, sample by sample
        prediction = np.zeros([len(test_data)])
        for j in range(len(labels)):
            prediction[j] = self.vote(labels[j, :])

        self.prediction = prediction
        return prediction

    '''
    Function: accuarcy
    Description: show detection result
    Input:  test_label dataType: ndarray  description: labels of test data
    Output: accuracy   dataType: float    description: detection accuracy
    '''
    def accuarcy(self, test_label):   # name kept as-is for compatibility with the example scripts
        prediction = self.prediction
        accuracy = sum(prediction == test_label)/len(test_label)
        return accuracy

    '''
    Function: save
    Description: save the model as pkl
    Input:  filename  dataType: str  description: the path to save model
    '''
    def save(self, filename):
        f = open(filename, 'wb')   # pickle needs binary mode
        model = self.trees
        pickle.dump(model, f)
        f.close()

    '''
    Function: load
    Description: load the model
    Input:  filename  dataType: str  description: the path of the saved model
    Output: self      dataType: obj  description: the trained model
    '''
    def load(self, filename):
        f = open(filename, 'rb')
        self.trees = pickle.load(f)
        return self

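Each call to bootstrap() above draws n indices with replacement, so a tree sees only about 63% of the distinct rows (the classic 1 − 1/e fraction); a quick standalone illustration.

```python
import numpy as np

n = 10000
index = np.random.randint(0, n, n)          # the same draw bootstrap() performs
unique_fraction = len(np.unique(index)) / n
print(round(unique_fraction, 3))            # ~0.632, i.e. 1 - 1/e
```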
class RandomForestRegression:
    def __init__(self, tree_num=10, error_threshold=1, N=4, alpha=0.01):
        self.tree_num = tree_num
        self.trees = []
        self.error_threshold = error_threshold   # the error threshold for a split
        self.N = N                               # the least number of samples for a split
        self.alpha = alpha                       # reserved for pruning
        self.tree_node = None
        self.prediction = None

    '''
    Function: bootstrap
    Description: bootstrap sampling and train a model
    Input:  train_data   dataType: ndarray  description: features
            train_label  dataType: ndarray  description: labels
    Output: clf          dataType: obj      description: a tree trained on the resample
    '''
    def bootstrap(self, train_data, train_label):
        index = np.random.randint(0, len(train_data), len(train_data))
        x = train_data[index]
        y = train_label[index]
        clf = treeRegression(error_threshold=self.error_threshold, N=self.N)
        clf.train(x, y)
        return clf

    '''
    Function: train
    Description: train the model
    Input:  train_data   dataType: ndarray  description: features
            train_label  dataType: ndarray  description: labels
    Output: self         dataType: obj      description: the trained model
    '''
    def train(self, train_data, train_label):
        for i in range(self.tree_num):
            clf = self.bootstrap(train_data, train_label)
            self.trees.append(clf)
        return self

    '''
    Function: vote
    Description: return the label of the majority (kept from the classifier;
                 predict() below averages the trees instead)
    Input:  labels  dataType: ndarray  description: labels
    Output: pred    dataType: int      description: prediction label of input vector
    '''
    def vote(self, labels):
        label_count = {}
        # get the counts of each label
        for c in labels:
            label_count[c] = label_count.get(c, 0) + 1
        # get the label of the majority
        prediction = sorted(label_count.items(), key=op.itemgetter(1), reverse=True)
        pred = prediction[0][0]
        return pred

    '''
    Function: predict
    Description: predict the testing set; the forest output is the mean of the trees
    Input:  test_data   dataType: ndarray  description: features
    Output: prediction  dataType: ndarray  description: the prediction results for testing set
    '''
    def predict(self, test_data):
        labels = np.zeros([len(test_data), self.tree_num])
        for i in range(self.tree_num):
            labels[:, i] = self.trees[i].predict(test_data).reshape(len(test_data))

        # average over the trees (axis=1), one value per sample
        prediction = np.mean(labels, axis=1)

        self.prediction = prediction
        return prediction

    '''
    Function: accuarcy
    Description: show detection result; exact-match accuracy is only meaningful
                 when the regression targets are discrete
    Input:  test_label dataType: ndarray  description: labels of test data
    Output: accuracy   dataType: float    description: detection accuracy
    '''
    def accuarcy(self, test_label):
        prediction = self.prediction
        accuracy = sum(prediction == test_label)/len(test_label)
        return accuracy

    '''
    Function: save
    Description: save the model as pkl
    Input:  filename  dataType: str  description: the path to save model
    '''
    def save(self, filename):
        f = open(filename, 'wb')   # pickle needs binary mode
        model = self.trees
        pickle.dump(model, f)
        f.close()

    '''
    Function: load
    Description: load the model
    Input:  filename  dataType: str  description: the path of the saved model
    Output: self      dataType: obj  description: the trained model
    '''
    def load(self, filename):
        f = open(filename, 'rb')
        self.trees = pickle.load(f)
        return self

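For the regression forest, predictions are averaged rather than voted. The next file, LinearRegression.py, solves the normal equation w = (XᵀX)⁻¹Xᵀy in closed form; a quick sanity check of that identity against numpy's least-squares solver (random data, sketch only).

```python
import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(50, 3)
y = X @ np.array([2.0, -1.0, 0.5]) + 0.01 * rng.randn(50)

w_closed = np.linalg.inv(X.T @ X) @ (X.T @ y)      # normal equation
w_lstsq, *_ = np.linalg.lstsq(X, y, rcond=None)    # reference solver
print(np.allclose(w_closed, w_lstsq))              # True
```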
--------------------------------------------------------------------------------
/LinearRegression.py:
--------------------------------------------------------------------------------
"""
@ Filename: LinearRegression.py
@ Author: Ryuk
@ Create Date: 2019-05-05
@ Update Date: 2019-05-06
@ Description: Implement linear regression
"""
import numpy as np
import preProcess
import pickle
import matplotlib.pyplot as plt

class Regression:
    def __init__(self, norm_type="Normalization", regression_type="Standard", k=1.0, lamda=0.2, learning_rate=0.01, iterations=100):
        self.norm_type = norm_type
        self.regression_type = regression_type
        self.k = k                            # parameter for locally weighted linear regression
        self.lamda = lamda                    # parameter for ridge and lasso regression
        self.learning_rate = learning_rate    # parameter for forward step regression
        self.iterations = iterations          # parameter for forward step and lasso regression
        self.x_train = None                   # kept for locally weighted regression
        self.y_train = None
        self.w = None
        self.parameters = None
        self.prediction = None
        self.probability = None

    '''
    Function: standardLinearRegression
    Description: standard linear regression, w = (X.T * X)^-1 * X.T * y
    Input:  x  dataType: ndarray  description: x
            y  dataType: ndarray  description: y
    Output: w  dataType: ndarray  description: weights
    '''
    def standardLinearRegression(self, x, y):
        if self.norm_type == "Standardization":
            x = preProcess.Standardization(x)
        else:
            x = preProcess.Normalization(x)

        xTx = np.dot(x.T, x)
        if np.linalg.det(xTx) == 0:           # a singular matrix cannot be inverted
            print("Error: Singular Matrix !")
            return
        w = np.dot(np.linalg.inv(xTx), np.dot(x.T, y))
        return w

    '''
    Function: LWLinearRegression
    Description: locally weighted linear regression, w = (X.T * W * X)^-1 * X.T * W * y,
                 solved independently for each query sample
    Input:  x       dataType: ndarray  description: x
            y       dataType: ndarray  description: y
            sample  dataType: ndarray  description: query point the weights are centered on
    Output: result  dataType: ndarray  description: weights for this query point
    '''
    def LWLinearRegression(self, x, y, sample):
        if self.norm_type == "Standardization":
            x = preProcess.Standardization(x)
        else:
            x = preProcess.Normalization(x)

        sample_num = len(x)
        weights = np.eye(sample_num)
        for i in range(sample_num):
            # Gaussian kernel: closer samples get larger weights
            diff = sample - x[i, :]
            weights[i, i] = np.exp(np.dot(diff, diff.T)/(-2 * self.k ** 2))
        xTx = np.dot(x.T, np.dot(weights, x))
        if np.linalg.det(xTx) == 0:
            print("Error: Singular Matrix !")
            return
        result = np.dot(np.linalg.inv(xTx), np.dot(x.T, np.dot(weights, y)))
        return result

    '''
    Function: ridgeRegression
    Description: ridge linear regression, w = (X.T * X + lamda * I)^-1 * X.T * y
    Input:  x  dataType: ndarray  description: x
            y  dataType: ndarray  description: y
    Output: w  dataType: ndarray  description: weights
    '''
    def ridgeRegression(self, x, y):
        if self.norm_type == "Standardization":
            x = preProcess.Standardization(x)
        else:
            x = preProcess.Normalization(x)

        feature_dim = len(x[0])
        xTx = np.dot(x.T, x)
        matrix = xTx + np.eye(feature_dim) * self.lamda   # regularize with the identity matrix
        if np.linalg.det(matrix) == 0:
            print("Error: Singular Matrix !")
            return
        w = np.dot(np.linalg.inv(matrix), np.dot(x.T, y))
        return w

    '''
    Function: lassoRegression
    Description: lasso linear regression via coordinate descent with soft-thresholding
    Input:  x  dataType: ndarray  description: x
            y  dataType: ndarray  description: y
    Output: w  dataType: ndarray  description: weights
    '''
    def lassoRegression(self, x, y):
        if self.norm_type == "Standardization":
            x = preProcess.Standardization(x)
        else:
            x = preProcess.Normalization(x)

        y = np.reshape(y, (-1, 1))
        sample_num, feature_dim = np.shape(x)
        w = np.zeros([feature_dim, 1])
        for it in range(self.iterations):
            # coordinate descent: update one weight at a time
            for j in range(feature_dim):
                # residual with feature j's current contribution removed
                residual = y - np.dot(x, w) + x[:, j:j+1] * w[j]
                rho = float(np.dot(x[:, j], residual))
                z = float(np.dot(x[:, j], x[:, j]))
                if z == 0:
                    continue
                # soft-thresholding shrinks the update and zeroes small weights
                w[j] = np.sign(rho) * max(abs(rho) - self.lamda, 0) / z
        return w

    '''
    Function: forwardstepRegression
    Description: forward stepwise linear regression
    Input:  x       dataType: ndarray  description: x
            y       dataType: ndarray  description: y
    Output: best_w  dataType: ndarray  description: weights
    '''
    def forwardstepRegression(self, x, y):
        if self.norm_type == "Standardization":
            x = preProcess.Standardization(x)
        else:
            x = preProcess.Normalization(x)

        y = np.reshape(y, (-1, 1))
        sample_num, feature_dim = np.shape(x)
        w = np.zeros([self.iterations, feature_dim])
        best_w = np.zeros([feature_dim, 1])
        for i in range(self.iterations):
            min_error = np.inf
            for j in range(feature_dim):
                for sign in [-1, 1]:
                    temp_w = best_w.copy()    # work on a copy so candidates do not alias best_w
                    temp_w[j] += sign * self.learning_rate
                    y_hat = np.dot(x, temp_w)
                    error = ((y - y_hat) ** 2).sum()   # squared error
                    if error < min_error:              # keep the best parameters
                        min_error = error
                        best_w = temp_w
            w[i, :] = best_w.T
        return best_w                        # w keeps the per-iteration path if needed
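lassoRegression() above relies on the soft-thresholding operator S(ρ, λ) = sign(ρ)·max(|ρ| − λ, 0), which is what produces exact zeros in the weight vector; a small standalone demo.

```python
import numpy as np

def soft_threshold(rho, lam):
    # shrink toward zero; exactly zero once |rho| <= lam
    return np.sign(rho) * np.maximum(np.abs(rho) - lam, 0.0)

rho = np.array([-3.0, -0.1, 0.05, 2.0])
print(soft_threshold(rho, lam=0.2))   # [-2.8  0.   0.   1.8]
```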
    '''
    Function: train
    Description: train the model
    Input:  train_data   dataType: ndarray  description: features
            train_label  dataType: ndarray  description: labels
    Output: self         dataType: obj      description: the trained model
    '''
    def train(self, train_data, train_label):
        if self.norm_type == "Standardization":
            train_data = preProcess.Standardization(train_data)
        else:
            train_data = preProcess.Normalization(train_data)

        if self.regression_type == "Standard":
            self.w = self.standardLinearRegression(train_data, train_label)
        elif self.regression_type == "Localweight":
            # locally weighted regression has no single weight vector: keep the
            # training set and fit around each query point in predict()
            self.x_train = train_data
            self.y_train = train_label
        elif self.regression_type == "Ridge":
            self.w = self.ridgeRegression(train_data, train_label)
        elif self.regression_type == "Lasso":
            self.w = self.lassoRegression(train_data, train_label)
        elif self.regression_type == "Forwardstep":
            self.w = self.forwardstepRegression(train_data, train_label)
        else:
            print("Error Regression Type!")
        return self

    '''
    Function: predict
    Description: predict the testing set
    Input:  x     dataType: ndarray  description: features
            prob  dataType: bool     description: return probability of label
    Output: y     dataType: ndarray  description: the prediction results for testing set
    '''
    def predict(self, x, prob=False):
        # Normalization
        if self.norm_type == "Standardization":
            x = preProcess.Standardization(x)
        else:
            x = preProcess.Normalization(x)

        if self.regression_type == "Localweight":
            # solve a locally weighted fit around every query sample
            y = np.zeros([len(x), 1])
            for i in range(len(x)):
                w = self.LWLinearRegression(self.x_train, self.y_train, x[i, :])
                y[i] = np.dot(x[i, :], w)
        else:
            y = np.dot(x, self.w)
        self.prediction = y
        return y

    '''
    Function: plot
    Description: show the regression result
    Input:  test_label  dataType: ndarray  description: labels of test data
    '''
    def plot(self, test_label):
        prediction = self.prediction
        plt.plot(test_label, 'r*', label='Real values')
        plt.plot(prediction, 'b', label='Predicted values')
        plt.xlabel('X')
        plt.ylabel('Y')
        plt.legend(loc=3)
        plt.title('Regression')
        plt.show()

    '''
    Function: save
    Description: save the model as pkl
    Input:  filename  dataType: str  description: the path to save model
    '''
    def save(self, filename):
        f = open(filename, 'wb')   # pickle needs binary mode
        pickle.dump(self.w, f)
        f.close()

    '''
    Function: load
    Description: load the model
    Input:  filename  dataType: str  description: the path of the saved model
    Output: self      dataType: obj  description: the trained model
    '''
    def load(self, filename):
        f = open(filename, 'rb')
        self.w = pickle.load(f)
        return self

--------------------------------------------------------------------------------
/TreeRegression.py:
--------------------------------------------------------------------------------
1 | """
2 | @ Filename: TreeRegression.py
3 | @ Author: Ryuk
4 | @ Create Date: 2019-05-11
5 | @ Update Date: 2019-05-13
6 | @ Description: Implement TreeRegression
7 | """
8 | 
9 | import numpy as np
10 | import operator as op
11 | import preProcess
12 | import math
13 | import pickle
14 | 
15 | class treeNode():
16 |     def __init__(self, index=-1, value=None, result=None, right_tree=None, left_tree=None):
17 |         self.index = index
18 |         self.value = value
19 |         self.result = result
20 |         self.right_tree = right_tree
21 |         self.left_tree = left_tree
22 | 
23 | 
24 | class 
treeRegression: 25 | def __init__(self, norm_type="Normalization",iterations=100, error_threshold=1, N=4): 26 | self.norm_type = norm_type 27 | self.iterations = iterations 28 | self.error_threshold = error_threshold # the threshold of error 29 | self.N = N # the least number of sample for split 30 | self.tree_node = None 31 | self.prediction = None 32 | self.probability = None 33 | 34 | ''' 35 | Function: divideData 36 | Description: divide data into two parts 37 | Input: data dataType: ndarray description: feature and labels 38 | index dataType: int description: the column of feature 39 | value dataType: float description: the value of feature 40 | Output: left_set dataType: ndarray description: feature <= value 41 | right_set dataType: ndarray description: feature > value 42 | ''' 43 | def divideData(self, data, index, value): 44 | left_set = [] 45 | right_set = [] 46 | # select feature in index with value 47 | for temp in data: 48 | if temp[index] >= value: 49 | # delete this feature 50 | right_set.append(temp) 51 | else: 52 | left_set.append(temp) 53 | return np.array(left_set), np.array(right_set) 54 | 55 | ''' 56 | Function: getVariance 57 | Description: get the variance of the regression value, in page of 68 Eq.(5.19) 58 | Input: data dataType: ndarray description: feature and value, the last column is value 59 | Output: variance dataType: ndarray description: variance 60 | ''' 61 | def getVariance(self, data): 62 | variance = np.var(data) 63 | return variance*len(data) 64 | 65 | ''' 66 | Function: getMean 67 | Description: get the mean of the regression value,in page of 68 Eq.(5.17) 68 | Input: data dataType: ndarray description: feature and value, the last column is value 69 | Output: mean dataType: ndarray description: mean 70 | ''' 71 | def getMean(self, data): 72 | mean = np.mean(data) 73 | return mean 74 | 75 | ''' 76 | Function: createRegressionTree 77 | Description: create regression tree 78 | Input: data dataType: ndarray description: training set 79 | Output: w dataType: ndarray description: weights 80 | ''' 81 | def createRegressionTree(self, data): 82 | # if there is no feature 83 | if len(data) == 0: 84 | self.tree_node = treeNode(result=self.getMean(data[:, -1])) 85 | return self.tree_node 86 | 87 | sample_num, feature_dim = np.shape(data) 88 | 89 | best_criteria = None 90 | best_error = np.inf 91 | best_set = None 92 | initial_error = self.getVariance(data) 93 | 94 | # get the best split feature and value 95 | for index in range(feature_dim - 1): 96 | uniques = np.unique(data[:, index]) 97 | for value in uniques: 98 | left_set, right_set = self.divideData(data, index, value) 99 | if len(left_set) < self.N or len(right_set) < self.N: 100 | continue 101 | new_error = self.getVariance(left_set) + self.getVariance(right_set) 102 | if new_error < best_error: 103 | best_criteria = (index, value) 104 | best_error = new_error 105 | best_set = (left_set, right_set) 106 | 107 | if best_set is None: 108 | self.tree_node = treeNode(result=self.getMean(data[:, -1])) 109 | return self.tree_node 110 | # if the descent of error is small enough, return the mean of the data 111 | elif abs(initial_error - best_error) < self.error_threshold: 112 | self.tree_node = treeNode(result=self.getMean(data[:, -1])) 113 | return self.tree_node 114 | # if the split data is small enough, return the mean of the data 115 | elif len(best_set[0]) < self.N or len(best_set[1]) < self.N: 116 | self.tree_node = treeNode(result=self.getMean(data[:, -1])) 117 | return self.tree_node 118 | else: 119 | ltree = 
self.createRegressionTree(best_set[0]) 120 | rtree = self.createRegressionTree(best_set[1]) 121 | self.tree_node = treeNode(index=best_criteria[0], value=best_criteria[1], left_tree=ltree, right_tree=rtree) 122 | return self.tree_node 123 | 124 | ''' 125 | Function: train 126 | Description: train the model 127 | Input: train_data dataType: ndarray description: features 128 | train_label dataType: ndarray description: labels 129 | Output: self dataType: obj description: the trained model 130 | ''' 131 | def train(self, train_data, train_label, pruning=False, val_data=None, val_label=None): 132 | # if self.norm_type == "Standardization": 133 | # train_data = preProcess.Standardization(train_data) 134 | # else: 135 | # train_data = preProcess.Normalization(train_data) 136 | 137 | train_label = np.expand_dims(train_label, axis=1) 138 | data = np.hstack([train_data, train_label]) 139 | 140 | self.tree_node = self.createRegressionTree(data) 141 | #self.printTree(self.tree_node) 142 | return self 143 | 144 | ''' 145 | Function: printTree 146 | Description: show the structure of the decision tree 147 | Input: tree dataType: DecisionNode description: decision tree 148 | ''' 149 | def printTree(self, tree): 150 | # leaf node 151 | if tree.result != None: 152 | print(str(tree.result)) 153 | else: 154 | # print condition 155 | print(str(tree.index) + ":" + str(tree.value)) 156 | # print subtree 157 | print("R->", self.printTree(tree.right_tree)) 158 | print("L->", self.printTree(tree.left_tree)) 159 | 160 | ''' 161 | Function: predict 162 | Description: predict the testing set 163 | Input: train_data dataType: ndarray description: features 164 | prob dataType: bool description: return probaility of label 165 | Output: prediction dataType: ndarray description: the prediction results for testing set 166 | ''' 167 | def predict(self, test_data, prob="False"): 168 | # Normalization 169 | # if self.norm_type == "Standardization": 170 | # test_data = preProcess.Standardization(test_data) 171 | # else: 172 | # test_data = preProcess.Normalization(test_data) 173 | 174 | test_num = test_data.shape[0] 175 | prediction = np.zeros([test_num, 1]) 176 | probability = np.zeros([test_num, 1]) 177 | for i in range(test_num): 178 | prediction[i] = self.classify(test_data[i, :], self.tree_node) 179 | # probability[i] = result[0][1]/(result[0][1] + result[1][1]) 180 | self.prediction = prediction 181 | self.probability = probability 182 | 183 | return prediction 184 | 185 | ''' 186 | Function: classify 187 | Description: predict the testing set 188 | Input: sample dataType: ndarray description: input vector to be classified 189 | Output: label dataType: ndarray description: the prediction results of input 190 | ''' 191 | def classify(self, sample, tree): 192 | if tree.result is not None: 193 | return tree.result 194 | else: 195 | value = sample[tree.index] 196 | if value >= tree.value: 197 | branch = tree.right_tree 198 | else: 199 | branch = tree.left_tree 200 | return self.classify(sample, branch) 201 | 202 | ''' 203 | Function: pruning 204 | Description: pruning the regression tree 205 | Input: test_data dataType: ndarray description: features 206 | test_label dataType: ndarray description: labels 207 | Output: self dataType: obj description: the trained model 208 | ''' 209 | def pruning(self, tree, data, alpha): 210 | 211 | return 0 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | ''' 220 | Function: save 221 | Description: save the model as pkl 222 | Input: filename dataType: str description: the path to save model 
223 |     '''
224 | 
225 |     def save(self, filename):
226 |         f = open(filename, 'wb')   # pickle needs binary mode
227 |         pickle.dump(self.tree_node, f)
228 |         f.close()
229 | 
230 |     '''
231 |     Function: load
232 |     Description: load the model
233 |     Input: filename dataType: str description: the path of the saved model
234 |     Output: self dataType: obj description: the trained model
235 |     '''
236 | 
237 |     def load(self, filename):
238 |         f = open(filename, 'rb')
239 |         self.tree_node = pickle.load(f)
240 |         return self
241 | 
242 | 
243 | 
--------------------------------------------------------------------------------
/DecisionTree.py:
--------------------------------------------------------------------------------
"""
@Filename: DecisionTree.py
@Author: Ryuk
@Create Date: 2019-04-22
@Update Date: 2019-05-03
@Description: Implement of decision tree
"""

import numpy as np
import operator as op
import preProcess
import math
import pickle


class DecisionNode:
    def __init__(self, index=-1, value=None, results=None, right_tree=None, left_tree=None):
        self.index = index            # the index of the split feature
        self.value = value            # the split value of the feature at that index
        self.results = results        # label counts at a leaf, None for internal nodes
        self.right_tree = right_tree
        self.left_tree = left_tree


class DecisionTreeClassifier:
    def __init__(self, norm_type="Normalization", t=1e-5):
        self.norm_type = norm_type
        self.t = t                    # the threshold of information gain
        self.prediction = None
        self.probability = None
        self.tree_node = None

    '''
    Function: uniqueCount
    Description: calculate the count of unique labels
    Input:  labels       dataType: ndarray     description: labels of data
    Output: label_count  dataType: dictionary  description: {label: count}
    '''
    def uniqueCount(self, labels):
        label_count = {}
        for i in range(len(labels)):
            label_count[labels[i]] = label_count.get(labels[i], 0) + 1
        return label_count

    '''
    Function: getEntropy
    Description: calculate the Shannon entropy of the input labels
    Input:  labels   dataType: ndarray  description: labels of data
    Output: entropy  dataType: float    description: Shannon entropy, in bits
    '''
    def getEntropy(self, labels):
        labels_num = len(labels)
        label_count = self.uniqueCount(labels)

        entropy = 0.0
        for j in label_count:
            prop = label_count[j]/labels_num
            entropy = entropy + (-prop*math.log(prop, 2))

        return entropy
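getEntropy() returns exactly 1 bit for a balanced two-class node and 0 for a pure one; a quick check of the formula with hypothetical counts.

```python
import math

labels = [1, 1, -1, -1]
counts = {1: 2, -1: 2}
entropy = -sum((c / len(labels)) * math.log(c / len(labels), 2) for c in counts.values())
print(entropy)   # 1.0; a pure node such as [1, 1, 1, 1] would give 0.0
```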
    '''
    Function: divideData
    Description: divide data into two parts
    Input:  data   dataType: ndarray  description: feature and labels
            index  dataType: int      description: the column of feature
            value  dataType: float    description: the value of feature
    Output: left_set   dataType: ndarray  description: rows with feature < value
            right_set  dataType: ndarray  description: rows with feature >= value
    '''
    def divideData(self, data, index, value):
        left_set = []
        right_set = []
        # split on the feature at `index`; keep all columns so that the stored
        # split indices keep referring to positions in the original feature vector
        for temp in data:
            if temp[index] >= value:
                right_set.append(temp)
            else:
                left_set.append(temp)
        return np.array(left_set), np.array(right_set)

    '''
    Function: createDecisionTree
    Description: create decision tree by ID3
    Input:  data       dataType: ndarray       description: [feature, label]
    Output: tree_node  dataType: DecisionNode  description: root of the (sub)tree
    '''
    def createDecisionTree(self, data):
        # if there is no data left, stop the division
        if len(data) == 0:
            self.tree_node = DecisionNode()
            return self.tree_node

        best_gain = 0.0
        best_criteria = None
        best_set = None

        feature_num = len(data[0]) - 1
        sample_num = len(data[:, -1])
        init_entropy = self.getEntropy(data[:, -1])

        # get the best division
        for i in range(feature_num):
            uniques = np.unique(data[:, i])
            for value in uniques:
                left_set, right_set = self.divideData(data, i, value)
                # calculate the information gain
                ratio = float(len(left_set)/sample_num)
                if ratio == 0.0:
                    info_gain = init_entropy - (1 - ratio) * self.getEntropy(right_set[:, -1])
                elif ratio == 1.0:
                    info_gain = init_entropy - ratio*self.getEntropy(left_set[:, -1])
                else:
                    info_gain = init_entropy - ratio * self.getEntropy(left_set[:, -1]) - (1 - ratio) * self.getEntropy(right_set[:, -1])
                if info_gain > best_gain:
                    best_gain = info_gain
                    best_criteria = (i, value)
                    best_set = (left_set, right_set)

        # create the decision tree
        if best_gain < self.t:
            # not enough gain: make a leaf that stores the label counts
            self.tree_node = DecisionNode(results=self.uniqueCount(data[:, -1]))
            return self.tree_node
        else:
            ltree = self.createDecisionTree(best_set[0])
            rtree = self.createDecisionTree(best_set[1])
            self.tree_node = DecisionNode(index=best_criteria[0], value=best_criteria[1], left_tree=ltree, right_tree=rtree)
            return self.tree_node

    '''
    Function: vote
    Description: return the label of the majority
    Input:  labels  dataType: ndarray  description: labels
    Output: pred    dataType: int      description: prediction label of input vector
    '''
    def vote(self, labels):
        labelCount = {}
        # get the counts of each label
        for c in labels:
            labelCount[c] = labelCount.get(c, 0) + 1
        # get the label of the majority
        prediction = sorted(labelCount.items(), key=op.itemgetter(1), reverse=True)
        pred = prediction[0][0]
        return pred

    '''
    Function: train
    Description: train the model
    Input:  trainData   dataType: ndarray  description: features
            trainLabel  dataType: ndarray  description: labels
    Output: self        dataType: obj      description: the trained model
    '''
    def train(self, trainData, trainLabel):
        if self.norm_type == "Standardization":
            trainData = preProcess.Standardization(trainData)
        else:
            trainData = preProcess.Normalization(trainData)

        trainLabel = np.expand_dims(trainLabel, axis=1)
        data = np.hstack([trainData, trainLabel])

        self.tree_node = self.createDecisionTree(data)
        return self

    '''
    Function: save
    Description: save the model as pkl
    Input:  filename  dataType: str  description: the path to save model
    '''
    def save(self, filename):
        f = open(filename, 'wb')   # pickle needs binary mode
        pickle.dump(self.tree_node, f)
        f.close()

    '''
    Function: load
    Description: load the model
    Input:  filename  dataType: str  description: the path of the saved model
    Output: self      dataType: obj  description: the trained model
    '''
    def load(self, filename):
        f = open(filename, 'rb')
        self.tree_node = pickle.load(f)
        return self
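createDecisionTree() keeps the split with the largest information gain, gain = H(D) − Σᵢ (|Dᵢ|/|D|)·H(Dᵢ); one worked number with hypothetical counts.

```python
import math

def H(pos, neg):
    # Shannon entropy of a two-class node, in bits
    total = pos + neg
    e = 0.0
    for c in (pos, neg):
        if c:
            p = c / total
            e -= p * math.log(p, 2)
    return e

# parent: 4 positive / 4 negative; split into (4+, 1-) and (0+, 3-)
gain = H(4, 4) - (5 / 8) * H(4, 1) - (3 / 8) * H(0, 3)
print(round(gain, 3))   # ~0.549 bits
```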
    '''
    Function: predict
    Description: predict the testing set
    Input:  test_data  dataType: ndarray  description: features
            prob       dataType: bool     description: return probability of label
    Output: prediction dataType: ndarray  description: the prediction results for testing set
    '''
    def predict(self, test_data, prob=False):
        # Normalization
        if self.norm_type == "Standardization":
            test_data = preProcess.Standardization(test_data)
        else:
            test_data = preProcess.Normalization(test_data)

        test_num = test_data.shape[0]
        prediction = np.zeros([test_num, 1])
        probability = np.zeros([test_num, 1])
        for i in range(test_num):
            result = self.classify(test_data[i, :], self.tree_node)
            # the leaf stores label counts; take the most frequent label
            result = sorted(result.items(), key=op.itemgetter(1), reverse=True)
            prediction[i] = result[0][0]
            probability[i] = result[0][1]/sum(count for _, count in result)
        self.prediction = prediction
        self.probability = probability
        if prob:
            return probability
        else:
            return prediction

    '''
    Function: classify
    Description: classify a single sample with the trained tree
    Input:  sample  dataType: ndarray       description: input vector to be classified
            tree    dataType: DecisionNode  description: (sub)tree to walk
    Output: results dataType: dict          description: label counts of the reached leaf
    '''
    def classify(self, sample, tree):
        if tree.results is not None:
            return tree.results
        else:
            value = sample[tree.index]
            if value >= tree.value:
                branch = tree.right_tree
            else:
                branch = tree.left_tree
            return self.classify(sample, branch)

    '''
    Function: printTree
    Description: show the structure of the decision tree
    Input:  tree  dataType: DecisionNode  description: decision tree
    '''
    def printTree(self, tree):
        # leaf node
        if tree.results is not None:
            print(str(tree.results))
        else:
            # print the split condition, then both subtrees
            print(str(tree.index) + ":" + str(tree.value) + "? ")
            print("R->")
            self.printTree(tree.right_tree)
            print("L->")
            self.printTree(tree.left_tree)

    '''
    Function: accuarcy
    Description: show detection result
    Input:  test_label dataType: ndarray  description: labels of test data
    Output: accuracy   dataType: float    description: detection accuracy
    '''
    def accuarcy(self, test_label):   # name kept as-is for compatibility with the example scripts
        test_label = np.expand_dims(test_label, axis=1)
        prediction = self.prediction
        accuracy = sum(prediction == test_label)/len(test_label)
        return accuracy

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |                                  Apache License
2 |                            Version 2.0, January 2004
3 |                         http://www.apache.org/licenses/
4 | 
5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 |    1. Definitions.
8 | 
9 |       "License" shall mean the terms and conditions for use, reproduction,
10 |       and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 |       "Licensor" shall mean the copyright owner or entity authorized by
13 |       the copyright owner that is granting the License.
14 | 
15 |       "Legal Entity" shall mean the union of the acting entity and all
16 |       other entities that control, are controlled by, or are under common
17 |       control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /dataset/dataset1/test.txt: -------------------------------------------------------------------------------- 1 | 11188 6.649568 0.544233 2 2 | 56796 3.966325 0.850410 1 3 | 8571 1.924045 1.664782 2 4 | 4914 6.004812 0.280369 2 5 | 10784 0.000000 0.375849 2 6 | 39296 9.923018 0.092192 3 7 | 13113 2.389084 0.119284 2 8 | 70204 13.663189 0.133251 1 9 | 46813 11.434976 0.321216 3 10 | 11697 0.358270 1.292858 2 11 | 44183 9.598873 0.223524 3 12 | 2225 6.375275 0.608040 2 13 | 29066 11.580532 0.458401 3 14 | 4245 5.319324 1.598070 2 15 | 34379 4.324031 1.603481 1 16 | 44441 2.358370 1.273204 1 17 | 2022 0.000000 1.182708 2 18 | 26866 12.824376 0.890411 3 19 | 57070 1.587247 1.456982 1 20 | 32932 8.510324 1.520683 3 21 | 51967 10.428884 1.187734 3 22 | 44432 8.346618 0.042318 3 23 | 67066 7.541444 0.809226 1 24 | 17262 2.540946 1.583286 2 25 | 79728 9.473047 0.692513 1 26 | 14259 0.352284 0.474080 2 27 | 6122 0.000000 0.589826 2 28 | 76879 12.405171 0.567201 1 29 | 11426 4.126775 0.871452 2 30 | 2493 0.034087 0.335848 2 31 | 19910 1.177634 0.075106 2 32 | 10939 0.000000 0.479996 2 33 | 17716 0.994909 0.611135 2 34 | 31390 11.053664 1.180117 3 35 | 20375 0.000000 1.679729 2 36 | 26309 2.495011 1.459589 1 37 | 33484 11.516831 0.001156 3 38 | 45944 9.213215 0.797743 3 39 | 4249 5.332865 0.109288 2 40 | 6089 0.000000 1.689771 2 41 | 7513 0.000000 1.126053 2 42 | 27862 12.640062 1.690903 3 43 | 39038 2.693142 1.317518 1 44 | 19218 3.328969 0.268271 2 45 | 62911 7.193166 1.117456 1 46 | 77758 6.615512 1.521012 1 47 | 27940 8.000567 0.835341 3 48 | 2194 4.017541 0.512104 2 49 | 37072 13.245859 0.927465 3 50 | 15585 5.970616 0.813624 2 51 | 25577 11.668719 0.886902 3 52 | 8777 4.283237 1.272728 2 53 | 29016 10.742963 0.971401 3 54 | 21910 12.326672 1.592608 3 55 | 12916 0.000000 0.344622 2 56 | 10976 0.000000 0.922846 2 57 | 79065 10.602095 0.573686 1 58 | 36759 10.861859 1.155054 3 59 | 50011 1.229094 1.638690 1 60 | 1155 0.410392 1.313401 2 61 | 71600 14.552711 0.616162 1 62 | 30817 14.178043 0.616313 3 63 | 54559 14.136260 0.362388 1 64 | 29764 0.093534 1.207194 1 65 | 69100 10.929021 0.403110 1 66 | 47324 11.432919 0.825959 3 67 | 73199 9.134527 0.586846 1 68 | 44461 5.071432 1.421420 1 69 | 45617 11.460254 1.541749 3 70 | 28221 11.620039 1.103553 3 71 | 7091 4.022079 0.207307 2 72 | 6110 3.057842 1.631262 2 73 | 79016 7.782169 0.404385 1 74 | 18289 7.981741 0.929789 3 75 | 43679 4.601363 0.268326 1 76 | 22075 2.595564 1.115375 1 77 | 23535 10.049077 0.391045 3 78 | 25301 3.265444 1.572970 2 79 | 32256 11.780282 1.511014 3 80 | 36951 3.075975 0.286284 1 81 | 31290 1.795307 0.194343 1 82 | 38953 11.106979 0.202415 3 83 | 35257 5.994413 0.800021 1 84 | 25847 9.706062 1.012182 3 85 | 32680 10.582992 0.836025 3 86 | 62018 7.038266 1.458979 1 87 | 9074 0.023771 0.015314 2 88 | 33004 12.823982 0.676371 3 89 | 44588 3.617770 0.493483 1 90 | 32565 8.346684 0.253317 3 91 | 38563 6.104317 0.099207 1 92 | 75668 16.207776 0.584973 1 93 | 9069 6.401969 1.691873 2 94 | 53395 2.298696 0.559757 1 
95 | 28631 7.661515 0.055981 3 96 | 71036 6.353608 1.645301 1 97 | 71142 10.442780 0.335870 1 98 | 37653 3.834509 1.346121 1 99 | 76839 10.998587 0.584555 1 100 | 9916 2.695935 1.512111 2 101 | 38889 3.356646 0.324230 1 102 | 39075 14.677836 0.793183 3 103 | 48071 1.551934 0.130902 1 104 | 7275 2.464739 0.223502 2 105 | 41804 1.533216 1.007481 1 106 | 35665 12.473921 0.162910 3 107 | 67956 6.491596 0.032576 1 108 | 41892 10.506276 1.510747 3 109 | 38844 4.380388 0.748506 1 110 | 74197 13.670988 1.687944 1 111 | 14201 8.317599 0.390409 2 112 | 3908 0.000000 0.556245 2 113 | 2459 0.000000 0.290218 2 114 | 32027 10.095799 1.188148 3 115 | 12870 0.860695 1.482632 2 116 | 9880 1.557564 0.711278 2 117 | 72784 10.072779 0.756030 1 118 | 17521 0.000000 0.431468 2 119 | 50283 7.140817 0.883813 3 120 | 33536 11.384548 1.438307 3 121 | 9452 3.214568 1.083536 2 122 | 37457 11.720655 0.301636 3 123 | 17724 6.374475 1.475925 3 124 | 43869 5.749684 0.198875 3 125 | 264 3.871808 0.552602 2 126 | 25736 8.336309 0.636238 3 127 | 39584 9.710442 1.503735 3 128 | 31246 1.532611 1.433898 1 129 | 49567 9.785785 0.984614 3 130 | 7052 2.633627 1.097866 2 131 | 35493 9.238935 0.494701 3 132 | 10986 1.205656 1.398803 2 133 | 49508 3.124909 1.670121 1 134 | 5734 7.935489 1.585044 2 135 | 65479 12.746636 1.560352 1 136 | 77268 10.732563 0.545321 1 137 | 28490 3.977403 0.766103 1 138 | 13546 4.194426 0.450663 2 139 | 37166 9.610286 0.142912 3 140 | 16381 4.797555 1.260455 2 141 | 10848 1.615279 0.093002 2 142 | 35405 4.614771 1.027105 1 143 | 15917 0.000000 1.369726 2 144 | 6131 0.608457 0.512220 2 145 | 67432 6.558239 0.667579 1 146 | 30354 12.315116 0.197068 3 147 | 69696 7.014973 1.494616 1 148 | 33481 8.822304 1.194177 3 149 | 43075 10.086796 0.570455 3 150 | 38343 7.241614 1.661627 3 151 | 14318 4.602395 1.511768 2 152 | 5367 7.434921 0.079792 2 153 | 37894 10.467570 1.595418 3 154 | 36172 9.948127 0.003663 3 155 | 40123 2.478529 1.568987 1 156 | 10976 5.938545 0.878540 2 157 | 12705 0.000000 0.948004 2 158 | 12495 5.559181 1.357926 2 159 | 35681 9.776654 0.535966 3 160 | 46202 3.092056 0.490906 1 161 | 11505 0.000000 1.623311 2 162 | 22834 4.459495 0.538867 1 163 | 49901 8.334306 1.646600 3 164 | 71932 11.226654 0.384686 1 165 | 13279 3.904737 1.597294 2 166 | 49112 7.038205 1.211329 3 167 | 77129 9.836120 1.054340 1 168 | 37447 1.990976 0.378081 1 169 | 62397 9.005302 0.485385 1 170 | 0 1.772510 1.039873 2 171 | 15476 0.458674 0.819560 2 172 | 40625 10.003919 0.231658 3 173 | 36706 0.520807 1.476008 1 174 | 28580 10.678214 1.431837 3 175 | 25862 4.425992 1.363842 1 176 | 63488 12.035355 0.831222 1 177 | 33944 10.606732 1.253858 3 178 | 30099 1.568653 0.684264 1 179 | 13725 2.545434 0.024271 2 180 | 36768 10.264062 0.982593 3 181 | 64656 9.866276 0.685218 1 182 | 14927 0.142704 0.057455 2 183 | 43231 9.853270 1.521432 3 184 | 66087 6.596604 1.653574 1 185 | 19806 2.602287 1.321481 2 186 | 41081 10.411776 0.664168 3 187 | 10277 7.083449 0.622589 2 188 | 7014 2.080068 1.254441 2 189 | 17275 0.522844 1.622458 2 190 | 31600 10.362000 1.544827 3 191 | 59956 3.412967 1.035410 1 192 | 42181 6.796548 1.112153 3 193 | 51743 4.092035 0.075804 1 194 | 5194 2.763811 1.564325 2 195 | 30832 12.547439 1.402443 3 196 | 7976 5.708052 1.596152 2 197 | 14602 4.558025 0.375806 2 198 | 41571 11.642307 0.438553 3 199 | 55028 3.222443 0.121399 1 200 | 5837 4.736156 0.029871 2 201 | 39808 10.839526 0.836323 3 202 | 20944 4.194791 0.235483 2 203 | 22146 14.936259 0.888582 3 204 | 42169 3.310699 1.521855 1 205 | 7010 2.971931 0.034321 2 
206 | 3807 9.261667 0.537807 2 207 | 29241 7.791833 1.111416 3 208 | 52696 1.480470 1.028750 1 209 | 42545 3.677287 0.244167 1 210 | 24437 2.202967 1.370399 1 211 | 16037 5.796735 0.935893 2 212 | 8493 3.063333 0.144089 2 213 | 68080 11.233094 0.492487 1 214 | 59016 1.965570 0.005697 1 215 | 11810 8.616719 0.137419 2 216 | 68630 6.609989 1.083505 1 217 | 7629 1.712639 1.086297 2 218 | 71992 10.117445 1.299319 1 219 | 13398 0.000000 1.104178 2 220 | 26241 9.824777 1.346821 3 221 | 11160 1.653089 0.980949 2 222 | 76701 18.178822 1.473671 1 223 | 32174 6.781126 0.885340 3 224 | 45043 8.206750 1.549223 3 225 | 42173 10.081853 1.376745 3 226 | 69801 6.288742 0.112799 1 227 | 41737 3.695937 1.543589 1 228 | 46979 6.726151 1.069380 3 229 | 79267 12.969999 1.568223 1 230 | 4615 2.661390 1.531933 2 231 | 32907 7.072764 1.117386 3 232 | 37444 9.123366 1.318988 3 233 | 569 3.743946 1.039546 2 234 | 8723 2.341300 0.219361 2 235 | 6024 0.541913 0.592348 2 236 | 52252 2.310828 1.436753 1 237 | 8358 6.226597 1.427316 2 238 | 26166 7.277876 0.489252 3 239 | 18471 0.000000 0.389459 2 240 | 3386 7.218221 1.098828 2 241 | 41544 8.777129 1.111464 3 242 | 10480 2.813428 0.819419 2 243 | 5894 2.268766 1.412130 2 244 | 7273 6.283627 0.571292 2 245 | 22272 7.520081 1.626868 3 246 | 31369 11.739225 0.027138 3 247 | 10708 3.746883 0.877350 2 248 | 69364 12.089835 0.521631 1 249 | 37760 12.310404 0.259339 3 250 | 13004 0.000000 0.671355 2 251 | 37885 2.728800 0.331502 1 252 | 52555 10.814342 0.607652 3 253 | 38997 12.170268 0.844205 3 254 | 69698 6.698371 0.240084 1 255 | 11783 3.632672 1.643479 2 256 | 47636 10.059991 0.892361 3 257 | 15744 1.887674 0.756162 2 258 | 69058 8.229125 0.195886 1 259 | 33057 7.817082 0.476102 3 260 | 28681 12.277230 0.076805 3 261 | 34042 10.055337 1.115778 3 262 | 29928 3.596002 1.485952 1 263 | 9734 2.755530 1.420655 2 264 | 7344 7.780991 0.513048 2 265 | 7387 0.093705 0.391834 2 266 | 33957 8.481567 0.520078 3 267 | 9936 3.865584 0.110062 2 268 | 36094 9.683709 0.779984 3 269 | 39835 10.617255 1.359970 3 270 | 64486 7.203216 1.624762 1 271 | 0 7.601414 1.215605 2 272 | 39539 1.386107 1.417070 1 273 | 66972 9.129253 0.594089 1 274 | 15029 1.363447 0.620841 2 275 | 44909 3.181399 0.359329 1 276 | 38183 13.365414 0.217011 3 277 | 37372 4.207717 1.289767 1 278 | 0 4.088395 0.870075 2 279 | 17786 3.327371 1.142505 2 280 | 39055 1.303323 1.235650 1 281 | 37045 7.999279 1.581763 3 282 | 6435 2.217488 0.864536 2 283 | 72265 7.751808 0.192451 1 284 | 28152 14.149305 1.591532 3 285 | 25931 8.765721 0.152808 3 286 | 7538 3.408996 0.184896 2 287 | 1315 1.251021 0.112340 2 288 | 12292 6.160619 1.537165 2 289 | 49248 1.034538 1.585162 1 290 | 9025 0.000000 1.034635 2 291 | 13438 2.355051 0.542603 2 292 | 69683 6.614543 0.153771 1 293 | 25374 10.245062 1.450903 3 294 | 55264 3.467074 1.231019 1 295 | 38324 7.487678 1.572293 3 296 | 69643 4.624115 1.185192 1 297 | 44058 8.995957 1.436479 3 298 | 41316 11.564476 0.007195 3 299 | 29119 3.440948 0.078331 1 300 | 51656 1.673603 0.732746 1 301 | 3030 4.719341 0.699755 2 302 | 35695 10.304798 1.576488 3 303 | 1537 2.086915 1.199312 2 304 | 9083 6.338220 1.131305 2 305 | 47744 8.254926 0.710694 3 306 | 71372 16.067108 0.974142 1 307 | 37980 1.723201 0.310488 1 308 | 42385 3.785045 0.876904 1 309 | 22687 2.557561 0.123738 1 310 | 39512 9.852220 1.095171 3 311 | 11885 3.679147 1.557205 2 312 | 4944 9.789681 0.852971 2 313 | 73230 14.958998 0.526707 1 314 | 17585 11.182148 1.288459 3 315 | 68737 7.528533 1.657487 1 316 | 13818 5.253802 1.378603 2 317 | 31662 
13.946752 1.426657 3 318 | 86686 15.557263 1.430029 1 319 | 43214 12.483550 0.688513 3 320 | 24091 2.317302 1.411137 1 321 | 52544 10.069724 0.766119 3 322 | 61861 5.792231 1.615483 1 323 | 47903 4.138435 0.475994 1 324 | 37190 12.929517 0.304378 3 325 | 6013 9.378238 0.307392 2 326 | 27223 8.361362 1.643204 3 327 | 69027 7.939406 1.325042 1 328 | 78642 10.735384 0.705788 1 329 | 30254 11.592723 0.286188 3 330 | 21704 10.098356 0.704748 3 331 | 34985 9.299025 0.545337 3 332 | 31316 11.158297 0.218067 3 333 | 76368 16.143900 0.558388 1 334 | 27953 10.971700 1.221787 3 335 | 152 0.000000 0.681478 2 336 | 9146 3.178961 1.292692 2 337 | 75346 17.625350 0.339926 1 338 | 26376 1.995833 0.267826 1 339 | 35255 10.640467 0.416181 3 340 | 19198 9.628339 0.985462 3 341 | 12518 4.662664 0.495403 2 342 | 25453 5.754047 1.382742 2 343 | 12530 0.000000 0.037146 2 344 | 62230 9.334332 0.198118 1 345 | 9517 3.846162 0.619968 2 346 | 71161 10.685084 0.678179 1 347 | 1593 4.752134 0.359205 2 348 | 33794 0.697630 0.966786 1 349 | 39710 10.365836 0.505898 3 350 | 16941 0.461478 0.352865 2 351 | 69209 11.339537 1.068740 1 352 | 4446 5.420280 0.127310 2 353 | 9347 3.469955 1.619947 2 354 | 55635 8.517067 0.994858 3 355 | 65889 8.306512 0.413690 1 356 | 10753 2.628690 0.444320 2 357 | 7055 0.000000 0.802985 2 358 | 7905 0.000000 1.170397 2 359 | 53447 7.298767 1.582346 3 360 | 9194 7.331319 1.277988 2 361 | 61914 9.392269 0.151617 1 362 | 15630 5.541201 1.180596 2 363 | 79194 15.149460 0.537540 1 364 | 12268 5.515189 0.250562 2 365 | 33682 7.728898 0.920494 3 366 | 26080 11.318785 1.510979 3 367 | 19119 3.574709 1.531514 2 368 | 30902 7.350965 0.026332 3 369 | 63039 7.122363 1.630177 1 370 | 51136 1.828412 1.013702 1 371 | 35262 10.117989 1.156862 3 372 | 42776 11.309897 0.086291 3 373 | 64191 8.342034 1.388569 1 374 | 15436 0.241714 0.715577 2 375 | 14402 10.482619 1.694972 2 376 | 6341 9.289510 1.428879 2 377 | 14113 4.269419 0.134181 2 378 | 6390 0.000000 0.189456 2 379 | 8794 0.817119 0.143668 2 380 | 43432 1.508394 0.652651 1 381 | 38334 9.359918 0.052262 3 382 | 34068 10.052333 0.550423 3 383 | 30819 11.111660 0.989159 3 384 | 22239 11.265971 0.724054 3 385 | 28725 10.383830 0.254836 3 386 | 57071 3.878569 1.377983 1 387 | 72420 13.679237 0.025346 1 388 | 28294 10.526846 0.781569 3 389 | 9896 0.000000 0.924198 2 390 | 65821 4.106727 1.085669 1 391 | 7645 8.118856 1.470686 2 392 | 71289 7.796874 0.052336 1 393 | 5128 2.789669 1.093070 2 394 | 13711 6.226962 0.287251 2 395 | 22240 10.169548 1.660104 3 396 | 15092 0.000000 1.370549 2 397 | 5017 7.513353 0.137348 2 398 | 10141 8.240793 0.099735 2 399 | 35570 14.612797 1.247390 3 400 | 46893 3.562976 0.445386 1 401 | 8178 3.230482 1.331698 2 402 | 55783 3.612548 1.551911 1 403 | 1148 0.000000 0.332365 2 404 | 10062 3.931299 0.487577 2 405 | 74124 14.752342 1.155160 1 406 | 66603 10.261887 1.628085 1 407 | 11893 2.787266 1.570402 2 408 | 50908 15.112319 1.324132 3 409 | 39891 5.184553 0.223382 3 410 | 65915 3.868359 0.128078 1 411 | 65678 3.507965 0.028904 1 412 | 62996 11.019254 0.427554 1 413 | 36851 3.812387 0.655245 1 414 | 36669 11.056784 0.378725 3 415 | 38876 8.826880 1.002328 3 416 | 26878 11.173861 1.478244 3 417 | 46246 11.506465 0.421993 3 418 | 12761 7.798138 0.147917 3 419 | 35282 10.155081 1.370039 3 420 | 68306 10.645275 0.693453 1 421 | 31262 9.663200 1.521541 3 422 | 34754 10.790404 1.312679 3 423 | 13408 2.810534 0.219962 2 424 | 30365 9.825999 1.388500 3 425 | 10709 1.421316 0.677603 2 426 | 24332 11.123219 0.809107 3 427 | 45517 13.402206 
0.661524 3 428 | 6178 1.212255 0.836807 2 429 | 10639 1.568446 1.297469 2 430 | 29613 3.343473 1.312266 1 431 | 22392 5.400155 0.193494 1 432 | 51126 3.818754 0.590905 1 433 | 53644 7.973845 0.307364 3 434 | 51417 9.078824 0.734876 3 435 | 24859 0.153467 0.766619 1 436 | 61732 8.325167 0.028479 1 437 | 71128 7.092089 1.216733 1 438 | 27276 5.192485 1.094409 3 439 | 30453 10.340791 1.087721 3 440 | 18670 2.077169 1.019775 2 441 | 70600 10.151966 0.993105 1 442 | 12683 0.046826 0.809614 2 443 | 81597 11.221874 1.395015 1 444 | 69959 14.497963 1.019254 1 445 | 8124 3.554508 0.533462 2 446 | 18867 3.522673 0.086725 2 447 | 80886 14.531655 0.380172 1 448 | 55895 3.027528 0.885457 1 449 | 31587 1.845967 0.488985 1 450 | 10591 10.226164 0.804403 3 451 | 70096 10.965926 1.212328 1 452 | 53151 2.129921 1.477378 1 453 | 11992 0.000000 1.606849 2 454 | 33114 9.489005 0.827814 3 455 | 7413 0.000000 1.020797 2 456 | 10583 0.000000 1.270167 2 457 | 58668 6.556676 0.055183 1 458 | 35018 9.959588 0.060020 3 459 | 70843 7.436056 1.479856 1 460 | 14011 0.404888 0.459517 2 461 | 35015 9.952942 1.650279 3 462 | 70839 15.600252 0.021935 1 463 | 3024 2.723846 0.387455 2 464 | 5526 0.513866 1.323448 2 465 | 5113 0.000000 0.861859 2 466 | 20851 7.280602 1.438470 2 467 | 40999 9.161978 1.110180 3 468 | 15823 0.991725 0.730979 2 469 | 35432 7.398380 0.684218 3 470 | 53711 12.149747 1.389088 3 471 | 64371 9.149678 0.874905 1 472 | 9289 9.666576 1.370330 2 473 | 60613 3.620110 0.287767 1 474 | 18338 5.238800 1.253646 2 475 | 22845 14.715782 1.503758 3 476 | 74676 14.445740 1.211160 1 477 | 34143 13.609528 0.364240 3 478 | 14153 3.141585 0.424280 2 479 | 9327 0.000000 0.120947 2 480 | 18991 0.454750 1.033280 2 481 | 9193 0.510310 0.016395 2 482 | 2285 3.864171 0.616349 2 483 | 9493 6.724021 0.563044 2 484 | 2371 4.289375 0.012563 2 485 | 13963 0.000000 1.437030 2 486 | 2299 3.733617 0.698269 2 487 | 5262 2.002589 1.380184 2 488 | 4659 2.502627 0.184223 2 489 | 17582 6.382129 0.876581 2 490 | 27750 8.546741 0.128706 3 491 | 9868 2.694977 0.432818 2 492 | 18333 3.951256 0.333300 2 493 | 3780 9.856183 0.329181 2 494 | 18190 2.068962 0.429927 2 495 | 11145 3.410627 0.631838 2 496 | 68846 9.974715 0.669787 1 497 | 26575 10.650102 0.866627 3 498 | 48111 9.134528 0.728045 3 499 | 43757 7.882601 1.332446 3 500 | 27884 8.855312 0.570684 3 -------------------------------------------------------------------------------- /dataset/dataset1/train.txt: -------------------------------------------------------------------------------- 1 | 40920 8.326976 0.953952 3 2 | 14488 7.153469 1.673904 2 3 | 26052 1.441871 0.805124 1 4 | 75136 13.147394 0.428964 1 5 | 38344 1.669788 0.134296 1 6 | 72993 10.141740 1.032955 1 7 | 35948 6.830792 1.213192 3 8 | 42666 13.276369 0.543880 3 9 | 67497 8.631577 0.749278 1 10 | 35483 12.273169 1.508053 3 11 | 50242 3.723498 0.831917 1 12 | 63275 8.385879 1.669485 1 13 | 5569 4.875435 0.728658 2 14 | 51052 4.680098 0.625224 1 15 | 77372 15.299570 0.331351 1 16 | 43673 1.889461 0.191283 1 17 | 61364 7.516754 1.269164 1 18 | 69673 14.239195 0.261333 1 19 | 15669 0.000000 1.250185 2 20 | 28488 10.528555 1.304844 3 21 | 6487 3.540265 0.822483 2 22 | 37708 2.991551 0.833920 1 23 | 22620 5.297865 0.638306 2 24 | 28782 6.593803 0.187108 3 25 | 19739 2.816760 1.686209 2 26 | 36788 12.458258 0.649617 3 27 | 5741 0.000000 1.656418 2 28 | 28567 9.968648 0.731232 3 29 | 6808 1.364838 0.640103 2 30 | 41611 0.230453 1.151996 1 31 | 36661 11.865402 0.882810 3 32 | 43605 0.120460 1.352013 1 33 | 15360 8.545204 1.340429 3 
34 | 63796 5.856649 0.160006 1 35 | 10743 9.665618 0.778626 2 36 | 70808 9.778763 1.084103 1 37 | 72011 4.932976 0.632026 1 38 | 5914 2.216246 0.587095 2 39 | 14851 14.305636 0.632317 3 40 | 33553 12.591889 0.686581 3 41 | 44952 3.424649 1.004504 1 42 | 17934 0.000000 0.147573 2 43 | 27738 8.533823 0.205324 3 44 | 29290 9.829528 0.238620 3 45 | 42330 11.492186 0.263499 3 46 | 36429 3.570968 0.832254 1 47 | 39623 1.771228 0.207612 1 48 | 32404 3.513921 0.991854 1 49 | 27268 4.398172 0.975024 1 50 | 5477 4.276823 1.174874 2 51 | 14254 5.946014 1.614244 2 52 | 68613 13.798970 0.724375 1 53 | 41539 10.393591 1.663724 3 54 | 7917 3.007577 0.297302 2 55 | 21331 1.031938 0.486174 2 56 | 8338 4.751212 0.064693 2 57 | 5176 3.692269 1.655113 2 58 | 18983 10.448091 0.267652 3 59 | 68837 10.585786 0.329557 1 60 | 13438 1.604501 0.069064 2 61 | 48849 3.679497 0.961466 1 62 | 12285 3.795146 0.696694 2 63 | 7826 2.531885 1.659173 2 64 | 5565 9.733340 0.977746 2 65 | 10346 6.093067 1.413798 2 66 | 1823 7.712960 1.054927 2 67 | 9744 11.470364 0.760461 3 68 | 16857 2.886529 0.934416 2 69 | 39336 10.054373 1.138351 3 70 | 65230 9.972470 0.881876 1 71 | 2463 2.335785 1.366145 2 72 | 27353 11.375155 1.528626 3 73 | 16191 0.000000 0.605619 2 74 | 12258 4.126787 0.357501 2 75 | 42377 6.319522 1.058602 1 76 | 25607 8.680527 0.086955 3 77 | 77450 14.856391 1.129823 1 78 | 58732 2.454285 0.222380 1 79 | 46426 7.292202 0.548607 3 80 | 32688 8.745137 0.857348 3 81 | 64890 8.579001 0.683048 1 82 | 8554 2.507302 0.869177 2 83 | 28861 11.415476 1.505466 3 84 | 42050 4.838540 1.680892 1 85 | 32193 10.339507 0.583646 3 86 | 64895 6.573742 1.151433 1 87 | 2355 6.539397 0.462065 2 88 | 0 2.209159 0.723567 2 89 | 70406 11.196378 0.836326 1 90 | 57399 4.229595 0.128253 1 91 | 41732 9.505944 0.005273 3 92 | 11429 8.652725 1.348934 3 93 | 75270 17.101108 0.490712 1 94 | 5459 7.871839 0.717662 2 95 | 73520 8.262131 1.361646 1 96 | 40279 9.015635 1.658555 3 97 | 21540 9.215351 0.806762 3 98 | 17694 6.375007 0.033678 2 99 | 22329 2.262014 1.022169 1 100 | 46570 5.677110 0.709469 1 101 | 42403 11.293017 0.207976 3 102 | 33654 6.590043 1.353117 1 103 | 9171 4.711960 0.194167 2 104 | 28122 8.768099 1.108041 3 105 | 34095 11.502519 0.545097 3 106 | 1774 4.682812 0.578112 2 107 | 40131 12.446578 0.300754 3 108 | 13994 12.908384 1.657722 3 109 | 77064 12.601108 0.974527 1 110 | 11210 3.929456 0.025466 2 111 | 6122 9.751503 1.182050 3 112 | 15341 3.043767 0.888168 2 113 | 44373 4.391522 0.807100 1 114 | 28454 11.695276 0.679015 3 115 | 63771 7.879742 0.154263 1 116 | 9217 5.613163 0.933632 2 117 | 69076 9.140172 0.851300 1 118 | 24489 4.258644 0.206892 1 119 | 16871 6.799831 1.221171 2 120 | 39776 8.752758 0.484418 3 121 | 5901 1.123033 1.180352 2 122 | 40987 10.833248 1.585426 3 123 | 7479 3.051618 0.026781 2 124 | 38768 5.308409 0.030683 3 125 | 4933 1.841792 0.028099 2 126 | 32311 2.261978 1.605603 1 127 | 26501 11.573696 1.061347 3 128 | 37433 8.038764 1.083910 3 129 | 23503 10.734007 0.103715 3 130 | 68607 9.661909 0.350772 1 131 | 27742 9.005850 0.548737 3 132 | 11303 0.000000 0.539131 2 133 | 0 5.757140 1.062373 2 134 | 32729 9.164656 1.624565 3 135 | 24619 1.318340 1.436243 1 136 | 42414 14.075597 0.695934 3 137 | 20210 10.107550 1.308398 3 138 | 33225 7.960293 1.219760 3 139 | 54483 6.317292 0.018209 1 140 | 18475 12.664194 0.595653 3 141 | 33926 2.906644 0.581657 1 142 | 43865 2.388241 0.913938 1 143 | 26547 6.024471 0.486215 3 144 | 44404 7.226764 1.255329 3 145 | 16674 4.183997 1.275290 2 146 | 8123 11.850211 1.096981 3 147 | 
42747 11.661797 1.167935 3 148 | 56054 3.574967 0.494666 1 149 | 10933 0.000000 0.107475 2 150 | 18121 7.937657 0.904799 3 151 | 11272 3.365027 1.014085 2 152 | 16297 0.000000 0.367491 2 153 | 28168 13.860672 1.293270 3 154 | 40963 10.306714 1.211594 3 155 | 31685 7.228002 0.670670 3 156 | 55164 4.508740 1.036192 1 157 | 17595 0.366328 0.163652 2 158 | 1862 3.299444 0.575152 2 159 | 57087 0.573287 0.607915 1 160 | 63082 9.183738 0.012280 1 161 | 51213 7.842646 1.060636 3 162 | 6487 4.750964 0.558240 2 163 | 4805 11.438702 1.556334 3 164 | 30302 8.243063 1.122768 3 165 | 68680 7.949017 0.271865 1 166 | 17591 7.875477 0.227085 2 167 | 74391 9.569087 0.364856 1 168 | 37217 7.750103 0.869094 3 169 | 42814 0.000000 1.515293 1 170 | 14738 3.396030 0.633977 2 171 | 19896 11.916091 0.025294 3 172 | 14673 0.460758 0.689586 2 173 | 32011 13.087566 0.476002 3 174 | 58736 4.589016 1.672600 1 175 | 54744 8.397217 1.534103 1 176 | 29482 5.562772 1.689388 1 177 | 27698 10.905159 0.619091 3 178 | 11443 1.311441 1.169887 2 179 | 56117 10.647170 0.980141 3 180 | 39514 0.000000 0.481918 1 181 | 26627 8.503025 0.830861 3 182 | 16525 0.436880 1.395314 2 183 | 24368 6.127867 1.102179 1 184 | 22160 12.112492 0.359680 3 185 | 6030 1.264968 1.141582 2 186 | 6468 6.067568 1.327047 2 187 | 22945 8.010964 1.681648 3 188 | 18520 3.791084 0.304072 2 189 | 34914 11.773195 1.262621 3 190 | 6121 8.339588 1.443357 2 191 | 38063 2.563092 1.464013 1 192 | 23410 5.954216 0.953782 1 193 | 35073 9.288374 0.767318 3 194 | 52914 3.976796 1.043109 1 195 | 16801 8.585227 1.455708 3 196 | 9533 1.271946 0.796506 2 197 | 16721 0.000000 0.242778 2 198 | 5832 0.000000 0.089749 2 199 | 44591 11.521298 0.300860 3 200 | 10143 1.139447 0.415373 2 201 | 21609 5.699090 1.391892 2 202 | 23817 2.449378 1.322560 1 203 | 15640 0.000000 1.228380 2 204 | 8847 3.168365 0.053993 2 205 | 50939 10.428610 1.126257 3 206 | 28521 2.943070 1.446816 1 207 | 32901 10.441348 0.975283 3 208 | 42850 12.478764 1.628726 3 209 | 13499 5.856902 0.363883 2 210 | 40345 2.476420 0.096075 1 211 | 43547 1.826637 0.811457 1 212 | 70758 4.324451 0.328235 1 213 | 19780 1.376085 1.178359 2 214 | 44484 5.342462 0.394527 1 215 | 54462 11.835521 0.693301 3 216 | 20085 12.423687 1.424264 3 217 | 42291 12.161273 0.071131 3 218 | 47550 8.148360 1.649194 3 219 | 11938 1.531067 1.549756 2 220 | 40699 3.200912 0.309679 1 221 | 70908 8.862691 0.530506 1 222 | 73989 6.370551 0.369350 1 223 | 11872 2.468841 0.145060 2 224 | 48463 11.054212 0.141508 3 225 | 15987 2.037080 0.715243 2 226 | 70036 13.364030 0.549972 1 227 | 32967 10.249135 0.192735 3 228 | 63249 10.464252 1.669767 1 229 | 42795 9.424574 0.013725 3 230 | 14459 4.458902 0.268444 2 231 | 19973 0.000000 0.575976 2 232 | 5494 9.686082 1.029808 3 233 | 67902 13.649402 1.052618 1 234 | 25621 13.181148 0.273014 3 235 | 27545 3.877472 0.401600 1 236 | 58656 1.413952 0.451380 1 237 | 7327 4.248986 1.430249 2 238 | 64555 8.779183 0.845947 1 239 | 8998 4.156252 0.097109 2 240 | 11752 5.580018 0.158401 2 241 | 76319 15.040440 1.366898 1 242 | 27665 12.793870 1.307323 3 243 | 67417 3.254877 0.669546 1 244 | 21808 10.725607 0.588588 3 245 | 15326 8.256473 0.765891 2 246 | 20057 8.033892 1.618562 3 247 | 79341 10.702532 0.204792 1 248 | 15636 5.062996 1.132555 2 249 | 35602 10.772286 0.668721 3 250 | 28544 1.892354 0.837028 1 251 | 57663 1.019966 0.372320 1 252 | 78727 15.546043 0.729742 1 253 | 68255 11.638205 0.409125 1 254 | 14964 3.427886 0.975616 2 255 | 21835 11.246174 1.475586 3 256 | 7487 0.000000 0.645045 2 257 | 8700 0.000000 
1.424017 2 258 | 26226 8.242553 0.279069 3 259 | 65899 8.700060 0.101807 1 260 | 6543 0.812344 0.260334 2 261 | 46556 2.448235 1.176829 1 262 | 71038 13.230078 0.616147 1 263 | 47657 0.236133 0.340840 1 264 | 19600 11.155826 0.335131 3 265 | 37422 11.029636 0.505769 3 266 | 1363 2.901181 1.646633 2 267 | 26535 3.924594 1.143120 1 268 | 47707 2.524806 1.292848 1 269 | 38055 3.527474 1.449158 1 270 | 6286 3.384281 0.889268 2 271 | 10747 0.000000 1.107592 2 272 | 44883 11.898890 0.406441 3 273 | 56823 3.529892 1.375844 1 274 | 68086 11.442677 0.696919 1 275 | 70242 10.308145 0.422722 1 276 | 11409 8.540529 0.727373 2 277 | 67671 7.156949 1.691682 1 278 | 61238 0.720675 0.847574 1 279 | 17774 0.229405 1.038603 2 280 | 53376 3.399331 0.077501 1 281 | 30930 6.157239 0.580133 1 282 | 28987 1.239698 0.719989 1 283 | 13655 6.036854 0.016548 2 284 | 7227 5.258665 0.933722 2 285 | 40409 12.393001 1.571281 3 286 | 13605 9.627613 0.935842 2 287 | 26400 11.130453 0.597610 3 288 | 13491 8.842595 0.349768 3 289 | 30232 10.690010 1.456595 3 290 | 43253 5.714718 1.674780 3 291 | 55536 3.052505 1.335804 1 292 | 8807 0.000000 0.059025 2 293 | 25783 9.945307 1.287952 3 294 | 22812 2.719723 1.142148 1 295 | 77826 11.154055 1.608486 1 296 | 38172 2.687918 0.660836 1 297 | 31676 10.037847 0.962245 3 298 | 74038 12.404762 1.112080 1 299 | 44738 10.237305 0.633422 3 300 | 17410 4.745392 0.662520 2 301 | 5688 4.639461 1.569431 2 302 | 36642 3.149310 0.639669 1 303 | 29956 13.406875 1.639194 3 304 | 60350 6.068668 0.881241 1 305 | 23758 9.477022 0.899002 3 306 | 25780 3.897620 0.560201 2 307 | 11342 5.463615 1.203677 2 308 | 36109 3.369267 1.575043 1 309 | 14292 5.234562 0.825954 2 310 | 11160 0.000000 0.722170 2 311 | 23762 12.979069 0.504068 3 312 | 39567 5.376564 0.557476 1 313 | 25647 13.527910 1.586732 3 314 | 14814 2.196889 0.784587 2 315 | 73590 10.691748 0.007509 1 316 | 35187 1.659242 0.447066 1 317 | 49459 8.369667 0.656697 3 318 | 31657 13.157197 0.143248 3 319 | 6259 8.199667 0.908508 2 320 | 33101 4.441669 0.439381 3 321 | 27107 9.846492 0.644523 3 322 | 17824 0.019540 0.977949 2 323 | 43536 8.253774 0.748700 3 324 | 67705 6.038620 1.509646 1 325 | 35283 6.091587 1.694641 3 326 | 71308 8.986820 1.225165 1 327 | 31054 11.508473 1.624296 3 328 | 52387 8.807734 0.713922 3 329 | 40328 0.000000 0.816676 1 330 | 34844 8.889202 1.665414 3 331 | 11607 3.178117 0.542752 2 332 | 64306 7.013795 0.139909 1 333 | 32721 9.605014 0.065254 3 334 | 33170 1.230540 1.331674 1 335 | 37192 10.412811 0.890803 3 336 | 13089 0.000000 0.567161 2 337 | 66491 9.699991 0.122011 1 338 | 15941 0.000000 0.061191 2 339 | 4272 4.455293 0.272135 2 340 | 48812 3.020977 1.502803 1 341 | 28818 8.099278 0.216317 3 342 | 35394 1.157764 1.603217 1 343 | 71791 10.105396 0.121067 1 344 | 40668 11.230148 0.408603 3 345 | 39580 9.070058 0.011379 3 346 | 11786 0.566460 0.478837 2 347 | 19251 0.000000 0.487300 2 348 | 56594 8.956369 1.193484 3 349 | 54495 1.523057 0.620528 1 350 | 11844 2.749006 0.169855 2 351 | 45465 9.235393 0.188350 3 352 | 31033 10.555573 0.403927 3 353 | 16633 6.956372 1.519308 2 354 | 13887 0.636281 1.273984 2 355 | 52603 3.574737 0.075163 1 356 | 72000 9.032486 1.461809 1 357 | 68497 5.958993 0.023012 1 358 | 35135 2.435300 1.211744 1 359 | 26397 10.539731 1.638248 3 360 | 7313 7.646702 0.056513 2 361 | 91273 20.919349 0.644571 1 362 | 24743 1.424726 0.838447 1 363 | 31690 6.748663 0.890223 3 364 | 15432 2.289167 0.114881 2 365 | 58394 5.548377 0.402238 1 366 | 33962 6.057227 0.432666 1 367 | 31442 10.828595 0.559955 3 368 | 
31044 11.318160 0.271094 3 369 | 29938 13.265311 0.633903 3 370 | 9875 0.000000 1.496715 2 371 | 51542 6.517133 0.402519 3 372 | 11878 4.934374 1.520028 2 373 | 69241 10.151738 0.896433 1 374 | 37776 2.425781 1.559467 1 375 | 68997 9.778962 1.195498 1 376 | 67416 12.219950 0.657677 1 377 | 59225 7.394151 0.954434 1 378 | 29138 8.518535 0.742546 3 379 | 5962 2.798700 0.662632 2 380 | 10847 0.637930 0.617373 2 381 | 70527 10.750490 0.097415 1 382 | 9610 0.625382 0.140969 2 383 | 64734 10.027968 0.282787 1 384 | 25941 9.817347 0.364197 3 385 | 2763 0.646828 1.266069 2 386 | 55601 3.347111 0.914294 1 387 | 31128 11.816892 0.193798 3 388 | 5181 0.000000 1.480198 2 389 | 69982 10.945666 0.993219 1 390 | 52440 10.244706 0.280539 3 391 | 57350 2.579801 1.149172 1 392 | 57869 2.630410 0.098869 1 393 | 56557 11.746200 1.695517 3 394 | 42342 8.104232 1.326277 3 395 | 15560 12.409743 0.790295 3 396 | 34826 12.167844 1.328086 3 397 | 8569 3.198408 0.299287 2 398 | 77623 16.055513 0.541052 1 399 | 78184 7.138659 0.158481 1 400 | 7036 4.831041 0.761419 2 401 | 69616 10.082890 1.373611 1 402 | 21546 10.066867 0.788470 3 403 | 36715 8.129538 0.329913 3 404 | 20522 3.012463 1.138108 2 405 | 42349 3.720391 0.845974 1 406 | 9037 0.773493 1.148256 2 407 | 26728 10.962941 1.037324 3 408 | 587 0.177621 0.162614 2 409 | 48915 3.085853 0.967899 1 410 | 9824 8.426781 0.202558 2 411 | 4135 1.825927 1.128347 2 412 | 9666 2.185155 1.010173 2 413 | 59333 7.184595 1.261338 1 414 | 36198 0.000000 0.116525 1 415 | 34909 8.901752 1.033527 3 416 | 47516 2.451497 1.358795 1 417 | 55807 3.213631 0.432044 1 418 | 14036 3.974739 0.723929 2 419 | 42856 9.601306 0.619232 3 420 | 64007 8.363897 0.445341 1 421 | 59428 6.381484 1.365019 1 422 | 13730 0.000000 1.403914 2 423 | 41740 9.609836 1.438105 3 424 | 63546 9.904741 0.985862 1 425 | 30417 7.185807 1.489102 3 426 | 69636 5.466703 1.216571 1 427 | 64660 0.000000 0.915898 1 428 | 14883 4.575443 0.535671 2 429 | 7965 3.277076 1.010868 2 430 | 68620 10.246623 1.239634 1 431 | 8738 2.341735 1.060235 2 432 | 7544 3.201046 0.498843 2 433 | 6377 6.066013 0.120927 2 434 | 36842 8.829379 0.895657 3 435 | 81046 15.833048 1.568245 1 436 | 67736 13.516711 1.220153 1 437 | 32492 0.664284 1.116755 1 438 | 39299 6.325139 0.605109 3 439 | 77289 8.677499 0.344373 1 440 | 33835 8.188005 0.964896 3 441 | 71890 9.414263 0.384030 1 442 | 32054 9.196547 1.138253 3 443 | 38579 10.202968 0.452363 3 444 | 55984 2.119439 1.481661 1 445 | 72694 13.635078 0.858314 1 446 | 42299 0.083443 0.701669 1 447 | 26635 9.149096 1.051446 3 448 | 8579 1.933803 1.374388 2 449 | 37302 14.115544 0.676198 3 450 | 22878 8.933736 0.943352 3 451 | 4364 2.661254 0.946117 2 452 | 4985 0.988432 1.305027 2 453 | 37068 2.063741 1.125946 1 454 | 41137 2.220590 0.690754 1 455 | 67759 6.424849 0.806641 1 456 | 11831 1.156153 1.613674 2 457 | 34502 3.032720 0.601847 1 458 | 4088 3.076828 0.952089 2 459 | 15199 0.000000 0.318105 2 460 | 17309 7.750480 0.554015 3 461 | 42816 10.958135 1.482500 3 462 | 43751 10.222018 0.488678 3 463 | 58335 2.367988 0.435741 1 464 | 75039 7.686054 1.381455 1 465 | 42878 11.464879 1.481589 3 466 | 42770 11.075735 0.089726 3 467 | 8848 3.543989 0.345853 2 468 | 31340 8.123889 1.282880 3 469 | 41413 4.331769 0.754467 3 470 | 12731 0.120865 1.211961 2 471 | 22447 6.116109 0.701523 3 472 | 33564 7.474534 0.505790 3 473 | 48907 8.819454 0.649292 3 474 | 8762 6.802144 0.615284 2 475 | 46696 12.666325 0.931960 3 476 | 36851 8.636180 0.399333 3 477 | 67639 11.730991 1.289833 1 478 | 171 8.132449 0.039062 2 479 | 
26674 10.296589 1.496144 3 480 | 8739 7.583906 1.005764 2 481 | 66668 9.777806 0.496377 1 482 | 68732 8.833546 0.513876 1 483 | 69995 4.907899 1.518036 1 484 | 82008 8.362736 1.285939 1 485 | 25054 9.084726 1.606312 3 486 | 33085 14.164141 0.560970 3 487 | 41379 9.080683 0.989920 3 488 | 39417 6.522767 0.038548 3 489 | 12556 3.690342 0.462281 2 490 | 39432 3.563706 0.242019 1 491 | 38010 1.065870 1.141569 1 492 | 69306 6.683796 1.456317 1 493 | 38000 1.712874 0.243945 1 494 | 46321 13.109929 1.280111 3 495 | 66293 11.327910 0.780977 1 496 | 22730 4.545711 1.233254 1 497 | 5952 3.367889 0.468104 2 498 | 72308 8.326224 0.567347 1 499 | 60338 8.978339 1.442034 1 500 | 13301 5.655826 1.582159 2
--------------------------------------------------------------------------------
/SVM.py:
--------------------------------------------------------------------------------
"""
@Filename: SVM.py
@Author: Ryuk
@Create Date: 2019-04-29
@Update Date: 2019-05-03
@Description: Implement of SVM
"""

import numpy as np
import preProcess
import pickle
import random


class SVMClassifier:
    def __init__(self, norm_type="Normalization", C=200, kernel="rbf", threshold=10e-3, g=0.1, c=0, n=3, max_iteration=100):
        self.norm_type = norm_type
        self.prediction = None
        self.probability = None
        self.train_data = None
        self.train_label = None
        self.sample_num = None
        self.max_iteration = max_iteration      # max iterations of SMO
        self.K = None
        self.alphas = None
        self.w = None                           # the weight of the hyperplane
        self.b = None                           # the bias of the hyperplane
        self.errors = None                      # cached prediction errors
        self.C = C                              # penalty coefficient
        self.threshold = threshold              # tolerance for the KKT conditions
        self.kernel = kernel                    # kernel function
        self.g = g                              # gamma for the rbf/sigmoid/poly kernels
        self.n = n                              # degree of the poly kernel
        self.c = c                              # bias term of the sigmoid/poly kernels

    '''
    Function: labelTransformation
    Description: transform {0, 1} into {-1, 1}, list to ndarray
    Input: labels dataType: List description: original labels
    Output: new_labels dataType: ndarray description: new labels
    '''
    def labelTransformation(self, labels):
        new_labels = np.zeros([len(labels), 1])
        for i in range(len(labels)):
            if labels[i] == 0:
                new_labels[i] = -1      # map label 0 to -1
            else:
                new_labels[i] = 1
        return new_labels

    '''
    Function: calculateErrors
    Description: calculate the prediction error of the k-th sample, LiHang, Statistical Learning Methods, P127, Eq.(7.105)
                 g(x) = sum_i[alpha_i * y_i * K(x_i, x)] + b
    Input: k dataType: int description: index of the k-th sample
    Output: Ek dataType: float description: prediction error of the k-th sample
    '''
    def calculateErrors(self, k):
        gap = np.dot(np.multiply(self.alphas, self.train_label).T, self.K[:, k]) + self.b
        Ek = gap - self.train_label[k]
        return Ek

    '''
    Function: selectAlpha2Rand
    Description: select alpha2 randomly
    Input: i dataType: int description: the index of alpha1
    Output: j dataType: int description: the index of alpha2
    '''
    def selectAlpha2Rand(self, i):
        j = i
        while j == i:
            j = random.randint(0, self.sample_num - 1)      # randint is inclusive on both ends
        return j

    '''
    Function: selectAlpha2
    Description: select the second alpha heuristically in the inner loop, maximizing |Ei - Ej|
    Input: i dataType: int description: the index of the first alpha
           Ei dataType: float description: the error of the first alpha
    Output: j dataType: int description: the index of the second alpha
            Ej dataType: float description: the error of the second alpha
    '''
    def selectAlpha2(self, i, Ei):
        max_k = -1
        max_delta = 0.0
        Ej = 0.0

        self.errors[i] = [1, float(Ei)]
        valid_errors_index = np.nonzero(self.errors[:, 0])[0]   # indices of samples with valid cached errors
        if len(valid_errors_index) > 1:
            for k in valid_errors_index:
                if k == i:
                    continue
                Ek = self.calculateErrors(k)
                delta_e = abs(Ei - Ek)
                if delta_e > max_delta:     # select j with the max |Ei - Ej|
                    max_k = k
                    max_delta = delta_e
                    Ej = Ek
            return max_k, Ej
        else:
            j = self.selectAlpha2Rand(i)
            Ej = self.calculateErrors(j)
            return j, Ej

    '''
    Function: updateError
    Description: update and cache the prediction error of the k-th sample
    Input: k dataType: int description: the index of the sample
    '''
    def updateError(self, k):
        Ek = self.calculateErrors(k)
        self.errors[k] = [1, float(Ek)]     # 1 means valid

    '''
    Function: updateAlpha2
    Description: clip alpha2 into [L, H], LiHang P127, Eq.(7.108)
    Input: alpha2 dataType: float description: unclipped alpha2
           L dataType: float description: lower bound of alpha2
           H dataType: float description: upper bound of alpha2
    Output: alpha2 dataType: float description: clipped alpha2
    '''
    def updateAlpha2(self, alpha2, L, H):
        if alpha2 > H:
            alpha2 = H
        if L > alpha2:
            alpha2 = L
        return alpha2

    '''
    Function: innerLoop
    Description: inner loop of Platt SMO
    Input: i dataType: int description: the index of the first alpha
    '''
    def innerLoop(self, i):
        Ei = self.calculateErrors(i)
        # check whether sample i violates the KKT conditions
        if ((self.train_label[i] * Ei < -self.threshold) and (self.alphas[i] < self.C)) or ((self.train_label[i] * Ei > self.threshold) and (self.alphas[i] > 0)):

            j, Ej = self.selectAlpha2(i, Ei)    # select alpha2 according to alpha1

            # copy alpha1 and alpha2 (.copy() is required: indexing an ndarray returns a view,
            # so without it the "old" values would silently follow the updates below)
            old_alpha1 = self.alphas[i].copy()
            old_alpha2 = self.alphas[j].copy()

            # determine the feasible range [L, H] of alpha2, LiHang P126
            # if y1 != y2: L = max(0, old_alpha2 - old_alpha1), H = min(C, C + old_alpha2 - old_alpha1)
            # if y1 == y2: L = max(0, old_alpha2 + old_alpha1 - C), H = min(C, old_alpha2 + old_alpha1)
            if self.train_label[i] != self.train_label[j]:
                L = max(0, old_alpha2 - old_alpha1)
                H = min(self.C, self.C + old_alpha2 - old_alpha1)
            else:
                L = max(0, old_alpha2 + old_alpha1 - self.C)
                H = min(self.C, old_alpha2 + old_alpha1)

            if L == H:
                # print("L == H")
                return 0

            # calculate eta, LiHang P127, Eq.(7.107): eta = K11 + K22 - 2K12
            K11 = self.K[i, i]
            K12 = self.K[i, j]
            K21 = self.K[j, i]
            K22 = self.K[j, j]
            eta = K11 + K22 - 2 * K12
            if eta <= 0:
                # print("eta <= 0")
                return 0

            # update alpha2 and its error, LiHang P127, Eq.(7.106) and Eq.(7.108)
            self.alphas[j] = old_alpha2 + self.train_label[j] * (Ei - Ej) / eta
            self.alphas[j] = self.updateAlpha2(self.alphas[j], L, H)
            new_alpha2 = self.alphas[j]
            self.updateError(j)

            # # if the step of alpha2 is not big enough, stop
            # if abs(self.alphas[j] - old_alpha2) < 0.01:
            #     return 0

            # update alpha1 and its error, LiHang P127, Eq.(7.109)
            # new_alpha1 = old_alpha1 + y1*y2*(old_alpha2 - new_alpha2)
            new_alpha1 = old_alpha1 + self.train_label[i] * self.train_label[j] * (old_alpha2 - new_alpha2)
            self.alphas[i] = new_alpha1
            self.updateError(i)

            # determine b, LiHang P130, Eq.(7.115) and Eq.(7.116)
            # new_b1 = -E1 - y1*K11*(new_alpha1 - old_alpha1) - y2*K21*(new_alpha2 - old_alpha2) + old_b
            # new_b2 = -E2 - y1*K12*(new_alpha1 - old_alpha1) - y2*K22*(new_alpha2 - old_alpha2) + old_b
            b1 = -Ei - self.train_label[i] * K11 * (self.alphas[i] - old_alpha1) - self.train_label[j] * K21 * (self.alphas[j] - old_alpha2) + self.b
            b2 = -Ej - self.train_label[i] * K12 * (self.alphas[i] - old_alpha1) - self.train_label[j] * K22 * (self.alphas[j] - old_alpha2) + self.b
            if (self.alphas[i] > 0) and (self.alphas[i] < self.C):
                self.b = b1
            elif (self.alphas[j] > 0) and (self.alphas[j] < self.C):
                self.b = b2
            else:
                self.b = (b1 + b2) / 2.0

            return 1
        else:
            return 0

    '''
    Function: SMO
    Description: implement of Platt SMO. Alternate between a pass over the entire set and passes over the
                 non-bound samples (0 < alpha < C); if no alpha pair changes, go back to the entire set
    '''
    def SMO(self):
        iter = 0
        entire_set = True
        alpha_pairs_changes = 0
        while (iter < self.max_iteration) and ((alpha_pairs_changes > 0) or entire_set):
            alpha_pairs_changes = 0
            if entire_set:
                for i in range(self.sample_num):
                    alpha_pairs_changes += self.innerLoop(i)
                    # print("Iteration:%d, Sample:%d, Pairs changed:%d" % (iter, i, alpha_pairs_changes))
                iter += 1
            else:
                non_bound_alpha = np.nonzero((self.alphas > 0) & (self.alphas < self.C))[0]     # LiHang P129, Eq.(7.112)
                for i in non_bound_alpha:
                    alpha_pairs_changes += self.innerLoop(i)
                    # print("Iteration:%d, Sample:%d, Pairs changed:%d" % (iter, i, alpha_pairs_changes))
                iter += 1
            if entire_set:
                entire_set = False
            elif alpha_pairs_changes == 0:
                entire_set = True

        # print("Iteration:%d" % iter)

    '''
    Function: kernelTransformation
    Description: compute the kernel values K(x_i, sample) between each row of data and the given sample
    Input: data dataType: ndarray description: data set
           sample dataType: ndarray description: a sample
           kernel dataType: str description: kernel type
    Output: K dataType: ndarray description: kernel values
    '''
    def kernelTransformation(self, data, sample, kernel):
        sample_num, feature_dim = np.shape(data)
        K = np.zeros([sample_num])
        if kernel == "linear":              # linear function
            K = np.dot(data, sample.T)
        elif kernel == "poly":              # polynomial function
            K = (np.dot(data, sample.T) + self.c) ** self.n
        elif kernel == "sigmoid":
            K = np.tanh(self.g * np.dot(data, sample.T) + self.c)
        elif kernel == "rbf":               # Gaussian function
            for i in range(sample_num):
                delta = data[i, :] - sample
                K[i] = np.dot(delta, delta.T)
            K = np.exp(-self.g * K)
        else:
            raise NameError('Unrecognized kernel function')
        return K

    '''
    Function: train
    Description: train the model
    Input: train_data dataType: ndarray description: features
           train_label dataType: ndarray description: labels
    Output: self dataType: obj description: the trained model
    '''
    def train(self, train_data, train_label):
        if self.norm_type == "Standardization":
            train_data = preProcess.Standardization(train_data)
        else:
            train_data = preProcess.Normalization(train_data)

        # initialization
        sample_num, feature_dim = np.shape(train_data)
        self.train_data = train_data
        self.train_label = self.labelTransformation(train_label)
        self.sample_num = sample_num
        self.K = np.zeros([self.sample_num, self.sample_num])
        self.alphas = np.zeros([self.sample_num, 1])
        self.errors = np.zeros([self.sample_num, 2])
        self.b = 0

        # kernel trick
        for i in range(self.sample_num):
            self.K[:, i] = self.kernelTransformation(self.train_data, self.train_data[i, :], self.kernel)

        # train the model
        self.SMO()
        return self

    '''
    Function: predict
    Description: predict the testing set
    Input: test_data dataType: ndarray description: features
           prob dataType: bool description: whether to return the decision value instead of the label
    Output: prediction dataType: ndarray description: the prediction results for the testing set
    '''
    def predict(self, test_data, prob=False):
        # normalization
        if self.norm_type == "Standardization":
            test_data = preProcess.Standardization(test_data)
        else:
            test_data = preProcess.Normalization(test_data)

        test_num = test_data.shape[0]
        prediction = np.zeros([test_num, 1])
        probability = np.zeros([test_num, 1])

        # find the support vectors and their corresponding labels and alphas
        support_vectors_index = np.nonzero(self.alphas > 0)[0]
        support_vectors = self.train_data[support_vectors_index]
        support_vectors_label = self.train_label[support_vectors_index]
        support_vectors_alphas = self.alphas[support_vectors_index]

        # predict the test samples, LiHang P122, Eq.(7.89)
        for i in range(test_num):
            kernel_data = self.kernelTransformation(support_vectors, test_data[i, :], self.kernel)
            probability[i] = np.dot(kernel_data.T, np.multiply(support_vectors_label, support_vectors_alphas)) + self.b
            if probability[i] > 0:
                prediction[i] = 1
            else:
                prediction[i] = -1

        self.prediction = prediction
        self.probability = probability
        if prob:
            return probability
        else:
            return prediction

    '''
    Function: accuarcy
    Description: show the detection result
    Input: test_label dataType: ndarray description: labels of the test data, in {-1, 1}
    Output: accuarcy dataType: float description: detection accuracy
    '''
    def accuarcy(self, test_label):
        test_label = np.expand_dims(test_label, axis=1)
        prediction = self.prediction
        accuarcy = sum(prediction == test_label) / len(test_label)
        return accuarcy

    '''
    Function: save
    Description: save the model as pkl
    Input: filename dataType: str description: the path to save the model
    '''
    def save(self, filename):
        with open(filename, 'wb') as f:     # pickle requires binary mode
            model = {'b': self.b, 'alphas': self.alphas, 'train_label': self.train_label}
            pickle.dump(model, f)

    '''
    Function: load
    Description: load the model
    Input: filename dataType: str description: the path of the saved model
    Output: self dataType: obj description: the trained model
    '''
    def load(self, filename):
        with open(filename, 'rb') as f:
            model = pickle.load(f)
        self.alphas = model['alphas']
        self.b = model['b']
        self.train_label = model['train_label']
        # note: predict() also needs self.train_data, which is not stored in the pkl;
        # train again or keep the training features alongside the saved model
        return self
--------------------------------------------------------------------------------
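
A minimal usage sketch for SVMClassifier on dataset1 (a hypothetical example, not a file in this repository). dataset1 carries three classes while this SVM is binary, so the sketch keeps only classes 1 and 2 and maps them to {0, 1} before training; the file paths and the load_binary helper are illustrative assumptions, not part of the repository.

import numpy as np
import pandas as pd
from SVM import SVMClassifier

def load_binary(path):
    # dataset1 rows: three features followed by a label in {1, 2, 3};
    # keep classes 1 and 2 and map class 1 -> 0, class 2 -> 1 (hypothetical filtering)
    data = pd.read_table(path, header=None, delim_whitespace=True).values
    mask = data[:, -1] != 3
    features = data[mask, :-1]
    labels = (data[mask, -1] == 2).astype(int)
    return features, labels

train_x, train_y = load_binary('../dataset/dataset1/train.txt')
test_x, test_y = load_binary('../dataset/dataset1/test.txt')

clf = SVMClassifier(C=200, kernel="rbf", g=0.1, max_iteration=100)
clf.train(train_x, train_y)
prediction = clf.predict(test_x)            # predicted labels in {-1, 1}
accuracy = clf.accuarcy(test_y * 2 - 1)     # map {0, 1} -> {-1, 1} to match the prediction
print("SVM accuracy:", accuracy)

Since train() maps {0, 1} labels to {-1, 1} internally but accuarcy() compares raw labels against the {-1, 1} predictions, the test labels must be remapped by the caller, as done above.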