├── Chapter01
│   ├── Bagging.py
│   ├── Boosting.py
│   ├── Stacking.py
│   └── kmeansClustering.py
├── Chapter02
│   ├── Data
│   │   ├── CarDataset.csv
│   │   ├── __pycache__
│   │   │   ├── TreeDict.cpython-35.pyc
│   │   │   └── __init__.cpython-35.pyc
│   │   ├── testData.csv
│   │   ├── trainData.csv
│   │   └── treeModel.json
│   ├── DecisionTree_ID3.py
│   ├── ID3_Test.py
│   ├── PracticalApplication.py
│   ├── SplitCheck.py
│   └── testTree.json
├── Chapter03
│   ├── BinaryTree.py
│   ├── Data
│   │   └── bcancer.csv
│   ├── DecisionTree.py
│   ├── DecisionTree_CART_RF.py
│   ├── PracticalApplication.py
│   └── RandomForest.py
├── Chapter04
│   ├── Data
│   │   └── spamData.csv
│   ├── KNN.py
│   ├── SpamClassification.py
│   ├── knnAlgoTest.py
│   └── utilityFunctions.py
├── Chapter05
│   ├── AdaBoostFaceDetection.py
│   ├── Adaboost.py
│   ├── AdaboostAlgorithmExample.py
│   └── Data
│       ├── bcancer.csv
│       ├── download.jpg
│       ├── haarcascade_eye.xml
│       └── haarcascade_frontalface_default.xml
├── Chapter06
│   ├── Data
│   │   └── bcancer.csv
│   ├── RegressionTreeTest.py
│   └── RegressionTrees.py
├── Chapter07
│   ├── Data
│   │   ├── pima-indians-diabetes.csv
│   │   ├── train.csv
│   │   └── train_modified.csv
│   ├── xgBoost.py
│   └── xgboost_param_tune.py
├── Chapter08
│   ├── Data
│   │   └── sonar.all-data.csv
│   └── StackedGeneralization.py
├── Chapter09
│   ├── Data
│   │   ├── bcancer.csv
│   │   ├── graph_feat_4.png
│   │   ├── sonar.all-data.csv
│   │   ├── spamData.csv
│   │   └── train.csv
│   ├── FeatureSelection_PCA.py
│   ├── RF_feature_selection.py
│   ├── RecursiveFeatureElimination.py
│   ├── SVM_KernelTrick.py
│   ├── SVM_Test.py
│   ├── UnivariateFeatureSelection.py
│   ├── bcancer.csv
│   ├── feature_reduction_impact.py
│   ├── sonar.all-data.csv
│   └── stacking_spamdata.py
├── Chapter10
│   ├── ANN.py
│   └── DigitClassification.py
├── LICENSE
└── README.md

/Chapter01/Bagging.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on May 20, 2017
3 | 
4 | @author: DX
5 | '''
6 | # Import all the required packages from sklearn
7 | import numpy as np
8 | from sklearn import model_selection
9 | from sklearn.ensemble import BaggingClassifier
10 | from sklearn.tree import DecisionTreeClassifier
11 | from sklearn.datasets import load_iris
12 | 
13 | #Load data
14 | iris = load_iris()
15 | X = iris.data
16 | Y = iris.target
17 | 
18 | #Split data into training and testing sets
19 | X_fit, X_eval, y_fit, y_test = model_selection.train_test_split(X, Y, test_size=0.30, random_state=1)
20 | 
21 | #Create random subsamples to train multiple models
22 | seed = 7
23 | kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
24 | 
25 | #Define a decision tree classifier
26 | cart = DecisionTreeClassifier()
27 | num_trees = 100
28 | 
29 | #Create classification model for bagging
30 | model = BaggingClassifier(base_estimator=cart, n_estimators=num_trees, random_state=seed)
31 | 
32 | #Evaluate the bagged model on each cross-validation fold and print its accuracy
33 | results = model_selection.cross_val_score(model, X_fit, y_fit, cv=kfold)
34 | for i in range(len(results)):
35 |     print("Fold: "+str(i)+" Accuracy is: "+str(results[i]))
36 | 
37 | print("Mean Accuracy is: "+str(results.mean()))
38 | 
39 | model.fit(X_fit, y_fit)
40 | pred_label = model.predict(X_eval)
41 | nnz = np.shape(y_test)[0] - np.count_nonzero(pred_label - y_test)
42 | acc = 100*nnz/np.shape(y_test)[0]
43 | print('accuracy is: '+str(acc))
--------------------------------------------------------------------------------
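Because each bootstrap sample leaves out roughly one third of the training rows, a bagged ensemble can also be validated without a held-out split by scoring every tree on the rows it never saw. A minimal sketch of that out-of-bag estimate, assuming the same iris data and scikit-learn setup as Bagging.py above:

```python
# Sketch: out-of-bag (OOB) validation for the bagging setup above.
from sklearn.datasets import load_iris
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

X, Y = load_iris(return_X_y=True)

# oob_score=True scores each tree on the samples missing from its bootstrap
model = BaggingClassifier(base_estimator=DecisionTreeClassifier(),
                          n_estimators=100, oob_score=True, random_state=7)
model.fit(X, Y)
print('OOB accuracy estimate: ' + str(model.oob_score_))
```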
/Chapter01/Boosting.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on May 22, 2017
3 | 
4 | @author: DX
5 | '''
6 | # Import all the required packages from sklearn
7 | from sklearn import model_selection
8 | from sklearn.datasets import load_iris
9 | from sklearn.ensemble import AdaBoostClassifier # Boosting Algorithm
10 | from sklearn.tree import DecisionTreeClassifier
11 | 
12 | import numpy as np
13 | 
14 | 
15 | #Load data
16 | iris = load_iris()
17 | X = iris.data
18 | Y = iris.target
19 | 
20 | #Split data into training and testing sets
21 | X_fit, X_eval, y_fit, y_test = model_selection.train_test_split(X, Y, test_size=0.20, random_state=1)
22 | 
23 | #Define a decision tree classifier
24 | cart = DecisionTreeClassifier()
25 | num_trees = 25
26 | 
27 | #Create classification model for boosting
28 | model = AdaBoostClassifier(base_estimator=cart, n_estimators=num_trees, learning_rate=0.1)
29 | 
30 | #Train classification model
31 | model.fit(X_fit, y_fit)
32 | 
33 | #Test the trained model on the test set
34 | pred_label = model.predict(X_eval)
35 | nnz = float(np.shape(y_test)[0] - np.count_nonzero(pred_label - y_test))
36 | acc = 100*nnz/np.shape(y_test)[0]
37 | 
38 | #Print accuracy of the model
39 | print('accuracy is: '+str(acc))
--------------------------------------------------------------------------------
/Chapter01/Stacking.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on 24-May-2017
3 | 
4 | @author: aii32199
5 | '''
6 | 
7 | from sklearn import datasets
8 | from sklearn.ensemble import RandomForestClassifier
9 | from sklearn.linear_model import LogisticRegression
10 | from sklearn.naive_bayes import GaussianNB
11 | from sklearn.neighbors import KNeighborsClassifier
12 | from mlxtend.classifier import StackingClassifier
13 | from sklearn import model_selection
14 | import numpy as np
15 | from sklearn.tree import DecisionTreeClassifier
16 | iris = datasets.load_iris()
17 | X, y = iris.data[:, 1:3], iris.target
18 | 
19 | def CalculateAccuracy(y_test,pred_label):
20 |     nnz = np.shape(y_test)[0] - np.count_nonzero(pred_label - y_test)
21 |     acc = 100*nnz/float(np.shape(y_test)[0])
22 |     return acc
23 | 
24 | clf1 = KNeighborsClassifier(n_neighbors=2)
25 | clf2 = RandomForestClassifier(n_estimators=2, random_state=1)
26 | clf3 = GaussianNB()
27 | lr = LogisticRegression()
28 | 
29 | clf1.fit(X, y)
30 | clf2.fit(X, y)
31 | clf3.fit(X, y)
32 | 
33 | f1 = clf1.predict(X)
34 | acc1 = CalculateAccuracy(y, f1)
35 | print("accuracy from KNN: "+str(acc1))
36 | 
37 | f2 = clf2.predict(X)
38 | acc2 = CalculateAccuracy(y, f2)
39 | print("accuracy from Random Forest: "+str(acc2))
40 | 
41 | f3 = clf3.predict(X)
42 | acc3 = CalculateAccuracy(y, f3)
43 | print("accuracy from Naive Bayes: "+str(acc3))
44 | 
45 | f = [f1,f2,f3]
46 | f = np.transpose(f)
47 | 
48 | lr.fit(f, y)
49 | final = lr.predict(f)
50 | 
51 | acc4 = CalculateAccuracy(y, final)
52 | print("accuracy from Stacking: "+str(acc4))
53 | 
54 | # accuracy from KNN: 96.66666666666667
55 | # accuracy from Random Forest: 94.66666666666667
56 | # accuracy from Naive Bayes: 92.0
57 | # accuracy from Stacking: 97.33333333333333
58 | 
59 | # sclf = StackingClassifier(classifiers=[clf1, clf2, clf3],
60 | #                           meta_classifier=lr)
61 | #
62 | # print('3-fold cross validation:\n')
63 | #
64 | # for clf, label in zip([clf1, clf2, clf3, sclf],
65 | #                       ['KNN',
66 | #                        'Random Forest',
67 | #                        'Naive Bayes',
68 | #                        'StackingClassifier']):
69 | #
70 | #     scores = model_selection.cross_val_score(clf, X, y,
71 | #                                              cv=3, scoring='accuracy')
72 | #     print("Accuracy: %0.2f (+/- %0.2f) [%s]"
73 | #           % (scores.mean(), scores.std(), label))
74 | 
75 | 
--------------------------------------------------------------------------------
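Note that Stacking.py fits the base learners and the logistic-regression meta-learner on the same 150 rows, so the accuracies recorded above are in-sample. Stacked generalization proper derives the meta-features from predictions on data each base model did not see. A minimal sketch, assuming the X, y, clf1, clf2, clf3 and lr objects defined above:

```python
# Sketch: leakage-free stacking via out-of-fold meta-features.
import numpy as np
from sklearn.model_selection import cross_val_predict

# Every row is predicted by a clone fitted on the other folds
f1 = cross_val_predict(clf1, X, y, cv=5)
f2 = cross_val_predict(clf2, X, y, cv=5)
f3 = cross_val_predict(clf3, X, y, cv=5)

meta_features = np.transpose([f1, f2, f3])
lr.fit(meta_features, y)  # the meta-learner now trains on honest predictions
```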
/Chapter01/kmeansClustering.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon May 15 20:37:33 2017
4 | 
5 | @author: DX
6 | """
7 | '''
8 | Created on 15-May-2017
9 | 
10 | @author: aii32199
11 | '''
12 | import time
13 | 
14 | import numpy as np
15 | import matplotlib.pyplot as plt
16 | 
17 | from sklearn.cluster import MiniBatchKMeans, KMeans
18 | from sklearn.metrics.pairwise import pairwise_distances_argmin
19 | from sklearn.datasets import make_blobs
20 | #Generate sample data
21 | np.random.seed(0)
22 | 
23 | batch_size = 45
24 | centers = [[1, 1], [-1, -1], [1, -1]]
25 | n_clusters = len(centers)
26 | X, labels_true = make_blobs(n_samples=3000, centers=centers, cluster_std=0.7)
27 | # Compute clustering with KMeans
28 | k_means = KMeans(init='k-means++', n_clusters=3, n_init=10)
29 | t0 = time.time()
30 | k_means.fit(X)
31 | t_batch = time.time() - t0
32 | # Compute clustering with MiniBatchKMeans
33 | 
34 | # Plot result
35 | fig = plt.figure(figsize=(12, 8))
36 | fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)
37 | colors = ['#4EACC5', '#FF9C34', '#4E9A06']
38 | 
39 | # We want to have the same colors for the same cluster from the
40 | # MiniBatchKMeans and the KMeans algorithm. Let's pair the cluster centers per
41 | # closest one.
42 | k_means_cluster_centers = np.sort(k_means.cluster_centers_, axis=0)
43 | #k_means_cluster_centers = np.load('E:/PyDevWorkSpaceTest/Ensembles/Chapter_01/data/kmenasCenter.npy')
44 | # np.save('E:/PyDevWorkSpaceTest/Ensembles/Chapter_01/data/kmenasCenter.npy',k_means_cluster_centers)
45 | k_means_labels = pairwise_distances_argmin(X, k_means_cluster_centers)
46 | 
47 | 
48 | 
49 | # KMeans: plot each cluster's members and its center in the left panel
50 | # ax.plot(X[:, 0], X[:, 1], 'w',markerfacecolor='k', marker='.',markersize=8)
51 | 
52 | ax = fig.add_subplot(1,2,1)
53 | for k, col in zip(range(n_clusters), colors):
54 |     my_members = k_means_labels == k
55 |     cluster_center = k_means_cluster_centers[k]
56 |     ax.plot(X[my_members, 0], X[my_members, 1], 'w',markerfacecolor=col, marker='.',markersize=8)
57 |     # plt.text(X[my_members, 0], X[my_members, 1], '%i' % (k))
58 |     ax.plot(cluster_center[0], cluster_center[1], marker='o', markerfacecolor=col,
59 |             markeredgecolor='k', markersize=10)
60 |     plt.text(cluster_center[0], cluster_center[1], 'Cluster: %i' % (k))
61 | 
62 | # ax.set_title('KMeans')
63 | 
64 | 
65 | test_point = [-1.3,1.3]
66 | ax.plot(test_point[0],test_point[1],marker='x',markerfacecolor='r',markersize=12)
67 | #plt.text(test_point[0],test_point[1], 'point:%.1f,%.1f' % (test_point[0],test_point[1]))
68 | #Check the test point's distance from each of the cluster centers
69 | dist = []
70 | for center in k_means_cluster_centers:
71 |     dist.append((sum(np.square((center) - (test_point)))))
72 | 
73 | min_indx = np.argmin(dist)
74 | test_point = [-1.3,1.3]
75 | 
76 | ax = fig.add_subplot(1,2,2)
77 | for k, col in zip(range(n_clusters), colors):
78 |     my_members = k_means_labels == k
79 |     cluster_center = k_means_cluster_centers[k]
80 |     ax.plot(X[my_members, 0], X[my_members, 1], 'w',markerfacecolor=col, marker='.',markersize=8)
81 |     # plt.text(X[my_members, 0], X[my_members, 1], '%i' % (k))
82 |     ax.plot(cluster_center[0], cluster_center[1], marker='o', markerfacecolor=col,
83 |             markeredgecolor='k', markersize=10)
84 |     plt.text(cluster_center[0], cluster_center[1], 'Cluster: %i' % (k))
85 | ax.plot(test_point[0],test_point[1],marker='x',markerfacecolor='r',markersize=8)
86 | plt.text(test_point[0],test_point[1], '%i' % (min_indx))
87 | 
88 | print('distances are: '+ str(dist))
89 | print('Minimum distance index: '+str(min_indx))
90 | 
91 | 
92 | #Supervised algorithm
93 | from sklearn.ensemble import RandomForestClassifier as rf
94 | from sklearn.metrics import log_loss
95 | y = k_means_labels
96 | 
97 | X_train, y_train = X[:2000], y[:2000]
98 | X_valid, y_valid = X[2000:2500], y[2000:2500]
99 | X_train_valid, y_train_valid = X[:2500], y[:2500]
100 | X_test, y_test = X[2500:], y[2500:]
101 | 
102 | # Train an uncalibrated random forest classifier on the whole train and
103 | # validation data and evaluate it on the test data
104 | clf = rf(n_estimators=25)
105 | clf.fit(X_train_valid, y_train_valid)
106 | clf_probs = clf.predict_proba(X_test)
107 | 
108 | pred_label = np.argmax(clf_probs,axis=1)
109 | # score = log_loss(y_test, clf_probs)
110 | nnz = np.shape(y_test)[0] - np.count_nonzero(pred_label - y_test)
111 | acc = 100*nnz/np.shape(y_test)[0]
112 | print('accuracy is: '+str(acc))
113 | 
114 | clf_probs = clf.predict_proba([test_point]) #predict_proba expects a 2-D array
115 | pred_label = np.argmax(clf_probs,axis=1)
116 | print('RF predicted label: '+str(pred_label))
117 | plt.show()
118 | # ax.set_xticks(())
119 | # ax.set_yticks(())
120 | # plt.text(-3.5, 1.8, 'train time: %.2fs\ninertia: %f' % (
121 | #     t_batch, k_means.inertia_))
122 | 
123 | # MiniBatchKMeans
124 | # ax = fig.add_subplot(1, 3, 2)
125 | # for k, col in zip(range(n_clusters), colors):
126 | #     my_members = mbk_means_labels == order[k]
127 | #     cluster_center = mbk_means_cluster_centers[order[k]]
128 | #     ax.plot(X[my_members, 0], X[my_members, 1], 'w',
129 | #             markerfacecolor=col, marker='.')
130 | #     ax.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
131 | #             markeredgecolor='k', markersize=6)
132 | # ax.set_title('MiniBatchKMeans')
133 | # ax.set_xticks(())
134 | # ax.set_yticks(())
135 | # # plt.text(-3.5, 1.8, 'train time: %.2fs\ninertia: %f' %
136 | # #          (t_mini_batch, mbk.inertia_))
137 | #
138 | # # Initialise the different array to all False
139 | # different = (mbk_means_labels == 4)
140 | # ax = fig.add_subplot(1, 3, 3)
141 | #
142 | # for k in range(n_clusters):
143 | #     different += ((k_means_labels == k) != (mbk_means_labels == order[k]))
144 | #
145 | # identic = np.logical_not(different)
146 | # ax.plot(X[identic, 0], X[identic, 1], 'w',
147 | #         markerfacecolor='#bbbbbb', marker='.')
148 | # ax.plot(X[different, 0], X[different, 1], 'w',
149 | #         markerfacecolor='m', marker='.')
150 | # ax.set_title('Difference')
151 | # ax.set_xticks(())
152 | # ax.set_yticks(())
153 | 
154 | 
155 | 
--------------------------------------------------------------------------------
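kmeansClustering.py assigns the test point to a cluster by computing squared distances to every centroid by hand; a fitted KMeans object can perform the same assignment directly. A minimal sketch, assuming the k_means estimator fitted above (predict() uses the unsorted cluster_centers_, so its indices can differ from the sorted centers drawn in the plot):

```python
# Sketch: let the fitted estimator assign the new point itself.
label = k_means.predict([[-1.3, 1.3]])[0]  # predict() expects a 2-D array
print('KMeans assigns the test point to cluster: %i' % label)
```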
/Chapter02/Data/__pycache__/TreeDict.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Ensemble-Machine-Learning/1fcc546f88517e35309a4f37ff8c252f9003c29f/Chapter02/Data/__pycache__/TreeDict.cpython-35.pyc
--------------------------------------------------------------------------------
/Chapter02/Data/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Ensemble-Machine-Learning/1fcc546f88517e35309a4f37ff8c252f9003c29f/Chapter02/Data/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/Chapter02/Data/testData.csv:
--------------------------------------------------------------------------------
1 
| ,buying,maint,doors,persons,lug_boot,safety,Class 2 | 0,vhigh,med,2,2,med,low,unacc 3 | 1,low,high,5more,4,small,low,unacc 4 | 2,high,high,3,4,med,low,unacc 5 | 3,vhigh,high,2,2,big,low,unacc 6 | 4,vhigh,high,2,2,big,med,unacc 7 | 5,vhigh,med,4,more,small,med,unacc 8 | 6,low,med,5more,2,small,high,unacc 9 | 7,high,low,4,4,med,high,acc 10 | 8,low,med,3,2,big,high,unacc 11 | -------------------------------------------------------------------------------- /Chapter02/Data/treeModel.json: -------------------------------------------------------------------------------- 1 | {"buying": {"med": {"safety": {"med": {"persons": {"4": {"maint": {"med": "acc", "vhigh": {"lug_boot": {"med": {"doors": {"4": "acc", "3": "unacc", "2": "unacc", "5more": "acc"}}, "big": "acc", "small": "unacc"}}, "low": {"lug_boot": {"med": {"doors": {"4": "good", "3": "acc", "2": "acc", "5more": "good"}}, "big": "good", "small": "acc"}}, "high": {"lug_boot": {"med": {"doors": {"4": "acc", "3": "unacc", "2": "unacc", "5more": "acc"}}, "big": "acc", "small": "unacc"}}}}, "2": "unacc", "more": {"maint": {"med": {"doors": {"4": "acc", "3": "acc", "2": {"lug_boot": {"med": "acc", "big": "acc", "small": "unacc"}}, "5more": "acc"}}, "vhigh": {"lug_boot": {"med": {"doors": {"4": "acc", "3": "acc", "2": "unacc", "5more": "acc"}}, "big": "acc", "small": "unacc"}}, "low": {"lug_boot": {"med": {"doors": {"4": "good", "3": "good", "2": "acc", "5more": "good"}}, "big": "good", "small": {"doors": {"4": "acc", "3": "acc", "2": "unacc", "5more": "acc"}}}}, "high": {"lug_boot": {"med": {"doors": {"4": "acc", "3": "acc", "2": "unacc", "5more": "acc"}}, "big": "acc", "small": "unacc"}}}}}}, "low": "unacc", "high": {"persons": {"4": {"maint": {"med": {"lug_boot": {"med": {"doors": {"4": "vgood", "3": "acc", "2": "acc", "5more": "vgood"}}, "big": "vgood", "small": "acc"}}, "vhigh": "acc", "low": {"lug_boot": {"med": {"doors": {"4": "vgood", "3": "good", "2": "good", "5more": "vgood"}}, "big": "vgood", "small": "good"}}, "high": "acc"}}, "2": "unacc", "more": {"maint": {"med": {"lug_boot": {"med": {"doors": {"4": "vgood", "3": "vgood", "2": "acc", "5more": "vgood"}}, "big": "vgood", "small": {"doors": {"4": "acc", "3": "acc", "2": "unacc", "5more": "acc"}}}}, "vhigh": {"doors": {"4": "acc", "3": "acc", "2": {"lug_boot": {"med": "acc", "big": "acc", "small": "unacc"}}, "5more": "acc"}}, "low": {"lug_boot": {"med": {"doors": {"4": "vgood", "3": "vgood", "2": "good", "5more": "vgood"}}, "big": "vgood", "small": {"doors": {"4": "good", "3": "good", "2": "unacc", "5more": "good"}}}}, "high": {"doors": {"4": "acc", "3": "acc", "2": {"lug_boot": {"med": "acc", "big": "acc", "small": "unacc"}}, "5more": "acc"}}}}}}}}, "vhigh": {"maint": {"med": {"safety": {"med": {"lug_boot": {"med": {"persons": {"4": {"doors": {"4": "acc", "3": "unacc", "2": "unacc", "5more": "acc"}}, "2": "unacc", "more": {"doors": {"4": "acc", "3": "acc", "2": "unacc", "5more": "acc"}}}}, "big": {"persons": {"4": "acc", "2": "unacc", "more": "acc"}}, "small": "unacc"}}, "low": "unacc", "high": {"persons": {"4": "acc", "2": "unacc", "more": {"doors": {"4": "acc", "3": "acc", "2": {"lug_boot": {"med": "acc", "big": "acc", "small": "unacc"}}, "5more": "acc"}}}}}}, "vhigh": "unacc", "low": {"safety": {"med": {"lug_boot": {"med": {"persons": {"4": {"doors": {"4": "acc", "3": "unacc", "2": "unacc", "5more": "acc"}}, "2": "unacc", "more": {"doors": {"4": "acc", "3": "acc", "2": "unacc", "5more": "acc"}}}}, "big": {"persons": {"4": "acc", "2": "unacc", "more": "acc"}}, "small": "unacc"}}, 
"low": "unacc", "high": {"persons": {"4": "acc", "2": "unacc", "more": {"doors": {"4": "acc", "3": "acc", "2": {"lug_boot": {"med": "acc", "big": "acc", "small": "unacc"}}, "5more": "acc"}}}}}}, "high": "unacc"}}, "low": {"safety": {"med": {"persons": {"4": {"maint": {"med": {"lug_boot": {"med": {"doors": {"4": "good", "3": "acc", "2": "acc", "5more": "good"}}, "big": "good", "small": "acc"}}, "vhigh": {"lug_boot": {"med": {"doors": {"4": "acc", "3": "unacc", "2": "unacc", "5more": "acc"}}, "big": "acc", "small": "unacc"}}, "low": {"lug_boot": {"med": {"doors": {"4": "good", "3": "acc", "2": "acc", "5more": "good"}}, "big": "good", "small": "acc"}}, "high": "acc"}}, "2": "unacc", "more": {"maint": {"med": {"lug_boot": {"med": {"doors": {"4": "good", "3": "good", "2": "acc", "5more": "good"}}, "big": "good", "small": {"doors": {"4": "acc", "3": "acc", "2": "unacc", "5more": "acc"}}}}, "vhigh": {"lug_boot": {"med": {"doors": {"4": "acc", "3": "acc", "2": "unacc", "5more": "acc"}}, "big": "acc", "small": "unacc"}}, "low": {"lug_boot": {"med": {"doors": {"4": "good", "3": "good", "2": "acc", "5more": "good"}}, "big": "good", "small": {"doors": {"4": "acc", "3": "acc", "2": "unacc", "5more": "acc"}}}}, "high": {"doors": {"4": "acc", "3": "acc", "2": {"lug_boot": {"med": "acc", "big": "acc", "small": "unacc"}}, "5more": "acc"}}}}}}, "low": "unacc", "high": {"persons": {"4": {"maint": {"med": {"lug_boot": {"med": {"doors": {"4": "vgood", "3": "good", "2": "good", "5more": "vgood"}}, "big": "vgood", "small": "good"}}, "vhigh": "acc", "low": {"lug_boot": {"med": {"doors": {"4": "vgood", "3": "good", "2": "good", "5more": "vgood"}}, "big": "vgood", "small": "good"}}, "high": {"lug_boot": {"med": {"doors": {"4": "vgood", "3": "acc", "2": "acc", "5more": "vgood"}}, "big": "vgood", "small": "acc"}}}}, "2": "unacc", "more": {"maint": {"med": {"lug_boot": {"med": {"doors": {"4": "vgood", "3": "vgood", "2": "good", "5more": "vgood"}}, "big": "vgood", "small": {"doors": {"4": "good", "3": "good", "2": "unacc", "5more": "good"}}}}, "vhigh": {"doors": {"4": "acc", "3": "acc", "2": {"lug_boot": {"med": "acc", "big": "acc", "small": "unacc"}}, "5more": "acc"}}, "low": {"lug_boot": {"med": {"doors": {"4": "vgood", "3": "vgood", "2": "good", "5more": "vgood"}}, "big": "vgood", "small": {"doors": {"4": "good", "3": "good", "2": "unacc", "5more": "good"}}}}, "high": {"lug_boot": {"med": {"doors": {"4": "vgood", "3": "vgood", "2": "acc", "5more": "vgood"}}, "big": "vgood", "small": {"doors": {"4": "acc", "3": "acc", "2": "unacc", "5more": "acc"}}}}}}}}}}, "high": {"safety": {"med": {"lug_boot": {"med": {"persons": {"4": {"doors": {"4": {"maint": {"med": "acc", "vhigh": "unacc", "low": "acc", "high": "acc"}}, "3": "unacc", "2": "unacc", "5more": {"maint": {"med": "acc", "vhigh": "unacc", "low": "acc", "high": "acc"}}}}, "2": "unacc", "more": {"maint": {"med": {"doors": {"4": "acc", "3": "acc", "2": "unacc", "5more": "acc"}}, "vhigh": "unacc", "low": {"doors": {"4": "acc", "3": "acc", "2": "unacc", "5more": "acc"}}, "high": {"doors": {"4": "acc", "3": "acc", "2": "unacc", "5more": "acc"}}}}}}, "big": {"persons": {"4": {"maint": {"med": "acc", "vhigh": "unacc", "low": "acc", "high": "acc"}}, "2": "unacc", "more": {"maint": {"med": "acc", "vhigh": "unacc", "low": "acc", "high": "acc"}}}}, "small": "unacc"}}, "low": "unacc", "high": {"persons": {"4": {"maint": {"med": "acc", "vhigh": "unacc", "low": "acc", "high": "acc"}}, "2": "unacc", "more": {"maint": {"med": {"doors": {"4": "acc", "3": "acc", "2": {"lug_boot": 
{"med": "acc", "big": "acc", "small": "unacc"}}, "5more": "acc"}}, "vhigh": "unacc", "low": {"doors": {"4": "acc", "3": "acc", "2": {"lug_boot": {"med": "acc", "big": "acc", "small": "unacc"}}, "5more": "acc"}}, "high": {"doors": {"4": "acc", "3": "acc", "2": {"lug_boot": {"med": "acc", "big": "acc", "small": "unacc"}}, "5more": "acc"}}}}}}}}}} -------------------------------------------------------------------------------- /Chapter02/DecisionTree_ID3.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | np.random.seed(1337) # for reproducibility 5 | 6 | #Function to get Information Gain of the attribute using class entropy 7 | def getInformationGain(subtable,classEntropy): 8 | 9 | #Initialize a variable for storing probability of Classes 10 | fraction = 0 11 | 12 | #Calculate total number of instances 13 | denom = np.sum(np.sum(subtable)) 14 | 15 | #Initialize variable for storing total entropies of attrribute values 16 | EntropyAtt = 0 17 | 18 | #Now we will run a loop to access each attribute and its information gain 19 | for key in subtable.keys(): 20 | 21 | #Extract Attribute 22 | attribute = subtable[key] 23 | entropy = 0 #Initialize variable for entropy calculation 24 | coeff = 0 #Initialize variable to store coefficient 25 | 26 | #Find out sum of class attributes(in our case Yes and No) 27 | denom2 = np.sum(attribute) 28 | 29 | #Run a loop to get entropy of distinct values of attribute 30 | for value in attribute: 31 | 32 | #Calculate coeff 33 | coeff+= float(value)/denom 34 | 35 | #Calculate probability of the attribute value 36 | fraction = float(value)/denom2 37 | 38 | #Calculate Entropy 39 | eps = np.finfo(float).eps 40 | entropy+= -fraction*np.log2(fraction+eps) 41 | EntropyAtt+= coeff*entropy 42 | 43 | #Calculate Information Gain using class entropy 44 | InfGain = classEntropy - EntropyAtt 45 | return InfGain,EntropyAtt 46 | 47 | def getClassEntropy(classAttributes): 48 | 49 | #Get distinct classes and how many time they occure 50 | _,counts = np.unique(classAttributes,return_counts=True) 51 | denom = len(classAttributes) 52 | entropy = 0 #Initialize entropy variable 53 | 54 | #Run a loop to calculate entropy of dataset 55 | for count in counts: 56 | fraction = float(count)/denom 57 | entropy+= -fraction*np.log2(fraction) 58 | return entropy 59 | 60 | 61 | def getHistTable(df,attribute): 62 | #This function create a subtable for the given attribute 63 | #Get values for the attribute 64 | value = df[attribute] 65 | 66 | #Extract class 67 | classes = df['Class'] 68 | 69 | #Get distinct classes 70 | classunique = df['Class'].unique() 71 | 72 | #Get distinct values from attribute e.g. 
61 | def getHistTable(df,attribute):
62 |     #This function creates a subtable for the given attribute
63 |     #Get values for the attribute
64 |     value = df[attribute]
65 | 
66 |     #Extract class
67 |     classes = df['Class']
68 | 
69 |     #Get distinct classes
70 |     classunique = df['Class'].unique()
71 | 
72 |     #Get the distinct values of the attribute, e.g. Low, High and Med for Salary
73 |     valunique = df[attribute].unique()
74 | 
75 |     #Create an empty table to store each attribute value and its class occurrences
76 |     temp = np.zeros((len(classunique),len(valunique)),dtype='uint8')
77 |     subtable = pd.DataFrame(temp,index=classunique,columns=valunique)
78 | 
79 |     #Count class occurrences for each value, e.g. how many times the class is Yes for Med salary
80 |     for i in range(len(classes)):
81 |         subtable[value[i]][classes[i]]+= 1
82 | 
83 |     return subtable
84 | 
85 | def getNode(df):
86 |     #This function finds the winner attribute to assign to the node
87 | 
88 |     #Get Classes
89 |     classAttributes = df['Class']
90 | 
91 |     #Create empty lists to store the Information Gain of the respective attributes
92 |     InformationGain = []
93 |     AttributeName = []
94 | 
95 |     #Extract each attribute
96 |     for attribute in df.keys():
97 |         if attribute != 'Class':
98 |             #Get class occurrences for each attribute value
99 |             subtable = getHistTable(df,attribute)
100 | 
101 |             #Get class entropy of the data
102 |             Ec = getClassEntropy(classAttributes)
103 | 
104 |             #Calculate Information Gain for each attribute
105 |             InfoGain,EntropyAtt = getInformationGain(subtable, Ec)
106 | 
107 |             #Append the value into the list
108 |             InformationGain.append(InfoGain)
109 |             AttributeName.append(attribute)
110 |             #print("Information Gain for %s: %.2f and Entropy: %.2f"%(attribute,InfoGain,EntropyAtt))
111 | 
112 |     #Find the attribute with maximum information gain
113 |     indx = np.argmax(InformationGain)
114 |     winnerNode = AttributeName[indx]
115 |     #print("\nWinner attribute is: %s"%(winnerNode))
116 | 
117 |     return winnerNode
118 | 
119 | def getSubtable(df,node,atValues):
120 |     #This function extracts the subtable for a given attribute value (such as the table of persons whose salary is Medium)
121 |     subtable = []
122 | 
123 |     #Run a loop through the dataset and create the subtable
124 |     for i in range(len(df[node])):
125 |         if df[node][i]==atValues:
126 |             row = df.loc[i,df.keys()]
127 |             subtable.append(row)
128 | 
129 |     for c in range(len(df.keys())):
130 |         if df.keys()[c]==node:
131 |             break
132 | 
133 |     #Create a new dataframe
134 |     subtable = pd.DataFrame(subtable,index=range(len(subtable)))
135 |     #print(subtable)
136 |     return subtable
137 | 
138 | def buildTree(df,tree=None):
139 |     #Here we build our decision tree
140 | 
141 |     #Get the attribute with maximum information gain
142 |     node = getNode(df)
143 | 
144 |     #Get the distinct values of that attribute, e.g. Salary is the node and Low, Med and High are its values
145 |     attValue = np.unique(df[node])
146 | 
147 |     #Create an empty dictionary to hold the tree
148 |     if tree is None:
149 |         tree={}
150 |         tree[node] = {}
151 | 
152 |     #The loop below builds the tree by recursion of this function:
153 |     #we create a subtable for each attribute value and check whether it is a pure subset;
154 |     #if it is a pure subset we stop growing the tree at that node, and if it is not
155 |     #we call the same function again on the subtable.
156 |     for value in attValue:
157 | 
158 |         #print("Value: %s"%value)
159 |         subtable = getSubtable(df,node,value)
160 |         clValue,counts = np.unique(subtable['Class'],return_counts=True)
161 | 
162 |         if len(counts)==1:#Checking purity of the subset
163 |             #print("Class: %s\n"%clValue)
164 |             tree[node][value] = clValue[0]
165 |         else:
166 |             tree[node][value] = buildTree(subtable)#Recursion of the function
167 | 
168 |     return tree
169 | 
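# For example, with a toy tree {'Salary': {'Low': 'No', 'High': 'Yes'}} and an
# instance where inst['Salary'] == 'High', predict() below reads the root key
# 'Salary', steps into tree['Salary']['High'], finds the string 'Yes' rather
# than another dictionary, and returns it as the predicted class.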
170 | def predict(inst,tree):
171 |     #This function predicts an input instance's class using the given tree
172 | 
173 |     #We use recursion to traverse the tree, the same as we did while
174 |     #building it
175 | 
176 |     for nodes in tree.keys():
177 | 
178 |         value = inst[nodes]
179 |         tree = tree[nodes][value]
180 |         prediction = 0
181 | 
182 |         if type(tree) is dict:
183 |             prediction = predict(inst, tree)
184 |         else:
185 |             prediction = tree
186 |             break
187 | 
188 |     return prediction
189 | 
190 | def preProcess(dataset):
191 |     #Create a dataframe out of our dataset with attribute names
192 |     df = pd.DataFrame(dataset,columns=['Name','Salary','Sex','Marital','Class'])
193 | 
194 |     #Remove the Name attribute as it is not required for the calculations
195 |     df.pop('Name')
196 | 
197 |     #Make sure the last attribute of our data set is the Class attribute
198 |     cols = list(df)
199 |     cols.insert(len(cols), cols.pop(cols.index('Class')))
200 |     df = df[cols]
201 |     print(df)
202 | 
203 |     return df
204 | 
205 | def BatchTest(instances,tree):
206 | 
207 |     prediction = []
208 |     instances.pop("Class")
209 |     for i in range(len(instances.index)):
210 |         inst = instances.iloc[i]
211 |         pred = predict(inst, tree)
212 |         prediction.append(pred)
213 |     return prediction
214 | 
215 | def split_data(df,percentage):
216 | 
217 |     split_indx = np.int32(np.floor(percentage*len(df.index)))
218 |     #We shuffle the rows of the data to mix it up well
219 |     df = df.sample(frac=1).reset_index(drop=True)
220 | 
221 |     #Split off the training data for creating the tree
222 |     train_data = df[:split_indx]
223 |     temp = df[split_indx:len(df.index)]
224 |     temp = temp.values
225 |     test_data = pd.DataFrame(temp,index=range(len(temp)),columns=[key for key in df.keys()])
226 | 
227 |     return train_data,test_data
228 | 
229 | def getAccuracy(testClass,predictedClass):
230 | 
231 |     match = 0
232 |     for i in range(len(testClass)):
233 |         if testClass[i]==predictedClass[i]:
234 |             match+=1
235 | 
236 |     accuracy = 100*match/len(testClass)
237 | 
238 |     return accuracy,match
--------------------------------------------------------------------------------
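getClassEntropy above implements the entropy formula E = -sum(p_i * log2(p_i)) over the class proportions. As a quick hand check, a set of six 'Yes' and four 'No' labels (the class distribution of the ID3_Test.py dataset below) gives E = -(0.6*log2(0.6) + 0.4*log2(0.4)) ≈ 0.971 bits. A minimal sketch, assuming the module above is importable as DecisionTree_ID3:

```python
# Sanity check: entropy of a 6/4 class split is about 0.971 bits.
from DecisionTree_ID3 import getClassEntropy

labels = ['Yes']*6 + ['No']*4
print(getClassEntropy(labels))  # expect roughly 0.9710
```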
/Chapter02/ID3_Test.py:
--------------------------------------------------------------------------------
1 | def main():
2 |     #Lets create the test dataset to build our tree
3 |     dataset = {'Name':['Person 1','Person 2','Person 3','Person 4','Person 5','Person 6','Person 7','Person 8','Person 9','Person 10'],
4 |                'Salary':['Low','Med','Med','Med','Med','High','Low','High','Med','Low'],
5 |                'Sex':['Male','Male','Male','Female','Male','Female','Female','Male','Female','Male'],
6 |                'Marital':['Unmarried','Unmarried','Married','Married','Married','Unmarried','Unmarried','Unmarried','Unmarried','Married'],
7 |                'Class':['No','No','Yes','No','Yes','Yes','No','Yes','Yes','Yes']}
8 |     import DecisionTree_ID3 as ID3 #The module sits alongside this script in Chapter02
9 |     #Preprocess data set
10 |     df = ID3.preProcess(dataset)
11 | 
12 |     #Lets build the tree
13 |     tree = ID3.buildTree(df)
14 | 
15 |     import pprint
16 |     #print(tree)
17 |     pprint.pprint(tree)
18 | 
19 |     #Select a test instance
20 |     inst = df.iloc[2]
21 | 
22 |     #Remove its class attribute
23 |     inst.pop('Class')
24 | 
25 |     #Get prediction
26 |     prediction = ID3.predict(inst, tree)
27 |     print("Prediction: %s"%prediction)
28 | 
29 | main()
--------------------------------------------------------------------------------
/Chapter02/PracticalApplication.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import pprint
3 | import json
4 | import numpy as np
5 | np.random.seed(1337) # for reproducibility
6 | import DecisionTree_ID3 as DT #The module sits alongside this script in Chapter02
7 | 
8 | datapath = 'Data/CarDataset.csv' #Paths are relative to the Chapter02 folder
9 | path2save = 'Data/treeModel.json'
10 | trainDataPath = 'Data/trainData.csv'
11 | testDataPath = 'Data/testData.csv'
12 | 
13 | # testData = pd.read_csv(testDataPath)
14 | 
15 | cardata = pd.read_csv(datapath)
16 | mat = cardata.values
17 | df = pd.DataFrame(mat,columns=['buying','maint','doors','persons','lug_boot','safety','Class'])
18 | trainData,testData = DT.split_data(df, 0.995)
19 | 
20 | trainData.to_csv(trainDataPath,columns=['buying','maint','doors','persons','lug_boot','safety','Class'])
21 | testData.to_csv(testDataPath,columns=['buying','maint','doors','persons','lug_boot','safety','Class'])
22 | 
23 | tree = DT.buildTree(trainData)
24 | pprint.pprint(tree)
25 | 
26 | with open(path2save,'w') as f:
27 |     json.dump(tree,f)
28 | 
29 | with open(path2save) as f:
30 |     model = json.load(f)
31 | 
32 | pprint.pprint(model)
33 | actualClass = testData['Class']
34 | predictions = DT.BatchTest(testData, model)
35 | accuracy,match = DT.getAccuracy(actualClass, predictions)
36 | 
37 | print("Accuracy of the model is: %.2f and matched results are %i out of %i"%(accuracy,match,len(actualClass)))
38 | 
39 | 
40 | 
--------------------------------------------------------------------------------
/Chapter02/SplitCheck.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Jun 24, 2017
3 | 
4 | @author: DX
5 | '''
6 | 
7 | # Split a dataset based on an attribute and an attribute value
8 | def test_split(index, value, dataset):
9 |     left, right = list(), list()
10 |     for row in dataset:
11 |         if row[index] < value:
12 |             left.append(row)
13 |         else:
14 |             right.append(row)
15 |     return left, right
16 | 
17 | # Calculate the Gini index for a split dataset
18 | def gini_index(groups, class_values):
19 |     gini = 0.0
20 |     for class_value in class_values:
21 |         for group in groups:
22 |             size = len(group)
23 |             if size == 0:
24 |                 continue
25 |             proportion = [row[-1] for row in group].count(class_value) / float(size)
26 |             gini += (proportion * (1.0 - proportion))
27 |     return gini
28 | 
29 | # Select the best split point for a dataset
30 | 
31 | def get_split(dataset):
32 | 
33 |     class_values = extractClasses(dataset)
34 | 
35 |     b_index, b_value, b_score, b_groups = 999, 999, 999, None
36 |     for index in range(len(dataset[0])-1):
37 |         for row in dataset:
38 |             groups = test_split(index, row[index], dataset)
39 |             gini = gini_index(groups, class_values)
40 |             print('X%d < %.3f Gini=%.3f' % ((index+1), row[index], gini))
41 |             if gini < b_score:
42 |                 b_index, b_value, b_score, b_groups = index, row[index], gini, groups
43 |     return {'index':b_index, 'value':b_value, 'groups':b_groups}
44 | 
45 | def extractClasses(dataset):
46 | 
47 |     class_values = []
48 | 
49 |     for rows in dataset:
50 |         class_values.append(rows[-1])
51 | 
52 |     return class_values
53 | 
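# A quick hand check of gini_index above: splitting the first two rows of the
# dataset below into the groups ([[0.5, 1.5, 1]], [[1.0, 0.5, -1]]) for the
# class values 1 and -1 makes every group pure, so each proportion is 0.0 or
# 1.0, every proportion*(1-proportion) term is 0 and the Gini score is 0.0,
# the best value get_split() can find. A 50/50 mixed group would instead
# contribute 2*0.5*0.5 = 0.5 to the score.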
54 | dataset = [[0.50000, 1.50000, 1.00000], 55 | [1.00000, 0.50000, -1.00000], 56 | [1.25000, 3.50000, 1.00000], 57 | [1.50000, 4.00000, 1.00000], 58 | [2.00000, 2.00000, -1.00000], 59 | [2.50000, 2.50000, 1.00000], 60 | [3.75000, 3.00000, -1.00000], 61 | [4.00000, 1.00000, -1.00000]] 62 | split = get_split(dataset) 63 | print('Split: [X%d < %.3f]' % ((split['index']+1), split['value'])) 64 | -------------------------------------------------------------------------------- /Chapter02/testTree.json: -------------------------------------------------------------------------------- 1 | {"Salary": {"High": -------------------------------------------------------------------------------- /Chapter03/BinaryTree.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 19-Jun-2017 3 | 4 | @author: aii32199 5 | ''' 6 | import numpy as np 7 | def getNewNode(data): 8 | node = {'data':[],'left':[],'right':[]} 9 | node['data'] = data 10 | print(node) 11 | return node 12 | 13 | def createBinaryTree(tree,data): 14 | 15 | #Check whether we have any node in the tree if not create one 16 | if not tree: 17 | tree = getNewNode(data) 18 | 19 | #Now if current value is less than parent node put it in left 20 | elif data<=tree['data']: 21 | tree['left'] = createBinaryTree(tree['left'],data) 22 | #else put it in right 23 | else: 24 | tree['right'] = createBinaryTree(tree['right'],data) 25 | return tree 26 | 27 | 28 | # data = [0.7,0.65,0.83,0.54,0.9,0.11,0.44,0.35,0.75,0.3,0.78,0.15] 29 | data = [0.7,0.65,0.83,0.54,0.9,0.11,0.44,0.35,0.75,0.3,0.78,0.15] 30 | med = np.median(data) 31 | print("Median of array is: %.2f"%med) 32 | 33 | tree = [] 34 | tree = createBinaryTree(tree,med) 35 | for i in range(len(data)): 36 | value = data[i] 37 | tree = createBinaryTree(tree,value) 38 | 39 | import pprint 40 | pprint.pprint(tree) 41 | -------------------------------------------------------------------------------- /Chapter03/Data/bcancer.csv: -------------------------------------------------------------------------------- 1 | 1000025,5,1,1,1,2,1,3,1,1,2 2 | 1002945,5,4,4,5,7,10,3,2,1,2 3 | 1015425,3,1,1,1,2,2,3,1,1,2 4 | 1016277,6,8,8,1,3,4,3,7,1,2 5 | 1017023,4,1,1,3,2,1,3,1,1,2 6 | 1017122,8,10,10,8,7,10,9,7,1,4 7 | 1018099,1,1,1,1,2,10,3,1,1,2 8 | 1018561,2,1,2,1,2,1,3,1,1,2 9 | 1033078,2,1,1,1,2,1,1,1,5,2 10 | 1033078,4,2,1,1,2,1,2,1,1,2 11 | 1035283,1,1,1,1,1,1,3,1,1,2 12 | 1036172,2,1,1,1,2,1,2,1,1,2 13 | 1041801,5,3,3,3,2,3,4,4,1,4 14 | 1043999,1,1,1,1,2,3,3,1,1,2 15 | 1044572,8,7,5,10,7,9,5,5,4,4 16 | 1047630,7,4,6,4,6,1,4,3,1,4 17 | 1048672,4,1,1,1,2,1,2,1,1,2 18 | 1049815,4,1,1,1,2,1,3,1,1,2 19 | 1050670,10,7,7,6,4,10,4,1,2,4 20 | 1050718,6,1,1,1,2,1,3,1,1,2 21 | 1054590,7,3,2,10,5,10,5,4,4,4 22 | 1054593,10,5,5,3,6,7,7,10,1,4 23 | 1056784,3,1,1,1,2,1,2,1,1,2 24 | 1057013,8,4,5,1,2,?,7,3,1,4 25 | 1059552,1,1,1,1,2,1,3,1,1,2 26 | 1065726,5,2,3,4,2,7,3,6,1,4 27 | 1066373,3,2,1,1,1,1,2,1,1,2 28 | 1066979,5,1,1,1,2,1,2,1,1,2 29 | 1067444,2,1,1,1,2,1,2,1,1,2 30 | 1070935,1,1,3,1,2,1,1,1,1,2 31 | 1070935,3,1,1,1,1,1,2,1,1,2 32 | 1071760,2,1,1,1,2,1,3,1,1,2 33 | 1072179,10,7,7,3,8,5,7,4,3,4 34 | 1074610,2,1,1,2,2,1,3,1,1,2 35 | 1075123,3,1,2,1,2,1,2,1,1,2 36 | 1079304,2,1,1,1,2,1,2,1,1,2 37 | 1080185,10,10,10,8,6,1,8,9,1,4 38 | 1081791,6,2,1,1,1,1,7,1,1,2 39 | 1084584,5,4,4,9,2,10,5,6,1,4 40 | 1091262,2,5,3,3,6,7,7,5,1,4 41 | 1096800,6,6,6,9,6,?,7,8,1,2 42 | 1099510,10,4,3,1,3,3,6,5,2,4 43 | 1100524,6,10,10,2,8,10,7,3,3,4 44 | 1102573,5,6,5,6,10,1,3,1,1,4 45 | 
1103608,10,10,10,4,8,1,8,10,1,4 46 | 1103722,1,1,1,1,2,1,2,1,2,2 47 | 1105257,3,7,7,4,4,9,4,8,1,4 48 | 1105524,1,1,1,1,2,1,2,1,1,2 49 | 1106095,4,1,1,3,2,1,3,1,1,2 50 | 1106829,7,8,7,2,4,8,3,8,2,4 51 | 1108370,9,5,8,1,2,3,2,1,5,4 52 | 1108449,5,3,3,4,2,4,3,4,1,4 53 | 1110102,10,3,6,2,3,5,4,10,2,4 54 | 1110503,5,5,5,8,10,8,7,3,7,4 55 | 1110524,10,5,5,6,8,8,7,1,1,4 56 | 1111249,10,6,6,3,4,5,3,6,1,4 57 | 1112209,8,10,10,1,3,6,3,9,1,4 58 | 1113038,8,2,4,1,5,1,5,4,4,4 59 | 1113483,5,2,3,1,6,10,5,1,1,4 60 | 1113906,9,5,5,2,2,2,5,1,1,4 61 | 1115282,5,3,5,5,3,3,4,10,1,4 62 | 1115293,1,1,1,1,2,2,2,1,1,2 63 | 1116116,9,10,10,1,10,8,3,3,1,4 64 | 1116132,6,3,4,1,5,2,3,9,1,4 65 | 1116192,1,1,1,1,2,1,2,1,1,2 66 | 1116998,10,4,2,1,3,2,4,3,10,4 67 | 1117152,4,1,1,1,2,1,3,1,1,2 68 | 1118039,5,3,4,1,8,10,4,9,1,4 69 | 1120559,8,3,8,3,4,9,8,9,8,4 70 | 1121732,1,1,1,1,2,1,3,2,1,2 71 | 1121919,5,1,3,1,2,1,2,1,1,2 72 | 1123061,6,10,2,8,10,2,7,8,10,4 73 | 1124651,1,3,3,2,2,1,7,2,1,2 74 | 1125035,9,4,5,10,6,10,4,8,1,4 75 | 1126417,10,6,4,1,3,4,3,2,3,4 76 | 1131294,1,1,2,1,2,2,4,2,1,2 77 | 1132347,1,1,4,1,2,1,2,1,1,2 78 | 1133041,5,3,1,2,2,1,2,1,1,2 79 | 1133136,3,1,1,1,2,3,3,1,1,2 80 | 1136142,2,1,1,1,3,1,2,1,1,2 81 | 1137156,2,2,2,1,1,1,7,1,1,2 82 | 1143978,4,1,1,2,2,1,2,1,1,2 83 | 1143978,5,2,1,1,2,1,3,1,1,2 84 | 1147044,3,1,1,1,2,2,7,1,1,2 85 | 1147699,3,5,7,8,8,9,7,10,7,4 86 | 1147748,5,10,6,1,10,4,4,10,10,4 87 | 1148278,3,3,6,4,5,8,4,4,1,4 88 | 1148873,3,6,6,6,5,10,6,8,3,4 89 | 1152331,4,1,1,1,2,1,3,1,1,2 90 | 1155546,2,1,1,2,3,1,2,1,1,2 91 | 1156272,1,1,1,1,2,1,3,1,1,2 92 | 1156948,3,1,1,2,2,1,1,1,1,2 93 | 1157734,4,1,1,1,2,1,3,1,1,2 94 | 1158247,1,1,1,1,2,1,2,1,1,2 95 | 1160476,2,1,1,1,2,1,3,1,1,2 96 | 1164066,1,1,1,1,2,1,3,1,1,2 97 | 1165297,2,1,1,2,2,1,1,1,1,2 98 | 1165790,5,1,1,1,2,1,3,1,1,2 99 | 1165926,9,6,9,2,10,6,2,9,10,4 100 | 1166630,7,5,6,10,5,10,7,9,4,4 101 | 1166654,10,3,5,1,10,5,3,10,2,4 102 | 1167439,2,3,4,4,2,5,2,5,1,4 103 | 1167471,4,1,2,1,2,1,3,1,1,2 104 | 1168359,8,2,3,1,6,3,7,1,1,4 105 | 1168736,10,10,10,10,10,1,8,8,8,4 106 | 1169049,7,3,4,4,3,3,3,2,7,4 107 | 1170419,10,10,10,8,2,10,4,1,1,4 108 | 1170420,1,6,8,10,8,10,5,7,1,4 109 | 1171710,1,1,1,1,2,1,2,3,1,2 110 | 1171710,6,5,4,4,3,9,7,8,3,4 111 | 1171795,1,3,1,2,2,2,5,3,2,2 112 | 1171845,8,6,4,3,5,9,3,1,1,4 113 | 1172152,10,3,3,10,2,10,7,3,3,4 114 | 1173216,10,10,10,3,10,8,8,1,1,4 115 | 1173235,3,3,2,1,2,3,3,1,1,2 116 | 1173347,1,1,1,1,2,5,1,1,1,2 117 | 1173347,8,3,3,1,2,2,3,2,1,2 118 | 1173509,4,5,5,10,4,10,7,5,8,4 119 | 1173514,1,1,1,1,4,3,1,1,1,2 120 | 1173681,3,2,1,1,2,2,3,1,1,2 121 | 1174057,1,1,2,2,2,1,3,1,1,2 122 | 1174057,4,2,1,1,2,2,3,1,1,2 123 | 1174131,10,10,10,2,10,10,5,3,3,4 124 | 1174428,5,3,5,1,8,10,5,3,1,4 125 | 1175937,5,4,6,7,9,7,8,10,1,4 126 | 1176406,1,1,1,1,2,1,2,1,1,2 127 | 1176881,7,5,3,7,4,10,7,5,5,4 128 | 1177027,3,1,1,1,2,1,3,1,1,2 129 | 1177399,8,3,5,4,5,10,1,6,2,4 130 | 1177512,1,1,1,1,10,1,1,1,1,2 131 | 1178580,5,1,3,1,2,1,2,1,1,2 132 | 1179818,2,1,1,1,2,1,3,1,1,2 133 | 1180194,5,10,8,10,8,10,3,6,3,4 134 | 1180523,3,1,1,1,2,1,2,2,1,2 135 | 1180831,3,1,1,1,3,1,2,1,1,2 136 | 1181356,5,1,1,1,2,2,3,3,1,2 137 | 1182404,4,1,1,1,2,1,2,1,1,2 138 | 1182410,3,1,1,1,2,1,1,1,1,2 139 | 1183240,4,1,2,1,2,1,2,1,1,2 140 | 1183246,1,1,1,1,1,?,2,1,1,2 141 | 1183516,3,1,1,1,2,1,1,1,1,2 142 | 1183911,2,1,1,1,2,1,1,1,1,2 143 | 1183983,9,5,5,4,4,5,4,3,3,4 144 | 1184184,1,1,1,1,2,5,1,1,1,2 145 | 1184241,2,1,1,1,2,1,2,1,1,2 146 | 1184840,1,1,3,1,2,?,2,1,1,2 147 | 1185609,3,4,5,2,6,8,4,1,1,4 148 | 1185610,1,1,1,1,3,2,2,1,1,2 149 | 
1187457,3,1,1,3,8,1,5,8,1,2 150 | 1187805,8,8,7,4,10,10,7,8,7,4 151 | 1188472,1,1,1,1,1,1,3,1,1,2 152 | 1189266,7,2,4,1,6,10,5,4,3,4 153 | 1189286,10,10,8,6,4,5,8,10,1,4 154 | 1190394,4,1,1,1,2,3,1,1,1,2 155 | 1190485,1,1,1,1,2,1,1,1,1,2 156 | 1192325,5,5,5,6,3,10,3,1,1,4 157 | 1193091,1,2,2,1,2,1,2,1,1,2 158 | 1193210,2,1,1,1,2,1,3,1,1,2 159 | 1193683,1,1,2,1,3,?,1,1,1,2 160 | 1196295,9,9,10,3,6,10,7,10,6,4 161 | 1196915,10,7,7,4,5,10,5,7,2,4 162 | 1197080,4,1,1,1,2,1,3,2,1,2 163 | 1197270,3,1,1,1,2,1,3,1,1,2 164 | 1197440,1,1,1,2,1,3,1,1,7,2 165 | 1197510,5,1,1,1,2,?,3,1,1,2 166 | 1197979,4,1,1,1,2,2,3,2,1,2 167 | 1197993,5,6,7,8,8,10,3,10,3,4 168 | 1198128,10,8,10,10,6,1,3,1,10,4 169 | 1198641,3,1,1,1,2,1,3,1,1,2 170 | 1199219,1,1,1,2,1,1,1,1,1,2 171 | 1199731,3,1,1,1,2,1,1,1,1,2 172 | 1199983,1,1,1,1,2,1,3,1,1,2 173 | 1200772,1,1,1,1,2,1,2,1,1,2 174 | 1200847,6,10,10,10,8,10,10,10,7,4 175 | 1200892,8,6,5,4,3,10,6,1,1,4 176 | 1200952,5,8,7,7,10,10,5,7,1,4 177 | 1201834,2,1,1,1,2,1,3,1,1,2 178 | 1201936,5,10,10,3,8,1,5,10,3,4 179 | 1202125,4,1,1,1,2,1,3,1,1,2 180 | 1202812,5,3,3,3,6,10,3,1,1,4 181 | 1203096,1,1,1,1,1,1,3,1,1,2 182 | 1204242,1,1,1,1,2,1,1,1,1,2 183 | 1204898,6,1,1,1,2,1,3,1,1,2 184 | 1205138,5,8,8,8,5,10,7,8,1,4 185 | 1205579,8,7,6,4,4,10,5,1,1,4 186 | 1206089,2,1,1,1,1,1,3,1,1,2 187 | 1206695,1,5,8,6,5,8,7,10,1,4 188 | 1206841,10,5,6,10,6,10,7,7,10,4 189 | 1207986,5,8,4,10,5,8,9,10,1,4 190 | 1208301,1,2,3,1,2,1,3,1,1,2 191 | 1210963,10,10,10,8,6,8,7,10,1,4 192 | 1211202,7,5,10,10,10,10,4,10,3,4 193 | 1212232,5,1,1,1,2,1,2,1,1,2 194 | 1212251,1,1,1,1,2,1,3,1,1,2 195 | 1212422,3,1,1,1,2,1,3,1,1,2 196 | 1212422,4,1,1,1,2,1,3,1,1,2 197 | 1213375,8,4,4,5,4,7,7,8,2,2 198 | 1213383,5,1,1,4,2,1,3,1,1,2 199 | 1214092,1,1,1,1,2,1,1,1,1,2 200 | 1214556,3,1,1,1,2,1,2,1,1,2 201 | 1214966,9,7,7,5,5,10,7,8,3,4 202 | 1216694,10,8,8,4,10,10,8,1,1,4 203 | 1216947,1,1,1,1,2,1,3,1,1,2 204 | 1217051,5,1,1,1,2,1,3,1,1,2 205 | 1217264,1,1,1,1,2,1,3,1,1,2 206 | 1218105,5,10,10,9,6,10,7,10,5,4 207 | 1218741,10,10,9,3,7,5,3,5,1,4 208 | 1218860,1,1,1,1,1,1,3,1,1,2 209 | 1218860,1,1,1,1,1,1,3,1,1,2 210 | 1219406,5,1,1,1,1,1,3,1,1,2 211 | 1219525,8,10,10,10,5,10,8,10,6,4 212 | 1219859,8,10,8,8,4,8,7,7,1,4 213 | 1220330,1,1,1,1,2,1,3,1,1,2 214 | 1221863,10,10,10,10,7,10,7,10,4,4 215 | 1222047,10,10,10,10,3,10,10,6,1,4 216 | 1222936,8,7,8,7,5,5,5,10,2,4 217 | 1223282,1,1,1,1,2,1,2,1,1,2 218 | 1223426,1,1,1,1,2,1,3,1,1,2 219 | 1223793,6,10,7,7,6,4,8,10,2,4 220 | 1223967,6,1,3,1,2,1,3,1,1,2 221 | 1224329,1,1,1,2,2,1,3,1,1,2 222 | 1225799,10,6,4,3,10,10,9,10,1,4 223 | 1226012,4,1,1,3,1,5,2,1,1,4 224 | 1226612,7,5,6,3,3,8,7,4,1,4 225 | 1227210,10,5,5,6,3,10,7,9,2,4 226 | 1227244,1,1,1,1,2,1,2,1,1,2 227 | 1227481,10,5,7,4,4,10,8,9,1,4 228 | 1228152,8,9,9,5,3,5,7,7,1,4 229 | 1228311,1,1,1,1,1,1,3,1,1,2 230 | 1230175,10,10,10,3,10,10,9,10,1,4 231 | 1230688,7,4,7,4,3,7,7,6,1,4 232 | 1231387,6,8,7,5,6,8,8,9,2,4 233 | 1231706,8,4,6,3,3,1,4,3,1,2 234 | 1232225,10,4,5,5,5,10,4,1,1,4 235 | 1236043,3,3,2,1,3,1,3,6,1,2 236 | 1241232,3,1,4,1,2,?,3,1,1,2 237 | 1241559,10,8,8,2,8,10,4,8,10,4 238 | 1241679,9,8,8,5,6,2,4,10,4,4 239 | 1242364,8,10,10,8,6,9,3,10,10,4 240 | 1243256,10,4,3,2,3,10,5,3,2,4 241 | 1270479,5,1,3,3,2,2,2,3,1,2 242 | 1276091,3,1,1,3,1,1,3,1,1,2 243 | 1277018,2,1,1,1,2,1,3,1,1,2 244 | 128059,1,1,1,1,2,5,5,1,1,2 245 | 1285531,1,1,1,1,2,1,3,1,1,2 246 | 1287775,5,1,1,2,2,2,3,1,1,2 247 | 144888,8,10,10,8,5,10,7,8,1,4 248 | 145447,8,4,4,1,2,9,3,3,1,4 249 | 167528,4,1,1,1,2,1,3,6,1,2 250 | 
169356,3,1,1,1,2,?,3,1,1,2 251 | 183913,1,2,2,1,2,1,1,1,1,2 252 | 191250,10,4,4,10,2,10,5,3,3,4 253 | 1017023,6,3,3,5,3,10,3,5,3,2 254 | 1100524,6,10,10,2,8,10,7,3,3,4 255 | 1116116,9,10,10,1,10,8,3,3,1,4 256 | 1168736,5,6,6,2,4,10,3,6,1,4 257 | 1182404,3,1,1,1,2,1,1,1,1,2 258 | 1182404,3,1,1,1,2,1,2,1,1,2 259 | 1198641,3,1,1,1,2,1,3,1,1,2 260 | 242970,5,7,7,1,5,8,3,4,1,2 261 | 255644,10,5,8,10,3,10,5,1,3,4 262 | 263538,5,10,10,6,10,10,10,6,5,4 263 | 274137,8,8,9,4,5,10,7,8,1,4 264 | 303213,10,4,4,10,6,10,5,5,1,4 265 | 314428,7,9,4,10,10,3,5,3,3,4 266 | 1182404,5,1,4,1,2,1,3,2,1,2 267 | 1198641,10,10,6,3,3,10,4,3,2,4 268 | 320675,3,3,5,2,3,10,7,1,1,4 269 | 324427,10,8,8,2,3,4,8,7,8,4 270 | 385103,1,1,1,1,2,1,3,1,1,2 271 | 390840,8,4,7,1,3,10,3,9,2,4 272 | 411453,5,1,1,1,2,1,3,1,1,2 273 | 320675,3,3,5,2,3,10,7,1,1,4 274 | 428903,7,2,4,1,3,4,3,3,1,4 275 | 431495,3,1,1,1,2,1,3,2,1,2 276 | 432809,3,1,3,1,2,?,2,1,1,2 277 | 434518,3,1,1,1,2,1,2,1,1,2 278 | 452264,1,1,1,1,2,1,2,1,1,2 279 | 456282,1,1,1,1,2,1,3,1,1,2 280 | 476903,10,5,7,3,3,7,3,3,8,4 281 | 486283,3,1,1,1,2,1,3,1,1,2 282 | 486662,2,1,1,2,2,1,3,1,1,2 283 | 488173,1,4,3,10,4,10,5,6,1,4 284 | 492268,10,4,6,1,2,10,5,3,1,4 285 | 508234,7,4,5,10,2,10,3,8,2,4 286 | 527363,8,10,10,10,8,10,10,7,3,4 287 | 529329,10,10,10,10,10,10,4,10,10,4 288 | 535331,3,1,1,1,3,1,2,1,1,2 289 | 543558,6,1,3,1,4,5,5,10,1,4 290 | 555977,5,6,6,8,6,10,4,10,4,4 291 | 560680,1,1,1,1,2,1,1,1,1,2 292 | 561477,1,1,1,1,2,1,3,1,1,2 293 | 563649,8,8,8,1,2,?,6,10,1,4 294 | 601265,10,4,4,6,2,10,2,3,1,4 295 | 606140,1,1,1,1,2,?,2,1,1,2 296 | 606722,5,5,7,8,6,10,7,4,1,4 297 | 616240,5,3,4,3,4,5,4,7,1,2 298 | 61634,5,4,3,1,2,?,2,3,1,2 299 | 625201,8,2,1,1,5,1,1,1,1,2 300 | 63375,9,1,2,6,4,10,7,7,2,4 301 | 635844,8,4,10,5,4,4,7,10,1,4 302 | 636130,1,1,1,1,2,1,3,1,1,2 303 | 640744,10,10,10,7,9,10,7,10,10,4 304 | 646904,1,1,1,1,2,1,3,1,1,2 305 | 653777,8,3,4,9,3,10,3,3,1,4 306 | 659642,10,8,4,4,4,10,3,10,4,4 307 | 666090,1,1,1,1,2,1,3,1,1,2 308 | 666942,1,1,1,1,2,1,3,1,1,2 309 | 667204,7,8,7,6,4,3,8,8,4,4 310 | 673637,3,1,1,1,2,5,5,1,1,2 311 | 684955,2,1,1,1,3,1,2,1,1,2 312 | 688033,1,1,1,1,2,1,1,1,1,2 313 | 691628,8,6,4,10,10,1,3,5,1,4 314 | 693702,1,1,1,1,2,1,1,1,1,2 315 | 704097,1,1,1,1,1,1,2,1,1,2 316 | 704168,4,6,5,6,7,?,4,9,1,2 317 | 706426,5,5,5,2,5,10,4,3,1,4 318 | 709287,6,8,7,8,6,8,8,9,1,4 319 | 718641,1,1,1,1,5,1,3,1,1,2 320 | 721482,4,4,4,4,6,5,7,3,1,2 321 | 730881,7,6,3,2,5,10,7,4,6,4 322 | 733639,3,1,1,1,2,?,3,1,1,2 323 | 733639,3,1,1,1,2,1,3,1,1,2 324 | 733823,5,4,6,10,2,10,4,1,1,4 325 | 740492,1,1,1,1,2,1,3,1,1,2 326 | 743348,3,2,2,1,2,1,2,3,1,2 327 | 752904,10,1,1,1,2,10,5,4,1,4 328 | 756136,1,1,1,1,2,1,2,1,1,2 329 | 760001,8,10,3,2,6,4,3,10,1,4 330 | 760239,10,4,6,4,5,10,7,1,1,4 331 | 76389,10,4,7,2,2,8,6,1,1,4 332 | 764974,5,1,1,1,2,1,3,1,2,2 333 | 770066,5,2,2,2,2,1,2,2,1,2 334 | 785208,5,4,6,6,4,10,4,3,1,4 335 | 785615,8,6,7,3,3,10,3,4,2,4 336 | 792744,1,1,1,1,2,1,1,1,1,2 337 | 797327,6,5,5,8,4,10,3,4,1,4 338 | 798429,1,1,1,1,2,1,3,1,1,2 339 | 704097,1,1,1,1,1,1,2,1,1,2 340 | 806423,8,5,5,5,2,10,4,3,1,4 341 | 809912,10,3,3,1,2,10,7,6,1,4 342 | 810104,1,1,1,1,2,1,3,1,1,2 343 | 814265,2,1,1,1,2,1,1,1,1,2 344 | 814911,1,1,1,1,2,1,1,1,1,2 345 | 822829,7,6,4,8,10,10,9,5,3,4 346 | 826923,1,1,1,1,2,1,1,1,1,2 347 | 830690,5,2,2,2,3,1,1,3,1,2 348 | 831268,1,1,1,1,1,1,1,3,1,2 349 | 832226,3,4,4,10,5,1,3,3,1,4 350 | 832567,4,2,3,5,3,8,7,6,1,4 351 | 836433,5,1,1,3,2,1,1,1,1,2 352 | 837082,2,1,1,1,2,1,3,1,1,2 353 | 846832,3,4,5,3,7,3,4,6,1,2 354 | 
850831,2,7,10,10,7,10,4,9,4,4 355 | 855524,1,1,1,1,2,1,2,1,1,2 356 | 857774,4,1,1,1,3,1,2,2,1,2 357 | 859164,5,3,3,1,3,3,3,3,3,4 358 | 859350,8,10,10,7,10,10,7,3,8,4 359 | 866325,8,10,5,3,8,4,4,10,3,4 360 | 873549,10,3,5,4,3,7,3,5,3,4 361 | 877291,6,10,10,10,10,10,8,10,10,4 362 | 877943,3,10,3,10,6,10,5,1,4,4 363 | 888169,3,2,2,1,4,3,2,1,1,2 364 | 888523,4,4,4,2,2,3,2,1,1,2 365 | 896404,2,1,1,1,2,1,3,1,1,2 366 | 897172,2,1,1,1,2,1,2,1,1,2 367 | 95719,6,10,10,10,8,10,7,10,7,4 368 | 160296,5,8,8,10,5,10,8,10,3,4 369 | 342245,1,1,3,1,2,1,1,1,1,2 370 | 428598,1,1,3,1,1,1,2,1,1,2 371 | 492561,4,3,2,1,3,1,2,1,1,2 372 | 493452,1,1,3,1,2,1,1,1,1,2 373 | 493452,4,1,2,1,2,1,2,1,1,2 374 | 521441,5,1,1,2,2,1,2,1,1,2 375 | 560680,3,1,2,1,2,1,2,1,1,2 376 | 636437,1,1,1,1,2,1,1,1,1,2 377 | 640712,1,1,1,1,2,1,2,1,1,2 378 | 654244,1,1,1,1,1,1,2,1,1,2 379 | 657753,3,1,1,4,3,1,2,2,1,2 380 | 685977,5,3,4,1,4,1,3,1,1,2 381 | 805448,1,1,1,1,2,1,1,1,1,2 382 | 846423,10,6,3,6,4,10,7,8,4,4 383 | 1002504,3,2,2,2,2,1,3,2,1,2 384 | 1022257,2,1,1,1,2,1,1,1,1,2 385 | 1026122,2,1,1,1,2,1,1,1,1,2 386 | 1071084,3,3,2,2,3,1,1,2,3,2 387 | 1080233,7,6,6,3,2,10,7,1,1,4 388 | 1114570,5,3,3,2,3,1,3,1,1,2 389 | 1114570,2,1,1,1,2,1,2,2,1,2 390 | 1116715,5,1,1,1,3,2,2,2,1,2 391 | 1131411,1,1,1,2,2,1,2,1,1,2 392 | 1151734,10,8,7,4,3,10,7,9,1,4 393 | 1156017,3,1,1,1,2,1,2,1,1,2 394 | 1158247,1,1,1,1,1,1,1,1,1,2 395 | 1158405,1,2,3,1,2,1,2,1,1,2 396 | 1168278,3,1,1,1,2,1,2,1,1,2 397 | 1176187,3,1,1,1,2,1,3,1,1,2 398 | 1196263,4,1,1,1,2,1,1,1,1,2 399 | 1196475,3,2,1,1,2,1,2,2,1,2 400 | 1206314,1,2,3,1,2,1,1,1,1,2 401 | 1211265,3,10,8,7,6,9,9,3,8,4 402 | 1213784,3,1,1,1,2,1,1,1,1,2 403 | 1223003,5,3,3,1,2,1,2,1,1,2 404 | 1223306,3,1,1,1,2,4,1,1,1,2 405 | 1223543,1,2,1,3,2,1,1,2,1,2 406 | 1229929,1,1,1,1,2,1,2,1,1,2 407 | 1231853,4,2,2,1,2,1,2,1,1,2 408 | 1234554,1,1,1,1,2,1,2,1,1,2 409 | 1236837,2,3,2,2,2,2,3,1,1,2 410 | 1237674,3,1,2,1,2,1,2,1,1,2 411 | 1238021,1,1,1,1,2,1,2,1,1,2 412 | 1238464,1,1,1,1,1,?,2,1,1,2 413 | 1238633,10,10,10,6,8,4,8,5,1,4 414 | 1238915,5,1,2,1,2,1,3,1,1,2 415 | 1238948,8,5,6,2,3,10,6,6,1,4 416 | 1239232,3,3,2,6,3,3,3,5,1,2 417 | 1239347,8,7,8,5,10,10,7,2,1,4 418 | 1239967,1,1,1,1,2,1,2,1,1,2 419 | 1240337,5,2,2,2,2,2,3,2,2,2 420 | 1253505,2,3,1,1,5,1,1,1,1,2 421 | 1255384,3,2,2,3,2,3,3,1,1,2 422 | 1257200,10,10,10,7,10,10,8,2,1,4 423 | 1257648,4,3,3,1,2,1,3,3,1,2 424 | 1257815,5,1,3,1,2,1,2,1,1,2 425 | 1257938,3,1,1,1,2,1,1,1,1,2 426 | 1258549,9,10,10,10,10,10,10,10,1,4 427 | 1258556,5,3,6,1,2,1,1,1,1,2 428 | 1266154,8,7,8,2,4,2,5,10,1,4 429 | 1272039,1,1,1,1,2,1,2,1,1,2 430 | 1276091,2,1,1,1,2,1,2,1,1,2 431 | 1276091,1,3,1,1,2,1,2,2,1,2 432 | 1276091,5,1,1,3,4,1,3,2,1,2 433 | 1277629,5,1,1,1,2,1,2,2,1,2 434 | 1293439,3,2,2,3,2,1,1,1,1,2 435 | 1293439,6,9,7,5,5,8,4,2,1,2 436 | 1294562,10,8,10,1,3,10,5,1,1,4 437 | 1295186,10,10,10,1,6,1,2,8,1,4 438 | 527337,4,1,1,1,2,1,1,1,1,2 439 | 558538,4,1,3,3,2,1,1,1,1,2 440 | 566509,5,1,1,1,2,1,1,1,1,2 441 | 608157,10,4,3,10,4,10,10,1,1,4 442 | 677910,5,2,2,4,2,4,1,1,1,2 443 | 734111,1,1,1,3,2,3,1,1,1,2 444 | 734111,1,1,1,1,2,2,1,1,1,2 445 | 780555,5,1,1,6,3,1,2,1,1,2 446 | 827627,2,1,1,1,2,1,1,1,1,2 447 | 1049837,1,1,1,1,2,1,1,1,1,2 448 | 1058849,5,1,1,1,2,1,1,1,1,2 449 | 1182404,1,1,1,1,1,1,1,1,1,2 450 | 1193544,5,7,9,8,6,10,8,10,1,4 451 | 1201870,4,1,1,3,1,1,2,1,1,2 452 | 1202253,5,1,1,1,2,1,1,1,1,2 453 | 1227081,3,1,1,3,2,1,1,1,1,2 454 | 1230994,4,5,5,8,6,10,10,7,1,4 455 | 1238410,2,3,1,1,3,1,1,1,1,2 456 | 1246562,10,2,2,1,2,6,1,1,2,4 457 | 
1257470,10,6,5,8,5,10,8,6,1,4 458 | 1259008,8,8,9,6,6,3,10,10,1,4 459 | 1266124,5,1,2,1,2,1,1,1,1,2 460 | 1267898,5,1,3,1,2,1,1,1,1,2 461 | 1268313,5,1,1,3,2,1,1,1,1,2 462 | 1268804,3,1,1,1,2,5,1,1,1,2 463 | 1276091,6,1,1,3,2,1,1,1,1,2 464 | 1280258,4,1,1,1,2,1,1,2,1,2 465 | 1293966,4,1,1,1,2,1,1,1,1,2 466 | 1296572,10,9,8,7,6,4,7,10,3,4 467 | 1298416,10,6,6,2,4,10,9,7,1,4 468 | 1299596,6,6,6,5,4,10,7,6,2,4 469 | 1105524,4,1,1,1,2,1,1,1,1,2 470 | 1181685,1,1,2,1,2,1,2,1,1,2 471 | 1211594,3,1,1,1,1,1,2,1,1,2 472 | 1238777,6,1,1,3,2,1,1,1,1,2 473 | 1257608,6,1,1,1,1,1,1,1,1,2 474 | 1269574,4,1,1,1,2,1,1,1,1,2 475 | 1277145,5,1,1,1,2,1,1,1,1,2 476 | 1287282,3,1,1,1,2,1,1,1,1,2 477 | 1296025,4,1,2,1,2,1,1,1,1,2 478 | 1296263,4,1,1,1,2,1,1,1,1,2 479 | 1296593,5,2,1,1,2,1,1,1,1,2 480 | 1299161,4,8,7,10,4,10,7,5,1,4 481 | 1301945,5,1,1,1,1,1,1,1,1,2 482 | 1302428,5,3,2,4,2,1,1,1,1,2 483 | 1318169,9,10,10,10,10,5,10,10,10,4 484 | 474162,8,7,8,5,5,10,9,10,1,4 485 | 787451,5,1,2,1,2,1,1,1,1,2 486 | 1002025,1,1,1,3,1,3,1,1,1,2 487 | 1070522,3,1,1,1,1,1,2,1,1,2 488 | 1073960,10,10,10,10,6,10,8,1,5,4 489 | 1076352,3,6,4,10,3,3,3,4,1,4 490 | 1084139,6,3,2,1,3,4,4,1,1,4 491 | 1115293,1,1,1,1,2,1,1,1,1,2 492 | 1119189,5,8,9,4,3,10,7,1,1,4 493 | 1133991,4,1,1,1,1,1,2,1,1,2 494 | 1142706,5,10,10,10,6,10,6,5,2,4 495 | 1155967,5,1,2,10,4,5,2,1,1,2 496 | 1170945,3,1,1,1,1,1,2,1,1,2 497 | 1181567,1,1,1,1,1,1,1,1,1,2 498 | 1182404,4,2,1,1,2,1,1,1,1,2 499 | 1204558,4,1,1,1,2,1,2,1,1,2 500 | 1217952,4,1,1,1,2,1,2,1,1,2 501 | 1224565,6,1,1,1,2,1,3,1,1,2 502 | 1238186,4,1,1,1,2,1,2,1,1,2 503 | 1253917,4,1,1,2,2,1,2,1,1,2 504 | 1265899,4,1,1,1,2,1,3,1,1,2 505 | 1268766,1,1,1,1,2,1,1,1,1,2 506 | 1277268,3,3,1,1,2,1,1,1,1,2 507 | 1286943,8,10,10,10,7,5,4,8,7,4 508 | 1295508,1,1,1,1,2,4,1,1,1,2 509 | 1297327,5,1,1,1,2,1,1,1,1,2 510 | 1297522,2,1,1,1,2,1,1,1,1,2 511 | 1298360,1,1,1,1,2,1,1,1,1,2 512 | 1299924,5,1,1,1,2,1,2,1,1,2 513 | 1299994,5,1,1,1,2,1,1,1,1,2 514 | 1304595,3,1,1,1,1,1,2,1,1,2 515 | 1306282,6,6,7,10,3,10,8,10,2,4 516 | 1313325,4,10,4,7,3,10,9,10,1,4 517 | 1320077,1,1,1,1,1,1,1,1,1,2 518 | 1320077,1,1,1,1,1,1,2,1,1,2 519 | 1320304,3,1,2,2,2,1,1,1,1,2 520 | 1330439,4,7,8,3,4,10,9,1,1,4 521 | 333093,1,1,1,1,3,1,1,1,1,2 522 | 369565,4,1,1,1,3,1,1,1,1,2 523 | 412300,10,4,5,4,3,5,7,3,1,4 524 | 672113,7,5,6,10,4,10,5,3,1,4 525 | 749653,3,1,1,1,2,1,2,1,1,2 526 | 769612,3,1,1,2,2,1,1,1,1,2 527 | 769612,4,1,1,1,2,1,1,1,1,2 528 | 798429,4,1,1,1,2,1,3,1,1,2 529 | 807657,6,1,3,2,2,1,1,1,1,2 530 | 8233704,4,1,1,1,1,1,2,1,1,2 531 | 837480,7,4,4,3,4,10,6,9,1,4 532 | 867392,4,2,2,1,2,1,2,1,1,2 533 | 869828,1,1,1,1,1,1,3,1,1,2 534 | 1043068,3,1,1,1,2,1,2,1,1,2 535 | 1056171,2,1,1,1,2,1,2,1,1,2 536 | 1061990,1,1,3,2,2,1,3,1,1,2 537 | 1113061,5,1,1,1,2,1,3,1,1,2 538 | 1116192,5,1,2,1,2,1,3,1,1,2 539 | 1135090,4,1,1,1,2,1,2,1,1,2 540 | 1145420,6,1,1,1,2,1,2,1,1,2 541 | 1158157,5,1,1,1,2,2,2,1,1,2 542 | 1171578,3,1,1,1,2,1,1,1,1,2 543 | 1174841,5,3,1,1,2,1,1,1,1,2 544 | 1184586,4,1,1,1,2,1,2,1,1,2 545 | 1186936,2,1,3,2,2,1,2,1,1,2 546 | 1197527,5,1,1,1,2,1,2,1,1,2 547 | 1222464,6,10,10,10,4,10,7,10,1,4 548 | 1240603,2,1,1,1,1,1,1,1,1,2 549 | 1240603,3,1,1,1,1,1,1,1,1,2 550 | 1241035,7,8,3,7,4,5,7,8,2,4 551 | 1287971,3,1,1,1,2,1,2,1,1,2 552 | 1289391,1,1,1,1,2,1,3,1,1,2 553 | 1299924,3,2,2,2,2,1,4,2,1,2 554 | 1306339,4,4,2,1,2,5,2,1,2,2 555 | 1313658,3,1,1,1,2,1,1,1,1,2 556 | 1313982,4,3,1,1,2,1,4,8,1,2 557 | 1321264,5,2,2,2,1,1,2,1,1,2 558 | 1321321,5,1,1,3,2,1,1,1,1,2 559 | 1321348,2,1,1,1,2,1,2,1,1,2 560 | 
1321931,5,1,1,1,2,1,2,1,1,2 561 | 1321942,5,1,1,1,2,1,3,1,1,2 562 | 1321942,5,1,1,1,2,1,3,1,1,2 563 | 1328331,1,1,1,1,2,1,3,1,1,2 564 | 1328755,3,1,1,1,2,1,2,1,1,2 565 | 1331405,4,1,1,1,2,1,3,2,1,2 566 | 1331412,5,7,10,10,5,10,10,10,1,4 567 | 1333104,3,1,2,1,2,1,3,1,1,2 568 | 1334071,4,1,1,1,2,3,2,1,1,2 569 | 1343068,8,4,4,1,6,10,2,5,2,4 570 | 1343374,10,10,8,10,6,5,10,3,1,4 571 | 1344121,8,10,4,4,8,10,8,2,1,4 572 | 142932,7,6,10,5,3,10,9,10,2,4 573 | 183936,3,1,1,1,2,1,2,1,1,2 574 | 324382,1,1,1,1,2,1,2,1,1,2 575 | 378275,10,9,7,3,4,2,7,7,1,4 576 | 385103,5,1,2,1,2,1,3,1,1,2 577 | 690557,5,1,1,1,2,1,2,1,1,2 578 | 695091,1,1,1,1,2,1,2,1,1,2 579 | 695219,1,1,1,1,2,1,2,1,1,2 580 | 824249,1,1,1,1,2,1,3,1,1,2 581 | 871549,5,1,2,1,2,1,2,1,1,2 582 | 878358,5,7,10,6,5,10,7,5,1,4 583 | 1107684,6,10,5,5,4,10,6,10,1,4 584 | 1115762,3,1,1,1,2,1,1,1,1,2 585 | 1217717,5,1,1,6,3,1,1,1,1,2 586 | 1239420,1,1,1,1,2,1,1,1,1,2 587 | 1254538,8,10,10,10,6,10,10,10,1,4 588 | 1261751,5,1,1,1,2,1,2,2,1,2 589 | 1268275,9,8,8,9,6,3,4,1,1,4 590 | 1272166,5,1,1,1,2,1,1,1,1,2 591 | 1294261,4,10,8,5,4,1,10,1,1,4 592 | 1295529,2,5,7,6,4,10,7,6,1,4 593 | 1298484,10,3,4,5,3,10,4,1,1,4 594 | 1311875,5,1,2,1,2,1,1,1,1,2 595 | 1315506,4,8,6,3,4,10,7,1,1,4 596 | 1320141,5,1,1,1,2,1,2,1,1,2 597 | 1325309,4,1,2,1,2,1,2,1,1,2 598 | 1333063,5,1,3,1,2,1,3,1,1,2 599 | 1333495,3,1,1,1,2,1,2,1,1,2 600 | 1334659,5,2,4,1,1,1,1,1,1,2 601 | 1336798,3,1,1,1,2,1,2,1,1,2 602 | 1344449,1,1,1,1,1,1,2,1,1,2 603 | 1350568,4,1,1,1,2,1,2,1,1,2 604 | 1352663,5,4,6,8,4,1,8,10,1,4 605 | 188336,5,3,2,8,5,10,8,1,2,4 606 | 352431,10,5,10,3,5,8,7,8,3,4 607 | 353098,4,1,1,2,2,1,1,1,1,2 608 | 411453,1,1,1,1,2,1,1,1,1,2 609 | 557583,5,10,10,10,10,10,10,1,1,4 610 | 636375,5,1,1,1,2,1,1,1,1,2 611 | 736150,10,4,3,10,3,10,7,1,2,4 612 | 803531,5,10,10,10,5,2,8,5,1,4 613 | 822829,8,10,10,10,6,10,10,10,10,4 614 | 1016634,2,3,1,1,2,1,2,1,1,2 615 | 1031608,2,1,1,1,1,1,2,1,1,2 616 | 1041043,4,1,3,1,2,1,2,1,1,2 617 | 1042252,3,1,1,1,2,1,2,1,1,2 618 | 1057067,1,1,1,1,1,?,1,1,1,2 619 | 1061990,4,1,1,1,2,1,2,1,1,2 620 | 1073836,5,1,1,1,2,1,2,1,1,2 621 | 1083817,3,1,1,1,2,1,2,1,1,2 622 | 1096352,6,3,3,3,3,2,6,1,1,2 623 | 1140597,7,1,2,3,2,1,2,1,1,2 624 | 1149548,1,1,1,1,2,1,1,1,1,2 625 | 1174009,5,1,1,2,1,1,2,1,1,2 626 | 1183596,3,1,3,1,3,4,1,1,1,2 627 | 1190386,4,6,6,5,7,6,7,7,3,4 628 | 1190546,2,1,1,1,2,5,1,1,1,2 629 | 1213273,2,1,1,1,2,1,1,1,1,2 630 | 1218982,4,1,1,1,2,1,1,1,1,2 631 | 1225382,6,2,3,1,2,1,1,1,1,2 632 | 1235807,5,1,1,1,2,1,2,1,1,2 633 | 1238777,1,1,1,1,2,1,1,1,1,2 634 | 1253955,8,7,4,4,5,3,5,10,1,4 635 | 1257366,3,1,1,1,2,1,1,1,1,2 636 | 1260659,3,1,4,1,2,1,1,1,1,2 637 | 1268952,10,10,7,8,7,1,10,10,3,4 638 | 1275807,4,2,4,3,2,2,2,1,1,2 639 | 1277792,4,1,1,1,2,1,1,1,1,2 640 | 1277792,5,1,1,3,2,1,1,1,1,2 641 | 1285722,4,1,1,3,2,1,1,1,1,2 642 | 1288608,3,1,1,1,2,1,2,1,1,2 643 | 1290203,3,1,1,1,2,1,2,1,1,2 644 | 1294413,1,1,1,1,2,1,1,1,1,2 645 | 1299596,2,1,1,1,2,1,1,1,1,2 646 | 1303489,3,1,1,1,2,1,2,1,1,2 647 | 1311033,1,2,2,1,2,1,1,1,1,2 648 | 1311108,1,1,1,3,2,1,1,1,1,2 649 | 1315807,5,10,10,10,10,2,10,10,10,4 650 | 1318671,3,1,1,1,2,1,2,1,1,2 651 | 1319609,3,1,1,2,3,4,1,1,1,2 652 | 1323477,1,2,1,3,2,1,2,1,1,2 653 | 1324572,5,1,1,1,2,1,2,2,1,2 654 | 1324681,4,1,1,1,2,1,2,1,1,2 655 | 1325159,3,1,1,1,2,1,3,1,1,2 656 | 1326892,3,1,1,1,2,1,2,1,1,2 657 | 1330361,5,1,1,1,2,1,2,1,1,2 658 | 1333877,5,4,5,1,8,1,3,6,1,2 659 | 1334015,7,8,8,7,3,10,7,2,3,4 660 | 1334667,1,1,1,1,2,1,1,1,1,2 661 | 1339781,1,1,1,1,2,1,2,1,1,2 662 | 1339781,4,1,1,1,2,1,3,1,1,2 663 | 
13454352,1,1,3,1,2,1,2,1,1,2 664 | 1345452,1,1,3,1,2,1,2,1,1,2 665 | 1345593,3,1,1,3,2,1,2,1,1,2 666 | 1347749,1,1,1,1,2,1,1,1,1,2 667 | 1347943,5,2,2,2,2,1,1,1,2,2 668 | 1348851,3,1,1,1,2,1,3,1,1,2 669 | 1350319,5,7,4,1,6,1,7,10,3,4 670 | 1350423,5,10,10,8,5,5,7,10,1,4 671 | 1352848,3,10,7,8,5,8,7,4,1,4 672 | 1353092,3,2,1,2,2,1,3,1,1,2 673 | 1354840,2,1,1,1,2,1,3,1,1,2 674 | 1354840,5,3,2,1,3,1,1,1,1,2 675 | 1355260,1,1,1,1,2,1,2,1,1,2 676 | 1365075,4,1,4,1,2,1,1,1,1,2 677 | 1365328,1,1,2,1,2,1,2,1,1,2 678 | 1368267,5,1,1,1,2,1,1,1,1,2 679 | 1368273,1,1,1,1,2,1,1,1,1,2 680 | 1368882,2,1,1,1,2,1,1,1,1,2 681 | 1369821,10,10,10,10,5,10,10,10,7,4 682 | 1371026,5,10,10,10,4,10,5,6,3,4 683 | 1371920,5,1,1,1,2,1,3,2,1,2 684 | 466906,1,1,1,1,2,1,1,1,1,2 685 | 466906,1,1,1,1,2,1,1,1,1,2 686 | 534555,1,1,1,1,2,1,1,1,1,2 687 | 536708,1,1,1,1,2,1,1,1,1,2 688 | 566346,3,1,1,1,2,1,2,3,1,2 689 | 603148,4,1,1,1,2,1,1,1,1,2 690 | 654546,1,1,1,1,2,1,1,1,8,2 691 | 654546,1,1,1,3,2,1,1,1,1,2 692 | 695091,5,10,10,5,4,5,4,4,1,4 693 | 714039,3,1,1,1,2,1,1,1,1,2 694 | 763235,3,1,1,1,2,1,2,1,2,2 695 | 776715,3,1,1,1,3,2,1,1,1,2 696 | 841769,2,1,1,1,2,1,1,1,1,2 697 | 888820,5,10,10,3,7,3,8,10,2,4 698 | 897471,4,8,6,4,3,4,10,6,1,4 699 | 897471,4,8,8,5,4,5,10,4,1,4 -------------------------------------------------------------------------------- /Chapter03/DecisionTree.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 21-Jun-2017 3 | 4 | @author: aii32199 5 | ''' 6 | import sys 7 | 8 | import numpy as np 9 | 10 | 11 | # Calculate the Gini index for a split dataset 12 | def gini_index(groups, class_values): 13 | 14 | #Initialize Gini variable 15 | gini = 0.0 16 | 17 | #Calculate proportion for each class 18 | for class_value in class_values: 19 | #Extract groups 20 | for group in groups: 21 | #Number of instances in the group 22 | size = len(group) 23 | if size == 0: 24 | continue 25 | #Initialize a list to store class index of the instances 26 | r = [] 27 | #get class of each instance in the group 28 | for row in group: 29 | r.append(row[-1]) 30 | #Count number of instances belonging to the current class 31 | class_count = r.count(class_value) 32 | #Calculate class proportion 33 | proportion = class_count/float(size) 34 | #Calculate Gini index 35 | gini += (proportion * (1.0 - proportion)) 36 | return gini 37 | 38 | def createSplit(attribute,threshold,dataset): 39 | 40 | #Initialize two lists to store the sub sets 41 | lesser, greater = list(),list() 42 | 43 | #Loop through the attribute values and create sub set out of it 44 | for values in dataset: 45 | #Apply threshold 46 | if values[attribute]<=threshold: 47 | lesser.append(values) 48 | else: 49 | greater.append(values) 50 | return lesser,greater 51 | 52 | # Select the best split point for the data set 53 | def getNode(dataset): 54 | 55 | class_values = [] 56 | 57 | #Extract unique class values present in the data set 58 | for row in dataset: 59 | class_values.append(row[-1]) 60 | class_values = np.unique(np.array(class_values)) 61 | 62 | #Initialize variables to store gini score, attribute index and split groups 63 | winnerAttribute = sys.maxsize 64 | attributeValue = sys.maxsize 65 | gScore = sys.maxsize 66 | leftGroup = None 67 | 68 | #Run loop to access each attribute and attribute values 69 | for index in range(len(dataset[0])-1): 70 | for row in dataset: 71 | 72 | #Create the groups for the current attribute value 73 | groups = createSplit(index, row[index], dataset) 74 | 75 | #Extract gini score for the threshold 76 | gini = gini_index(groups, class_values) 77 | 78 | #If the gini score is lower than the previous best keep this split 79 | if gini < gScore: 80 | winnerAttribute, attributeValue, gScore, leftGroup = index, row[index], gini, groups 81 | 82 | #Once done create a dictionary for the node 83 | node = {'attribute':winnerAttribute,'value':attributeValue,'groups':leftGroup} 84 | return node 85 | 86 | #Create a terminal node holding the most frequent class value in the group 87 | def terminalNode(group): 88 | outcomes = [row[-1] for row in group] 89 | return max(set(outcomes), key=outcomes.count) 90 | 91 | # Create child splits for a node or make terminal 92 | def buildTree(node, max_depth, min_size, depth): 93 | 94 | #Lets get groups information first. 95 | left, right = node['groups'] 96 | del(node['groups']) 97 | 98 | # check if there are any elements in the left and right group 99 | if not left or not right: 100 | 101 | #If there is no element in the groups call terminal Node 102 | combined = left+right 103 | node['left'] = terminalNode(combined) 104 | node['right']= terminalNode(combined) 105 | return 106 | 107 | # check if we have reached the maximum depth 108 | if depth >= max_depth: 109 | node['left']=terminalNode(left) 110 | node['right'] = terminalNode(right) 111 | return 112 | # if all is okay, let's start building the tree for the left side nodes 113 | # if the node holds fewer instances than the minimum size, stop building further 114 | if len(left) <= min_size: 115 | node['left'] = terminalNode(left) 116 | 117 | else: 118 | #Create new node under left side of the tree 119 | node['left'] = getNode(left) 120 | #append node under the tree and increase depth by one.
121 | buildTree(node['left'], max_depth, min_size, depth+1) #recursion will take place in here 122 | 123 | 124 | # Similar procedure for the right side nodes 125 | if len(right) <= min_size: 126 | node['right'] = terminalNode(right) 127 | 128 | else: 129 | node['right'] = getNode(right) 130 | buildTree(node['right'], max_depth, min_size, depth+1) 131 | 132 | 133 | 134 | # Build a decision tree 135 | def build_tree(train, max_depth, min_size): 136 | root = getNode(train) 137 | buildTree(root, max_depth, min_size, 1) 138 | return root 139 | 140 | 141 | # Print a decision tree 142 | def print_tree(node, depth=0): 143 | if isinstance(node, dict): 144 | print('%s[X%d < %.2f]' % ((depth*' ', (node['attribute']+1), node['value']))) 145 | print_tree(node['left'], depth+1) 146 | print_tree(node['right'], depth+1) 147 | else: 148 | print('%s[%s]' % ((depth*' ', node))) 149 | 150 | #Function to get prediction from input tree 151 | def predict(node, row): 152 | 153 | #Get the node value and check whether the attribute value is less than or equal. 154 | if row[node['attribute']] <= node['value']: 155 | #If yes enter into left branch and check whether it has another node or the class value. 156 | if isinstance(node['left'], dict): 157 | return predict(node['left'], row)#Recursion 158 | else: 159 | #If there is no node in the branch 160 | return node['left'] 161 | else: 162 | if isinstance(node['right'], dict): 163 | return predict(node['right'], row) 164 | else: 165 | return node['right'] 166 | 167 | #Function to check accuracy of the data set 168 | def accuracy_metric(actual, predicted): 169 | correct = 0 170 | for i in range(len(actual)): 171 | if actual[i] == predicted[i]: 172 | correct += 1 173 | return correct / float(len(actual)) * 100.0 174 | 175 | #Function to convert string attribute values to float 176 | def str_column_to_float(dataset, column): 177 | for row in dataset: 178 | if row[column]=='?': 179 | row[column] = 0 180 | else: 181 | row[column] = float(row[column].strip()) 182 | 183 | # Convert string column to integer 184 | def str_column_to_int(dataset, column): 185 | class_values = [row[column] for row in dataset] 186 | unique = set(class_values) 187 | lookup = dict() 188 | for i, value in enumerate(unique): 189 | lookup[value] = i 190 | for row in dataset: 191 | row[column] = lookup[row[column]] 192 | return lookup 193 | ################# Functions for Random Forest ############## 194 | # Build a decision tree 195 | def build_tree_RF(train, max_depth, min_size,nfeatures): 196 | root = getNodeRF(train,nfeatures) 197 | buildTreeRF(root, max_depth, min_size, 1,nfeatures) 198 | return root 199 | 200 | # Create child splits for a node or make terminal 201 | def buildTreeRF(node, max_depth, min_size, depth,nfeatures): 202 | #Lets get groups information first. 
203 | left, right = node['groups'] 204 | del(node['groups']) 205 | # check if there are any elements in the left and right group 206 | if not left or not right: 207 | #If there is no element in the groups call terminal Node 208 | combined = left+right 209 | node['left'] = terminalNode(combined) 210 | node['right']= terminalNode(combined) 211 | return 212 | # check if we have reached the maximum depth 213 | if depth >= max_depth: 214 | node['left']=terminalNode(left) 215 | node['right'] = terminalNode(right) 216 | return 217 | # if all is okay, let's start building the tree for the left side nodes 218 | # if the node holds fewer instances than the minimum size, stop building further 219 | if len(left) <= min_size: 220 | node['left'] = terminalNode(left) 221 | 222 | else: 223 | #Create new node under left side of the tree 224 | node['left'] = getNodeRF(left,nfeatures) 225 | #append node under the tree and increase depth by one. 226 | buildTreeRF(node['left'], max_depth, min_size, depth+1, nfeatures) #recursion continues with the random feature subset 227 | 228 | 229 | # Similar procedure for the right side nodes 230 | if len(right) <= min_size: 231 | node['right'] = terminalNode(right) 232 | 233 | else: 234 | node['right'] = getNodeRF(right,nfeatures) 235 | buildTreeRF(node['right'], max_depth, min_size, depth+1, nfeatures) 236 | 237 | # Select the best split point for a dataset 238 | from random import randrange 239 | def getNodeRF(dataset,n_features): 240 | 241 | class_values = [] 242 | for row in dataset: 243 | class_values.append(row[-1]) 244 | 245 | #Extract unique class values present in the dataset 246 | class_values = np.unique(np.array(class_values)) 247 | 248 | #Initialize variables to store gini score, attribute index and split groups 249 | winnerAttribute = sys.maxsize 250 | attributeValue = sys.maxsize 251 | gScore = sys.maxsize 252 | leftGroup = None 253 | 254 | #Select random features 255 | features = list() 256 | while len(features) < n_features: 257 | index = randrange(len(dataset[0])-1) 258 | if index not in features: 259 | features.append(index) 260 | 261 | #Run loop to access each attribute and attribute values 262 | for index in features: 263 | for row in dataset: 264 | groups = createSplit(index, row[index], dataset) 265 | gini = gini_index(groups, class_values) 266 | if gini < gScore: 267 | winnerAttribute, attributeValue, gScore, leftGroup = index, row[index], gini, groups 268 | #Once done create a dictionary for node 269 | node = {'attribute':winnerAttribute,'value':attributeValue,'groups':leftGroup} 270 | return node 271 | 272 | # Create a random subsample from the dataset with replacement 273 | def subsample(dataset, ratio): 274 | sample = list() 275 | n_sample = round(len(dataset) * ratio) 276 | while len(sample) < n_sample: 277 | index = randrange(len(dataset)) 278 | sample.append(dataset[index]) 279 | return sample 280 | 281 | # Make a prediction with a list of bagged trees 282 | def bagging_predict(trees, row): 283 | predictions = [predict(tree, row) for tree in trees] 284 | return max(set(predictions), key=predictions.count) 285 | 286 | # Random Forest Algorithm 287 | def random_forest(train, test, max_depth, min_size, sample_size, n_trees, n_features): 288 | trees = list() 289 | for i in range(n_trees): 290 | sample = subsample(train, sample_size) 291 | tree = build_tree_RF(sample, max_depth, min_size, n_features) 292 | trees.append(tree) 293 | predictions = [bagging_predict(trees, row) for row in test] 294 | return(predictions) 295 | 296 | #Create cross validation sets 297 | def cross_validation_split(dataset, n_folds): 298 |
dataset_split = list() 299 | dataset_copy = list(dataset) 300 | fold_size = int(len(dataset) / n_folds) 301 | for i in range(n_folds): 302 | fold = list() 303 | while len(fold) < fold_size: 304 | index = randrange(len(dataset_copy)) 305 | fold.append(dataset_copy.pop(index)) 306 | dataset_split.append(fold) 307 | return dataset_split 308 | 309 | # Evaluate an algorithm using a cross validation split 310 | def evaluate_algorithm(dataset, algorithm, n_folds, *args): 311 | folds = cross_validation_split(dataset, n_folds) 312 | scores = list() 313 | for fold in folds: 314 | train_set = list(folds) 315 | train_set.remove(fold) 316 | train_set = sum(train_set, []) 317 | test_set = list() 318 | for row in fold: 319 | row_copy = list(row) 320 | test_set.append(row_copy) 321 | row_copy[-1] = None 322 | predicted = algorithm(train_set, test_set, *args) 323 | actual = [row[-1] for row in fold] 324 | accuracy = accuracy_metric(actual, predicted) 325 | scores.append(accuracy) 326 | return scores -------------------------------------------------------------------------------- /Chapter03/PracticalApplication.py: -------------------------------------------------------------------------------- 1 | from Chapter_03 import DecisionTree_CART_RF as CART 2 | import pprint 3 | filename = 'bcancer.csv' 4 | dataset = CART.load_csv(filename) 5 | # convert string attributes to floats 6 | for i in range(0, len(dataset[0])): 7 | CART.str_column_to_float(dataset, i) 8 | 9 | #Now remove index column from the data set 10 | dataset_new = [] 11 | for row in dataset: 12 | dataset_new.append([row[i] for i in range(1,len(row))]) 13 | 14 | #Get training and testing data split 15 | training,testing = CART.getTrainTestData(dataset_new, 0.7) 16 | tree = CART.build_tree(training,11,5) 17 | pprint.pprint(tree) 18 | 19 | pre = [] 20 | act = [] 21 | for row in training: 22 | prediction = CART.predict(tree, row) 23 | pre.append(prediction) 24 | act.append(row[-1]) 25 | # print('Expected=%d, Got=%d' % (row[-1], prediction)) 26 | # print_tree(tree) 27 | acc = CART.accuracy_metric(act, pre) 28 | 29 | print('training accuracy: %.2f'%acc) 30 | 31 | #Reset the lists so the testing accuracy is computed on the test set only 32 | pre = [] 33 | act = [] 34 | for row in testing: 35 | prediction = CART.predict(tree, row) 36 | pre.append(prediction) 37 | act.append(row[-1]) 38 | acc = CART.accuracy_metric(act, pre) 39 | # pprint.pprint(tree) 40 | print('testing accuracy: %.2f'%acc) -------------------------------------------------------------------------------- /Chapter03/RandomForest.py: -------------------------------------------------------------------------------- 1 | from Chapter_03 import DecisionTree_CART_RF as rf 2 | filename = 'bcancer.csv' 3 | dataset = rf.load_csv(filename) 4 | # convert string attributes to floats 5 | for i in range(0, len(dataset[0])-1): 6 | rf.str_column_to_float(dataset, i) 7 | # convert class column to integers 8 | rf.str_column_to_int(dataset, len(dataset[0])-1) 9 | 10 | dataset_new = [] 11 | for row in dataset: 12 | dataset_new.append([row[i] for i in range(1,len(row))]) 13 | # # evaluate algorithm 14 | dataset = dataset_new 15 | n_folds = 5 16 | max_depth = 3 17 | min_size = 1 18 | sample_size = 0.5 19 | n_features = 5 #int(sqrt(len(dataset[0])-1)) 20 | print("features: %d"%n_features) 21 | 22 | for n_trees in [1, 5, 10]: 23 | scores = rf.evaluate_algorithm(dataset, rf.random_forest, n_folds, max_depth, min_size, sample_size, n_trees, n_features) 24 | print('Trees: %d' % n_trees) 25 | print('Scores: %s' % scores) 26 | print('Mean Accuracy: %.3f%%' % (sum(scores)/float(len(scores))))
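A quick way to sanity-check the CART split machinery above is to run createSplit and gini_index on a tiny hand-made data set. The snippet below is an illustrative sketch rather than part of the repository; it assumes DecisionTree.py is importable as a module named DecisionTree (adjust the import to your local package layout). A threshold that separates the classes perfectly should score a Gini index of 0.0, while a mixed split scores higher.

#Hypothetical sanity check; DecisionTree is an assumed module name
from DecisionTree import createSplit, gini_index

#Toy data set: one attribute plus a class label per row
toy = [[1.3, 0], [2.7, 0], [3.1, 1], [3.6, 1]]

#A clean split at threshold 2.7 separates the two classes perfectly
groups = createSplit(0, 2.7, toy)
print(gini_index(groups, [0, 1]))   #prints 0.0 -> both groups are pure

#A mixed split at threshold 3.1 leaves an impure lesser group
groups = createSplit(0, 3.1, toy)
print(gini_index(groups, [0, 1]))   #prints ~0.444 -> the lesser group mixes classes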
-------------------------------------------------------------------------------- /Chapter04/KNN.py: -------------------------------------------------------------------------------- 1 | #Import math for calculations of square roots 2 | import math 3 | import operator 4 | from random import randrange 5 | 6 | #Function to get distance between test instance and training set 7 | def DistanceMetric(instance1, instance2, isClass=None): 8 | 9 | #If the class variable is included in the instance 10 | if isClass: 11 | length = len(instance1)-1 12 | else: 13 | length = len(instance1) 14 | 15 | #Initialize variable to store distance 16 | distance = 0 17 | 18 | #Let's run a loop to calculate element wise differences 19 | for x in range(length): 20 | 21 | #Euclidean distance 22 | distance += pow((instance1[x] - instance2[x]), 2) 23 | 24 | return math.sqrt(distance) 25 | 26 | #Function to get nearest neighbors 27 | def getNeighbors(trainingSet, testInstance, k): 28 | 29 | #Create a list variable to store distances between test and training instance. 30 | distances = [] 31 | 32 | #Get distance between each instance in the training set and the test instance. 33 | for x in range(len(trainingSet)): 34 | 35 | #As the training set includes the class variable, isClass is set to True 36 | dist = DistanceMetric(testInstance, trainingSet[x], isClass=True) 37 | 38 | #Append the distance of each instance to the distance list 39 | distances.append((trainingSet[x], dist)) 40 | 41 | #Sort the distances in ascending order 42 | distances.sort(key=operator.itemgetter(1)) 43 | 44 | #Create a list to store the neighbors 45 | neighbors = [] 46 | 47 | #Run a loop to get k neighbors from the sorted distances. 48 | for x in range(k): 49 | neighbors.append(distances[x][0]) 50 | return neighbors 51 | 52 | #Function to get prediction 53 | def getPrediction(neighbors): 54 | 55 | #Create a dictionary variable to store votes from the neighbors 56 | #We will use class attribute as the dictionary keys and their occurrence as key value. 57 | classVotes = {} 58 | 59 | #Go to each neighbor and take the vote for the class 60 | for x in range(len(neighbors)): 61 | 62 | #Get the class value of the neighbor 63 | response = neighbors[x][-1] 64 | 65 | #Create the class key if it's not there; 66 | #If the class key is in the dictionary increase it by one. 67 | if response in classVotes: 68 | classVotes[response] += 1 69 | else: 70 | classVotes[response] = 1 71 | #Sort the dictionary keys on the basis of key values in descending order 72 | sortedVotes = sorted(classVotes.items(), key=operator.itemgetter(1), reverse=True) 73 | 74 | #Return the key name (class) with the highest value 75 | return sortedVotes[0][0] 76 | ####### KNN Bagging ############ 77 | 78 | def DistanceMetricBagged(instance1, instance2,n_features): 79 | 80 | #Initialize variable to store distance 81 | distance = 0 82 | features = list() 83 | 84 | #Select random features to apply sub space bagging 85 | while len(features) < n_features: 86 | index = randrange(len(instance1)-1) 87 | if index not in features: 88 | features.append(index) 89 | 90 | #Let's run a loop to calculate element wise differences for the selected features only. 91 | for x in features: 92 | #Euclidean distance 93 | distance += pow((instance1[x] - instance2[x]), 2) 94 | 95 | return math.sqrt(distance) 96 | 97 | def getNeighborsBagged(trainingSet, testInstance, k,n_features): 98 | 99 | #Create a list variable to store distances between test and training instance.
100 | distances = [] 101 | 102 | #Get distance between each instance in the training set and the test instance. 103 | for x in range(len(trainingSet)): 104 | #A fresh random feature subset is drawn inside DistanceMetricBagged for each distance 105 | dist = DistanceMetricBagged(testInstance, trainingSet[x],n_features) 106 | 107 | #Append the distance of each instance to the distance list 108 | distances.append((trainingSet[x], dist)) 109 | 110 | #Sort the distances in ascending order 111 | distances.sort(key=operator.itemgetter(1)) 112 | 113 | #Create a list to store the neighbors 114 | neighbors = [] 115 | 116 | #Run a loop to get k neighbors from the sorted distances. 117 | for x in range(k): 118 | neighbors.append(distances[x][0]) 119 | return neighbors -------------------------------------------------------------------------------- /Chapter04/SpamClassification.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 02-Sep-2017 3 | 4 | @author: DX 5 | ''' 6 | 7 | #Reuse the CSV and data-handling helpers from chapter 3 8 | from Chapter_03.DecisionTree_CART_RF import load_csv, getTrainTestData, accuracy_metric, str_column_to_float 9 | from Chapter_04 import KNN 10 | import numpy as np 11 | 12 | 13 | #Read CSV file 14 | dataName = 'spamData.csv' 15 | 16 | #Use function load_csv from chapter 3 17 | dataset = load_csv(dataName) 18 | 19 | #Create an empty list to store the data set 20 | dataset_new = [] 21 | 22 | #Drop the last row of the data set, which is incomplete 23 | for i in range(len(dataset)-1): 24 | dataset_new.append(dataset[i]) 25 | dataset = dataset_new 26 | 27 | #Use function str_column_to_float from chapter 3 to convert string values to float 28 | for i in range(0, len(dataset[0])-1): 29 | str_column_to_float(dataset, i) 30 | 31 | str_column_to_float(dataset, len(dataset[0])-1) 32 | 33 | #Split train and test data set using function getTrainTestData 34 | #We will use 80% of the data set as training set and rest for testing 35 | train,test = getTrainTestData(dataset,0.8) 36 | 37 | train = np.array(train) 38 | test = np.array(test) 39 | 40 | shape = np.shape(train) 41 | xtrain = train[:,0:shape[1]-1] 42 | ytrain = train[:,shape[1]-1] 43 | 44 | xtest = test[:,0:shape[1]-1] 45 | ytest = test[:,shape[1]-1] 46 | 47 | #Create empty list to store predictions and actual output 48 | testPredictions=[] 49 | testActual=[] 50 | 51 | #Select number of neighbors for each classifier 52 | k = 7 53 | 54 | #Select sample size 55 | sample_size = 500 56 | 57 | #Select number of random features 58 | n_features = 20 59 | 60 | #Calculate number of classifiers on the basis of number of samples.
61 | n_classifier = np.uint8(len(train)/sample_size) 62 | 63 | #Get prediction for each test instance and store them into the list 64 | for i in range(0,len(test)): 65 | predictions = [] 66 | 67 | #Run the loop once per sample so that every classifier gets a vote 68 | for cl in range(n_classifier): 69 | 70 | #Randomly shuffle training set and create sample out of it 71 | np.random.shuffle(train) 72 | sample = [train[row] for row in range(sample_size)] 73 | 74 | #Pick test instance 75 | test_instance = test[i] 76 | 77 | #Get neighbors and prediction on the basis of neighbors 78 | neighbors = KNN.getNeighborsBagged(sample, test_instance, k,n_features) 79 | pred = KNN.getPrediction(neighbors) 80 | 81 | #Append prediction against each sample with random features 82 | predictions.append(pred) 83 | 84 | #Get final prediction using majority voting from each classifier 85 | fin_pred = max(set(predictions), key=predictions.count) 86 | testActual.append(test_instance[-1]) 87 | testPredictions.append(fin_pred) 88 | print ("Actual: %s Predicted: %s"%(test_instance[-1],fin_pred)) 89 | 90 | #Use accuracy_metric function to evaluate our results 91 | accuracy = accuracy_metric(testActual,testPredictions) 92 | 93 | #Print accuracy 94 | print("Accuracy of the classification: %0.2f"%accuracy) -------------------------------------------------------------------------------- /Chapter04/knnAlgoTest.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 02-Sep-2017 3 | 4 | @author: DX 5 | ''' 6 | #Import numpy for shuffling the toy data set 7 | import numpy as np 8 | 9 | from Chapter_04 import KNN 10 | dataset = [[5.1, 3.5, 1.4, 0.2, 1], 11 | [4.9, 3.0, 1.4, 0.2, 1], 12 | [4.7, 3.2, 1.3, 0.2, 1], 13 | [4.6, 3.1, 1.5, 0.2, 1], 14 | [5.0, 3.6, 1.4, 0.2, 1], 15 | [7.0, 3.2, 4.7, 1.4, 2], 16 | [6.4, 6.2, 4.5, 1.5, 2], 17 | [6.9, 3.1, 4.9, 1.5, 2], 18 | [5.5, 2.3, 4.0, 1.3, 2], 19 | [6.5, 2.8, 4.6, 1.5, 2], 20 | [6.3, 3.3, 6.0, 2.5, 3], 21 | [5.8, 2.7, 5.1, 1.9, 3], 22 | [7.1, 3.0, 5.9, 2.1, 3], 23 | [6.3, 2.9, 5.6, 1.8, 3], 24 | [6.5, 3.0, 5.8, 2.2, 3]] 25 | 26 | np.random.shuffle(dataset) 27 | 28 | #Let's put in our test instance.
29 | testInstance=[4.8,3.1,3.0,1.3,1] 30 | 31 | #Now let's find the k nearest neighbors of our test instance using getNeighbors 32 | k = 5 33 | neighbors = KNN.getNeighbors(dataset, testInstance, k) 34 | 35 | #Print neighbors 36 | print(neighbors) 37 | 38 | #Get the class prediction out of neighbors 39 | prediction = KNN.getPrediction(neighbors) 40 | 41 | #Print prediction 42 | print("Predicted class for the test instance is: %d"%prediction) -------------------------------------------------------------------------------- /Chapter04/utilityFunctions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 02-Sep-2017 3 | 4 | @author: DX 5 | ''' 6 | from csv import reader 7 | from math import sqrt 8 | from random import seed 9 | from random import randrange 10 | import numpy as np 11 | 12 | # Load a CSV file 13 | def load_csv(filename): 14 | dataset = list() 15 | with open(filename, 'r') as file: 16 | csv_reader = reader(file) 17 | for row in csv_reader: 18 | if not row: 19 | continue 20 | dataset.append(row) 21 | return dataset 22 | 23 | def getTrainTestData(dataset,split): 24 | np.random.seed(0) 25 | training = [] 26 | testing = [] 27 | 28 | np.random.shuffle(dataset) 29 | shape = np.shape(dataset) 30 | trainlength = np.uint16(np.floor(split*shape[0])) 31 | 32 | for i in range(trainlength): 33 | training.append(dataset[i]) 34 | 35 | for i in range(trainlength,shape[0]): 36 | testing.append(dataset[i]) 37 | 38 | return training,testing 39 | 40 | # Convert string column to float 41 | def str_column_to_float(dataset, column,length): 42 | 43 | #for row in dataset: 44 | for i in range(length): 45 | row = dataset[i] 46 | if row[column]=='?': 47 | row[column] = 0 48 | else: 49 | row[column] = float(row[column].strip()) 50 | 51 | # Convert string column to integer 52 | def str_column_to_int(dataset, column,length): 53 | 54 | class_values=[] 55 | for i in range(length): 56 | row = dataset[i] 57 | class_values.append(row[column]) 58 | # class_values = [row[column] for row in dataset] 59 | unique = set(class_values) 60 | lookup = dict() 61 | for i, value in enumerate(unique): 62 | lookup[value] = i 63 | for i in range(length): 64 | row = dataset[i] 65 | row[column] = lookup[row[column]] 66 | return lookup 67 | 68 | # Split a dataset into k folds 69 | def cross_validation_split(dataset, n_folds): 70 | dataset_split = list() 71 | dataset_copy = list(dataset) 72 | fold_size = int(len(dataset) / n_folds) 73 | for i in range(n_folds): 74 | fold = list() 75 | while len(fold) < fold_size: 76 | index = randrange(len(dataset_copy)) 77 | fold.append(dataset_copy.pop(index)) 78 | dataset_split.append(fold) 79 | return dataset_split 80 | 81 | def subsample(dataset, n_sample): 82 | sample = list() 83 | #n_sample = round(len(dataset) * ratio) 84 | while len(sample) < n_sample: 85 | index = randrange(len(dataset)) 86 | sample.append(dataset[index]) 87 | return sample 88 | 89 | # Calculate accuracy percentage 90 | def accuracy_metric(actual, predicted): 91 | correct = 0 92 | for i in range(len(actual)): 93 | if actual[i] == predicted[i]: 94 | correct += 1 95 | return correct / float(len(actual)) * 100.0 -------------------------------------------------------------------------------- /Chapter05/AdaBoostFaceDetection.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 24-Nov-2017 3 | 4 | @author: aii32199 5 | ''' 6 | #So we will load the required library numpy for matrix operations 7 | import numpy as np 8 | 9 | #Import
the OpenCV library; in Python we call it cv2 10 | import cv2 11 | 12 | #OpenCV has a cascade classifier module based on Haar cascades and the 13 | #Adaboost algorithm, so we can call its methods directly. 14 | #First we will load the pre-trained classifiers for frontal face and eye 15 | #detection, which come as xml files (copies also ship in this chapter's Data folder). 16 | face_cascade = cv2.CascadeClassifier('E:/OpenCV/opencv/sources/data/haarcascades/haarcascade_frontalface_default.xml') 17 | eye_cascade = cv2.CascadeClassifier('E:/OpenCV/opencv/sources/data/haarcascades/haarcascade_eye.xml') 18 | 19 | #Now let us load an image from the local directory 20 | img = cv2.imread('download.jpg') 21 | 22 | #Let's convert the image into gray scale 23 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 24 | 25 | #Here we will call the method which will find the faces in our input image 26 | faces = face_cascade.detectMultiScale(gray, 1.3, 5) 27 | #Let's run a loop to create sub images of faces from the input image using 28 | #the cv2.rectangle function 29 | for (x,y,w,h) in faces: 30 | img = cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2) 31 | roi_gray = gray[y:y+h, x:x+w] 32 | roi_color = img[y:y+h, x:x+w] 33 | 34 | #Detect eyes inside the face window 35 | eyes = eye_cascade.detectMultiScale(roi_gray) 36 | #The following function will create the rectangles around the eyes 37 | for (ex,ey,ew,eh) in eyes: 38 | cv2.rectangle(roi_color,(ex,ey),(ex+ew,ey+eh),(0,255,0),2) 39 | #The following lines will show the detected face images 40 | cv2.imshow('img',img) 41 | cv2.waitKey(0) 42 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /Chapter05/Adaboost.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | from matplotlib import pyplot as plt 4 | 5 | def gini_index(groups, class_values): 6 | 7 | #Initialize Gini variable 8 | gini = 0.0 9 | 10 | #Calculate proportion for each class 11 | for class_value in class_values: 12 | 13 | #Extract groups 14 | for group in groups: 15 | #Number of instances in the group 16 | size = len(group) 17 | if size == 0: 18 | continue 19 | 20 | #Initialize lists to store the weights and classes of the instances 21 | r = [] 22 | cl = [] 23 | 24 | #get weight and class of each instance in the group 25 | for row in group: 26 | r.append(row[-1])#Weight Append 27 | cl.append(row[-2])#Class Append 28 | 29 | r = np.array(r) 30 | #Extract indexes of the instances with the current class value 31 | class_index = np.where(cl==class_value) 32 | 33 | #Initialize a variable to add the weights of current class 34 | w_add=0 35 | 36 | #Add the weights of the current class using class indexes 37 | for w in class_index[0]: 38 | w_add+= r[w] 39 | 40 | #Calculate class proportion using weights 41 | proportion = w_add/np.sum(r) 42 | 43 | #Calculate Gini index 44 | gini += (proportion * (1.0 - proportion)) 45 | return gini 46 | 47 | def createSplit(attribute,threshold,dataset): 48 | 49 | #Initialize two lists to store the sub sets 50 | lesser, greater = list(),list() 51 | 52 | #Loop through the attribute values and create sub set out of it 53 | for values in dataset: 54 | #Apply threshold 55 | if values[attribute]<=threshold: 56 | lesser.append(values) 57 | else: 58 | greater.append(values) 59 | return lesser,greater 60 | 61 | def getNode(dataset): 62 | class_values = [] 63 | #Extract unique class values present in the data set 64 | for row in dataset: 65 | class_values.append(row[-2])#Class values are in the second last column 66 | class_values = np.unique(class_values) 67 | 68 | #initialize
variables to store gini score, attribute index and split groups 69 | winnerAttribute = sys.maxsize 70 | attributeValue = sys.maxsize 71 | gScore = sys.maxsize 72 | leftGroup = None 73 | 74 | #Run loop to access each attribute and attribute values 75 | for index in range(len(dataset[0])-2):#leave last two columns 76 | for row in dataset: 77 | #Create the groups 78 | groups = createSplit(index, row[index], dataset) 79 | #Extract gini score for the threshold 80 | gini = gini_index(groups, class_values) 81 | #print('A%d <- %.2f Gini=%.1f' % ((index+1), row[index], gini)) 82 | #If the gini score is lower than the previous best keep this split 83 | if gini < gScore: 84 | winnerAttribute, attributeValue, gScore, leftGroup = index, row[index], gini, groups 85 | #print("winner attribute is %d with value %.2f gini is: %0.2f"%(winnerAttribute+1,attributeValue,gScore)) 86 | 87 | #Once done create a dictionary for node 88 | node = {'attribute':winnerAttribute,'value':attributeValue,'groups':leftGroup} 89 | return node 90 | 91 | def terminalNode(group): 92 | outcomes = [row[-2] for row in group] 93 | return max(set(outcomes), key=outcomes.count) 94 | 95 | def decision_stump(dataset): 96 | 97 | #Get node value with best gini score 98 | node = getNode(dataset) 99 | 100 | #Separate out the groups from the node and remove them 101 | left, right = node['groups'] 102 | del(node['groups']) 103 | 104 | #Check whether there is any element in the groups or not 105 | #If there is not any element put the class value with maximum occurrence 106 | if not left or not right: 107 | node['left'] = node['right'] = terminalNode(left + right) 108 | return node 109 | 110 | #Put the left group's most frequent class value in the left branch 111 | node['left']=terminalNode(left) 112 | 113 | #Put the right group's most frequent class value in the right branch 114 | node['right'] = terminalNode(right) 115 | #print(node) 116 | return node 117 | 118 | def predict(node, row): 119 | #Get the node value and check whether the attribute value is less than or equal. 120 | if row[node['attribute']] <= node['value']: 121 | #If yes, return the class value stored in the left branch;
122 | #a stump has no further nodes below its branches 123 | return node['left'] 124 | else: 125 | return node['right'] 126 | 127 | def getError(actual,predicted,weights): 128 | #Initialize the error variable 129 | error = 0 130 | 131 | #We will store the error of each instance in a vector 132 | error_vec=[] 133 | 134 | #Run a loop to calculate error for each instance 135 | for i in range(len(actual)): 136 | diff = predicted[i]!=actual[i] 137 | #Weights multiplication to the difference of actual and predicted values 138 | error+= weights[i]*(diff) 139 | 140 | #Append the difference to the error vector 141 | error_vec.append(diff) 142 | 143 | return error,error_vec 144 | 145 | def AdaBoostAlgorithm(dataset,iterations): 146 | 147 | #Initialize uniform weights, one per instance in the data set 148 | weights = np.ones(len(dataset),dtype="float32")/len(dataset) 149 | dataset = np.array(dataset) 150 | 151 | #Add Weights column to the data set(Now last column will be the weights) 152 | dataset = np.c_[dataset,weights] 153 | 154 | #Create an empty list to store alpha values 155 | alphas = [] 156 | 157 | #Create a list to add weak learners(decision stumps) 158 | weaks = [] 159 | 160 | er = sys.maxsize 161 | #Let's run the loop for the number of iterations(number of classifiers) 162 | for itr in range(iterations): 163 | 164 | #Fit a decision stump to the weighted data set 165 | ds = decision_stump(dataset) 166 | 167 | #Create a list to store the predictions of the decision stump 168 | pred=[] 169 | 170 | #Create a list to store actual outputs 171 | actual = [] 172 | 173 | #Let's predict output for each instance in the data set 174 | for row in dataset: 175 | actual.append(row[-2]) 176 | pred.append(predict(ds, row)) 177 | 178 | #Here we will find out the weighted error between predicted and actual output 179 | error,error_vec = getError(actual, pred,weights) 180 | 181 | #If the error is 0 the stump already fits the weighted data perfectly; skip the update 182 | if error==0.0: 183 | continue 184 | eps = sys.float_info.epsilon 185 | 186 | #Let's find out the alpha with the help of error 187 | alpha = (0.5 * np.log((1-error)/(error+eps))) 188 | 189 | #Create empty vector to store weight updates 190 | w = np.zeros(len(weights)) 191 | 192 | # Update the weights using alpha value 193 | for i in range(len(error_vec)): 194 | 195 | #For wrong prediction increase the weights 196 | if error_vec[i]!=0: 197 | w[i] = weights[i] * np.exp(alpha) 198 | 199 | #For correct prediction decrease the weights 200 | else: 201 | w[i] = weights[i] * np.exp(-alpha) 202 | 203 | #Normalize the weights and update previous weight vector 204 | weights = w / w.sum() 205 | 206 | #Put the updated weights into the data set by over-writing previous weights 207 | dataset[:,-1]=weights 208 | 209 | #if error<=er: 210 | print("\nClassifier %i stats:"%itr) 211 | print(ds) 212 | print("Error: %.3f and alpha: %.3f"%(error,alpha)) 213 | er = error 214 | #Append alpha value to the list to be used at the time of testing 215 | alphas.append(alpha) 216 | 217 | #Append the weak learner to the list 218 | weaks.append(ds) 219 | 220 | return weaks,alphas 221 | 222 | def accuracy_metric(actual, predicted): 223 | correct = 0 224 | for i in range(len(actual)): 225 | if actual[i] == predicted[i]: 226 | correct += 1 227 | return correct / float(len(actual)) * 100.0 -------------------------------------------------------------------------------- /Chapter05/AdaboostAlgorithmExample.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 18-Sep-2017 3 | 4 |
@author: DX 5 | ''' 6 | 7 | import numpy as np; 8 | from Chapter_05 import Adaboost as ad 9 | 10 | dataset = [[0.25000, 1.75000, 1.00000], 11 | [1.25000, 1.75000, -1.00000], 12 | [0.50000, 1.50000, 1.00000], 13 | [1.00000, 0.50000, -1.00000], 14 | [1.25000, 3.50000, 1.00000], 15 | [1.50000, 4.00000, 1.00000], 16 | [2.00000, 2.00000, -1.00000], 17 | [2.50000, 2.50000, 1.00000], 18 | [3.75000, 3.00000, -1.00000], 19 | [4.00000, 1.00000, -1.00000]] 20 | 21 | [weaks,alphas] = ad.AdaBoostAlgorithm(dataset,9) 22 | 23 | prediction=[] 24 | actual = [] 25 | for row in dataset: 26 | preds = [] 27 | for i in range(len(weaks)): 28 | p = alphas[i]*ad.predict(weaks[i], row) 29 | #p = predict(weaks[i], row) 30 | preds.append(p) 31 | final = np.sign(sum(preds)) 32 | #final = max(set(preds), key=preds.count) 33 | prediction.append(final) 34 | actual.append(row[-1]) 35 | print('Expected=%d, Got=%d' % (row[-1], final)) 36 | 37 | acc = ad.accuracy_metric(actual, prediction) 38 | print("accuracy: %.2f"%acc) -------------------------------------------------------------------------------- /Chapter05/Data/bcancer.csv: -------------------------------------------------------------------------------- 1 | 1000025,5,1,1,1,2,1,3,1,1,2 2 | 1002945,5,4,4,5,7,10,3,2,1,2 3 | 1015425,3,1,1,1,2,2,3,1,1,2 4 | 1016277,6,8,8,1,3,4,3,7,1,2 5 | 1017023,4,1,1,3,2,1,3,1,1,2 6 | 1017122,8,10,10,8,7,10,9,7,1,4 7 | 1018099,1,1,1,1,2,10,3,1,1,2 8 | 1018561,2,1,2,1,2,1,3,1,1,2 9 | 1033078,2,1,1,1,2,1,1,1,5,2 10 | 1033078,4,2,1,1,2,1,2,1,1,2 11 | 1035283,1,1,1,1,1,1,3,1,1,2 12 | 1036172,2,1,1,1,2,1,2,1,1,2 13 | 1041801,5,3,3,3,2,3,4,4,1,4 14 | 1043999,1,1,1,1,2,3,3,1,1,2 15 | 1044572,8,7,5,10,7,9,5,5,4,4 16 | 1047630,7,4,6,4,6,1,4,3,1,4 17 | 1048672,4,1,1,1,2,1,2,1,1,2 18 | 1049815,4,1,1,1,2,1,3,1,1,2 19 | 1050670,10,7,7,6,4,10,4,1,2,4 20 | 1050718,6,1,1,1,2,1,3,1,1,2 21 | 1054590,7,3,2,10,5,10,5,4,4,4 22 | 1054593,10,5,5,3,6,7,7,10,1,4 23 | 1056784,3,1,1,1,2,1,2,1,1,2 24 | 1057013,8,4,5,1,2,?,7,3,1,4 25 | 1059552,1,1,1,1,2,1,3,1,1,2 26 | 1065726,5,2,3,4,2,7,3,6,1,4 27 | 1066373,3,2,1,1,1,1,2,1,1,2 28 | 1066979,5,1,1,1,2,1,2,1,1,2 29 | 1067444,2,1,1,1,2,1,2,1,1,2 30 | 1070935,1,1,3,1,2,1,1,1,1,2 31 | 1070935,3,1,1,1,1,1,2,1,1,2 32 | 1071760,2,1,1,1,2,1,3,1,1,2 33 | 1072179,10,7,7,3,8,5,7,4,3,4 34 | 1074610,2,1,1,2,2,1,3,1,1,2 35 | 1075123,3,1,2,1,2,1,2,1,1,2 36 | 1079304,2,1,1,1,2,1,2,1,1,2 37 | 1080185,10,10,10,8,6,1,8,9,1,4 38 | 1081791,6,2,1,1,1,1,7,1,1,2 39 | 1084584,5,4,4,9,2,10,5,6,1,4 40 | 1091262,2,5,3,3,6,7,7,5,1,4 41 | 1096800,6,6,6,9,6,?,7,8,1,2 42 | 1099510,10,4,3,1,3,3,6,5,2,4 43 | 1100524,6,10,10,2,8,10,7,3,3,4 44 | 1102573,5,6,5,6,10,1,3,1,1,4 45 | 1103608,10,10,10,4,8,1,8,10,1,4 46 | 1103722,1,1,1,1,2,1,2,1,2,2 47 | 1105257,3,7,7,4,4,9,4,8,1,4 48 | 1105524,1,1,1,1,2,1,2,1,1,2 49 | 1106095,4,1,1,3,2,1,3,1,1,2 50 | 1106829,7,8,7,2,4,8,3,8,2,4 51 | 1108370,9,5,8,1,2,3,2,1,5,4 52 | 1108449,5,3,3,4,2,4,3,4,1,4 53 | 1110102,10,3,6,2,3,5,4,10,2,4 54 | 1110503,5,5,5,8,10,8,7,3,7,4 55 | 1110524,10,5,5,6,8,8,7,1,1,4 56 | 1111249,10,6,6,3,4,5,3,6,1,4 57 | 1112209,8,10,10,1,3,6,3,9,1,4 58 | 1113038,8,2,4,1,5,1,5,4,4,4 59 | 1113483,5,2,3,1,6,10,5,1,1,4 60 | 1113906,9,5,5,2,2,2,5,1,1,4 61 | 1115282,5,3,5,5,3,3,4,10,1,4 62 | 1115293,1,1,1,1,2,2,2,1,1,2 63 | 1116116,9,10,10,1,10,8,3,3,1,4 64 | 1116132,6,3,4,1,5,2,3,9,1,4 65 | 1116192,1,1,1,1,2,1,2,1,1,2 66 | 1116998,10,4,2,1,3,2,4,3,10,4 67 | 1117152,4,1,1,1,2,1,3,1,1,2 68 | 1118039,5,3,4,1,8,10,4,9,1,4 69 | 1120559,8,3,8,3,4,9,8,9,8,4 70 | 1121732,1,1,1,1,2,1,3,2,1,2 71 | 
1121919,5,1,3,1,2,1,2,1,1,2 72 | 1123061,6,10,2,8,10,2,7,8,10,4 73 | 1124651,1,3,3,2,2,1,7,2,1,2 74 | 1125035,9,4,5,10,6,10,4,8,1,4 75 | 1126417,10,6,4,1,3,4,3,2,3,4 76 | 1131294,1,1,2,1,2,2,4,2,1,2 77 | 1132347,1,1,4,1,2,1,2,1,1,2 78 | 1133041,5,3,1,2,2,1,2,1,1,2 79 | 1133136,3,1,1,1,2,3,3,1,1,2 80 | 1136142,2,1,1,1,3,1,2,1,1,2 81 | 1137156,2,2,2,1,1,1,7,1,1,2 82 | 1143978,4,1,1,2,2,1,2,1,1,2 83 | 1143978,5,2,1,1,2,1,3,1,1,2 84 | 1147044,3,1,1,1,2,2,7,1,1,2 85 | 1147699,3,5,7,8,8,9,7,10,7,4 86 | 1147748,5,10,6,1,10,4,4,10,10,4 87 | 1148278,3,3,6,4,5,8,4,4,1,4 88 | 1148873,3,6,6,6,5,10,6,8,3,4 89 | 1152331,4,1,1,1,2,1,3,1,1,2 90 | 1155546,2,1,1,2,3,1,2,1,1,2 91 | 1156272,1,1,1,1,2,1,3,1,1,2 92 | 1156948,3,1,1,2,2,1,1,1,1,2 93 | 1157734,4,1,1,1,2,1,3,1,1,2 94 | 1158247,1,1,1,1,2,1,2,1,1,2 95 | 1160476,2,1,1,1,2,1,3,1,1,2 96 | 1164066,1,1,1,1,2,1,3,1,1,2 97 | 1165297,2,1,1,2,2,1,1,1,1,2 98 | 1165790,5,1,1,1,2,1,3,1,1,2 99 | 1165926,9,6,9,2,10,6,2,9,10,4 100 | 1166630,7,5,6,10,5,10,7,9,4,4 101 | 1166654,10,3,5,1,10,5,3,10,2,4 102 | 1167439,2,3,4,4,2,5,2,5,1,4 103 | 1167471,4,1,2,1,2,1,3,1,1,2 104 | 1168359,8,2,3,1,6,3,7,1,1,4 105 | 1168736,10,10,10,10,10,1,8,8,8,4 106 | 1169049,7,3,4,4,3,3,3,2,7,4 107 | 1170419,10,10,10,8,2,10,4,1,1,4 108 | 1170420,1,6,8,10,8,10,5,7,1,4 109 | 1171710,1,1,1,1,2,1,2,3,1,2 110 | 1171710,6,5,4,4,3,9,7,8,3,4 111 | 1171795,1,3,1,2,2,2,5,3,2,2 112 | 1171845,8,6,4,3,5,9,3,1,1,4 113 | 1172152,10,3,3,10,2,10,7,3,3,4 114 | 1173216,10,10,10,3,10,8,8,1,1,4 115 | 1173235,3,3,2,1,2,3,3,1,1,2 116 | 1173347,1,1,1,1,2,5,1,1,1,2 117 | 1173347,8,3,3,1,2,2,3,2,1,2 118 | 1173509,4,5,5,10,4,10,7,5,8,4 119 | 1173514,1,1,1,1,4,3,1,1,1,2 120 | 1173681,3,2,1,1,2,2,3,1,1,2 121 | 1174057,1,1,2,2,2,1,3,1,1,2 122 | 1174057,4,2,1,1,2,2,3,1,1,2 123 | 1174131,10,10,10,2,10,10,5,3,3,4 124 | 1174428,5,3,5,1,8,10,5,3,1,4 125 | 1175937,5,4,6,7,9,7,8,10,1,4 126 | 1176406,1,1,1,1,2,1,2,1,1,2 127 | 1176881,7,5,3,7,4,10,7,5,5,4 128 | 1177027,3,1,1,1,2,1,3,1,1,2 129 | 1177399,8,3,5,4,5,10,1,6,2,4 130 | 1177512,1,1,1,1,10,1,1,1,1,2 131 | 1178580,5,1,3,1,2,1,2,1,1,2 132 | 1179818,2,1,1,1,2,1,3,1,1,2 133 | 1180194,5,10,8,10,8,10,3,6,3,4 134 | 1180523,3,1,1,1,2,1,2,2,1,2 135 | 1180831,3,1,1,1,3,1,2,1,1,2 136 | 1181356,5,1,1,1,2,2,3,3,1,2 137 | 1182404,4,1,1,1,2,1,2,1,1,2 138 | 1182410,3,1,1,1,2,1,1,1,1,2 139 | 1183240,4,1,2,1,2,1,2,1,1,2 140 | 1183246,1,1,1,1,1,?,2,1,1,2 141 | 1183516,3,1,1,1,2,1,1,1,1,2 142 | 1183911,2,1,1,1,2,1,1,1,1,2 143 | 1183983,9,5,5,4,4,5,4,3,3,4 144 | 1184184,1,1,1,1,2,5,1,1,1,2 145 | 1184241,2,1,1,1,2,1,2,1,1,2 146 | 1184840,1,1,3,1,2,?,2,1,1,2 147 | 1185609,3,4,5,2,6,8,4,1,1,4 148 | 1185610,1,1,1,1,3,2,2,1,1,2 149 | 1187457,3,1,1,3,8,1,5,8,1,2 150 | 1187805,8,8,7,4,10,10,7,8,7,4 151 | 1188472,1,1,1,1,1,1,3,1,1,2 152 | 1189266,7,2,4,1,6,10,5,4,3,4 153 | 1189286,10,10,8,6,4,5,8,10,1,4 154 | 1190394,4,1,1,1,2,3,1,1,1,2 155 | 1190485,1,1,1,1,2,1,1,1,1,2 156 | 1192325,5,5,5,6,3,10,3,1,1,4 157 | 1193091,1,2,2,1,2,1,2,1,1,2 158 | 1193210,2,1,1,1,2,1,3,1,1,2 159 | 1193683,1,1,2,1,3,?,1,1,1,2 160 | 1196295,9,9,10,3,6,10,7,10,6,4 161 | 1196915,10,7,7,4,5,10,5,7,2,4 162 | 1197080,4,1,1,1,2,1,3,2,1,2 163 | 1197270,3,1,1,1,2,1,3,1,1,2 164 | 1197440,1,1,1,2,1,3,1,1,7,2 165 | 1197510,5,1,1,1,2,?,3,1,1,2 166 | 1197979,4,1,1,1,2,2,3,2,1,2 167 | 1197993,5,6,7,8,8,10,3,10,3,4 168 | 1198128,10,8,10,10,6,1,3,1,10,4 169 | 1198641,3,1,1,1,2,1,3,1,1,2 170 | 1199219,1,1,1,2,1,1,1,1,1,2 171 | 1199731,3,1,1,1,2,1,1,1,1,2 172 | 1199983,1,1,1,1,2,1,3,1,1,2 173 | 1200772,1,1,1,1,2,1,2,1,1,2 174 | 
1200847,6,10,10,10,8,10,10,10,7,4 175 | 1200892,8,6,5,4,3,10,6,1,1,4 176 | 1200952,5,8,7,7,10,10,5,7,1,4 177 | 1201834,2,1,1,1,2,1,3,1,1,2 178 | 1201936,5,10,10,3,8,1,5,10,3,4 179 | 1202125,4,1,1,1,2,1,3,1,1,2 180 | 1202812,5,3,3,3,6,10,3,1,1,4 181 | 1203096,1,1,1,1,1,1,3,1,1,2 182 | 1204242,1,1,1,1,2,1,1,1,1,2 183 | 1204898,6,1,1,1,2,1,3,1,1,2 184 | 1205138,5,8,8,8,5,10,7,8,1,4 185 | 1205579,8,7,6,4,4,10,5,1,1,4 186 | 1206089,2,1,1,1,1,1,3,1,1,2 187 | 1206695,1,5,8,6,5,8,7,10,1,4 188 | 1206841,10,5,6,10,6,10,7,7,10,4 189 | 1207986,5,8,4,10,5,8,9,10,1,4 190 | 1208301,1,2,3,1,2,1,3,1,1,2 191 | 1210963,10,10,10,8,6,8,7,10,1,4 192 | 1211202,7,5,10,10,10,10,4,10,3,4 193 | 1212232,5,1,1,1,2,1,2,1,1,2 194 | 1212251,1,1,1,1,2,1,3,1,1,2 195 | 1212422,3,1,1,1,2,1,3,1,1,2 196 | 1212422,4,1,1,1,2,1,3,1,1,2 197 | 1213375,8,4,4,5,4,7,7,8,2,2 198 | 1213383,5,1,1,4,2,1,3,1,1,2 199 | 1214092,1,1,1,1,2,1,1,1,1,2 200 | 1214556,3,1,1,1,2,1,2,1,1,2 201 | 1214966,9,7,7,5,5,10,7,8,3,4 202 | 1216694,10,8,8,4,10,10,8,1,1,4 203 | 1216947,1,1,1,1,2,1,3,1,1,2 204 | 1217051,5,1,1,1,2,1,3,1,1,2 205 | 1217264,1,1,1,1,2,1,3,1,1,2 206 | 1218105,5,10,10,9,6,10,7,10,5,4 207 | 1218741,10,10,9,3,7,5,3,5,1,4 208 | 1218860,1,1,1,1,1,1,3,1,1,2 209 | 1218860,1,1,1,1,1,1,3,1,1,2 210 | 1219406,5,1,1,1,1,1,3,1,1,2 211 | 1219525,8,10,10,10,5,10,8,10,6,4 212 | 1219859,8,10,8,8,4,8,7,7,1,4 213 | 1220330,1,1,1,1,2,1,3,1,1,2 214 | 1221863,10,10,10,10,7,10,7,10,4,4 215 | 1222047,10,10,10,10,3,10,10,6,1,4 216 | 1222936,8,7,8,7,5,5,5,10,2,4 217 | 1223282,1,1,1,1,2,1,2,1,1,2 218 | 1223426,1,1,1,1,2,1,3,1,1,2 219 | 1223793,6,10,7,7,6,4,8,10,2,4 220 | 1223967,6,1,3,1,2,1,3,1,1,2 221 | 1224329,1,1,1,2,2,1,3,1,1,2 222 | 1225799,10,6,4,3,10,10,9,10,1,4 223 | 1226012,4,1,1,3,1,5,2,1,1,4 224 | 1226612,7,5,6,3,3,8,7,4,1,4 225 | 1227210,10,5,5,6,3,10,7,9,2,4 226 | 1227244,1,1,1,1,2,1,2,1,1,2 227 | 1227481,10,5,7,4,4,10,8,9,1,4 228 | 1228152,8,9,9,5,3,5,7,7,1,4 229 | 1228311,1,1,1,1,1,1,3,1,1,2 230 | 1230175,10,10,10,3,10,10,9,10,1,4 231 | 1230688,7,4,7,4,3,7,7,6,1,4 232 | 1231387,6,8,7,5,6,8,8,9,2,4 233 | 1231706,8,4,6,3,3,1,4,3,1,2 234 | 1232225,10,4,5,5,5,10,4,1,1,4 235 | 1236043,3,3,2,1,3,1,3,6,1,2 236 | 1241232,3,1,4,1,2,?,3,1,1,2 237 | 1241559,10,8,8,2,8,10,4,8,10,4 238 | 1241679,9,8,8,5,6,2,4,10,4,4 239 | 1242364,8,10,10,8,6,9,3,10,10,4 240 | 1243256,10,4,3,2,3,10,5,3,2,4 241 | 1270479,5,1,3,3,2,2,2,3,1,2 242 | 1276091,3,1,1,3,1,1,3,1,1,2 243 | 1277018,2,1,1,1,2,1,3,1,1,2 244 | 128059,1,1,1,1,2,5,5,1,1,2 245 | 1285531,1,1,1,1,2,1,3,1,1,2 246 | 1287775,5,1,1,2,2,2,3,1,1,2 247 | 144888,8,10,10,8,5,10,7,8,1,4 248 | 145447,8,4,4,1,2,9,3,3,1,4 249 | 167528,4,1,1,1,2,1,3,6,1,2 250 | 169356,3,1,1,1,2,?,3,1,1,2 251 | 183913,1,2,2,1,2,1,1,1,1,2 252 | 191250,10,4,4,10,2,10,5,3,3,4 253 | 1017023,6,3,3,5,3,10,3,5,3,2 254 | 1100524,6,10,10,2,8,10,7,3,3,4 255 | 1116116,9,10,10,1,10,8,3,3,1,4 256 | 1168736,5,6,6,2,4,10,3,6,1,4 257 | 1182404,3,1,1,1,2,1,1,1,1,2 258 | 1182404,3,1,1,1,2,1,2,1,1,2 259 | 1198641,3,1,1,1,2,1,3,1,1,2 260 | 242970,5,7,7,1,5,8,3,4,1,2 261 | 255644,10,5,8,10,3,10,5,1,3,4 262 | 263538,5,10,10,6,10,10,10,6,5,4 263 | 274137,8,8,9,4,5,10,7,8,1,4 264 | 303213,10,4,4,10,6,10,5,5,1,4 265 | 314428,7,9,4,10,10,3,5,3,3,4 266 | 1182404,5,1,4,1,2,1,3,2,1,2 267 | 1198641,10,10,6,3,3,10,4,3,2,4 268 | 320675,3,3,5,2,3,10,7,1,1,4 269 | 324427,10,8,8,2,3,4,8,7,8,4 270 | 385103,1,1,1,1,2,1,3,1,1,2 271 | 390840,8,4,7,1,3,10,3,9,2,4 272 | 411453,5,1,1,1,2,1,3,1,1,2 273 | 320675,3,3,5,2,3,10,7,1,1,4 274 | 428903,7,2,4,1,3,4,3,3,1,4 275 | 
431495,3,1,1,1,2,1,3,2,1,2 276 | 432809,3,1,3,1,2,?,2,1,1,2 277 | 434518,3,1,1,1,2,1,2,1,1,2 278 | 452264,1,1,1,1,2,1,2,1,1,2 279 | 456282,1,1,1,1,2,1,3,1,1,2 280 | 476903,10,5,7,3,3,7,3,3,8,4 281 | 486283,3,1,1,1,2,1,3,1,1,2 282 | 486662,2,1,1,2,2,1,3,1,1,2 283 | 488173,1,4,3,10,4,10,5,6,1,4 284 | 492268,10,4,6,1,2,10,5,3,1,4 285 | 508234,7,4,5,10,2,10,3,8,2,4 286 | 527363,8,10,10,10,8,10,10,7,3,4 287 | 529329,10,10,10,10,10,10,4,10,10,4 288 | 535331,3,1,1,1,3,1,2,1,1,2 289 | 543558,6,1,3,1,4,5,5,10,1,4 290 | 555977,5,6,6,8,6,10,4,10,4,4 291 | 560680,1,1,1,1,2,1,1,1,1,2 292 | 561477,1,1,1,1,2,1,3,1,1,2 293 | 563649,8,8,8,1,2,?,6,10,1,4 294 | 601265,10,4,4,6,2,10,2,3,1,4 295 | 606140,1,1,1,1,2,?,2,1,1,2 296 | 606722,5,5,7,8,6,10,7,4,1,4 297 | 616240,5,3,4,3,4,5,4,7,1,2 298 | 61634,5,4,3,1,2,?,2,3,1,2 299 | 625201,8,2,1,1,5,1,1,1,1,2 300 | 63375,9,1,2,6,4,10,7,7,2,4 301 | 635844,8,4,10,5,4,4,7,10,1,4 302 | 636130,1,1,1,1,2,1,3,1,1,2 303 | 640744,10,10,10,7,9,10,7,10,10,4 304 | 646904,1,1,1,1,2,1,3,1,1,2 305 | 653777,8,3,4,9,3,10,3,3,1,4 306 | 659642,10,8,4,4,4,10,3,10,4,4 307 | 666090,1,1,1,1,2,1,3,1,1,2 308 | 666942,1,1,1,1,2,1,3,1,1,2 309 | 667204,7,8,7,6,4,3,8,8,4,4 310 | 673637,3,1,1,1,2,5,5,1,1,2 311 | 684955,2,1,1,1,3,1,2,1,1,2 312 | 688033,1,1,1,1,2,1,1,1,1,2 313 | 691628,8,6,4,10,10,1,3,5,1,4 314 | 693702,1,1,1,1,2,1,1,1,1,2 315 | 704097,1,1,1,1,1,1,2,1,1,2 316 | 704168,4,6,5,6,7,?,4,9,1,2 317 | 706426,5,5,5,2,5,10,4,3,1,4 318 | 709287,6,8,7,8,6,8,8,9,1,4 319 | 718641,1,1,1,1,5,1,3,1,1,2 320 | 721482,4,4,4,4,6,5,7,3,1,2 321 | 730881,7,6,3,2,5,10,7,4,6,4 322 | 733639,3,1,1,1,2,?,3,1,1,2 323 | 733639,3,1,1,1,2,1,3,1,1,2 324 | 733823,5,4,6,10,2,10,4,1,1,4 325 | 740492,1,1,1,1,2,1,3,1,1,2 326 | 743348,3,2,2,1,2,1,2,3,1,2 327 | 752904,10,1,1,1,2,10,5,4,1,4 328 | 756136,1,1,1,1,2,1,2,1,1,2 329 | 760001,8,10,3,2,6,4,3,10,1,4 330 | 760239,10,4,6,4,5,10,7,1,1,4 331 | 76389,10,4,7,2,2,8,6,1,1,4 332 | 764974,5,1,1,1,2,1,3,1,2,2 333 | 770066,5,2,2,2,2,1,2,2,1,2 334 | 785208,5,4,6,6,4,10,4,3,1,4 335 | 785615,8,6,7,3,3,10,3,4,2,4 336 | 792744,1,1,1,1,2,1,1,1,1,2 337 | 797327,6,5,5,8,4,10,3,4,1,4 338 | 798429,1,1,1,1,2,1,3,1,1,2 339 | 704097,1,1,1,1,1,1,2,1,1,2 340 | 806423,8,5,5,5,2,10,4,3,1,4 341 | 809912,10,3,3,1,2,10,7,6,1,4 342 | 810104,1,1,1,1,2,1,3,1,1,2 343 | 814265,2,1,1,1,2,1,1,1,1,2 344 | 814911,1,1,1,1,2,1,1,1,1,2 345 | 822829,7,6,4,8,10,10,9,5,3,4 346 | 826923,1,1,1,1,2,1,1,1,1,2 347 | 830690,5,2,2,2,3,1,1,3,1,2 348 | 831268,1,1,1,1,1,1,1,3,1,2 349 | 832226,3,4,4,10,5,1,3,3,1,4 350 | 832567,4,2,3,5,3,8,7,6,1,4 351 | 836433,5,1,1,3,2,1,1,1,1,2 352 | 837082,2,1,1,1,2,1,3,1,1,2 353 | 846832,3,4,5,3,7,3,4,6,1,2 354 | 850831,2,7,10,10,7,10,4,9,4,4 355 | 855524,1,1,1,1,2,1,2,1,1,2 356 | 857774,4,1,1,1,3,1,2,2,1,2 357 | 859164,5,3,3,1,3,3,3,3,3,4 358 | 859350,8,10,10,7,10,10,7,3,8,4 359 | 866325,8,10,5,3,8,4,4,10,3,4 360 | 873549,10,3,5,4,3,7,3,5,3,4 361 | 877291,6,10,10,10,10,10,8,10,10,4 362 | 877943,3,10,3,10,6,10,5,1,4,4 363 | 888169,3,2,2,1,4,3,2,1,1,2 364 | 888523,4,4,4,2,2,3,2,1,1,2 365 | 896404,2,1,1,1,2,1,3,1,1,2 366 | 897172,2,1,1,1,2,1,2,1,1,2 367 | 95719,6,10,10,10,8,10,7,10,7,4 368 | 160296,5,8,8,10,5,10,8,10,3,4 369 | 342245,1,1,3,1,2,1,1,1,1,2 370 | 428598,1,1,3,1,1,1,2,1,1,2 371 | 492561,4,3,2,1,3,1,2,1,1,2 372 | 493452,1,1,3,1,2,1,1,1,1,2 373 | 493452,4,1,2,1,2,1,2,1,1,2 374 | 521441,5,1,1,2,2,1,2,1,1,2 375 | 560680,3,1,2,1,2,1,2,1,1,2 376 | 636437,1,1,1,1,2,1,1,1,1,2 377 | 640712,1,1,1,1,2,1,2,1,1,2 378 | 654244,1,1,1,1,1,1,2,1,1,2 379 | 657753,3,1,1,4,3,1,2,2,1,2 380 | 
685977,5,3,4,1,4,1,3,1,1,2 381 | 805448,1,1,1,1,2,1,1,1,1,2 382 | 846423,10,6,3,6,4,10,7,8,4,4 383 | 1002504,3,2,2,2,2,1,3,2,1,2 384 | 1022257,2,1,1,1,2,1,1,1,1,2 385 | 1026122,2,1,1,1,2,1,1,1,1,2 386 | 1071084,3,3,2,2,3,1,1,2,3,2 387 | 1080233,7,6,6,3,2,10,7,1,1,4 388 | 1114570,5,3,3,2,3,1,3,1,1,2 389 | 1114570,2,1,1,1,2,1,2,2,1,2 390 | 1116715,5,1,1,1,3,2,2,2,1,2 391 | 1131411,1,1,1,2,2,1,2,1,1,2 392 | 1151734,10,8,7,4,3,10,7,9,1,4 393 | 1156017,3,1,1,1,2,1,2,1,1,2 394 | 1158247,1,1,1,1,1,1,1,1,1,2 395 | 1158405,1,2,3,1,2,1,2,1,1,2 396 | 1168278,3,1,1,1,2,1,2,1,1,2 397 | 1176187,3,1,1,1,2,1,3,1,1,2 398 | 1196263,4,1,1,1,2,1,1,1,1,2 399 | 1196475,3,2,1,1,2,1,2,2,1,2 400 | 1206314,1,2,3,1,2,1,1,1,1,2 401 | 1211265,3,10,8,7,6,9,9,3,8,4 402 | 1213784,3,1,1,1,2,1,1,1,1,2 403 | 1223003,5,3,3,1,2,1,2,1,1,2 404 | 1223306,3,1,1,1,2,4,1,1,1,2 405 | 1223543,1,2,1,3,2,1,1,2,1,2 406 | 1229929,1,1,1,1,2,1,2,1,1,2 407 | 1231853,4,2,2,1,2,1,2,1,1,2 408 | 1234554,1,1,1,1,2,1,2,1,1,2 409 | 1236837,2,3,2,2,2,2,3,1,1,2 410 | 1237674,3,1,2,1,2,1,2,1,1,2 411 | 1238021,1,1,1,1,2,1,2,1,1,2 412 | 1238464,1,1,1,1,1,?,2,1,1,2 413 | 1238633,10,10,10,6,8,4,8,5,1,4 414 | 1238915,5,1,2,1,2,1,3,1,1,2 415 | 1238948,8,5,6,2,3,10,6,6,1,4 416 | 1239232,3,3,2,6,3,3,3,5,1,2 417 | 1239347,8,7,8,5,10,10,7,2,1,4 418 | 1239967,1,1,1,1,2,1,2,1,1,2 419 | 1240337,5,2,2,2,2,2,3,2,2,2 420 | 1253505,2,3,1,1,5,1,1,1,1,2 421 | 1255384,3,2,2,3,2,3,3,1,1,2 422 | 1257200,10,10,10,7,10,10,8,2,1,4 423 | 1257648,4,3,3,1,2,1,3,3,1,2 424 | 1257815,5,1,3,1,2,1,2,1,1,2 425 | 1257938,3,1,1,1,2,1,1,1,1,2 426 | 1258549,9,10,10,10,10,10,10,10,1,4 427 | 1258556,5,3,6,1,2,1,1,1,1,2 428 | 1266154,8,7,8,2,4,2,5,10,1,4 429 | 1272039,1,1,1,1,2,1,2,1,1,2 430 | 1276091,2,1,1,1,2,1,2,1,1,2 431 | 1276091,1,3,1,1,2,1,2,2,1,2 432 | 1276091,5,1,1,3,4,1,3,2,1,2 433 | 1277629,5,1,1,1,2,1,2,2,1,2 434 | 1293439,3,2,2,3,2,1,1,1,1,2 435 | 1293439,6,9,7,5,5,8,4,2,1,2 436 | 1294562,10,8,10,1,3,10,5,1,1,4 437 | 1295186,10,10,10,1,6,1,2,8,1,4 438 | 527337,4,1,1,1,2,1,1,1,1,2 439 | 558538,4,1,3,3,2,1,1,1,1,2 440 | 566509,5,1,1,1,2,1,1,1,1,2 441 | 608157,10,4,3,10,4,10,10,1,1,4 442 | 677910,5,2,2,4,2,4,1,1,1,2 443 | 734111,1,1,1,3,2,3,1,1,1,2 444 | 734111,1,1,1,1,2,2,1,1,1,2 445 | 780555,5,1,1,6,3,1,2,1,1,2 446 | 827627,2,1,1,1,2,1,1,1,1,2 447 | 1049837,1,1,1,1,2,1,1,1,1,2 448 | 1058849,5,1,1,1,2,1,1,1,1,2 449 | 1182404,1,1,1,1,1,1,1,1,1,2 450 | 1193544,5,7,9,8,6,10,8,10,1,4 451 | 1201870,4,1,1,3,1,1,2,1,1,2 452 | 1202253,5,1,1,1,2,1,1,1,1,2 453 | 1227081,3,1,1,3,2,1,1,1,1,2 454 | 1230994,4,5,5,8,6,10,10,7,1,4 455 | 1238410,2,3,1,1,3,1,1,1,1,2 456 | 1246562,10,2,2,1,2,6,1,1,2,4 457 | 1257470,10,6,5,8,5,10,8,6,1,4 458 | 1259008,8,8,9,6,6,3,10,10,1,4 459 | 1266124,5,1,2,1,2,1,1,1,1,2 460 | 1267898,5,1,3,1,2,1,1,1,1,2 461 | 1268313,5,1,1,3,2,1,1,1,1,2 462 | 1268804,3,1,1,1,2,5,1,1,1,2 463 | 1276091,6,1,1,3,2,1,1,1,1,2 464 | 1280258,4,1,1,1,2,1,1,2,1,2 465 | 1293966,4,1,1,1,2,1,1,1,1,2 466 | 1296572,10,9,8,7,6,4,7,10,3,4 467 | 1298416,10,6,6,2,4,10,9,7,1,4 468 | 1299596,6,6,6,5,4,10,7,6,2,4 469 | 1105524,4,1,1,1,2,1,1,1,1,2 470 | 1181685,1,1,2,1,2,1,2,1,1,2 471 | 1211594,3,1,1,1,1,1,2,1,1,2 472 | 1238777,6,1,1,3,2,1,1,1,1,2 473 | 1257608,6,1,1,1,1,1,1,1,1,2 474 | 1269574,4,1,1,1,2,1,1,1,1,2 475 | 1277145,5,1,1,1,2,1,1,1,1,2 476 | 1287282,3,1,1,1,2,1,1,1,1,2 477 | 1296025,4,1,2,1,2,1,1,1,1,2 478 | 1296263,4,1,1,1,2,1,1,1,1,2 479 | 1296593,5,2,1,1,2,1,1,1,1,2 480 | 1299161,4,8,7,10,4,10,7,5,1,4 481 | 1301945,5,1,1,1,1,1,1,1,1,2 482 | 1302428,5,3,2,4,2,1,1,1,1,2 483 | 
1318169,9,10,10,10,10,5,10,10,10,4 484 | 474162,8,7,8,5,5,10,9,10,1,4 485 | 787451,5,1,2,1,2,1,1,1,1,2 486 | 1002025,1,1,1,3,1,3,1,1,1,2 487 | 1070522,3,1,1,1,1,1,2,1,1,2 488 | 1073960,10,10,10,10,6,10,8,1,5,4 489 | 1076352,3,6,4,10,3,3,3,4,1,4 490 | 1084139,6,3,2,1,3,4,4,1,1,4 491 | 1115293,1,1,1,1,2,1,1,1,1,2 492 | 1119189,5,8,9,4,3,10,7,1,1,4 493 | 1133991,4,1,1,1,1,1,2,1,1,2 494 | 1142706,5,10,10,10,6,10,6,5,2,4 495 | 1155967,5,1,2,10,4,5,2,1,1,2 496 | 1170945,3,1,1,1,1,1,2,1,1,2 497 | 1181567,1,1,1,1,1,1,1,1,1,2 498 | 1182404,4,2,1,1,2,1,1,1,1,2 499 | 1204558,4,1,1,1,2,1,2,1,1,2 500 | 1217952,4,1,1,1,2,1,2,1,1,2 501 | 1224565,6,1,1,1,2,1,3,1,1,2 502 | 1238186,4,1,1,1,2,1,2,1,1,2 503 | 1253917,4,1,1,2,2,1,2,1,1,2 504 | 1265899,4,1,1,1,2,1,3,1,1,2 505 | 1268766,1,1,1,1,2,1,1,1,1,2 506 | 1277268,3,3,1,1,2,1,1,1,1,2 507 | 1286943,8,10,10,10,7,5,4,8,7,4 508 | 1295508,1,1,1,1,2,4,1,1,1,2 509 | 1297327,5,1,1,1,2,1,1,1,1,2 510 | 1297522,2,1,1,1,2,1,1,1,1,2 511 | 1298360,1,1,1,1,2,1,1,1,1,2 512 | 1299924,5,1,1,1,2,1,2,1,1,2 513 | 1299994,5,1,1,1,2,1,1,1,1,2 514 | 1304595,3,1,1,1,1,1,2,1,1,2 515 | 1306282,6,6,7,10,3,10,8,10,2,4 516 | 1313325,4,10,4,7,3,10,9,10,1,4 517 | 1320077,1,1,1,1,1,1,1,1,1,2 518 | 1320077,1,1,1,1,1,1,2,1,1,2 519 | 1320304,3,1,2,2,2,1,1,1,1,2 520 | 1330439,4,7,8,3,4,10,9,1,1,4 521 | 333093,1,1,1,1,3,1,1,1,1,2 522 | 369565,4,1,1,1,3,1,1,1,1,2 523 | 412300,10,4,5,4,3,5,7,3,1,4 524 | 672113,7,5,6,10,4,10,5,3,1,4 525 | 749653,3,1,1,1,2,1,2,1,1,2 526 | 769612,3,1,1,2,2,1,1,1,1,2 527 | 769612,4,1,1,1,2,1,1,1,1,2 528 | 798429,4,1,1,1,2,1,3,1,1,2 529 | 807657,6,1,3,2,2,1,1,1,1,2 530 | 8233704,4,1,1,1,1,1,2,1,1,2 531 | 837480,7,4,4,3,4,10,6,9,1,4 532 | 867392,4,2,2,1,2,1,2,1,1,2 533 | 869828,1,1,1,1,1,1,3,1,1,2 534 | 1043068,3,1,1,1,2,1,2,1,1,2 535 | 1056171,2,1,1,1,2,1,2,1,1,2 536 | 1061990,1,1,3,2,2,1,3,1,1,2 537 | 1113061,5,1,1,1,2,1,3,1,1,2 538 | 1116192,5,1,2,1,2,1,3,1,1,2 539 | 1135090,4,1,1,1,2,1,2,1,1,2 540 | 1145420,6,1,1,1,2,1,2,1,1,2 541 | 1158157,5,1,1,1,2,2,2,1,1,2 542 | 1171578,3,1,1,1,2,1,1,1,1,2 543 | 1174841,5,3,1,1,2,1,1,1,1,2 544 | 1184586,4,1,1,1,2,1,2,1,1,2 545 | 1186936,2,1,3,2,2,1,2,1,1,2 546 | 1197527,5,1,1,1,2,1,2,1,1,2 547 | 1222464,6,10,10,10,4,10,7,10,1,4 548 | 1240603,2,1,1,1,1,1,1,1,1,2 549 | 1240603,3,1,1,1,1,1,1,1,1,2 550 | 1241035,7,8,3,7,4,5,7,8,2,4 551 | 1287971,3,1,1,1,2,1,2,1,1,2 552 | 1289391,1,1,1,1,2,1,3,1,1,2 553 | 1299924,3,2,2,2,2,1,4,2,1,2 554 | 1306339,4,4,2,1,2,5,2,1,2,2 555 | 1313658,3,1,1,1,2,1,1,1,1,2 556 | 1313982,4,3,1,1,2,1,4,8,1,2 557 | 1321264,5,2,2,2,1,1,2,1,1,2 558 | 1321321,5,1,1,3,2,1,1,1,1,2 559 | 1321348,2,1,1,1,2,1,2,1,1,2 560 | 1321931,5,1,1,1,2,1,2,1,1,2 561 | 1321942,5,1,1,1,2,1,3,1,1,2 562 | 1321942,5,1,1,1,2,1,3,1,1,2 563 | 1328331,1,1,1,1,2,1,3,1,1,2 564 | 1328755,3,1,1,1,2,1,2,1,1,2 565 | 1331405,4,1,1,1,2,1,3,2,1,2 566 | 1331412,5,7,10,10,5,10,10,10,1,4 567 | 1333104,3,1,2,1,2,1,3,1,1,2 568 | 1334071,4,1,1,1,2,3,2,1,1,2 569 | 1343068,8,4,4,1,6,10,2,5,2,4 570 | 1343374,10,10,8,10,6,5,10,3,1,4 571 | 1344121,8,10,4,4,8,10,8,2,1,4 572 | 142932,7,6,10,5,3,10,9,10,2,4 573 | 183936,3,1,1,1,2,1,2,1,1,2 574 | 324382,1,1,1,1,2,1,2,1,1,2 575 | 378275,10,9,7,3,4,2,7,7,1,4 576 | 385103,5,1,2,1,2,1,3,1,1,2 577 | 690557,5,1,1,1,2,1,2,1,1,2 578 | 695091,1,1,1,1,2,1,2,1,1,2 579 | 695219,1,1,1,1,2,1,2,1,1,2 580 | 824249,1,1,1,1,2,1,3,1,1,2 581 | 871549,5,1,2,1,2,1,2,1,1,2 582 | 878358,5,7,10,6,5,10,7,5,1,4 583 | 1107684,6,10,5,5,4,10,6,10,1,4 584 | 1115762,3,1,1,1,2,1,1,1,1,2 585 | 1217717,5,1,1,6,3,1,1,1,1,2 586 | 
1239420,1,1,1,1,2,1,1,1,1,2 587 | 1254538,8,10,10,10,6,10,10,10,1,4 588 | 1261751,5,1,1,1,2,1,2,2,1,2 589 | 1268275,9,8,8,9,6,3,4,1,1,4 590 | 1272166,5,1,1,1,2,1,1,1,1,2 591 | 1294261,4,10,8,5,4,1,10,1,1,4 592 | 1295529,2,5,7,6,4,10,7,6,1,4 593 | 1298484,10,3,4,5,3,10,4,1,1,4 594 | 1311875,5,1,2,1,2,1,1,1,1,2 595 | 1315506,4,8,6,3,4,10,7,1,1,4 596 | 1320141,5,1,1,1,2,1,2,1,1,2 597 | 1325309,4,1,2,1,2,1,2,1,1,2 598 | 1333063,5,1,3,1,2,1,3,1,1,2 599 | 1333495,3,1,1,1,2,1,2,1,1,2 600 | 1334659,5,2,4,1,1,1,1,1,1,2 601 | 1336798,3,1,1,1,2,1,2,1,1,2 602 | 1344449,1,1,1,1,1,1,2,1,1,2 603 | 1350568,4,1,1,1,2,1,2,1,1,2 604 | 1352663,5,4,6,8,4,1,8,10,1,4 605 | 188336,5,3,2,8,5,10,8,1,2,4 606 | 352431,10,5,10,3,5,8,7,8,3,4 607 | 353098,4,1,1,2,2,1,1,1,1,2 608 | 411453,1,1,1,1,2,1,1,1,1,2 609 | 557583,5,10,10,10,10,10,10,1,1,4 610 | 636375,5,1,1,1,2,1,1,1,1,2 611 | 736150,10,4,3,10,3,10,7,1,2,4 612 | 803531,5,10,10,10,5,2,8,5,1,4 613 | 822829,8,10,10,10,6,10,10,10,10,4 614 | 1016634,2,3,1,1,2,1,2,1,1,2 615 | 1031608,2,1,1,1,1,1,2,1,1,2 616 | 1041043,4,1,3,1,2,1,2,1,1,2 617 | 1042252,3,1,1,1,2,1,2,1,1,2 618 | 1057067,1,1,1,1,1,?,1,1,1,2 619 | 1061990,4,1,1,1,2,1,2,1,1,2 620 | 1073836,5,1,1,1,2,1,2,1,1,2 621 | 1083817,3,1,1,1,2,1,2,1,1,2 622 | 1096352,6,3,3,3,3,2,6,1,1,2 623 | 1140597,7,1,2,3,2,1,2,1,1,2 624 | 1149548,1,1,1,1,2,1,1,1,1,2 625 | 1174009,5,1,1,2,1,1,2,1,1,2 626 | 1183596,3,1,3,1,3,4,1,1,1,2 627 | 1190386,4,6,6,5,7,6,7,7,3,4 628 | 1190546,2,1,1,1,2,5,1,1,1,2 629 | 1213273,2,1,1,1,2,1,1,1,1,2 630 | 1218982,4,1,1,1,2,1,1,1,1,2 631 | 1225382,6,2,3,1,2,1,1,1,1,2 632 | 1235807,5,1,1,1,2,1,2,1,1,2 633 | 1238777,1,1,1,1,2,1,1,1,1,2 634 | 1253955,8,7,4,4,5,3,5,10,1,4 635 | 1257366,3,1,1,1,2,1,1,1,1,2 636 | 1260659,3,1,4,1,2,1,1,1,1,2 637 | 1268952,10,10,7,8,7,1,10,10,3,4 638 | 1275807,4,2,4,3,2,2,2,1,1,2 639 | 1277792,4,1,1,1,2,1,1,1,1,2 640 | 1277792,5,1,1,3,2,1,1,1,1,2 641 | 1285722,4,1,1,3,2,1,1,1,1,2 642 | 1288608,3,1,1,1,2,1,2,1,1,2 643 | 1290203,3,1,1,1,2,1,2,1,1,2 644 | 1294413,1,1,1,1,2,1,1,1,1,2 645 | 1299596,2,1,1,1,2,1,1,1,1,2 646 | 1303489,3,1,1,1,2,1,2,1,1,2 647 | 1311033,1,2,2,1,2,1,1,1,1,2 648 | 1311108,1,1,1,3,2,1,1,1,1,2 649 | 1315807,5,10,10,10,10,2,10,10,10,4 650 | 1318671,3,1,1,1,2,1,2,1,1,2 651 | 1319609,3,1,1,2,3,4,1,1,1,2 652 | 1323477,1,2,1,3,2,1,2,1,1,2 653 | 1324572,5,1,1,1,2,1,2,2,1,2 654 | 1324681,4,1,1,1,2,1,2,1,1,2 655 | 1325159,3,1,1,1,2,1,3,1,1,2 656 | 1326892,3,1,1,1,2,1,2,1,1,2 657 | 1330361,5,1,1,1,2,1,2,1,1,2 658 | 1333877,5,4,5,1,8,1,3,6,1,2 659 | 1334015,7,8,8,7,3,10,7,2,3,4 660 | 1334667,1,1,1,1,2,1,1,1,1,2 661 | 1339781,1,1,1,1,2,1,2,1,1,2 662 | 1339781,4,1,1,1,2,1,3,1,1,2 663 | 13454352,1,1,3,1,2,1,2,1,1,2 664 | 1345452,1,1,3,1,2,1,2,1,1,2 665 | 1345593,3,1,1,3,2,1,2,1,1,2 666 | 1347749,1,1,1,1,2,1,1,1,1,2 667 | 1347943,5,2,2,2,2,1,1,1,2,2 668 | 1348851,3,1,1,1,2,1,3,1,1,2 669 | 1350319,5,7,4,1,6,1,7,10,3,4 670 | 1350423,5,10,10,8,5,5,7,10,1,4 671 | 1352848,3,10,7,8,5,8,7,4,1,4 672 | 1353092,3,2,1,2,2,1,3,1,1,2 673 | 1354840,2,1,1,1,2,1,3,1,1,2 674 | 1354840,5,3,2,1,3,1,1,1,1,2 675 | 1355260,1,1,1,1,2,1,2,1,1,2 676 | 1365075,4,1,4,1,2,1,1,1,1,2 677 | 1365328,1,1,2,1,2,1,2,1,1,2 678 | 1368267,5,1,1,1,2,1,1,1,1,2 679 | 1368273,1,1,1,1,2,1,1,1,1,2 680 | 1368882,2,1,1,1,2,1,1,1,1,2 681 | 1369821,10,10,10,10,5,10,10,10,7,4 682 | 1371026,5,10,10,10,4,10,5,6,3,4 683 | 1371920,5,1,1,1,2,1,3,2,1,2 684 | 466906,1,1,1,1,2,1,1,1,1,2 685 | 466906,1,1,1,1,2,1,1,1,1,2 686 | 534555,1,1,1,1,2,1,1,1,1,2 687 | 536708,1,1,1,1,2,1,1,1,1,2 688 | 566346,3,1,1,1,2,1,2,3,1,2 689 | 
603148,4,1,1,1,2,1,1,1,1,2 690 | 654546,1,1,1,1,2,1,1,1,8,2 691 | 654546,1,1,1,3,2,1,1,1,1,2 692 | 695091,5,10,10,5,4,5,4,4,1,4 693 | 714039,3,1,1,1,2,1,1,1,1,2 694 | 763235,3,1,1,1,2,1,2,1,2,2 695 | 776715,3,1,1,1,3,2,1,1,1,2 696 | 841769,2,1,1,1,2,1,1,1,1,2 697 | 888820,5,10,10,3,7,3,8,10,2,4 698 | 897471,4,8,6,4,3,4,10,6,1,4 699 | 897471,4,8,8,5,4,5,10,4,1,4 -------------------------------------------------------------------------------- /Chapter05/Data/download.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Ensemble-Machine-Learning/1fcc546f88517e35309a4f37ff8c252f9003c29f/Chapter05/Data/download.jpg -------------------------------------------------------------------------------- /Chapter06/RegressionTreeTest.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 03-Oct-2017 3 | 4 | @author: DX 5 | ''' 6 | import pprint 7 | import RegressionTrees as rg #The module lives alongside this script in Chapter06 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | 11 | #Create a Sine wave for demonstration of non-linearity 12 | 13 | #Set the number of samples 14 | N = 256 15 | 16 | #Create time value 17 | ix = np.arange(N) 18 | 19 | #Create the sine wave: sin(2*pi*ix/(N/2)) gives two full cycles over the N samples 20 | signal = np.sin(2*np.pi*ix/float(N/2)) 21 | 22 | #Combine both time and amplitude 23 | dataset = range(0,N) 24 | dataset = np.c_[ix,signal] 25 | dataset_ = dataset.copy() 26 | 27 | #Call Gradient boost 28 | weaks = rg.GradientBoost(dataset,5,1,100) 29 | 30 | prediction=[] 31 | actual = [] 32 | 33 | #Run a loop to extract each instance from the data set 34 | for row in dataset_: 35 | 36 | #Create a list to store predictions from the different learners for the current instance 37 | preds = [] 38 | 39 | #Feed the instance to the different learners 40 | for i in range(len(weaks)): 41 | 42 | #Get the prediction of the current weak learner for the instance 43 | p = rg.predict(weaks[i], row) 44 | 45 | #Add the prediction to the list 46 | preds.append(p) 47 | 48 | #Sum up the outputs of all the learners to get the final prediction 49 | final = (sum(preds)) 50 | 51 | #Append the final output to the prediction list and the actual output to the actual list 52 | prediction.append(final) 53 | actual.append(row[-1]) 54 | 55 | #Compute the residuals and the total squared error of the final prediction 56 | _,mse = rg.getResidual(actual, prediction) 57 | 58 | 59 | #Let's plot the original signal against the model's prediction 60 | plt.figure() 61 | plt.plot(ix,signal,marker='*',markersize=8) 62 | plt.plot(ix,prediction,marker='+',markersize=8) 63 | plt.show() -------------------------------------------------------------------------------- /Chapter06/RegressionTrees.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 03-Oct-2017 3 | 4 | @author: DX 5 | ''' 6 | import numpy as np 7 | import sys 8 | 9 | def terminalNodeReg(group): 10 | 11 | #Get all the target labels into the List 12 | class_values = [row[-1] for row in group] 13 | 14 | #Return the Mean value of the list 15 | return np.mean(class_values) 16 | 17 | # Calculate the SSE index for a split dataset 18 | def SquaredError(groups): 19 | 20 | #Initialize the variable for SSE 21 | sse = 0.0 22 | 23 | #Iterate for both the groups 24 | for group in groups: 25 | size = len(group) 26 | 27 | #If length is 0 continue for the next group 28 | if size == 0: 29 | continue 30 | 31 | #Take all the class values into a list 32 | class_values = [row[-1]
for row in group] 33 | 34 | #Calculate SSE for the group 35 | sse += np.sum((class_values-np.mean(class_values))**2) 36 | return sse 37 | 38 | #Function to get new node 39 | def getNode(dataset): 40 | 41 | #Initialize variables to store the error score, attribute index and split groups 42 | winnerAttribute = sys.maxsize 43 | attributeValue = sys.maxsize 44 | errorScore = sys.maxsize 45 | leftGroup = None 46 | 47 | #Run loop to access each attribute and attribute values 48 | for index in range(len(dataset[0])-1): 49 | for row in dataset: 50 | 51 | #Get split for the attribute value 52 | groups = createSplit(index, row[index], dataset) 53 | 54 | #Calculate SSE for the group 55 | sse = SquaredError(groups) 56 | #print("SSE for the attribute %.2f's value %.2f is %.3f"%(index+1,row[index],sse)) 57 | #If SSE is less than the previous best, keep this attribute value as the node 58 | if sse < errorScore: 59 | winnerAttribute, attributeValue, errorScore, leftGroup = index, row[index], sse, groups 60 | 61 | #Once done create a dictionary for node 62 | node = {'attribute':winnerAttribute,'value':attributeValue,'groups':leftGroup} 63 | return node 64 | 65 | #Create splits to test for node values 66 | def createSplit(attribute,threshold,dataset): 67 | 68 | #Initialize two lists to store the sub sets 69 | lesser, greater = list(),list() 70 | 71 | #Loop through the attribute values and create the subsets 72 | for values in dataset: 73 | #Apply threshold 74 | if values[attribute]<=threshold: 75 | lesser.append(values) 76 | else: 77 | greater.append(values) 78 | return lesser,greater 79 | 80 | # Create child splits for a node or make terminal 81 | def buildTreeReg(node, max_depth, min_size, depth): 82 | #Let's get the groups information first. 83 | left, right = node['groups'] 84 | del(node['groups']) 85 | # check if there are any elements in the left and right groups 86 | if not left or not right: 87 | #If either group is empty, make this a terminal node 88 | combined = left+right 89 | node['left'] = terminalNodeReg(combined) 90 | node['right']= terminalNodeReg(combined) 91 | return 92 | # check if we have reached the maximum depth 93 | if depth >= max_depth: 94 | node['left']=terminalNodeReg(left) 95 | node['right'] = terminalNodeReg(right) 96 | return 97 | # if all is okay, let's start building the tree for the left-side nodes 98 | # if the node already covers the minimum number of instances, stop building further 99 | if len(left) <= min_size: 100 | node['left'] = terminalNodeReg(left) 101 | 102 | else: 103 | #Create new node under left side of the tree 104 | node['left'] = getNode(left) 105 | #append node under the tree and increase depth by one. 106 | buildTreeReg(node['left'], max_depth, min_size, depth+1) #recursion will take place in here 107 | 108 | 109 | # Similar procedure for the right side nodes 110 | if len(right) <= min_size: 111 | node['right'] = terminalNodeReg(right) 112 | 113 | else: 114 | node['right'] = getNode(right) 115 | buildTreeReg(node['right'], max_depth, min_size, depth+1) 116 | 117 | 118 | # Build a decision tree 119 | def build_tree(train, max_depth, min_size): 120 | 121 | #Add the root node to the tree 122 | root = getNode(train) 123 | 124 | #Start building the tree from the root's branches 125 | buildTreeReg(root, max_depth, min_size, 1) 126 | return root 127 | 128 | #Function to get prediction from input tree 129 | def predict(node, row): 130 | 131 | #Get the node value and check whether the attribute value is less than or equal.
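#(A hedged illustration, not in the original source: given a toy node such as {'attribute': 0, 'value': 5.0, 'left': 3.2, 'right': {...}}, a row with row[0] <= 5.0 returns the leaf value 3.2 directly, while a larger value recurses into the nested node dictionary stored under 'right'.)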
132 | if row[node['attribute']] <= node['value']: 133 | #If yes enter into the left branch and check whether it holds another node or a leaf value. 134 | if isinstance(node['left'], dict): 135 | return predict(node['left'], row)#Recursion 136 | else: 137 | #If there is no node in the branch 138 | return node['left'] 139 | else: 140 | if isinstance(node['right'], dict): 141 | return predict(node['right'], row) 142 | else: 143 | return node['right'] 144 | 145 | def getResidual(actual,pred): 146 | 147 | #Create an empty list to store individual error of the instances 148 | residual = [] 149 | 150 | # Run a loop to get difference between output and prediction of each instance 151 | for i in range(len(actual)): 152 | 153 | #Get the difference and add the difference to the list of residuals 154 | diff = (actual[i]-pred[i]) 155 | residual.append(diff) 156 | 157 | #Calculate the sum of squared errors between output and prediction (the variable is named mse, but it is the un-normalized SSE) 158 | mse = np.sum(np.array(residual)**2) 159 | return residual,mse 160 | 161 | def GradientBoost(dataset,depth,mincount,iterations): 162 | 163 | dataset = np.array(dataset) 164 | 165 | #Create a list to hold the weak learners (shallow regression trees) 166 | weaks = [] 167 | 168 | #Let's run the loop for the number of iterations (number of learners) 169 | for itr in range(iterations): 170 | 171 | #Create decision tree from the data-set 172 | ds = build_tree(dataset,depth,mincount) 173 | 174 | #Create a list to store the predictions of the current tree 175 | pred=[] 176 | 177 | #Create a list to store actual outputs 178 | actual = [] 179 | 180 | #Let's predict output for each instance in the data set 181 | for row in dataset: 182 | actual.append(row[-1]) 183 | pred.append(predict(ds, row)) 184 | 185 | #Here we will find out the difference between the predicted and actual output 186 | residuals,error = getResidual(actual, pred) 187 | 188 | #Print the error status 189 | print("\nClassifier %i error is %.5f"%(itr,error)) 190 | 191 | #Append the weak learner to the list before the convergence check, so the learner that achieves convergence is not discarded 192 | weaks.append(ds) 193 | 194 | #Check for the convergence 195 | if error<=0.00001: 196 | break 197 | 198 | #Replace the previous labels with the current differences(Residuals) 199 | dataset[:,-1] = residuals 200 | 201 | return weaks 202 | 203 | def accuracy_metric(actual, predicted): 204 | correct = 0 205 | for i in range(len(actual)): 206 | if actual[i] == predicted[i]: 207 | correct += 1 208 | return correct / float(len(actual)) * 100.0 -------------------------------------------------------------------------------- /Chapter07/Data/train_modified.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Ensemble-Machine-Learning/1fcc546f88517e35309a4f37ff8c252f9003c29f/Chapter07/Data/train_modified.csv -------------------------------------------------------------------------------- /Chapter07/xgBoost.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 23-Oct-2017 3 | 4 | @author: aii32199 5 | ''' 6 | 7 | # First XGBoost model for Pima Indians dataset 8 | 9 | #Load the required libraries 10 | #Numpy for reading the csv file 11 | from numpy import loadtxt 12 | 13 | #Import XGBoost classifier 14 | from xgboost import XGBClassifier 15 | 16 | #We will use sklearn to divide our data set into training and test set 17 | from sklearn.model_selection import train_test_split 18 | 19 | #We will use sklearn's accuracy metric to evaluate the performance of the trained model 20 | from sklearn.metrics import
accuracy_score 21 | 22 | #Let's load the dataset into the numpy array 23 | dataset = loadtxt('pima-indians-diabetes.csv', delimiter=",") 24 | 25 | #Split data into X (input variables) and Y (output variable/class) 26 | X = dataset[:,0:8] 27 | Y = dataset[:,8] 28 | 29 | #Create training and test set with 33% of the data in the test set and 67% for training the model 30 | seed = 7 31 | test_size = 0.33 32 | X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=seed) 33 | 34 | #Train our first model on the created training set 35 | model = XGBClassifier() 36 | model.fit(X_train, y_train) 37 | 38 | #Let's see the prediction from the trained model 39 | y_pred = model.predict(X_test) 40 | 41 | #Create a list of predictions for evaluation purpose 42 | predictions = [round(value) for value in y_pred] 43 | 44 | #Evaluate predictions using accuracy metric 45 | accuracy = accuracy_score(y_test, predictions) 46 | 47 | #Print the accuracy 48 | print("Accuracy of the trained model is: %.2f%%" % (accuracy * 100.0)) 49 | 50 | print(model) -------------------------------------------------------------------------------- /Chapter07/xgboost_param_tune.py: -------------------------------------------------------------------------------- 1 | # XGBoost on Otto dataset, Tune n_estimators and max_depth 2 | from pandas import read_csv 3 | from xgboost import XGBClassifier 4 | from sklearn.model_selection import GridSearchCV 5 | from sklearn.model_selection import StratifiedKFold 6 | from sklearn.preprocessing import LabelEncoder 7 | import matplotlib 8 | matplotlib.use('Agg') 9 | from matplotlib import pyplot 10 | import numpy 11 | 12 | # load data 13 | data = read_csv('train.csv') 14 | dataset = data.values 15 | 16 | # split data into X and y 17 | X = dataset[:,0:94] 18 | y = dataset[:,94] 19 | 20 | # encode string class values as integers 21 | label_encoded_y = LabelEncoder().fit_transform(y) 22 | 23 | # grid search 24 | model = XGBClassifier() 25 | n_estimators = [50, 100, 150, 200] 26 | max_depth = [2, 4, 6, 8] 27 | print(max_depth) 28 | param_grid = dict(max_depth=max_depth, n_estimators=n_estimators) 29 | kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7) 30 | grid_search = GridSearchCV(model, param_grid, scoring="neg_log_loss", n_jobs=-1, cv=kfold, verbose=1) 31 | grid_result = grid_search.fit(X, label_encoded_y) 32 | 33 | # summarize results 34 | print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_)) 35 | means = grid_result.cv_results_['mean_test_score'] 36 | stds = grid_result.cv_results_['std_test_score'] 37 | params = grid_result.cv_results_['params'] 38 | for mean, stdev, param in zip(means, stds, params): 39 | print("%f (%f) with: %r" % (mean, stdev, param)) 40 | 41 | # plot results 42 | scores = numpy.array(means).reshape(len(max_depth), len(n_estimators)) 43 | for i, value in enumerate(max_depth): 44 | pyplot.plot(n_estimators, scores[i], label='depth: ' + str(value)) 45 | pyplot.legend() 46 | pyplot.xlabel('n_estimators') 47 | pyplot.ylabel('Log Loss') 48 | pyplot.savefig('n_estimators_vs_max_depth.png') 49 | 50 | ######################################### 51 | 52 | # XGBoost on Otto dataset, Tune n_estimators 53 | 54 | # load data 55 | data = read_csv('train.csv') 56 | dataset = data.values 57 | 58 | # split data into X and y 59 | X = dataset[:,0:94] 60 | y = dataset[:,94] 61 | 62 | # encode string class values as integers 63 | label_encoded_y = LabelEncoder().fit_transform(y) 64 | 65 | # grid search 66 | model = XGBClassifier() 67
| n_estimators = range(50, 400, 50) 68 | param_grid = dict(n_estimators=n_estimators) 69 | kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7) 70 | grid_search = GridSearchCV(model, param_grid, scoring="neg_log_loss", n_jobs=-1, cv=kfold) 71 | grid_result = grid_search.fit(X, label_encoded_y) 72 | 73 | # summarize results 74 | print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_)) 75 | means = grid_result.cv_results_['mean_test_score'] 76 | stds = grid_result.cv_results_['std_test_score'] 77 | params = grid_result.cv_results_['params'] 78 | for mean, stdev, param in zip(means, stds, params): 79 | print("%f (%f) with: %r" % (mean, stdev, param)) 80 | 81 | # plot (clear the curves left over from the previous grid search so they do not overlap) 82 | pyplot.clf(); pyplot.errorbar(n_estimators, means, yerr=stds) 83 | pyplot.title("XGBoost n_estimators vs Log Loss") 84 | pyplot.xlabel('n_estimators') 85 | pyplot.ylabel('Log Loss') 86 | pyplot.savefig('n_estimators.png') 87 | 88 | ############################################# 89 | # XGBoost on Otto dataset, Tune max_depth 90 | 91 | # load data 92 | data = read_csv('train.csv') 93 | dataset = data.values 94 | 95 | # split data into X and y 96 | X = dataset[:,0:94] 97 | y = dataset[:,94] 98 | 99 | # encode string class values as integers 100 | label_encoded_y = LabelEncoder().fit_transform(y) 101 | 102 | # grid search 103 | model = XGBClassifier() 104 | max_depth = range(1, 11, 2) 105 | print(max_depth) 106 | param_grid = dict(max_depth=max_depth) 107 | kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7) 108 | grid_search = GridSearchCV(model, param_grid, scoring="neg_log_loss", n_jobs=-1, cv=kfold, verbose=1) 109 | grid_result = grid_search.fit(X, label_encoded_y) 110 | 111 | # summarize results 112 | print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_)) 113 | means = grid_result.cv_results_['mean_test_score'] 114 | stds = grid_result.cv_results_['std_test_score'] 115 | params = grid_result.cv_results_['params'] 116 | for mean, stdev, param in zip(means, stds, params): 117 | print("%f (%f) with: %r" % (mean, stdev, param)) 118 | 119 | # plot (again clear the previous figure before drawing) 120 | pyplot.clf(); pyplot.errorbar(max_depth, means, yerr=stds) 121 | pyplot.title("XGBoost max_depth vs Log Loss") 122 | pyplot.xlabel('max_depth') 123 | pyplot.ylabel('Log Loss') 124 | pyplot.savefig('max_depth.png') -------------------------------------------------------------------------------- /Chapter08/StackedGeneralization.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 27-Oct-2017 3 | 4 | @author: aii32199 5 | ''' 6 | # Test stacking on the sonar dataset 7 | from random import seed 8 | from random import randrange 9 | from csv import reader 10 | from math import sqrt 11 | from math import exp 12 | 13 | # Load a CSV file 14 | def load_csv(filename): 15 | dataset = list() 16 | with open(filename, 'r') as file: 17 | csv_reader = reader(file) 18 | for row in csv_reader: 19 | if not row: 20 | continue 21 | dataset.append(row) 22 | return dataset 23 | 24 | # Convert string column to float 25 | def str_column_to_float(dataset, column): 26 | for row in dataset: 27 | row[column] = float(row[column].strip()) 28 | 29 | # Convert string column to integer 30 | def str_column_to_int(dataset, column): 31 | class_values = [row[column] for row in dataset] 32 | unique = set(class_values) 33 | lookup = dict() 34 | for i, value in enumerate(unique): 35 | lookup[value] = i 36 | for row in dataset: 37 | row[column] = lookup[row[column]] 38 | return lookup 39 | 40 | # Split a dataset into k
folds 41 | def cross_validation_split(dataset, n_folds): 42 | dataset_split = list() 43 | dataset_copy = list(dataset) 44 | fold_size = int(len(dataset) / n_folds) 45 | for i in range(n_folds): 46 | fold = list() 47 | while len(fold) < fold_size: 48 | index = randrange(len(dataset_copy)) 49 | fold.append(dataset_copy.pop(index)) 50 | dataset_split.append(fold) 51 | return dataset_split 52 | 53 | # Calculate accuracy percentage 54 | def accuracy_metric(actual, predicted): 55 | correct = 0 56 | for i in range(len(actual)): 57 | if actual[i] == predicted[i]: 58 | correct += 1 59 | return correct / float(len(actual)) * 100.0 60 | 61 | # Evaluate an algorithm using a cross validation split 62 | def evaluate_algorithm(dataset, algorithm, n_folds, *args): 63 | folds = cross_validation_split(dataset, n_folds) 64 | scores = list() 65 | for fold in folds: 66 | train_set = list(folds) 67 | train_set.remove(fold) 68 | train_set = sum(train_set, []) 69 | test_set = list() 70 | for row in fold: 71 | row_copy = list(row) 72 | test_set.append(row_copy) 73 | row_copy[-1] = None 74 | predicted = algorithm(train_set, test_set, *args) 75 | actual = [row[-1] for row in fold] 76 | accuracy = accuracy_metric(actual, predicted) 77 | scores.append(accuracy) 78 | return scores 79 | 80 | # Calculate the Euclidean distance between two vectors 81 | def euclidean_distance(row1, row2): 82 | distance = 0.0 83 | for i in range(len(row1)-1): 84 | distance += (row1[i] - row2[i])**2 85 | return sqrt(distance) 86 | 87 | # Locate neighbors for a new row 88 | def get_neighbors(train, test_row, num_neighbors): 89 | distances = list() 90 | for train_row in train: 91 | dist = euclidean_distance(test_row, train_row) 92 | distances.append((train_row, dist)) 93 | distances.sort(key=lambda tup: tup[1]) 94 | neighbors = list() 95 | for i in range(num_neighbors): 96 | neighbors.append(distances[i][0]) 97 | return neighbors 98 | 99 | # Make a prediction with kNN 100 | def knn_predict(model, test_row, num_neighbors=2): 101 | neighbors = get_neighbors(model, test_row, num_neighbors) 102 | output_values = [row[-1] for row in neighbors] 103 | prediction = max(set(output_values), key=output_values.count) 104 | return prediction 105 | 106 | # Prepare the kNN model 107 | def knn_model(train): 108 | return train 109 | 110 | # Make a prediction with weights 111 | def perceptron_predict(weights,row): 112 | #Row is the input instance 113 | 114 | #We will treat the first weight as the bias to simplify the calculations 115 | activation = weights[0] 116 | 117 | #Now run a loop to multiply each attribute value of the instance with the weight 118 | #And add the result to the running activation 119 | for i in range(len(row)-1): 120 | activation += weights[i + 1] * row[i] 121 | 122 | #Here we will return 1 if the activation is non-negative and 0 otherwise 123 | return 1.0 if activation >= 0.0 else 0.0 124 | 125 | # Estimate Perceptron weights using stochastic gradient descent 126 | def perceptron_model(train, l_rate=0.01, n_epoch=5000): 127 | 128 | #Let's initialize the weights to 0 129 | weights = [0.0 for i in range(len(train[0]))] 130 | 131 | #We will update the weights for the given number of epochs 132 | for epoch in range(n_epoch): 133 | 134 | #Extract each row from the training set 135 | for row in train: 136 | 137 | #Predict the value for the instance 138 | prediction = perceptron_predict(weights,row) 139 | 140 | #Calculate the difference(gradient) between actual and predicted value 141 | error = row[-1] - prediction 142 | 143
| #Update the bias value using the given learning rate and error 144 | weights[0] = weights[0] + l_rate * error 145 | 146 | #Update the weights for each attribute using the learning rate 147 | for i in range(len(row)-1): 148 | weights[i + 1] = weights[i + 1] + l_rate * error * row[i] 149 | 150 | #Return the updated weights (weights[0] holds the bias) 151 | return weights 152 | 153 | # Make a prediction with coefficients 154 | def logistic_regression_predict(model, row): 155 | 156 | #The first weight of the model will be the bias, similar to the perceptron function 157 | yhat = model[0] 158 | 159 | #We will run a loop to multiply each attribute value with the corresponding weights 160 | #This is similar to the activation calculation in the perceptron algorithm 161 | for i in range(len(row)-1): 162 | yhat += model[i + 1] * row[i] 163 | 164 | #Here we will apply the logistic function to the linear combination of weights and attributes 165 | #This is the place where linear and logistic regression differ 166 | return 1.0 / (1.0 + exp(-yhat)) 167 | 168 | # Estimate logistic regression coefficients using stochastic gradient descent 169 | def logistic_regression_model(train, l_rate=0.01, n_epoch=5000): 170 | 171 | #Initialize the weights with zero values 172 | coef = [0.0 for i in range(len(train[0]))] 173 | 174 | #Repeat the procedure for the given number of epochs 175 | for epoch in range(n_epoch): 176 | 177 | #Get prediction for each row and update weights based on the error value 178 | for row in train: 179 | 180 | #Predict y for the given x 181 | yhat = logistic_regression_predict(coef, row) 182 | 183 | #Get the error value (gradient/slope/change) 184 | error = row[-1] - yhat 185 | 186 | #Apply gradient descent here to update the weights and biases 187 | #Update the bias first 188 | coef[0] = coef[0] + l_rate * error * yhat * (1.0 - yhat) 189 | 190 | #Now update the weights 191 | for i in range(len(row)-1): 192 | coef[i + 1] = coef[i + 1] + l_rate * error * yhat * (1.0 - yhat) * row[i] 193 | #Return the trained weights and biases 194 | return coef 195 | 196 | # Make predictions with sub-models and construct a new stacked row 197 | def to_stacked_row(models, predict_list, row): 198 | 199 | #Let's create an empty list to store predictions from the sub models 200 | stacked_row = list() 201 | 202 | #Run a loop to fetch the stored models from the list 203 | for i in range(len(models)): 204 | 205 | #Get the prediction for the row from each model 206 | prediction = predict_list[i](models[i], row) 207 | 208 | #Store the prediction in the list 209 | stacked_row.append(prediction) 210 | 211 | #Append the class value to the new row 212 | stacked_row.append(row[-1]) 213 | 214 | #Extend the old row by adding the stacked row 215 | return row[0:len(row)-1] + stacked_row 216 | 217 | # Stacked Generalization Algorithm 218 | def stacking(train, test): 219 | 220 | #Let's define the sub models first 221 | model_list = [knn_model, perceptron_model] 222 | 223 | #We will create a list of prediction functions to build the new rows 224 | predict_list = [knn_predict, perceptron_predict] 225 | 226 | #Create an empty list to store the trained models 227 | models = list() 228 | 229 | #Let's train each sub model individually on the dataset 230 | for i in range(len(model_list)): 231 | model = model_list[i](train) 232 | models.append(model) 233 | 234 | #Create a new stacked data set from the predictions of the sub models 235 | stacked_dataset = list() 236 | for row in train: 237 | 238 | #Get new row 239 | stacked_row = to_stacked_row(models, predict_list, row) 240 | 241 | #Append it to new dataset 242 |
stacked_dataset.append(stacked_row) 243 | 244 | #We will train our final classifier on the stacked dataset 245 | stacked_model = logistic_regression_model(stacked_dataset) 246 | 247 | #Let's create a list for the predictions of the stacked model 248 | predictions = list() 249 | 250 | #Here we will combine all the classifiers together to make the stack of classifiers 251 | for row in test: 252 | 253 | #Get new row from the predictions of the sub models 254 | stacked_row = to_stacked_row(models, predict_list, row) 255 | 256 | #Append the new row to the stacked dataset (this has no effect on the already-trained meta-classifier) 257 | stacked_dataset.append(stacked_row) 258 | 259 | #Classify the new row using the final classifier 260 | prediction = logistic_regression_predict(stacked_model, stacked_row) 261 | 262 | #As the final classifier gives a continuous value, round it to the nearest integer 263 | prediction = round(prediction) 264 | 265 | #Append the prediction to the final list of predictions 266 | predictions.append(prediction) 267 | return predictions 268 | 269 | # Test stacking on the sonar dataset 270 | seed(1) 271 | 272 | # load and prepare data 273 | filename = 'sonar.all-data.csv' 274 | dataset = load_csv(filename) 275 | 276 | # convert string attributes to integers 277 | for i in range(len(dataset[0])-1): 278 | str_column_to_float(dataset, i) 279 | 280 | # convert class column to integers 281 | str_column_to_int(dataset, len(dataset[0])-1) 282 | n_folds = 5 283 | scores = evaluate_algorithm(dataset, stacking, n_folds) 284 | 285 | print('Scores: %s' % scores) 286 | print('Mean Accuracy: %.3f%%' % (sum(scores)/float(len(scores)))) -------------------------------------------------------------------------------- /Chapter09/Data/graph_feat_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Ensemble-Machine-Learning/1fcc546f88517e35309a4f37ff8c252f9003c29f/Chapter09/Data/graph_feat_4.png -------------------------------------------------------------------------------- /Chapter09/FeatureSelection_PCA.py: -------------------------------------------------------------------------------- 1 | #Import the required packages 2 | 3 | #Import pandas to read csv 4 | import pandas 5 | 6 | #Import numpy for array related operations 7 | import numpy 8 | 9 | #Import sklearn's PCA algorithm 10 | from sklearn.decomposition import PCA 11 | 12 | #URL for loading the data set 13 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data" 14 | 15 | #Define the attribute names 16 | names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'] 17 | 18 | #Create pandas data frame by loading the data from URL 19 | dataframe = pandas.read_csv(url, names=names) 20 | 21 | #Create array from data values 22 | array = dataframe.values 23 | 24 | #Split the data into input and target 25 | X = array[:,0:8] 26 | Y = array[:,8] 27 | 28 | #Feature extraction 29 | pca = PCA(n_components=3) 30 | fit = pca.fit(X) 31 | 32 | #Summarize components 33 | print("Explained Variance: %s" % fit.explained_variance_ratio_) 34 | print(fit.components_) -------------------------------------------------------------------------------- /Chapter09/RF_feature_selection.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 28-Oct-2017 3 | 4 | @author: DX 5 | ''' 6 | #Import the supporting libraries 7 | 8 | #Import pandas to load the data set from csv file 9 | from pandas import read_csv 10 | 11 | #Import numpy for array based
operations and calculations 12 | import numpy as np 13 | 14 | #Import Random Forest classifier class from sklearn 15 | from sklearn.ensemble import RandomForestClassifier 16 | 17 | #Import sklearn's SelectFromModel feature selector 18 | from sklearn.feature_selection import SelectFromModel 19 | 20 | np.random.seed(1) 21 | 22 | #Function to create Train and Test set from the original data set 23 | def getTrainTestData(dataset,split): 24 | np.random.seed(0) 25 | training = [] 26 | testing = [] 27 | 28 | np.random.shuffle(dataset) 29 | shape = np.shape(dataset) 30 | trainlength = np.uint16(np.floor(split*shape[0])) 31 | 32 | for i in range(trainlength): 33 | training.append(dataset[i]) 34 | 35 | for i in range(trainlength,shape[0]): 36 | testing.append(dataset[i]) 37 | training = np.array(training) 38 | testing = np.array(testing) 39 | return training,testing 40 | 41 | #Function to evaluate model performance 42 | def getAccuracy(pre,ytest): 43 | count = 0 44 | for i in range(len(ytest)): 45 | if ytest[i]==pre[i]: 46 | count+=1 47 | 48 | acc = float(count)/len(ytest) 49 | return acc 50 | 51 | 52 | #Load data set as pandas data frame 53 | data = read_csv('train.csv') 54 | 55 | #Extract attribute names from the data frame 56 | feat = data.keys() 57 | feat_labels = feat.get_values() 58 | 59 | #Extract data values from the data frame 60 | dataset = data.values 61 | 62 | #Shuffle the data set 63 | np.random.shuffle(dataset) 64 | 65 | #We will select 50,000 instances to train the classifier 66 | inst = 50000 67 | 68 | #Extract the 50,000 instances from the data set 69 | dataset = dataset[0:inst,:] 70 | 71 | #Create Training and Testing data for performance evaluation 72 | train,test = getTrainTestData(dataset, 0.7) 73 | 74 | #Split data into input and output variable with selected features 75 | Xtrain = train[:,0:94] 76 | ytrain = train[:,94] 77 | 78 | shape = np.shape(Xtrain) 79 | print("Shape of the data set ",shape) 80 | #Print the size of Data in MBs 81 | print("Size of Data set before feature selection: %.2f MB"%(Xtrain.nbytes/1e6)) 82 | 83 | #Let's select the test data for model evaluation purpose 84 | Xtest = test[:,0:94] 85 | ytest = test[:,94] 86 | 87 | #Create a random forest classifier with the following parameters 88 | trees = 250 89 | max_feat = 7 90 | max_depth = 30 91 | min_sample = 2 92 | 93 | clf = RandomForestClassifier(n_estimators=trees, 94 | max_features=max_feat, 95 | max_depth=max_depth, 96 | min_samples_split= min_sample, 97 | random_state=0, 98 | n_jobs=-1) 99 | 100 | #Train the classifier and calculate the training time 101 | import time 102 | start = time.time() 103 | clf.fit(Xtrain, ytrain) 104 | end = time.time() 105 | 106 | #Let's note down the model training time 107 | print("Execution time for building the forest is: %f"%(float(end)-float(start))) 108 | pre = clf.predict(Xtest) 109 | 110 | #Evaluate the model performance for the test data 111 | acc = getAccuracy(pre, ytest) 112 | print("Accuracy of model before feature selection is %.2f"%(100*acc)) 113 | 114 | #Once we have trained the model we will rank all the features 115 | 116 | for feature in zip(feat_labels, clf.feature_importances_): 117 | print(feature) 118 | 119 | #Select features which have a higher contribution to the final prediction 120 | sfm = SelectFromModel(clf, threshold=0.01) 121 | sfm.fit(Xtrain,ytrain) 122 | 123 | #Transform input data set 124 | Xtrain_1 = sfm.transform(Xtrain) 125 | Xtest_1 = sfm.transform(Xtest) 126 | 127 | #Let's see the size and shape of the new data set 128 | print("Size of Data set after
feature selection: %.2f MB"%(Xtrain_1.nbytes/1e6)) 129 | shape = np.shape(Xtrain_1) 130 | print("Shape of the data set ",shape) 131 | 132 | #Model training time 133 | start = time.time() 134 | clf.fit(Xtrain_1, ytrain) 135 | end = time.time() 136 | print("Execution time for building the forest is: %f"%(float(end)-float(start))) 137 | 138 | #Let's evaluate the model on the test data 139 | pre = clf.predict(Xtest_1) 140 | count = 0 141 | 142 | acc2 = getAccuracy(pre, ytest) 143 | 144 | print("accuracy after feature selection %.2f"%(100*acc2)) -------------------------------------------------------------------------------- /Chapter09/RecursiveFeatureElimination.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 02-Nov-2017 3 | 4 | @author: aii32199 5 | ''' 6 | 7 | #Import the required packages 8 | 9 | #Import pandas to read csv 10 | import pandas 11 | 12 | #Import numpy for array related operations 13 | import numpy 14 | 15 | #Import sklearn's feature selection algorithm 16 | from sklearn.feature_selection import RFE 17 | 18 | #Import LogisticRegression to use as the base estimator for RFE 19 | from sklearn.linear_model import LogisticRegression 20 | 21 | #URL for loading the data set 22 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data" 23 | 24 | #Define the attribute names 25 | names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'] 26 | 27 | #Create pandas data frame by loading the data from URL 28 | dataframe = pandas.read_csv(url, names=names) 29 | 30 | #Create array from data values 31 | array = dataframe.values 32 | 33 | #Split the data into input and target 34 | X = array[:,0:8] 35 | Y = array[:,8] 36 | 37 | #Feature extraction: select the top 3 features 38 | model = LogisticRegression() 39 | rfe = RFE(model, 3) 40 | fit = rfe.fit(X, Y) 41 | 42 | print("Num Features: %d"% fit.n_features_) 43 | print("Selected Features: %s"% fit.support_) 44 | print("Feature Ranking: %s"% fit.ranking_) 45 | -------------------------------------------------------------------------------- /Chapter09/SVM_KernelTrick.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 04-Nov-2017 3 | 4 | @author: DX 5 | ''' 6 | #We will use sklearn's make_circles to create the data 7 | from sklearn.datasets import make_circles 8 | 9 | #Numpy will help us with array related operations 10 | import numpy as np 11 | 12 | #We will use pylab for visualization of plots 13 | import pylab as pl 14 | 15 | #Import our SVM classifier from sklearn 16 | from sklearn.svm import SVC 17 | 18 | #Generate the data set using the make_circles function 19 | X, Y = make_circles(n_samples=800, noise=0.07, factor=0.4) 20 | 21 | #Let's plot the points and see 22 | # print "...Showing dataset in new window..."
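#(A hedged illustration, not in the original source, assuming roughly noise-free rings: the feature map fn_kernel defined below sends an inner-ring point such as (0.3, 0.2) to z = 0.3**2 + 0.2**2 = 0.13 and an outer-ring point such as (0.8, 0.6) to z = 1.0, so a horizontal plane in the lifted 3D space separates the two rings.)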
23 | pl.figure(figsize=(10, 8)) 24 | pl.subplot(111) 25 | pl.scatter(X[:, 0], X[:, 1], marker='o', c=Y) 26 | # pl.show() 27 | 28 | #Feature map to lift the data into a higher-dimensional space 29 | def fn_kernel(x1, x2): 30 | 31 | # Implements the feature map phi(x1,x2) = [x1, x2, x1^2 + x2^2] 32 | return np.array([x1, x2, x1**2.0 + x2**2.0]) 33 | 34 | #Create a list to store transformed points 35 | transformed = [] 36 | 37 | #Transform each point into the new subspace 38 | for points in X: 39 | transformed.append(fn_kernel(points[0], points[1])) 40 | transformed = np.array(transformed) 41 | 42 | #We will use 3D plots to visualize the data in the higher dimension 43 | from mpl_toolkits.mplot3d import Axes3D 44 | 45 | #Import matplotlib to plot the data 46 | import matplotlib.pyplot as plt 47 | 48 | #Let's plot the original data first 49 | fig = plt.figure(figsize=(20,8)) 50 | ax = fig.add_subplot(121) 51 | ax.scatter(X[:, 0], X[:, 1], marker='o', c=Y) 52 | ax.set_xlabel('X Label') 53 | ax.set_ylabel('Y Label') 54 | ax.set_title("Data in 2D (Non-separable)") 55 | 56 | #Here we will plot the transformed data 57 | ax = fig.add_subplot(122, projection='3d') 58 | ax.scatter(transformed[:, 0], transformed[:, 1],transformed[:, 2], marker='o', c=Y) 59 | ax.set_xlabel('X Label') 60 | ax.set_ylabel('Y Label') 61 | ax.set_zlabel('Z Label') 62 | ax.set_title("Data in 3D (separable)") 63 | 64 | #Finally show all the plots 65 | plt.show() 66 | 67 | #Function to create Train and Test set from the original data set 68 | def getTrainTestData(dataset,split): 69 | np.random.seed(0) 70 | training = [] 71 | testing = [] 72 | 73 | np.random.shuffle(dataset) 74 | shape = np.shape(dataset) 75 | trainlength = np.uint16(np.floor(split*shape[0])) 76 | 77 | for i in range(trainlength): 78 | training.append(dataset[i]) 79 | 80 | for i in range(trainlength,shape[0]): 81 | testing.append(dataset[i]) 82 | training = np.array(training) 83 | testing = np.array(testing) 84 | return training,testing 85 | 86 | #Function to evaluate model performance 87 | def getAccuracy(pre,ytest): 88 | count = 0 89 | for i in range(len(ytest)): 90 | if ytest[i]==pre[i]: 91 | count+=1 92 | 93 | acc = float(count)/len(ytest) 94 | return acc 95 | 96 | #Let's merge the input and output variables to create the train and test data 97 | dataset = np.c_[X,Y] 98 | 99 | #We will use our train and test split function 100 | train,test = getTrainTestData(dataset, 0.7) 101 | 102 | #Extract training input and output 103 | x_train = train[:,0:2] 104 | y_train = train[:,2] 105 | 106 | #Extract testing input and output 107 | x_test = test[:,0:2] 108 | y_test = test[:,2] 109 | 110 | #First we will train our classifier with the linear kernel 111 | clf = SVC(kernel='linear') 112 | clf.fit(x_train,y_train) 113 | 114 | #Predict the output on the test set 115 | pred = clf.predict(x_test) 116 | acc = getAccuracy(pred, y_test) 117 | print("Accuracy of the classifier with linear kernel is %.2f"%(100*acc)) 118 | 119 | #Now we will train our classifier with the RBF kernel 120 | clf = SVC(kernel='rbf',C=3.0) 121 | clf.fit(x_train,y_train) 122 | 123 | #Predict the output on the test set 124 | pred = clf.predict(x_test) 125 | acc = getAccuracy(pred, y_test) 126 | print("Accuracy of the classifier with rbf kernel is %.2f"%(100*acc)) 127 | 128 | 129 | -------------------------------------------------------------------------------- /Chapter09/SVM_Test.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 03-Nov-2017 3 | 4 | @author: DX 5 | ''' 6 | #To help us perform math operations 7 |
import numpy as np 8 | #to plot our data and model visually 9 | from matplotlib import pyplot as plt 10 | 11 | #Step 1 - Define our data 12 | 13 | #Input data - Of the form [X value, Y value, Bias term] 14 | X = np.array([ 15 | [-2,4,-1], 16 | [4,1,-1], 17 | [1, 6, -1], 18 | [2, 4, -1], 19 | [6, 2, -1], 20 | ]) 21 | 22 | #Associated output labels - First 2 examples are labeled '-1' and last 3 are labeled '+1' 23 | y = np.array([-1,-1,1,1,1]) 24 | 25 | #Let's plot these examples on a 2D graph 26 | #for each example 27 | for d, sample in enumerate(X): 28 | # Plot the negative samples (the first 2) 29 | if d < 2: 30 | plt.scatter(sample[0], sample[1], s=120, marker='_', linewidths=2) 31 | # Plot the positive samples (the last 3) 32 | else: 33 | plt.scatter(sample[0], sample[1], s=120, marker='+', linewidths=2) 34 | 35 | # Plot a possible hyperplane that separates the two classes. 36 | #we'll take two points and draw the line between them (naive guess) 37 | plt.plot([-2,6],[6,0.5]) 38 | plt.show() 39 | 40 | #Let's perform stochastic gradient descent to learn the separating hyperplane between both classes 41 | 42 | def svm_sgd_plot(X, Y): 43 | #Initialize our SVM's weight vector with zeros (3 values) 44 | w = np.zeros(len(X[0])) 45 | #The learning rate 46 | eta = 1 47 | #how many iterations to train for 48 | epochs = 100000 49 | #store misclassifications so we can plot how they change over time 50 | errors = [] 51 | 52 | #training part, gradient descent part 53 | for epoch in range(1,epochs): 54 | error = 0 55 | for i, x in enumerate(X): 56 | #misclassification 57 | if (Y[i]*np.dot(X[i], w)) < 1: 58 | #misclassified: update our weights with the hinge-loss gradient plus the regularization term 59 | w = w + eta * ( (X[i] * Y[i]) + (-2 *(1/epoch)* w) ) 60 | error = 1 61 | else: 62 | #correctly classified: apply only the regularization update 63 | w = w + eta * (-2 *(1/epoch)* w) 64 | errors.append(error) 65 | 66 | 67 | #Let's plot the rate of classification errors during training for our SVM 68 | plt.plot(errors, '|') 69 | plt.ylim(0.5,1.5) 70 | plt.axes().set_yticklabels([]) 71 | plt.xlabel('Epoch') 72 | plt.ylabel('Misclassified') 73 | plt.show() 74 | 75 | return w 76 | 77 | w = svm_sgd_plot(X,y) 78 | for d, sample in enumerate(X): 79 | # Plot the negative samples 80 | if d < 2: 81 | plt.scatter(sample[0], sample[1], s=120, marker='_', linewidths=2) 82 | # Plot the positive samples 83 | else: 84 | plt.scatter(sample[0], sample[1], s=120, marker='+', linewidths=2) 85 | 86 | # Add our test samples 87 | plt.scatter(2,2, s=120, marker='_', linewidths=2, color='yellow') 88 | plt.scatter(4,3, s=120, marker='+', linewidths=2, color='blue') 89 | 90 | # Plot the hyperplane calculated by svm_sgd_plot() 91 | x2=[w[0],w[1],-w[1],w[0]] 92 | x3=[w[0],w[1],w[1],-w[0]] 93 | 94 | x2x3 =np.array([x2,x3]) 95 | X,Y,U,V = zip(*x2x3) 96 | ax = plt.gca() 97 | ax.quiver(X,Y,U,V,scale=1, color='blue') 98 | plt.show() -------------------------------------------------------------------------------- /Chapter09/UnivariateFeatureSelection.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 02-Nov-2017 3 | 4 | @author: aii32199 5 | ''' 6 | # Feature Extraction with Univariate Statistical Tests (Chi-squared for classification) 7 | 8 | #Import the required packages 9 | 10 | #Import pandas to read csv 11 | import pandas 12 | 13 | #Import numpy for array related operations 14 | import numpy 15 | 16 | #Import sklearn's feature selection algorithm 17 | from sklearn.feature_selection import SelectKBest 18 | 19 | #Import chi2 for performing chi square
test 20 | from sklearn.feature_selection import chi2 21 | 22 | #URL for loading the data set 23 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data" 24 | 25 | #Define the attribute names 26 | names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'] 27 | 28 | #Create pandas data frame by loading the data from URL 29 | dataframe = pandas.read_csv(url, names=names) 30 | 31 | #Create array from data values 32 | array = dataframe.values 33 | 34 | #Split the data into input and target 35 | X = array[:,0:8] 36 | Y = array[:,8] 37 | 38 | #We will select the features using chi square 39 | test = SelectKBest(score_func=chi2, k=4) 40 | 41 | #Fit the function for ranking the features by score 42 | fit = test.fit(X, Y) 43 | 44 | #Summarize scores 45 | numpy.set_printoptions(precision=3) 46 | print(fit.scores_) 47 | 48 | #Apply the transformation on to data set 49 | features = fit.transform(X) 50 | 51 | #Summarize selected features 52 | print(features[0:5,:]) -------------------------------------------------------------------------------- /Chapter09/bcancer.csv: -------------------------------------------------------------------------------- 1 | 1000025,5,1,1,1,2,1,3,1,1,2 2 | 1002945,5,4,4,5,7,10,3,2,1,2 3 | 1015425,3,1,1,1,2,2,3,1,1,2 4 | 1016277,6,8,8,1,3,4,3,7,1,2 5 | 1017023,4,1,1,3,2,1,3,1,1,2 6 | 1017122,8,10,10,8,7,10,9,7,1,4 7 | 1018099,1,1,1,1,2,10,3,1,1,2 8 | 1018561,2,1,2,1,2,1,3,1,1,2 9 | 1033078,2,1,1,1,2,1,1,1,5,2 10 | 1033078,4,2,1,1,2,1,2,1,1,2 11 | 1035283,1,1,1,1,1,1,3,1,1,2 12 | 1036172,2,1,1,1,2,1,2,1,1,2 13 | 1041801,5,3,3,3,2,3,4,4,1,4 14 | 1043999,1,1,1,1,2,3,3,1,1,2 15 | 1044572,8,7,5,10,7,9,5,5,4,4 16 | 1047630,7,4,6,4,6,1,4,3,1,4 17 | 1048672,4,1,1,1,2,1,2,1,1,2 18 | 1049815,4,1,1,1,2,1,3,1,1,2 19 | 1050670,10,7,7,6,4,10,4,1,2,4 20 | 1050718,6,1,1,1,2,1,3,1,1,2 21 | 1054590,7,3,2,10,5,10,5,4,4,4 22 | 1054593,10,5,5,3,6,7,7,10,1,4 23 | 1056784,3,1,1,1,2,1,2,1,1,2 24 | 1057013,8,4,5,1,2,?,7,3,1,4 25 | 1059552,1,1,1,1,2,1,3,1,1,2 26 | 1065726,5,2,3,4,2,7,3,6,1,4 27 | 1066373,3,2,1,1,1,1,2,1,1,2 28 | 1066979,5,1,1,1,2,1,2,1,1,2 29 | 1067444,2,1,1,1,2,1,2,1,1,2 30 | 1070935,1,1,3,1,2,1,1,1,1,2 31 | 1070935,3,1,1,1,1,1,2,1,1,2 32 | 1071760,2,1,1,1,2,1,3,1,1,2 33 | 1072179,10,7,7,3,8,5,7,4,3,4 34 | 1074610,2,1,1,2,2,1,3,1,1,2 35 | 1075123,3,1,2,1,2,1,2,1,1,2 36 | 1079304,2,1,1,1,2,1,2,1,1,2 37 | 1080185,10,10,10,8,6,1,8,9,1,4 38 | 1081791,6,2,1,1,1,1,7,1,1,2 39 | 1084584,5,4,4,9,2,10,5,6,1,4 40 | 1091262,2,5,3,3,6,7,7,5,1,4 41 | 1096800,6,6,6,9,6,?,7,8,1,2 42 | 1099510,10,4,3,1,3,3,6,5,2,4 43 | 1100524,6,10,10,2,8,10,7,3,3,4 44 | 1102573,5,6,5,6,10,1,3,1,1,4 45 | 1103608,10,10,10,4,8,1,8,10,1,4 46 | 1103722,1,1,1,1,2,1,2,1,2,2 47 | 1105257,3,7,7,4,4,9,4,8,1,4 48 | 1105524,1,1,1,1,2,1,2,1,1,2 49 | 1106095,4,1,1,3,2,1,3,1,1,2 50 | 1106829,7,8,7,2,4,8,3,8,2,4 51 | 1108370,9,5,8,1,2,3,2,1,5,4 52 | 1108449,5,3,3,4,2,4,3,4,1,4 53 | 1110102,10,3,6,2,3,5,4,10,2,4 54 | 1110503,5,5,5,8,10,8,7,3,7,4 55 | 1110524,10,5,5,6,8,8,7,1,1,4 56 | 1111249,10,6,6,3,4,5,3,6,1,4 57 | 1112209,8,10,10,1,3,6,3,9,1,4 58 | 1113038,8,2,4,1,5,1,5,4,4,4 59 | 1113483,5,2,3,1,6,10,5,1,1,4 60 | 1113906,9,5,5,2,2,2,5,1,1,4 61 | 1115282,5,3,5,5,3,3,4,10,1,4 62 | 1115293,1,1,1,1,2,2,2,1,1,2 63 | 1116116,9,10,10,1,10,8,3,3,1,4 64 | 1116132,6,3,4,1,5,2,3,9,1,4 65 | 1116192,1,1,1,1,2,1,2,1,1,2 66 | 1116998,10,4,2,1,3,2,4,3,10,4 67 | 1117152,4,1,1,1,2,1,3,1,1,2 68 | 1118039,5,3,4,1,8,10,4,9,1,4 69 | 1120559,8,3,8,3,4,9,8,9,8,4 70 | 
1121732,1,1,1,1,2,1,3,2,1,2 71 | 1121919,5,1,3,1,2,1,2,1,1,2 72 | 1123061,6,10,2,8,10,2,7,8,10,4 73 | 1124651,1,3,3,2,2,1,7,2,1,2 74 | 1125035,9,4,5,10,6,10,4,8,1,4 75 | 1126417,10,6,4,1,3,4,3,2,3,4 76 | 1131294,1,1,2,1,2,2,4,2,1,2 77 | 1132347,1,1,4,1,2,1,2,1,1,2 78 | 1133041,5,3,1,2,2,1,2,1,1,2 79 | 1133136,3,1,1,1,2,3,3,1,1,2 80 | 1136142,2,1,1,1,3,1,2,1,1,2 81 | 1137156,2,2,2,1,1,1,7,1,1,2 82 | 1143978,4,1,1,2,2,1,2,1,1,2 83 | 1143978,5,2,1,1,2,1,3,1,1,2 84 | 1147044,3,1,1,1,2,2,7,1,1,2 85 | 1147699,3,5,7,8,8,9,7,10,7,4 86 | 1147748,5,10,6,1,10,4,4,10,10,4 87 | 1148278,3,3,6,4,5,8,4,4,1,4 88 | 1148873,3,6,6,6,5,10,6,8,3,4 89 | 1152331,4,1,1,1,2,1,3,1,1,2 90 | 1155546,2,1,1,2,3,1,2,1,1,2 91 | 1156272,1,1,1,1,2,1,3,1,1,2 92 | 1156948,3,1,1,2,2,1,1,1,1,2 93 | 1157734,4,1,1,1,2,1,3,1,1,2 94 | 1158247,1,1,1,1,2,1,2,1,1,2 95 | 1160476,2,1,1,1,2,1,3,1,1,2 96 | 1164066,1,1,1,1,2,1,3,1,1,2 97 | 1165297,2,1,1,2,2,1,1,1,1,2 98 | 1165790,5,1,1,1,2,1,3,1,1,2 99 | 1165926,9,6,9,2,10,6,2,9,10,4 100 | 1166630,7,5,6,10,5,10,7,9,4,4 101 | 1166654,10,3,5,1,10,5,3,10,2,4 102 | 1167439,2,3,4,4,2,5,2,5,1,4 103 | 1167471,4,1,2,1,2,1,3,1,1,2 104 | 1168359,8,2,3,1,6,3,7,1,1,4 105 | 1168736,10,10,10,10,10,1,8,8,8,4 106 | 1169049,7,3,4,4,3,3,3,2,7,4 107 | 1170419,10,10,10,8,2,10,4,1,1,4 108 | 1170420,1,6,8,10,8,10,5,7,1,4 109 | 1171710,1,1,1,1,2,1,2,3,1,2 110 | 1171710,6,5,4,4,3,9,7,8,3,4 111 | 1171795,1,3,1,2,2,2,5,3,2,2 112 | 1171845,8,6,4,3,5,9,3,1,1,4 113 | 1172152,10,3,3,10,2,10,7,3,3,4 114 | 1173216,10,10,10,3,10,8,8,1,1,4 115 | 1173235,3,3,2,1,2,3,3,1,1,2 116 | 1173347,1,1,1,1,2,5,1,1,1,2 117 | 1173347,8,3,3,1,2,2,3,2,1,2 118 | 1173509,4,5,5,10,4,10,7,5,8,4 119 | 1173514,1,1,1,1,4,3,1,1,1,2 120 | 1173681,3,2,1,1,2,2,3,1,1,2 121 | 1174057,1,1,2,2,2,1,3,1,1,2 122 | 1174057,4,2,1,1,2,2,3,1,1,2 123 | 1174131,10,10,10,2,10,10,5,3,3,4 124 | 1174428,5,3,5,1,8,10,5,3,1,4 125 | 1175937,5,4,6,7,9,7,8,10,1,4 126 | 1176406,1,1,1,1,2,1,2,1,1,2 127 | 1176881,7,5,3,7,4,10,7,5,5,4 128 | 1177027,3,1,1,1,2,1,3,1,1,2 129 | 1177399,8,3,5,4,5,10,1,6,2,4 130 | 1177512,1,1,1,1,10,1,1,1,1,2 131 | 1178580,5,1,3,1,2,1,2,1,1,2 132 | 1179818,2,1,1,1,2,1,3,1,1,2 133 | 1180194,5,10,8,10,8,10,3,6,3,4 134 | 1180523,3,1,1,1,2,1,2,2,1,2 135 | 1180831,3,1,1,1,3,1,2,1,1,2 136 | 1181356,5,1,1,1,2,2,3,3,1,2 137 | 1182404,4,1,1,1,2,1,2,1,1,2 138 | 1182410,3,1,1,1,2,1,1,1,1,2 139 | 1183240,4,1,2,1,2,1,2,1,1,2 140 | 1183246,1,1,1,1,1,?,2,1,1,2 141 | 1183516,3,1,1,1,2,1,1,1,1,2 142 | 1183911,2,1,1,1,2,1,1,1,1,2 143 | 1183983,9,5,5,4,4,5,4,3,3,4 144 | 1184184,1,1,1,1,2,5,1,1,1,2 145 | 1184241,2,1,1,1,2,1,2,1,1,2 146 | 1184840,1,1,3,1,2,?,2,1,1,2 147 | 1185609,3,4,5,2,6,8,4,1,1,4 148 | 1185610,1,1,1,1,3,2,2,1,1,2 149 | 1187457,3,1,1,3,8,1,5,8,1,2 150 | 1187805,8,8,7,4,10,10,7,8,7,4 151 | 1188472,1,1,1,1,1,1,3,1,1,2 152 | 1189266,7,2,4,1,6,10,5,4,3,4 153 | 1189286,10,10,8,6,4,5,8,10,1,4 154 | 1190394,4,1,1,1,2,3,1,1,1,2 155 | 1190485,1,1,1,1,2,1,1,1,1,2 156 | 1192325,5,5,5,6,3,10,3,1,1,4 157 | 1193091,1,2,2,1,2,1,2,1,1,2 158 | 1193210,2,1,1,1,2,1,3,1,1,2 159 | 1193683,1,1,2,1,3,?,1,1,1,2 160 | 1196295,9,9,10,3,6,10,7,10,6,4 161 | 1196915,10,7,7,4,5,10,5,7,2,4 162 | 1197080,4,1,1,1,2,1,3,2,1,2 163 | 1197270,3,1,1,1,2,1,3,1,1,2 164 | 1197440,1,1,1,2,1,3,1,1,7,2 165 | 1197510,5,1,1,1,2,?,3,1,1,2 166 | 1197979,4,1,1,1,2,2,3,2,1,2 167 | 1197993,5,6,7,8,8,10,3,10,3,4 168 | 1198128,10,8,10,10,6,1,3,1,10,4 169 | 1198641,3,1,1,1,2,1,3,1,1,2 170 | 1199219,1,1,1,2,1,1,1,1,1,2 171 | 1199731,3,1,1,1,2,1,1,1,1,2 172 | 1199983,1,1,1,1,2,1,3,1,1,2 173 | 
1200772,1,1,1,1,2,1,2,1,1,2 174 | 1200847,6,10,10,10,8,10,10,10,7,4 175 | 1200892,8,6,5,4,3,10,6,1,1,4 176 | 1200952,5,8,7,7,10,10,5,7,1,4 177 | 1201834,2,1,1,1,2,1,3,1,1,2 178 | 1201936,5,10,10,3,8,1,5,10,3,4 179 | 1202125,4,1,1,1,2,1,3,1,1,2 180 | 1202812,5,3,3,3,6,10,3,1,1,4 181 | 1203096,1,1,1,1,1,1,3,1,1,2 182 | 1204242,1,1,1,1,2,1,1,1,1,2 183 | 1204898,6,1,1,1,2,1,3,1,1,2 184 | 1205138,5,8,8,8,5,10,7,8,1,4 185 | 1205579,8,7,6,4,4,10,5,1,1,4 186 | 1206089,2,1,1,1,1,1,3,1,1,2 187 | 1206695,1,5,8,6,5,8,7,10,1,4 188 | 1206841,10,5,6,10,6,10,7,7,10,4 189 | 1207986,5,8,4,10,5,8,9,10,1,4 190 | 1208301,1,2,3,1,2,1,3,1,1,2 191 | 1210963,10,10,10,8,6,8,7,10,1,4 192 | 1211202,7,5,10,10,10,10,4,10,3,4 193 | 1212232,5,1,1,1,2,1,2,1,1,2 194 | 1212251,1,1,1,1,2,1,3,1,1,2 195 | 1212422,3,1,1,1,2,1,3,1,1,2 196 | 1212422,4,1,1,1,2,1,3,1,1,2 197 | 1213375,8,4,4,5,4,7,7,8,2,2 198 | 1213383,5,1,1,4,2,1,3,1,1,2 199 | 1214092,1,1,1,1,2,1,1,1,1,2 200 | 1214556,3,1,1,1,2,1,2,1,1,2 201 | 1214966,9,7,7,5,5,10,7,8,3,4 202 | 1216694,10,8,8,4,10,10,8,1,1,4 203 | 1216947,1,1,1,1,2,1,3,1,1,2 204 | 1217051,5,1,1,1,2,1,3,1,1,2 205 | 1217264,1,1,1,1,2,1,3,1,1,2 206 | 1218105,5,10,10,9,6,10,7,10,5,4 207 | 1218741,10,10,9,3,7,5,3,5,1,4 208 | 1218860,1,1,1,1,1,1,3,1,1,2 209 | 1218860,1,1,1,1,1,1,3,1,1,2 210 | 1219406,5,1,1,1,1,1,3,1,1,2 211 | 1219525,8,10,10,10,5,10,8,10,6,4 212 | 1219859,8,10,8,8,4,8,7,7,1,4 213 | 1220330,1,1,1,1,2,1,3,1,1,2 214 | 1221863,10,10,10,10,7,10,7,10,4,4 215 | 1222047,10,10,10,10,3,10,10,6,1,4 216 | 1222936,8,7,8,7,5,5,5,10,2,4 217 | 1223282,1,1,1,1,2,1,2,1,1,2 218 | 1223426,1,1,1,1,2,1,3,1,1,2 219 | 1223793,6,10,7,7,6,4,8,10,2,4 220 | 1223967,6,1,3,1,2,1,3,1,1,2 221 | 1224329,1,1,1,2,2,1,3,1,1,2 222 | 1225799,10,6,4,3,10,10,9,10,1,4 223 | 1226012,4,1,1,3,1,5,2,1,1,4 224 | 1226612,7,5,6,3,3,8,7,4,1,4 225 | 1227210,10,5,5,6,3,10,7,9,2,4 226 | 1227244,1,1,1,1,2,1,2,1,1,2 227 | 1227481,10,5,7,4,4,10,8,9,1,4 228 | 1228152,8,9,9,5,3,5,7,7,1,4 229 | 1228311,1,1,1,1,1,1,3,1,1,2 230 | 1230175,10,10,10,3,10,10,9,10,1,4 231 | 1230688,7,4,7,4,3,7,7,6,1,4 232 | 1231387,6,8,7,5,6,8,8,9,2,4 233 | 1231706,8,4,6,3,3,1,4,3,1,2 234 | 1232225,10,4,5,5,5,10,4,1,1,4 235 | 1236043,3,3,2,1,3,1,3,6,1,2 236 | 1241232,3,1,4,1,2,?,3,1,1,2 237 | 1241559,10,8,8,2,8,10,4,8,10,4 238 | 1241679,9,8,8,5,6,2,4,10,4,4 239 | 1242364,8,10,10,8,6,9,3,10,10,4 240 | 1243256,10,4,3,2,3,10,5,3,2,4 241 | 1270479,5,1,3,3,2,2,2,3,1,2 242 | 1276091,3,1,1,3,1,1,3,1,1,2 243 | 1277018,2,1,1,1,2,1,3,1,1,2 244 | 128059,1,1,1,1,2,5,5,1,1,2 245 | 1285531,1,1,1,1,2,1,3,1,1,2 246 | 1287775,5,1,1,2,2,2,3,1,1,2 247 | 144888,8,10,10,8,5,10,7,8,1,4 248 | 145447,8,4,4,1,2,9,3,3,1,4 249 | 167528,4,1,1,1,2,1,3,6,1,2 250 | 169356,3,1,1,1,2,?,3,1,1,2 251 | 183913,1,2,2,1,2,1,1,1,1,2 252 | 191250,10,4,4,10,2,10,5,3,3,4 253 | 1017023,6,3,3,5,3,10,3,5,3,2 254 | 1100524,6,10,10,2,8,10,7,3,3,4 255 | 1116116,9,10,10,1,10,8,3,3,1,4 256 | 1168736,5,6,6,2,4,10,3,6,1,4 257 | 1182404,3,1,1,1,2,1,1,1,1,2 258 | 1182404,3,1,1,1,2,1,2,1,1,2 259 | 1198641,3,1,1,1,2,1,3,1,1,2 260 | 242970,5,7,7,1,5,8,3,4,1,2 261 | 255644,10,5,8,10,3,10,5,1,3,4 262 | 263538,5,10,10,6,10,10,10,6,5,4 263 | 274137,8,8,9,4,5,10,7,8,1,4 264 | 303213,10,4,4,10,6,10,5,5,1,4 265 | 314428,7,9,4,10,10,3,5,3,3,4 266 | 1182404,5,1,4,1,2,1,3,2,1,2 267 | 1198641,10,10,6,3,3,10,4,3,2,4 268 | 320675,3,3,5,2,3,10,7,1,1,4 269 | 324427,10,8,8,2,3,4,8,7,8,4 270 | 385103,1,1,1,1,2,1,3,1,1,2 271 | 390840,8,4,7,1,3,10,3,9,2,4 272 | 411453,5,1,1,1,2,1,3,1,1,2 273 | 320675,3,3,5,2,3,10,7,1,1,4 274 | 
428903,7,2,4,1,3,4,3,3,1,4 275 | 431495,3,1,1,1,2,1,3,2,1,2 276 | 432809,3,1,3,1,2,?,2,1,1,2 277 | 434518,3,1,1,1,2,1,2,1,1,2 278 | 452264,1,1,1,1,2,1,2,1,1,2 279 | 456282,1,1,1,1,2,1,3,1,1,2 280 | 476903,10,5,7,3,3,7,3,3,8,4 281 | 486283,3,1,1,1,2,1,3,1,1,2 282 | 486662,2,1,1,2,2,1,3,1,1,2 283 | 488173,1,4,3,10,4,10,5,6,1,4 284 | 492268,10,4,6,1,2,10,5,3,1,4 285 | 508234,7,4,5,10,2,10,3,8,2,4 286 | 527363,8,10,10,10,8,10,10,7,3,4 287 | 529329,10,10,10,10,10,10,4,10,10,4 288 | 535331,3,1,1,1,3,1,2,1,1,2 289 | 543558,6,1,3,1,4,5,5,10,1,4 290 | 555977,5,6,6,8,6,10,4,10,4,4 291 | 560680,1,1,1,1,2,1,1,1,1,2 292 | 561477,1,1,1,1,2,1,3,1,1,2 293 | 563649,8,8,8,1,2,?,6,10,1,4 294 | 601265,10,4,4,6,2,10,2,3,1,4 295 | 606140,1,1,1,1,2,?,2,1,1,2 296 | 606722,5,5,7,8,6,10,7,4,1,4 297 | 616240,5,3,4,3,4,5,4,7,1,2 298 | 61634,5,4,3,1,2,?,2,3,1,2 299 | 625201,8,2,1,1,5,1,1,1,1,2 300 | 63375,9,1,2,6,4,10,7,7,2,4 301 | 635844,8,4,10,5,4,4,7,10,1,4 302 | 636130,1,1,1,1,2,1,3,1,1,2 303 | 640744,10,10,10,7,9,10,7,10,10,4 304 | 646904,1,1,1,1,2,1,3,1,1,2 305 | 653777,8,3,4,9,3,10,3,3,1,4 306 | 659642,10,8,4,4,4,10,3,10,4,4 307 | 666090,1,1,1,1,2,1,3,1,1,2 308 | 666942,1,1,1,1,2,1,3,1,1,2 309 | 667204,7,8,7,6,4,3,8,8,4,4 310 | 673637,3,1,1,1,2,5,5,1,1,2 311 | 684955,2,1,1,1,3,1,2,1,1,2 312 | 688033,1,1,1,1,2,1,1,1,1,2 313 | 691628,8,6,4,10,10,1,3,5,1,4 314 | 693702,1,1,1,1,2,1,1,1,1,2 315 | 704097,1,1,1,1,1,1,2,1,1,2 316 | 704168,4,6,5,6,7,?,4,9,1,2 317 | 706426,5,5,5,2,5,10,4,3,1,4 318 | 709287,6,8,7,8,6,8,8,9,1,4 319 | 718641,1,1,1,1,5,1,3,1,1,2 320 | 721482,4,4,4,4,6,5,7,3,1,2 321 | 730881,7,6,3,2,5,10,7,4,6,4 322 | 733639,3,1,1,1,2,?,3,1,1,2 323 | 733639,3,1,1,1,2,1,3,1,1,2 324 | 733823,5,4,6,10,2,10,4,1,1,4 325 | 740492,1,1,1,1,2,1,3,1,1,2 326 | 743348,3,2,2,1,2,1,2,3,1,2 327 | 752904,10,1,1,1,2,10,5,4,1,4 328 | 756136,1,1,1,1,2,1,2,1,1,2 329 | 760001,8,10,3,2,6,4,3,10,1,4 330 | 760239,10,4,6,4,5,10,7,1,1,4 331 | 76389,10,4,7,2,2,8,6,1,1,4 332 | 764974,5,1,1,1,2,1,3,1,2,2 333 | 770066,5,2,2,2,2,1,2,2,1,2 334 | 785208,5,4,6,6,4,10,4,3,1,4 335 | 785615,8,6,7,3,3,10,3,4,2,4 336 | 792744,1,1,1,1,2,1,1,1,1,2 337 | 797327,6,5,5,8,4,10,3,4,1,4 338 | 798429,1,1,1,1,2,1,3,1,1,2 339 | 704097,1,1,1,1,1,1,2,1,1,2 340 | 806423,8,5,5,5,2,10,4,3,1,4 341 | 809912,10,3,3,1,2,10,7,6,1,4 342 | 810104,1,1,1,1,2,1,3,1,1,2 343 | 814265,2,1,1,1,2,1,1,1,1,2 344 | 814911,1,1,1,1,2,1,1,1,1,2 345 | 822829,7,6,4,8,10,10,9,5,3,4 346 | 826923,1,1,1,1,2,1,1,1,1,2 347 | 830690,5,2,2,2,3,1,1,3,1,2 348 | 831268,1,1,1,1,1,1,1,3,1,2 349 | 832226,3,4,4,10,5,1,3,3,1,4 350 | 832567,4,2,3,5,3,8,7,6,1,4 351 | 836433,5,1,1,3,2,1,1,1,1,2 352 | 837082,2,1,1,1,2,1,3,1,1,2 353 | 846832,3,4,5,3,7,3,4,6,1,2 354 | 850831,2,7,10,10,7,10,4,9,4,4 355 | 855524,1,1,1,1,2,1,2,1,1,2 356 | 857774,4,1,1,1,3,1,2,2,1,2 357 | 859164,5,3,3,1,3,3,3,3,3,4 358 | 859350,8,10,10,7,10,10,7,3,8,4 359 | 866325,8,10,5,3,8,4,4,10,3,4 360 | 873549,10,3,5,4,3,7,3,5,3,4 361 | 877291,6,10,10,10,10,10,8,10,10,4 362 | 877943,3,10,3,10,6,10,5,1,4,4 363 | 888169,3,2,2,1,4,3,2,1,1,2 364 | 888523,4,4,4,2,2,3,2,1,1,2 365 | 896404,2,1,1,1,2,1,3,1,1,2 366 | 897172,2,1,1,1,2,1,2,1,1,2 367 | 95719,6,10,10,10,8,10,7,10,7,4 368 | 160296,5,8,8,10,5,10,8,10,3,4 369 | 342245,1,1,3,1,2,1,1,1,1,2 370 | 428598,1,1,3,1,1,1,2,1,1,2 371 | 492561,4,3,2,1,3,1,2,1,1,2 372 | 493452,1,1,3,1,2,1,1,1,1,2 373 | 493452,4,1,2,1,2,1,2,1,1,2 374 | 521441,5,1,1,2,2,1,2,1,1,2 375 | 560680,3,1,2,1,2,1,2,1,1,2 376 | 636437,1,1,1,1,2,1,1,1,1,2 377 | 640712,1,1,1,1,2,1,2,1,1,2 378 | 654244,1,1,1,1,1,1,2,1,1,2 379 | 
657753,3,1,1,4,3,1,2,2,1,2 380 | 685977,5,3,4,1,4,1,3,1,1,2 381 | 805448,1,1,1,1,2,1,1,1,1,2 382 | 846423,10,6,3,6,4,10,7,8,4,4 383 | 1002504,3,2,2,2,2,1,3,2,1,2 384 | 1022257,2,1,1,1,2,1,1,1,1,2 385 | 1026122,2,1,1,1,2,1,1,1,1,2 386 | 1071084,3,3,2,2,3,1,1,2,3,2 387 | 1080233,7,6,6,3,2,10,7,1,1,4 388 | 1114570,5,3,3,2,3,1,3,1,1,2 389 | 1114570,2,1,1,1,2,1,2,2,1,2 390 | 1116715,5,1,1,1,3,2,2,2,1,2 391 | 1131411,1,1,1,2,2,1,2,1,1,2 392 | 1151734,10,8,7,4,3,10,7,9,1,4 393 | 1156017,3,1,1,1,2,1,2,1,1,2 394 | 1158247,1,1,1,1,1,1,1,1,1,2 395 | 1158405,1,2,3,1,2,1,2,1,1,2 396 | 1168278,3,1,1,1,2,1,2,1,1,2 397 | 1176187,3,1,1,1,2,1,3,1,1,2 398 | 1196263,4,1,1,1,2,1,1,1,1,2 399 | 1196475,3,2,1,1,2,1,2,2,1,2 400 | 1206314,1,2,3,1,2,1,1,1,1,2 401 | 1211265,3,10,8,7,6,9,9,3,8,4 402 | 1213784,3,1,1,1,2,1,1,1,1,2 403 | 1223003,5,3,3,1,2,1,2,1,1,2 404 | 1223306,3,1,1,1,2,4,1,1,1,2 405 | 1223543,1,2,1,3,2,1,1,2,1,2 406 | 1229929,1,1,1,1,2,1,2,1,1,2 407 | 1231853,4,2,2,1,2,1,2,1,1,2 408 | 1234554,1,1,1,1,2,1,2,1,1,2 409 | 1236837,2,3,2,2,2,2,3,1,1,2 410 | 1237674,3,1,2,1,2,1,2,1,1,2 411 | 1238021,1,1,1,1,2,1,2,1,1,2 412 | 1238464,1,1,1,1,1,?,2,1,1,2 413 | 1238633,10,10,10,6,8,4,8,5,1,4 414 | 1238915,5,1,2,1,2,1,3,1,1,2 415 | 1238948,8,5,6,2,3,10,6,6,1,4 416 | 1239232,3,3,2,6,3,3,3,5,1,2 417 | 1239347,8,7,8,5,10,10,7,2,1,4 418 | 1239967,1,1,1,1,2,1,2,1,1,2 419 | 1240337,5,2,2,2,2,2,3,2,2,2 420 | 1253505,2,3,1,1,5,1,1,1,1,2 421 | 1255384,3,2,2,3,2,3,3,1,1,2 422 | 1257200,10,10,10,7,10,10,8,2,1,4 423 | 1257648,4,3,3,1,2,1,3,3,1,2 424 | 1257815,5,1,3,1,2,1,2,1,1,2 425 | 1257938,3,1,1,1,2,1,1,1,1,2 426 | 1258549,9,10,10,10,10,10,10,10,1,4 427 | 1258556,5,3,6,1,2,1,1,1,1,2 428 | 1266154,8,7,8,2,4,2,5,10,1,4 429 | 1272039,1,1,1,1,2,1,2,1,1,2 430 | 1276091,2,1,1,1,2,1,2,1,1,2 431 | 1276091,1,3,1,1,2,1,2,2,1,2 432 | 1276091,5,1,1,3,4,1,3,2,1,2 433 | 1277629,5,1,1,1,2,1,2,2,1,2 434 | 1293439,3,2,2,3,2,1,1,1,1,2 435 | 1293439,6,9,7,5,5,8,4,2,1,2 436 | 1294562,10,8,10,1,3,10,5,1,1,4 437 | 1295186,10,10,10,1,6,1,2,8,1,4 438 | 527337,4,1,1,1,2,1,1,1,1,2 439 | 558538,4,1,3,3,2,1,1,1,1,2 440 | 566509,5,1,1,1,2,1,1,1,1,2 441 | 608157,10,4,3,10,4,10,10,1,1,4 442 | 677910,5,2,2,4,2,4,1,1,1,2 443 | 734111,1,1,1,3,2,3,1,1,1,2 444 | 734111,1,1,1,1,2,2,1,1,1,2 445 | 780555,5,1,1,6,3,1,2,1,1,2 446 | 827627,2,1,1,1,2,1,1,1,1,2 447 | 1049837,1,1,1,1,2,1,1,1,1,2 448 | 1058849,5,1,1,1,2,1,1,1,1,2 449 | 1182404,1,1,1,1,1,1,1,1,1,2 450 | 1193544,5,7,9,8,6,10,8,10,1,4 451 | 1201870,4,1,1,3,1,1,2,1,1,2 452 | 1202253,5,1,1,1,2,1,1,1,1,2 453 | 1227081,3,1,1,3,2,1,1,1,1,2 454 | 1230994,4,5,5,8,6,10,10,7,1,4 455 | 1238410,2,3,1,1,3,1,1,1,1,2 456 | 1246562,10,2,2,1,2,6,1,1,2,4 457 | 1257470,10,6,5,8,5,10,8,6,1,4 458 | 1259008,8,8,9,6,6,3,10,10,1,4 459 | 1266124,5,1,2,1,2,1,1,1,1,2 460 | 1267898,5,1,3,1,2,1,1,1,1,2 461 | 1268313,5,1,1,3,2,1,1,1,1,2 462 | 1268804,3,1,1,1,2,5,1,1,1,2 463 | 1276091,6,1,1,3,2,1,1,1,1,2 464 | 1280258,4,1,1,1,2,1,1,2,1,2 465 | 1293966,4,1,1,1,2,1,1,1,1,2 466 | 1296572,10,9,8,7,6,4,7,10,3,4 467 | 1298416,10,6,6,2,4,10,9,7,1,4 468 | 1299596,6,6,6,5,4,10,7,6,2,4 469 | 1105524,4,1,1,1,2,1,1,1,1,2 470 | 1181685,1,1,2,1,2,1,2,1,1,2 471 | 1211594,3,1,1,1,1,1,2,1,1,2 472 | 1238777,6,1,1,3,2,1,1,1,1,2 473 | 1257608,6,1,1,1,1,1,1,1,1,2 474 | 1269574,4,1,1,1,2,1,1,1,1,2 475 | 1277145,5,1,1,1,2,1,1,1,1,2 476 | 1287282,3,1,1,1,2,1,1,1,1,2 477 | 1296025,4,1,2,1,2,1,1,1,1,2 478 | 1296263,4,1,1,1,2,1,1,1,1,2 479 | 1296593,5,2,1,1,2,1,1,1,1,2 480 | 1299161,4,8,7,10,4,10,7,5,1,4 481 | 1301945,5,1,1,1,1,1,1,1,1,2 482 | 
1302428,5,3,2,4,2,1,1,1,1,2 483 | 1318169,9,10,10,10,10,5,10,10,10,4 484 | 474162,8,7,8,5,5,10,9,10,1,4 485 | 787451,5,1,2,1,2,1,1,1,1,2 486 | 1002025,1,1,1,3,1,3,1,1,1,2 487 | 1070522,3,1,1,1,1,1,2,1,1,2 488 | 1073960,10,10,10,10,6,10,8,1,5,4 489 | 1076352,3,6,4,10,3,3,3,4,1,4 490 | 1084139,6,3,2,1,3,4,4,1,1,4 491 | 1115293,1,1,1,1,2,1,1,1,1,2 492 | 1119189,5,8,9,4,3,10,7,1,1,4 493 | 1133991,4,1,1,1,1,1,2,1,1,2 494 | 1142706,5,10,10,10,6,10,6,5,2,4 495 | 1155967,5,1,2,10,4,5,2,1,1,2 496 | 1170945,3,1,1,1,1,1,2,1,1,2 497 | 1181567,1,1,1,1,1,1,1,1,1,2 498 | 1182404,4,2,1,1,2,1,1,1,1,2 499 | 1204558,4,1,1,1,2,1,2,1,1,2 500 | 1217952,4,1,1,1,2,1,2,1,1,2 501 | 1224565,6,1,1,1,2,1,3,1,1,2 502 | 1238186,4,1,1,1,2,1,2,1,1,2 503 | 1253917,4,1,1,2,2,1,2,1,1,2 504 | 1265899,4,1,1,1,2,1,3,1,1,2 505 | 1268766,1,1,1,1,2,1,1,1,1,2 506 | 1277268,3,3,1,1,2,1,1,1,1,2 507 | 1286943,8,10,10,10,7,5,4,8,7,4 508 | 1295508,1,1,1,1,2,4,1,1,1,2 509 | 1297327,5,1,1,1,2,1,1,1,1,2 510 | 1297522,2,1,1,1,2,1,1,1,1,2 511 | 1298360,1,1,1,1,2,1,1,1,1,2 512 | 1299924,5,1,1,1,2,1,2,1,1,2 513 | 1299994,5,1,1,1,2,1,1,1,1,2 514 | 1304595,3,1,1,1,1,1,2,1,1,2 515 | 1306282,6,6,7,10,3,10,8,10,2,4 516 | 1313325,4,10,4,7,3,10,9,10,1,4 517 | 1320077,1,1,1,1,1,1,1,1,1,2 518 | 1320077,1,1,1,1,1,1,2,1,1,2 519 | 1320304,3,1,2,2,2,1,1,1,1,2 520 | 1330439,4,7,8,3,4,10,9,1,1,4 521 | 333093,1,1,1,1,3,1,1,1,1,2 522 | 369565,4,1,1,1,3,1,1,1,1,2 523 | 412300,10,4,5,4,3,5,7,3,1,4 524 | 672113,7,5,6,10,4,10,5,3,1,4 525 | 749653,3,1,1,1,2,1,2,1,1,2 526 | 769612,3,1,1,2,2,1,1,1,1,2 527 | 769612,4,1,1,1,2,1,1,1,1,2 528 | 798429,4,1,1,1,2,1,3,1,1,2 529 | 807657,6,1,3,2,2,1,1,1,1,2 530 | 8233704,4,1,1,1,1,1,2,1,1,2 531 | 837480,7,4,4,3,4,10,6,9,1,4 532 | 867392,4,2,2,1,2,1,2,1,1,2 533 | 869828,1,1,1,1,1,1,3,1,1,2 534 | 1043068,3,1,1,1,2,1,2,1,1,2 535 | 1056171,2,1,1,1,2,1,2,1,1,2 536 | 1061990,1,1,3,2,2,1,3,1,1,2 537 | 1113061,5,1,1,1,2,1,3,1,1,2 538 | 1116192,5,1,2,1,2,1,3,1,1,2 539 | 1135090,4,1,1,1,2,1,2,1,1,2 540 | 1145420,6,1,1,1,2,1,2,1,1,2 541 | 1158157,5,1,1,1,2,2,2,1,1,2 542 | 1171578,3,1,1,1,2,1,1,1,1,2 543 | 1174841,5,3,1,1,2,1,1,1,1,2 544 | 1184586,4,1,1,1,2,1,2,1,1,2 545 | 1186936,2,1,3,2,2,1,2,1,1,2 546 | 1197527,5,1,1,1,2,1,2,1,1,2 547 | 1222464,6,10,10,10,4,10,7,10,1,4 548 | 1240603,2,1,1,1,1,1,1,1,1,2 549 | 1240603,3,1,1,1,1,1,1,1,1,2 550 | 1241035,7,8,3,7,4,5,7,8,2,4 551 | 1287971,3,1,1,1,2,1,2,1,1,2 552 | 1289391,1,1,1,1,2,1,3,1,1,2 553 | 1299924,3,2,2,2,2,1,4,2,1,2 554 | 1306339,4,4,2,1,2,5,2,1,2,2 555 | 1313658,3,1,1,1,2,1,1,1,1,2 556 | 1313982,4,3,1,1,2,1,4,8,1,2 557 | 1321264,5,2,2,2,1,1,2,1,1,2 558 | 1321321,5,1,1,3,2,1,1,1,1,2 559 | 1321348,2,1,1,1,2,1,2,1,1,2 560 | 1321931,5,1,1,1,2,1,2,1,1,2 561 | 1321942,5,1,1,1,2,1,3,1,1,2 562 | 1321942,5,1,1,1,2,1,3,1,1,2 563 | 1328331,1,1,1,1,2,1,3,1,1,2 564 | 1328755,3,1,1,1,2,1,2,1,1,2 565 | 1331405,4,1,1,1,2,1,3,2,1,2 566 | 1331412,5,7,10,10,5,10,10,10,1,4 567 | 1333104,3,1,2,1,2,1,3,1,1,2 568 | 1334071,4,1,1,1,2,3,2,1,1,2 569 | 1343068,8,4,4,1,6,10,2,5,2,4 570 | 1343374,10,10,8,10,6,5,10,3,1,4 571 | 1344121,8,10,4,4,8,10,8,2,1,4 572 | 142932,7,6,10,5,3,10,9,10,2,4 573 | 183936,3,1,1,1,2,1,2,1,1,2 574 | 324382,1,1,1,1,2,1,2,1,1,2 575 | 378275,10,9,7,3,4,2,7,7,1,4 576 | 385103,5,1,2,1,2,1,3,1,1,2 577 | 690557,5,1,1,1,2,1,2,1,1,2 578 | 695091,1,1,1,1,2,1,2,1,1,2 579 | 695219,1,1,1,1,2,1,2,1,1,2 580 | 824249,1,1,1,1,2,1,3,1,1,2 581 | 871549,5,1,2,1,2,1,2,1,1,2 582 | 878358,5,7,10,6,5,10,7,5,1,4 583 | 1107684,6,10,5,5,4,10,6,10,1,4 584 | 1115762,3,1,1,1,2,1,1,1,1,2 585 | 
1217717,5,1,1,6,3,1,1,1,1,2 586 | 1239420,1,1,1,1,2,1,1,1,1,2 587 | 1254538,8,10,10,10,6,10,10,10,1,4 588 | 1261751,5,1,1,1,2,1,2,2,1,2 589 | 1268275,9,8,8,9,6,3,4,1,1,4 590 | 1272166,5,1,1,1,2,1,1,1,1,2 591 | 1294261,4,10,8,5,4,1,10,1,1,4 592 | 1295529,2,5,7,6,4,10,7,6,1,4 593 | 1298484,10,3,4,5,3,10,4,1,1,4 594 | 1311875,5,1,2,1,2,1,1,1,1,2 595 | 1315506,4,8,6,3,4,10,7,1,1,4 596 | 1320141,5,1,1,1,2,1,2,1,1,2 597 | 1325309,4,1,2,1,2,1,2,1,1,2 598 | 1333063,5,1,3,1,2,1,3,1,1,2 599 | 1333495,3,1,1,1,2,1,2,1,1,2 600 | 1334659,5,2,4,1,1,1,1,1,1,2 601 | 1336798,3,1,1,1,2,1,2,1,1,2 602 | 1344449,1,1,1,1,1,1,2,1,1,2 603 | 1350568,4,1,1,1,2,1,2,1,1,2 604 | 1352663,5,4,6,8,4,1,8,10,1,4 605 | 188336,5,3,2,8,5,10,8,1,2,4 606 | 352431,10,5,10,3,5,8,7,8,3,4 607 | 353098,4,1,1,2,2,1,1,1,1,2 608 | 411453,1,1,1,1,2,1,1,1,1,2 609 | 557583,5,10,10,10,10,10,10,1,1,4 610 | 636375,5,1,1,1,2,1,1,1,1,2 611 | 736150,10,4,3,10,3,10,7,1,2,4 612 | 803531,5,10,10,10,5,2,8,5,1,4 613 | 822829,8,10,10,10,6,10,10,10,10,4 614 | 1016634,2,3,1,1,2,1,2,1,1,2 615 | 1031608,2,1,1,1,1,1,2,1,1,2 616 | 1041043,4,1,3,1,2,1,2,1,1,2 617 | 1042252,3,1,1,1,2,1,2,1,1,2 618 | 1057067,1,1,1,1,1,?,1,1,1,2 619 | 1061990,4,1,1,1,2,1,2,1,1,2 620 | 1073836,5,1,1,1,2,1,2,1,1,2 621 | 1083817,3,1,1,1,2,1,2,1,1,2 622 | 1096352,6,3,3,3,3,2,6,1,1,2 623 | 1140597,7,1,2,3,2,1,2,1,1,2 624 | 1149548,1,1,1,1,2,1,1,1,1,2 625 | 1174009,5,1,1,2,1,1,2,1,1,2 626 | 1183596,3,1,3,1,3,4,1,1,1,2 627 | 1190386,4,6,6,5,7,6,7,7,3,4 628 | 1190546,2,1,1,1,2,5,1,1,1,2 629 | 1213273,2,1,1,1,2,1,1,1,1,2 630 | 1218982,4,1,1,1,2,1,1,1,1,2 631 | 1225382,6,2,3,1,2,1,1,1,1,2 632 | 1235807,5,1,1,1,2,1,2,1,1,2 633 | 1238777,1,1,1,1,2,1,1,1,1,2 634 | 1253955,8,7,4,4,5,3,5,10,1,4 635 | 1257366,3,1,1,1,2,1,1,1,1,2 636 | 1260659,3,1,4,1,2,1,1,1,1,2 637 | 1268952,10,10,7,8,7,1,10,10,3,4 638 | 1275807,4,2,4,3,2,2,2,1,1,2 639 | 1277792,4,1,1,1,2,1,1,1,1,2 640 | 1277792,5,1,1,3,2,1,1,1,1,2 641 | 1285722,4,1,1,3,2,1,1,1,1,2 642 | 1288608,3,1,1,1,2,1,2,1,1,2 643 | 1290203,3,1,1,1,2,1,2,1,1,2 644 | 1294413,1,1,1,1,2,1,1,1,1,2 645 | 1299596,2,1,1,1,2,1,1,1,1,2 646 | 1303489,3,1,1,1,2,1,2,1,1,2 647 | 1311033,1,2,2,1,2,1,1,1,1,2 648 | 1311108,1,1,1,3,2,1,1,1,1,2 649 | 1315807,5,10,10,10,10,2,10,10,10,4 650 | 1318671,3,1,1,1,2,1,2,1,1,2 651 | 1319609,3,1,1,2,3,4,1,1,1,2 652 | 1323477,1,2,1,3,2,1,2,1,1,2 653 | 1324572,5,1,1,1,2,1,2,2,1,2 654 | 1324681,4,1,1,1,2,1,2,1,1,2 655 | 1325159,3,1,1,1,2,1,3,1,1,2 656 | 1326892,3,1,1,1,2,1,2,1,1,2 657 | 1330361,5,1,1,1,2,1,2,1,1,2 658 | 1333877,5,4,5,1,8,1,3,6,1,2 659 | 1334015,7,8,8,7,3,10,7,2,3,4 660 | 1334667,1,1,1,1,2,1,1,1,1,2 661 | 1339781,1,1,1,1,2,1,2,1,1,2 662 | 1339781,4,1,1,1,2,1,3,1,1,2 663 | 13454352,1,1,3,1,2,1,2,1,1,2 664 | 1345452,1,1,3,1,2,1,2,1,1,2 665 | 1345593,3,1,1,3,2,1,2,1,1,2 666 | 1347749,1,1,1,1,2,1,1,1,1,2 667 | 1347943,5,2,2,2,2,1,1,1,2,2 668 | 1348851,3,1,1,1,2,1,3,1,1,2 669 | 1350319,5,7,4,1,6,1,7,10,3,4 670 | 1350423,5,10,10,8,5,5,7,10,1,4 671 | 1352848,3,10,7,8,5,8,7,4,1,4 672 | 1353092,3,2,1,2,2,1,3,1,1,2 673 | 1354840,2,1,1,1,2,1,3,1,1,2 674 | 1354840,5,3,2,1,3,1,1,1,1,2 675 | 1355260,1,1,1,1,2,1,2,1,1,2 676 | 1365075,4,1,4,1,2,1,1,1,1,2 677 | 1365328,1,1,2,1,2,1,2,1,1,2 678 | 1368267,5,1,1,1,2,1,1,1,1,2 679 | 1368273,1,1,1,1,2,1,1,1,1,2 680 | 1368882,2,1,1,1,2,1,1,1,1,2 681 | 1369821,10,10,10,10,5,10,10,10,7,4 682 | 1371026,5,10,10,10,4,10,5,6,3,4 683 | 1371920,5,1,1,1,2,1,3,2,1,2 684 | 466906,1,1,1,1,2,1,1,1,1,2 685 | 466906,1,1,1,1,2,1,1,1,1,2 686 | 534555,1,1,1,1,2,1,1,1,1,2 687 | 536708,1,1,1,1,2,1,1,1,1,2 688 | 
566346,3,1,1,1,2,1,2,3,1,2 689 | 603148,4,1,1,1,2,1,1,1,1,2 690 | 654546,1,1,1,1,2,1,1,1,8,2 691 | 654546,1,1,1,3,2,1,1,1,1,2 692 | 695091,5,10,10,5,4,5,4,4,1,4 693 | 714039,3,1,1,1,2,1,1,1,1,2 694 | 763235,3,1,1,1,2,1,2,1,2,2 695 | 776715,3,1,1,1,3,2,1,1,1,2 696 | 841769,2,1,1,1,2,1,1,1,1,2 697 | 888820,5,10,10,3,7,3,8,10,2,4 698 | 897471,4,8,6,4,3,4,10,6,1,4 699 | 897471,4,8,8,5,4,5,10,4,1,4 -------------------------------------------------------------------------------- /Chapter09/feature_reduction_impact.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 31-Oct-2017 3 | 4 | @author: DX 5 | ''' 6 | 7 | #Import Sklearn Datasets of IRIS flower classification 8 | import sklearn.datasets as datasets 9 | 10 | #Import Pandas library to create data frame from the data 11 | import pandas as pd 12 | 13 | #Load the data set 14 | iris=datasets.load_iris() 15 | 16 | #Extract data part from the data set 17 | data = iris.data 18 | 19 | #Select only two feature dimensions of the data (petal length and petal width) 20 | data = data[:,2:4] 21 | 22 | #Load data set into the data frame 23 | df=pd.DataFrame(data) 24 | 25 | #Extract target variable from the data set 26 | y=iris.target 27 | 28 | #Import decision tree classifier from sklearn 29 | from sklearn.tree import DecisionTreeClassifier 30 | 31 | #We will create a tree with a maximum depth of 5; other parameters will be default 32 | dtree=DecisionTreeClassifier(max_depth=5) 33 | 34 | #Train the classifier 35 | dtree.fit(df,y) 36 | 37 | #Import the graphviz exporter from sklearn to create a graph of the tree 38 | from sklearn.tree import export_graphviz 39 | 40 | #We will use StringIO to hold the dot-format graph data in memory 41 | from sklearn.externals.six import StringIO 42 | dot_data = StringIO() 43 | 44 | #Import pydotplus to render the tree as a graph and store it on the disk 45 | import pydotplus 46 | 47 | #Create a graph out of the tree and store it on the disk 48 | export_graphviz(dtree, out_file=dot_data, 49 | filled=True, rounded=True, 50 | special_characters=True) 51 | graph = pydotplus.graph_from_dot_data(dot_data.getvalue()) 52 | graph.write_png("graph_feat_4.png") -------------------------------------------------------------------------------- /Chapter09/stacking_spamdata.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 04-Nov-2017 3 | 4 | @author: DX 5 | ''' 6 | from Chapter03.DecisionTree_CART_RF import load_csv, cross_validation_split, str_column_to_float 7 | 8 | #Import numpy for array based operations 9 | import numpy as np 10 | 11 | #Import Support Vector Machine 12 | from sklearn.svm import SVC 13 | 14 | #Decision Tree Classifier 15 | from sklearn.tree import DecisionTreeClassifier 16 | 17 | #KNN 18 | from sklearn.neighbors import KNeighborsClassifier 19 | 20 | #Logistic Regression 21 | from sklearn.linear_model import LogisticRegression 22 | 23 | #Random Forest Classifier 24 | from sklearn.ensemble import RandomForestClassifier 25 | 26 | #AdaBoost Classifier 27 | from sklearn.ensemble import AdaBoostClassifier 28 | 29 | #Set the random seed for reproducibility 30 | np.random.seed(1) 31 | 32 | #Convert string class values to integer labels 33 | def str_column_to_int(dataset, column): 34 | class_values = [row[column] for row in dataset] 35 | unique = set(class_values) 36 | lookup = dict() 37 | for i, value in enumerate(unique): 38 | lookup[value] = i 39 | for row in dataset: 40 | row[column] = lookup[row[column]] 41 | return lookup 42 | 43 | #Stack the predictions from the trained models column-wise 44 | def stacking(dataset,models): 45 | 46 | 
stackedData = [] 47 | 48 | for model in models: 49 | pred = model.predict(dataset) 50 | stackedData.append(pred) 51 | 52 | return np.transpose(stackedData) 53 | 54 | #Train a single model on the given data 55 | def stack_fit(model,x,y): 56 | return model.fit(x,y) 57 | 58 | #Function to evaluate model performance 59 | def getAccuracy(pre,ytest): 60 | count = 0 61 | for i in range(len(ytest)): 62 | if ytest[i]==pre[i]: 63 | count+=1 64 | 65 | acc = float(count)/len(ytest) 66 | return acc 67 | 68 | #Separate the input and output variables 69 | def getXY(dataset): 70 | dataset = np.array(dataset) 71 | shape = np.shape(dataset) 72 | X = dataset[:,0:shape[1]-1] 73 | Y = dataset[:,shape[1]-1] 74 | return X,Y 75 | 76 | #Specify the file name 77 | dataName = 'spamData.csv' 78 | 79 | #Use the load_csv function to read the data set 80 | dataset = load_csv(dataName) 81 | 82 | #Create an empty list to store the data set 83 | dataset_new = [] 84 | 85 | #Remove the incomplete last instance from the data set 86 | for i in range(len(dataset)-1): 87 | dataset_new.append(dataset[i]) 88 | dataset = dataset_new 89 | 90 | #Use function str_column_to_float from Chapter 3 to convert string values to float 91 | for i in range(0, len(dataset[0])-1): 92 | str_column_to_float(dataset, i) 93 | 94 | #Convert the class variable to a numerical value 95 | str_column_to_int(dataset, len(dataset[0])-1) 96 | 97 | #Shuffle the data set 98 | np.random.shuffle(dataset) 99 | 100 | #Load all the classifiers 101 | clf1 = AdaBoostClassifier() 102 | clf2 = DecisionTreeClassifier(max_depth=25) 103 | clf3 = KNeighborsClassifier(n_neighbors=1) 104 | clf4 = RandomForestClassifier(n_estimators=25,max_depth=15) 105 | clf5 = LogisticRegression() 106 | clf6 = SVC(kernel='rbf') 107 | 108 | #List the classifiers to be stacked (clf6 is left out of the stack) 109 | models = [clf1,clf2,clf3,clf4,clf5] 110 | 111 | #Create one sample (fold) of the data set per model 112 | splits = cross_validation_split(dataset,len(models)) 113 | 114 | #Initialize the list of trained classifiers 115 | trained = [] 116 | 117 | #Train each model on its own fold and add it to the stack 118 | for i in range(len(models)): 119 | model = models[i] 120 | x,y = getXY(splits[i]) 121 | trained.append(stack_fit(model, x, y)) 122 | 123 | #Create test data from the last split (note that this fold was also used to train clf5) 124 | xtest,ytest = getXY(splits[len(models)-1]) 125 | 126 | #Generate the stacked predictions 127 | stackedData = stacking(xtest, trained) 128 | 129 | #Here we will calculate the individual accuracies of the models 130 | for i in range(np.shape(stackedData)[1]): 131 | acc = getAccuracy(stackedData[:,i], ytest) 132 | print("Accuracy of model %i is %.2f"%(i,(100*acc))) 133 | 134 | #Take the majority vote of the classifiers to create the final prediction 135 | predLr = [np.bincount(np.array(pred,dtype="int64")).argmax() for pred in stackedData] 136 | 137 | #Evaluate the stacked model performance 138 | accLr = getAccuracy(predLr, ytest) 139 | print("\nAccuracy of stacking is %.2f"%(100*accLr)) -------------------------------------------------------------------------------- /Chapter10/ANN.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 24-Nov-2017 3 | 4 | @author: aii32199 5 | ''' 6 | # Imports for array-handling and plotting 7 | import numpy as np 8 | import matplotlib 9 | import matplotlib.pyplot as plt 10 | 11 | # Keras imports for the data set and building our neural network 12 | from keras.datasets import mnist 13 | 14 | #Import Sequential and load_model for creating and loading the model 15 | from keras.models import Sequential, load_model 16 | 17 | #We will use Dense, Dropout
and Activation layers 18 | from keras.layers.core import Dense, Dropout, Activation 19 | from keras.utils import np_utils 20 | 21 | #Let's start by loading our data set 22 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 23 | #Plot the digits to verify 24 | plt.figure() 25 | for i in range(9): 26 | plt.subplot(3,3,i+1) 27 | plt.tight_layout() 28 | plt.imshow(X_train[i], cmap='gray', interpolation='none') 29 | plt.title("Digit: {}".format(y_train[i])) 30 | plt.xticks([]) 31 | plt.yticks([]) 32 | 33 | plt.show() 34 | 35 | #Let's analyze the histogram of the image 36 | plt.figure() 37 | plt.subplot(2,1,1) 38 | plt.imshow(X_train[0], cmap='gray', interpolation='none') 39 | plt.title("Digit: {}".format(y_train[0])) 40 | plt.xticks([]) 41 | plt.yticks([]) 42 | plt.subplot(2,1,2) 43 | plt.hist(X_train[0].reshape(784)) 44 | plt.title("Pixel Value Distribution") 45 | 46 | # Print the shape before we reshape and normalize 47 | print("X_train shape", X_train.shape) 48 | print("y_train shape", y_train.shape) 49 | print("X_test shape", X_test.shape) 50 | print("y_test shape", y_test.shape) 51 | 52 | # As we have the data in image form, convert it to row vectors 53 | X_train = X_train.reshape(60000, 784) 54 | X_test = X_test.reshape(10000, 784) 55 | X_train = X_train.astype('float32') 56 | X_test = X_test.astype('float32') 57 | 58 | # Normalizing the data to between 0 and 1 to help with the training 59 | X_train /= 255 60 | X_test /= 255 61 | 62 | # Print the final input shape ready for training 63 | print("Train matrix shape", X_train.shape) 64 | print("Test matrix shape", X_test.shape) 65 | 66 | # One-hot encoding using keras' numpy-related utilities 67 | n_classes = 10 68 | print("Shape before one-hot encoding: ", y_train.shape) 69 | Y_train = np_utils.to_categorical(y_train, n_classes) 70 | Y_test = np_utils.to_categorical(y_test, n_classes) 71 | print("Shape after one-hot encoding: ", Y_train.shape) 72 | 73 | # Here we will create the model of our ANN 74 | # Create a linear stack of layers with the sequential model 75 | model = Sequential() 76 | 77 | #Input layer with 512 neurons 78 | model.add(Dense(512, input_shape=(784,))) 79 | 80 | #We will use relu as the activation 81 | model.add(Activation('relu')) 82 | 83 | #Add Dropout to prevent over-fitting 84 | model.add(Dropout(0.2)) 85 | 86 | #Add a hidden layer with 512 neurons with relu activation 87 | model.add(Dense(512)) 88 | model.add(Activation('relu')) 89 | model.add(Dropout(0.2)) 90 | 91 | #This is our output layer with 10 neurons 92 | model.add(Dense(10)) 93 | model.add(Activation('softmax')) 94 | 95 | #Here we will be compiling the sequential model 96 | model.compile(loss='categorical_crossentropy', metrics=['accuracy'], 97 | optimizer='adam') 98 | 99 | # Start training the model and saving metrics in history 100 | history = model.fit(X_train, Y_train, 101 | batch_size=128, epochs=20, 102 | verbose=2, 103 | validation_data=(X_test, Y_test)) 104 | 105 | # Saving the model on disk 106 | path2save = 'E:/PyDevWorkSpaceTest/Ensembles/Chapter_10/keras_mnist.h5' 107 | model.save(path2save) 108 | print('Saved trained model at %s ' % path2save) 109 | # Plotting the metrics 110 | fig = plt.figure() 111 | plt.subplot(2,1,1) 112 | plt.plot(history.history['acc']) 113 | plt.plot(history.history['val_acc']) 114 | plt.title('model accuracy') 115 | plt.ylabel('accuracy') 116 | plt.xlabel('epoch') 117 | plt.legend(['train', 'test'], loc='lower right') 118 | plt.subplot(2,1,2) 119 | plt.plot(history.history['loss']) 120 | plt.plot(history.history['val_loss']) 121 | 
plt.title('model loss') 122 | plt.ylabel('loss') 123 | plt.xlabel('epoch') 124 | plt.legend(['train', 'test'], loc='upper right') 125 | plt.tight_layout() 126 | plt.show() 127 | 128 | #Let's load the saved model for testing 129 | #path2save still points to the model we saved above 130 | mnist_model = load_model(path2save) 131 | 132 | #We will use the Evaluate function 133 | loss_and_metrics = mnist_model.evaluate(X_test, Y_test, verbose=2) 134 | print("Test Loss", loss_and_metrics[0]) 135 | print("Test Accuracy", loss_and_metrics[1]) 136 | 137 | #Load the model and create predictions on the test set 138 | mnist_model = load_model(path2save) 139 | predicted_classes = mnist_model.predict_classes(X_test) 140 | 141 | #See which digits we predicted correctly and which we did not 142 | correct_indices = np.nonzero(predicted_classes == y_test)[0] 143 | incorrect_indices = np.nonzero(predicted_classes != y_test)[0] 144 | print(len(correct_indices)," classified correctly") 145 | print(len(incorrect_indices)," classified incorrectly") 146 | 147 | #Adapt the figure size to accommodate 18 subplots 148 | plt.rcParams['figure.figsize'] = (7,14) 149 | plt.figure() 150 | 151 | # plot 9 correct predictions 152 | for i, correct in enumerate(correct_indices[:9]): 153 | plt.subplot(6,3,i+1) 154 | plt.imshow(X_test[correct].reshape(28,28), cmap='gray', 155 | interpolation='none') 156 | plt.title( 157 | "Predicted: {}, Truth: {}".format(predicted_classes[correct], 158 | y_test[correct])) 159 | plt.xticks([]) 160 | plt.yticks([]) 161 | 162 | # plot 9 incorrect predictions 163 | for i, incorrect in enumerate(incorrect_indices[:9]): 164 | plt.subplot(6,3,i+10) 165 | plt.imshow(X_test[incorrect].reshape(28,28), cmap='gray', 166 | interpolation='none') 167 | plt.title( 168 | "Predicted {}, Truth: {}".format(predicted_classes[incorrect], 169 | y_test[incorrect])) 170 | plt.xticks([]) 171 | plt.yticks([]) 172 | 173 | plt.show() -------------------------------------------------------------------------------- /Chapter10/DigitClassification.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 07-Nov-2017 3 | 4 | @author: aii32199 5 | ''' 6 | 7 | # Imports for array-handling and plotting 8 | import numpy as np 9 | import matplotlib 10 | import matplotlib.pyplot as plt 11 | 12 | # Keras imports for the data set and building our neural network 13 | from keras.datasets import mnist 14 | 15 | #Import Sequential and load_model for creating and loading the model 16 | from keras.models import Sequential, load_model 17 | 18 | #We will use Dense, Dropout and Activation layers 19 | from keras.layers.core import Dense, Dropout, Activation 20 | from keras.utils import np_utils 21 | 22 | #Let's start by loading our data set 23 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 24 | 25 | #Plot the digits to verify 26 | plt.figure() 27 | for i in range(9): 28 | plt.subplot(3,3,i+1) 29 | plt.tight_layout() 30 | plt.imshow(X_train[i], cmap='gray', interpolation='none') 31 | plt.title("Digit: {}".format(y_train[i])) 32 | plt.xticks([]) 33 | plt.yticks([]) 34 | plt.show() 35 | 36 | #Let's analyze the histogram of the image 37 | plt.figure() 38 | plt.subplot(2,1,1) 39 | plt.imshow(X_train[0], cmap='gray', interpolation='none') 40 | plt.title("Digit: {}".format(y_train[0])) 41 | plt.xticks([]) 42 | plt.yticks([]) 43 | plt.subplot(2,1,2) 44 | plt.hist(X_train[0].reshape(784)) 45 | plt.title("Pixel Value Distribution") 46 | plt.show() 47 | 48 | # Print the shape before we reshape and normalize 49 | print("X_train shape", X_train.shape) 
50 | print("y_train shape", y_train.shape) 51 | print("X_test shape", X_test.shape) 52 | print("y_test shape", y_test.shape) 53 | 54 | # As we have the data in image form, convert it to row vectors 55 | X_train = X_train.reshape(60000, 784) 56 | X_test = X_test.reshape(10000, 784) 57 | X_train = X_train.astype('float32') 58 | X_test = X_test.astype('float32') 59 | 60 | # Normalizing the data to between 0 and 1 to help with the training 61 | X_train /= 255 62 | X_test /= 255 63 | 64 | # Print the final input shape ready for training 65 | print("Train matrix shape", X_train.shape) 66 | print("Test matrix shape", X_test.shape) 67 | 68 | # One-hot encoding using keras' numpy-related utilities 69 | n_classes = 10 70 | print("Shape before one-hot encoding: ", y_train.shape) 71 | Y_train = np_utils.to_categorical(y_train, n_classes) 72 | Y_test = np_utils.to_categorical(y_test, n_classes) 73 | print("Shape after one-hot encoding: ", Y_train.shape) 74 | 75 | 76 | # Here we will create the model of our ANN 77 | # Create a linear stack of layers with the sequential model 78 | model = Sequential() 79 | 80 | #Input layer with 512 neurons 81 | model.add(Dense(512, input_shape=(784,))) 82 | 83 | #We will use relu as the activation 84 | model.add(Activation('relu')) 85 | 86 | #Add Dropout to prevent over-fitting 87 | model.add(Dropout(0.2)) 88 | 89 | #Add a hidden layer with 512 neurons with relu activation 90 | model.add(Dense(512)) 91 | model.add(Activation('relu')) 92 | model.add(Dropout(0.2)) 93 | 94 | #This is our output layer with 10 neurons 95 | model.add(Dense(10)) 96 | model.add(Activation('softmax')) 97 | 98 | #Here we will be compiling the sequential model 99 | model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam') 100 | 101 | # Start training the model and saving metrics in history 102 | history = model.fit(X_train, Y_train, 103 | batch_size=128, epochs=20, 104 | verbose=2, 105 | validation_data=(X_test, Y_test)) 106 | 107 | # Saving the model on disk 108 | path2save = 'E:/PyDevWorkSpaceTest/Ensembles/Chapter_10/keras_mnist.h5' 109 | model.save(path2save) 110 | print('Saved trained model at %s ' % path2save) 111 | 112 | # Plotting the metrics 113 | fig = plt.figure() 114 | plt.subplot(2,1,1) 115 | plt.plot(history.history['acc']) 116 | plt.plot(history.history['val_acc']) 117 | plt.title('model accuracy') 118 | plt.ylabel('accuracy') 119 | plt.xlabel('epoch') 120 | plt.legend(['train', 'test'], loc='lower right') 121 | 122 | plt.subplot(2,1,2) 123 | plt.plot(history.history['loss']) 124 | plt.plot(history.history['val_loss']) 125 | plt.title('model loss') 126 | plt.ylabel('loss') 127 | plt.xlabel('epoch') 128 | plt.legend(['train', 'test'], loc='upper right') 129 | plt.tight_layout() 130 | plt.show() 131 | 132 | #Let's load the saved model for testing 133 | #path2save still points to the model we saved above 134 | mnist_model = load_model(path2save) 135 | 136 | #We will use the Evaluate function 137 | loss_and_metrics = mnist_model.evaluate(X_test, Y_test, verbose=2) 138 | 139 | print("Test Loss", loss_and_metrics[0]) 140 | print("Test Accuracy", loss_and_metrics[1]) 141 | 142 | #Load the model and create predictions on the test set 143 | mnist_model = load_model(path2save) 144 | predicted_classes = mnist_model.predict_classes(X_test) 145 | 146 | #See which digits we predicted correctly and which we did not 147 | correct_indices = np.nonzero(predicted_classes == y_test)[0] 148 | incorrect_indices = np.nonzero(predicted_classes != y_test)[0] 149 | print() 150 | 
print(len(correct_indices)," classified correctly") 151 | print(len(incorrect_indices)," classified incorrectly") 152 | 153 | #Adapt the figure size to accommodate 18 subplots 154 | plt.rcParams['figure.figsize'] = (7,14) 155 | 156 | plt.figure() 157 | 158 | # plot 9 correct predictions 159 | for i, correct in enumerate(correct_indices[:9]): 160 | plt.subplot(6,3,i+1) 161 | plt.imshow(X_test[correct].reshape(28,28), cmap='gray', interpolation='none') 162 | plt.title( 163 | "Predicted: {}, Truth: {}".format(predicted_classes[correct], 164 | y_test[correct])) 165 | plt.xticks([]) 166 | plt.yticks([]) 167 | 168 | 169 | # plot 9 incorrect predictions 170 | for i, incorrect in enumerate(incorrect_indices[:9]): 171 | plt.subplot(6,3,i+10) 172 | plt.imshow(X_test[incorrect].reshape(28,28), cmap='gray', interpolation='none') 173 | plt.title( 174 | "Predicted {}, Truth: {}".format(predicted_classes[incorrect], 175 | y_test[incorrect])) 176 | plt.xticks([]) 177 | plt.yticks([]) 178 | 179 | plt.show() -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ensemble Machine Learning 2 | This is the code repository for [Ensemble Machine Learning](https://www.packtpub.com/big-data-and-business-intelligence/ensemble-machine-learning?utm_source=github&utm_medium=repository&utm_campaign=9781788297752), published by [Packt](https://www.packtpub.com/?utm_source=github). It contains all the supporting project files necessary to work through the book from start to finish. 3 | ## About the Book 4 | Ensembling is a technique of combining two or more similar or dissimilar machine learning algorithms to create a model that delivers superior prediction power. This book will show you how you can use many weak algorithms to make a strong predictive model. This book contains Python code for different machine learning algorithms so that you can easily understand and implement them in your own systems. 5 | 6 | This book covers different machine learning algorithms that are widely used in the practical world to make predictions and classifications. 
It addresses different aspects of a prediction framework, such as data pre-processing, model training, validation of the model, and more. You will gain knowledge of different machine learning aspects such as bagging (decision trees and random forests), boosting (AdaBoost), and stacking (combining the predictions of several different models with a meta-learner). 7 | 8 | Then you'll learn how to implement them by building ensemble models using TensorFlow and Python libraries such as scikit-learn and NumPy. As machine learning touches almost every field of the digital world, you'll see how these algorithms can be used in different applications such as computer vision, speech recognition, making recommendations, grouping and document classification, fitting regression on data, and more. 9 | 10 | By the end of this book, you'll understand how to combine machine learning algorithms to work behind the scenes and to reduce common prediction problems. 11 | 12 | ## Instructions and Navigation 13 | All of the code is organized into folders, one per chapter. For example, Chapter02. 14 | 15 | 16 | 17 | The code will look like the following: 18 | ``` 19 | # Import All the required packages from sklearn 20 | import numpy as np 21 | from sklearn import model_selection 22 | from sklearn.ensemble import BaggingClassifier 23 | from sklearn.tree import DecisionTreeClassifier 24 | from sklearn.datasets import load_iris 25 | 26 | #Load data 27 | iris = load_iris() 28 | X = iris.data 29 | Y = iris.target 30 | ``` 31 | 32 | This book is a practical walkthrough of machine learning technologies in which you implement the algorithms yourself to understand the concepts in a more concrete way. I have used Python as the language to implement the algorithms in the form of code. You need not be a Python expert to code these algorithms; a simple understanding of Python is enough to get started with the implementation. 33 | 34 | The code included in this book can run on Python 2.7 and 3, but you will need the NumPy and scikit-learn packages to implement most of the code discussed in this book. 35 | 36 | For the implementation of ANNs, I have used the Keras and TensorFlow libraries; again, a basic understanding of these libraries is enough for the code implementation. 37 | 38 | ## Related Products 39 | * [Mastering Machine Learning Algorithms](https://www.packtpub.com/big-data-and-business-intelligence/mastering-machine-learning-algorithms?utm_source=github&utm_medium=repository&utm_campaign=9781788621113) 40 | 41 | * [Machine Learning with the Elastic Stack](https://www.packtpub.com/big-data-and-business-intelligence/machine-learning-elastic-stack?utm_source=github&utm_medium=repository&utm_campaign=9781788477543) 42 | 43 | * [Applied Machine Learning with Python](https://www.packtpub.com/big-data-and-business-intelligence/applied-machine-learning-python?utm_source=github&utm_medium=repository&utm_campaign=9781788297066) 44 | 45 | ### Suggestions and Feedback 46 | [Click here](https://docs.google.com/forms/d/e/1FAIpQLSe5qwunkGf6PUvzPirPDtuy1Du5Rlzew23UBp2S-P3wB-GcwQ/viewform) if you have any feedback or suggestions. 47 | 
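48 | ### A quick taste of stacking 49 | If you would like a minimal preview of the stacking idea before working through the chapters, here is a short sketch that trains two level-one classifiers on the Iris data and feeds their predictions to a logistic regression meta-learner. It is illustrative only and is not one of the chapter files: the choice of level-one models is arbitrary, and for simplicity the meta-learner is fit on predictions over the same training data, whereas the chapter examples build this up more carefully. 50 | ``` 51 | import numpy as np 52 | from sklearn.datasets import load_iris 53 | from sklearn.model_selection import train_test_split 54 | from sklearn.neighbors import KNeighborsClassifier 55 | from sklearn.naive_bayes import GaussianNB 56 | from sklearn.linear_model import LogisticRegression 57 | from sklearn.metrics import accuracy_score 58 | 59 | #Load data and hold out an evaluation set 60 | X, y = load_iris(return_X_y=True) 61 | X_fit, X_eval, y_fit, y_eval = train_test_split(X, y, test_size=0.3, random_state=1) 62 | 63 | #Train the level-one models 64 | level_one = [KNeighborsClassifier(n_neighbors=3), GaussianNB()] 65 | for clf in level_one: 66 |     clf.fit(X_fit, y_fit) 67 | 68 | #The level-one predictions become the input features of the meta-learner 69 | def stack(data): 70 |     return np.column_stack([clf.predict(data) for clf in level_one]) 71 | 72 | meta = LogisticRegression().fit(stack(X_fit), y_fit) 73 | 74 | #Evaluate the stacked model on the held-out set 75 | print("Stacked accuracy:", accuracy_score(y_eval, meta.predict(stack(X_eval)))) 76 | ``` 77 | --------------------------------------------------------------------------------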