├── Chapter02
│   ├── testTree.json
│   ├── Data
│   │   ├── __pycache__
│   │   │   ├── TreeDict.cpython-35.pyc
│   │   │   └── __init__.cpython-35.pyc
│   │   ├── testData.csv
│   │   └── treeModel.json
│   ├── ID3_Test.py
│   ├── PracticalApplication.py
│   ├── SplitCheck.py
│   └── DecisionTree_ID3.py
├── Chapter05
│   ├── Data
│   │   ├── download.jpg
│   │   └── bcancer.csv
│   ├── AdaboostAlgorithmExample.py
│   ├── AdaBoostFaceDetection.py
│   └── Adaboost.py
├── Chapter07
│   ├── Data
│   │   └── train_modified.csv
│   ├── xgBoost.py
│   └── xgboost_param_tune.py
├── Chapter09
│   ├── Data
│   │   └── graph_feat_4.png
│   ├── FeatureSelection_PCA.py
│   ├── RecursiveFeatureElimination.py
│   ├── UnivariateFeatureSelection.py
│   ├── feature_reduction_impact.py
│   ├── SVM_Test.py
│   ├── SVM_KernelTrick.py
│   ├── stacking_spamdata.py
│   ├── RF_feature_selection.py
│   └── bcancer.csv
├── Chapter03
│   ├── RandomForest.py
│   ├── BinaryTree.py
│   ├── PracticalApplication.py
│   ├── DecisionTree.py
│   ├── DecisionTree_CART_RF.py
│   └── Data
│       └── bcancer.csv
├── LICENSE
├── Chapter01
│   ├── Boosting.py
│   ├── Bagging.py
│   ├── Stacking.py
│   └── kmeansClustering.py
├── Chapter04
│   ├── knnAlgoTest.py
│   ├── utilityFunctions.py
│   ├── SpamClassification.py
│   └── KNN.py
├── Chapter06
│   ├── RegressionTreeTest.py
│   └── RegressionTrees.py
├── README.md
├── Chapter10
│   ├── ANN.py
│   └── DigitClassification.py
└── Chapter08
    └── StackedGeneralization.py

/Chapter02/testTree.json:
--------------------------------------------------------------------------------
{"Salary": {"High":
--------------------------------------------------------------------------------
/Chapter05/Data/download.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Ensemble-Machine-Learning/HEAD/Chapter05/Data/download.jpg
--------------------------------------------------------------------------------
/Chapter07/Data/train_modified.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Ensemble-Machine-Learning/HEAD/Chapter07/Data/train_modified.csv
--------------------------------------------------------------------------------
/Chapter09/Data/graph_feat_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Ensemble-Machine-Learning/HEAD/Chapter09/Data/graph_feat_4.png
--------------------------------------------------------------------------------
/Chapter02/Data/__pycache__/TreeDict.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Ensemble-Machine-Learning/HEAD/Chapter02/Data/__pycache__/TreeDict.cpython-35.pyc
--------------------------------------------------------------------------------
/Chapter02/Data/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Ensemble-Machine-Learning/HEAD/Chapter02/Data/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/Chapter02/Data/testData.csv:
--------------------------------------------------------------------------------
,buying,maint,doors,persons,lug_boot,safety,Class
0,vhigh,med,2,2,med,low,unacc
1,low,high,5more,4,small,low,unacc
2,high,high,3,4,med,low,unacc
3,vhigh,high,2,2,big,low,unacc
4,vhigh,high,2,2,big,med,unacc
5,vhigh,med,4,more,small,med,unacc
6,low,med,5more,2,small,high,unacc
7,high,low,4,4,med,high,acc 10 | 8,low,med,3,2,big,high,unacc 11 | -------------------------------------------------------------------------------- /Chapter03/RandomForest.py: -------------------------------------------------------------------------------- 1 | from Chapter_03 import DecisionTree_CART_RF as rf 2 | filename = 'bcancer.csv' 3 | dataset = rf.load_csv(filename) 4 | # convert string attributes to integers 5 | for i in range(0, len(dataset[0])-1): 6 | rf.str_column_to_float(dataset, i) 7 | # convert class column to integers 8 | rf.str_column_to_int(dataset, len(dataset[0])-1) 9 | 10 | dataset_new = [] 11 | for row in dataset: 12 | dataset_new.append([row[i] for i in range(1,len(row))]) 13 | # # evaluate algorithm 14 | dataset = dataset_new 15 | n_folds = 5 16 | max_depth = 3 17 | min_size = 1 18 | sample_size = 0.5 19 | n_features = 5#int(sqrt(len(dataset[0])-1)) 20 | print("features: %d"%n_features) 21 | 22 | for n_trees in [1, 5, 10]: 23 | scores = rf.evaluate_algorithm(dataset, rf.random_forest, n_folds, max_depth, min_size, sample_size, n_trees, n_features) 24 | print('Trees: %d' % n_trees) 25 | print('Scores: %s' % scores) 26 | print('Mean Accuracy: %.3f%%' % (sum(scores)/float(len(scores)))) -------------------------------------------------------------------------------- /Chapter09/FeatureSelection_PCA.py: -------------------------------------------------------------------------------- 1 | #Import the required packages 2 | 3 | #Import pandas to read csv 4 | import pandas 5 | 6 | #Import numpy for array related operations 7 | import numpy 8 | 9 | #Import sklearn's PCA algorithm 10 | from sklearn.decomposition import PCA 11 | 12 | #URL for loading the data set 13 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data" 14 | 15 | #Define the attribute names 16 | names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'] 17 | 18 | #Create pandas data frame by loading the data from URL 19 | dataframe = pandas.read_csv(url, names=names) 20 | 21 | #Create array from data values 22 | array = dataframe.values 23 | 24 | #Split the data into input and target 25 | X = array[:,0:8] 26 | Y = array[:,8] 27 | 28 | #Feature extraction 29 | pca = PCA(n_components=3) 30 | fit = pca.fit(X) 31 | 32 | #Summarize components 33 | print("Explained Variance: %s" % fit.explained_variance_ratio_) 34 | print(fit.components_) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Chapter02/ID3_Test.py: -------------------------------------------------------------------------------- 1 | def main(): 2 | #Lets Create the test dataset to build our tree 3 | dataset = {'Name':['Person 1','Person 2','Person 3','Person 4','Person 5','Person 6','Person 7','Person 8','Person 9','Person 10'], 4 | 'Salary':['Low','Med','Med','Med','Med','High','Low','High','Med','Low'], 5 | 'Sex':['Male','Male','Male','Female','Male','Female','Female','Male','Female','Male'], 6 | 'Marital':['Unmarried','Unmarried','Married','Married','Married','Unmarried','Unmarried','Unmarried','Unmarried','Married'], 7 | 'Class':['No','No','Yes','No','Yes','Yes','No','Yes','Yes','Yes']} 8 | from Chapter_02 import DecisionTree_ID3 as ID3 9 | #Preprocess data set 10 | df = ID3.preProcess(dataset) 11 | 12 | #Lets build the tree 13 | tree = ID3.buildTree(df) 14 | 15 | import pprint 16 | #print(tree) 17 | pprint.pprint(tree) 18 | 19 | #Select test instance 20 | inst = df.ix[2] 21 | 22 | #Remove its class attribute 23 | inst.pop('Class') 24 | 25 | #Get prediction 26 | prediction = ID3.predict(inst, tree) 27 | print("Prediction: %s"%prediction[0]) 28 | 29 | main() -------------------------------------------------------------------------------- /Chapter01/Boosting.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on May 22, 2017 3 | 4 | @author: DX 5 | ''' 6 | # Import All the required packages from sklearn 7 | from sklearn import model_selection 8 | from sklearn.datasets import load_iris 9 | from sklearn.ensemble import AdaBoostClassifier # Boosting Algorithm 10 | from sklearn.tree import DecisionTreeClassifier 11 | 12 | import numpy as np 13 | 14 | 15 | #Load data 16 | iris = load_iris() 17 | X = iris.data 18 | Y = iris.target 19 | 20 | #Split data in training and testing set 21 | X_fit, X_eval, y_fit, y_test= model_selection.train_test_split( X, Y, test_size=0.20, random_state=1 ) 22 | 23 | #Define a decision tree classifier 24 | cart = DecisionTreeClassifier() 25 | num_trees = 25 26 | 27 | #Create classification model for bagging 28 | model = AdaBoostClassifier(base_estimator=cart, n_estimators=num_trees, learning_rate = 0.1) 29 | 30 | #Train Classification model 31 | model.fit(X_fit, y_fit) 32 | 33 | #Test trained model over test set 34 | pred_label = model.predict(X_eval) 35 | nnz = np.float(np.shape(y_test)[0] - np.count_nonzero(pred_label - y_test)) 36 | acc = 100*nnz/np.shape(y_test)[0] 37 | 38 | #Print accuracy of the model 39 | print('accuracy is: '+str(acc)) -------------------------------------------------------------------------------- /Chapter03/BinaryTree.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 19-Jun-2017 3 | 4 | @author: aii32199 5 | ''' 6 | import numpy as np 7 | def getNewNode(data): 8 | node = {'data':[],'left':[],'right':[]} 9 | node['data'] = data 10 | print(node) 11 | return node 12 | 13 | def createBinaryTree(tree,data): 14 | 15 | #Check whether we have any node in the tree if not create one 16 | if not tree: 17 | tree = getNewNode(data) 18 | 19 | #Now if current value is less than parent node put it in left 20 
| elif data<=tree['data']: 21 | tree['left'] = createBinaryTree(tree['left'],data) 22 | #else put it in right 23 | else: 24 | tree['right'] = createBinaryTree(tree['right'],data) 25 | return tree 26 | 27 | 28 | # data = [0.7,0.65,0.83,0.54,0.9,0.11,0.44,0.35,0.75,0.3,0.78,0.15] 29 | data = [0.7,0.65,0.83,0.54,0.9,0.11,0.44,0.35,0.75,0.3,0.78,0.15] 30 | med = np.median(data) 31 | print("Median of array is: %.2f"%med) 32 | 33 | tree = [] 34 | tree = createBinaryTree(tree,med) 35 | for i in range(len(data)): 36 | value = data[i] 37 | tree = createBinaryTree(tree,value) 38 | 39 | import pprint 40 | pprint.pprint(tree) 41 | -------------------------------------------------------------------------------- /Chapter05/AdaboostAlgorithmExample.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 18-Sep-2017 3 | 4 | @author: DX 5 | ''' 6 | 7 | import numpy as np; 8 | from Chapter_05 import Adaboost as ad 9 | 10 | dataset = [[0.25000, 1.75000, 1.00000], 11 | [1.25000, 1.75000, -1.00000], 12 | [0.50000, 1.50000, 1.00000], 13 | [1.00000, 0.50000, -1.00000], 14 | [1.25000, 3.50000, 1.00000], 15 | [1.50000, 4.00000, 1.00000], 16 | [2.00000, 2.00000, -1.00000], 17 | [2.50000, 2.50000, 1.00000], 18 | [3.75000, 3.00000, -1.00000], 19 | [4.00000, 1.00000, -1.00000]] 20 | 21 | [weaks,alphas] = ad.AdaBoostAlgorithm(dataset,9) 22 | 23 | prediction=[] 24 | actual = [] 25 | for row in dataset: 26 | preds = [] 27 | for i in range(len(weaks)): 28 | p = alphas[i]*ad.predict(weaks[i], row) 29 | #p = predict(weaks[i], row) 30 | preds.append(p) 31 | final = np.sign(sum(preds)) 32 | #final = max(set(preds), key=preds.count) 33 | prediction.append(final) 34 | actual.append(row[-1]) 35 | print('Expected=%d, Got=%d' % (row[-1], final)) 36 | 37 | acc = ad.accuracy_metric(actual, prediction) 38 | print("accuracy: %.2f"%acc) -------------------------------------------------------------------------------- /Chapter03/PracticalApplication.py: -------------------------------------------------------------------------------- 1 | from Chapter_03 import DecisionTree_CART_RF as CART 2 | import pprint 3 | filename = 'bcancer.csv' 4 | dataset = CART.load_csv(filename) 5 | # convert string attributes to integers 6 | for i in range(0, len(dataset[0])): 7 | CART.str_column_to_float(dataset, i) 8 | 9 | #Now remove index column from the data set 10 | dataset_new = [] 11 | for row in dataset: 12 | dataset_new.append([row[i] for i in range(1,len(row))]) 13 | 14 | #Get training and testing data split 15 | training,testing = CART.getTrainTestData(dataset_new, 0.7) 16 | tree = CART.build_tree(training,11,5) 17 | pprint.pprint(tree) 18 | 19 | pre = [] 20 | act = [] 21 | for row in training: 22 | prediction = CART.predict(tree, row) 23 | pre.append(prediction) 24 | actual = act.append(row[-1]) 25 | # print('Expected=%d, Got=%d' % (row[-1], prediction)) 26 | # print_tree(tree) 27 | acc = CART.accuracy_metric(act, pre) 28 | 29 | print('training accuracy: %.2f'%acc) 30 | 31 | for row in testing: 32 | prediction = CART.predict(tree, row) 33 | pre.append(prediction) 34 | actual = act.append(row[-1]) 35 | acc = CART.accuracy_metric(act, pre) 36 | # pprint.pprint(tree) 37 | print('testing accuracy: %.2f'%acc) -------------------------------------------------------------------------------- /Chapter09/RecursiveFeatureElimination.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 02-Nov-2017 3 | 4 | @author: aii32199 5 | ''' 6 | 7 | #Import the 
required packages 8 | 9 | #Import pandas to read csv 10 | import pandas 11 | 12 | #Import numpy for array related operations 13 | import numpy 14 | 15 | #Import sklearn's feature selection algorithm 16 | from sklearn.feature_selection import RFE 17 | 18 | #Import LogisticRegression for performing chi square test 19 | from sklearn.linear_model import LogisticRegression 20 | 21 | #URL for loading the data set 22 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data" 23 | 24 | #Define the attribute names 25 | names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'] 26 | 27 | #Create pandas data frame by loading the data from URL 28 | dataframe = pandas.read_csv(url, names=names) 29 | 30 | #Create array from data values 31 | array = dataframe.values 32 | 33 | #Split the data into input and target 34 | X = array[:,0:8] 35 | Y = array[:,8] 36 | 37 | #Feature extraction 38 | model = LogisticRegression() 39 | rfe = RFE(model, 3) 40 | fit = rfe.fit(X, Y) 41 | 42 | print("Num Features: %d"% fit.n_features_) 43 | print("Selected Features: %s"% fit.support_) 44 | print("Feature Ranking: %s"% fit.ranking_) 45 | -------------------------------------------------------------------------------- /Chapter01/Bagging.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on May 20, 2017 3 | 4 | @author: DX 5 | ''' 6 | # Import All the required packages from sklearn 7 | import numpy as np 8 | from sklearn import model_selection 9 | from sklearn.ensemble import BaggingClassifier 10 | from sklearn.tree import DecisionTreeClassifier 11 | from sklearn.datasets import load_iris 12 | 13 | #Load data 14 | iris = load_iris() 15 | X = iris.data 16 | Y = iris.target 17 | 18 | #Split data in training and testing set 19 | X_fit, X_eval, y_fit, y_test= model_selection.train_test_split( X, Y, test_size=0.30, random_state=1 ) 20 | 21 | #Create random sub sample to train multiple models 22 | seed = 7 23 | kfold = model_selection.KFold(n_splits=10, random_state=seed) 24 | 25 | #Define a decision tree classifier 26 | cart = DecisionTreeClassifier() 27 | num_trees = 100 28 | 29 | #Create classification model for bagging 30 | model = BaggingClassifier(base_estimator=cart, n_estimators=num_trees, random_state=seed) 31 | 32 | #Train different models and print their accuracy 33 | results = model_selection.cross_val_score(model, X_fit, y_fit,cv=kfold) 34 | for i in range(len(results)): 35 | print("Model: "+str(i)+" Accuracy is: "+str(results[i])) 36 | 37 | print("Mean Accuracy is: "+str(results.mean())) 38 | 39 | model.fit(X_fit, y_fit) 40 | pred_label = model.predict(X_eval) 41 | nnz = np.shape(y_test)[0] - np.count_nonzero(pred_label - y_test) 42 | acc = 100*nnz/np.shape(y_test)[0] 43 | print('accuracy is: '+str(acc)) -------------------------------------------------------------------------------- /Chapter04/knnAlgoTest.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 02-Sep-2017 3 | 4 | @author: DX 5 | ''' 6 | #Import math for calculations of square roots 7 | import numpy as np 8 | 9 | from Chapter_04 import KNN 10 | dataset = [[5.1, 3.5, 1.4, 0.2, 1], 11 | [4.9, 3.0, 1.4, 0.2, 1], 12 | [4.7, 3.2, 1.3, 0.2, 1], 13 | [4.6, 3.1, 1.5, 0.2, 1], 14 | [5.0, 3.6, 1.4, 0.2, 1], 15 | [7.0, 3.2, 4.7, 1.4, 2], 16 | [6.4, 6.2, 4.5, 1.5, 2], 17 | [6.9, 3.1, 4.9, 1.5, 2], 18 | [5.5, 2.3, 4.0, 1.3, 2], 19 | [6.5, 2.8, 4.6, 1.5, 2], 20 | [6.3, 3.3, 6.0, 
2.5, 3], 21 | [5.8, 2.7, 5.1, 1.9, 3], 22 | [7.1, 3.0, 5.9, 2.1, 3], 23 | [6.3, 2.9, 5.6, 1.8, 3], 24 | [6.5, 3.0, 5.8, 2.2, 3]] 25 | 26 | np.random.shuffle(dataset) 27 | 28 | #Lets put our test instance. 29 | testInstance=[4.8,3.1,3.0,1.3,1] 30 | 31 | #Now lets find out 3 neighbors for our test instance using getNeighbor 32 | k = 5 33 | neighbors = KNN.getNeighbors(dataset, testInstance, k) 34 | 35 | #Print neighbors 36 | print(neighbors) 37 | 38 | #Get the class prediction out of neighbors 39 | prediction = KNN.getPrediction(neighbors) 40 | 41 | #Print predicion 42 | print("Predicted class for the test instance is: %d"%prediction) -------------------------------------------------------------------------------- /Chapter02/PracticalApplication.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pprint 3 | import json 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from Chapter_02 import DecisionTree_ID3 as DT 7 | 8 | datapath = 'E:/PyDevWorkSpaceTest/Ensembles/Chapter_02/Data/CarDataset.csv' 9 | path2save = 'E:/PyDevWorkSpaceTest/Ensembles/Chapter_02/Data/TreeModel.json' 10 | trainDataPath = 'E:/PyDevWorkSpaceTest/Ensembles/Chapter_02/Data/trainData.csv' 11 | testDataPath = 'E:/PyDevWorkSpaceTest/Ensembles/Chapter_02/Data/testData.csv' 12 | 13 | # testData = pd.read_csv(testDataPath) 14 | 15 | cardata = pd.read_csv(datapath) 16 | mat = cardata.as_matrix() 17 | df = pd.DataFrame(mat,columns=['buying','maint','doors','persons','lug_boot','safety','Class']) 18 | trainData,testData = DT.split_data(df, 0.995) 19 | 20 | trainData.to_csv(trainDataPath,columns=['buying','maint','doors','persons','lug_boot','safety','Class']) 21 | testData.to_csv(testDataPath,columns=['buying','maint','doors','persons','lug_boot','safety','Class']) 22 | 23 | tree = DT.buildTree(trainData) 24 | pprint.pprint(tree) 25 | 26 | with open(path2save,'w') as f: 27 | json.dump(tree,f) 28 | 29 | with open(path2save) as f: 30 | model = json.load(f) 31 | 32 | pprint.pprint(model) 33 | actualClass = testData['Class'] 34 | predictions = DT.BatchTest(testData, model) 35 | accuracy,match = DT.getAccuracy(actualClass, predictions) 36 | 37 | print("Accuracy of the model is: %.2f and matched results are %i out of %i"%(accuracy,match,len(actualClass))) 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /Chapter09/UnivariateFeatureSelection.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 02-Nov-2017 3 | 4 | @author: aii32199 5 | ''' 6 | # Feature Extraction with Univariate Statistical Tests (Chi-squared for classification) 7 | 8 | #Import the required packages 9 | 10 | #Import pandas to read csv 11 | import pandas 12 | 13 | #Import numpy for array related operations 14 | import numpy 15 | 16 | #Import sklearn's feature selection algorithm 17 | from sklearn.feature_selection import SelectKBest 18 | 19 | #Import chi2 for performing chi square test 20 | from sklearn.feature_selection import chi2 21 | 22 | #URL for loading the data set 23 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data" 24 | 25 | #Define the attribute names 26 | names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'] 27 | 28 | #Create pandas data frame by loading the data from URL 29 | dataframe = pandas.read_csv(url, names=names) 30 | 31 | #Create array from data values 32 | array 
= dataframe.values 33 | 34 | #Split the data into input and target 35 | X = array[:,0:8] 36 | Y = array[:,8] 37 | 38 | #We will select the features using chi square 39 | test = SelectKBest(score_func=chi2, k=4) 40 | 41 | #Fit the function for ranking the features by score 42 | fit = test.fit(X, Y) 43 | 44 | #Summarize scores 45 | numpy.set_printoptions(precision=3) 46 | print(fit.scores_) 47 | 48 | #Apply the transformation on to data set 49 | features = fit.transform(X) 50 | 51 | #Summarize selected features 52 | print(features[0:5,:]) -------------------------------------------------------------------------------- /Chapter09/feature_reduction_impact.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 31-Oct-2017 3 | 4 | @author: DX 5 | ''' 6 | 7 | #Import Sklearn Datasets of IRIS flower classification 8 | import sklearn.datasets as datasets 9 | 10 | #Import Pandas library to create data frame from the data 11 | import pandas as pd 12 | 13 | #Load the data set 14 | iris=datasets.load_iris() 15 | 16 | #Extract data part from the data set 17 | data = iris.data 18 | 19 | #Select dimension of data 20 | data = data[:,2:4] 21 | 22 | #Load data set into the data frame 23 | df=pd.DataFrame(data) 24 | 25 | #Extract target variable from the data set 26 | y=iris.target 27 | 28 | #Import decision tree classifier from sklearn 29 | from sklearn.tree import DecisionTreeClassifier 30 | 31 | #We will create a tree with maximum depth of 5, other parameters will be default 32 | dtree=DecisionTreeClassifier(max_depth=5) 33 | 34 | #Train the classifier 35 | dtree.fit(df,y) 36 | 37 | #Import graphwiz from sklearn to create the graph out of tree 38 | from sklearn.tree import export_graphviz 39 | 40 | #We will use StringIO to create graph with all characters 41 | from sklearn.externals.six import StringIO 42 | dot_data = StringIO() 43 | 44 | #Import pydotplus to create tree as a graph and store it on the disk 45 | import pydotplus 46 | 47 | #Create Graph out of tree and store it on the disk 48 | export_graphviz(dtree, out_file=dot_data, 49 | filled=True, rounded=True, 50 | special_characters=True) 51 | graph = pydotplus.graph_from_dot_data(dot_data.getvalue()) 52 | graph.write_png("graph_feat_4.png") -------------------------------------------------------------------------------- /Chapter07/xgBoost.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 23-Oct-2017 3 | 4 | @author: aii32199 5 | ''' 6 | 7 | # First XGBoost model for Pima Indians dataset 8 | 9 | #Load the required libraries 10 | #Numpy for reading the csv file 11 | from numpy import loadtxt 12 | 13 | #Import XGBoost classifier 14 | from xgboost import XGBClassifier 15 | 16 | #We will use sklearn to divide our data set into training and test set 17 | from sklearn.model_selection import train_test_split 18 | 19 | #We will use sklearn's accuracy metric to evaluate the performance of the trained model 20 | from sklearn.metrics import accuracy_score 21 | 22 | #Let's load the dataset into the numpy array 23 | dataset = loadtxt('pima-indians-diabetes.csv', delimiter=",") 24 | 25 | #split data into X (input variables)and y(output variable/Class) 26 | X = dataset[:,0:8] 27 | Y = dataset[:,8] 28 | 29 | #Create training and test set with 33% data in test set and 66% for the training of the model 30 | seed = 7 31 | test_size = 0.33 32 | X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=seed) 33 | 34 | #Train our 
first model on created training set 35 | model = XGBClassifier() 36 | model.fit(X_train, y_train) 37 | 38 | #Lets see the prediction from the trained model 39 | y_pred = model.predict(X_test) 40 | 41 | #Create a list of predictions for evaluation purpose 42 | predictions = [round(value) for value in y_pred] 43 | 44 | #Evaluate predictions using accuracy metric 45 | accuracy = accuracy_score(y_test, predictions) 46 | 47 | #Print the accuracy 48 | print("Accuracy of the trained model is: %.2f%%" % (accuracy * 100.0)) 49 | 50 | print(model) -------------------------------------------------------------------------------- /Chapter05/AdaBoostFaceDetection.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 24-Nov-2017 3 | 4 | @author: aii32199 5 | ''' 6 | #So We will load required libraries numpy for matrix operations 7 | import numpy as np 8 | 9 | #Import OpenCV library, in python we can call it cv2 10 | import cv2 11 | 12 | #OpenCV have module cascade classifier which is based on haar cascade and 13 | #Adaboost algorithm, so we will call direct method. 14 | #First we will load the pre trained classifiers for frontal face and eye 15 | #detection, which are in the form of xml file. 16 | face_cascade = cv2.CascadeClassifier('E:/OpenCV/opencv/sources/data/haarcascades/haarcascade_frontalface_default.xml') 17 | eye_cascade = cv2.CascadeClassifier('E:/OpenCV/opencv/sources/data/haarcascades/haarcascade_eye.xml') 18 | 19 | #Now let us load an image from the local directory 20 | img = cv2.imread('download.jpg') 21 | 22 | #Let's convert image into gray 23 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 24 | 25 | #Here we will call the method which will find the faces in our input image 26 | faces = face_cascade.detectMultiScale(gray, 1.3, 5) 27 | #Lets run a loop to create sub images of faces from the input image using 28 | #cv2.rectangle function 29 | for (x,y,w,h) in faces: 30 | img = cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2) 31 | roi_gray = gray[y:y+h, x:x+w] 32 | roi_color = img[y:y+h, x:x+w] 33 | 34 | #windows 35 | eyes = eye_cascade.detectMultiScale(roi_gray) 36 | #following function will create the rectangles around the eyes 37 | for (ex,ey,ew,eh) in eyes: 38 | cv2.rectangle(roi_color,(ex,ey),(ex+ew,ey+eh),(0,255,0),2) 39 | #Following Lines will show the detected face images 40 | cv2.imshow('img',img) 41 | cv2.waitKey(0) 42 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /Chapter06/RegressionTreeTest.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 03-Oct-2017 3 | 4 | @author: DX 5 | ''' 6 | import pprint 7 | from Chapter_06 import RegressionTrees as rg 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | 11 | #Create a Sine wave for demonstration of non-linearity 12 | 13 | #Set the number of samples 14 | N = 256 15 | 16 | #Create time value 17 | ix = np.arange(N) 18 | 19 | #Create the sine wave using the formula sin(2*pi*f) 20 | signal = np.sin(2*np.pi*ix/float(N/2)) 21 | 22 | #Combine both time and amplitude 23 | dataset = range(0,N) 24 | dataset = np.c_[ix,signal] 25 | dataset_ = dataset.copy() 26 | 27 | #Call Gradient boost 28 | weaks = rg.GradientBoost(dataset,5,1,100) 29 | 30 | prediction=[] 31 | actual = [] 32 | 33 | #Run a loop to extract each instance from the data set 34 | for row in dataset_: 35 | 36 | #Create a list to store predictions from different ckassifier for the test instance 37 | preds = 
[]

    #Feed the instance to each of the weak learners
    for i in range(len(weaks)):

        #Get the current weak learner's prediction for this instance
        p = rg.predict(weaks[i], row)

        #Add the prediction to the list
        preds.append(p)

    #Sum the outputs of all the weak learners to get the boosted prediction
    final = (sum(preds))

    #Append the final output to the prediction list and the actual output to the actual list
    prediction.append(final)
    actual.append(row[-1])

#Compute the residuals and the mean squared error between the actual and predicted signals
_,mse = rg.getResidual(actual, prediction)


#Let's plot the original signal against the boosted prediction
plt.figure()
plt.plot(ix,signal,marker='*',markersize=8)
plt.plot(ix,prediction,marker='+',markersize=8)
plt.show()
--------------------------------------------------------------------------------
/Chapter02/SplitCheck.py:
--------------------------------------------------------------------------------
'''
Created on Jun 24, 2017

@author: DX
'''

# Split a dataset based on an attribute and an attribute value
def test_split(index, value, dataset):
    left, right = list(), list()
    for row in dataset:
        if row[index] < value:
            left.append(row)
        else:
            right.append(row)
    return left, right

# Calculate the Gini index for a split dataset
def gini_index(groups, class_values):
    gini = 0.0
    for class_value in class_values:
        for group in groups:
            size = len(group)
            if size == 0:
                continue
            proportion = [row[-1] for row in group].count(class_value) / float(size)
            gini += (proportion * (1.0 - proportion))
    return gini

# Select the best split point for a dataset

def get_split(dataset):

    class_values = extractClasses(dataset)

    b_index, b_value, b_score, b_groups = 999, 999, 999, None
    for index in range(len(dataset[0])-1):
        for row in dataset:
            groups = test_split(index, row[index], dataset)
            gini = gini_index(groups, class_values)
            print('X%d < %.3f Gini=%.3f' % ((index+1), row[index], gini))
            if gini < b_score:
                b_index, b_value, b_score, b_groups = index, row[index], gini, groups
    return {'index':b_index, 'value':b_value, 'groups':b_groups}

def extractClasses(dataset):

    class_values = []

    for rows in dataset:
        class_values.append(rows[-1])

    return class_values

dataset = [[0.50000, 1.50000, 1.00000],
           [1.00000, 0.50000, -1.00000],
           [1.25000, 3.50000, 1.00000],
           [1.50000, 4.00000, 1.00000],
           [2.00000, 2.00000, -1.00000],
           [2.50000, 2.50000, 1.00000],
           [3.75000, 3.00000, -1.00000],
           [4.00000, 1.00000, -1.00000]]
split = get_split(dataset)
print('Split: [X%d < %.3f]' % ((split['index']+1), split['value']))
--------------------------------------------------------------------------------
/Chapter01/Stacking.py:
--------------------------------------------------------------------------------
'''
Created on 24-May-2017

@author: aii32199
'''

from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from mlxtend.classifier import StackingClassifier
from sklearn import cross_validation
import numpy as np
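# Note: the sklearn.cross_validation module imported above was removed in scikit-learn 0.20;
# it is only needed by the commented-out cross-validation block at the end of this file.
# On current scikit-learn releases the equivalent (an assumption, not part of the original
# script) would be:
#     from sklearn import model_selection
#     scores = model_selection.cross_val_score(clf, X, y, cv=3, scoring='accuracy')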
from sklearn.tree import DecisionTreeClassifier 16 | iris = datasets.load_iris() 17 | X, y = iris.data[:, 1:3], iris.target 18 | 19 | def CalculateAccuracy(y_test,pred_label): 20 | nnz = np.shape(y_test)[0] - np.count_nonzero(pred_label - y_test) 21 | acc = 100*nnz/float(np.shape(y_test)[0]) 22 | return acc 23 | 24 | clf1 = KNeighborsClassifier(n_neighbors=2) 25 | clf2 = RandomForestClassifier(n_estimators = 2,random_state=1) 26 | clf3 = GaussianNB() 27 | lr = LogisticRegression() 28 | 29 | clf1.fit(X, y) 30 | clf2.fit(X, y) 31 | clf3.fit(X, y) 32 | 33 | f1 = clf1.predict(X) 34 | acc1 = CalculateAccuracy(y, f1) 35 | print("accuracy from KNN: "+str(acc1) ) 36 | 37 | f2 = clf2.predict(X) 38 | acc2 = CalculateAccuracy(y, f2) 39 | print("accuracy from Random Forest: "+str(acc2) ) 40 | 41 | f3 = clf3.predict(X) 42 | acc3 = CalculateAccuracy(y, f3) 43 | print("accuracy from Naive Bays: "+str(acc3) ) 44 | 45 | f = [f1,f2,f3] 46 | f = np.transpose(f) 47 | 48 | lr.fit(f, y) 49 | final = lr.predict(f) 50 | 51 | acc4 = CalculateAccuracy(y, final) 52 | print("accuracy from Stacking: "+str(acc4) ) 53 | 54 | # accuracy from KNN: 96.66666666666667 55 | # accuracy from Random Forest: 94.66666666666667 56 | # accuracy from Naive Bays: 92.0 57 | # accuracy from Stacking: 97.33333333333333 58 | 59 | # sclf = StackingClassifier(classifiers=[clf1, clf2, clf3], 60 | # meta_classifier=lr) 61 | # 62 | # print('3-fold cross validation:\n') 63 | # 64 | # for clf, label in zip([clf1, clf2, clf3, sclf], 65 | # ['KNN', 66 | # 'Random Forest', 67 | # 'Naive Bayes', 68 | # 'StackingClassifier']): 69 | # 70 | # scores = cross_validation.cross_val_score(clf, X, y, 71 | # cv=3, scoring='accuracy') 72 | # print("Accuracy: %0.2f (+/- %0.2f) [%s]" 73 | # % (scores.mean(), scores.std(), label)) 74 | 75 | -------------------------------------------------------------------------------- /Chapter04/utilityFunctions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 02-Sep-2017 3 | 4 | @author: DX 5 | ''' 6 | from csv import reader 7 | from math import sqrt 8 | from random import seed 9 | from random import randrange 10 | import numpy as np 11 | 12 | # Load a CSV file 13 | def load_csv(filename): 14 | dataset = list() 15 | with open(filename, 'r') as file: 16 | csv_reader = reader(file) 17 | for row in csv_reader: 18 | if not row: 19 | continue 20 | dataset.append(row) 21 | return dataset 22 | 23 | def getTrainTestData(dataset,split): 24 | np.random.seed(0) 25 | training = [] 26 | testing = [] 27 | 28 | np.random.shuffle(dataset) 29 | shape = np.shape(dataset) 30 | trainlength = np.uint16(np.floor(split*shape[0])) 31 | 32 | for i in range(trainlength): 33 | training.append(dataset[i]) 34 | 35 | for i in range(trainlength,shape[0]): 36 | testing.append(dataset[i]) 37 | 38 | return training,testing 39 | 40 | # Convert string column to float 41 | def str_column_to_float(dataset, column,length): 42 | 43 | #for row in dataset: 44 | for i in range(length): 45 | row = dataset[i] 46 | if row[column]=='?': 47 | row[column] = 0 48 | else: 49 | row[column] = float(row[column].strip()) 50 | 51 | # Convert string column to integer 52 | def str_column_to_int(dataset, column,length): 53 | 54 | class_values=[] 55 | for i in range(length): 56 | row = dataset[i] 57 | class_values.append(row[column]) 58 | # class_values = [row[column] for row in dataset] 59 | unique = set(class_values) 60 | lookup = dict() 61 | for i, value in enumerate(unique): 62 | lookup[value] = i 63 | for i in 
range(length): 64 | row = dataset[i] 65 | row[column] = lookup[row[column]] 66 | return lookup 67 | 68 | # Split a dataset into k folds 69 | def cross_validation_split(dataset, n_folds): 70 | dataset_split = list() 71 | dataset_copy = list(dataset) 72 | fold_size = int(len(dataset) / n_folds) 73 | for i in range(n_folds): 74 | fold = list() 75 | while len(fold) < fold_size: 76 | index = randrange(len(dataset_copy)) 77 | fold.append(dataset_copy.pop(index)) 78 | dataset_split.append(fold) 79 | return dataset_split 80 | 81 | def subsample(dataset, n_sample): 82 | sample = list() 83 | #n_sample = round(len(dataset) * ratio) 84 | while len(sample) < n_sample: 85 | index = randrange(len(dataset)) 86 | sample.append(dataset[index]) 87 | return sample 88 | 89 | # Calculate accuracy percentage 90 | def accuracy_metric(actual, predicted): 91 | correct = 0 92 | for i in range(len(actual)): 93 | if actual[i] == predicted[i]: 94 | correct += 1 95 | return correct / float(len(actual)) * 100.0 -------------------------------------------------------------------------------- /Chapter04/SpamClassification.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 02-Sep-2017 3 | 4 | @author: DX 5 | ''' 6 | 7 | #Import math for calculations of square roots 8 | from Chapter_03.DecisionTree_CART_RF import load_csv, getTrainTestData, accuracy_metric, str_column_to_float 9 | from Chapter_04 import KNN 10 | import numpy as np 11 | 12 | 13 | #Read CSV file 14 | dataName = 'spamData.csv' 15 | 16 | #Use function load_csv from chapter 3 17 | dataset = load_csv(dataName) 18 | 19 | #Create an empty list to store the data set 20 | dataset_new = [] 21 | 22 | #We will remove incomplete instance from the data set 23 | for i in range(len(dataset)-1): 24 | dataset_new.append(dataset[i]) 25 | dataset = dataset_new 26 | 27 | #Use function str_column_to_float from chapter 3 to convert string values to float 28 | for i in range(0, len(dataset[0])-1): 29 | str_column_to_float(dataset, i) 30 | 31 | str_column_to_float(dataset, len(dataset[0])-1) 32 | 33 | #Split train and test data set using function getTrainTestData 34 | #We will use 80% of the data set as training set and rest for testing 35 | train,test = getTrainTestData(dataset,0.8) 36 | 37 | train = np.array(train) 38 | test = np.array(test) 39 | 40 | shape = np.shape(train) 41 | xtrain = train[:,0:shape[1]-1] 42 | ytrain = train[:,shape[1]-1] 43 | 44 | xtest = test[:,0:shape[1]-1] 45 | ytest = test[:,shape[1]-1] 46 | 47 | #Create empty list to store predictions and actual output 48 | testPredictions=[] 49 | testActual=[] 50 | 51 | #Select number of neighbors for each classifier 52 | k = 7 53 | 54 | #Select sample size 55 | sample_size = 500 56 | 57 | #Select number of random features 58 | n_features = 20 59 | 60 | #Calculate number of classifier on the basis of number of samples. 
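#For a sense of scale (assuming the UCI Spambase data of roughly 4,600 rows): the 80%
#training split leaves about 3,680 rows, so with sample_size = 500 this gives
#n_classifier = 7; note that the loop below iterates over range(1, n_classifier), i.e.
#6 KNN models, each trained on 500 randomly drawn rows and 20 randomly chosen features.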
n_classifier = np.uint8(len(train)/sample_size)

#Get a prediction for each test instance and store it in the list
for i in range(0,len(test)):
    predictions = []

    #Run a loop over each sample
    for cl in range(1,n_classifier):

        #Randomly shuffle the training set and create a sample out of it
        np.random.shuffle(train)
        sample = [train[row] for row in range(sample_size)]

        #Pick the test instance
        test_instance = test[i]

        #Get the neighbors and the prediction based on those neighbors
        neighbors = KNN.getNeighborsBagged(sample, test_instance, k,n_features)
        pred = KNN.getPrediction(neighbors)

        #Append the prediction made from this sample with random features
        predictions.append(pred)

    #Get the final prediction using majority voting over the classifiers
    fin_pred = max(set(predictions), key=predictions.count)
    testActual.append(test_instance[-1])
    testPredictions.append(fin_pred)
    print ("Actual: %s Predicted: %s"%(test_instance[-1],fin_pred))

#Use the accuracy_metric function to evaluate our results
accuracy = accuracy_metric(testActual,testPredictions)

#Print the accuracy
print("Accuracy of the classification: %0.2f"%accuracy)
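For comparison, a roughly equivalent ensemble can be assembled with scikit-learn's BaggingClassifier wrapped around a KNN base learner. The sketch below is an added illustration rather than part of the book's code; it reuses the xtrain/ytrain/xtest/ytest arrays prepared above and the base_estimator argument name used elsewhere in this repository (newer scikit-learn releases rename it to estimator).

from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier

#Seven 7-nearest-neighbour models, each fit on 500 sampled rows and 20 randomly chosen
#features, with their predictions aggregated across the ensemble - mirroring the manual loop above
bagged_knn = BaggingClassifier(base_estimator=KNeighborsClassifier(n_neighbors=7),
                               n_estimators=7,
                               max_samples=500,
                               max_features=20,
                               random_state=0)
bagged_knn.fit(xtrain, ytrain)
print("Accuracy of the scikit-learn bagged KNN: %0.2f" % (100.0 * bagged_knn.score(xtest, ytest)))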
--------------------------------------------------------------------------------
/Chapter09/SVM_Test.py:
--------------------------------------------------------------------------------
'''
Created on 03-Nov-2017

@author: DX
'''
#To help us perform math operations
import numpy as np
#to plot our data and model visually
from matplotlib import pyplot as plt

#Step 1 - Define our data

#Input data - Of the form [X value, Y value, Bias term]
X = np.array([
    [-2,4,-1],
    [4,1,-1],
    [1, 6, -1],
    [2, 4, -1],
    [6, 2, -1],
])

#Associated output labels - First 2 examples are labeled '-1' and last 3 are labeled '+1'
y = np.array([-1,-1,1,1,1])

#Let's plot these examples on a 2D graph!
#For each example
for d, sample in enumerate(X):
    # Plot the negative samples (the first 2)
    if d < 2:
        plt.scatter(sample[0], sample[1], s=120, marker='_', linewidths=2)
    # Plot the positive samples (the last 3)
    else:
        plt.scatter(sample[0], sample[1], s=120, marker='+', linewidths=2)

# Plot a possible hyperplane that separates the two classes:
# we'll pick two points and draw the line between them (naive guess)
plt.plot([-2,6],[6,0.5])
plt.show()

#Let's perform stochastic gradient descent to learn the separating hyperplane between both classes

def svm_sgd_plot(X, Y):
    #Initialize our SVM's weight vector with zeros (3 values)
    w = np.zeros(len(X[0]))
    #The learning rate
    eta = 1
    #How many iterations to train for
    epochs = 100000
    #Store misclassifications so we can plot how they change over time
    errors = []

    #Training part: the gradient descent loop
    for epoch in range(1,epochs):
        error = 0
        for i, x in enumerate(X):
            #Misclassification
            if (Y[i]*np.dot(X[i], w)) < 1:
                #Misclassified: update our weights using the hinge-loss gradient
                w = w + eta * ( (X[i] * Y[i]) + (-2 *(1/epoch)* w) )
                error = 1
            else:
                #Correct classification: update our weights with the regularization term only
                w = w + eta * (-2 *(1/epoch)* w)
        errors.append(error)


    #Let's plot the rate of classification errors during training for our SVM
    plt.plot(errors, '|')
    plt.ylim(0.5,1.5)
    plt.axes().set_yticklabels([])
    plt.xlabel('Epoch')
    plt.ylabel('Misclassified')
    plt.show()

    return w

w = svm_sgd_plot(X,y)
for d, sample in enumerate(X):
    # Plot the negative samples
    if d < 2:
        plt.scatter(sample[0], sample[1], s=120, marker='_', linewidths=2)
    # Plot the positive samples
    else:
        plt.scatter(sample[0], sample[1], s=120, marker='+', linewidths=2)

# Add our test samples
plt.scatter(2,2, s=120, marker='_', linewidths=2, color='yellow')
plt.scatter(4,3, s=120, marker='+', linewidths=2, color='blue')

# Print the hyperplane calculated by svm_sgd_plot()
x2=[w[0],w[1],-w[1],w[0]]
x3=[w[0],w[1],w[1],-w[0]]

x2x3 =np.array([x2,x3])
X,Y,U,V = zip(*x2x3)
ax = plt.gca()
ax.quiver(X,Y,U,V,scale=1, color='blue')
plt.show()
--------------------------------------------------------------------------------
/Chapter03/DecisionTree.py:
--------------------------------------------------------------------------------
'''
Created on 21-Jun-2017

@author: aii32199
'''
import sys

import numpy as np


# Calculate the Gini index for a split dataset
def gini_index(groups, class_values):

    #Initialize the Gini variable
    gini = 0.0

    #Calculate the proportion for each class
    for class_value in class_values:
        #Extract groups
        for group in groups:
            #Number of instances in the group
            size = len(group)
            if size == 0:
                continue
            #Initialize a list to store the class index of the instances
            r = []
            #Get the class of each instance in the group
            for row in group:
                r.append(row[-1])
            #Count the number of instances belonging to the current class
            class_count = r.count(class_value)
            #Calculate the class proportion
            proportion = class_count/float(size)
            #Calculate the Gini index
            gini += (proportion * (1.0 - proportion))
    return gini

def createSplit(attribute,threshold,dataset):

    #Initialize two lists to store the subsets
    lesser, greater = list(),list()

    #Loop through the attribute values and create subsets out of them
    for values in dataset:
        #Apply the threshold
        if values[attribute]