├── Classifiers
│   ├── Least Squares
│   │   ├── LeastSquares - Plot Function.py
│   │   ├── LeastSquares.py
│   │   └── data.txt
│   ├── Naive Bayes
│   │   ├── Categorical
│   │   │   ├── CategoricalNaiveBayes.py
│   │   │   ├── _DataReader.py
│   │   │   ├── data.txt
│   │   │   ├── data2.txt
│   │   │   └── data3.txt
│   │   └── Numerical
│   │       ├── NumericalNaiveBayes.py
│   │       ├── _DataReader.py
│   │       └── data.txt
│   ├── Neural Network
│   │   ├── NeuralNetwork.py
│   │   ├── NeuralNetwork_Plot.py
│   │   ├── Reader.py
│   │   └── data.txt
│   ├── Perceptron
│   │   ├── Perceptron.py
│   │   ├── data.txt
│   │   └── data2.txt
│   └── kNN
│       ├── data.txt
│       ├── data2.txt
│       └── kNearestNeighbours.py
├── Clustering
│   ├── kMeans - Online
│   │   ├── data.txt
│   │   ├── kMeans.py
│   │   └── kMeans_Plot.py
│   └── kMeans - Standard
│       ├── data.txt
│       ├── kMeans.py
│       └── kMeans_Plot.py
├── Keras
│   └── kFold.py
├── Pattern Recognition
│   ├── Edit Distance.py
│   └── Viterbi.py
└── README.md

/Classifiers/Least Squares/LeastSquares - Plot Function.py:
--------------------------------------------------------------------------------
1 | import LeastSquares as LS;
2 | import numpy as np;
3 | from random import choice;
4 | from matplotlib import pyplot;
5 | 
6 | 
7 | def PlotFunction(X, Y, percentage, indexA, indexB):
8 |     n = len(X); #Number of items
9 |     split = int(n*percentage);
10 | 
11 |     #Keep only the two feature columns at the given indexes
12 |     X = X[:, [indexA, indexB]];
13 | 
14 |     #Normalize values (compute the max once, then scale the whole matrix)
15 |     X = X / X.max();
16 | 
17 |     #Train on the first part of the data, as given by percentage
18 |     trainX = X[:split];
19 |     trainY = Y[:split];
20 | 
21 |     #The items will be sorted into classes in this list
22 |     Points = [[] for i in range(Y.shape[1])];
23 | 
24 |     W = LS.CalculateWeights(trainX, trainY);
25 | 
26 |     correct = 0;
27 | 
28 |     #Calculate accuracy over the whole data set
29 |     for i in range(n):
30 |         prediction = LS.Predict(W, X[i]);
31 |         itemClass = list(Y[i].A1);
32 | 
33 |         if(prediction == itemClass):
34 |             correct += 1;
35 | 
36 |         #Find index of class
37 |         index = -1;
38 |         for j in range(len(prediction)):
39 |             if(prediction[j] == 1):
40 |                 index = j;
41 |                 break;
42 | 
43 |         Points[index].append(X[i]);
44 | 
45 |     accuracy = correct/float(n)*100;
46 |     print "Accuracy ", accuracy;
47 | 
48 |     colors = ['r', 'b', 'g', 'c', 'm', 'y'];
49 | 
50 |     for i in range(len(Points)):
51 |         p = Points[i];
52 |         Xa = [];
53 |         Xb = [];
54 | 
55 |         #Choose color randomly from list, then remove it
56 |         #(to avoid duplicates)
57 |         color = choice(colors);
58 |         colors.remove(color);
59 | 
60 |         for item in p:
61 |             Xa.append(item[:, [0]].item(0));
62 |             Xb.append(item[:, [1]].item(0));
63 | 
64 |         pyplot.plot(Xa, Xb, 'o', color=color);
65 | 
66 |     pyplot.show();
67 | 
68 | 
69 | def main():
70 |     X, Y, n = LS.ReadData('data.txt');
71 | 
72 |     PlotFunction(X, Y, 0.7, 2, 3);
73 | 
74 | if __name__ == "__main__":
75 |     main();
--------------------------------------------------------------------------------
/Classifiers/Least Squares/LeastSquares.py:
--------------------------------------------------------------------------------
1 | import numpy as np;
2 | from numpy.linalg import inv;
3 | from random import shuffle;
4 | 
5 | ###_Read Data_###
6 | def ReadData(fileName):
7 |     f = open(fileName);
8 |     lines = f.read().splitlines();
9 |     f.close();
10 | 
11 |     items = [];
12 |     classes = [];
13 | 
14 |     for line in lines:
15 |         line = line.split(','); #Split line on commas
16 |         itemFeatures = []; #Temp list to hold feature values of the item
17 | 
18 |         for i in range(len(line)-1):
19 |             value = float(line[i]);
20 |             itemFeatures.append(value);
21 | 
22 |         #Add to classes the known classification for current item
23 |         classes.append(line[-1]);
24 |         #Add item data to items
25 |         items.append(itemFeatures);
26 | 
27 |     #Map class names to numbers (from 0 to the number of classes minus 1)
28 |     classes = map(sorted(set(classes)).index, classes);
29 | 
30 |     X = np.matrix(items); #Convert data to numpy matrix
31 |     Y = BuildY(classes); #Build the Y matrices
32 |     n = len(items); #The number of items
33 | 
34 |     X, Y = ShuffleArrays(X, Y);
35 | 
36 |     return X, Y, n;
37 | 
38 | def ShuffleArrays(A, B):
39 |     toShuffle = []; #Temp array to shuffle X and Y at the same time
40 |     n = len(A);
41 | 
42 |     for i in range(n):
43 |         #Build toShuffle by packing a copy of Xi together with Yi
44 |         toShuffle.append((A[i].copy(), B[i].copy()));
45 | 
46 |     shuffle(toShuffle);
47 | 
48 |     for i in range(n):
49 |         #Unpack toShuffle
50 |         A[i] = toShuffle[i][0];
51 |         B[i] = toShuffle[i][1];
52 | 
53 |     return A,B;
54 | 
55 | def BuildY(Y):
56 |     newY = [];
57 |     #Number of classes is the largest number in Y plus 1
58 |     classesNumber = max(Y)+1;
59 | 
60 |     for i in range(len(Y)):
61 |         #Initialize vector with zeros, set to 1 the class index
62 |         tempVector = [0 for j in range(classesNumber)];
63 |         tempVector[Y[i]] = 1;
64 | 
65 |         newY.append(tempVector);
66 | 
67 |     return np.matrix(newY);
68 | 
69 | 
70 | ###_Core Functions_###
71 | def CalculateWeights(X, Y):
72 |     #Number of attributes (including the bias)
73 |     A = X.shape[1] + 1;
74 |     #Number of classes
75 |     C = Y.shape[1];
76 | 
77 |     #The sums for Xi*Xi.T and Xi*Yi.T
78 |     XX = np.zeros((A, A));
79 |     XY = np.zeros((A, C));
80 | 
81 |     for i in range(len(X)):
82 |         x = X[i]; #The ith item vector
83 |         x = np.append(1, x); #Augment item with a 1
84 | 
85 |         y = Y[i]; #The vector storing the class x is in
86 | 
87 |         #Calculate outer products of x*x.T and x*y.T
88 |         XX += np.outer(x, x);
89 |         XY += np.outer(x, y);
90 | 
91 |     XX += 0.001 * np.eye(A); #Small ridge term, so that XX is invertible
92 | 
93 |     #The weight matrix is the product of the inverse of XX and XY
94 |     weight = np.dot(inv(XX), XY);
95 |     return weight;
96 | 
97 | def Predict(W,x):
98 |     x = np.append(1, x); #Augment item with a 1
99 | 
100 |     prediction = np.dot(W.T, x); #List of predictions
101 | 
102 |     #Find max prediction
103 |     m = prediction[0];
104 |     index = 0;
105 |     for i in range(1,len(prediction)):
106 |         if(prediction[i] > m):
107 |             m = prediction[i];
108 |             index = i;
109 | 
110 |     #Initialize prediction vector to zeros
111 |     y = [0 for i in range(len(prediction))];
112 |     y[index] = 1; #Set guessed class to 1
113 | 
114 |     return y; #Return prediction vector
115 | 
116 | 
117 | ###_Evaluation Functions_###
118 | def K_FoldValidation(k, X, Y):
119 |     if(k > len(X)):
120 |         return -1;
121 | 
122 |     correct = 0; #The number of correct classifications
123 |     total = (len(X)/k)*k; #The total number of classifications
124 | 
125 |     l = len(X)/k; #The length of a fold
126 | 
127 |     for i in range(k):
128 |         #Train on all folds except the ith, which is held out for testing
129 |         trainingX = np.concatenate([X[:i*l], X[(i+1)*l:]]);
130 |         trainingY = np.concatenate([Y[:i*l], Y[(i+1)*l:]]);
131 | 
132 |         testX = X[i*l:(i+1)*l];
133 |         testY = Y[i*l:(i+1)*l];
134 | 
135 |         W = CalculateWeights(trainingX, trainingY);
136 | 
137 |         for j in range(len(testX)):
138 |             itemClass = list(testY[j].A1); #The actual classification
139 |             guess = Predict(W, testX[j]); #Make a prediction
140 | 
141 |             if(guess == itemClass):
142 |                 #Guessed correctly
143 |                 correct += 1;
144 | 
145 |     return correct/float(total);
146 | 
147 | def Evaluate(times, k, X, Y):
148 |     accuracy = 0;
149 |     for t in range(times):
150 |         X, Y = 
ShuffleArrays(X, Y); 151 | accuracy += K_FoldValidation(k, X, Y); 152 | 153 | print accuracy/float(times); 154 | 155 | 156 | ###_Main_### 157 | def main(): 158 | X, Y, n = ReadData('data.txt'); 159 | W = CalculateWeights(X, Y); 160 | 161 | Evaluate(100, 5, X, Y); 162 | 163 | if __name__ == "__main__": 164 | main(); 165 | -------------------------------------------------------------------------------- /Classifiers/Least Squares/data.txt: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 
6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica -------------------------------------------------------------------------------- /Classifiers/Naive Bayes/Categorical/CategoricalNaiveBayes.py: -------------------------------------------------------------------------------- 1 | import _DataReader as DataReader; 2 | 3 | 4 | def Classifier(Evidence): 5 | #The string of evidence, so that we can save it in P. 6 | evidence = ''; 7 | 8 | #Check if all evidence is also in Features 9 | for e in Evidence: 10 | if e not in Features: 11 | #A given evidence does not belong in Features. Abort. 12 | print "Evidence list is erroneous" 13 | return; 14 | 15 | #Build the evidence string 16 | evidence += e + ', '; 17 | 18 | evidence = evidence[:-2]; #remove the last two chars, as they are ', ' 19 | 20 | m = -1.0; #Hold the max 21 | classification = ''; #Hold the classification 22 | 23 | #We need to find P(c|evidence). The equation (from Bayes) is: 24 | #P(c|evidence) = P(evidence|c)*P(c)/P(evidence) 25 | #Because this Bayes classifier is naive, the features in evidence are 26 | #independent. 
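    #(Illustrative note: for the evidence list ['Tall', 'Slim'] from data.txt,
    #independence means P(Tall, Slim | c) is approximated by
    #P(Tall | c) * P(Slim | c), even if the two features are in fact correlated.)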
    #Therefore, the above equation is simplified to:
27 |     #P(c|evidence) = P(evidence1|c)*P(evidence2|c)*...*P(evidenceN|c) * P(c)
28 |     #divided by P(evidence1)*P(evidence2)*...*P(evidenceN)
29 | 
30 |     #Calculate the probability of all classes for given evidence/features
31 |     #using the Bayes equation. Pick the highest.
32 |     for c in Classes:
33 |         P[c + '|' + evidence] = P[c]; #Start from the prior probability
34 | 
35 |         for e in Evidence:
36 |             #Multiply by the conditional prob and divide by the feature prob
37 |             P[c + '|' + evidence] *= P[e + '|' + c] / P[e];
38 | 
39 |         #Find the max
40 |         if(P[c + '|' + evidence] > m):
41 |             #P(c|evidence) is the max so far; update m and classification
42 |             m = P[c + '|' + evidence];
43 |             classification = c;
44 | 
45 |     #With the evidence, the item belongs to classification with probability m
46 |     print classification, m;
47 | 
48 | 
49 | #Read data from file
50 | Classes, Features, P = DataReader.Read('data.txt');
51 | 
52 | #Run classifier with the evidence list
53 | Classifier(['Tall', 'Slim']);
--------------------------------------------------------------------------------
/Classifiers/Naive Bayes/Categorical/_DataReader.py:
--------------------------------------------------------------------------------
1 | def Read(fileName):
2 |     Classes = {};
3 |     Features = {};
4 | 
5 |     #Read data from input file, split the lines
6 |     f = open(fileName,'r');
7 |     lines = f.read().splitlines();
8 |     f.close();
9 | 
10 |     n = len(lines)-1; #The size of the data set
11 | 
12 |     #Extract the features
13 |     features = lines[0]; #The first line of input, as a string
14 |     features = features.split(' ')[1:]; #Split first line by spaces
15 |     l = len(features);
16 | 
17 |     #Extract the class data
18 |     classes = lines[1:]; #Remove the first line
19 | 
20 |     for f in features:
21 |         #For every string in the first line, add a new item to Features,
22 |         #plus its complement.
23 |         Features[f] = 0;
24 |         Features["Not " + f] = 0;
25 | 
26 |     #Construct Classes table
27 |     for c in classes:
28 |         #Split current line (item) by spaces
29 |         #The first element holds the name of the class
30 |         #The rest show whether the item has a certain feature
31 |         c = c.split(' ');
32 | 
33 |         if(c[0] not in Classes):
34 |             #The item class has not been added to Classes. Add it now.
35 |             Classes[c[0]] = {"Total":0}; #Set the total of the class to 0.
36 |             for f in Features:
37 |                 #Add to the class dictionary (table) all the features, set to 0.
38 |                 Classes[c[0]][f] = 0;
39 | 
40 |         #Increment the total items in the item class
41 |         Classes[c[0]]["Total"] += 1;
42 | 
43 |         for i in range(1,l+1):
44 |             if(c[i] == 'True'):
45 |                 #The item has the feature in the ith index in the item list, c
46 |                 #The ith index in c corresponds with the i-1 index in features
47 |                 feature = features[i-1]; #Save it in feature
48 |             elif(c[i] == 'False'):
49 |                 #The item doesn't have the feature in the item list
50 |                 #Instead, it has the "Not Feature", the complement of the feature
51 |                 feature = "Not " + features[i-1]; #Save complement in feature
52 | 
53 |             Features[feature] += 1; #Increment feature counter
54 | 
55 |             if(feature not in Classes[c[0]]):
56 |                 #The feature has not been added to the class dictionary.
57 |                 #Add feature to the item class.
58 |                 Classes[c[0]][feature] = 1;
59 |             else:
60 |                 #The feature exists in the class dictionary.
61 |                 #Increment the feature counter in the item class.
62 |                 Classes[c[0]][feature] += 1;
63 | 
64 | 
65 |     #Calculate the various probabilities
66 |     P = {}; #Probability dictionary. 
Holds the various probabilities 67 | 68 | #Calculate the prior probabilities of the classes 69 | for c in Classes: 70 | P[c] = Classes[c]["Total"]/float(n); 71 | 72 | #Calculate the prior probabilities of the features 73 | for f in Features: 74 | P[f] = Features[f]/float(n); 75 | 76 | #Calculate the conditional probabilities 77 | for c in Classes: 78 | for f in Features: 79 | P[f + '|' + c] = Classes[c][f]/float(Classes[c]["Total"]); 80 | 81 | return (Classes, Features, P); 82 | -------------------------------------------------------------------------------- /Classifiers/Naive Bayes/Categorical/data.txt: -------------------------------------------------------------------------------- 1 | Class Tall Slim Smart 2 | Detective True False False 3 | Detective True True True 4 | Detective True True True 5 | Detective False False True 6 | Detective True False False 7 | Detective False True True 8 | Detective False True True 9 | Detective False False False 10 | Detective False False True 11 | Detective True True True 12 | Detective False True True 13 | Detective True True False 14 | Detective True False False 15 | Detective False False True 16 | Detective True False True 17 | Detective True True True 18 | Detective False False True 19 | Detective False False True 20 | Detective True True True 21 | Detective False True True 22 | Brute True False False 23 | Brute True True False 24 | Brute True False False 25 | Brute True False False 26 | Brute False False False 27 | Brute True False False 28 | Brute True False True 29 | Brute True True False 30 | Brute True False False 31 | Brute True True False 32 | Brute False False False 33 | Brute True False True 34 | Brute True False False 35 | Brute True True False 36 | Brute True False True 37 | Brute False False False 38 | Brute True False False 39 | Brute True False False 40 | Brute True False False 41 | Brute True False True 42 | Brute True False False 43 | Brute True False False 44 | Brute True True False 45 | Brute True False False 46 | Brute True False False 47 | Brute False False False 48 | Brute True False False 49 | Brute True False True 50 | Brute False False False 51 | Brute True False False -------------------------------------------------------------------------------- /Classifiers/Naive Bayes/Categorical/data2.txt: -------------------------------------------------------------------------------- 1 | Class Long Sweet Yellow 2 | Banana True True True 3 | Banana True True True 4 | Banana True True False 5 | Banana True True True 6 | Banana True True True 7 | Banana True True False 8 | Banana True True True 9 | Banana True False True 10 | Banana True True True 11 | Banana False True True 12 | Lemon False False True 13 | Lemon False False True 14 | Lemon False False True 15 | Lemon False False True 16 | Lemon False False True 17 | Other True False False 18 | Other False True False 19 | Other True False True 20 | Other False True False 21 | Other False False True 22 | Other True True False 23 | Other False False False 24 | Other True False True 25 | Other False True False 26 | Other True True False -------------------------------------------------------------------------------- /Classifiers/Naive Bayes/Categorical/data3.txt: -------------------------------------------------------------------------------- 1 | Class Big Feathers Claws 2 | Dinosaur True False False 3 | Dinosaur False True False 4 | Dinosaur False False True 5 | Dinosaur False True False 6 | Dinosaur True True True 7 | Dinosaur True False False 8 | Dinosaur False True True 9 | Dinosaur 
False False False 10 | Dinosaur True True False 11 | Dinosaur False False True 12 | Chicken False True False 13 | Chicken False False True 14 | Chicken False True True 15 | Chicken False True True 16 | Chicken False True False 17 | Chicken False True False 18 | Chicken False True True 19 | Chicken False True True 20 | Chicken False True False 21 | Chicken False True False 22 | Chicken False True True 23 | Dragon True True False 24 | Dragon False False False 25 | Dragon False True True 26 | Dragon True False False 27 | Dragon True False True 28 | Dragon False True False 29 | Dragon True False True 30 | Dragon True False True 31 | Dragon True False True 32 | Dragon False False True -------------------------------------------------------------------------------- /Classifiers/Naive Bayes/Numerical/NumericalNaiveBayes.py: -------------------------------------------------------------------------------- 1 | import _DataReader as DataReader; 2 | import math; 3 | 4 | 5 | def Gaussian(mean, stDev, x): 6 | g = 1/(math.sqrt(2*math.pi)*stDev) * math.e**(-0.5*(float(x-mean)/stDev)**2); 7 | return g; 8 | 9 | def Classifier(Evidence): 10 | #The string of evidence, so that we can save it in P. 11 | evidence = ''; 12 | 13 | #Check if all evidence is also in Features 14 | for e in Evidence: 15 | eF = e[0]; #The feature in evidence e 16 | eV = e[1]; #The value in evidence e 17 | if eF not in Features: 18 | #A given evidence does not belong in Features. Abort. 19 | print "Evidence list is erroneous"; 20 | return; 21 | 22 | #Build the evidence string 23 | evidence += eF + " = " + str(eV) + ', '; 24 | 25 | evidence = evidence[:-2]; #remove the last two chars, as they are ', ' 26 | 27 | m = -1.0; #Hold the max 28 | classification = ''; #Hold the classification 29 | 30 | #We need to find P(c|evidence). The equation (from Bayes) is: 31 | #P(c|evidence) = P(evidence|c)*P(c)/P(evidence) 32 | #Because this Bayes classifier is naive, the features in evidence are 33 | #independent. Therefore, the above equation is simplified to: 34 | #P(c|evidence) = P(evidence1|c)*P(evidence2|c)*...*P(evidenceN|c) * P(c). 35 | #We do not need to calculate P(evidence) as it is the same for all 36 | #classes. 37 | 38 | #We know the individual probability P(c) but we do not know the 39 | #probability of the conditional probabilities P(evidenceX|c). 40 | #We calculate those using the Gaussian distribution formula. 41 | #Instead of Gaussian we can use any other distribution, if it is known. 42 | 43 | #The parameters are the mean, the standard deviation and the value of the evidence. 44 | #We have the value from the evidence, eV. 45 | #The mean is the class mean for the feature, Classes[c][eF]["Mean"]. 46 | #The stDev is the class stDev for the feature, Classes[c][eF]["StDev"]. 47 | 48 | #We input those to the Gaussian formula and we receive the output. 49 | 50 | #Calculate the probability of all classes for given evidence/features 51 | #using the Bayes equation. Pick the highest. 
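    #Worked example with illustrative numbers: if, for some class c, the
    #feature 'Height' has Mean = 173 and StDev = 3, then for the evidence
    #('Height', 170) the conditional factor is
    #  Gaussian(173, 3, 170) = 1/(sqrt(2*pi)*3) * e**(-0.5*((170-173)/3.0)**2)
    #                        ~ 0.1330 * 0.6065 ~ 0.0807
    #and this factor multiplies the running product for class c below.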
52 |     for c in Classes:
53 |         P[c + '|' + evidence] = P[c]; #Start from the prior probability
54 | 
55 |         for e in Evidence:
56 |             eF = e[0]; #The feature in evidence e
57 |             eV = e[1]; #The value in evidence e
58 |             #Multiply by the conditional prob
59 |             mean = Classes[c][eF]["Mean"]; #mean
60 |             stDev = Classes[c][eF]["StDev"]; #standard deviation
61 |             P[c + '|' + evidence] *= Gaussian(mean,stDev,eV);
62 | 
63 |         if(P[c + '|' + evidence] > m):
64 |             #P(c|evidence) is the max so far; update m and classification
65 |             m = P[c + '|' + evidence];
66 |             classification = c;
67 | 
68 |     #With the evidence, the item belongs to classification with probability m
69 |     print classification, m;
70 | 
71 | 
72 | #Read data from file
73 | Classes, Features, P, n = DataReader.Read('data.txt'); #Returns a tuple
74 | 
75 | #Run classifier with the evidence list
76 | Classifier((('Height', 170), ('Weight', 65)));
--------------------------------------------------------------------------------
/Classifiers/Naive Bayes/Numerical/_DataReader.py:
--------------------------------------------------------------------------------
1 | import math;
2 | 
3 | def Read(fileName):
4 |     Classes = {}; #Class dictionary
5 |     Features = []; #Features list
6 | 
7 |     #Read data from input file, split the lines
8 |     f = open(fileName,'r');
9 |     lines = f.read().splitlines();
10 |     f.close();
11 | 
12 |     n = len(lines)-1; #The size of the data set
13 | 
14 |     #Extract the features
15 |     features = lines[0]; #The first line of input, as a string
16 |     features = features.split(' ')[1:]; #Split first line by spaces
17 |     l = len(features); #The number of features
18 | 
19 |     #Extract the class data
20 |     classes = lines[1:]; #Remove the first line (the features)
21 | 
22 |     for f in features:
23 |         #For every string in the first line, add a new item to
24 |         #the Features list.
25 |         Features.append(f);
26 | 
27 |     #Construct Classes table#
28 |     #a) Find means
29 |     for c in classes:
30 |         #Split current line (item) by spaces
31 |         #The first element holds the name of the class
32 |         #The rest hold the item's numerical values for each feature
33 |         c = c.split(' ');
34 | 
35 |         if(c[0] not in Classes):
36 |             #The item class has not been added to Classes. Add it now.
37 |             Classes[c[0]] = {"Total":0}; #Set the total of the class to 0.
38 |             for f in Features:
39 |                 #Add to the class dictionary (table) all the features' mean
40 |                 #and standard deviation
41 |                 Classes[c[0]][f] = {"Mean":0, "StDev":0};
42 | 
43 |         #Increment the total items in the item class
44 |         Classes[c[0]]["Total"] += 1;
45 | 
46 |         #Calculate the mean of classes' features
47 |         for i in range(1,l+1):
48 |             t = Classes[c[0]]["Total"]; #Pass the total
49 |             f = Classes[c[0]][features[i-1]]["Mean"]; #The current average
50 | 
51 |             Classes[c[0]][features[i-1]]["Mean"] = (f*(t-1)+float(c[i]))/t;
52 | 
53 |     #b) Find Standard Deviations
54 |     #StDev : Square Root of Variance
55 |     #Variance : Average of squared difference from mean
56 | 
57 |     values = {};
58 |     for k in Classes.keys():
59 |         #We will save the variances in here, building them up as we go
60 |         #so that we can use them at the end.
61 |         values[k] = {};
62 |         for f in Features:
63 |             values[k][f] = 0;
64 | 
65 |     for c in classes:
66 |         #To find the standard deviations, we first need to know the means.
67 |         #That's why we run this loop after the first loop.
68 |         c = c.split(' ');
69 | 
70 |         for i in range(1,l+1):
71 |             #From the current value, subtract the mean.
72 |             #Divide by the total minus 1 to get the average. 
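            #(For example, for the values 1, 2 and 3 the mean is 2 and the
            #sample variance is ((1-2)**2 + (2-2)**2 + (3-2)**2)/(3-1) = 1,
            #so the StDev is 1.)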
73 | #We are using the total minus one because we do not want the 74 | #population deviation, but the standard deviation. 75 | v = math.pow(int(c[i]) - Classes[c[0]][features[i-1]]["Mean"],2); 76 | values[c[0]][Features[i-1]] += v/(Classes[c[0]]["Total"]-1); 77 | 78 | for k in Classes.keys(): 79 | #Calculate StDev for the features of each class, using values 80 | for i in range(1,l+1): 81 | #The Standard Deviation is the square root of the variance 82 | s = math.sqrt(values[k][Features[i-1]]); 83 | 84 | Classes[k][features[i-1]]["StDev"] = s; 85 | 86 | 87 | #Calculate the various probabilities 88 | P = {}; #Probability dictionary. Holds the various probabilities 89 | 90 | #Calculate the prior probabilities of the classes 91 | for c in Classes: 92 | P[c] = Classes[c]["Total"]/float(n); 93 | 94 | 95 | return (Classes, Features, P, n); 96 | -------------------------------------------------------------------------------- /Classifiers/Naive Bayes/Numerical/data.txt: -------------------------------------------------------------------------------- 1 | Class Height Weight 2 | Wrestler 170 61 3 | Wrestler 173 67 4 | Wrestler 175 69 5 | Wrestler 181 75 6 | Wrestler 169 60 7 | Wrestler 171 60 8 | Wrestler 173 64 9 | Wrestler 173 71 10 | Wrestler 175 65 11 | Wrestler 171 64 12 | Wrestler 169 62 13 | Wrestler 171 66 14 | Wrestler 177 72 15 | Wrestler 172 60 16 | Wrestler 174 67 17 | Sumo 181 110 18 | Sumo 177 100 19 | Sumo 180 111 20 | Sumo 179 105 21 | Sumo 170 101 22 | Sumo 177 104 23 | Sumo 182 110 24 | Sumo 180 106 25 | Sumo 187 119 26 | Sumo 180 113 27 | Sumo 174 114 28 | Sumo 186 108 29 | Sumo 180 110 30 | Sumo 179 104 31 | Sumo 173 99 -------------------------------------------------------------------------------- /Classifiers/Neural Network/NeuralNetwork.py: -------------------------------------------------------------------------------- 1 | """ 2 | There exists an updated version of this script. 
It can be found on Kaggle: 3 | https://www.kaggle.com/antmarakis/another-neural-network-from-scratch 4 | """ 5 | 6 | 7 | 8 | import numpy as np; 9 | import Reader; 10 | 11 | 12 | ###_Evaluation Functions_### 13 | def Accuracy(X, Y, Weights, layers): 14 | layers += 1; 15 | 16 | #Run training set through network, find overall accuracy 17 | correct = 0; 18 | 19 | for i in range(len(X)): 20 | x = X[i]; 21 | y = list(Y[i].A1); 22 | 23 | guess = Predict(x, Weights, layers); 24 | 25 | if(y == guess): 26 | #Guessed correctly 27 | correct += 1; 28 | 29 | return correct / float(len(X)); 30 | 31 | def K_FoldValidation(k, X, Y, f, hiddenLayers, nodes, epochs, r=0.15): 32 | if (k > len(X)): 33 | return -1; 34 | 35 | correct = 0; #The number of correct classifications 36 | total = len(X) * (k - 1); #The total number of classifications 37 | 38 | l = len(X) / k; #The length of a fold 39 | 40 | for i in range(k): 41 | print "\nFold",i; 42 | 43 | #Split data set into training and testing 44 | trainingX = X[i * l:(i + 1) * l]; 45 | trainingY = Y[i * l:(i + 1) * l]; 46 | 47 | testX = np.concatenate([X[:i*l], X[(i+1)*l:]]); 48 | testY = np.concatenate([Y[:i*l], Y[(i+1)*l:]]); 49 | 50 | #Calculate Weights 51 | weights = NeuralNetwork(epochs, X, Y, f, hiddenLayers, nodes, r); 52 | 53 | #Make predictions for test sets 54 | for j in range(len(testX)): 55 | x = testX[j]; 56 | y = list(testY[j].A1); 57 | 58 | guess = Predict(x, weights, hiddenLayers+1); 59 | 60 | if(y == guess): 61 | #Guessed correctly 62 | correct += 1; 63 | 64 | return correct / float(total); 65 | 66 | 67 | ###_Auxiliary Functions_### 68 | def Sigmoid(x): 69 | return 1 / (1 + np.exp(-x)); 70 | 71 | def SigmoidDerivative(x): 72 | return np.multiply(x, 1-x); 73 | 74 | def InitializeWeights(f, layers, nodes): 75 | ##_Initialize weights with random values in [-1, 1] (including bias)_## 76 | 77 | #Augment feature vectors with bias 78 | f += 1; 79 | 80 | #Initialize weights from input to first hidden layer 81 | inputToHidden = [[np.random.uniform(-1, 1) for i in range(f)] for j in range(nodes[0])]; 82 | inputToHidden = np.matrix(inputToHidden); 83 | 84 | weights = [inputToHidden]; 85 | #Initialize the rest of the weights 86 | for i in range(1, layers): 87 | w = [[np.random.uniform(-1, 1) for k in range(nodes[i-1] + 1)] for j in range(nodes[i])]; 88 | w = np.matrix(w); 89 | weights.append(w); 90 | 91 | return weights; 92 | 93 | 94 | ###_Core Functions_### 95 | def Predict(item, Weights, layers, sigmoid=True): 96 | item = np.append(1, item); #Augment feature vector 97 | 98 | ##_Forward Propagation_## 99 | activations = [item]; 100 | Input = item; 101 | for i in range(layers): 102 | activation = np.dot(Input, Weights[i].T); 103 | if(i < layers-1 or sigmoid): 104 | #When calculating the output activation, check if 105 | #we should sigmoid it or not (via the sigmoid var) 106 | activation = Sigmoid(activation); 107 | 108 | activations.append(activation); 109 | 110 | Input = np.append(1, activation); #Augment activation vector 111 | 112 | outputFinal = activations[-1].A1; 113 | 114 | #Find max activation in output 115 | m = outputFinal[0]; 116 | index = 0; 117 | for i in range(1, len(outputFinal)): 118 | output = outputFinal[i]; 119 | 120 | if(output > m): 121 | m = output; 122 | index = i; 123 | 124 | #Initialize prediction vector to zeros 125 | y = [0 for i in range(len(outputFinal))]; 126 | y[index] = 1; #Set guessed class to 1 127 | 128 | return y; #Return prediction vector 129 | 130 | def Train(X, Y, r, layers, weights): 131 | for i in range(len(X)): 132 
| x = X[i]; 133 | y = Y[i].A1; 134 | x = np.matrix(np.append(1, x)); # Augment feature vector 135 | 136 | ##_Forward Propagation_## 137 | #Each layer receives an input and calculates its output 138 | #The output of one layer is the input to the next 139 | #The first input is the first feature vector (the item) 140 | activations = [x]; 141 | Input = x; 142 | for j in range(layers): 143 | activation = Sigmoid(np.dot(Input, weights[j].T)); 144 | activations.append(activation); 145 | 146 | Input = np.append(1, activation); #Augment with bias 147 | 148 | ##_Back Propagation_## 149 | #Find error at output 150 | #Propagate error backwards through the layers 151 | #For each layer: 152 | #a) Calculate delta: 153 | #Error of next layer * the sigmoid der of current layer activation 154 | #b) Update weights between current layer and previous layer 155 | #Multiply delta with activation of previous layer 156 | #Multiply that with rate 157 | #Add that to weights of previous layer 158 | #c) Calculate error for current layer 159 | #Remove bias from previous-layer weights, get w 160 | #Multiply delta with w to get error 161 | outputFinal = activations[-1]; 162 | error = np.matrix(y - outputFinal); #Error at output 163 | 164 | for j in range(layers, 0, -1): 165 | currActivation = activations[j]; 166 | 167 | if(j > 1): 168 | #Augment previous activation 169 | prevActivation = np.append(1, activations[j-1]); 170 | else: 171 | #First hidden layer, prevActivation is input (without bias) 172 | prevActivation = activations[0]; 173 | 174 | delta = np.multiply(error, SigmoidDerivative(currActivation)); 175 | weights[j-1] += r * np.multiply(delta.T, prevActivation); 176 | 177 | w = np.delete(weights[j-1], [0], axis=1); #remove bias from weights 178 | 179 | error = np.dot(delta, w); #Calculate error for curr layer 180 | 181 | return weights; 182 | 183 | def NeuralNetwork(epochs, X, Y, f, hiddenLayers, nodes, r=0.15): 184 | layers = hiddenLayers + 1; #Total number of layers in network 185 | weights = InitializeWeights(f, layers, nodes); 186 | 187 | for epoch in range(epochs): 188 | #Train weights 189 | weights = Train(X, Y, r, layers, weights); 190 | 191 | if(epoch % 25 == 0): 192 | print "Epoch ", epoch; 193 | 194 | return weights; 195 | 196 | 197 | ###_Main_### 198 | def main(): 199 | X, Y = Reader.ReadData('data.txt'); 200 | 201 | f = len(X[0].A1); 202 | h1 = 5; 203 | h2 = 10; 204 | o = len(Y[0].A1); 205 | hiddenLayers = 2; 206 | r = 0.15; 207 | epochs = 100; 208 | 209 | #print K_FoldValidation(5, X, Y, f, hiddenLayers, [h1,h2,o], epochs, r); 210 | 211 | weights = NeuralNetwork(epochs, X, Y, f, hiddenLayers, [h1,h2,o], r); 212 | print Accuracy(X, Y, weights, hiddenLayers); 213 | 214 | if __name__ == "__main__": 215 | main(); 216 | -------------------------------------------------------------------------------- /Classifiers/Neural Network/NeuralNetwork_Plot.py: -------------------------------------------------------------------------------- 1 | import NeuralNetwork as NN; 2 | import Reader; 3 | import numpy as np; 4 | from random import choice; 5 | from matplotlib import pyplot; 6 | 7 | 8 | def PlotFunction(X, Y, percentage, indexA, indexB): 9 | n = len(X); #Number of items 10 | split = int(n*percentage); 11 | 12 | features = len(X[0].A1); 13 | #Delete all columns but the ones on the given indexes 14 | for j in range(features): 15 | if(j == indexA or j == indexB): 16 | continue; 17 | 18 | X = np.delete(X, j, 1); 19 | 20 | testX = X[split:]; 21 | testY = Y[split:]; 22 | 23 | #The items will be sorted into classes in 
this list 24 | Points = [[] for i in range(len(testY[0].A1))]; 25 | 26 | f = 2; 27 | h1 = 5; 28 | h2 = 10; 29 | o = len(Y[0].A1); 30 | hiddenLayers = 2; 31 | r = 0.15; 32 | epochs = 100; 33 | 34 | weights = NN.NeuralNetwork(epochs, X, Y, f, hiddenLayers, [h1,h2,o], r); 35 | 36 | correct = 0; 37 | 38 | #Calculate accuracy 39 | for i in range(n): 40 | prediction = NN.Predict(X[i], weights, hiddenLayers+1); 41 | itemClass = list(Y[i].A1); 42 | 43 | if(prediction == itemClass): 44 | correct += 1; 45 | 46 | #Find index of class 47 | index = -1; 48 | for j in range(len(prediction)): 49 | if(prediction[j] == 1): 50 | index = j; 51 | break; 52 | 53 | Points[index].append(X[i]); 54 | 55 | accuracy = correct/float(n)*100; 56 | print "Accuracy ",accuracy; 57 | 58 | colors = ['r','b','g','c','m','y']; 59 | 60 | for i in range(len(Points)): 61 | p = Points[i]; 62 | Xa = []; 63 | Xb = []; 64 | 65 | #Choose color randomly from list, then remove it 66 | #(to avoid duplicates) 67 | color = choice(colors); 68 | colors.remove(color); 69 | 70 | for item in p: 71 | Xa.append(item[:, [0]].item(0)); 72 | Xb.append(item[:, [1]].item(0)); 73 | 74 | pyplot.plot(Xa,Xb,'o',color=color); 75 | 76 | pyplot.show(); 77 | 78 | 79 | def main(): 80 | X, Y = Reader.ReadData('data.txt'); 81 | 82 | PlotFunction(X, Y, 0.7, 2, 3); 83 | 84 | main(); 85 | -------------------------------------------------------------------------------- /Classifiers/Neural Network/Reader.py: -------------------------------------------------------------------------------- 1 | import numpy as np; 2 | import math; 3 | from random import shuffle; 4 | 5 | ###_Pre-Processing_### 6 | def ReadData(fileName): 7 | f = open(fileName); 8 | lines = f.read().splitlines(); 9 | f.close(); 10 | 11 | items = []; 12 | classes = []; 13 | 14 | for line in lines: 15 | line = line.split(','); #Split line on commas 16 | itemFeatures = []; #Temp list to hold feature values of the item 17 | 18 | for i in range(len(line)-1): 19 | value = float(line[i]); 20 | itemFeatures.append(value); 21 | 22 | #Add to classes the known classification for current item 23 | classes.append(line[-1]); 24 | #Add item data to items 25 | items.append(itemFeatures); 26 | 27 | #Map class names to numbers (from 0 to the number of classes) 28 | classes = map(lambda x: list(set(classes)).index(x), classes); 29 | 30 | X = np.matrix(items); #Convert data to numpy matrix 31 | Y = BuildY(classes); #Build the Y matrices 32 | n = len(items)-1; #The number of items 33 | 34 | toShuffle = []; #Temp array to shuffle X and Y at the same time 35 | 36 | for i in range(n+1): 37 | #Build toShuffle by packing Xi together with Yi 38 | toShuffle.append((X[i],Y[i])); 39 | 40 | shuffle(toShuffle); 41 | 42 | X = []; 43 | Y = []; 44 | for i in range(n+1): 45 | X.append(toShuffle[i][0]) 46 | Y.append(toShuffle[i][1]) 47 | 48 | return X,Y,n; 49 | 50 | def BuildY(Y): 51 | newY = []; 52 | #Number of classes is the largest number in Y plus 1 53 | classesNumber = max(Y)+1; 54 | 55 | for i in range(len(Y)): 56 | #Initialize vector with zeros, set to 1 the class index 57 | tempVector = [0]*classesNumber; 58 | tempVector[Y[i]] = 1; 59 | 60 | newY.append(tempVector); 61 | 62 | return np.matrix(newY); 63 | 64 | X, Y, n = ReadData('data.txt'); 65 | -------------------------------------------------------------------------------- /Classifiers/Neural Network/data.txt: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 
4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 
6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica -------------------------------------------------------------------------------- /Classifiers/Perceptron/Perceptron.py: -------------------------------------------------------------------------------- 1 | import math; #For pow and sqrt 2 | from random import shuffle; 3 | 4 | 5 | ###_Read Data_### 6 | def ReadData(fileName): 7 | #Read the file, splitting by lines 8 | f = open(fileName,'r'); 9 | lines = f.read().splitlines(); 10 | f.close(); 11 | 12 | #Split the first line by commas, remove the last element 13 | #and save the length of the rest. 
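    #For example, the header line 'SL,SW,PL,PW,Class' in data.txt yields the
    #feature list ['SL', 'SW', 'PL', 'PW']; the fifth column is the class.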
14 |     featuresNumber = len(lines[0].split(','))-1; #The number of features
15 | 
16 |     items = [];
17 |     classes = [];
18 |     features = lines[0].split(',')[:-1];
19 | 
20 |     for i in range(1, len(lines)):
21 |         line = lines[i].split(',');
22 | 
23 |         if(line[-1] not in classes):
24 |             classes.append(line[-1]);
25 | 
26 |         itemFeatures = {"Class" : line[-1], "Bias" : 1};
27 | 
28 |         for j in range(len(features)):
29 |             f = features[j]; #Get the feature at index j
30 |             v = float(line[j]);
31 | 
32 |             itemFeatures[f] = v;
33 | 
34 |         items.append(itemFeatures);
35 | 
36 |     shuffle(items);
37 | 
38 |     return items,classes,features;
39 | 
40 | 
41 | ###_Evaluation Functions_###
42 | def K_FoldValidation(K, Items, rate, epochs, classes, features):
43 |     if(K > len(Items)):
44 |         return -1;
45 | 
46 |     correct = 0; #The number of correct classifications
47 |     total = (len(Items)/K)*K; #The total number of classifications
48 | 
49 |     l = len(Items)/K; #The length of a fold
50 | 
51 |     for i in range(K):
52 |         #Train on all folds except the ith, which is held out for testing
53 |         trainingSet = Items[:i*l] + Items[(i+1)*l:];
54 |         testSet = Items[i*l:(i+1)*l];
55 | 
56 |         weights = CalculateWeights(trainingSet, rate, epochs, classes, features);
57 | 
58 |         for item in testSet:
59 |             itemClass = item["Class"];
60 | 
61 |             itemFeatures = {};
62 | 
63 |             for key in item:
64 |                 if(key != "Class"):
65 |                     #If key isn't "Class", add it to itemFeatures
66 |                     itemFeatures[key] = item[key];
67 | 
68 |             guess = Perceptron(itemFeatures, weights);
69 | 
70 |             if(guess == itemClass):
71 |                 #Guessed correctly
72 |                 correct += 1;
73 | 
74 |     return correct/float(total);
75 | 
76 | def Evaluate(times, K, Items, rate, epochs, classes, features):
77 |     accuracy = 0;
78 |     for t in range(times):
79 |         shuffle(Items);
80 |         accuracy += K_FoldValidation(K, Items, rate, epochs, classes, features);
81 | 
82 |     print accuracy/float(times);
83 | 
84 | 
85 | ###_Auxiliary Functions_###
86 | def AddDictionaries(d1, d2, rate):
87 |     d3 = {};
88 |     for i in d1:
89 |         d3[i] = d1[i] + rate*d2[i];
90 | 
91 |     return d3;
92 | 
93 | def SubDictionaries(d1, d2, rate):
94 |     d3 = {};
95 |     for i in d1:
96 |         d3[i] = d1[i] - rate*d2[i];
97 | 
98 |     return d3;
99 | 
100 | def CalculateConfidence(item, weight):
101 |     #Add the product of the weight and item values for each feature
102 |     confidence = 0;
103 | 
104 |     for k in weight:
105 |         confidence += weight[k]*item[k];
106 | 
107 |     return confidence;
108 | 
109 | 
110 | ###_Core Functions_###
111 | def CalculateWeights(trainingSet, rate, epochs, classes, features):
112 |     #Initialize weights at 0
113 |     weights = {};
114 | 
115 |     #Initialize weights dictionary. Weights is divided in classes.
116 |     #Each class has its own dictionary, which holds numerical values/weights
117 |     #for the features. 
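    #For the iris data this looks like (illustrative):
    #  weights = {'Iris-setosa':     {'Bias': 0, 'SL': 0, 'SW': 0, 'PL': 0, 'PW': 0},
    #             'Iris-versicolor': {...},
    #             'Iris-virginica':  {...}}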
118 | for c in classes: 119 | weights[c] = {"Bias":0}; 120 | for f in features: 121 | weights[c][f] = 0; 122 | 123 | for epoch in range(epochs): 124 | for item in trainingSet: 125 | #Iterate through trainingSet 126 | #Guess where item belongs 127 | y = -1; 128 | guess = ""; 129 | for w in weights: 130 | confidence = CalculateConfidence(item, weights[w]); 131 | 132 | if(confidence > y): 133 | y = confidence; 134 | guess = w; 135 | 136 | correct = item["Class"]; 137 | if(correct != guess): 138 | weights[guess] = SubDictionaries(weights[guess], item, rate); 139 | weights[correct] = AddDictionaries(weights[correct], item, rate); 140 | 141 | return weights; 142 | 143 | def Perceptron(item, weights): 144 | item["Bias"] = 1; #Augment item vector with bias 145 | m = -1; #Hold the maximum 146 | classification = ""; 147 | 148 | #Calculate chance of item being in each class, 149 | #pick the maximum. 150 | for w in weights: 151 | #Multiply the item vector with the class weights vector 152 | guess = CalculateConfidence(item, weights[w]); 153 | if(guess > m): 154 | #Our guess is better than our current best guess, 155 | #update max and classification 156 | m = guess; 157 | classification = w; 158 | 159 | return classification; 160 | 161 | 162 | ###_Main_### 163 | def main(): 164 | items, classes, features = ReadData('data.txt'); 165 | 166 | lRate = 0.1; 167 | epochs = 50; 168 | weights = CalculateWeights(items, lRate, epochs, classes, features); 169 | 170 | item = {'PW' : 1.4, 'PL' : 4.7, 'SW' : 3.2, 'SL' : 7.0}; 171 | print Perceptron(item, weights); 172 | 173 | #Evaluate(100, 5, items, lRate, epochs, classes, features); 174 | 175 | if __name__ == "__main__": 176 | main(); 177 | -------------------------------------------------------------------------------- /Classifiers/Perceptron/data.txt: -------------------------------------------------------------------------------- 1 | SL,SW,PL,PW,Class 2 | 5.1,3.5,1.4,0.2,Iris-setosa 3 | 4.9,3.0,1.4,0.2,Iris-setosa 4 | 4.7,3.2,1.3,0.2,Iris-setosa 5 | 4.6,3.1,1.5,0.2,Iris-setosa 6 | 5.0,3.6,1.4,0.2,Iris-setosa 7 | 5.4,3.9,1.7,0.4,Iris-setosa 8 | 4.6,3.4,1.4,0.3,Iris-setosa 9 | 5.0,3.4,1.5,0.2,Iris-setosa 10 | 4.4,2.9,1.4,0.2,Iris-setosa 11 | 4.9,3.1,1.5,0.1,Iris-setosa 12 | 5.4,3.7,1.5,0.2,Iris-setosa 13 | 4.8,3.4,1.6,0.2,Iris-setosa 14 | 4.8,3.0,1.4,0.1,Iris-setosa 15 | 4.3,3.0,1.1,0.1,Iris-setosa 16 | 5.8,4.0,1.2,0.2,Iris-setosa 17 | 5.7,4.4,1.5,0.4,Iris-setosa 18 | 5.4,3.9,1.3,0.4,Iris-setosa 19 | 5.1,3.5,1.4,0.3,Iris-setosa 20 | 5.7,3.8,1.7,0.3,Iris-setosa 21 | 5.1,3.8,1.5,0.3,Iris-setosa 22 | 5.4,3.4,1.7,0.2,Iris-setosa 23 | 5.1,3.7,1.5,0.4,Iris-setosa 24 | 4.6,3.6,1.0,0.2,Iris-setosa 25 | 5.1,3.3,1.7,0.5,Iris-setosa 26 | 4.8,3.4,1.9,0.2,Iris-setosa 27 | 5.0,3.0,1.6,0.2,Iris-setosa 28 | 5.0,3.4,1.6,0.4,Iris-setosa 29 | 5.2,3.5,1.5,0.2,Iris-setosa 30 | 5.2,3.4,1.4,0.2,Iris-setosa 31 | 4.7,3.2,1.6,0.2,Iris-setosa 32 | 4.8,3.1,1.6,0.2,Iris-setosa 33 | 5.4,3.4,1.5,0.4,Iris-setosa 34 | 5.2,4.1,1.5,0.1,Iris-setosa 35 | 5.5,4.2,1.4,0.2,Iris-setosa 36 | 4.9,3.1,1.5,0.1,Iris-setosa 37 | 5.0,3.2,1.2,0.2,Iris-setosa 38 | 5.5,3.5,1.3,0.2,Iris-setosa 39 | 4.9,3.1,1.5,0.1,Iris-setosa 40 | 4.4,3.0,1.3,0.2,Iris-setosa 41 | 5.1,3.4,1.5,0.2,Iris-setosa 42 | 5.0,3.5,1.3,0.3,Iris-setosa 43 | 4.5,2.3,1.3,0.3,Iris-setosa 44 | 4.4,3.2,1.3,0.2,Iris-setosa 45 | 5.0,3.5,1.6,0.6,Iris-setosa 46 | 5.1,3.8,1.9,0.4,Iris-setosa 47 | 4.8,3.0,1.4,0.3,Iris-setosa 48 | 5.1,3.8,1.6,0.2,Iris-setosa 49 | 4.6,3.2,1.4,0.2,Iris-setosa 50 | 5.3,3.7,1.5,0.2,Iris-setosa 51 | 5.0,3.3,1.4,0.2,Iris-setosa 
52 | 7.0,3.2,4.7,1.4,Iris-versicolor 53 | 6.4,3.2,4.5,1.5,Iris-versicolor 54 | 6.9,3.1,4.9,1.5,Iris-versicolor 55 | 5.5,2.3,4.0,1.3,Iris-versicolor 56 | 6.5,2.8,4.6,1.5,Iris-versicolor 57 | 5.7,2.8,4.5,1.3,Iris-versicolor 58 | 6.3,3.3,4.7,1.6,Iris-versicolor 59 | 4.9,2.4,3.3,1.0,Iris-versicolor 60 | 6.6,2.9,4.6,1.3,Iris-versicolor 61 | 5.2,2.7,3.9,1.4,Iris-versicolor 62 | 5.0,2.0,3.5,1.0,Iris-versicolor 63 | 5.9,3.0,4.2,1.5,Iris-versicolor 64 | 6.0,2.2,4.0,1.0,Iris-versicolor 65 | 6.1,2.9,4.7,1.4,Iris-versicolor 66 | 5.6,2.9,3.6,1.3,Iris-versicolor 67 | 6.7,3.1,4.4,1.4,Iris-versicolor 68 | 5.6,3.0,4.5,1.5,Iris-versicolor 69 | 5.8,2.7,4.1,1.0,Iris-versicolor 70 | 6.2,2.2,4.5,1.5,Iris-versicolor 71 | 5.6,2.5,3.9,1.1,Iris-versicolor 72 | 5.9,3.2,4.8,1.8,Iris-versicolor 73 | 6.1,2.8,4.0,1.3,Iris-versicolor 74 | 6.3,2.5,4.9,1.5,Iris-versicolor 75 | 6.1,2.8,4.7,1.2,Iris-versicolor 76 | 6.4,2.9,4.3,1.3,Iris-versicolor 77 | 6.6,3.0,4.4,1.4,Iris-versicolor 78 | 6.8,2.8,4.8,1.4,Iris-versicolor 79 | 6.7,3.0,5.0,1.7,Iris-versicolor 80 | 6.0,2.9,4.5,1.5,Iris-versicolor 81 | 5.7,2.6,3.5,1.0,Iris-versicolor 82 | 5.5,2.4,3.8,1.1,Iris-versicolor 83 | 5.5,2.4,3.7,1.0,Iris-versicolor 84 | 5.8,2.7,3.9,1.2,Iris-versicolor 85 | 6.0,2.7,5.1,1.6,Iris-versicolor 86 | 5.4,3.0,4.5,1.5,Iris-versicolor 87 | 6.0,3.4,4.5,1.6,Iris-versicolor 88 | 6.7,3.1,4.7,1.5,Iris-versicolor 89 | 6.3,2.3,4.4,1.3,Iris-versicolor 90 | 5.6,3.0,4.1,1.3,Iris-versicolor 91 | 5.5,2.5,4.0,1.3,Iris-versicolor 92 | 5.5,2.6,4.4,1.2,Iris-versicolor 93 | 6.1,3.0,4.6,1.4,Iris-versicolor 94 | 5.8,2.6,4.0,1.2,Iris-versicolor 95 | 5.0,2.3,3.3,1.0,Iris-versicolor 96 | 5.6,2.7,4.2,1.3,Iris-versicolor 97 | 5.7,3.0,4.2,1.2,Iris-versicolor 98 | 5.7,2.9,4.2,1.3,Iris-versicolor 99 | 6.2,2.9,4.3,1.3,Iris-versicolor 100 | 5.1,2.5,3.0,1.1,Iris-versicolor 101 | 5.7,2.8,4.1,1.3,Iris-versicolor 102 | 6.3,3.3,6.0,2.5,Iris-virginica 103 | 5.8,2.7,5.1,1.9,Iris-virginica 104 | 7.1,3.0,5.9,2.1,Iris-virginica 105 | 6.3,2.9,5.6,1.8,Iris-virginica 106 | 6.5,3.0,5.8,2.2,Iris-virginica 107 | 7.6,3.0,6.6,2.1,Iris-virginica 108 | 4.9,2.5,4.5,1.7,Iris-virginica 109 | 7.3,2.9,6.3,1.8,Iris-virginica 110 | 6.7,2.5,5.8,1.8,Iris-virginica 111 | 7.2,3.6,6.1,2.5,Iris-virginica 112 | 6.5,3.2,5.1,2.0,Iris-virginica 113 | 6.4,2.7,5.3,1.9,Iris-virginica 114 | 6.8,3.0,5.5,2.1,Iris-virginica 115 | 5.7,2.5,5.0,2.0,Iris-virginica 116 | 5.8,2.8,5.1,2.4,Iris-virginica 117 | 6.4,3.2,5.3,2.3,Iris-virginica 118 | 6.5,3.0,5.5,1.8,Iris-virginica 119 | 7.7,3.8,6.7,2.2,Iris-virginica 120 | 7.7,2.6,6.9,2.3,Iris-virginica 121 | 6.0,2.2,5.0,1.5,Iris-virginica 122 | 6.9,3.2,5.7,2.3,Iris-virginica 123 | 5.6,2.8,4.9,2.0,Iris-virginica 124 | 7.7,2.8,6.7,2.0,Iris-virginica 125 | 6.3,2.7,4.9,1.8,Iris-virginica 126 | 6.7,3.3,5.7,2.1,Iris-virginica 127 | 7.2,3.2,6.0,1.8,Iris-virginica 128 | 6.2,2.8,4.8,1.8,Iris-virginica 129 | 6.1,3.0,4.9,1.8,Iris-virginica 130 | 6.4,2.8,5.6,2.1,Iris-virginica 131 | 7.2,3.0,5.8,1.6,Iris-virginica 132 | 7.4,2.8,6.1,1.9,Iris-virginica 133 | 7.9,3.8,6.4,2.0,Iris-virginica 134 | 6.4,2.8,5.6,2.2,Iris-virginica 135 | 6.3,2.8,5.1,1.5,Iris-virginica 136 | 6.1,2.6,5.6,1.4,Iris-virginica 137 | 7.7,3.0,6.1,2.3,Iris-virginica 138 | 6.3,3.4,5.6,2.4,Iris-virginica 139 | 6.4,3.1,5.5,1.8,Iris-virginica 140 | 6.0,3.0,4.8,1.8,Iris-virginica 141 | 6.9,3.1,5.4,2.1,Iris-virginica 142 | 6.7,3.1,5.6,2.4,Iris-virginica 143 | 6.9,3.1,5.1,2.3,Iris-virginica 144 | 5.8,2.7,5.1,1.9,Iris-virginica 145 | 6.8,3.2,5.9,2.3,Iris-virginica 146 | 6.7,3.3,5.7,2.5,Iris-virginica 147 | 6.7,3.0,5.2,2.3,Iris-virginica 
148 | 6.3,2.5,5.0,1.9,Iris-virginica 149 | 6.5,3.0,5.2,2.0,Iris-virginica 150 | 6.2,3.4,5.4,2.3,Iris-virginica 151 | 5.9,3.0,5.1,1.8,Iris-virginica -------------------------------------------------------------------------------- /Classifiers/Perceptron/data2.txt: -------------------------------------------------------------------------------- 1 | X,Y,Class 2 | 2.7810836,2.550537003,1 3 | 1.465489372,2.362125076,1 4 | 3.396561688,4.400293529,1 5 | 1.38807019,1.850220317,2 6 | 3.06407232,3.005305973,2 7 | 7.627531214,2.759262235,2 8 | 5.332441248,2.088626775,3 9 | 6.922596716,1.77106367,3 10 | 8.675418651,-0.242068655,3 11 | 7.673756466,3.508563011,1 -------------------------------------------------------------------------------- /Classifiers/kNN/data.txt: -------------------------------------------------------------------------------- 1 | SL,SW,PL,PW,Class 2 | 5.1,3.5,1.4,0.2,Iris-setosa 3 | 4.9,3.0,1.4,0.2,Iris-setosa 4 | 4.7,3.2,1.3,0.2,Iris-setosa 5 | 4.6,3.1,1.5,0.2,Iris-setosa 6 | 5.0,3.6,1.4,0.2,Iris-setosa 7 | 5.4,3.9,1.7,0.4,Iris-setosa 8 | 4.6,3.4,1.4,0.3,Iris-setosa 9 | 5.0,3.4,1.5,0.2,Iris-setosa 10 | 4.4,2.9,1.4,0.2,Iris-setosa 11 | 4.9,3.1,1.5,0.1,Iris-setosa 12 | 5.4,3.7,1.5,0.2,Iris-setosa 13 | 4.8,3.4,1.6,0.2,Iris-setosa 14 | 4.8,3.0,1.4,0.1,Iris-setosa 15 | 4.3,3.0,1.1,0.1,Iris-setosa 16 | 5.8,4.0,1.2,0.2,Iris-setosa 17 | 5.7,4.4,1.5,0.4,Iris-setosa 18 | 5.4,3.9,1.3,0.4,Iris-setosa 19 | 5.1,3.5,1.4,0.3,Iris-setosa 20 | 5.7,3.8,1.7,0.3,Iris-setosa 21 | 5.1,3.8,1.5,0.3,Iris-setosa 22 | 5.4,3.4,1.7,0.2,Iris-setosa 23 | 5.1,3.7,1.5,0.4,Iris-setosa 24 | 4.6,3.6,1.0,0.2,Iris-setosa 25 | 5.1,3.3,1.7,0.5,Iris-setosa 26 | 4.8,3.4,1.9,0.2,Iris-setosa 27 | 5.0,3.0,1.6,0.2,Iris-setosa 28 | 5.0,3.4,1.6,0.4,Iris-setosa 29 | 5.2,3.5,1.5,0.2,Iris-setosa 30 | 5.2,3.4,1.4,0.2,Iris-setosa 31 | 4.7,3.2,1.6,0.2,Iris-setosa 32 | 4.8,3.1,1.6,0.2,Iris-setosa 33 | 5.4,3.4,1.5,0.4,Iris-setosa 34 | 5.2,4.1,1.5,0.1,Iris-setosa 35 | 5.5,4.2,1.4,0.2,Iris-setosa 36 | 4.9,3.1,1.5,0.1,Iris-setosa 37 | 5.0,3.2,1.2,0.2,Iris-setosa 38 | 5.5,3.5,1.3,0.2,Iris-setosa 39 | 4.9,3.1,1.5,0.1,Iris-setosa 40 | 4.4,3.0,1.3,0.2,Iris-setosa 41 | 5.1,3.4,1.5,0.2,Iris-setosa 42 | 5.0,3.5,1.3,0.3,Iris-setosa 43 | 4.5,2.3,1.3,0.3,Iris-setosa 44 | 4.4,3.2,1.3,0.2,Iris-setosa 45 | 5.0,3.5,1.6,0.6,Iris-setosa 46 | 5.1,3.8,1.9,0.4,Iris-setosa 47 | 4.8,3.0,1.4,0.3,Iris-setosa 48 | 5.1,3.8,1.6,0.2,Iris-setosa 49 | 4.6,3.2,1.4,0.2,Iris-setosa 50 | 5.3,3.7,1.5,0.2,Iris-setosa 51 | 5.0,3.3,1.4,0.2,Iris-setosa 52 | 7.0,3.2,4.7,1.4,Iris-versicolor 53 | 6.4,3.2,4.5,1.5,Iris-versicolor 54 | 6.9,3.1,4.9,1.5,Iris-versicolor 55 | 5.5,2.3,4.0,1.3,Iris-versicolor 56 | 6.5,2.8,4.6,1.5,Iris-versicolor 57 | 5.7,2.8,4.5,1.3,Iris-versicolor 58 | 6.3,3.3,4.7,1.6,Iris-versicolor 59 | 4.9,2.4,3.3,1.0,Iris-versicolor 60 | 6.6,2.9,4.6,1.3,Iris-versicolor 61 | 5.2,2.7,3.9,1.4,Iris-versicolor 62 | 5.0,2.0,3.5,1.0,Iris-versicolor 63 | 5.9,3.0,4.2,1.5,Iris-versicolor 64 | 6.0,2.2,4.0,1.0,Iris-versicolor 65 | 6.1,2.9,4.7,1.4,Iris-versicolor 66 | 5.6,2.9,3.6,1.3,Iris-versicolor 67 | 6.7,3.1,4.4,1.4,Iris-versicolor 68 | 5.6,3.0,4.5,1.5,Iris-versicolor 69 | 5.8,2.7,4.1,1.0,Iris-versicolor 70 | 6.2,2.2,4.5,1.5,Iris-versicolor 71 | 5.6,2.5,3.9,1.1,Iris-versicolor 72 | 5.9,3.2,4.8,1.8,Iris-versicolor 73 | 6.1,2.8,4.0,1.3,Iris-versicolor 74 | 6.3,2.5,4.9,1.5,Iris-versicolor 75 | 6.1,2.8,4.7,1.2,Iris-versicolor 76 | 6.4,2.9,4.3,1.3,Iris-versicolor 77 | 6.6,3.0,4.4,1.4,Iris-versicolor 78 | 6.8,2.8,4.8,1.4,Iris-versicolor 79 | 6.7,3.0,5.0,1.7,Iris-versicolor 80 | 
6.0,2.9,4.5,1.5,Iris-versicolor 81 | 5.7,2.6,3.5,1.0,Iris-versicolor 82 | 5.5,2.4,3.8,1.1,Iris-versicolor 83 | 5.5,2.4,3.7,1.0,Iris-versicolor 84 | 5.8,2.7,3.9,1.2,Iris-versicolor 85 | 6.0,2.7,5.1,1.6,Iris-versicolor 86 | 5.4,3.0,4.5,1.5,Iris-versicolor 87 | 6.0,3.4,4.5,1.6,Iris-versicolor 88 | 6.7,3.1,4.7,1.5,Iris-versicolor 89 | 6.3,2.3,4.4,1.3,Iris-versicolor 90 | 5.6,3.0,4.1,1.3,Iris-versicolor 91 | 5.5,2.5,4.0,1.3,Iris-versicolor 92 | 5.5,2.6,4.4,1.2,Iris-versicolor 93 | 6.1,3.0,4.6,1.4,Iris-versicolor 94 | 5.8,2.6,4.0,1.2,Iris-versicolor 95 | 5.0,2.3,3.3,1.0,Iris-versicolor 96 | 5.6,2.7,4.2,1.3,Iris-versicolor 97 | 5.7,3.0,4.2,1.2,Iris-versicolor 98 | 5.7,2.9,4.2,1.3,Iris-versicolor 99 | 6.2,2.9,4.3,1.3,Iris-versicolor 100 | 5.1,2.5,3.0,1.1,Iris-versicolor 101 | 5.7,2.8,4.1,1.3,Iris-versicolor 102 | 6.3,3.3,6.0,2.5,Iris-virginica 103 | 5.8,2.7,5.1,1.9,Iris-virginica 104 | 7.1,3.0,5.9,2.1,Iris-virginica 105 | 6.3,2.9,5.6,1.8,Iris-virginica 106 | 6.5,3.0,5.8,2.2,Iris-virginica 107 | 7.6,3.0,6.6,2.1,Iris-virginica 108 | 4.9,2.5,4.5,1.7,Iris-virginica 109 | 7.3,2.9,6.3,1.8,Iris-virginica 110 | 6.7,2.5,5.8,1.8,Iris-virginica 111 | 7.2,3.6,6.1,2.5,Iris-virginica 112 | 6.5,3.2,5.1,2.0,Iris-virginica 113 | 6.4,2.7,5.3,1.9,Iris-virginica 114 | 6.8,3.0,5.5,2.1,Iris-virginica 115 | 5.7,2.5,5.0,2.0,Iris-virginica 116 | 5.8,2.8,5.1,2.4,Iris-virginica 117 | 6.4,3.2,5.3,2.3,Iris-virginica 118 | 6.5,3.0,5.5,1.8,Iris-virginica 119 | 7.7,3.8,6.7,2.2,Iris-virginica 120 | 7.7,2.6,6.9,2.3,Iris-virginica 121 | 6.0,2.2,5.0,1.5,Iris-virginica 122 | 6.9,3.2,5.7,2.3,Iris-virginica 123 | 5.6,2.8,4.9,2.0,Iris-virginica 124 | 7.7,2.8,6.7,2.0,Iris-virginica 125 | 6.3,2.7,4.9,1.8,Iris-virginica 126 | 6.7,3.3,5.7,2.1,Iris-virginica 127 | 7.2,3.2,6.0,1.8,Iris-virginica 128 | 6.2,2.8,4.8,1.8,Iris-virginica 129 | 6.1,3.0,4.9,1.8,Iris-virginica 130 | 6.4,2.8,5.6,2.1,Iris-virginica 131 | 7.2,3.0,5.8,1.6,Iris-virginica 132 | 7.4,2.8,6.1,1.9,Iris-virginica 133 | 7.9,3.8,6.4,2.0,Iris-virginica 134 | 6.4,2.8,5.6,2.2,Iris-virginica 135 | 6.3,2.8,5.1,1.5,Iris-virginica 136 | 6.1,2.6,5.6,1.4,Iris-virginica 137 | 7.7,3.0,6.1,2.3,Iris-virginica 138 | 6.3,3.4,5.6,2.4,Iris-virginica 139 | 6.4,3.1,5.5,1.8,Iris-virginica 140 | 6.0,3.0,4.8,1.8,Iris-virginica 141 | 6.9,3.1,5.4,2.1,Iris-virginica 142 | 6.7,3.1,5.6,2.4,Iris-virginica 143 | 6.9,3.1,5.1,2.3,Iris-virginica 144 | 5.8,2.7,5.1,1.9,Iris-virginica 145 | 6.8,3.2,5.9,2.3,Iris-virginica 146 | 6.7,3.3,5.7,2.5,Iris-virginica 147 | 6.7,3.0,5.2,2.3,Iris-virginica 148 | 6.3,2.5,5.0,1.9,Iris-virginica 149 | 6.5,3.0,5.2,2.0,Iris-virginica 150 | 6.2,3.4,5.4,2.3,Iris-virginica 151 | 5.9,3.0,5.1,1.8,Iris-virginica -------------------------------------------------------------------------------- /Classifiers/kNN/data2.txt: -------------------------------------------------------------------------------- 1 | Height,Weight,Age,Class 2 | 1.70,65,20,Programmer 3 | 1.90,85,33,Builder 4 | 1.78,76,31,Builder 5 | 1.73,74,24,Programmer 6 | 1.81,75,35,Builder 7 | 1.73,70,75,Scientist 8 | 1.80,71,63,Scientist 9 | 1.75,69,25,Programmer -------------------------------------------------------------------------------- /Classifiers/kNN/kNearestNeighbours.py: -------------------------------------------------------------------------------- 1 | import math; #For pow and sqrt 2 | from random import shuffle; 3 | 4 | 5 | ###_Reading_### 6 | def ReadData(fileName): 7 | #Read the file, splitting by lines 8 | f = open(fileName, 'r'); 9 | lines = f.read().splitlines(); 10 | f.close(); 11 | 12 | #Split the first line by 
commas, remove the last element 13 | #(the class label) and save the rest into a list. 14 | #The list holds the feature names of the data set. 15 | features = lines[0].split(',')[:-1]; 16 | 17 | items = []; 18 | 19 | for i in range(1,len(lines)): 20 | line = lines[i].split(','); 21 | 22 | itemFeatures = {"Class" : line[-1]}; 23 | 24 | for j in range(len(features)): 25 | f = features[j]; #Get the feature at index j 26 | v = float(line[j]); #Convert feature value to float 27 | 28 | itemFeatures[f] = v; #Add feature value to dict 29 | 30 | items.append(itemFeatures); 31 | 32 | shuffle(items); 33 | 34 | return items; 35 | 36 | 37 | ###_Auxiliary Functions_### 38 | def EuclideanDistance(x, y): 39 | S = 0; #The sum of the squared differences of the elements 40 | for key in x.keys(): 41 | S += math.pow(x[key]-y[key], 2); 42 | 43 | return math.sqrt(S); #The square root of the sum 44 | 45 | def CalculateNeighborsClass(neighbors, k): 46 | count = {}; 47 | 48 | for i in range(min(k, len(neighbors))): #Guard against fewer than k neighbors 49 | if(neighbors[i][1] not in count): 50 | #The class at the ith index is not in the count dict. 51 | #Initialize it to 1. 52 | count[neighbors[i][1]] = 1; 53 | else: 54 | #Found another item of this class. Increment its counter. 55 | count[neighbors[i][1]] += 1; 56 | 57 | return count; 58 | 59 | def FindMax(Dict): 60 | #Find the key with the maximum value in the dict; return both 61 | maximum = -1; 62 | classification = ""; 63 | 64 | for key in Dict.keys(): 65 | if(Dict[key] > maximum): 66 | maximum = Dict[key]; 67 | classification = key; 68 | 69 | return classification, maximum; 70 | 71 | 72 | ###_Core Functions_### 73 | def Classify(nItem, k, Items): 74 | #Hold nearest neighbours. First item is distance, second class 75 | neighbors = []; 76 | 77 | for item in Items: 78 | #Find Euclidean Distance 79 | distance = EuclideanDistance(nItem, item); 80 | 81 | #Update neighbors, 82 | #either adding the current item to neighbors or not.
83 | neighbors = UpdateNeighbors(neighbors, item, distance, k); 84 | 85 | #Count the number of each class in neighbors 86 | count = CalculateNeighborsClass(neighbors, k); 87 | 88 | #Find the max in count, i.e. the class with the most appearances 89 | return FindMax(count); 90 | 91 | def UpdateNeighbors(neighbors, item, distance, k): 92 | if(len(neighbors) < k): 93 | #List is not full, add new item and sort 94 | neighbors.append([distance, item["Class"]]); 95 | neighbors = sorted(neighbors); 96 | else: 97 | #List is full 98 | #Check if new item should be entered 99 | if(neighbors[-1][0] > distance): 100 | #If yes, replace the last element with new item 101 | neighbors[-1] = [distance, item["Class"]]; 102 | neighbors = sorted(neighbors); 103 | 104 | return neighbors; 105 | 106 | 107 | ###_Evaluation Functions_### 108 | def K_FoldValidation(K, k, Items): 109 | if(K > len(Items)): 110 | return -1; 111 | 112 | correct = 0; #The number of correct classifications 113 | l = len(Items)/K; #The length of a fold (integer division) 114 | 115 | total = l*K; #The total number of classifications 116 | 117 | for i in range(K): 118 | #Train on every fold except the ith; test on the ith fold 119 | trainingSet = Items[:i*l] + Items[(i+1)*l:]; 120 | testSet = Items[i*l:(i+1)*l]; 121 | 122 | for item in testSet: 123 | itemClass = item["Class"]; 124 | 125 | itemFeatures = {}; 126 | 127 | #Get feature values 128 | for key in item: 129 | if(key != "Class"): 130 | #If key isn't "Class", add it to itemFeatures 131 | itemFeatures[key] = item[key]; 132 | 133 | #Categorize item based on its feature values 134 | guess = Classify(itemFeatures, k, trainingSet)[0]; 135 | 136 | if(guess == itemClass): 137 | #Guessed correctly 138 | correct += 1; 139 | 140 | accuracy = correct/float(total); 141 | return accuracy; 142 | 143 | def Evaluate(K,k,items,iterations): 144 | #Run the validation the given number of iterations, average the accuracy 145 | accuracy = 0; 146 | for i in range(iterations): 147 | shuffle(items); 148 | accuracy += K_FoldValidation(K, k, items); 149 | 150 | print accuracy/float(iterations); 151 | 152 | 153 | ###_Main_### 154 | def main(): 155 | items = ReadData('data.txt'); 156 | 157 | #newItem = {'PW' : 1.4, 'PL' : 4.7, 'SW' : 3.2, 'SL' : 7.0}; 158 | #print Classify(newItem, 3, items); 159 | #K_FoldValidation(5, 3, items); 160 | Evaluate(5, 5, items, 100); 161 | 162 | if __name__ == "__main__": 163 | main(); 164 | -------------------------------------------------------------------------------- /Clustering/kMeans - Online/data.txt: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 
4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 
6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica -------------------------------------------------------------------------------- /Clustering/kMeans - Online/kMeans.py: -------------------------------------------------------------------------------- 1 | import math; #For pow and sqrt 2 | import sys; 3 | from random import shuffle, uniform; 4 | 5 | ###_Pre-Processing_### 6 | def ReadData(fileName): 7 | #Read the file, splitting by lines 8 | f = open(fileName,'r'); 9 | lines = f.read().splitlines(); 10 | f.close(); 11 | 12 | items = []; 13 | 14 | for i in range(len(lines)): #data.txt has no header row, so read every line 15 | line = lines[i].split(','); 16 | itemFeatures = []; 17 | 18 | for j in range(len(line)-1): 19 | v = float(line[j]); #Convert feature value to float 20 | itemFeatures.append(v); #Add feature value to list 21 | 22 | items.append(itemFeatures); 23 | 24 | shuffle(items); 25 | 26 | return items; 27 | 28 | 29 | ###_Auxiliary Functions_### 30 | def FindColMinMax(items): 31 | n = len(items[0]); 32 | minima = [sys.maxint for i in range(n)]; 33 | maxima = [-sys.maxint -1 for i in range(n)]; 34 | 35 | for item in items: 36 | for f in range(len(item)): 37 | if(item[f] < minima[f]): 38 | minima[f] = item[f]; 39 | 40 | if(item[f] > maxima[f]): 41 | maxima[f] = item[f]; 42 | 43 | return minima,maxima; 44 | 45 | def EuclideanDistance(x,y): 46 | S = 0; #The sum of the squared differences of the elements 47 | for i in range(len(x)): 48 | S += math.pow(x[i]-y[i],2); 49 | 50 | return math.sqrt(S); #The square root of the sum 51 | 52 | def InitializeMeans(items,k,cMin,cMax): 53 | #Initialize means to random numbers between 54 | #the min and max of each column/feature 55 | 56 | f = len(items[0]); #number of features 57 | means = [[0 for i in range(f)] for j in range(k)]; 58 | 59 | for mean in means: 60 | for i in range(len(mean)): 61 | #Set value to a random float 62 | #(the +-1 offset keeps initial means away from the extremes) 63 | mean[i] = uniform(cMin[i]+1,cMax[i]-1); 64 | 65 | return means; 66 | 67 | def UpdateMean(n,mean,item): 68 | for i in range(len(mean)): 69 | m = mean[i]; 70 | m = (m*(n-1)+item[i])/float(n); #Incremental (running) mean update 71 | mean[i] = round(m,3); 72 | 73 | return mean; 74 | 75 | 76 | ###_Core Functions_### 77 | def FindClusters(means,items): 78 | clusters = [[] for i in range(len(means))]; #Init clusters 79 | 80 | for item in items: 81 | #Classify item into a cluster 82 | index = Classify(means,item); 83 | 84 | #Add item to cluster 85 | clusters[index].append(item); 86 | 87 | return clusters; 88 | 89 | def Classify(means,item): 90 | #Classify item to the mean with minimum distance 91 | 92 | minimum = sys.maxint; 93 | index = -1; 94 | 95 | for i in range(len(means)): 96 | #Find distance from item to mean 97 | dis = EuclideanDistance(item,means[i]); 98 | 99 | if(dis < 
minimum): 100 | minimum = dis; 101 | index = i; 102 | 103 | return index; 104 | 105 | def CalculateMeans(k,items): 106 | #Find the minima and maxima for columns 107 | cMin, cMax = FindColMinMax(items); 108 | 109 | #Initialize means at random points 110 | means = InitializeMeans(items,k,cMin,cMax); 111 | 112 | #Initialize clusters, the array to hold 113 | #the number of items assigned to each mean 114 | clusters = [0 for i in range(len(means))]; 115 | 116 | #This is the online (sequential) variant: each item is 117 | #processed exactly once and the matched mean is updated immediately. 118 | 119 | #Calculate means in a single online pass 120 | #over the (already shuffled) items 121 | 122 | for i in range(len(items)): 123 | item = items[i]; 124 | #Classify item into a cluster and update the 125 | #corresponding mean. 126 | 127 | index = Classify(means,item); 128 | 129 | clusters[index] += 1; 130 | means[index] = UpdateMean(clusters[index],means[index],item); 131 | 132 | return means; 133 | 134 | 135 | ###_Main_### 136 | def main(): 137 | items = ReadData('data.txt'); 138 | 139 | k = 3; 140 | 141 | means = CalculateMeans(k,items); 142 | print "Means = ", means; 143 | 144 | clusters = FindClusters(means,items); 145 | print "Clusters: ", clusters; 146 | 147 | #newItem = [5.4,3.7,1.5,0.2]; 148 | #print Classify(means,newItem); 149 | 150 | if __name__ == "__main__": 151 | main(); 152 | -------------------------------------------------------------------------------- /Clustering/kMeans - Online/kMeans_Plot.py: -------------------------------------------------------------------------------- 1 | import kMeans; 2 | import numpy as np; 3 | from random import choice; 4 | from matplotlib import pyplot; 5 | 6 | def CutToTwoFeatures(items,indexA,indexB): 7 | n = len(items); 8 | X = []; 9 | for i in range(n): 10 | item = items[i]; 11 | newItem = [item[indexA],item[indexB]]; 12 | X.append(newItem); 13 | 14 | return X; 15 | 16 | 17 | def PlotClusters(clusters): 18 | n = len(clusters); 19 | #Group the (already two-dimensional) items by cluster into X 20 | X = [[] for i in range(n)]; 21 | 22 | for i in range(n): 23 | cluster = clusters[i]; 24 | for item in cluster: 25 | X[i].append(item); 26 | 27 | colors = ['r','b','g','c','m','y']; 28 | 29 | for x in X: 30 | #Choose color randomly from list, then remove it 31 | #(to avoid duplicates) 32 | c = choice(colors); 33 | colors.remove(c); 34 | 35 | Xa = []; 36 | Xb = []; 37 | 38 | for item in x: 39 | Xa.append(item[0]); 40 | Xb.append(item[1]); 41 | 42 | pyplot.plot(Xa,Xb,'o',color=c); 43 | 44 | pyplot.show(); 45 | 46 | 47 | def main(): 48 | items = kMeans.ReadData('data.txt'); 49 | items = CutToTwoFeatures(items,2,3); 50 | 51 | k = 3; 52 | means = kMeans.CalculateMeans(k,items); 53 | clusters = kMeans.FindClusters(means,items); 54 | 55 | PlotClusters(clusters); 56 | 57 | main(); 58 | -------------------------------------------------------------------------------- /Clustering/kMeans - Standard/data.txt: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 
5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 
5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica -------------------------------------------------------------------------------- /Clustering/kMeans - Standard/kMeans.py: -------------------------------------------------------------------------------- 1 | import math; #For pow and sqrt 2 | import sys; 3 | from random import shuffle, uniform; 4 | 5 | ###_Pre-Processing_### 6 | def ReadData(fileName): 7 | #Read the file, splitting by lines 8 | f = open(fileName,'r'); 9 | lines = f.read().splitlines(); 10 | f.close(); 11 | 12 | items = []; 13 | 14 | for i in range(len(lines)): #data.txt has no header row, so read every line 15 | line = lines[i].split(','); 16 | itemFeatures = []; 17 | 18 | for j in range(len(line)-1): 19 | v = float(line[j]); #Convert feature value to float 20 | itemFeatures.append(v); #Add feature value to list 21 | 22 | items.append(itemFeatures); 23 | 24 | shuffle(items); 25 | 26 | return items; 27 | 28 | 29 | ###_Auxiliary Functions_### 30 | def FindColMinMax(items): 31 | n = len(items[0]); 32 | minima = [sys.maxint for i in range(n)]; 33 | maxima = [-sys.maxint -1 for i in range(n)]; 34 | 35 | for item in items: 36 | for f in range(len(item)): 37 | if(item[f] < minima[f]): 38 | minima[f] = item[f]; 39 | 40 | if(item[f] > maxima[f]): 41 | maxima[f] = item[f]; 42 | 43 | return minima,maxima; 44 | 45 | def EuclideanDistance(x,y): 46 | S = 0; #The sum of the squared differences of the elements 47 | for i in range(len(x)): 48 | S += math.pow(x[i]-y[i],2); 49 | 50 | return math.sqrt(S); #The square root of the sum 51 | 52 | def InitializeMeans(items,k,cMin,cMax): 53 | #Initialize means to random numbers between 54 | #the min and max of each column/feature 55 | 56 | f = len(items[0]); #number of features 57 | means = [[0 for i in range(f)] for j in range(k)]; 58 | 59 | for mean in means: 60 | for i in range(len(mean)): 61 | #Set value to a random float 62 | #(the +-1 offset keeps initial means away from the extremes) 63 | mean[i] = uniform(cMin[i]+1,cMax[i]-1); 64 | 65 | return means; 66 | 67 | def UpdateMean(n,mean,item): 68 | for i in range(len(mean)): 69 | m = mean[i]; 70 | m = (m*(n-1)+item[i])/float(n); #Incremental (running) mean update 71 | mean[i] = round(m,3); 72 | 73 | return mean; 74 | 75 | def FindClusters(means,items): 76 | clusters = [[] for i in range(len(means))]; #Init clusters 77 | 78 | for item in 
items: 79 | #Classify item into a cluster 80 | index = Classify(means,item); 81 | 82 | #Add item to cluster 83 | clusters[index].append(item); 84 | 85 | return clusters; 86 | 87 | 88 | ###_Core Functions_### 89 | def Classify(means,item): 90 | #Classify item to the mean with minimum distance 91 | 92 | minimum = sys.maxint; 93 | index = -1; 94 | 95 | for i in range(len(means)): 96 | #Find distance from item to mean 97 | dis = EuclideanDistance(item,means[i]); 98 | 99 | if(dis < minimum): 100 | minimum = dis; 101 | index = i; 102 | 103 | return index; 104 | 105 | def CalculateMeans(k,items,maxIterations=100000): 106 | #Find the minima and maxima for columns 107 | cMin, cMax = FindColMinMax(items); 108 | 109 | #Initialize means at random points 110 | means = InitializeMeans(items,k,cMin,cMax); 111 | 112 | #Initialize clusterSizes, the array to hold 113 | #the number of items assigned to each cluster 114 | clusterSizes = [0 for i in range(len(means))]; 115 | 116 | #An array to hold the cluster an item is in 117 | belongsTo = [0 for i in range(len(items))]; 118 | 119 | #Calculate means 120 | for e in range(maxIterations): 121 | #If no change of cluster occurs, halt 122 | noChange = True; 123 | for i in range(len(items)): 124 | item = items[i]; 125 | #Classify item into a cluster and update the 126 | #corresponding mean. 127 | 128 | index = Classify(means,item); 129 | 130 | clusterSizes[index] += 1; #Sizes accumulate across epochs, damping later updates 131 | means[index] = UpdateMean(clusterSizes[index],means[index],item); 132 | 133 | #Item changed cluster 134 | if(index != belongsTo[i]): 135 | noChange = False; 136 | 137 | belongsTo[i] = index; 138 | 139 | #Nothing changed, return 140 | if(noChange): 141 | break; 142 | 143 | return means; 144 | 145 | 146 | ###_Main_### 147 | def main(): 148 | items = ReadData('data.txt'); 149 | 150 | k = 3; 151 | 152 | means = CalculateMeans(k,items); 153 | clusters = FindClusters(means,items); 154 | print "Means = ", means; 155 | print "Clusters: ", clusters; 156 | 157 | #newItem = [5.4,3.7,1.5,0.2]; 158 | #print Classify(means,newItem); 159 | 160 | if __name__ == "__main__": 161 | main(); 162 | -------------------------------------------------------------------------------- /Clustering/kMeans - Standard/kMeans_Plot.py: -------------------------------------------------------------------------------- 1 | import kMeans; 2 | import numpy as np; 3 | from random import choice; 4 | from matplotlib import pyplot; 5 | 6 | def CutToTwoFeatures(items,indexA,indexB): 7 | n = len(items); 8 | X = []; 9 | for i in range(n): 10 | item = items[i]; 11 | newItem = [item[indexA],item[indexB]]; 12 | X.append(newItem); 13 | 14 | return X; 15 | 16 | 17 | def PlotClusters(clusters): 18 | n = len(clusters); 19 | #Group the (already two-dimensional) items by cluster into X 20 | X = [[] for i in range(n)]; 21 | 22 | for i in range(n): 23 | cluster = clusters[i]; 24 | for item in cluster: 25 | X[i].append(item); 26 | 27 | colors = ['r','b','g','c','m','y']; 28 | 29 | for x in X: 30 | #Choose color randomly from list, then remove it 31 | #(to avoid duplicates) 32 | c = choice(colors); 33 | colors.remove(c); 34 | 35 | Xa = []; 36 | Xb = []; 37 | 38 | for item in x: 39 | Xa.append(item[0]); 40 | Xb.append(item[1]); 41 | 42 | pyplot.plot(Xa,Xb,'o',color=c); 43 | 44 | pyplot.show(); 45 | 46 | 47 | def main(): 48 | items = kMeans.ReadData('data.txt'); 49 | items = CutToTwoFeatures(items,2,3); 50 | 51 | k = 3; 52 | means = kMeans.CalculateMeans(k,items); 53 | clusters = kMeans.FindClusters(means,items); 54 | 55 | PlotClusters(clusters); 56 | 57 | main(); 58 | 
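Both kMeans variants above rely on the same running-mean update in UpdateMean: when the n-th item x is assigned to a mean m, the mean becomes (m*(n-1)+x)/n, which is algebraically the batch average of everything assigned to that mean so far (the repo version additionally rounds to three decimals at each step). That is why a per-cluster count is all that needs to be stored, rather than the cluster members themselves. A minimal standalone sketch of that identity (the item values below are illustrative, not taken from data.txt):

#Feeding items one at a time through the running-mean update
#reproduces the plain batch mean of all items seen so far.
items = [[5.1,3.5],[4.9,3.0],[4.7,3.2]];

mean = [0.0,0.0];
for n,item in enumerate(items,1): #n = number of items assigned so far
    for i in range(len(mean)):
        mean[i] = (mean[i]*(n-1)+item[i])/float(n);

print(mean); #[4.9, 3.2333...], the same as averaging the three items in one go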
-------------------------------------------------------------------------------- /Keras/kFold.py: -------------------------------------------------------------------------------- 1 | from keras.models import Sequential 2 | from keras.layers import Dense 3 | from keras.optimizers import SGD, RMSprop 4 | import numpy as np 5 | import pandas as pd 6 | 7 | # Read data 8 | white = pd.read_csv("winequality-white.csv", sep=';') 9 | red = pd.read_csv("winequality-red.csv", sep=';') 10 | 11 | white['type'] = 0 12 | red['type'] = 1 13 | 14 | wines = pd.concat([red, white], ignore_index=True).sample(frac=1) 15 | Y = np.ravel(wines.quality) 16 | X = wines.drop(['quality'], axis=1) 17 | 18 | def create_model(train_x, train_y, epochs): 19 | model = Sequential() 20 | 21 | model.add(Dense(64, activation='relu', input_dim=12)) 22 | model.add(Dense(1)) 23 | model.compile(optimizer='rmsprop', loss='mse', metrics=['mae']) 24 | 25 | model.fit(train_x, train_y, epochs=epochs) 26 | return model 27 | 28 | k = 5 29 | l = int(len(X) / k) # fold length 30 | mse_total, mae_total = 0, 0 31 | for i in range(k): 32 | test_x = X[i*l:(i+1)*l] 33 | test_y = Y[i*l:(i+1)*l] 34 | 35 | train_x = np.concatenate([X[:i*l], X[(i+1)*l:]]) 36 | train_y = np.concatenate([Y[:i*l], Y[(i+1)*l:]]) 37 | 38 | model = create_model(train_x, train_y, 15) 39 | 40 | # evaluate() returns the loss (mse) and the mae metric on the test fold 41 | mse, mae = model.evaluate(test_x, test_y) 42 | mse_total += mse 43 | mae_total += mae 44 | 45 | mse_avg = mse_total / k 46 | mae_avg = mae_total / k 47 | print(mse_avg, mae_avg) 48 | -------------------------------------------------------------------------------- /Pattern Recognition/Edit Distance.py: -------------------------------------------------------------------------------- 1 | def EditDistance(a,b): 2 | len1 = len(a); 3 | len2 = len(b); 4 | 5 | D = [[0 for j in range(len2+1)] for i in range(len1+1)]; 6 | 7 | for i in range(1,len1+1): 8 | D[i][0] = i; 9 | 10 | for j in range(1,len2+1): 11 | D[0][j] = j; 12 | 13 | for i in range(1,len1+1): 14 | for j in range(1,len2+1): 15 | #If chars are the same, added cost is 0 16 | same = 0; 17 | if(a[i-1] != b[j-1]): 18 | #Otherwise it is 1 (for substitution) 19 | same = 1; 20 | 21 | c1 = D[i-1][j-1] + same; #substitution or match (diagonal) 22 | c2 = D[i-1][j] + 1; #deletion (cell above) 23 | c3 = D[i][j-1] + 1; #insertion (cell to the left) 24 | 25 | D[i][j] = min(c1,c2,c3); 26 | 27 | print D; 28 | return D[-1][-1]; 29 | 30 | 31 | word1 = "kitten"; 32 | word2 = "cat"; 33 | 34 | print EditDistance(word1,word2); -------------------------------------------------------------------------------- /Pattern Recognition/Viterbi.py: -------------------------------------------------------------------------------- 1 | def Viterbi(transitionProb,startProb,emissionProb,states,observations,n): 2 | V = []; 3 | firstObs = observations[0]; 4 | 5 | #Calculate first column 6 | v = {}; #Temp dict to hold state data 7 | for s in states: 8 | #Add dummy value to PrevState key 9 | v[s] = {"PrevState":"Start"}; 10 | #The probability of state appearing first is its prob 11 | #of appearing first in general times the prob of 12 | #the first observation being emitted from that state. 
13 | v[s]["Prob"] = startProb[s]*emissionProb[s][firstObs]; 14 | V.append(v.copy()); 15 | 16 | for i in range(1,n): 17 | obs = observations[i]; #Current observation 18 | 19 | v = {}; #Temp dict 20 | for s in states: 21 | #Find max prob of states for current observation 22 | maxProb = -1; 23 | state = ""; #Hold previous state where maxProb occurred 24 | 25 | emission = emissionProb[s][obs]; 26 | for prevState in states: 27 | #Find max from previous column 28 | prevProb = V[i-1][prevState]["Prob"]; 29 | prob = prevProb*transitionProb[prevState][s]*emission; 30 | 31 | if(prob > maxProb): 32 | maxProb = prob; 33 | state = prevState; 34 | 35 | v[s] = {}; 36 | v[s]["Prob"] = maxProb; 37 | v[s]["PrevState"] = state; 38 | 39 | V.append(v.copy()); 40 | 41 | #Find max probability in the final column 42 | maxProb = -1; 43 | state = ""; 44 | for s in states: 45 | if(V[-1][s]["Prob"] > maxProb): 46 | maxProb = V[-1][s]["Prob"]; 47 | state = s; 48 | 49 | #Find sequence by moving back from the final state 50 | sequence = [state]; 51 | for i in range(n-2,-1,-1): 52 | prevState = V[i+1][state]["PrevState"]; 53 | sequence.insert(0,prevState); #Insert prevState to start of seq 54 | state = prevState; 55 | 56 | print sequence; 57 | print maxProb; 58 | 59 | 60 | def Initialization1(): 61 | ## Example data from Wikipedia ## 62 | transitionProb = { 63 | "Healthy": {"Healthy":0.7,"Fever":0.3}, 64 | "Fever" : {"Healthy":0.4,"Fever":0.6} 65 | }; 66 | 67 | emissionProb = { 68 | "Healthy":{"Normal":0.5,"Cold":0.4,"Dizzy":0.1}, 69 | "Fever":{"Normal":0.1,"Cold":0.3,"Dizzy":0.6} 70 | }; 71 | 72 | startProb = {"Healthy":0.6,"Fever":0.4}; 73 | 74 | states = ["Healthy","Fever"]; 75 | observations = ["Normal","Cold","Dizzy"]; 76 | n = len(observations); 77 | 78 | return transitionProb,startProb,emissionProb,states,observations,n; 79 | 80 | def Initialization2(): 81 | transitionProb = { 82 | "Rain": {"Rain": 0.5, "Sun": 0.1, "Cloud": 0.4}, 83 | "Cloud": {"Rain": 0.3, "Sun": 0.3, "Cloud": 0.4}, 84 | "Sun": {"Rain": 0.1, "Sun": 0.5, "Cloud": 0.4}, 85 | }; 86 | 87 | emissionProb = { 88 | "Rain": {"Walk": 0.1, "Clean": 0.3, "Study": 0.5, "Shop": 0.1}, 89 | "Cloud": {"Walk": 0.3, "Clean": 0.2, "Study": 0.3, "Shop": 0.2}, 90 | "Sun": {"Walk": 0.5, "Clean": 0.1, "Study": 0.1, "Shop": 0.3}, 91 | }; 92 | 93 | startProb = {"Rain": 0.2, "Cloud": 0.3, "Sun": 0.4}; 94 | 95 | states = ["Rain", "Cloud","Sun"]; 96 | observations = ["Walk", "Walk", "Shop", "Walk", "Study", "Study"]; 97 | n = len(observations); 98 | 99 | return transitionProb,startProb,emissionProb,states,observations,n; 100 | 101 | 102 | transitionProb,startProb,emissionProb,states,observations,n = Initialization1(); 103 | 104 | Viterbi(transitionProb,startProb,emissionProb,states,observations,n); 105 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine-Learning 2 | 3 | A (WIP) collection of Machine Learning algorithms implemented in Python, plus tutorials with Keras. 
Currently includes the following: 4 | 5 | ### Keras 6 | 7 | * k-Fold Validation ([Blog Post](https://antmarakis.github.io/artificial%20intelligence/keras-k-fold/)) 8 | 9 | ### Classifiers 10 | 11 | * k-Nearest Neighbors ([Blog Post](https://antmarakis.github.io/artificial%20intelligence/k-nearest-neighbors/)) 12 | 13 | * Perceptron ([Theory](https://antmarakis.github.io/artificial%20intelligence/perceptron-theory/), [Implementation](https://antmarakis.github.io/artificial%20intelligence/perceptron-implementation/)) 14 | 15 | * Least Squares 16 | 17 | * Requires numpy 18 | 19 | * Includes plotting, which requires matplotlib 20 | 21 | * Naive Bayes Classifier ([Categorical](https://antmarakis.github.io/artificial%20intelligence/naive-bayes-cat-intro/), [Numerical](https://antmarakis.github.io/artificial%20intelligence/naive-bayes-num-intro/)) 22 | 23 | * Includes Categorical and Numerical classifiers 24 | 25 | * Neural Network ([Tutorial](https://www.kaggle.com/antmarakis/another-neural-network-from-scratch)) 26 | 27 | * Requires numpy 28 | 29 | * Includes plotting, which requires matplotlib 30 | 31 | ### Clustering 32 | 33 | * kMeans ([Standard](https://antmarakis.github.io/kMeans/)) 34 | 35 | * Includes plotting, which requires matplotlib 36 | 37 | * Includes both the online (or sequential) kMeans and the standard (or iterative) algorithm. 38 | 39 | ### Pattern Recognition 40 | 41 | * Viterbi Algorithm (for Hidden Markov Models) 42 | 43 | * Edit Distance Algorithm 44 | --------------------------------------------------------------------------------
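A note on usage: every script above is self-contained and written for Python 2 (print statements, sys.maxint). As a minimal sketch, the kNN module can also be driven from another script, assuming it is run from inside Classifiers/kNN next to data.txt; the function names are the ones defined in kNearestNeighbours.py above:

import kNearestNeighbours as knn

items = knn.ReadData('data.txt'); #Shuffled list of {feature: value, "Class": label} dicts

#Classify a new, unlabelled item by its 3 nearest neighbours
newItem = {'SL': 7.0, 'SW': 3.2, 'PL': 4.7, 'PW': 1.4};
print(knn.Classify(newItem, 3, items)); #A (class, votes) tuple, e.g. ('Iris-versicolor', 3)

#5-fold cross-validation with k = 5, averaged over 10 shuffles
knn.Evaluate(5, 5, items, 10);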