├── Classifiers
│   ├── Least Squares
│   │   ├── LeastSquares - Plot Function.py
│   │   ├── LeastSquares.py
│   │   └── data.txt
│   ├── Naive Bayes
│   │   ├── Categorical
│   │   │   ├── CategoricalNaiveBayes.py
│   │   │   ├── _DataReader.py
│   │   │   ├── data.txt
│   │   │   ├── data2.txt
│   │   │   └── data3.txt
│   │   └── Numerical
│   │       ├── NumericalNaiveBayes.py
│   │       ├── _DataReader.py
│   │       └── data.txt
│   ├── Neural Network
│   │   ├── NeuralNetwork.py
│   │   ├── NeuralNetwork_Plot.py
│   │   ├── Reader.py
│   │   └── data.txt
│   ├── Perceptron
│   │   ├── Perceptron.py
│   │   ├── data.txt
│   │   └── data2.txt
│   └── kNN
│       ├── data.txt
│       ├── data2.txt
│       └── kNearestNeighbours.py
├── Clustering
│   ├── kMeans - Online
│   │   ├── data.txt
│   │   ├── kMeans.py
│   │   └── kMeans_Plot.py
│   └── kMeans - Standard
│       ├── data.txt
│       ├── kMeans.py
│       └── kMeans_Plot.py
├── Keras
│   └── kFold.py
├── Pattern Recognition
│   ├── Edit Distance.py
│   └── Viterbi.py
└── README.md

/Classifiers/Least Squares/LeastSquares - Plot Function.py:
--------------------------------------------------------------------------------
1 | import LeastSquares as LS;
2 | import numpy as np;
3 | from random import choice;
4 | from matplotlib import pyplot;
5 | 
6 | 
7 | def PlotFunction(X, Y, percentage, indexA, indexB):
8 |     n = len(X); #Number of items
9 |     split = int(n*percentage);
10 | 
11 |     #Keep only the two feature columns at the given indexes
12 |     X = X[:, [indexA, indexB]];
13 | 
14 |     #Normalize values (compute the max once, then scale the whole matrix)
15 |     X = X / X.max();
16 | 
17 |     #Train on the first part of the data, as given by percentage
18 |     trainX = X[:split];
19 |     trainY = Y[:split];
20 | 
21 |     #The items will be sorted into classes in this list
22 |     Points = [[] for i in range(Y.shape[1])];
23 | 
24 |     W = LS.CalculateWeights(trainX, trainY);
25 | 
26 |     correct = 0;
27 | 
28 |     #Calculate accuracy over the whole data set
29 |     for i in range(n):
30 |         prediction = LS.Predict(W, X[i]);
31 |         itemClass = list(Y[i].A1);
32 | 
33 |         if(prediction == itemClass):
34 |             correct += 1;
35 | 
36 |         #Find index of class
37 |         index = -1;
38 |         for j in range(len(prediction)):
39 |             if(prediction[j] == 1):
40 |                 index = j;
41 |                 break;
42 | 
43 |         Points[index].append(X[i]);
44 | 
45 |     accuracy = correct/float(n)*100;
46 |     print "Accuracy ", accuracy;
47 | 
48 |     colors = ['r', 'b', 'g', 'c', 'm', 'y'];
49 | 
50 |     for i in range(len(Points)):
51 |         p = Points[i];
52 |         Xa = [];
53 |         Xb = [];
54 | 
55 |         #Choose color randomly from list, then remove it
56 |         #(to avoid duplicates)
57 |         color = choice(colors);
58 |         colors.remove(color);
59 | 
60 |         for item in p:
61 |             Xa.append(item[:, [0]].item(0));
62 |             Xb.append(item[:, [1]].item(0));
63 | 
64 |         pyplot.plot(Xa, Xb, 'o', color=color);
65 | 
66 |     pyplot.show();
67 | 
68 | 
69 | def main():
70 |     X, Y, n = LS.ReadData('data.txt');
71 | 
72 |     PlotFunction(X, Y, 0.7, 2, 3);
73 | 
74 | if __name__ == "__main__":
75 |     main();
--------------------------------------------------------------------------------
/Classifiers/Least Squares/LeastSquares.py:
--------------------------------------------------------------------------------
1 | import numpy as np;
2 | from numpy.linalg import inv;
3 | from random import shuffle;
4 | 
5 | ###_Read Data_###
6 | def ReadData(fileName):
7 |     f = open(fileName);
8 |     lines = f.read().splitlines();
9 |     f.close();
10 | 
11 |     items = [];
12 |     classes = [];
13 | 
14 |     for line in lines:
15 |         line = line.split(','); #Split line on commas
16 |         itemFeatures = []; #Temp list to hold feature values of the item
17 | 
18 |         for i in range(len(line)-1):
19 |             value = float(line[i]);
20 |             itemFeatures.append(value);
21 | 
22 |         #Add to classes the known classification for current item
23 |         classes.append(line[-1]);
24 |         #Add item data to items
25 |         items.append(itemFeatures);
26 | 
27 |     #Map class names to numbers (from 0 to the number of classes minus 1)
28 |     classes = map(sorted(set(classes)).index, classes);
29 | 
30 |     X = np.matrix(items); #Convert data to numpy matrix
31 |     Y = BuildY(classes); #Build the Y matrices
32 |     n = len(items); #The number of items
33 | 
34 |     X, Y = ShuffleArrays(X, Y);
35 | 
36 |     return X, Y, n;
37 | 
38 | def ShuffleArrays(A, B):
39 |     toShuffle = []; #Temp array to shuffle X and Y at the same time
40 |     n = len(A);
41 | 
42 |     for i in range(n):
43 |         #Build toShuffle by packing a copy of Xi together with Yi
44 |         toShuffle.append((A[i].copy(), B[i].copy()));
45 | 
46 |     shuffle(toShuffle);
47 | 
48 |     for i in range(n):
49 |         #Unpack toShuffle
50 |         A[i] = toShuffle[i][0];
51 |         B[i] = toShuffle[i][1];
52 | 
53 |     return A,B;
54 | 
55 | def BuildY(Y):
56 |     newY = [];
57 |     #Number of classes is the largest number in Y plus 1
58 |     classesNumber = max(Y)+1;
59 | 
60 |     for i in range(len(Y)):
61 |         #Initialize vector with zeros, set to 1 the class index
62 |         tempVector = [0 for j in range(classesNumber)];
63 |         tempVector[Y[i]] = 1;
64 | 
65 |         newY.append(tempVector);
66 | 
67 |     return np.matrix(newY);
68 | 
69 | 
70 | ###_Core Functions_###
71 | def CalculateWeights(X, Y):
72 |     #Number of attributes (including the bias)
73 |     A = X.shape[1] + 1;
74 |     #Number of classes
75 |     C = Y.shape[1];
76 | 
77 |     #The sums for Xi*Xi.T and Xi*Yi.T
78 |     XX = np.zeros((A, A));
79 |     XY = np.zeros((A, C));
80 | 
81 |     for i in range(len(X)):
82 |         x = X[i]; #The ith item vector
83 |         x = np.append(1, x); #Augment item with a 1
84 | 
85 |         y = Y[i]; #The vector storing the class x is in
86 | 
87 |         #Calculate outer products of x*x.T and x*y.T
88 |         XX += np.outer(x, x);
89 |         XY += np.outer(x, y);
90 | 
91 |     XX += 0.001 * np.eye(A); #Small ridge term, so that XX is invertible
92 | 
93 |     #The weight matrix is the product of the inverse of XX and XY
94 |     weight = np.dot(inv(XX), XY);
95 |     return weight;
96 | 
97 | def Predict(W,x):
98 |     x = np.append(1, x); #Augment item with a 1
99 | 
100 |     prediction = np.dot(W.T, x); #List of predictions
101 | 
102 |     #Find max prediction
103 |     m = prediction[0];
104 |     index = 0;
105 |     for i in range(1,len(prediction)):
106 |         if(prediction[i] > m):
107 |             m = prediction[i];
108 |             index = i;
109 | 
110 |     #Initialize prediction vector to zeros
111 |     y = [0 for i in range(len(prediction))];
112 |     y[index] = 1; #Set guessed class to 1
113 | 
114 |     return y; #Return prediction vector
115 | 
116 | 
117 | ###_Evaluation Functions_###
118 | def K_FoldValidation(k, X, Y):
119 |     if(k > len(X)):
120 |         return -1;
121 | 
122 |     correct = 0; #The number of correct classifications
123 |     total = (len(X)/k)*k; #The total number of classifications
124 | 
125 |     l = len(X)/k; #The length of a fold
126 | 
127 |     for i in range(k):
128 |         #Train on all folds except the ith, which is held out for testing
129 |         trainingX = np.concatenate([X[:i*l], X[(i+1)*l:]]);
130 |         trainingY = np.concatenate([Y[:i*l], Y[(i+1)*l:]]);
131 | 
132 |         testX = X[i*l:(i+1)*l];
133 |         testY = Y[i*l:(i+1)*l];
134 | 
135 |         W = CalculateWeights(trainingX, trainingY);
136 | 
137 |         for j in range(len(testX)):
138 |             itemClass = list(testY[j].A1); #The actual classification
139 |             guess = Predict(W, testX[j]); #Make a prediction
140 | 
141 |             if(guess == itemClass):
142 |                 #Guessed correctly
143 |                 correct += 1;
144 | 
145 |     return correct/float(total);
146 | 
147 | def Evaluate(times, k, X, Y):
148 |     accuracy = 0;
149 |     for t in range(times):
150 |         X, Y = 
ShuffleArrays(X, Y); 151 | accuracy += K_FoldValidation(k, X, Y); 152 | 153 | print accuracy/float(times); 154 | 155 | 156 | ###_Main_### 157 | def main(): 158 | X, Y, n = ReadData('data.txt'); 159 | W = CalculateWeights(X, Y); 160 | 161 | Evaluate(100, 5, X, Y); 162 | 163 | if __name__ == "__main__": 164 | main(); 165 | -------------------------------------------------------------------------------- /Classifiers/Least Squares/data.txt: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 
6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica -------------------------------------------------------------------------------- /Classifiers/Naive Bayes/Categorical/CategoricalNaiveBayes.py: -------------------------------------------------------------------------------- 1 | import _DataReader as DataReader; 2 | 3 | 4 | def Classifier(Evidence): 5 | #The string of evidence, so that we can save it in P. 6 | evidence = ''; 7 | 8 | #Check if all evidence is also in Features 9 | for e in Evidence: 10 | if e not in Features: 11 | #A given evidence does not belong in Features. Abort. 12 | print "Evidence list is erroneous" 13 | return; 14 | 15 | #Build the evidence string 16 | evidence += e + ', '; 17 | 18 | evidence = evidence[:-2]; #remove the last two chars, as they are ', ' 19 | 20 | m = -1.0; #Hold the max 21 | classification = ''; #Hold the classification 22 | 23 | #We need to find P(c|evidence). The equation (from Bayes) is: 24 | #P(c|evidence) = P(evidence|c)*P(c)/P(evidence) 25 | #Because this Bayes classifier is naive, the features in evidence are 26 | #independent. 
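    #(Illustrative note: for the evidence list ['Tall', 'Slim'] from data.txt,
    #independence means P(Tall, Slim | c) is approximated by
    #P(Tall | c) * P(Slim | c), even if the two features are in fact correlated.)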
    #Therefore, the above equation is simplified to:
27 |     #P(c|evidence) = P(evidence1|c)*P(evidence2|c)*...*P(evidenceN|c) * P(c)
28 |     #divided by P(evidence1)*P(evidence2)*...*P(evidenceN)
29 | 
30 |     #Calculate the probability of all classes for given evidence/features
31 |     #using the Bayes equation. Pick the highest.
32 |     for c in Classes:
33 |         P[c + '|' + evidence] = P[c]; #Start from the prior probability
34 | 
35 |         for e in Evidence:
36 |             #Multiply by the conditional prob and divide by the feature prob
37 |             P[c + '|' + evidence] *= P[e + '|' + c] / P[e];
38 | 
39 |         #Find the max
40 |         if(P[c + '|' + evidence] > m):
41 |             #P(c|evidence) is the max so far; update m and classification
42 |             m = P[c + '|' + evidence];
43 |             classification = c;
44 | 
45 |     #With the evidence, the item belongs to classification with probability m
46 |     print classification, m;
47 | 
48 | 
49 | #Read data from file
50 | Classes, Features, P = DataReader.Read('data.txt');
51 | 
52 | #Run classifier with the evidence list
53 | Classifier(['Tall', 'Slim']);
--------------------------------------------------------------------------------
/Classifiers/Naive Bayes/Categorical/_DataReader.py:
--------------------------------------------------------------------------------
1 | def Read(fileName):
2 |     Classes = {};
3 |     Features = {};
4 | 
5 |     #Read data from input file, split the lines
6 |     f = open(fileName,'r');
7 |     lines = f.read().splitlines();
8 |     f.close();
9 | 
10 |     n = len(lines)-1; #The size of the data set
11 | 
12 |     #Extract the features
13 |     features = lines[0]; #The first line of input, as a string
14 |     features = features.split(' ')[1:]; #Split first line by spaces
15 |     l = len(features);
16 | 
17 |     #Extract the class data
18 |     classes = lines[1:]; #Remove the first line
19 | 
20 |     for f in features:
21 |         #For every string in the first line, add a new item to Features,
22 |         #plus its complement.
23 |         Features[f] = 0;
24 |         Features["Not " + f] = 0;
25 | 
26 |     #Construct Classes table
27 |     for c in classes:
28 |         #Split current line (item) by spaces
29 |         #The first element holds the name of the class
30 |         #The rest show whether the item has a certain feature
31 |         c = c.split(' ');
32 | 
33 |         if(c[0] not in Classes):
34 |             #The item class has not been added to Classes. Add it now.
35 |             Classes[c[0]] = {"Total":0}; #Set the total of the class to 0.
36 |             for f in Features:
37 |                 #Add to the class dictionary (table) all the features, set to 0.
38 |                 Classes[c[0]][f] = 0;
39 | 
40 |         #Increment the total items in the item class
41 |         Classes[c[0]]["Total"] += 1;
42 | 
43 |         for i in range(1,l+1):
44 |             if(c[i] == 'True'):
45 |                 #The item has the feature in the ith index in the item list, c
46 |                 #The ith index in c corresponds with the i-1 index in features
47 |                 feature = features[i-1]; #Save it in feature
48 |             elif(c[i] == 'False'):
49 |                 #The item doesn't have the feature in the item list
50 |                 #Instead, it has the "Not Feature", the complement of the feature
51 |                 feature = "Not " + features[i-1]; #Save complement in feature
52 | 
53 |             Features[feature] += 1; #Increment feature counter
54 | 
55 |             if(feature not in Classes[c[0]]):
56 |                 #The feature has not been added to the class dictionary.
57 |                 #Add feature to the item class.
58 |                 Classes[c[0]][feature] = 1;
59 |             else:
60 |                 #The feature exists in the class dictionary.
61 |                 #Increment the feature counter in the item class.
62 |                 Classes[c[0]][feature] += 1;
63 | 
64 | 
65 |     #Calculate the various probabilities
66 |     P = {}; #Probability dictionary. 
Holds the various probabilities 67 | 68 | #Calculate the prior probabilities of the classes 69 | for c in Classes: 70 | P[c] = Classes[c]["Total"]/float(n); 71 | 72 | #Calculate the prior probabilities of the features 73 | for f in Features: 74 | P[f] = Features[f]/float(n); 75 | 76 | #Calculate the conditional probabilities 77 | for c in Classes: 78 | for f in Features: 79 | P[f + '|' + c] = Classes[c][f]/float(Classes[c]["Total"]); 80 | 81 | return (Classes, Features, P); 82 | -------------------------------------------------------------------------------- /Classifiers/Naive Bayes/Categorical/data.txt: -------------------------------------------------------------------------------- 1 | Class Tall Slim Smart 2 | Detective True False False 3 | Detective True True True 4 | Detective True True True 5 | Detective False False True 6 | Detective True False False 7 | Detective False True True 8 | Detective False True True 9 | Detective False False False 10 | Detective False False True 11 | Detective True True True 12 | Detective False True True 13 | Detective True True False 14 | Detective True False False 15 | Detective False False True 16 | Detective True False True 17 | Detective True True True 18 | Detective False False True 19 | Detective False False True 20 | Detective True True True 21 | Detective False True True 22 | Brute True False False 23 | Brute True True False 24 | Brute True False False 25 | Brute True False False 26 | Brute False False False 27 | Brute True False False 28 | Brute True False True 29 | Brute True True False 30 | Brute True False False 31 | Brute True True False 32 | Brute False False False 33 | Brute True False True 34 | Brute True False False 35 | Brute True True False 36 | Brute True False True 37 | Brute False False False 38 | Brute True False False 39 | Brute True False False 40 | Brute True False False 41 | Brute True False True 42 | Brute True False False 43 | Brute True False False 44 | Brute True True False 45 | Brute True False False 46 | Brute True False False 47 | Brute False False False 48 | Brute True False False 49 | Brute True False True 50 | Brute False False False 51 | Brute True False False -------------------------------------------------------------------------------- /Classifiers/Naive Bayes/Categorical/data2.txt: -------------------------------------------------------------------------------- 1 | Class Long Sweet Yellow 2 | Banana True True True 3 | Banana True True True 4 | Banana True True False 5 | Banana True True True 6 | Banana True True True 7 | Banana True True False 8 | Banana True True True 9 | Banana True False True 10 | Banana True True True 11 | Banana False True True 12 | Lemon False False True 13 | Lemon False False True 14 | Lemon False False True 15 | Lemon False False True 16 | Lemon False False True 17 | Other True False False 18 | Other False True False 19 | Other True False True 20 | Other False True False 21 | Other False False True 22 | Other True True False 23 | Other False False False 24 | Other True False True 25 | Other False True False 26 | Other True True False -------------------------------------------------------------------------------- /Classifiers/Naive Bayes/Categorical/data3.txt: -------------------------------------------------------------------------------- 1 | Class Big Feathers Claws 2 | Dinosaur True False False 3 | Dinosaur False True False 4 | Dinosaur False False True 5 | Dinosaur False True False 6 | Dinosaur True True True 7 | Dinosaur True False False 8 | Dinosaur False True True 9 | Dinosaur 
False False False 10 | Dinosaur True True False 11 | Dinosaur False False True 12 | Chicken False True False 13 | Chicken False False True 14 | Chicken False True True 15 | Chicken False True True 16 | Chicken False True False 17 | Chicken False True False 18 | Chicken False True True 19 | Chicken False True True 20 | Chicken False True False 21 | Chicken False True False 22 | Chicken False True True 23 | Dragon True True False 24 | Dragon False False False 25 | Dragon False True True 26 | Dragon True False False 27 | Dragon True False True 28 | Dragon False True False 29 | Dragon True False True 30 | Dragon True False True 31 | Dragon True False True 32 | Dragon False False True -------------------------------------------------------------------------------- /Classifiers/Naive Bayes/Numerical/NumericalNaiveBayes.py: -------------------------------------------------------------------------------- 1 | import _DataReader as DataReader; 2 | import math; 3 | 4 | 5 | def Gaussian(mean, stDev, x): 6 | g = 1/(math.sqrt(2*math.pi)*stDev) * math.e**(-0.5*(float(x-mean)/stDev)**2); 7 | return g; 8 | 9 | def Classifier(Evidence): 10 | #The string of evidence, so that we can save it in P. 11 | evidence = ''; 12 | 13 | #Check if all evidence is also in Features 14 | for e in Evidence: 15 | eF = e[0]; #The feature in evidence e 16 | eV = e[1]; #The value in evidence e 17 | if eF not in Features: 18 | #A given evidence does not belong in Features. Abort. 19 | print "Evidence list is erroneous"; 20 | return; 21 | 22 | #Build the evidence string 23 | evidence += eF + " = " + str(eV) + ', '; 24 | 25 | evidence = evidence[:-2]; #remove the last two chars, as they are ', ' 26 | 27 | m = -1.0; #Hold the max 28 | classification = ''; #Hold the classification 29 | 30 | #We need to find P(c|evidence). The equation (from Bayes) is: 31 | #P(c|evidence) = P(evidence|c)*P(c)/P(evidence) 32 | #Because this Bayes classifier is naive, the features in evidence are 33 | #independent. Therefore, the above equation is simplified to: 34 | #P(c|evidence) = P(evidence1|c)*P(evidence2|c)*...*P(evidenceN|c) * P(c). 35 | #We do not need to calculate P(evidence) as it is the same for all 36 | #classes. 37 | 38 | #We know the individual probability P(c) but we do not know the 39 | #probability of the conditional probabilities P(evidenceX|c). 40 | #We calculate those using the Gaussian distribution formula. 41 | #Instead of Gaussian we can use any other distribution, if it is known. 42 | 43 | #The parameters are the mean, the standard deviation and the value of the evidence. 44 | #We have the value from the evidence, eV. 45 | #The mean is the class mean for the feature, Classes[c][eF]["Mean"]. 46 | #The stDev is the class stDev for the feature, Classes[c][eF]["StDev"]. 47 | 48 | #We input those to the Gaussian formula and we receive the output. 49 | 50 | #Calculate the probability of all classes for given evidence/features 51 | #using the Bayes equation. Pick the highest. 
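    #Worked example with illustrative numbers: if, for some class c, the
    #feature 'Height' has Mean = 173 and StDev = 3, then for the evidence
    #('Height', 170) the conditional factor is
    #  Gaussian(173, 3, 170) = 1/(sqrt(2*pi)*3) * e**(-0.5*((170-173)/3.0)**2)
    #                        ~ 0.1330 * 0.6065 ~ 0.0807
    #and this factor multiplies the running product for class c below.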
52 |     for c in Classes:
53 |         P[c + '|' + evidence] = P[c]; #Start from the prior probability
54 | 
55 |         for e in Evidence:
56 |             eF = e[0]; #The feature in evidence e
57 |             eV = e[1]; #The value in evidence e
58 |             #Multiply by the conditional prob
59 |             mean = Classes[c][eF]["Mean"]; #mean
60 |             stDev = Classes[c][eF]["StDev"]; #standard deviation
61 |             P[c + '|' + evidence] *= Gaussian(mean,stDev,eV);
62 | 
63 |         if(P[c + '|' + evidence] > m):
64 |             #P(c|evidence) is the max so far; update m and classification
65 |             m = P[c + '|' + evidence];
66 |             classification = c;
67 | 
68 |     #With the evidence, the item belongs to classification with probability m
69 |     print classification, m;
70 | 
71 | 
72 | #Read data from file
73 | Classes, Features, P, n = DataReader.Read('data.txt'); #Returns a tuple
74 | 
75 | #Run classifier with the evidence list
76 | Classifier((('Height', 170), ('Weight', 65)));
--------------------------------------------------------------------------------
/Classifiers/Naive Bayes/Numerical/_DataReader.py:
--------------------------------------------------------------------------------
1 | import math;
2 | 
3 | def Read(fileName):
4 |     Classes = {}; #Class dictionary
5 |     Features = []; #Features list
6 | 
7 |     #Read data from input file, split the lines
8 |     f = open(fileName,'r');
9 |     lines = f.read().splitlines();
10 |     f.close();
11 | 
12 |     n = len(lines)-1; #The size of the data set
13 | 
14 |     #Extract the features
15 |     features = lines[0]; #The first line of input, as a string
16 |     features = features.split(' ')[1:]; #Split first line by spaces
17 |     l = len(features); #The number of features
18 | 
19 |     #Extract the class data
20 |     classes = lines[1:]; #Remove the first line (the features)
21 | 
22 |     for f in features:
23 |         #For every string in the first line, add a new item to
24 |         #the Features list.
25 |         Features.append(f);
26 | 
27 |     #Construct Classes table#
28 |     #a) Find means
29 |     for c in classes:
30 |         #Split current line (item) by spaces
31 |         #The first element holds the name of the class
32 |         #The rest hold the item's numerical values for each feature
33 |         c = c.split(' ');
34 | 
35 |         if(c[0] not in Classes):
36 |             #The item class has not been added to Classes. Add it now.
37 |             Classes[c[0]] = {"Total":0}; #Set the total of the class to 0.
38 |             for f in Features:
39 |                 #Add to the class dictionary (table) all the features' mean
40 |                 #and standard deviation
41 |                 Classes[c[0]][f] = {"Mean":0, "StDev":0};
42 | 
43 |         #Increment the total items in the item class
44 |         Classes[c[0]]["Total"] += 1;
45 | 
46 |         #Calculate the mean of classes' features
47 |         for i in range(1,l+1):
48 |             t = Classes[c[0]]["Total"]; #Pass the total
49 |             f = Classes[c[0]][features[i-1]]["Mean"]; #The current average
50 | 
51 |             Classes[c[0]][features[i-1]]["Mean"] = (f*(t-1)+float(c[i]))/t;
52 | 
53 |     #b) Find Standard Deviations
54 |     #StDev : Square Root of Variance
55 |     #Variance : Average of squared difference from mean
56 | 
57 |     values = {};
58 |     for k in Classes.keys():
59 |         #We will save the variances in here, building them up as we go
60 |         #so that we can use them at the end.
61 |         values[k] = {};
62 |         for f in Features:
63 |             values[k][f] = 0;
64 | 
65 |     for c in classes:
66 |         #To find the standard deviations, we first need to know the means.
67 |         #That's why we run this loop after the first loop.
68 |         c = c.split(' ');
69 | 
70 |         for i in range(1,l+1):
71 |             #From the current value, subtract the mean.
72 |             #Divide by the total minus 1 to get the average. 
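            #(For example, for the values 1, 2 and 3 the mean is 2 and the
            #sample variance is ((1-2)**2 + (2-2)**2 + (3-2)**2)/(3-1) = 1,
            #so the StDev is 1.)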
73 | #We are using the total minus one because we do not want the 74 | #population deviation, but the standard deviation. 75 | v = math.pow(int(c[i]) - Classes[c[0]][features[i-1]]["Mean"],2); 76 | values[c[0]][Features[i-1]] += v/(Classes[c[0]]["Total"]-1); 77 | 78 | for k in Classes.keys(): 79 | #Calculate StDev for the features of each class, using values 80 | for i in range(1,l+1): 81 | #The Standard Deviation is the square root of the variance 82 | s = math.sqrt(values[k][Features[i-1]]); 83 | 84 | Classes[k][features[i-1]]["StDev"] = s; 85 | 86 | 87 | #Calculate the various probabilities 88 | P = {}; #Probability dictionary. Holds the various probabilities 89 | 90 | #Calculate the prior probabilities of the classes 91 | for c in Classes: 92 | P[c] = Classes[c]["Total"]/float(n); 93 | 94 | 95 | return (Classes, Features, P, n); 96 | -------------------------------------------------------------------------------- /Classifiers/Naive Bayes/Numerical/data.txt: -------------------------------------------------------------------------------- 1 | Class Height Weight 2 | Wrestler 170 61 3 | Wrestler 173 67 4 | Wrestler 175 69 5 | Wrestler 181 75 6 | Wrestler 169 60 7 | Wrestler 171 60 8 | Wrestler 173 64 9 | Wrestler 173 71 10 | Wrestler 175 65 11 | Wrestler 171 64 12 | Wrestler 169 62 13 | Wrestler 171 66 14 | Wrestler 177 72 15 | Wrestler 172 60 16 | Wrestler 174 67 17 | Sumo 181 110 18 | Sumo 177 100 19 | Sumo 180 111 20 | Sumo 179 105 21 | Sumo 170 101 22 | Sumo 177 104 23 | Sumo 182 110 24 | Sumo 180 106 25 | Sumo 187 119 26 | Sumo 180 113 27 | Sumo 174 114 28 | Sumo 186 108 29 | Sumo 180 110 30 | Sumo 179 104 31 | Sumo 173 99 -------------------------------------------------------------------------------- /Classifiers/Neural Network/NeuralNetwork.py: -------------------------------------------------------------------------------- 1 | """ 2 | There exists an updated version of this script. 
It can be found on Kaggle: 3 | https://www.kaggle.com/antmarakis/another-neural-network-from-scratch 4 | """ 5 | 6 | 7 | 8 | import numpy as np; 9 | import Reader; 10 | 11 | 12 | ###_Evaluation Functions_### 13 | def Accuracy(X, Y, Weights, layers): 14 | layers += 1; 15 | 16 | #Run training set through network, find overall accuracy 17 | correct = 0; 18 | 19 | for i in range(len(X)): 20 | x = X[i]; 21 | y = list(Y[i].A1); 22 | 23 | guess = Predict(x, Weights, layers); 24 | 25 | if(y == guess): 26 | #Guessed correctly 27 | correct += 1; 28 | 29 | return correct / float(len(X)); 30 | 31 | def K_FoldValidation(k, X, Y, f, hiddenLayers, nodes, epochs, r=0.15): 32 | if (k > len(X)): 33 | return -1; 34 | 35 | correct = 0; #The number of correct classifications 36 | total = len(X) * (k - 1); #The total number of classifications 37 | 38 | l = len(X) / k; #The length of a fold 39 | 40 | for i in range(k): 41 | print "\nFold",i; 42 | 43 | #Split data set into training and testing 44 | trainingX = X[i * l:(i + 1) * l]; 45 | trainingY = Y[i * l:(i + 1) * l]; 46 | 47 | testX = np.concatenate([X[:i*l], X[(i+1)*l:]]); 48 | testY = np.concatenate([Y[:i*l], Y[(i+1)*l:]]); 49 | 50 | #Calculate Weights 51 | weights = NeuralNetwork(epochs, X, Y, f, hiddenLayers, nodes, r); 52 | 53 | #Make predictions for test sets 54 | for j in range(len(testX)): 55 | x = testX[j]; 56 | y = list(testY[j].A1); 57 | 58 | guess = Predict(x, weights, hiddenLayers+1); 59 | 60 | if(y == guess): 61 | #Guessed correctly 62 | correct += 1; 63 | 64 | return correct / float(total); 65 | 66 | 67 | ###_Auxiliary Functions_### 68 | def Sigmoid(x): 69 | return 1 / (1 + np.exp(-x)); 70 | 71 | def SigmoidDerivative(x): 72 | return np.multiply(x, 1-x); 73 | 74 | def InitializeWeights(f, layers, nodes): 75 | ##_Initialize weights with random values in [-1, 1] (including bias)_## 76 | 77 | #Augment feature vectors with bias 78 | f += 1; 79 | 80 | #Initialize weights from input to first hidden layer 81 | inputToHidden = [[np.random.uniform(-1, 1) for i in range(f)] for j in range(nodes[0])]; 82 | inputToHidden = np.matrix(inputToHidden); 83 | 84 | weights = [inputToHidden]; 85 | #Initialize the rest of the weights 86 | for i in range(1, layers): 87 | w = [[np.random.uniform(-1, 1) for k in range(nodes[i-1] + 1)] for j in range(nodes[i])]; 88 | w = np.matrix(w); 89 | weights.append(w); 90 | 91 | return weights; 92 | 93 | 94 | ###_Core Functions_### 95 | def Predict(item, Weights, layers, sigmoid=True): 96 | item = np.append(1, item); #Augment feature vector 97 | 98 | ##_Forward Propagation_## 99 | activations = [item]; 100 | Input = item; 101 | for i in range(layers): 102 | activation = np.dot(Input, Weights[i].T); 103 | if(i < layers-1 or sigmoid): 104 | #When calculating the output activation, check if 105 | #we should sigmoid it or not (via the sigmoid var) 106 | activation = Sigmoid(activation); 107 | 108 | activations.append(activation); 109 | 110 | Input = np.append(1, activation); #Augment activation vector 111 | 112 | outputFinal = activations[-1].A1; 113 | 114 | #Find max activation in output 115 | m = outputFinal[0]; 116 | index = 0; 117 | for i in range(1, len(outputFinal)): 118 | output = outputFinal[i]; 119 | 120 | if(output > m): 121 | m = output; 122 | index = i; 123 | 124 | #Initialize prediction vector to zeros 125 | y = [0 for i in range(len(outputFinal))]; 126 | y[index] = 1; #Set guessed class to 1 127 | 128 | return y; #Return prediction vector 129 | 130 | def Train(X, Y, r, layers, weights): 131 | for i in range(len(X)): 132 
| x = X[i]; 133 | y = Y[i].A1; 134 | x = np.matrix(np.append(1, x)); # Augment feature vector 135 | 136 | ##_Forward Propagation_## 137 | #Each layer receives an input and calculates its output 138 | #The output of one layer is the input to the next 139 | #The first input is the first feature vector (the item) 140 | activations = [x]; 141 | Input = x; 142 | for j in range(layers): 143 | activation = Sigmoid(np.dot(Input, weights[j].T)); 144 | activations.append(activation); 145 | 146 | Input = np.append(1, activation); #Augment with bias 147 | 148 | ##_Back Propagation_## 149 | #Find error at output 150 | #Propagate error backwards through the layers 151 | #For each layer: 152 | #a) Calculate delta: 153 | #Error of next layer * the sigmoid der of current layer activation 154 | #b) Update weights between current layer and previous layer 155 | #Multiply delta with activation of previous layer 156 | #Multiply that with rate 157 | #Add that to weights of previous layer 158 | #c) Calculate error for current layer 159 | #Remove bias from previous-layer weights, get w 160 | #Multiply delta with w to get error 161 | outputFinal = activations[-1]; 162 | error = np.matrix(y - outputFinal); #Error at output 163 | 164 | for j in range(layers, 0, -1): 165 | currActivation = activations[j]; 166 | 167 | if(j > 1): 168 | #Augment previous activation 169 | prevActivation = np.append(1, activations[j-1]); 170 | else: 171 | #First hidden layer, prevActivation is input (without bias) 172 | prevActivation = activations[0]; 173 | 174 | delta = np.multiply(error, SigmoidDerivative(currActivation)); 175 | weights[j-1] += r * np.multiply(delta.T, prevActivation); 176 | 177 | w = np.delete(weights[j-1], [0], axis=1); #remove bias from weights 178 | 179 | error = np.dot(delta, w); #Calculate error for curr layer 180 | 181 | return weights; 182 | 183 | def NeuralNetwork(epochs, X, Y, f, hiddenLayers, nodes, r=0.15): 184 | layers = hiddenLayers + 1; #Total number of layers in network 185 | weights = InitializeWeights(f, layers, nodes); 186 | 187 | for epoch in range(epochs): 188 | #Train weights 189 | weights = Train(X, Y, r, layers, weights); 190 | 191 | if(epoch % 25 == 0): 192 | print "Epoch ", epoch; 193 | 194 | return weights; 195 | 196 | 197 | ###_Main_### 198 | def main(): 199 | X, Y = Reader.ReadData('data.txt'); 200 | 201 | f = len(X[0].A1); 202 | h1 = 5; 203 | h2 = 10; 204 | o = len(Y[0].A1); 205 | hiddenLayers = 2; 206 | r = 0.15; 207 | epochs = 100; 208 | 209 | #print K_FoldValidation(5, X, Y, f, hiddenLayers, [h1,h2,o], epochs, r); 210 | 211 | weights = NeuralNetwork(epochs, X, Y, f, hiddenLayers, [h1,h2,o], r); 212 | print Accuracy(X, Y, weights, hiddenLayers); 213 | 214 | if __name__ == "__main__": 215 | main(); 216 | -------------------------------------------------------------------------------- /Classifiers/Neural Network/NeuralNetwork_Plot.py: -------------------------------------------------------------------------------- 1 | import NeuralNetwork as NN; 2 | import Reader; 3 | import numpy as np; 4 | from random import choice; 5 | from matplotlib import pyplot; 6 | 7 | 8 | def PlotFunction(X, Y, percentage, indexA, indexB): 9 | n = len(X); #Number of items 10 | split = int(n*percentage); 11 | 12 | features = len(X[0].A1); 13 | #Delete all columns but the ones on the given indexes 14 | for j in range(features): 15 | if(j == indexA or j == indexB): 16 | continue; 17 | 18 | X = np.delete(X, j, 1); 19 | 20 | testX = X[split:]; 21 | testY = Y[split:]; 22 | 23 | #The items will be sorted into classes in 
this list 24 | Points = [[] for i in range(len(testY[0].A1))]; 25 | 26 | f = 2; 27 | h1 = 5; 28 | h2 = 10; 29 | o = len(Y[0].A1); 30 | hiddenLayers = 2; 31 | r = 0.15; 32 | epochs = 100; 33 | 34 | weights = NN.NeuralNetwork(epochs, X, Y, f, hiddenLayers, [h1,h2,o], r); 35 | 36 | correct = 0; 37 | 38 | #Calculate accuracy 39 | for i in range(n): 40 | prediction = NN.Predict(X[i], weights, hiddenLayers+1); 41 | itemClass = list(Y[i].A1); 42 | 43 | if(prediction == itemClass): 44 | correct += 1; 45 | 46 | #Find index of class 47 | index = -1; 48 | for j in range(len(prediction)): 49 | if(prediction[j] == 1): 50 | index = j; 51 | break; 52 | 53 | Points[index].append(X[i]); 54 | 55 | accuracy = correct/float(n)*100; 56 | print "Accuracy ",accuracy; 57 | 58 | colors = ['r','b','g','c','m','y']; 59 | 60 | for i in range(len(Points)): 61 | p = Points[i]; 62 | Xa = []; 63 | Xb = []; 64 | 65 | #Choose color randomly from list, then remove it 66 | #(to avoid duplicates) 67 | color = choice(colors); 68 | colors.remove(color); 69 | 70 | for item in p: 71 | Xa.append(item[:, [0]].item(0)); 72 | Xb.append(item[:, [1]].item(0)); 73 | 74 | pyplot.plot(Xa,Xb,'o',color=color); 75 | 76 | pyplot.show(); 77 | 78 | 79 | def main(): 80 | X, Y = Reader.ReadData('data.txt'); 81 | 82 | PlotFunction(X, Y, 0.7, 2, 3); 83 | 84 | main(); 85 | -------------------------------------------------------------------------------- /Classifiers/Neural Network/Reader.py: -------------------------------------------------------------------------------- 1 | import numpy as np; 2 | import math; 3 | from random import shuffle; 4 | 5 | ###_Pre-Processing_### 6 | def ReadData(fileName): 7 | f = open(fileName); 8 | lines = f.read().splitlines(); 9 | f.close(); 10 | 11 | items = []; 12 | classes = []; 13 | 14 | for line in lines: 15 | line = line.split(','); #Split line on commas 16 | itemFeatures = []; #Temp list to hold feature values of the item 17 | 18 | for i in range(len(line)-1): 19 | value = float(line[i]); 20 | itemFeatures.append(value); 21 | 22 | #Add to classes the known classification for current item 23 | classes.append(line[-1]); 24 | #Add item data to items 25 | items.append(itemFeatures); 26 | 27 | #Map class names to numbers (from 0 to the number of classes) 28 | classes = map(lambda x: list(set(classes)).index(x), classes); 29 | 30 | X = np.matrix(items); #Convert data to numpy matrix 31 | Y = BuildY(classes); #Build the Y matrices 32 | n = len(items)-1; #The number of items 33 | 34 | toShuffle = []; #Temp array to shuffle X and Y at the same time 35 | 36 | for i in range(n+1): 37 | #Build toShuffle by packing Xi together with Yi 38 | toShuffle.append((X[i],Y[i])); 39 | 40 | shuffle(toShuffle); 41 | 42 | X = []; 43 | Y = []; 44 | for i in range(n+1): 45 | X.append(toShuffle[i][0]) 46 | Y.append(toShuffle[i][1]) 47 | 48 | return X,Y,n; 49 | 50 | def BuildY(Y): 51 | newY = []; 52 | #Number of classes is the largest number in Y plus 1 53 | classesNumber = max(Y)+1; 54 | 55 | for i in range(len(Y)): 56 | #Initialize vector with zeros, set to 1 the class index 57 | tempVector = [0]*classesNumber; 58 | tempVector[Y[i]] = 1; 59 | 60 | newY.append(tempVector); 61 | 62 | return np.matrix(newY); 63 | 64 | X, Y, n = ReadData('data.txt'); 65 | -------------------------------------------------------------------------------- /Classifiers/Neural Network/data.txt: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 
4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 
6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica -------------------------------------------------------------------------------- /Classifiers/Perceptron/Perceptron.py: -------------------------------------------------------------------------------- 1 | import math; #For pow and sqrt 2 | from random import shuffle; 3 | 4 | 5 | ###_Read Data_### 6 | def ReadData(fileName): 7 | #Read the file, splitting by lines 8 | f = open(fileName,'r'); 9 | lines = f.read().splitlines(); 10 | f.close(); 11 | 12 | #Split the first line by commas, remove the last element 13 | #and save the length of the rest. 
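    #For example, the header line 'SL,SW,PL,PW,Class' in data.txt yields the
    #feature list ['SL', 'SW', 'PL', 'PW']; the fifth column is the class.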
14 |     featuresNumber = len(lines[0].split(','))-1; #The number of features
15 | 
16 |     items = [];
17 |     classes = [];
18 |     features = lines[0].split(',')[:-1];
19 | 
20 |     for i in range(1, len(lines)):
21 |         line = lines[i].split(',');
22 | 
23 |         if(line[-1] not in classes):
24 |             classes.append(line[-1]);
25 | 
26 |         itemFeatures = {"Class" : line[-1], "Bias" : 1};
27 | 
28 |         for j in range(len(features)):
29 |             f = features[j]; #Get the feature at index j
30 |             v = float(line[j]);
31 | 
32 |             itemFeatures[f] = v;
33 | 
34 |         items.append(itemFeatures);
35 | 
36 |     shuffle(items);
37 | 
38 |     return items,classes,features;
39 | 
40 | 
41 | ###_Evaluation Functions_###
42 | def K_FoldValidation(K, Items, rate, epochs, classes, features):
43 |     if(K > len(Items)):
44 |         return -1;
45 | 
46 |     correct = 0; #The number of correct classifications
47 |     total = (len(Items)/K)*K; #The total number of classifications
48 | 
49 |     l = len(Items)/K; #The length of a fold
50 | 
51 |     for i in range(K):
52 |         #Train on all folds except the ith, which is held out for testing
53 |         trainingSet = Items[:i*l] + Items[(i+1)*l:];
54 |         testSet = Items[i*l:(i+1)*l];
55 | 
56 |         weights = CalculateWeights(trainingSet, rate, epochs, classes, features);
57 | 
58 |         for item in testSet:
59 |             itemClass = item["Class"];
60 | 
61 |             itemFeatures = {};
62 | 
63 |             for key in item:
64 |                 if(key != "Class"):
65 |                     #If key isn't "Class", add it to itemFeatures
66 |                     itemFeatures[key] = item[key];
67 | 
68 |             guess = Perceptron(itemFeatures, weights);
69 | 
70 |             if(guess == itemClass):
71 |                 #Guessed correctly
72 |                 correct += 1;
73 | 
74 |     return correct/float(total);
75 | 
76 | def Evaluate(times, K, Items, rate, epochs, classes, features):
77 |     accuracy = 0;
78 |     for t in range(times):
79 |         shuffle(Items);
80 |         accuracy += K_FoldValidation(K, Items, rate, epochs, classes, features);
81 | 
82 |     print accuracy/float(times);
83 | 
84 | 
85 | ###_Auxiliary Functions_###
86 | def AddDictionaries(d1, d2, rate):
87 |     d3 = {};
88 |     for i in d1:
89 |         d3[i] = d1[i] + rate*d2[i];
90 | 
91 |     return d3;
92 | 
93 | def SubDictionaries(d1, d2, rate):
94 |     d3 = {};
95 |     for i in d1:
96 |         d3[i] = d1[i] - rate*d2[i];
97 | 
98 |     return d3;
99 | 
100 | def CalculateConfidence(item, weight):
101 |     #Add the product of the weight and item values for each feature
102 |     confidence = 0;
103 | 
104 |     for k in weight:
105 |         confidence += weight[k]*item[k];
106 | 
107 |     return confidence;
108 | 
109 | 
110 | ###_Core Functions_###
111 | def CalculateWeights(trainingSet, rate, epochs, classes, features):
112 |     #Initialize weights at 0
113 |     weights = {};
114 | 
115 |     #Initialize weights dictionary. Weights is divided in classes.
116 |     #Each class has its own dictionary, which holds numerical values/weights
117 |     #for the features. 
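    #For the iris data this looks like (illustrative):
    #  weights = {'Iris-setosa':     {'Bias': 0, 'SL': 0, 'SW': 0, 'PL': 0, 'PW': 0},
    #             'Iris-versicolor': {...},
    #             'Iris-virginica':  {...}}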
118 | for c in classes: 119 | weights[c] = {"Bias":0}; 120 | for f in features: 121 | weights[c][f] = 0; 122 | 123 | for epoch in range(epochs): 124 | for item in trainingSet: 125 | #Iterate through trainingSet 126 | #Guess where item belongs 127 | y = -1; 128 | guess = ""; 129 | for w in weights: 130 | confidence = CalculateConfidence(item, weights[w]); 131 | 132 | if(confidence > y): 133 | y = confidence; 134 | guess = w; 135 | 136 | correct = item["Class"]; 137 | if(correct != guess): 138 | weights[guess] = SubDictionaries(weights[guess], item, rate); 139 | weights[correct] = AddDictionaries(weights[correct], item, rate); 140 | 141 | return weights; 142 | 143 | def Perceptron(item, weights): 144 | item["Bias"] = 1; #Augment item vector with bias 145 | m = -1; #Hold the maximum 146 | classification = ""; 147 | 148 | #Calculate chance of item being in each class, 149 | #pick the maximum. 150 | for w in weights: 151 | #Multiply the item vector with the class weights vector 152 | guess = CalculateConfidence(item, weights[w]); 153 | if(guess > m): 154 | #Our guess is better than our current best guess, 155 | #update max and classification 156 | m = guess; 157 | classification = w; 158 | 159 | return classification; 160 | 161 | 162 | ###_Main_### 163 | def main(): 164 | items, classes, features = ReadData('data.txt'); 165 | 166 | lRate = 0.1; 167 | epochs = 50; 168 | weights = CalculateWeights(items, lRate, epochs, classes, features); 169 | 170 | item = {'PW' : 1.4, 'PL' : 4.7, 'SW' : 3.2, 'SL' : 7.0}; 171 | print Perceptron(item, weights); 172 | 173 | #Evaluate(100, 5, items, lRate, epochs, classes, features); 174 | 175 | if __name__ == "__main__": 176 | main(); 177 | -------------------------------------------------------------------------------- /Classifiers/Perceptron/data.txt: -------------------------------------------------------------------------------- 1 | SL,SW,PL,PW,Class 2 | 5.1,3.5,1.4,0.2,Iris-setosa 3 | 4.9,3.0,1.4,0.2,Iris-setosa 4 | 4.7,3.2,1.3,0.2,Iris-setosa 5 | 4.6,3.1,1.5,0.2,Iris-setosa 6 | 5.0,3.6,1.4,0.2,Iris-setosa 7 | 5.4,3.9,1.7,0.4,Iris-setosa 8 | 4.6,3.4,1.4,0.3,Iris-setosa 9 | 5.0,3.4,1.5,0.2,Iris-setosa 10 | 4.4,2.9,1.4,0.2,Iris-setosa 11 | 4.9,3.1,1.5,0.1,Iris-setosa 12 | 5.4,3.7,1.5,0.2,Iris-setosa 13 | 4.8,3.4,1.6,0.2,Iris-setosa 14 | 4.8,3.0,1.4,0.1,Iris-setosa 15 | 4.3,3.0,1.1,0.1,Iris-setosa 16 | 5.8,4.0,1.2,0.2,Iris-setosa 17 | 5.7,4.4,1.5,0.4,Iris-setosa 18 | 5.4,3.9,1.3,0.4,Iris-setosa 19 | 5.1,3.5,1.4,0.3,Iris-setosa 20 | 5.7,3.8,1.7,0.3,Iris-setosa 21 | 5.1,3.8,1.5,0.3,Iris-setosa 22 | 5.4,3.4,1.7,0.2,Iris-setosa 23 | 5.1,3.7,1.5,0.4,Iris-setosa 24 | 4.6,3.6,1.0,0.2,Iris-setosa 25 | 5.1,3.3,1.7,0.5,Iris-setosa 26 | 4.8,3.4,1.9,0.2,Iris-setosa 27 | 5.0,3.0,1.6,0.2,Iris-setosa 28 | 5.0,3.4,1.6,0.4,Iris-setosa 29 | 5.2,3.5,1.5,0.2,Iris-setosa 30 | 5.2,3.4,1.4,0.2,Iris-setosa 31 | 4.7,3.2,1.6,0.2,Iris-setosa 32 | 4.8,3.1,1.6,0.2,Iris-setosa 33 | 5.4,3.4,1.5,0.4,Iris-setosa 34 | 5.2,4.1,1.5,0.1,Iris-setosa 35 | 5.5,4.2,1.4,0.2,Iris-setosa 36 | 4.9,3.1,1.5,0.1,Iris-setosa 37 | 5.0,3.2,1.2,0.2,Iris-setosa 38 | 5.5,3.5,1.3,0.2,Iris-setosa 39 | 4.9,3.1,1.5,0.1,Iris-setosa 40 | 4.4,3.0,1.3,0.2,Iris-setosa 41 | 5.1,3.4,1.5,0.2,Iris-setosa 42 | 5.0,3.5,1.3,0.3,Iris-setosa 43 | 4.5,2.3,1.3,0.3,Iris-setosa 44 | 4.4,3.2,1.3,0.2,Iris-setosa 45 | 5.0,3.5,1.6,0.6,Iris-setosa 46 | 5.1,3.8,1.9,0.4,Iris-setosa 47 | 4.8,3.0,1.4,0.3,Iris-setosa 48 | 5.1,3.8,1.6,0.2,Iris-setosa 49 | 4.6,3.2,1.4,0.2,Iris-setosa 50 | 5.3,3.7,1.5,0.2,Iris-setosa 51 | 5.0,3.3,1.4,0.2,Iris-setosa 
52 | 7.0,3.2,4.7,1.4,Iris-versicolor 53 | 6.4,3.2,4.5,1.5,Iris-versicolor 54 | 6.9,3.1,4.9,1.5,Iris-versicolor 55 | 5.5,2.3,4.0,1.3,Iris-versicolor 56 | 6.5,2.8,4.6,1.5,Iris-versicolor 57 | 5.7,2.8,4.5,1.3,Iris-versicolor 58 | 6.3,3.3,4.7,1.6,Iris-versicolor 59 | 4.9,2.4,3.3,1.0,Iris-versicolor 60 | 6.6,2.9,4.6,1.3,Iris-versicolor 61 | 5.2,2.7,3.9,1.4,Iris-versicolor 62 | 5.0,2.0,3.5,1.0,Iris-versicolor 63 | 5.9,3.0,4.2,1.5,Iris-versicolor 64 | 6.0,2.2,4.0,1.0,Iris-versicolor 65 | 6.1,2.9,4.7,1.4,Iris-versicolor 66 | 5.6,2.9,3.6,1.3,Iris-versicolor 67 | 6.7,3.1,4.4,1.4,Iris-versicolor 68 | 5.6,3.0,4.5,1.5,Iris-versicolor 69 | 5.8,2.7,4.1,1.0,Iris-versicolor 70 | 6.2,2.2,4.5,1.5,Iris-versicolor 71 | 5.6,2.5,3.9,1.1,Iris-versicolor 72 | 5.9,3.2,4.8,1.8,Iris-versicolor 73 | 6.1,2.8,4.0,1.3,Iris-versicolor 74 | 6.3,2.5,4.9,1.5,Iris-versicolor 75 | 6.1,2.8,4.7,1.2,Iris-versicolor 76 | 6.4,2.9,4.3,1.3,Iris-versicolor 77 | 6.6,3.0,4.4,1.4,Iris-versicolor 78 | 6.8,2.8,4.8,1.4,Iris-versicolor 79 | 6.7,3.0,5.0,1.7,Iris-versicolor 80 | 6.0,2.9,4.5,1.5,Iris-versicolor 81 | 5.7,2.6,3.5,1.0,Iris-versicolor 82 | 5.5,2.4,3.8,1.1,Iris-versicolor 83 | 5.5,2.4,3.7,1.0,Iris-versicolor 84 | 5.8,2.7,3.9,1.2,Iris-versicolor 85 | 6.0,2.7,5.1,1.6,Iris-versicolor 86 | 5.4,3.0,4.5,1.5,Iris-versicolor 87 | 6.0,3.4,4.5,1.6,Iris-versicolor 88 | 6.7,3.1,4.7,1.5,Iris-versicolor 89 | 6.3,2.3,4.4,1.3,Iris-versicolor 90 | 5.6,3.0,4.1,1.3,Iris-versicolor 91 | 5.5,2.5,4.0,1.3,Iris-versicolor 92 | 5.5,2.6,4.4,1.2,Iris-versicolor 93 | 6.1,3.0,4.6,1.4,Iris-versicolor 94 | 5.8,2.6,4.0,1.2,Iris-versicolor 95 | 5.0,2.3,3.3,1.0,Iris-versicolor 96 | 5.6,2.7,4.2,1.3,Iris-versicolor 97 | 5.7,3.0,4.2,1.2,Iris-versicolor 98 | 5.7,2.9,4.2,1.3,Iris-versicolor 99 | 6.2,2.9,4.3,1.3,Iris-versicolor 100 | 5.1,2.5,3.0,1.1,Iris-versicolor 101 | 5.7,2.8,4.1,1.3,Iris-versicolor 102 | 6.3,3.3,6.0,2.5,Iris-virginica 103 | 5.8,2.7,5.1,1.9,Iris-virginica 104 | 7.1,3.0,5.9,2.1,Iris-virginica 105 | 6.3,2.9,5.6,1.8,Iris-virginica 106 | 6.5,3.0,5.8,2.2,Iris-virginica 107 | 7.6,3.0,6.6,2.1,Iris-virginica 108 | 4.9,2.5,4.5,1.7,Iris-virginica 109 | 7.3,2.9,6.3,1.8,Iris-virginica 110 | 6.7,2.5,5.8,1.8,Iris-virginica 111 | 7.2,3.6,6.1,2.5,Iris-virginica 112 | 6.5,3.2,5.1,2.0,Iris-virginica 113 | 6.4,2.7,5.3,1.9,Iris-virginica 114 | 6.8,3.0,5.5,2.1,Iris-virginica 115 | 5.7,2.5,5.0,2.0,Iris-virginica 116 | 5.8,2.8,5.1,2.4,Iris-virginica 117 | 6.4,3.2,5.3,2.3,Iris-virginica 118 | 6.5,3.0,5.5,1.8,Iris-virginica 119 | 7.7,3.8,6.7,2.2,Iris-virginica 120 | 7.7,2.6,6.9,2.3,Iris-virginica 121 | 6.0,2.2,5.0,1.5,Iris-virginica 122 | 6.9,3.2,5.7,2.3,Iris-virginica 123 | 5.6,2.8,4.9,2.0,Iris-virginica 124 | 7.7,2.8,6.7,2.0,Iris-virginica 125 | 6.3,2.7,4.9,1.8,Iris-virginica 126 | 6.7,3.3,5.7,2.1,Iris-virginica 127 | 7.2,3.2,6.0,1.8,Iris-virginica 128 | 6.2,2.8,4.8,1.8,Iris-virginica 129 | 6.1,3.0,4.9,1.8,Iris-virginica 130 | 6.4,2.8,5.6,2.1,Iris-virginica 131 | 7.2,3.0,5.8,1.6,Iris-virginica 132 | 7.4,2.8,6.1,1.9,Iris-virginica 133 | 7.9,3.8,6.4,2.0,Iris-virginica 134 | 6.4,2.8,5.6,2.2,Iris-virginica 135 | 6.3,2.8,5.1,1.5,Iris-virginica 136 | 6.1,2.6,5.6,1.4,Iris-virginica 137 | 7.7,3.0,6.1,2.3,Iris-virginica 138 | 6.3,3.4,5.6,2.4,Iris-virginica 139 | 6.4,3.1,5.5,1.8,Iris-virginica 140 | 6.0,3.0,4.8,1.8,Iris-virginica 141 | 6.9,3.1,5.4,2.1,Iris-virginica 142 | 6.7,3.1,5.6,2.4,Iris-virginica 143 | 6.9,3.1,5.1,2.3,Iris-virginica 144 | 5.8,2.7,5.1,1.9,Iris-virginica 145 | 6.8,3.2,5.9,2.3,Iris-virginica 146 | 6.7,3.3,5.7,2.5,Iris-virginica 147 | 6.7,3.0,5.2,2.3,Iris-virginica 
148 | 6.3,2.5,5.0,1.9,Iris-virginica 149 | 6.5,3.0,5.2,2.0,Iris-virginica 150 | 6.2,3.4,5.4,2.3,Iris-virginica 151 | 5.9,3.0,5.1,1.8,Iris-virginica -------------------------------------------------------------------------------- /Classifiers/Perceptron/data2.txt: -------------------------------------------------------------------------------- 1 | X,Y,Class 2 | 2.7810836,2.550537003,1 3 | 1.465489372,2.362125076,1 4 | 3.396561688,4.400293529,1 5 | 1.38807019,1.850220317,2 6 | 3.06407232,3.005305973,2 7 | 7.627531214,2.759262235,2 8 | 5.332441248,2.088626775,3 9 | 6.922596716,1.77106367,3 10 | 8.675418651,-0.242068655,3 11 | 7.673756466,3.508563011,1 -------------------------------------------------------------------------------- /Classifiers/kNN/data.txt: -------------------------------------------------------------------------------- 1 | SL,SW,PL,PW,Class 2 | 5.1,3.5,1.4,0.2,Iris-setosa 3 | 4.9,3.0,1.4,0.2,Iris-setosa 4 | 4.7,3.2,1.3,0.2,Iris-setosa 5 | 4.6,3.1,1.5,0.2,Iris-setosa 6 | 5.0,3.6,1.4,0.2,Iris-setosa 7 | 5.4,3.9,1.7,0.4,Iris-setosa 8 | 4.6,3.4,1.4,0.3,Iris-setosa 9 | 5.0,3.4,1.5,0.2,Iris-setosa 10 | 4.4,2.9,1.4,0.2,Iris-setosa 11 | 4.9,3.1,1.5,0.1,Iris-setosa 12 | 5.4,3.7,1.5,0.2,Iris-setosa 13 | 4.8,3.4,1.6,0.2,Iris-setosa 14 | 4.8,3.0,1.4,0.1,Iris-setosa 15 | 4.3,3.0,1.1,0.1,Iris-setosa 16 | 5.8,4.0,1.2,0.2,Iris-setosa 17 | 5.7,4.4,1.5,0.4,Iris-setosa 18 | 5.4,3.9,1.3,0.4,Iris-setosa 19 | 5.1,3.5,1.4,0.3,Iris-setosa 20 | 5.7,3.8,1.7,0.3,Iris-setosa 21 | 5.1,3.8,1.5,0.3,Iris-setosa 22 | 5.4,3.4,1.7,0.2,Iris-setosa 23 | 5.1,3.7,1.5,0.4,Iris-setosa 24 | 4.6,3.6,1.0,0.2,Iris-setosa 25 | 5.1,3.3,1.7,0.5,Iris-setosa 26 | 4.8,3.4,1.9,0.2,Iris-setosa 27 | 5.0,3.0,1.6,0.2,Iris-setosa 28 | 5.0,3.4,1.6,0.4,Iris-setosa 29 | 5.2,3.5,1.5,0.2,Iris-setosa 30 | 5.2,3.4,1.4,0.2,Iris-setosa 31 | 4.7,3.2,1.6,0.2,Iris-setosa 32 | 4.8,3.1,1.6,0.2,Iris-setosa 33 | 5.4,3.4,1.5,0.4,Iris-setosa 34 | 5.2,4.1,1.5,0.1,Iris-setosa 35 | 5.5,4.2,1.4,0.2,Iris-setosa 36 | 4.9,3.1,1.5,0.1,Iris-setosa 37 | 5.0,3.2,1.2,0.2,Iris-setosa 38 | 5.5,3.5,1.3,0.2,Iris-setosa 39 | 4.9,3.1,1.5,0.1,Iris-setosa 40 | 4.4,3.0,1.3,0.2,Iris-setosa 41 | 5.1,3.4,1.5,0.2,Iris-setosa 42 | 5.0,3.5,1.3,0.3,Iris-setosa 43 | 4.5,2.3,1.3,0.3,Iris-setosa 44 | 4.4,3.2,1.3,0.2,Iris-setosa 45 | 5.0,3.5,1.6,0.6,Iris-setosa 46 | 5.1,3.8,1.9,0.4,Iris-setosa 47 | 4.8,3.0,1.4,0.3,Iris-setosa 48 | 5.1,3.8,1.6,0.2,Iris-setosa 49 | 4.6,3.2,1.4,0.2,Iris-setosa 50 | 5.3,3.7,1.5,0.2,Iris-setosa 51 | 5.0,3.3,1.4,0.2,Iris-setosa 52 | 7.0,3.2,4.7,1.4,Iris-versicolor 53 | 6.4,3.2,4.5,1.5,Iris-versicolor 54 | 6.9,3.1,4.9,1.5,Iris-versicolor 55 | 5.5,2.3,4.0,1.3,Iris-versicolor 56 | 6.5,2.8,4.6,1.5,Iris-versicolor 57 | 5.7,2.8,4.5,1.3,Iris-versicolor 58 | 6.3,3.3,4.7,1.6,Iris-versicolor 59 | 4.9,2.4,3.3,1.0,Iris-versicolor 60 | 6.6,2.9,4.6,1.3,Iris-versicolor 61 | 5.2,2.7,3.9,1.4,Iris-versicolor 62 | 5.0,2.0,3.5,1.0,Iris-versicolor 63 | 5.9,3.0,4.2,1.5,Iris-versicolor 64 | 6.0,2.2,4.0,1.0,Iris-versicolor 65 | 6.1,2.9,4.7,1.4,Iris-versicolor 66 | 5.6,2.9,3.6,1.3,Iris-versicolor 67 | 6.7,3.1,4.4,1.4,Iris-versicolor 68 | 5.6,3.0,4.5,1.5,Iris-versicolor 69 | 5.8,2.7,4.1,1.0,Iris-versicolor 70 | 6.2,2.2,4.5,1.5,Iris-versicolor 71 | 5.6,2.5,3.9,1.1,Iris-versicolor 72 | 5.9,3.2,4.8,1.8,Iris-versicolor 73 | 6.1,2.8,4.0,1.3,Iris-versicolor 74 | 6.3,2.5,4.9,1.5,Iris-versicolor 75 | 6.1,2.8,4.7,1.2,Iris-versicolor 76 | 6.4,2.9,4.3,1.3,Iris-versicolor 77 | 6.6,3.0,4.4,1.4,Iris-versicolor 78 | 6.8,2.8,4.8,1.4,Iris-versicolor 79 | 6.7,3.0,5.0,1.7,Iris-versicolor 80 | 
6.0,2.9,4.5,1.5,Iris-versicolor 81 | 5.7,2.6,3.5,1.0,Iris-versicolor 82 | 5.5,2.4,3.8,1.1,Iris-versicolor 83 | 5.5,2.4,3.7,1.0,Iris-versicolor 84 | 5.8,2.7,3.9,1.2,Iris-versicolor 85 | 6.0,2.7,5.1,1.6,Iris-versicolor 86 | 5.4,3.0,4.5,1.5,Iris-versicolor 87 | 6.0,3.4,4.5,1.6,Iris-versicolor 88 | 6.7,3.1,4.7,1.5,Iris-versicolor 89 | 6.3,2.3,4.4,1.3,Iris-versicolor 90 | 5.6,3.0,4.1,1.3,Iris-versicolor 91 | 5.5,2.5,4.0,1.3,Iris-versicolor 92 | 5.5,2.6,4.4,1.2,Iris-versicolor 93 | 6.1,3.0,4.6,1.4,Iris-versicolor 94 | 5.8,2.6,4.0,1.2,Iris-versicolor 95 | 5.0,2.3,3.3,1.0,Iris-versicolor 96 | 5.6,2.7,4.2,1.3,Iris-versicolor 97 | 5.7,3.0,4.2,1.2,Iris-versicolor 98 | 5.7,2.9,4.2,1.3,Iris-versicolor 99 | 6.2,2.9,4.3,1.3,Iris-versicolor 100 | 5.1,2.5,3.0,1.1,Iris-versicolor 101 | 5.7,2.8,4.1,1.3,Iris-versicolor 102 | 6.3,3.3,6.0,2.5,Iris-virginica 103 | 5.8,2.7,5.1,1.9,Iris-virginica 104 | 7.1,3.0,5.9,2.1,Iris-virginica 105 | 6.3,2.9,5.6,1.8,Iris-virginica 106 | 6.5,3.0,5.8,2.2,Iris-virginica 107 | 7.6,3.0,6.6,2.1,Iris-virginica 108 | 4.9,2.5,4.5,1.7,Iris-virginica 109 | 7.3,2.9,6.3,1.8,Iris-virginica 110 | 6.7,2.5,5.8,1.8,Iris-virginica 111 | 7.2,3.6,6.1,2.5,Iris-virginica 112 | 6.5,3.2,5.1,2.0,Iris-virginica 113 | 6.4,2.7,5.3,1.9,Iris-virginica 114 | 6.8,3.0,5.5,2.1,Iris-virginica 115 | 5.7,2.5,5.0,2.0,Iris-virginica 116 | 5.8,2.8,5.1,2.4,Iris-virginica 117 | 6.4,3.2,5.3,2.3,Iris-virginica 118 | 6.5,3.0,5.5,1.8,Iris-virginica 119 | 7.7,3.8,6.7,2.2,Iris-virginica 120 | 7.7,2.6,6.9,2.3,Iris-virginica 121 | 6.0,2.2,5.0,1.5,Iris-virginica 122 | 6.9,3.2,5.7,2.3,Iris-virginica 123 | 5.6,2.8,4.9,2.0,Iris-virginica 124 | 7.7,2.8,6.7,2.0,Iris-virginica 125 | 6.3,2.7,4.9,1.8,Iris-virginica 126 | 6.7,3.3,5.7,2.1,Iris-virginica 127 | 7.2,3.2,6.0,1.8,Iris-virginica 128 | 6.2,2.8,4.8,1.8,Iris-virginica 129 | 6.1,3.0,4.9,1.8,Iris-virginica 130 | 6.4,2.8,5.6,2.1,Iris-virginica 131 | 7.2,3.0,5.8,1.6,Iris-virginica 132 | 7.4,2.8,6.1,1.9,Iris-virginica 133 | 7.9,3.8,6.4,2.0,Iris-virginica 134 | 6.4,2.8,5.6,2.2,Iris-virginica 135 | 6.3,2.8,5.1,1.5,Iris-virginica 136 | 6.1,2.6,5.6,1.4,Iris-virginica 137 | 7.7,3.0,6.1,2.3,Iris-virginica 138 | 6.3,3.4,5.6,2.4,Iris-virginica 139 | 6.4,3.1,5.5,1.8,Iris-virginica 140 | 6.0,3.0,4.8,1.8,Iris-virginica 141 | 6.9,3.1,5.4,2.1,Iris-virginica 142 | 6.7,3.1,5.6,2.4,Iris-virginica 143 | 6.9,3.1,5.1,2.3,Iris-virginica 144 | 5.8,2.7,5.1,1.9,Iris-virginica 145 | 6.8,3.2,5.9,2.3,Iris-virginica 146 | 6.7,3.3,5.7,2.5,Iris-virginica 147 | 6.7,3.0,5.2,2.3,Iris-virginica 148 | 6.3,2.5,5.0,1.9,Iris-virginica 149 | 6.5,3.0,5.2,2.0,Iris-virginica 150 | 6.2,3.4,5.4,2.3,Iris-virginica 151 | 5.9,3.0,5.1,1.8,Iris-virginica -------------------------------------------------------------------------------- /Classifiers/kNN/data2.txt: -------------------------------------------------------------------------------- 1 | Height,Weight,Age,Class 2 | 1.70,65,20,Programmer 3 | 1.90,85,33,Builder 4 | 1.78,76,31,Builder 5 | 1.73,74,24,Programmer 6 | 1.81,75,35,Builder 7 | 1.73,70,75,Scientist 8 | 1.80,71,63,Scientist 9 | 1.75,69,25,Programmer -------------------------------------------------------------------------------- /Classifiers/kNN/kNearestNeighbours.py: -------------------------------------------------------------------------------- 1 | import math; #For pow and sqrt 2 | from random import shuffle; 3 | 4 | 5 | ###_Reading_### 6 | def ReadData(fileName): 7 | #Read the file, splitting by lines 8 | f = open(fileName, 'r'); 9 | lines = f.read().splitlines(); 10 | f.close(); 11 | 12 | #Split the first line by 
commas, remove the last element 13 | #(the class label) and save the rest into a list. 14 | #The list holds the feature names of the data set. 15 | features = lines[0].split(',')[:-1]; 16 | 17 | items = []; 18 | 19 | for i in range(1,len(lines)): 20 | line = lines[i].split(','); 21 | 22 | itemFeatures = {"Class" : line[-1]}; 23 | 24 | for j in range(len(features)): 25 | f = features[j]; #Get the feature at index j 26 | v = float(line[j]); #Convert feature value to float 27 | 28 | itemFeatures[f] = v; #Add feature value to dict 29 | 30 | items.append(itemFeatures); 31 | 32 | shuffle(items); 33 | 34 | return items; 35 | 36 | 37 | ###_Auxiliary Functions_### 38 | def EuclideanDistance(x, y): 39 | S = 0; #The sum of the squared differences of the elements 40 | for key in x.keys(): 41 | S += math.pow(x[key]-y[key], 2); 42 | 43 | return math.sqrt(S); #The square root of the sum 44 | 45 | def CalculateNeighborsClass(neighbors, k): 46 | count = {}; 47 | 48 | for i in range(min(k, len(neighbors))): #Guard against fewer than k neighbors 49 | if(neighbors[i][1] not in count): 50 | #The class at the ith index is not in the count dict. 51 | #Initialize it to 1. 52 | count[neighbors[i][1]] = 1; 53 | else: 54 | #Found another item of this class. Increment its counter. 55 | count[neighbors[i][1]] += 1; 56 | 57 | return count; 58 | 59 | def FindMax(Dict): 60 | #Find the key with the maximum value in the dict; return both 61 | maximum = -1; 62 | classification = ""; 63 | 64 | for key in Dict.keys(): 65 | if(Dict[key] > maximum): 66 | maximum = Dict[key]; 67 | classification = key; 68 | 69 | return classification, maximum; 70 | 71 | 72 | ###_Core Functions_### 73 | def Classify(nItem, k, Items): 74 | #Hold nearest neighbours. First item is distance, second class 75 | neighbors = []; 76 | 77 | for item in Items: 78 | #Find Euclidean Distance 79 | distance = EuclideanDistance(nItem, item); 80 | 81 | #Update neighbors, 82 | #either adding the current item to neighbors or not.
83 | neighbors = UpdateNeighbors(neighbors, item, distance, k); 84 | 85 | #Count the number of each class in neighbors 86 | count = CalculateNeighborsClass(neighbors, k); 87 | 88 | #Find the max in count, i.e. the class with the most appearances 89 | return FindMax(count); 90 | 91 | def UpdateNeighbors(neighbors, item, distance, k): 92 | if(len(neighbors) < k): 93 | #List is not full, add new item and sort 94 | neighbors.append([distance, item["Class"]]); 95 | neighbors = sorted(neighbors); 96 | else: 97 | #List is full 98 | #Check if new item should be entered 99 | if(neighbors[-1][0] > distance): 100 | #If yes, replace the last element with new item 101 | neighbors[-1] = [distance, item["Class"]]; 102 | neighbors = sorted(neighbors); 103 | 104 | return neighbors; 105 | 106 | 107 | ###_Evaluation Functions_### 108 | def K_FoldValidation(K, k, Items): 109 | if(K > len(Items)): 110 | return -1; 111 | 112 | correct = 0; #The number of correct classifications 113 | l = len(Items)/K; #The length of a fold (integer division) 114 | 115 | total = l*K; #The total number of classifications 116 | 117 | for i in range(K): 118 | #Train on every fold except the ith; test on the ith fold 119 | trainingSet = Items[:i*l] + Items[(i+1)*l:]; 120 | testSet = Items[i*l:(i+1)*l]; 121 | 122 | for item in testSet: 123 | itemClass = item["Class"]; 124 | 125 | itemFeatures = {}; 126 | 127 | #Get feature values 128 | for key in item: 129 | if(key != "Class"): 130 | #If key isn't "Class", add it to itemFeatures 131 | itemFeatures[key] = item[key]; 132 | 133 | #Categorize item based on its feature values 134 | guess = Classify(itemFeatures, k, trainingSet)[0]; 135 | 136 | if(guess == itemClass): 137 | #Guessed correctly 138 | correct += 1; 139 | 140 | accuracy = correct/float(total); 141 | return accuracy; 142 | 143 | def Evaluate(K,k,items,iterations): 144 | #Run the validation the given number of iterations, average the accuracy 145 | accuracy = 0; 146 | for i in range(iterations): 147 | shuffle(items); 148 | accuracy += K_FoldValidation(K, k, items); 149 | 150 | print accuracy/float(iterations); 151 | 152 | 153 | ###_Main_### 154 | def main(): 155 | items = ReadData('data.txt'); 156 | 157 | #newItem = {'PW' : 1.4, 'PL' : 4.7, 'SW' : 3.2, 'SL' : 7.0}; 158 | #print Classify(newItem, 3, items); 159 | #K_FoldValidation(5, 3, items); 160 | Evaluate(5, 5, items, 100); 161 | 162 | if __name__ == "__main__": 163 | main(); 164 | -------------------------------------------------------------------------------- /Clustering/kMeans - Online/data.txt: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 
4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 
6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica -------------------------------------------------------------------------------- /Clustering/kMeans - Online/kMeans.py: -------------------------------------------------------------------------------- 1 | import math; #For pow and sqrt 2 | import sys; 3 | from random import shuffle, uniform; 4 | 5 | ###_Pre-Processing_### 6 | def ReadData(fileName): 7 | #Read the file, splitting by lines 8 | f = open(fileName,'r'); 9 | lines = f.read().splitlines(); 10 | f.close(); 11 | 12 | items = []; 13 | 14 | for i in range(len(lines)): #data.txt has no header row, so read every line 15 | line = lines[i].split(','); 16 | itemFeatures = []; 17 | 18 | for j in range(len(line)-1): 19 | v = float(line[j]); #Convert feature value to float 20 | itemFeatures.append(v); #Add feature value to list 21 | 22 | items.append(itemFeatures); 23 | 24 | shuffle(items); 25 | 26 | return items; 27 | 28 | 29 | ###_Auxiliary Functions_### 30 | def FindColMinMax(items): 31 | n = len(items[0]); 32 | minima = [sys.maxint for i in range(n)]; 33 | maxima = [-sys.maxint -1 for i in range(n)]; 34 | 35 | for item in items: 36 | for f in range(len(item)): 37 | if(item[f] < minima[f]): 38 | minima[f] = item[f]; 39 | 40 | if(item[f] > maxima[f]): 41 | maxima[f] = item[f]; 42 | 43 | return minima,maxima; 44 | 45 | def EuclideanDistance(x,y): 46 | S = 0; #The sum of the squared differences of the elements 47 | for i in range(len(x)): 48 | S += math.pow(x[i]-y[i],2); 49 | 50 | return math.sqrt(S); #The square root of the sum 51 | 52 | def InitializeMeans(items,k,cMin,cMax): 53 | #Initialize means to random numbers between 54 | #the min and max of each column/feature 55 | 56 | f = len(items[0]); #number of features 57 | means = [[0 for i in range(f)] for j in range(k)]; 58 | 59 | for mean in means: 60 | for i in range(len(mean)): 61 | #Set value to a random float 62 | #(the +-1 offset keeps initial means away from the extremes) 63 | mean[i] = uniform(cMin[i]+1,cMax[i]-1); 64 | 65 | return means; 66 | 67 | def UpdateMean(n,mean,item): 68 | for i in range(len(mean)): 69 | m = mean[i]; 70 | m = (m*(n-1)+item[i])/float(n); #Incremental (running) mean update 71 | mean[i] = round(m,3); 72 | 73 | return mean; 74 | 75 | 76 | ###_Core Functions_### 77 | def FindClusters(means,items): 78 | clusters = [[] for i in range(len(means))]; #Init clusters 79 | 80 | for item in items: 81 | #Classify item into a cluster 82 | index = Classify(means,item); 83 | 84 | #Add item to cluster 85 | clusters[index].append(item); 86 | 87 | return clusters; 88 | 89 | def Classify(means,item): 90 | #Classify item to the mean with minimum distance 91 | 92 | minimum = sys.maxint; 93 | index = -1; 94 | 95 | for i in range(len(means)): 96 | #Find distance from item to mean 97 | dis = EuclideanDistance(item,means[i]); 98 | 99 | if(dis < 
minimum): 100 | minimum = dis; 101 | index = i; 102 | 103 | return index; 104 | 105 | def CalculateMeans(k,items): 106 | #Find the minima and maxima for columns 107 | cMin, cMax = FindColMinMax(items); 108 | 109 | #Initialize means at random points 110 | means = InitializeMeans(items,k,cMin,cMax); 111 | 112 | #Initialize clusters, the array to hold 113 | #the number of items assigned to each mean 114 | clusters = [0 for i in range(len(means))]; 115 | 116 | #This is the online (sequential) variant: each item is 117 | #processed exactly once and the matched mean is updated immediately. 118 | 119 | #Calculate means in a single online pass 120 | #over the (already shuffled) items 121 | 122 | for i in range(len(items)): 123 | item = items[i]; 124 | #Classify item into a cluster and update the 125 | #corresponding mean. 126 | 127 | index = Classify(means,item); 128 | 129 | clusters[index] += 1; 130 | means[index] = UpdateMean(clusters[index],means[index],item); 131 | 132 | return means; 133 | 134 | 135 | ###_Main_### 136 | def main(): 137 | items = ReadData('data.txt'); 138 | 139 | k = 3; 140 | 141 | means = CalculateMeans(k,items); 142 | print "Means = ", means; 143 | 144 | clusters = FindClusters(means,items); 145 | print "Clusters: ", clusters; 146 | 147 | #newItem = [5.4,3.7,1.5,0.2]; 148 | #print Classify(means,newItem); 149 | 150 | if __name__ == "__main__": 151 | main(); 152 | -------------------------------------------------------------------------------- /Clustering/kMeans - Online/kMeans_Plot.py: -------------------------------------------------------------------------------- 1 | import kMeans; 2 | import numpy as np; 3 | from random import choice; 4 | from matplotlib import pyplot; 5 | 6 | def CutToTwoFeatures(items,indexA,indexB): 7 | n = len(items); 8 | X = []; 9 | for i in range(n): 10 | item = items[i]; 11 | newItem = [item[indexA],item[indexB]]; 12 | X.append(newItem); 13 | 14 | return X; 15 | 16 | 17 | def PlotClusters(clusters): 18 | n = len(clusters); 19 | #Group the (already two-dimensional) items by cluster into X 20 | X = [[] for i in range(n)]; 21 | 22 | for i in range(n): 23 | cluster = clusters[i]; 24 | for item in cluster: 25 | X[i].append(item); 26 | 27 | colors = ['r','b','g','c','m','y']; 28 | 29 | for x in X: 30 | #Choose color randomly from list, then remove it 31 | #(to avoid duplicates) 32 | c = choice(colors); 33 | colors.remove(c); 34 | 35 | Xa = []; 36 | Xb = []; 37 | 38 | for item in x: 39 | Xa.append(item[0]); 40 | Xb.append(item[1]); 41 | 42 | pyplot.plot(Xa,Xb,'o',color=c); 43 | 44 | pyplot.show(); 45 | 46 | 47 | def main(): 48 | items = kMeans.ReadData('data.txt'); 49 | items = CutToTwoFeatures(items,2,3); 50 | 51 | k = 3; 52 | means = kMeans.CalculateMeans(k,items); 53 | clusters = kMeans.FindClusters(means,items); 54 | 55 | PlotClusters(clusters); 56 | 57 | main(); 58 | -------------------------------------------------------------------------------- /Clustering/kMeans - Standard/data.txt: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 
5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 
5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica -------------------------------------------------------------------------------- /Clustering/kMeans - Standard/kMeans.py: -------------------------------------------------------------------------------- 1 | import math; #For pow and sqrt 2 | import sys; 3 | from random import shuffle, uniform; 4 | 5 | ###_Pre-Processing_### 6 | def ReadData(fileName): 7 | #Read the file, splitting by lines 8 | f = open(fileName,'r'); 9 | lines = f.read().splitlines(); 10 | f.close(); 11 | 12 | items = []; 13 | 14 | for i in range(len(lines)): #data.txt has no header row, so read every line 15 | line = lines[i].split(','); 16 | itemFeatures = []; 17 | 18 | for j in range(len(line)-1): 19 | v = float(line[j]); #Convert feature value to float 20 | itemFeatures.append(v); #Add feature value to list 21 | 22 | items.append(itemFeatures); 23 | 24 | shuffle(items); 25 | 26 | return items; 27 | 28 | 29 | ###_Auxiliary Functions_### 30 | def FindColMinMax(items): 31 | n = len(items[0]); 32 | minima = [sys.maxint for i in range(n)]; 33 | maxima = [-sys.maxint -1 for i in range(n)]; 34 | 35 | for item in items: 36 | for f in range(len(item)): 37 | if(item[f] < minima[f]): 38 | minima[f] = item[f]; 39 | 40 | if(item[f] > maxima[f]): 41 | maxima[f] = item[f]; 42 | 43 | return minima,maxima; 44 | 45 | def EuclideanDistance(x,y): 46 | S = 0; #The sum of the squared differences of the elements 47 | for i in range(len(x)): 48 | S += math.pow(x[i]-y[i],2); 49 | 50 | return math.sqrt(S); #The square root of the sum 51 | 52 | def InitializeMeans(items,k,cMin,cMax): 53 | #Initialize means to random numbers between 54 | #the min and max of each column/feature 55 | 56 | f = len(items[0]); #number of features 57 | means = [[0 for i in range(f)] for j in range(k)]; 58 | 59 | for mean in means: 60 | for i in range(len(mean)): 61 | #Set value to a random float 62 | #(the +-1 offset keeps initial means away from the extremes) 63 | mean[i] = uniform(cMin[i]+1,cMax[i]-1); 64 | 65 | return means; 66 | 67 | def UpdateMean(n,mean,item): 68 | for i in range(len(mean)): 69 | m = mean[i]; 70 | m = (m*(n-1)+item[i])/float(n); #Incremental (running) mean update 71 | mean[i] = round(m,3); 72 | 73 | return mean; 74 | 75 | def FindClusters(means,items): 76 | clusters = [[] for i in range(len(means))]; #Init clusters 77 | 78 | for item in 
items: 79 | #Classify item into a cluster 80 | index = Classify(means,item); 81 | 82 | #Add item to cluster 83 | clusters[index].append(item); 84 | 85 | return clusters; 86 | 87 | 88 | ###_Core Functions_### 89 | def Classify(means,item): 90 | #Classify item to the mean with minimum distance 91 | 92 | minimum = sys.maxint; 93 | index = -1; 94 | 95 | for i in range(len(means)): 96 | #Find distance from item to mean 97 | dis = EuclideanDistance(item,means[i]); 98 | 99 | if(dis < minimum): 100 | minimum = dis; 101 | index = i; 102 | 103 | return index; 104 | 105 | def CalculateMeans(k,items,maxIterations=100000): 106 | #Find the minima and maxima for columns 107 | cMin, cMax = FindColMinMax(items); 108 | 109 | #Initialize means at random points 110 | means = InitializeMeans(items,k,cMin,cMax); 111 | 112 | #Initialize clusterSizes, the array to hold 113 | #the number of items assigned to each cluster 114 | clusterSizes = [0 for i in range(len(means))]; 115 | 116 | #An array to hold the cluster an item is in 117 | belongsTo = [0 for i in range(len(items))]; 118 | 119 | #Calculate means 120 | for e in range(maxIterations): 121 | #If no change of cluster occurs, halt 122 | noChange = True; 123 | for i in range(len(items)): 124 | item = items[i]; 125 | #Classify item into a cluster and update the 126 | #corresponding mean. 127 | 128 | index = Classify(means,item); 129 | 130 | clusterSizes[index] += 1; #Sizes accumulate across epochs, damping later updates 131 | means[index] = UpdateMean(clusterSizes[index],means[index],item); 132 | 133 | #Item changed cluster 134 | if(index != belongsTo[i]): 135 | noChange = False; 136 | 137 | belongsTo[i] = index; 138 | 139 | #Nothing changed, return 140 | if(noChange): 141 | break; 142 | 143 | return means; 144 | 145 | 146 | ###_Main_### 147 | def main(): 148 | items = ReadData('data.txt'); 149 | 150 | k = 3; 151 | 152 | means = CalculateMeans(k,items); 153 | clusters = FindClusters(means,items); 154 | print "Means = ", means; 155 | print "Clusters: ", clusters; 156 | 157 | #newItem = [5.4,3.7,1.5,0.2]; 158 | #print Classify(means,newItem); 159 | 160 | if __name__ == "__main__": 161 | main(); 162 | -------------------------------------------------------------------------------- /Clustering/kMeans - Standard/kMeans_Plot.py: -------------------------------------------------------------------------------- 1 | import kMeans; 2 | import numpy as np; 3 | from random import choice; 4 | from matplotlib import pyplot; 5 | 6 | def CutToTwoFeatures(items,indexA,indexB): 7 | n = len(items); 8 | X = []; 9 | for i in range(n): 10 | item = items[i]; 11 | newItem = [item[indexA],item[indexB]]; 12 | X.append(newItem); 13 | 14 | return X; 15 | 16 | 17 | def PlotClusters(clusters): 18 | n = len(clusters); 19 | #Group the (already two-dimensional) items by cluster into X 20 | X = [[] for i in range(n)]; 21 | 22 | for i in range(n): 23 | cluster = clusters[i]; 24 | for item in cluster: 25 | X[i].append(item); 26 | 27 | colors = ['r','b','g','c','m','y']; 28 | 29 | for x in X: 30 | #Choose color randomly from list, then remove it 31 | #(to avoid duplicates) 32 | c = choice(colors); 33 | colors.remove(c); 34 | 35 | Xa = []; 36 | Xb = []; 37 | 38 | for item in x: 39 | Xa.append(item[0]); 40 | Xb.append(item[1]); 41 | 42 | pyplot.plot(Xa,Xb,'o',color=c); 43 | 44 | pyplot.show(); 45 | 46 | 47 | def main(): 48 | items = kMeans.ReadData('data.txt'); 49 | items = CutToTwoFeatures(items,2,3); 50 | 51 | k = 3; 52 | means = kMeans.CalculateMeans(k,items); 53 | clusters = kMeans.FindClusters(means,items); 54 | 55 | PlotClusters(clusters); 56 | 57 | main(); 58 | 
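Both kMeans variants above rely on the same running-mean update in UpdateMean: when the n-th item x is assigned to a mean m, the mean becomes (m*(n-1)+x)/n, which is algebraically the batch average of everything assigned to that mean so far (the repo version additionally rounds to three decimals at each step). That is why a per-cluster count is all that needs to be stored, rather than the cluster members themselves. A minimal standalone sketch of that identity (the item values below are illustrative, not taken from data.txt):

#Feeding items one at a time through the running-mean update
#reproduces the plain batch mean of all items seen so far.
items = [[5.1,3.5],[4.9,3.0],[4.7,3.2]];

mean = [0.0,0.0];
for n,item in enumerate(items,1): #n = number of items assigned so far
    for i in range(len(mean)):
        mean[i] = (mean[i]*(n-1)+item[i])/float(n);

print(mean); #[4.9, 3.2333...], the same as averaging the three items in one go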
-------------------------------------------------------------------------------- /Keras/kFold.py: -------------------------------------------------------------------------------- 1 | from keras.models import Sequential 2 | from keras.layers import Dense 3 | from keras.optimizers import SGD, RMSprop 4 | import numpy as np 5 | import pandas as pd 6 | 7 | # Read data 8 | white = pd.read_csv("winequality-white.csv", sep=';') 9 | red = pd.read_csv("winequality-red.csv", sep=';') 10 | 11 | white['type'] = 0 12 | red['type'] = 1 13 | 14 | wines = pd.concat([red, white], ignore_index=True).sample(frac=1) 15 | Y = np.ravel(wines.quality) 16 | X = wines.drop(['quality'], axis=1) 17 | 18 | def create_model(train_x, train_y, epochs): 19 | model = Sequential() 20 | 21 | model.add(Dense(64, activation='relu', input_dim=12)) 22 | model.add(Dense(1)) 23 | model.compile(optimizer='rmsprop', loss='mse', metrics=['mae']) 24 | 25 | model.fit(train_x, train_y, epochs=epochs) 26 | return model 27 | 28 | k = 5 29 | l = int(len(X) / k) # fold length 30 | mse_total, mae_total = 0, 0 31 | for i in range(k): 32 | test_x = X[i*l:(i+1)*l] 33 | test_y = Y[i*l:(i+1)*l] 34 | 35 | train_x = np.concatenate([X[:i*l], X[(i+1)*l:]]) 36 | train_y = np.concatenate([Y[:i*l], Y[(i+1)*l:]]) 37 | 38 | model = create_model(train_x, train_y, 15) 39 | 40 | # evaluate() returns the loss (mse) and the mae metric on the test fold 41 | mse, mae = model.evaluate(test_x, test_y) 42 | mse_total += mse 43 | mae_total += mae 44 | 45 | mse_avg = mse_total / k 46 | mae_avg = mae_total / k 47 | print(mse_avg, mae_avg) 48 | -------------------------------------------------------------------------------- /Pattern Recognition/Edit Distance.py: -------------------------------------------------------------------------------- 1 | def EditDistance(a,b): 2 | len1 = len(a); 3 | len2 = len(b); 4 | 5 | D = [[0 for j in range(len2+1)] for i in range(len1+1)]; 6 | 7 | for i in range(1,len1+1): 8 | D[i][0] = i; 9 | 10 | for j in range(1,len2+1): 11 | D[0][j] = j; 12 | 13 | for i in range(1,len1+1): 14 | for j in range(1,len2+1): 15 | #If chars are the same, added cost is 0 16 | same = 0; 17 | if(a[i-1] != b[j-1]): 18 | #Otherwise it is 1 (for substitution) 19 | same = 1; 20 | 21 | c1 = D[i-1][j-1] + same; #substitution or match (diagonal) 22 | c2 = D[i-1][j] + 1; #deletion (cell above) 23 | c3 = D[i][j-1] + 1; #insertion (cell to the left) 24 | 25 | D[i][j] = min(c1,c2,c3); 26 | 27 | print D; 28 | return D[-1][-1]; 29 | 30 | 31 | word1 = "kitten"; 32 | word2 = "cat"; 33 | 34 | print EditDistance(word1,word2); -------------------------------------------------------------------------------- /Pattern Recognition/Viterbi.py: -------------------------------------------------------------------------------- 1 | def Viterbi(transitionProb,startProb,emissionProb,states,observations,n): 2 | V = []; 3 | firstObs = observations[0]; 4 | 5 | #Calculate first column 6 | v = {}; #Temp dict to hold state data 7 | for s in states: 8 | #Add dummy value to PrevState key 9 | v[s] = {"PrevState":"Start"}; 10 | #The probability of state appearing first is its prob 11 | #of appearing first in general times the prob of 12 | #the first observation being emitted from that state. 
13 | v[s]["Prob"] = startProb[s]*emissionProb[s][firstObs]; 14 | V.append(v.copy()); 15 | 16 | for i in range(1,n): 17 | obs = observations[i]; #Current observation 18 | 19 | v = {}; #Temp dict 20 | for s in states: 21 | #Find max prob of states for current observation 22 | maxProb = -1; 23 | state = ""; #Hold previous state where maxProb occurred 24 | 25 | emission = emissionProb[s][obs]; 26 | for prevState in states: 27 | #Find max from previous column 28 | prevProb = V[i-1][prevState]["Prob"]; 29 | prob = prevProb*transitionProb[prevState][s]*emission; 30 | 31 | if(prob > maxProb): 32 | maxProb = prob; 33 | state = prevState; 34 | 35 | v[s] = {}; 36 | v[s]["Prob"] = maxProb; 37 | v[s]["PrevState"] = state; 38 | 39 | V.append(v.copy()); 40 | 41 | #Find max probability in the final column 42 | maxProb = -1; 43 | state = ""; 44 | for s in states: 45 | if(V[-1][s]["Prob"] > maxProb): 46 | maxProb = V[-1][s]["Prob"]; 47 | state = s; 48 | 49 | #Find sequence by moving back from the final state 50 | sequence = [state]; 51 | for i in range(n-2,-1,-1): 52 | prevState = V[i+1][state]["PrevState"]; 53 | sequence.insert(0,prevState); #Insert prevState to start of seq 54 | state = prevState; 55 | 56 | print sequence; 57 | print maxProb; 58 | 59 | 60 | def Initialization1(): 61 | ## Example data from Wikipedia ## 62 | transitionProb = { 63 | "Healthy": {"Healthy":0.7,"Fever":0.3}, 64 | "Fever" : {"Healthy":0.4,"Fever":0.6} 65 | }; 66 | 67 | emissionProb = { 68 | "Healthy":{"Normal":0.5,"Cold":0.4,"Dizzy":0.1}, 69 | "Fever":{"Normal":0.1,"Cold":0.3,"Dizzy":0.6} 70 | }; 71 | 72 | startProb = {"Healthy":0.6,"Fever":0.4}; 73 | 74 | states = ["Healthy","Fever"]; 75 | observations = ["Normal","Cold","Dizzy"]; 76 | n = len(observations); 77 | 78 | return transitionProb,startProb,emissionProb,states,observations,n; 79 | 80 | def Initialization2(): 81 | transitionProb = { 82 | "Rain": {"Rain": 0.5, "Sun": 0.1, "Cloud": 0.4}, 83 | "Cloud": {"Rain": 0.3, "Sun": 0.3, "Cloud": 0.4}, 84 | "Sun": {"Rain": 0.1, "Sun": 0.5, "Cloud": 0.4}, 85 | }; 86 | 87 | emissionProb = { 88 | "Rain": {"Walk": 0.1, "Clean": 0.3, "Study": 0.5, "Shop": 0.1}, 89 | "Cloud": {"Walk": 0.3, "Clean": 0.2, "Study": 0.3, "Shop": 0.2}, 90 | "Sun": {"Walk": 0.5, "Clean": 0.1, "Study": 0.1, "Shop": 0.3}, 91 | }; 92 | 93 | startProb = {"Rain": 0.2, "Cloud": 0.3, "Sun": 0.4}; 94 | 95 | states = ["Rain", "Cloud","Sun"]; 96 | observations = ["Walk", "Walk", "Shop", "Walk", "Study", "Study"]; 97 | n = len(observations); 98 | 99 | return transitionProb,startProb,emissionProb,states,observations,n; 100 | 101 | 102 | transitionProb,startProb,emissionProb,states,observations,n = Initialization1(); 103 | 104 | Viterbi(transitionProb,startProb,emissionProb,states,observations,n); 105 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine-Learning 2 | 3 | A (WIP) collection of Machine Learning algorithms implemented in Python, plus tutorials with Keras. 
Currently includes the following: 4 | 5 | ### Keras 6 | 7 | * k-Fold Validation ([Blog Post](https://antmarakis.github.io/artificial%20intelligence/keras-k-fold/)) 8 | 9 | ### Classifiers 10 | 11 | * k-Nearest Neighbors ([Blog Post](https://antmarakis.github.io/artificial%20intelligence/k-nearest-neighbors/)) 12 | 13 | * Perceptron ([Theory](https://antmarakis.github.io/artificial%20intelligence/perceptron-theory/), [Implementation](https://antmarakis.github.io/artificial%20intelligence/perceptron-implementation/)) 14 | 15 | * Least Squares 16 | 17 | * Requires numpy 18 | 19 | * Includes plotting, which requires matplotlib 20 | 21 | * Naive Bayes Classifier ([Categorical](https://antmarakis.github.io/artificial%20intelligence/naive-bayes-cat-intro/), [Numerical](https://antmarakis.github.io/artificial%20intelligence/naive-bayes-num-intro/)) 22 | 23 | * Includes Categorical and Numerical classifiers 24 | 25 | * Neural Network ([Tutorial](https://www.kaggle.com/antmarakis/another-neural-network-from-scratch)) 26 | 27 | * Requires numpy 28 | 29 | * Includes plotting, which requires matplotlib 30 | 31 | ### Clustering 32 | 33 | * kMeans ([Standard](https://antmarakis.github.io/kMeans/)) 34 | 35 | * Includes plotting, which requires matplotlib 36 | 37 | * Includes both the online (or sequential) kMeans and the standard (or iterative) algorithm. 38 | 39 | ### Pattern Recognition 40 | 41 | * Viterbi Algorithm (for Hidden Markov Models) 42 | 43 | * Edit Distance Algorithm 44 | --------------------------------------------------------------------------------
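A note on usage: every script above is self-contained and written for Python 2 (print statements, sys.maxint). As a minimal sketch, the kNN module can also be driven from another script, assuming it is run from inside Classifiers/kNN next to data.txt; the function names are the ones defined in kNearestNeighbours.py above:

import kNearestNeighbours as knn

items = knn.ReadData('data.txt'); #Shuffled list of {feature: value, "Class": label} dicts

#Classify a new, unlabelled item by its 3 nearest neighbours
newItem = {'SL': 7.0, 'SW': 3.2, 'PL': 4.7, 'PW': 1.4};
print(knn.Classify(newItem, 3, items)); #A (class, votes) tuple, e.g. ('Iris-versicolor', 3)

#5-fold cross-validation with k = 5, averaged over 10 shuffles
knn.Evaluate(5, 5, items, 10);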