├── AutoNormal
│   └── AutoNorm.py
├── Bayes
│   ├── .project
│   ├── .pydevproject
│   └── src
│       └── Bayes.py
├── DecisionStump
│   ├── .project
│   ├── .pydevproject
│   └── src
│       └── Adaboosting.py
├── DecisionTree
│   ├── .project
│   ├── .pydevproject
│   └── src
│       └── dt.py
├── HashMap
│   ├── .classpath
│   ├── .project
│   ├── .settings
│   │   └── org.eclipse.jdt.core.prefs
│   ├── bin
│   │   └── Main.class
│   ├── final.txt
│   └── src
│       └── Main.java
├── K-means
│   ├── K-means
│   │   ├── .project
│   │   ├── .pydevproject
│   │   └── src
│   │       └── Test.py
│   ├── effect.png
│   └── testSet.txt
├── KNN
│   ├── .project
│   ├── .pydevproject
│   └── src
│       └── knn.py
├── LICENSE
├── LogisticRegression
│   ├── .project
│   ├── .pydevproject
│   └── src
│       └── LR.py
├── README.md
├── SMO
│   ├── .project
│   ├── .pydevproject
│   └── src
│       └── SMO.py
└── ShannonEnt
    └── ShannonEnt
        ├── .project
        ├── .pydevproject
        └── src
            └── shannon.py
/AutoNormal/AutoNorm.py:
--------------------------------------------------------------------------------
1 |
2 | from __future__ import division
3 | def GetAverage(mat):
4 |     """Return the per-column mean of mat (a list of equal-length rows)."""
5 | n=len(mat)
6 | m= width(mat)
7 | num = [0]*m
8 | for j in range(0,m):
9 | for i in mat:
10 | num[j]=num[j]+i[j]
11 | num[j]=num[j]/n
12 | return num
13 |
14 | def width(lst):
15 |     """Return the number of columns, i.e. the length of the first row."""
16 |     return len(lst[0])
17 | 
18 | 
19 |
20 | def GetVar(average,mat):
21 | ListMat=[]
22 | for i in mat:
23 | ListMat.append(list(map(lambda x: x[0]-x[1], zip(average, i))))
24 |     # each ListMat row is the original row minus the column means
25 | n=len(ListMat)
26 | m= width(ListMat)
27 | num = [0]*m
28 | for j in range(0,m):
29 | for i in ListMat:
30 | num[j]=num[j]+(i[j]*i[j])
31 | num[j]=num[j]/n
32 | return num
33 |
34 | def DenoisMat(mat):
35 | average=GetAverage(mat)
36 | variance=GetVar(average,mat)
37 | section=list(map(lambda x: x[0]+x[1], zip(average, variance)))
38 | 
39 |     # Clip any value above (column mean + column variance) down to that
40 |     # bound; note that the rows of mat are modified in place.
41 |     m= width(mat)
42 |     denoisMat=[]
43 | for i in mat:
44 | for j in range(0,m):
45 | if i[j]>section[j]:
46 | i[j]=section[j]
47 | denoisMat.append(i)
48 | return denoisMat
49 |
50 | def AutoNorm(mat):
51 |     # min-max scaling: (x - min) / (max - min), column by column
52 |     m= width(mat)
53 |     MinNum=list(mat[0])   # per-column minima, seeded from the first row
54 |     MaxNum=list(mat[0])   # per-column maxima, seeded from the first row
55 | for i in mat:
56 | for j in range(0,m):
57 | if i[j]>MaxNum[j]:
58 | MaxNum[j]=i[j]
59 |
60 | for p in mat:
61 | for q in range(0,m):
62 | if p[q]<=MinNum[q]:
63 | MinNum[q]=p[q]
64 |
65 | section=list(map(lambda x: x[0]-x[1], zip(MaxNum, MinNum)))
66 | print section
67 | NormMat=[]
68 |
69 | for k in mat:
70 |         # scale row k into [0, 1] per column
71 | distance=list(map(lambda x: x[0]-x[1], zip(k, MinNum)))
72 | value=list(map(lambda x: x[0]/x[1], zip(distance,section)))
73 | NormMat.append(value)
74 | return NormMat
75 |
76 | if __name__=='__main__':
77 | mat=[[1,42,512],[4,5,6],[7,8,9],[2,2,2],[2,10,5]]
78 | a=GetAverage(mat)
79 | b=GetVar(a,mat)
80 | print a,
81 | print DenoisMat(mat)
82 |
83 | # print list(map(lambda x: x[0]-x[1], zip(v2, v1)))
84 | print AutoNorm(mat)
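85 | 
86 | # A minimal sketch of the same min-max scaling in NumPy, for comparison
87 | # (illustration only, not part of the original script; assumes mat is a
88 | # list of equal-length numeric rows):
89 | #   from numpy import array
90 | #   data = array(mat, dtype=float)
91 | #   norm = (data - data.min(axis=0)) / (data.max(axis=0) - data.min(axis=0))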
--------------------------------------------------------------------------------
/Bayes/.project:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <projectDescription>
3 | 	<name>Bayes</name>
4 | 	<comment></comment>
5 | 	<projects>
6 | 	</projects>
7 | 	<buildSpec>
8 | 		<buildCommand>
9 | 			<name>org.python.pydev.PyDevBuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 	</buildSpec>
14 | 	<natures>
15 | 		<nature>org.python.pydev.pythonNature</nature>
16 | 	</natures>
17 | </projectDescription>
18 | 
--------------------------------------------------------------------------------
/Bayes/.pydevproject:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2 | <?eclipse-pydev version="1.0"?>
3 | 
4 | <pydev_project>
5 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
6 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
7 | <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
8 | <path>/Bayes</path>
9 | </pydev_pathproperty>
10 | 
11 | </pydev_project>
--------------------------------------------------------------------------------
/Bayes/src/Bayes.py:
--------------------------------------------------------------------------------
1 | from numpy import *
2 |
3 | def loadDataSet():
4 | postingList=[['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],
5 | ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],
6 | ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],
7 | ['stop', 'posting', 'stupid', 'worthless', 'garbage'],
8 | ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],
9 | ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']]
10 | classVec = [0,1,0,1,0,1] #1 is abusive, 0 not
11 | return postingList,classVec
12 |
13 | def createVocabList(dataSet):
14 | vocabSet = set([]) #create empty set
15 | for document in dataSet:
16 | vocabSet = vocabSet | set(document) #union of the two sets
17 | return list(vocabSet)
18 |
19 | def setOfWords2Vec(vocabList, inputSet):
20 | returnVec = [0]*len(vocabList)
21 | for word in inputSet:
22 | if word in vocabList:
23 | returnVec[vocabList.index(word)] = 1
24 | else: print "the word: %s is not in my Vocabulary!" % word
25 | return returnVec
26 |
27 |
28 | def trainNB0(trainMatrix,trainCategory):
29 | numTrainDocs = len(trainMatrix)
30 |
31 | numWords = len(trainMatrix[0])
32 |
33 |     pAbusive = sum(trainCategory)/float(numTrainDocs) # fraction of docs labeled abusive (class 1)
34 |     p0Num = zeros(numWords); p1Num = zeros(numWords)  #change to ones() for Laplace smoothing
35 | 
36 |     p0Denom = 0.0; p1Denom = 0.0                      #change to 2.0 for Laplace smoothing
37 | for i in range(numTrainDocs):
38 | if trainCategory[i] == 1:
39 | p1Num += trainMatrix[i]
40 | p1Denom += sum(trainMatrix[i])
41 | else:
42 | p0Num += trainMatrix[i]
43 | p0Denom += sum(trainMatrix[i])
44 |
45 |     p1Vect = p1Num/p1Denom          #change to log() to avoid numerical underflow
46 |     p0Vect = p0Num/p0Denom          #change to log() to avoid numerical underflow
47 | return p0Vect,p1Vect,pAbusive
48 |
49 | def getResult(listPost,listClasses):
50 | trainMat=[]
51 |     vocabList=createVocabList(listPost)   # vocabulary over all documents; avoids shadowing the list() builtin
52 |     for postinDoc in listPost:
53 |         trainMat.append(setOfWords2Vec(vocabList,postinDoc))
54 | 
55 |     p0v,p1v,pAb=trainNB0(trainMat,listClasses)
56 | #    print p0v.index(max(p0v))
57 | 
58 |     print p1v
59 |     print max(p1v)
60 |     print vocabList
61 | if __name__=='__main__':
62 | postingList,classVec=loadDataSet()
63 |
64 |
65 | getResult(postingList,classVec)
66 | # print setOfWords2Vec(createVocabList(postingList),postingList[0])
67 |
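68 | # Sketch of a classifier on top of trainNB0 (an assumed helper, not defined in
69 | # this file), using the log form the "change to log()" comments point at:
70 | #   def classifyNB(vec2Classify, p0Vect, p1Vect, pAbusive):
71 | #       p1 = sum(vec2Classify * log(p1Vect)) + log(pAbusive)
72 | #       p0 = sum(vec2Classify * log(p0Vect)) + log(1.0 - pAbusive)
73 | #       return 1 if p1 > p0 else 0
74 | # Without the ones()/2.0 smoothing noted above, zero counts make log() diverge.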
--------------------------------------------------------------------------------
/DecisionStump/.project:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <projectDescription>
3 | 	<name>DecisionStump</name>
4 | 	<comment></comment>
5 | 	<projects>
6 | 	</projects>
7 | 	<buildSpec>
8 | 		<buildCommand>
9 | 			<name>org.python.pydev.PyDevBuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 	</buildSpec>
14 | 	<natures>
15 | 		<nature>org.python.pydev.pythonNature</nature>
16 | 	</natures>
17 | </projectDescription>
18 | 
--------------------------------------------------------------------------------
/DecisionStump/.pydevproject:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2 | <?eclipse-pydev version="1.0"?>
3 | 
4 | <pydev_project>
5 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
6 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
7 | <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
8 | <path>/DecisionStump</path>
9 | </pydev_pathproperty>
10 | 
11 | </pydev_project>
--------------------------------------------------------------------------------
/DecisionStump/src/Adaboosting.py:
--------------------------------------------------------------------------------
1 | from numpy import *
2 |
3 | def loadSimpData():
4 | datMat = matrix([[ 1. , 2.1],
5 | [ 2. , 1.1],
6 | [ 1.3, 1. ],
7 | [ 1. , 1. ],
8 | [ 2. , 1. ]])
9 | classLabels = [1.0, 1.0, -1.0, -1.0, 1.0]
10 | return datMat,classLabels
11 |
12 | def loadDataSet(fileName): #general function to parse tab -delimited floats
13 | numFeat = len(open(fileName).readline().split('\t')) #get number of fields
14 | dataMat = []; labelMat = []
15 | fr = open(fileName)
16 | for line in fr.readlines():
17 | lineArr =[]
18 | curLine = line.strip().split('\t')
19 | for i in range(numFeat-1):
20 | lineArr.append(float(curLine[i]))
21 | dataMat.append(lineArr)
22 | labelMat.append(float(curLine[-1]))
23 | return dataMat,labelMat
24 |
25 | def stumpClassify(dataMatrix,dimen,threshVal,threshIneq):#just classify the data
26 | retArray = ones((shape(dataMatrix)[0],1))
27 | if threshIneq == 'lt':
28 | retArray[dataMatrix[:,dimen] <= threshVal] = -1.0
29 | else:
30 | retArray[dataMatrix[:,dimen] > threshVal] = -1.0
31 |
32 | return retArray
33 |
34 |
35 | def buildStump(dataArr,classLabels,D):
36 | dataMatrix = mat(dataArr); labelMat = mat(classLabels).T
37 | m,n = shape(dataMatrix)
38 | numSteps = 10.0; bestStump = {}; bestClasEst = mat(zeros((m,1)))
39 | minError = inf #init error sum, to +infinity
40 | for i in range(n):#loop over all dimensions
41 | rangeMin = dataMatrix[:,i].min(); rangeMax = dataMatrix[:,i].max();
42 |
43 | stepSize = (rangeMax-rangeMin)/numSteps
44 |         for j in range(-1,int(numSteps)+1):#step the threshold across this dimension's value range
45 | for inequal in ['lt', 'gt']: #go over less than and greater than
46 | threshVal = (rangeMin + float(j) * stepSize)
47 |
48 | predictedVals = stumpClassify(dataMatrix,i,threshVal,inequal)#call stump classify with i, j, lessThan
49 | errArr = mat(ones((m,1)))
50 | 
51 |                 # zero out the examples the stump classified correctly
52 |                 errArr[predictedVals == labelMat] = 0
53 |
54 | weightedError = D.T*errArr #calc total error multiplied by D
55 | #print "split: dim %d, thresh %.2f, thresh ineqal: %s, the weighted error is %.3f" % (i, threshVal, inequal, weightedError)
56 | if weightedError < minError:
57 | minError = weightedError
58 | bestClasEst = predictedVals.copy()
59 | bestStump['dim'] = i
60 | bestStump['thresh'] = threshVal
61 | bestStump['ineq'] = inequal
62 | return bestStump,minError,bestClasEst
63 |
64 | if __name__=='__main__':
65 | datMat,classLabels=loadSimpData()
66 | d=mat(ones((5,1))/5)
67 | bestStump,minError,bestClasEst=buildStump(datMat,classLabels,d)
68 | print bestStump,minError,bestClasEst
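69 | 
70 | # How buildStump slots into a full AdaBoost round (sketch only; the boosting
71 | # loop itself is not implemented in this file):
72 | #   alpha = 0.5 * log((1.0 - minError) / max(minError, 1e-16))
73 | # D is then scaled up by exp(alpha) on misclassified points, down by
74 | # exp(-alpha) on correct ones, renormalized, and the next stump is trained.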
--------------------------------------------------------------------------------
/DecisionTree/.project:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <projectDescription>
3 | 	<name>DecisionTree</name>
4 | 	<comment></comment>
5 | 	<projects>
6 | 	</projects>
7 | 	<buildSpec>
8 | 		<buildCommand>
9 | 			<name>org.python.pydev.PyDevBuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 	</buildSpec>
14 | 	<natures>
15 | 		<nature>org.python.pydev.pythonNature</nature>
16 | 	</natures>
17 | </projectDescription>
18 | 
--------------------------------------------------------------------------------
/DecisionTree/.pydevproject:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2 | <?eclipse-pydev version="1.0"?>
3 | 
4 | <pydev_project>
5 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
6 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
7 | <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
8 | <path>/DecisionTree</path>
9 | </pydev_pathproperty>
10 | 
11 | </pydev_project>
--------------------------------------------------------------------------------
/DecisionTree/src/dt.py:
--------------------------------------------------------------------------------
1 | import math
2 | import operator
3 |
4 |
5 | def calcShannonEnt(dataSet):
6 | numEntries=len(dataSet)
7 |
8 | labelCounts={}
9 |
10 | for featVec in dataSet:
11 | currentLabel=featVec[-1]
12 |
13 | if currentLabel not in labelCounts.keys():
14 | labelCounts[currentLabel]=0
15 | labelCounts[currentLabel]+=1
16 | shannonEnt=0.0
17 |     # H = -sum(p * log2(p)) over the class frequencies p
18 |     for key in labelCounts:
19 | 
20 | prob =float(labelCounts[key])/numEntries
21 | shannonEnt-=prob*math.log(prob,2)
22 |
23 | return shannonEnt
24 |
25 |
26 | def createDataSet():
27 |
28 |     dataSet=[[1,0,'man'],[1,1,'man'],[0,1,'man'],[0,0,'woman']]
29 | labels=['throat','mustache']
30 | return dataSet,labels
31 |
32 | def splitDataSet(dataSet, axis, value):
33 | retDataSet = []
34 | for featVec in dataSet:
35 | if featVec[axis] == value:
36 | reducedFeatVec = featVec[:axis] #chop out axis used for splitting
37 | reducedFeatVec.extend(featVec[axis+1:])
38 | retDataSet.append(reducedFeatVec)
39 | return retDataSet
40 |
41 | def chooseBestFeatureToSplit(dataSet):
42 | numFeatures = len(dataSet[0]) - 1 #the last column is used for the labels
43 | baseEntropy = calcShannonEnt(dataSet)
44 | bestInfoGain = 0.0; bestFeature = -1
45 | for i in range(numFeatures): #iterate over all the features
46 | featList = [example[i] for example in dataSet]#create a list of all the examples of this feature
47 |
48 | uniqueVals = set(featList) #get a set of unique values
49 |
50 | newEntropy = 0.0
51 | for value in uniqueVals:
52 | subDataSet = splitDataSet(dataSet, i, value)
53 | prob = len(subDataSet)/float(len(dataSet))
54 | newEntropy += prob * calcShannonEnt(subDataSet)
55 | infoGain = baseEntropy - newEntropy #calculate the info gain; ie reduction in entropy
56 |
57 | if (infoGain > bestInfoGain): #compare this to the best gain so far
58 | bestInfoGain = infoGain #if better than current best, set to best
59 | bestFeature = i
60 | return bestFeature #returns an integer
61 |
62 |
63 |
64 |
65 |
66 | def majorityCnt(classList):
67 | classCount={}
68 | for vote in classList:
69 | if vote not in classCount.keys(): classCount[vote] = 0
70 | classCount[vote] += 1
71 | sortedClassCount = sorted(classCount.iteritems(), key=operator.itemgetter(1), reverse=True)
72 | return sortedClassCount[0][0]
73 |
74 | def createTree(dataSet,labels):
75 | classList = [example[-1] for example in dataSet]
76 |
77 | if classList.count(classList[0]) == len(classList):
78 | return classList[0]#stop splitting when all of the classes are equal
79 | if len(dataSet[0]) == 1: #stop splitting when there are no more features in dataSet
80 | return majorityCnt(classList)
81 | bestFeat = chooseBestFeatureToSplit(dataSet)
82 | bestFeatLabel = labels[bestFeat]
83 | myTree = {bestFeatLabel:{}}
84 | del(labels[bestFeat])
85 | featValues = [example[bestFeat] for example in dataSet]
86 | uniqueVals = set(featValues)
87 | for value in uniqueVals:
88 | subLabels = labels[:] #copy all of labels, so trees don't mess up existing labels
89 |
90 | myTree[bestFeatLabel][value] = createTree(splitDataSet(dataSet, bestFeat, value),subLabels)
91 |
92 | return myTree
93 |
94 | def classify(inputTree,featLabels,testVec):
95 | firstStr = inputTree.keys()[0]
96 | secondDict = inputTree[firstStr]
97 | featIndex = featLabels.index(firstStr)
98 | key = testVec[featIndex]
99 | valueOfFeat = secondDict[key]
100 | if isinstance(valueOfFeat, dict):
101 | classLabel = classify(valueOfFeat, featLabels, testVec)
102 | else: classLabel = valueOfFeat
103 | return classLabel
104 |
105 | def getResult():
106 | dataSet,labels=createDataSet()
107 | # splitDataSet(dataSet,1,1)
108 | #    chooseBestFeatureToSplit(dataSet)
109 | # print chooseBestFeatureToSplit(dataSet)
110 | #print calcShannonEnt(dataSet)
111 | mtree=createTree(dataSet,labels)
112 | print mtree
113 |
114 | print classify(mtree,['throat','mustache'],[0,0])
115 |
116 | if __name__=='__main__':
117 | getResult()
118 |
119 |
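120 | # Worked check: createDataSet yields 3 'man' and 1 'woman' labels, so
121 | # calcShannonEnt(dataSet) = -(3/4)*log2(3/4) - (1/4)*log2(1/4) ≈ 0.811 bits.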
--------------------------------------------------------------------------------
/HashMap/.classpath:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <classpath>
3 | 	<classpathentry kind="src" path="src"/>
4 | 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
5 | 	<classpathentry kind="output" path="bin"/>
6 | </classpath>
7 | 
--------------------------------------------------------------------------------
/HashMap/.project:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <projectDescription>
3 | 	<name>MakeResult</name>
4 | 	<comment></comment>
5 | 	<projects>
6 | 	</projects>
7 | 	<buildSpec>
8 | 		<buildCommand>
9 | 			<name>org.eclipse.jdt.core.javabuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 	</buildSpec>
14 | 	<natures>
15 | 		<nature>org.eclipse.jdt.core.javanature</nature>
16 | 	</natures>
17 | </projectDescription>
18 | 
--------------------------------------------------------------------------------
/HashMap/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
4 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
5 | org.eclipse.jdt.core.compiler.compliance=1.6
6 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
7 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
8 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
9 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
10 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
11 | org.eclipse.jdt.core.compiler.source=1.6
12 |
--------------------------------------------------------------------------------
/HashMap/bin/Main.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DURGESH716/MachineLearning/e80048d82b5ab64cd06c3223ce0c741b910f4305/HashMap/bin/Main.class
--------------------------------------------------------------------------------
/HashMap/final.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DURGESH716/MachineLearning/e80048d82b5ab64cd06c3223ce0c741b910f4305/HashMap/final.txt
--------------------------------------------------------------------------------
/HashMap/src/Main.java:
--------------------------------------------------------------------------------
1 | 
2 | import java.io.BufferedReader;
3 | import java.io.BufferedWriter;
4 | import java.io.File;
5 | import java.io.FileReader;
6 | import java.io.FileWriter;
7 | import java.io.IOException;
8 | import java.util.ArrayList;
9 | import java.util.HashMap;
10 | import java.util.Iterator;
11 |
12 |
13 |
14 |
15 | public class Main {
16 | public static void main (String[] args) throws IOException{
17 | BufferedWriter writer = new BufferedWriter(new FileWriter(new File("/Users/hakuri/Desktop/data2.txt")));
18 | BufferedReader reader=new BufferedReader(new FileReader(new File("/Users/hakuri/Desktop/final.txt")));
19 | String lineTxt = null;
20 | int i=1;
21 | // ArrayList brand = new ArrayList();
22 | HashMap<String, ArrayList<String>> custom=new HashMap<String, ArrayList<String>>();
23 |
24 |
25 | while((lineTxt = reader.readLine()) != null){
26 | //System.out.println(lineTxt);
27 | String line=lineTxt.trim();
28 | String[] part=line.split(",");
29 | if(!custom.containsKey(part[0])){
30 | custom.put(part[0],new ArrayList<String>());
31 | custom.get(part[0]).add(part[1]);
32 | }
33 | else{
34 | custom.get(part[0]).add(part[1]);
35 | }
36 | // custom.put(part[0], part[1]);
37 | }
38 | //System.out.print(custom);
39 | Iterator<String> it=custom.keySet().iterator();
40 | while( it.hasNext()){
41 | String key=it.next();
42 | ArrayList<String> value=custom.get(key);
43 | System.out.println(key+"--"+value);
44 | }
45 | //System.out.println(custom.keySet().iterator());
46 | }
47 |
48 | }
49 |
50 |
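51 | // Sketch (not part of the original class): both streams would be closed
52 | // automatically if opened in a try-with-resources block, e.g.
53 | //   try (BufferedReader reader = new BufferedReader(new FileReader(file))) { ... }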
--------------------------------------------------------------------------------
/K-means/K-means/.project:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <projectDescription>
3 | 	<name>K-means</name>
4 | 	<comment></comment>
5 | 	<projects>
6 | 	</projects>
7 | 	<buildSpec>
8 | 		<buildCommand>
9 | 			<name>org.python.pydev.PyDevBuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 	</buildSpec>
14 | 	<natures>
15 | 		<nature>org.python.pydev.pythonNature</nature>
16 | 	</natures>
17 | </projectDescription>
18 | 
--------------------------------------------------------------------------------
/K-means/K-means/.pydevproject:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2 | <?eclipse-pydev version="1.0"?>
3 | 
4 | <pydev_project>
5 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
6 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
7 | <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
8 | <path>/K-means/src</path>
9 | </pydev_pathproperty>
10 | 
11 | </pydev_project>
--------------------------------------------------------------------------------
/K-means/K-means/src/Test.py:
--------------------------------------------------------------------------------
1 | '''
2 | @author: hakuri
3 | '''
4 | from numpy import *
5 | import matplotlib.pyplot as plt
6 | def loadDataSet(fileName):      #general function to parse tab-delimited floats
7 | dataMat = [] #assume last column is target value
8 | fr = open(fileName)
9 | for line in fr.readlines():
10 | curLine = line.strip().split('\t')
11 | fltLine = map(float,curLine) #map all elements to float()
12 | dataMat.append(fltLine)
13 | return dataMat
14 |
15 | def distEclud(vecA, vecB):
16 | return sqrt(sum(power(vecA - vecB, 2))) #la.norm(vecA-vecB)
17 |
18 | def randCent(dataSet, k):
19 | n = shape(dataSet)[1]
20 | centroids = mat(zeros((k,n)))#create centroid mat
21 | for j in range(n):#create random cluster centers, within bounds of each dimension
22 | minJ = min(array(dataSet)[:,j])
23 |
24 | rangeJ = float(max(array(dataSet)[:,j]) - minJ)
25 | centroids[:,j] = mat(minJ + rangeJ * random.rand(k,1))
26 |
27 | return centroids
28 |
29 | def kMeans(dataSet, k, distMeas=distEclud, createCent=randCent):
30 | m = shape(dataSet)[0]
31 | clusterAssment = mat(zeros((m,2)))#create mat to assign data points #to a centroid, also holds SE of each point
32 | centroids = createCent(dataSet, k)
33 | clusterChanged = True
34 | while clusterChanged:
35 | clusterChanged = False
36 | for i in range(m):#for each data point assign it to the closest centroid
37 | minDist = inf; minIndex = -1
38 | for j in range(k):
39 | distJI = distMeas(array(centroids)[j,:],array(dataSet)[i,:])
40 | if distJI < minDist:
41 | minDist = distJI; minIndex = j
42 | if clusterAssment[i,0] != minIndex: clusterChanged = True
43 | clusterAssment[i,:] = minIndex,minDist**2
44 | print centroids
45 | # print nonzero(array(clusterAssment)[:,0]
46 | for cent in range(k):#recalculate centroids
47 |             ptsInClust = array(dataSet)[nonzero(array(clusterAssment)[:,0]==cent)[0]]#get all the points in this cluster
48 | 
49 | centroids[cent,:] = mean(ptsInClust, axis=0) #assign centroid to mean
50 |         id=nonzero(array(clusterAssment)[:,0]==cent)[0]   # indices of the final cluster; plotBestFit uses this for k=2
51 | return centroids, clusterAssment,id
52 |
53 | def plotBestFit(dataSet,id,centroids):
54 |
55 | dataArr = array(dataSet)
56 | cent=array(centroids)
57 | n = shape(dataArr)[0]
58 | n1=shape(cent)[0]
59 | xcord1 = []; ycord1 = []
60 | xcord2 = []; ycord2 = []
61 | xcord3=[];ycord3=[]
62 |     # split the points by membership in cluster id
63 |     for i in range(n):
64 |         if i in id:
65 |             xcord1.append(dataArr[i,0]); ycord1.append(dataArr[i,1])
66 |         else:
67 |             xcord2.append(dataArr[i,0]); ycord2.append(dataArr[i,1])
68 | 
69 | for k in range(n1):
70 | xcord3.append(cent[k,0]);ycord3.append(cent[k,1])
71 |
72 | fig = plt.figure()
73 | ax = fig.add_subplot(111)
74 | ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
75 | ax.scatter(xcord2, ycord2, s=30, c='green')
76 | ax.scatter(xcord3, ycord3, s=50, c='black')
77 |
78 | plt.xlabel('X1'); plt.ylabel('X2');
79 | plt.show()
80 |
81 |
82 | if __name__=='__main__':
83 | dataSet=loadDataSet('/Users/hakuri/Desktop/testSet.txt')
84 | # # print randCent(dataSet,2)
85 | # print dataSet
86 | #
87 | # print kMeans(dataSet,2)
88 | a=[]
89 | b=[]
90 | a, b,id=kMeans(dataSet,2)
91 | plotBestFit(dataSet,id,a)
92 |
93 |
94 |
95 |
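96 | # Usage sketch (assumes the script is run from the K-means directory so the
97 | # bundled data file resolves):
98 | #   dataSet = loadDataSet('testSet.txt')
99 | #   centroids, assignments, id = kMeans(dataSet, 2)
100 | #   plotBestFit(dataSet, id, centroids)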
--------------------------------------------------------------------------------
/K-means/effect.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DURGESH716/MachineLearning/e80048d82b5ab64cd06c3223ce0c741b910f4305/K-means/effect.png
--------------------------------------------------------------------------------
/K-means/testSet.txt:
--------------------------------------------------------------------------------
1 | 10.235186 11.321997
2 | 10.122339 11.810993
3 | 9.190236 8.904943
4 | 9.306371 9.847394
5 | 8.330131 8.340352
6 | 10.152785 10.123532
7 | 10.408540 10.821986
8 | 9.003615 10.039206
9 | 9.534872 10.096991
10 | 9.498181 10.825446
11 | 9.875271 9.233426
12 | 10.362276 9.376892
13 | 10.191204 11.250851
14 | 7.720499 6.476300
15 | 9.334379 8.471268
16 | 7.963186 6.731333
17 | 8.244973 9.013785
18 | 9.569196 10.568949
19 | 8.854793 9.076536
20 | 9.382171 7.240862
21 | 8.179055 8.944502
22 | 8.267896 8.797017
23 | 9.047165 8.725068
24 | 8.741043 7.901385
25 | 7.190216 7.804587
26 | 8.081227 9.314431
27 | 8.047460 5.720780
28 | 7.917584 7.543254
29 | 8.676942 10.102220
30 | 9.210251 9.424717
31 | 7.732998 9.840202
32 | 7.681754 8.609897
33 | 7.925863 10.079159
34 | 8.261509 8.242080
35 | 8.514872 7.527561
36 | 10.324450 10.804481
37 | 7.856710 7.931543
38 | 7.858608 7.995340
39 | 9.196808 6.577598
40 | 9.644415 10.935081
41 | 9.579833 9.085021
42 | 7.888484 5.976428
43 | 9.072624 9.703344
44 | 8.914184 9.298515
45 | 7.822869 7.086663
46 | 10.538554 11.061464
47 | 8.280187 8.709012
48 | 8.884223 8.670105
49 | 9.359927 10.575055
50 | 9.078611 9.710833
51 | 7.935134 8.586173
52 | 8.805945 10.575145
53 | 9.584316 9.614076
54 | 11.269714 11.717254
55 | 9.120444 9.019774
56 | 7.977520 8.313923
57 | 8.104648 9.456128
58 | 8.617126 7.331723
59 | 9.033811 9.469706
60 | 8.327680 5.122092
61 | 8.532272 10.100909
62 | 9.295434 8.933824
63 | 9.905202 9.027559
64 | 10.585764 10.912733
65 | 10.427584 11.532578
66 | 9.072767 9.960144
67 | 9.164275 8.645121
68 | 9.746058 10.717080
69 | 9.286072 9.340024
70 | 8.188233 7.432415
71 | 7.948598 8.445419
72 | 7.563350 5.656178
73 | 8.972405 8.801869
74 | 9.980868 8.788996
75 | 7.753490 7.714248
76 | 7.431143 9.032819
77 | 8.943403 8.359354
78 | 10.481890 9.988969
79 | 9.150454 10.278760
80 | 8.123894 9.060351
81 | 8.626164 8.469342
82 | 7.354185 7.631252
83 | 11.323046 11.015032
84 | 8.190008 6.860792
85 | 8.412598 7.661358
86 | 9.258404 8.580382
87 | 11.007915 11.443881
88 | 8.279403 8.347003
89 | 8.931149 10.105221
90 | 10.239245 10.077473
91 | 8.129346 7.096877
92 | 8.485823 9.373561
93 | 10.703640 11.651618
94 | 9.500728 8.150228
95 | 9.712414 9.910445
96 | 9.333374 9.407557
97 | 8.787865 10.168021
98 | 9.238180 10.253478
99 | 9.577388 8.895150
100 | 10.447753 10.318227
101 | 9.303944 9.223136
102 | 9.883268 11.662945
103 | 9.471921 10.443792
104 | 10.007753 9.579912
105 | 8.110298 7.106263
106 | 6.964069 6.585040
107 | 10.413499 9.649309
108 | 8.032629 7.053254
109 | 8.015549 9.166753
110 | 10.462924 8.656612
111 | 9.530788 10.134130
112 | 9.202658 9.314222
113 | 10.103241 10.235159
114 | 7.849264 6.624856
115 | 9.059071 7.992555
116 | 10.172889 10.724789
117 | 9.528439 6.420990
118 | 7.190422 6.789792
119 | 9.085716 9.846328
120 | 9.452887 8.735386
121 | 7.417322 7.348594
122 | 8.468639 8.715086
123 | 8.303642 9.463231
124 | 9.939052 10.026771
125 | 8.701989 7.516978
126 | 9.737541 10.587281
127 | 8.280233 7.852444
128 | 10.648386 10.259203
129 | 9.173893 10.520372
130 | 9.135397 10.751406
131 | 7.594580 8.488833
132 | 8.587520 8.463406
133 | 8.581887 7.888644
134 | 9.448768 8.707422
135 | 7.882664 7.772030
136 | 10.050635 9.859720
137 | 9.012078 9.533899
138 | 8.770020 8.882996
139 | 9.428804 9.446306
140 | 8.504209 8.319693
141 | 9.800003 10.964667
142 | 8.069660 7.683099
143 | 10.012217 10.320644
144 | 8.704677 8.918146
145 | 8.198722 7.297786
146 | 9.868322 9.901657
147 | 9.426997 11.480353
148 | 9.228767 9.262976
149 | 8.952359 9.528471
150 | 8.186847 8.600587
151 | 9.026371 8.705143
152 | 9.483364 9.807079
153 | 7.826587 7.975401
154 | 11.197846 10.959298
155 | 7.632421 8.769745
156 | 8.761605 8.309365
157 | 9.353670 8.728758
158 | 6.466637 6.038996
159 | 8.370634 9.178830
160 | 10.337451 11.075600
161 | 8.917679 8.288367
162 | 9.076621 8.487626
163 | 7.278948 4.634097
164 | 10.153017 11.219183
165 | 7.132603 5.853118
166 | 9.338644 9.805940
167 | 9.878602 9.187000
168 | 10.009505 10.924505
169 | 9.384438 10.691860
170 | 7.535322 8.160481
171 | 6.808732 8.268469
172 | 8.302965 8.075009
173 | 8.345379 8.305356
174 | 9.517530 8.249839
175 | 9.267825 9.999109
176 | 10.291511 11.032664
177 | 8.605909 8.705207
178 | 8.331145 7.812295
179 | 8.632412 10.574287
180 | 8.766397 8.712107
181 | 9.407070 9.732756
182 | 9.709495 9.729569
183 | 10.422201 11.070360
184 | 6.831495 6.466763
185 | 8.187122 8.405929
186 | 8.523093 9.041844
187 | 7.952394 6.801220
188 | 10.490780 10.001468
189 | 10.813791 9.802494
190 | 7.861113 7.541475
191 | 8.800399 8.738974
192 | 7.542152 6.612838
193 | 9.446981 9.378659
194 | 8.281684 7.358572
195 | 8.473801 8.208343
196 | 11.736767 11.022029
197 | 8.379578 8.714348
198 | 8.313718 8.832381
199 | 9.342589 10.416659
200 | 7.560710 6.889648
201 | 9.295344 9.739040
202 | 9.176612 9.718781
203 | 8.614385 10.150521
204 | 9.079373 8.839794
205 | 10.333289 10.921255
206 | 9.453502 7.335134
207 | 10.174590 10.292500
208 | 9.693713 9.793636
209 | 7.474925 7.751391
210 | 10.107905 10.156997
211 | 9.257241 7.854266
212 | 10.209794 11.410157
213 | 7.248050 6.433676
214 | 10.150091 9.288597
215 | 10.077713 10.321500
216 | 8.191122 8.931519
217 | 8.791469 10.287216
218 | 9.229434 9.095193
219 | 8.682571 8.546005
220 | 7.524099 7.709751
221 | 8.442410 8.326037
222 | 9.364851 9.095989
223 | 9.061222 7.557899
224 | 7.989999 8.555363
225 | 8.801275 8.868732
226 | 10.351932 9.497796
227 | 10.230710 10.496151
228 | 9.783163 9.891408
229 | 10.651481 9.431617
230 | 8.387393 6.400507
231 | 9.003921 7.050003
232 | 8.483723 8.314886
233 | 9.020501 7.545771
234 | 9.329105 11.095661
235 | 9.583687 9.271929
236 | 8.908705 8.407529
237 | 8.835406 8.083517
238 | 9.736362 8.296735
239 | 10.030302 9.737178
240 | 8.287142 6.993460
241 | 9.173211 9.306335
242 | 9.026355 9.696531
243 | 9.128391 9.921247
244 | 11.486346 12.910777
245 | 11.519458 11.472111
246 | 9.027707 10.263974
247 | 9.351935 8.542200
248 | 9.421701 11.403201
249 | 9.005687 8.100969
250 | 7.015279 6.614278
251 | 8.213607 8.340948
252 | 8.226646 8.718997
253 | 8.144753 8.366877
254 | 10.133642 12.790169
255 | 10.763481 10.847016
256 | 10.003622 10.337716
257 | 9.007955 9.792482
258 | 8.670506 10.782931
259 | 10.386414 9.956162
260 | 10.104761 10.123044
261 | 8.079502 8.304075
262 | 9.945424 11.855409
263 | 8.642497 9.998066
264 | 9.349722 8.690328
265 | 9.034991 8.826490
266 | 8.738746 7.518464
267 | 8.919532 9.740312
268 | 9.464136 10.444588
269 | 10.710057 12.666857
270 | 10.042007 10.532091
271 | 8.447996 7.426363
272 | 9.509351 9.030516
273 | 11.946359 10.553075
274 | 9.981617 9.912651
275 | 9.853876 9.632967
276 | 10.560648 11.881714
277 | 8.370952 9.989491
278 | 8.323209 10.102529
279 | 9.828359 11.702462
280 | 8.515623 8.426754
281 | 9.004363 9.628036
282 | 10.529847 10.458031
283 | 10.028765 10.624880
284 | 9.448114 9.313227
285 | 8.332617 7.382295
286 | 8.323006 8.276608
287 | 7.740771 8.799750
288 | 8.379615 8.146192
289 | 8.340764 9.184458
290 | 9.863614 8.254694
291 | 9.969563 9.405134
292 | 9.164394 9.182127
293 | 10.622098 9.722592
294 | 9.592072 10.029446
295 | 8.212027 7.477366
296 | 9.080225 8.244448
297 | 8.555774 7.842325
298 | 9.958046 9.696221
299 | 8.972573 9.797128
300 | 9.213223 7.128437
301 | 8.737239 9.385138
302 | 10.333907 10.994856
303 | 8.797511 8.643075
304 | 11.044848 9.623160
305 | 8.539260 9.097113
306 | 11.582163 11.884333
307 | 7.863848 7.176199
308 | 6.218103 5.283562
309 | 9.120602 7.250190
310 | 9.001166 9.635203
311 | 8.081476 8.844224
312 | 9.369802 8.230911
313 | 8.768925 8.666987
314 | 9.841098 8.543896
315 | 10.451522 9.549511
316 | 9.755402 9.117522
317 | 7.988961 6.869854
318 | 8.872507 9.787118
319 | 10.363980 10.716608
320 | 6.315671 5.765953
321 | 9.638879 9.202355
322 | 8.588126 8.037966
323 | 8.947408 9.144386
324 | 9.051130 7.195132
325 | 9.321709 8.380668
326 | 10.146531 9.754745
327 | 9.843373 8.891437
328 | 9.213148 11.700632
329 | 7.630078 7.294753
330 | 8.093088 7.967590
331 | 7.488915 6.090652
332 | 8.126036 8.586472
333 | 8.760350 7.268987
334 | 10.201347 9.141013
335 | 7.838208 7.307700
336 | 6.155653 5.563997
337 | 7.767841 6.254528
338 | 8.425656 8.615832
339 | 10.362168 10.886815
340 | 10.180024 10.378934
341 | 9.794665 10.047812
342 | 9.970394 9.668279
343 | 7.030217 7.060471
344 | 9.275414 9.095738
345 | 10.314911 10.456539
346 | 9.259774 8.204851
347 | 10.023919 9.558307
348 | 8.887540 9.866704
349 | 9.851608 9.410989
350 | 8.710882 7.268012
351 | 9.017007 10.217673
352 | 7.976369 9.000979
353 | 8.738332 8.664734
354 | 8.344510 8.977600
355 | 8.959613 12.324240
356 | 9.169982 8.624635
357 | 7.487451 8.154859
358 | 8.706316 7.719455
359 | 9.564832 8.940403
360 | 8.327775 9.044509
361 | 9.734032 10.195255
362 | 8.021343 6.445092
363 | 9.081048 11.024397
364 | 7.626651 6.549263
365 | 10.725858 8.575374
366 | 8.731381 8.307788
367 | 10.394237 10.596874
368 | 7.029311 7.658832
369 | 9.517907 7.509904
370 | 10.394064 10.060898
371 | 10.752500 9.431601
372 | 9.692431 10.332130
373 | 9.651897 7.876862
374 | 8.592329 10.096837
375 | 10.212801 10.827496
376 | 9.045043 9.265524
377 | 8.901643 8.036115
378 | 10.794525 9.318830
379 | 11.040915 12.021746
380 | 8.390836 9.672469
381 | 9.840166 11.226568
382 | 10.806810 12.205633
383 | 8.924285 10.934056
384 | 8.411251 8.289672
385 | 7.808891 9.663290
386 | 9.733437 8.486958
387 | 8.300026 7.477374
388 | 8.221756 10.278308
389 | 9.096867 9.619677
390 | 9.410116 9.289188
391 | 10.097176 9.768470
392 | 9.387954 8.844855
393 | 9.376134 7.704630
394 | 8.231599 9.101203
395 | 9.910738 10.694855
396 | 8.645689 7.764589
397 | 8.090245 7.109596
398 | 9.253483 9.813672
399 | 9.331546 8.039386
400 | 9.843256 10.208792
401 | 9.713131 9.247665
402 | 9.259369 10.704622
403 | 10.243948 9.695883
404 | 6.396262 6.456390
405 | 8.936289 8.703871
406 | 8.750846 9.347273
407 | 6.497155 4.130251
408 | 9.516552 10.164848
409 | 9.125766 8.858775
410 | 8.374387 7.300114
411 | 8.132816 7.621107
412 | 10.099505 9.159134
413 | 9.356477 6.869999
414 | 8.112934 7.587547
415 | 7.265396 6.987031
416 | 11.950505 13.715109
417 | 10.745959 10.822171
418 | 8.893270 7.887332
419 | 6.003473 4.960219
420 | 7.498851 6.451334
421 | 10.162072 9.935954
422 | 8.732617 9.177679
423 | 9.300827 9.952360
424 | 11.908436 12.256801
425 | 9.371215 9.188645
426 | 9.943640 9.245037
427 | 7.386450 7.046819
428 | 8.410374 8.293218
429 | 7.830419 6.440253
430 | 8.263140 8.279446
431 | 11.448164 12.192363
432 | 8.216533 9.186628
433 | 9.316128 10.046697
434 | 8.156927 6.834792
435 | 9.951421 11.240598
436 | 9.059607 8.458446
437 | 10.476339 10.560461
438 | 7.548200 7.227127
439 | 9.432204 7.236705
440 | 9.402750 9.126413
441 | 11.188095 13.853426
442 | 9.520201 11.028131
443 | 8.884154 9.764071
444 | 8.961105 8.833117
445 | 8.549663 8.865765
446 | 10.111708 10.515462
447 | 9.024761 9.169368
448 | 7.904149 8.048756
449 | 9.240995 7.796142
450 | 8.126538 6.116125
451 | 7.442148 7.931335
452 | 9.486821 10.091359
453 | 9.834289 11.694720
454 | 9.009714 11.599170
455 | 9.761314 11.344083
456 | 6.993941 6.562988
457 | 8.659524 8.410107
458 | 7.685363 8.097297
459 | 7.793217 6.519109
460 | 8.883454 9.257347
461 | 8.781821 9.231980
462 | 7.946281 7.658978
463 | 8.523959 10.646480
464 | 9.031525 8.649648
465 | 8.317140 7.758978
466 | 9.192417 11.151218
467 | 8.408486 8.282182
468 | 10.327702 11.459048
469 | 8.389687 8.548727
470 | 8.642250 7.056870
471 | 8.833447 9.267638
472 | 8.805261 8.320281
473 | 9.726211 9.095997
474 | 8.477631 9.507530
475 | 9.738838 9.652110
476 | 8.272108 7.582696
477 | 9.258089 8.495931
478 | 8.334144 8.810766
479 | 8.150904 6.486032
480 | 7.259669 7.270156
481 | 11.034180 11.519954
482 | 10.705432 10.642527
483 | 8.388814 7.159137
484 | 8.559369 7.846284
485 | 7.187988 6.519313
486 | 8.811453 7.765900
487 | 8.492762 7.992941
488 | 8.739752 8.502909
489 | 10.150752 10.420295
490 | 7.062378 5.365289
491 | 8.448195 7.480000
492 | 10.224333 11.592750
493 | 9.533795 9.212845
494 | 9.519492 7.690501
495 | 9.661847 10.376189
496 | 7.963877 8.597193
497 | 10.184486 9.136709
498 | 8.505234 9.159210
499 | 8.187646 8.518690
500 | 9.167590 9.405917
501 | 8.612162 8.518755
502 | 10.970868 10.392229
503 | 9.603649 9.141095
504 | 9.704263 8.830178
505 | 9.657506 8.132449
506 | 9.337882 11.045306
507 | 9.521722 9.537764
508 | 8.954197 8.728179
509 | 8.635658 10.352662
510 | 8.910816 9.020317
511 | 9.900933 9.392002
512 | 10.247105 8.289649
513 | 9.571690 8.171237
514 | 7.388627 7.668071
515 | 8.354008 10.074590
516 | 9.775598 8.835696
517 | 8.768913 7.983604
518 | 8.330199 8.474098
519 | 8.169356 9.361172
520 | 10.346522 10.086434
521 | 7.976144 9.266702
522 | 8.429648 7.865824
523 | 11.261674 11.788587
524 | 10.051066 10.112425
525 | 8.954626 9.789343
526 | 8.382220 8.121012
527 | 9.820642 9.426441
528 | 8.125950 9.695087
529 | 8.646465 7.291808
530 | 8.190202 8.003737
531 | 8.773887 7.306175
532 | 8.731000 10.300436
533 | 9.163098 7.816769
534 | 9.456346 9.223922
535 | 9.645180 9.324053
536 | 8.835060 8.966915
537 | 9.325950 10.943248
538 | 9.941912 9.548535
539 | 9.282799 10.119488
540 | 9.567591 9.462164
541 | 8.529019 9.768001
542 | 9.314824 10.153727
543 | 8.264439 8.273860
544 | 8.307262 8.214036
545 | 9.122041 8.657861
546 | 8.404258 8.389365
547 | 7.828355 8.419433
548 | 9.803180 10.108286
549 | 8.662439 8.581953
550 | 8.883265 8.978377
551 | 8.012330 8.262451
552 | 9.420258 8.974878
553 | 7.015415 6.365940
554 | 9.888832 11.163036
555 | 9.677549 10.346431
556 | 8.410158 7.912899
557 | 9.464147 10.762900
558 | 7.067227 7.035717
559 | 9.320923 10.583089
560 | 9.056917 8.771241
561 | 8.110004 8.387789
562 | 10.310021 10.970014
563 | 8.211185 8.809627
564 | 8.942883 8.840746
565 | 9.479958 8.328700
566 | 8.973982 8.702291
567 | 8.519257 8.764855
568 | 9.424556 8.956911
569 | 7.222919 8.177787
570 | 8.257007 9.700619
571 | 9.778795 9.296134
572 | 8.028806 8.575974
573 | 9.886464 9.965076
574 | 9.090552 6.978930
575 | 9.605548 10.256751
576 | 9.959004 9.610229
577 | 8.308701 9.509124
578 | 7.748293 9.685933
579 | 8.311108 9.428114
580 | 9.697068 10.217956
581 | 9.582991 9.478773
582 | 9.167265 10.198412
583 | 10.329753 10.406602
584 | 8.908819 7.428789
585 | 10.072908 10.393294
586 | 7.992905 9.226629
587 | 8.907696 7.269366
588 | 8.421948 9.342968
589 | 7.481399 7.225033
590 | 10.358408 10.166130
591 | 8.786556 10.279943
592 | 9.658701 11.379367
593 | 10.167807 9.417552
594 | 8.653449 8.656681
595 | 8.020304 8.671270
596 | 8.364348 10.004068
597 | 9.119183 9.788199
598 | 8.405504 9.740580
599 | 11.020930 11.904350
600 | 9.755232 9.515713
601 | 10.059542 9.589748
602 | 8.727131 9.777998
603 | 7.666182 6.028642
604 | 8.870733 8.367501
605 | 9.340446 7.707269
606 | 9.919283 10.796813
607 | 7.905837 8.326034
608 | 10.181187 10.089865
609 | 8.797328 8.981988
610 | 8.466272 7.765032
611 | 10.335914 12.620539
612 | 9.365003 8.609115
613 | 8.011017 7.249489
614 | 10.923993 13.901513
615 | 7.074631 7.558720
616 | 9.824598 8.851297
617 | 8.861026 8.370857
618 | 10.127296 10.861535
619 | 10.548377 10.855695
620 | 8.880470 7.948761
621 | 8.901619 9.674705
622 | 7.813710 9.246912
623 | 10.128808 10.560668
624 | 11.096699 10.911644
625 | 8.551471 6.871514
626 | 8.907241 8.677815
627 | 10.571647 10.294838
628 | 8.815314 8.810725
629 | 8.453396 8.339296
630 | 9.594819 11.487580
631 | 10.714211 9.628908
632 | 7.428788 7.712869
633 | 10.892119 12.747752
634 | 9.024071 11.112692
635 | 7.803375 7.847038
636 | 8.521558 8.881848
637 | 9.742818 11.520203
638 | 9.832836 9.180396
639 | 8.703132 10.028498
640 | 9.905029 11.347606
641 | 10.037536 8.882688
642 | 8.629995 8.392863
643 | 9.583497 9.219663
644 | 8.781687 9.650598
645 | 9.344119 9.537024
646 | 10.407510 9.223929
647 | 7.244488 6.559021
648 | 10.643616 10.288383
649 | 8.757557 6.947901
650 | 10.784590 11.233350
651 | 10.028427 11.330033
652 | 7.968361 6.830308
653 | 8.925954 8.539113
654 | 7.738692 7.114987
655 | 8.192398 8.352016
656 | 10.412017 12.431122
657 | 8.208801 5.777678
658 | 7.820077 7.790720
659 | 9.542754 11.542541
660 | 6.817938 7.429229
661 | 7.365218 7.956797
662 | 9.274391 7.932700
663 | 9.546475 8.803412
664 | 7.471734 6.797870
665 | 8.016969 7.848070
666 | 8.852701 8.458114
667 | 8.215012 8.468330
668 | 6.975507 6.846980
669 | 9.435134 10.609700
670 | 9.228075 9.342622
671 | 8.388410 7.637856
672 | 7.111456 9.289163
673 | 9.403508 8.482654
674 | 9.133894 8.343575
675 | 10.670801 9.750821
676 | 9.983542 10.074537
677 | 10.012865 8.537017
678 | 8.929895 8.951909
679 | 7.666951 7.473615
680 | 9.493839 7.821783
681 | 8.894081 7.059413
682 | 9.593382 9.859732
683 | 9.126847 8.395700
684 | 9.532945 9.850696
685 | 9.459384 9.384213
686 | 8.982743 8.217062
687 | 10.107798 8.790772
688 | 10.563574 9.044890
689 | 8.278963 9.518790
690 | 8.734960 10.494129
691 | 9.597940 9.530895
692 | 10.025478 9.508270
693 | 10.335922 10.974063
694 | 8.404390 8.146748
695 | 7.108699 6.038469
696 | 8.873951 7.474227
697 | 8.731459 8.154455
698 | 8.795146 7.534687
699 | 6.407165 6.810352
700 | 9.979312 10.287430
701 | 8.786715 8.396736
702 | 10.753339 10.360567
703 | 10.508031 10.321976
704 | 10.636925 10.193797
705 | 10.614322 11.215420
706 | 8.916411 8.965286
707 | 8.112756 8.304769
708 | 10.833109 10.497542
709 | 8.319758 9.727691
710 | 9.945336 11.820097
711 | 10.150461 9.914715
712 | 10.185024 10.388722
713 | 9.793569 9.079955
714 | 10.590128 11.811596
715 | 8.505584 6.884282
716 | 10.461428 10.745439
717 | 8.755781 9.418427
718 | 7.488249 7.172072
719 | 10.238905 10.428659
720 | 9.887827 10.427821
721 | 8.529971 8.838217
722 | 8.375208 10.242837
723 | 8.901724 8.398304
724 | 8.607694 9.173198
725 | 8.691369 9.964261
726 | 9.584578 9.641546
727 | 10.265792 11.405078
728 | 7.592968 6.683355
729 | 8.692791 9.389031
730 | 7.589852 6.005793
731 | 10.550386 11.736584
732 | 8.578351 7.227055
733 | 7.526931 6.875134
734 | 8.577081 9.877115
735 | 9.272136 11.050928
736 | 10.300809 10.653059
737 | 8.642013 9.006681
738 | 9.720491 10.265202
739 | 9.029005 9.646928
740 | 8.736201 7.975603
741 | 8.672886 9.070759
742 | 8.370633 8.412170
743 | 9.483776 9.183341
744 | 6.790842 7.594992
745 | 9.842146 10.156810
746 | 9.563336 7.962532
747 | 8.724669 9.870732
748 | 9.012145 9.171326
749 | 9.116948 9.791167
750 | 6.219094 7.988420
751 | 9.468422 8.359975
752 | 8.825231 8.475208
753 | 9.572224 9.696428
754 | 9.609128 8.488175
755 | 9.428590 10.468998
756 | 8.293266 8.617701
757 | 9.423584 10.355688
758 | 9.240796 9.517228
759 | 10.915423 13.026252
760 | 10.854684 11.130866
761 | 9.226816 9.391796
762 | 9.580264 10.359235
763 | 7.289907 6.898208
764 | 9.338857 10.374025
765 | 9.523176 11.332190
766 | 10.162233 10.357396
767 | 8.873930 9.207398
768 | 8.607259 7.794804
769 | 8.852325 8.215797
770 | 8.077272 6.501042
771 | 8.169273 8.269613
772 | 6.806421 7.544423
773 | 8.793151 9.691549
774 | 11.640981 11.365702
775 | 9.544082 11.576545
776 | 9.009266 9.605596
777 | 9.726552 9.426719
778 | 9.495888 10.626624
779 | 8.683982 9.337864
780 | 8.322105 8.631099
781 | 8.887895 8.644931
782 | 8.662659 11.373025
783 | 9.263321 7.536016
784 | 7.802624 7.171625
785 | 8.773183 8.561565
786 | 8.730443 10.197596
787 | 8.942915 7.758383
788 | 8.057618 8.774996
789 | 8.112081 8.202349
790 | 10.378884 12.103755
791 | 9.248876 8.637249
792 | 9.739599 9.708576
793 | 8.126345 8.278487
794 | 8.894788 7.966117
795 | 9.683165 9.019221
796 | 10.886957 12.053843
797 | 9.668852 10.902132
798 | 7.486692 6.471138
799 | 8.794850 9.173609
800 | 8.835915 8.296727
801 | 9.443984 11.375344
802 | 8.696621 6.434580
803 | 9.645560 9.233722
804 | 9.623857 7.915590
805 | 10.840632 12.620268
806 | 7.298135 7.356141
807 | 9.639644 8.902389
808 | 9.849802 7.682624
809 | 10.609964 10.259615
810 | 9.768229 11.382811
811 | 7.646351 7.571849
812 | 10.230300 9.470859
813 | 8.224402 8.496866
814 | 6.879671 8.393648
815 | 7.976247 8.667221
816 | 9.183268 8.694550
817 | 11.471853 12.786280
818 | 10.428349 10.615726
819 | 8.090828 5.902504
820 | 9.738627 8.485792
821 | 8.139709 8.396333
822 | 9.508055 8.990529
823 | 8.857260 8.497732
824 | 8.902558 7.014433
825 | 9.660607 11.040833
826 | 8.772221 10.512150
827 | 11.020038 9.354134
828 | 7.918527 7.742062
829 | 7.630835 7.756260
830 | 11.043272 11.041613
831 | 9.299376 8.674157
832 | 9.795087 8.431837
833 | 9.415683 8.312101
834 | 7.942037 6.942913
835 | 9.724790 11.766496
836 | 10.222032 11.550876
837 | 8.894163 8.306020
838 | 8.394309 8.070420
839 | 9.012776 6.880548
840 | 9.661093 10.138921
841 | 9.896472 9.762372
842 | 9.135628 8.759928
843 | 8.762656 10.306028
844 | 8.602473 8.861956
845 | 10.085297 10.464774
846 | 10.644983 10.945767
847 | 9.034571 8.391668
848 | 8.602920 8.501944
849 | 8.224766 7.402758
850 | 8.755050 9.431085
851 | 9.669937 8.641049
852 | 10.693530 10.287124
853 | 9.462806 7.611153
854 | 9.287707 10.082363
855 | 10.941260 10.783728
856 | 9.263080 7.913328
857 | 10.167111 10.225338
858 | 8.783830 9.465345
859 | 8.958624 8.662136
860 | 9.841649 9.926781
861 | 7.205691 6.790638
862 | 8.629089 9.135461
863 | 7.469440 8.450442
864 | 8.179133 7.790434
865 | 8.083984 7.875520
866 | 9.271300 8.135359
867 | 8.652349 8.254397
868 | 7.983920 6.609684
869 | 7.836860 9.785238
870 | 7.418535 7.011256
871 | 8.458288 10.095364
872 | 9.387605 9.726911
873 | 8.663951 8.206705
874 | 10.146507 11.698577
875 | 8.937103 10.990924
876 | 11.218687 11.141945
877 | 8.363142 9.106936
878 | 7.877643 7.122922
879 | 9.620978 9.905689
880 | 9.509649 10.773209
881 | 6.748743 6.705385
882 | 9.300919 8.085029
883 | 9.332257 9.818791
884 | 7.898610 8.366643
885 | 9.841914 9.480675
886 | 6.920484 8.959501
887 | 8.544713 9.563136
888 | 8.162266 6.715277
889 | 8.659552 9.282008
890 | 10.673398 13.174824
891 | 9.024000 10.379238
892 | 8.183292 6.647572
893 | 10.544919 10.649602
894 | 7.201266 6.529605
895 | 9.557407 11.096821
896 | 8.304605 6.940929
897 | 9.742855 9.920897
898 | 10.024587 9.645222
899 | 10.002296 9.998940
900 | 8.965876 8.665419
901 | 7.823136 6.949572
902 | 8.125088 7.654065
903 | 6.569589 6.046863
904 | 10.195497 8.689129
905 | 11.730011 10.374221
906 | 8.739105 7.457571
907 | 9.820059 10.278526
908 | 9.547456 10.398198
909 | 8.375072 8.416302
910 | 8.889533 8.308929
911 | 8.861201 9.290408
912 | 12.677687 12.788463
913 | 9.100735 8.620537
914 | 7.728350 6.328219
915 | 7.955373 8.355028
916 | 8.733352 8.645414
917 | 10.257527 11.191813
918 | 9.246413 9.497014
919 | 9.745302 9.642035
920 | 7.785652 8.147621
921 | 7.431673 8.566399
922 | 8.654384 8.466701
923 | 8.475392 6.744677
924 | 9.968440 10.765192
925 | 10.163616 10.806963
926 | 10.238135 10.036636
927 | 9.902889 10.746730
928 | 9.523850 8.749708
929 | 9.214363 9.149178
930 | 9.266040 10.841502
931 | 8.494292 7.770942
932 | 10.821158 10.410192
933 | 8.645888 7.970308
934 | 9.885204 10.098080
935 | 9.084990 10.886349
936 | 9.277874 8.871449
937 | 8.135131 7.137064
938 | 7.917379 9.080522
939 | 9.685586 8.822850
940 | 8.558141 7.848112
941 | 9.502917 10.061255
942 | 6.409004 5.164774
943 | 10.149235 10.579951
944 | 7.847304 8.411351
945 | 8.846930 6.819939
946 | 8.675153 9.411147
947 | 9.476276 9.061508
948 | 11.099184 10.644263
949 | 8.792411 10.379405
950 | 8.400418 7.072706
951 | 8.555713 7.923805
952 | 8.024763 8.426993
953 | 8.642696 10.453412
954 | 7.906117 7.920408
955 | 8.793393 9.722878
956 | 8.280364 7.669854
957 | 9.387766 9.706245
958 | 9.626853 10.762499
959 | 10.163631 10.919007
960 | 9.375543 11.513524
961 | 9.309440 8.575699
962 | 10.055329 10.297255
963 | 8.706241 9.097172
964 | 10.032934 11.951897
965 | 10.812974 11.311435
966 | 10.352603 10.819865
967 | 8.276870 9.055403
968 | 8.397389 7.944434
969 | 9.371741 10.395790
970 | 10.825710 10.144099
971 | 9.158483 11.385382
972 | 10.658639 11.389856
973 | 8.091762 6.631039
974 | 10.734892 10.054598
975 | 11.535880 11.604912
976 | 9.799077 11.371677
977 | 8.478725 9.078455
978 | 9.399902 8.947744
979 | 7.305377 8.144973
980 | 7.613377 6.668798
981 | 10.681308 10.830845
982 | 9.973855 10.004133
983 | 9.369918 7.855433
984 | 8.838223 7.429033
985 | 9.521831 10.623930
986 | 9.724419 10.447452
987 | 8.890224 9.275923
988 | 9.932763 11.589953
989 | 10.839337 9.051250
990 | 8.497708 7.521701
991 | 8.440236 8.705670
992 | 9.063566 9.755744
993 | 8.449647 8.929485
994 | 8.554576 8.063231
995 | 10.348606 10.550718
996 | 5.985254 5.186844
997 | 9.931937 10.175582
998 | 9.854922 9.201393
999 | 9.114580 9.134215
1000 | 10.334899 8.543604
1001 |
--------------------------------------------------------------------------------
/KNN/.project:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <projectDescription>
3 | 	<name>KNN</name>
4 | 	<comment></comment>
5 | 	<projects>
6 | 	</projects>
7 | 	<buildSpec>
8 | 		<buildCommand>
9 | 			<name>org.python.pydev.PyDevBuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 	</buildSpec>
14 | 	<natures>
15 | 		<nature>org.python.pydev.pythonNature</nature>
16 | 	</natures>
17 | </projectDescription>
18 | 
--------------------------------------------------------------------------------
/KNN/.pydevproject:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2 | <?eclipse-pydev version="1.0"?>
3 | 
4 | <pydev_project>
5 | <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
6 | <path>/KNN</path>
7 | </pydev_pathproperty>
8 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
9 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
10 | 
11 | </pydev_project>
--------------------------------------------------------------------------------
/KNN/src/knn.py:
--------------------------------------------------------------------------------
1 |
2 | from __future__ import division
3 | from numpy import *
4 | import operator
5 |
6 |
7 |
8 | def createDataset():
9 | group=array([[9,400],[200,5],[100,77],[40,300]])
10 |
11 | labels=['1','2','3','1']
12 | return group,labels
13 |
14 | def classify(inX, dataSet, labels, k):
15 | dataSetSize = dataSet.shape[0]
16 | diffMat = tile(inX, (dataSetSize,1)) - dataSet
17 | sqDiffMat = diffMat**2
18 | sqDistances = sqDiffMat.sum(axis=1)
19 | distances = sqDistances**0.5
20 | sortedDistIndicies = distances.argsort()
21 |
22 | classCount={}
23 | for i in range(k):
24 | voteIlabel = labels[sortedDistIndicies[i]]
25 |
26 | classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
27 | sortedClassCount = sorted(classCount.iteritems(), key=operator.itemgetter(1), reverse=True)
28 | return sortedClassCount[0][0]
29 |
30 | def autoNorm(dataSet):
31 | minVals = dataSet.min(0)
32 | maxVals = dataSet.max(0)
33 | ranges = maxVals - minVals
34 | normDataSet = zeros(shape(dataSet))
35 |
36 | m = dataSet.shape[0]
37 | normDataSet = dataSet - tile(minVals, (m,1))
38 | #print normDataSet
39 | normDataSet = normDataSet/tile(ranges, (m,1)) #element wise divide
40 | # print normDataSet
41 | return normDataSet, ranges, minVals
42 |
43 | def result():
44 |     datingDataMat,datingLabels =createDataset()  #build the small in-memory data set
45 |     normMat, ranges, minVals = autoNorm(datingDataMat)
46 |     book=199
47 |     film=4
48 |     print 'watch book %d film %d' % (book, film)
49 |     print 'you belong to class:'
50 |     print classify((array([book,film])-minVals)/ranges, normMat, datingLabels, 1)
51 |
52 |
53 |
54 | if __name__=='__main__':
55 | result()
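56 | 
57 | # Sketch: with k=1 the vote reduces to the single nearest row; passing k=3,
58 | # e.g. classify(query, normMat, datingLabels, 3), would take a majority vote
59 | # over the three closest rows instead.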
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/LogisticRegression/.project:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <projectDescription>
3 |     <name>LogisticRegression</name>
4 |     <comment></comment>
5 |     <projects>
6 |     </projects>
7 |     <buildSpec>
8 |         <buildCommand>
9 |             <name>org.python.pydev.PyDevBuilder</name>
10 |             <arguments>
11 |             </arguments>
12 |         </buildCommand>
13 |     </buildSpec>
14 |     <natures>
15 |         <nature>org.python.pydev.pythonNature</nature>
16 |     </natures>
17 | </projectDescription>
18 |
--------------------------------------------------------------------------------
/LogisticRegression/.pydevproject:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2 | <?eclipse-pydev version="1.0"?>
3 | <pydev_project>
4 |
5 | <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
6 | <path>/LogisticRegression</path>
7 | </pydev_pathproperty>
8 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
9 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
10 |
11 | </pydev_project>
--------------------------------------------------------------------------------
/LogisticRegression/src/LR.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from numpy import *
3 |
4 |
5 | def loadDataSet():
6 | dataMat = []; labelMat = []
7 | fr = open('/Users/hakuri/Desktop/testSet.txt')
8 | for line in fr.readlines():
9 | lineArr = line.strip().split()
10 | dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
11 | labelMat.append(int(lineArr[2]))
12 | return dataMat,labelMat
13 |
14 | def sigmoid(inX):
15 | return 1.0/(1+exp(-inX))
16 |
17 | def gradAscent(dataMatIn, classLabels):
18 | dataMatrix = mat(dataMatIn) #convert to NumPy matrix
19 | labelMat = mat(classLabels).transpose() #convert to NumPy matrix
20 |
21 | m,n = shape(dataMatrix)
22 | alpha = 0.001
23 | maxCycles = 500
24 | weights = ones((n,1))
25 |
26 | for k in range(maxCycles): #heavy on matrix operations
27 | h = sigmoid(dataMatrix*weights) #matrix mult
28 | error = (labelMat - h) #vector subtraction
29 | weights = weights + alpha * dataMatrix.transpose()* error #matrix mult
30 | return weights
31 |
32 | def GetResult():
33 | dataMat,labelMat=loadDataSet()
34 | weights=gradAscent(dataMat,labelMat)
35 | print weights
36 | plotBestFit(weights)
37 |
38 |
39 | def plotBestFit(weights):
40 |
41 | dataMat,labelMat=loadDataSet()
42 | dataArr = array(dataMat)
43 | n = shape(dataArr)[0]
44 | xcord1 = []; ycord1 = []
45 | xcord2 = []; ycord2 = []
46 | for i in range(n):
47 | if int(labelMat[i])== 1:
48 | xcord1.append(dataArr[i,1]); ycord1.append(dataArr[i,2])
49 | else:
50 | xcord2.append(dataArr[i,1]); ycord2.append(dataArr[i,2])
51 | fig = plt.figure()
52 | ax = fig.add_subplot(111)
53 | ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
54 | ax.scatter(xcord2, ycord2, s=30, c='green')
55 | x = arange(-3.0, 3.0, 0.1)
56 |     # decision boundary: solve w0 + w1*x1 + w2*x2 = 0 for x2
57 |     y = (-weights[0,0]-weights[1,0]*x)/weights[2,0]
58 | ax.plot(x,y)
59 | plt.xlabel('X1'); plt.ylabel('X2');
60 | plt.show()
61 |
62 | if __name__=='__main__':
63 | GetResult()
--------------------------------------------------------------------------------
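Note on LR.py: gradAscent above is batch gradient ascent; each of the 500 cycles recomputes the sigmoid over the full data set and applies the update w := w + alpha * X^T (y - h). A common lighter-weight alternative is stochastic gradient ascent, which updates the weights one randomly chosen sample at a time with a decaying step size. The sketch below is illustrative only and not part of the repository; the helper makeToyData is hypothetical, standing in for loadDataSet so the sketch runs without the user-specific testSet.txt path.

    import random
    from numpy import array, exp

    def sigmoid(inX):
        return 1.0 / (1 + exp(-inX))

    def stocGradAscent(dataMat, classLabels, numIter=150):
        dataArr = array(dataMat)
        m, n = dataArr.shape
        weights = array([1.0] * n)              # start from all-ones, as gradAscent does
        for it in range(numIter):
            indices = list(range(m))
            random.shuffle(indices)             # visit samples in random order each pass
            for k, i in enumerate(indices):
                alpha = 4.0 / (1.0 + it + k) + 0.01     # step size decays over time
                h = sigmoid(sum(dataArr[i] * weights))  # scalar prediction for sample i
                error = classLabels[i] - h
                weights = weights + alpha * error * dataArr[i]
        return weights

    def makeToyData():
        # hypothetical stand-in for loadDataSet(): bias term plus two features
        data = [[1.0, 1.0, 2.0], [1.0, 1.5, 1.8], [1.0, 5.0, 8.0], [1.0, 5.5, 8.2]]
        labels = [0, 0, 1, 1]
        return data, labels

    if __name__ == '__main__':
        data, labels = makeToyData()
        print(stocGradAscent(data, labels))

Because every update touches a single row instead of the whole matrix, each pass is O(m*n) with no matrix inversion or full-batch multiply, which is why this variant is usually preferred once m grows large.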
/README.md:
--------------------------------------------------------------------------------
1 | MachineLearning
2 | ===============
3 |
4 | Basic machine learning algorithms.
5 |
--------------------------------------------------------------------------------
/SMO/.project:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <projectDescription>
3 |     <name>SMO</name>
4 |     <comment></comment>
5 |     <projects>
6 |     </projects>
7 |     <buildSpec>
8 |         <buildCommand>
9 |             <name>org.python.pydev.PyDevBuilder</name>
10 |             <arguments>
11 |             </arguments>
12 |         </buildCommand>
13 |     </buildSpec>
14 |     <natures>
15 |         <nature>org.python.pydev.pythonNature</nature>
16 |     </natures>
17 | </projectDescription>
18 |
--------------------------------------------------------------------------------
/SMO/.pydevproject:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2 | <?eclipse-pydev version="1.0"?>
3 | <pydev_project>
4 |
5 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
6 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
7 | <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
8 | <path>/SMO</path>
9 | </pydev_pathproperty>
10 |
11 | </pydev_project>
--------------------------------------------------------------------------------
/SMO/src/SMO.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from numpy import *
3 | from time import sleep
4 |
5 | def loadDataSet(fileName):
6 | dataMat = []; labelMat = []
7 | fr = open(fileName)
8 | for line in fr.readlines():
9 | lineArr = line.strip().split('\t')
10 | dataMat.append([float(lineArr[0]), float(lineArr[1])])
11 | labelMat.append(float(lineArr[2]))
12 | return dataMat,labelMat
13 |
14 | def selectJrand(i,m):
15 | j=i #we want to select any J not equal to i
16 | while (j==i):
17 | j = int(random.uniform(0,m))
18 | return j
19 |
20 | def clipAlpha(aj,H,L):
21 | if aj > H:
22 | aj = H
23 | if L > aj:
24 | aj = L
25 | return aj
26 |
27 | def smoSimple(dataMatIn, classLabels, C, toler, maxIter):
28 | dataMatrix = mat(dataMatIn); labelMat = mat(classLabels).transpose()
29 | b = 0; m,n = shape(dataMatrix)
30 | alphas = mat(zeros((m,1)))
31 | iter = 0
32 | while (iter < maxIter):
33 | alphaPairsChanged = 0
34 | for i in range(m):
35 | fXi = float(multiply(alphas,labelMat).T*(dataMatrix*dataMatrix[i,:].T)) + b
36 | Ei = fXi - float(labelMat[i])#if checks if an example violates KKT conditions
37 | if ((labelMat[i]*Ei < -toler) and (alphas[i] < C)) or ((labelMat[i]*Ei > toler) and (alphas[i] > 0)):
38 | j = selectJrand(i,m)
39 | fXj = float(multiply(alphas,labelMat).T*(dataMatrix*dataMatrix[j,:].T)) + b
40 | Ej = fXj - float(labelMat[j])
41 | alphaIold = alphas[i].copy(); alphaJold = alphas[j].copy();
42 | if (labelMat[i] != labelMat[j]):
43 | L = max(0, alphas[j] - alphas[i])
44 | H = min(C, C + alphas[j] - alphas[i])
45 | else:
46 | L = max(0, alphas[j] + alphas[i] - C)
47 | H = min(C, alphas[j] + alphas[i])
48 |                 if L==H: print "L==H"; continue
49 | eta = 2.0 * dataMatrix[i,:]*dataMatrix[j,:].T - dataMatrix[i,:]*dataMatrix[i,:].T - dataMatrix[j,:]*dataMatrix[j,:].T
50 | if eta >= 0: print "eta>=0"; continue
51 | alphas[j] -= labelMat[j]*(Ei - Ej)/eta
52 | alphas[j] = clipAlpha(alphas[j],H,L)
53 |                 if (abs(alphas[j] - alphaJold) < 0.00001): print "j not moving enough"; continue
54 | alphas[i] += labelMat[j]*labelMat[i]*(alphaJold - alphas[j])#update i by the same amount as j
55 |                 #the update is in the opposite direction
56 | b1 = b - Ei- labelMat[i]*(alphas[i]-alphaIold)*dataMatrix[i,:]*dataMatrix[i,:].T - labelMat[j]*(alphas[j]-alphaJold)*dataMatrix[i,:]*dataMatrix[j,:].T
57 | b2 = b - Ej- labelMat[i]*(alphas[i]-alphaIold)*dataMatrix[i,:]*dataMatrix[j,:].T - labelMat[j]*(alphas[j]-alphaJold)*dataMatrix[j,:]*dataMatrix[j,:].T
58 | if (0 < alphas[i]) and (C > alphas[i]): b = b1
59 | elif (0 < alphas[j]) and (C > alphas[j]): b = b2
60 | else: b = (b1 + b2)/2.0
61 | alphaPairsChanged += 1
62 | # print "iter: %d i:%d, pairs changed %d" % (iter,i,alphaPairsChanged)
63 | if (alphaPairsChanged == 0): iter += 1
64 | else: iter = 0
65 | # print "iteration number: %d" % iter
66 |
67 | return b,alphas
68 |
69 | def matplot(dataMat,labelMat):
70 |     xcord1 = []; ycord1 = []
71 |     xcord2 = []; ycord2 = []
72 |     xcord3 = []; ycord3 = []
73 |     for i in range(len(dataMat)):
74 |         if labelMat[i]==1:
75 |             xcord1.append(dataMat[i][0])
76 |             ycord1.append(dataMat[i][1])
77 |         else:
78 |             xcord2.append(dataMat[i][0])
79 |             ycord2.append(dataMat[i][1])
80 |     b,alphas=smoSimple(dataMat,labelMat,0.6,0.001,40)
81 |     for j in range(len(dataMat)):
82 |         if alphas[j]>0:    # nonzero alphas mark the support vectors
83 |             xcord3.append(dataMat[j][0])
84 |             ycord3.append(dataMat[j][1])
85 | fig = plt.figure()
86 | ax = fig.add_subplot(111)
87 |
88 | ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
89 | ax.scatter(xcord2, ycord2, s=30, c='green')
90 | ax.scatter(xcord3, ycord3, s=80, c='blue')
91 | ax.plot()
92 | plt.xlabel('X1'); plt.ylabel('X2');
93 | plt.show()
94 |
95 | if __name__=='__main__':
96 | dataMat,labelMat=loadDataSet('/Users/hakuri/Desktop/testSet.txt')
97 | # b,alphas=smoSimple(dataMat,labelMat,0.6,0.001,40)
98 | # print b,alphas[alphas>0]
99 | matplot(dataMat,labelMat)
--------------------------------------------------------------------------------
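smoSimple returns only the intercept b and the alphas; the hyperplane's normal vector follows from w = sum_i alpha_i * y_i * x_i, where only support vectors (alpha_i > 0) contribute nonzero terms. Below is a minimal sketch of that recovery step, assuming the dataMat/labelMat shapes produced by loadDataSet above; the function name calcWs is illustrative, not something SMO.py defines.

    from numpy import mat, multiply, shape, zeros

    def calcWs(alphas, dataMat, labelMat):
        # w = sum_i alpha_i * y_i * x_i; terms with alpha_i == 0 drop out
        X = mat(dataMat); y = mat(labelMat).transpose()
        m, n = shape(X)
        w = mat(zeros((n, 1)))
        for i in range(m):
            w += multiply(alphas[i] * y[i], X[i, :].T)
        return w

    # usage, after the __main__ block above has produced b and alphas:
    #   w = calcWs(alphas, dataMat, labelMat)
    #   print(mat(dataMat[0]) * w + b)   # sign should agree with labelMat[0]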
/ShannonEnt/ShannonEnt/.project:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <projectDescription>
3 |     <name>ShannonEnt</name>
4 |     <comment></comment>
5 |     <projects>
6 |     </projects>
7 |     <buildSpec>
8 |         <buildCommand>
9 |             <name>org.python.pydev.PyDevBuilder</name>
10 |             <arguments>
11 |             </arguments>
12 |         </buildCommand>
13 |     </buildSpec>
14 |     <natures>
15 |         <nature>org.python.pydev.pythonNature</nature>
16 |     </natures>
17 | </projectDescription>
18 |
--------------------------------------------------------------------------------
/ShannonEnt/ShannonEnt/.pydevproject:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2 | <?eclipse-pydev version="1.0"?>
3 | <pydev_project>
4 |
5 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
6 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
7 | <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
8 | <path>/ShannonEnt</path>
9 | </pydev_pathproperty>
10 |
11 | </pydev_project>
--------------------------------------------------------------------------------
/ShannonEnt/ShannonEnt/src/shannon.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | def calcShannonEnt(dataSet):
4 | numEntries=len(dataSet)
5 |
6 | labelCounts={}
7 |
8 | for featVec in dataSet:
9 | currentLabel=featVec[-1]
10 |
11 |         if currentLabel not in labelCounts:
12 | labelCounts[currentLabel]=0
13 | labelCounts[currentLabel]+=1
14 | shannonEnt=0.0
15 |
16 | for key in labelCounts:
17 |
18 | prob =float(labelCounts[key])/numEntries
19 | shannonEnt-=prob*math.log(prob,2)
20 |
21 | return shannonEnt
22 |
23 |
24 | def createDataSet():
25 |
26 |     dataSet=[[1,0,'man'],[1,1,'man'],[0,1,'man'],[0,0,'woman']]
27 | labels=['throat','mustache']
28 | return dataSet,labels
29 |
30 | def splitDataSet(dataSet, axis, value):
31 | retDataSet = []
32 | for featVec in dataSet:
33 | if featVec[axis] == value:
34 | reducedFeatVec = featVec[:axis] #chop out axis used for splitting
35 | reducedFeatVec.extend(featVec[axis+1:])
36 | retDataSet.append(reducedFeatVec)
37 | return retDataSet
38 |
39 | def chooseBestFeatureToSplit(dataSet):
40 | numFeatures = len(dataSet[0]) - 1 #the last column is used for the labels
41 | baseEntropy = calcShannonEnt(dataSet)
42 | bestInfoGain = 0.0; bestFeature = -1
43 | for i in range(numFeatures): #iterate over all the features
44 | featList = [example[i] for example in dataSet]#create a list of all the examples of this feature
45 |
46 | uniqueVals = set(featList) #get a set of unique values
47 |
48 | newEntropy = 0.0
49 | for value in uniqueVals:
50 | subDataSet = splitDataSet(dataSet, i, value)
51 | prob = len(subDataSet)/float(len(dataSet))
52 | newEntropy += prob * calcShannonEnt(subDataSet)
53 | infoGain = baseEntropy - newEntropy #calculate the info gain; ie reduction in entropy
54 |
55 | if (infoGain > bestInfoGain): #compare this to the best gain so far
56 | bestInfoGain = infoGain #if better than current best, set to best
57 | bestFeature = i
58 | return bestFeature #returns an integer
59 |
60 |
61 |
62 | def getResult():
63 | dataSet,labels=createDataSet()
64 | # splitDataSet(dataSet,1,1)
65 |     # prints 0: both features tie on info gain, so the first index wins
66 |     print chooseBestFeatureToSplit(dataSet)
67 | #print calcShannonEnt(dataSet)
68 |
69 |
70 | if __name__=='__main__':
71 |
72 |
73 | getResult()
74 |
75 |
--------------------------------------------------------------------------------
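As a quick sanity check of shannon.py: the toy set from createDataSet has three 'man' rows and one 'woman' row, so calcShannonEnt returns H = -(3/4)log2(3/4) - (1/4)log2(1/4) ≈ 0.811. Splitting on either feature produces one pure subset (entropy 0) and one 50/50 subset (entropy 1), so both features share an information gain of 0.811 - 0.5 ≈ 0.311; the strict > comparison in chooseBestFeatureToSplit keeps the first winner, which is why getResult prints 0. A worked check:

    import math

    # base entropy of the 3-'man'/1-'woman' toy set
    base = -(3.0/4)*math.log(3.0/4, 2) - (1.0/4)*math.log(1.0/4, 2)
    print(base)                      # ~0.8113

    # either feature splits into {man, man} (entropy 0) and {man, woman} (entropy 1)
    after = 0.5*0.0 + 0.5*1.0
    print(base - after)              # info gain ~0.3113 for both features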