├── AutoNormal └── AutoNorm.py ├── Bayes ├── .project ├── .pydevproject └── src │ └── Bayes.py ├── DecisionStump ├── .project ├── .pydevproject └── src │ └── Adaboosting.py ├── DecisionTree ├── .project ├── .pydevproject └── src │ └── dt.py ├── HashMap ├── .classpath ├── .project ├── .settings │ └── org.eclipse.jdt.core.prefs ├── bin │ └── Main.class ├── final.txt └── src │ └── Main.java ├── K-means ├── K-means │ ├── .project │ ├── .pydevproject │ └── src │ │ └── Test.py ├── effect.png └── testSet.txt ├── KNN ├── .project ├── .pydevproject └── src │ └── knn.py ├── LICENSE ├── LogisticRegression ├── .project ├── .pydevproject └── src │ └── LR.py ├── README.md ├── SMO ├── .project ├── .pydevproject └── src │ └── SMO.py └── ShannonEnt └── ShannonEnt ├── .project ├── .pydevproject └── src └── shannon.py /AutoNormal/AutoNorm.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import division 3 | def GetAverage(mat): 4 | 5 | n=len(mat) 6 | m= width(mat) 7 | num = [0]*m 8 | for j in range(0,m): 9 | for i in mat: 10 | num[j]=num[j]+i[j] 11 | num[j]=num[j]/n 12 | return num 13 | 14 | def width(lst): #number of columns in a row-major matrix 15 | i=0 16 | for j in lst[0]: 17 | i=i+1 18 | return i 19 | 20 | def GetVar(average,mat): 21 | ListMat=[] 22 | for i in mat: 23 | ListMat.append(list(map(lambda x: x[0]-x[1], zip(average, i)))) 24 | 25 | n=len(ListMat) 26 | m= width(ListMat) 27 | num = [0]*m 28 | for j in range(0,m): 29 | for i in ListMat: 30 | num[j]=num[j]+(i[j]*i[j]) 31 | num[j]=num[j]/n 32 | return num 33 | 34 | def DenoisMat(mat): #cap every value above mean+variance of its column (note: modifies mat in place) 35 | average=GetAverage(mat) 36 | variance=GetVar(average,mat) 37 | section=list(map(lambda x: x[0]+x[1], zip(average, variance))) 38 | 39 | n=len(mat) 40 | m= width(mat) 41 | num = [0]*m 42 | denoisMat=[] 43 | for i in mat: 44 | for j in range(0,m): 45 | if i[j]>section[j]: 46 | i[j]=section[j] 47 | denoisMat.append(i) 48 | return denoisMat 49 | 50 | def AutoNorm(mat): #min-max scaling of every column to [0,1] 51 | n=len(mat) 52 | m= width(mat) 53 | MinNum=list(mat[0]) #seed min/max with the first row instead of magic constants, so negative data also works 54 | MaxNum=list(mat[0]) 55 | for i in mat: 56 | for j in range(0,m): 57 | if i[j]>MaxNum[j]: 58 | MaxNum[j]=i[j] 59 | 60 | for p in mat: 61 | for q in range(0,m): 62 | if p[q]<=MinNum[q]: 63 | MinNum[q]=p[q] 64 | 65 | section=list(map(lambda x: x[0]-x[1], zip(MaxNum, MinNum))) 66 | print section 67 | NormMat=[] 68 | 69 | for k in mat: 70 | 71 | distance=list(map(lambda x: x[0]-x[1], zip(k, MinNum))) 72 | value=list(map(lambda x: x[0]/x[1], zip(distance,section))) 73 | NormMat.append(value) 74 | return NormMat 75 | 76 | if __name__=='__main__': 77 | mat=[[1,42,512],[4,5,6],[7,8,9],[2,2,2],[2,10,5]] 78 | a=GetAverage(mat) 79 | b=GetVar(a,mat) #per-column variance, computed for inspection 80 | print a, 81 | print DenoisMat(mat) 82 | 83 | # print list(map(lambda x: x[0]-x[1], zip(v2, v1))) 84 | print AutoNorm(mat) -------------------------------------------------------------------------------- /Bayes/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | Bayes 4 | 5 | 6 | 7 | 8 | 9 | org.python.pydev.PyDevBuilder 10 | 11 | 12 | 13 | 14 | 15 | org.python.pydev.pythonNature 16 | 17 | 18 | -------------------------------------------------------------------------------- /Bayes/.pydevproject: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Default 6 | python 2.7 7 | 8 | /Bayes 9 | 10 | 11 | -------------------------------------------------------------------------------- /Bayes/src/Bayes.py: -------------------------------------------------------------------------------- 1 | from
numpy import * 2 | 3 | def loadDataSet(): 4 | postingList=[['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'], 5 | ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'], 6 | ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'], 7 | ['stop', 'posting', 'stupid', 'worthless', 'garbage'], 8 | ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'], 9 | ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']] 10 | classVec = [0,1,0,1,0,1] #1 is abusive, 0 not 11 | return postingList,classVec 12 | 13 | def createVocabList(dataSet): 14 | vocabSet = set([]) #create empty set 15 | for document in dataSet: 16 | vocabSet = vocabSet | set(document) #union of the two sets 17 | return list(vocabSet) 18 | 19 | def setOfWords2Vec(vocabList, inputSet): 20 | returnVec = [0]*len(vocabList) 21 | for word in inputSet: 22 | if word in vocabList: 23 | returnVec[vocabList.index(word)] = 1 24 | else: print "the word: %s is not in my Vocabulary!" % word 25 | return returnVec 26 | 27 | 28 | def trainNB0(trainMatrix,trainCategory): 29 | numTrainDocs = len(trainMatrix) 30 | 31 | numWords = len(trainMatrix[0]) 32 | 33 | pAbusive = sum(trainCategory)/float(numTrainDocs) #sum(trainCategory) counts the abusive docs (here 1+1+1) 34 | p0Num = zeros(numWords); p1Num = zeros(numWords) #change to ones() 35 | 36 | p0Denom = 0.0; p1Denom = 0.0 #change to 2.0 37 | for i in range(numTrainDocs): 38 | if trainCategory[i] == 1: 39 | p1Num += trainMatrix[i] 40 | p1Denom += sum(trainMatrix[i]) 41 | else: 42 | p0Num += trainMatrix[i] 43 | p0Denom += sum(trainMatrix[i]) 44 | 45 | p1Vect = p1Num/p1Denom #change to log() 46 | p0Vect = p0Num/p0Denom #change to log() 47 | return p0Vect,p1Vect,pAbusive 48 | 49 | def getResult(listPost,listClasses): 50 | trainMat=[] 51 | vocabList=createVocabList(listPost) #renamed so the list() builtin is not shadowed 52 | for postinDoc in listPost: 53 | trainMat.append(setOfWords2Vec(vocabList,postinDoc)) 54 | 55 | p0v,p1v,pAb=trainNB0(trainMat,listClasses) 56 | # print p0v.index(max(p0v)) 57 | 58 | print p1v 59 | print max(p1v) 60 | print vocabList 61 | if __name__=='__main__': 62 | postingList,classVec=loadDataSet() 63 | 64 | 65 | getResult(postingList,classVec) 66 | # print setOfWords2Vec(createVocabList(postingList),postingList[0]) 67 | -------------------------------------------------------------------------------- /DecisionStump/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | DecisionStump 4 | 5 | 6 | 7 | 8 | 9 | org.python.pydev.PyDevBuilder 10 | 11 | 12 | 13 | 14 | 15 | org.python.pydev.pythonNature 16 | 17 | 18 | -------------------------------------------------------------------------------- /DecisionStump/.pydevproject: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Default 6 | python 2.7 7 | 8 | /DecisionStump 9 | 10 | 11 | -------------------------------------------------------------------------------- /DecisionStump/src/Adaboosting.py: -------------------------------------------------------------------------------- 1 | from numpy import * 2 | 3 | def loadSimpData(): 4 | datMat = matrix([[ 1. , 2.1], 5 | [ 2. , 1.1], 6 | [ 1.3, 1. ], 7 | [ 1. , 1. ], 8 | [ 2. , 1. 
]]) 9 | classLabels = [1.0, 1.0, -1.0, -1.0, 1.0] 10 | return datMat,classLabels 11 | 12 | def loadDataSet(fileName): #general function to parse tab -delimited floats 13 | numFeat = len(open(fileName).readline().split('\t')) #get number of fields 14 | dataMat = []; labelMat = [] 15 | fr = open(fileName) 16 | for line in fr.readlines(): 17 | lineArr =[] 18 | curLine = line.strip().split('\t') 19 | for i in range(numFeat-1): 20 | lineArr.append(float(curLine[i])) 21 | dataMat.append(lineArr) 22 | labelMat.append(float(curLine[-1])) 23 | return dataMat,labelMat 24 | 25 | def stumpClassify(dataMatrix,dimen,threshVal,threshIneq):#just classify the data 26 | retArray = ones((shape(dataMatrix)[0],1)) 27 | if threshIneq == 'lt': 28 | retArray[dataMatrix[:,dimen] <= threshVal] = -1.0 29 | else: 30 | retArray[dataMatrix[:,dimen] > threshVal] = -1.0 31 | 32 | return retArray 33 | 34 | 35 | def buildStump(dataArr,classLabels,D): 36 | dataMatrix = mat(dataArr); labelMat = mat(classLabels).T 37 | m,n = shape(dataMatrix) 38 | numSteps = 10.0; bestStump = {}; bestClasEst = mat(zeros((m,1))) 39 | minError = inf #init error sum, to +infinity 40 | for i in range(n):#loop over all dimensions 41 | rangeMin = dataMatrix[:,i].min(); rangeMax = dataMatrix[:,i].max(); 42 | 43 | stepSize = (rangeMax-rangeMin)/numSteps 44 | for j in range(-1,int(numSteps)+1):#loop over all range in current dimension 45 | for inequal in ['lt', 'gt']: #go over less than and greater than 46 | threshVal = (rangeMin + float(j) * stepSize) 47 | 48 | predictedVals = stumpClassify(dataMatrix,i,threshVal,inequal)#call stump classify with i, j, lessThan 49 | errArr = mat(ones((m,1))) 50 | 51 | 52 | errArr[predictedVals == labelMat] = 0 53 | 54 | weightedError = D.T*errArr #calc total error multiplied by D 55 | #print "split: dim %d, thresh %.2f, thresh ineqal: %s, the weighted error is %.3f" % (i, threshVal, inequal, weightedError) 56 | if weightedError < minError: 57 | minError = weightedError 58 | bestClasEst = predictedVals.copy() 59 | bestStump['dim'] = i 60 | bestStump['thresh'] = threshVal 61 | bestStump['ineq'] = inequal 62 | return bestStump,minError,bestClasEst 63 | 64 | if __name__=='__main__': 65 | datMat,classLabels=loadSimpData() 66 | d=mat(ones((5,1))/5) 67 | bestStump,minError,bestClasEst=buildStump(datMat,classLabels,d) 68 | print bestStump,minError,bestClasEst -------------------------------------------------------------------------------- /DecisionTree/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | DecisionTree 4 | 5 | 6 | 7 | 8 | 9 | org.python.pydev.PyDevBuilder 10 | 11 | 12 | 13 | 14 | 15 | org.python.pydev.pythonNature 16 | 17 | 18 | -------------------------------------------------------------------------------- /DecisionTree/.pydevproject: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Default 6 | python 2.7 7 | 8 | /DecisionTree 9 | 10 | 11 | -------------------------------------------------------------------------------- /DecisionTree/src/dt.py: -------------------------------------------------------------------------------- 1 | import math 2 | import operator 3 | 4 | 5 | def calcShannonEnt(dataSet): 6 | numEntries=len(dataSet) 7 | 8 | labelCounts={} 9 | 10 | for featVec in dataSet: 11 | currentLabel=featVec[-1] 12 | 13 | if currentLabel not in labelCounts.keys(): 14 | labelCounts[currentLabel]=0 15 | labelCounts[currentLabel]+=1 16 | shannonEnt=0.0 17 | 18 | for key in labelCounts: 19 | 20 | prob 
=float(labelCounts[key])/numEntries 21 | shannonEnt-=prob*math.log(prob,2) 22 | 23 | return shannonEnt 24 | 25 | 26 | def createDataSet(): 27 | 28 | dataSet=[[1,0,'man'],[1,1,'man'],[0,1,'man'],[0,0,'women']] 29 | labels=['throat','mustache'] 30 | return dataSet,labels 31 | 32 | def splitDataSet(dataSet, axis, value): 33 | retDataSet = [] 34 | for featVec in dataSet: 35 | if featVec[axis] == value: 36 | reducedFeatVec = featVec[:axis] #chop out axis used for splitting 37 | reducedFeatVec.extend(featVec[axis+1:]) 38 | retDataSet.append(reducedFeatVec) 39 | return retDataSet 40 | 41 | def chooseBestFeatureToSplit(dataSet): 42 | numFeatures = len(dataSet[0]) - 1 #the last column is used for the labels 43 | baseEntropy = calcShannonEnt(dataSet) 44 | bestInfoGain = 0.0; bestFeature = -1 45 | for i in range(numFeatures): #iterate over all the features 46 | featList = [example[i] for example in dataSet]#create a list of all the examples of this feature 47 | 48 | uniqueVals = set(featList) #get a set of unique values 49 | 50 | newEntropy = 0.0 51 | for value in uniqueVals: 52 | subDataSet = splitDataSet(dataSet, i, value) 53 | prob = len(subDataSet)/float(len(dataSet)) 54 | newEntropy += prob * calcShannonEnt(subDataSet) 55 | infoGain = baseEntropy - newEntropy #calculate the info gain; ie reduction in entropy 56 | 57 | if (infoGain > bestInfoGain): #compare this to the best gain so far 58 | bestInfoGain = infoGain #if better than current best, set to best 59 | bestFeature = i 60 | return bestFeature #returns an integer 61 | 62 | 63 | 64 | 65 | 66 | def majorityCnt(classList): 67 | classCount={} 68 | for vote in classList: 69 | if vote not in classCount.keys(): classCount[vote] = 0 70 | classCount[vote] += 1 71 | sortedClassCount = sorted(classCount.iteritems(), key=operator.itemgetter(1), reverse=True) 72 | return sortedClassCount[0][0] 73 | 74 | def createTree(dataSet,labels): 75 | classList = [example[-1] for example in dataSet] 76 | 77 | if classList.count(classList[0]) == len(classList): 78 | return classList[0]#stop splitting when all of the classes are equal 79 | if len(dataSet[0]) == 1: #stop splitting when there are no more features in dataSet 80 | return majorityCnt(classList) 81 | bestFeat = chooseBestFeatureToSplit(dataSet) 82 | bestFeatLabel = labels[bestFeat] 83 | myTree = {bestFeatLabel:{}} 84 | del(labels[bestFeat]) 85 | featValues = [example[bestFeat] for example in dataSet] 86 | uniqueVals = set(featValues) 87 | for value in uniqueVals: 88 | subLabels = labels[:] #copy all of labels, so trees don't mess up existing labels 89 | 90 | myTree[bestFeatLabel][value] = createTree(splitDataSet(dataSet, bestFeat, value),subLabels) 91 | 92 | return myTree 93 | 94 | def classify(inputTree,featLabels,testVec): 95 | firstStr = inputTree.keys()[0] 96 | secondDict = inputTree[firstStr] 97 | featIndex = featLabels.index(firstStr) 98 | key = testVec[featIndex] 99 | valueOfFeat = secondDict[key] 100 | if isinstance(valueOfFeat, dict): 101 | classLabel = classify(valueOfFeat, featLabels, testVec) 102 | else: classLabel = valueOfFeat 103 | return classLabel 104 | 105 | def getResult(): 106 | dataSet,labels=createDataSet() 107 | # splitDataSet(dataSet,1,1) 108 | chooseBestFeatureToSplit(dataSet) 109 | # print chooseBestFeatureToSplit(dataSet) 110 | #print calcShannonEnt(dataSet) 111 | mtree=createTree(dataSet,labels) 112 | print mtree 113 | 114 | print classify(mtree,['throat','mustache'],[0,0]) 115 | 116 | if __name__=='__main__': 117 | getResult() 118 | 119 | 
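A quick sanity check of the numbers dt.py produces on its toy data (three 'man' rows, one 'women' row). This snippet is not part of the repository; it just redoes calcShannonEnt and the first split's information gain by hand, and it relies on the fact that chooseBestFeatureToSplit's strict > comparison makes ties go to the first feature:

from math import log

# base entropy of the four labels: -(3/4)*log2(3/4) - (1/4)*log2(1/4) ~ 0.8113
base = -(3.0/4)*log(3.0/4, 2) - (1.0/4)*log(1.0/4, 2)

# splitting on 'throat': value 1 -> {man, man}, entropy 0;
# value 0 -> {man, women}, entropy 1; weighted sum = 0.5*0 + 0.5*1 = 0.5
gain = base - 0.5   # ~ 0.3113, identical to the gain for 'mustache'

print(round(base, 4), round(gain, 4))
# the tie goes to 'throat', so createTree returns
# {'throat': {0: {'mustache': {0: 'women', 1: 'man'}}, 1: 'man'}}
# and classify(mtree, ['throat','mustache'], [0,0]) prints 'women'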
-------------------------------------------------------------------------------- /HashMap/.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /HashMap/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | MakeResult 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.jdt.core.javanature 16 | 17 | 18 | -------------------------------------------------------------------------------- /HashMap/.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 4 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve 5 | org.eclipse.jdt.core.compiler.compliance=1.6 6 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate 7 | org.eclipse.jdt.core.compiler.debug.localVariable=generate 8 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate 9 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 10 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 11 | org.eclipse.jdt.core.compiler.source=1.6 12 | -------------------------------------------------------------------------------- /HashMap/bin/Main.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DURGESH716/MachineLearning/e80048d82b5ab64cd06c3223ce0c741b910f4305/HashMap/bin/Main.class -------------------------------------------------------------------------------- /HashMap/final.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DURGESH716/MachineLearning/e80048d82b5ab64cd06c3223ce0c741b910f4305/HashMap/final.txt -------------------------------------------------------------------------------- /HashMap/src/Main.java: -------------------------------------------------------------------------------- 1 | 2 | import java.io.BufferedReader; 3 | import java.io.BufferedWriter; 4 | import java.io.File; 5 | import java.io.FileReader; 6 | import java.io.FileWriter; 7 | import java.io.IOException; 8 | import java.util.ArrayList; 9 | import java.util.HashMap; 10 | import java.util.Iterator; 11 | 12 | 13 | 14 | 15 | public class Main { 16 | public static void main (String[] args) throws IOException{ 17 | BufferedWriter writer = new BufferedWriter(new FileWriter(new File("/Users/hakuri/Desktop/data2.txt"))); 18 | BufferedReader reader=new BufferedReader(new FileReader(new File("/Users/hakuri/Desktop/final.txt"))); 19 | String lineTxt = null; 20 | int i=1; 21 | // ArrayList brand = new ArrayList(); 22 | HashMap<String, ArrayList<String>> custom=new HashMap<String, ArrayList<String>>(); //group the second CSV column by the first 23 | 24 | 25 | while((lineTxt = reader.readLine()) != null){ 26 | //System.out.println(lineTxt); 27 | String line=lineTxt.trim(); 28 | String[] part=line.split(","); 29 | if(!custom.containsKey(part[0])){ 30 | custom.put(part[0],new ArrayList<String>()); 31 | custom.get(part[0]).add(part[1]); 32 | } 33 | else{ 34 | custom.get(part[0]).add(part[1]); 35 | } 36 | // custom.put(part[0], part[1]); 37 | } 38 | //System.out.print(custom); 39 | Iterator it=custom.keySet().iterator(); 40 | while( it.hasNext()){ 41 | String key=(String)it.next(); 42 | ArrayList<String> value=custom.get(key); 43 | 
System.out.println(key+"--"+value); 44 | } 45 | reader.close(); writer.close(); //release both file handles 46 | } 47 | 48 | } 49 | 50 | -------------------------------------------------------------------------------- /K-means/K-means/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | K-means 4 | 5 | 6 | 7 | 8 | 9 | org.python.pydev.PyDevBuilder 10 | 11 | 12 | 13 | 14 | 15 | org.python.pydev.pythonNature 16 | 17 | 18 | -------------------------------------------------------------------------------- /K-means/K-means/.pydevproject: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Default 6 | python 2.7 7 | 8 | /K-means/src 9 | 10 | 11 | -------------------------------------------------------------------------------- /K-means/K-means/src/Test.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @author: hakuri 3 | ''' 4 | from numpy import * 5 | import matplotlib.pyplot as plt 6 | def loadDataSet(fileName): #general function to parse tab-delimited floats 7 | dataMat = [] #assume last column is target value 8 | fr = open(fileName) 9 | for line in fr.readlines(): 10 | curLine = line.strip().split('\t') 11 | fltLine = map(float,curLine) #map all elements to float() 12 | dataMat.append(fltLine) 13 | return dataMat 14 | 15 | def distEclud(vecA, vecB): 16 | return sqrt(sum(power(vecA - vecB, 2))) #la.norm(vecA-vecB) 17 | 18 | def randCent(dataSet, k): 19 | n = shape(dataSet)[1] 20 | centroids = mat(zeros((k,n)))#create centroid mat 21 | for j in range(n):#create random cluster centers, within bounds of each dimension 22 | minJ = min(array(dataSet)[:,j]) 23 | 24 | rangeJ = float(max(array(dataSet)[:,j]) - minJ) 25 | centroids[:,j] = mat(minJ + rangeJ * random.rand(k,1)) 26 | 27 | return centroids 28 | 29 | def kMeans(dataSet, k, distMeas=distEclud, createCent=randCent): 30 | m = shape(dataSet)[0] 31 | clusterAssment = mat(zeros((m,2)))#create mat to assign data points #to a centroid, also holds SE of each point 32 | centroids = createCent(dataSet, k) 33 | clusterChanged = True 34 | while clusterChanged: 35 | clusterChanged = False 36 | for i in range(m):#for each data point assign it to the closest centroid 37 | minDist = inf; minIndex = -1 38 | for j in range(k): 39 | distJI = distMeas(array(centroids)[j,:],array(dataSet)[i,:]) 40 | if distJI < minDist: 41 | minDist = distJI; minIndex = j 42 | if clusterAssment[i,0] != minIndex: clusterChanged = True 43 | clusterAssment[i,:] = minIndex,minDist**2 44 | print centroids 45 | # print nonzero(array(clusterAssment)[:,0] 46 | for cent in range(k):#recalculate centroids 47 | ptsInClust = array(dataSet)[nonzero(array(clusterAssment)[:,0]==cent)[0]]#get all the points in this cluster (the original [0][0] picked only a single point) 48 | 49 | centroids[cent,:] = mean(ptsInClust, axis=0) #assign centroid to mean 50 | id=nonzero(array(clusterAssment)[:,0]==cent)[0] #indices of the points in the last cluster, returned for plotting 51 | return centroids, clusterAssment,id 52 | 53 | def plotBestFit(dataSet,id,centroids): 54 | 55 | dataArr = array(dataSet) 56 | cent=array(centroids) 57 | n = shape(dataArr)[0] 58 | n1=shape(cent)[0] 59 | xcord1 = []; ycord1 = [] 60 | xcord2 = []; ycord2 = [] 61 | xcord3=[];ycord3=[] 62 | j=0 63 | for i in range(n): 64 | if j in id: 65 | xcord1.append(dataArr[i,0]); ycord1.append(dataArr[i,1]) 66 | else: 67 | xcord2.append(dataArr[i,0]); ycord2.append(dataArr[i,1]) 68 | j=j+1 69 | for k in range(n1): 70 | xcord3.append(cent[k,0]);ycord3.append(cent[k,1]) 71 | 72 | fig = plt.figure() 73 | ax = fig.add_subplot(111)
74 | ax.scatter(xcord1, ycord1, s=30, c='red', marker='s') 75 | ax.scatter(xcord2, ycord2, s=30, c='green') 76 | ax.scatter(xcord3, ycord3, s=50, c='black') 77 | 78 | plt.xlabel('X1'); plt.ylabel('X2'); 79 | plt.show() 80 | 81 | 82 | if __name__=='__main__': 83 | dataSet=loadDataSet('/Users/hakuri/Desktop/testSet.txt') 84 | # # print randCent(dataSet,2) 85 | # print dataSet 86 | # 87 | # print kMeans(dataSet,2) 88 | a=[] 89 | b=[] 90 | a, b,id=kMeans(dataSet,2) 91 | plotBestFit(dataSet,id,a) 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /K-means/effect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DURGESH716/MachineLearning/e80048d82b5ab64cd06c3223ce0c741b910f4305/K-means/effect.png -------------------------------------------------------------------------------- /K-means/testSet.txt: -------------------------------------------------------------------------------- 1 | 10.235186 11.321997 2 | 10.122339 11.810993 3 | 9.190236 8.904943 4 | 9.306371 9.847394 5 | 8.330131 8.340352 6 | 10.152785 10.123532 7 | 10.408540 10.821986 8 | 9.003615 10.039206 9 | 9.534872 10.096991 10 | 9.498181 10.825446 11 | 9.875271 9.233426 12 | 10.362276 9.376892 13 | 10.191204 11.250851 14 | 7.720499 6.476300 15 | 9.334379 8.471268 16 | 7.963186 6.731333 17 | 8.244973 9.013785 18 | 9.569196 10.568949 19 | 8.854793 9.076536 20 | 9.382171 7.240862 21 | 8.179055 8.944502 22 | 8.267896 8.797017 23 | 9.047165 8.725068 24 | 8.741043 7.901385 25 | 7.190216 7.804587 26 | 8.081227 9.314431 27 | 8.047460 5.720780 28 | 7.917584 7.543254 29 | 8.676942 10.102220 30 | 9.210251 9.424717 31 | 7.732998 9.840202 32 | 7.681754 8.609897 33 | 7.925863 10.079159 34 | 8.261509 8.242080 35 | 8.514872 7.527561 36 | 10.324450 10.804481 37 | 7.856710 7.931543 38 | 7.858608 7.995340 39 | 9.196808 6.577598 40 | 9.644415 10.935081 41 | 9.579833 9.085021 42 | 7.888484 5.976428 43 | 9.072624 9.703344 44 | 8.914184 9.298515 45 | 7.822869 7.086663 46 | 10.538554 11.061464 47 | 8.280187 8.709012 48 | 8.884223 8.670105 49 | 9.359927 10.575055 50 | 9.078611 9.710833 51 | 7.935134 8.586173 52 | 8.805945 10.575145 53 | 9.584316 9.614076 54 | 11.269714 11.717254 55 | 9.120444 9.019774 56 | 7.977520 8.313923 57 | 8.104648 9.456128 58 | 8.617126 7.331723 59 | 9.033811 9.469706 60 | 8.327680 5.122092 61 | 8.532272 10.100909 62 | 9.295434 8.933824 63 | 9.905202 9.027559 64 | 10.585764 10.912733 65 | 10.427584 11.532578 66 | 9.072767 9.960144 67 | 9.164275 8.645121 68 | 9.746058 10.717080 69 | 9.286072 9.340024 70 | 8.188233 7.432415 71 | 7.948598 8.445419 72 | 7.563350 5.656178 73 | 8.972405 8.801869 74 | 9.980868 8.788996 75 | 7.753490 7.714248 76 | 7.431143 9.032819 77 | 8.943403 8.359354 78 | 10.481890 9.988969 79 | 9.150454 10.278760 80 | 8.123894 9.060351 81 | 8.626164 8.469342 82 | 7.354185 7.631252 83 | 11.323046 11.015032 84 | 8.190008 6.860792 85 | 8.412598 7.661358 86 | 9.258404 8.580382 87 | 11.007915 11.443881 88 | 8.279403 8.347003 89 | 8.931149 10.105221 90 | 10.239245 10.077473 91 | 8.129346 7.096877 92 | 8.485823 9.373561 93 | 10.703640 11.651618 94 | 9.500728 8.150228 95 | 9.712414 9.910445 96 | 9.333374 9.407557 97 | 8.787865 10.168021 98 | 9.238180 10.253478 99 | 9.577388 8.895150 100 | 10.447753 10.318227 101 | 9.303944 9.223136 102 | 9.883268 11.662945 103 | 9.471921 10.443792 104 | 10.007753 9.579912 105 | 8.110298 7.106263 106 | 6.964069 6.585040 107 | 10.413499 9.649309 108 | 8.032629 7.053254 109 | 8.015549 
9.166753 110 | 10.462924 8.656612 111 | 9.530788 10.134130 112 | 9.202658 9.314222 113 | 10.103241 10.235159 114 | 7.849264 6.624856 115 | 9.059071 7.992555 116 | 10.172889 10.724789 117 | 9.528439 6.420990 118 | 7.190422 6.789792 119 | 9.085716 9.846328 120 | 9.452887 8.735386 121 | 7.417322 7.348594 122 | 8.468639 8.715086 123 | 8.303642 9.463231 124 | 9.939052 10.026771 125 | 8.701989 7.516978 126 | 9.737541 10.587281 127 | 8.280233 7.852444 128 | 10.648386 10.259203 129 | 9.173893 10.520372 130 | 9.135397 10.751406 131 | 7.594580 8.488833 132 | 8.587520 8.463406 133 | 8.581887 7.888644 134 | 9.448768 8.707422 135 | 7.882664 7.772030 136 | 10.050635 9.859720 137 | 9.012078 9.533899 138 | 8.770020 8.882996 139 | 9.428804 9.446306 140 | 8.504209 8.319693 141 | 9.800003 10.964667 142 | 8.069660 7.683099 143 | 10.012217 10.320644 144 | 8.704677 8.918146 145 | 8.198722 7.297786 146 | 9.868322 9.901657 147 | 9.426997 11.480353 148 | 9.228767 9.262976 149 | 8.952359 9.528471 150 | 8.186847 8.600587 151 | 9.026371 8.705143 152 | 9.483364 9.807079 153 | 7.826587 7.975401 154 | 11.197846 10.959298 155 | 7.632421 8.769745 156 | 8.761605 8.309365 157 | 9.353670 8.728758 158 | 6.466637 6.038996 159 | 8.370634 9.178830 160 | 10.337451 11.075600 161 | 8.917679 8.288367 162 | 9.076621 8.487626 163 | 7.278948 4.634097 164 | 10.153017 11.219183 165 | 7.132603 5.853118 166 | 9.338644 9.805940 167 | 9.878602 9.187000 168 | 10.009505 10.924505 169 | 9.384438 10.691860 170 | 7.535322 8.160481 171 | 6.808732 8.268469 172 | 8.302965 8.075009 173 | 8.345379 8.305356 174 | 9.517530 8.249839 175 | 9.267825 9.999109 176 | 10.291511 11.032664 177 | 8.605909 8.705207 178 | 8.331145 7.812295 179 | 8.632412 10.574287 180 | 8.766397 8.712107 181 | 9.407070 9.732756 182 | 9.709495 9.729569 183 | 10.422201 11.070360 184 | 6.831495 6.466763 185 | 8.187122 8.405929 186 | 8.523093 9.041844 187 | 7.952394 6.801220 188 | 10.490780 10.001468 189 | 10.813791 9.802494 190 | 7.861113 7.541475 191 | 8.800399 8.738974 192 | 7.542152 6.612838 193 | 9.446981 9.378659 194 | 8.281684 7.358572 195 | 8.473801 8.208343 196 | 11.736767 11.022029 197 | 8.379578 8.714348 198 | 8.313718 8.832381 199 | 9.342589 10.416659 200 | 7.560710 6.889648 201 | 9.295344 9.739040 202 | 9.176612 9.718781 203 | 8.614385 10.150521 204 | 9.079373 8.839794 205 | 10.333289 10.921255 206 | 9.453502 7.335134 207 | 10.174590 10.292500 208 | 9.693713 9.793636 209 | 7.474925 7.751391 210 | 10.107905 10.156997 211 | 9.257241 7.854266 212 | 10.209794 11.410157 213 | 7.248050 6.433676 214 | 10.150091 9.288597 215 | 10.077713 10.321500 216 | 8.191122 8.931519 217 | 8.791469 10.287216 218 | 9.229434 9.095193 219 | 8.682571 8.546005 220 | 7.524099 7.709751 221 | 8.442410 8.326037 222 | 9.364851 9.095989 223 | 9.061222 7.557899 224 | 7.989999 8.555363 225 | 8.801275 8.868732 226 | 10.351932 9.497796 227 | 10.230710 10.496151 228 | 9.783163 9.891408 229 | 10.651481 9.431617 230 | 8.387393 6.400507 231 | 9.003921 7.050003 232 | 8.483723 8.314886 233 | 9.020501 7.545771 234 | 9.329105 11.095661 235 | 9.583687 9.271929 236 | 8.908705 8.407529 237 | 8.835406 8.083517 238 | 9.736362 8.296735 239 | 10.030302 9.737178 240 | 8.287142 6.993460 241 | 9.173211 9.306335 242 | 9.026355 9.696531 243 | 9.128391 9.921247 244 | 11.486346 12.910777 245 | 11.519458 11.472111 246 | 9.027707 10.263974 247 | 9.351935 8.542200 248 | 9.421701 11.403201 249 | 9.005687 8.100969 250 | 7.015279 6.614278 251 | 8.213607 8.340948 252 | 8.226646 8.718997 253 | 8.144753 8.366877 254 | 10.133642 12.790169 
255 | 10.763481 10.847016 256 | 10.003622 10.337716 257 | 9.007955 9.792482 258 | 8.670506 10.782931 259 | 10.386414 9.956162 260 | 10.104761 10.123044 261 | 8.079502 8.304075 262 | 9.945424 11.855409 263 | 8.642497 9.998066 264 | 9.349722 8.690328 265 | 9.034991 8.826490 266 | 8.738746 7.518464 267 | 8.919532 9.740312 268 | 9.464136 10.444588 269 | 10.710057 12.666857 270 | 10.042007 10.532091 271 | 8.447996 7.426363 272 | 9.509351 9.030516 273 | 11.946359 10.553075 274 | 9.981617 9.912651 275 | 9.853876 9.632967 276 | 10.560648 11.881714 277 | 8.370952 9.989491 278 | 8.323209 10.102529 279 | 9.828359 11.702462 280 | 8.515623 8.426754 281 | 9.004363 9.628036 282 | 10.529847 10.458031 283 | 10.028765 10.624880 284 | 9.448114 9.313227 285 | 8.332617 7.382295 286 | 8.323006 8.276608 287 | 7.740771 8.799750 288 | 8.379615 8.146192 289 | 8.340764 9.184458 290 | 9.863614 8.254694 291 | 9.969563 9.405134 292 | 9.164394 9.182127 293 | 10.622098 9.722592 294 | 9.592072 10.029446 295 | 8.212027 7.477366 296 | 9.080225 8.244448 297 | 8.555774 7.842325 298 | 9.958046 9.696221 299 | 8.972573 9.797128 300 | 9.213223 7.128437 301 | 8.737239 9.385138 302 | 10.333907 10.994856 303 | 8.797511 8.643075 304 | 11.044848 9.623160 305 | 8.539260 9.097113 306 | 11.582163 11.884333 307 | 7.863848 7.176199 308 | 6.218103 5.283562 309 | 9.120602 7.250190 310 | 9.001166 9.635203 311 | 8.081476 8.844224 312 | 9.369802 8.230911 313 | 8.768925 8.666987 314 | 9.841098 8.543896 315 | 10.451522 9.549511 316 | 9.755402 9.117522 317 | 7.988961 6.869854 318 | 8.872507 9.787118 319 | 10.363980 10.716608 320 | 6.315671 5.765953 321 | 9.638879 9.202355 322 | 8.588126 8.037966 323 | 8.947408 9.144386 324 | 9.051130 7.195132 325 | 9.321709 8.380668 326 | 10.146531 9.754745 327 | 9.843373 8.891437 328 | 9.213148 11.700632 329 | 7.630078 7.294753 330 | 8.093088 7.967590 331 | 7.488915 6.090652 332 | 8.126036 8.586472 333 | 8.760350 7.268987 334 | 10.201347 9.141013 335 | 7.838208 7.307700 336 | 6.155653 5.563997 337 | 7.767841 6.254528 338 | 8.425656 8.615832 339 | 10.362168 10.886815 340 | 10.180024 10.378934 341 | 9.794665 10.047812 342 | 9.970394 9.668279 343 | 7.030217 7.060471 344 | 9.275414 9.095738 345 | 10.314911 10.456539 346 | 9.259774 8.204851 347 | 10.023919 9.558307 348 | 8.887540 9.866704 349 | 9.851608 9.410989 350 | 8.710882 7.268012 351 | 9.017007 10.217673 352 | 7.976369 9.000979 353 | 8.738332 8.664734 354 | 8.344510 8.977600 355 | 8.959613 12.324240 356 | 9.169982 8.624635 357 | 7.487451 8.154859 358 | 8.706316 7.719455 359 | 9.564832 8.940403 360 | 8.327775 9.044509 361 | 9.734032 10.195255 362 | 8.021343 6.445092 363 | 9.081048 11.024397 364 | 7.626651 6.549263 365 | 10.725858 8.575374 366 | 8.731381 8.307788 367 | 10.394237 10.596874 368 | 7.029311 7.658832 369 | 9.517907 7.509904 370 | 10.394064 10.060898 371 | 10.752500 9.431601 372 | 9.692431 10.332130 373 | 9.651897 7.876862 374 | 8.592329 10.096837 375 | 10.212801 10.827496 376 | 9.045043 9.265524 377 | 8.901643 8.036115 378 | 10.794525 9.318830 379 | 11.040915 12.021746 380 | 8.390836 9.672469 381 | 9.840166 11.226568 382 | 10.806810 12.205633 383 | 8.924285 10.934056 384 | 8.411251 8.289672 385 | 7.808891 9.663290 386 | 9.733437 8.486958 387 | 8.300026 7.477374 388 | 8.221756 10.278308 389 | 9.096867 9.619677 390 | 9.410116 9.289188 391 | 10.097176 9.768470 392 | 9.387954 8.844855 393 | 9.376134 7.704630 394 | 8.231599 9.101203 395 | 9.910738 10.694855 396 | 8.645689 7.764589 397 | 8.090245 7.109596 398 | 9.253483 9.813672 399 | 9.331546 8.039386 400 | 
9.843256 10.208792 401 | 9.713131 9.247665 402 | 9.259369 10.704622 403 | 10.243948 9.695883 404 | 6.396262 6.456390 405 | 8.936289 8.703871 406 | 8.750846 9.347273 407 | 6.497155 4.130251 408 | 9.516552 10.164848 409 | 9.125766 8.858775 410 | 8.374387 7.300114 411 | 8.132816 7.621107 412 | 10.099505 9.159134 413 | 9.356477 6.869999 414 | 8.112934 7.587547 415 | 7.265396 6.987031 416 | 11.950505 13.715109 417 | 10.745959 10.822171 418 | 8.893270 7.887332 419 | 6.003473 4.960219 420 | 7.498851 6.451334 421 | 10.162072 9.935954 422 | 8.732617 9.177679 423 | 9.300827 9.952360 424 | 11.908436 12.256801 425 | 9.371215 9.188645 426 | 9.943640 9.245037 427 | 7.386450 7.046819 428 | 8.410374 8.293218 429 | 7.830419 6.440253 430 | 8.263140 8.279446 431 | 11.448164 12.192363 432 | 8.216533 9.186628 433 | 9.316128 10.046697 434 | 8.156927 6.834792 435 | 9.951421 11.240598 436 | 9.059607 8.458446 437 | 10.476339 10.560461 438 | 7.548200 7.227127 439 | 9.432204 7.236705 440 | 9.402750 9.126413 441 | 11.188095 13.853426 442 | 9.520201 11.028131 443 | 8.884154 9.764071 444 | 8.961105 8.833117 445 | 8.549663 8.865765 446 | 10.111708 10.515462 447 | 9.024761 9.169368 448 | 7.904149 8.048756 449 | 9.240995 7.796142 450 | 8.126538 6.116125 451 | 7.442148 7.931335 452 | 9.486821 10.091359 453 | 9.834289 11.694720 454 | 9.009714 11.599170 455 | 9.761314 11.344083 456 | 6.993941 6.562988 457 | 8.659524 8.410107 458 | 7.685363 8.097297 459 | 7.793217 6.519109 460 | 8.883454 9.257347 461 | 8.781821 9.231980 462 | 7.946281 7.658978 463 | 8.523959 10.646480 464 | 9.031525 8.649648 465 | 8.317140 7.758978 466 | 9.192417 11.151218 467 | 8.408486 8.282182 468 | 10.327702 11.459048 469 | 8.389687 8.548727 470 | 8.642250 7.056870 471 | 8.833447 9.267638 472 | 8.805261 8.320281 473 | 9.726211 9.095997 474 | 8.477631 9.507530 475 | 9.738838 9.652110 476 | 8.272108 7.582696 477 | 9.258089 8.495931 478 | 8.334144 8.810766 479 | 8.150904 6.486032 480 | 7.259669 7.270156 481 | 11.034180 11.519954 482 | 10.705432 10.642527 483 | 8.388814 7.159137 484 | 8.559369 7.846284 485 | 7.187988 6.519313 486 | 8.811453 7.765900 487 | 8.492762 7.992941 488 | 8.739752 8.502909 489 | 10.150752 10.420295 490 | 7.062378 5.365289 491 | 8.448195 7.480000 492 | 10.224333 11.592750 493 | 9.533795 9.212845 494 | 9.519492 7.690501 495 | 9.661847 10.376189 496 | 7.963877 8.597193 497 | 10.184486 9.136709 498 | 8.505234 9.159210 499 | 8.187646 8.518690 500 | 9.167590 9.405917 501 | 8.612162 8.518755 502 | 10.970868 10.392229 503 | 9.603649 9.141095 504 | 9.704263 8.830178 505 | 9.657506 8.132449 506 | 9.337882 11.045306 507 | 9.521722 9.537764 508 | 8.954197 8.728179 509 | 8.635658 10.352662 510 | 8.910816 9.020317 511 | 9.900933 9.392002 512 | 10.247105 8.289649 513 | 9.571690 8.171237 514 | 7.388627 7.668071 515 | 8.354008 10.074590 516 | 9.775598 8.835696 517 | 8.768913 7.983604 518 | 8.330199 8.474098 519 | 8.169356 9.361172 520 | 10.346522 10.086434 521 | 7.976144 9.266702 522 | 8.429648 7.865824 523 | 11.261674 11.788587 524 | 10.051066 10.112425 525 | 8.954626 9.789343 526 | 8.382220 8.121012 527 | 9.820642 9.426441 528 | 8.125950 9.695087 529 | 8.646465 7.291808 530 | 8.190202 8.003737 531 | 8.773887 7.306175 532 | 8.731000 10.300436 533 | 9.163098 7.816769 534 | 9.456346 9.223922 535 | 9.645180 9.324053 536 | 8.835060 8.966915 537 | 9.325950 10.943248 538 | 9.941912 9.548535 539 | 9.282799 10.119488 540 | 9.567591 9.462164 541 | 8.529019 9.768001 542 | 9.314824 10.153727 543 | 8.264439 8.273860 544 | 8.307262 8.214036 545 | 9.122041 8.657861 
546 | 8.404258 8.389365 547 | 7.828355 8.419433 548 | 9.803180 10.108286 549 | 8.662439 8.581953 550 | 8.883265 8.978377 551 | 8.012330 8.262451 552 | 9.420258 8.974878 553 | 7.015415 6.365940 554 | 9.888832 11.163036 555 | 9.677549 10.346431 556 | 8.410158 7.912899 557 | 9.464147 10.762900 558 | 7.067227 7.035717 559 | 9.320923 10.583089 560 | 9.056917 8.771241 561 | 8.110004 8.387789 562 | 10.310021 10.970014 563 | 8.211185 8.809627 564 | 8.942883 8.840746 565 | 9.479958 8.328700 566 | 8.973982 8.702291 567 | 8.519257 8.764855 568 | 9.424556 8.956911 569 | 7.222919 8.177787 570 | 8.257007 9.700619 571 | 9.778795 9.296134 572 | 8.028806 8.575974 573 | 9.886464 9.965076 574 | 9.090552 6.978930 575 | 9.605548 10.256751 576 | 9.959004 9.610229 577 | 8.308701 9.509124 578 | 7.748293 9.685933 579 | 8.311108 9.428114 580 | 9.697068 10.217956 581 | 9.582991 9.478773 582 | 9.167265 10.198412 583 | 10.329753 10.406602 584 | 8.908819 7.428789 585 | 10.072908 10.393294 586 | 7.992905 9.226629 587 | 8.907696 7.269366 588 | 8.421948 9.342968 589 | 7.481399 7.225033 590 | 10.358408 10.166130 591 | 8.786556 10.279943 592 | 9.658701 11.379367 593 | 10.167807 9.417552 594 | 8.653449 8.656681 595 | 8.020304 8.671270 596 | 8.364348 10.004068 597 | 9.119183 9.788199 598 | 8.405504 9.740580 599 | 11.020930 11.904350 600 | 9.755232 9.515713 601 | 10.059542 9.589748 602 | 8.727131 9.777998 603 | 7.666182 6.028642 604 | 8.870733 8.367501 605 | 9.340446 7.707269 606 | 9.919283 10.796813 607 | 7.905837 8.326034 608 | 10.181187 10.089865 609 | 8.797328 8.981988 610 | 8.466272 7.765032 611 | 10.335914 12.620539 612 | 9.365003 8.609115 613 | 8.011017 7.249489 614 | 10.923993 13.901513 615 | 7.074631 7.558720 616 | 9.824598 8.851297 617 | 8.861026 8.370857 618 | 10.127296 10.861535 619 | 10.548377 10.855695 620 | 8.880470 7.948761 621 | 8.901619 9.674705 622 | 7.813710 9.246912 623 | 10.128808 10.560668 624 | 11.096699 10.911644 625 | 8.551471 6.871514 626 | 8.907241 8.677815 627 | 10.571647 10.294838 628 | 8.815314 8.810725 629 | 8.453396 8.339296 630 | 9.594819 11.487580 631 | 10.714211 9.628908 632 | 7.428788 7.712869 633 | 10.892119 12.747752 634 | 9.024071 11.112692 635 | 7.803375 7.847038 636 | 8.521558 8.881848 637 | 9.742818 11.520203 638 | 9.832836 9.180396 639 | 8.703132 10.028498 640 | 9.905029 11.347606 641 | 10.037536 8.882688 642 | 8.629995 8.392863 643 | 9.583497 9.219663 644 | 8.781687 9.650598 645 | 9.344119 9.537024 646 | 10.407510 9.223929 647 | 7.244488 6.559021 648 | 10.643616 10.288383 649 | 8.757557 6.947901 650 | 10.784590 11.233350 651 | 10.028427 11.330033 652 | 7.968361 6.830308 653 | 8.925954 8.539113 654 | 7.738692 7.114987 655 | 8.192398 8.352016 656 | 10.412017 12.431122 657 | 8.208801 5.777678 658 | 7.820077 7.790720 659 | 9.542754 11.542541 660 | 6.817938 7.429229 661 | 7.365218 7.956797 662 | 9.274391 7.932700 663 | 9.546475 8.803412 664 | 7.471734 6.797870 665 | 8.016969 7.848070 666 | 8.852701 8.458114 667 | 8.215012 8.468330 668 | 6.975507 6.846980 669 | 9.435134 10.609700 670 | 9.228075 9.342622 671 | 8.388410 7.637856 672 | 7.111456 9.289163 673 | 9.403508 8.482654 674 | 9.133894 8.343575 675 | 10.670801 9.750821 676 | 9.983542 10.074537 677 | 10.012865 8.537017 678 | 8.929895 8.951909 679 | 7.666951 7.473615 680 | 9.493839 7.821783 681 | 8.894081 7.059413 682 | 9.593382 9.859732 683 | 9.126847 8.395700 684 | 9.532945 9.850696 685 | 9.459384 9.384213 686 | 8.982743 8.217062 687 | 10.107798 8.790772 688 | 10.563574 9.044890 689 | 8.278963 9.518790 690 | 8.734960 10.494129 691 | 
9.597940 9.530895 692 | 10.025478 9.508270 693 | 10.335922 10.974063 694 | 8.404390 8.146748 695 | 7.108699 6.038469 696 | 8.873951 7.474227 697 | 8.731459 8.154455 698 | 8.795146 7.534687 699 | 6.407165 6.810352 700 | 9.979312 10.287430 701 | 8.786715 8.396736 702 | 10.753339 10.360567 703 | 10.508031 10.321976 704 | 10.636925 10.193797 705 | 10.614322 11.215420 706 | 8.916411 8.965286 707 | 8.112756 8.304769 708 | 10.833109 10.497542 709 | 8.319758 9.727691 710 | 9.945336 11.820097 711 | 10.150461 9.914715 712 | 10.185024 10.388722 713 | 9.793569 9.079955 714 | 10.590128 11.811596 715 | 8.505584 6.884282 716 | 10.461428 10.745439 717 | 8.755781 9.418427 718 | 7.488249 7.172072 719 | 10.238905 10.428659 720 | 9.887827 10.427821 721 | 8.529971 8.838217 722 | 8.375208 10.242837 723 | 8.901724 8.398304 724 | 8.607694 9.173198 725 | 8.691369 9.964261 726 | 9.584578 9.641546 727 | 10.265792 11.405078 728 | 7.592968 6.683355 729 | 8.692791 9.389031 730 | 7.589852 6.005793 731 | 10.550386 11.736584 732 | 8.578351 7.227055 733 | 7.526931 6.875134 734 | 8.577081 9.877115 735 | 9.272136 11.050928 736 | 10.300809 10.653059 737 | 8.642013 9.006681 738 | 9.720491 10.265202 739 | 9.029005 9.646928 740 | 8.736201 7.975603 741 | 8.672886 9.070759 742 | 8.370633 8.412170 743 | 9.483776 9.183341 744 | 6.790842 7.594992 745 | 9.842146 10.156810 746 | 9.563336 7.962532 747 | 8.724669 9.870732 748 | 9.012145 9.171326 749 | 9.116948 9.791167 750 | 6.219094 7.988420 751 | 9.468422 8.359975 752 | 8.825231 8.475208 753 | 9.572224 9.696428 754 | 9.609128 8.488175 755 | 9.428590 10.468998 756 | 8.293266 8.617701 757 | 9.423584 10.355688 758 | 9.240796 9.517228 759 | 10.915423 13.026252 760 | 10.854684 11.130866 761 | 9.226816 9.391796 762 | 9.580264 10.359235 763 | 7.289907 6.898208 764 | 9.338857 10.374025 765 | 9.523176 11.332190 766 | 10.162233 10.357396 767 | 8.873930 9.207398 768 | 8.607259 7.794804 769 | 8.852325 8.215797 770 | 8.077272 6.501042 771 | 8.169273 8.269613 772 | 6.806421 7.544423 773 | 8.793151 9.691549 774 | 11.640981 11.365702 775 | 9.544082 11.576545 776 | 9.009266 9.605596 777 | 9.726552 9.426719 778 | 9.495888 10.626624 779 | 8.683982 9.337864 780 | 8.322105 8.631099 781 | 8.887895 8.644931 782 | 8.662659 11.373025 783 | 9.263321 7.536016 784 | 7.802624 7.171625 785 | 8.773183 8.561565 786 | 8.730443 10.197596 787 | 8.942915 7.758383 788 | 8.057618 8.774996 789 | 8.112081 8.202349 790 | 10.378884 12.103755 791 | 9.248876 8.637249 792 | 9.739599 9.708576 793 | 8.126345 8.278487 794 | 8.894788 7.966117 795 | 9.683165 9.019221 796 | 10.886957 12.053843 797 | 9.668852 10.902132 798 | 7.486692 6.471138 799 | 8.794850 9.173609 800 | 8.835915 8.296727 801 | 9.443984 11.375344 802 | 8.696621 6.434580 803 | 9.645560 9.233722 804 | 9.623857 7.915590 805 | 10.840632 12.620268 806 | 7.298135 7.356141 807 | 9.639644 8.902389 808 | 9.849802 7.682624 809 | 10.609964 10.259615 810 | 9.768229 11.382811 811 | 7.646351 7.571849 812 | 10.230300 9.470859 813 | 8.224402 8.496866 814 | 6.879671 8.393648 815 | 7.976247 8.667221 816 | 9.183268 8.694550 817 | 11.471853 12.786280 818 | 10.428349 10.615726 819 | 8.090828 5.902504 820 | 9.738627 8.485792 821 | 8.139709 8.396333 822 | 9.508055 8.990529 823 | 8.857260 8.497732 824 | 8.902558 7.014433 825 | 9.660607 11.040833 826 | 8.772221 10.512150 827 | 11.020038 9.354134 828 | 7.918527 7.742062 829 | 7.630835 7.756260 830 | 11.043272 11.041613 831 | 9.299376 8.674157 832 | 9.795087 8.431837 833 | 9.415683 8.312101 834 | 7.942037 6.942913 835 | 9.724790 11.766496 836 | 
10.222032 11.550876 837 | 8.894163 8.306020 838 | 8.394309 8.070420 839 | 9.012776 6.880548 840 | 9.661093 10.138921 841 | 9.896472 9.762372 842 | 9.135628 8.759928 843 | 8.762656 10.306028 844 | 8.602473 8.861956 845 | 10.085297 10.464774 846 | 10.644983 10.945767 847 | 9.034571 8.391668 848 | 8.602920 8.501944 849 | 8.224766 7.402758 850 | 8.755050 9.431085 851 | 9.669937 8.641049 852 | 10.693530 10.287124 853 | 9.462806 7.611153 854 | 9.287707 10.082363 855 | 10.941260 10.783728 856 | 9.263080 7.913328 857 | 10.167111 10.225338 858 | 8.783830 9.465345 859 | 8.958624 8.662136 860 | 9.841649 9.926781 861 | 7.205691 6.790638 862 | 8.629089 9.135461 863 | 7.469440 8.450442 864 | 8.179133 7.790434 865 | 8.083984 7.875520 866 | 9.271300 8.135359 867 | 8.652349 8.254397 868 | 7.983920 6.609684 869 | 7.836860 9.785238 870 | 7.418535 7.011256 871 | 8.458288 10.095364 872 | 9.387605 9.726911 873 | 8.663951 8.206705 874 | 10.146507 11.698577 875 | 8.937103 10.990924 876 | 11.218687 11.141945 877 | 8.363142 9.106936 878 | 7.877643 7.122922 879 | 9.620978 9.905689 880 | 9.509649 10.773209 881 | 6.748743 6.705385 882 | 9.300919 8.085029 883 | 9.332257 9.818791 884 | 7.898610 8.366643 885 | 9.841914 9.480675 886 | 6.920484 8.959501 887 | 8.544713 9.563136 888 | 8.162266 6.715277 889 | 8.659552 9.282008 890 | 10.673398 13.174824 891 | 9.024000 10.379238 892 | 8.183292 6.647572 893 | 10.544919 10.649602 894 | 7.201266 6.529605 895 | 9.557407 11.096821 896 | 8.304605 6.940929 897 | 9.742855 9.920897 898 | 10.024587 9.645222 899 | 10.002296 9.998940 900 | 8.965876 8.665419 901 | 7.823136 6.949572 902 | 8.125088 7.654065 903 | 6.569589 6.046863 904 | 10.195497 8.689129 905 | 11.730011 10.374221 906 | 8.739105 7.457571 907 | 9.820059 10.278526 908 | 9.547456 10.398198 909 | 8.375072 8.416302 910 | 8.889533 8.308929 911 | 8.861201 9.290408 912 | 12.677687 12.788463 913 | 9.100735 8.620537 914 | 7.728350 6.328219 915 | 7.955373 8.355028 916 | 8.733352 8.645414 917 | 10.257527 11.191813 918 | 9.246413 9.497014 919 | 9.745302 9.642035 920 | 7.785652 8.147621 921 | 7.431673 8.566399 922 | 8.654384 8.466701 923 | 8.475392 6.744677 924 | 9.968440 10.765192 925 | 10.163616 10.806963 926 | 10.238135 10.036636 927 | 9.902889 10.746730 928 | 9.523850 8.749708 929 | 9.214363 9.149178 930 | 9.266040 10.841502 931 | 8.494292 7.770942 932 | 10.821158 10.410192 933 | 8.645888 7.970308 934 | 9.885204 10.098080 935 | 9.084990 10.886349 936 | 9.277874 8.871449 937 | 8.135131 7.137064 938 | 7.917379 9.080522 939 | 9.685586 8.822850 940 | 8.558141 7.848112 941 | 9.502917 10.061255 942 | 6.409004 5.164774 943 | 10.149235 10.579951 944 | 7.847304 8.411351 945 | 8.846930 6.819939 946 | 8.675153 9.411147 947 | 9.476276 9.061508 948 | 11.099184 10.644263 949 | 8.792411 10.379405 950 | 8.400418 7.072706 951 | 8.555713 7.923805 952 | 8.024763 8.426993 953 | 8.642696 10.453412 954 | 7.906117 7.920408 955 | 8.793393 9.722878 956 | 8.280364 7.669854 957 | 9.387766 9.706245 958 | 9.626853 10.762499 959 | 10.163631 10.919007 960 | 9.375543 11.513524 961 | 9.309440 8.575699 962 | 10.055329 10.297255 963 | 8.706241 9.097172 964 | 10.032934 11.951897 965 | 10.812974 11.311435 966 | 10.352603 10.819865 967 | 8.276870 9.055403 968 | 8.397389 7.944434 969 | 9.371741 10.395790 970 | 10.825710 10.144099 971 | 9.158483 11.385382 972 | 10.658639 11.389856 973 | 8.091762 6.631039 974 | 10.734892 10.054598 975 | 11.535880 11.604912 976 | 9.799077 11.371677 977 | 8.478725 9.078455 978 | 9.399902 8.947744 979 | 7.305377 8.144973 980 | 7.613377 6.668798 
981 | 10.681308 10.830845 982 | 9.973855 10.004133 983 | 9.369918 7.855433 984 | 8.838223 7.429033 985 | 9.521831 10.623930 986 | 9.724419 10.447452 987 | 8.890224 9.275923 988 | 9.932763 11.589953 989 | 10.839337 9.051250 990 | 8.497708 7.521701 991 | 8.440236 8.705670 992 | 9.063566 9.755744 993 | 8.449647 8.929485 994 | 8.554576 8.063231 995 | 10.348606 10.550718 996 | 5.985254 5.186844 997 | 9.931937 10.175582 998 | 9.854922 9.201393 999 | 9.114580 9.134215 1000 | 10.334899 8.543604 1001 | -------------------------------------------------------------------------------- /KNN/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | KNN 4 | 5 | 6 | 7 | 8 | 9 | org.python.pydev.PyDevBuilder 10 | 11 | 12 | 13 | 14 | 15 | org.python.pydev.pythonNature 16 | 17 | 18 | -------------------------------------------------------------------------------- /KNN/.pydevproject: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | /KNN 7 | 8 | python 2.7 9 | Default 10 | 11 | -------------------------------------------------------------------------------- /KNN/src/knn.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import division 3 | from numpy import * 4 | import operator 5 | 6 | 7 | 8 | def createDataset(): 9 | group=array([[9,400],[200,5],[100,77],[40,300]]) 10 | 11 | labels=['1','2','3','1'] 12 | return group,labels 13 | 14 | def classify(inX, dataSet, labels, k): 15 | dataSetSize = dataSet.shape[0] 16 | diffMat = tile(inX, (dataSetSize,1)) - dataSet 17 | sqDiffMat = diffMat**2 18 | sqDistances = sqDiffMat.sum(axis=1) 19 | distances = sqDistances**0.5 20 | sortedDistIndicies = distances.argsort() 21 | 22 | classCount={} 23 | for i in range(k): 24 | voteIlabel = labels[sortedDistIndicies[i]] 25 | 26 | classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1 27 | sortedClassCount = sorted(classCount.iteritems(), key=operator.itemgetter(1), reverse=True) 28 | return sortedClassCount[0][0] 29 | 30 | def autoNorm(dataSet): 31 | minVals = dataSet.min(0) 32 | maxVals = dataSet.max(0) 33 | ranges = maxVals - minVals 34 | normDataSet = zeros(shape(dataSet)) 35 | 36 | m = dataSet.shape[0] 37 | normDataSet = dataSet - tile(minVals, (m,1)) 38 | #print normDataSet 39 | normDataSet = normDataSet/tile(ranges, (m,1)) #element wise divide 40 | # print normDataSet 41 | return normDataSet, ranges, minVals 42 | 43 | def result(): 44 | datingDataMat,datingLabels =createDataset() #build the toy data set 45 | normMat, ranges, minVals = autoNorm(datingDataMat) 46 | book=199 47 | film=4 48 | print 'books read: '+str(book)+', films watched: '+str(film) 49 | print 'you belong to class:' 50 | print classify((array([book,film])-minVals)/ranges, normMat, datingLabels, 1) #normalize the query the same way as the training data 51 | 52 | 53 | 54 | if __name__=='__main__': 55 | result() -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
--------------------------------------------------------------------------------
/LogisticRegression/.project:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
	<name>LogisticRegression</name>
	<comment></comment>
	<projects>
	</projects>
	<buildSpec>
		<buildCommand>
			<name>org.python.pydev.PyDevBuilder</name>
			<arguments>
			</arguments>
		</buildCommand>
	</buildSpec>
	<natures>
		<nature>org.python.pydev.pythonNature</nature>
	</natures>
</projectDescription>
--------------------------------------------------------------------------------
/LogisticRegression/.pydevproject:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?eclipse-pydev version="1.0"?>
<pydev_project>
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
<path>/LogisticRegression</path>
</pydev_pathproperty>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
</pydev_project>
--------------------------------------------------------------------------------
/LogisticRegression/src/LR.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
from numpy import *


def loadDataSet():
    dataMat = []; labelMat = []
    fr = open('/Users/hakuri/Desktop/testSet.txt')
    for line in fr.readlines():
        lineArr = line.strip().split()
        dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])  #prepend x0 = 1.0 for the intercept term
        labelMat.append(int(lineArr[2]))
    return dataMat,labelMat

def sigmoid(inX):
    return 1.0/(1+exp(-inX))

def gradAscent(dataMatIn, classLabels):
    dataMatrix = mat(dataMatIn)             #convert to NumPy matrix
    labelMat = mat(classLabels).transpose() #convert to NumPy matrix

    m,n = shape(dataMatrix)
    alpha = 0.001       #step size
    maxCycles = 500     #number of gradient-ascent iterations
    weights = ones((n,1))

    for k in range(maxCycles):              #heavy on matrix operations
        h = sigmoid(dataMatrix*weights)     #matrix mult
        error = (labelMat - h)              #vector subtraction
        weights = weights + alpha * dataMatrix.transpose()* error #matrix mult
    return weights

def GetResult():
    dataMat,labelMat=loadDataSet()
    weights=gradAscent(dataMat,labelMat)
    print weights
    plotBestFit(weights.getA())  #getA() converts the matrix to an array for element indexing below


def plotBestFit(weights):
    dataMat,labelMat=loadDataSet()
    dataArr = array(dataMat)
    n = shape(dataArr)[0]
    xcord1 = []; ycord1 = []
    xcord2 = []; ycord2 = []
    for i in range(n):
        if int(labelMat[i])== 1:
            xcord1.append(dataArr[i,1]); ycord1.append(dataArr[i,2])
        else:
            xcord2.append(dataArr[i,1]); ycord2.append(dataArr[i,2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    x = arange(-3.0, 3.0, 0.1)
    y = (-weights[0]-weights[1]*x)/weights[2]  #decision boundary: w0 + w1*x1 + w2*x2 = 0
    ax.plot(x,y)
    plt.xlabel('X1'); plt.ylabel('X2');
    plt.show()

if __name__=='__main__':
    GetResult()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
MachineLearning
===============

Basic machine learning algorithms
--------------------------------------------------------------------------------
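Editor's note: a minimal, hypothetical sketch of driving gradAscent from LR.py above, not part of the repository. It sidesteps the hardcoded testSet.txt path by supplying a made-up data set inline; each row starts with the constant 1.0 that loadDataSet normally prepends for the intercept.

from LR import gradAscent   # assumes LR.py is on the Python path

# four made-up rows of [x0=1.0, x1, x2] with 0/1 labels
dataMat = [[1.0, 0.5, 1.2], [1.0, -1.0, -0.3],
           [1.0, 1.5, 2.0], [1.0, -0.7, -1.1]]
labelMat = [1, 0, 1, 0]
weights = gradAscent(dataMat, labelMat)
print weights               # a 3x1 column of fitted coefficients [w0, w1, w2]

--------------------------------------------------------------------------------
/SMO/.project: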
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
	<name>SMO</name>
	<comment></comment>
	<projects>
	</projects>
	<buildSpec>
		<buildCommand>
			<name>org.python.pydev.PyDevBuilder</name>
			<arguments>
			</arguments>
		</buildCommand>
	</buildSpec>
	<natures>
		<nature>org.python.pydev.pythonNature</nature>
	</natures>
</projectDescription>
--------------------------------------------------------------------------------
/SMO/.pydevproject:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?eclipse-pydev version="1.0"?>
<pydev_project>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
<path>/SMO</path>
</pydev_pathproperty>
</pydev_project>
--------------------------------------------------------------------------------
/SMO/src/SMO.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
from numpy import *

def loadDataSet(fileName):
    dataMat = []; labelMat = []
    fr = open(fileName)
    for line in fr.readlines():
        lineArr = line.strip().split('\t')
        dataMat.append([float(lineArr[0]), float(lineArr[1])])
        labelMat.append(float(lineArr[2]))
    return dataMat,labelMat

def selectJrand(i,m):
    j=i #we want to select any j not equal to i
    while (j==i):
        j = int(random.uniform(0,m))
    return j

def clipAlpha(aj,H,L):
    if aj > H:
        aj = H
    if L > aj:
        aj = L
    return aj

def smoSimple(dataMatIn, classLabels, C, toler, maxIter):
    dataMatrix = mat(dataMatIn); labelMat = mat(classLabels).transpose()
    b = 0; m,n = shape(dataMatrix)
    alphas = mat(zeros((m,1)))
    iter = 0
    while (iter < maxIter):
        alphaPairsChanged = 0
        for i in range(m):
            fXi = float(multiply(alphas,labelMat).T*(dataMatrix*dataMatrix[i,:].T)) + b
            Ei = fXi - float(labelMat[i])   #the if below checks whether example i violates the KKT conditions
            if ((labelMat[i]*Ei < -toler) and (alphas[i] < C)) or ((labelMat[i]*Ei > toler) and (alphas[i] > 0)):
                j = selectJrand(i,m)
                fXj = float(multiply(alphas,labelMat).T*(dataMatrix*dataMatrix[j,:].T)) + b
                Ej = fXj - float(labelMat[j])
                alphaIold = alphas[i].copy(); alphaJold = alphas[j].copy();
                if (labelMat[i] != labelMat[j]):
                    L = max(0, alphas[j] - alphas[i])
                    H = min(C, C + alphas[j] - alphas[i])
                else:
                    L = max(0, alphas[j] + alphas[i] - C)
                    H = min(C, alphas[j] + alphas[i])
#                if L==H: print "L==H"; continue
                eta = 2.0 * dataMatrix[i,:]*dataMatrix[j,:].T - dataMatrix[i,:]*dataMatrix[i,:].T - dataMatrix[j,:]*dataMatrix[j,:].T
                if eta >= 0: print "eta>=0"; continue
                alphas[j] -= labelMat[j]*(Ei - Ej)/eta
                alphas[j] = clipAlpha(alphas[j],H,L)
#                if (abs(alphas[j] - alphaJold) < 0.00001): print "j not moving enough"; continue
                alphas[i] += labelMat[j]*labelMat[i]*(alphaJold - alphas[j])    #update i by the same amount as j,
                                                                                #in the opposite direction
                b1 = b - Ei- labelMat[i]*(alphas[i]-alphaIold)*dataMatrix[i,:]*dataMatrix[i,:].T - labelMat[j]*(alphas[j]-alphaJold)*dataMatrix[i,:]*dataMatrix[j,:].T
                b2 = b - Ej- labelMat[i]*(alphas[i]-alphaIold)*dataMatrix[i,:]*dataMatrix[j,:].T - labelMat[j]*(alphas[j]-alphaJold)*dataMatrix[j,:]*dataMatrix[j,:].T
                if (0 < alphas[i]) and (C > alphas[i]): b = b1
                elif (0 < alphas[j]) and (C > alphas[j]): b = b2
                else: b = (b1 + b2)/2.0
                alphaPairsChanged += 1
#                print "iter: %d i:%d, pairs changed %d" % (iter,i,alphaPairsChanged)
        if (alphaPairsChanged == 0): iter += 1
        else: iter = 0
#        print "iteration number: %d" % iter

    return b,alphas

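# Editor's note: a hypothetical driver for smoSimple, left as a comment so the
# module's behavior is unchanged. It assumes a tab-separated testSet.txt whose
# rows are "x1<TAB>x2<TAB>label" with labels in {-1, +1}, as loadDataSet expects:
#
#   dataMat, labelMat = loadDataSet('testSet.txt')
#   b, alphas = smoSimple(dataMat, labelMat, 0.6, 0.001, 40)
#   for i in range(len(dataMat)):
#       if alphas[i] > 0:                 # non-zero multipliers mark support vectors
#           print dataMat[i], labelMat[i]
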
def matplot(dataMat,labelMat):
    xcord1 = []; ycord1 = []
    xcord2 = []; ycord2 = []
    xcord3 = []; ycord3 = []
    for i in range(len(dataMat)):
        if labelMat[i]==1:
            xcord1.append(dataMat[i][0])
            ycord1.append(dataMat[i][1])
        else:
            xcord2.append(dataMat[i][0])
            ycord2.append(dataMat[i][1])
    b,alphas=smoSimple(dataMat,labelMat,0.6,0.001,40)
    for j in range(len(dataMat)):
        if alphas[j]>0:                     #non-zero alphas mark the support vectors
            xcord3.append(dataMat[j][0])
            ycord3.append(dataMat[j][1])
    fig = plt.figure()
    ax = fig.add_subplot(111)

    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    ax.scatter(xcord3, ycord3, s=80, c='blue')  #support vectors drawn larger, in blue
    plt.xlabel('X1'); plt.ylabel('X2');
    plt.show()

if __name__=='__main__':
    dataMat,labelMat=loadDataSet('/Users/hakuri/Desktop/testSet.txt')
#    b,alphas=smoSimple(dataMat,labelMat,0.6,0.001,40)
#    print b,alphas[alphas>0]
    matplot(dataMat,labelMat)
--------------------------------------------------------------------------------
/ShannonEnt/ShannonEnt/.project:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
	<name>ShannonEnt</name>
	<comment></comment>
	<projects>
	</projects>
	<buildSpec>
		<buildCommand>
			<name>org.python.pydev.PyDevBuilder</name>
			<arguments>
			</arguments>
		</buildCommand>
	</buildSpec>
	<natures>
		<nature>org.python.pydev.pythonNature</nature>
	</natures>
</projectDescription>
--------------------------------------------------------------------------------
/ShannonEnt/ShannonEnt/.pydevproject:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?eclipse-pydev version="1.0"?>
<pydev_project>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
<path>/ShannonEnt</path>
</pydev_pathproperty>
</pydev_project>
--------------------------------------------------------------------------------
/ShannonEnt/ShannonEnt/src/shannon.py:
--------------------------------------------------------------------------------
import math

def calcShannonEnt(dataSet):
    numEntries=len(dataSet)

    labelCounts={}      #count how many rows carry each class label

    for featVec in dataSet:
        currentLabel=featVec[-1]

        if currentLabel not in labelCounts.keys():
            labelCounts[currentLabel]=0
        labelCounts[currentLabel]+=1
    shannonEnt=0.0

    for key in labelCounts:
        prob =float(labelCounts[key])/numEntries
        shannonEnt-=prob*math.log(prob,2)   #H = -sum(p * log2(p))

    return shannonEnt


def createDataSet():
    dataSet=[[1,0,'man'],[1,1,'man'],[0,1,'man'],[0,0,'woman']]
    labels=['throat','mustache']
    return dataSet,labels

def splitDataSet(dataSet, axis, value):
    retDataSet = []
    for featVec in dataSet:
        if featVec[axis] == value:
            reducedFeatVec = featVec[:axis]     #chop out the axis used for splitting
            reducedFeatVec.extend(featVec[axis+1:])
            retDataSet.append(reducedFeatVec)
    return retDataSet

def chooseBestFeatureToSplit(dataSet):
    numFeatures = len(dataSet[0]) - 1   #the last column holds the class labels
    baseEntropy = calcShannonEnt(dataSet)
    bestInfoGain = 0.0; bestFeature = -1
    for i in range(numFeatures):        #iterate over all the features
        featList = [example[i] for example in dataSet]  #all values of this feature

        uniqueVals = set(featList)      #the unique values of this feature

        newEntropy = 0.0
        for value in uniqueVals:
            subDataSet = splitDataSet(dataSet, i, value)
            prob = len(subDataSet)/float(len(dataSet))
            newEntropy += prob * calcShannonEnt(subDataSet)
        infoGain = baseEntropy - newEntropy     #info gain, i.e. reduction in entropy
        if (infoGain > bestInfoGain):           #compare this to the best gain so far
            bestInfoGain = infoGain             #if better than the current best, keep it
            bestFeature = i
    return bestFeature                          #returns the index of the best feature


def getResult():
    dataSet,labels=createDataSet()
#    print splitDataSet(dataSet,1,1)
    print chooseBestFeatureToSplit(dataSet)
#    print calcShannonEnt(dataSet)


if __name__=='__main__':
    getResult()
--------------------------------------------------------------------------------
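Editor's note: a quick hand-check of calcShannonEnt on the toy data in shannon.py, a sketch rather than part of the repository. With three 'man' rows and one 'woman' row, the entropy should be -(3/4)log2(3/4) - (1/4)log2(1/4) ≈ 0.811.

import math

# hand-computed entropy for the 3-vs-1 toy split
p_man, p_woman = 3.0/4, 1.0/4
print -(p_man*math.log(p_man, 2) + p_woman*math.log(p_woman, 2))   # ~0.8113

# the same number should come out of the module itself
# (assumes shannon.py is importable from the current directory):
# from shannon import calcShannonEnt, createDataSet
# print calcShannonEnt(createDataSet()[0])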