├── Dtree.py
├── KNN.py
├── KNNImplementation.py
├── MultipleLinearRegression1.py
├── MultipleLinearRegression2.py
├── PearsonCorrelationCoefficient.py
├── README.md
├── SVM1.py
├── SVM2.py
├── SimpleLinearRegression.py
├── dataset
│   ├── abalone.txt
│   ├── datingTestSet2.txt
│   ├── email
│   │   ├── ham
│   │   │   ├── 1.txt
│   │   │   ├── 10.txt
│   │   │   ├── 11.txt
│   │   │   ├── 12.txt
│   │   │   ├── 13.txt
│   │   │   ├── 14.txt
│   │   │   ├── 15.txt
│   │   │   ├── 16.txt
│   │   │   ├── 17.txt
│   │   │   ├── 18.txt
│   │   │   ├── 19.txt
│   │   │   ├── 2.txt
│   │   │   ├── 20.txt
│   │   │   ├── 21.txt
│   │   │   ├── 22.txt
│   │   │   ├── 23.txt
│   │   │   ├── 24.txt
│   │   │   ├── 25.txt
│   │   │   ├── 3.txt
│   │   │   ├── 4.txt
│   │   │   ├── 5.txt
│   │   │   ├── 6.txt
│   │   │   ├── 7.txt
│   │   │   ├── 8.txt
│   │   │   └── 9.txt
│   │   └── spam
│   │       ├── 1.txt
│   │       ├── 10.txt
│   │       ├── 11.txt
│   │       ├── 12.txt
│   │       ├── 13.txt
│   │       ├── 14.txt
│   │       ├── 15.txt
│   │       ├── 16.txt
│   │       ├── 17.txt
│   │       ├── 18.txt
│   │       ├── 19.txt
│   │       ├── 2.txt
│   │       ├── 20.txt
│   │       ├── 21.txt
│   │       ├── 22.txt
│   │       ├── 23.txt
│   │       ├── 24.txt
│   │       ├── 25.txt
│   │       ├── 3.txt
│   │       ├── 4.txt
│   │       ├── 5.txt
│   │       ├── 6.txt
│   │       ├── 7.txt
│   │       ├── 8.txt
│   │       └── 9.txt
│   ├── ex0.txt
│   ├── lenses.txt
│   └── logisticDataset.txt
├── excel
│   ├── costfunc.xlsx
│   ├── logistic.xlsx
│   └── ml.xlsx
├── k-临近算法.md
├── pic
│   ├── ML3STEP.png
│   ├── X.png
│   ├── bibao.png
│   ├── bysp.png
│   ├── bysres.png
│   ├── costfunc.png
│   ├── costlogis.png
│   ├── daoshulogis.png
│   ├── dx.png
│   ├── featuremin.png
│   ├── gdesc.png
│   ├── hl.jpg
│   ├── juzhen.png
│   ├── knnI.jpg
│   ├── localweight.jpg
│   ├── localweightres.jpg
│   ├── logisticfunc.png
│   ├── ltt.jpg
│   ├── ml1.png
│   ├── ml2.png
│   ├── multigradient.png
│   ├── multivar.png
│   ├── normalfunc.jpg
│   ├── normalfunctd.jpg
│   ├── regular1.png
│   ├── regular2.png
│   ├── resabalone.jpg
│   ├── reslogistic.png
│   ├── resregression.jpg
│   ├── wenfa.png
│   ├── wxt.jpg
│   ├── xq.png
│   ├── yuy.png
│   └── yxt.jpg
├── pythonForNewsSina.py
├── pythonForTiebaPic.py
├── 决策树.md
├── 朴素贝叶斯.md
├── 机器学习笔记一--基本概念.md
├── 机器学习笔记三--多变量线性回归.md
├── 机器学习笔记二--单变量线性回归.md
├── 机器学习笔记五--正则化.md
├── 机器学习笔记四--逻辑回归.md
├── 线性回归.md
├── 自然语言处理一--基本概念理解.md
├── 自然语言处理二--4型文法与自动机.md
└── 逻辑回归.md

--------------------------------------------------------------------------------
/Dtree.py:
--------------------------------------------------------------------------------
from sklearn.feature_extraction import DictVectorizer
from sklearn import preprocessing
from sklearn import tree
import csv

AllElectronics = open(r'D:\daacheng\Python\PythonCode\machineLearning\AllElectronics.csv', 'rt')  # open the CSV file (raw string, so the backslashes in the path are not treated as escapes)
readers = csv.reader(AllElectronics)
headers = next(readers)  # header row: ['RID', 'age', 'income', 'student', 'credit_rating', 'class_buys_computer']
featureList = []  # the feature dicts
labelList = []    # the class labels
for row in readers:
    rowDict = {}  # each row is stored as a dict; the dicts are collected into a list
    labelList.append(row[len(row) - 1])
    for i in range(1, len(row) - 1):
        rowDict[headers[i]] = row[i]
    featureList.append(rowDict)
vec = DictVectorizer()
dummyX = vec.fit_transform(featureList).toarray()  # one-hot encode the categorical features
# vec.get_feature_names() lists the generated one-hot column names
lb = preprocessing.LabelBinarizer()
dummyY = lb.fit_transform(labelList)  # binarize the labels
clf = tree.DecisionTreeClassifier(criterion='entropy')  # split nodes by information entropy
clf = clf.fit(dummyX, dummyY)  # train the tree
# Build a new sample to predict: copy the first encoded row (copy(), so the
# view does not write back into dummyX) and flip two of its one-hot columns.
oneRowX = dummyX[0, :]
newRowX = oneRowX.copy().reshape(1, -1)  # predict() expects a 2-D array
newRowX[0][0] = 1
newRowX[0][1] = 0
predictY2 = clf.predict(newRowX)
print(predictY2)
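A note on the prediction step above: building the query row by flipping one-hot columns by index is brittle, because the column order depends on how DictVectorizer happened to encode the training data. A minimal sketch of the safer route is to reuse the fitted vectorizer on a raw feature dict. The attribute names come from the header row printed above; the specific values ('youth', 'high', 'no', 'fair') are assumptions, so check AllElectronics.csv for the actual category names:

    # Sketch: encode a raw feature dict with the already-fitted DictVectorizer.
    # The values below are assumed examples, not taken from the CSV itself.
    sample = {'age': 'youth', 'income': 'high', 'student': 'no', 'credit_rating': 'fair'}
    newRow = vec.transform([sample]).toarray()  # same column order as dummyX
    print(clf.predict(newRow))                  # predict() expects a 2-D array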
--------------------------------------------------------------------------------
/KNN.py:
--------------------------------------------------------------------------------
from sklearn import datasets
from sklearn import neighbors

knn = neighbors.KNeighborsClassifier()  # create the classifier
iris = datasets.load_iris()
knn.fit(iris.data, iris.target)  # train the classifier
predictLabel = knn.predict([[6.2, 3.4, 5.4, 2.2]])  # predict with the trained classifier
print(predictLabel)

--------------------------------------------------------------------------------
/KNNImplementation.py:
--------------------------------------------------------------------------------
import csv
import math
import random


# Load the data file and randomly split it into a training set and a test set
def loadDataset(filename, split, trainingSet, testSet):
    with open(filename, 'r') as file:
        lines = csv.reader(file)
        data = list(lines)
        for x in range(len(data) - 1):
            for y in range(4):
                data[x][y] = float(data[x][y])
            if random.random() < split:
                trainingSet.append(data[x])
            else:
                testSet.append(data[x])


# Euclidean distance between two instances over the first `length` attributes
def getDistance(instance1, instance2, length):
    distance = 0
    for x in range(length):
        distance += math.pow(instance1[x] - instance2[x], 2)
    return math.sqrt(distance)


# The k training instances nearest to a single test instance
def getNeighbors(trainingSet, testInstance, k):
    distances = []
    length = len(testInstance) - 1  # the last field is the class label
    for x in range(len(trainingSet)):
        d = getDistance(trainingSet[x], testInstance, length)
        distances.append((trainingSet[x], d))
    newDistances = sorted(distances, key=lambda x: x[1])
    neighbors = []
    for x in range(k):
        neighbors.append(newDistances[x][0])
    return neighbors


# Vote among the k nearest instances: count each class and sort by frequency
def getResponse(neighbors):
    classDict = {}  # dict used to count occurrences of each class
    for x in range(len(neighbors)):
        response = neighbors[x][-1]
        if response in classDict:
            classDict[response] += 1
        else:
            classDict[response] = 1
    newClassDict = sorted(classDict.items(), key=lambda x: x[1], reverse=True)
    return newClassDict


# Fraction of test instances whose top-voted class matches the true label
def getAccuracy(testSet, predictions):
    correct = 0
    for x in range(len(testSet)):
        if testSet[x][-1] == predictions[x][0][0]:
            correct += 1
    return correct / float(len(testSet))


def main():
    trainingSet = []
    testSet = []
    split = 0.8
    predictions = []
    loadDataset(r'D:\daacheng\Python\PythonCode\machineLearning\irisdata.txt', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))
    k = 5
    for x in range(len(testSet)):
        neighbors = getNeighbors(trainingSet, testSet[x], k)
        result = getResponse(neighbors)
        predictions.append(result)
    print(testSet)
    print('-----------------------------------------------')
    print(predictions)
    correct = getAccuracy(testSet, predictions)
    print(correct)


if __name__ == '__main__':
    main()
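The loader above expects a local copy of the iris data at a hardcoded Windows path. A quick way to smoke-test the implementation without that file is to pull iris from scikit-learn (already used by KNN.py) and feed it through getNeighbors/getResponse directly. This is a sketch that assumes the functions above are in scope, for example when run at the bottom of the same file:

    # Sketch: smoke-test the k-NN functions above on scikit-learn's bundled iris data
    from sklearn import datasets
    import random

    iris = datasets.load_iris()
    rows = [f + [t] for f, t in zip(iris.data.tolist(), iris.target.tolist())]
    random.shuffle(rows)
    train, test = rows[:120], rows[120:]      # 120 training rows, 30 test rows
    hits = 0
    for instance in test:
        top = getResponse(getNeighbors(train, instance, 5))
        hits += (top[0][0] == instance[-1])   # top-voted class vs. true label
    print('accuracy: %.3f' % (hits / float(len(test))))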
--------------------------------------------------------------------------------
/MultipleLinearRegression1.py:
--------------------------------------------------------------------------------
from numpy import genfromtxt
from sklearn import linear_model

path = r'D:\daacheng\Python\PythonCode\machineLearning\Delivery.csv'
data = genfromtxt(path, delimiter=',')
print(data)
x = data[:, :-1]
y = data[:, -1]
regr = linear_model.LinearRegression()
regr.fit(x, y)
# y = b0 + b1*x1 + b2*x2
print(regr.coef_)       # b1, b2
print(regr.intercept_)  # b0
Xpred = [[102, 6]]
Ypred = regr.predict(Xpred)  # predict
print(Ypred)

--------------------------------------------------------------------------------
/MultipleLinearRegression2.py:
--------------------------------------------------------------------------------
from numpy import genfromtxt
from sklearn import linear_model

path = r'D:\daacheng\Python\PythonCode\machineLearning\Delivery_Dummy.csv'
data = genfromtxt(path, delimiter=',')
data = data[1:]  # drop the header row (genfromtxt turns it into NaNs)
x = data[:, :-1]
y = data[:, -1]
print(x)
print(y)
regr = linear_model.LinearRegression()
regr.fit(x, y)
print(regr.coef_)       # b1, b2, b3, b4, b5
print(regr.intercept_)  # b0

--------------------------------------------------------------------------------
/PearsonCorrelationCoefficient.py:
--------------------------------------------------------------------------------
import numpy as np
import math


# Pearson correlation coefficient
def getPearson(x, y):
    xBar = np.mean(x)
    yBar = np.mean(y)
    fenzi = 0  # numerator
    x_2 = 0
    y_2 = 0
    for i in range(len(x)):
        x_ = x[i] - xBar
        y_ = y[i] - yBar
        fenzi += x_ * y_
        x_2 += x_ ** 2
        y_2 += y_ ** 2
    fenmu = math.sqrt(x_2 * y_2)  # denominator
    return fenzi / fenmu


# R-squared (coefficient of determination) of a polynomial fit;
# `degree` is the highest power of x in the fitted polynomial
def polyfit(x, y, degree):
    result = {}
    coeffs = np.polyfit(x, y, degree)  # numpy fits the coefficients directly, highest power first
    result['polynomial'] = coeffs.tolist()
    p = np.poly1d(coeffs)  # the fitted line, e.g. p = 2.657 x + 5.322
    yhat = p(x)            # predicted y values
    y_ = np.mean(y)
    ssr = np.sum((yhat - y_) ** 2)
    sst = np.sum((y - y_) ** 2)
    result['determination'] = ssr / sst
    return result


x = [1, 3, 8, 7, 9]
y = [10, 12, 24, 21, 34]

p = getPearson(x, y)
print(p)       # Pearson coefficient: measures the strength of the linear relation between x and y
print(p ** 2)  # R-squared of a simple linear regression: the share of the variation in y explained by x

print(polyfit(x, y, 1))
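A quick sanity check for the file above (a sketch, reusing the x, y and the two functions it defines): numpy's built-in corrcoef should reproduce getPearson, and for a straight-line fit the coefficient of determination equals the squared Pearson coefficient, so the two paths must agree:

    # Sketch: cross-check getPearson and polyfit against library one-liners
    r = getPearson(x, y)
    print(r, np.corrcoef(x, y)[0, 1])                 # both are the Pearson coefficient
    print(r ** 2, polyfit(x, y, 1)['determination'])  # for degree 1, R^2 == r^2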
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Machine Learning Study Notes in Python
## 1. Machine Learning Algorithms: Theory and Practice
1. [Machine Learning Notes 1: Basic Concepts](https://github.com/daacheng/pythonForMachineLearning/blob/master/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%E4%B8%80--%E5%9F%BA%E6%9C%AC%E6%A6%82%E5%BF%B5.md)

2. [Machine Learning Notes 2: Linear Regression with One Variable](https://github.com/daacheng/pythonForMachineLearning/blob/master/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%E4%BA%8C--%E5%8D%95%E5%8F%98%E9%87%8F%E7%BA%BF%E6%80%A7%E5%9B%9E%E5%BD%92.md)

3. [Machine Learning Notes 3: Linear Regression with Multiple Variables](https://github.com/daacheng/pythonForMachineLearning/blob/master/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%E4%B8%89--%E5%A4%9A%E5%8F%98%E9%87%8F%E7%BA%BF%E6%80%A7%E5%9B%9E%E5%BD%92.md)

4. [Machine Learning Notes 4: Logistic Regression](https://github.com/daacheng/pythonForMachineLearning/blob/master/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%E5%9B%9B--%E9%80%BB%E8%BE%91%E5%9B%9E%E5%BD%92.md)

5. [Machine Learning Notes 5: Regularization](https://github.com/daacheng/pythonForMachineLearning/blob/master/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%E4%BA%94--%E6%AD%A3%E5%88%99%E5%8C%96.md)

6. [k-Nearest Neighbors](https://github.com/daacheng/pythonForMachineLearning/blob/master/k-%E4%B8%B4%E8%BF%91%E7%AE%97%E6%B3%95.md)

7. [Decision Trees](https://github.com/daacheng/pythonForMachineLearning/blob/master/%E5%86%B3%E7%AD%96%E6%A0%91.md)

8. [Naive Bayes](https://github.com/daacheng/pythonForMachineLearning/blob/master/%E6%9C%B4%E7%B4%A0%E8%B4%9D%E5%8F%B6%E6%96%AF.md)

9. [Logistic Regression](https://github.com/daacheng/pythonForMachineLearning/blob/master/%E9%80%BB%E8%BE%91%E5%9B%9E%E5%BD%92.md)

10. [Linear Regression](https://github.com/daacheng/pythonForMachineLearning/blob/master/%E7%BA%BF%E6%80%A7%E5%9B%9E%E5%BD%92.md)

## 2. Natural Language Processing
1. [NLP Notes 1: Understanding the Basic Concepts](https://github.com/daacheng/pythonForMachineLearning/blob/master/%E8%87%AA%E7%84%B6%E8%AF%AD%E8%A8%80%E5%A4%84%E7%90%86%E4%B8%80--%E5%9F%BA%E6%9C%AC%E6%A6%82%E5%BF%B5%E7%90%86%E8%A7%A3.md)

2. [NLP Notes 2: The Four Types of Grammars and Automata](https://github.com/daacheng/pythonForMachineLearning/blob/master/%E8%87%AA%E7%84%B6%E8%AF%AD%E8%A8%80%E5%A4%84%E7%90%86%E4%BA%8C--4%E5%9E%8B%E6%96%87%E6%B3%95%E4%B8%8E%E8%87%AA%E5%8A%A8%E6%9C%BA.md)

--------------------------------------------------------------------------------
/SVM1.py:
--------------------------------------------------------------------------------
from sklearn import svm

x = [[2, 0], [1, 1], [2, 3]]  # feature vectors
y = [0, 0, 1]                 # label for each feature vector
clf = svm.SVC(kernel='linear')
clf.fit(x, y)
print(clf)
print(clf.support_vectors_)   # the support vectors
print(clf.support_)           # indices of the support vectors in x
print(clf.n_support_)         # how many support vectors were found for each class
print(clf.predict([[2, 0]]))  # predict which class this point belongs to

--------------------------------------------------------------------------------
/SVM2.py:
--------------------------------------------------------------------------------
import numpy as np
import pylab as pl
from sklearn import svm

np.random.seed(0)  # fix the seed so every run draws the same random points
# two clusters of 20 points each, shifted apart so they are linearly separable
x = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]]
y = [0] * 20 + [1] * 20  # labels: twenty 0s followed by twenty 1s
clf = svm.SVC(kernel='linear')
clf = clf.fit(x, y)

# Recover the separating line from the fitted hyperplane:
# w0*x + w1*y + b = 0 can be rewritten as y = -(w0/w1)*x - b/w1
w = clf.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(-5, 5)
yy = a * xx - clf.intercept_[0] / w[1]

# The two margin boundaries are lines parallel to the separating line
# that pass through support vectors
b = clf.support_vectors_[0]
yy_down = a * xx + (b[1] - a * b[0])
b = clf.support_vectors_[-1]
yy_up = a * xx + (b[1] - a * b[0])

# plot the separating line, the margins, the points, and the support vectors
pl.plot(xx, yy, 'k-')
pl.plot(xx, yy_down, 'k--')
pl.plot(xx, yy_up, 'k--')

pl.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
           s=80, facecolors='none')
pl.scatter(x[:, 0], x[:, 1], c=y, cmap=pl.cm.Paired)

pl.axis('tight')
pl.show()

--------------------------------------------------------------------------------
/SimpleLinearRegression.py:
--------------------------------------------------------------------------------
import numpy as np

x = [1, 3, 2, 1, 3]
y = [14, 24, 18, 17, 27]


# Least-squares fit of y = b0 + b1*x:
# b1 = sum((x_i - x_mean) * (y_i - y_mean)) / sum((x_i - x_mean)**2)
# b0 = y_mean - b1 * x_mean
def fitSLR(x, y):
    n = len(x)
    fenzi = 0  # numerator
    fenmu = 0  # denominator
    for i in range(n):
        fenzi += (x[i] - np.mean(x)) * (y[i] - np.mean(y))
        fenmu += (x[i] - np.mean(x)) ** 2
    b1 = fenzi / fenmu
    b0 = np.mean(y) - b1 * np.mean(x)
    return b0, b1


def predict(x, b0, b1):
    y = b0 + b1 * x
    return y


b0, b1 = fitSLR(x, y)
print(b0, '###', b1)
y1 = predict(6, b0, b1)
print(y1)

--------------------------------------------------------------------------------
/dataset/datingTestSet2.txt:
-------------------------------------------------------------------------------- 1 | 40920 8.326976 0.953952 3 2 | 14488 7.153469 1.673904 2 3 | 26052 1.441871 0.805124 1 4 | 75136 13.147394 0.428964 1 5 | 38344 1.669788 0.134296 1 6 | 72993 10.141740 1.032955 1 7 | 35948 6.830792 1.213192 3 8 | 42666 13.276369 0.543880 3 9 | 67497 8.631577 0.749278 1 10 | 35483 12.273169 1.508053 3 11 | 50242 3.723498 0.831917 1 12 | 63275 8.385879 1.669485 1 13 | 5569 4.875435 0.728658 2 14 | 51052 4.680098 0.625224 1 15 | 77372 15.299570 0.331351 1 16 | 43673 1.889461 0.191283 1 17 | 61364 7.516754 1.269164 1 18 | 69673 14.239195 0.261333 1 19 | 15669 0.000000 1.250185 2 20 | 28488 10.528555 1.304844 3 21 | 6487 3.540265 0.822483 2 22 | 37708 2.991551 0.833920 1 23 | 22620 5.297865 0.638306 2 24 | 28782 6.593803 0.187108 3 25 | 19739 2.816760 1.686209 2 26 | 36788 12.458258 0.649617 3 27 | 5741 0.000000 1.656418 2 28 | 28567 9.968648 0.731232 3 29 | 6808 1.364838 0.640103 2 30 | 41611 0.230453 1.151996 1 31 | 36661 11.865402 0.882810 3 32 | 43605 0.120460 1.352013 1 33 | 15360 8.545204 1.340429 3 34 | 63796 5.856649 0.160006 1 35 | 10743 9.665618 0.778626 2 36 | 70808 9.778763 1.084103 1 37 | 72011 4.932976 0.632026 1 38 | 5914 2.216246 0.587095 2 39 | 14851 14.305636 0.632317 3 40 | 33553 12.591889 0.686581 3 41 | 44952 3.424649 1.004504 1 42 | 17934 0.000000 0.147573 2 43 | 27738 8.533823 0.205324 3 44 | 29290 9.829528 0.238620 3 45 | 42330 11.492186 0.263499 3 46 | 36429 3.570968 0.832254 1 47 | 39623 1.771228 0.207612 1 48 | 32404 3.513921 0.991854 1 49 | 27268 4.398172 0.975024 1 50 | 5477 4.276823 1.174874 2 51 | 14254 5.946014 1.614244 2 52 | 68613 13.798970 0.724375 1 53 | 41539 10.393591 1.663724 3 54 | 7917 3.007577 0.297302 2 55 | 21331 1.031938 0.486174 2 56 | 8338 4.751212 0.064693 2 57 | 5176 3.692269 1.655113 2 58 | 18983 10.448091 0.267652 3 59 | 68837 10.585786 0.329557 1 60 | 13438 1.604501 0.069064 2 61 | 48849 3.679497 0.961466 1 62 | 12285 3.795146 0.696694 2 63 | 7826 2.531885 1.659173 2 64 | 5565 9.733340 0.977746 2 65 | 10346 6.093067 1.413798 2 66 | 1823 7.712960 1.054927 2 67 | 9744 11.470364 0.760461 3 68 | 16857 2.886529 0.934416 2 69 | 39336 10.054373 1.138351 3 70 | 65230 9.972470 0.881876 1 71 | 2463 2.335785 1.366145 2 72 | 27353 11.375155 1.528626 3 73 | 16191 0.000000 0.605619 2 74 | 12258 4.126787 0.357501 2 75 | 42377 6.319522 1.058602 1 76 | 25607 8.680527 0.086955 3 77 | 77450 14.856391 1.129823 1 78 | 58732 2.454285 0.222380 1 79 | 46426 7.292202 0.548607 3 80 | 32688 8.745137 0.857348 3 81 | 64890 8.579001 0.683048 1 82 | 8554 2.507302 0.869177 2 83 | 28861 11.415476 1.505466 3 84 | 42050 4.838540 1.680892 1 85 | 32193 10.339507 0.583646 3 86 | 64895 6.573742 1.151433 1 87 | 2355 6.539397 0.462065 2 88 | 0 2.209159 0.723567 2 89 | 70406 11.196378 0.836326 1 90 | 57399 4.229595 0.128253 1 91 | 41732 9.505944 0.005273 3 92 | 11429 8.652725 1.348934 3 93 | 75270 17.101108 0.490712 1 94 | 5459 7.871839 0.717662 2 95 | 73520 8.262131 1.361646 1 96 | 40279 9.015635 1.658555 3 97 | 21540 9.215351 0.806762 3 98 | 17694 6.375007 0.033678 2 99 | 22329 2.262014 1.022169 1 100 | 46570 5.677110 0.709469 1 101 | 42403 11.293017 0.207976 3 102 | 33654 6.590043 1.353117 1 103 | 9171 4.711960 0.194167 2 104 | 28122 8.768099 1.108041 3 105 | 34095 11.502519 0.545097 3 106 | 1774 4.682812 0.578112 2 107 | 40131 12.446578 0.300754 3 108 | 13994 12.908384 1.657722 3 109 | 77064 12.601108 0.974527 1 110 | 11210 3.929456 0.025466 2 111 | 6122 9.751503 1.182050 3 112 | 15341 3.043767 
0.888168 2 113 | 44373 4.391522 0.807100 1 114 | 28454 11.695276 0.679015 3 115 | 63771 7.879742 0.154263 1 116 | 9217 5.613163 0.933632 2 117 | 69076 9.140172 0.851300 1 118 | 24489 4.258644 0.206892 1 119 | 16871 6.799831 1.221171 2 120 | 39776 8.752758 0.484418 3 121 | 5901 1.123033 1.180352 2 122 | 40987 10.833248 1.585426 3 123 | 7479 3.051618 0.026781 2 124 | 38768 5.308409 0.030683 3 125 | 4933 1.841792 0.028099 2 126 | 32311 2.261978 1.605603 1 127 | 26501 11.573696 1.061347 3 128 | 37433 8.038764 1.083910 3 129 | 23503 10.734007 0.103715 3 130 | 68607 9.661909 0.350772 1 131 | 27742 9.005850 0.548737 3 132 | 11303 0.000000 0.539131 2 133 | 0 5.757140 1.062373 2 134 | 32729 9.164656 1.624565 3 135 | 24619 1.318340 1.436243 1 136 | 42414 14.075597 0.695934 3 137 | 20210 10.107550 1.308398 3 138 | 33225 7.960293 1.219760 3 139 | 54483 6.317292 0.018209 1 140 | 18475 12.664194 0.595653 3 141 | 33926 2.906644 0.581657 1 142 | 43865 2.388241 0.913938 1 143 | 26547 6.024471 0.486215 3 144 | 44404 7.226764 1.255329 3 145 | 16674 4.183997 1.275290 2 146 | 8123 11.850211 1.096981 3 147 | 42747 11.661797 1.167935 3 148 | 56054 3.574967 0.494666 1 149 | 10933 0.000000 0.107475 2 150 | 18121 7.937657 0.904799 3 151 | 11272 3.365027 1.014085 2 152 | 16297 0.000000 0.367491 2 153 | 28168 13.860672 1.293270 3 154 | 40963 10.306714 1.211594 3 155 | 31685 7.228002 0.670670 3 156 | 55164 4.508740 1.036192 1 157 | 17595 0.366328 0.163652 2 158 | 1862 3.299444 0.575152 2 159 | 57087 0.573287 0.607915 1 160 | 63082 9.183738 0.012280 1 161 | 51213 7.842646 1.060636 3 162 | 6487 4.750964 0.558240 2 163 | 4805 11.438702 1.556334 3 164 | 30302 8.243063 1.122768 3 165 | 68680 7.949017 0.271865 1 166 | 17591 7.875477 0.227085 2 167 | 74391 9.569087 0.364856 1 168 | 37217 7.750103 0.869094 3 169 | 42814 0.000000 1.515293 1 170 | 14738 3.396030 0.633977 2 171 | 19896 11.916091 0.025294 3 172 | 14673 0.460758 0.689586 2 173 | 32011 13.087566 0.476002 3 174 | 58736 4.589016 1.672600 1 175 | 54744 8.397217 1.534103 1 176 | 29482 5.562772 1.689388 1 177 | 27698 10.905159 0.619091 3 178 | 11443 1.311441 1.169887 2 179 | 56117 10.647170 0.980141 3 180 | 39514 0.000000 0.481918 1 181 | 26627 8.503025 0.830861 3 182 | 16525 0.436880 1.395314 2 183 | 24368 6.127867 1.102179 1 184 | 22160 12.112492 0.359680 3 185 | 6030 1.264968 1.141582 2 186 | 6468 6.067568 1.327047 2 187 | 22945 8.010964 1.681648 3 188 | 18520 3.791084 0.304072 2 189 | 34914 11.773195 1.262621 3 190 | 6121 8.339588 1.443357 2 191 | 38063 2.563092 1.464013 1 192 | 23410 5.954216 0.953782 1 193 | 35073 9.288374 0.767318 3 194 | 52914 3.976796 1.043109 1 195 | 16801 8.585227 1.455708 3 196 | 9533 1.271946 0.796506 2 197 | 16721 0.000000 0.242778 2 198 | 5832 0.000000 0.089749 2 199 | 44591 11.521298 0.300860 3 200 | 10143 1.139447 0.415373 2 201 | 21609 5.699090 1.391892 2 202 | 23817 2.449378 1.322560 1 203 | 15640 0.000000 1.228380 2 204 | 8847 3.168365 0.053993 2 205 | 50939 10.428610 1.126257 3 206 | 28521 2.943070 1.446816 1 207 | 32901 10.441348 0.975283 3 208 | 42850 12.478764 1.628726 3 209 | 13499 5.856902 0.363883 2 210 | 40345 2.476420 0.096075 1 211 | 43547 1.826637 0.811457 1 212 | 70758 4.324451 0.328235 1 213 | 19780 1.376085 1.178359 2 214 | 44484 5.342462 0.394527 1 215 | 54462 11.835521 0.693301 3 216 | 20085 12.423687 1.424264 3 217 | 42291 12.161273 0.071131 3 218 | 47550 8.148360 1.649194 3 219 | 11938 1.531067 1.549756 2 220 | 40699 3.200912 0.309679 1 221 | 70908 8.862691 0.530506 1 222 | 73989 6.370551 0.369350 1 223 | 11872 
2.468841 0.145060 2 224 | 48463 11.054212 0.141508 3 225 | 15987 2.037080 0.715243 2 226 | 70036 13.364030 0.549972 1 227 | 32967 10.249135 0.192735 3 228 | 63249 10.464252 1.669767 1 229 | 42795 9.424574 0.013725 3 230 | 14459 4.458902 0.268444 2 231 | 19973 0.000000 0.575976 2 232 | 5494 9.686082 1.029808 3 233 | 67902 13.649402 1.052618 1 234 | 25621 13.181148 0.273014 3 235 | 27545 3.877472 0.401600 1 236 | 58656 1.413952 0.451380 1 237 | 7327 4.248986 1.430249 2 238 | 64555 8.779183 0.845947 1 239 | 8998 4.156252 0.097109 2 240 | 11752 5.580018 0.158401 2 241 | 76319 15.040440 1.366898 1 242 | 27665 12.793870 1.307323 3 243 | 67417 3.254877 0.669546 1 244 | 21808 10.725607 0.588588 3 245 | 15326 8.256473 0.765891 2 246 | 20057 8.033892 1.618562 3 247 | 79341 10.702532 0.204792 1 248 | 15636 5.062996 1.132555 2 249 | 35602 10.772286 0.668721 3 250 | 28544 1.892354 0.837028 1 251 | 57663 1.019966 0.372320 1 252 | 78727 15.546043 0.729742 1 253 | 68255 11.638205 0.409125 1 254 | 14964 3.427886 0.975616 2 255 | 21835 11.246174 1.475586 3 256 | 7487 0.000000 0.645045 2 257 | 8700 0.000000 1.424017 2 258 | 26226 8.242553 0.279069 3 259 | 65899 8.700060 0.101807 1 260 | 6543 0.812344 0.260334 2 261 | 46556 2.448235 1.176829 1 262 | 71038 13.230078 0.616147 1 263 | 47657 0.236133 0.340840 1 264 | 19600 11.155826 0.335131 3 265 | 37422 11.029636 0.505769 3 266 | 1363 2.901181 1.646633 2 267 | 26535 3.924594 1.143120 1 268 | 47707 2.524806 1.292848 1 269 | 38055 3.527474 1.449158 1 270 | 6286 3.384281 0.889268 2 271 | 10747 0.000000 1.107592 2 272 | 44883 11.898890 0.406441 3 273 | 56823 3.529892 1.375844 1 274 | 68086 11.442677 0.696919 1 275 | 70242 10.308145 0.422722 1 276 | 11409 8.540529 0.727373 2 277 | 67671 7.156949 1.691682 1 278 | 61238 0.720675 0.847574 1 279 | 17774 0.229405 1.038603 2 280 | 53376 3.399331 0.077501 1 281 | 30930 6.157239 0.580133 1 282 | 28987 1.239698 0.719989 1 283 | 13655 6.036854 0.016548 2 284 | 7227 5.258665 0.933722 2 285 | 40409 12.393001 1.571281 3 286 | 13605 9.627613 0.935842 2 287 | 26400 11.130453 0.597610 3 288 | 13491 8.842595 0.349768 3 289 | 30232 10.690010 1.456595 3 290 | 43253 5.714718 1.674780 3 291 | 55536 3.052505 1.335804 1 292 | 8807 0.000000 0.059025 2 293 | 25783 9.945307 1.287952 3 294 | 22812 2.719723 1.142148 1 295 | 77826 11.154055 1.608486 1 296 | 38172 2.687918 0.660836 1 297 | 31676 10.037847 0.962245 3 298 | 74038 12.404762 1.112080 1 299 | 44738 10.237305 0.633422 3 300 | 17410 4.745392 0.662520 2 301 | 5688 4.639461 1.569431 2 302 | 36642 3.149310 0.639669 1 303 | 29956 13.406875 1.639194 3 304 | 60350 6.068668 0.881241 1 305 | 23758 9.477022 0.899002 3 306 | 25780 3.897620 0.560201 2 307 | 11342 5.463615 1.203677 2 308 | 36109 3.369267 1.575043 1 309 | 14292 5.234562 0.825954 2 310 | 11160 0.000000 0.722170 2 311 | 23762 12.979069 0.504068 3 312 | 39567 5.376564 0.557476 1 313 | 25647 13.527910 1.586732 3 314 | 14814 2.196889 0.784587 2 315 | 73590 10.691748 0.007509 1 316 | 35187 1.659242 0.447066 1 317 | 49459 8.369667 0.656697 3 318 | 31657 13.157197 0.143248 3 319 | 6259 8.199667 0.908508 2 320 | 33101 4.441669 0.439381 3 321 | 27107 9.846492 0.644523 3 322 | 17824 0.019540 0.977949 2 323 | 43536 8.253774 0.748700 3 324 | 67705 6.038620 1.509646 1 325 | 35283 6.091587 1.694641 3 326 | 71308 8.986820 1.225165 1 327 | 31054 11.508473 1.624296 3 328 | 52387 8.807734 0.713922 3 329 | 40328 0.000000 0.816676 1 330 | 34844 8.889202 1.665414 3 331 | 11607 3.178117 0.542752 2 332 | 64306 7.013795 0.139909 1 333 | 32721 9.605014 
0.065254 3 334 | 33170 1.230540 1.331674 1 335 | 37192 10.412811 0.890803 3 336 | 13089 0.000000 0.567161 2 337 | 66491 9.699991 0.122011 1 338 | 15941 0.000000 0.061191 2 339 | 4272 4.455293 0.272135 2 340 | 48812 3.020977 1.502803 1 341 | 28818 8.099278 0.216317 3 342 | 35394 1.157764 1.603217 1 343 | 71791 10.105396 0.121067 1 344 | 40668 11.230148 0.408603 3 345 | 39580 9.070058 0.011379 3 346 | 11786 0.566460 0.478837 2 347 | 19251 0.000000 0.487300 2 348 | 56594 8.956369 1.193484 3 349 | 54495 1.523057 0.620528 1 350 | 11844 2.749006 0.169855 2 351 | 45465 9.235393 0.188350 3 352 | 31033 10.555573 0.403927 3 353 | 16633 6.956372 1.519308 2 354 | 13887 0.636281 1.273984 2 355 | 52603 3.574737 0.075163 1 356 | 72000 9.032486 1.461809 1 357 | 68497 5.958993 0.023012 1 358 | 35135 2.435300 1.211744 1 359 | 26397 10.539731 1.638248 3 360 | 7313 7.646702 0.056513 2 361 | 91273 20.919349 0.644571 1 362 | 24743 1.424726 0.838447 1 363 | 31690 6.748663 0.890223 3 364 | 15432 2.289167 0.114881 2 365 | 58394 5.548377 0.402238 1 366 | 33962 6.057227 0.432666 1 367 | 31442 10.828595 0.559955 3 368 | 31044 11.318160 0.271094 3 369 | 29938 13.265311 0.633903 3 370 | 9875 0.000000 1.496715 2 371 | 51542 6.517133 0.402519 3 372 | 11878 4.934374 1.520028 2 373 | 69241 10.151738 0.896433 1 374 | 37776 2.425781 1.559467 1 375 | 68997 9.778962 1.195498 1 376 | 67416 12.219950 0.657677 1 377 | 59225 7.394151 0.954434 1 378 | 29138 8.518535 0.742546 3 379 | 5962 2.798700 0.662632 2 380 | 10847 0.637930 0.617373 2 381 | 70527 10.750490 0.097415 1 382 | 9610 0.625382 0.140969 2 383 | 64734 10.027968 0.282787 1 384 | 25941 9.817347 0.364197 3 385 | 2763 0.646828 1.266069 2 386 | 55601 3.347111 0.914294 1 387 | 31128 11.816892 0.193798 3 388 | 5181 0.000000 1.480198 2 389 | 69982 10.945666 0.993219 1 390 | 52440 10.244706 0.280539 3 391 | 57350 2.579801 1.149172 1 392 | 57869 2.630410 0.098869 1 393 | 56557 11.746200 1.695517 3 394 | 42342 8.104232 1.326277 3 395 | 15560 12.409743 0.790295 3 396 | 34826 12.167844 1.328086 3 397 | 8569 3.198408 0.299287 2 398 | 77623 16.055513 0.541052 1 399 | 78184 7.138659 0.158481 1 400 | 7036 4.831041 0.761419 2 401 | 69616 10.082890 1.373611 1 402 | 21546 10.066867 0.788470 3 403 | 36715 8.129538 0.329913 3 404 | 20522 3.012463 1.138108 2 405 | 42349 3.720391 0.845974 1 406 | 9037 0.773493 1.148256 2 407 | 26728 10.962941 1.037324 3 408 | 587 0.177621 0.162614 2 409 | 48915 3.085853 0.967899 1 410 | 9824 8.426781 0.202558 2 411 | 4135 1.825927 1.128347 2 412 | 9666 2.185155 1.010173 2 413 | 59333 7.184595 1.261338 1 414 | 36198 0.000000 0.116525 1 415 | 34909 8.901752 1.033527 3 416 | 47516 2.451497 1.358795 1 417 | 55807 3.213631 0.432044 1 418 | 14036 3.974739 0.723929 2 419 | 42856 9.601306 0.619232 3 420 | 64007 8.363897 0.445341 1 421 | 59428 6.381484 1.365019 1 422 | 13730 0.000000 1.403914 2 423 | 41740 9.609836 1.438105 3 424 | 63546 9.904741 0.985862 1 425 | 30417 7.185807 1.489102 3 426 | 69636 5.466703 1.216571 1 427 | 64660 0.000000 0.915898 1 428 | 14883 4.575443 0.535671 2 429 | 7965 3.277076 1.010868 2 430 | 68620 10.246623 1.239634 1 431 | 8738 2.341735 1.060235 2 432 | 7544 3.201046 0.498843 2 433 | 6377 6.066013 0.120927 2 434 | 36842 8.829379 0.895657 3 435 | 81046 15.833048 1.568245 1 436 | 67736 13.516711 1.220153 1 437 | 32492 0.664284 1.116755 1 438 | 39299 6.325139 0.605109 3 439 | 77289 8.677499 0.344373 1 440 | 33835 8.188005 0.964896 3 441 | 71890 9.414263 0.384030 1 442 | 32054 9.196547 1.138253 3 443 | 38579 10.202968 0.452363 3 444 | 55984 
2.119439 1.481661 1 445 | 72694 13.635078 0.858314 1 446 | 42299 0.083443 0.701669 1 447 | 26635 9.149096 1.051446 3 448 | 8579 1.933803 1.374388 2 449 | 37302 14.115544 0.676198 3 450 | 22878 8.933736 0.943352 3 451 | 4364 2.661254 0.946117 2 452 | 4985 0.988432 1.305027 2 453 | 37068 2.063741 1.125946 1 454 | 41137 2.220590 0.690754 1 455 | 67759 6.424849 0.806641 1 456 | 11831 1.156153 1.613674 2 457 | 34502 3.032720 0.601847 1 458 | 4088 3.076828 0.952089 2 459 | 15199 0.000000 0.318105 2 460 | 17309 7.750480 0.554015 3 461 | 42816 10.958135 1.482500 3 462 | 43751 10.222018 0.488678 3 463 | 58335 2.367988 0.435741 1 464 | 75039 7.686054 1.381455 1 465 | 42878 11.464879 1.481589 3 466 | 42770 11.075735 0.089726 3 467 | 8848 3.543989 0.345853 2 468 | 31340 8.123889 1.282880 3 469 | 41413 4.331769 0.754467 3 470 | 12731 0.120865 1.211961 2 471 | 22447 6.116109 0.701523 3 472 | 33564 7.474534 0.505790 3 473 | 48907 8.819454 0.649292 3 474 | 8762 6.802144 0.615284 2 475 | 46696 12.666325 0.931960 3 476 | 36851 8.636180 0.399333 3 477 | 67639 11.730991 1.289833 1 478 | 171 8.132449 0.039062 2 479 | 26674 10.296589 1.496144 3 480 | 8739 7.583906 1.005764 2 481 | 66668 9.777806 0.496377 1 482 | 68732 8.833546 0.513876 1 483 | 69995 4.907899 1.518036 1 484 | 82008 8.362736 1.285939 1 485 | 25054 9.084726 1.606312 3 486 | 33085 14.164141 0.560970 3 487 | 41379 9.080683 0.989920 3 488 | 39417 6.522767 0.038548 3 489 | 12556 3.690342 0.462281 2 490 | 39432 3.563706 0.242019 1 491 | 38010 1.065870 1.141569 1 492 | 69306 6.683796 1.456317 1 493 | 38000 1.712874 0.243945 1 494 | 46321 13.109929 1.280111 3 495 | 66293 11.327910 0.780977 1 496 | 22730 4.545711 1.233254 1 497 | 5952 3.367889 0.468104 2 498 | 72308 8.326224 0.567347 1 499 | 60338 8.978339 1.442034 1 500 | 13301 5.655826 1.582159 2 501 | 27884 8.855312 0.570684 3 502 | 11188 6.649568 0.544233 2 503 | 56796 3.966325 0.850410 1 504 | 8571 1.924045 1.664782 2 505 | 4914 6.004812 0.280369 2 506 | 10784 0.000000 0.375849 2 507 | 39296 9.923018 0.092192 3 508 | 13113 2.389084 0.119284 2 509 | 70204 13.663189 0.133251 1 510 | 46813 11.434976 0.321216 3 511 | 11697 0.358270 1.292858 2 512 | 44183 9.598873 0.223524 3 513 | 2225 6.375275 0.608040 2 514 | 29066 11.580532 0.458401 3 515 | 4245 5.319324 1.598070 2 516 | 34379 4.324031 1.603481 1 517 | 44441 2.358370 1.273204 1 518 | 2022 0.000000 1.182708 2 519 | 26866 12.824376 0.890411 3 520 | 57070 1.587247 1.456982 1 521 | 32932 8.510324 1.520683 3 522 | 51967 10.428884 1.187734 3 523 | 44432 8.346618 0.042318 3 524 | 67066 7.541444 0.809226 1 525 | 17262 2.540946 1.583286 2 526 | 79728 9.473047 0.692513 1 527 | 14259 0.352284 0.474080 2 528 | 6122 0.000000 0.589826 2 529 | 76879 12.405171 0.567201 1 530 | 11426 4.126775 0.871452 2 531 | 2493 0.034087 0.335848 2 532 | 19910 1.177634 0.075106 2 533 | 10939 0.000000 0.479996 2 534 | 17716 0.994909 0.611135 2 535 | 31390 11.053664 1.180117 3 536 | 20375 0.000000 1.679729 2 537 | 26309 2.495011 1.459589 1 538 | 33484 11.516831 0.001156 3 539 | 45944 9.213215 0.797743 3 540 | 4249 5.332865 0.109288 2 541 | 6089 0.000000 1.689771 2 542 | 7513 0.000000 1.126053 2 543 | 27862 12.640062 1.690903 3 544 | 39038 2.693142 1.317518 1 545 | 19218 3.328969 0.268271 2 546 | 62911 7.193166 1.117456 1 547 | 77758 6.615512 1.521012 1 548 | 27940 8.000567 0.835341 3 549 | 2194 4.017541 0.512104 2 550 | 37072 13.245859 0.927465 3 551 | 15585 5.970616 0.813624 2 552 | 25577 11.668719 0.886902 3 553 | 8777 4.283237 1.272728 2 554 | 29016 10.742963 0.971401 3 555 | 21910 
12.326672 1.592608 3 556 | 12916 0.000000 0.344622 2 557 | 10976 0.000000 0.922846 2 558 | 79065 10.602095 0.573686 1 559 | 36759 10.861859 1.155054 3 560 | 50011 1.229094 1.638690 1 561 | 1155 0.410392 1.313401 2 562 | 71600 14.552711 0.616162 1 563 | 30817 14.178043 0.616313 3 564 | 54559 14.136260 0.362388 1 565 | 29764 0.093534 1.207194 1 566 | 69100 10.929021 0.403110 1 567 | 47324 11.432919 0.825959 3 568 | 73199 9.134527 0.586846 1 569 | 44461 5.071432 1.421420 1 570 | 45617 11.460254 1.541749 3 571 | 28221 11.620039 1.103553 3 572 | 7091 4.022079 0.207307 2 573 | 6110 3.057842 1.631262 2 574 | 79016 7.782169 0.404385 1 575 | 18289 7.981741 0.929789 3 576 | 43679 4.601363 0.268326 1 577 | 22075 2.595564 1.115375 1 578 | 23535 10.049077 0.391045 3 579 | 25301 3.265444 1.572970 2 580 | 32256 11.780282 1.511014 3 581 | 36951 3.075975 0.286284 1 582 | 31290 1.795307 0.194343 1 583 | 38953 11.106979 0.202415 3 584 | 35257 5.994413 0.800021 1 585 | 25847 9.706062 1.012182 3 586 | 32680 10.582992 0.836025 3 587 | 62018 7.038266 1.458979 1 588 | 9074 0.023771 0.015314 2 589 | 33004 12.823982 0.676371 3 590 | 44588 3.617770 0.493483 1 591 | 32565 8.346684 0.253317 3 592 | 38563 6.104317 0.099207 1 593 | 75668 16.207776 0.584973 1 594 | 9069 6.401969 1.691873 2 595 | 53395 2.298696 0.559757 1 596 | 28631 7.661515 0.055981 3 597 | 71036 6.353608 1.645301 1 598 | 71142 10.442780 0.335870 1 599 | 37653 3.834509 1.346121 1 600 | 76839 10.998587 0.584555 1 601 | 9916 2.695935 1.512111 2 602 | 38889 3.356646 0.324230 1 603 | 39075 14.677836 0.793183 3 604 | 48071 1.551934 0.130902 1 605 | 7275 2.464739 0.223502 2 606 | 41804 1.533216 1.007481 1 607 | 35665 12.473921 0.162910 3 608 | 67956 6.491596 0.032576 1 609 | 41892 10.506276 1.510747 3 610 | 38844 4.380388 0.748506 1 611 | 74197 13.670988 1.687944 1 612 | 14201 8.317599 0.390409 2 613 | 3908 0.000000 0.556245 2 614 | 2459 0.000000 0.290218 2 615 | 32027 10.095799 1.188148 3 616 | 12870 0.860695 1.482632 2 617 | 9880 1.557564 0.711278 2 618 | 72784 10.072779 0.756030 1 619 | 17521 0.000000 0.431468 2 620 | 50283 7.140817 0.883813 3 621 | 33536 11.384548 1.438307 3 622 | 9452 3.214568 1.083536 2 623 | 37457 11.720655 0.301636 3 624 | 17724 6.374475 1.475925 3 625 | 43869 5.749684 0.198875 3 626 | 264 3.871808 0.552602 2 627 | 25736 8.336309 0.636238 3 628 | 39584 9.710442 1.503735 3 629 | 31246 1.532611 1.433898 1 630 | 49567 9.785785 0.984614 3 631 | 7052 2.633627 1.097866 2 632 | 35493 9.238935 0.494701 3 633 | 10986 1.205656 1.398803 2 634 | 49508 3.124909 1.670121 1 635 | 5734 7.935489 1.585044 2 636 | 65479 12.746636 1.560352 1 637 | 77268 10.732563 0.545321 1 638 | 28490 3.977403 0.766103 1 639 | 13546 4.194426 0.450663 2 640 | 37166 9.610286 0.142912 3 641 | 16381 4.797555 1.260455 2 642 | 10848 1.615279 0.093002 2 643 | 35405 4.614771 1.027105 1 644 | 15917 0.000000 1.369726 2 645 | 6131 0.608457 0.512220 2 646 | 67432 6.558239 0.667579 1 647 | 30354 12.315116 0.197068 3 648 | 69696 7.014973 1.494616 1 649 | 33481 8.822304 1.194177 3 650 | 43075 10.086796 0.570455 3 651 | 38343 7.241614 1.661627 3 652 | 14318 4.602395 1.511768 2 653 | 5367 7.434921 0.079792 2 654 | 37894 10.467570 1.595418 3 655 | 36172 9.948127 0.003663 3 656 | 40123 2.478529 1.568987 1 657 | 10976 5.938545 0.878540 2 658 | 12705 0.000000 0.948004 2 659 | 12495 5.559181 1.357926 2 660 | 35681 9.776654 0.535966 3 661 | 46202 3.092056 0.490906 1 662 | 11505 0.000000 1.623311 2 663 | 22834 4.459495 0.538867 1 664 | 49901 8.334306 1.646600 3 665 | 71932 11.226654 0.384686 1 
666 | 13279 3.904737 1.597294 2 667 | 49112 7.038205 1.211329 3 668 | 77129 9.836120 1.054340 1 669 | 37447 1.990976 0.378081 1 670 | 62397 9.005302 0.485385 1 671 | 0 1.772510 1.039873 2 672 | 15476 0.458674 0.819560 2 673 | 40625 10.003919 0.231658 3 674 | 36706 0.520807 1.476008 1 675 | 28580 10.678214 1.431837 3 676 | 25862 4.425992 1.363842 1 677 | 63488 12.035355 0.831222 1 678 | 33944 10.606732 1.253858 3 679 | 30099 1.568653 0.684264 1 680 | 13725 2.545434 0.024271 2 681 | 36768 10.264062 0.982593 3 682 | 64656 9.866276 0.685218 1 683 | 14927 0.142704 0.057455 2 684 | 43231 9.853270 1.521432 3 685 | 66087 6.596604 1.653574 1 686 | 19806 2.602287 1.321481 2 687 | 41081 10.411776 0.664168 3 688 | 10277 7.083449 0.622589 2 689 | 7014 2.080068 1.254441 2 690 | 17275 0.522844 1.622458 2 691 | 31600 10.362000 1.544827 3 692 | 59956 3.412967 1.035410 1 693 | 42181 6.796548 1.112153 3 694 | 51743 4.092035 0.075804 1 695 | 5194 2.763811 1.564325 2 696 | 30832 12.547439 1.402443 3 697 | 7976 5.708052 1.596152 2 698 | 14602 4.558025 0.375806 2 699 | 41571 11.642307 0.438553 3 700 | 55028 3.222443 0.121399 1 701 | 5837 4.736156 0.029871 2 702 | 39808 10.839526 0.836323 3 703 | 20944 4.194791 0.235483 2 704 | 22146 14.936259 0.888582 3 705 | 42169 3.310699 1.521855 1 706 | 7010 2.971931 0.034321 2 707 | 3807 9.261667 0.537807 2 708 | 29241 7.791833 1.111416 3 709 | 52696 1.480470 1.028750 1 710 | 42545 3.677287 0.244167 1 711 | 24437 2.202967 1.370399 1 712 | 16037 5.796735 0.935893 2 713 | 8493 3.063333 0.144089 2 714 | 68080 11.233094 0.492487 1 715 | 59016 1.965570 0.005697 1 716 | 11810 8.616719 0.137419 2 717 | 68630 6.609989 1.083505 1 718 | 7629 1.712639 1.086297 2 719 | 71992 10.117445 1.299319 1 720 | 13398 0.000000 1.104178 2 721 | 26241 9.824777 1.346821 3 722 | 11160 1.653089 0.980949 2 723 | 76701 18.178822 1.473671 1 724 | 32174 6.781126 0.885340 3 725 | 45043 8.206750 1.549223 3 726 | 42173 10.081853 1.376745 3 727 | 69801 6.288742 0.112799 1 728 | 41737 3.695937 1.543589 1 729 | 46979 6.726151 1.069380 3 730 | 79267 12.969999 1.568223 1 731 | 4615 2.661390 1.531933 2 732 | 32907 7.072764 1.117386 3 733 | 37444 9.123366 1.318988 3 734 | 569 3.743946 1.039546 2 735 | 8723 2.341300 0.219361 2 736 | 6024 0.541913 0.592348 2 737 | 52252 2.310828 1.436753 1 738 | 8358 6.226597 1.427316 2 739 | 26166 7.277876 0.489252 3 740 | 18471 0.000000 0.389459 2 741 | 3386 7.218221 1.098828 2 742 | 41544 8.777129 1.111464 3 743 | 10480 2.813428 0.819419 2 744 | 5894 2.268766 1.412130 2 745 | 7273 6.283627 0.571292 2 746 | 22272 7.520081 1.626868 3 747 | 31369 11.739225 0.027138 3 748 | 10708 3.746883 0.877350 2 749 | 69364 12.089835 0.521631 1 750 | 37760 12.310404 0.259339 3 751 | 13004 0.000000 0.671355 2 752 | 37885 2.728800 0.331502 1 753 | 52555 10.814342 0.607652 3 754 | 38997 12.170268 0.844205 3 755 | 69698 6.698371 0.240084 1 756 | 11783 3.632672 1.643479 2 757 | 47636 10.059991 0.892361 3 758 | 15744 1.887674 0.756162 2 759 | 69058 8.229125 0.195886 1 760 | 33057 7.817082 0.476102 3 761 | 28681 12.277230 0.076805 3 762 | 34042 10.055337 1.115778 3 763 | 29928 3.596002 1.485952 1 764 | 9734 2.755530 1.420655 2 765 | 7344 7.780991 0.513048 2 766 | 7387 0.093705 0.391834 2 767 | 33957 8.481567 0.520078 3 768 | 9936 3.865584 0.110062 2 769 | 36094 9.683709 0.779984 3 770 | 39835 10.617255 1.359970 3 771 | 64486 7.203216 1.624762 1 772 | 0 7.601414 1.215605 2 773 | 39539 1.386107 1.417070 1 774 | 66972 9.129253 0.594089 1 775 | 15029 1.363447 0.620841 2 776 | 44909 3.181399 0.359329 1 777 | 
38183 13.365414 0.217011 3 778 | 37372 4.207717 1.289767 1 779 | 0 4.088395 0.870075 2 780 | 17786 3.327371 1.142505 2 781 | 39055 1.303323 1.235650 1 782 | 37045 7.999279 1.581763 3 783 | 6435 2.217488 0.864536 2 784 | 72265 7.751808 0.192451 1 785 | 28152 14.149305 1.591532 3 786 | 25931 8.765721 0.152808 3 787 | 7538 3.408996 0.184896 2 788 | 1315 1.251021 0.112340 2 789 | 12292 6.160619 1.537165 2 790 | 49248 1.034538 1.585162 1 791 | 9025 0.000000 1.034635 2 792 | 13438 2.355051 0.542603 2 793 | 69683 6.614543 0.153771 1 794 | 25374 10.245062 1.450903 3 795 | 55264 3.467074 1.231019 1 796 | 38324 7.487678 1.572293 3 797 | 69643 4.624115 1.185192 1 798 | 44058 8.995957 1.436479 3 799 | 41316 11.564476 0.007195 3 800 | 29119 3.440948 0.078331 1 801 | 51656 1.673603 0.732746 1 802 | 3030 4.719341 0.699755 2 803 | 35695 10.304798 1.576488 3 804 | 1537 2.086915 1.199312 2 805 | 9083 6.338220 1.131305 2 806 | 47744 8.254926 0.710694 3 807 | 71372 16.067108 0.974142 1 808 | 37980 1.723201 0.310488 1 809 | 42385 3.785045 0.876904 1 810 | 22687 2.557561 0.123738 1 811 | 39512 9.852220 1.095171 3 812 | 11885 3.679147 1.557205 2 813 | 4944 9.789681 0.852971 2 814 | 73230 14.958998 0.526707 1 815 | 17585 11.182148 1.288459 3 816 | 68737 7.528533 1.657487 1 817 | 13818 5.253802 1.378603 2 818 | 31662 13.946752 1.426657 3 819 | 86686 15.557263 1.430029 1 820 | 43214 12.483550 0.688513 3 821 | 24091 2.317302 1.411137 1 822 | 52544 10.069724 0.766119 3 823 | 61861 5.792231 1.615483 1 824 | 47903 4.138435 0.475994 1 825 | 37190 12.929517 0.304378 3 826 | 6013 9.378238 0.307392 2 827 | 27223 8.361362 1.643204 3 828 | 69027 7.939406 1.325042 1 829 | 78642 10.735384 0.705788 1 830 | 30254 11.592723 0.286188 3 831 | 21704 10.098356 0.704748 3 832 | 34985 9.299025 0.545337 3 833 | 31316 11.158297 0.218067 3 834 | 76368 16.143900 0.558388 1 835 | 27953 10.971700 1.221787 3 836 | 152 0.000000 0.681478 2 837 | 9146 3.178961 1.292692 2 838 | 75346 17.625350 0.339926 1 839 | 26376 1.995833 0.267826 1 840 | 35255 10.640467 0.416181 3 841 | 19198 9.628339 0.985462 3 842 | 12518 4.662664 0.495403 2 843 | 25453 5.754047 1.382742 2 844 | 12530 0.000000 0.037146 2 845 | 62230 9.334332 0.198118 1 846 | 9517 3.846162 0.619968 2 847 | 71161 10.685084 0.678179 1 848 | 1593 4.752134 0.359205 2 849 | 33794 0.697630 0.966786 1 850 | 39710 10.365836 0.505898 3 851 | 16941 0.461478 0.352865 2 852 | 69209 11.339537 1.068740 1 853 | 4446 5.420280 0.127310 2 854 | 9347 3.469955 1.619947 2 855 | 55635 8.517067 0.994858 3 856 | 65889 8.306512 0.413690 1 857 | 10753 2.628690 0.444320 2 858 | 7055 0.000000 0.802985 2 859 | 7905 0.000000 1.170397 2 860 | 53447 7.298767 1.582346 3 861 | 9194 7.331319 1.277988 2 862 | 61914 9.392269 0.151617 1 863 | 15630 5.541201 1.180596 2 864 | 79194 15.149460 0.537540 1 865 | 12268 5.515189 0.250562 2 866 | 33682 7.728898 0.920494 3 867 | 26080 11.318785 1.510979 3 868 | 19119 3.574709 1.531514 2 869 | 30902 7.350965 0.026332 3 870 | 63039 7.122363 1.630177 1 871 | 51136 1.828412 1.013702 1 872 | 35262 10.117989 1.156862 3 873 | 42776 11.309897 0.086291 3 874 | 64191 8.342034 1.388569 1 875 | 15436 0.241714 0.715577 2 876 | 14402 10.482619 1.694972 2 877 | 6341 9.289510 1.428879 2 878 | 14113 4.269419 0.134181 2 879 | 6390 0.000000 0.189456 2 880 | 8794 0.817119 0.143668 2 881 | 43432 1.508394 0.652651 1 882 | 38334 9.359918 0.052262 3 883 | 34068 10.052333 0.550423 3 884 | 30819 11.111660 0.989159 3 885 | 22239 11.265971 0.724054 3 886 | 28725 10.383830 0.254836 3 887 | 57071 3.878569 1.377983 1 
888 | 72420 13.679237 0.025346 1 889 | 28294 10.526846 0.781569 3 890 | 9896 0.000000 0.924198 2 891 | 65821 4.106727 1.085669 1 892 | 7645 8.118856 1.470686 2 893 | 71289 7.796874 0.052336 1 894 | 5128 2.789669 1.093070 2 895 | 13711 6.226962 0.287251 2 896 | 22240 10.169548 1.660104 3 897 | 15092 0.000000 1.370549 2 898 | 5017 7.513353 0.137348 2 899 | 10141 8.240793 0.099735 2 900 | 35570 14.612797 1.247390 3 901 | 46893 3.562976 0.445386 1 902 | 8178 3.230482 1.331698 2 903 | 55783 3.612548 1.551911 1 904 | 1148 0.000000 0.332365 2 905 | 10062 3.931299 0.487577 2 906 | 74124 14.752342 1.155160 1 907 | 66603 10.261887 1.628085 1 908 | 11893 2.787266 1.570402 2 909 | 50908 15.112319 1.324132 3 910 | 39891 5.184553 0.223382 3 911 | 65915 3.868359 0.128078 1 912 | 65678 3.507965 0.028904 1 913 | 62996 11.019254 0.427554 1 914 | 36851 3.812387 0.655245 1 915 | 36669 11.056784 0.378725 3 916 | 38876 8.826880 1.002328 3 917 | 26878 11.173861 1.478244 3 918 | 46246 11.506465 0.421993 3 919 | 12761 7.798138 0.147917 3 920 | 35282 10.155081 1.370039 3 921 | 68306 10.645275 0.693453 1 922 | 31262 9.663200 1.521541 3 923 | 34754 10.790404 1.312679 3 924 | 13408 2.810534 0.219962 2 925 | 30365 9.825999 1.388500 3 926 | 10709 1.421316 0.677603 2 927 | 24332 11.123219 0.809107 3 928 | 45517 13.402206 0.661524 3 929 | 6178 1.212255 0.836807 2 930 | 10639 1.568446 1.297469 2 931 | 29613 3.343473 1.312266 1 932 | 22392 5.400155 0.193494 1 933 | 51126 3.818754 0.590905 1 934 | 53644 7.973845 0.307364 3 935 | 51417 9.078824 0.734876 3 936 | 24859 0.153467 0.766619 1 937 | 61732 8.325167 0.028479 1 938 | 71128 7.092089 1.216733 1 939 | 27276 5.192485 1.094409 3 940 | 30453 10.340791 1.087721 3 941 | 18670 2.077169 1.019775 2 942 | 70600 10.151966 0.993105 1 943 | 12683 0.046826 0.809614 2 944 | 81597 11.221874 1.395015 1 945 | 69959 14.497963 1.019254 1 946 | 8124 3.554508 0.533462 2 947 | 18867 3.522673 0.086725 2 948 | 80886 14.531655 0.380172 1 949 | 55895 3.027528 0.885457 1 950 | 31587 1.845967 0.488985 1 951 | 10591 10.226164 0.804403 3 952 | 70096 10.965926 1.212328 1 953 | 53151 2.129921 1.477378 1 954 | 11992 0.000000 1.606849 2 955 | 33114 9.489005 0.827814 3 956 | 7413 0.000000 1.020797 2 957 | 10583 0.000000 1.270167 2 958 | 58668 6.556676 0.055183 1 959 | 35018 9.959588 0.060020 3 960 | 70843 7.436056 1.479856 1 961 | 14011 0.404888 0.459517 2 962 | 35015 9.952942 1.650279 3 963 | 70839 15.600252 0.021935 1 964 | 3024 2.723846 0.387455 2 965 | 5526 0.513866 1.323448 2 966 | 5113 0.000000 0.861859 2 967 | 20851 7.280602 1.438470 2 968 | 40999 9.161978 1.110180 3 969 | 15823 0.991725 0.730979 2 970 | 35432 7.398380 0.684218 3 971 | 53711 12.149747 1.389088 3 972 | 64371 9.149678 0.874905 1 973 | 9289 9.666576 1.370330 2 974 | 60613 3.620110 0.287767 1 975 | 18338 5.238800 1.253646 2 976 | 22845 14.715782 1.503758 3 977 | 74676 14.445740 1.211160 1 978 | 34143 13.609528 0.364240 3 979 | 14153 3.141585 0.424280 2 980 | 9327 0.000000 0.120947 2 981 | 18991 0.454750 1.033280 2 982 | 9193 0.510310 0.016395 2 983 | 2285 3.864171 0.616349 2 984 | 9493 6.724021 0.563044 2 985 | 2371 4.289375 0.012563 2 986 | 13963 0.000000 1.437030 2 987 | 2299 3.733617 0.698269 2 988 | 5262 2.002589 1.380184 2 989 | 4659 2.502627 0.184223 2 990 | 17582 6.382129 0.876581 2 991 | 27750 8.546741 0.128706 3 992 | 9868 2.694977 0.432818 2 993 | 18333 3.951256 0.333300 2 994 | 3780 9.856183 0.329181 2 995 | 18190 2.068962 0.429927 2 996 | 11145 3.410627 0.631838 2 997 | 68846 9.974715 0.669787 1 998 | 26575 10.650102 
0.866627 3 999 | 48111 9.134528 0.728045 3 1000 | 43757 7.882601 1.332446 3 1001 |
--------------------------------------------------------------------------------
/dataset/email/ham/1.txt:
--------------------------------------------------------------------------------
Hi Peter,

With Jose out of town, do you want to
meet once in a while to keep things
going and do some interesting stuff?

Let me know
Eugene
--------------------------------------------------------------------------------
/dataset/email/ham/10.txt:
--------------------------------------------------------------------------------
Ryan Whybrew commented on your status.

Ryan wrote:
"turd ferguson or butt horn."
--------------------------------------------------------------------------------
/dataset/email/ham/11.txt:
--------------------------------------------------------------------------------
Arvind Thirumalai commented on your status.

Arvind wrote:
""you know""


Reply to this email to comment on this status.

--------------------------------------------------------------------------------
/dataset/email/ham/12.txt:
--------------------------------------------------------------------------------
Thanks Peter.

I'll definitely check in on this. How is your book
going? I heard chapter 1 came in and it was in
good shape. ;-)

I hope you are doing well.

Cheers,

Troy
--------------------------------------------------------------------------------
/dataset/email/ham/13.txt:
--------------------------------------------------------------------------------
Jay Stepp commented on your status.

Jay wrote:
""to the" ???"


Reply to this email to comment on this status.

To see the comment thread, follow the link below:

--------------------------------------------------------------------------------
/dataset/email/ham/14.txt:
--------------------------------------------------------------------------------
LinkedIn

Kerry Haloney requested to add you as a connection on LinkedIn:

Peter,

I'd like to add you to my professional network on LinkedIn.

- Kerry Haloney

--------------------------------------------------------------------------------
/dataset/email/ham/15.txt:
--------------------------------------------------------------------------------
Hi Peter,

The hotels are the ones that rent out the tent. They are all lined up on the hotel grounds : )) So much for being one with nature, more like being one with a couple dozen tour groups and nature.
I have about 100M of pictures from that trip. I can go through them and get you jpgs of my favorite scenic pictures.

Where are you and Jocelyn now? New York? Will you come to Tokyo for Chinese New Year? Perhaps to see the two of you then. I will go to Thailand for winter holiday to see my mom : )

Take care,
D
--------------------------------------------------------------------------------
/dataset/email/ham/16.txt:
--------------------------------------------------------------------------------
yeah I am ready. I may not be here because Jar Jar has plane tickets to Germany for me.
--------------------------------------------------------------------------------
/dataset/email/ham/17.txt:
--------------------------------------------------------------------------------
Benoit Mandelbrot 1924-2010

Benoit Mandelbrot 1924-2010

Wilmott Team

Benoit Mandelbrot, the mathematician, the father of fractal mathematics, and advocate of more sophisticated modelling in quantitative finance, died on 14th October 2010 aged 85.

Wilmott magazine has often featured Mandelbrot, his ideas, and the work of others inspired by his fundamental insights.

You must be logged on to view these articles from past issues of Wilmott Magazine.
--------------------------------------------------------------------------------
/dataset/email/ham/18.txt:
--------------------------------------------------------------------------------
Hi Peter,

Sure thing. Sounds good. Let me know what time would be good for you.
I will come prepared with some ideas and we can go from there.

Regards,

-Vivek.
--------------------------------------------------------------------------------
/dataset/email/ham/19.txt:
--------------------------------------------------------------------------------
LinkedIn

Julius O requested to add you as a connection on LinkedIn:

Hi Peter.

Looking forward to the book!


Accept View invitation from Julius O

--------------------------------------------------------------------------------
/dataset/email/ham/2.txt:
--------------------------------------------------------------------------------
Yay to you both doing fine!

I'm working on an MBA in Design Strategy at CCA (top art school.) It's a new program focusing on more of a right-brained creative and strategic approach to management. I'm an 1/8 of the way done today!
--------------------------------------------------------------------------------
/dataset/email/ham/20.txt:
--------------------------------------------------------------------------------
I've thought about this and think it's possible. We should get another
lunch. I have a car now and could come pick you up this time. Does
this wednesday work? 11:50?

Can I have a signed copy of you book?
--------------------------------------------------------------------------------
/dataset/email/ham/21.txt:
--------------------------------------------------------------------------------
we saw this on the way to the coast...thought u might like it

hangzhou is huge, one day wasn't enough, but we got a glimpse...

we went inside the china pavilion at expo, it is pretty interesting,
each province has an exhibit...
--------------------------------------------------------------------------------
/dataset/email/ham/22.txt:
--------------------------------------------------------------------------------
Hi Hommies,

Just got a phone call from the roofer, they will come and spaying the foaming today. it will be dusty. pls close all the doors and windows.
Could you help me to close my bathroom window, cat window and the sliding door behind the TV?
I don't know how can those 2 cats survive......

Sorry for any inconvenience!
--------------------------------------------------------------------------------
/dataset/email/ham/23.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/dataset/email/ham/23.txt
--------------------------------------------------------------------------------
/dataset/email/ham/24.txt:
--------------------------------------------------------------------------------
Ok I will be there by 10:00 at the latest.
--------------------------------------------------------------------------------
/dataset/email/ham/25.txt:
--------------------------------------------------------------------------------
That is cold. Is there going to be a retirement party?
Are the leaves changing color?
--------------------------------------------------------------------------------
/dataset/email/ham/3.txt:
--------------------------------------------------------------------------------
WHat is going on there?
I talked to John on email. We talked about some computer stuff that's it.

I went bike riding in the rain, it was not that cold.

We went to the museum in SF yesterday it was $3 to get in and they had
free food. At the same time was a SF Giants game, when we got done we
had to take the train with all the Giants fans, they are 1/2 drunk.
--------------------------------------------------------------------------------
/dataset/email/ham/4.txt:
--------------------------------------------------------------------------------
Yo. I've been working on my running website. I'm using jquery and the jqplot plugin. I'm not too far away from having a prototype to launch.

You used jqplot right? If not, I think you would like it.
--------------------------------------------------------------------------------
/dataset/email/ham/5.txt:
--------------------------------------------------------------------------------
There was a guy at the gas station who told me that if I knew Mandarin
and Python I could get a job with the FBI.
--------------------------------------------------------------------------------
/dataset/email/ham/6.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/dataset/email/ham/6.txt
--------------------------------------------------------------------------------
/dataset/email/ham/7.txt:
--------------------------------------------------------------------------------
Zach Hamm commented on your status.

Zach wrote:
"doggy style - enough said, thank you & good night"

--------------------------------------------------------------------------------
/dataset/email/ham/8.txt:
--------------------------------------------------------------------------------
This e-mail was sent from a notification-only address that cannot accept incoming e-mail. Please do not reply to this message.

Thank you for your online reservation. The store you selected has located the item you requested and has placed it on hold in your name. Please note that all items are held for 1 day. Please note store prices may differ from those online.

If you have questions or need assistance with your reservation, please contact the store at the phone number listed below.
You can also access store information, such as store hours and location, on the web at http://www.borders.com/online/store/StoreDetailView_98.
--------------------------------------------------------------------------------
/dataset/email/ham/9.txt:
--------------------------------------------------------------------------------
Hi Peter,

These are the only good scenic ones and it's too bad there was a girl's back in one of them. Just try to enjoy the blue sky : ))

D
--------------------------------------------------------------------------------
/dataset/email/spam/1.txt:
--------------------------------------------------------------------------------
--- Codeine 15mg -- 30 for $203.70 -- VISA Only!!! --

-- Codeine (Methylmorphine) is a narcotic (opioid) pain reliever
-- We have 15mg & 30mg pills -- 30/15mg for $203.70 - 60/15mg for $385.80 - 90/15mg for $562.50 -- VISA Only!!! ---
--------------------------------------------------------------------------------
/dataset/email/spam/10.txt:
--------------------------------------------------------------------------------
OrderCializViagra Online & Save 75-90%

0nline Pharmacy NoPrescription required
Buy Canadian Drugs at Wholesale Prices and Save 75-90%
FDA-Approved drugs + Superb Quality Drugs only!
Accept all major credit cards
--------------------------------------------------------------------------------
/dataset/email/spam/11.txt:
--------------------------------------------------------------------------------
You Have Everything To Gain!

Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY

Amazing increase in thickness of yourPenis, up to 30%
BetterEjacu1ation control
Experience Rock-HardErecetions
Explosive, intenseOrgasns
Increase volume ofEjacu1ate
Doctor designed and endorsed
100% herbal, 100% Natural, 100% Safe
The proven NaturalPenisEnhancement that works!
100% MoneyBack Guaranteeed
--------------------------------------------------------------------------------
/dataset/email/spam/12.txt:
--------------------------------------------------------------------------------
Buy Ambiem (Zolpidem) 5mg/10mg @ $2.39/- pill

30 pills x 5 mg - $129.00
60 pills x 5 mg - $199.20
180 pills x 5 mg - $430.20
30 pills x 10 mg - $ 138.00
120 pills x 10 mg - $ 322.80
--------------------------------------------------------------------------------
/dataset/email/spam/13.txt:
--------------------------------------------------------------------------------
OrderCializViagra Online & Save 75-90%

0nline Pharmacy NoPrescription required
Buy Canadian Drugs at Wholesale Prices and Save 75-90%
FDA-Approved drugs + Superb Quality Drugs only!
Accept all major credit cards
Order Today! From $1.38

--------------------------------------------------------------------------------
/dataset/email/spam/14.txt:
--------------------------------------------------------------------------------
BuyVIAGRA 25mg, 50mg, 100mg,
BrandViagra, FemaleViagra from $1.15 per pill


ViagraNoPrescription needed - from Certified Canadian Pharmacy

Buy Here... We accept VISA, AMEX, E-Check... Worldwide Delivery
--------------------------------------------------------------------------------
/dataset/email/spam/15.txt:
--------------------------------------------------------------------------------
You Have Everything To Gain!

Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY

Amazing increase in thickness of yourPenis, up to 30%
BetterEjacu1ation control
Experience Rock-HardErecetions
Explosive, intenseOrgasns
Increase volume ofEjacu1ate
Doctor designed and endorsed
100% herbal, 100% Natural, 100% Safe
--------------------------------------------------------------------------------
/dataset/email/spam/16.txt:
--------------------------------------------------------------------------------
You Have Everything To Gain!

Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY

Amazing increase in thickness of yourPenis, up to 30%
BetterEjacu1ation control
Experience Rock-HardErecetions
Explosive, intenseOrgasns
Increase volume ofEjacu1ate
Doctor designed and endorsed
100% herbal, 100% Natural, 100% Safe
--------------------------------------------------------------------------------
/dataset/email/spam/17.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/dataset/email/spam/17.txt
--------------------------------------------------------------------------------
/dataset/email/spam/18.txt:
--------------------------------------------------------------------------------
Codeine (the most competitive price on NET!)

Codeine (WILSON) 30mg x 30 $156.00
Codeine (WILSON) 30mg x 60 $291.00 (+4 FreeViagra pills)
Codeine (WILSON) 30mg x 90 $396.00 (+4 FreeViagra pills)
Codeine (WILSON) 30mg x 120 $492.00 (+10 FreeViagra pills)
--------------------------------------------------------------------------------
/dataset/email/spam/19.txt:
--------------------------------------------------------------------------------
Get Up to 75% OFF at Online WatchesStore

Discount Watches for All Famous Brands

* Watches: aRolexBvlgari, Dior, Hermes, Oris, Cartier, AP and more brands
* Louis Vuitton Bags & Wallets
* Gucci Bags
* Tiffany & Co Jewerly

Enjoy a full 1 year WARRANTY
Shipment via reputable courier: FEDEX, UPS, DHL and EMS Speedpost
You will 100% recieve your order
Save Up to 75% OFF Quality Watches
--------------------------------------------------------------------------------
/dataset/email/spam/2.txt:
--------------------------------------------------------------------------------
Hydrocodone/Vicodin ES/Brand Watson

Vicodin ES - 7.5/750 mg: 30 - $195 / 120 $570
Brand Watson - 7.5/750 mg: 30 - $195 / 120 $570
Brand Watson - 10/325 mg: 30 - $199 / 120 - $588
NoPrescription Required
FREE Express FedEx (3-5 days Delivery) for over $200 order
Major Credit Cards + E-CHECK
--------------------------------------------------------------------------------
/dataset/email/spam/20.txt:
--------------------------------------------------------------------------------
Get Up to 75% OFF at Online WatchesStore

Discount Watches for All Famous Brands

* Watches: aRolexBvlgari, Dior, Hermes, Oris, Cartier, AP and more brands
* Louis Vuitton Bags & Wallets
* Gucci Bags
* Tiffany & Co Jewerly

Enjoy a full 1 year WARRANTY
Shipment via reputable courier: FEDEX, UPS, DHL and EMS Speedpost
You will 100% recieve your order
--------------------------------------------------------------------------------
/dataset/email/spam/21.txt:
--------------------------------------------------------------------------------
Percocet 10/625 mg withoutPrescription 30 tabs - $225!
Percocet, a narcotic analgesic, is used to treat moderate to moderately SeverePain
Top Quality, EXPRESS Shipping, 100% Safe & Discreet & Private.
Buy Cheap Percocet Online
--------------------------------------------------------------------------------
/dataset/email/spam/22.txt:
--------------------------------------------------------------------------------
Get Up to 75% OFF at Online WatchesStore

Discount Watches for All Famous Brands

* Watches: aRolexBvlgari, Dior, Hermes, Oris, Cartier, AP and more brands
* Louis Vuitton Bags & Wallets
* Gucci Bags
* Tiffany & Co Jewerly

Enjoy a full 1 year WARRANTY
Shipment via reputable courier: FEDEX, UPS, DHL and EMS Speedpost
You will 100% recieve your order
--------------------------------------------------------------------------------
/dataset/email/spam/23.txt:
--------------------------------------------------------------------------------
You Have Everything To Gain!

Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY

Amazing increase in thickness of yourPenis, up to 30%
BetterEjacu1ation control
Experience Rock-HardErecetions
Explosive, intenseOrgasns
Increase volume ofEjacu1ate
Doctor designed and endorsed
100% herbal, 100% Natural, 100% Safe
--------------------------------------------------------------------------------
/dataset/email/spam/24.txt:
--------------------------------------------------------------------------------
You Have Everything To Gain!

Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY

Amazing increase in thickness of yourPenis, up to 30%
BetterEjacu1ation control
Experience Rock-HardErecetions
Explosive, intenseOrgasns
Increase volume ofEjacu1ate
Doctor designed and endorsed
100% herbal, 100% Natural, 100% Safe
--------------------------------------------------------------------------------
/dataset/email/spam/25.txt:
--------------------------------------------------------------------------------
Experience with BiggerPenis Today! Grow 3-inches more

The Safest & Most Effective Methods Of_PenisEn1argement.
Save your time and money!
BetterErections with effective Ma1eEnhancement products.

#1 Ma1eEnhancement Supplement. Trusted by Millions. Buy Today!
--------------------------------------------------------------------------------
/dataset/email/spam/3.txt:
--------------------------------------------------------------------------------
You Have Everything To Gain!

Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY

Amazing increase in thickness of yourPenis, up to 30%
BetterEjacu1ation control
Experience Rock-HardErecetions
Explosive, intenseOrgasns
Increase volume ofEjacu1ate
Doctor designed and endorsed
100% herbal, 100% Natural, 100% Safe
The proven NaturalPenisEnhancement that works!
100% MoneyBack Guaranteeed
--------------------------------------------------------------------------------
/dataset/email/spam/4.txt:
--------------------------------------------------------------------------------
Percocet 10/625 mg withoutPrescription 30 tabs - $225!
2 | Percocet, a narcotic analgesic, is used to treat moderate to moderately SeverePain 3 | Top Quality, EXPRESS Shipping, 100% Safe & Discreet & Private. 4 | Buy Cheap Percocet Online -------------------------------------------------------------------------------- /dataset/email/spam/5.txt: -------------------------------------------------------------------------------- 1 | --- Codeine 15mg -- 30 for $203.70 -- VISA Only!!! -- 2 | 3 | -- Codeine (Methylmorphine) is a narcotic (opioid) pain reliever 4 | -- We have 15mg & 30mg pills -- 30/15mg for $203.70 - 60/15mg for $385.80 - 90/15mg for $562.50 -- VISA Only!!! --- -------------------------------------------------------------------------------- /dataset/email/spam/6.txt: -------------------------------------------------------------------------------- 1 | OEM Adobe & Microsoft softwares 2 | Fast order and download 3 | 4 | Microsoft Office Professional Plus 2007/2010 $129 5 | Microsoft Windows 7 Ultimate $119 6 | Adobe Photoshop CS5 Extended 7 | Adobe Acrobat 9 Pro Extended 8 | Windows XP Professional & thousand more titles -------------------------------------------------------------------------------- /dataset/email/spam/7.txt: -------------------------------------------------------------------------------- 1 | Bargains Here! Buy Phentermin 37.5 mg (K-25) 2 | 3 | Buy Genuine Phentermin at Low Cost 4 | VISA Accepted 5 | 30 - $130.50 6 | 60 - $219.00 7 | 90 - $292.50 8 | 120 - $366.00 9 | 180 - $513.00 -------------------------------------------------------------------------------- /dataset/email/spam/8.txt: -------------------------------------------------------------------------------- 1 | You Have Everything To Gain! 2 | 3 | Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY 4 | 5 | Amazing increase in thickness of yourPenis, up to 30% 6 | BetterEjacu1ation control 7 | Experience Rock-HardErecetions 8 | Explosive, intenseOrgasns 9 | Increase volume ofEjacu1ate 10 | Doctor designed and endorsed 11 | 100% herbal, 100% Natural, 100% Safe -------------------------------------------------------------------------------- /dataset/email/spam/9.txt: -------------------------------------------------------------------------------- 1 | Bargains Here! 
Buy Phentermin 37.5 mg (K-25) 2 | 3 | Buy Genuine Phentermin at Low Cost 4 | VISA Accepted 5 | 30 - $130.50 6 | 60 - $219.00 7 | 90 - $292.50 8 | 120 - $366.00 9 | 180 - $513.00 -------------------------------------------------------------------------------- /dataset/ex0.txt: -------------------------------------------------------------------------------- 1 | 1.000000 0.067732 3.176513 2 | 1.000000 0.427810 3.816464 3 | 1.000000 0.995731 4.550095 4 | 1.000000 0.738336 4.256571 5 | 1.000000 0.981083 4.560815 6 | 1.000000 0.526171 3.929515 7 | 1.000000 0.378887 3.526170 8 | 1.000000 0.033859 3.156393 9 | 1.000000 0.132791 3.110301 10 | 1.000000 0.138306 3.149813 11 | 1.000000 0.247809 3.476346 12 | 1.000000 0.648270 4.119688 13 | 1.000000 0.731209 4.282233 14 | 1.000000 0.236833 3.486582 15 | 1.000000 0.969788 4.655492 16 | 1.000000 0.607492 3.965162 17 | 1.000000 0.358622 3.514900 18 | 1.000000 0.147846 3.125947 19 | 1.000000 0.637820 4.094115 20 | 1.000000 0.230372 3.476039 21 | 1.000000 0.070237 3.210610 22 | 1.000000 0.067154 3.190612 23 | 1.000000 0.925577 4.631504 24 | 1.000000 0.717733 4.295890 25 | 1.000000 0.015371 3.085028 26 | 1.000000 0.335070 3.448080 27 | 1.000000 0.040486 3.167440 28 | 1.000000 0.212575 3.364266 29 | 1.000000 0.617218 3.993482 30 | 1.000000 0.541196 3.891471 31 | 1.000000 0.045353 3.143259 32 | 1.000000 0.126762 3.114204 33 | 1.000000 0.556486 3.851484 34 | 1.000000 0.901144 4.621899 35 | 1.000000 0.958476 4.580768 36 | 1.000000 0.274561 3.620992 37 | 1.000000 0.394396 3.580501 38 | 1.000000 0.872480 4.618706 39 | 1.000000 0.409932 3.676867 40 | 1.000000 0.908969 4.641845 41 | 1.000000 0.166819 3.175939 42 | 1.000000 0.665016 4.264980 43 | 1.000000 0.263727 3.558448 44 | 1.000000 0.231214 3.436632 45 | 1.000000 0.552928 3.831052 46 | 1.000000 0.047744 3.182853 47 | 1.000000 0.365746 3.498906 48 | 1.000000 0.495002 3.946833 49 | 1.000000 0.493466 3.900583 50 | 1.000000 0.792101 4.238522 51 | 1.000000 0.769660 4.233080 52 | 1.000000 0.251821 3.521557 53 | 1.000000 0.181951 3.203344 54 | 1.000000 0.808177 4.278105 55 | 1.000000 0.334116 3.555705 56 | 1.000000 0.338630 3.502661 57 | 1.000000 0.452584 3.859776 58 | 1.000000 0.694770 4.275956 59 | 1.000000 0.590902 3.916191 60 | 1.000000 0.307928 3.587961 61 | 1.000000 0.148364 3.183004 62 | 1.000000 0.702180 4.225236 63 | 1.000000 0.721544 4.231083 64 | 1.000000 0.666886 4.240544 65 | 1.000000 0.124931 3.222372 66 | 1.000000 0.618286 4.021445 67 | 1.000000 0.381086 3.567479 68 | 1.000000 0.385643 3.562580 69 | 1.000000 0.777175 4.262059 70 | 1.000000 0.116089 3.208813 71 | 1.000000 0.115487 3.169825 72 | 1.000000 0.663510 4.193949 73 | 1.000000 0.254884 3.491678 74 | 1.000000 0.993888 4.533306 75 | 1.000000 0.295434 3.550108 76 | 1.000000 0.952523 4.636427 77 | 1.000000 0.307047 3.557078 78 | 1.000000 0.277261 3.552874 79 | 1.000000 0.279101 3.494159 80 | 1.000000 0.175724 3.206828 81 | 1.000000 0.156383 3.195266 82 | 1.000000 0.733165 4.221292 83 | 1.000000 0.848142 4.413372 84 | 1.000000 0.771184 4.184347 85 | 1.000000 0.429492 3.742878 86 | 1.000000 0.162176 3.201878 87 | 1.000000 0.917064 4.648964 88 | 1.000000 0.315044 3.510117 89 | 1.000000 0.201473 3.274434 90 | 1.000000 0.297038 3.579622 91 | 1.000000 0.336647 3.489244 92 | 1.000000 0.666109 4.237386 93 | 1.000000 0.583888 3.913749 94 | 1.000000 0.085031 3.228990 95 | 1.000000 0.687006 4.286286 96 | 1.000000 0.949655 4.628614 97 | 1.000000 0.189912 3.239536 98 | 1.000000 0.844027 4.457997 99 | 1.000000 0.333288 3.513384 100 | 1.000000 0.427035 3.729674 101 
| 1.000000 0.466369 3.834274 102 | 1.000000 0.550659 3.811155 103 | 1.000000 0.278213 3.598316 104 | 1.000000 0.918769 4.692514 105 | 1.000000 0.886555 4.604859 106 | 1.000000 0.569488 3.864912 107 | 1.000000 0.066379 3.184236 108 | 1.000000 0.335751 3.500796 109 | 1.000000 0.426863 3.743365 110 | 1.000000 0.395746 3.622905 111 | 1.000000 0.694221 4.310796 112 | 1.000000 0.272760 3.583357 113 | 1.000000 0.503495 3.901852 114 | 1.000000 0.067119 3.233521 115 | 1.000000 0.038326 3.105266 116 | 1.000000 0.599122 3.865544 117 | 1.000000 0.947054 4.628625 118 | 1.000000 0.671279 4.231213 119 | 1.000000 0.434811 3.791149 120 | 1.000000 0.509381 3.968271 121 | 1.000000 0.749442 4.253910 122 | 1.000000 0.058014 3.194710 123 | 1.000000 0.482978 3.996503 124 | 1.000000 0.466776 3.904358 125 | 1.000000 0.357767 3.503976 126 | 1.000000 0.949123 4.557545 127 | 1.000000 0.417320 3.699876 128 | 1.000000 0.920461 4.613614 129 | 1.000000 0.156433 3.140401 130 | 1.000000 0.656662 4.206717 131 | 1.000000 0.616418 3.969524 132 | 1.000000 0.853428 4.476096 133 | 1.000000 0.133295 3.136528 134 | 1.000000 0.693007 4.279071 135 | 1.000000 0.178449 3.200603 136 | 1.000000 0.199526 3.299012 137 | 1.000000 0.073224 3.209873 138 | 1.000000 0.286515 3.632942 139 | 1.000000 0.182026 3.248361 140 | 1.000000 0.621523 3.995783 141 | 1.000000 0.344584 3.563262 142 | 1.000000 0.398556 3.649712 143 | 1.000000 0.480369 3.951845 144 | 1.000000 0.153350 3.145031 145 | 1.000000 0.171846 3.181577 146 | 1.000000 0.867082 4.637087 147 | 1.000000 0.223855 3.404964 148 | 1.000000 0.528301 3.873188 149 | 1.000000 0.890192 4.633648 150 | 1.000000 0.106352 3.154768 151 | 1.000000 0.917886 4.623637 152 | 1.000000 0.014855 3.078132 153 | 1.000000 0.567682 3.913596 154 | 1.000000 0.068854 3.221817 155 | 1.000000 0.603535 3.938071 156 | 1.000000 0.532050 3.880822 157 | 1.000000 0.651362 4.176436 158 | 1.000000 0.901225 4.648161 159 | 1.000000 0.204337 3.332312 160 | 1.000000 0.696081 4.240614 161 | 1.000000 0.963924 4.532224 162 | 1.000000 0.981390 4.557105 163 | 1.000000 0.987911 4.610072 164 | 1.000000 0.990947 4.636569 165 | 1.000000 0.736021 4.229813 166 | 1.000000 0.253574 3.500860 167 | 1.000000 0.674722 4.245514 168 | 1.000000 0.939368 4.605182 169 | 1.000000 0.235419 3.454340 170 | 1.000000 0.110521 3.180775 171 | 1.000000 0.218023 3.380820 172 | 1.000000 0.869778 4.565020 173 | 1.000000 0.196830 3.279973 174 | 1.000000 0.958178 4.554241 175 | 1.000000 0.972673 4.633520 176 | 1.000000 0.745797 4.281037 177 | 1.000000 0.445674 3.844426 178 | 1.000000 0.470557 3.891601 179 | 1.000000 0.549236 3.849728 180 | 1.000000 0.335691 3.492215 181 | 1.000000 0.884739 4.592374 182 | 1.000000 0.918916 4.632025 183 | 1.000000 0.441815 3.756750 184 | 1.000000 0.116598 3.133555 185 | 1.000000 0.359274 3.567919 186 | 1.000000 0.814811 4.363382 187 | 1.000000 0.387125 3.560165 188 | 1.000000 0.982243 4.564305 189 | 1.000000 0.780880 4.215055 190 | 1.000000 0.652565 4.174999 191 | 1.000000 0.870030 4.586640 192 | 1.000000 0.604755 3.960008 193 | 1.000000 0.255212 3.529963 194 | 1.000000 0.730546 4.213412 195 | 1.000000 0.493829 3.908685 196 | 1.000000 0.257017 3.585821 197 | 1.000000 0.833735 4.374394 198 | 1.000000 0.070095 3.213817 199 | 1.000000 0.527070 3.952681 200 | 1.000000 0.116163 3.129283 201 | -------------------------------------------------------------------------------- /dataset/lenses.txt: -------------------------------------------------------------------------------- 1 | young myope no reduced no lenses 2 | young myope no normal soft 3 
| young myope yes reduced no lenses 4 | young myope yes normal hard 5 | young hyper no reduced no lenses 6 | young hyper no normal soft 7 | young hyper yes reduced no lenses 8 | young hyper yes normal hard 9 | pre myope no reduced no lenses 10 | pre myope no normal soft 11 | pre myope yes reduced no lenses 12 | pre myope yes normal hard 13 | pre hyper no reduced no lenses 14 | pre hyper no normal soft 15 | pre hyper yes reduced no lenses 16 | pre hyper yes normal no lenses 17 | presbyopic myope no reduced no lenses 18 | presbyopic myope no normal no lenses 19 | presbyopic myope yes reduced no lenses 20 | presbyopic myope yes normal hard 21 | presbyopic hyper no reduced no lenses 22 | presbyopic hyper no normal soft 23 | presbyopic hyper yes reduced no lenses 24 | presbyopic hyper yes normal no lenses 25 | -------------------------------------------------------------------------------- /dataset/logisticDataset.txt: -------------------------------------------------------------------------------- 1 | -0.017612 14.053064 0 2 | -1.395634 4.662541 1 3 | -0.752157 6.538620 0 4 | -1.322371 7.152853 0 5 | 0.423363 11.054677 0 6 | 0.406704 7.067335 1 7 | 0.667394 12.741452 0 8 | -2.460150 6.866805 1 9 | 0.569411 9.548755 0 10 | -0.026632 10.427743 0 11 | 0.850433 6.920334 1 12 | 1.347183 13.175500 0 13 | 1.176813 3.167020 1 14 | -1.781871 9.097953 0 15 | -0.566606 5.749003 1 16 | 0.931635 1.589505 1 17 | -0.024205 6.151823 1 18 | -0.036453 2.690988 1 19 | -0.196949 0.444165 1 20 | 1.014459 5.754399 1 21 | 1.985298 3.230619 1 22 | -1.693453 -0.557540 1 23 | -0.576525 11.778922 0 24 | -0.346811 -1.678730 1 25 | -2.124484 2.672471 1 26 | 1.217916 9.597015 0 27 | -0.733928 9.098687 0 28 | -3.642001 -1.618087 1 29 | 0.315985 3.523953 1 30 | 1.416614 9.619232 0 31 | -0.386323 3.989286 1 32 | 0.556921 8.294984 1 33 | 1.224863 11.587360 0 34 | -1.347803 -2.406051 1 35 | 1.196604 4.951851 1 36 | 0.275221 9.543647 0 37 | 0.470575 9.332488 0 38 | -1.889567 9.542662 0 39 | -1.527893 12.150579 0 40 | -1.185247 11.309318 0 41 | -0.445678 3.297303 1 42 | 1.042222 6.105155 1 43 | -0.618787 10.320986 0 44 | 1.152083 0.548467 1 45 | 0.828534 2.676045 1 46 | -1.237728 10.549033 0 47 | -0.683565 -2.166125 1 48 | 0.229456 5.921938 1 49 | -0.959885 11.555336 0 50 | 0.492911 10.993324 0 51 | 0.184992 8.721488 0 52 | -0.355715 10.325976 0 53 | -0.397822 8.058397 0 54 | 0.824839 13.730343 0 55 | 1.507278 5.027866 1 56 | 0.099671 6.835839 1 57 | -0.344008 10.717485 0 58 | 1.785928 7.718645 1 59 | -0.918801 11.560217 0 60 | -0.364009 4.747300 1 61 | -0.841722 4.119083 1 62 | 0.490426 1.960539 1 63 | -0.007194 9.075792 0 64 | 0.356107 12.447863 0 65 | 0.342578 12.281162 0 66 | -0.810823 -1.466018 1 67 | 2.530777 6.476801 1 68 | 1.296683 11.607559 0 69 | 0.475487 12.040035 0 70 | -0.783277 11.009725 0 71 | 0.074798 11.023650 0 72 | -1.337472 0.468339 1 73 | -0.102781 13.763651 0 74 | -0.147324 2.874846 1 75 | 0.518389 9.887035 0 76 | 1.015399 7.571882 0 77 | -1.658086 -0.027255 1 78 | 1.319944 2.171228 1 79 | 2.056216 5.019981 1 80 | -0.851633 4.375691 1 81 | -1.510047 6.061992 0 82 | -1.076637 -3.181888 1 83 | 1.821096 10.283990 0 84 | 3.010150 8.401766 1 85 | -1.099458 1.688274 1 86 | -0.834872 -1.733869 1 87 | -0.846637 3.849075 1 88 | 1.400102 12.628781 0 89 | 1.752842 5.468166 1 90 | 0.078557 0.059736 1 91 | 0.089392 -0.715300 1 92 | 1.825662 12.693808 0 93 | 0.197445 9.744638 0 94 | 0.126117 0.922311 1 95 | -0.679797 1.220530 1 96 | 0.677983 2.556666 1 97 | 0.761349 10.693862 0 98 | -2.168791 0.143632 1 99 | 1.388610 
9.341997 0 100 | 0.317029 14.739025 0 101 |
--------------------------------------------------------------------------------
/excel/costfunc.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/excel/costfunc.xlsx
--------------------------------------------------------------------------------
/excel/logistic.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/excel/logistic.xlsx
--------------------------------------------------------------------------------
/excel/ml.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/excel/ml.xlsx
--------------------------------------------------------------------------------
/k-临近算法.md:
--------------------------------------------------------------------------------
# k-Nearest Neighbors (classification)
## 1. Algorithm idea
1. To classify an unknown instance, use all instances with known classes as reference.
2. Choose the parameter K.
3. Compute the distance between the unknown instance and every known instance.
4. Pick the K known instances with the smallest distances.
5. By majority vote, assign the unknown instance to the class most common among those K nearest neighbors.

Advantages: simple, easy to understand and implement; with a suitable choice of K it is robust to noisy data.

Disadvantages:
1. All known instances must be stored, which takes a lot of memory.
2. When the classes are unbalanced -- one class has far more samples and dominates the neighborhood -- a new instance is easily absorbed into that dominant class.

Improvement: take the distances into account and weight each neighbor's vote by its distance (see the sketch below).
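A minimal sketch of that improvement -- distance-weighted voting. This is my own illustration, not code from the repository; the helper name `weighted_vote`, the inverse-distance scheme and `eps` are assumptions:

    def weighted_vote(neighbors):
        # neighbors: list of (distance, label) pairs, like getNeighbors returns below.
        # Each neighbor votes with weight 1/(distance+eps) instead of 1, so close
        # samples dominate even when a majority class floods the neighborhood.
        eps = 1e-6  # avoids division by zero when the test point equals a sample
        scores = {}
        for distance, label in neighbors:
            scores[label] = scores.get(label, 0.0) + 1.0 / (distance + eps)
        return max(scores, key=scores.get)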
## 2. Implementation

    import numpy as np
    import math

    def createDataset():
        # Build the training set: two features plus a class label per row.
        dataset = [[0.26547727, 0.27892898, 0],
                   [0.1337869 , 0.08356665, 0],
                   [0.02771102, 0.36429227, 0],
                   [0.81783834, 0.86542639, 1],
                   [0.99240191, 0.87950623, 1],
                   [0.99240191, 0.77950623, 1]]
        return np.array(dataset)


    def getDistance(instance1, instance2):
        # Euclidean distance between two points.
        distance = 0
        length = len(instance1)
        for i in range(length):
            distance += math.pow(instance1[i] - instance2[i], 2)
        return math.sqrt(distance)


    def getNeighbors(trainingSet, testInstance, k):
        # Distance from the unknown instance to every known instance;
        # returns the K nearest ones as (distance, label) pairs.
        features = trainingSet[:, :2]
        labels = trainingSet[:, -1]
        distance_list = []
        for i in range(len(features)):
            distance = getDistance(testInstance, features[i])
            distance_list.append((distance, labels[i]))
        sorted_distance_list = sorted(distance_list)
        neighbors = sorted_distance_list[:k]
        return neighbors


    def countClass(neighbors):
        # Tally the K nearest neighbors per class; by majority vote the
        # unknown instance goes to the most common class among them.
        class_num_dict = {}
        for n in neighbors:
            if n[1] in class_num_dict:
                class_num_dict[n[1]] += 1
            else:
                class_num_dict[n[1]] = 1
        return class_num_dict

    def main():
        trainingSet = createDataset()
        testSet = [[0, 0], [1, 1], [1.1, 1.2]]
        result = []
        for test in testSet:
            # K nearest known instances of the unknown instance...
            neighbors = getNeighbors(trainingSet, test, 4)
            # ...tallied per class...
            class_num_dict = countClass(neighbors)
            # ...and the majority class wins.
            result.append(sorted(class_num_dict.items(), key=lambda x: x[1], reverse=True)[0][0])
        print(testSet)
        print(result)

    if __name__ == '__main__':
        main()

## 3. A worked example
Scenario: a dating site has data about its male users with three features: "frequent-flyer miles earned per year", "percentage of time spent playing video games" and "liters of ice cream consumed per week". Based on these features each man is classified as "of average charm", "very charming" or "charmless". We now use k-nearest neighbors to decide which class a man belongs to.

### 3.1 Data sample

    40920   8.326976   0.953952   3
    14488   7.153469   1.673904   2
    26052   1.441871   0.805124   1
    75136   13.147394  0.428964   1

### 3.2 Approach
1. Read the training data from the text file.
2. Scale the features into the range [-1, 1] with (value - mean) / (max - min); a quick check of this step follows below the list.
3. Build a test set to validate the algorithm.
4. Apply the same feature scaling to the test data.
5. Compute the distance between each test instance and every training instance; keep the K nearest training instances and their classes.
6. Tally the classes of those K instances; by majority vote, the most common class is the prediction.
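A quick check of the scaling in step 2: it is mean normalization, not plain min-max scaling. A minimal NumPy sketch (the three rows are taken from the data sample above):

    import numpy as np

    X = np.array([[40920, 8.326976, 0.953952],
                  [14488, 7.153469, 1.673904],
                  [26052, 1.441871, 0.805124]])
    # Subtract the column mean, divide by the column range (max - min).
    X_scaled = (X - X.mean(axis=0)) / (X.max(axis=0) - X.min(axis=0))
    print(X_scaled)  # every column now lies within (-1, 1)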
### 3.3 Implementation

    %matplotlib inline
    import csv
    from sklearn import preprocessing
    import numpy as np
    import matplotlib
    import matplotlib.pyplot as plt
    import math

    def trainingSetExtra():
        # Read the training data from the txt file and convert it into an
        # array so the computations below can work on it.
        datingTestSet = []
        with open('datingTestSet2.txt') as f:
            reader = csv.reader(f, delimiter='\t')
            for row in reader:
                newrow = [float(row[0]), float(row[1]), float(row[2]), float(row[3])]
                datingTestSet.append(newrow)
        return np.array(datingTestSet)


    def figureAnalysis(datingTestSet):
        # Scatter plot of the data with matplotlib for a first look.
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.scatter(datingTestSet[:, 0], datingTestSet[:, 1],
                   15.0 * datingTestSet[:, -1], 15.0 * datingTestSet[:, -1])
        plt.show()


    def autoNormal(datingTestSet):
        # Feature scaling. Some features are numerically large (e.g. 13400
        # miles) and would dominate the distance, so every feature value is
        # mapped into [-1, 1] via (value - mean) / (max - min).
        datingTestSetFeature = (datingTestSet[:, :3] - datingTestSet[:, :3].mean(0)) / (datingTestSet[:, :3].max(0) - datingTestSet[:, :3].min(0))
        datingTestSet[:, :3] = datingTestSetFeature
        return datingTestSet


    def getDistance(instance1, instance2):
        # Euclidean distance between two points.
        distance = 0
        length = len(instance1)
        for i in range(length):
            distance += math.pow(instance1[i] - instance2[i], 2)
        return math.sqrt(distance)


    def getNeighbors(normal_dataset, testInstance, k):
        # Distance from the unknown instance to every known instance;
        # returns the K nearest ones as (distance, label) pairs.
        features = normal_dataset[:, :3]
        labels = normal_dataset[:, -1]
        distance_list = []
        for i in range(len(features)):
            distance = getDistance(testInstance, features[i])
            distance_list.append((distance, labels[i]))
        sorted_distance_list = sorted(distance_list)
        neighbors = sorted_distance_list[:k]
        return neighbors


    def countClass(neighbors):
        # Tally the K nearest neighbors per class for the majority vote.
        class_num_dict = {}
        for n in neighbors:
            if n[1] in class_num_dict:
                class_num_dict[n[1]] += 1
            else:
                class_num_dict[n[1]] = 1
        return class_num_dict


    def main():
        datingTestSet = trainingSetExtra()    # 1. load the training set
        mean = datingTestSet[:, :3].mean(0)   # 2. per-feature mean
        max_min_range = datingTestSet[:, :3].max(0) - datingTestSet[:, :3].min(0)  # 3. per-feature range (max - min)
        figureAnalysis(datingTestSet)         # 4. visual inspection
        normal_dataset = autoNormal(datingTestSet)  # 5. scale all features into [-1, 1]
        # 6. build the test set
        testInstance = [[40920, 8.326976, 0.953952],
                        [14488, 7.153469, 1.673904],
                        [75136, 13.147394, 0.428964]]
        res = []
        for i in range(len(testInstance)):
            normal_testInstance = (testInstance[i] - mean) / max_min_range  # 7. scale the test instance
            neighbors = getNeighbors(normal_dataset, normal_testInstance, 10)  # 8. K nearest training instances
            class_num_dict = countClass(neighbors)  # 9. tally their classes
            res_label = sorted(class_num_dict.items(), key=lambda x: x[1], reverse=True)[0][0]
            testInstance[i].append(res_label)
            res.append(testInstance[i])
        print(res)


    if __name__ == '__main__':
        main()

### 3.4 Result
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/knnI.jpg?raw=true)
--------------------------------------------------------------------------------
/pic/ML3STEP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/ML3STEP.png
--------------------------------------------------------------------------------
/pic/X.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/X.png
--------------------------------------------------------------------------------
/pic/bibao.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/bibao.png
--------------------------------------------------------------------------------
/pic/bysp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/bysp.png
--------------------------------------------------------------------------------
/pic/bysres.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/bysres.png
--------------------------------------------------------------------------------
/pic/costfunc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/costfunc.png
--------------------------------------------------------------------------------
/pic/costlogis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/costlogis.png
--------------------------------------------------------------------------------
/pic/daoshulogis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/daoshulogis.png
--------------------------------------------------------------------------------
/pic/dx.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/dx.png
--------------------------------------------------------------------------------
/pic/featuremin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/featuremin.png
-------------------------------------------------------------------------------- /pic/gdesc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/gdesc.png -------------------------------------------------------------------------------- /pic/hl.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/hl.jpg -------------------------------------------------------------------------------- /pic/juzhen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/juzhen.png -------------------------------------------------------------------------------- /pic/knnI.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/knnI.jpg -------------------------------------------------------------------------------- /pic/localweight.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/localweight.jpg -------------------------------------------------------------------------------- /pic/localweightres.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/localweightres.jpg -------------------------------------------------------------------------------- /pic/logisticfunc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/logisticfunc.png -------------------------------------------------------------------------------- /pic/ltt.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/ltt.jpg -------------------------------------------------------------------------------- /pic/ml1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/ml1.png -------------------------------------------------------------------------------- /pic/ml2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/ml2.png -------------------------------------------------------------------------------- /pic/multigradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/multigradient.png -------------------------------------------------------------------------------- /pic/multivar.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/multivar.png -------------------------------------------------------------------------------- /pic/normalfunc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/normalfunc.jpg -------------------------------------------------------------------------------- /pic/normalfunctd.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/normalfunctd.jpg -------------------------------------------------------------------------------- /pic/regular1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/regular1.png -------------------------------------------------------------------------------- /pic/regular2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/regular2.png -------------------------------------------------------------------------------- /pic/resabalone.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/resabalone.jpg -------------------------------------------------------------------------------- /pic/reslogistic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/reslogistic.png -------------------------------------------------------------------------------- /pic/resregression.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/resregression.jpg -------------------------------------------------------------------------------- /pic/wenfa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/wenfa.png -------------------------------------------------------------------------------- /pic/wxt.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/wxt.jpg -------------------------------------------------------------------------------- /pic/xq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/xq.png -------------------------------------------------------------------------------- /pic/yuy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/yuy.png 
--------------------------------------------------------------------------------
/pic/yxt.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daacheng/pythonForMachineLearning/e7475ff26e7b24c8251192cf5d182f777561864a/pic/yxt.jpg
--------------------------------------------------------------------------------
/pythonForNewsSina.py:
--------------------------------------------------------------------------------
import requests
import json
import pandas
from datetime import datetime
from bs4 import BeautifulSoup

url = 'http://api.roll.news.sina.com.cn/zt_list?channel=news&cat_1=gnxw&cat_2==gdxw1||=gatxw||=zs-pl||=mtjj&level==1||=2&show_ext=1&show_all=1&show_num=22&tag=1&format=json&page={}&callback=newsloadercallback&_=1511263184507'
commentUrl = 'http://comment5.news.sina.com.cn/page/info?version=1&format=js&channel=gn&newsid=comos-{}&group=&compress=0&ie=utf-8&oe=utf-8&page=1&page_size=20&jsvar=loader_1511189047225_94708810'

def getCommentCount(url):
    # Caution: str.lstrip/rstrip strip a *set* of characters, not a literal
    # prefix/suffix. It happens to work for these Sina ids, but it is fragile.
    newsid = url.split('/')[-1].lstrip('doc-i').rstrip('.shtml')
    jd = json.loads(requests.get(commentUrl.format(newsid)).text.lstrip('var loader_1511189047225_94708810='))
    return jd['result']['count']['total']

# Collect all fields of a single news article.
def getNewsDetails(url):
    result = {}
    res = requests.get(url)
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html.parser')
    # title
    result['title'] = soup.select('#artibodyTitle')[0].text
    timesource = soup.select('.time-source')[0].contents[0].strip()
    # date: strptime parses a string into a datetime, strftime formats it back
    result['dt'] = datetime.strptime(timesource, '%Y年%m月%d日%H:%M').strftime('%Y-%m-%d')
    # source
    result['source'] = soup.select('.time-source span a')[0].text
    # article body
    article = []
    for p in soup.select('.article p')[:-1]:
        article.append(p.text.strip())
    txt = ''.join(article)
    result['txt'] = txt
    # responsible editor
    result['editor'] = soup.select('.article-editor')[0].text.lstrip('责任编辑:')
    result['comments'] = getCommentCount(url)
    return result

# Collect every article link (item['url']) on a listing page and fetch each article.
def parseListUrl(url):
    newsdetails = []
    res = requests.get(url)
    jd = json.loads(res.text.lstrip(' newsloadercallback(').rstrip(');'))
    for item in jd['result']['data']:
        newsdetails.append(getNewsDetails(item['url']))
    return newsdetails

# Crawl the listing pages (page 1 only here; widen the range for more pages).
news_total = []
for i in range(1, 2):
    newsurl = url.format(i)
    newsary = parseListUrl(newsurl)
    news_total.extend(newsary)
df = pandas.DataFrame(news_total)
df.head(15)
--------------------------------------------------------------------------------
/pythonForTiebaPic.py:
--------------------------------------------------------------------------------
import requests
import os
from bs4 import BeautifulSoup

url = 'https://tieba.baidu.com/p/4662090035?pn=4'
res = requests.get(url)
res.encoding = 'utf-8'
soup = BeautifulSoup(res.text, 'html.parser')
images = soup.select('.BDE_Image')

def img_size(content):
    # Pillow is well worth learning -- it helps a lot with captcha handling and CV work.
    from PIL import Image
    from io import BytesIO
    img = Image.open(BytesIO(content))
    # width, height = img.size  # try resizing images or stitching a photo wall yourself
    return img.size

for i in images:
    image_name = i.get('src').strip().split('/')[-1]
    r = requests.get(i.get('src').strip())
    if r.status_code == 200:
        if not os.path.exists('baidu_img'):  # create the folder if it does not exist yet
            os.mkdir('baidu_img')
        if img_size(r.content)[0] > 400 and img_size(r.content)[1] > 600:
            # keep only images larger than 400x600 pixels
            print('Good size, keeping it')
            open('baidu_img/' + image_name, 'wb').write(r.content)
--------------------------------------------------------------------------------
/决策树.md:
--------------------------------------------------------------------------------
# Decision tree (classification)
# Example: predicting which type of contact lens a patient needs
## 1. Converting features and labels
### Raw csv data
Each row has 4 features; the class label takes three values: no lenses, soft and hard.

    young       myope  no  reduced  no lenses
    pre         hyper  no  reduced  no lenses
    presbyopic  myope  no  reduced  no lenses
    young       myope  no  normal   soft

### Converting features and labels into numeric arrays
For example, the first feature has three values, "young", "pre" and "presbyopic"; one-hot encoded (in alphabetical order: pre, presbyopic, young) they become [1 0 0] for "pre", [0 1 0] for "presbyopic" and [0 0 1] for "young".

So the raw rows above become, for example:

    young myope no reduced (no lenses) -> [0 0 1 0 1 1 0 0 1]
    young myope no normal  (soft)      -> [0 0 1 0 1 1 0 1 0]

(the class label is binarized separately by LabelBinarizer).

### The sklearn library
The sklearn library makes it easy to turn the training data into arrays ready for training:

    from math import log
    import numpy as np
    import csv
    from sklearn.feature_extraction import DictVectorizer
    from sklearn import preprocessing
    from sklearn import tree

    def main():
        feature_list = []
        label_list = []

        feature_header = ['a', 'b', 'c', 'd', 'e']
        with open('lenses.txt', 'r') as f:
            reader = csv.reader(f, delimiter='\t')
            for row in reader:
                row_dict = {}
                label_list.append(row[-1])
                for i in range(len(row) - 1):
                    row_dict[feature_header[i]] = row[i]
                feature_list.append(row_dict)

        vec = DictVectorizer()
        # Feature conversion: feature_list = [{'a': 'young', 'b': 'myope', 'c': 'no', 'd': 'reduced'}, {'a': 'young', 'b': 'myope', 'c': 'no', 'd': 'normal'}]
        # X = [[0. 0. 1. 0. 1. 1. 0. 0. 1.] [0. 0. 1. 0. 1. 1. 0. 1. 0.]]
        X = vec.fit_transform(feature_list).toarray()

        lb = preprocessing.LabelBinarizer()
        Y = lb.fit_transform(label_list)  # binarize the class labels

        # Decision tree classifier; nodes are chosen by information entropy.
        clf = tree.DecisionTreeClassifier(criterion='entropy')
        # Fit the model on the training data.
        clf = clf.fit(X, Y)
        # Try the model on a hand-crafted instance.
        test = [[0, 0, 1, 0, 1, 1, 0, 0, 1]]
        res = clf.predict(test)

        print(X)
        print(Y)
        print(feature_list)
        print(res)

    if __name__ == '__main__':
        main()


### Properties of decision trees
Advantages: intuitive, easy to interpret, effective on small data sets.

Disadvantages: continuous attributes are handled poorly (all attributes must be discrete, so continuous ones have to be discretized first); the error rate grows quickly as the number of classes increases; scalability is mediocre.
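To see which one-hot column encodes which feature value -- useful when hand-crafting test rows like the one in the script above -- DictVectorizer exposes its vocabulary. A small sketch assuming `vec`, `lb` and `clf` are the fitted objects from that script (`get_feature_names_out` replaces the older `get_feature_names` in recent scikit-learn versions):

    # Map one-hot columns back to 'feature=value' names, then decode a prediction.
    names = vec.get_feature_names_out()  # e.g. ['a=pre', 'a=presbyopic', 'a=young', ...]
    test = [[0, 0, 1, 0, 1, 1, 0, 0, 1]]
    active = [n for n, bit in zip(names, test[0]) if bit]
    print(active)                                    # the attribute values the row encodes
    print(lb.inverse_transform(clf.predict(test)))   # back to a string label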
--------------------------------------------------------------------------------
/朴素贝叶斯.md:
--------------------------------------------------------------------------------
# Naive Bayes (classification)
## 1. Probability theory: conditional probability
Conditional probability, the law of total probability and Bayes' theorem are explained in detail in my earlier statistics notes:
[Notes on "Head First Statistics"](https://github.com/daacheng/PythonBasic/blob/master/studynotes/%E3%80%8A%E6%B7%B1%E5%85%A5%E6%B5%85%E5%87%BA%E7%BB%9F%E8%AE%A1%E5%AD%A6%E3%80%8B%E7%AC%94%E8%AE%B0%E4%B8%8A.md)

P(A|B) = P(B|A)P(A) / P(B)
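A one-line numeric check of the formula, with invented numbers: if 30% of mail is spam, a given word appears in 40% of spam and in 1% of ham, then seeing that word makes spam very likely:

    p_spam, p_w_spam, p_w_ham = 0.3, 0.4, 0.01  # illustrative values, not from the data
    p_spam_given_w = p_w_spam * p_spam / (p_w_spam * p_spam + p_w_ham * (1 - p_spam))
    print(round(p_spam_given_w, 3))  # 0.945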
## 2. Spam filtering with naive Bayes
### Scenario
Document classification: we have two groups of txt files, one labeled spam and the other labeled normal mail (ham). They form the training set for a model that then predicts whether other txt files are spam.
### Principle
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/bysp.png?raw=true)
### Algorithm outline
1. Read every txt document in the data set (stripping punctuation, keeping only words). Each document's content becomes one wordList; a list docList holds all of them: docList = [wordList1, wordList2, ..., wordListn].
2. Keep a list fullText of all words in all training documents, deduplicated.
3. Randomly split the documents into a training set and a test set.
4. Convert each txt document into a numeric vector so it can be trained on (the conversion relies mainly on wordList and fullText).
5. Train on those vectors. The training mainly computes the fraction (probability) of each word among all spam words, the fraction of each word among all ham words, and the prior probability of spam in the training set.
6. Evaluate on the test set.
### Implementation

    import numpy as np
    import re
    import random
    from math import log

    def classifyNB(vec2Classify, p0Vect, p1Vect, pAbusive):
        # p0Vect: per-word log-probabilities in ham
        # p1Vect: per-word log-probabilities in spam
        # pAbusive: prior probability of spam in the training set
        p1 = sum(vec2Classify * p1Vect) + log(pAbusive)        # element-wise multiply
        p0 = sum(vec2Classify * p0Vect) + log(1.0 - pAbusive)
        if p1 > p0:
            return 1
        else:
            return 0


    def trainNB0(trainMat, trainClasses):
        numTrainDocs = len(trainMat)  # number of training documents (emails)
        numWords = len(trainMat[0])   # vocabulary size (unique words across all emails)
        # sum(trainClasses) is the number of spam emails in the training set;
        # float(numTrainDocs) is the total number of training emails.
        pAbusive = sum(trainClasses) / float(numTrainDocs)  # prior probability of spam
        p0Num = np.ones(numWords)  # per-word counts in ham (start at 1: Laplace smoothing)
        p1Num = np.ones(numWords)  # per-word counts in spam
        p0Denom = 2.0              # total word count in ham (start at 2 for smoothing)
        p1Denom = 2.0              # total word count in spam
        for i in range(numTrainDocs):
            # class 0 = ham, class 1 = spam
            if trainClasses[i] == 1:
                p1Num += trainMat[i]
                p1Denom += sum(trainMat[i])
            else:
                p0Num += trainMat[i]
                p0Denom += sum(trainMat[i])

        p1Vect = np.log(p1Num / p1Denom)  # log fraction of each word among all spam words
        p0Vect = np.log(p0Num / p0Denom)  # log fraction of each word among all ham words

        return (p0Vect, p1Vect, pAbusive)


    def txtWords2Vec(single_word_list, inputSet):
        returnVec = [0] * len(single_word_list)
        for word in inputSet:
            if word in single_word_list:
                returnVec[single_word_list.index(word)] += 1
        return returnVec


    def textParse(text):
        word_list = re.split(r'\W+', text)  # split the text on non-word characters
        return [word.lower() for word in word_list if len(word) > 2]


    def main():
        docList = []    # the word list of each email
        classList = []  # the class of each email (ham/spam)
        fullText = []   # all words of all emails
        for i in range(1, 22):  # read emails 1-21 from each folder
            wordList = textParse(open(r'E:\code\jupyter_notebook\email\ham\%d.txt' % i, 'r').read())
            docList.append(wordList)
            fullText.extend(wordList)
            classList.append(0)  # 0 = ham

            wordList = textParse(open(r'E:\code\jupyter_notebook\email\spam\%d.txt' % i, 'r').read())
            docList.append(wordList)
            fullText.extend(wordList)
            classList.append(1)  # 1 = spam
        single_word_list = list(set(fullText))  # all words of all emails, deduplicated

        # Of the n emails, pick m at random as the test set; the rest train the model.
        print(len(docList))
        trainingSet = list(range(len(docList)))
        testSet = []
        for i in range(10):
            # random.uniform(x, y) returns a random float in the given range
            randIndex = int(random.uniform(0, len(trainingSet)))
            testSet.append(trainingSet[randIndex])
            del(trainingSet[randIndex])

        trainMat = []
        trainClasses = []
        for docIndex in trainingSet:
            # Turn each email into a count vector, e.g. [0, 0, 1, 1, 4, 2, 0, 0]:
            # each number is how often the word at that position occurs in the email.
            txt_vec = txtWords2Vec(single_word_list, docList[docIndex])
            trainMat.append(txt_vec)
            trainClasses.append(classList[docIndex])

        # Train on the training set.
        tuple_p = trainNB0(np.array(trainMat), np.array(trainClasses))
        p0Vect = tuple_p[0]    # per-word log-probabilities in ham
        p1Vect = tuple_p[1]    # per-word log-probabilities in spam
        pAbusive = tuple_p[2]  # prior probability of spam
        print(pAbusive)
        errorCount = 0
        for docIndex in testSet:
            wordVector = txtWords2Vec(single_word_list, docList[docIndex])
            predict_result = classifyNB(np.array(wordVector), p0Vect, p1Vect, pAbusive)  # prediction
            real_result = classList[docIndex]  # ground truth
            print('predicted %s, actual %s' % (predict_result, real_result))
            if predict_result != real_result:
                print('misclassified email:', docIndex)
                errorCount += 1
        print('%s misclassified' % errorCount)

    if __name__ == '__main__':
        main()
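One detail of the listing above worth a sanity check: trainNB0 and classifyNB work with logarithms because multiplying hundreds of small per-word probabilities underflows IEEE floats to zero, while the log-domain sum stays perfectly representable. A tiny illustration (the numbers are invented):

    import math

    print(0.001 ** 400)           # 0.0 -- the raw product underflows, all information lost
    print(400 * math.log(0.001))  # about -2763.1 -- the equivalent log-domain sum is fine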
### Result
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/bysres.png?raw=true)
--------------------------------------------------------------------------------
/机器学习笔记一--基本概念.md:
--------------------------------------------------------------------------------
# Basic concepts
## 1. What is machine learning?
* An interdisciplinary field touching probability theory, statistics, approximation theory, convex analysis, computational complexity theory and more.
* It studies how machines can simulate or implement human learning behavior so as to acquire new knowledge or skills, and reorganize existing knowledge to keep improving their own performance.

**Learning: given experience E, a family of tasks T and a performance measure P, a computer is said to learn if, as experience E accumulates, its performance P on the tasks T improves.**

## 2. Terminology
Training set: the data set used to train and produce a model or algorithm.

Testing set: the data set used exclusively to evaluate the learned model or algorithm.

Feature vector (features): the collection of attributes of an instance, usually represented as a vector.

Label: the class mark of an instance.

![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/ml1.png?raw=true)

Positive example; negative example.

Error: the difference between the learner's (model's) predicted output and the sample's true output.

Training (empirical) error: the error of the learner on the training set.

Generalization error: the error of the learner on new samples.

Classification: the target label is categorical (discrete). Typical algorithms:

  1. Decision trees (the ID3 induction algorithm)
  2. Nearest neighbors (KNN)
  3. Support vector machines (SVM)
  4. Neural networks

Regression: the target label is a continuous value.
## 3. Kinds of machine learning
1. Supervised learning: the training set carries class labels (classification, regression).

2. Unsupervised learning: the training set has no class labels (clustering, association rules).

3. Semi-supervised learning.
## 4. Machine learning workflow
Steps:
1. Split the data into a training set and a testing set.

2. Train the algorithm on the training set and its feature vectors.

3. Apply the learned algorithm to the testing set to evaluate it.
--------------------------------------------------------------------------------
/机器学习笔记三--多变量线性回归.md:
--------------------------------------------------------------------------------
# Linear Regression with Multiple Variables
## 1. Multiple features
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/multivar.png?raw=true)
## 2. Differentiating composite functions (the chain rule)
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/dx.png?raw=true)
## 3. Gradient descent with multiple variables
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/multigradient.png?raw=true)
## 4. Feature scaling and the learning rate
With many features we should make sure they all share a similar scale; **this helps gradient descent converge faster.**

![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/featuremin.png?raw=true)

Every iteration of gradient descent is affected by the learning rate: **if the learning rate α is too small, reaching convergence takes very many iterations; if α is too large, an iteration may fail to decrease the cost function and can overshoot the local minimum, so the algorithm never converges.**

Learning rates worth trying: α = 0.01, 0.03, 0.1, 0.3, 1, 3, 10.

## 5. The normal equation
The closed-form normal-equation solution:

![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/xq.png?raw=true)

![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/X.png?raw=true)

Derivation: https://blog.csdn.net/weixin_39449570/article/details/78520543
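The normal equation θ = (XᵀX)⁻¹Xᵀy is a one-liner in NumPy. A minimal sketch with a made-up design matrix (np.linalg.pinv or lstsq is the safer choice when XᵀX is singular):

    import numpy as np

    X = np.array([[1.0, 0.5], [1.0, 1.5], [1.0, 2.0]])  # first column: intercept term
    y = np.array([1.0, 2.9, 4.1])
    theta = np.linalg.inv(X.T @ X) @ X.T @ y
    print(theta)  # [intercept, slope] minimizing the squared error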
--------------------------------------------------------------------------------
/机器学习笔记二--单变量线性回归.md:
--------------------------------------------------------------------------------
# Linear regression with one variable
## 1. The model
The single-variable linear regression model:
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/ml2.png?raw=true)
## 2. The cost function
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/costfunc.png?raw=true)
## 3. Gradient descent
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/gdesc.png?raw=true)
--------------------------------------------------------------------------------
/机器学习笔记五--正则化.md:
--------------------------------------------------------------------------------
# Regularization
## 1. Overfitting
A model trained on a large training set falls into one of three cases: underfitting, overfitting, or a good fit.

1. Underfitting: the model cannot fit the training data well; the error is too large and only a small part of the training set is matched.
2. Overfitting: the model insists on fitting every single training example -- its training error can essentially reach 0 -- but it predicts new data poorly. In other words, an overfit model matches the training set very well yet does not generalize to test data.

How can overfitting be handled?

1. Drop features that do not help prediction, either by hand-picking which features to keep or with an automatic feature-selection algorithm.
2. Regularization: keep all features but shrink their weights, i.e. the corresponding parameters Θ. With smaller weights, each feature has less influence on the result.
## 2. Regularizing the cost function
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/regular1.png?raw=true)

## 3. Regularized linear regression
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/regular2.png?raw=true)
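As a sketch of what the regularized normal equation in the pictures above amounts to in code -- my own illustration; the name `ridge_theta`, the default λ and leaving the intercept unpenalized are assumptions:

    import numpy as np

    def ridge_theta(X, y, lam=0.1):
        # Regularized normal equation: (X^T X + lam * L)^-1 X^T y, where L is
        # the identity with L[0,0] = 0 so the intercept term is not shrunk.
        L = np.eye(X.shape[1])
        L[0, 0] = 0.0
        return np.linalg.inv(X.T @ X + lam * L) @ X.T @ y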
--------------------------------------------------------------------------------
/机器学习笔记四--逻辑回归.md:
--------------------------------------------------------------------------------
# Logistic Regression
## 1. A short recap
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/ML3STEP.png?raw=true)
## 2. The model (hypothesis)
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/logisticfunc.png?raw=true)
## 3. The cost function
**For the linear regression model we defined the cost function as the sum of squared model errors.**

**If the logistic-regression cost were built from squared errors in the same way, we would get a non-convex function, and non-convex functions are ill-suited to finding the minimum by gradient descent.**

**So the cost function of logistic regression has to be defined differently:**
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/costlogis.png?raw=true)
## 4. Differentiating for gradient descent

![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/daoshulogis.png?raw=true)
--------------------------------------------------------------------------------
/线性回归.md:
--------------------------------------------------------------------------------
# Predicting continuous data: linear regression
The mathematical model behind linear regression is covered in
[ML notes 3: multivariate linear regression](https://github.com/daacheng/pythonForMachineLearning/blob/master/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%E4%B8%89--%E5%A4%9A%E5%8F%98%E9%87%8F%E7%BA%BF%E6%80%A7%E5%9B%9E%E5%BD%92.md)
## 1. What is regression?
**Regression studies the statistical relationship between one set of random variables (y1, y2, ..., yn) and another set of variables (x1, x2, ..., xn).**
**Regression analysis is a mathematical model: when the function is linear with unknown parameters it is a linear regression model; when the function is nonlinear with unknown parameters it is a nonlinear regression model.**
## 2. Solving linear regression with the normal equation
### 2.1 Approach
1. Given data (y1, y2, ..., yn) and (x1, x2, ..., xn), first plot the two with matplotlib and check whether a linear relationship between them is plausible.
2. If it is, then with the independent variable X and dependent variable Y known, the normal equation directly yields the parameters W of the linear model.

**The normal equation:**

![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/normalfunc.jpg?raw=true)

**Its derivation:**
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/normalfunctd.jpg?raw=true)
### 2.2 Locally weighted linear regression (LWLR)
#### Why local weighting?
**The plain normal equation gives every sample the same weight. In practice, when predicting the value at a query point, weighting each training sample by how close it is to the query point gives a more accurate prediction.**
Idea: compute the distance between the query point and every training sample and assign each sample a weight accordingly (the closer the sample, the larger its weight; the farther, the smaller). The parameters W fitted with those weights then give a more accurate value at the query point.
#### The kernel that assigns sample weights
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/localweight.jpg?raw=true)
#### Solving locally weighted linear regression
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/localweightres.jpg?raw=true)
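Before the full listing in 2.3, the kernel above in one line of NumPy -- my sketch, not repository code; `kernel_weight` and its default bandwidth are assumptions (the listing below uses k = 0.05):

    import numpy as np

    def kernel_weight(x_q, x_j, k=0.05):
        # Gaussian kernel: weight of sample x_j for query point x_q.
        # k controls how fast the weight decays with distance.
        diff = x_q - x_j
        return np.exp(diff @ diff / (-2.0 * k ** 2))  # 1.0 at distance 0, ~0 far away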
### 2.4 Results
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/resregression.jpg?raw=true)
## 3. Predicting the Age of Abalone
[Abalone dataset (txt)](https://github.com/daacheng/pythonForMachineLearning/blob/master/dataset/abalone.txt)
### Code

    %matplotlib inline
    import matplotlib
    import matplotlib.pyplot as plt
    import numpy as np
    import csv

    def loadDataSet(fileName):
        # Every column except the last is a feature; the last column is the label.
        dataMat = []; labelMat = []
        with open(fileName,'r') as f:
            reader = csv.reader(f,delimiter = '\t')
            for row in reader:
                row = [float(x) for x in row]
                dataMat.append(row[:-1])
                labelMat.append(row[-1])
        return dataMat,labelMat

    def get_w(X,Y):
        # Normal equation: given X and Y, solve for w = (X^T X)^(-1) X^T Y
        X_Xt_I = np.linalg.inv(np.dot(X.T,X))
        w = np.dot(np.dot(X_Xt_I,X.T),Y)
        return w

    def get_w_lwlr(X,Y,x_test):
        m = X.shape[0]
        weight = np.eye(m)
        for j in range(m):
            # Weight each training sample by its closeness to the query point
            # (Gaussian kernel with bandwidth k = 0.01)
            diff = x_test-X[j,:]
            weight[j,j] = np.exp(np.dot(diff.T,diff)/(-2*0.01**2))

        # With the local weights in place, re-solve for the new parameters w_lwlr
        X_Xt_I_lwlr = np.linalg.inv(np.dot(np.dot(X.T,weight),X))
        w_lwlr = np.dot(np.dot(np.dot(X_Xt_I_lwlr,X.T),weight),Y)
        return w_lwlr


    def main():
        dataMat,labelMat = loadDataSet('abalone.txt')
        X = np.array(dataMat)
        Y = np.array(labelMat).T

        # Test sample
        test_x1 = [-1,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21]

        # Normal equation: given X and Y, solve for the parameters W
        w = get_w(X,Y)
        print('Normal-equation parameters:',w)

        w_lwlr = get_w_lwlr(X,Y,test_x1)
        print('Locally weighted parameters:',w_lwlr)

        print('True age of the test abalone: 9')

        y1 = np.dot(np.array(test_x1),w.T)
        print('Normal-equation prediction:',y1)

        y2 = np.dot(np.array(test_x1),w_lwlr.T)
        print('Locally weighted prediction:',y2)

    if __name__ == '__main__':
        main()

### Results
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/resabalone.jpg?raw=true)
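A single test point says little about how well the local weighting generalizes. One way to quantify the comparison (a sketch, not part of the original code: it assumes `loadDataSet` and `get_w` from the listing above, and the train/test split and the bandwidth k=1.0 are arbitrary choices) is to hold out a slice of the data and compare the residual sum of squares (RSS):

    # Sketch: compare RSS on a held-out slice of the abalone data.
    def get_w_lwlr_k(X, Y, x_test, k):
        # Same as get_w_lwlr above, but with the bandwidth as a parameter;
        # np.linalg.solve avoids forming the matrix inverse explicitly.
        weight = np.eye(X.shape[0])
        for j in range(X.shape[0]):
            diff = x_test - X[j, :]
            weight[j, j] = np.exp(np.dot(diff, diff) / (-2 * k ** 2))
        return np.linalg.solve(X.T @ weight @ X, X.T @ weight @ Y)

    dataMat, labelMat = loadDataSet('abalone.txt')
    X, Y = np.array(dataMat), np.array(labelMat)
    X_tr, Y_tr = X[:99], Y[:99]          # first 99 rows for training
    X_te, Y_te = X[100:199], Y[100:199]  # the next 99 rows for testing

    rss_ne = np.sum((X_te @ get_w(X_tr, Y_tr) - Y_te) ** 2)
    preds = np.array([x @ get_w_lwlr_k(X_tr, Y_tr, x, 1.0) for x in X_te])
    rss_lw = np.sum((preds - Y_te) ** 2)
    print('RSS (normal equation):', rss_ne, ' RSS (LWLR, k=1.0):', rss_lw)

Note that very small bandwidths (such as the 0.01 hard-coded above) can make the weighted matrix nearly singular for query points far from the training data, so larger values tend to be safer on this dataset.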
--------------------------------------------------------------------------------
/自然语言处理一--基本概念理解.md:
--------------------------------------------------------------------------------
# Natural Language Processing I -- Basic Concepts
## Preface
When I first came to NLP it turned out completely different from what I had imagined: a little disorienting, but very interesting. It is hard to picture how language, something that seems to have nothing to do with mathematics, can be characterized by linguists through mathematical formulas. Put simply, "language" itself can be defined mathematically. What I also did not expect is how dry the theory in between is; without the video lectures I really could not have made sense of those formulas. Once the definitions click, though, looking at what a language is from a mathematical angle is fascinating.
## Basic concepts
### 1. NLP
NLP: Natural Language Processing. Broadly, the techniques that use computers as a tool to process language in its written or spoken form.

Current NLP research covers, among other things: information retrieval, machine translation, document classification, question answering, information filtering, text mining, and speech recognition.

### 2. Natural language
A natural language is a language that has evolved naturally along with a culture. It is made up of speech sounds, vocabulary, and grammar; speech is the physical carrier of language and its most primitive form, while writing is the symbol system used to record it.

### 3. What is a graph?
#### 3.1 Undirected graphs
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/wxt.jpg?raw=true)
#### 3.2 Directed graphs
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/yxt.jpg?raw=true)
#### 3.3 Connected graphs and cycles
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/ltt.jpg?raw=true)
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/hl.jpg?raw=true)
#### 3.4 Trees
An undirected graph with no cycles is called a forest. A connected graph with no cycles is called a tree (undirected).
### 4. Strings
#### 4.1 Definition
**Let ∑ be a finite set of characters, for example the 26 English letters; each element of ∑ is called a character. A finite sequence formed by joining characters from ∑ is called a string over ∑.**
The string containing no characters is the empty string ε. The set of all strings over ∑, including the empty string, is written ∑\*.
#### 4.2 Basic string operations
##### Concatenation

    x=abc, y=def  ->  xy=abcdef
##### Product of string sets

    A={ab,ac}, B={bc,bd}  ->  AB={abbc,abbd,acbc,acbd}
##### Closure

    V={a,b}, V*={ε,a,b,aa,ab,ba,bb,aaa,…}

![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/bibao.png)

### 5. Definition of a formal grammar
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/wenfa.png)

For example, the grammar G = ({A}, {0,1}, {A→0, A→0A}, A) generates the language L(G) = {0, 00, 000, 0000, …}
### 6. Definition of a language
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/yuy.png)
--------------------------------------------------------------------------------
/自然语言处理二--4型文法与自动机.md:
--------------------------------------------------------------------------------
# The Four Grammar Types and Automata
Definition of a (formal) grammar:
G = (N, ∑, P, S)
See [自然语言处理一--基本概念](https://github.com/daacheng/pythonForMachineLearning/blob/master/%E8%87%AA%E7%84%B6%E8%AF%AD%E8%A8%80%E5%A4%84%E7%90%86%E4%B8%80--%E5%9F%BA%E6%9C%AC%E6%A6%82%E5%BF%B5%E7%90%86%E8%A7%A3.md)
## 1. The four grammar types
### 1. Regular grammar (type-3 grammar, RG)
A grammar G is regular if every rewrite rule in P satisfies:

A→Bx or A→x, where A,B∈N, x∈∑ (symmetrically, the right-linear form A→xB or A→x) **(Interpretation: A and B are nonterminals, x is a terminal; in every production the nonterminal may appear on only one side of the terminals.)**

Such a grammar G is called a regular grammar, also a type-3 grammar.

For example:

    G = (N,∑,P,S), where N={S,A,B}, ∑={a,b}
    P: S→aA
    or P: A→aA
    or P: A→bbB
    are all regular (right-linear) rules.

### 2. Context-free grammar (type-2 grammar, CFG)
A grammar G is context-free if every rewrite rule in P satisfies:

A→α, A∈N, α∈(N∪∑)* **(Interpretation: A is a single nonterminal, and α is an arbitrary string of nonterminals and terminals.)**

For example:

    G = (N,∑,P,S), where N={S,A,B,C}, ∑={a,b,c}
    P: S→ABC
    or P: A→BA|c
    or P: A→AC|AcB
    are all context-free rules.

### 3. Context-sensitive grammar (type-1 grammar, CSG)
A grammar G is context-sensitive if every rewrite rule in P satisfies:

αAβ→αγβ, A∈N, α,β∈(N∪∑)*, γ∈(N∪∑)+ **(Interpretation: A is a nonterminal, α and β are arbitrary strings over N∪∑, and γ contains at least one symbol.)**

### 4. Unrestricted grammar (type-0 grammar, phrase structure grammar, PSG)
A grammar G is unrestricted if every rewrite rule in P satisfies:

α→β, α∈(N∪∑)+, β∈(N∪∑)* **(Interpretation: α is a non-empty string over N∪∑, and β is an arbitrary string over N∪∑.)**

### How the four types relate
**Regular grammars (RG) ⊆ context-free grammars (CFG) ⊆ context-sensitive grammars (CSG) ⊆ unrestricted grammars (PSG)**

## 2. Automata
**An automaton is an abstract theoretical tool for analyzing problems; it has no physical form. It is an idealized "machine": a mathematically defined computing device that expresses a mechanical computation process requiring no human intervention.**
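Regular grammars and finite automata describe exactly the same class of languages, which makes a tiny automaton a concrete way to ground the abstract definition above. Here is a minimal sketch (not from the original notes; the state names and transition table are invented for the example): a deterministic finite automaton accepting the language {0, 00, 000, …} generated by the grammar G = ({A}, {0,1}, {A→0, A→0A}, A) from the previous note.

    # Minimal DFA sketch: accepts strings of one or more '0's over {0,1},
    # i.e. the language generated by G = ({A}, {0,1}, {A->0, A->0A}, A).
    # Any string containing a '1' falls into the 'dead' state and is rejected.
    TRANSITIONS = {
        ('start',  '0'): 'accept',
        ('accept', '0'): 'accept',
        ('start',  '1'): 'dead',
        ('accept', '1'): 'dead',
        ('dead',   '0'): 'dead',
        ('dead',   '1'): 'dead',
    }

    def accepts(s):
        state = 'start'
        for ch in s:
            state = TRANSITIONS[(state, ch)]
        return state == 'accept'

    print([w for w in ('0', '00', '000', '1', '01', '') if accepts(w)])
    # -> ['0', '00', '000']

The "mechanical, no human intervention" character of an automaton is visible here: the machine does nothing but look up the next state for each input symbol.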
--------------------------------------------------------------------------------
/逻辑回归.md:
--------------------------------------------------------------------------------
# Logistic Regression (classification)
## 1. Theoretical background
The analysis of the logistic regression algorithm is covered in detail in [机器学习笔记四--逻辑回归](https://github.com/daacheng/pythonForMachineLearning/blob/master/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%E5%9B%9B--%E9%80%BB%E8%BE%91%E5%9B%9E%E5%BD%92.md). **It introduces the algorithm's hypothesis function, cost function, and the derivative formula used by gradient descent.**

**This note applies those pieces to a concrete scenario: implement the three functions in Python (the hypothesis function, the cost function, and gradient descent), use gradient descent to find the optimal parameters, and classify real data.**

![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/logisticfunc.png?raw=true)

![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/costlogis.png?raw=true)

![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/daoshulogis.png?raw=true)

## 2. Implementing logistic regression in Python
### 2.1 The dataset
**A slice of the training set is shown below: two features x1 and x2, and a binary class label y (0 or 1). The first step is to read the training data from the txt file and build the feature matrix X and the label vector y.**

    x1          x2          y
    -0.017612   14.053064   0
    -1.395634   4.662541    1
    -0.752157   6.538620    0
    -1.322371   7.152853    0
    0.423363    11.054677   0
    0.406704    7.067335    1

### 2.2 Matrix operations
**All three formulas of logistic regression are convenient to evaluate as matrix operations; here we only illustrate computing the hypothesis values as a matrix product.**
![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/juzhen.png?raw=true)

### 2.3 Code

    %matplotlib inline
    import matplotlib
    import matplotlib.pyplot as plt
    import csv
    import numpy as np
    import math

    def loadDataset():
        # Each row holds x1, x2 and the label; prepend a constant 1.0 as the bias feature.
        data=[]
        labels=[]
        with open('logisticDataset.txt','r') as f:
            reader = csv.reader(f,delimiter='\t')
            for row in reader:
                data.append([1.0, float(row[0]), float(row[1])])
                labels.append(int(row[2]))
        return data,labels

    def plotBestFit(W):
        # Scatter-plot the training data, colored by class
        dataMat,labelMat=loadDataset()
        dataArr = np.array(dataMat)
        n = np.shape(dataArr)[0]
        xcord1 = []
        ycord1 = []
        xcord2 = []
        ycord2 = []
        for i in range(n):
            if int(labelMat[i])== 1:
                xcord1.append(dataArr[i,1]); ycord1.append(dataArr[i,2])
            else:
                xcord2.append(dataArr[i,1]); ycord2.append(dataArr[i,2])
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
        ax.scatter(xcord2, ycord2, s=30, c='green')

        # Draw the decision boundary w0 + w1*x1 + w2*x2 = 0
        x = np.arange(-3.0,3.0,0.1)
        y = (-W[0]-W[1]*x)/W[2]
        ax.plot(x,y)
        plt.show()

    def plotloss(loss_list):
        x = np.arange(0,30,0.01)
        plt.plot(x,np.array(loss_list),label = 'linear')

        plt.xlabel('time') # number of gradient-descent updates
        plt.ylabel('loss') # loss value
        plt.title('loss trend') # how the loss evolves as W keeps being updated
        plt.legend() # legend
        plt.show()


    def main():
        # Read the training data from the txt file
        data, labels = loadDataset()
        # Convert to matrix form for the computations below
        # Feature matrix X
        X = np.array(data)
        # Label matrix y
        y = np.array(labels).reshape(-1,1)
        # Randomly initialize the weight matrix W; reshape((-1,1)) forces a single column
        W = 0.001*np.random.randn(3,1).reshape((-1,1))
        # m is the number of training examples
        m = len(X)
        # Learning rate for gradient descent
        learn_rate = 0.03

        loss_list = []
        # Gradient descent: keep updating W to drive the loss down
        for i in range(3000):
            # The core of the implementation: numpy matrix operations compute
            # the hypothesis, the loss, and the gradient step
            # Hypothesis h_W(x) = sigmoid(XW)
            g_x = np.dot(X,W)
            h_x = 1/(1+np.exp(-g_x))

            # Cross-entropy cost function
            loss = np.log(h_x)*y+(1-y)*np.log(1-h_x)
            loss = -np.sum(loss)/m
            loss_list.append(loss)

            # Gradient step on the weights W
            dW = X.T.dot(h_x-y)/m
            W += -learn_rate*dW

        # Visualize the learned W
        print('Optimal W:')
        print(W)
        print('Resulting decision boundary:')
        plotBestFit(W)
        print('Loss trend as W keeps being updated:')
        plotloss(loss_list)

        # Classify a test example: which class does it belong to?
        test_x = np.array([1,-1.395634,4.662541])
        test_y = 1/(1+np.exp(-np.dot(test_x,W)))
        print(test_y)

    if __name__=='__main__':
        main()


### 2.4 Results

![](https://github.com/daacheng/pythonForMachineLearning/blob/master/pic/reslogistic.png?raw=true)
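As a sanity check on the hand-rolled gradient descent, the same data can be fit with scikit-learn. This is a sketch, not part of the original notes: it assumes scikit-learn is installed and that `loadDataset` from the listing above is in scope. Note that sklearn's `LogisticRegression` applies L2 regularization by default, so its coefficients will be close to, but not identical with, the W found above.

    # Sketch: cross-check the hand-rolled solution against scikit-learn.
    import numpy as np
    from sklearn.linear_model import LogisticRegression

    data, labels = loadDataset()
    X = np.array(data)[:, 1:]   # drop the constant column; sklearn fits its own intercept
    y = np.array(labels)

    clf = LogisticRegression()
    clf.fit(X, y)
    print('sklearn intercept and weights:', clf.intercept_, clf.coef_)
    # Probability of class 1 for the same test point used above
    print('P(y=1):', clf.predict_proba([[-1.395634, 4.662541]])[0, 1])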
--------------------------------------------------------------------------------